Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net
download: linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz
linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2
linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
668 files changed, 356308 insertions, 0 deletions
diff --git a/net/802/Makefile b/net/802/Makefile
new file mode 100644
index 00000000000..01861929591
--- /dev/null
+++ b/net/802/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux 802.x protocol layers.
+#
+
+obj-y			:= p8023.o
+
+# Check the p8022 selections against net/core/Makefile.
+obj-$(CONFIG_SYSCTL)	+= sysctl_net_802.o
+obj-$(CONFIG_LLC)	+= p8022.o psnap.o
+obj-$(CONFIG_TR)	+= p8022.o psnap.o tr.o sysctl_net_802.o
+obj-$(CONFIG_NET_FC)	+=                 fc.o
+obj-$(CONFIG_FDDI)	+=                 fddi.o
+obj-$(CONFIG_HIPPI)	+=                 hippi.o
+obj-$(CONFIG_IPX)	+= p8022.o psnap.o
+obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
diff --git a/net/802/fc.c b/net/802/fc.c
new file mode 100644
index 00000000000..640d34e026c
--- /dev/null
+++ b/net/802/fc.c
@@ -0,0 +1,130 @@
+/*
+ * NET3:	Fibre Channel device handling subroutines
+ * 
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *		Vineet Abraham <vma@iol.unh.edu>
+ *		v 1.0 03/22/99
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/fcdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <net/arp.h>
+
+/*
+ *	Put the headers on a Fibre Channel packet. 
+ */
+ 
+static int fc_header(struct sk_buff *skb, struct net_device *dev,
+		     unsigned short type,
+		     void *daddr, void *saddr, unsigned len) 
+{
+	struct fch_hdr *fch;
+	int hdr_len;
+
+	/* 
+	 * Add the 802.2 SNAP header if IP as the IPv4 code calls  
+	 * dev->hard_header directly.
+	 */
+	if (type == ETH_P_IP || type == ETH_P_ARP)
+	{
+		struct fcllc *fcllc;
+
+		hdr_len = sizeof(struct fch_hdr) + sizeof(struct fcllc);
+		fch = (struct fch_hdr *)skb_push(skb, hdr_len);
+		fcllc = (struct fcllc *)(fch+1);
+		fcllc->dsap = fcllc->ssap = EXTENDED_SAP;
+		fcllc->llc = UI_CMD;
+		fcllc->protid[0] = fcllc->protid[1] = fcllc->protid[2] = 0x00;
+		fcllc->ethertype = htons(type);
+	}
+	else
+	{
+		hdr_len = sizeof(struct fch_hdr);
+		fch = (struct fch_hdr *)skb_push(skb, hdr_len);	
+	}
+
+	if(saddr)
+		memcpy(fch->saddr,saddr,dev->addr_len);
+	else
+		memcpy(fch->saddr,dev->dev_addr,dev->addr_len);
+
+	if(daddr) 
+	{
+		memcpy(fch->daddr,daddr,dev->addr_len);
+		return(hdr_len);
+	}
+	return -hdr_len;
+}
+	
+/*
+ *	A neighbour discovery of some species (eg arp) has completed. We
+ *	can now send the packet.
+ */
+ 
+static int fc_rebuild_header(struct sk_buff *skb) 
+{
+	struct fch_hdr *fch=(struct fch_hdr *)skb->data;
+	struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
+	if(fcllc->ethertype != htons(ETH_P_IP)) {
+		printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype));
+		return 0;
+	}
+#ifdef CONFIG_INET
+	return arp_find(fch->daddr, skb);
+#else
+	return 0;
+#endif
+}
+
+static void fc_setup(struct net_device *dev)
+{
+	dev->hard_header	= fc_header;
+	dev->rebuild_header	= fc_rebuild_header;
+                
+	dev->type		= ARPHRD_IEEE802;
+	dev->hard_header_len	= FC_HLEN;
+	dev->mtu		= 2024;
+	dev->addr_len		= FC_ALEN;
+	dev->tx_queue_len	= 100; /* Long queues on fc */
+	dev->flags		= IFF_BROADCAST;
+
+	memset(dev->broadcast, 0xFF, FC_ALEN);
+}
+
+/**
+ * alloc_fcdev - Register fibre channel device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *	for this fibre channel device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+struct net_device *alloc_fcdev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "fc%d", fc_setup);
+}
+EXPORT_SYMBOL(alloc_fcdev);
diff --git a/net/802/fddi.c b/net/802/fddi.c
new file mode 100644
index 00000000000..f9a31a9f70f
--- /dev/null
+++ b/net/802/fddi.c
@@ -0,0 +1,210 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		FDDI-type device handling.
+ *
+ * Version:	@(#)fddi.c	1.0.0	08/12/96
+ *
+ * Authors:	Lawrence V. Stefani, <stefani@lkg.dec.com>
+ *
+ *		fddi.c is based on previous eth.c and tr.c work by
+ *			Ross Biro, <bir7@leland.Stanford.Edu>
+ *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *			Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *			Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *			Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * 
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Changes
+ *		Alan Cox		:	New arp/rebuild header
+ *		Maciej W. Rozycki	:	IPv6 support
+ */
+ 
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/fddidevice.h>
+#include <linux/if_ether.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <net/arp.h>
+#include <net/sock.h>
+
+/*
+ * Create the FDDI MAC header for an arbitrary protocol layer
+ *
+ * saddr=NULL	means use device source address
+ * daddr=NULL	means leave destination address (eg unresolved arp)
+ */
+
+static int fddi_header(struct sk_buff *skb, struct net_device *dev,
+		       unsigned short type,
+		       void *daddr, void *saddr, unsigned len)
+{
+	int hl = FDDI_K_SNAP_HLEN;
+	struct fddihdr *fddi;
+	
+	if(type != ETH_P_IP && type != ETH_P_IPV6 && type != ETH_P_ARP)
+		hl=FDDI_K_8022_HLEN-3;
+	fddi = (struct fddihdr *)skb_push(skb, hl);
+	fddi->fc			 = FDDI_FC_K_ASYNC_LLC_DEF;
+	if(type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
+	{
+		fddi->hdr.llc_snap.dsap		 = FDDI_EXTENDED_SAP;
+		fddi->hdr.llc_snap.ssap		 = FDDI_EXTENDED_SAP;
+		fddi->hdr.llc_snap.ctrl		 = FDDI_UI_CMD;
+		fddi->hdr.llc_snap.oui[0]	 = 0x00;
+		fddi->hdr.llc_snap.oui[1]	 = 0x00;
+		fddi->hdr.llc_snap.oui[2]	 = 0x00;
+		fddi->hdr.llc_snap.ethertype	 = htons(type);
+	}
+
+	/* Set the source and destination hardware addresses */
+	 
+	if (saddr != NULL)
+		memcpy(fddi->saddr, saddr, dev->addr_len);
+	else
+		memcpy(fddi->saddr, dev->dev_addr, dev->addr_len);
+
+	if (daddr != NULL)
+	{
+		memcpy(fddi->daddr, daddr, dev->addr_len);
+		return(hl);
+	}
+
+	return(-hl);
+}
+
+
+/*
+ * Rebuild the FDDI MAC header. This is called after an ARP
+ * (or in future other address resolution) has completed on
+ * this sk_buff.  We now let ARP fill in the other fields.
+ */
+ 
+static int fddi_rebuild_header(struct sk_buff	*skb)
+{
+	struct fddihdr *fddi = (struct fddihdr *)skb->data;
+
+#ifdef CONFIG_INET
+	if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP))
+		/* Try to get ARP to resolve the header and fill destination address */
+		return arp_find(fddi->daddr, skb);
+	else
+#endif	
+	{
+		printk("%s: Don't know how to resolve type %02X addresses.\n",
+		       skb->dev->name, htons(fddi->hdr.llc_snap.ethertype));
+		return(0);
+	}
+}
+
+
+/*
+ * Determine the packet's protocol ID and fill in skb fields.
+ * This routine is called before an incoming packet is passed
+ * up.  It's used to fill in specific skb fields and to set
+ * the proper pointer to the start of packet data (skb->data).
+ */
+ 
+unsigned short fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+	struct fddihdr *fddi = (struct fddihdr *)skb->data;
+	unsigned short type;
+	
+	/*
+	 * Set mac.raw field to point to FC byte, set data field to point
+	 * to start of packet data.  Assume 802.2 SNAP frames for now.
+	 */
+
+	skb->mac.raw = skb->data;	/* point to frame control (FC) */
+	
+	if(fddi->hdr.llc_8022_1.dsap==0xe0)
+	{
+		skb_pull(skb, FDDI_K_8022_HLEN-3);
+		type = __constant_htons(ETH_P_802_2);
+	}
+	else
+	{
+		skb_pull(skb, FDDI_K_SNAP_HLEN);		/* adjust for 21 byte header */
+		type=fddi->hdr.llc_snap.ethertype;
+	}
+	
+	/* Set packet type based on destination address and flag settings */
+			
+	if (*fddi->daddr & 0x01)
+	{
+		if (memcmp(fddi->daddr, dev->broadcast, FDDI_K_ALEN) == 0)
+			skb->pkt_type = PACKET_BROADCAST;
+		else
+			skb->pkt_type = PACKET_MULTICAST;
+	}
+	
+	else if (dev->flags & IFF_PROMISC)
+	{
+		if (memcmp(fddi->daddr, dev->dev_addr, FDDI_K_ALEN))
+			skb->pkt_type = PACKET_OTHERHOST;
+	}
+
+	/* Assume 802.2 SNAP frames, for now */
+
+	return(type);
+}
+
+EXPORT_SYMBOL(fddi_type_trans);
+
+static int fddi_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
+		return(-EINVAL);
+	dev->mtu = new_mtu;
+	return(0);
+}
+
+static void fddi_setup(struct net_device *dev)
+{
+	dev->change_mtu		= fddi_change_mtu;
+	dev->hard_header	= fddi_header;
+	dev->rebuild_header	= fddi_rebuild_header;
+
+	dev->type		= ARPHRD_FDDI;
+	dev->hard_header_len	= FDDI_K_SNAP_HLEN+3;	/* Assume 802.2 SNAP hdr len + 3 pad bytes */
+	dev->mtu		= FDDI_K_SNAP_DLEN;	/* Assume max payload of 802.2 SNAP frame */
+	dev->addr_len		= FDDI_K_ALEN;
+	dev->tx_queue_len	= 100;			/* Long queues on FDDI */
+	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
+	
+	memset(dev->broadcast, 0xFF, FDDI_K_ALEN);
+}
+
+/**
+ * alloc_fddidev - Register FDDI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *	for this FDDI device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+struct net_device *alloc_fddidev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
+}
+EXPORT_SYMBOL(alloc_fddidev);
diff --git a/net/802/hippi.c b/net/802/hippi.c
new file mode 100644
index 00000000000..4eb135c0afb
--- /dev/null
+++ b/net/802/hippi.c
@@ -0,0 +1,234 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		HIPPI-type device handling.
+ *
+ * Version:	@(#)hippi.c	1.0.0	05/29/97
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Florian  La Roche, <rzsfl@rz.uni-sb.de>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Jes Sorensen, <Jes.Sorensen@cern.ch>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/hippidevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <net/arp.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/system.h>
+
+/*
+ * Create the HIPPI MAC header for an arbitrary protocol layer 
+ *
+ * saddr=NULL	means use device source address
+ * daddr=NULL	means leave destination address (eg unresolved arp)
+ */
+
+static int hippi_header(struct sk_buff *skb, struct net_device *dev,
+			unsigned short type, void *daddr, void *saddr,
+			unsigned len)
+{
+	struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
+
+	if (!len){
+		len = skb->len - HIPPI_HLEN;
+		printk("hippi_header(): length not supplied\n");
+	}
+
+	/*
+	 * Due to the stupidity of the little endian byte-order we
+	 * have to set the fp field this way.
+	 */
+	hip->fp.fixed		= __constant_htonl(0x04800018);
+	hip->fp.d2_size		= htonl(len + 8);
+	hip->le.fc		= 0;
+	hip->le.double_wide	= 0;	/* only HIPPI 800 for the time being */
+	hip->le.message_type	= 0;	/* Data PDU */
+
+	hip->le.dest_addr_type	= 2;	/* 12 bit SC address */
+	hip->le.src_addr_type	= 2;	/* 12 bit SC address */
+
+	memcpy(hip->le.src_switch_addr, dev->dev_addr + 3, 3);
+	memset(&hip->le.reserved, 0, 16);
+
+	hip->snap.dsap		= HIPPI_EXTENDED_SAP;
+	hip->snap.ssap		= HIPPI_EXTENDED_SAP;
+	hip->snap.ctrl		= HIPPI_UI_CMD;
+	hip->snap.oui[0]	= 0x00;
+	hip->snap.oui[1]	= 0x00;
+	hip->snap.oui[2]	= 0x00;
+	hip->snap.ethertype	= htons(type);
+
+	if (daddr)
+	{
+		memcpy(hip->le.dest_switch_addr, daddr + 3, 3);
+		memcpy(&skb->private.ifield, daddr + 2, 4);
+		return HIPPI_HLEN;
+	}
+	return -((int)HIPPI_HLEN);
+}
+
+
+/*
+ * Rebuild the HIPPI MAC header. This is called after an ARP has
+ * completed on this sk_buff. We now let ARP fill in the other fields.
+ */
+
+static int hippi_rebuild_header(struct sk_buff *skb)
+{
+	struct hippi_hdr *hip = (struct hippi_hdr *)skb->data;
+
+	/*
+	 * Only IP is currently supported
+	 */
+	 
+	if(hip->snap.ethertype != __constant_htons(ETH_P_IP)) 
+	{
+		printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
+		return 0;
+	}
+
+	/*
+	 * We don't support dynamic ARP on HIPPI, but we use the ARP
+	 * static ARP tables to hold the I-FIELDs.
+	 */
+	return arp_find(hip->le.daddr, skb);
+}
+
+
+/*
+ *	Determine the packet's protocol ID.
+ */
+ 
+unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+	struct hippi_hdr *hip;
+	
+	hip = (struct hippi_hdr *) skb->data;
+
+	/*
+	 * This is actually wrong ... question is if we really should
+	 * set the raw address here.
+	 */
+	 skb->mac.raw = skb->data;
+	 skb_pull(skb, HIPPI_HLEN);
+
+	/*
+	 * No fancy promisc stuff here now.
+	 */
+
+	return hip->snap.ethertype;
+}
+
+EXPORT_SYMBOL(hippi_type_trans);
+
+static int hippi_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/*
+	 * HIPPI's got these nice large MTUs.
+	 */
+	if ((new_mtu < 68) || (new_mtu > 65280))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return(0);
+}
+
+/*
+ * For HIPPI we will actually use the lower 4 bytes of the hardware
+ * address as the I-FIELD rather than the actual hardware address.
+ */
+static int hippi_mac_addr(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+	if (netif_running(dev))
+		return -EBUSY;
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	return 0;
+}
+
+static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
+{
+	/* Never send broadcast/multicast ARP messages */
+	p->mcast_probes = 0;
+ 
+	/* In IPv6 unicast probes are valid even on NBMA,
+	* because they are encapsulated in normal IPv6 protocol.
+	* Should be a generic flag. 
+	*/
+	if (p->tbl->family != AF_INET6)
+		p->ucast_probes = 0;
+	return 0;
+}
+
+static void hippi_setup(struct net_device *dev)
+{
+	dev->set_multicast_list		= NULL;
+	dev->change_mtu			= hippi_change_mtu;
+	dev->hard_header		= hippi_header;
+	dev->rebuild_header 		= hippi_rebuild_header;
+	dev->set_mac_address 		= hippi_mac_addr;
+	dev->hard_header_parse		= NULL;
+	dev->hard_header_cache		= NULL;
+	dev->header_cache_update	= NULL;
+	dev->neigh_setup 		= hippi_neigh_setup_dev; 
+
+	/*
+	 * We don't support HIPPI `ARP' for the time being, and probably
+	 * never will unless someone else implements it. However we
+	 * still need a fake ARPHRD to make ifconfig and friends play ball.
+	 */
+	dev->type		= ARPHRD_HIPPI;
+	dev->hard_header_len 	= HIPPI_HLEN;
+	dev->mtu		= 65280;
+	dev->addr_len		= HIPPI_ALEN;
+	dev->tx_queue_len	= 25 /* 5 */;
+	memset(dev->broadcast, 0xFF, HIPPI_ALEN);
+
+
+	/*
+	 * HIPPI doesn't support broadcast+multicast and we only use
+	 * static ARP tables. ARP is disabled by hippi_neigh_setup_dev. 
+	 */
+	dev->flags = 0; 
+}
+
+/**
+ * alloc_hippi_dev - Register HIPPI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *	for this HIPPI device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_hippi_dev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "hip%d", hippi_setup);
+}
+
+EXPORT_SYMBOL(alloc_hippi_dev);
diff --git a/net/802/p8022.c b/net/802/p8022.c
new file mode 100644
index 00000000000..5ae63416df6
--- /dev/null
+++ b/net/802/p8022.c
@@ -0,0 +1,65 @@
+/*
+ *	NET3:	Support for 802.2 demultiplexing off Ethernet (Token ring
+ *		is kept separate see p8022tr.c)
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *		Demultiplex 802.2 encoded protocols. We match the entry by the
+ *		SSAP/DSAP pair and then deliver to the registered datalink that
+ *		matches. The control byte is ignored and handling of such items
+ *		is up to the routine passed the frame.
+ *
+ *		Unlike the 802.3 datalink we have a list of 802.2 entries as
+ *		there are multiple protocols to demux. The list is currently
+ *		short (3 or 4 entries at most). The current demux assumes this.
+ */
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/datalink.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <net/llc.h>
+#include <net/p8022.h>
+
+static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb,
+			 unsigned char *dest)
+{
+	llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap);
+	return 0;
+}
+
+struct datalink_proto *register_8022_client(unsigned char type,
+					    int (*func)(struct sk_buff *skb,
+							struct net_device *dev,
+							struct packet_type *pt))
+{
+	struct datalink_proto *proto;
+
+	proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
+	if (proto) {
+		proto->type[0]		= type;
+		proto->header_length	= 3;
+		proto->request		= p8022_request;
+		proto->sap = llc_sap_open(type, func);
+		if (!proto->sap) {
+			kfree(proto);
+			proto = NULL;
+		}
+	}
+	return proto;
+}
+
+void unregister_8022_client(struct datalink_proto *proto)
+{
+	llc_sap_close(proto->sap);
+	kfree(proto);
+}
+
+EXPORT_SYMBOL(register_8022_client);
+EXPORT_SYMBOL(unregister_8022_client);
+
+MODULE_LICENSE("GPL");
diff --git a/net/802/p8023.c b/net/802/p8023.c
new file mode 100644
index 00000000000..a0b61b40225
--- /dev/null
+++ b/net/802/p8023.c
@@ -0,0 +1,61 @@
+/*
+ *	NET3:	802.3 data link hooks used for IPX 802.3
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	802.3 isn't really a protocol data link layer. Some old IPX stuff
+ *	uses it however. Note that there is only one 802.3 protocol layer
+ *	in the system. We don't currently support different protocols
+ *	running raw 802.3 on different devices. Thankfully nobody else
+ *	has done anything like the old IPX.
+ */
+
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/datalink.h>
+
+/*
+ *	Place an 802.3 header on a packet. The driver will do the mac
+ *	addresses, we just need to give it the buffer length.
+ */
+static int p8023_request(struct datalink_proto *dl,
+			 struct sk_buff *skb, unsigned char *dest_node)
+{
+	struct net_device *dev = skb->dev;
+
+	dev->hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len);
+	return dev_queue_xmit(skb);
+}
+
+/*
+ *	Create an 802.3 client. Note there can be only one 802.3 client
+ */
+struct datalink_proto *make_8023_client(void)
+{
+	struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
+
+	if (proto) {
+		proto->header_length = 0;
+		proto->request	     = p8023_request;
+	}
+	return proto;
+}
+
+/*
+ *	Destroy the 802.3 client.
+ */
+void destroy_8023_client(struct datalink_proto *dl)
+{
+	if (dl)
+		kfree(dl);
+}
+
+EXPORT_SYMBOL(destroy_8023_client);
+EXPORT_SYMBOL(make_8023_client);
diff --git a/net/802/psnap.c b/net/802/psnap.c
new file mode 100644
index 00000000000..1053821ddf9
--- /dev/null
+++ b/net/802/psnap.c
@@ -0,0 +1,159 @@
+/*
+ *	SNAP data link layer. Derived from 802.2
+ *
+ *		Alan Cox <Alan.Cox@linux.org>,
+ *		from the 802.2 layer by Greg Page.
+ *		Merged in additions from Greg Page's psnap.c.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/datalink.h>
+#include <net/llc.h>
+#include <net/psnap.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/init.h>
+
+static LIST_HEAD(snap_list);
+static DEFINE_SPINLOCK(snap_lock);
+static struct llc_sap *snap_sap;
+
+/*
+ *	Find a snap client by matching the 5 bytes.
+ */
+static struct datalink_proto *find_snap_client(unsigned char *desc)
+{
+	struct list_head *entry;
+	struct datalink_proto *proto = NULL, *p;
+
+	list_for_each_rcu(entry, &snap_list) {
+		p = list_entry(entry, struct datalink_proto, node);
+		if (!memcmp(p->type, desc, 5)) {
+			proto = p;
+			break;
+		}
+	}
+	return proto;
+}
+
+/*
+ *	A SNAP packet has arrived
+ */
+static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
+		    struct packet_type *pt)
+{
+	int rc = 1;
+	struct datalink_proto *proto;
+	static struct packet_type snap_packet_type = {
+		.type = __constant_htons(ETH_P_SNAP),
+	};
+
+	rcu_read_lock();
+	proto = find_snap_client(skb->h.raw);
+	if (proto) {
+		/* Pass the frame on. */
+		skb->h.raw  += 5;
+		skb_pull(skb, 5);
+		rc = proto->rcvfunc(skb, dev, &snap_packet_type);
+	} else {
+		skb->sk = NULL;
+		kfree_skb(skb);
+		rc = 1;
+	}
+
+	rcu_read_unlock();
+	return rc;
+}
+
+/*
+ *	Put a SNAP header on a frame and pass to 802.2
+ */
+static int snap_request(struct datalink_proto *dl,
+			struct sk_buff *skb, u8 *dest)
+{
+	memcpy(skb_push(skb, 5), dl->type, 5);
+	llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap);
+	return 0;
+}
+
+/*
+ *	Set up the SNAP layer
+ */
+EXPORT_SYMBOL(register_snap_client);
+EXPORT_SYMBOL(unregister_snap_client);
+
+static char snap_err_msg[] __initdata =
+	KERN_CRIT "SNAP - unable to register with 802.2\n";
+
+static int __init snap_init(void)
+{
+	snap_sap = llc_sap_open(0xAA, snap_rcv);
+
+	if (!snap_sap)
+		printk(snap_err_msg);
+
+	return 0;
+}
+
+module_init(snap_init);
+
+static void __exit snap_exit(void)
+{
+	llc_sap_close(snap_sap);
+}
+
+module_exit(snap_exit);
+
+
+/*
+ *	Register SNAP clients. We don't yet use this for IP.
+ */
+struct datalink_proto *register_snap_client(unsigned char *desc,
+					    int (*rcvfunc)(struct sk_buff *,
+						    	   struct net_device *,
+							   struct packet_type *))
+{
+	struct datalink_proto *proto = NULL;
+
+	spin_lock_bh(&snap_lock);
+
+	if (find_snap_client(desc))
+		goto out;
+
+	proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
+	if (proto) {
+		memcpy(proto->type, desc,5);
+		proto->rcvfunc		= rcvfunc;
+		proto->header_length	= 5 + 3; /* snap + 802.2 */
+		proto->request		= snap_request;
+		list_add_rcu(&proto->node, &snap_list);
+	}
+out:
+	spin_unlock_bh(&snap_lock);
+
+	synchronize_net();
+	return proto;
+}
+
+/*
+ *	Unregister SNAP clients. Protocols no longer want to play with us ...
+ */
+void unregister_snap_client(struct datalink_proto *proto)
+{
+	spin_lock_bh(&snap_lock);
+	list_del_rcu(&proto->node);
+	spin_unlock_bh(&snap_lock);
+
+	synchronize_net();
+
+	kfree(proto);
+}
+
+MODULE_LICENSE("GPL");
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
new file mode 100644
index 00000000000..36079630c49
--- /dev/null
+++ b/net/802/sysctl_net_802.c
@@ -0,0 +1,33 @@
+/* -*- linux-c -*-
+ *		sysctl_net_802.c: sysctl interface to net 802 subsystem.
+ *
+ *		Begun April 1, 1996, Mike Shaver.
+ *		Added /proc/sys/net/802 directory entry (empty =) ). [MS]
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+
+#ifdef CONFIG_TR
+extern int sysctl_tr_rif_timeout;
+#endif
+
+struct ctl_table tr_table[] = {
+#ifdef CONFIG_TR
+	{
+		.ctl_name	= NET_TR_RIF_TIMEOUT,
+		.procname	= "rif_timeout",
+		.data		= &sysctl_tr_rif_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif /* CONFIG_TR */
+	{ 0 },
+};
diff --git a/net/802/tr.c b/net/802/tr.c
new file mode 100644
index 00000000000..85293ccf7ef
--- /dev/null
+++ b/net/802/tr.c
@@ -0,0 +1,645 @@
+/*
+ * NET3:	Token ring device handling subroutines
+ * 
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Fixes:       3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes.
+ *              Added rif table to /proc/net/tr_rif and rif timeout to
+ *              /proc/sys/net/token-ring/rif_timeout.
+ *              22 Jun 98 Paul Norton <p.norton@computer.org> Rearranged
+ *              tr_header and tr_type_trans to handle passing IPX SNAP and
+ *              802.2 through the correct layers. Eliminated tr_reformat.
+ *        
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/trdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/arp.h>
+
+static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
+static void rif_check_expire(unsigned long dummy);
+
+#define TR_SR_DEBUG 0
+
+/*
+ *	Each RIF entry we learn is kept this way
+ */
+ 
+struct rif_cache_s {	
+	unsigned char addr[TR_ALEN];
+	int iface;
+	__u16 rcf;
+	__u16 rseg[8];
+	struct rif_cache_s *next;
+	unsigned long last_used;
+	unsigned char local_ring;
+};
+
+#define RIF_TABLE_SIZE 32
+
+/*
+ *	We hash the RIF cache 32 ways. We do after all have to look it
+ *	up a lot.
+ */
+ 
+static struct rif_cache_s *rif_table[RIF_TABLE_SIZE];
+
+static DEFINE_SPINLOCK(rif_lock);
+
+
+/*
+ *	Garbage disposal timer.
+ */
+ 
+static struct timer_list rif_timer;
+
+int sysctl_tr_rif_timeout = 60*10*HZ;
+
+static inline unsigned long rif_hash(const unsigned char *addr)
+{
+	unsigned long x;
+
+	x = addr[0];
+	x = (x << 2) ^ addr[1];
+	x = (x << 2) ^ addr[2];
+	x = (x << 2) ^ addr[3];
+	x = (x << 2) ^ addr[4];
+	x = (x << 2) ^ addr[5];
+
+	x ^= x >> 8;
+
+	return x & (RIF_TABLE_SIZE - 1);
+}
+
+/*
+ *	Put the headers on a token ring packet. Token ring source routing
+ *	makes this a little more exciting than on ethernet.
+ */
+ 
+static int tr_header(struct sk_buff *skb, struct net_device *dev,
+		     unsigned short type,
+		     void *daddr, void *saddr, unsigned len) 
+{
+	struct trh_hdr *trh;
+	int hdr_len;
+
+	/* 
+	 * Add the 802.2 SNAP header if IP as the IPv4/IPv6 code calls  
+	 * dev->hard_header directly.
+	 */
+	if (type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
+	{
+		struct trllc *trllc;
+
+		hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc);
+		trh = (struct trh_hdr *)skb_push(skb, hdr_len);
+		trllc = (struct trllc *)(trh+1);
+		trllc->dsap = trllc->ssap = EXTENDED_SAP;
+		trllc->llc = UI_CMD;
+		trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00;
+		trllc->ethertype = htons(type);
+	}
+	else
+	{
+		hdr_len = sizeof(struct trh_hdr);
+		trh = (struct trh_hdr *)skb_push(skb, hdr_len);	
+	}
+
+	trh->ac=AC;
+	trh->fc=LLC_FRAME;
+
+	if(saddr)
+		memcpy(trh->saddr,saddr,dev->addr_len);
+	else
+		memcpy(trh->saddr,dev->dev_addr,dev->addr_len);
+
+	/*
+	 *	Build the destination and then source route the frame
+	 */
+	 
+	if(daddr) 
+	{
+		memcpy(trh->daddr,daddr,dev->addr_len);
+		tr_source_route(skb,trh,dev);
+		return(hdr_len);
+	}
+
+	return -hdr_len;
+}
+	
+/*
+ *	A neighbour discovery of some species (eg arp) has completed. We
+ *	can now send the packet.
+ */
+ 
+static int tr_rebuild_header(struct sk_buff *skb) 
+{
+	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+	struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
+	struct net_device *dev = skb->dev;
+
+	/*
+	 *	FIXME: We don't yet support IPv6 over token rings
+	 */
+	 
+	if(trllc->ethertype != htons(ETH_P_IP)) {
+		printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype));
+		return 0;
+	}
+
+#ifdef CONFIG_INET
+	if(arp_find(trh->daddr, skb)) {
+			return 1;
+	}
+	else 
+#endif	
+	{	
+		tr_source_route(skb,trh,dev); 
+		return 0;
+	}
+}
+	
+/*
+ *	Some of this is a bit hackish. We intercept RIF information
+ *	used for source routing. We also grab IP directly and don't feed
+ *	it via SNAP.
+ */
+ 
+unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) 
+{
+
+	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+	struct trllc *trllc;
+	unsigned riflen=0;
+	
+	skb->mac.raw = skb->data;
+	
+       	if(trh->saddr[0] & TR_RII)
+		riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
+
+	trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
+
+	skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
+
+	if(*trh->daddr & 0x80) 
+	{
+		if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN)) 	
+			skb->pkt_type=PACKET_BROADCAST;
+		else
+			skb->pkt_type=PACKET_MULTICAST;
+	}
+	else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) && (trh->daddr[2] & 0x5E))
+	{
+		skb->pkt_type=PACKET_MULTICAST;
+	}
+	else if(dev->flags & IFF_PROMISC) 
+	{
+		if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
+			skb->pkt_type=PACKET_OTHERHOST;
+	}
+
+	if ((skb->pkt_type != PACKET_BROADCAST) &&
+	    (skb->pkt_type != PACKET_MULTICAST))
+		tr_add_rif_info(trh,dev) ; 
+
+	/*
+	 * Strip the SNAP header from ARP packets since we don't 
+	 * pass them through to the 802.2/SNAP layers.
+	 */
+
+	if (trllc->dsap == EXTENDED_SAP &&
+	    (trllc->ethertype == ntohs(ETH_P_IP) ||
+	     trllc->ethertype == ntohs(ETH_P_IPV6) ||
+	     trllc->ethertype == ntohs(ETH_P_ARP)))
+	{
+		skb_pull(skb, sizeof(struct trllc));
+		return trllc->ethertype;
+	}
+
+	return ntohs(ETH_P_802_2);
+}
+
+/*
+ *	We try to do source routing... 
+ */
+
+void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *dev) 
+{
+	int slack;
+	unsigned int hash;
+	struct rif_cache_s *entry;
+	unsigned char *olddata;
+	static const unsigned char mcast_func_addr[] 
+		= {0xC0,0x00,0x00,0x04,0x00,0x00};
+	
+	spin_lock_bh(&rif_lock);
+
+	/*
+	 *	Broadcasts are single route as stated in RFC 1042 
+	 */
+	if( (!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) ||
+	    (!memcmp(&(trh->daddr[0]),&(mcast_func_addr[0]), TR_ALEN))  )
+	{
+		trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)  
+			       | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
+		trh->saddr[0]|=TR_RII;
+	}
+	else 
+	{
+		hash = rif_hash(trh->daddr);
+		/*
+		 *	Walk the hash table and look for an entry
+		 */
+		for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next);
+
+		/*
+		 *	If we found an entry we can route the frame.
+		 */
+		if(entry) 
+		{
+#if TR_SR_DEBUG
+printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
+		  trh->daddr[1],trh->daddr[2],trh->daddr[3],trh->daddr[4],trh->daddr[5]);
+#endif
+			if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8)
+			{
+				trh->rcf=entry->rcf;
+				memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short));
+				trh->rcf^=htons(TR_RCF_DIR_BIT);	
+				trh->rcf&=htons(0x1fff);	/* Issam Chehab <ichehab@madge1.demon.co.uk> */
+
+				trh->saddr[0]|=TR_RII;
+#if TR_SR_DEBUG
+				printk("entry found with rcf %04x\n", entry->rcf);
+			}
+			else
+			{
+				printk("entry found but without rcf length, local=%02x\n", entry->local_ring);
+#endif
+			}
+			entry->last_used=jiffies;
+		}
+		else 
+		{
+			/*
+			 *	Without the information we simply have to shout
+			 *	on the wire. The replies should rapidly clean this
+			 *	situation up.
+			 */
+			trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)  
+				       | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
+			trh->saddr[0]|=TR_RII;
+#if TR_SR_DEBUG
+			printk("no entry in rif table found - broadcasting frame\n");
+#endif
+		}
+	}
+
+	/* Compress the RIF here so we don't have to do it in the driver(s) */
+	if (!(trh->saddr[0] & 0x80))
+		slack = 18;
+	else 
+		slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
+	olddata = skb->data;
+	spin_unlock_bh(&rif_lock);
+
+	skb_pull(skb, slack);
+	memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
+}
+
+/*
+ *	We have learned some new RIF information for our source
+ *	routing.
+ */
+ 
+static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
+{
+	unsigned int hash, rii_p = 0;
+	struct rif_cache_s *entry;
+
+
+	spin_lock_bh(&rif_lock);
+	
+	/*
+	 *	Firstly see if the entry exists
+	 */
+
+       	if(trh->saddr[0] & TR_RII)
+	{
+		trh->saddr[0]&=0x7f;
+		if (((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8) > 2)
+		{
+			rii_p = 1;
+	        }
+	}
+
+	hash = rif_hash(trh->saddr);
+	for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next);
+
+	if(entry==NULL) 
+	{
+#if TR_SR_DEBUG
+printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
+		trh->saddr[0],trh->saddr[1],trh->saddr[2],
+       		trh->saddr[3],trh->saddr[4],trh->saddr[5],
+		ntohs(trh->rcf));
+#endif
+		/*
+		 *	Allocate our new entry. A failure to allocate loses
+		 *	use the information. This is harmless.
+		 *
+		 *	FIXME: We ought to keep some kind of cache size
+		 *	limiting and adjust the timers to suit.
+		 */
+		entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC);
+
+		if(!entry) 
+		{
+			printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
+			spin_unlock_bh(&rif_lock);
+			return;
+		}
+
+		memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);
+		entry->iface = dev->ifindex;
+		entry->next=rif_table[hash];
+		entry->last_used=jiffies;
+		rif_table[hash]=entry;
+
+		if (rii_p)
+		{
+			entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
+			memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
+			entry->local_ring = 0;
+			trh->saddr[0]|=TR_RII; /* put the routing indicator back for tcpdump */
+		}
+		else
+		{
+			entry->local_ring = 1;
+		}
+	} 	
+	else	/* Y. Tahara added */
+	{ 
+		/*
+		 *	Update existing entries
+		 */
+		if (!entry->local_ring) 
+		    if (entry->rcf != (trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK)) &&
+			 !(trh->rcf & htons(TR_RCF_BROADCAST_MASK)))
+		    {
+#if TR_SR_DEBUG
+printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
+		trh->saddr[0],trh->saddr[1],trh->saddr[2],
+		trh->saddr[3],trh->saddr[4],trh->saddr[5],
+		ntohs(trh->rcf));
+#endif
+			    entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
+        		    memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
+		    }                                         
+           	entry->last_used=jiffies;               
+	}
+	spin_unlock_bh(&rif_lock);
+}
+
+/*
+ *	Scan the cache with a timer and see what we need to throw out.
+ */
+
+static void rif_check_expire(unsigned long dummy) 
+{
+	int i;
+	unsigned long next_interval = jiffies + sysctl_tr_rif_timeout/2;
+
+	spin_lock_bh(&rif_lock);
+	
+	for(i =0; i < RIF_TABLE_SIZE; i++) {
+		struct rif_cache_s *entry, **pentry;
+		
+		pentry = rif_table+i;
+		while((entry=*pentry) != NULL) {
+			unsigned long expires
+				= entry->last_used + sysctl_tr_rif_timeout;
+
+			if (time_before_eq(expires, jiffies)) {
+				*pentry = entry->next;
+				kfree(entry);
+			} else {
+				pentry = &entry->next;
+
+				if (time_before(expires, next_interval))
+					next_interval = expires;
+			}
+		}
+	}
+	
+	spin_unlock_bh(&rif_lock);
+
+	mod_timer(&rif_timer, next_interval);
+
+}
+
+/*
+ *	Generate the /proc/net information for the token ring RIF
+ *	routing.
+ */
+ 
+#ifdef CONFIG_PROC_FS
+
+static struct rif_cache_s *rif_get_idx(loff_t pos)
+{
+	int i;
+	struct rif_cache_s *entry;
+	loff_t off = 0;
+
+	for(i = 0; i < RIF_TABLE_SIZE; i++) 
+		for(entry = rif_table[i]; entry; entry = entry->next) {
+			if (off == pos)
+				return entry;
+			++off;
+		}
+
+	return NULL;
+}
+
+static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	spin_lock_bh(&rif_lock);
+
+	return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	int i;
+	struct rif_cache_s *ent = v;
+
+	++*pos;
+
+	if (v == SEQ_START_TOKEN) {
+		i = -1;
+		goto scan;
+	}
+
+	if (ent->next) 
+		return ent->next;
+
+	i = rif_hash(ent->addr);
+ scan:
+	while (++i < RIF_TABLE_SIZE) {
+		if ((ent = rif_table[i]) != NULL)
+			return ent;
+	}
+	return NULL;
+}
+
+static void rif_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&rif_lock);
+}
+
+static int rif_seq_show(struct seq_file *seq, void *v)
+{
+	int j, rcf_len, segment, brdgnmb;
+	struct rif_cache_s *entry = v;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+		     "if     TR address       TTL   rcf   routing segments\n");
+	else {
+		struct net_device *dev = dev_get_by_index(entry->iface);
+		long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
+				- (long) jiffies;
+
+		seq_printf(seq, "%s %02X:%02X:%02X:%02X:%02X:%02X %7li ",
+			   dev?dev->name:"?",
+			   entry->addr[0],entry->addr[1],entry->addr[2],
+			   entry->addr[3],entry->addr[4],entry->addr[5],
+			   ttl/HZ);
+
+			if (entry->local_ring)
+			        seq_puts(seq, "local\n");
+			else {
+
+				seq_printf(seq, "%04X", ntohs(entry->rcf));
+				rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2; 
+				if (rcf_len)
+				        rcf_len >>= 1;
+				for(j = 1; j < rcf_len; j++) {
+					if(j==1) {
+						segment=ntohs(entry->rseg[j-1])>>4;
+						seq_printf(seq,"  %03X",segment);
+					};
+					segment=ntohs(entry->rseg[j])>>4;
+					brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
+					seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
+				}
+				seq_putc(seq, '\n');
+			}
+	   	}
+	return 0;
+}
+
+
+static struct seq_operations rif_seq_ops = {
+	.start = rif_seq_start,
+	.next  = rif_seq_next,
+	.stop  = rif_seq_stop,
+	.show  = rif_seq_show,
+};
+
+static int rif_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rif_seq_ops);
+}
+
+static struct file_operations rif_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = rif_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static void tr_setup(struct net_device *dev)
+{
+	/*
+	 *	Configure and register
+	 */
+	
+	dev->hard_header	= tr_header;
+	dev->rebuild_header	= tr_rebuild_header;
+
+	dev->type		= ARPHRD_IEEE802_TR;
+	dev->hard_header_len	= TR_HLEN;
+	dev->mtu		= 2000;
+	dev->addr_len		= TR_ALEN;
+	dev->tx_queue_len	= 100;	/* Long queues on tr */
+	
+	memset(dev->broadcast,0xFF, TR_ALEN);
+
+	/* New-style flags. */
+	dev->flags		= IFF_BROADCAST | IFF_MULTICAST ;
+}
+
+/**
+ * alloc_trdev - Register token ring device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *	for this token ring device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+struct net_device *alloc_trdev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
+}
+
+/*
+ *	Called during bootup.  We don't actually have to initialise
+ *	too much for this.
+ */
+
+static int __init rif_init(void)
+{
+	init_timer(&rif_timer);
+	rif_timer.expires  = sysctl_tr_rif_timeout;
+	rif_timer.data     = 0L;
+	rif_timer.function = rif_check_expire;
+	add_timer(&rif_timer);
+
+	proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops);
+	return 0;
+}
+
+module_init(rif_init);
+
+EXPORT_SYMBOL(tr_source_route);
+EXPORT_SYMBOL(tr_type_trans);
+EXPORT_SYMBOL(alloc_trdev);
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
new file mode 100644
index 00000000000..97feb44dbdc
--- /dev/null
+++ b/net/8021q/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the Linux VLAN layer.
+#
+
+obj-$(CONFIG_VLAN_8021Q) += 8021q.o
+
+8021q-objs := vlan.o vlan_dev.o
+
+ifeq ($(CONFIG_PROC_FS),y)
+8021q-objs += vlanproc.o
+endif
+
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
new file mode 100644
index 00000000000..1f6d31670bc
--- /dev/null
+++ b/net/8021q/vlan.c
@@ -0,0 +1,774 @@
+/*
+ * INET		802.1Q VLAN
+ *		Ethernet-type device handling.
+ *
+ * Authors:	Ben Greear <greearb@candelatech.com>
+ *              Please send support related email to: vlan@scry.wanfear.com
+ *              VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
+ * 
+ * Fixes:
+ *              Fix for packet capture - Nick Eggleston <nick@dccinc.com>;
+ *		Add HW acceleration hooks - David S. Miller <davem@redhat.com>;
+ *		Correct all the locking - David S. Miller <davem@redhat.com>;
+ *		Use hash table for VLAN groups - David S. Miller <davem@redhat.com>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h> /* for copy_from_user */
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/datalink.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <net/p8022.h>
+#include <net/arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+
+#include <linux/if_vlan.h>
+#include "vlan.h"
+#include "vlanproc.h"
+
+#define DRV_VERSION "1.8"
+
+/* Global VLAN variables */
+
+/* Our listing of VLAN group(s) */
+static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
+#define vlan_grp_hashfn(IDX)	((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK)
+
+static char vlan_fullname[] = "802.1Q VLAN Support";
+static char vlan_version[] = DRV_VERSION;
+static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
+static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
+
+static int vlan_device_event(struct notifier_block *, unsigned long, void *);
+static int vlan_ioctl_handler(void __user *);
+static int unregister_vlan_dev(struct net_device *, unsigned short );
+
+static struct notifier_block vlan_notifier_block = {
+	.notifier_call = vlan_device_event,
+};
+
+/* These may be changed at run-time through IOCTLs */
+
+/* Determines interface naming scheme. */
+unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
+
+static struct packet_type vlan_packet_type = {
+	.type = __constant_htons(ETH_P_8021Q),
+	.func = vlan_skb_recv, /* VLAN receive method */
+};
+
+/* Bits of netdev state that are propagated from real device to virtual */
+#define VLAN_LINK_STATE_MASK \
+	((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER))
+
+/* End of global variables definitions. */
+
+/*
+ * Function vlan_proto_init (pro)
+ *
+ *    Initialize VLAN protocol layer, 
+ *
+ */
+static int __init vlan_proto_init(void)
+{
+	int err;
+
+	printk(VLAN_INF "%s v%s %s\n",
+	       vlan_fullname, vlan_version, vlan_copyright);
+	printk(VLAN_INF "All bugs added by %s\n",
+	       vlan_buggyright);
+
+	/* proc file system initialization */
+	err = vlan_proc_init();
+	if (err < 0) {
+		printk(KERN_ERR 
+		       "%s %s: can't create entry in proc filesystem!\n",
+		       __FUNCTION__, VLAN_NAME);
+		return err;
+	}
+
+	dev_add_pack(&vlan_packet_type);
+
+	/* Register us to receive netdevice events */
+	err = register_netdevice_notifier(&vlan_notifier_block);
+	if (err < 0) {
+		dev_remove_pack(&vlan_packet_type);
+		vlan_proc_cleanup();
+		return err;
+	}
+
+	vlan_ioctl_set(vlan_ioctl_handler);
+
+	return 0;
+}
+
+/* Cleanup all vlan devices 
+ * Note: devices that have been registered that but not
+ * brought up will exist but have no module ref count.
+ */
+static void __exit vlan_cleanup_devices(void)
+{
+	struct net_device *dev, *nxt;
+
+	rtnl_lock();
+	for (dev = dev_base; dev; dev = nxt) {
+		nxt = dev->next;
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+					    VLAN_DEV_INFO(dev)->vlan_id);
+
+			unregister_netdevice(dev);
+		}
+	}
+	rtnl_unlock();
+}
+
+/*
+ *     Module 'remove' entry point.
+ *     o delete /proc/net/router directory and static entries.
+ */ 
+static void __exit vlan_cleanup_module(void)
+{
+	int i;
+
+	vlan_ioctl_set(NULL);
+
+	/* Un-register us from receiving netdevice events */
+	unregister_netdevice_notifier(&vlan_notifier_block);
+
+	dev_remove_pack(&vlan_packet_type);
+	vlan_cleanup_devices();
+
+	/* This table must be empty if there are no module
+	 * references left.
+	 */
+	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) {
+		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
+	}
+	vlan_proc_cleanup();
+
+	synchronize_net();
+}
+
+module_init(vlan_proto_init);
+module_exit(vlan_cleanup_module);
+
+/* Must be invoked with RCU read lock (no preempt) */
+static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
+{
+	struct vlan_group *grp;
+	struct hlist_node *n;
+	int hash = vlan_grp_hashfn(real_dev_ifindex);
+
+	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
+		if (grp->real_dev_ifindex == real_dev_ifindex)
+			return grp;
+	}
+
+	return NULL;
+}
+
+/*  Find the protocol handler.  Assumes VID < VLAN_VID_MASK.
+ *
+ * Must be invoked with RCU read lock (no preempt)
+ */
+struct net_device *__find_vlan_dev(struct net_device *real_dev,
+				   unsigned short VID)
+{
+	struct vlan_group *grp = __vlan_find_group(real_dev->ifindex);
+
+	if (grp)
+                return grp->vlan_devices[VID];
+
+	return NULL;
+}
+
+static void vlan_rcu_free(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct vlan_group, rcu));
+}
+
+
+/* This returns 0 if everything went fine.
+ * It will return 1 if the group was killed as a result.
+ * A negative return indicates failure.
+ *
+ * The RTNL lock must be held.
+ */
+static int unregister_vlan_dev(struct net_device *real_dev,
+			       unsigned short vlan_id)
+{
+	struct net_device *dev = NULL;
+	int real_dev_ifindex = real_dev->ifindex;
+	struct vlan_group *grp;
+	int i, ret;
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: VID: %i\n", __FUNCTION__, vlan_id);
+#endif
+
+	/* sanity check */
+	if (vlan_id >= VLAN_VID_MASK)
+		return -EINVAL;
+
+	ASSERT_RTNL();
+	grp = __vlan_find_group(real_dev_ifindex);
+
+	ret = 0;
+
+	if (grp) {
+		dev = grp->vlan_devices[vlan_id];
+		if (dev) {
+			/* Remove proc entry */
+			vlan_proc_rem_dev(dev);
+
+			/* Take it out of our own structures, but be sure to
+			 * interlock with HW accelerating devices or SW vlan
+			 * input packet processing.
+			 */
+			if (real_dev->features &
+			    (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER)) {
+				real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
+			}
+
+			grp->vlan_devices[vlan_id] = NULL;
+			synchronize_net();
+
+
+			/* Caller unregisters (and if necessary, puts)
+			 * VLAN device, but we get rid of the reference to
+			 * real_dev here.
+			 */
+			dev_put(real_dev);
+
+			/* If the group is now empty, kill off the
+			 * group.
+			 */
+			for (i = 0; i < VLAN_VID_MASK; i++)
+				if (grp->vlan_devices[i])
+					break;
+
+			if (i == VLAN_VID_MASK) {
+				if (real_dev->features & NETIF_F_HW_VLAN_RX)
+					real_dev->vlan_rx_register(real_dev, NULL);
+
+				hlist_del_rcu(&grp->hlist);
+
+				/* Free the group, after all cpu's are done. */
+				call_rcu(&grp->rcu, vlan_rcu_free);
+
+				grp = NULL;
+				ret = 1;
+			}
+		}
+	}
+
+        return ret;
+}
+
+static int unregister_vlan_device(const char *vlan_IF_name)
+{
+	struct net_device *dev = NULL;
+	int ret;
+
+
+	dev = dev_get_by_name(vlan_IF_name);
+	ret = -EINVAL;
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			rtnl_lock();
+
+			ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+						  VLAN_DEV_INFO(dev)->vlan_id);
+
+			dev_put(dev);
+			unregister_netdevice(dev);
+
+			rtnl_unlock();
+
+			if (ret == 1)
+				ret = 0;
+		} else {
+			printk(VLAN_ERR 
+			       "%s: ERROR:	Tried to remove a non-vlan device "
+			       "with VLAN code, name: %s  priv_flags: %hX\n",
+			       __FUNCTION__, dev->name, dev->priv_flags);
+			dev_put(dev);
+			ret = -EPERM;
+		}
+	} else {
+#ifdef VLAN_DEBUG
+		printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__);
+#endif
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void vlan_setup(struct net_device *new_dev)
+{
+	SET_MODULE_OWNER(new_dev);
+	    
+	/* new_dev->ifindex = 0;  it will be set when added to
+	 * the global list.
+	 * iflink is set as well.
+	 */
+	new_dev->get_stats = vlan_dev_get_stats;
+
+	/* Make this thing known as a VLAN device */
+	new_dev->priv_flags |= IFF_802_1Q_VLAN;
+				
+	/* Set us up to have no queue, as the underlying Hardware device
+	 * can do all the queueing we could want.
+	 */
+	new_dev->tx_queue_len = 0;
+
+	/* set up method calls */
+	new_dev->change_mtu = vlan_dev_change_mtu;
+	new_dev->open = vlan_dev_open;
+	new_dev->stop = vlan_dev_stop;
+	new_dev->set_mac_address = vlan_dev_set_mac_address;
+	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
+	new_dev->destructor = free_netdev;
+	new_dev->do_ioctl = vlan_dev_ioctl;
+}
+
+/*  Attach a VLAN device to a mac address (ie Ethernet Card).
+ *  Returns the device that was created, or NULL if there was
+ *  an error of some kind.
+ */
+static struct net_device *register_vlan_device(const char *eth_IF_name,
+					       unsigned short VLAN_ID)
+{
+	struct vlan_group *grp;
+	struct net_device *new_dev;
+	struct net_device *real_dev; /* the ethernet device */
+	char name[IFNAMSIZ];
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
+		__FUNCTION__, eth_IF_name, VLAN_ID);
+#endif
+
+	if (VLAN_ID >= VLAN_VID_MASK)
+		goto out_ret_null;
+
+	/* find the device relating to eth_IF_name. */
+	real_dev = dev_get_by_name(eth_IF_name);
+	if (!real_dev)
+		goto out_ret_null;
+
+	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
+		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
+			__FUNCTION__, real_dev->name);
+		goto out_put_dev;
+	}
+
+	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
+	    (real_dev->vlan_rx_register == NULL ||
+	     real_dev->vlan_rx_kill_vid == NULL)) {
+		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+			__FUNCTION__, real_dev->name);
+		goto out_put_dev;
+	}
+
+	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	    (real_dev->vlan_rx_add_vid == NULL ||
+	     real_dev->vlan_rx_kill_vid == NULL)) {
+		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+			__FUNCTION__, real_dev->name);
+		goto out_put_dev;
+	}
+
+	/* From this point on, all the data structures must remain
+	 * consistent.
+	 */
+	rtnl_lock();
+
+	/* The real device must be up and operating in order to
+	 * assosciate a VLAN device with it.
+	 */
+	if (!(real_dev->flags & IFF_UP))
+		goto out_unlock;
+
+	if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
+		/* was already registered. */
+		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+		goto out_unlock;
+	}
+
+	/* Gotta set up the fields for the device. */
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
+	       vlan_name_type);
+#endif
+	switch (vlan_name_type) {
+	case VLAN_NAME_TYPE_RAW_PLUS_VID:
+		/* name will look like:	 eth1.0005 */
+		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, VLAN_ID);
+		break;
+	case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 vlan5
+		 */
+		snprintf(name, IFNAMSIZ, "vlan%i", VLAN_ID);
+		break;
+	case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 eth0.5
+		 */
+		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, VLAN_ID);
+		break;
+	case VLAN_NAME_TYPE_PLUS_VID:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 vlan0005
+		 */
+	default:
+		snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
+	};
+		    
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
+			       vlan_setup);
+	if (new_dev == NULL)
+		goto out_unlock;
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+#endif
+	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
+	new_dev->flags = real_dev->flags;
+	new_dev->flags &= ~IFF_UP;
+
+	new_dev->state = real_dev->state & VLAN_LINK_STATE_MASK;
+
+	/* need 4 bytes for extra VLAN header info,
+	 * hope the underlying device can handle it.
+	 */
+	new_dev->mtu = real_dev->mtu;
+
+	/* TODO: maybe just assign it to be ETHERNET? */
+	new_dev->type = real_dev->type;
+
+	new_dev->hard_header_len = real_dev->hard_header_len;
+	if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) {
+		/* Regular ethernet + 4 bytes (18 total). */
+		new_dev->hard_header_len += VLAN_HLEN;
+	}
+
+	VLAN_MEM_DBG("new_dev->priv malloc, addr: %p  size: %i\n",
+		     new_dev->priv,
+		     sizeof(struct vlan_dev_info));
+	    
+	memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len);
+	memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
+	new_dev->addr_len = real_dev->addr_len;
+
+	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+		new_dev->hard_header = real_dev->hard_header;
+		new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
+		new_dev->rebuild_header = real_dev->rebuild_header;
+	} else {
+		new_dev->hard_header = vlan_dev_hard_header;
+		new_dev->hard_start_xmit = vlan_dev_hard_start_xmit;
+		new_dev->rebuild_header = vlan_dev_rebuild_header;
+	}
+	new_dev->hard_header_parse = real_dev->hard_header_parse;
+
+	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
+	VLAN_DEV_INFO(new_dev)->dent = NULL;
+	VLAN_DEV_INFO(new_dev)->flags = 1;
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "About to go find the group for idx: %i\n",
+	       real_dev->ifindex);
+#endif
+	    
+	if (register_netdevice(new_dev))
+		goto out_free_newdev;
+
+	/* So, got the sucker initialized, now lets place
+	 * it into our local structure.
+	 */
+	grp = __vlan_find_group(real_dev->ifindex);
+
+	/* Note, we are running under the RTNL semaphore
+	 * so it cannot "appear" on us.
+	 */
+	if (!grp) { /* need to add a new group */
+		grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL);
+		if (!grp)
+			goto out_free_unregister;
+					
+		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
+		memset(grp, 0, sizeof(struct vlan_group));
+		grp->real_dev_ifindex = real_dev->ifindex;
+
+		hlist_add_head_rcu(&grp->hlist, 
+				   &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
+
+		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+			real_dev->vlan_rx_register(real_dev, grp);
+	}
+	    
+	grp->vlan_devices[VLAN_ID] = new_dev;
+
+	if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */
+            	printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
+					                 new_dev->name);
+
+	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+		real_dev->vlan_rx_add_vid(real_dev, VLAN_ID);
+
+	rtnl_unlock();
+
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
+#endif
+	return new_dev;
+
+out_free_unregister:
+	unregister_netdev(new_dev);
+	goto out_unlock;
+
+out_free_newdev:
+	free_netdev(new_dev);
+
+out_unlock:
+	rtnl_unlock();
+
+out_put_dev:
+	dev_put(real_dev);
+
+out_ret_null:
+	return NULL;
+}
+
+static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct vlan_group *grp = __vlan_find_group(dev->ifindex);
+	int i, flgs;
+	struct net_device *vlandev;
+
+	if (!grp)
+		goto out;
+
+	/* It is OK that we do not hold the group lock right now,
+	 * as we run under the RTNL lock.
+	 */
+
+	switch (event) {
+	case NETDEV_CHANGE:
+		/* Propagate real device state to vlan devices */
+		flgs = dev->state & VLAN_LINK_STATE_MASK;
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = grp->vlan_devices[i];
+			if (!vlandev)
+				continue;
+
+			if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
+				vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK) 
+					| flgs;
+				netdev_state_change(vlandev);
+			}
+		}
+		break;
+
+	case NETDEV_DOWN:
+		/* Put all VLANs for this dev in the down state too.  */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = grp->vlan_devices[i];
+			if (!vlandev)
+				continue;
+
+			flgs = vlandev->flags;
+			if (!(flgs & IFF_UP))
+				continue;
+
+			dev_change_flags(vlandev, flgs & ~IFF_UP);
+		}
+		break;
+
+	case NETDEV_UP:
+		/* Put all VLANs for this dev in the up state too.  */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = grp->vlan_devices[i];
+			if (!vlandev)
+				continue;
+				
+			flgs = vlandev->flags;
+			if (flgs & IFF_UP)
+				continue;
+
+			dev_change_flags(vlandev, flgs | IFF_UP);
+		}
+		break;
+		
+	case NETDEV_UNREGISTER:
+		/* Delete all VLANs for this dev. */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			int ret;
+
+			vlandev = grp->vlan_devices[i];
+			if (!vlandev)
+				continue;
+
+			ret = unregister_vlan_dev(dev,
+						  VLAN_DEV_INFO(vlandev)->vlan_id);
+
+			unregister_netdevice(vlandev);
+
+			/* Group was destroyed? */
+			if (ret == 1)
+				break;
+		}
+		break;
+	};
+
+out:
+	return NOTIFY_DONE;
+}
+
+/*
+ *	VLAN IOCTL handler.
+ *	o execute requested action or pass command to the device driver
+ *   arg is really a struct vlan_ioctl_args __user *.
+ */
+static int vlan_ioctl_handler(void __user *arg)
+{
+	int err = 0;
+	unsigned short vid = 0;
+	struct vlan_ioctl_args args;
+
+	if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args)))
+		return -EFAULT;
+
+	/* Null terminate this sucker, just in case. */
+	args.device1[23] = 0;
+	args.u.device2[23] = 0;
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
+#endif
+
+	switch (args.cmd) {
+	case SET_VLAN_INGRESS_PRIORITY_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = vlan_dev_set_ingress_priority(args.device1,
+						    args.u.skb_priority,
+						    args.vlan_qos);
+		break;
+
+	case SET_VLAN_EGRESS_PRIORITY_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = vlan_dev_set_egress_priority(args.device1,
+						   args.u.skb_priority,
+						   args.vlan_qos);
+		break;
+
+	case SET_VLAN_FLAG_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = vlan_dev_set_vlan_flag(args.device1,
+					     args.u.flag,
+					     args.vlan_qos);
+		break;
+
+	case SET_VLAN_NAME_TYPE_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if ((args.u.name_type >= 0) &&
+		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
+			vlan_name_type = args.u.name_type;
+			err = 0;
+		} else {
+			err = -EINVAL;
+		}
+		break;
+
+	case ADD_VLAN_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* we have been given the name of the Ethernet Device we want to
+		 * talk to:  args.dev1	 We also have the
+		 * VLAN ID:  args.u.VID
+		 */
+		if (register_vlan_device(args.device1, args.u.VID)) {
+			err = 0;
+		} else {
+			err = -EINVAL;
+		}
+		break;
+
+	case DEL_VLAN_CMD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		/* Here, the args.dev1 is the actual VLAN we want
+		 * to get rid of.
+		 */
+		err = unregister_vlan_device(args.device1);
+		break;
+
+	case GET_VLAN_INGRESS_PRIORITY_CMD:
+		/* TODO:  Implement
+		   err = vlan_dev_get_ingress_priority(args);
+		   if (copy_to_user((void*)arg, &args,
+		        sizeof(struct vlan_ioctl_args))) {
+		        err = -EFAULT;
+		   }
+		*/
+		err = -EINVAL;
+		break;
+	case GET_VLAN_EGRESS_PRIORITY_CMD:
+		/* TODO:  Implement
+		   err = vlan_dev_get_egress_priority(args.device1, &(args.args);
+		   if (copy_to_user((void*)arg, &args,
+		        sizeof(struct vlan_ioctl_args))) {
+		        err = -EFAULT;
+		   }
+		*/
+		err = -EINVAL;
+		break;
+	case GET_VLAN_REALDEV_NAME_CMD:
+		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+		if (copy_to_user(arg, &args,
+				 sizeof(struct vlan_ioctl_args))) {
+			err = -EFAULT;
+		}
+		break;
+
+	case GET_VLAN_VID_CMD:
+		err = vlan_dev_get_vid(args.device1, &vid);
+		args.u.VID = vid;
+		if (copy_to_user(arg, &args,
+				 sizeof(struct vlan_ioctl_args))) {
+                      err = -EFAULT;
+		}
+		break;
+
+	default:
+		/* pass on to underlying device instead?? */
+		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
+			__FUNCTION__, args.cmd);
+		return -EINVAL;
+	};
+
+	return err;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
new file mode 100644
index 00000000000..508b1fa1454
--- /dev/null
+++ b/net/8021q/vlan.h
@@ -0,0 +1,72 @@
+#ifndef __BEN_VLAN_802_1Q_INC__
+#define __BEN_VLAN_802_1Q_INC__
+
+#include <linux/if_vlan.h>
+
+/*  Uncomment this if you want debug traces to be shown. */
+/* #define VLAN_DEBUG */
+
+#define VLAN_ERR KERN_ERR
+#define VLAN_INF KERN_INFO
+#define VLAN_DBG KERN_ALERT /* change these... to debug, having a hard time
+                             * changing the log level at run-time..for some reason.
+                             */
+
+/*
+
+These I use for memory debugging.  I feared a leak at one time, but
+I never found it..and the problem seems to have dissappeared.  Still,
+I'll bet they might prove useful again... --Ben
+
+
+#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG "%s:  "  x, __FUNCTION__, y, z);
+#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG "%s:  " x, __FUNCTION__, y);
+*/
+
+/* This way they don't do anything! */
+#define VLAN_MEM_DBG(x, y, z) 
+#define VLAN_FMEM_DBG(x, y)
+
+
+extern unsigned short vlan_name_type;
+
+#define VLAN_GRP_HASH_SHIFT	5
+#define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
+#define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
+
+/*  Find a VLAN device by the MAC address of its Ethernet device, and
+ *  it's VLAN ID.  The default configuration is to have VLAN's scope
+ *  to be box-wide, so the MAC will be ignored.  The mac will only be
+ *  looked at if we are configured to have a separate set of VLANs per
+ *  each MAC addressable interface.  Note that this latter option does
+ *  NOT follow the spec for VLANs, but may be useful for doing very
+ *  large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
+ *
+ *  Must be invoked with rcu_read_lock (ie preempt disabled)
+ *  or with RTNL.
+ */
+struct net_device *__find_vlan_dev(struct net_device* real_dev,
+				   unsigned short VID); /* vlan.c */
+
+/* found in vlan_dev.c */
+int vlan_dev_rebuild_header(struct sk_buff *skb);
+int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
+                  struct packet_type* ptype);
+int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
+                         unsigned short type, void *daddr, void *saddr,
+                         unsigned len);
+int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
+int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
+int vlan_dev_change_mtu(struct net_device *dev, int new_mtu);
+int vlan_dev_set_mac_address(struct net_device *dev, void* addr);
+int vlan_dev_open(struct net_device* dev);
+int vlan_dev_stop(struct net_device* dev);
+int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
+int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val);
+int vlan_dev_get_realdev_name(const char* dev_name, char* result);
+int vlan_dev_get_vid(const char* dev_name, unsigned short* result);
+void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
+
+#endif /* !(__BEN_VLAN_802_1Q_INC__) */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
new file mode 100644
index 00000000000..49c48741351
--- /dev/null
+++ b/net/8021q/vlan_dev.c
@@ -0,0 +1,890 @@
+/* -*- linux-c -*-
+ * INET		802.1Q VLAN
+ *		Ethernet-type device handling.
+ *
+ * Authors:	Ben Greear <greearb@candelatech.com>
+ *              Please send support related email to: vlan@scry.wanfear.com
+ *              VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
+ * 
+ * Fixes:       Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com>
+ *                - reset skb->pkt_type on incoming packets when MAC was changed
+ *                - see that changed MAC is saddr for outgoing packets
+ *              Oct 20, 2001:  Ard van Breeman:
+ *                - Fix MC-list, finally.
+ *                - Flush MC-list on VLAN destroy.
+ *                
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <asm/uaccess.h> /* for copy_from_user */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/datalink.h>
+#include <net/p8022.h>
+#include <net/arp.h>
+
+#include "vlan.h"
+#include "vlanproc.h"
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+
+/*
+ *	Rebuild the Ethernet MAC header. This is called after an ARP
+ *	(or in future other address resolution) has completed on this
+ *	sk_buff. We now let ARP fill in the other fields.
+ *
+ *	This routine CANNOT use cached dst->neigh!
+ *	Really, it is used only when dst->neigh is wrong.
+ *
+ * TODO:  This needs a checkup, I'm ignorant here. --BLG
+ */
+int vlan_dev_rebuild_header(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+	switch (veth->h_vlan_encapsulated_proto) {
+#ifdef CONFIG_INET
+	case __constant_htons(ETH_P_IP):
+
+		/* TODO:  Confirm this will work with VLAN headers... */
+		return arp_find(veth->h_dest, skb);
+#endif	
+	default:
+		printk(VLAN_DBG
+		       "%s: unable to resolve type %X addresses.\n", 
+		       dev->name, (int)veth->h_vlan_encapsulated_proto);
+	 
+		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
+		break;
+	};
+
+	return 0;
+}
+
+static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+{
+	if (VLAN_DEV_INFO(skb->dev)->flags & 1) {
+		if (skb_shared(skb) || skb_cloned(skb)) {
+			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+			kfree_skb(skb);
+			skb = nskb;
+		}
+		if (skb) {
+			/* Lifted from Gleb's VLAN code... */
+			memmove(skb->data - ETH_HLEN,
+				skb->data - VLAN_ETH_HLEN, 12);
+			skb->mac.raw += VLAN_HLEN;
+		}
+	}
+
+	return skb;
+}
+
+/*
+ *	Determine the packet's protocol ID. The rule here is that we 
+ *	assume 802.3 if the type field is short enough to be a length.
+ *	This is normal practice and works for any 'now in use' protocol.
+ *
+ *  Also, at this point we assume that we ARE dealing exclusively with
+ *  VLAN packets, or packets that should be made into VLAN packets based
+ *  on a default VLAN ID.
+ *
+ *  NOTE:  Should be similar to ethernet/eth.c.
+ *
+ *  SANITY NOTE:  This method is called when a packet is moving up the stack
+ *                towards userland.  To get here, it would have already passed
+ *                through the ethernet/eth.c eth_type_trans() method.
+ *  SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
+ *                 stored UNALIGNED in the memory.  RISC systems don't like
+ *                 such cases very much...
+ *  SANITY NOTE 2a:  According to Dave Miller & Alexey, it will always be aligned,
+ *                 so there doesn't need to be any of the unaligned stuff.  It has
+ *                 been commented out now...  --Ben
+ *
+ */
+int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
+                  struct packet_type* ptype)
+{
+	unsigned char *rawp = NULL;
+	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data);
+	unsigned short vid;
+	struct net_device_stats *stats;
+	unsigned short vlan_TCI;
+	unsigned short proto;
+
+	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
+	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
+
+	vid = (vlan_TCI & VLAN_VID_MASK);
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: skb: %p vlan_id: %hx\n",
+		__FUNCTION__, skb, vid);
+#endif
+
+	/* Ok, we will find the correct VLAN device, strip the header,
+	 * and then go on as usual.
+	 */
+
+	/* We have 12 bits of vlan ID.
+	 *
+	 * We must not drop allow preempt until we hold a
+	 * reference to the device (netif_rx does that) or we
+	 * fail.
+	 */
+
+	rcu_read_lock();
+	skb->dev = __find_vlan_dev(dev, vid);
+	if (!skb->dev) {
+		rcu_read_unlock();
+
+#ifdef VLAN_DEBUG
+		printk(VLAN_DBG "%s: ERROR: No net_device for VID: %i on dev: %s [%i]\n",
+			__FUNCTION__, (unsigned int)(vid), dev->name, dev->ifindex);
+#endif
+		kfree_skb(skb);
+		return -1;
+	}
+
+	skb->dev->last_rx = jiffies;
+
+	/* Bump the rx counters for the VLAN device. */
+	stats = vlan_dev_get_stats(skb->dev);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+
+	skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */
+
+	/* Ok, lets check to make sure the device (dev) we
+	 * came in on is what this VLAN is attached to.
+	 */
+
+	if (dev != VLAN_DEV_INFO(skb->dev)->real_dev) {
+		rcu_read_unlock();
+
+#ifdef VLAN_DEBUG
+		printk(VLAN_DBG "%s: dropping skb: %p because came in on wrong device, dev: %s  real_dev: %s, skb_dev: %s\n",
+			__FUNCTION__, skb, dev->name, 
+			VLAN_DEV_INFO(skb->dev)->real_dev->name, 
+			skb->dev->name);
+#endif
+		kfree_skb(skb);
+		stats->rx_errors++;
+		return -1;
+	}
+
+	/*
+	 * Deal with ingress priority mapping.
+	 */
+	skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI));
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: priority: %lu  for TCI: %hu (hbo)\n",
+		__FUNCTION__, (unsigned long)(skb->priority), 
+		ntohs(vhdr->h_vlan_TCI));
+#endif
+
+	/* The ethernet driver already did the pkt_type calculations
+	 * for us...
+	 */
+	switch (skb->pkt_type) {
+	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
+		// stats->broadcast ++; // no such counter :-(
+		break;
+
+	case PACKET_MULTICAST:
+		stats->multicast++;
+		break;
+
+	case PACKET_OTHERHOST: 
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the underlying
+		 * device, and still route correctly.
+		 */
+		if (memcmp(eth_hdr(skb)->h_dest, skb->dev->dev_addr, ETH_ALEN) == 0) {
+			/* It is for our (changed) MAC-address! */
+			skb->pkt_type = PACKET_HOST;
+		}
+		break;
+	default:
+		break;
+	};
+
+	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+	/* proto = get_unaligned(&vhdr->h_vlan_encapsulated_proto); */
+	proto = vhdr->h_vlan_encapsulated_proto;
+
+	skb->protocol = proto;
+	if (ntohs(proto) >= 1536) {
+		/* place it back on the queue to be handled by
+		 * true layer 3 protocols.
+		 */
+
+		/* See if we are configured to re-write the VLAN header
+		 * to make it look like ethernet...
+		 */
+		skb = vlan_check_reorder_header(skb);
+
+		/* Can be null if skb-clone fails when re-ordering */
+		if (skb) {
+			netif_rx(skb);
+		} else {
+			/* TODO:  Add a more specific counter here. */
+			stats->rx_errors++;
+		}
+		rcu_read_unlock();
+		return 0;
+	}
+
+	rawp = skb->data;
+
+	/*
+	 * This is a magic hack to spot IPX packets. Older Novell breaks
+	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
+	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+	 * won't work for fault tolerant netware but does for the rest.
+	 */
+	if (*(unsigned short *)rawp == 0xFFFF) {
+		skb->protocol = __constant_htons(ETH_P_802_3);
+		/* place it back on the queue to be handled by true layer 3 protocols.
+		 */
+
+		/* See if we are configured to re-write the VLAN header
+		 * to make it look like ethernet...
+		 */
+		skb = vlan_check_reorder_header(skb);
+
+		/* Can be null if skb-clone fails when re-ordering */
+		if (skb) {
+			netif_rx(skb);
+		} else {
+			/* TODO:  Add a more specific counter here. */
+			stats->rx_errors++;
+		}
+		rcu_read_unlock();
+		return 0;
+	}
+
+	/*
+	 *	Real 802.2 LLC
+	 */
+	skb->protocol = __constant_htons(ETH_P_802_2);
+	/* place it back on the queue to be handled by upper layer protocols.
+	 */
+
+	/* See if we are configured to re-write the VLAN header
+	 * to make it look like ethernet...
+	 */
+	skb = vlan_check_reorder_header(skb);
+
+	/* Can be null if skb-clone fails when re-ordering */
+	if (skb) {
+		netif_rx(skb);
+	} else {
+		/* TODO:  Add a more specific counter here. */
+		stats->rx_errors++;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev,
+							  struct sk_buff* skb)
+{
+	struct vlan_priority_tci_mapping *mp =
+		VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)];
+
+	while (mp) {
+		if (mp->priority == skb->priority) {
+			return mp->vlan_qos; /* This should already be shifted to mask
+					      * correctly with the VLAN's TCI
+					      */
+		}
+		mp = mp->next;
+	}
+	return 0;
+}
+
+/*
+ *	Create the VLAN header for an arbitrary protocol layer 
+ *
+ *	saddr=NULL	means use device source address
+ *	daddr=NULL	means leave destination address (eg unresolved arp)
+ *
+ *  This is called when the SKB is moving down the stack towards the
+ *  physical devices.
+ */
+int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
+                         unsigned short type, void *daddr, void *saddr,
+                         unsigned len)
+{
+	struct vlan_hdr *vhdr;
+	unsigned short veth_TCI = 0;
+	int rc = 0;
+	int build_vlan_header = 0;
+	struct net_device *vdev = dev; /* save this for the bottom of the method */
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: skb: %p type: %hx len: %x vlan_id: %hx, daddr: %p\n",
+		__FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr);
+#endif
+
+	/* build vlan header only if re_order_header flag is NOT set.  This
+	 * fixes some programs that get confused when they see a VLAN device
+	 * sending a frame that is VLAN encoded (the consensus is that the VLAN
+	 * device should look completely like an Ethernet device when the
+	 * REORDER_HEADER flag is set)	The drawback to this is some extra 
+	 * header shuffling in the hard_start_xmit.  Users can turn off this
+	 * REORDER behaviour with the vconfig tool.
+	 */
+	build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0);
+
+	if (build_vlan_header) {
+		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+
+		/* build the four bytes that make this a VLAN header. */
+
+		/* Now, construct the second two bytes. This field looks something
+		 * like:
+		 * usr_priority: 3 bits	 (high bits)
+		 * CFI		 1 bit
+		 * VLAN ID	 12 bits (low bits)
+		 *
+		 */
+		veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+
+		vhdr->h_vlan_TCI = htons(veth_TCI);
+
+		/*
+		 *  Set the protocol type.
+		 *  For a packet of type ETH_P_802_3 we put the length in here instead.
+		 *  It is up to the 802.2 layer to carry protocol information.
+		 */
+
+		if (type != ETH_P_802_3) {
+			vhdr->h_vlan_encapsulated_proto = htons(type);
+		} else {
+			vhdr->h_vlan_encapsulated_proto = htons(len);
+		}
+	}
+
+	/* Before delegating work to the lower layer, enter our MAC-address */
+	if (saddr == NULL)
+		saddr = dev->dev_addr;
+
+	dev = VLAN_DEV_INFO(dev)->real_dev;
+
+	/* MPLS can send us skbuffs w/out enough space.	 This check will grow the
+	 * skb if it doesn't have enough headroom.  Not a beautiful solution, so
+	 * I'll tick a counter so that users can know it's happening...	 If they
+	 * care...
+	 */
+
+	/* NOTE:  This may still break if the underlying device is not the final
+	 * device (and thus there are more headers to add...)  It should work for
+	 * good-ole-ethernet though.
+	 */
+	if (skb_headroom(skb) < dev->hard_header_len) {
+		struct sk_buff *sk_tmp = skb;
+		skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
+		kfree_skb(sk_tmp);
+		if (skb == NULL) {
+			struct net_device_stats *stats = vlan_dev_get_stats(vdev);
+			stats->tx_dropped++;
+			return -ENOMEM;
+		}
+		VLAN_DEV_INFO(vdev)->cnt_inc_headroom_on_tx++;
+#ifdef VLAN_DEBUG
+		printk(VLAN_DBG "%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
+#endif
+	}
+
+	if (build_vlan_header) {
+		/* Now make the underlying real hard header */
+		rc = dev->hard_header(skb, dev, ETH_P_8021Q, daddr, saddr, len + VLAN_HLEN);
+
+		if (rc > 0) {
+			rc += VLAN_HLEN;
+		} else if (rc < 0) {
+			rc -= VLAN_HLEN;
+		}
+	} else {
+		/* If here, then we'll just make a normal looking ethernet frame,
+		 * but, the hard_start_xmit method will insert the tag (it has to
+		 * be able to do this for bridged and other skbs that don't come
+		 * down the protocol stack in an orderly manner.
+		 */
+		rc = dev->hard_header(skb, dev, type, daddr, saddr, len);
+	}
+
+	return rc;
+}
+
+int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = vlan_dev_get_stats(dev);
+	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
+	 *
+	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
+	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
+	 */
+
+	if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) {
+		int orig_headroom = skb_headroom(skb);
+		unsigned short veth_TCI;
+
+		/* This is not a VLAN frame...but we can fix that! */
+		VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++;
+
+#ifdef VLAN_DEBUG
+		printk(VLAN_DBG "%s: proto to encap: 0x%hx (hbo)\n",
+			__FUNCTION__, htons(veth->h_vlan_proto));
+#endif
+		/* Construct the second two bytes. This field looks something
+		 * like:
+		 * usr_priority: 3 bits	 (high bits)
+		 * CFI		 1 bit
+		 * VLAN ID	 12 bits (low bits)
+		 */
+		veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+		veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+
+		skb = __vlan_put_tag(skb, veth_TCI);
+		if (!skb) {
+			stats->tx_dropped++;
+			return 0;
+		}
+
+		if (orig_headroom < VLAN_HLEN) {
+			VLAN_DEV_INFO(dev)->cnt_inc_headroom_on_tx++;
+		}
+	}
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: about to send skb: %p to dev: %s\n",
+		__FUNCTION__, skb, skb->dev->name);
+	printk(VLAN_DBG "  %2hx.%2hx.%2hx.%2xh.%2hx.%2hx %2hx.%2hx.%2hx.%2hx.%2hx.%2hx %4hx %4hx %4hx\n",
+	       veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
+	       veth->h_source[0], veth->h_source[1], veth->h_source[2], veth->h_source[3], veth->h_source[4], veth->h_source[5],
+	       veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto);
+#endif
+
+	stats->tx_packets++; /* for statics only */
+	stats->tx_bytes += skb->len;
+
+	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	dev_queue_xmit(skb);
+
+	return 0;
+}
+
+int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = vlan_dev_get_stats(dev);
+	unsigned short veth_TCI;
+
+	/* Construct the second two bytes. This field looks something
+	 * like:
+	 * usr_priority: 3 bits	 (high bits)
+	 * CFI		 1 bit
+	 * VLAN ID	 12 bits (low bits)
+	 */
+	veth_TCI = VLAN_DEV_INFO(dev)->vlan_id;
+	veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+	skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
+
+	stats->tx_packets++;
+	stats->tx_bytes += skb->len;
+
+	skb->dev = VLAN_DEV_INFO(dev)->real_dev;
+	dev_queue_xmit(skb);
+
+	return 0;
+}
+
+int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/* TODO: gotta make sure the underlying layer can handle it,
+	 * maybe an IFF_VLAN_CAPABLE flag for devices?
+	 */
+	if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu)
+		return -ERANGE;
+
+	dev->mtu = new_mtu;
+
+	return 0;
+}
+
+int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
+{
+	struct net_device *dev = dev_get_by_name(dev_name);
+
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			/* see if a priority mapping exists.. */
+			VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+			dev_put(dev);
+			return 0;
+		}
+
+		dev_put(dev);
+	}
+	return -EINVAL;
+}
+
+int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
+{
+	struct net_device *dev = dev_get_by_name(dev_name);
+	struct vlan_priority_tci_mapping *mp = NULL;
+	struct vlan_priority_tci_mapping *np;
+   
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			/* See if a priority mapping exists.. */
+			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+			while (mp) {
+				if (mp->priority == skb_prio) {
+					mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
+					dev_put(dev);
+					return 0;
+				}
+				mp = mp->next;
+			}
+
+			/* Create a new mapping then. */
+			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
+			np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
+			if (np) {
+				np->next = mp;
+				np->priority = skb_prio;
+				np->vlan_qos = ((vlan_prio << 13) & 0xE000);
+				VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
+				dev_put(dev);
+				return 0;
+			} else {
+				dev_put(dev);
+				return -ENOBUFS;
+			}
+		}
+		dev_put(dev);
+	}
+	return -EINVAL;
+}
+
+/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */
+int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val)
+{
+	struct net_device *dev = dev_get_by_name(dev_name);
+
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			/* verify flag is supported */
+			if (flag == 1) {
+				if (flag_val) {
+					VLAN_DEV_INFO(dev)->flags |= 1;
+				} else {
+					VLAN_DEV_INFO(dev)->flags &= ~1;
+				}
+				dev_put(dev);
+				return 0;
+			} else {
+				printk(KERN_ERR  "%s: flag %i is not valid.\n",
+					__FUNCTION__, (int)(flag));
+				dev_put(dev);
+				return -EINVAL;
+			}
+		} else {
+			printk(KERN_ERR 
+			       "%s: %s is not a vlan device, priv_flags: %hX.\n",
+			       __FUNCTION__, dev->name, dev->priv_flags);
+			dev_put(dev);
+		}
+	} else {
+		printk(KERN_ERR  "%s: Could not find device: %s\n", 
+			__FUNCTION__, dev_name);
+	}
+
+	return -EINVAL;
+}
+
+
+int vlan_dev_get_realdev_name(const char *dev_name, char* result)
+{
+	struct net_device *dev = dev_get_by_name(dev_name);
+	int rv = 0;
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
+			rv = 0;
+		} else {
+			rv = -EINVAL;
+		}
+		dev_put(dev);
+	} else {
+		rv = -ENODEV;
+	}
+	return rv;
+}
+
+int vlan_dev_get_vid(const char *dev_name, unsigned short* result)
+{
+	struct net_device *dev = dev_get_by_name(dev_name);
+	int rv = 0;
+	if (dev) {
+		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+			*result = VLAN_DEV_INFO(dev)->vlan_id;
+			rv = 0;
+		} else {
+			rv = -EINVAL;
+		}
+		dev_put(dev);
+	} else {
+		rv = -ENODEV;
+	}
+	return rv;
+}
+
+
+int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p)
+{
+	struct sockaddr *addr = (struct sockaddr *)(addr_struct_p);
+	int i;
+
+	if (netif_running(dev))
+		return -EBUSY;
+
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+
+	printk("%s: Setting MAC address to ", dev->name);
+	for (i = 0; i < 6; i++)
+		printk(" %2.2x", dev->dev_addr[i]);
+	printk(".\n");
+
+	if (memcmp(VLAN_DEV_INFO(dev)->real_dev->dev_addr,
+		   dev->dev_addr,
+		   dev->addr_len) != 0) {
+		if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_PROMISC)) {
+			int flgs = VLAN_DEV_INFO(dev)->real_dev->flags;
+
+			/* Increment our in-use promiscuity counter */
+			dev_set_promiscuity(VLAN_DEV_INFO(dev)->real_dev, 1);
+
+			/* Make PROMISC visible to the user. */
+			flgs |= IFF_PROMISC;
+			printk("VLAN (%s):  Setting underlying device (%s) to promiscious mode.\n",
+			       dev->name, VLAN_DEV_INFO(dev)->real_dev->name);
+			dev_change_flags(VLAN_DEV_INFO(dev)->real_dev, flgs);
+		}
+	} else {
+		printk("VLAN (%s):  Underlying device (%s) has same MAC, not checking promiscious mode.\n",
+		       dev->name, VLAN_DEV_INFO(dev)->real_dev->name);
+	}
+
+	return 0;
+}
+
+static inline int vlan_dmi_equals(struct dev_mc_list *dmi1,
+                                  struct dev_mc_list *dmi2)
+{
+	return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) &&
+		(memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0));
+}
+
+/** dmi is a single entry into a dev_mc_list, a single node.  mc_list is
+ *  an entire list, and we'll iterate through it.
+ */
+static int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list)
+{
+	struct dev_mc_list *idmi;
+
+	for (idmi = mc_list; idmi != NULL; ) {
+		if (vlan_dmi_equals(dmi, idmi)) {
+			if (dmi->dmi_users > idmi->dmi_users)
+				return 1;
+			else
+				return 0;
+		} else {
+			idmi = idmi->next;
+		}
+	}
+
+	return 1;
+}
+
+static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list)
+{
+	struct dev_mc_list *dmi = mc_list;
+	struct dev_mc_list *next;
+
+	while(dmi) {
+		next = dmi->next;
+		kfree(dmi);
+		dmi = next;
+	}
+}
+
+static void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info)
+{
+	struct dev_mc_list *dmi, *new_dmi;
+
+	vlan_destroy_mc_list(vlan_info->old_mc_list);
+	vlan_info->old_mc_list = NULL;
+
+	for (dmi = mc_list; dmi != NULL; dmi = dmi->next) {
+		new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC);
+		if (new_dmi == NULL) {
+			printk(KERN_ERR "vlan: cannot allocate memory. "
+			       "Multicast may not work properly from now.\n");
+			return;
+		}
+
+		/* Copy whole structure, then make new 'next' pointer */
+		*new_dmi = *dmi;
+		new_dmi->next = vlan_info->old_mc_list;
+		vlan_info->old_mc_list = new_dmi;
+	}
+}
+
+static void vlan_flush_mc_list(struct net_device *dev)
+{
+	struct dev_mc_list *dmi = dev->mc_list;
+
+	while (dmi) {
+		printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n",
+		       dev->name,
+		       dmi->dmi_addr[0],
+		       dmi->dmi_addr[1],
+		       dmi->dmi_addr[2],
+		       dmi->dmi_addr[3],
+		       dmi->dmi_addr[4],
+		       dmi->dmi_addr[5]);
+		dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
+		dmi = dev->mc_list;
+	}
+
+	/* dev->mc_list is NULL by the time we get here. */
+	vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list);
+	VLAN_DEV_INFO(dev)->old_mc_list = NULL;
+}
+
+int vlan_dev_open(struct net_device *dev)
+{
+	if (!(VLAN_DEV_INFO(dev)->real_dev->flags & IFF_UP))
+		return -ENETDOWN;
+
+	return 0;
+}
+
+int vlan_dev_stop(struct net_device *dev)
+{
+	vlan_flush_mc_list(dev);
+	return 0;
+}
+
+int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	struct ifreq ifrr;
+	int err = -EOPNOTSUPP;
+
+	strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
+	ifrr.ifr_ifru = ifr->ifr_ifru;
+
+	switch(cmd) {
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		if (real_dev->do_ioctl && netif_device_present(real_dev)) 
+			err = real_dev->do_ioctl(real_dev, &ifrr, cmd);
+		break;
+
+	case SIOCETHTOOL:
+		err = dev_ethtool(&ifrr);
+	}
+
+	if (!err) 
+		ifr->ifr_ifru = ifrr.ifr_ifru;
+
+	return err;
+}
+
+/** Taken from Gleb + Lennert's VLAN code, and modified... */
+void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
+{
+	struct dev_mc_list *dmi;
+	struct net_device *real_dev;
+	int inc;
+
+	if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) {
+		/* Then it's a real vlan device, as far as we can tell.. */
+		real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev;
+
+		/* compare the current promiscuity to the last promisc we had.. */
+		inc = vlan_dev->promiscuity - VLAN_DEV_INFO(vlan_dev)->old_promiscuity;
+		if (inc) {
+			printk(KERN_INFO "%s: dev_set_promiscuity(master, %d)\n",
+			       vlan_dev->name, inc);
+			dev_set_promiscuity(real_dev, inc); /* found in dev.c */
+			VLAN_DEV_INFO(vlan_dev)->old_promiscuity = vlan_dev->promiscuity;
+		}
+
+		inc = vlan_dev->allmulti - VLAN_DEV_INFO(vlan_dev)->old_allmulti;
+		if (inc) {
+			printk(KERN_INFO "%s: dev_set_allmulti(master, %d)\n",
+			       vlan_dev->name, inc);
+			dev_set_allmulti(real_dev, inc); /* dev.c */
+			VLAN_DEV_INFO(vlan_dev)->old_allmulti = vlan_dev->allmulti;
+		}
+
+		/* looking for addresses to add to master's list */
+		for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) {
+			if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) {
+				dev_mc_add(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
+				printk(KERN_DEBUG "%s: add %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address to master interface\n",
+				       vlan_dev->name,
+				       dmi->dmi_addr[0],
+				       dmi->dmi_addr[1],
+				       dmi->dmi_addr[2],
+				       dmi->dmi_addr[3],
+				       dmi->dmi_addr[4],
+				       dmi->dmi_addr[5]);
+			}
+		}
+
+		/* looking for addresses to delete from master's list */
+		for (dmi = VLAN_DEV_INFO(vlan_dev)->old_mc_list; dmi != NULL; dmi = dmi->next) {
+			if (vlan_should_add_mc(dmi, vlan_dev->mc_list)) {
+				/* if we think we should add it to the new list, then we should really
+				 * delete it from the real list on the underlying device.
+				 */
+				dev_mc_delete(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
+				printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from master interface\n",
+				       vlan_dev->name,
+				       dmi->dmi_addr[0],
+				       dmi->dmi_addr[1],
+				       dmi->dmi_addr[2],
+				       dmi->dmi_addr[3],
+				       dmi->dmi_addr[4],
+				       dmi->dmi_addr[5]);
+			}
+		}
+
+		/* save multicast list */
+		vlan_copy_mc_list(vlan_dev->mc_list, VLAN_DEV_INFO(vlan_dev));
+	}
+}
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
new file mode 100644
index 00000000000..c32d27af0a3
--- /dev/null
+++ b/net/8021q/vlanproc.c
@@ -0,0 +1,357 @@
+/******************************************************************************
+ * vlanproc.c	VLAN Module. /proc filesystem interface.
+ *
+ *		This module is completely hardware-independent and provides
+ *		access to the router using Linux /proc filesystem.
+ *
+ * Author:	Ben Greear, <greearb@candelatech.com> coppied from wanproc.c
+ *               by: Gene Kozin	<genek@compuserve.com>
+ *
+ * Copyright:	(c) 1998 Ben Greear
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ * ============================================================================
+ * Jan 20, 1998        Ben Greear     Initial Version
+ *****************************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/stddef.h>	/* offsetof(), etc. */
+#include <linux/errno.h>	/* return codes */
+#include <linux/kernel.h>
+#include <linux/slab.h>		/* kmalloc(), kfree() */
+#include <linux/mm.h>		/* verify_area(), etc. */
+#include <linux/string.h>	/* inline mem*, str* functions */
+#include <linux/init.h>		/* __initfunc et al. */
+#include <asm/byteorder.h>	/* htons(), etc. */
+#include <asm/uaccess.h>	/* copy_to_user */
+#include <asm/io.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include "vlanproc.h"
+#include "vlan.h"
+
+/****** Function Prototypes *************************************************/
+
+/* Methods for preparing data for reading proc entries */
+static int vlan_seq_show(struct seq_file *seq, void *v);
+static void *vlan_seq_start(struct seq_file *seq, loff_t *pos);
+static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+static void vlan_seq_stop(struct seq_file *seq, void *);
+static int vlandev_seq_show(struct seq_file *seq, void *v);
+
+/*
+ *	Global Data
+ */
+
+
+/*
+ *	Names of the proc directory entries 
+ */
+
+static const char name_root[]	 = "vlan";
+static const char name_conf[]	 = "config";
+
+/*
+ *	Structures for interfacing with the /proc filesystem.
+ *	VLAN creates its own directory /proc/net/vlan with the folowing
+ *	entries:
+ *	config		device status/configuration
+ *	<device>	entry for each  device
+ */
+
+/*
+ *	Generic /proc/net/vlan/<file> file and inode operations 
+ */
+
+static struct seq_operations vlan_seq_ops = {
+	.start = vlan_seq_start,
+	.next = vlan_seq_next,
+	.stop = vlan_seq_stop,
+	.show = vlan_seq_show,
+};
+
+static int vlan_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &vlan_seq_ops);
+}
+
+static struct file_operations vlan_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = vlan_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ *	/proc/net/vlan/<device> file and inode operations
+ */
+
+static int vlandev_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vlandev_seq_show, PDE(inode)->data);
+}
+
+static struct file_operations vlandev_fops = {
+	.owner = THIS_MODULE,
+	.open    = vlandev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Proc filesystem derectory entries.
+ */
+
+/*
+ *	/proc/net/vlan 
+ */
+
+static struct proc_dir_entry *proc_vlan_dir;
+
+/*
+ *	/proc/net/vlan/config 
+ */
+
+static struct proc_dir_entry *proc_vlan_conf;
+
+/* Strings */
+static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
+    [VLAN_NAME_TYPE_RAW_PLUS_VID]       = "VLAN_NAME_TYPE_RAW_PLUS_VID",
+    [VLAN_NAME_TYPE_PLUS_VID_NO_PAD]	= "VLAN_NAME_TYPE_PLUS_VID_NO_PAD",
+    [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD]= "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD",
+    [VLAN_NAME_TYPE_PLUS_VID]		= "VLAN_NAME_TYPE_PLUS_VID",
+};
+/*
+ *	Interface functions
+ */
+
+/*
+ *	Clean up /proc/net/vlan entries
+ */
+
+void vlan_proc_cleanup(void)
+{
+	if (proc_vlan_conf)
+		remove_proc_entry(name_conf, proc_vlan_dir);
+
+	if (proc_vlan_dir)
+		proc_net_remove(name_root);
+
+	/* Dynamically added entries should be cleaned up as their vlan_device
+	 * is removed, so we should not have to take care of it here...
+	 */
+}
+
+/*
+ *	Create /proc/net/vlan entries
+ */
+
+int __init vlan_proc_init(void)
+{
+	proc_vlan_dir = proc_mkdir(name_root, proc_net);
+	if (proc_vlan_dir) {
+		proc_vlan_conf = create_proc_entry(name_conf,
+						   S_IFREG|S_IRUSR|S_IWUSR,
+						   proc_vlan_dir);
+		if (proc_vlan_conf) {
+			proc_vlan_conf->proc_fops = &vlan_fops;
+			return 0;
+		}
+	}
+	vlan_proc_cleanup();
+	return -ENOBUFS;
+}
+
+/*
+ *	Add directory entry for VLAN device.
+ */
+
+int vlan_proc_add_dev (struct net_device *vlandev)
+{
+	struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+
+	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
+		printk(KERN_ERR
+		       "ERROR:	vlan_proc_add, device -:%s:- is NOT a VLAN\n",
+		       vlandev->name);
+		return -EINVAL;
+	}
+
+	dev_info->dent = create_proc_entry(vlandev->name,
+					   S_IFREG|S_IRUSR|S_IWUSR,
+					   proc_vlan_dir);
+	if (!dev_info->dent)
+		return -ENOBUFS;
+
+	dev_info->dent->proc_fops = &vlandev_fops;
+	dev_info->dent->data = vlandev;
+
+#ifdef VLAN_DEBUG
+	printk(KERN_ERR "vlan_proc_add, device -:%s:- being added.\n",
+	       vlandev->name);
+#endif
+	return 0;
+}
+
+/*
+ *	Delete directory entry for VLAN device.
+ */
+int vlan_proc_rem_dev(struct net_device *vlandev)
+{
+	if (!vlandev) {
+		printk(VLAN_ERR "%s: invalid argument: %p\n",
+			__FUNCTION__, vlandev);
+		return -EINVAL;
+	}
+
+	if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
+		printk(VLAN_DBG "%s: invalid argument, device: %s is not a VLAN device, priv_flags: 0x%4hX.\n",
+			__FUNCTION__, vlandev->name, vlandev->priv_flags);
+		return -EINVAL;
+	}
+
+#ifdef VLAN_DEBUG
+	printk(VLAN_DBG "%s: dev: %p\n", __FUNCTION__, vlandev);
+#endif
+
+	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
+	if (VLAN_DEV_INFO(vlandev)->dent) {
+		remove_proc_entry(VLAN_DEV_INFO(vlandev)->dent->name, proc_vlan_dir);
+		VLAN_DEV_INFO(vlandev)->dent = NULL;
+	}
+
+	return 0;
+}
+
+/****** Proc filesystem entry points ****************************************/
+
+/*
+ * The following few functions build the content of /proc/net/vlan/config
+ */
+
+/* starting at dev, find a VLAN device */
+static struct net_device *vlan_skip(struct net_device *dev) 
+{
+	while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN)) 
+		dev = dev->next;
+
+	return dev;
+}
+
+/* start read of /proc/net/vlan/config */ 
+static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	loff_t i = 1;
+
+	read_lock(&dev_base_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+	
+	for (dev = vlan_skip(dev_base); dev && i < *pos; 
+	     dev = vlan_skip(dev->next), ++i);
+		
+	return  (i == *pos) ? dev : NULL;
+} 
+
+static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return vlan_skip((v == SEQ_START_TOKEN)  
+			    ? dev_base 
+			    : ((struct net_device *)v)->next);
+}
+
+static void vlan_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+static int vlan_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		const char *nmtype = NULL;
+
+		seq_puts(seq, "VLAN Dev name	 | VLAN ID\n");
+
+		if (vlan_name_type < ARRAY_SIZE(vlan_name_type_str))
+		    nmtype =  vlan_name_type_str[vlan_name_type];
+
+		seq_printf(seq, "Name-Type: %s\n", 
+			   nmtype ? nmtype :  "UNKNOWN" );
+	} else {
+		const struct net_device *vlandev = v;
+		const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+
+		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,  
+			   dev_info->vlan_id,    dev_info->real_dev->name);
+	}
+	return 0;
+}
+
+static int vlandev_seq_show(struct seq_file *seq, void *offset)
+{
+	struct net_device *vlandev = (struct net_device *) seq->private;
+	const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev);
+	struct net_device_stats *stats;
+	static const char fmt[] = "%30s %12lu\n";
+	int i;
+
+	if ((vlandev == NULL) || (!(vlandev->priv_flags & IFF_802_1Q_VLAN)))
+		return 0;
+
+	seq_printf(seq, "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
+		       vlandev->name, dev_info->vlan_id,
+		       (int)(dev_info->flags & 1), vlandev->priv_flags);
+
+
+	stats = vlan_dev_get_stats(vlandev);
+
+	seq_printf(seq, fmt, "total frames received", stats->rx_packets);
+	seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
+	seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast);
+	seq_puts(seq, "\n");
+	seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets);
+	seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes);
+	seq_printf(seq, fmt, "total headroom inc", 
+		   dev_info->cnt_inc_headroom_on_tx);
+	seq_printf(seq, fmt, "total encap on xmit", 
+		   dev_info->cnt_encap_on_xmit);
+	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	/* now show all PRIORITY mappings relating to this VLAN */
+	seq_printf(seq, 
+		       "\nINGRESS priority mappings: 0:%lu  1:%lu  2:%lu  3:%lu  4:%lu  5:%lu  6:%lu 7:%lu\n",
+		       dev_info->ingress_priority_map[0],
+		       dev_info->ingress_priority_map[1],
+		       dev_info->ingress_priority_map[2],
+		       dev_info->ingress_priority_map[3],
+		       dev_info->ingress_priority_map[4],
+		       dev_info->ingress_priority_map[5],
+		       dev_info->ingress_priority_map[6],
+		       dev_info->ingress_priority_map[7]);
+
+	seq_printf(seq, "EGRESSS priority Mappings: ");
+	for (i = 0; i < 16; i++) {
+		const struct vlan_priority_tci_mapping *mp
+			= dev_info->egress_priority_map[i];
+		while (mp) {
+			seq_printf(seq, "%lu:%hu ",
+				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
+			mp = mp->next;
+		}
+	}
+	seq_puts(seq, "\n");
+
+	return 0;
+}
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
new file mode 100644
index 00000000000..f908ee332fd
--- /dev/null
+++ b/net/8021q/vlanproc.h
@@ -0,0 +1,19 @@
+#ifndef __BEN_VLAN_PROC_INC__
+#define __BEN_VLAN_PROC_INC__
+
+#ifdef CONFIG_PROC_FS
+int vlan_proc_init(void);
+int vlan_proc_rem_dev(struct net_device *vlandev);
+int vlan_proc_add_dev (struct net_device *vlandev);
+void vlan_proc_cleanup (void);
+
+#else /* No CONFIG_PROC_FS */
+
+#define vlan_proc_init()	(0)
+#define vlan_proc_cleanup()	do {} while(0)
+#define vlan_proc_add_dev(dev)	({(void)(dev), 0;})
+#define vlan_proc_rem_dev(dev)	({(void)(dev), 0;})
+
+#endif
+
+#endif /* !(__BEN_VLAN_PROC_INC__) */
diff --git a/net/Kconfig b/net/Kconfig
new file mode 100644
index 00000000000..9251b28e8d5
--- /dev/null
+++ b/net/Kconfig
@@ -0,0 +1,646 @@
+#
+# Network configuration
+#
+
+menu "Networking support"
+
+config NET
+	bool "Networking support"
+	---help---
+	  Unless you really know what you are doing, you should say Y here.
+	  The reason is that some programs need kernel networking support even
+	  when running on a stand-alone machine that isn't connected to any
+	  other computer. If you are upgrading from an older kernel, you
+	  should consider updating your networking tools too because changes
+	  in the kernel and the tools often go hand in hand. The tools are
+	  contained in the package net-tools, the location and version number
+	  of which are given in <file:Documentation/Changes>.
+
+	  For a general introduction to Linux networking, it is highly
+	  recommended to read the NET-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+menu "Networking options"
+	depends on NET
+
+config PACKET
+	tristate "Packet socket"
+	---help---
+	  The Packet protocol is used by applications which communicate
+	  directly with network devices without an intermediate network
+	  protocol implemented in the kernel, e.g. tcpdump.  If you want them
+	  to work, choose Y.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called af_packet.
+
+	  If unsure, say Y.
+
+config PACKET_MMAP
+	bool "Packet socket: mmapped IO"
+	depends on PACKET
+	help
+	  If you say Y here, the Packet protocol driver will use an IO
+	  mechanism that results in faster communication.
+
+	  If unsure, say N.
+
+config UNIX
+	tristate "Unix domain sockets"
+	---help---
+	  If you say Y here, you will include support for Unix domain sockets;
+	  sockets are the standard Unix mechanism for establishing and
+	  accessing network connections.  Many commonly used programs such as
+	  the X Window system and syslog use these sockets even if your
+	  machine is not connected to any network.  Unless you are working on
+	  an embedded system or something similar, you therefore definitely
+	  want to say Y here.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called unix.  Note that several important services won't work
+	  correctly if you say M here and then neglect to load the module.
+
+	  Say Y unless you know what you are doing.
+
+config NET_KEY
+	tristate "PF_KEY sockets"
+	select XFRM
+	---help---
+	  PF_KEYv2 socket family, compatible to KAME ones.
+	  They are required if you are going to use IPsec tools ported
+	  from KAME.
+
+	  Say Y unless you know what you are doing.
+
+config INET
+	bool "TCP/IP networking"
+	---help---
+	  These are the protocols used on the Internet and on most local
+	  Ethernets. It is highly recommended to say Y here (this will enlarge
+	  your kernel by about 144 KB), since some programs (e.g. the X window
+	  system) use TCP/IP even if your machine is not connected to any
+	  other computer. You will get the so-called loopback device which
+	  allows you to ping yourself (great fun, that!).
+
+	  For an excellent introduction to Linux networking, please read the
+	  Linux Networking HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  If you say Y here and also to "/proc file system support" and
+	  "Sysctl support" below, you can change various aspects of the
+	  behavior of the TCP/IP code by writing to the (virtual) files in
+	  /proc/sys/net/ipv4/*; the options are explained in the file
+	  <file:Documentation/networking/ip-sysctl.txt>.
+
+	  Short answer: say Y.
+
+source "net/ipv4/Kconfig"
+
+#   IPv6 as module will cause a CRASH if you try to unload it
+config IPV6
+	tristate "The IPv6 protocol"
+	depends on INET
+	default m
+	select CRYPTO if IPV6_PRIVACY
+	select CRYPTO_MD5 if IPV6_PRIVACY
+	---help---
+	  This is complemental support for the IP version 6.
+	  You will still be able to do traditional IPv4 networking as well.
+
+	  For general information about IPv6, see
+	  <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
+	  For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
+	  For specific information about IPv6 under Linux, read the HOWTO at
+	  <http://www.bieringer.de/linux/IPv6/>.
+
+	  To compile this protocol support as a module, choose M here: the 
+	  module will be called ipv6.
+
+source "net/ipv6/Kconfig"
+
+menuconfig NETFILTER
+	bool "Network packet filtering (replaces ipchains)"
+	---help---
+	  Netfilter is a framework for filtering and mangling network packets
+	  that pass through your Linux box.
+
+	  The most common use of packet filtering is to run your Linux box as
+	  a firewall protecting a local network from the Internet. The type of
+	  firewall provided by this kernel support is called a "packet
+	  filter", which means that it can reject individual network packets
+	  based on type, source, destination etc. The other kind of firewall,
+	  a "proxy-based" one, is more secure but more intrusive and more
+	  bothersome to set up; it inspects the network traffic much more
+	  closely, modifies it and has knowledge about the higher level
+	  protocols, which a packet filter lacks. Moreover, proxy-based
+	  firewalls often require changes to the programs running on the local
+	  clients. Proxy-based firewalls don't need support by the kernel, but
+	  they are often combined with a packet filter, which only works if
+	  you say Y here.
+
+	  You should also say Y here if you intend to use your Linux box as
+	  the gateway to the Internet for a local network of machines without
+	  globally valid IP addresses. This is called "masquerading": if one
+	  of the computers on your local network wants to send something to
+	  the outside, your box can "masquerade" as that computer, i.e. it
+	  forwards the traffic to the intended outside destination, but
+	  modifies the packets to make it look like they came from the
+	  firewall box itself. It works both ways: if the outside host
+	  replies, the Linux box will silently forward the traffic to the
+	  correct local computer. This way, the computers on your local net
+	  are completely invisible to the outside world, even though they can
+	  reach the outside and can receive replies. It is even possible to
+	  run globally visible servers from within a masqueraded local network
+	  using a mechanism called portforwarding. Masquerading is also often
+	  called NAT (Network Address Translation).
+
+	  Another use of Netfilter is in transparent proxying: if a machine on
+	  the local network tries to connect to an outside host, your Linux
+	  box can transparently forward the traffic to a local server,
+	  typically a caching proxy server.
+
+	  Yet another use of Netfilter is building a bridging firewall. Using
+	  a bridge with Network packet filtering enabled makes iptables "see"
+	  the bridged traffic. For filtering on the lower network and Ethernet
+	  protocols over the bridge, use ebtables (under bridge netfilter
+	  configuration).
+
+	  Various modules exist for netfilter which replace the previous
+	  masquerading (ipmasqadm), packet filtering (ipchains), transparent
+	  proxying, and portforwarding mechanisms. Please see
+	  <file:Documentation/Changes> under "iptables" for the location of
+	  these packages.
+
+	  Make sure to say N to "Fast switching" below if you intend to say Y
+	  here, as Fast switching currently bypasses netfilter.
+
+	  Chances are that you should say Y here if you compile a kernel which
+	  will run as a router and N for regular hosts. If unsure, say N.
+
+if NETFILTER
+
+config NETFILTER_DEBUG
+	bool "Network packet filtering debugging"
+	depends on NETFILTER
+	help
+	  You can say Y here if you want to get additional messages useful in
+	  debugging the netfilter code.
+
+config BRIDGE_NETFILTER
+	bool "Bridged IP/ARP packets filtering"
+	depends on BRIDGE && NETFILTER && INET
+	default y
+	---help---
+	  Enabling this option will let arptables resp. iptables see bridged
+	  ARP resp. IP traffic. If you want a bridging firewall, you probably
+	  want this option enabled.
+	  Enabling or disabling this option doesn't enable or disable
+	  ebtables.
+
+	  If unsure, say N.
+
+source "net/ipv4/netfilter/Kconfig"
+source "net/ipv6/netfilter/Kconfig"
+source "net/decnet/netfilter/Kconfig"
+source "net/bridge/netfilter/Kconfig"
+
+endif
+
+config XFRM
+       bool
+       depends on NET
+
+source "net/xfrm/Kconfig"
+
+source "net/sctp/Kconfig"
+
+config ATM
+	tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  ATM is a high-speed networking technology for Local Area Networks
+	  and Wide Area Networks.  It uses a fixed packet size and is
+	  connection oriented, allowing for the negotiation of minimum
+	  bandwidth requirements.
+
+	  In order to participate in an ATM network, your Linux box needs an
+	  ATM networking card. If you have that, say Y here and to the driver
+	  of your ATM card below.
+
+	  Note that you need a set of user-space programs to actually make use
+	  of ATM.  See the file <file:Documentation/networking/atm.txt> for
+	  further details.
+
+config ATM_CLIP
+	tristate "Classical IP over ATM (EXPERIMENTAL)"
+	depends on ATM && INET
+	help
+	  Classical IP over ATM for PVCs and SVCs, supporting InARP and
+	  ATMARP. If you want to communication with other IP hosts on your ATM
+	  network, you will typically either say Y here or to "LAN Emulation
+	  (LANE)" below.
+
+config ATM_CLIP_NO_ICMP
+	bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
+	depends on ATM_CLIP
+	help
+	  Normally, an "ICMP host unreachable" message is sent if a neighbour
+	  cannot be reached because there is no VC to it in the kernel's
+	  ATMARP table. This may cause problems when ATMARP table entries are
+	  briefly removed during revalidation. If you say Y here, packets to
+	  such neighbours are silently discarded instead.
+
+config ATM_LANE
+	tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
+	depends on ATM
+	help
+	  LAN Emulation emulates services of existing LANs across an ATM
+	  network. Besides operating as a normal ATM end station client, Linux
+	  LANE client can also act as an proxy client bridging packets between
+	  ELAN and Ethernet segments. You need LANE if you want to try MPOA.
+
+config ATM_MPOA
+	tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
+	depends on ATM && INET && ATM_LANE!=n
+	help
+	  Multi-Protocol Over ATM allows ATM edge devices such as routers,
+	  bridges and ATM attached hosts establish direct ATM VCs across
+	  subnetwork boundaries. These shortcut connections bypass routers
+	  enhancing overall network performance.
+
+config ATM_BR2684
+	tristate "RFC1483/2684 Bridged protocols"
+	depends on ATM && INET
+	help
+	  ATM PVCs can carry ethernet PDUs according to rfc2684 (formerly 1483)
+	  This device will act like an ethernet from the kernels point of view,
+	  with the traffic being carried by ATM PVCs (currently 1 PVC/device).
+	  This is sometimes used over DSL lines.  If in doubt, say N.
+
+config ATM_BR2684_IPFILTER
+	bool "Per-VC IP filter kludge"
+	depends on ATM_BR2684
+	help
+	  This is an experimental mechanism for users who need to terminating a
+	  large number of IP-only vcc's.  Do not enable this unless you are sure
+	  you know what you are doing.
+
+config BRIDGE
+	tristate "802.1d Ethernet Bridging"
+	---help---
+	  If you say Y here, then your Linux box will be able to act as an
+	  Ethernet bridge, which means that the different Ethernet segments it
+	  is connected to will appear as one Ethernet to the participants.
+	  Several such bridges can work together to create even larger
+	  networks of Ethernets using the IEEE 802.1 spanning tree algorithm.
+	  As this is a standard, Linux bridges will cooperate properly with
+	  other third party bridge products.
+
+	  In order to use the Ethernet bridge, you'll need the bridge
+	  configuration tools; see <file:Documentation/networking/bridge.txt>
+	  for location. Please read the Bridge mini-HOWTO for more
+	  information.
+
+	  If you enable iptables support along with the bridge support then you
+	  turn your bridge into a bridging IP firewall.
+	  iptables will then see the IP packets being bridged, so you need to
+	  take this into account when setting up your firewall rules.
+	  Enabling arptables support when bridging will let arptables see
+	  bridged ARP traffic in the arptables FORWARD chain.
+
+	  To compile this code as a module, choose M here: the module
+	  will be called bridge.
+
+	  If unsure, say N.
+
+config VLAN_8021Q
+	tristate "802.1Q VLAN Support"
+	---help---
+	  Select this and you will be able to create 802.1Q VLAN interfaces
+	  on your ethernet interfaces.  802.1Q VLAN supports almost
+	  everything a regular ethernet interface does, including
+	  firewalling, bridging, and of course IP traffic.  You will need
+	  the 'vconfig' tool from the VLAN project in order to effectively
+	  use VLANs.  See the VLAN web page for more information:
+	  <http://www.candelatech.com/~greear/vlan.html>
+
+	  To compile this code as a module, choose M here: the module
+	  will be called 8021q.
+
+	  If unsure, say N.
+
+config DECNET
+	tristate "DECnet Support"
+	---help---
+	  The DECnet networking protocol was used in many products made by
+	  Digital (now Compaq).  It provides reliable stream and sequenced
+	  packet communications over which run a variety of services similar
+	  to those which run over TCP/IP.
+
+	  To find some tools to use with the kernel layer support, please
+	  look at Patrick Caulfield's web site:
+	  <http://linux-decnet.sourceforge.net/>.
+
+	  More detailed documentation is available in
+	  <file:Documentation/networking/decnet.txt>.
+
+	  Be sure to say Y to "/proc file system support" and "Sysctl support"
+	  below when using DECnet, since you will need sysctl support to aid
+	  in configuration at run time.
+
+	  The DECnet code is also available as a module ( = code which can be
+	  inserted in and removed from the running kernel whenever you want).
+	  The module is called decnet.
+
+source "net/decnet/Kconfig"
+
+source "net/llc/Kconfig"
+
+config IPX
+	tristate "The IPX protocol"
+	select LLC
+	---help---
+	  This is support for the Novell networking protocol, IPX, commonly
+	  used for local networks of Windows machines.  You need it if you
+	  want to access Novell NetWare file or print servers using the Linux
+	  Novell client ncpfs (available from
+	  <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
+	  within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
+	  available from <http://www.tldp.org/docs.html#howto>).  In order
+	  to do the former, you'll also have to say Y to "NCP file system
+	  support", below.
+
+	  IPX is similar in scope to IP, while SPX, which runs on top of IPX,
+	  is similar to TCP. There is also experimental support for SPX in
+	  Linux (see "SPX networking", below).
+
+	  To turn your Linux box into a fully featured NetWare file server and
+	  IPX router, say Y here and fetch either lwared from
+	  <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
+	  mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
+	  information, read the IPX-HOWTO available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  General information about how to connect Linux, Windows machines and
+	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
+
+	  The IPX driver would enlarge your kernel by about 16 KB. To compile
+	  this driver as a module, choose M here: the module will be called ipx.
+	  Unless you want to integrate your Linux box with a local Novell
+	  network, say N.
+
+source "net/ipx/Kconfig"
+
+config ATALK
+	tristate "Appletalk protocol support"
+	select LLC
+	---help---
+	  AppleTalk is the protocol that Apple computers can use to communicate
+	  on a network.  If your Linux box is connected to such a network and you
+	  wish to connect to it, say Y.  You will need to use the netatalk package
+	  so that your Linux box can act as a print and file server for Macs as
+	  well as access AppleTalk printers.  Check out
+	  <http://www.zettabyte.net/netatalk/> on the WWW for details.
+	  EtherTalk is the name used for AppleTalk over Ethernet and the
+	  cheaper and slower LocalTalk is AppleTalk over a proprietary Apple
+	  network using serial links.  EtherTalk and LocalTalk are fully
+	  supported by Linux.
+
+	  General information about how to connect Linux, Windows machines and
+	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.  The
+	  NET-3-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>, contains valuable
+	  information as well.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called appletalk. You almost certainly want to compile it as a
+	  module so you can restart your AppleTalk stack without rebooting
+	  your machine. I hear that the GNU boycott of Apple is over, so
+	  even politically correct people are allowed to say Y here.
+
+source "drivers/net/appletalk/Kconfig"
+
+config X25
+	tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  X.25 is a set of standardized network protocols, similar in scope to
+	  frame relay; the one physical line from your box to the X.25 network
+	  entry point can carry several logical point-to-point connections
+	  (called "virtual circuits") to other computers connected to the X.25
+	  network. Governments, banks, and other organizations tend to use it
+	  to connect to each other or to form Wide Area Networks (WANs). Many
+	  countries have public X.25 networks. X.25 consists of two
+	  protocols: the higher level Packet Layer Protocol (PLP) (say Y here
+	  if you want that) and the lower level data link layer protocol LAPB
+	  (say Y to "LAPB Data Link Driver" below if you want that).
+
+	  You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
+	  <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
+	  Information about X.25 for Linux is contained in the files
+	  <file:Documentation/networking/x25.txt> and
+	  <file:Documentation/networking/x25-iface.txt>.
+
+	  One connects to an X.25 network either with a dedicated network card
+	  using the X.21 protocol (not yet supported by Linux) or one can do
+	  X.25 over a standard telephone line using an ordinary modem (say Y
+	  to "X.25 async driver" below) or over Ethernet using an ordinary
+	  Ethernet card and the LAPB over Ethernet (say Y to "LAPB Data Link
+	  Driver" and "LAPB over Ethernet driver" below).
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called x25. If unsure, say N.
+
+config LAPB
+	tristate "LAPB Data Link Driver (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
+	  the lower) part of the X.25 protocol. It offers a reliable
+	  connection service to exchange data frames with one other host, and
+	  it is used to transport higher level protocols (mostly X.25 Packet
+	  Layer, the higher part of X.25, but others are possible as well).
+	  Usually, LAPB is used with specialized X.21 network cards, but Linux
+	  currently supports LAPB only over Ethernet connections. If you want
+	  to use LAPB connections over Ethernet, say Y here and to "LAPB over
+	  Ethernet driver" below. Read
+	  <file:Documentation/networking/lapb-module.txt> for technical
+	  details.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called lapb.  If unsure, say N.
+
+config NET_DIVERT
+	bool "Frame Diverter (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  The Frame Diverter allows you to divert packets from the
+	  network, that are not aimed at the interface receiving it (in
+	  promisc. mode). Typically, a Linux box setup as an Ethernet bridge
+	  with the Frames Diverter on, can do some *really* transparent www
+	  caching using a Squid proxy for example.
+
+	  This is very useful when you don't want to change your router's
+	  config (or if you simply don't have access to it).
+
+	  The other possible usages of diverting Ethernet Frames are
+	  numberous:
+	  - reroute smtp traffic to another interface
+	  - traffic-shape certain network streams
+	  - transparently proxy smtp connections
+	  - etc...
+
+	  For more informations, please refer to:
+	  <http://diverter.sourceforge.net/>
+	  <http://perso.wanadoo.fr/magpie/EtherDivert.html>
+
+	  If unsure, say N.
+
+config ECONET
+	tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && INET
+	---help---
+	  Econet is a fairly old and slow networking protocol mainly used by
+	  Acorn computers to access file and print servers. It uses native
+	  Econet network cards. AUN is an implementation of the higher level
+	  parts of Econet that runs over ordinary Ethernet connections, on
+	  top of the UDP packet protocol, which in turn runs on top of the
+	  Internet protocol IP.
+
+	  If you say Y here, you can choose with the next two options whether
+	  to send Econet/AUN traffic over a UDP Ethernet connection or over
+	  a native Econet network card.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called econet.
+
+config ECONET_AUNUDP
+	bool "AUN over UDP"
+	depends on ECONET
+	help
+	  Say Y here if you want to send Econet/AUN traffic over a UDP
+	  connection (UDP is a packet based protocol that runs on top of the
+	  Internet protocol IP) using an ordinary Ethernet network card.
+
+config ECONET_NATIVE
+	bool "Native Econet"
+	depends on ECONET
+	help
+	  Say Y here if you have a native Econet network card installed in
+	  your computer.
+
+config WAN_ROUTER
+	tristate "WAN router"
+	depends on EXPERIMENTAL
+	---help---
+	  Wide Area Networks (WANs), such as X.25, frame relay and leased
+	  lines, are used to interconnect Local Area Networks (LANs) over vast
+	  distances with data transfer rates significantly higher than those
+	  achievable with commonly used asynchronous modem connections.
+	  Usually, a quite expensive external device called a `WAN router' is
+	  needed to connect to a WAN.
+
+	  As an alternative, WAN routing can be built into the Linux kernel.
+	  With relatively inexpensive WAN interface cards available on the
+	  market, a perfectly usable router can be built for less than half
+	  the price of an external router.  If you have one of those cards and
+	  wish to use your Linux box as a WAN router, say Y here and also to
+	  the WAN driver for your card, below.  You will then need the
+	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
+	  Read <file:Documentation/networking/wan-router.txt> for more
+	  information.
+
+	  To compile WAN routing support as a module, choose M here: the
+	  module will be called wanrouter.
+
+	  If unsure, say N.
+
+menu "QoS and/or fair queueing"
+
+config NET_SCHED
+	bool "QoS and/or fair queueing"
+	---help---
+	  When the kernel has several packets to send out over a network
+	  device, it has to decide which ones to send first, which ones to
+	  delay, and which ones to drop. This is the job of the packet
+	  scheduler, and several different algorithms for how to do this
+	  "fairly" have been proposed.
+
+	  If you say N here, you will get the standard packet scheduler, which
+	  is a FIFO (first come, first served). If you say Y here, you will be
+	  able to choose from among several alternative algorithms which can
+	  then be attached to different network devices. This is useful for
+	  example if some of your network devices are real time devices that
+	  need a certain minimum data flow rate, or if you need to limit the
+	  maximum data flow rate for traffic which matches specified criteria.
+	  This code is considered to be experimental.
+
+	  To administer these schedulers, you'll need the user-level utilities
+	  from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
+	  That package also contains some documentation; for more, check out
+	  <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
+
+	  This Quality of Service (QoS) support will enable you to use
+	  Differentiated Services (diffserv) and Resource Reservation Protocol
+	  (RSVP) on your Linux router if you also say Y to "QoS support",
+	  "Packet classifier API" and to some classifiers below. Documentation
+	  and software is at <http://diffserv.sourceforge.net/>.
+
+	  If you say Y here and to "/proc file system" below, you will be able
+	  to read status information about packet schedulers from the file
+	  /proc/net/psched.
+
+	  The available schedulers are listed in the following questions; you
+	  can say Y to as many as you like. If unsure, say N now.
+
+source "net/sched/Kconfig"
+
+endmenu
+
+menu "Network testing"
+
+config NET_PKTGEN
+	tristate "Packet Generator (USE WITH CAUTION)"
+	depends on PROC_FS
+	---help---
+	  This module will inject preconfigured packets, at a configurable
+	  rate, out of a given interface.  It is used for network interface
+	  stress testing and performance analysis.  If you don't understand
+	  what was just said, you don't need it: say N.
+
+	  Documentation on how to use the packet generator can be found
+	  at <file:Documentation/networking/pktgen.txt>.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called pktgen.
+
+endmenu
+
+endmenu
+
+config NETPOLL
+	def_bool NETCONSOLE
+
+config NETPOLL_RX
+	bool "Netpoll support for trapping incoming packets"
+	default n
+	depends on NETPOLL
+
+config NETPOLL_TRAP
+	bool "Netpoll traffic trapping"
+	default n
+	depends on NETPOLL
+
+config NET_POLL_CONTROLLER
+	def_bool NETPOLL
+
+source "net/ax25/Kconfig"
+
+source "net/irda/Kconfig"
+
+source "net/bluetooth/Kconfig"
+
+source "drivers/net/Kconfig"
+
+endmenu
+
diff --git a/net/Makefile b/net/Makefile
new file mode 100644
index 00000000000..8e2bdc025ab
--- /dev/null
+++ b/net/Makefile
@@ -0,0 +1,48 @@
+#
+# Makefile for the linux networking.
+#
+# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-y	:= nonet.o
+
+obj-$(CONFIG_NET)		:= socket.o core/
+
+tmp-$(CONFIG_COMPAT) 		:= compat.o
+obj-$(CONFIG_NET)		+= $(tmp-y)
+
+# LLC has to be linked before the files in net/802/
+obj-$(CONFIG_LLC)		+= llc/
+obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/
+obj-$(CONFIG_INET)		+= ipv4/
+obj-$(CONFIG_XFRM)		+= xfrm/
+obj-$(CONFIG_UNIX)		+= unix/
+ifneq ($(CONFIG_IPV6),)
+obj-y				+= ipv6/
+endif
+obj-$(CONFIG_PACKET)		+= packet/
+obj-$(CONFIG_NET_KEY)		+= key/
+obj-$(CONFIG_NET_SCHED)		+= sched/
+obj-$(CONFIG_BRIDGE)		+= bridge/
+obj-$(CONFIG_IPX)		+= ipx/
+obj-$(CONFIG_ATALK)		+= appletalk/
+obj-$(CONFIG_WAN_ROUTER)	+= wanrouter/
+obj-$(CONFIG_X25)		+= x25/
+obj-$(CONFIG_LAPB)		+= lapb/
+obj-$(CONFIG_NETROM)		+= netrom/
+obj-$(CONFIG_ROSE)		+= rose/
+obj-$(CONFIG_AX25)		+= ax25/
+obj-$(CONFIG_IRDA)		+= irda/
+obj-$(CONFIG_BT)		+= bluetooth/
+obj-$(CONFIG_SUNRPC)		+= sunrpc/
+obj-$(CONFIG_RXRPC)		+= rxrpc/
+obj-$(CONFIG_ATM)		+= atm/
+obj-$(CONFIG_DECNET)		+= decnet/
+obj-$(CONFIG_ECONET)		+= econet/
+obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
+obj-$(CONFIG_IP_SCTP)		+= sctp/
+
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
+endif
diff --git a/net/TUNABLE b/net/TUNABLE
new file mode 100644
index 00000000000..9913211f07a
--- /dev/null
+++ b/net/TUNABLE
@@ -0,0 +1,50 @@
+The following parameters should be tunable at compile time. Some of them
+exist as sysctls too.
+
+This is far from complete
+
+Item			Description
+----------------------------------------------------------------------------
+MAX_LINKS		Maximum number of netlink minor devices. (1-32)
+RIF_TABLE_SIZE		Token ring RIF cache size (tunable)
+AARP_HASH_SIZE		Size of Appletalk hash table (tunable)
+AX25_DEF_T1		AX.25 parameters. These are all tunable via
+AX25_DEF_T2		SIOCAX25SETPARMS
+AX25_DEF_T3		T1-T3,N2 have the meanings in the specification
+AX25_DEF_N2
+AX25_DEF_AXDEFMODE	8 = normal 128 is PE1CHL extended
+AX25_DEF_IPDEFMODE	'D' - datagram  'V' - virtual connection
+AX25_DEF_BACKOFF	'E'xponential 'L'inear
+AX25_DEF_NETROM		Allow netrom 1=Y
+AX25_DF_TEXT		Allow PID=Text 1=Y
+AX25_DEF_WINDOW		Window for normal mode
+AX25_DEF_EWINDOW	Window for PE1CHL mode
+AX25_DEF_DIGI		1 for inband 2 for cross band 3 for both
+AX25_DEF_CONMODE	Allow connected modes 1=Yes
+AX25_ROUTE_MAX		AX.25 route cache size - no currently tunable
+Unnamed (16)		Number of protocol hash slots (tunable)
+DEV_NUMBUFFS		Number of priority levels (not easily tunable)
+Unnamed (300)		Maximum packet backlog queue (tunable)
+MAX_IOVEC		Maximum number of iovecs in a message (tunable)
+MIN_WINDOW		Offered minimum window (tunable)
+MAX_WINDOW		Offered maximum window (tunable)
+MAX_HEADER		Largest physical header (tunable)
+MAX_ADDR_LEN		Largest physical address (tunable)
+SOCK_ARRAY_SIZE		IP socket array hash size (tunable)
+IP_MAX_MEMBERSHIPS	Largest number of groups per socket (BSD style) (tunable)
+16			Hard coded constant for amount of room allowed for
+			cache align and faster forwarding (tunable)
+IP_FRAG_TIME		Time we hold a fragment for. (tunable)
+PORT_MASQ_BEGIN		First port reserved for masquerade (tunable)
+PORT_MASQ_END		Last port used for masquerade	(tunable)
+MASQUERADE_EXPIRE_TCP_FIN	Time we keep a masquerade for after a FIN
+MASQUERADE_EXPIRE_UDP	Time we keep a UDP masquerade for (tunable)
+MAXVIFS			Maximum mrouted vifs (1-32)
+MFC_LINES		Lines in the multicast router cache (tunable)
+
+NetROM parameters are tunable via an ioctl passing a struct
+
+4000			Size a Unix domain socket malloc falls back to 
+			(tunable) should be 8K - a bit for 8K machines like
+			the ALPHA
+
diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile
new file mode 100644
index 00000000000..5cda56edef5
--- /dev/null
+++ b/net/appletalk/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the Linux AppleTalk layer.
+#
+
+obj-$(CONFIG_ATALK) += appletalk.o
+
+appletalk-y			:= aarp.o ddp.o dev.o
+appletalk-$(CONFIG_PROC_FS)	+= atalk_proc.o
+appletalk-$(CONFIG_SYSCTL)	+= sysctl_net_atalk.o
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
new file mode 100644
index 00000000000..54640c01b50
--- /dev/null
+++ b/net/appletalk/aarp.c
@@ -0,0 +1,1069 @@
+/*
+ *	AARP:		An implementation of the AppleTalk AARP protocol for
+ *			Ethernet 'ELAP'.
+ *
+ *		Alan Cox  <Alan.Cox@linux.org>
+ *
+ *	This doesn't fit cleanly with the IP arp. Potentially we can use
+ *	the generic neighbour discovery code to clean this up.
+ *
+ *	FIXME:
+ *		We ought to handle the retransmits with a single list and a
+ *	separate fast timer for when it is needed.
+ *		Use neighbour discovery code.
+ *		Token Ring Support.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *
+ *	References:
+ *		Inside AppleTalk (2nd Ed).
+ *	Fixes:
+ *		Jaume Grau	-	flush caches on AARP_PROBE
+ *		Rob Newberry	-	Added proxy AARP and AARP proc fs,
+ *					moved probing from DDP module.
+ *		Arnaldo C. Melo -	don't mangle rx packets
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/if_arp.h>
+#include <net/sock.h>
+#include <net/datalink.h>
+#include <net/psnap.h>
+#include <linux/atalk.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME;
+int sysctl_aarp_tick_time = AARP_TICK_TIME;
+int sysctl_aarp_retransmit_limit = AARP_RETRANSMIT_LIMIT;
+int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
+
+/* Lists of aarp entries */
+/**
+ *	struct aarp_entry - AARP entry
+ *	@last_sent - Last time we xmitted the aarp request
+ *	@packet_queue - Queue of frames wait for resolution
+ *	@status - Used for proxy AARP
+ *	expires_at - Entry expiry time
+ *	target_addr - DDP Address
+ *	dev - Device to use
+ *	hwaddr - Physical i/f address of target/router
+ *	xmit_count - When this hits 10 we give up
+ *	next - Next entry in chain
+ */
+struct aarp_entry {
+	/* These first two are only used for unresolved entries */
+	unsigned long		last_sent;
+	struct sk_buff_head	packet_queue;
+	int			status;
+	unsigned long		expires_at;
+	struct atalk_addr	target_addr;
+	struct net_device	*dev;
+	char			hwaddr[6];
+	unsigned short		xmit_count;
+	struct aarp_entry	*next;
+};
+
+/* Hashed list of resolved, unresolved and proxy entries */
+static struct aarp_entry *resolved[AARP_HASH_SIZE];
+static struct aarp_entry *unresolved[AARP_HASH_SIZE];
+static struct aarp_entry *proxies[AARP_HASH_SIZE];
+static int unresolved_count;
+
+/* One lock protects it all. */
+static DEFINE_RWLOCK(aarp_lock);
+
+/* Used to walk the list and purge/kick entries.  */
+static struct timer_list aarp_timer;
+
+/*
+ *	Delete an aarp queue
+ *
+ *	Must run under aarp_lock.
+ */
+static void __aarp_expire(struct aarp_entry *a)
+{
+	skb_queue_purge(&a->packet_queue);
+	kfree(a);
+}
+
+/*
+ *	Send an aarp queue entry request
+ *
+ *	Must run under aarp_lock.
+ */
+static void __aarp_send_query(struct aarp_entry *a)
+{
+	static unsigned char aarp_eth_multicast[ETH_ALEN] =
+					{ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF };
+	struct net_device *dev = a->dev;
+	struct elapaarp *eah;
+	int len = dev->hard_header_len + sizeof(*eah) + aarp_dl->header_length;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+	struct atalk_addr *sat = atalk_find_dev_addr(dev);
+
+	if (!skb)
+		return;
+
+	if (!sat) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Set up the buffer */
+	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
+	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb->protocol    = htons(ETH_P_ATALK);
+	skb->dev	 = dev;
+	eah		 = aarp_hdr(skb);
+
+	/* Set up the ARP */
+	eah->hw_type	 = htons(AARP_HW_TYPE_ETHERNET);
+	eah->pa_type	 = htons(ETH_P_ATALK);
+	eah->hw_len	 = ETH_ALEN;
+	eah->pa_len	 = AARP_PA_ALEN;
+	eah->function	 = htons(AARP_REQUEST);
+
+	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+
+	eah->pa_src_zero = 0;
+	eah->pa_src_net	 = sat->s_net;
+	eah->pa_src_node = sat->s_node;
+
+	memset(eah->hw_dst, '\0', ETH_ALEN);
+
+	eah->pa_dst_zero = 0;
+	eah->pa_dst_net	 = a->target_addr.s_net;
+	eah->pa_dst_node = a->target_addr.s_node;
+
+	/* Send it */
+	aarp_dl->request(aarp_dl, skb, aarp_eth_multicast);
+	/* Update the sending count */
+	a->xmit_count++;
+	a->last_sent = jiffies;
+}
+
+/* This runs under aarp_lock and in softint context, so only atomic memory
+ * allocations can be used. */
+static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
+			    struct atalk_addr *them, unsigned char *sha)
+{
+	struct elapaarp *eah;
+	int len = dev->hard_header_len + sizeof(*eah) + aarp_dl->header_length;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	/* Set up the buffer */
+	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
+	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb->protocol    = htons(ETH_P_ATALK);
+	skb->dev	 = dev;
+	eah		 = aarp_hdr(skb);
+
+	/* Set up the ARP */
+	eah->hw_type	 = htons(AARP_HW_TYPE_ETHERNET);
+	eah->pa_type	 = htons(ETH_P_ATALK);
+	eah->hw_len	 = ETH_ALEN;
+	eah->pa_len	 = AARP_PA_ALEN;
+	eah->function	 = htons(AARP_REPLY);
+
+	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+
+	eah->pa_src_zero = 0;
+	eah->pa_src_net	 = us->s_net;
+	eah->pa_src_node = us->s_node;
+
+	if (!sha)
+		memset(eah->hw_dst, '\0', ETH_ALEN);
+	else
+		memcpy(eah->hw_dst, sha, ETH_ALEN);
+
+	eah->pa_dst_zero = 0;
+	eah->pa_dst_net	 = them->s_net;
+	eah->pa_dst_node = them->s_node;
+
+	/* Send it */
+	aarp_dl->request(aarp_dl, skb, sha);
+}
+
+/*
+ *	Send probe frames. Called from aarp_probe_network and
+ *	aarp_proxy_probe_network.
+ */
+
+static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
+{
+	struct elapaarp *eah;
+	int len = dev->hard_header_len + sizeof(*eah) + aarp_dl->header_length;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+	static unsigned char aarp_eth_multicast[ETH_ALEN] =
+					{ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF };
+
+	if (!skb)
+		return;
+
+	/* Set up the buffer */
+	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
+	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb->protocol    = htons(ETH_P_ATALK);
+	skb->dev	 = dev;
+	eah		 = aarp_hdr(skb);
+
+	/* Set up the ARP */
+	eah->hw_type	 = htons(AARP_HW_TYPE_ETHERNET);
+	eah->pa_type	 = htons(ETH_P_ATALK);
+	eah->hw_len	 = ETH_ALEN;
+	eah->pa_len	 = AARP_PA_ALEN;
+	eah->function	 = htons(AARP_PROBE);
+
+	memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN);
+
+	eah->pa_src_zero = 0;
+	eah->pa_src_net	 = us->s_net;
+	eah->pa_src_node = us->s_node;
+
+	memset(eah->hw_dst, '\0', ETH_ALEN);
+
+	eah->pa_dst_zero = 0;
+	eah->pa_dst_net	 = us->s_net;
+	eah->pa_dst_node = us->s_node;
+
+	/* Send it */
+	aarp_dl->request(aarp_dl, skb, aarp_eth_multicast);
+}
+
+/*
+ *	Handle an aarp timer expire
+ *
+ *	Must run under the aarp_lock.
+ */
+
+static void __aarp_expire_timer(struct aarp_entry **n)
+{
+	struct aarp_entry *t;
+
+	while (*n)
+		/* Expired ? */
+		if (time_after(jiffies, (*n)->expires_at)) {
+			t = *n;
+			*n = (*n)->next;
+			__aarp_expire(t);
+		} else
+			n = &((*n)->next);
+}
+
+/*
+ *	Kick all pending requests 5 times a second.
+ *
+ *	Must run under the aarp_lock.
+ */
+static void __aarp_kick(struct aarp_entry **n)
+{
+	struct aarp_entry *t;
+
+	while (*n)
+		/* Expired: if this will be the 11th tx, we delete instead. */
+		if ((*n)->xmit_count >= sysctl_aarp_retransmit_limit) {
+			t = *n;
+			*n = (*n)->next;
+			__aarp_expire(t);
+		} else {
+			__aarp_send_query(*n);
+			n = &((*n)->next);
+		}
+}
+
+/*
+ *	A device has gone down. Take all entries referring to the device
+ *	and remove them.
+ *
+ *	Must run under the aarp_lock.
+ */
+static void __aarp_expire_device(struct aarp_entry **n, struct net_device *dev)
+{
+	struct aarp_entry *t;
+
+	while (*n)
+		if ((*n)->dev == dev) {
+			t = *n;
+			*n = (*n)->next;
+			__aarp_expire(t);
+		} else
+			n = &((*n)->next);
+}
+
+/* Handle the timer event */
+static void aarp_expire_timeout(unsigned long unused)
+{
+	int ct;
+
+	write_lock_bh(&aarp_lock);
+
+	for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+		__aarp_expire_timer(&resolved[ct]);
+		__aarp_kick(&unresolved[ct]);
+		__aarp_expire_timer(&unresolved[ct]);
+		__aarp_expire_timer(&proxies[ct]);
+	}
+
+	write_unlock_bh(&aarp_lock);
+	mod_timer(&aarp_timer, jiffies +
+			       (unresolved_count ? sysctl_aarp_tick_time :
+				sysctl_aarp_expiry_time));
+}
+
+/* Network device notifier chain handler. */
+static int aarp_device_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	int ct;
+
+	if (event == NETDEV_DOWN) {
+		write_lock_bh(&aarp_lock);
+
+		for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+			__aarp_expire_device(&resolved[ct], ptr);
+			__aarp_expire_device(&unresolved[ct], ptr);
+			__aarp_expire_device(&proxies[ct], ptr);
+		}
+
+		write_unlock_bh(&aarp_lock);
+	}
+	return NOTIFY_DONE;
+}
+
+/* Expire all entries in a hash chain */
+static void __aarp_expire_all(struct aarp_entry **n)
+{
+	struct aarp_entry *t;
+
+	while (*n) {
+		t = *n;
+		*n = (*n)->next;
+		__aarp_expire(t);
+	}
+}
+
+/* Cleanup all hash chains -- module unloading */
+static void aarp_purge(void)
+{
+	int ct;
+
+	write_lock_bh(&aarp_lock);
+	for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+		__aarp_expire_all(&resolved[ct]);
+		__aarp_expire_all(&unresolved[ct]);
+		__aarp_expire_all(&proxies[ct]);
+	}
+	write_unlock_bh(&aarp_lock);
+}
+
+/*
+ *	Create a new aarp entry.  This must use GFP_ATOMIC because it
+ *	runs while holding spinlocks.
+ */
+static struct aarp_entry *aarp_alloc(void)
+{
+	struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC);
+
+	if (a)
+		skb_queue_head_init(&a->packet_queue);
+	return a;
+}
+
+/*
+ * Find an entry. We might return an expired but not yet purged entry. We
+ * don't care as it will do no harm.
+ *
+ * This must run under the aarp_lock.
+ */
+static struct aarp_entry *__aarp_find_entry(struct aarp_entry *list,
+					    struct net_device *dev,
+					    struct atalk_addr *sat)
+{
+	while (list) {
+		if (list->target_addr.s_net == sat->s_net &&
+		    list->target_addr.s_node == sat->s_node &&
+		    list->dev == dev)
+			break;
+		list = list->next;
+	}
+
+	return list;
+}
+
+/* Called from the DDP code, and thus must be exported. */
+void aarp_proxy_remove(struct net_device *dev, struct atalk_addr *sa)
+{
+	int hash = sa->s_node % (AARP_HASH_SIZE - 1);
+	struct aarp_entry *a;
+
+	write_lock_bh(&aarp_lock);
+
+	a = __aarp_find_entry(proxies[hash], dev, sa);
+	if (a)
+		a->expires_at = jiffies - 1;
+
+	write_unlock_bh(&aarp_lock);
+}
+
+/* This must run under aarp_lock. */
+static struct atalk_addr *__aarp_proxy_find(struct net_device *dev,
+					    struct atalk_addr *sa)
+{
+	int hash = sa->s_node % (AARP_HASH_SIZE - 1);
+	struct aarp_entry *a = __aarp_find_entry(proxies[hash], dev, sa);
+
+	return a ? sa : NULL;
+}
+
+/*
+ * Probe a Phase 1 device or a device that requires its Net:Node to
+ * be set via an ioctl.
+ */
+static void aarp_send_probe_phase1(struct atalk_iface *iface)
+{
+	struct ifreq atreq;
+	struct sockaddr_at *sa = (struct sockaddr_at *)&atreq.ifr_addr;
+
+	sa->sat_addr.s_node = iface->address.s_node;
+	sa->sat_addr.s_net = ntohs(iface->address.s_net);
+
+	/* We pass the Net:Node to the drivers/cards by a Device ioctl. */
+	if (!(iface->dev->do_ioctl(iface->dev, &atreq, SIOCSIFADDR))) {
+		(void)iface->dev->do_ioctl(iface->dev, &atreq, SIOCGIFADDR);
+		if (iface->address.s_net != htons(sa->sat_addr.s_net) ||
+		    iface->address.s_node != sa->sat_addr.s_node)
+			iface->status |= ATIF_PROBE_FAIL;
+
+		iface->address.s_net  = htons(sa->sat_addr.s_net);
+		iface->address.s_node = sa->sat_addr.s_node;
+	}
+}
+
+
+void aarp_probe_network(struct atalk_iface *atif)
+{
+	if (atif->dev->type == ARPHRD_LOCALTLK ||
+	    atif->dev->type == ARPHRD_PPP)
+		aarp_send_probe_phase1(atif);
+	else {
+		unsigned int count;
+
+		for (count = 0; count < AARP_RETRANSMIT_LIMIT; count++) {
+			aarp_send_probe(atif->dev, &atif->address);
+
+			/* Defer 1/10th */
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(HZ / 10);
+
+			if (atif->status & ATIF_PROBE_FAIL)
+				break;
+		}
+	}
+}
+
+int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
+{
+	int hash, retval = -EPROTONOSUPPORT;
+	struct aarp_entry *entry;
+	unsigned int count;
+
+	/*
+	 * we don't currently support LocalTalk or PPP for proxy AARP;
+	 * if someone wants to try and add it, have fun
+	 */
+	if (atif->dev->type == ARPHRD_LOCALTLK ||
+	    atif->dev->type == ARPHRD_PPP)
+		goto out;
+
+	/*
+	 * create a new AARP entry with the flags set to be published --
+	 * we need this one to hang around even if it's in use
+	 */
+	entry = aarp_alloc();
+	retval = -ENOMEM;
+	if (!entry)
+		goto out;
+
+	entry->expires_at = -1;
+	entry->status = ATIF_PROBE;
+	entry->target_addr.s_node = sa->s_node;
+	entry->target_addr.s_net = sa->s_net;
+	entry->dev = atif->dev;
+
+	write_lock_bh(&aarp_lock);
+
+	hash = sa->s_node % (AARP_HASH_SIZE - 1);
+	entry->next = proxies[hash];
+	proxies[hash] = entry;
+
+	for (count = 0; count < AARP_RETRANSMIT_LIMIT; count++) {
+		aarp_send_probe(atif->dev, sa);
+
+		/* Defer 1/10th */
+		current->state = TASK_INTERRUPTIBLE;
+		write_unlock_bh(&aarp_lock);
+		schedule_timeout(HZ / 10);
+		write_lock_bh(&aarp_lock);
+
+		if (entry->status & ATIF_PROBE_FAIL)
+			break;
+	}
+
+	if (entry->status & ATIF_PROBE_FAIL) {
+		entry->expires_at = jiffies - 1; /* free the entry */
+		retval = -EADDRINUSE; /* return network full */
+	} else { /* clear the probing flag */
+		entry->status &= ~ATIF_PROBE;
+		retval = 1;
+	}
+
+	write_unlock_bh(&aarp_lock);
+out:
+	return retval;
+}
+
+/* Send a DDP frame */
+int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
+		  struct atalk_addr *sa, void *hwaddr)
+{
+	static char ddp_eth_multicast[ETH_ALEN] =
+		{ 0x09, 0x00, 0x07, 0xFF, 0xFF, 0xFF };
+	int hash;
+	struct aarp_entry *a;
+
+	skb->nh.raw = skb->data;
+
+	/* Check for LocalTalk first */
+	if (dev->type == ARPHRD_LOCALTLK) {
+		struct atalk_addr *at = atalk_find_dev_addr(dev);
+		struct ddpehdr *ddp = (struct ddpehdr *)skb->data;
+		int ft = 2;
+
+		/*
+		 * Compressible ?
+		 *
+		 * IFF: src_net == dest_net == device_net
+		 * (zero matches anything)
+		 */
+
+		if ((!ddp->deh_snet || at->s_net == ddp->deh_snet) &&
+		    (!ddp->deh_dnet || at->s_net == ddp->deh_dnet)) {
+			skb_pull(skb, sizeof(*ddp) - 4);
+
+			/*
+			 *	The upper two remaining bytes are the port
+			 *	numbers	we just happen to need. Now put the
+			 *	length in the lower two.
+			 */
+			*((__u16 *)skb->data) = htons(skb->len);
+			ft = 1;
+		}
+		/*
+		 * Nice and easy. No AARP type protocols occur here so we can
+		 * just shovel it out with a 3 byte LLAP header
+		 */
+
+		skb_push(skb, 3);
+		skb->data[0] = sa->s_node;
+		skb->data[1] = at->s_node;
+		skb->data[2] = ft;
+		skb->dev     = dev;
+		goto sendit;
+	}
+
+	/* On a PPP link we neither compress nor aarp.  */
+	if (dev->type == ARPHRD_PPP) {
+		skb->protocol = htons(ETH_P_PPPTALK);
+		skb->dev = dev;
+		goto sendit;
+	}
+
+	/* Non ELAP we cannot do. */
+	if (dev->type != ARPHRD_ETHER)
+		return -1;
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_ATALK);
+	hash = sa->s_node % (AARP_HASH_SIZE - 1);
+
+	/* Do we have a resolved entry? */
+	if (sa->s_node == ATADDR_BCAST) {
+		/* Send it */
+		ddp_dl->request(ddp_dl, skb, ddp_eth_multicast);
+		goto sent;
+	}
+
+	write_lock_bh(&aarp_lock);
+	a = __aarp_find_entry(resolved[hash], dev, sa);
+
+	if (a) { /* Return 1 and fill in the address */
+		a->expires_at = jiffies + (sysctl_aarp_expiry_time * 10);
+		ddp_dl->request(ddp_dl, skb, a->hwaddr);
+		write_unlock_bh(&aarp_lock);
+		goto sent;
+	}
+
+	/* Do we have an unresolved entry: This is the less common path */
+	a = __aarp_find_entry(unresolved[hash], dev, sa);
+	if (a) { /* Queue onto the unresolved queue */
+		skb_queue_tail(&a->packet_queue, skb);
+		goto out_unlock;
+	}
+
+	/* Allocate a new entry */
+	a = aarp_alloc();
+	if (!a) {
+		/* Whoops slipped... good job it's an unreliable protocol 8) */
+		write_unlock_bh(&aarp_lock);
+		return -1;
+	}
+
+	/* Set up the queue */
+	skb_queue_tail(&a->packet_queue, skb);
+	a->expires_at	 = jiffies + sysctl_aarp_resolve_time;
+	a->dev		 = dev;
+	a->next		 = unresolved[hash];
+	a->target_addr	 = *sa;
+	a->xmit_count	 = 0;
+	unresolved[hash] = a;
+	unresolved_count++;
+
+	/* Send an initial request for the address */
+	__aarp_send_query(a);
+
+	/*
+	 * Switch to fast timer if needed (That is if this is the first
+	 * unresolved entry to get added)
+	 */
+
+	if (unresolved_count == 1)
+		mod_timer(&aarp_timer, jiffies + sysctl_aarp_tick_time);
+
+	/* Now finally, it is safe to drop the lock. */
+out_unlock:
+	write_unlock_bh(&aarp_lock);
+
+	/* Tell the ddp layer we have taken over for this frame. */
+	return 0;
+
+sendit:
+	if (skb->sk)
+		skb->priority = skb->sk->sk_priority;
+	dev_queue_xmit(skb);
+sent:
+	return 1;
+}
+
+/*
+ *	An entry in the aarp unresolved queue has become resolved. Send
+ *	all the frames queued under it.
+ *
+ *	Must run under aarp_lock.
+ */
+static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a,
+			    int hash)
+{
+	struct sk_buff *skb;
+
+	while (*list)
+		if (*list == a) {
+			unresolved_count--;
+			*list = a->next;
+
+			/* Move into the resolved list */
+			a->next = resolved[hash];
+			resolved[hash] = a;
+
+			/* Kick frames off */
+			while ((skb = skb_dequeue(&a->packet_queue)) != NULL) {
+				a->expires_at = jiffies +
+						sysctl_aarp_expiry_time * 10;
+				ddp_dl->request(ddp_dl, skb, a->hwaddr);
+			}
+		} else
+			list = &((*list)->next);
+}
+
+/*
+ *	This is called by the SNAP driver whenever we see an AARP SNAP
+ *	frame. We currently only support Ethernet.
+ */
+static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
+		    struct packet_type *pt)
+{
+	struct elapaarp *ea = aarp_hdr(skb);
+	int hash, ret = 0;
+	__u16 function;
+	struct aarp_entry *a;
+	struct atalk_addr sa, *ma, da;
+	struct atalk_iface *ifa;
+
+	/* We only do Ethernet SNAP AARP. */
+	if (dev->type != ARPHRD_ETHER)
+		goto out0;
+
+	/* Frame size ok? */
+	if (!skb_pull(skb, sizeof(*ea)))
+		goto out0;
+
+	function = ntohs(ea->function);
+
+	/* Sanity check fields. */
+	if (function < AARP_REQUEST || function > AARP_PROBE ||
+	    ea->hw_len != ETH_ALEN || ea->pa_len != AARP_PA_ALEN ||
+	    ea->pa_src_zero || ea->pa_dst_zero)
+		goto out0;
+
+	/* Looks good. */
+	hash = ea->pa_src_node % (AARP_HASH_SIZE - 1);
+
+	/* Build an address. */
+	sa.s_node = ea->pa_src_node;
+	sa.s_net = ea->pa_src_net;
+
+	/* Process the packet. Check for replies of me. */
+	ifa = atalk_find_dev(dev);
+	if (!ifa)
+		goto out1;
+
+	if (ifa->status & ATIF_PROBE &&
+	    ifa->address.s_node == ea->pa_dst_node &&
+	    ifa->address.s_net == ea->pa_dst_net) {
+		ifa->status |= ATIF_PROBE_FAIL; /* Fail the probe (in use) */
+		goto out1;
+	}
+
+	/* Check for replies of proxy AARP entries */
+	da.s_node = ea->pa_dst_node;
+	da.s_net  = ea->pa_dst_net;
+
+	write_lock_bh(&aarp_lock);
+	a = __aarp_find_entry(proxies[hash], dev, &da);
+
+	if (a && a->status & ATIF_PROBE) {
+		a->status |= ATIF_PROBE_FAIL;
+		/*
+		 * we do not respond to probe or request packets for
+		 * this address while we are probing this address
+		 */
+		goto unlock;
+	}
+
+	switch (function) {
+		case AARP_REPLY:
+			if (!unresolved_count)	/* Speed up */
+				break;
+
+			/* Find the entry.  */
+			a = __aarp_find_entry(unresolved[hash], dev, &sa);
+			if (!a || dev != a->dev)
+				break;
+
+			/* We can fill one in - this is good. */
+			memcpy(a->hwaddr, ea->hw_src, ETH_ALEN);
+			__aarp_resolved(&unresolved[hash], a, hash);
+			if (!unresolved_count)
+				mod_timer(&aarp_timer,
+					  jiffies + sysctl_aarp_expiry_time);
+			break;
+
+		case AARP_REQUEST:
+		case AARP_PROBE:
+
+			/*
+			 * If it is my address set ma to my address and reply.
+			 * We can treat probe and request the same.  Probe
+			 * simply means we shouldn't cache the querying host,
+			 * as in a probe they are proposing an address not
+			 * using one.
+			 *
+			 * Support for proxy-AARP added. We check if the
+			 * address is one of our proxies before we toss the
+			 * packet out.
+			 */
+
+			sa.s_node = ea->pa_dst_node;
+			sa.s_net  = ea->pa_dst_net;
+
+			/* See if we have a matching proxy. */
+			ma = __aarp_proxy_find(dev, &sa);
+			if (!ma)
+				ma = &ifa->address;
+			else { /* We need to make a copy of the entry. */
+				da.s_node = sa.s_node;
+				da.s_net = da.s_net;
+				ma = &da;
+			}
+
+			if (function == AARP_PROBE) {
+				/*
+				 * A probe implies someone trying to get an
+				 * address. So as a precaution flush any
+				 * entries we have for this address.
+				 */
+				struct aarp_entry *a;
+
+				a = __aarp_find_entry(resolved[sa.s_node %
+							  (AARP_HASH_SIZE - 1)],
+						      skb->dev, &sa);
+
+				/*
+				 * Make it expire next tick - that avoids us
+				 * getting into a probe/flush/learn/probe/
+				 * flush/learn cycle during probing of a slow
+				 * to respond host addr.
+				 */
+				if (a) {
+					a->expires_at = jiffies - 1;
+					mod_timer(&aarp_timer, jiffies +
+							sysctl_aarp_tick_time);
+				}
+			}
+
+			if (sa.s_node != ma->s_node)
+				break;
+
+			if (sa.s_net && ma->s_net && sa.s_net != ma->s_net)
+				break;
+
+			sa.s_node = ea->pa_src_node;
+			sa.s_net = ea->pa_src_net;
+
+			/* aarp_my_address has found the address to use for us.
+			*/
+			aarp_send_reply(dev, ma, &sa, ea->hw_src);
+			break;
+	}
+
+unlock:
+	write_unlock_bh(&aarp_lock);
+out1:
+	ret = 1;
+out0:
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct notifier_block aarp_notifier = {
+	.notifier_call = aarp_device_event,
+};
+
+static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
+
+void __init aarp_proto_init(void)
+{
+	aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
+	if (!aarp_dl)
+		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+	init_timer(&aarp_timer);
+	aarp_timer.function = aarp_expire_timeout;
+	aarp_timer.data	    = 0;
+	aarp_timer.expires  = jiffies + sysctl_aarp_expiry_time;
+	add_timer(&aarp_timer);
+	register_netdevice_notifier(&aarp_notifier);
+}
+
+/* Remove the AARP entries associated with a device. */
+void aarp_device_down(struct net_device *dev)
+{
+	int ct;
+
+	write_lock_bh(&aarp_lock);
+
+	for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+		__aarp_expire_device(&resolved[ct], dev);
+		__aarp_expire_device(&unresolved[ct], dev);
+		__aarp_expire_device(&proxies[ct], dev);
+	}
+
+	write_unlock_bh(&aarp_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+struct aarp_iter_state {
+	int bucket;
+	struct aarp_entry **table;
+};
+
+/*
+ * Get the aarp entry that is in the chain described
+ * by the iterator. 
+ * If pos is set then skip till that index.
+ * pos = 1 is the first entry
+ */
+static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
+{
+	int ct = iter->bucket;
+	struct aarp_entry **table = iter->table;
+	loff_t off = 0;
+	struct aarp_entry *entry;
+	
+ rescan:
+	while(ct < AARP_HASH_SIZE) {
+		for (entry = table[ct]; entry; entry = entry->next) {
+			if (!pos || ++off == *pos) {
+				iter->table = table;
+				iter->bucket = ct;
+				return entry;
+			}
+		}
+		++ct;
+	}
+
+	if (table == resolved) {
+		ct = 0;
+		table = unresolved;
+		goto rescan;
+	}
+	if (table == unresolved) {
+		ct = 0;
+		table = proxies;
+		goto rescan;
+	}
+	return NULL;
+}
+
+static void *aarp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct aarp_iter_state *iter = seq->private;
+
+	read_lock_bh(&aarp_lock);
+	iter->table     = resolved;
+	iter->bucket    = 0;
+
+	return *pos ? iter_next(iter, pos) : SEQ_START_TOKEN;
+}
+
+static void *aarp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct aarp_entry *entry = v;
+	struct aarp_iter_state *iter = seq->private;
+
+	++*pos;
+
+	/* first line after header */
+	if (v == SEQ_START_TOKEN) 
+		entry = iter_next(iter, NULL);
+		
+	/* next entry in current bucket */
+	else if (entry->next)
+		entry = entry->next;
+
+	/* next bucket or table */
+	else {
+		++iter->bucket;
+		entry = iter_next(iter, NULL);
+	}
+	return entry;
+}
+
+static void aarp_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&aarp_lock);
+}
+
+static const char *dt2str(unsigned long ticks)
+{
+	static char buf[32];
+
+	sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100 ) / HZ);
+
+	return buf;
+}
+
+static int aarp_seq_show(struct seq_file *seq, void *v)
+{
+	struct aarp_iter_state *iter = seq->private;
+	struct aarp_entry *entry = v;
+	unsigned long now = jiffies;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, 
+			 "Address  Interface   Hardware Address"
+			 "   Expires LastSend  Retry Status\n");
+	else {
+		seq_printf(seq, "%04X:%02X  %-12s",
+			   ntohs(entry->target_addr.s_net),
+			   (unsigned int) entry->target_addr.s_node,
+			   entry->dev ? entry->dev->name : "????");
+		seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X",
+			   entry->hwaddr[0] & 0xFF,
+			   entry->hwaddr[1] & 0xFF,
+			   entry->hwaddr[2] & 0xFF,
+			   entry->hwaddr[3] & 0xFF,
+			   entry->hwaddr[4] & 0xFF,
+			   entry->hwaddr[5] & 0xFF);
+		seq_printf(seq, " %8s",
+			   dt2str((long)entry->expires_at - (long)now));
+		if (iter->table == unresolved)
+			seq_printf(seq, " %8s %6hu",
+				   dt2str(now - entry->last_sent),
+				   entry->xmit_count);
+		else
+			seq_puts(seq, "                ");
+		seq_printf(seq, " %s\n",
+			   (iter->table == resolved) ? "resolved"
+			   : (iter->table == unresolved) ? "unresolved"
+			   : (iter->table == proxies) ? "proxies"
+			   : "unknown");
+	}				 
+	return 0;
+}
+
+static struct seq_operations aarp_seq_ops = {
+	.start  = aarp_seq_start,
+	.next   = aarp_seq_next,
+	.stop   = aarp_seq_stop,
+	.show   = aarp_seq_show,
+};
+
+static int aarp_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct aarp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &aarp_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+struct file_operations atalk_seq_arp_fops = {
+	.owner		= THIS_MODULE,
+	.open           = aarp_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+#endif
+
+/* General module cleanup. Called from cleanup_module() in ddp.c. */
+void aarp_cleanup_module(void)
+{
+	del_timer_sync(&aarp_timer);
+	unregister_netdevice_notifier(&aarp_notifier);
+	unregister_snap_client(aarp_dl);
+	aarp_purge();
+}
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
new file mode 100644
index 00000000000..dc4048dd98c
--- /dev/null
+++ b/net/appletalk/atalk_proc.c
@@ -0,0 +1,321 @@
+/*
+ * 	atalk_proc.c - proc support for Appletalk
+ *
+ * 	Copyright(c) Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License as published by the
+ *	Free Software Foundation, version 2.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <linux/atalk.h>
+
+
+static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
+{
+	struct atalk_iface *i;
+
+	for (i = atalk_interfaces; pos && i; i = i->next)
+		--pos;
+
+	return i;
+}
+
+static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&atalk_interfaces_lock);
+	return l ? atalk_get_interface_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *atalk_seq_interface_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct atalk_iface *i;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		i = NULL;
+		if (atalk_interfaces)
+			i = atalk_interfaces;
+		goto out;
+	}
+	i = v;
+	i = i->next;
+out:
+	return i;
+}
+
+static void atalk_seq_interface_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&atalk_interfaces_lock);
+}
+
+static int atalk_seq_interface_show(struct seq_file *seq, void *v)
+{
+	struct atalk_iface *iface;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Interface        Address   Networks  "
+			      "Status\n");
+		goto out;
+	}
+
+	iface = v;
+	seq_printf(seq, "%-16s %04X:%02X  %04X-%04X  %d\n",
+		   iface->dev->name, ntohs(iface->address.s_net),
+		   iface->address.s_node, ntohs(iface->nets.nr_firstnet),
+		   ntohs(iface->nets.nr_lastnet), iface->status);
+out:
+	return 0;
+}
+
+static __inline__ struct atalk_route *atalk_get_route_idx(loff_t pos)
+{
+	struct atalk_route *r;
+
+	for (r = atalk_routes; pos && r; r = r->next)
+		--pos;
+
+	return r;
+}
+
+static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&atalk_routes_lock);
+	return l ? atalk_get_route_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *atalk_seq_route_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct atalk_route *r;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		r = NULL;
+		if (atalk_routes)
+			r = atalk_routes;
+		goto out;
+	}
+	r = v;
+	r = r->next;
+out:
+	return r;
+}
+
+static void atalk_seq_route_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&atalk_routes_lock);
+}
+
+static int atalk_seq_route_show(struct seq_file *seq, void *v)
+{
+	struct atalk_route *rt;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Target        Router  Flags Dev\n");
+		goto out;
+	}
+
+	if (atrtr_default.dev) {
+		rt = &atrtr_default;
+		seq_printf(seq, "Default     %04X:%02X  %-4d  %s\n",
+			       ntohs(rt->gateway.s_net), rt->gateway.s_node,
+			       rt->flags, rt->dev->name);
+	}
+
+	rt = v;
+	seq_printf(seq, "%04X:%02X     %04X:%02X  %-4d  %s\n",
+		   ntohs(rt->target.s_net), rt->target.s_node,
+		   ntohs(rt->gateway.s_net), rt->gateway.s_node,
+		   rt->flags, rt->dev->name);
+out:
+	return 0;
+}
+
+static __inline__ struct sock *atalk_get_socket_idx(loff_t pos)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &atalk_sockets)
+		if (!pos--)
+			goto found;
+	s = NULL;
+found:
+	return s;
+}
+
+static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&atalk_sockets_lock);
+	return l ? atalk_get_socket_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *atalk_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *i;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		i = sk_head(&atalk_sockets);
+		goto out;
+	}
+	i = sk_next(v);
+out:
+	return i;
+}
+
+static void atalk_seq_socket_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&atalk_sockets_lock);
+}
+
+static int atalk_seq_socket_show(struct seq_file *seq, void *v)
+{
+	struct sock *s;
+	struct atalk_sock *at;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "Type Local_addr  Remote_addr Tx_queue "
+				"Rx_queue St UID\n");
+		goto out;
+	}
+
+	s = v;
+	at = at_sk(s);
+
+	seq_printf(seq, "%02X   %04X:%02X:%02X  %04X:%02X:%02X  %08X:%08X "
+			"%02X %d\n",
+		   s->sk_type, ntohs(at->src_net), at->src_node, at->src_port,
+		   ntohs(at->dest_net), at->dest_node, at->dest_port,
+		   atomic_read(&s->sk_wmem_alloc),
+		   atomic_read(&s->sk_rmem_alloc),
+		   s->sk_state, SOCK_INODE(s->sk_socket)->i_uid);
+out:
+	return 0;
+}
+
+static struct seq_operations atalk_seq_interface_ops = {
+	.start  = atalk_seq_interface_start,
+	.next   = atalk_seq_interface_next,
+	.stop   = atalk_seq_interface_stop,
+	.show   = atalk_seq_interface_show,
+};
+
+static struct seq_operations atalk_seq_route_ops = {
+	.start  = atalk_seq_route_start,
+	.next   = atalk_seq_route_next,
+	.stop   = atalk_seq_route_stop,
+	.show   = atalk_seq_route_show,
+};
+
+static struct seq_operations atalk_seq_socket_ops = {
+	.start  = atalk_seq_socket_start,
+	.next   = atalk_seq_socket_next,
+	.stop   = atalk_seq_socket_stop,
+	.show   = atalk_seq_socket_show,
+};
+
+static int atalk_seq_interface_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &atalk_seq_interface_ops);
+}
+
+static int atalk_seq_route_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &atalk_seq_route_ops);
+}
+
+static int atalk_seq_socket_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &atalk_seq_socket_ops);
+}
+
+static struct file_operations atalk_seq_interface_fops = {
+	.owner		= THIS_MODULE,
+	.open		= atalk_seq_interface_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct file_operations atalk_seq_route_fops = {
+	.owner		= THIS_MODULE,
+	.open		= atalk_seq_route_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct file_operations atalk_seq_socket_fops = {
+	.owner		= THIS_MODULE,
+	.open		= atalk_seq_socket_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct proc_dir_entry *atalk_proc_dir;
+
+int __init atalk_proc_init(void)
+{
+	struct proc_dir_entry *p;
+	int rc = -ENOMEM;
+
+	atalk_proc_dir = proc_mkdir("atalk", proc_net);
+	if (!atalk_proc_dir)
+		goto out;
+	atalk_proc_dir->owner = THIS_MODULE;
+
+	p = create_proc_entry("interface", S_IRUGO, atalk_proc_dir);
+	if (!p)
+		goto out_interface;
+	p->proc_fops = &atalk_seq_interface_fops;
+
+	p = create_proc_entry("route", S_IRUGO, atalk_proc_dir);
+	if (!p)
+		goto out_route;
+	p->proc_fops = &atalk_seq_route_fops;
+
+	p = create_proc_entry("socket", S_IRUGO, atalk_proc_dir);
+	if (!p)
+		goto out_socket;
+	p->proc_fops = &atalk_seq_socket_fops;
+
+	p = create_proc_entry("arp", S_IRUGO, atalk_proc_dir);
+	if (!p) 
+		goto out_arp;
+	p->proc_fops = &atalk_seq_arp_fops;
+
+	rc = 0;
+out:
+	return rc;
+out_arp:
+	remove_proc_entry("socket", atalk_proc_dir);
+out_socket:
+	remove_proc_entry("route", atalk_proc_dir);
+out_route:
+	remove_proc_entry("interface", atalk_proc_dir);
+out_interface:
+	remove_proc_entry("atalk", proc_net);
+	goto out;
+}
+
+void __exit atalk_proc_exit(void)
+{
+	remove_proc_entry("interface", atalk_proc_dir);
+	remove_proc_entry("route", atalk_proc_dir);
+	remove_proc_entry("socket", atalk_proc_dir);
+	remove_proc_entry("arp", atalk_proc_dir);
+	remove_proc_entry("atalk", proc_net);
+}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
new file mode 100644
index 00000000000..d1fea5c3dda
--- /dev/null
+++ b/net/appletalk/ddp.c
@@ -0,0 +1,1931 @@
+/*
+ *	DDP:	An implementation of the AppleTalk DDP protocol for
+ *		Ethernet 'ELAP'.
+ *
+ *		Alan Cox  <Alan.Cox@linux.org>
+ *
+ *		With more than a little assistance from
+ *
+ *		Wesley Craig <netatalk@umich.edu>
+ *
+ *	Fixes:
+ *		Neil Horman		:	Added missing device ioctls
+ *		Michael Callahan	:	Made routing work
+ *		Wesley Craig		:	Fix probing to listen to a
+ *						passed node id.
+ *		Alan Cox		:	Added send/recvmsg support
+ *		Alan Cox		:	Moved at. to protinfo in
+ *						socket.
+ *		Alan Cox		:	Added firewall hooks.
+ *		Alan Cox		:	Supports new ARPHRD_LOOPBACK
+ *		Christer Weinigel	: 	Routing and /proc fixes.
+ *		Bradford Johnson	:	LocalTalk.
+ *		Tom Dyas		:	Module support.
+ *		Alan Cox		:	Hooks for PPP (based on the
+ *						LocalTalk hook).
+ *		Alan Cox		:	Posix bits
+ *		Alan Cox/Mike Freeman	:	Possible fix to NBP problems
+ *		Bradford Johnson	:	IP-over-DDP (experimental)
+ *		Jay Schulist		:	Moved IP-over-DDP to its own
+ *						driver file. (ipddp.c & ipddp.h)
+ *		Jay Schulist		:	Made work as module with 
+ *						AppleTalk drivers, cleaned it.
+ *		Rob Newberry		:	Added proxy AARP and AARP
+ *						procfs, moved probing to AARP
+ *						module.
+ *              Adrian Sun/ 
+ *              Michael Zuelsdorff      :       fix for net.0 packets. don't 
+ *                                              allow illegal ether/tokentalk
+ *                                              port assignment. we lose a 
+ *                                              valid localtalk port as a 
+ *                                              result.
+ *		Arnaldo C. de Melo	:	Cleanup, in preparation for
+ *						shared skb support 8)
+ *		Arnaldo C. de Melo	:	Move proc stuff to atalk_proc.c,
+ *						use seq_file
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ * 
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/if_arp.h>
+#include <linux/termios.h>	/* For TIOCOUTQ/INQ */
+#include <net/datalink.h>
+#include <net/psnap.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/atalk.h>
+
+struct datalink_proto *ddp_dl, *aarp_dl;
+static struct proto_ops atalk_dgram_ops;
+
+/**************************************************************************\
+*                                                                          *
+* Handlers for the socket list.                                            *
+*                                                                          *
+\**************************************************************************/
+
+HLIST_HEAD(atalk_sockets);
+DEFINE_RWLOCK(atalk_sockets_lock);
+
+static inline void __atalk_insert_socket(struct sock *sk)
+{
+	sk_add_node(sk, &atalk_sockets);
+}
+
+static inline void atalk_remove_socket(struct sock *sk)
+{
+	write_lock_bh(&atalk_sockets_lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&atalk_sockets_lock);
+}
+
+static struct sock *atalk_search_socket(struct sockaddr_at *to,
+					struct atalk_iface *atif)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	read_lock_bh(&atalk_sockets_lock);
+	sk_for_each(s, node, &atalk_sockets) {
+		struct atalk_sock *at = at_sk(s);
+
+		if (to->sat_port != at->src_port)
+			continue;
+
+	    	if (to->sat_addr.s_net == ATADDR_ANYNET &&
+		    to->sat_addr.s_node == ATADDR_BCAST &&
+		    at->src_net == atif->address.s_net)
+			goto found;
+
+	    	if (to->sat_addr.s_net == at->src_net &&
+		    (to->sat_addr.s_node == at->src_node ||
+		     to->sat_addr.s_node == ATADDR_BCAST ||
+		     to->sat_addr.s_node == ATADDR_ANYNODE))
+			goto found;
+
+	    	/* XXXX.0 -- we got a request for this router. make sure
+		 * that the node is appropriately set. */
+		if (to->sat_addr.s_node == ATADDR_ANYNODE &&
+		    to->sat_addr.s_net != ATADDR_ANYNET &&
+		    atif->address.s_node == at->src_node) {
+			to->sat_addr.s_node = atif->address.s_node;
+			goto found;
+		}
+	}
+	s = NULL;
+found:
+	read_unlock_bh(&atalk_sockets_lock);
+	return s;
+}
+
+/**
+ * atalk_find_or_insert_socket - Try to find a socket matching ADDR
+ * @sk - socket to insert in the list if it is not there already
+ * @sat - address to search for
+ *
+ * Try to find a socket matching ADDR in the socket list, if found then return
+ * it. If not, insert SK into the socket list.
+ *
+ * This entire operation must execute atomically.
+ */
+static struct sock *atalk_find_or_insert_socket(struct sock *sk,
+						struct sockaddr_at *sat)
+{
+	struct sock *s;
+	struct hlist_node *node;
+	struct atalk_sock *at;
+
+	write_lock_bh(&atalk_sockets_lock);
+	sk_for_each(s, node, &atalk_sockets) {
+		at = at_sk(s);
+
+		if (at->src_net == sat->sat_addr.s_net &&
+		    at->src_node == sat->sat_addr.s_node &&
+		    at->src_port == sat->sat_port)
+			goto found;
+	}
+	s = NULL;
+	__atalk_insert_socket(sk); /* Wheee, it's free, assign and insert. */
+found:
+	write_unlock_bh(&atalk_sockets_lock);
+	return s;
+}
+
+static void atalk_destroy_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	if (atomic_read(&sk->sk_wmem_alloc) ||
+	    atomic_read(&sk->sk_rmem_alloc)) {
+		sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
+		add_timer(&sk->sk_timer);
+	} else
+		sock_put(sk);
+}
+
+static inline void atalk_destroy_socket(struct sock *sk)
+{
+	atalk_remove_socket(sk);
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	if (atomic_read(&sk->sk_wmem_alloc) ||
+	    atomic_read(&sk->sk_rmem_alloc)) {
+		init_timer(&sk->sk_timer);
+		sk->sk_timer.expires	= jiffies + SOCK_DESTROY_TIME;
+		sk->sk_timer.function	= atalk_destroy_timer;
+		sk->sk_timer.data	= (unsigned long)sk;
+		add_timer(&sk->sk_timer);
+	} else
+		sock_put(sk);
+}
+
+/**************************************************************************\
+*                                                                          *
+* Routing tables for the AppleTalk socket layer.                           *
+*                                                                          *
+\**************************************************************************/
+
+/* Anti-deadlock ordering is atalk_routes_lock --> iface_lock -DaveM */
+struct atalk_route *atalk_routes;
+DEFINE_RWLOCK(atalk_routes_lock);
+
+struct atalk_iface *atalk_interfaces;
+DEFINE_RWLOCK(atalk_interfaces_lock);
+
+/* For probing devices or in a routerless network */
+struct atalk_route atrtr_default;
+
+/* AppleTalk interface control */
+/*
+ * Drop a device. Doesn't drop any of its routes - that is the caller's
+ * problem. Called when we down the interface or delete the address.
+ */
+static void atif_drop_device(struct net_device *dev)
+{
+	struct atalk_iface **iface = &atalk_interfaces;
+	struct atalk_iface *tmp;
+
+	write_lock_bh(&atalk_interfaces_lock);
+	while ((tmp = *iface) != NULL) {
+		if (tmp->dev == dev) {
+			*iface = tmp->next;
+			dev_put(dev);
+			kfree(tmp);
+			dev->atalk_ptr = NULL;
+		} else
+			iface = &tmp->next;
+	}
+	write_unlock_bh(&atalk_interfaces_lock);
+}
+
+static struct atalk_iface *atif_add_device(struct net_device *dev,
+					   struct atalk_addr *sa)
+{
+	struct atalk_iface *iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+
+	if (!iface)
+		goto out;
+
+	memset(iface, 0, sizeof(*iface));
+	dev_hold(dev);
+	iface->dev = dev;
+	dev->atalk_ptr = iface;
+	iface->address = *sa;
+	iface->status = 0;
+
+	write_lock_bh(&atalk_interfaces_lock);
+	iface->next = atalk_interfaces;
+	atalk_interfaces = iface;
+	write_unlock_bh(&atalk_interfaces_lock);
+out:
+	return iface;
+}
+
+/* Perform phase 2 AARP probing on our tentative address */
+static int atif_probe_device(struct atalk_iface *atif)
+{
+	int netrange = ntohs(atif->nets.nr_lastnet) -
+			ntohs(atif->nets.nr_firstnet) + 1;
+	int probe_net = ntohs(atif->address.s_net);
+	int probe_node = atif->address.s_node;
+	int netct, nodect;
+
+	/* Offset the network we start probing with */
+	if (probe_net == ATADDR_ANYNET) {
+		probe_net = ntohs(atif->nets.nr_firstnet);
+		if (netrange)
+			probe_net += jiffies % netrange;
+	}
+	if (probe_node == ATADDR_ANYNODE)
+		probe_node = jiffies & 0xFF;
+
+	/* Scan the networks */
+	atif->status |= ATIF_PROBE;
+	for (netct = 0; netct <= netrange; netct++) {
+		/* Sweep the available nodes from a given start */
+		atif->address.s_net = htons(probe_net);
+		for (nodect = 0; nodect < 256; nodect++) {
+			atif->address.s_node = (nodect + probe_node) & 0xFF;
+			if (atif->address.s_node > 0 &&
+			    atif->address.s_node < 254) {
+				/* Probe a proposed address */
+				aarp_probe_network(atif);
+
+				if (!(atif->status & ATIF_PROBE_FAIL)) {
+					atif->status &= ~ATIF_PROBE;
+					return 0;
+				}
+			}
+			atif->status &= ~ATIF_PROBE_FAIL;
+		}
+		probe_net++;
+		if (probe_net > ntohs(atif->nets.nr_lastnet))
+			probe_net = ntohs(atif->nets.nr_firstnet);
+	}
+	atif->status &= ~ATIF_PROBE;
+
+	return -EADDRINUSE;	/* Network is full... */
+}
+
+
+/* Perform AARP probing for a proxy address */
+static int atif_proxy_probe_device(struct atalk_iface *atif,
+				   struct atalk_addr* proxy_addr)
+{
+	int netrange = ntohs(atif->nets.nr_lastnet) -
+			ntohs(atif->nets.nr_firstnet) + 1;
+	/* we probe the interface's network */
+	int probe_net = ntohs(atif->address.s_net);
+	int probe_node = ATADDR_ANYNODE;	    /* we'll take anything */
+	int netct, nodect;
+
+	/* Offset the network we start probing with */
+	if (probe_net == ATADDR_ANYNET) {
+		probe_net = ntohs(atif->nets.nr_firstnet);
+		if (netrange)
+			probe_net += jiffies % netrange;
+	}
+
+	if (probe_node == ATADDR_ANYNODE)
+		probe_node = jiffies & 0xFF;
+		
+	/* Scan the networks */
+	for (netct = 0; netct <= netrange; netct++) {
+		/* Sweep the available nodes from a given start */
+		proxy_addr->s_net = htons(probe_net);
+		for (nodect = 0; nodect < 256; nodect++) {
+			proxy_addr->s_node = (nodect + probe_node) & 0xFF;
+			if (proxy_addr->s_node > 0 &&
+			    proxy_addr->s_node < 254) {
+				/* Tell AARP to probe a proposed address */
+				int ret = aarp_proxy_probe_network(atif,
+								    proxy_addr);
+
+				if (ret != -EADDRINUSE)
+					return ret;
+			}
+		}
+		probe_net++;
+		if (probe_net > ntohs(atif->nets.nr_lastnet))
+			probe_net = ntohs(atif->nets.nr_firstnet);
+	}
+
+	return -EADDRINUSE;	/* Network is full... */
+}
+
+
+struct atalk_addr *atalk_find_dev_addr(struct net_device *dev)
+{
+	struct atalk_iface *iface = dev->atalk_ptr;
+	return iface ? &iface->address : NULL;
+}
+
+static struct atalk_addr *atalk_find_primary(void)
+{
+	struct atalk_iface *fiface = NULL;
+	struct atalk_addr *retval;
+	struct atalk_iface *iface;
+
+	/*
+	 * Return a point-to-point interface only if
+	 * there is no non-ptp interface available.
+	 */
+	read_lock_bh(&atalk_interfaces_lock);
+	for (iface = atalk_interfaces; iface; iface = iface->next) {
+		if (!fiface && !(iface->dev->flags & IFF_LOOPBACK))
+			fiface = iface;
+		if (!(iface->dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))) {
+			retval = &iface->address;
+			goto out;
+		}
+	}
+
+	if (fiface)
+		retval = &fiface->address;
+	else if (atalk_interfaces)
+		retval = &atalk_interfaces->address;
+	else
+		retval = NULL;
+out:
+	read_unlock_bh(&atalk_interfaces_lock);
+	return retval;
+}
+
+/*
+ * Find a match for 'any network' - ie any of our interfaces with that
+ * node number will do just nicely.
+ */
+static struct atalk_iface *atalk_find_anynet(int node, struct net_device *dev)
+{
+	struct atalk_iface *iface = dev->atalk_ptr;
+
+	if (!iface || iface->status & ATIF_PROBE)
+		goto out_err;
+
+	if (node != ATADDR_BCAST &&
+	    iface->address.s_node != node &&
+	    node != ATADDR_ANYNODE)
+		goto out_err;
+out:
+	return iface;
+out_err:
+	iface = NULL;
+	goto out;
+}
+
+/* Find a match for a specific network:node pair */
+static struct atalk_iface *atalk_find_interface(int net, int node)
+{
+	struct atalk_iface *iface;
+
+	read_lock_bh(&atalk_interfaces_lock);
+	for (iface = atalk_interfaces; iface; iface = iface->next) {
+		if ((node == ATADDR_BCAST ||
+		     node == ATADDR_ANYNODE ||
+		     iface->address.s_node == node) &&
+		    iface->address.s_net == net &&
+		    !(iface->status & ATIF_PROBE))
+			break;
+
+		/* XXXX.0 -- net.0 returns the iface associated with net */
+		if (node == ATADDR_ANYNODE && net != ATADDR_ANYNET &&
+		    ntohs(iface->nets.nr_firstnet) <= ntohs(net) &&
+		    ntohs(net) <= ntohs(iface->nets.nr_lastnet))
+		        break;
+	}
+	read_unlock_bh(&atalk_interfaces_lock);
+	return iface;
+}
+
+
+/*
+ * Find a route for an AppleTalk packet. This ought to get cached in
+ * the socket (later on...). We know about host routes and the fact
+ * that a route must be direct to broadcast.
+ */
+static struct atalk_route *atrtr_find(struct atalk_addr *target)
+{
+	/*
+	 * we must search through all routes unless we find a 
+	 * host route, because some host routes might overlap
+	 * network routes
+	 */
+	struct atalk_route *net_route = NULL;
+	struct atalk_route *r;
+	
+	read_lock_bh(&atalk_routes_lock);
+	for (r = atalk_routes; r; r = r->next) {
+		if (!(r->flags & RTF_UP))
+			continue;
+
+		if (r->target.s_net == target->s_net) {
+			if (r->flags & RTF_HOST) {
+				/*
+				 * if this host route is for the target,
+				 * the we're done
+				 */
+				if (r->target.s_node == target->s_node)
+					goto out;
+			} else
+				/*
+				 * this route will work if there isn't a
+				 * direct host route, so cache it
+				 */
+				net_route = r;
+		}
+	}
+	
+	/* 
+	 * if we found a network route but not a direct host
+	 * route, then return it
+	 */
+	if (net_route)
+		r = net_route;
+	else if (atrtr_default.dev)
+		r = &atrtr_default;
+	else /* No route can be found */
+		r = NULL;
+out:
+	read_unlock_bh(&atalk_routes_lock);
+	return r;
+}
+
+
+/*
+ * Given an AppleTalk network, find the device to use. This can be
+ * a simple lookup.
+ */
+struct net_device *atrtr_get_dev(struct atalk_addr *sa)
+{
+	struct atalk_route *atr = atrtr_find(sa);
+	return atr ? atr->dev : NULL;
+}
+
+/* Set up a default router */
+static void atrtr_set_default(struct net_device *dev)
+{
+	atrtr_default.dev	     = dev;
+	atrtr_default.flags	     = RTF_UP;
+	atrtr_default.gateway.s_net  = htons(0);
+	atrtr_default.gateway.s_node = 0;
+}
+
+/*
+ * Add a router. Basically make sure it looks valid and stuff the
+ * entry in the list. While it uses netranges we always set them to one
+ * entry to work like netatalk.
+ */
+static int atrtr_create(struct rtentry *r, struct net_device *devhint)
+{
+	struct sockaddr_at *ta = (struct sockaddr_at *)&r->rt_dst;
+	struct sockaddr_at *ga = (struct sockaddr_at *)&r->rt_gateway;
+	struct atalk_route *rt;
+	struct atalk_iface *iface, *riface;
+	int retval = -EINVAL;
+
+	/*
+	 * Fixme: Raise/Lower a routing change semaphore for these
+	 * operations.
+	 */
+
+	/* Validate the request */
+	if (ta->sat_family != AF_APPLETALK ||
+	    (!devhint && ga->sat_family != AF_APPLETALK))
+		goto out;
+
+	/* Now walk the routing table and make our decisions */
+	write_lock_bh(&atalk_routes_lock);
+	for (rt = atalk_routes; rt; rt = rt->next) {
+		if (r->rt_flags != rt->flags)
+			continue;
+
+		if (ta->sat_addr.s_net == rt->target.s_net) {
+			if (!(rt->flags & RTF_HOST))
+				break;
+			if (ta->sat_addr.s_node == rt->target.s_node)
+				break;
+		}
+	}
+
+	if (!devhint) {
+		riface = NULL;
+
+		read_lock_bh(&atalk_interfaces_lock);
+		for (iface = atalk_interfaces; iface; iface = iface->next) {
+			if (!riface &&
+			    ntohs(ga->sat_addr.s_net) >=
+			    		ntohs(iface->nets.nr_firstnet) &&
+			    ntohs(ga->sat_addr.s_net) <=
+			    		ntohs(iface->nets.nr_lastnet))
+				riface = iface;
+
+			if (ga->sat_addr.s_net == iface->address.s_net &&
+			    ga->sat_addr.s_node == iface->address.s_node)
+				riface = iface;
+		}		
+		read_unlock_bh(&atalk_interfaces_lock);
+
+		retval = -ENETUNREACH;
+		if (!riface)
+			goto out_unlock;
+
+		devhint = riface->dev;
+	}
+
+	if (!rt) {
+		rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
+
+		retval = -ENOBUFS;
+		if (!rt)
+			goto out_unlock;
+		memset(rt, 0, sizeof(*rt));
+
+		rt->next = atalk_routes;
+		atalk_routes = rt;
+	}
+
+	/* Fill in the routing entry */
+	rt->target  = ta->sat_addr;
+	rt->dev     = devhint;
+	rt->flags   = r->rt_flags;
+	rt->gateway = ga->sat_addr;
+
+	retval = 0;
+out_unlock:
+	write_unlock_bh(&atalk_routes_lock);
+out:
+	return retval;
+}
+
+/* Delete a route. Find it and discard it */
+static int atrtr_delete(struct atalk_addr * addr)
+{
+	struct atalk_route **r = &atalk_routes;
+	int retval = 0;
+	struct atalk_route *tmp;
+
+	write_lock_bh(&atalk_routes_lock);
+	while ((tmp = *r) != NULL) {
+		if (tmp->target.s_net == addr->s_net &&
+		    (!(tmp->flags&RTF_GATEWAY) ||
+		     tmp->target.s_node == addr->s_node)) {
+			*r = tmp->next;
+			dev_put(tmp->dev);
+			kfree(tmp);
+			goto out;
+		}
+		r = &tmp->next;
+	}
+	retval = -ENOENT;
+out:
+	write_unlock_bh(&atalk_routes_lock);
+	return retval;
+}
+
+/*
+ * Called when a device is downed. Just throw away any routes
+ * via it.
+ */
+static void atrtr_device_down(struct net_device *dev)
+{
+	struct atalk_route **r = &atalk_routes;
+	struct atalk_route *tmp;
+
+	write_lock_bh(&atalk_routes_lock);
+	while ((tmp = *r) != NULL) {
+		if (tmp->dev == dev) {
+			*r = tmp->next;
+			dev_put(dev);
+			kfree(tmp);
+		} else
+			r = &tmp->next;
+	}
+	write_unlock_bh(&atalk_routes_lock);
+
+	if (atrtr_default.dev == dev)
+		atrtr_set_default(NULL);
+}
+
+/* Actually down the interface */
+static inline void atalk_dev_down(struct net_device *dev)
+{
+	atrtr_device_down(dev);	/* Remove all routes for the device */
+	aarp_device_down(dev);	/* Remove AARP entries for the device */
+	atif_drop_device(dev);	/* Remove the device */
+}
+
+/*
+ * A device event has occurred. Watch for devices going down and
+ * delete our use of them (iface and route).
+ */
+static int ddp_device_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	if (event == NETDEV_DOWN)
+		/* Discard any use of this */
+	        atalk_dev_down(ptr);
+
+	return NOTIFY_DONE;
+}
+
+/* ioctl calls. Shouldn't even need touching */
+/* Device configuration ioctl calls */
+static int atif_ioctl(int cmd, void __user *arg)
+{
+	static char aarp_mcast[6] = { 0x09, 0x00, 0x00, 0xFF, 0xFF, 0xFF };
+	struct ifreq atreq;
+	struct atalk_netrange *nr;
+	struct sockaddr_at *sa;
+	struct net_device *dev;
+	struct atalk_iface *atif;
+	int ct;
+	int limit;
+	struct rtentry rtdef;
+	int add_route;
+
+	if (copy_from_user(&atreq, arg, sizeof(atreq)))
+		return -EFAULT;
+
+	dev = __dev_get_by_name(atreq.ifr_name);
+	if (!dev)
+		return -ENODEV;
+
+	sa = (struct sockaddr_at *)&atreq.ifr_addr;
+	atif = atalk_find_dev(dev);
+
+	switch (cmd) {
+		case SIOCSIFADDR:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			if (sa->sat_family != AF_APPLETALK)
+				return -EINVAL;
+			if (dev->type != ARPHRD_ETHER &&
+			    dev->type != ARPHRD_LOOPBACK &&
+			    dev->type != ARPHRD_LOCALTLK &&
+			    dev->type != ARPHRD_PPP)
+				return -EPROTONOSUPPORT;
+
+			nr = (struct atalk_netrange *)&sa->sat_zero[0];
+			add_route = 1;
+
+			/*
+			 * if this is a point-to-point iface, and we already
+			 * have an iface for this AppleTalk address, then we
+			 * should not add a route
+			 */
+			if ((dev->flags & IFF_POINTOPOINT) &&
+			    atalk_find_interface(sa->sat_addr.s_net,
+				    		 sa->sat_addr.s_node)) {
+				printk(KERN_DEBUG "AppleTalk: point-to-point "
+						  "interface added with "
+						  "existing address\n");
+				add_route = 0;
+			}
+			
+			/*
+			 * Phase 1 is fine on LocalTalk but we don't do
+			 * EtherTalk phase 1. Anyone wanting to add it go ahead.
+			 */
+			if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
+				return -EPROTONOSUPPORT;
+			if (sa->sat_addr.s_node == ATADDR_BCAST ||
+			    sa->sat_addr.s_node == 254)
+				return -EINVAL;
+			if (atif) {
+				/* Already setting address */
+				if (atif->status & ATIF_PROBE)
+					return -EBUSY;
+
+				atif->address.s_net  = sa->sat_addr.s_net;
+				atif->address.s_node = sa->sat_addr.s_node;
+				atrtr_device_down(dev);	/* Flush old routes */
+			} else {
+				atif = atif_add_device(dev, &sa->sat_addr);
+				if (!atif)
+					return -ENOMEM;
+			}
+			atif->nets = *nr;
+
+			/*
+			 * Check if the chosen address is used. If so we
+			 * error and atalkd will try another.
+			 */
+
+			if (!(dev->flags & IFF_LOOPBACK) &&
+			    !(dev->flags & IFF_POINTOPOINT) &&
+			    atif_probe_device(atif) < 0) {
+				atif_drop_device(dev);
+				return -EADDRINUSE;
+			}
+
+			/* Hey it worked - add the direct routes */
+			sa = (struct sockaddr_at *)&rtdef.rt_gateway;
+			sa->sat_family = AF_APPLETALK;
+			sa->sat_addr.s_net  = atif->address.s_net;
+			sa->sat_addr.s_node = atif->address.s_node;
+			sa = (struct sockaddr_at *)&rtdef.rt_dst;
+			rtdef.rt_flags = RTF_UP;
+			sa->sat_family = AF_APPLETALK;
+			sa->sat_addr.s_node = ATADDR_ANYNODE;
+			if (dev->flags & IFF_LOOPBACK ||
+			    dev->flags & IFF_POINTOPOINT)
+				rtdef.rt_flags |= RTF_HOST;
+
+			/* Routerless initial state */
+			if (nr->nr_firstnet == htons(0) &&
+			    nr->nr_lastnet == htons(0xFFFE)) {
+				sa->sat_addr.s_net = atif->address.s_net;
+				atrtr_create(&rtdef, dev);
+				atrtr_set_default(dev);
+			} else {
+				limit = ntohs(nr->nr_lastnet);
+				if (limit - ntohs(nr->nr_firstnet) > 4096) {
+					printk(KERN_WARNING "Too many routes/"
+							    "iface.\n");
+					return -EINVAL;
+				}
+				if (add_route)
+					for (ct = ntohs(nr->nr_firstnet);
+					     ct <= limit; ct++) {
+						sa->sat_addr.s_net = htons(ct);
+						atrtr_create(&rtdef, dev);
+					}
+			}
+			dev_mc_add(dev, aarp_mcast, 6, 1);
+			return 0;
+
+		case SIOCGIFADDR:
+			if (!atif)
+				return -EADDRNOTAVAIL;
+
+			sa->sat_family = AF_APPLETALK;
+			sa->sat_addr = atif->address;
+			break;
+
+		case SIOCGIFBRDADDR:
+			if (!atif)
+				return -EADDRNOTAVAIL;
+
+			sa->sat_family = AF_APPLETALK;
+			sa->sat_addr.s_net = atif->address.s_net;
+			sa->sat_addr.s_node = ATADDR_BCAST;
+			break;
+
+	        case SIOCATALKDIFADDR:
+	        case SIOCDIFADDR:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			if (sa->sat_family != AF_APPLETALK)
+				return -EINVAL;
+			atalk_dev_down(dev);
+			break;			
+
+		case SIOCSARP:
+			if (!capable(CAP_NET_ADMIN))
+                                return -EPERM;
+                        if (sa->sat_family != AF_APPLETALK)
+                                return -EINVAL;
+                        if (!atif)
+                                return -EADDRNOTAVAIL;
+
+                        /*
+                         * for now, we only support proxy AARP on ELAP;
+                         * we should be able to do it for LocalTalk, too.
+                         */
+                        if (dev->type != ARPHRD_ETHER)
+                                return -EPROTONOSUPPORT;
+
+                        /*
+                         * atif points to the current interface on this network;
+                         * we aren't concerned about its current status (at
+			 * least for now), but it has all the settings about
+			 * the network we're going to probe. Consequently, it
+			 * must exist.
+                         */
+                        if (!atif)
+                                return -EADDRNOTAVAIL;
+
+                        nr = (struct atalk_netrange *)&(atif->nets);
+                        /*
+                         * Phase 1 is fine on Localtalk but we don't do
+                         * Ethertalk phase 1. Anyone wanting to add it go ahead.
+                         */
+                        if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
+                                return -EPROTONOSUPPORT;
+
+                        if (sa->sat_addr.s_node == ATADDR_BCAST ||
+			    sa->sat_addr.s_node == 254)
+                                return -EINVAL;
+
+                        /*
+                         * Check if the chosen address is used. If so we
+                         * error and ATCP will try another.
+                         */
+                      	if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0)
+                      		return -EADDRINUSE;
+                      	
+			/*
+                         * We now have an address on the local network, and
+			 * the AARP code will defend it for us until we take it
+			 * down. We don't set up any routes right now, because
+			 * ATCP will install them manually via SIOCADDRT.
+                         */
+                        break;
+
+                case SIOCDARP:
+                        if (!capable(CAP_NET_ADMIN))
+                                return -EPERM;
+                        if (sa->sat_family != AF_APPLETALK)
+                                return -EINVAL;
+                        if (!atif)
+                                return -EADDRNOTAVAIL;
+
+                        /* give to aarp module to remove proxy entry */
+                        aarp_proxy_remove(atif->dev, &(sa->sat_addr));
+                        return 0;
+	}
+
+	return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
+}
+
+/* Routing ioctl() calls */
+static int atrtr_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct rtentry rt;
+
+	if (copy_from_user(&rt, arg, sizeof(rt)))
+		return -EFAULT;
+
+	switch (cmd) {
+		case SIOCDELRT:
+			if (rt.rt_dst.sa_family != AF_APPLETALK)
+				return -EINVAL;
+			return atrtr_delete(&((struct sockaddr_at *)
+						&rt.rt_dst)->sat_addr);
+
+		case SIOCADDRT: {
+			struct net_device *dev = NULL;
+			if (rt.rt_dev) {
+				char name[IFNAMSIZ];
+				if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
+					return -EFAULT;
+				name[IFNAMSIZ-1] = '\0';
+				dev = __dev_get_by_name(name);
+				if (!dev)
+					return -ENODEV;
+			}			
+			return atrtr_create(&rt, dev);
+		}
+	}
+	return -EINVAL;
+}
+
+/**************************************************************************\
+*                                                                          *
+* Handling for system calls applied via the various interfaces to an       *
+* AppleTalk socket object.                                                 *
+*                                                                          *
+\**************************************************************************/
+
+/*
+ * Checksum: This is 'optional'. It's quite likely also a good
+ * candidate for assembler hackery 8)
+ */
+static unsigned long atalk_sum_partial(const unsigned char *data, 
+				       int len, unsigned long sum)
+{
+	/* This ought to be unwrapped neatly. I'll trust gcc for now */
+	while (len--) {
+		sum += *data;
+		sum <<= 1;
+		if (sum & 0x10000) {
+			sum++;
+			sum &= 0xffff;
+		}
+		data++;
+	}
+	return sum;
+}
+
+/*  Checksum skb data --  similar to skb_checksum  */
+static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
+				   int len, unsigned long sum)
+{
+	int start = skb_headlen(skb);
+	int i, copy;
+
+	/* checksum stuff in header space */
+	if ( (copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		sum = atalk_sum_partial(skb->data + offset, copy, sum);
+		if ( (len -= copy) == 0) 
+			return sum;
+
+		offset += copy;
+	}
+
+	/* checksum stuff in frags */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			sum = atalk_sum_partial(vaddr + frag->page_offset +
+						  offset - start, copy, sum);
+			kunmap_skb_frag(vaddr);
+
+			if (!(len -= copy))
+				return sum;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				sum = atalk_sum_skb(list, offset - start,
+						    copy, sum);
+				if ((len -= copy) == 0)
+					return sum;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+
+	BUG_ON(len > 0);
+
+	return sum;
+}
+
+static unsigned short atalk_checksum(const struct sk_buff *skb, int len)
+{
+	unsigned long sum;
+
+	/* skip header 4 bytes */
+	sum = atalk_sum_skb(skb, 4, len-4, 0);
+
+	/* Use 0xFFFF for 0. 0 itself means none */
+	return sum ? htons((unsigned short)sum) : 0xFFFF;
+}
+
+static struct proto ddp_proto = {
+	.name	  = "DDP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct atalk_sock),
+};
+
+/*
+ * Create a socket. Initialise the socket, blank the addresses
+ * set the state.
+ */
+static int atalk_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	int rc = -ESOCKTNOSUPPORT;
+
+	/*
+	 * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do
+	 * and gives you the full ELAP frame. Should be handy for CAP 8) 
+	 */
+	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+		goto out;
+	rc = -ENOMEM;
+	sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1);
+	if (!sk)
+		goto out;
+	rc = 0;
+	sock->ops = &atalk_dgram_ops;
+	sock_init_data(sock, sk);
+
+	/* Checksums on by default */
+	sock_set_flag(sk, SOCK_ZAPPED);
+out:
+	return rc;
+}
+
+/* Free a socket. No work needed */
+static int atalk_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		sock_orphan(sk);
+		sock->sk = NULL;
+		atalk_destroy_socket(sk);
+	}
+	return 0;
+}
+
+/**
+ * atalk_pick_and_bind_port - Pick a source port when one is not given
+ * @sk - socket to insert into the tables
+ * @sat - address to search for
+ *
+ * Pick a source port when one is not given. If we can find a suitable free
+ * one, we insert the socket into the tables using it.
+ *
+ * This whole operation must be atomic.
+ */
+static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat)
+{
+	int retval;
+
+	write_lock_bh(&atalk_sockets_lock);
+
+	for (sat->sat_port = ATPORT_RESERVED;
+	     sat->sat_port < ATPORT_LAST;
+	     sat->sat_port++) {
+		struct sock *s;
+		struct hlist_node *node;
+
+		sk_for_each(s, node, &atalk_sockets) {
+			struct atalk_sock *at = at_sk(s);
+
+			if (at->src_net == sat->sat_addr.s_net &&
+			    at->src_node == sat->sat_addr.s_node &&
+			    at->src_port == sat->sat_port)
+				goto try_next_port;
+		}
+
+		/* Wheee, it's free, assign and insert. */
+		__atalk_insert_socket(sk);
+		at_sk(sk)->src_port = sat->sat_port;
+		retval = 0;
+		goto out;
+
+try_next_port:;
+	}
+
+	retval = -EBUSY;
+out:
+	write_unlock_bh(&atalk_sockets_lock);
+	return retval;
+}
+
+static int atalk_autobind(struct sock *sk)
+{
+	struct atalk_sock *at = at_sk(sk);
+	struct sockaddr_at sat;
+	struct atalk_addr *ap = atalk_find_primary();
+	int n = -EADDRNOTAVAIL;
+
+	if (!ap || ap->s_net == htons(ATADDR_ANYNET))
+		goto out;
+
+	at->src_net  = sat.sat_addr.s_net  = ap->s_net;
+	at->src_node = sat.sat_addr.s_node = ap->s_node;
+
+	n = atalk_pick_and_bind_port(sk, &sat);
+	if (!n)
+		sock_reset_flag(sk, SOCK_ZAPPED);
+out:
+	return n;
+}
+
+/* Set the address 'our end' of the connection */
+static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_at *addr = (struct sockaddr_at *)uaddr;
+	struct sock *sk = sock->sk;
+	struct atalk_sock *at = at_sk(sk);
+
+	if (!sock_flag(sk, SOCK_ZAPPED) ||
+	    addr_len != sizeof(struct sockaddr_at))
+		return -EINVAL;
+
+	if (addr->sat_family != AF_APPLETALK)
+		return -EAFNOSUPPORT;
+
+	if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) {
+		struct atalk_addr *ap = atalk_find_primary();
+
+		if (!ap)
+			return -EADDRNOTAVAIL;
+
+		at->src_net  = addr->sat_addr.s_net = ap->s_net;
+		at->src_node = addr->sat_addr.s_node= ap->s_node;
+	} else {
+		if (!atalk_find_interface(addr->sat_addr.s_net,
+					  addr->sat_addr.s_node))
+			return -EADDRNOTAVAIL;
+
+		at->src_net  = addr->sat_addr.s_net;
+		at->src_node = addr->sat_addr.s_node;
+	}
+
+	if (addr->sat_port == ATADDR_ANYPORT) {
+		int n = atalk_pick_and_bind_port(sk, addr);
+
+		if (n < 0)
+			return n;
+	} else {
+		at->src_port = addr->sat_port;
+
+		if (atalk_find_or_insert_socket(sk, addr))
+			return -EADDRINUSE;
+	}
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	return 0;
+}
+
+/* Set the address we talk to */
+static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
+			 int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct atalk_sock *at = at_sk(sk);
+	struct sockaddr_at *addr;
+
+	sk->sk_state   = TCP_CLOSE;
+	sock->state = SS_UNCONNECTED;
+
+	if (addr_len != sizeof(*addr))
+		return -EINVAL;
+
+	addr = (struct sockaddr_at *)uaddr;
+
+	if (addr->sat_family != AF_APPLETALK)
+		return -EAFNOSUPPORT;
+
+	if (addr->sat_addr.s_node == ATADDR_BCAST &&
+	    !sock_flag(sk, SOCK_BROADCAST)) {
+#if 1	
+		printk(KERN_WARNING "%s is broken and did not set "
+				    "SO_BROADCAST. It will break when 2.2 is "
+				    "released.\n",
+			current->comm);
+#else
+		return -EACCES;
+#endif			
+	}
+
+	if (sock_flag(sk, SOCK_ZAPPED))
+		if (atalk_autobind(sk) < 0)
+			return -EBUSY;
+
+	if (!atrtr_get_dev(&addr->sat_addr))
+		return -ENETUNREACH;
+
+	at->dest_port = addr->sat_port;
+	at->dest_net  = addr->sat_addr.s_net;
+	at->dest_node = addr->sat_addr.s_node;
+
+	sock->state  = SS_CONNECTED;
+	sk->sk_state = TCP_ESTABLISHED;
+	return 0;
+}
+
+/*
+ * Find the name of an AppleTalk socket. Just copy the right
+ * fields into the sockaddr.
+ */
+static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
+			 int *uaddr_len, int peer)
+{
+	struct sockaddr_at sat;
+	struct sock *sk = sock->sk;
+	struct atalk_sock *at = at_sk(sk);
+
+	if (sock_flag(sk, SOCK_ZAPPED))
+		if (atalk_autobind(sk) < 0)
+			return -ENOBUFS;
+
+	*uaddr_len = sizeof(struct sockaddr_at);
+
+	if (peer) {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+
+		sat.sat_addr.s_net  = at->dest_net;
+		sat.sat_addr.s_node = at->dest_node;
+		sat.sat_port	    = at->dest_port;
+	} else {
+		sat.sat_addr.s_net  = at->src_net;
+		sat.sat_addr.s_node = at->src_node;
+		sat.sat_port	    = at->src_port;
+	}
+
+	sat.sat_family = AF_APPLETALK;
+	memcpy(uaddr, &sat, sizeof(sat));
+	return 0;
+}
+
+#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE)
+static __inline__ int is_ip_over_ddp(struct sk_buff *skb)
+{
+        return skb->data[12] == 22;
+}
+
+static int handle_ip_over_ddp(struct sk_buff *skb)
+{
+        struct net_device *dev = __dev_get_by_name("ipddp0");
+	struct net_device_stats *stats;
+
+	/* This needs to be able to handle ipddp"N" devices */
+        if (!dev)
+                return -ENODEV;
+
+        skb->protocol = htons(ETH_P_IP);
+        skb_pull(skb, 13);
+        skb->dev   = dev;
+        skb->h.raw = skb->data;
+
+	stats = dev->priv;
+        stats->rx_packets++;
+        stats->rx_bytes += skb->len + 13;
+        netif_rx(skb);  /* Send the SKB up to a higher place. */
+	return 0;
+}
+#else
+/* make it easy for gcc to optimize this test out, i.e. kill the code */
+#define is_ip_over_ddp(skb) 0
+#define handle_ip_over_ddp(skb) 0
+#endif
+
+static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
+			       struct ddpehdr *ddp, struct ddpebits *ddphv,
+			       int origlen)
+{
+	struct atalk_route *rt;
+	struct atalk_addr ta;
+
+	/*
+	 * Don't route multicast, etc., packets, or packets sent to "this
+	 * network" 
+	 */
+	if (skb->pkt_type != PACKET_HOST || !ddp->deh_dnet) {
+		/*
+		 * FIXME:
+		 *
+		 * Can it ever happen that a packet is from a PPP iface and
+		 * needs to be broadcast onto the default network?
+		 */
+		if (dev->type == ARPHRD_PPP)
+			printk(KERN_DEBUG "AppleTalk: didn't forward broadcast "
+					  "packet received from PPP iface\n");
+		goto free_it;
+	}
+
+	ta.s_net  = ddp->deh_dnet;
+	ta.s_node = ddp->deh_dnode;
+
+	/* Route the packet */
+	rt = atrtr_find(&ta);
+	if (!rt || ddphv->deh_hops == DDP_MAXHOPS)
+		goto free_it;
+	/* FIXME: use skb->cb to be able to use shared skbs */
+	ddphv->deh_hops++;
+
+	/*
+	 * Route goes through another gateway, so set the target to the
+	 * gateway instead.
+	 */
+
+	if (rt->flags & RTF_GATEWAY) {
+		ta.s_net  = rt->gateway.s_net;
+		ta.s_node = rt->gateway.s_node;
+	}
+
+        /* Fix up skb->len field */
+        skb_trim(skb, min_t(unsigned int, origlen,
+			    (rt->dev->hard_header_len +
+			     ddp_dl->header_length + ddphv->deh_len)));
+
+	/* Mend the byte order */
+	/* FIXME: use skb->cb to be able to use shared skbs */
+	*((__u16 *)ddp) = ntohs(*((__u16 *)ddphv));
+
+	/*
+	 * Send the buffer onwards
+	 *
+	 * Now we must always be careful. If it's come from LocalTalk to
+	 * EtherTalk it might not fit
+	 *
+	 * Order matters here: If a packet has to be copied to make a new
+	 * headroom (rare hopefully) then it won't need unsharing.
+	 *
+	 * Note. ddp-> becomes invalid at the realloc.
+	 */
+	if (skb_headroom(skb) < 22) {
+		/* 22 bytes - 12 ether, 2 len, 3 802.2 5 snap */
+		struct sk_buff *nskb = skb_realloc_headroom(skb, 32);
+		kfree_skb(skb);
+		if (!nskb) 
+			goto out;
+		skb = nskb;
+	} else
+		skb = skb_unshare(skb, GFP_ATOMIC);
+	
+	/*
+	 * If the buffer didn't vanish into the lack of space bitbucket we can
+	 * send it.
+	 */
+	if (skb && aarp_send_ddp(rt->dev, skb, &ta, NULL) == -1)
+		goto free_it;
+out:
+	return;
+free_it:
+	kfree_skb(skb);
+}
+
+/**
+ *	atalk_rcv - Receive a packet (in skb) from device dev
+ *	@skb - packet received
+ *	@dev - network device where the packet comes from
+ *	@pt - packet type
+ *
+ *	Receive a packet (in skb) from device dev. This has come from the SNAP
+ *	decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP
+ *	header, skb->len is the DDP length. The physical headers have been
+ *	extracted. PPP should probably pass frames marked as for this layer.
+ *	[ie ARPHRD_ETHERTALK]
+ */
+static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
+		     struct packet_type *pt)
+{
+	struct ddpehdr *ddp;
+	struct sock *sock;
+	struct atalk_iface *atif;
+	struct sockaddr_at tosat;
+        int origlen;
+        struct ddpebits ddphv;
+
+	/* Don't mangle buffer if shared */
+	if (!(skb = skb_share_check(skb, GFP_ATOMIC))) 
+		goto out;
+		
+	/* Size check and make sure header is contiguous */
+	if (!pskb_may_pull(skb, sizeof(*ddp)))
+		goto freeit;
+
+	ddp = ddp_hdr(skb);
+
+	/*
+	 *	Fix up the length field	[Ok this is horrible but otherwise
+	 *	I end up with unions of bit fields and messy bit field order
+	 *	compiler/endian dependencies..]
+	 */
+	*((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+
+	/* Trim buffer in case of stray trailing data */
+	origlen = skb->len;
+	skb_trim(skb, min_t(unsigned int, skb->len, ddphv.deh_len));
+
+	/*
+	 * Size check to see if ddp->deh_len was crap
+	 * (Otherwise we'll detonate most spectacularly
+	 * in the middle of recvmsg()).
+	 */
+	if (skb->len < sizeof(*ddp))
+		goto freeit;
+
+	/*
+	 * Any checksums. Note we don't do htons() on this == is assumed to be
+	 * valid for net byte orders all over the networking code...
+	 */
+	if (ddp->deh_sum &&
+	    atalk_checksum(skb, ddphv.deh_len) != ddp->deh_sum)
+		/* Not a valid AppleTalk frame - dustbin time */
+		goto freeit;
+
+	/* Check the packet is aimed at us */
+	if (!ddp->deh_dnet)	/* Net 0 is 'this network' */
+		atif = atalk_find_anynet(ddp->deh_dnode, dev);
+	else
+		atif = atalk_find_interface(ddp->deh_dnet, ddp->deh_dnode);
+
+	/* Not ours, so we route the packet via the correct AppleTalk iface */
+	if (!atif) {
+		atalk_route_packet(skb, dev, ddp, &ddphv, origlen);
+		goto out;
+	}
+
+	/* if IP over DDP is not selected this code will be optimized out */
+	if (is_ip_over_ddp(skb))
+		return handle_ip_over_ddp(skb);
+	/*
+	 * Which socket - atalk_search_socket() looks for a *full match*
+	 * of the <net, node, port> tuple.
+	 */
+	tosat.sat_addr.s_net  = ddp->deh_dnet;
+	tosat.sat_addr.s_node = ddp->deh_dnode;
+	tosat.sat_port	      = ddp->deh_dport;
+
+	sock = atalk_search_socket(&tosat, atif);
+	if (!sock) /* But not one of our sockets */
+		goto freeit;
+
+	/* Queue packet (standard) */
+	skb->sk = sock;
+
+	if (sock_queue_rcv_skb(sock, skb) < 0)
+		goto freeit;
+out:
+	return 0;
+freeit:
+	kfree_skb(skb);
+	goto out;
+}
+
+/*
+ * Receive a LocalTalk frame. We make some demands on the caller here.
+ * Caller must provide enough headroom on the packet to pull the short
+ * header and append a long one.
+ */
+static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
+			struct packet_type *pt)
+{
+	/* Expand any short form frames */
+	if (skb->mac.raw[2] == 1) {
+		struct ddpehdr *ddp;
+		/* Find our address */
+		struct atalk_addr *ap = atalk_find_dev_addr(dev);
+
+		if (!ap || skb->len < sizeof(struct ddpshdr))
+			goto freeit;
+
+		/* Don't mangle buffer if shared */
+		if (!(skb = skb_share_check(skb, GFP_ATOMIC))) 
+			return 0;
+
+		/*
+		 * The push leaves us with a ddephdr not an shdr, and
+		 * handily the port bytes in the right place preset.
+		 */
+		ddp = (struct ddpehdr *) skb_push(skb, sizeof(*ddp) - 4);
+
+		/* Now fill in the long header */
+
+	 	/*
+	 	 * These two first. The mac overlays the new source/dest
+	 	 * network information so we MUST copy these before
+	 	 * we write the network numbers !
+	 	 */
+
+		ddp->deh_dnode = skb->mac.raw[0];     /* From physical header */
+		ddp->deh_snode = skb->mac.raw[1];     /* From physical header */
+
+		ddp->deh_dnet  = ap->s_net;	/* Network number */
+		ddp->deh_snet  = ap->s_net;
+		ddp->deh_sum   = 0;		/* No checksum */
+		/*
+		 * Not sure about this bit...
+		 */
+		ddp->deh_len   = skb->len;
+		ddp->deh_hops  = DDP_MAXHOPS;	/* Non routable, so force a drop
+						   if we slip up later */
+		/* Mend the byte order */
+		*((__u16 *)ddp) = htons(*((__u16 *)ddp));
+	}
+	skb->h.raw = skb->data;
+
+	return atalk_rcv(skb, dev, pt);
+freeit:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+			 size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct atalk_sock *at = at_sk(sk);
+	struct sockaddr_at *usat = (struct sockaddr_at *)msg->msg_name;
+	int flags = msg->msg_flags;
+	int loopback = 0;
+	struct sockaddr_at local_satalk, gsat;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	struct ddpehdr *ddp;
+	int size;
+	struct atalk_route *rt;
+	int err;
+
+	if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	if (len > DDP_MAXSZ)
+		return -EMSGSIZE;
+
+	if (usat) {
+		if (sock_flag(sk, SOCK_ZAPPED))
+			if (atalk_autobind(sk) < 0)
+				return -EBUSY;
+
+		if (msg->msg_namelen < sizeof(*usat) ||
+		    usat->sat_family != AF_APPLETALK)
+			return -EINVAL;
+
+		/* netatalk doesn't implement this check */
+		if (usat->sat_addr.s_node == ATADDR_BCAST &&
+		    !sock_flag(sk, SOCK_BROADCAST)) {
+			printk(KERN_INFO "SO_BROADCAST: Fix your netatalk as "
+					 "it will break before 2.2\n");
+#if 0
+			return -EPERM;
+#endif
+		}
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+		usat = &local_satalk;
+		usat->sat_family      = AF_APPLETALK;
+		usat->sat_port	      = at->dest_port;
+		usat->sat_addr.s_node = at->dest_node;
+		usat->sat_addr.s_net  = at->dest_net;
+	}
+
+	/* Build a packet */
+	SOCK_DEBUG(sk, "SK %p: Got address.\n", sk);
+
+	/* For headers */
+	size = sizeof(struct ddpehdr) + len + ddp_dl->header_length;
+
+	if (usat->sat_addr.s_net || usat->sat_addr.s_node == ATADDR_ANYNODE) {
+		rt = atrtr_find(&usat->sat_addr);
+		if (!rt)
+			return -ENETUNREACH;
+
+		dev = rt->dev;
+	} else {
+		struct atalk_addr at_hint;
+
+		at_hint.s_node = 0;
+		at_hint.s_net  = at->src_net;
+
+		rt = atrtr_find(&at_hint);
+		if (!rt)
+			return -ENETUNREACH;
+
+		dev = rt->dev;
+	}
+
+	SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
+			sk, size, dev->name);
+
+	size += dev->hard_header_len;
+	skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
+	if (!skb)
+		return err;
+	
+	skb->sk = sk;
+	skb_reserve(skb, ddp_dl->header_length);
+	skb_reserve(skb, dev->hard_header_len);
+	skb->dev = dev;
+
+	SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
+
+	ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr));
+	ddp->deh_pad  = 0;
+	ddp->deh_hops = 0;
+	ddp->deh_len  = len + sizeof(*ddp);
+	/*
+	 * Fix up the length field [Ok this is horrible but otherwise
+	 * I end up with unions of bit fields and messy bit field order
+	 * compiler/endian dependencies..
+	 */
+	*((__u16 *)ddp) = ntohs(*((__u16 *)ddp));
+
+	ddp->deh_dnet  = usat->sat_addr.s_net;
+	ddp->deh_snet  = at->src_net;
+	ddp->deh_dnode = usat->sat_addr.s_node;
+	ddp->deh_snode = at->src_node;
+	ddp->deh_dport = usat->sat_port;
+	ddp->deh_sport = at->src_port;
+
+	SOCK_DEBUG(sk, "SK %p: Copy user data (%Zd bytes).\n", sk, len);
+
+	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	if (err) {
+		kfree_skb(skb);
+		return -EFAULT;
+	}
+
+	if (sk->sk_no_check == 1)
+		ddp->deh_sum = 0;
+	else
+		ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));
+
+	/*
+	 * Loopback broadcast packets to non gateway targets (ie routes
+	 * to group we are in)
+	 */
+	if (ddp->deh_dnode == ATADDR_BCAST &&
+	    !(rt->flags & RTF_GATEWAY) && !(dev->flags & IFF_LOOPBACK)) {
+		struct sk_buff *skb2 = skb_copy(skb, GFP_KERNEL);
+
+		if (skb2) {
+			loopback = 1;
+			SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk);
+			if (aarp_send_ddp(dev, skb2,
+					  &usat->sat_addr, NULL) == -1)
+				kfree_skb(skb2);
+				/* else queued/sent above in the aarp queue */
+		}
+	}
+
+	if (dev->flags & IFF_LOOPBACK || loopback) {
+		SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk);
+		/* loop back */
+		skb_orphan(skb);
+		ddp_dl->request(ddp_dl, skb, dev->dev_addr);
+	} else {
+		SOCK_DEBUG(sk, "SK %p: send out.\n", sk);
+		if (rt->flags & RTF_GATEWAY) {
+		    gsat.sat_addr = rt->gateway;
+		    usat = &gsat;
+		}
+
+		if (aarp_send_ddp(dev, skb, &usat->sat_addr, NULL) == -1)
+			kfree_skb(skb);
+		/* else queued/sent above in the aarp queue */
+	}
+	SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len);
+
+	return len;
+}
+
+static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+			 size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
+	struct ddpehdr *ddp;
+	int copied = 0;
+	int err = 0;
+        struct ddpebits ddphv;
+	struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+						flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return err;
+
+	/* FIXME: use skb->cb to be able to use shared skbs */
+	ddp = ddp_hdr(skb);
+	*((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+
+	if (sk->sk_type == SOCK_RAW) {
+		copied = ddphv.deh_len;
+		if (copied > size) {
+			copied = size;
+			msg->msg_flags |= MSG_TRUNC;
+		}
+
+		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	} else {
+		copied = ddphv.deh_len - sizeof(*ddp);
+		if (copied > size) {
+			copied = size;
+			msg->msg_flags |= MSG_TRUNC;
+		}
+		err = skb_copy_datagram_iovec(skb, sizeof(*ddp),
+					      msg->msg_iov, copied);
+	}
+
+	if (!err) {
+		if (sat) {
+			sat->sat_family      = AF_APPLETALK;
+			sat->sat_port        = ddp->deh_sport;
+			sat->sat_addr.s_node = ddp->deh_snode;
+			sat->sat_addr.s_net  = ddp->deh_snet;
+		}
+		msg->msg_namelen = sizeof(*sat);
+	}
+
+	skb_free_datagram(sk, skb);	/* Free the datagram. */
+	return err ? : copied;
+}
+
+
+/*
+ * AppleTalk ioctl calls.
+ */
+static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	int rc = -EINVAL;
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+		/* Protocol layer */
+		case TIOCOUTQ: {
+			long amount = sk->sk_sndbuf -
+				      atomic_read(&sk->sk_wmem_alloc);
+
+			if (amount < 0)
+				amount = 0;
+			rc = put_user(amount, (int __user *)argp);
+			break;
+		}
+		case TIOCINQ: {
+			/*
+			 * These two are safe on a single CPU system as only
+			 * user tasks fiddle here
+			 */
+			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+			long amount = 0;
+
+			if (skb)
+				amount = skb->len - sizeof(struct ddpehdr);
+			rc = put_user(amount, (int __user *)argp);
+			break;
+		}
+		case SIOCGSTAMP:
+			rc = sock_get_timestamp(sk, argp);
+			break;
+		/* Routing */
+		case SIOCADDRT:
+		case SIOCDELRT:
+			rc = -EPERM;
+			if (capable(CAP_NET_ADMIN))
+				rc = atrtr_ioctl(cmd, argp);
+			break;
+		/* Interface */
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCATALKDIFADDR:
+		case SIOCDIFADDR:
+		case SIOCSARP:		/* proxy AARP */
+		case SIOCDARP:		/* proxy AARP */
+			rtnl_lock();
+			rc = atif_ioctl(cmd, argp);
+			rtnl_unlock();
+			break;
+		/* Physical layer ioctl calls */
+		case SIOCSIFLINK:
+		case SIOCGIFHWADDR:
+		case SIOCSIFHWADDR:
+		case SIOCGIFFLAGS:
+		case SIOCSIFFLAGS:
+		case SIOCGIFTXQLEN:
+		case SIOCSIFTXQLEN:
+		case SIOCGIFMTU:
+		case SIOCGIFCONF:
+		case SIOCADDMULTI:
+		case SIOCDELMULTI:
+		case SIOCGIFCOUNT:
+		case SIOCGIFINDEX:
+		case SIOCGIFNAME:
+			rc = dev_ioctl(cmd, argp);
+			break;
+	}
+
+	return rc;
+}
+
+static struct net_proto_family atalk_family_ops = {
+	.family		= PF_APPLETALK,
+	.create		= atalk_create,
+	.owner		= THIS_MODULE,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
+	.family		= PF_APPLETALK,
+	.owner		= THIS_MODULE,
+	.release	= atalk_release,
+	.bind		= atalk_bind,
+	.connect	= atalk_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= atalk_getname,
+	.poll		= datagram_poll,
+	.ioctl		= atalk_ioctl,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.sendmsg	= atalk_sendmsg,
+	.recvmsg	= atalk_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+#include <linux/smp_lock.h>
+SOCKOPS_WRAP(atalk_dgram, PF_APPLETALK);
+
+static struct notifier_block ddp_notifier = {
+	.notifier_call	= ddp_device_event,
+};
+
+static struct packet_type ltalk_packet_type = {
+	.type		= __constant_htons(ETH_P_LOCALTALK),
+	.func		= ltalk_rcv,
+};
+
+static struct packet_type ppptalk_packet_type = {
+	.type		= __constant_htons(ETH_P_PPPTALK),
+	.func		= atalk_rcv,
+};
+
+static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B };
+
+/* Export symbols for use by drivers when AppleTalk is a module */
+EXPORT_SYMBOL(aarp_send_ddp);
+EXPORT_SYMBOL(atrtr_get_dev);
+EXPORT_SYMBOL(atalk_find_dev_addr);
+
+static char atalk_err_snap[] __initdata =
+	KERN_CRIT "Unable to register DDP with SNAP.\n";
+
+/* Called by proto.c on kernel start up */
+static int __init atalk_init(void)
+{
+	int rc = proto_register(&ddp_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	(void)sock_register(&atalk_family_ops);
+	ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
+	if (!ddp_dl)
+		printk(atalk_err_snap);
+
+	dev_add_pack(&ltalk_packet_type);
+	dev_add_pack(&ppptalk_packet_type);
+
+	register_netdevice_notifier(&ddp_notifier);
+	aarp_proto_init();
+	atalk_proc_init();
+	atalk_register_sysctl();
+out:
+	return rc;
+}
+module_init(atalk_init);
+
+/*
+ * No explicit module reference count manipulation is needed in the
+ * protocol. Socket layer sets module reference count for us
+ * and interfaces reference counting is done
+ * by the network device layer.
+ *
+ * Ergo, before the AppleTalk module can be removed, all AppleTalk
+ * sockets be closed from user space.
+ */
+static void __exit atalk_exit(void)
+{
+#ifdef CONFIG_SYSCTL
+	atalk_unregister_sysctl();
+#endif /* CONFIG_SYSCTL */
+	atalk_proc_exit();
+	aarp_cleanup_module();	/* General aarp clean-up. */
+	unregister_netdevice_notifier(&ddp_notifier);
+	dev_remove_pack(&ltalk_packet_type);
+	dev_remove_pack(&ppptalk_packet_type);
+	unregister_snap_client(ddp_dl);
+	sock_unregister(PF_APPLETALK);
+	proto_unregister(&ddp_proto);
+}
+module_exit(atalk_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>");
+MODULE_DESCRIPTION("AppleTalk 0.20\n");
+MODULE_ALIAS_NETPROTO(PF_APPLETALK);
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
new file mode 100644
index 00000000000..76598445d84
--- /dev/null
+++ b/net/appletalk/dev.c
@@ -0,0 +1,43 @@
+/*
+ * Moved here from drivers/net/net_init.c, which is:
+ *	Written 1993,1994,1995 by Donald Becker.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_ltalk.h>
+
+static int ltalk_change_mtu(struct net_device *dev, int mtu)
+{
+	return -EINVAL;
+}
+
+static int ltalk_mac_addr(struct net_device *dev, void *addr)
+{	
+	return -EINVAL;
+}
+
+void ltalk_setup(struct net_device *dev)
+{
+	/* Fill in the fields of the device structure with localtalk-generic values. */
+	
+	dev->change_mtu		= ltalk_change_mtu;
+	dev->hard_header	= NULL;
+	dev->rebuild_header 	= NULL;
+	dev->set_mac_address 	= ltalk_mac_addr;
+	dev->hard_header_cache	= NULL;
+	dev->header_cache_update= NULL;
+
+	dev->type		= ARPHRD_LOCALTLK;
+	dev->hard_header_len 	= LTALK_HLEN;
+	dev->mtu		= LTALK_MTU;
+	dev->addr_len		= LTALK_ALEN;
+	dev->tx_queue_len	= 10;	
+	
+	dev->broadcast[0]	= 0xFF;
+
+	dev->flags		= IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
+}
+EXPORT_SYMBOL(ltalk_setup);
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
new file mode 100644
index 00000000000..af7f0604395
--- /dev/null
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -0,0 +1,83 @@
+/*
+ * sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/atalk directory entry (empty =) ). [MS]
+ * Dynamic registration, added aarp entries. (5/30/97 Chris Horn)
+ */
+
+#include <linux/config.h>
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <linux/atalk.h>
+
+static struct ctl_table atalk_table[] = {
+	{
+		.ctl_name	= NET_ATALK_AARP_EXPIRY_TIME,
+		.procname	= "aarp-expiry-time",
+		.data		= &sysctl_aarp_expiry_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_ATALK_AARP_TICK_TIME,
+		.procname	= "aarp-tick-time",
+		.data		= &sysctl_aarp_tick_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_ATALK_AARP_RETRANSMIT_LIMIT,
+		.procname	= "aarp-retransmit-limit",
+		.data		= &sysctl_aarp_retransmit_limit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_ATALK_AARP_RESOLVE_TIME,
+		.procname	= "aarp-resolve-time",
+		.data		= &sysctl_aarp_resolve_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{ 0 },
+};
+
+static struct ctl_table atalk_dir_table[] = {
+	{
+		.ctl_name	= NET_ATALK,
+		.procname	= "appletalk",
+		.mode		= 0555,
+		.child		= atalk_table,
+	},
+	{ 0 },
+};
+
+static struct ctl_table atalk_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= atalk_dir_table,
+	},
+	{ 0 },
+};
+
+static struct ctl_table_header *atalk_table_header;
+
+void atalk_register_sysctl(void)
+{
+	atalk_table_header = register_sysctl_table(atalk_root_table, 1);
+}
+
+void atalk_unregister_sysctl(void)
+{
+	unregister_sysctl_table(atalk_table_header);
+}
diff --git a/net/atm/Makefile b/net/atm/Makefile
new file mode 100644
index 00000000000..d5818751f6b
--- /dev/null
+++ b/net/atm/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for the ATM Protocol Families.
+#
+
+atm-y		:= addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o
+mpoa-objs	:= mpc.o mpoa_caches.o mpoa_proc.o
+
+obj-$(CONFIG_ATM) += atm.o
+obj-$(CONFIG_ATM_CLIP) += clip.o
+atm-$(subst m,y,$(CONFIG_ATM_CLIP)) += ipcommon.o
+obj-$(CONFIG_ATM_BR2684) += br2684.o
+atm-$(subst m,y,$(CONFIG_ATM_BR2684)) += ipcommon.o
+atm-$(subst m,y,$(CONFIG_NET_SCH_ATM)) += ipcommon.o
+atm-$(CONFIG_PROC_FS) += proc.o
+
+obj-$(CONFIG_ATM_LANE) += lec.o
+obj-$(CONFIG_ATM_MPOA) += mpoa.o
+obj-$(CONFIG_PPPOATM) += pppoatm.o
diff --git a/net/atm/addr.c b/net/atm/addr.c
new file mode 100644
index 00000000000..1c8867f7f54
--- /dev/null
+++ b/net/atm/addr.c
@@ -0,0 +1,134 @@
+/* net/atm/addr.c - Local ATM address registry */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+
+#include "signaling.h"
+#include "addr.h"
+
+static int check_addr(struct sockaddr_atmsvc *addr)
+{
+	int i;
+
+	if (addr->sas_family != AF_ATMSVC)
+		return -EAFNOSUPPORT;
+	if (!*addr->sas_addr.pub)
+		return *addr->sas_addr.prv ? 0 : -EINVAL;
+	for (i = 1; i < ATM_E164_LEN + 1; i++)	/* make sure it's \0-terminated */
+		if (!addr->sas_addr.pub[i])
+			return 0;
+	return -EINVAL;
+}
+
+static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
+{
+	if (*a->sas_addr.prv)
+		if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN))
+			return 0;
+	if (!*a->sas_addr.pub)
+		return !*b->sas_addr.pub;
+	if (!*b->sas_addr.pub)
+		return 0;
+	return !strcmp(a->sas_addr.pub, b->sas_addr.pub);
+}
+
+static void notify_sigd(struct atm_dev *dev)
+{
+	struct sockaddr_atmpvc pvc;
+
+	pvc.sap_addr.itf = dev->number;
+	sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL);
+}
+
+void atm_reset_addr(struct atm_dev *dev)
+{
+	unsigned long flags;
+	struct atm_dev_addr *this, *p;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry_safe(this, p, &dev->local, entry)
+	    kfree(this);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	notify_sigd(dev);
+}
+
+int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr)
+{
+	unsigned long flags;
+	struct atm_dev_addr *this;
+	int error;
+
+	error = check_addr(addr);
+	if (error)
+		return error;
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry(this, &dev->local, entry) {
+		if (identical(&this->addr, addr)) {
+			spin_unlock_irqrestore(&dev->lock, flags);
+			return -EEXIST;
+		}
+	}
+	this = kmalloc(sizeof(struct atm_dev_addr), GFP_ATOMIC);
+	if (!this) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -ENOMEM;
+	}
+	this->addr = *addr;
+	list_add(&this->entry, &dev->local);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	notify_sigd(dev);
+	return 0;
+}
+
+int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr)
+{
+	unsigned long flags;
+	struct atm_dev_addr *this;
+	int error;
+
+	error = check_addr(addr);
+	if (error)
+		return error;
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry(this, &dev->local, entry) {
+		if (identical(&this->addr, addr)) {
+			list_del(&this->entry);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			kfree(this);
+			notify_sigd(dev);
+			return 0;
+		}
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return -ENOENT;
+}
+
+int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf,
+		 size_t size)
+{
+	unsigned long flags;
+	struct atm_dev_addr *this;
+	int total = 0, error;
+	struct sockaddr_atmsvc *tmp_buf, *tmp_bufp;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry(this, &dev->local, entry)
+	    total += sizeof(struct sockaddr_atmsvc);
+	tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC);
+	if (!tmp_buf) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -ENOMEM;
+	}
+	list_for_each_entry(this, &dev->local, entry)
+	    memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc));
+	spin_unlock_irqrestore(&dev->lock, flags);
+	error = total > size ? -E2BIG : total;
+	if (copy_to_user(buf, tmp_buf, total < size ? total : size))
+		error = -EFAULT;
+	kfree(tmp_buf);
+	return error;
+}
diff --git a/net/atm/addr.h b/net/atm/addr.h
new file mode 100644
index 00000000000..3099d21feea
--- /dev/null
+++ b/net/atm/addr.h
@@ -0,0 +1,18 @@
+/* net/atm/addr.h - Local ATM address registry */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#ifndef NET_ATM_ADDR_H
+#define NET_ATM_ADDR_H
+
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+
+
+void atm_reset_addr(struct atm_dev *dev);
+int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr);
+int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr);
+int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,size_t size);
+
+#endif
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
new file mode 100644
index 00000000000..b2113c3454a
--- /dev/null
+++ b/net/atm/atm_misc.c
@@ -0,0 +1,106 @@
+/* net/atm/atm_misc.c - Various functions for use by ATM drivers */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/module.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/skbuff.h>
+#include <linux/sonet.h>
+#include <linux/bitops.h>
+#include <asm/atomic.h>
+#include <asm/errno.h>
+
+
+int atm_charge(struct atm_vcc *vcc,int truesize)
+{
+	atm_force_charge(vcc,truesize);
+	if (atomic_read(&sk_atm(vcc)->sk_rmem_alloc) <= sk_atm(vcc)->sk_rcvbuf)
+		return 1;
+	atm_return(vcc,truesize);
+	atomic_inc(&vcc->stats->rx_drop);
+	return 0;
+}
+
+
+struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size,
+    int gfp_flags)
+{
+	struct sock *sk = sk_atm(vcc);
+	int guess = atm_guess_pdu2truesize(pdu_size);
+
+	atm_force_charge(vcc,guess);
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
+		struct sk_buff *skb = alloc_skb(pdu_size,gfp_flags);
+
+		if (skb) {
+			atomic_add(skb->truesize-guess,
+				   &sk->sk_rmem_alloc);
+			return skb;
+		}
+	}
+	atm_return(vcc,guess);
+	atomic_inc(&vcc->stats->rx_drop);
+	return NULL;
+}
+
+
+/*
+ * atm_pcr_goal returns the positive PCR if it should be rounded up, the
+ * negative PCR if it should be rounded down, and zero if the maximum available
+ * bandwidth should be used.
+ *
+ * The rules are as follows (* = maximum, - = absent (0), x = value "x",
+ * (x+ = x or next value above x, x- = x or next value below):
+ *
+ *	min max pcr	result		min max pcr	result
+ *	-   -   -	* (UBR only)	x   -   -	x+
+ *	-   -   *	*		x   -   *	*
+ *	-   -   z	z-		x   -   z	z-
+ *	-   *   -	*		x   *   -	x+
+ *	-   *   *	*		x   *   *	*
+ *	-   *   z	z-		x   *   z	z-
+ *	-   y   -	y-		x   y   -	x+
+ *	-   y   *	y-		x   y   *	y-
+ *	-   y   z	z-		x   y   z	z-
+ *
+ * All non-error cases can be converted with the following simple set of rules:
+ *
+ *   if pcr == z then z-
+ *   else if min == x && pcr == - then x+
+ *     else if max == y then y-
+ *	 else *
+ */
+
+
+int atm_pcr_goal(struct atm_trafprm *tp)
+{
+	if (tp->pcr && tp->pcr != ATM_MAX_PCR) return -tp->pcr;
+	if (tp->min_pcr && !tp->pcr) return tp->min_pcr;
+	if (tp->max_pcr != ATM_MAX_PCR) return -tp->max_pcr;
+	return 0;
+}
+
+
+void sonet_copy_stats(struct k_sonet_stats *from,struct sonet_stats *to)
+{
+#define __HANDLE_ITEM(i) to->i = atomic_read(&from->i)
+	__SONET_ITEMS
+#undef __HANDLE_ITEM
+}
+
+
+void sonet_subtract_stats(struct k_sonet_stats *from,struct sonet_stats *to)
+{
+#define __HANDLE_ITEM(i) atomic_sub(to->i,&from->i)
+	__SONET_ITEMS
+#undef __HANDLE_ITEM
+}
+
+
+EXPORT_SYMBOL(atm_charge);
+EXPORT_SYMBOL(atm_alloc_charge);
+EXPORT_SYMBOL(atm_pcr_goal);
+EXPORT_SYMBOL(sonet_copy_stats);
+EXPORT_SYMBOL(sonet_subtract_stats);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
new file mode 100644
index 00000000000..e6954cf1459
--- /dev/null
+++ b/net/atm/br2684.c
@@ -0,0 +1,824 @@
+/*
+Experimental ethernet netdevice using ATM AAL5 as underlying carrier
+(RFC1483 obsoleted by RFC2684) for Linux 2.4
+Author: Marcell GAL, 2000, XDSL Ltd, Hungary
+*/
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <asm/uaccess.h>
+#include <net/arp.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/seq_file.h>
+
+#include <linux/atmbr2684.h>
+
+#include "common.h"
+#include "ipcommon.h"
+
+/*
+ * Define this to use a version of the code which interacts with the higher
+ * layers in a more intellegent way, by always reserving enough space for
+ * our header at the begining of the packet.  However, there may still be
+ * some problems with programs like tcpdump.  In 2.5 we'll sort out what
+ * we need to do to get this perfect.  For now we just will copy the packet
+ * if we need space for the header
+ */
+/* #define FASTER_VERSION */
+
+#ifdef DEBUG
+#define DPRINTK(format, args...) printk(KERN_DEBUG "br2684: " format, ##args)
+#else
+#define DPRINTK(format, args...)
+#endif
+
+#ifdef SKB_DEBUG
+static void skb_debug(const struct sk_buff *skb)
+{
+#define NUM2PRINT 50
+	char buf[NUM2PRINT * 3 + 1];	/* 3 chars per byte */
+	int i = 0;
+	for (i = 0; i < skb->len && i < NUM2PRINT; i++) {
+		sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]);
+	}
+	printk(KERN_DEBUG "br2684: skb: %s\n", buf);
+}
+#else
+#define skb_debug(skb)	do {} while (0)
+#endif
+
+static unsigned char llc_oui_pid_pad[] =
+    { 0xAA, 0xAA, 0x03, 0x00, 0x80, 0xC2, 0x00, 0x07, 0x00, 0x00 };
+#define PADLEN	(2)
+
+enum br2684_encaps {
+	e_vc  = BR2684_ENCAPS_VC,
+	e_llc = BR2684_ENCAPS_LLC,
+};
+
+struct br2684_vcc {
+	struct atm_vcc  *atmvcc;
+	struct net_device *device;
+	/* keep old push,pop functions for chaining */
+	void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb);
+	/* void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); */
+	enum br2684_encaps encaps;
+	struct list_head brvccs;
+#ifdef CONFIG_ATM_BR2684_IPFILTER
+	struct br2684_filter filter;
+#endif /* CONFIG_ATM_BR2684_IPFILTER */
+#ifndef FASTER_VERSION
+	unsigned copies_needed, copies_failed;
+#endif /* FASTER_VERSION */
+};
+
+struct br2684_dev {
+	struct net_device *net_dev;
+	struct list_head br2684_devs;
+	int number;
+	struct list_head brvccs; /* one device <=> one vcc (before xmas) */
+	struct net_device_stats stats;
+	int mac_was_set;
+};
+
+/*
+ * This lock should be held for writing any time the list of devices or
+ * their attached vcc's could be altered.  It should be held for reading
+ * any time these are being queried.  Note that we sometimes need to
+ * do read-locking under interrupt context, so write locking must block
+ * the current CPU's interrupts
+ */
+static DEFINE_RWLOCK(devs_lock);
+
+static LIST_HEAD(br2684_devs);
+
+static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
+{
+	return (struct br2684_dev *) net_dev->priv;
+}
+
+static inline struct net_device *list_entry_brdev(const struct list_head *le)
+{
+	return list_entry(le, struct br2684_dev, br2684_devs)->net_dev;
+}
+
+static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc)
+{
+	return (struct br2684_vcc *) (atmvcc->user_back);
+}
+
+static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le)
+{
+	return list_entry(le, struct br2684_vcc, brvccs);
+}
+
+/* Caller should hold read_lock(&devs_lock) */
+static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
+{
+	struct list_head *lh;
+	struct net_device *net_dev;
+	switch (s->method) {
+	case BR2684_FIND_BYNUM:
+		list_for_each(lh, &br2684_devs) {
+			net_dev = list_entry_brdev(lh);
+			if (BRPRIV(net_dev)->number == s->spec.devnum)
+				return net_dev;
+		}
+		break;
+	case BR2684_FIND_BYIFNAME:
+		list_for_each(lh, &br2684_devs) {
+			net_dev = list_entry_brdev(lh);
+			if (!strncmp(net_dev->name, s->spec.ifname, IFNAMSIZ))
+				return net_dev;
+		}
+		break;
+	}
+	return NULL;
+}
+
+/*
+ * Send a packet out a particular vcc.  Not to useful right now, but paves
+ * the way for multiple vcc's per itf.  Returns true if we can send,
+ * otherwise false
+ */
+static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
+	struct br2684_vcc *brvcc)
+{
+	struct atm_vcc *atmvcc;
+#ifdef FASTER_VERSION
+	if (brvcc->encaps == e_llc)
+		memcpy(skb_push(skb, 8), llc_oui_pid_pad, 8);
+	/* last 2 bytes of llc_oui_pid_pad are managed by header routines;
+	   yes, you got it: 8 + 2 = sizeof(llc_oui_pid_pad)
+	 */
+#else
+	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
+	if (skb_headroom(skb) < minheadroom) {
+		struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
+		brvcc->copies_needed++;
+		dev_kfree_skb(skb);
+		if (skb2 == NULL) {
+			brvcc->copies_failed++;
+			return 0;
+		}
+		skb = skb2;
+	}
+	skb_push(skb, minheadroom);
+	if (brvcc->encaps == e_llc)
+		memcpy(skb->data, llc_oui_pid_pad, 10);
+	else
+		memset(skb->data, 0, 2);
+#endif /* FASTER_VERSION */
+	skb_debug(skb);
+
+	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
+	DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
+	if (!atm_may_send(atmvcc, skb->truesize)) {
+		/* we free this here for now, because we cannot know in a higher 
+			layer whether the skb point it supplied wasn't freed yet.
+			now, it always is.
+		*/
+		dev_kfree_skb(skb);
+		return 0;
+		}
+	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
+	brdev->stats.tx_packets++;
+	brdev->stats.tx_bytes += skb->len;
+	atmvcc->send(atmvcc, skb);
+	return 1;
+}
+
+static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
+	struct br2684_dev *brdev)
+{
+	return list_empty(&brdev->brvccs) ? NULL :
+	    list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
+}
+
+static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct br2684_dev *brdev = BRPRIV(dev);
+	struct br2684_vcc *brvcc;
+
+	DPRINTK("br2684_start_xmit, skb->dst=%p\n", skb->dst);
+	read_lock(&devs_lock);
+	brvcc = pick_outgoing_vcc(skb, brdev);
+	if (brvcc == NULL) {
+		DPRINTK("no vcc attached to dev %s\n", dev->name);
+		brdev->stats.tx_errors++;
+		brdev->stats.tx_carrier_errors++;
+		/* netif_stop_queue(dev); */
+		dev_kfree_skb(skb);
+		read_unlock(&devs_lock);
+		return -EUNATCH;
+	}
+	if (!br2684_xmit_vcc(skb, brdev, brvcc)) {
+		/*
+		 * We should probably use netif_*_queue() here, but that
+		 * involves added complication.  We need to walk before
+		 * we can run
+		 */
+		/* don't free here! this pointer might be no longer valid!
+		dev_kfree_skb(skb);
+		*/
+		brdev->stats.tx_errors++;
+		brdev->stats.tx_fifo_errors++;
+	}
+	read_unlock(&devs_lock);
+	return 0;
+}
+
+static struct net_device_stats *br2684_get_stats(struct net_device *dev)
+{
+	DPRINTK("br2684_get_stats\n");
+	return &BRPRIV(dev)->stats;
+}
+
+#ifdef FASTER_VERSION
+/*
+ * These mirror eth_header and eth_header_cache.  They are not usually
+ * exported for use in modules, so we grab them from net_device
+ * after ether_setup() is done with it.  Bit of a hack.
+ */
+static int (*my_eth_header)(struct sk_buff *, struct net_device *,
+	unsigned short, void *, void *, unsigned);
+static int (*my_eth_header_cache)(struct neighbour *, struct hh_cache *);
+
+static int
+br2684_header(struct sk_buff *skb, struct net_device *dev,
+	      unsigned short type, void *daddr, void *saddr, unsigned len)
+{
+	u16 *pad_before_eth;
+	int t = my_eth_header(skb, dev, type, daddr, saddr, len);
+	if (t > 0) {
+		pad_before_eth = (u16 *) skb_push(skb, 2);
+		*pad_before_eth = 0;
+		return dev->hard_header_len;	/* or return 16; ? */
+	} else
+		return t;
+}
+
+static int
+br2684_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+{
+/* hh_data is 16 bytes long. if encaps is ether-llc we need 24, so
+xmit will add the additional header part in that case */
+	u16 *pad_before_eth = (u16 *)(hh->hh_data);
+	int t = my_eth_header_cache(neigh, hh);
+	DPRINTK("br2684_header_cache, neigh=%p, hh_cache=%p\n", neigh, hh);
+	if (t < 0)
+		return t;
+	else {
+		*pad_before_eth = 0;
+		hh->hh_len = PADLEN + ETH_HLEN;
+	}
+	return 0;
+}
+
+/*
+ * This is similar to eth_type_trans, which cannot be used because of
+ * our dev->hard_header_len
+ */
+static inline unsigned short br_type_trans(struct sk_buff *skb,
+					       struct net_device *dev)
+{
+	struct ethhdr *eth;
+	unsigned char *rawp;
+	eth = eth_hdr(skb);
+
+	if (*eth->h_dest & 1) {
+		if (memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0)
+			skb->pkt_type = PACKET_BROADCAST;
+		else
+			skb->pkt_type = PACKET_MULTICAST;
+	}
+
+	else if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN))
+		skb->pkt_type = PACKET_OTHERHOST;
+
+	if (ntohs(eth->h_proto) >= 1536)
+		return eth->h_proto;
+
+	rawp = skb->data;
+
+	/*
+	 * This is a magic hack to spot IPX packets. Older Novell breaks
+	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
+	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+	 * won't work for fault tolerant netware but does for the rest.
+	 */
+	if (*(unsigned short *) rawp == 0xFFFF)
+		return htons(ETH_P_802_3);
+
+	/*
+	 * Real 802.2 LLC
+	 */
+	return htons(ETH_P_802_2);
+}
+#endif /* FASTER_VERSION */
+
+/*
+ * We remember when the MAC gets set, so we don't override it later with
+ * the ESI of the ATM card of the first VC
+ */
+static int (*my_eth_mac_addr)(struct net_device *, void *);
+static int br2684_mac_addr(struct net_device *dev, void *p)
+{
+	int err = my_eth_mac_addr(dev, p);
+	if (!err)
+		BRPRIV(dev)->mac_was_set = 1;
+	return err;
+}
+
+#ifdef CONFIG_ATM_BR2684_IPFILTER
+/* this IOCTL is experimental. */
+static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
+{
+	struct br2684_vcc *brvcc;
+	struct br2684_filter_set fs;
+
+	if (copy_from_user(&fs, arg, sizeof fs))
+		return -EFAULT;
+	if (fs.ifspec.method != BR2684_FIND_BYNOTHING) {
+		/*
+		 * This is really a per-vcc thing, but we can also search
+		 * by device
+		 */
+		struct br2684_dev *brdev;
+		read_lock(&devs_lock);
+		brdev = BRPRIV(br2684_find_dev(&fs.ifspec));
+		if (brdev == NULL || list_empty(&brdev->brvccs) ||
+		    brdev->brvccs.next != brdev->brvccs.prev)  /* >1 VCC */
+			brvcc = NULL;
+		else
+			brvcc = list_entry_brvcc(brdev->brvccs.next);
+		read_unlock(&devs_lock);
+		if (brvcc == NULL)
+			return -ESRCH;
+	} else
+		brvcc = BR2684_VCC(atmvcc);
+	memcpy(&brvcc->filter, &fs.filter, sizeof(brvcc->filter));
+	return 0;
+}
+
+/* Returns 1 if packet should be dropped */
+static inline int
+packet_fails_filter(u16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
+{
+	if (brvcc->filter.netmask == 0)
+		return 0;			/* no filter in place */
+	if (type == __constant_htons(ETH_P_IP) &&
+	    (((struct iphdr *) (skb->data))->daddr & brvcc->filter.
+	     netmask) == brvcc->filter.prefix)
+		return 0;
+	if (type == __constant_htons(ETH_P_ARP))
+		return 0;
+	/* TODO: we should probably filter ARPs too.. don't want to have
+	 *   them returning values that don't make sense, or is that ok?
+	 */
+	return 1;		/* drop */
+}
+#endif /* CONFIG_ATM_BR2684_IPFILTER */
+
+static void br2684_close_vcc(struct br2684_vcc *brvcc)
+{
+	DPRINTK("removing VCC %p from dev %p\n", brvcc, brvcc->device);
+	write_lock_irq(&devs_lock);
+	list_del(&brvcc->brvccs);
+	write_unlock_irq(&devs_lock);
+	brvcc->atmvcc->user_back = NULL;	/* what about vcc->recvq ??? */
+	brvcc->old_push(brvcc->atmvcc, NULL);	/* pass on the bad news */
+	kfree(brvcc);
+	module_put(THIS_MODULE);
+}
+
+/* when AAL5 PDU comes in: */
+static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
+{
+	struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
+	struct net_device *net_dev = brvcc->device;
+	struct br2684_dev *brdev = BRPRIV(net_dev);
+	int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN;
+
+	DPRINTK("br2684_push\n");
+
+	if (unlikely(skb == NULL)) {
+		/* skb==NULL means VCC is being destroyed */
+		br2684_close_vcc(brvcc);
+		if (list_empty(&brdev->brvccs)) {
+			read_lock(&devs_lock);
+			list_del(&brdev->br2684_devs);
+			read_unlock(&devs_lock);
+			unregister_netdev(net_dev);
+			free_netdev(net_dev);
+		}
+		return;
+	}
+
+	skb_debug(skb);
+	atm_return(atmvcc, skb->truesize);
+	DPRINTK("skb from brdev %p\n", brdev);
+	if (brvcc->encaps == e_llc) {
+		/* let us waste some time for checking the encapsulation.
+		   Note, that only 7 char is checked so frames with a valid FCS
+		   are also accepted (but FCS is not checked of course) */
+		if (memcmp(skb->data, llc_oui_pid_pad, 7)) {
+			brdev->stats.rx_errors++;
+			dev_kfree_skb(skb);
+			return;
+		}
+
+		/* Strip FCS if present */
+		if (skb->len > 7 && skb->data[7] == 0x01)
+			__skb_trim(skb, skb->len - 4);
+	} else {
+		plen = PADLEN + ETH_HLEN;	/* pad, dstmac,srcmac, ethtype */
+		/* first 2 chars should be 0 */
+		if (*((u16 *) (skb->data)) != 0) {
+			brdev->stats.rx_errors++;
+			dev_kfree_skb(skb);
+			return;
+		}
+	}
+	if (skb->len < plen) {
+		brdev->stats.rx_errors++;
+		dev_kfree_skb(skb);	/* dev_ not needed? */
+		return;
+	}
+
+#ifdef FASTER_VERSION
+	/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
+	   than should be. What else should I set? */
+	skb_pull(skb, plen);
+	skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN;
+	skb->pkt_type = PACKET_HOST;
+#ifdef CONFIG_BR2684_FAST_TRANS
+	skb->protocol = ((u16 *) skb->data)[-1];
+#else				/* some protocols might require this: */
+	skb->protocol = br_type_trans(skb, net_dev);
+#endif /* CONFIG_BR2684_FAST_TRANS */
+#else
+	skb_pull(skb, plen - ETH_HLEN);
+	skb->protocol = eth_type_trans(skb, net_dev);
+#endif /* FASTER_VERSION */
+#ifdef CONFIG_ATM_BR2684_IPFILTER
+	if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
+		brdev->stats.rx_dropped++;
+		dev_kfree_skb(skb);
+		return;
+	}
+#endif /* CONFIG_ATM_BR2684_IPFILTER */
+	skb->dev = net_dev;
+	ATM_SKB(skb)->vcc = atmvcc;	/* needed ? */
+	DPRINTK("received packet's protocol: %x\n", ntohs(skb->protocol));
+	skb_debug(skb);
+	if (unlikely(!(net_dev->flags & IFF_UP))) {
+		/* sigh, interface is down */
+		brdev->stats.rx_dropped++;
+		dev_kfree_skb(skb);
+		return;
+	}
+	brdev->stats.rx_packets++;
+	brdev->stats.rx_bytes += skb->len;
+	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
+	netif_rx(skb);
+}
+
+static int br2684_regvcc(struct atm_vcc *atmvcc, void __user *arg)
+{
+/* assign a vcc to a dev
+Note: we do not have explicit unassign, but look at _push()
+*/
+	int err;
+	struct br2684_vcc *brvcc;
+	struct sk_buff_head copy;
+	struct sk_buff *skb;
+	struct br2684_dev *brdev;
+	struct net_device *net_dev;
+	struct atm_backend_br2684 be;
+
+	if (copy_from_user(&be, arg, sizeof be))
+		return -EFAULT;
+	brvcc = kmalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
+	if (!brvcc)
+		return -ENOMEM;
+	memset(brvcc, 0, sizeof(struct br2684_vcc));
+	write_lock_irq(&devs_lock);
+	net_dev = br2684_find_dev(&be.ifspec);
+	if (net_dev == NULL) {
+		printk(KERN_ERR
+		    "br2684: tried to attach to non-existant device\n");
+		err = -ENXIO;
+		goto error;
+	}
+	brdev = BRPRIV(net_dev);
+	if (atmvcc->push == NULL) {
+		err = -EBADFD;
+		goto error;
+	}
+	if (!list_empty(&brdev->brvccs)) {
+		/* Only 1 VCC/dev right now */
+		err = -EEXIST;
+		goto error;
+	}
+	if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO ||
+	    be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps !=
+	    BR2684_ENCAPS_VC && be.encaps != BR2684_ENCAPS_LLC) ||
+	    be.min_size != 0) {
+		err = -EINVAL;
+		goto error;
+	}
+	DPRINTK("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps,
+		brvcc);
+	if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) {
+		unsigned char *esi = atmvcc->dev->esi;
+		if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5])
+			memcpy(net_dev->dev_addr, esi, net_dev->addr_len);
+		else
+			net_dev->dev_addr[2] = 1;
+	}
+	list_add(&brvcc->brvccs, &brdev->brvccs);
+	write_unlock_irq(&devs_lock);
+	brvcc->device = net_dev;
+	brvcc->atmvcc = atmvcc;
+	atmvcc->user_back = brvcc;
+	brvcc->encaps = (enum br2684_encaps) be.encaps;
+	brvcc->old_push = atmvcc->push;
+	barrier();
+	atmvcc->push = br2684_push;
+	skb_queue_head_init(&copy);
+	skb_migrate(&sk_atm(atmvcc)->sk_receive_queue, &copy);
+	while ((skb = skb_dequeue(&copy)) != NULL) {
+		BRPRIV(skb->dev)->stats.rx_bytes -= skb->len;
+		BRPRIV(skb->dev)->stats.rx_packets--;
+		br2684_push(atmvcc, skb);
+	}
+	__module_get(THIS_MODULE);
+	return 0;
+    error:
+	write_unlock_irq(&devs_lock);
+	kfree(brvcc);
+	return err;
+}
+
+static void br2684_setup(struct net_device *netdev)
+{
+	struct br2684_dev *brdev = BRPRIV(netdev);
+
+	ether_setup(netdev);
+	brdev->net_dev = netdev;
+
+#ifdef FASTER_VERSION
+	my_eth_header = netdev->hard_header;
+	netdev->hard_header = br2684_header;
+	my_eth_header_cache = netdev->hard_header_cache;
+	netdev->hard_header_cache = br2684_header_cache;
+	netdev->hard_header_len = sizeof(llc_oui_pid_pad) + ETH_HLEN;	/* 10 + 14 */
+#endif
+	my_eth_mac_addr = netdev->set_mac_address;
+	netdev->set_mac_address = br2684_mac_addr;
+	netdev->hard_start_xmit = br2684_start_xmit;
+	netdev->get_stats = br2684_get_stats;
+
+	INIT_LIST_HEAD(&brdev->brvccs);
+}
+
+static int br2684_create(void __user *arg)
+{
+	int err;
+	struct net_device *netdev;
+	struct br2684_dev *brdev;
+	struct atm_newif_br2684 ni;
+
+	DPRINTK("br2684_create\n");
+
+	if (copy_from_user(&ni, arg, sizeof ni)) {
+		return -EFAULT;
+	}
+	if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) {
+		return -EINVAL;
+	}
+
+	netdev = alloc_netdev(sizeof(struct br2684_dev),
+			      ni.ifname[0] ? ni.ifname : "nas%d",
+			      br2684_setup);
+	if (!netdev)
+		return -ENOMEM;
+
+	brdev = BRPRIV(netdev);
+
+	DPRINTK("registered netdev %s\n", netdev->name);
+	/* open, stop, do_ioctl ? */
+	err = register_netdev(netdev);
+	if (err < 0) {
+		printk(KERN_ERR "br2684_create: register_netdev failed\n");
+		free_netdev(netdev);
+		return err;
+	}
+
+	write_lock_irq(&devs_lock);
+	brdev->number = list_empty(&br2684_devs) ? 1 :
+	    BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
+	list_add_tail(&brdev->br2684_devs, &br2684_devs);
+	write_unlock_irq(&devs_lock);
+	return 0;
+}
+
+/*
+ * This handles ioctls actually performed on our vcc - we must return
+ * -ENOIOCTLCMD for any unrecognized ioctl
+ */
+static int br2684_ioctl(struct socket *sock, unsigned int cmd,
+	unsigned long arg)
+{
+	struct atm_vcc *atmvcc = ATM_SD(sock);
+	void __user *argp = (void __user *)arg;
+
+	int err;
+	switch(cmd) {
+	case ATM_SETBACKEND:
+	case ATM_NEWBACKENDIF: {
+		atm_backend_t b;
+		err = get_user(b, (atm_backend_t __user *) argp);
+		if (err)
+			return -EFAULT;
+		if (b != ATM_BACKEND_BR2684)
+			return -ENOIOCTLCMD;
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (cmd == ATM_SETBACKEND)
+			return br2684_regvcc(atmvcc, argp);
+		else
+			return br2684_create(argp);
+		}
+#ifdef CONFIG_ATM_BR2684_IPFILTER
+	case BR2684_SETFILT:
+		if (atmvcc->push != br2684_push)
+			return -ENOIOCTLCMD;
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = br2684_setfilt(atmvcc, argp);
+		return err;
+#endif /* CONFIG_ATM_BR2684_IPFILTER */
+	}
+	return -ENOIOCTLCMD;
+}
+
+static struct atm_ioctl br2684_ioctl_ops = {
+	.owner	= THIS_MODULE,
+	.ioctl	= br2684_ioctl,
+};
+
+
+#ifdef CONFIG_PROC_FS
+static void *br2684_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t offs = 0;
+	struct br2684_dev *brd;
+
+	read_lock(&devs_lock);
+
+	list_for_each_entry(brd, &br2684_devs, br2684_devs) {
+		if (offs == *pos)
+			return brd;
+		++offs;
+	}
+	return NULL;
+}
+
+static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct br2684_dev *brd = v;
+
+	++*pos;
+
+	brd = list_entry(brd->br2684_devs.next, 
+			 struct br2684_dev, br2684_devs);
+	return (&brd->br2684_devs != &br2684_devs) ? brd : NULL;
+}
+
+static void br2684_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&devs_lock);
+}
+
+static int br2684_seq_show(struct seq_file *seq, void *v)
+{
+	const struct br2684_dev *brdev = v;
+	const struct net_device *net_dev = brdev->net_dev;
+	const struct br2684_vcc *brvcc;
+
+	seq_printf(seq, "dev %.16s: num=%d, mac=%02X:%02X:"
+		       "%02X:%02X:%02X:%02X (%s)\n", net_dev->name,
+		       brdev->number,
+		       net_dev->dev_addr[0],
+		       net_dev->dev_addr[1],
+		       net_dev->dev_addr[2],
+		       net_dev->dev_addr[3],
+		       net_dev->dev_addr[4],
+		       net_dev->dev_addr[5],
+		       brdev->mac_was_set ? "set" : "auto");
+
+	list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
+		seq_printf(seq, "  vcc %d.%d.%d: encaps=%s"
+#ifndef FASTER_VERSION
+				    ", failed copies %u/%u"
+#endif /* FASTER_VERSION */
+				    "\n", brvcc->atmvcc->dev->number,
+				    brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
+				    (brvcc->encaps == e_llc) ? "LLC" : "VC"
+#ifndef FASTER_VERSION
+				    , brvcc->copies_failed
+				    , brvcc->copies_needed
+#endif /* FASTER_VERSION */
+				    );
+#ifdef CONFIG_ATM_BR2684_IPFILTER
+#define b1(var, byte)	((u8 *) &brvcc->filter.var)[byte]
+#define bs(var)		b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
+			if (brvcc->filter.netmask != 0)
+				seq_printf(seq, "    filter=%d.%d.%d.%d/"
+						"%d.%d.%d.%d\n",
+						bs(prefix), bs(netmask));
+#undef bs
+#undef b1
+#endif /* CONFIG_ATM_BR2684_IPFILTER */
+	}
+	return 0;
+}
+
+static struct seq_operations br2684_seq_ops = {
+	.start = br2684_seq_start,
+	.next  = br2684_seq_next,
+	.stop  = br2684_seq_stop,
+	.show  = br2684_seq_show,
+};
+
+static int br2684_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &br2684_seq_ops);
+}
+
+static struct file_operations br2684_proc_ops = {
+	.owner   = THIS_MODULE,
+	.open    = br2684_proc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+extern struct proc_dir_entry *atm_proc_root;	/* from proc.c */
+#endif
+
+static int __init br2684_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *p;
+	if ((p = create_proc_entry("br2684", 0, atm_proc_root)) == NULL)
+		return -ENOMEM;
+	p->proc_fops = &br2684_proc_ops;
+#endif
+	register_atm_ioctl(&br2684_ioctl_ops);
+	return 0;
+}
+
+static void __exit br2684_exit(void)
+{
+	struct net_device *net_dev;
+	struct br2684_dev *brdev;
+	struct br2684_vcc *brvcc;
+	deregister_atm_ioctl(&br2684_ioctl_ops);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("br2684", atm_proc_root);
+#endif
+
+	while (!list_empty(&br2684_devs)) {
+		net_dev = list_entry_brdev(br2684_devs.next);
+		brdev = BRPRIV(net_dev);
+		while (!list_empty(&brdev->brvccs)) {
+			brvcc = list_entry_brvcc(brdev->brvccs.next);
+			br2684_close_vcc(brvcc);
+		}
+
+		list_del(&brdev->br2684_devs);
+		unregister_netdev(net_dev);
+		free_netdev(net_dev);
+	}
+}
+
+module_init(br2684_init);
+module_exit(br2684_exit);
+
+MODULE_AUTHOR("Marcell GAL");
+MODULE_DESCRIPTION("RFC2684 bridged protocols over ATM/AAL5");
+MODULE_LICENSE("GPL");
diff --git a/net/atm/clip.c b/net/atm/clip.c
new file mode 100644
index 00000000000..28dab55a438
--- /dev/null
+++ b/net/atm/clip.c
@@ -0,0 +1,1045 @@
+/* net/atm/clip.c - RFC1577 Classical IP over ATM */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/kernel.h> /* for UINT_MAX */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/timer.h>
+#include <linux/if_arp.h> /* for some manifest constants */
+#include <linux/notifier.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>
+#include <linux/atmarp.h>
+#include <linux/ip.h> /* for net/route.h */
+#include <linux/in.h> /* for struct sockaddr_in */
+#include <linux/if.h> /* for IFF_UP */
+#include <linux/inetdevice.h>
+#include <linux/bitops.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
+#include <linux/jhash.h>
+#include <net/route.h> /* for struct rtable and routing */
+#include <net/icmp.h> /* icmp_send */
+#include <asm/param.h> /* for HZ */
+#include <asm/byteorder.h> /* for htons etc. */
+#include <asm/system.h> /* save/restore_flags */
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include "common.h"
+#include "resources.h"
+#include "ipcommon.h"
+#include <net/atmclip.h>
+
+
+#if 0
+#define DPRINTK(format,args...) printk(format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+
+static struct net_device *clip_devs;
+static struct atm_vcc *atmarpd;
+static struct neigh_table clip_tbl;
+static struct timer_list idle_timer;
+static int start_timer = 1;
+
+
+static int to_atmarpd(enum atmarp_ctrl_type type,int itf,unsigned long ip)
+{
+	struct sock *sk;
+	struct atmarp_ctrl *ctrl;
+	struct sk_buff *skb;
+
+	DPRINTK("to_atmarpd(%d)\n",type);
+	if (!atmarpd) return -EUNATCH;
+	skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC);
+	if (!skb) return -ENOMEM;
+	ctrl = (struct atmarp_ctrl *) skb_put(skb,sizeof(struct atmarp_ctrl));
+	ctrl->type = type;
+	ctrl->itf_num = itf;
+	ctrl->ip = ip;
+	atm_force_charge(atmarpd,skb->truesize);
+
+	sk = sk_atm(atmarpd);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+
+
+static void link_vcc(struct clip_vcc *clip_vcc,struct atmarp_entry *entry)
+{
+	DPRINTK("link_vcc %p to entry %p (neigh %p)\n",clip_vcc,entry,
+	    entry->neigh);
+	clip_vcc->entry = entry;
+	clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */
+	clip_vcc->next = entry->vccs;
+	entry->vccs = clip_vcc;
+	entry->neigh->used = jiffies;
+}
+
+
+static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
+{
+	struct atmarp_entry *entry = clip_vcc->entry;
+	struct clip_vcc **walk;
+
+	if (!entry) {
+		printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc);
+		return;
+	}
+	spin_lock_bh(&entry->neigh->dev->xmit_lock);	/* block clip_start_xmit() */
+	entry->neigh->used = jiffies;
+	for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
+		if (*walk == clip_vcc) {
+			int error;
+
+			*walk = clip_vcc->next; /* atomic */
+			clip_vcc->entry = NULL;
+			if (clip_vcc->xoff)
+				netif_wake_queue(entry->neigh->dev);
+			if (entry->vccs)
+				goto out;
+			entry->expires = jiffies-1;
+				/* force resolution or expiration */
+			error = neigh_update(entry->neigh, NULL, NUD_NONE,
+					     NEIGH_UPDATE_F_ADMIN);
+			if (error)
+				printk(KERN_CRIT "unlink_clip_vcc: "
+				    "neigh_update failed with %d\n",error);
+			goto out;
+		}
+	printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
+	  "0x%p)\n",entry,clip_vcc);
+out:
+	spin_unlock_bh(&entry->neigh->dev->xmit_lock);
+}
+
+/* The neighbour entry n->lock is held. */
+static int neigh_check_cb(struct neighbour *n)
+{
+	struct atmarp_entry *entry = NEIGH2ENTRY(n);
+	struct clip_vcc *cv;
+
+	for (cv = entry->vccs; cv; cv = cv->next) {
+		unsigned long exp = cv->last_use + cv->idle_timeout;
+
+		if (cv->idle_timeout && time_after(jiffies, exp)) {
+			DPRINTK("releasing vcc %p->%p of entry %p\n",
+				cv, cv->vcc, entry);
+			vcc_release_async(cv->vcc, -ETIMEDOUT);
+		}
+	}
+
+	if (entry->vccs || time_before(jiffies, entry->expires))
+		return 0;
+
+	if (atomic_read(&n->refcnt) > 1) {
+		struct sk_buff *skb;
+
+		DPRINTK("destruction postponed with ref %d\n",
+			atomic_read(&n->refcnt));
+
+		while ((skb = skb_dequeue(&n->arp_queue)) != NULL) 
+			dev_kfree_skb(skb);
+
+		return 0;
+	}
+
+	DPRINTK("expired neigh %p\n",n);
+	return 1;
+}
+
+static void idle_timer_check(unsigned long dummy)
+{
+	write_lock(&clip_tbl.lock);
+	__neigh_for_each_release(&clip_tbl, neigh_check_cb);
+	mod_timer(&idle_timer, jiffies+CLIP_CHECK_INTERVAL*HZ);
+	write_unlock(&clip_tbl.lock);
+}
+
+static int clip_arp_rcv(struct sk_buff *skb)
+{
+	struct atm_vcc *vcc;
+
+	DPRINTK("clip_arp_rcv\n");
+	vcc = ATM_SKB(skb)->vcc;
+	if (!vcc || !atm_charge(vcc,skb->truesize)) {
+		dev_kfree_skb_any(skb);
+		return 0;
+	}
+	DPRINTK("pushing to %p\n",vcc);
+	DPRINTK("using %p\n",CLIP_VCC(vcc)->old_push);
+	CLIP_VCC(vcc)->old_push(vcc,skb);
+	return 0;
+}
+
+static const unsigned char llc_oui[] = {
+	0xaa,	/* DSAP: non-ISO */
+	0xaa,	/* SSAP: non-ISO */
+	0x03,	/* Ctrl: Unnumbered Information Command PDU */
+	0x00,	/* OUI: EtherType */
+	0x00,
+	0x00 };
+
+static void clip_push(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
+
+	DPRINTK("clip push\n");
+	if (!skb) {
+		DPRINTK("removing VCC %p\n",clip_vcc);
+		if (clip_vcc->entry) unlink_clip_vcc(clip_vcc);
+		clip_vcc->old_push(vcc,NULL); /* pass on the bad news */
+		kfree(clip_vcc);
+		return;
+	}
+	atm_return(vcc,skb->truesize);
+	skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs;
+		/* clip_vcc->entry == NULL if we don't have an IP address yet */
+	if (!skb->dev) {
+		dev_kfree_skb_any(skb);
+		return;
+	}
+	ATM_SKB(skb)->vcc = vcc;
+	skb->mac.raw = skb->data;
+	if (!clip_vcc->encap || skb->len < RFC1483LLC_LEN || memcmp(skb->data,
+	    llc_oui,sizeof(llc_oui))) skb->protocol = htons(ETH_P_IP);
+	else {
+		skb->protocol = ((u16 *) skb->data)[3];
+		skb_pull(skb,RFC1483LLC_LEN);
+		if (skb->protocol == htons(ETH_P_ARP)) {
+			PRIV(skb->dev)->stats.rx_packets++;
+			PRIV(skb->dev)->stats.rx_bytes += skb->len;
+			clip_arp_rcv(skb);
+			return;
+		}
+	}
+	clip_vcc->last_use = jiffies;
+	PRIV(skb->dev)->stats.rx_packets++;
+	PRIV(skb->dev)->stats.rx_bytes += skb->len;
+	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
+	netif_rx(skb);
+}
+
+
+/*
+ * Note: these spinlocks _must_not_ block on non-SMP. The only goal is that
+ * clip_pop is atomic with respect to the critical section in clip_start_xmit.
+ */
+
+
+static void clip_pop(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
+	struct net_device *dev = skb->dev;
+	int old;
+	unsigned long flags;
+
+	DPRINTK("clip_pop(vcc %p)\n",vcc);
+	clip_vcc->old_pop(vcc,skb);
+	/* skb->dev == NULL in outbound ARP packets */
+	if (!dev) return;
+	spin_lock_irqsave(&PRIV(dev)->xoff_lock,flags);
+	if (atm_may_send(vcc,0)) {
+		old = xchg(&clip_vcc->xoff,0);
+		if (old) netif_wake_queue(dev);
+	}
+	spin_unlock_irqrestore(&PRIV(dev)->xoff_lock,flags);
+}
+
+
+static void clip_neigh_destroy(struct neighbour *neigh)
+{
+	DPRINTK("clip_neigh_destroy (neigh %p)\n",neigh);
+	if (NEIGH2ENTRY(neigh)->vccs)
+		printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n");
+	NEIGH2ENTRY(neigh)->vccs = (void *) 0xdeadbeef;
+}
+
+
+static void clip_neigh_solicit(struct neighbour *neigh,struct sk_buff *skb)
+{
+	DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n",neigh,skb);
+	to_atmarpd(act_need,PRIV(neigh->dev)->number,NEIGH2ENTRY(neigh)->ip);
+}
+
+
+static void clip_neigh_error(struct neighbour *neigh,struct sk_buff *skb)
+{
+#ifndef CONFIG_ATM_CLIP_NO_ICMP
+	icmp_send(skb,ICMP_DEST_UNREACH,ICMP_HOST_UNREACH,0);
+#endif
+	kfree_skb(skb);
+}
+
+
+static struct neigh_ops clip_neigh_ops = {
+	.family =		AF_INET,
+	.destructor =		clip_neigh_destroy,
+	.solicit =		clip_neigh_solicit,
+	.error_report =		clip_neigh_error,
+	.output =		dev_queue_xmit,
+	.connected_output =	dev_queue_xmit,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+
+static int clip_constructor(struct neighbour *neigh)
+{
+	struct atmarp_entry *entry = NEIGH2ENTRY(neigh);
+	struct net_device *dev = neigh->dev;
+	struct in_device *in_dev;
+	struct neigh_parms *parms;
+
+	DPRINTK("clip_constructor (neigh %p, entry %p)\n",neigh,entry);
+	neigh->type = inet_addr_type(entry->ip);
+	if (neigh->type != RTN_UNICAST) return -EINVAL;
+
+	rcu_read_lock();
+	in_dev = rcu_dereference(__in_dev_get(dev));
+	if (!in_dev) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	parms = in_dev->arp_parms;
+	__neigh_parms_put(neigh->parms);
+	neigh->parms = neigh_parms_clone(parms);
+	rcu_read_unlock();
+
+	neigh->ops = &clip_neigh_ops;
+	neigh->output = neigh->nud_state & NUD_VALID ?
+	    neigh->ops->connected_output : neigh->ops->output;
+	entry->neigh = neigh;
+	entry->vccs = NULL;
+	entry->expires = jiffies-1;
+	return 0;
+}
+
+static u32 clip_hash(const void *pkey, const struct net_device *dev)
+{
+	return jhash_2words(*(u32 *)pkey, dev->ifindex, clip_tbl.hash_rnd);
+}
+
+static struct neigh_table clip_tbl = {
+	.family 	= AF_INET,
+	.entry_size 	= sizeof(struct neighbour)+sizeof(struct atmarp_entry),
+	.key_len 	= 4,
+	.hash 		= clip_hash,
+	.constructor 	= clip_constructor,
+	.id 		= "clip_arp_cache",
+
+	/* parameters are copied from ARP ... */
+	.parms = {
+		.tbl 			= &clip_tbl,
+		.base_reachable_time 	= 30 * HZ,
+		.retrans_time 		= 1 * HZ,
+		.gc_staletime 		= 60 * HZ,
+		.reachable_time 	= 30 * HZ,
+		.delay_probe_time 	= 5 * HZ,
+		.queue_len 		= 3,
+		.ucast_probes 		= 3,
+		.mcast_probes 		= 3,
+		.anycast_delay 		= 1 * HZ,
+		.proxy_delay 		= (8 * HZ) / 10,
+		.proxy_qlen 		= 64,
+		.locktime 		= 1 * HZ,
+	},
+	.gc_interval 	= 30 * HZ,
+	.gc_thresh1 	= 128,
+	.gc_thresh2 	= 512,
+	.gc_thresh3 	= 1024,
+};
+
+
+/* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */
+
+/*
+ * We play with the resolve flag: 0 and 1 have the usual meaning, but -1 means
+ * to allocate the neighbour entry but not to ask atmarpd for resolution. Also,
+ * don't increment the usage count. This is used to create entries in
+ * clip_setentry.
+ */
+
+
+static int clip_encap(struct atm_vcc *vcc,int mode)
+{
+	CLIP_VCC(vcc)->encap = mode;
+	return 0;
+}
+
+
+static int clip_start_xmit(struct sk_buff *skb,struct net_device *dev)
+{
+	struct clip_priv *clip_priv = PRIV(dev);
+	struct atmarp_entry *entry;
+	struct atm_vcc *vcc;
+	int old;
+	unsigned long flags;
+
+	DPRINTK("clip_start_xmit (skb %p)\n",skb);
+	if (!skb->dst) {
+		printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n");
+		dev_kfree_skb(skb);
+		clip_priv->stats.tx_dropped++;
+		return 0;
+	}
+	if (!skb->dst->neighbour) {
+#if 0
+		skb->dst->neighbour = clip_find_neighbour(skb->dst,1);
+		if (!skb->dst->neighbour) {
+			dev_kfree_skb(skb); /* lost that one */
+			clip_priv->stats.tx_dropped++;
+			return 0;
+		}
+#endif
+		printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n");
+		dev_kfree_skb(skb);
+		clip_priv->stats.tx_dropped++;
+		return 0;
+	}
+	entry = NEIGH2ENTRY(skb->dst->neighbour);
+	if (!entry->vccs) {
+		if (time_after(jiffies, entry->expires)) {
+			/* should be resolved */
+			entry->expires = jiffies+ATMARP_RETRY_DELAY*HZ;
+			to_atmarpd(act_need,PRIV(dev)->number,entry->ip);
+		}
+		if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS)
+			skb_queue_tail(&entry->neigh->arp_queue,skb);
+		else {
+			dev_kfree_skb(skb);
+			clip_priv->stats.tx_dropped++;
+		}
+		return 0;
+	}
+	DPRINTK("neigh %p, vccs %p\n",entry,entry->vccs);
+	ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc;
+	DPRINTK("using neighbour %p, vcc %p\n",skb->dst->neighbour,vcc);
+	if (entry->vccs->encap) {
+		void *here;
+
+		here = skb_push(skb,RFC1483LLC_LEN);
+		memcpy(here,llc_oui,sizeof(llc_oui));
+		((u16 *) here)[3] = skb->protocol;
+	}
+	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	ATM_SKB(skb)->atm_options = vcc->atm_options;
+	entry->vccs->last_use = jiffies;
+	DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n",skb,vcc,vcc->dev);
+	old = xchg(&entry->vccs->xoff,1); /* assume XOFF ... */
+	if (old) {
+		printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n");
+		return 0;
+	}
+	clip_priv->stats.tx_packets++;
+	clip_priv->stats.tx_bytes += skb->len;
+	(void) vcc->send(vcc,skb);
+	if (atm_may_send(vcc,0)) {
+		entry->vccs->xoff = 0;
+		return 0;
+	}
+	spin_lock_irqsave(&clip_priv->xoff_lock,flags);
+	netif_stop_queue(dev); /* XOFF -> throttle immediately */
+	barrier();
+	if (!entry->vccs->xoff)
+		netif_start_queue(dev);
+		/* Oh, we just raced with clip_pop. netif_start_queue should be
+		   good enough, because nothing should really be asleep because
+		   of the brief netif_stop_queue. If this isn't true or if it
+		   changes, use netif_wake_queue instead. */
+	spin_unlock_irqrestore(&clip_priv->xoff_lock,flags);
+	return 0;
+}
+
+
+static struct net_device_stats *clip_get_stats(struct net_device *dev)
+{
+	return &PRIV(dev)->stats;
+}
+
+
+static int clip_mkip(struct atm_vcc *vcc,int timeout)
+{
+	struct clip_vcc *clip_vcc;
+	struct sk_buff_head copy;
+	struct sk_buff *skb;
+
+	if (!vcc->push) return -EBADFD;
+	clip_vcc = kmalloc(sizeof(struct clip_vcc),GFP_KERNEL);
+	if (!clip_vcc) return -ENOMEM;
+	DPRINTK("mkip clip_vcc %p vcc %p\n",clip_vcc,vcc);
+	clip_vcc->vcc = vcc;
+	vcc->user_back = clip_vcc;
+	set_bit(ATM_VF_IS_CLIP, &vcc->flags);
+	clip_vcc->entry = NULL;
+	clip_vcc->xoff = 0;
+	clip_vcc->encap = 1;
+	clip_vcc->last_use = jiffies;
+	clip_vcc->idle_timeout = timeout*HZ;
+	clip_vcc->old_push = vcc->push;
+	clip_vcc->old_pop = vcc->pop;
+	vcc->push = clip_push;
+	vcc->pop = clip_pop;
+	skb_queue_head_init(&copy);
+	skb_migrate(&sk_atm(vcc)->sk_receive_queue, &copy);
+	/* re-process everything received between connection setup and MKIP */
+	while ((skb = skb_dequeue(&copy)) != NULL)
+		if (!clip_devs) {
+			atm_return(vcc,skb->truesize);
+			kfree_skb(skb);
+		}
+		else {
+			unsigned int len = skb->len;
+
+			clip_push(vcc,skb);
+			PRIV(skb->dev)->stats.rx_packets--;
+			PRIV(skb->dev)->stats.rx_bytes -= len;
+		}
+	return 0;
+}
+
+
+static int clip_setentry(struct atm_vcc *vcc,u32 ip)
+{
+	struct neighbour *neigh;
+	struct atmarp_entry *entry;
+	int error;
+	struct clip_vcc *clip_vcc;
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1 } } };
+	struct rtable *rt;
+
+	if (vcc->push != clip_push) {
+		printk(KERN_WARNING "clip_setentry: non-CLIP VCC\n");
+		return -EBADF;
+	}
+	clip_vcc = CLIP_VCC(vcc);
+	if (!ip) {
+		if (!clip_vcc->entry) {
+			printk(KERN_ERR "hiding hidden ATMARP entry\n");
+			return 0;
+		}
+		DPRINTK("setentry: remove\n");
+		unlink_clip_vcc(clip_vcc);
+		return 0;
+	}
+	error = ip_route_output_key(&rt,&fl);
+	if (error) return error;
+	neigh = __neigh_lookup(&clip_tbl,&ip,rt->u.dst.dev,1);
+	ip_rt_put(rt);
+	if (!neigh)
+		return -ENOMEM;
+	entry = NEIGH2ENTRY(neigh);
+	if (entry != clip_vcc->entry) {
+		if (!clip_vcc->entry) DPRINTK("setentry: add\n");
+		else {
+			DPRINTK("setentry: update\n");
+			unlink_clip_vcc(clip_vcc);
+		}
+		link_vcc(clip_vcc,entry);
+	}
+	error = neigh_update(neigh, llc_oui, NUD_PERMANENT, 
+			     NEIGH_UPDATE_F_OVERRIDE|NEIGH_UPDATE_F_ADMIN);
+	neigh_release(neigh);
+	return error;
+}
+
+
+static void clip_setup(struct net_device *dev)
+{
+	dev->hard_start_xmit = clip_start_xmit;
+	/* sg_xmit ... */
+	dev->get_stats = clip_get_stats;
+	dev->type = ARPHRD_ATM;
+	dev->hard_header_len = RFC1483LLC_LEN;
+	dev->mtu = RFC1626_MTU;
+	dev->tx_queue_len = 100; /* "normal" queue (packets) */
+	    /* When using a "real" qdisc, the qdisc determines the queue */
+	    /* length. tx_queue_len is only used for the default case, */
+	    /* without any more elaborate queuing. 100 is a reasonable */
+	    /* compromise between decent burst-tolerance and protection */
+	    /* against memory hogs. */
+}
+
+
+static int clip_create(int number)
+{
+	struct net_device *dev;
+	struct clip_priv *clip_priv;
+	int error;
+
+	if (number != -1) {
+		for (dev = clip_devs; dev; dev = PRIV(dev)->next)
+			if (PRIV(dev)->number == number) return -EEXIST;
+	}
+	else {
+		number = 0;
+		for (dev = clip_devs; dev; dev = PRIV(dev)->next)
+			if (PRIV(dev)->number >= number)
+				number = PRIV(dev)->number+1;
+	}
+	dev = alloc_netdev(sizeof(struct clip_priv), "", clip_setup);
+	if (!dev)
+		return -ENOMEM;
+	clip_priv = PRIV(dev);
+	sprintf(dev->name,"atm%d",number);
+	spin_lock_init(&clip_priv->xoff_lock);
+	clip_priv->number = number;
+	error = register_netdev(dev);
+	if (error) {
+		free_netdev(dev);
+		return error;
+	}
+	clip_priv->next = clip_devs;
+	clip_devs = dev;
+	DPRINTK("registered (net:%s)\n",dev->name);
+	return number;
+}
+
+
+static int clip_device_event(struct notifier_block *this,unsigned long event,
+    void *dev)
+{
+	/* ignore non-CLIP devices */
+	if (((struct net_device *) dev)->type != ARPHRD_ATM ||
+	    ((struct net_device *) dev)->hard_start_xmit != clip_start_xmit)
+		return NOTIFY_DONE;
+	switch (event) {
+		case NETDEV_UP:
+			DPRINTK("clip_device_event NETDEV_UP\n");
+			(void) to_atmarpd(act_up,PRIV(dev)->number,0);
+			break;
+		case NETDEV_GOING_DOWN:
+			DPRINTK("clip_device_event NETDEV_DOWN\n");
+			(void) to_atmarpd(act_down,PRIV(dev)->number,0);
+			break;
+		case NETDEV_CHANGE:
+		case NETDEV_CHANGEMTU:
+			DPRINTK("clip_device_event NETDEV_CHANGE*\n");
+			(void) to_atmarpd(act_change,PRIV(dev)->number,0);
+			break;
+		case NETDEV_REBOOT:
+		case NETDEV_REGISTER:
+		case NETDEV_DOWN:
+			DPRINTK("clip_device_event %ld\n",event);
+			/* ignore */
+			break;
+		default:
+			printk(KERN_WARNING "clip_device_event: unknown event "
+			    "%ld\n",event);
+			break;
+	}
+	return NOTIFY_DONE;
+}
+
+
+static int clip_inet_event(struct notifier_block *this,unsigned long event,
+    void *ifa)
+{
+	struct in_device *in_dev;
+
+	in_dev = ((struct in_ifaddr *) ifa)->ifa_dev;
+	if (!in_dev || !in_dev->dev) {
+		printk(KERN_WARNING "clip_inet_event: no device\n");
+		return NOTIFY_DONE;
+	}
+	/*
+	 * Transitions are of the down-change-up type, so it's sufficient to
+	 * handle the change on up.
+	 */
+	if (event != NETDEV_UP) return NOTIFY_DONE;
+	return clip_device_event(this,NETDEV_CHANGE,in_dev->dev);
+}
+
+
+static struct notifier_block clip_dev_notifier = {
+	clip_device_event,
+	NULL,
+	0
+};
+
+
+
+static struct notifier_block clip_inet_notifier = {
+	clip_inet_event,
+	NULL,
+	0
+};
+
+
+
+static void atmarpd_close(struct atm_vcc *vcc)
+{
+	DPRINTK("atmarpd_close\n");
+	atmarpd = NULL; /* assumed to be atomic */
+	barrier();
+	unregister_inetaddr_notifier(&clip_inet_notifier);
+	unregister_netdevice_notifier(&clip_dev_notifier);
+	if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
+		printk(KERN_ERR "atmarpd_close: closing with requests "
+		    "pending\n");
+	skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
+	DPRINTK("(done)\n");
+	module_put(THIS_MODULE);
+}
+
+
+static struct atmdev_ops atmarpd_dev_ops = {
+	.close = atmarpd_close
+};
+
+
+static struct atm_dev atmarpd_dev = {
+	.ops =			&atmarpd_dev_ops,
+	.type =			"arpd",
+	.number = 		999,
+	.lock =			SPIN_LOCK_UNLOCKED
+};
+
+
+static int atm_init_atmarp(struct atm_vcc *vcc)
+{
+	if (atmarpd) return -EADDRINUSE;
+	if (start_timer) {
+		start_timer = 0;
+		init_timer(&idle_timer);
+		idle_timer.expires = jiffies+CLIP_CHECK_INTERVAL*HZ;
+		idle_timer.function = idle_timer_check;
+		add_timer(&idle_timer);
+	}
+	atmarpd = vcc;
+	set_bit(ATM_VF_META,&vcc->flags);
+	set_bit(ATM_VF_READY,&vcc->flags);
+	    /* allow replies and avoid getting closed if signaling dies */
+	vcc->dev = &atmarpd_dev;
+	vcc_insert_socket(sk_atm(vcc));
+	vcc->push = NULL;
+	vcc->pop = NULL; /* crash */
+	vcc->push_oam = NULL; /* crash */
+	if (register_netdevice_notifier(&clip_dev_notifier))
+		printk(KERN_ERR "register_netdevice_notifier failed\n");
+	if (register_inetaddr_notifier(&clip_inet_notifier))
+		printk(KERN_ERR "register_inetaddr_notifier failed\n");
+	return 0;
+}
+
+static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int err = 0;
+
+	switch (cmd) {
+		case SIOCMKCLIP:
+		case ATMARPD_CTRL:
+		case ATMARP_MKIP:
+		case ATMARP_SETENTRY:
+		case ATMARP_ENCAP:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			break;
+		default:
+			return -ENOIOCTLCMD;
+	}
+
+	switch (cmd) {
+		case SIOCMKCLIP:
+			err = clip_create(arg);
+			break;
+		case ATMARPD_CTRL:
+			err = atm_init_atmarp(vcc);
+			if (!err) {
+				sock->state = SS_CONNECTED;
+				__module_get(THIS_MODULE);
+			}
+			break;
+		case ATMARP_MKIP:
+			err = clip_mkip(vcc ,arg);
+			break;
+		case ATMARP_SETENTRY:
+			err = clip_setentry(vcc, arg);
+			break;
+		case ATMARP_ENCAP:
+			err = clip_encap(vcc, arg);
+			break;
+	}
+	return err;
+}
+
+static struct atm_ioctl clip_ioctl_ops = {
+	.owner 	= THIS_MODULE,
+	.ioctl	= clip_ioctl,
+};
+
+#ifdef CONFIG_PROC_FS
+
+static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr)
+{
+	static int code[] = { 1,2,10,6,1,0 };
+	static int e164[] = { 1,8,4,6,1,0 };
+
+	if (*addr->sas_addr.pub) {
+		seq_printf(seq, "%s", addr->sas_addr.pub);
+		if (*addr->sas_addr.prv)
+			seq_putc(seq, '+');
+	} else if (!*addr->sas_addr.prv) {
+		seq_printf(seq, "%s", "(none)");
+		return;
+	}
+	if (*addr->sas_addr.prv) {
+		unsigned char *prv = addr->sas_addr.prv;
+		int *fields;
+		int i, j;
+
+		fields = *prv == ATM_AFI_E164 ? e164 : code;
+		for (i = 0; fields[i]; i++) {
+			for (j = fields[i]; j; j--)
+				seq_printf(seq, "%02X", *prv++);
+			if (fields[i+1])
+				seq_putc(seq, '.');
+		}
+	}
+}
+
+/* This means the neighbour entry has no attached VCC objects. */
+#define SEQ_NO_VCC_TOKEN	((void *) 2)
+
+static void atmarp_info(struct seq_file *seq, struct net_device *dev,
+			struct atmarp_entry *entry, struct clip_vcc *clip_vcc)
+{
+	unsigned long exp;
+	char buf[17];
+	int svc, llc, off;
+
+	svc = ((clip_vcc == SEQ_NO_VCC_TOKEN) ||
+	       (sk_atm(clip_vcc->vcc)->sk_family == AF_ATMSVC));
+
+	llc = ((clip_vcc == SEQ_NO_VCC_TOKEN) ||
+	       clip_vcc->encap);
+
+	if (clip_vcc == SEQ_NO_VCC_TOKEN)
+		exp = entry->neigh->used;
+	else
+		exp = clip_vcc->last_use;
+
+	exp = (jiffies - exp) / HZ;
+
+	seq_printf(seq, "%-6s%-4s%-4s%5ld ",
+		   dev->name,
+		   svc ? "SVC" : "PVC",
+		   llc ? "LLC" : "NULL",
+		   exp);
+
+	off = scnprintf(buf, sizeof(buf) - 1, "%d.%d.%d.%d",
+			NIPQUAD(entry->ip));
+	while (off < 16)
+		buf[off++] = ' ';
+	buf[off] = '\0';
+	seq_printf(seq, "%s", buf);
+
+	if (clip_vcc == SEQ_NO_VCC_TOKEN) {
+		if (time_before(jiffies, entry->expires))
+			seq_printf(seq, "(resolving)\n");
+		else
+			seq_printf(seq, "(expired, ref %d)\n",
+				   atomic_read(&entry->neigh->refcnt));
+	} else if (!svc) {
+		seq_printf(seq, "%d.%d.%d\n",
+			   clip_vcc->vcc->dev->number,
+			   clip_vcc->vcc->vpi,
+			   clip_vcc->vcc->vci);
+	} else {
+		svc_addr(seq, &clip_vcc->vcc->remote);
+		seq_putc(seq, '\n');
+	}
+}
+
+struct clip_seq_state {
+	/* This member must be first. */
+	struct neigh_seq_state ns;
+
+	/* Local to clip specific iteration. */
+	struct clip_vcc *vcc;
+};
+
+static struct clip_vcc *clip_seq_next_vcc(struct atmarp_entry *e,
+					  struct clip_vcc *curr)
+{
+	if (!curr) {
+		curr = e->vccs;
+		if (!curr)
+			return SEQ_NO_VCC_TOKEN;
+		return curr;
+	}
+	if (curr == SEQ_NO_VCC_TOKEN)
+		return NULL;
+
+	curr = curr->next;
+
+	return curr;
+}
+
+static void *clip_seq_vcc_walk(struct clip_seq_state *state,
+			       struct atmarp_entry *e, loff_t *pos)
+{
+	struct clip_vcc *vcc = state->vcc;
+
+	vcc = clip_seq_next_vcc(e, vcc);
+	if (vcc && pos != NULL) {
+		while (*pos) {
+			vcc = clip_seq_next_vcc(e, vcc);
+			if (!vcc)
+				break;
+			--(*pos);
+		}
+	}
+	state->vcc = vcc;
+
+	return vcc;
+}
+  
+static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
+			       struct neighbour *n, loff_t *pos)
+{
+	struct clip_seq_state *state = (struct clip_seq_state *) _state;
+
+	return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos);
+}
+
+static void *clip_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
+}
+
+static int clip_seq_show(struct seq_file *seq, void *v)
+{
+	static char atm_arp_banner[] = 
+		"IPitf TypeEncp Idle IP address      ATM address\n";
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, atm_arp_banner);
+	} else {
+		struct clip_seq_state *state = seq->private;
+		struct neighbour *n = v;
+		struct clip_vcc *vcc = state->vcc;
+
+		atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc);
+	}
+  	return 0;
+}
+
+static struct seq_operations arp_seq_ops = {
+	.start	= clip_seq_start,
+	.next	= neigh_seq_next,
+	.stop	= neigh_seq_stop,
+	.show	= clip_seq_show,
+};
+
+static int arp_seq_open(struct inode *inode, struct file *file)
+{
+	struct clip_seq_state *state;
+	struct seq_file *seq;
+	int rc = -EAGAIN;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state) {
+		rc = -ENOMEM;
+		goto out_kfree;
+	}
+	memset(state, 0, sizeof(*state));
+	state->ns.neigh_sub_iter = clip_seq_sub_iter;
+
+	rc = seq_open(file, &arp_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = state;
+out:
+	return rc;
+
+out_kfree:
+	kfree(state);
+	goto out;
+}
+
+static struct file_operations arp_seq_fops = {
+	.open		= arp_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+	.owner		= THIS_MODULE
+};
+#endif
+
+static int __init atm_clip_init(void)
+{
+	neigh_table_init(&clip_tbl);
+
+	clip_tbl_hook = &clip_tbl;
+	register_atm_ioctl(&clip_ioctl_ops);
+
+#ifdef CONFIG_PROC_FS
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("arp", S_IRUGO, atm_proc_root);
+	if (p)
+		p->proc_fops = &arp_seq_fops;
+}
+#endif
+
+	return 0;
+}
+
+static void __exit atm_clip_exit(void)
+{
+	struct net_device *dev, *next;
+
+	remove_proc_entry("arp", atm_proc_root);
+
+	deregister_atm_ioctl(&clip_ioctl_ops);
+
+	/* First, stop the idle timer, so it stops banging
+	 * on the table.
+	 */
+	if (start_timer == 0)
+		del_timer(&idle_timer);
+
+	/* Next, purge the table, so that the device
+	 * unregister loop below does not hang due to
+	 * device references remaining in the table.
+	 */
+	neigh_ifdown(&clip_tbl, NULL);
+
+	dev = clip_devs;
+	while (dev) {
+		next = PRIV(dev)->next;
+		unregister_netdev(dev);
+		free_netdev(dev);
+		dev = next;
+	}
+
+	/* Now it is safe to fully shutdown whole table. */
+	neigh_table_clear(&clip_tbl);
+
+	clip_tbl_hook = NULL;
+}
+
+module_init(atm_clip_init);
+module_exit(atm_clip_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/atm/common.c b/net/atm/common.c
new file mode 100644
index 00000000000..6d16be334ea
--- /dev/null
+++ b/net/atm/common.c
@@ -0,0 +1,804 @@
+/* net/atm/common.c - ATM sockets (common part for PVC and SVC) */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/net.h>		/* struct socket, struct proto_ops */
+#include <linux/atm.h>		/* ATM stuff */
+#include <linux/atmdev.h>
+#include <linux/socket.h>	/* SOL_SOCKET */
+#include <linux/errno.h>	/* error codes */
+#include <linux/capability.h>
+#include <linux/mm.h>		/* verify_area */
+#include <linux/sched.h>
+#include <linux/time.h>		/* struct timeval */
+#include <linux/skbuff.h>
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <net/sock.h>		/* struct sock */
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/poll.h>
+
+
+#include "resources.h"		/* atm_find_dev */
+#include "common.h"		/* prototypes */
+#include "protocols.h"		/* atm_init_<transport> */
+#include "addr.h"		/* address registry */
+#include "signaling.h"		/* for WAITING and sigd_attach */
+
+
+#if 0
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+struct hlist_head vcc_hash[VCC_HTABLE_SIZE];
+DEFINE_RWLOCK(vcc_sklist_lock);
+
+static void __vcc_insert_socket(struct sock *sk)
+{
+	struct atm_vcc *vcc = atm_sk(sk);
+	struct hlist_head *head = &vcc_hash[vcc->vci &
+					(VCC_HTABLE_SIZE - 1)];
+	sk->sk_hashent = vcc->vci & (VCC_HTABLE_SIZE - 1);
+	sk_add_node(sk, head);
+}
+
+void vcc_insert_socket(struct sock *sk)
+{
+	write_lock_irq(&vcc_sklist_lock);
+	__vcc_insert_socket(sk);
+	write_unlock_irq(&vcc_sklist_lock);
+}
+
+static void vcc_remove_socket(struct sock *sk)
+{
+	write_lock_irq(&vcc_sklist_lock);
+	sk_del_node_init(sk);
+	write_unlock_irq(&vcc_sklist_lock);
+}
+
+
+static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size)
+{
+	struct sk_buff *skb;
+	struct sock *sk = sk_atm(vcc);
+
+	if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) {
+		DPRINTK("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
+			atomic_read(&sk->sk_wmem_alloc), size,
+			sk->sk_sndbuf);
+		return NULL;
+	}
+	while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule();
+	DPRINTK("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc),
+		skb->truesize);
+	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	return skb;
+}
+
+
+EXPORT_SYMBOL(vcc_hash);
+EXPORT_SYMBOL(vcc_sklist_lock);
+EXPORT_SYMBOL(vcc_insert_socket);
+
+static void vcc_sock_destruct(struct sock *sk)
+{
+	if (atomic_read(&sk->sk_rmem_alloc))
+		printk(KERN_DEBUG "vcc_sock_destruct: rmem leakage (%d bytes) detected.\n", atomic_read(&sk->sk_rmem_alloc));
+
+	if (atomic_read(&sk->sk_wmem_alloc))
+		printk(KERN_DEBUG "vcc_sock_destruct: wmem leakage (%d bytes) detected.\n", atomic_read(&sk->sk_wmem_alloc));
+}
+
+static void vcc_def_wakeup(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up(sk->sk_sleep);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static inline int vcc_writable(struct sock *sk)
+{
+	struct atm_vcc *vcc = atm_sk(sk);
+
+	return (vcc->qos.txtp.max_sdu +
+	        atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+}
+
+static void vcc_write_space(struct sock *sk)
+{       
+	read_lock(&sk->sk_callback_lock);
+
+	if (vcc_writable(sk)) {
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+
+		sk_wake_async(sk, 2, POLL_OUT);
+	}
+
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static struct proto vcc_proto = {
+	.name	  = "VCC",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct atm_vcc),
+};
+ 
+int vcc_create(struct socket *sock, int protocol, int family)
+{
+	struct sock *sk;
+	struct atm_vcc *vcc;
+
+	sock->sk = NULL;
+	if (sock->type == SOCK_STREAM)
+		return -EINVAL;
+	sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+	sock_init_data(sock, sk);
+	sk->sk_state_change = vcc_def_wakeup;
+	sk->sk_write_space = vcc_write_space;
+
+	vcc = atm_sk(sk);
+	vcc->dev = NULL;
+	memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc));
+	memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc));
+	vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
+	atomic_set(&sk->sk_wmem_alloc, 0);
+	atomic_set(&sk->sk_rmem_alloc, 0);
+	vcc->push = NULL;
+	vcc->pop = NULL;
+	vcc->push_oam = NULL;
+	vcc->vpi = vcc->vci = 0; /* no VCI/VPI yet */
+	vcc->atm_options = vcc->aal_options = 0;
+	sk->sk_destruct = vcc_sock_destruct;
+	return 0;
+}
+
+
+static void vcc_destroy_socket(struct sock *sk)
+{
+	struct atm_vcc *vcc = atm_sk(sk);
+	struct sk_buff *skb;
+
+	set_bit(ATM_VF_CLOSE, &vcc->flags);
+	clear_bit(ATM_VF_READY, &vcc->flags);
+	if (vcc->dev) {
+		if (vcc->dev->ops->close)
+			vcc->dev->ops->close(vcc);
+		if (vcc->push)
+			vcc->push(vcc, NULL); /* atmarpd has no push */
+
+		vcc_remove_socket(sk);	/* no more receive */
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			atm_return(vcc,skb->truesize);
+			kfree_skb(skb);
+		}
+
+		module_put(vcc->dev->ops->owner);
+		atm_dev_put(vcc->dev);
+	}
+}
+
+
+int vcc_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		lock_sock(sk);
+		vcc_destroy_socket(sock->sk);
+		release_sock(sk);
+		sock_put(sk);
+	}
+
+	return 0;
+}
+
+
+void vcc_release_async(struct atm_vcc *vcc, int reply)
+{
+	struct sock *sk = sk_atm(vcc);
+
+	set_bit(ATM_VF_CLOSE, &vcc->flags);
+	sk->sk_shutdown |= RCV_SHUTDOWN;
+	sk->sk_err = -reply;
+	clear_bit(ATM_VF_WAITING, &vcc->flags);
+	sk->sk_state_change(sk);
+}
+
+
+EXPORT_SYMBOL(vcc_release_async);
+
+
+static int adjust_tp(struct atm_trafprm *tp,unsigned char aal)
+{
+	int max_sdu;
+
+	if (!tp->traffic_class) return 0;
+	switch (aal) {
+		case ATM_AAL0:
+			max_sdu = ATM_CELL_SIZE-1;
+			break;
+		case ATM_AAL34:
+			max_sdu = ATM_MAX_AAL34_PDU;
+			break;
+		default:
+			printk(KERN_WARNING "ATM: AAL problems ... "
+			    "(%d)\n",aal);
+			/* fall through */
+		case ATM_AAL5:
+			max_sdu = ATM_MAX_AAL5_PDU;
+	}
+	if (!tp->max_sdu) tp->max_sdu = max_sdu;
+	else if (tp->max_sdu > max_sdu) return -EINVAL;
+	if (!tp->max_cdv) tp->max_cdv = ATM_MAX_CDV;
+	return 0;
+}
+
+
+static int check_ci(struct atm_vcc *vcc, short vpi, int vci)
+{
+	struct hlist_head *head = &vcc_hash[vci &
+					(VCC_HTABLE_SIZE - 1)];
+	struct hlist_node *node;
+	struct sock *s;
+	struct atm_vcc *walk;
+
+	sk_for_each(s, node, head) {
+		walk = atm_sk(s);
+		if (walk->dev != vcc->dev)
+			continue;
+		if (test_bit(ATM_VF_ADDR, &walk->flags) && walk->vpi == vpi &&
+		    walk->vci == vci && ((walk->qos.txtp.traffic_class !=
+		    ATM_NONE && vcc->qos.txtp.traffic_class != ATM_NONE) ||
+		    (walk->qos.rxtp.traffic_class != ATM_NONE &&
+		    vcc->qos.rxtp.traffic_class != ATM_NONE)))
+			return -EADDRINUSE;
+	}
+
+	/* allow VCCs with same VPI/VCI iff they don't collide on
+	   TX/RX (but we may refuse such sharing for other reasons,
+	   e.g. if protocol requires to have both channels) */
+
+	return 0;
+}
+
+
+static int find_ci(struct atm_vcc *vcc, short *vpi, int *vci)
+{
+	static short p;        /* poor man's per-device cache */
+	static int c;
+	short old_p;
+	int old_c;
+	int err;
+
+	if (*vpi != ATM_VPI_ANY && *vci != ATM_VCI_ANY) {
+		err = check_ci(vcc, *vpi, *vci);
+		return err;
+	}
+	/* last scan may have left values out of bounds for current device */
+	if (*vpi != ATM_VPI_ANY)
+		p = *vpi;
+	else if (p >= 1 << vcc->dev->ci_range.vpi_bits)
+		p = 0;
+	if (*vci != ATM_VCI_ANY)
+		c = *vci;
+	else if (c < ATM_NOT_RSV_VCI || c >= 1 << vcc->dev->ci_range.vci_bits)
+			c = ATM_NOT_RSV_VCI;
+	old_p = p;
+	old_c = c;
+	do {
+		if (!check_ci(vcc, p, c)) {
+			*vpi = p;
+			*vci = c;
+			return 0;
+		}
+		if (*vci == ATM_VCI_ANY) {
+			c++;
+			if (c >= 1 << vcc->dev->ci_range.vci_bits)
+				c = ATM_NOT_RSV_VCI;
+		}
+		if ((c == ATM_NOT_RSV_VCI || *vci != ATM_VCI_ANY) &&
+		    *vpi == ATM_VPI_ANY) {
+			p++;
+			if (p >= 1 << vcc->dev->ci_range.vpi_bits) p = 0;
+		}
+	}
+	while (old_p != p || old_c != c);
+	return -EADDRINUSE;
+}
+
+
+static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi,
+			 int vci)
+{
+	struct sock *sk = sk_atm(vcc);
+	int error;
+
+	if ((vpi != ATM_VPI_UNSPEC && vpi != ATM_VPI_ANY &&
+	    vpi >> dev->ci_range.vpi_bits) || (vci != ATM_VCI_UNSPEC &&
+	    vci != ATM_VCI_ANY && vci >> dev->ci_range.vci_bits))
+		return -EINVAL;
+	if (vci > 0 && vci < ATM_NOT_RSV_VCI && !capable(CAP_NET_BIND_SERVICE))
+		return -EPERM;
+	error = 0;
+	if (!try_module_get(dev->ops->owner))
+		return -ENODEV;
+	vcc->dev = dev;
+	write_lock_irq(&vcc_sklist_lock);
+	if ((error = find_ci(vcc, &vpi, &vci))) {
+		write_unlock_irq(&vcc_sklist_lock);
+		goto fail_module_put;
+	}
+	vcc->vpi = vpi;
+	vcc->vci = vci;
+	__vcc_insert_socket(sk);
+	write_unlock_irq(&vcc_sklist_lock);
+	switch (vcc->qos.aal) {
+		case ATM_AAL0:
+			error = atm_init_aal0(vcc);
+			vcc->stats = &dev->stats.aal0;
+			break;
+		case ATM_AAL34:
+			error = atm_init_aal34(vcc);
+			vcc->stats = &dev->stats.aal34;
+			break;
+		case ATM_NO_AAL:
+			/* ATM_AAL5 is also used in the "0 for default" case */
+			vcc->qos.aal = ATM_AAL5;
+			/* fall through */
+		case ATM_AAL5:
+			error = atm_init_aal5(vcc);
+			vcc->stats = &dev->stats.aal5;
+			break;
+		default:
+			error = -EPROTOTYPE;
+	}
+	if (!error) error = adjust_tp(&vcc->qos.txtp,vcc->qos.aal);
+	if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal);
+	if (error)
+		goto fail;
+	DPRINTK("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal);
+	DPRINTK("  TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class,
+	    vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu);
+	DPRINTK("  RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class,
+	    vcc->qos.rxtp.min_pcr,vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu);
+
+	if (dev->ops->open) {
+		if ((error = dev->ops->open(vcc)))
+			goto fail;
+	}
+	return 0;
+
+fail:
+	vcc_remove_socket(sk);
+fail_module_put:
+	module_put(dev->ops->owner);
+	/* ensure we get dev module ref count correct */
+	vcc->dev = NULL;
+	return error;
+}
+
+
+int vcc_connect(struct socket *sock, int itf, short vpi, int vci)
+{
+	struct atm_dev *dev;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int error;
+
+	DPRINTK("vcc_connect (vpi %d, vci %d)\n",vpi,vci);
+	if (sock->state == SS_CONNECTED)
+		return -EISCONN;
+	if (sock->state != SS_UNCONNECTED)
+		return -EINVAL;
+	if (!(vpi || vci))
+		return -EINVAL;
+
+	if (vpi != ATM_VPI_UNSPEC && vci != ATM_VCI_UNSPEC)
+		clear_bit(ATM_VF_PARTIAL,&vcc->flags);
+	else
+		if (test_bit(ATM_VF_PARTIAL,&vcc->flags))
+			return -EINVAL;
+	DPRINTK("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; "
+	    "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n",
+	    vcc->qos.txtp.traffic_class,vcc->qos.txtp.min_pcr,
+	    vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu,
+	    vcc->qos.rxtp.traffic_class,vcc->qos.rxtp.min_pcr,
+	    vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu,
+	    vcc->qos.aal == ATM_AAL5 ? "" : vcc->qos.aal == ATM_AAL0 ? "" :
+	    " ??? code ",vcc->qos.aal == ATM_AAL0 ? 0 : vcc->qos.aal);
+	if (!test_bit(ATM_VF_HASQOS, &vcc->flags))
+		return -EBADFD;
+	if (vcc->qos.txtp.traffic_class == ATM_ANYCLASS ||
+	    vcc->qos.rxtp.traffic_class == ATM_ANYCLASS)
+		return -EINVAL;
+	if (itf != ATM_ITF_ANY) {
+		dev = atm_dev_lookup(itf);
+		if (!dev)
+			return -ENODEV;
+		error = __vcc_connect(vcc, dev, vpi, vci);
+		if (error) {
+			atm_dev_put(dev);
+			return error;
+		}
+	} else {
+		struct list_head *p, *next;
+
+		dev = NULL;
+		spin_lock(&atm_dev_lock);
+		list_for_each_safe(p, next, &atm_devs) {
+			dev = list_entry(p, struct atm_dev, dev_list);
+			atm_dev_hold(dev);
+			spin_unlock(&atm_dev_lock);
+			if (!__vcc_connect(vcc, dev, vpi, vci))
+				break;
+			atm_dev_put(dev);
+			dev = NULL;
+			spin_lock(&atm_dev_lock);
+		}
+		spin_unlock(&atm_dev_lock);
+		if (!dev)
+			return -ENODEV;
+	}
+	if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC)
+		set_bit(ATM_VF_PARTIAL,&vcc->flags);
+	if (test_bit(ATM_VF_READY,&ATM_SD(sock)->flags))
+		sock->state = SS_CONNECTED;
+	return 0;
+}
+
+
+int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc;
+	struct sk_buff *skb;
+	int copied, error = -EINVAL;
+
+	if (sock->state != SS_CONNECTED)
+		return -ENOTCONN;
+	if (flags & ~MSG_DONTWAIT)		/* only handle MSG_DONTWAIT */
+		return -EOPNOTSUPP;
+	vcc = ATM_SD(sock);
+	if (test_bit(ATM_VF_RELEASED,&vcc->flags) ||
+	    test_bit(ATM_VF_CLOSE,&vcc->flags) ||
+	    !test_bit(ATM_VF_READY, &vcc->flags))
+		return 0;
+
+	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
+	if (!skb)
+		return error;
+
+	copied = skb->len; 
+	if (copied > size) {
+		copied = size; 
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+        error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+        if (error)
+                return error;
+        sock_recv_timestamp(msg, sk, skb);
+        DPRINTK("RcvM %d -= %d\n", atomic_read(&sk->rmem_alloc), skb->truesize);
+        atm_return(vcc, skb->truesize);
+        skb_free_datagram(sk, skb);
+        return copied;
+}
+
+
+int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		size_t total_len)
+{
+	struct sock *sk = sock->sk;
+	DEFINE_WAIT(wait);
+	struct atm_vcc *vcc;
+	struct sk_buff *skb;
+	int eff,error;
+	const void __user *buff;
+	int size;
+
+	lock_sock(sk);
+	if (sock->state != SS_CONNECTED) {
+		error = -ENOTCONN;
+		goto out;
+	}
+	if (m->msg_name) {
+		error = -EISCONN;
+		goto out;
+	}
+	if (m->msg_iovlen != 1) {
+		error = -ENOSYS; /* fix this later @@@ */
+		goto out;
+	}
+	buff = m->msg_iov->iov_base;
+	size = m->msg_iov->iov_len;
+	vcc = ATM_SD(sock);
+	if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
+	    test_bit(ATM_VF_CLOSE, &vcc->flags) ||
+	    !test_bit(ATM_VF_READY, &vcc->flags)) {
+		error = -EPIPE;
+		send_sig(SIGPIPE, current, 0);
+		goto out;
+	}
+	if (!size) {
+		error = 0;
+		goto out;
+	}
+	if (size < 0 || size > vcc->qos.txtp.max_sdu) {
+		error = -EMSGSIZE;
+		goto out;
+	}
+	/* verify_area is done by net/socket.c */
+	eff = (size+3) & ~3; /* align to word boundary */
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	error = 0;
+	while (!(skb = alloc_tx(vcc,eff))) {
+		if (m->msg_flags & MSG_DONTWAIT) {
+			error = -EAGAIN;
+			break;
+		}
+		schedule();
+		if (signal_pending(current)) {
+			error = -ERESTARTSYS;
+			break;
+		}
+		if (test_bit(ATM_VF_RELEASED,&vcc->flags) ||
+		    test_bit(ATM_VF_CLOSE,&vcc->flags) ||
+		    !test_bit(ATM_VF_READY,&vcc->flags)) {
+			error = -EPIPE;
+			send_sig(SIGPIPE, current, 0);
+			break;
+		}
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	if (error)
+		goto out;
+	skb->dev = NULL; /* for paths shared with net_device interfaces */
+	ATM_SKB(skb)->atm_options = vcc->atm_options;
+	if (copy_from_user(skb_put(skb,size),buff,size)) {
+		kfree_skb(skb);
+		error = -EFAULT;
+		goto out;
+	}
+	if (eff != size) memset(skb->data+size,0,eff-size);
+	error = vcc->dev->ops->send(vcc,skb);
+	error = error ? error : size;
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc;
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	vcc = ATM_SD(sock);
+
+	/* exceptional events */
+	if (sk->sk_err)
+		mask = POLLERR;
+
+	if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
+	    test_bit(ATM_VF_CLOSE, &vcc->flags))
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* writable? */
+	if (sock->state == SS_CONNECTING &&
+	    test_bit(ATM_VF_WAITING, &vcc->flags))
+		return mask;
+
+	if (vcc->qos.txtp.traffic_class != ATM_NONE &&
+	    vcc_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+
+
+static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos)
+{
+	int error;
+
+	/*
+	 * Don't let the QoS change the already connected AAL type nor the
+	 * traffic class.
+	 */
+	if (qos->aal != vcc->qos.aal ||
+	    qos->rxtp.traffic_class != vcc->qos.rxtp.traffic_class ||
+	    qos->txtp.traffic_class != vcc->qos.txtp.traffic_class)
+		return -EINVAL;
+	error = adjust_tp(&qos->txtp,qos->aal);
+	if (!error) error = adjust_tp(&qos->rxtp,qos->aal);
+	if (error) return error;
+	if (!vcc->dev->ops->change_qos) return -EOPNOTSUPP;
+	if (sk_atm(vcc)->sk_family == AF_ATMPVC)
+		return vcc->dev->ops->change_qos(vcc,qos,ATM_MF_SET);
+	return svc_change_qos(vcc,qos);
+}
+
+
+static int check_tp(struct atm_trafprm *tp)
+{
+	/* @@@ Should be merged with adjust_tp */
+	if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0;
+	if (tp->traffic_class != ATM_UBR && !tp->min_pcr && !tp->pcr &&
+	    !tp->max_pcr) return -EINVAL;
+	if (tp->min_pcr == ATM_MAX_PCR) return -EINVAL;
+	if (tp->min_pcr && tp->max_pcr && tp->max_pcr != ATM_MAX_PCR &&
+	    tp->min_pcr > tp->max_pcr) return -EINVAL;
+	/*
+	 * We allow pcr to be outside [min_pcr,max_pcr], because later
+	 * adjustment may still push it in the valid range.
+	 */
+	return 0;
+}
+
+
+static int check_qos(struct atm_qos *qos)
+{
+	int error;
+
+	if (!qos->txtp.traffic_class && !qos->rxtp.traffic_class)
+                return -EINVAL;
+	if (qos->txtp.traffic_class != qos->rxtp.traffic_class &&
+	    qos->txtp.traffic_class && qos->rxtp.traffic_class &&
+	    qos->txtp.traffic_class != ATM_ANYCLASS &&
+	    qos->rxtp.traffic_class != ATM_ANYCLASS) return -EINVAL;
+	error = check_tp(&qos->txtp);
+	if (error) return error;
+	return check_tp(&qos->rxtp);
+}
+
+int vcc_setsockopt(struct socket *sock, int level, int optname,
+		   char __user *optval, int optlen)
+{
+	struct atm_vcc *vcc;
+	unsigned long value;
+	int error;
+
+	if (__SO_LEVEL_MATCH(optname, level) && optlen != __SO_SIZE(optname))
+		return -EINVAL;
+
+	vcc = ATM_SD(sock);
+	switch (optname) {
+		case SO_ATMQOS:
+			{
+				struct atm_qos qos;
+
+				if (copy_from_user(&qos,optval,sizeof(qos)))
+					return -EFAULT;
+				error = check_qos(&qos);
+				if (error) return error;
+				if (sock->state == SS_CONNECTED)
+					return atm_change_qos(vcc,&qos);
+				if (sock->state != SS_UNCONNECTED)
+					return -EBADFD;
+				vcc->qos = qos;
+				set_bit(ATM_VF_HASQOS,&vcc->flags);
+				return 0;
+			}
+		case SO_SETCLP:
+			if (get_user(value,(unsigned long __user *)optval))
+				return -EFAULT;
+			if (value) vcc->atm_options |= ATM_ATMOPT_CLP;
+			else vcc->atm_options &= ~ATM_ATMOPT_CLP;
+			return 0;
+		default:
+			if (level == SOL_SOCKET) return -EINVAL;
+			break;
+	}
+	if (!vcc->dev || !vcc->dev->ops->setsockopt) return -EINVAL;
+	return vcc->dev->ops->setsockopt(vcc,level,optname,optval,optlen);
+}
+
+
+int vcc_getsockopt(struct socket *sock, int level, int optname,
+		   char __user *optval, int __user *optlen)
+{
+	struct atm_vcc *vcc;
+	int len;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+	if (__SO_LEVEL_MATCH(optname, level) && len != __SO_SIZE(optname))
+		return -EINVAL;
+
+	vcc = ATM_SD(sock);
+	switch (optname) {
+		case SO_ATMQOS:
+			if (!test_bit(ATM_VF_HASQOS,&vcc->flags))
+				return -EINVAL;
+			return copy_to_user(optval,&vcc->qos,sizeof(vcc->qos)) ?
+			    -EFAULT : 0;
+		case SO_SETCLP:
+			return put_user(vcc->atm_options & ATM_ATMOPT_CLP ? 1 :
+			  0,(unsigned long __user *)optval) ? -EFAULT : 0;
+		case SO_ATMPVC:
+			{
+				struct sockaddr_atmpvc pvc;
+
+				if (!vcc->dev ||
+				    !test_bit(ATM_VF_ADDR,&vcc->flags))
+					return -ENOTCONN;
+				pvc.sap_family = AF_ATMPVC;
+				pvc.sap_addr.itf = vcc->dev->number;
+				pvc.sap_addr.vpi = vcc->vpi;
+				pvc.sap_addr.vci = vcc->vci;
+				return copy_to_user(optval,&pvc,sizeof(pvc)) ?
+				    -EFAULT : 0;
+			}
+		default:
+			if (level == SOL_SOCKET) return -EINVAL;
+			break;
+	}
+	if (!vcc->dev || !vcc->dev->ops->getsockopt) return -EINVAL;
+	return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
+}
+
+static int __init atm_init(void)
+{
+	int error;
+
+	if ((error = proto_register(&vcc_proto, 0)) < 0)
+		goto out;
+
+	if ((error = atmpvc_init()) < 0) {
+		printk(KERN_ERR "atmpvc_init() failed with %d\n", error);
+		goto out_unregister_vcc_proto;
+	}
+	if ((error = atmsvc_init()) < 0) {
+		printk(KERN_ERR "atmsvc_init() failed with %d\n", error);
+		goto out_atmpvc_exit;
+	}
+        if ((error = atm_proc_init()) < 0) {
+		printk(KERN_ERR "atm_proc_init() failed with %d\n",error);
+		goto out_atmsvc_exit;
+	}
+out:
+	return error;
+out_atmsvc_exit:
+	atmsvc_exit();
+out_atmpvc_exit:
+	atmsvc_exit();
+out_unregister_vcc_proto:
+	proto_unregister(&vcc_proto);
+	goto out;
+}
+
+static void __exit atm_exit(void)
+{
+	atm_proc_exit();
+	atmsvc_exit();
+	atmpvc_exit();
+	proto_unregister(&vcc_proto);
+}
+
+module_init(atm_init);
+module_exit(atm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_ATMPVC);
+MODULE_ALIAS_NETPROTO(PF_ATMSVC);
diff --git a/net/atm/common.h b/net/atm/common.h
new file mode 100644
index 00000000000..e49ed41c0e3
--- /dev/null
+++ b/net/atm/common.h
@@ -0,0 +1,50 @@
+/* net/atm/common.h - ATM sockets (common part for PVC and SVC) */
+ 
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#ifndef NET_ATM_COMMON_H
+#define NET_ATM_COMMON_H
+
+#include <linux/net.h>
+#include <linux/poll.h> /* for poll_table */
+
+
+int vcc_create(struct socket *sock, int protocol, int family);
+int vcc_release(struct socket *sock);
+int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
+int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		size_t size, int flags);
+int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		size_t total_len);
+unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+int vcc_setsockopt(struct socket *sock, int level, int optname,
+		   char __user *optval, int optlen);
+int vcc_getsockopt(struct socket *sock, int level, int optname,
+		   char __user *optval, int __user *optlen);
+
+int atmpvc_init(void);
+void atmpvc_exit(void);
+int atmsvc_init(void);
+void atmsvc_exit(void);
+
+#ifdef CONFIG_PROC_FS
+int atm_proc_init(void);
+void atm_proc_exit(void);
+#else
+static inline int atm_proc_init(void)
+{
+	return 0;
+}
+
+static inline void atm_proc_exit(void)
+{
+	/* nothing */
+}
+#endif /* CONFIG_PROC_FS */
+
+/* SVC */
+int svc_change_qos(struct atm_vcc *vcc,struct atm_qos *qos);
+
+#endif
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
new file mode 100644
index 00000000000..4dbb5af34a5
--- /dev/null
+++ b/net/atm/ioctl.c
@@ -0,0 +1,139 @@
+/* ATM ioctl handling */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+/* 2003 John Levon  <levon@movementarian.org> */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/net.h>		/* struct socket, struct proto_ops */
+#include <linux/atm.h>		/* ATM stuff */
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>	/* CLIP_*ENCAP */
+#include <linux/atmarp.h>	/* manifest constants */
+#include <linux/sonet.h>	/* for ioctls */
+#include <linux/atmsvc.h>
+#include <linux/atmmpc.h>
+#include <net/atmclip.h>
+#include <linux/atmlec.h>
+#include <asm/ioctls.h>
+
+#include "resources.h"
+#include "signaling.h"		/* for WAITING and sigd_attach */
+
+
+static DECLARE_MUTEX(ioctl_mutex);
+static LIST_HEAD(ioctl_list);
+
+
+void register_atm_ioctl(struct atm_ioctl *ioctl)
+{
+	down(&ioctl_mutex);
+	list_add_tail(&ioctl->list, &ioctl_list);
+	up(&ioctl_mutex);
+}
+
+void deregister_atm_ioctl(struct atm_ioctl *ioctl)
+{
+	down(&ioctl_mutex);
+	list_del(&ioctl->list);
+	up(&ioctl_mutex);
+}
+
+EXPORT_SYMBOL(register_atm_ioctl);
+EXPORT_SYMBOL(deregister_atm_ioctl);
+
+int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc;
+	int error;
+	struct list_head * pos;
+	void __user *argp = (void __user *)arg;
+
+	vcc = ATM_SD(sock);
+	switch (cmd) {
+		case SIOCOUTQ:
+			if (sock->state != SS_CONNECTED ||
+			    !test_bit(ATM_VF_READY, &vcc->flags)) {
+				error =  -EINVAL;
+				goto done;
+			}
+			error = put_user(sk->sk_sndbuf -
+					 atomic_read(&sk->sk_wmem_alloc),
+					 (int __user *) argp) ? -EFAULT : 0;
+			goto done;
+		case SIOCINQ:
+			{
+				struct sk_buff *skb;
+
+				if (sock->state != SS_CONNECTED) {
+					error = -EINVAL;
+					goto done;
+				}
+				skb = skb_peek(&sk->sk_receive_queue);
+				error = put_user(skb ? skb->len : 0,
+					 	 (int __user *)argp) ? -EFAULT : 0;
+				goto done;
+			}
+		case SIOCGSTAMP: /* borrowed from IP */
+			error = sock_get_timestamp(sk, argp);
+			goto done;
+		case ATM_SETSC:
+			printk(KERN_WARNING "ATM_SETSC is obsolete\n");
+			error = 0;
+			goto done;
+		case ATMSIGD_CTRL:
+			if (!capable(CAP_NET_ADMIN)) {
+				error = -EPERM;
+				goto done;
+			}
+			/*
+			 * The user/kernel protocol for exchanging signalling
+			 * info uses kernel pointers as opaque references,
+			 * so the holder of the file descriptor can scribble
+			 * on the kernel... so we should make sure that we
+			 * have the same privledges that /proc/kcore needs
+			 */
+			if (!capable(CAP_SYS_RAWIO)) {
+				error = -EPERM;
+				goto done;
+			}
+			error = sigd_attach(vcc);
+			if (!error)
+				sock->state = SS_CONNECTED;
+			goto done;
+		default:
+			break;
+	}
+
+	if (cmd == ATMMPC_CTRL || cmd == ATMMPC_DATA)
+		request_module("mpoa");
+	if (cmd == ATMARPD_CTRL)
+		request_module("clip");
+	if (cmd == ATMLEC_CTRL)
+		request_module("lec");
+
+	error = -ENOIOCTLCMD;
+
+	down(&ioctl_mutex);
+	list_for_each(pos, &ioctl_list) {
+		struct atm_ioctl * ic = list_entry(pos, struct atm_ioctl, list);
+		if (try_module_get(ic->owner)) {
+			error = ic->ioctl(sock, cmd, arg);
+			module_put(ic->owner);
+			if (error != -ENOIOCTLCMD)
+				break;
+		}
+	}
+	up(&ioctl_mutex);
+
+	if (error != -ENOIOCTLCMD)
+		goto done;
+
+	error = atm_dev_ioctl(cmd, argp);
+
+done:
+	return error;
+}
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c
new file mode 100644
index 00000000000..181a3002d8a
--- /dev/null
+++ b/net/atm/ipcommon.c
@@ -0,0 +1,61 @@
+/* net/atm/ipcommon.c - Common items for all ways of doing IP over ATM */
+
+/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>
+
+#include "common.h"
+#include "ipcommon.h"
+
+
+#if 0
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+
+/*
+ * skb_migrate appends the list at "from" to "to", emptying "from" in the
+ * process. skb_migrate is atomic with respect to all other skb operations on
+ * "from" and "to". Note that it locks both lists at the same time, so beware
+ * of potential deadlocks.
+ *
+ * This function should live in skbuff.c or skbuff.h.
+ */
+
+
+void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+	struct sk_buff *skb_from = (struct sk_buff *) from;
+	struct sk_buff *skb_to = (struct sk_buff *) to;
+	struct sk_buff *prev;
+
+	spin_lock_irqsave(&from->lock,flags);
+	spin_lock(&to->lock);
+	prev = from->prev;
+	from->next->prev = to->prev;
+	prev->next = skb_to;
+	to->prev->next = from->next;
+	to->prev = from->prev;
+	for (skb = from->next; skb != skb_to; skb = skb->next)
+		skb->list = to;
+	to->qlen += from->qlen;
+	spin_unlock(&to->lock);
+	from->prev = skb_from;
+	from->next = skb_from;
+	from->qlen = 0;
+	spin_unlock_irqrestore(&from->lock,flags);
+}
+
+
+EXPORT_SYMBOL(skb_migrate);
diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h
new file mode 100644
index 00000000000..d72165f6093
--- /dev/null
+++ b/net/atm/ipcommon.h
@@ -0,0 +1,22 @@
+/* net/atm/ipcommon.h - Common items for all ways of doing IP over ATM */
+
+/* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#ifndef NET_ATM_IPCOMMON_H
+#define NET_ATM_IPCOMMON_H
+
+
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/atmdev.h>
+
+/*
+ * Appends all skbs from "from" to "to". The operation is atomic with respect
+ * to all other skb operations on "from" or "to".
+ */
+
+void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to);
+
+#endif
diff --git a/net/atm/lec.c b/net/atm/lec.c
new file mode 100644
index 00000000000..a0752487026
--- /dev/null
+++ b/net/atm/lec.c
@@ -0,0 +1,2538 @@
+/*
+ * lec.c: Lan Emulation driver 
+ * Marko Kiiskila mkiiskila@yahoo.com
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+/* We are ethernet device */
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/sock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <net/arp.h>
+#include <net/dst.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/* TokenRing if needed */
+#ifdef CONFIG_TR
+#include <linux/trdevice.h>
+#endif
+
+/* And atm device */
+#include <linux/atmdev.h>
+#include <linux/atmlec.h>
+
+/* Proxy LEC knows about bridging */
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#include <linux/if_bridge.h>
+#include "../bridge/br_private.h"
+
+static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00};
+#endif
+
+/* Modular too */
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "lec.h"
+#include "lec_arpc.h"
+#include "resources.h"
+
+#if 0
+#define DPRINTK printk
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#define DUMP_PACKETS 0 /* 0 = None,
+                        * 1 = 30 first bytes
+                        * 2 = Whole packet
+                        */
+
+#define LEC_UNRES_QUE_LEN 8 /* number of tx packets to queue for a
+                               single destination while waiting for SVC */
+
+static int lec_open(struct net_device *dev);
+static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int lec_close(struct net_device *dev);
+static struct net_device_stats *lec_get_stats(struct net_device *dev);
+static void lec_init(struct net_device *dev);
+static struct lec_arp_table* lec_arp_find(struct lec_priv *priv,
+                                                     unsigned char *mac_addr);
+static int lec_arp_remove(struct lec_priv *priv,
+				     struct lec_arp_table *to_remove);
+/* LANE2 functions */
+static void lane2_associate_ind (struct net_device *dev, u8 *mac_address,
+                          u8 *tlvs, u32 sizeoftlvs);
+static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+                  u8 **tlvs, u32 *sizeoftlvs);
+static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
+                         u8 *tlvs, u32 sizeoftlvs);
+
+static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, 
+			   unsigned long permanent);
+static void lec_arp_check_empties(struct lec_priv *priv,
+				  struct atm_vcc *vcc, struct sk_buff *skb);
+static void lec_arp_destroy(struct lec_priv *priv);
+static void lec_arp_init(struct lec_priv *priv);
+static struct atm_vcc* lec_arp_resolve(struct lec_priv *priv,
+				       unsigned char *mac_to_find,
+				       int is_rdesc,
+				       struct lec_arp_table **ret_entry);
+static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
+			   unsigned char *atm_addr, unsigned long remoteflag,
+			   unsigned int targetless_le_arp);
+static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id);
+static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc);
+static void lec_set_flush_tran_id(struct lec_priv *priv,
+				  unsigned char *atm_addr,
+				  unsigned long tran_id);
+static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+			  struct atm_vcc *vcc,
+			  void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb));
+static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc);
+
+static struct lane2_ops lane2_ops = {
+	lane2_resolve,         /* resolve,             spec 3.1.3 */
+	lane2_associate_req,   /* associate_req,       spec 3.1.4 */
+	NULL                  /* associate indicator, spec 3.1.5 */
+};
+
+static unsigned char bus_mac[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
+
+/* Device structures */
+static struct net_device *dev_lec[MAX_LEC_ITF];
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
+{
+        struct ethhdr *eth;
+        char *buff;
+        struct lec_priv *priv;
+
+        /* Check if this is a BPDU. If so, ask zeppelin to send
+         * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit
+         * as the Config BPDU has */
+        eth = (struct ethhdr *)skb->data;
+        buff = skb->data + skb->dev->hard_header_len;
+        if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) {
+		struct sock *sk;
+                struct sk_buff *skb2;
+                struct atmlec_msg *mesg;
+
+                skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
+                if (skb2 == NULL) return;
+                skb2->len = sizeof(struct atmlec_msg);
+                mesg = (struct atmlec_msg *)skb2->data;
+                mesg->type = l_topology_change;
+                buff += 4;
+                mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */
+
+                priv = (struct lec_priv *)dev->priv;
+                atm_force_charge(priv->lecd, skb2->truesize);
+		sk = sk_atm(priv->lecd);
+                skb_queue_tail(&sk->sk_receive_queue, skb2);
+                sk->sk_data_ready(sk, skb2->len);
+        }
+
+        return;
+}
+#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+
+/*
+ * Modelled after tr_type_trans
+ * All multicast and ARE or STE frames go to BUS.
+ * Non source routed frames go by destination address.
+ * Last hop source routed frames go by destination address.
+ * Not last hop source routed frames go by _next_ route descriptor.
+ * Returns pointer to destination MAC address or fills in rdesc
+ * and returns NULL.
+ */
+#ifdef CONFIG_TR
+static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
+{
+        struct trh_hdr *trh;
+        int riflen, num_rdsc;
+        
+        trh = (struct trh_hdr *)packet;
+        if (trh->daddr[0] & (uint8_t)0x80)
+                return bus_mac; /* multicast */
+
+        if (trh->saddr[0] & TR_RII) {
+                riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
+                if ((ntohs(trh->rcf) >> 13) != 0)
+                        return bus_mac; /* ARE or STE */
+        }
+        else
+                return trh->daddr; /* not source routed */
+
+        if (riflen < 6)
+                return trh->daddr; /* last hop, source routed */
+                
+        /* riflen is 6 or more, packet has more than one route descriptor */
+        num_rdsc = (riflen/2) - 1;
+        memset(rdesc, 0, ETH_ALEN);
+        /* offset 4 comes from LAN destination field in LE control frames */
+        if (trh->rcf & htons((uint16_t)TR_RCF_DIR_BIT))
+                memcpy(&rdesc[4], &trh->rseg[num_rdsc-2], sizeof(uint16_t));
+        else {
+                memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t));
+                rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0));
+        }
+
+        return NULL;
+}
+#endif /* CONFIG_TR */
+
+/*
+ * Open/initialize the netdevice. This is called (in the current kernel)
+ * sometime after booting when the 'ifconfig' program is run.
+ *
+ * This routine should set everything up anew at each open, even
+ * registers that "should" only need to be set once at boot, so that
+ * there is non-reboot way to recover if something goes wrong.
+ */
+
+static int 
+lec_open(struct net_device *dev)
+{
+        struct lec_priv *priv = (struct lec_priv *)dev->priv;
+        
+	netif_start_queue(dev);
+        memset(&priv->stats,0,sizeof(struct net_device_stats));
+        
+        return 0;
+}
+
+static __inline__ void
+lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv)
+{
+	ATM_SKB(skb)->vcc = vcc;
+	ATM_SKB(skb)->atm_options = vcc->atm_options;
+
+	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	if (vcc->send(vcc, skb) < 0) {
+		priv->stats.tx_dropped++;
+		return;
+	}
+
+	priv->stats.tx_packets++;
+	priv->stats.tx_bytes += skb->len;
+}
+
+static void
+lec_tx_timeout(struct net_device *dev)
+{
+	printk(KERN_INFO "%s: tx timeout\n", dev->name);
+	dev->trans_start = jiffies;
+	netif_wake_queue(dev);
+}
+
+static int 
+lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+        struct sk_buff *skb2;
+        struct lec_priv *priv = (struct lec_priv *)dev->priv;
+        struct lecdatahdr_8023 *lec_h;
+        struct atm_vcc *vcc;
+	struct lec_arp_table *entry;
+        unsigned char *dst;
+	int min_frame_size;
+#ifdef CONFIG_TR
+        unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */
+#endif
+        int is_rdesc;
+#if DUMP_PACKETS > 0
+        char buf[300];
+        int i=0;
+#endif /* DUMP_PACKETS >0 */
+        
+        DPRINTK("lec_start_xmit called\n");  
+        if (!priv->lecd) {
+                printk("%s:No lecd attached\n",dev->name);
+                priv->stats.tx_errors++;
+                netif_stop_queue(dev);
+                return -EUNATCH;
+        } 
+
+        DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
+                (long)skb->head, (long)skb->data, (long)skb->tail,
+                (long)skb->end);
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+        if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
+                lec_handle_bridge(skb, dev);
+#endif
+
+        /* Make sure we have room for lec_id */
+        if (skb_headroom(skb) < 2) {
+
+                DPRINTK("lec_start_xmit: reallocating skb\n");
+                skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
+                kfree_skb(skb);
+                if (skb2 == NULL) return 0;
+                skb = skb2;
+        }
+        skb_push(skb, 2);
+
+        /* Put le header to place, works for TokenRing too */
+        lec_h = (struct lecdatahdr_8023*)skb->data;
+        lec_h->le_header = htons(priv->lecid); 
+
+#ifdef CONFIG_TR
+        /* Ugly. Use this to realign Token Ring packets for
+         * e.g. PCA-200E driver. */
+        if (priv->is_trdev) {
+                skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN);
+                kfree_skb(skb);
+                if (skb2 == NULL) return 0;
+                skb = skb2;
+        }
+#endif
+
+#if DUMP_PACKETS > 0
+        printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name,
+               skb->len, priv->lecid);
+#if DUMP_PACKETS >= 2
+        for(i=0;i<skb->len && i <99;i++) {
+                sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]);
+        }
+#elif DUMP_PACKETS >= 1
+        for(i=0;i<skb->len && i < 30;i++) {
+                sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]);
+        }
+#endif /* DUMP_PACKETS >= 1 */
+        if (i==skb->len)
+                printk("%s\n",buf);
+        else
+                printk("%s...\n",buf);
+#endif /* DUMP_PACKETS > 0 */
+
+        /* Minimum ethernet-frame size */
+#ifdef CONFIG_TR
+        if (priv->is_trdev)
+                min_frame_size = LEC_MINIMUM_8025_SIZE;
+	else
+#endif
+        min_frame_size = LEC_MINIMUM_8023_SIZE;
+        if (skb->len < min_frame_size) {
+                if ((skb->len + skb_tailroom(skb)) < min_frame_size) {
+                        skb2 = skb_copy_expand(skb, 0,
+                            min_frame_size - skb->truesize, GFP_ATOMIC);
+                                dev_kfree_skb(skb);
+                        if (skb2 == NULL) {
+                                priv->stats.tx_dropped++;
+                                return 0;
+                        }
+                        skb = skb2;
+                }
+		skb_put(skb, min_frame_size - skb->len);
+        }
+        
+        /* Send to right vcc */
+        is_rdesc = 0;
+        dst = lec_h->h_dest;
+#ifdef CONFIG_TR
+        if (priv->is_trdev) {
+                dst = get_tr_dst(skb->data+2, rdesc);
+                if (dst == NULL) {
+                        dst = rdesc;
+                        is_rdesc = 1;
+                }
+        }
+#endif
+        entry = NULL;
+        vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry);
+        DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name,
+                vcc, vcc?vcc->flags:0, entry);
+        if (!vcc || !test_bit(ATM_VF_READY,&vcc->flags)) {    
+                if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) {
+                        DPRINTK("%s:lec_start_xmit: queuing packet, ", dev->name);
+                        DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
+                                lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
+                        skb_queue_tail(&entry->tx_wait, skb);
+                } else {
+                        DPRINTK("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name);
+                        DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+                                lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
+                                lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
+                        priv->stats.tx_dropped++;
+                        dev_kfree_skb(skb);
+                }
+                return 0;
+        }
+                
+#if DUMP_PACKETS > 0                    
+        printk("%s:sending to vpi:%d vci:%d\n", dev->name,
+               vcc->vpi, vcc->vci);       
+#endif /* DUMP_PACKETS > 0 */
+                
+        while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) {
+                DPRINTK("lec.c: emptying tx queue, ");
+                DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+                        lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2],
+                        lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]);
+		lec_send(vcc, skb2, priv);
+        }
+
+	lec_send(vcc, skb, priv);
+
+	if (!atm_may_send(vcc, 0)) {
+		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+
+		vpriv->xoff = 1;
+		netif_stop_queue(dev);
+
+		/*
+		 * vcc->pop() might have occurred in between, making
+		 * the vcc usuable again.  Since xmit is serialized,
+		 * this is the only situation we have to re-test.
+		 */
+
+		if (atm_may_send(vcc, 0))
+			netif_wake_queue(dev);
+	}
+
+	dev->trans_start = jiffies;
+        return 0;
+}
+
+/* The inverse routine to net_open(). */
+static int 
+lec_close(struct net_device *dev) 
+{
+        netif_stop_queue(dev);
+        return 0;
+}
+
+/*
+ * Get the current statistics.
+ * This may be called with the card open or closed.
+ */
+static struct net_device_stats *
+lec_get_stats(struct net_device *dev)
+{
+        return &((struct lec_priv *)dev->priv)->stats;
+}
+
+static int 
+lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	unsigned long flags;
+        struct net_device *dev = (struct net_device*)vcc->proto_data;
+        struct lec_priv *priv = (struct lec_priv*)dev->priv;
+        struct atmlec_msg *mesg;
+        struct lec_arp_table *entry;
+        int i;
+        char *tmp; /* FIXME */
+
+	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+        mesg = (struct atmlec_msg *)skb->data;
+        tmp = skb->data;
+        tmp += sizeof(struct atmlec_msg);
+        DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type);
+        switch(mesg->type) {
+        case l_set_mac_addr:
+                for (i=0;i<6;i++) {
+                        dev->dev_addr[i] = mesg->content.normal.mac_addr[i];
+                }    
+                break;
+        case l_del_mac_addr:
+                for(i=0;i<6;i++) {
+                        dev->dev_addr[i] = 0;
+                }
+                break;
+        case l_addr_delete:
+                lec_addr_delete(priv, mesg->content.normal.atm_addr, 
+                                mesg->content.normal.flag);
+                break;
+        case l_topology_change:
+                priv->topology_change = mesg->content.normal.flag;  
+                break;
+        case l_flush_complete:
+                lec_flush_complete(priv, mesg->content.normal.flag);
+                break;
+        case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */
+		spin_lock_irqsave(&priv->lec_arp_lock, flags);
+                entry = lec_arp_find(priv, mesg->content.normal.mac_addr);
+                lec_arp_remove(priv, entry);
+		spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+
+                if (mesg->content.normal.no_source_le_narp)
+                        break;
+                /* FALL THROUGH */
+        case l_arp_update:
+                lec_arp_update(priv, mesg->content.normal.mac_addr,
+                               mesg->content.normal.atm_addr,
+                               mesg->content.normal.flag,
+                               mesg->content.normal.targetless_le_arp);
+                DPRINTK("lec: in l_arp_update\n");
+                if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */
+                        DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs);
+                        lane2_associate_ind(dev,
+                                            mesg->content.normal.mac_addr,
+                                            tmp, mesg->sizeoftlvs);
+                }
+                break;
+        case l_config:
+                priv->maximum_unknown_frame_count = 
+                        mesg->content.config.maximum_unknown_frame_count;
+                priv->max_unknown_frame_time = 
+                        (mesg->content.config.max_unknown_frame_time*HZ);
+                priv->max_retry_count = 
+                        mesg->content.config.max_retry_count;
+                priv->aging_time = (mesg->content.config.aging_time*HZ);
+                priv->forward_delay_time = 
+                        (mesg->content.config.forward_delay_time*HZ);
+                priv->arp_response_time = 
+                        (mesg->content.config.arp_response_time*HZ);
+                priv->flush_timeout = (mesg->content.config.flush_timeout*HZ);
+                priv->path_switching_delay = 
+                        (mesg->content.config.path_switching_delay*HZ);
+                priv->lane_version = mesg->content.config.lane_version; /* LANE2 */
+		priv->lane2_ops = NULL;
+		if (priv->lane_version > 1)
+			priv->lane2_ops = &lane2_ops;
+		if (dev->change_mtu(dev, mesg->content.config.mtu))
+			printk("%s: change_mtu to %d failed\n", dev->name,
+			    mesg->content.config.mtu);
+		priv->is_proxy = mesg->content.config.is_proxy;
+                break;
+        case l_flush_tran_id:
+                lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr,
+                                      mesg->content.normal.flag);
+                break;
+        case l_set_lecid:
+                priv->lecid=(unsigned short)(0xffff&mesg->content.normal.flag);
+                break;
+        case l_should_bridge: {
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+                struct net_bridge_fdb_entry *f;
+
+                DPRINTK("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n",
+                        dev->name,
+                        mesg->content.proxy.mac_addr[0], mesg->content.proxy.mac_addr[1],
+                        mesg->content.proxy.mac_addr[2], mesg->content.proxy.mac_addr[3],
+                        mesg->content.proxy.mac_addr[4], mesg->content.proxy.mac_addr[5]);
+
+                if (br_fdb_get_hook == NULL || dev->br_port == NULL)
+                        break;
+
+                f = br_fdb_get_hook(dev->br_port->br, mesg->content.proxy.mac_addr);
+                if (f != NULL &&
+                    f->dst->dev != dev &&
+                    f->dst->state == BR_STATE_FORWARDING) {
+                                /* hit from bridge table, send LE_ARP_RESPONSE */
+                        struct sk_buff *skb2;
+			struct sock *sk;
+
+                        DPRINTK("%s: entry found, responding to zeppelin\n", dev->name);
+                        skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
+                        if (skb2 == NULL) {
+                                br_fdb_put_hook(f);
+                                break;
+                        }
+                        skb2->len = sizeof(struct atmlec_msg);
+                        memcpy(skb2->data, mesg, sizeof(struct atmlec_msg));
+                        atm_force_charge(priv->lecd, skb2->truesize);
+			sk = sk_atm(priv->lecd);
+                        skb_queue_tail(&sk->sk_receive_queue, skb2);
+                        sk->sk_data_ready(sk, skb2->len);
+                }
+                if (f != NULL) br_fdb_put_hook(f);
+#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+                }
+                break;
+        default:
+                printk("%s: Unknown message type %d\n", dev->name, mesg->type);
+                dev_kfree_skb(skb);
+                return -EINVAL;
+        }
+        dev_kfree_skb(skb);
+        return 0;
+}
+
+static void 
+lec_atm_close(struct atm_vcc *vcc)
+{
+        struct sk_buff *skb;
+        struct net_device *dev = (struct net_device *)vcc->proto_data;
+        struct lec_priv *priv = (struct lec_priv *)dev->priv;
+
+        priv->lecd = NULL;
+        /* Do something needful? */
+
+        netif_stop_queue(dev);
+        lec_arp_destroy(priv);
+
+        if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
+		printk("%s lec_atm_close: closing with messages pending\n",
+                       dev->name);
+        while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) {
+                atm_return(vcc, skb->truesize);
+		dev_kfree_skb(skb);
+        }
+  
+	printk("%s: Shut down!\n", dev->name);
+        module_put(THIS_MODULE);
+}
+
+static struct atmdev_ops lecdev_ops = {
+        .close	= lec_atm_close,
+        .send	= lec_atm_send
+};
+
+static struct atm_dev lecatm_dev = {
+	.ops	= &lecdev_ops,
+	.type	= "lec",
+	.number	= 999,	/* dummy device number */
+	.lock	= SPIN_LOCK_UNLOCKED
+};
+
+/*
+ * LANE2: new argument struct sk_buff *data contains
+ * the LE_ARP based TLVs introduced in the LANE2 spec
+ */
+static int 
+send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, 
+             unsigned char *mac_addr, unsigned char *atm_addr,
+             struct sk_buff *data)
+{
+	struct sock *sk;
+	struct sk_buff *skb;
+	struct atmlec_msg *mesg;
+
+	if (!priv || !priv->lecd) {
+		return -1;
+	}
+	skb = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC);
+	if (!skb)
+		return -1;
+	skb->len = sizeof(struct atmlec_msg);
+	mesg = (struct atmlec_msg *)skb->data;
+        memset(mesg, 0, sizeof(struct atmlec_msg));
+	mesg->type = type;
+        if (data != NULL)
+                mesg->sizeoftlvs = data->len;
+	if (mac_addr)
+		memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN);
+        else
+                mesg->content.normal.targetless_le_arp = 1;
+	if (atm_addr)
+		memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN);
+
+        atm_force_charge(priv->lecd, skb->truesize);
+	sk = sk_atm(priv->lecd);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+        sk->sk_data_ready(sk, skb->len);
+
+        if (data != NULL) {
+                DPRINTK("lec: about to send %d bytes of data\n", data->len);
+                atm_force_charge(priv->lecd, data->truesize);
+                skb_queue_tail(&sk->sk_receive_queue, data);
+                sk->sk_data_ready(sk, skb->len);
+        }
+
+        return 0;
+}
+
+/* shamelessly stolen from drivers/net/net_init.c */
+static int lec_change_mtu(struct net_device *dev, int new_mtu)
+{
+        if ((new_mtu < 68) || (new_mtu > 18190))
+                return -EINVAL;
+        dev->mtu = new_mtu;
+        return 0;
+}
+
+static void lec_set_multicast_list(struct net_device *dev)
+{
+	/* by default, all multicast frames arrive over the bus.
+         * eventually support selective multicast service
+         */
+        return;
+}
+
+static void 
+lec_init(struct net_device *dev)
+{
+        dev->change_mtu = lec_change_mtu;
+        dev->open = lec_open;
+        dev->stop = lec_close;
+        dev->hard_start_xmit = lec_start_xmit;
+	dev->tx_timeout = lec_tx_timeout;
+
+        dev->get_stats = lec_get_stats;
+        dev->set_multicast_list = lec_set_multicast_list;
+        dev->do_ioctl  = NULL;
+        printk("%s: Initialized!\n",dev->name);
+        return;
+}
+
+static unsigned char lec_ctrl_magic[] = {
+        0xff,
+        0x00,
+        0x01,
+        0x01 };
+
+static void 
+lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+        struct net_device *dev = (struct net_device *)vcc->proto_data;
+        struct lec_priv *priv = (struct lec_priv *)dev->priv; 
+
+#if DUMP_PACKETS >0
+        int i=0;
+        char buf[300];
+
+        printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name,
+               vcc->vpi, vcc->vci);
+#endif
+        if (!skb) {
+                DPRINTK("%s: null skb\n",dev->name);
+                lec_vcc_close(priv, vcc);
+                return;
+        }
+#if DUMP_PACKETS > 0
+        printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name,
+               skb->len, priv->lecid);
+#if DUMP_PACKETS >= 2
+        for(i=0;i<skb->len && i <99;i++) {
+                sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]);
+        }
+#elif DUMP_PACKETS >= 1
+        for(i=0;i<skb->len && i < 30;i++) {
+                sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]);
+        }
+#endif /* DUMP_PACKETS >= 1 */
+        if (i==skb->len)
+                printk("%s\n",buf);
+        else
+                printk("%s...\n",buf);
+#endif /* DUMP_PACKETS > 0 */
+        if (memcmp(skb->data, lec_ctrl_magic, 4) ==0) { /* Control frame, to daemon*/
+		struct sock *sk = sk_atm(vcc);
+
+                DPRINTK("%s: To daemon\n",dev->name);
+                skb_queue_tail(&sk->sk_receive_queue, skb);
+                sk->sk_data_ready(sk, skb->len);
+        } else { /* Data frame, queue to protocol handlers */
+                unsigned char *dst;
+
+                atm_return(vcc,skb->truesize);
+                if (*(uint16_t *)skb->data == htons(priv->lecid) ||
+                    !priv->lecd ||
+                    !(dev->flags & IFF_UP)) { 
+                        /* Probably looping back, or if lecd is missing,
+                           lecd has gone down */
+                        DPRINTK("Ignoring frame...\n");
+                        dev_kfree_skb(skb);
+                        return;
+                }
+#ifdef CONFIG_TR
+                if (priv->is_trdev) dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest;
+                else
+#endif
+                dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest;
+
+                if (!(dst[0]&0x01) &&   /* Never filter Multi/Broadcast */
+                    !priv->is_proxy &&  /* Proxy wants all the packets */
+		    memcmp(dst, dev->dev_addr, dev->addr_len)) {
+                        dev_kfree_skb(skb);
+                        return;
+                }
+                if (priv->lec_arp_empty_ones) {
+                        lec_arp_check_empties(priv, vcc, skb);
+                }
+                skb->dev = dev;
+                skb_pull(skb, 2); /* skip lec_id */
+#ifdef CONFIG_TR
+                if (priv->is_trdev) skb->protocol = tr_type_trans(skb, dev);
+                else
+#endif
+                skb->protocol = eth_type_trans(skb, dev);
+                priv->stats.rx_packets++;
+                priv->stats.rx_bytes += skb->len;
+                memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
+                netif_rx(skb);
+        }
+}
+
+static void
+lec_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+	struct net_device *dev = skb->dev;
+
+	if (vpriv == NULL) {
+		printk("lec_pop(): vpriv = NULL!?!?!?\n");
+		return;
+	}
+
+	vpriv->old_pop(vcc, skb);
+
+	if (vpriv->xoff && atm_may_send(vcc, 0)) {
+		vpriv->xoff = 0;
+		if (netif_running(dev) && netif_queue_stopped(dev))
+			netif_wake_queue(dev);
+	}
+}
+
+static int 
+lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
+{
+	struct lec_vcc_priv *vpriv;
+        int bytes_left;
+        struct atmlec_ioc ioc_data;
+
+        /* Lecd must be up in this case */
+        bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
+        if (bytes_left != 0) {
+                printk("lec: lec_vcc_attach, copy from user failed for %d bytes\n",
+                       bytes_left);
+        }
+        if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || 
+            !dev_lec[ioc_data.dev_num])
+                return -EINVAL;
+	if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL)))
+		return -ENOMEM;
+	vpriv->xoff = 0;
+	vpriv->old_pop = vcc->pop;
+	vcc->user_back = vpriv;
+	vcc->pop = lec_pop;
+        lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, 
+                      &ioc_data, vcc, vcc->push);
+        vcc->proto_data = dev_lec[ioc_data.dev_num];
+        vcc->push = lec_push;
+        return 0;
+}
+
+static int 
+lec_mcast_attach(struct atm_vcc *vcc, int arg)
+{
+        if (arg <0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+                return -EINVAL;
+        vcc->proto_data = dev_lec[arg];
+        return (lec_mcast_make((struct lec_priv*)dev_lec[arg]->priv, vcc));
+}
+
+/* Initialize device. */
+static int 
+lecd_attach(struct atm_vcc *vcc, int arg)
+{  
+        int i;
+        struct lec_priv *priv;
+
+        if (arg<0)
+                i = 0;
+        else
+                i = arg;
+#ifdef CONFIG_TR
+        if (arg >= MAX_LEC_ITF)
+                return -EINVAL;
+#else /* Reserve the top NUM_TR_DEVS for TR */
+        if (arg >= (MAX_LEC_ITF-NUM_TR_DEVS))
+                return -EINVAL;
+#endif
+        if (!dev_lec[i]) {
+                int is_trdev, size;
+
+                is_trdev = 0;
+                if (i >= (MAX_LEC_ITF - NUM_TR_DEVS))
+                        is_trdev = 1;
+
+                size = sizeof(struct lec_priv);
+#ifdef CONFIG_TR
+                if (is_trdev)
+                        dev_lec[i] = alloc_trdev(size);
+                else
+#endif
+                dev_lec[i] = alloc_etherdev(size);
+                if (!dev_lec[i])
+                        return -ENOMEM;
+                snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
+                if (register_netdev(dev_lec[i])) {
+                        free_netdev(dev_lec[i]);
+                        return -EINVAL;
+                }
+
+                priv = dev_lec[i]->priv;
+                priv->is_trdev = is_trdev;
+                lec_init(dev_lec[i]);
+        } else {
+                priv = dev_lec[i]->priv;
+                if (priv->lecd)
+                        return -EADDRINUSE;
+        }
+        lec_arp_init(priv);
+	priv->itfnum = i;  /* LANE2 addition */
+        priv->lecd = vcc;
+        vcc->dev = &lecatm_dev;
+        vcc_insert_socket(sk_atm(vcc));
+        
+        vcc->proto_data = dev_lec[i];
+	set_bit(ATM_VF_META,&vcc->flags);
+	set_bit(ATM_VF_READY,&vcc->flags);
+
+        /* Set default values to these variables */
+        priv->maximum_unknown_frame_count = 1;
+        priv->max_unknown_frame_time = (1*HZ);
+        priv->vcc_timeout_period = (1200*HZ);
+        priv->max_retry_count = 1;
+        priv->aging_time = (300*HZ);
+        priv->forward_delay_time = (15*HZ);
+        priv->topology_change = 0;
+        priv->arp_response_time = (1*HZ);
+        priv->flush_timeout = (4*HZ);
+        priv->path_switching_delay = (6*HZ);
+
+        if (dev_lec[i]->flags & IFF_UP) {
+                netif_start_queue(dev_lec[i]);
+        }
+        __module_get(THIS_MODULE);
+        return i;
+}
+
+#ifdef CONFIG_PROC_FS
+static char* lec_arp_get_status_string(unsigned char status)
+{
+	static char *lec_arp_status_string[] = {
+		"ESI_UNKNOWN       ",
+		"ESI_ARP_PENDING   ",
+		"ESI_VC_PENDING    ",
+		"<Undefined>       ",
+		"ESI_FLUSH_PENDING ",
+		"ESI_FORWARD_DIRECT"
+	};
+
+	if (status > ESI_FORWARD_DIRECT)
+		status = 3;	/* ESI_UNDEFINED */
+	return lec_arp_status_string[status];
+}
+
+static void lec_info(struct seq_file *seq, struct lec_arp_table *entry)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		seq_printf(seq, "%2.2x", entry->mac_addr[i] & 0xff);
+	seq_printf(seq, " ");
+	for (i = 0; i < ATM_ESA_LEN; i++)
+		seq_printf(seq, "%2.2x", entry->atm_addr[i] & 0xff);
+	seq_printf(seq, " %s %4.4x", lec_arp_get_status_string(entry->status),
+		   entry->flags & 0xffff);
+	if (entry->vcc)
+		seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci);
+	else
+	        seq_printf(seq, "        ");
+	if (entry->recv_vcc) {
+		seq_printf(seq, "     %3d %3d", entry->recv_vcc->vpi,
+			   entry->recv_vcc->vci);
+        }
+        seq_putc(seq, '\n');
+}
+
+
+struct lec_state {
+	unsigned long flags;
+	struct lec_priv *locked;
+	struct lec_arp_table *entry;
+	struct net_device *dev;
+	int itf;
+	int arp_table;
+	int misc_table;
+};
+
+static void *lec_tbl_walk(struct lec_state *state, struct lec_arp_table *tbl,
+			  loff_t *l)
+{
+	struct lec_arp_table *e = state->entry;
+
+	if (!e)
+		e = tbl;
+	if (e == (void *)1) {
+		e = tbl;
+		--*l;
+	}
+	for (; e; e = e->next) {
+		if (--*l < 0)
+			break;
+	}
+	state->entry = e;
+	return (*l < 0) ? state : NULL;
+}
+
+static void *lec_arp_walk(struct lec_state *state, loff_t *l,
+			      struct lec_priv *priv)
+{
+	void *v = NULL;
+	int p;
+
+	for (p = state->arp_table; p < LEC_ARP_TABLE_SIZE; p++) {
+		v = lec_tbl_walk(state, priv->lec_arp_tables[p], l);
+		if (v)
+			break;
+	}
+	state->arp_table = p;
+	return v;
+}
+
+static void *lec_misc_walk(struct lec_state *state, loff_t *l,
+			   struct lec_priv *priv)
+{
+	struct lec_arp_table *lec_misc_tables[] = {
+		priv->lec_arp_empty_ones,
+		priv->lec_no_forward,
+		priv->mcast_fwds
+	};
+	void *v = NULL;
+	int q;
+
+	for (q = state->misc_table; q < ARRAY_SIZE(lec_misc_tables); q++) {
+		v = lec_tbl_walk(state, lec_misc_tables[q], l);
+		if (v)
+			break;
+	}
+	state->misc_table = q;
+	return v;
+}
+
+static void *lec_priv_walk(struct lec_state *state, loff_t *l,
+			   struct lec_priv *priv)
+{
+	if (!state->locked) {
+		state->locked = priv;
+		spin_lock_irqsave(&priv->lec_arp_lock, state->flags);
+	}
+	if (!lec_arp_walk(state, l, priv) &&
+	    !lec_misc_walk(state, l, priv)) {
+		spin_unlock_irqrestore(&priv->lec_arp_lock, state->flags);
+		state->locked = NULL;
+		/* Partial state reset for the next time we get called */
+		state->arp_table = state->misc_table = 0;
+	}
+	return state->locked;
+}
+
+static void *lec_itf_walk(struct lec_state *state, loff_t *l)
+{
+	struct net_device *dev;
+	void *v;
+
+	dev = state->dev ? state->dev : dev_lec[state->itf];
+	v = (dev && dev->priv) ? lec_priv_walk(state, l, dev->priv) : NULL;
+	if (!v && dev) {
+		dev_put(dev);
+		/* Partial state reset for the next time we get called */
+		dev = NULL;
+	}
+	state->dev = dev;
+	return v;
+}
+
+static void *lec_get_idx(struct lec_state *state, loff_t l)
+{
+	void *v = NULL;
+
+	for (; state->itf < MAX_LEC_ITF; state->itf++) {
+		v = lec_itf_walk(state, &l);
+		if (v)
+			break;
+	}
+	return v; 
+}
+
+static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct lec_state *state = seq->private;
+
+	state->itf = 0;
+	state->dev = NULL;
+	state->locked = NULL;
+	state->arp_table = 0;
+	state->misc_table = 0;
+	state->entry = (void *)1;
+
+	return *pos ? lec_get_idx(state, *pos) : (void*)1;
+}
+
+static void lec_seq_stop(struct seq_file *seq, void *v)
+{
+	struct lec_state *state = seq->private;
+
+	if (state->dev) {
+		spin_unlock_irqrestore(&state->locked->lec_arp_lock,
+				       state->flags);
+		dev_put(state->dev);
+	}
+}
+
+static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct lec_state *state = seq->private;
+
+	v = lec_get_idx(state, 1);
+	*pos += !!PTR_ERR(v);
+	return v;
+}
+
+static int lec_seq_show(struct seq_file *seq, void *v)
+{
+	static char lec_banner[] = "Itf  MAC          ATM destination" 
+		"                          Status            Flags "
+		"VPI/VCI Recv VPI/VCI\n";
+
+	if (v == (void *)1)
+		seq_puts(seq, lec_banner);
+	else {
+		struct lec_state *state = seq->private;
+		struct net_device *dev = state->dev; 
+
+		seq_printf(seq, "%s ", dev->name);
+		lec_info(seq, state->entry);
+	}
+	return 0;
+}
+
+static struct seq_operations lec_seq_ops = {
+	.start	= lec_seq_start,
+	.next	= lec_seq_next,
+	.stop	= lec_seq_stop,
+	.show	= lec_seq_show,
+};
+
+static int lec_seq_open(struct inode *inode, struct file *file)
+{
+	struct lec_state *state;
+	struct seq_file *seq;
+	int rc = -EAGAIN;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = seq_open(file, &lec_seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq = file->private_data;
+	seq->private = state;
+out:
+	return rc;
+
+out_kfree:
+	kfree(state);
+	goto out;
+}
+
+static int lec_seq_release(struct inode *inode, struct file *file)
+{
+	return seq_release_private(inode, file);
+}
+
+static struct file_operations lec_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= lec_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= lec_seq_release,
+};
+#endif
+
+static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int err = 0;
+	
+	switch (cmd) {
+		case ATMLEC_CTRL: 
+		case ATMLEC_MCAST:
+		case ATMLEC_DATA:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			break;
+		default:
+			return -ENOIOCTLCMD;
+	}
+
+	switch (cmd) {
+		case ATMLEC_CTRL:
+			err = lecd_attach(vcc, (int) arg);
+			if (err >= 0)
+				sock->state = SS_CONNECTED;
+			break;
+		case ATMLEC_MCAST:
+			err = lec_mcast_attach(vcc, (int) arg);
+			break;
+		case ATMLEC_DATA:
+			err = lec_vcc_attach(vcc, (void __user *) arg);
+			break;
+	}
+
+	return err;
+}
+
+static struct atm_ioctl lane_ioctl_ops = {
+	.owner  = THIS_MODULE,
+	.ioctl  = lane_ioctl,
+};
+
+static int __init lane_module_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("lec", S_IRUGO, atm_proc_root);
+	if (p)
+		p->proc_fops = &lec_seq_fops;
+#endif
+
+	register_atm_ioctl(&lane_ioctl_ops);
+        printk("lec.c: " __DATE__ " " __TIME__ " initialized\n");
+        return 0;
+}
+
+static void __exit lane_module_cleanup(void)
+{
+        int i;
+        struct lec_priv *priv;
+
+	remove_proc_entry("lec", atm_proc_root);
+
+	deregister_atm_ioctl(&lane_ioctl_ops);
+
+        for (i = 0; i < MAX_LEC_ITF; i++) {
+                if (dev_lec[i] != NULL) {
+                        priv = (struct lec_priv *)dev_lec[i]->priv;
+			unregister_netdev(dev_lec[i]);
+                        free_netdev(dev_lec[i]);
+                        dev_lec[i] = NULL;
+                }
+        }
+
+        return;                                    
+}
+
+module_init(lane_module_init);
+module_exit(lane_module_cleanup);
+
+/*
+ * LANE2: 3.1.3, LE_RESOLVE.request
+ * Non force allocates memory and fills in *tlvs, fills in *sizeoftlvs.
+ * If sizeoftlvs == NULL the default TLVs associated with with this
+ * lec will be used.
+ * If dst_mac == NULL, targetless LE_ARP will be sent
+ */
+static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+    u8 **tlvs, u32 *sizeoftlvs)
+{
+	unsigned long flags;
+        struct lec_priv *priv = (struct lec_priv *)dev->priv;
+        struct lec_arp_table *table;
+        struct sk_buff *skb;
+        int retval;
+
+        if (force == 0) {
+		spin_lock_irqsave(&priv->lec_arp_lock, flags);
+                table = lec_arp_find(priv, dst_mac);
+		spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+                if(table == NULL)
+                        return -1;
+                
+                *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC);
+                if (*tlvs == NULL)
+                        return -1;
+                
+                memcpy(*tlvs, table->tlvs, table->sizeoftlvs);
+                *sizeoftlvs = table->sizeoftlvs;
+                
+                return 0;
+        }
+
+	if (sizeoftlvs == NULL)
+		retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, NULL);
+		
+	else {
+		skb = alloc_skb(*sizeoftlvs, GFP_ATOMIC);
+		if (skb == NULL)
+			return -1;
+		skb->len = *sizeoftlvs;
+		memcpy(skb->data, *tlvs, *sizeoftlvs);
+		retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
+	}
+        return retval;
+}        
+
+
+/*
+ * LANE2: 3.1.4, LE_ASSOCIATE.request
+ * Associate the *tlvs with the *lan_dst address.
+ * Will overwrite any previous association
+ * Returns 1 for success, 0 for failure (out of memory)
+ *
+ */
+static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
+                         u8 *tlvs, u32 sizeoftlvs)
+{
+        int retval;
+        struct sk_buff *skb;
+        struct lec_priv *priv = (struct lec_priv*)dev->priv;
+
+        if ( memcmp(lan_dst, dev->dev_addr, ETH_ALEN) != 0 )
+                return (0);       /* not our mac address */
+
+        kfree(priv->tlvs); /* NULL if there was no previous association */
+
+        priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+        if (priv->tlvs == NULL)
+                return (0);
+        priv->sizeoftlvs = sizeoftlvs;
+        memcpy(priv->tlvs, tlvs, sizeoftlvs);
+
+        skb = alloc_skb(sizeoftlvs, GFP_ATOMIC);
+        if (skb == NULL)
+                return 0;
+        skb->len = sizeoftlvs;
+        memcpy(skb->data, tlvs, sizeoftlvs);
+        retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
+        if (retval != 0)
+                printk("lec.c: lane2_associate_req() failed\n");
+        /* If the previous association has changed we must
+         * somehow notify other LANE entities about the change
+         */
+        return (1);
+}
+
+/*
+ * LANE2: 3.1.5, LE_ASSOCIATE.indication
+ *
+ */
+static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr,
+    u8 *tlvs, u32 sizeoftlvs)
+{
+#if 0
+        int i = 0;
+#endif
+	struct lec_priv *priv = (struct lec_priv *)dev->priv;
+#if 0 /* Why have the TLVs in LE_ARP entries since we do not use them? When you
+         uncomment this code, make sure the TLVs get freed when entry is killed */
+        struct lec_arp_table *entry = lec_arp_find(priv, mac_addr);
+
+        if (entry == NULL)
+                return;     /* should not happen */
+
+        kfree(entry->tlvs);
+
+        entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL);
+        if (entry->tlvs == NULL)
+                return;
+
+        entry->sizeoftlvs = sizeoftlvs;
+        memcpy(entry->tlvs, tlvs, sizeoftlvs);
+#endif
+#if 0
+        printk("lec.c: lane2_associate_ind()\n");
+        printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs);
+        while (i < sizeoftlvs)
+                printk("%02x ", tlvs[i++]);
+        
+        printk("\n");
+#endif
+
+        /* tell MPOA about the TLVs we saw */
+        if (priv->lane2_ops && priv->lane2_ops->associate_indicator) {
+                priv->lane2_ops->associate_indicator(dev, mac_addr,
+                                                     tlvs, sizeoftlvs);
+        }
+        return;
+}
+
+/*
+ * Here starts what used to lec_arpc.c
+ *
+ * lec_arpc.c was added here when making
+ * lane client modular. October 1997
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <asm/param.h>
+#include <asm/atomic.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+
+#if 0
+#define DPRINTK(format,args...)
+/*
+#define DPRINTK printk
+*/
+#endif
+#define DEBUG_ARP_TABLE 0
+
+#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
+
+static void lec_arp_check_expire(unsigned long data);
+static void lec_arp_expire_arp(unsigned long data);
+
+/* 
+ * Arp table funcs
+ */
+
+#define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE -1))
+
+/*
+ * Initialization of arp-cache
+ */
+static void 
+lec_arp_init(struct lec_priv *priv)
+{
+        unsigned short i;
+
+        for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                priv->lec_arp_tables[i] = NULL;
+        }        
+	spin_lock_init(&priv->lec_arp_lock);
+        init_timer(&priv->lec_arp_timer);
+        priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL;
+        priv->lec_arp_timer.data = (unsigned long)priv;
+        priv->lec_arp_timer.function = lec_arp_check_expire;
+        add_timer(&priv->lec_arp_timer);
+}
+
+static void
+lec_arp_clear_vccs(struct lec_arp_table *entry)
+{
+        if (entry->vcc) {
+		struct atm_vcc *vcc = entry->vcc;
+		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+		struct net_device *dev = (struct net_device*) vcc->proto_data;
+
+                vcc->pop = vpriv->old_pop;
+		if (vpriv->xoff)
+			netif_wake_queue(dev);
+		kfree(vpriv);
+		vcc->user_back = NULL;
+                vcc->push = entry->old_push;
+		vcc_release_async(vcc, -EPIPE);
+                vcc = NULL;
+        }
+        if (entry->recv_vcc) {
+                entry->recv_vcc->push = entry->old_recv_push;
+		vcc_release_async(entry->recv_vcc, -EPIPE);
+                entry->recv_vcc = NULL;
+        }        
+}
+
+/*
+ * Insert entry to lec_arp_table
+ * LANE2: Add to the end of the list to satisfy 8.1.13
+ */
+static inline void 
+lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add)
+{
+        unsigned short place;
+        struct lec_arp_table *tmp;
+
+        place = HASH(to_add->mac_addr[ETH_ALEN-1]);
+        tmp = priv->lec_arp_tables[place];
+        to_add->next = NULL;
+        if (tmp == NULL)
+                priv->lec_arp_tables[place] = to_add;
+  
+        else {  /* add to the end */
+                while (tmp->next)
+                        tmp = tmp->next;
+                tmp->next = to_add;
+        }
+
+        DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+                0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1],
+                0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3],
+                0xff&to_add->mac_addr[4], 0xff&to_add->mac_addr[5]);
+}
+
+/*
+ * Remove entry from lec_arp_table
+ */
+static int 
+lec_arp_remove(struct lec_priv *priv,
+               struct lec_arp_table *to_remove)
+{
+        unsigned short place;
+        struct lec_arp_table *tmp;
+        int remove_vcc=1;
+
+        if (!to_remove) {
+                return -1;
+        }
+        place = HASH(to_remove->mac_addr[ETH_ALEN-1]);
+        tmp = priv->lec_arp_tables[place];
+        if (tmp == to_remove) {
+                priv->lec_arp_tables[place] = tmp->next;
+        } else {
+                while(tmp && tmp->next != to_remove) {
+                        tmp = tmp->next;
+                }
+                if (!tmp) {/* Entry was not found */
+                        return -1;
+                }
+        }
+        tmp->next = to_remove->next;
+        del_timer(&to_remove->timer);
+  
+        /* If this is the only MAC connected to this VCC, also tear down
+           the VCC */
+        if (to_remove->status >= ESI_FLUSH_PENDING) {
+                /*
+                 * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
+                 */
+                for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) {
+                        for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) {
+                                if (memcmp(tmp->atm_addr, to_remove->atm_addr,
+                                           ATM_ESA_LEN)==0) {
+                                        remove_vcc=0;
+                                        break;
+                                }
+                        }
+                }
+                if (remove_vcc)
+                        lec_arp_clear_vccs(to_remove);
+        }
+        skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */
+
+        DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+                0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1],
+                0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3],
+                0xff&to_remove->mac_addr[4], 0xff&to_remove->mac_addr[5]);
+        return 0;
+}
+
+#if DEBUG_ARP_TABLE
+static char*
+get_status_string(unsigned char st)
+{
+        switch(st) {
+        case ESI_UNKNOWN:
+                return "ESI_UNKNOWN";
+        case ESI_ARP_PENDING:
+                return "ESI_ARP_PENDING";
+        case ESI_VC_PENDING:
+                return "ESI_VC_PENDING";
+        case ESI_FLUSH_PENDING:
+                return "ESI_FLUSH_PENDING";
+        case ESI_FORWARD_DIRECT:
+                return "ESI_FORWARD_DIRECT";
+        default:
+                return "<UNKNOWN>";
+        }
+}
+#endif
+
+static void
+dump_arp_table(struct lec_priv *priv)
+{
+#if DEBUG_ARP_TABLE
+        int i,j, offset;
+        struct lec_arp_table *rulla;
+        char buf[1024];
+        struct lec_arp_table **lec_arp_tables =
+                (struct lec_arp_table **)priv->lec_arp_tables;
+        struct lec_arp_table *lec_arp_empty_ones =
+                (struct lec_arp_table *)priv->lec_arp_empty_ones;
+        struct lec_arp_table *lec_no_forward =
+                (struct lec_arp_table *)priv->lec_no_forward;
+        struct lec_arp_table *mcast_fwds = priv->mcast_fwds;
+
+
+        printk("Dump %p:\n",priv);
+        for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
+                rulla = lec_arp_tables[i];
+                offset = 0;
+                offset += sprintf(buf,"%d: %p\n",i, rulla);
+                while (rulla) {
+                        offset += sprintf(buf+offset,"Mac:");
+                        for(j=0;j<ETH_ALEN;j++) {
+                                offset+=sprintf(buf+offset,
+                                                "%2.2x ",
+                                                rulla->mac_addr[j]&0xff);
+                        }
+                        offset +=sprintf(buf+offset,"Atm:");
+                        for(j=0;j<ATM_ESA_LEN;j++) {
+                                offset+=sprintf(buf+offset,
+                                                "%2.2x ",
+                                                rulla->atm_addr[j]&0xff);
+                        }      
+                        offset+=sprintf(buf+offset,
+                                        "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+                                        rulla->vcc?rulla->vcc->vpi:0, 
+                                        rulla->vcc?rulla->vcc->vci:0,
+                                        rulla->recv_vcc?rulla->recv_vcc->vpi:0,
+                                        rulla->recv_vcc?rulla->recv_vcc->vci:0,
+                                        rulla->last_used,
+                                        rulla->timestamp, rulla->no_tries);
+                        offset+=sprintf(buf+offset,
+                                        "Flags:%x, Packets_flooded:%x, Status: %s ",
+                                        rulla->flags, rulla->packets_flooded, 
+                                        get_status_string(rulla->status));
+                        offset+=sprintf(buf+offset,"->%p\n",rulla->next);
+                        rulla = rulla->next;
+                }
+                printk("%s",buf);
+        }
+        rulla = lec_no_forward;
+        if (rulla)
+                printk("No forward\n");  
+        while(rulla) {
+                offset=0;
+                offset += sprintf(buf+offset,"Mac:");
+                for(j=0;j<ETH_ALEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->mac_addr[j]&0xff);
+                }
+                offset +=sprintf(buf+offset,"Atm:");
+                for(j=0;j<ATM_ESA_LEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->atm_addr[j]&0xff);
+                }      
+                offset+=sprintf(buf+offset,
+                                "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+                                rulla->vcc?rulla->vcc->vpi:0, 
+                                rulla->vcc?rulla->vcc->vci:0, 
+                                rulla->recv_vcc?rulla->recv_vcc->vpi:0,
+                                rulla->recv_vcc?rulla->recv_vcc->vci:0,
+                                rulla->last_used, 
+                                rulla->timestamp, rulla->no_tries);
+                offset+=sprintf(buf+offset,
+                                "Flags:%x, Packets_flooded:%x, Status: %s ",
+                                rulla->flags, rulla->packets_flooded, 
+                                get_status_string(rulla->status));
+                offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
+                rulla = rulla->next;
+                printk("%s",buf);
+        }
+        rulla = lec_arp_empty_ones;
+        if (rulla)
+                printk("Empty ones\n");  
+        while(rulla) {
+                offset=0;
+                offset += sprintf(buf+offset,"Mac:");
+                for(j=0;j<ETH_ALEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->mac_addr[j]&0xff);
+                }
+                offset +=sprintf(buf+offset,"Atm:");
+                for(j=0;j<ATM_ESA_LEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->atm_addr[j]&0xff);
+                }      
+                offset+=sprintf(buf+offset,
+                                "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+                                rulla->vcc?rulla->vcc->vpi:0, 
+                                rulla->vcc?rulla->vcc->vci:0, 
+                                rulla->recv_vcc?rulla->recv_vcc->vpi:0,
+                                rulla->recv_vcc?rulla->recv_vcc->vci:0,
+                                rulla->last_used, 
+                                rulla->timestamp, rulla->no_tries);
+                offset+=sprintf(buf+offset,
+                                "Flags:%x, Packets_flooded:%x, Status: %s ",
+                                rulla->flags, rulla->packets_flooded, 
+                                get_status_string(rulla->status));
+                offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
+                rulla = rulla->next;
+                printk("%s",buf);
+        }
+
+        rulla = mcast_fwds;
+        if (rulla)
+                printk("Multicast Forward VCCs\n");  
+        while(rulla) {
+                offset=0;
+                offset += sprintf(buf+offset,"Mac:");
+                for(j=0;j<ETH_ALEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->mac_addr[j]&0xff);
+                }
+                offset +=sprintf(buf+offset,"Atm:");
+                for(j=0;j<ATM_ESA_LEN;j++) {
+                        offset+=sprintf(buf+offset,"%2.2x ",
+                                        rulla->atm_addr[j]&0xff);
+                }      
+                offset+=sprintf(buf+offset,
+                                "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
+                                rulla->vcc?rulla->vcc->vpi:0, 
+                                rulla->vcc?rulla->vcc->vci:0, 
+                                rulla->recv_vcc?rulla->recv_vcc->vpi:0,
+                                rulla->recv_vcc?rulla->recv_vcc->vci:0,
+                                rulla->last_used, 
+                                rulla->timestamp, rulla->no_tries);
+                offset+=sprintf(buf+offset,
+                                "Flags:%x, Packets_flooded:%x, Status: %s ",
+                                rulla->flags, rulla->packets_flooded, 
+                                get_status_string(rulla->status));
+                offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next);
+                rulla = rulla->next;
+                printk("%s",buf);
+        }
+
+#endif
+}
+
+/*
+ * Destruction of arp-cache
+ */
+static void
+lec_arp_destroy(struct lec_priv *priv)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry, *next;
+        int i;
+
+        del_timer_sync(&priv->lec_arp_timer);
+        
+        /*
+         * Remove all entries
+         */
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) {
+                        next = entry->next;
+                        lec_arp_remove(priv, entry);
+                        kfree(entry);
+                }
+        }
+        entry = priv->lec_arp_empty_ones;
+        while(entry) {
+                next = entry->next;
+                del_timer_sync(&entry->timer);
+                lec_arp_clear_vccs(entry);
+                kfree(entry);
+                entry = next;
+        }
+        priv->lec_arp_empty_ones = NULL;
+        entry = priv->lec_no_forward;
+        while(entry) {
+                next = entry->next;
+                del_timer_sync(&entry->timer);
+                lec_arp_clear_vccs(entry);
+                kfree(entry);
+                entry = next;
+        }
+        priv->lec_no_forward = NULL;
+        entry = priv->mcast_fwds;
+        while(entry) {
+                next = entry->next;
+                /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
+                lec_arp_clear_vccs(entry);
+                kfree(entry);
+                entry = next;
+        }
+        priv->mcast_fwds = NULL;
+        priv->mcast_vcc = NULL;
+        memset(priv->lec_arp_tables, 0, 
+               sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE);
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+}
+
+
+/* 
+ * Find entry by mac_address
+ */
+static struct lec_arp_table*
+lec_arp_find(struct lec_priv *priv,
+             unsigned char *mac_addr)
+{
+        unsigned short place;
+        struct lec_arp_table *to_return;
+
+        DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+                mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff, 
+                mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff);
+        place = HASH(mac_addr[ETH_ALEN-1]);
+  
+        to_return = priv->lec_arp_tables[place];
+        while(to_return) {
+                if (memcmp(mac_addr, to_return->mac_addr, ETH_ALEN) == 0) {
+                        return to_return;
+                }
+                to_return = to_return->next;
+        }
+        return NULL;
+}
+
+static struct lec_arp_table*
+make_entry(struct lec_priv *priv, unsigned char *mac_addr)
+{
+        struct lec_arp_table *to_return;
+
+        to_return = (struct lec_arp_table *) kmalloc(sizeof(struct lec_arp_table),
+						     GFP_ATOMIC);
+        if (!to_return) {
+                printk("LEC: Arp entry kmalloc failed\n");
+                return NULL;
+        }
+        memset(to_return, 0, sizeof(struct lec_arp_table));
+        memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
+        init_timer(&to_return->timer);
+        to_return->timer.function = lec_arp_expire_arp;
+        to_return->timer.data = (unsigned long) to_return;
+        to_return->last_used = jiffies;
+        to_return->priv = priv;
+        skb_queue_head_init(&to_return->tx_wait);
+        return to_return;
+}
+
+/*
+ *
+ * Arp sent timer expired
+ *
+ */
+static void
+lec_arp_expire_arp(unsigned long data)
+{
+        struct lec_arp_table *entry;
+
+        entry = (struct lec_arp_table *)data;
+
+        DPRINTK("lec_arp_expire_arp\n");
+        if (entry->status == ESI_ARP_PENDING) {
+                if (entry->no_tries <= entry->priv->max_retry_count) {
+                        if (entry->is_rdesc)
+                                send_to_lecd(entry->priv, l_rdesc_arp_xmt, entry->mac_addr, NULL, NULL);
+                        else
+                                send_to_lecd(entry->priv, l_arp_xmt, entry->mac_addr, NULL, NULL);
+                        entry->no_tries++;
+                }
+                mod_timer(&entry->timer, jiffies + (1*HZ));
+        }
+}
+
+/*
+ *
+ * Unknown/unused vcc expire, remove associated entry
+ *
+ */
+static void
+lec_arp_expire_vcc(unsigned long data)
+{
+	unsigned long flags;
+        struct lec_arp_table *to_remove = (struct lec_arp_table*)data;
+        struct lec_priv *priv = (struct lec_priv *)to_remove->priv;
+        struct lec_arp_table *entry = NULL;
+
+        del_timer(&to_remove->timer);
+
+        DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n",
+                to_remove, priv, 
+                to_remove->vcc?to_remove->recv_vcc->vpi:0,
+                to_remove->vcc?to_remove->recv_vcc->vci:0);
+        DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward);
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        if (to_remove == priv->lec_arp_empty_ones)
+                priv->lec_arp_empty_ones = to_remove->next;
+        else {
+                entry = priv->lec_arp_empty_ones;
+                while (entry && entry->next != to_remove)
+                        entry = entry->next;
+                if (entry)
+                        entry->next = to_remove->next;
+        }
+        if (!entry) {
+                if (to_remove == priv->lec_no_forward) {
+                        priv->lec_no_forward = to_remove->next;
+                } else {
+                        entry = priv->lec_no_forward;
+                        while (entry && entry->next != to_remove)
+                                entry = entry->next;
+                        if (entry)
+                                entry->next = to_remove->next;
+                }
+	}
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+
+        lec_arp_clear_vccs(to_remove);
+        kfree(to_remove);
+}
+
+/*
+ * Expire entries.
+ * 1. Re-set timer
+ * 2. For each entry, delete entries that have aged past the age limit.
+ * 3. For each entry, depending on the status of the entry, perform
+ *    the following maintenance.
+ *    a. If status is ESI_VC_PENDING or ESI_ARP_PENDING then if the
+ *       tick_count is above the max_unknown_frame_time, clear
+ *       the tick_count to zero and clear the packets_flooded counter
+ *       to zero. This supports the packet rate limit per address
+ *       while flooding unknowns.
+ *    b. If the status is ESI_FLUSH_PENDING and the tick_count is greater
+ *       than or equal to the path_switching_delay, change the status
+ *       to ESI_FORWARD_DIRECT. This causes the flush period to end
+ *       regardless of the progress of the flush protocol.
+ */
+static void
+lec_arp_check_expire(unsigned long data)
+{
+	unsigned long flags;
+        struct lec_priv *priv = (struct lec_priv *)data;
+        struct lec_arp_table *entry, *next;
+        unsigned long now;
+        unsigned long time_to_check;
+        int i;
+
+        DPRINTK("lec_arp_check_expire %p\n",priv);
+        DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones,
+                priv->lec_no_forward);
+	now = jiffies;
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+	for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+		for(entry = priv->lec_arp_tables[i]; entry != NULL; ) {
+			if ((entry->flags) & LEC_REMOTE_FLAG && 
+			    priv->topology_change)
+				time_to_check = priv->forward_delay_time;
+			else
+				time_to_check = priv->aging_time;
+
+			DPRINTK("About to expire: %lx - %lx > %lx\n",
+				now,entry->last_used, time_to_check);
+			if( time_after(now, entry->last_used+
+			   time_to_check) && 
+			    !(entry->flags & LEC_PERMANENT_FLAG) &&
+			    !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */
+				/* Remove entry */
+				DPRINTK("LEC:Entry timed out\n");
+				next = entry->next;      
+				lec_arp_remove(priv, entry);
+				kfree(entry);
+				entry = next;
+			} else {
+				/* Something else */
+				if ((entry->status == ESI_VC_PENDING ||
+				     entry->status == ESI_ARP_PENDING) 
+				    && time_after_eq(now,
+				    entry->timestamp +
+				    priv->max_unknown_frame_time)) {
+					entry->timestamp = jiffies;
+					entry->packets_flooded = 0;
+					if (entry->status == ESI_VC_PENDING)
+						send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL);
+				}
+				if (entry->status == ESI_FLUSH_PENDING 
+				   &&
+				   time_after_eq(now, entry->timestamp+
+				   priv->path_switching_delay)) {
+					struct sk_buff *skb;
+
+					while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
+						lec_send(entry->vcc, skb, entry->priv);
+					entry->last_used = jiffies;
+					entry->status = 
+						ESI_FORWARD_DIRECT;
+				}
+				entry = entry->next;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+
+        mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL);
+}
+/*
+ * Try to find vcc where mac_address is attached.
+ * 
+ */
+static struct atm_vcc*
+lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find,
+		int is_rdesc, struct lec_arp_table **ret_entry)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry;
+	struct atm_vcc *found;
+
+        if (mac_to_find[0] & 0x01) {
+                switch (priv->lane_version) {
+                case 1:
+                        return priv->mcast_vcc;
+                        break;
+                case 2:  /* LANE2 wants arp for multicast addresses */
+                        if ( memcmp(mac_to_find, bus_mac, ETH_ALEN) == 0)
+                                return priv->mcast_vcc;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        entry = lec_arp_find(priv, mac_to_find);
+  
+        if (entry) {
+                if (entry->status == ESI_FORWARD_DIRECT) {
+                        /* Connection Ok */
+                        entry->last_used = jiffies;
+                        *ret_entry = entry;
+                        found = entry->vcc;
+			goto out;
+                }
+                /* Data direct VC not yet set up, check to see if the unknown
+                   frame count is greater than the limit. If the limit has
+                   not been reached, allow the caller to send packet to
+                   BUS. */
+                if (entry->status != ESI_FLUSH_PENDING &&
+                    entry->packets_flooded<priv->maximum_unknown_frame_count) {
+                        entry->packets_flooded++;
+                        DPRINTK("LEC_ARP: Flooding..\n");
+                        found = priv->mcast_vcc;
+			goto out;
+                }
+		/* We got here because entry->status == ESI_FLUSH_PENDING
+		 * or BUS flood limit was reached for an entry which is
+		 * in ESI_ARP_PENDING or ESI_VC_PENDING state.
+		 */
+                *ret_entry = entry;
+                DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc);
+                found = NULL;
+        } else {
+                /* No matching entry was found */
+                entry = make_entry(priv, mac_to_find);
+                DPRINTK("LEC_ARP: Making entry\n");
+                if (!entry) {
+                        found = priv->mcast_vcc;
+			goto out;
+                }
+                lec_arp_add(priv, entry);
+                /* We want arp-request(s) to be sent */
+                entry->packets_flooded =1;
+                entry->status = ESI_ARP_PENDING;
+                entry->no_tries = 1;
+                entry->last_used = entry->timestamp = jiffies;
+                entry->is_rdesc = is_rdesc;
+                if (entry->is_rdesc)
+                        send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, NULL);
+                else
+                        send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
+                entry->timer.expires = jiffies + (1*HZ);
+                entry->timer.function = lec_arp_expire_arp;
+                add_timer(&entry->timer);
+                found = priv->mcast_vcc;
+        }
+
+out:
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+	return found;
+}
+
+static int
+lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, 
+                unsigned long permanent)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry, *next;
+        int i;
+
+        DPRINTK("lec_addr_delete\n");
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) {
+                        next = entry->next;
+                        if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)
+                            && (permanent || 
+                                !(entry->flags & LEC_PERMANENT_FLAG))) {
+				lec_arp_remove(priv, entry);
+                                kfree(entry);
+                        }
+			spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+                        return 0;
+                }
+        }
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+        return -1;
+}
+
+/*
+ * Notifies:  Response to arp_request (atm_addr != NULL) 
+ */
+static void
+lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
+               unsigned char *atm_addr, unsigned long remoteflag,
+               unsigned int targetless_le_arp)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry, *tmp;
+        int i;
+
+        DPRINTK("lec:%s", (targetless_le_arp) ? "targetless ": " ");
+        DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+                mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3],
+                mac_addr[4],mac_addr[5]);
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        entry = lec_arp_find(priv, mac_addr);
+        if (entry == NULL && targetless_le_arp)
+                goto out;   /* LANE2: ignore targetless LE_ARPs for which
+                             * we have no entry in the cache. 7.1.30
+                             */
+        if (priv->lec_arp_empty_ones) {
+                entry = priv->lec_arp_empty_ones;
+                if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) {
+                        priv->lec_arp_empty_ones = entry->next;
+                } else {
+                        while(entry->next && memcmp(entry->next->atm_addr, 
+                                                    atm_addr, ATM_ESA_LEN))
+                                entry = entry->next;
+                        if (entry->next) {
+                                tmp = entry;
+                                entry = entry->next;
+                                tmp->next = entry->next;
+                        } else
+                                entry = NULL;
+                        
+                }
+                if (entry) {
+                        del_timer(&entry->timer);
+                        tmp = lec_arp_find(priv, mac_addr);
+                        if (tmp) {
+                                del_timer(&tmp->timer);
+                                tmp->status = ESI_FORWARD_DIRECT;
+                                memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN);
+                                tmp->vcc = entry->vcc;
+                                tmp->old_push = entry->old_push;
+                                tmp->last_used = jiffies;
+                                del_timer(&entry->timer);
+                                kfree(entry);
+                                entry=tmp;
+                        } else {
+                                entry->status = ESI_FORWARD_DIRECT;
+                                memcpy(entry->mac_addr, mac_addr, ETH_ALEN);
+                                entry->last_used = jiffies;
+                                lec_arp_add(priv, entry);
+                        }
+                        if (remoteflag)
+                                entry->flags|=LEC_REMOTE_FLAG;
+                        else
+                                entry->flags&=~LEC_REMOTE_FLAG;
+                        DPRINTK("After update\n");
+                        dump_arp_table(priv);
+                        goto out;
+                }
+        }
+        entry = lec_arp_find(priv, mac_addr);
+        if (!entry) {
+                entry = make_entry(priv, mac_addr);
+                if (!entry)
+			goto out;
+                entry->status = ESI_UNKNOWN;
+                lec_arp_add(priv, entry);
+                /* Temporary, changes before end of function */
+        }
+        memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
+        del_timer(&entry->timer);
+        for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) {
+                        if (entry != tmp &&
+                            !memcmp(tmp->atm_addr, atm_addr,
+                                    ATM_ESA_LEN)) { 
+                                /* Vcc to this host exists */
+                                if (tmp->status > ESI_VC_PENDING) {
+                                        /*
+                                         * ESI_FLUSH_PENDING,
+                                         * ESI_FORWARD_DIRECT
+                                         */
+                                        entry->vcc = tmp->vcc;
+                                        entry->old_push=tmp->old_push;
+                                }
+                                entry->status=tmp->status;
+                                break;
+                        }
+                }
+        }
+        if (remoteflag)
+                entry->flags|=LEC_REMOTE_FLAG;
+        else
+                entry->flags&=~LEC_REMOTE_FLAG;
+        if (entry->status == ESI_ARP_PENDING ||
+            entry->status == ESI_UNKNOWN) {
+                entry->status = ESI_VC_PENDING;
+                send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL);
+        }
+        DPRINTK("After update2\n");
+        dump_arp_table(priv);
+out:
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+}
+
+/*
+ * Notifies: Vcc setup ready 
+ */
+static void
+lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+              struct atm_vcc *vcc,
+              void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb))
+{
+	unsigned long flags;
+        struct lec_arp_table *entry;
+        int i, found_entry=0;
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        if (ioc_data->receive == 2) {
+                /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */
+
+                DPRINTK("LEC_ARP: Attaching mcast forward\n");
+#if 0
+                entry = lec_arp_find(priv, bus_mac);
+                if (!entry) {
+                        printk("LEC_ARP: Multicast entry not found!\n");
+			goto out;
+                }
+                memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+                entry->recv_vcc = vcc;
+                entry->old_recv_push = old_push;
+#endif
+                entry = make_entry(priv, bus_mac);
+                if (entry == NULL)
+			goto out;
+                del_timer(&entry->timer);
+                memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+                entry->recv_vcc = vcc;
+                entry->old_recv_push = old_push;
+                entry->next = priv->mcast_fwds;
+                priv->mcast_fwds = entry;
+                goto out;
+        } else if (ioc_data->receive == 1) {
+                /* Vcc which we don't want to make default vcc, attach it
+                   anyway. */
+                DPRINTK("LEC_ARP:Attaching data direct, not default :%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+                        ioc_data->atm_addr[0],ioc_data->atm_addr[1],
+                        ioc_data->atm_addr[2],ioc_data->atm_addr[3],
+                        ioc_data->atm_addr[4],ioc_data->atm_addr[5],
+                        ioc_data->atm_addr[6],ioc_data->atm_addr[7],
+                        ioc_data->atm_addr[8],ioc_data->atm_addr[9],
+                        ioc_data->atm_addr[10],ioc_data->atm_addr[11],
+                        ioc_data->atm_addr[12],ioc_data->atm_addr[13],
+                        ioc_data->atm_addr[14],ioc_data->atm_addr[15],
+                        ioc_data->atm_addr[16],ioc_data->atm_addr[17],
+                        ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
+                entry = make_entry(priv, bus_mac);
+                if (entry == NULL)
+			goto out;
+                memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+                memset(entry->mac_addr, 0, ETH_ALEN);
+                entry->recv_vcc = vcc;
+                entry->old_recv_push = old_push;
+                entry->status = ESI_UNKNOWN;
+                entry->timer.expires = jiffies + priv->vcc_timeout_period;
+                entry->timer.function = lec_arp_expire_vcc;
+                add_timer(&entry->timer);
+                entry->next = priv->lec_no_forward;
+                priv->lec_no_forward = entry;
+		dump_arp_table(priv);
+		goto out;
+        }
+        DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
+                ioc_data->atm_addr[0],ioc_data->atm_addr[1],
+                ioc_data->atm_addr[2],ioc_data->atm_addr[3],
+                ioc_data->atm_addr[4],ioc_data->atm_addr[5],
+                ioc_data->atm_addr[6],ioc_data->atm_addr[7],
+                ioc_data->atm_addr[8],ioc_data->atm_addr[9],
+                ioc_data->atm_addr[10],ioc_data->atm_addr[11],
+                ioc_data->atm_addr[12],ioc_data->atm_addr[13],
+                ioc_data->atm_addr[14],ioc_data->atm_addr[15],
+                ioc_data->atm_addr[16],ioc_data->atm_addr[17],
+                ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
+        for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
+                        if (memcmp(ioc_data->atm_addr, entry->atm_addr, 
+                                   ATM_ESA_LEN)==0) {
+                                DPRINTK("LEC_ARP: Attaching data direct\n");
+                                DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n",
+                                        entry->vcc?entry->vcc->vci:0,
+                                        entry->recv_vcc?entry->recv_vcc->vci:0);
+                                found_entry=1;
+                                del_timer(&entry->timer);
+                                entry->vcc = vcc;
+                                entry->old_push = old_push;
+                                if (entry->status == ESI_VC_PENDING) {
+                                        if(priv->maximum_unknown_frame_count
+                                           ==0)
+                                                entry->status = 
+                                                        ESI_FORWARD_DIRECT;
+                                        else {
+                                                entry->timestamp = jiffies;
+                                                entry->status = 
+                                                        ESI_FLUSH_PENDING;
+#if 0
+                                                send_to_lecd(priv,l_flush_xmt,
+                                                             NULL,
+                                                             entry->atm_addr,
+                                                             NULL);
+#endif
+                                        }
+                                } else {
+                                        /* They were forming a connection
+                                           to us, and we to them. Our
+                                           ATM address is numerically lower
+                                           than theirs, so we make connection
+                                           we formed into default VCC (8.1.11).
+                                           Connection they made gets torn
+                                           down. This might confuse some
+                                           clients. Can be changed if
+                                           someone reports trouble... */
+                                        ;
+                                }
+                        }
+                }
+        }
+        if (found_entry) {
+                DPRINTK("After vcc was added\n");
+                dump_arp_table(priv);
+		goto out;
+        }
+        /* Not found, snatch address from first data packet that arrives from
+           this vcc */
+        entry = make_entry(priv, bus_mac);
+        if (!entry)
+		goto out;
+        entry->vcc = vcc;
+        entry->old_push = old_push;
+        memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
+        memset(entry->mac_addr, 0, ETH_ALEN);
+        entry->status = ESI_UNKNOWN;
+        entry->next = priv->lec_arp_empty_ones;
+        priv->lec_arp_empty_ones = entry;
+        entry->timer.expires = jiffies + priv->vcc_timeout_period;
+        entry->timer.function = lec_arp_expire_vcc;
+        add_timer(&entry->timer);
+        DPRINTK("After vcc was added\n");
+	dump_arp_table(priv);
+out:
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+}
+
+static void
+lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry;
+        int i;
+  
+        DPRINTK("LEC:lec_flush_complete %lx\n",tran_id);
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+                for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
+                        if (entry->flush_tran_id == tran_id &&
+                            entry->status == ESI_FLUSH_PENDING) {
+			        struct sk_buff *skb;
+
+ 				while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
+					lec_send(entry->vcc, skb, entry->priv);
+                                entry->status = ESI_FORWARD_DIRECT;
+                                DPRINTK("LEC_ARP: Flushed\n");
+                        }
+                }
+        }
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+        dump_arp_table(priv);
+}
+
+static void
+lec_set_flush_tran_id(struct lec_priv *priv,
+                      unsigned char *atm_addr, unsigned long tran_id)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry;
+        int i;
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
+                for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next)
+                        if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
+                                entry->flush_tran_id = tran_id;
+                                DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry);
+                        }
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+}
+
+static int 
+lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
+{
+	unsigned long flags;
+        unsigned char mac_addr[] = {
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+        struct lec_arp_table *to_add;
+	struct lec_vcc_priv *vpriv;
+	int err = 0;
+  
+	if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL)))
+		return -ENOMEM;
+	vpriv->xoff = 0;
+	vpriv->old_pop = vcc->pop;
+	vcc->user_back = vpriv;
+        vcc->pop = lec_pop;
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        to_add = make_entry(priv, mac_addr);
+        if (!to_add) {
+		vcc->pop = vpriv->old_pop;
+		kfree(vpriv);
+                err = -ENOMEM;
+		goto out;
+        }
+        memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN);
+        to_add->status = ESI_FORWARD_DIRECT;
+        to_add->flags |= LEC_PERMANENT_FLAG;
+        to_add->vcc = vcc;
+        to_add->old_push = vcc->push;
+        vcc->push = lec_push;
+        priv->mcast_vcc = vcc;
+        lec_arp_add(priv, to_add);
+out:
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+        return err;
+}
+
+static void
+lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
+{
+	unsigned long flags;
+        struct lec_arp_table *entry, *next;
+        int i;
+
+        DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci);
+        dump_arp_table(priv);
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
+                for(entry = priv->lec_arp_tables[i];entry; entry=next) {
+                        next = entry->next;
+                        if (vcc == entry->vcc) {
+                                lec_arp_remove(priv, entry);
+                                kfree(entry);
+                                if (priv->mcast_vcc == vcc) {
+                                        priv->mcast_vcc = NULL;
+                                }
+                        }
+                }
+        }
+
+        entry = priv->lec_arp_empty_ones;
+        priv->lec_arp_empty_ones = NULL;
+        while (entry != NULL) {
+                next = entry->next;
+                if (entry->vcc == vcc) { /* leave it out from the list */
+                        lec_arp_clear_vccs(entry);
+                        del_timer(&entry->timer);
+                        kfree(entry);
+                }
+                else {              /* put it back to the list */
+                        entry->next = priv->lec_arp_empty_ones;
+                        priv->lec_arp_empty_ones = entry;
+                }
+                entry = next;
+        }
+        
+        entry = priv->lec_no_forward;
+        priv->lec_no_forward = NULL;
+        while (entry != NULL) {
+                next = entry->next;
+                if (entry->recv_vcc == vcc) {
+                        lec_arp_clear_vccs(entry);
+                        del_timer(&entry->timer);
+                        kfree(entry);
+                }
+                else {
+                        entry->next = priv->lec_no_forward;
+                        priv->lec_no_forward = entry;
+                }
+                entry = next;
+        }
+
+        entry = priv->mcast_fwds;
+        priv->mcast_fwds = NULL;
+        while (entry != NULL) {
+                next = entry->next;
+                if (entry->recv_vcc == vcc) {
+                        lec_arp_clear_vccs(entry);
+                        /* No timer, LANEv2 7.1.20 and 2.3.5.3 */
+                        kfree(entry);
+                }
+                else {
+                        entry->next = priv->mcast_fwds;
+                        priv->mcast_fwds = entry;
+                }
+                entry = next;
+        }
+
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+	dump_arp_table(priv);
+}
+
+static void
+lec_arp_check_empties(struct lec_priv *priv,
+                      struct atm_vcc *vcc, struct sk_buff *skb)
+{
+        unsigned long flags;
+        struct lec_arp_table *entry, *prev;
+        struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data;
+        unsigned char *src;
+#ifdef CONFIG_TR
+        struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data;
+
+        if (priv->is_trdev) src = tr_hdr->h_source;
+        else
+#endif
+        src = hdr->h_source;
+
+	spin_lock_irqsave(&priv->lec_arp_lock, flags);
+        entry = priv->lec_arp_empty_ones;
+        if (vcc == entry->vcc) {
+                del_timer(&entry->timer);
+                memcpy(entry->mac_addr, src, ETH_ALEN);
+                entry->status = ESI_FORWARD_DIRECT;
+                entry->last_used = jiffies;
+                priv->lec_arp_empty_ones = entry->next;
+                /* We might have got an entry */
+                if ((prev = lec_arp_find(priv,src))) {
+                        lec_arp_remove(priv, prev);
+                        kfree(prev);
+                }
+                lec_arp_add(priv, entry);
+		goto out;
+        }
+        prev = entry;
+        entry = entry->next;
+        while (entry && entry->vcc != vcc) {
+                prev= entry;
+                entry = entry->next;
+        }
+        if (!entry) {
+                DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n");
+		goto out;
+        }
+        del_timer(&entry->timer);
+        memcpy(entry->mac_addr, src, ETH_ALEN);
+        entry->status = ESI_FORWARD_DIRECT;
+        entry->last_used = jiffies;
+        prev->next = entry->next;
+        if ((prev = lec_arp_find(priv, src))) {
+                lec_arp_remove(priv, prev);
+                kfree(prev);
+        }
+        lec_arp_add(priv, entry);
+out:
+	spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+}
+MODULE_LICENSE("GPL");
diff --git a/net/atm/lec.h b/net/atm/lec.h
new file mode 100644
index 00000000000..6606082b29a
--- /dev/null
+++ b/net/atm/lec.h
@@ -0,0 +1,142 @@
+/*
+ *
+ * Lan Emulation client header file
+ *
+ * Marko Kiiskila mkiiskila@yahoo.com
+ *
+ */
+
+#ifndef _LEC_H_
+#define _LEC_H_
+
+#include <linux/config.h>
+#include <linux/atmdev.h>
+#include <linux/netdevice.h>
+#include <linux/atmlec.h>
+
+#define LEC_HEADER_LEN 16
+
+struct lecdatahdr_8023 {
+  unsigned short le_header;
+  unsigned char h_dest[ETH_ALEN];
+  unsigned char h_source[ETH_ALEN];
+  unsigned short h_type;
+};
+
+struct lecdatahdr_8025 {
+  unsigned short le_header;
+  unsigned char ac_pad;
+  unsigned char fc;
+  unsigned char h_dest[ETH_ALEN];
+  unsigned char h_source[ETH_ALEN];
+};
+
+#define LEC_MINIMUM_8023_SIZE   62
+#define LEC_MINIMUM_8025_SIZE   16
+
+/*
+ * Operations that LANE2 capable device can do. Two first functions
+ * are used to make the device do things. See spec 3.1.3 and 3.1.4.
+ *
+ * The third function is intented for the MPOA component sitting on
+ * top of the LANE device. The MPOA component assigns it's own function
+ * to (*associate_indicator)() and the LANE device will use that
+ * function to tell about TLVs it sees floating through.
+ *
+ */
+struct lane2_ops {
+	int  (*resolve)(struct net_device *dev, u8 *dst_mac, int force,
+                        u8 **tlvs, u32 *sizeoftlvs);
+        int  (*associate_req)(struct net_device *dev, u8 *lan_dst,
+                              u8 *tlvs, u32 sizeoftlvs);
+	void (*associate_indicator)(struct net_device *dev, u8 *mac_addr,
+                                    u8 *tlvs, u32 sizeoftlvs);
+};
+
+/*
+ * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType
+ * frames. 
+ * 1. Dix Ethernet EtherType frames encoded by placing EtherType
+ *    field in h_type field. Data follows immediatelly after header.
+ * 2. LLC Data frames whose total length, including LLC field and data,
+ *    but not padding required to meet the minimum data frame length, 
+ *    is less than 1536(0x0600) MUST be encoded by placing that length
+ *    in the h_type field. The LLC field follows header immediatelly.
+ * 3. LLC data frames longer than this maximum MUST be encoded by placing
+ *    the value 0 in the h_type field.
+ *
+ */
+
+/* Hash table size */
+#define LEC_ARP_TABLE_SIZE 16
+
+struct lec_priv {
+        struct net_device_stats stats;
+        unsigned short lecid;      /* Lecid of this client */
+        struct lec_arp_table *lec_arp_empty_ones;
+        /* Used for storing VCC's that don't have a MAC address attached yet */
+        struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE];
+        /* Actual LE ARP table */
+        struct lec_arp_table *lec_no_forward;
+        /* Used for storing VCC's (and forward packets from) which are to
+           age out by not using them to forward packets. 
+           This is because to some LE clients there will be 2 VCCs. Only
+           one of them gets used. */
+        struct lec_arp_table *mcast_fwds;
+        /* With LANEv2 it is possible that BUS (or a special multicast server)
+           establishes multiple Multicast Forward VCCs to us. This list
+           collects all those VCCs. LANEv1 client has only one item in this
+           list. These entries are not aged out. */
+        spinlock_t lec_arp_lock;
+        struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
+        struct atm_vcc *lecd;
+        struct timer_list lec_arp_timer;
+        /* C10 */
+        unsigned int maximum_unknown_frame_count;
+/* Within the period of time defined by this variable, the client will send 
+   no more than C10 frames to BUS for a given unicast destination. (C11) */
+        unsigned long max_unknown_frame_time;
+/* If no traffic has been sent in this vcc for this period of time,
+   vcc will be torn down (C12)*/
+        unsigned long vcc_timeout_period;
+/* An LE Client MUST not retry an LE_ARP_REQUEST for a 
+   given frame's LAN Destination more than maximum retry count times,
+   after the first LEC_ARP_REQUEST (C13)*/
+        unsigned short max_retry_count;
+/* Max time the client will maintain an entry in its arp cache in
+   absence of a verification of that relationship (C17)*/
+        unsigned long aging_time;
+/* Max time the client will maintain an entry in cache when
+   topology change flag is true (C18) */
+        unsigned long forward_delay_time;
+/* Topology change flag  (C19)*/
+        int topology_change;
+/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE
+   cycle to take (C20)*/
+        unsigned long arp_response_time;
+/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the
+   LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/
+        unsigned long flush_timeout;
+/* The time since sending a frame to the bus after which the
+   LE Client may assume that the frame has been either discarded or
+   delivered to the recipient (C22) */
+        unsigned long path_switching_delay;
+
+        u8 *tlvs;          /* LANE2: TLVs are new                */
+        u32 sizeoftlvs;    /* The size of the tlv array in bytes */
+        int lane_version;  /* LANE2                              */
+	int itfnum;        /* e.g. 2 for lec2, 5 for lec5        */
+        struct lane2_ops *lane2_ops; /* can be NULL for LANE v1  */
+        int is_proxy;      /* bridge between ATM and Ethernet    */
+        int is_trdev;      /* Device type, 0 = Ethernet, 1 = TokenRing */
+};
+
+struct lec_vcc_priv {
+	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
+	int xoff;
+};
+
+#define LEC_VCC_PRIV(vcc)	((struct lec_vcc_priv *)((vcc)->user_back))
+
+#endif /* _LEC_H_ */
+
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
new file mode 100644
index 00000000000..39744809464
--- /dev/null
+++ b/net/atm/lec_arpc.h
@@ -0,0 +1,92 @@
+/*
+ * Lec arp cache
+ * Marko Kiiskila mkiiskila@yahoo.com
+ *
+ */
+#ifndef _LEC_ARP_H
+#define _LEC_ARP_H
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/if_ether.h>
+#include <linux/atmlec.h>
+
+struct lec_arp_table {
+        struct lec_arp_table *next;          /* Linked entry list */
+        unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */
+        unsigned char mac_addr[ETH_ALEN];    /* Mac address */
+        int is_rdesc;                        /* Mac address is a route descriptor */
+        struct atm_vcc *vcc;                 /* Vcc this entry is attached */
+        struct atm_vcc *recv_vcc;            /* Vcc we receive data from */
+        void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); 
+                                             /* Push that leads to daemon */
+        void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb);
+                                             /* Push that leads to daemon */
+        void (*old_close)(struct atm_vcc *vcc);
+                                             /* We want to see when this
+                                              * vcc gets closed */
+        unsigned long last_used;             /* For expiry */
+        unsigned long timestamp;             /* Used for various timestamping
+                                              * things:
+                                              * 1. FLUSH started 
+                                              *    (status=ESI_FLUSH_PENDING)
+                                              * 2. Counting to 
+                                              *    max_unknown_frame_time
+                                              *    (status=ESI_ARP_PENDING||
+                                              *     status=ESI_VC_PENDING)
+                                              */
+        unsigned char no_tries;              /* No of times arp retry has been 
+                                                tried */
+        unsigned char status;                /* Status of this entry */
+        unsigned short flags;                /* Flags for this entry */
+        unsigned short packets_flooded;      /* Data packets flooded */
+        unsigned long flush_tran_id;         /* Transaction id in flush protocol */
+        struct timer_list timer;             /* Arping timer */
+        struct lec_priv *priv;               /* Pointer back */
+
+        u8  *tlvs;             /* LANE2: Each MAC address can have TLVs    */
+        u32 sizeoftlvs;        /* associated with it. sizeoftlvs tells the */
+                               /* the length of the tlvs array             */
+        struct sk_buff_head tx_wait; /* wait queue for outgoing packets    */
+};
+
+struct tlv {                   /* LANE2: Template tlv struct for accessing */
+                               /* the tlvs in the lec_arp_table->tlvs array*/
+        u32 type;
+        u8  length;
+        u8  value[255];
+};
+
+/* Status fields */
+#define ESI_UNKNOWN 0       /*
+                             * Next packet sent to this mac address
+                             * causes ARP-request to be sent 
+                             */
+#define ESI_ARP_PENDING 1   /*
+                             * There is no ATM address associated with this
+                             * 48-bit address.  The LE-ARP protocol is in
+                             * progress.
+                             */
+#define ESI_VC_PENDING 2    /*
+                             * There is a valid ATM address associated with 
+                             * this 48-bit address but there is no VC set 
+                             * up to that ATM address.  The signaling 
+                             * protocol is in process.
+                             */
+#define ESI_FLUSH_PENDING 4 /*
+                             * The LEC has been notified of the FLUSH_START
+                             * status and it is assumed that the flush 
+                             * protocol is in process.
+                             */
+#define ESI_FORWARD_DIRECT 5 /*
+                              * Either the Path Switching Delay (C22) has 
+                              * elapsed or the LEC has notified the Mapping 
+                              * that the flush protocol has completed.  In 
+                              * either case, it is safe to forward packets 
+                              * to this address via the data direct VC.
+                              */
+
+/* Flag values */
+#define LEC_REMOTE_FLAG      0x0001
+#define LEC_PERMANENT_FLAG   0x0002
+
+#endif
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
new file mode 100644
index 00000000000..17a81ebe7e6
--- /dev/null
+++ b/net/atm/mpc.c
@@ -0,0 +1,1514 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/seq_file.h>
+
+/* We are an ethernet device */
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <net/sock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <net/checksum.h>   /* for ip_fast_csum() */
+#include <net/arp.h>
+#include <net/dst.h>
+#include <linux/proc_fs.h>
+
+/* And atm device */
+#include <linux/atmdev.h>
+#include <linux/atmlec.h>
+#include <linux/atmmpc.h>
+/* Modular too */
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include "lec.h"
+#include "mpc.h"
+#include "resources.h"
+
+/*
+ * mpc.c: Implementation of MPOA client kernel part 
+ */
+
+#if 0
+#define dprintk printk   /* debug */
+#else
+#define dprintk(format,args...)
+#endif
+
+#if 0
+#define ddprintk printk  /* more debug */
+#else
+#define ddprintk(format,args...)
+#endif
+
+
+
+#define MPOA_TAG_LEN 4
+
+/* mpc_daemon -> kernel */
+static void MPOA_trigger_rcvd (struct k_message *msg, struct mpoa_client *mpc);
+static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc);
+static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc);
+static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc);
+static void mps_death(struct k_message *msg, struct mpoa_client *mpc);
+static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action);
+static void MPOA_cache_impos_rcvd(struct k_message *msg, struct mpoa_client *mpc);
+static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc);
+static void set_mps_mac_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc);
+
+static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac,
+			  uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type);
+static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry);
+
+static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc);
+static void mpoad_close(struct atm_vcc *vcc);
+static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb);
+
+static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb);
+static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev);
+static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev);
+static void mpc_timer_refresh(void);
+static void mpc_cache_check( unsigned long checking_time  );
+
+static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
+	0xaa, 0xaa, 0x03,
+	{0x00, 0x00, 0x5e},
+	{0x00, 0x03}         /* For MPOA control PDUs */
+};        
+static struct llc_snap_hdr llc_snap_mpoa_data = {
+	0xaa, 0xaa, 0x03,
+	{0x00, 0x00, 0x00},
+	{0x08, 0x00}         /* This is for IP PDUs only */
+};        
+static struct llc_snap_hdr llc_snap_mpoa_data_tagged = {
+	0xaa, 0xaa, 0x03,
+	{0x00, 0x00, 0x00},
+	{0x88, 0x4c}         /* This is for tagged data PDUs */
+};        
+
+static struct notifier_block mpoa_notifier = {
+	mpoa_event_listener,
+	NULL,
+	0
+};
+
+#ifdef CONFIG_PROC_FS
+extern int mpc_proc_init(void);
+extern void mpc_proc_clean(void);
+#endif
+
+struct mpoa_client *mpcs = NULL; /* FIXME */
+static struct atm_mpoa_qos *qos_head = NULL;
+static struct timer_list mpc_timer = TIMER_INITIALIZER(NULL, 0, 0);
+
+
+static struct mpoa_client *find_mpc_by_itfnum(int itf)
+{
+	struct mpoa_client *mpc;
+	
+	mpc = mpcs;  /* our global linked list */
+	while (mpc != NULL) {
+		if (mpc->dev_num == itf)
+			return mpc;
+		mpc = mpc->next;    
+	}
+
+	return NULL;   /* not found */
+}
+
+static struct mpoa_client *find_mpc_by_vcc(struct atm_vcc *vcc)
+{
+	struct mpoa_client *mpc;
+	
+	mpc = mpcs;  /* our global linked list */
+	while (mpc != NULL) {
+		if (mpc->mpoad_vcc == vcc)
+			return mpc;
+		mpc = mpc->next;
+	}
+
+	return NULL;   /* not found */
+}
+
+static struct mpoa_client *find_mpc_by_lec(struct net_device *dev)
+{
+	struct mpoa_client *mpc;
+	
+	mpc = mpcs;  /* our global linked list */
+	while (mpc != NULL) {
+		if (mpc->dev == dev)
+			return mpc;
+		mpc = mpc->next;
+	}
+
+	return NULL;   /* not found */
+}
+
+/*
+ * Functions for managing QoS list
+ */
+
+/*
+ * Overwrites the old entry or makes a new one.
+ */
+struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos)
+{
+	struct atm_mpoa_qos *entry;
+
+	entry = atm_mpoa_search_qos(dst_ip);
+	if (entry != NULL) {
+		entry->qos = *qos;
+		return entry;
+	}
+
+	entry = kmalloc(sizeof(struct atm_mpoa_qos), GFP_KERNEL);
+	if (entry == NULL) {
+		printk("mpoa: atm_mpoa_add_qos: out of memory\n");
+		return entry;
+	}
+
+	entry->ipaddr = dst_ip;
+	entry->qos = *qos;
+
+	entry->next = qos_head;
+	qos_head = entry;
+
+	return entry;
+}
+
+struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip)
+{
+	struct atm_mpoa_qos *qos;
+
+	qos = qos_head;
+	while( qos != NULL ){
+		if(qos->ipaddr == dst_ip) {
+			break;
+		}
+		qos = qos->next;
+	}
+
+	return qos;
+}        
+
+/*
+ * Returns 0 for failure
+ */
+int atm_mpoa_delete_qos(struct atm_mpoa_qos *entry)
+{
+
+	struct atm_mpoa_qos *curr;
+
+	if (entry == NULL) return 0;
+	if (entry == qos_head) {
+		qos_head = qos_head->next;
+		kfree(entry);
+		return 1;
+	}
+
+	curr = qos_head;
+	while (curr != NULL) {
+		if (curr->next == entry) {
+			curr->next = entry->next;
+			kfree(entry);
+			return 1;
+		}
+		curr = curr->next;
+	}
+
+	return 0;
+}
+
+/* this is buggered - we need locking for qos_head */
+void atm_mpoa_disp_qos(struct seq_file *m)
+{
+	unsigned char *ip;
+	char ipaddr[16];
+	struct atm_mpoa_qos *qos;
+
+	qos = qos_head;
+	seq_printf(m, "QoS entries for shortcuts:\n");
+	seq_printf(m, "IP address\n  TX:max_pcr pcr     min_pcr max_cdv max_sdu\n  RX:max_pcr pcr     min_pcr max_cdv max_sdu\n");
+
+	ipaddr[sizeof(ipaddr)-1] = '\0';
+	while (qos != NULL) {
+		ip = (unsigned char *)&qos->ipaddr;
+		sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(ip));
+		seq_printf(m, "%u.%u.%u.%u\n     %-7d %-7d %-7d %-7d %-7d\n     %-7d %-7d %-7d %-7d %-7d\n",
+				NIPQUAD(ipaddr),
+				qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu,
+				qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu);
+		qos = qos->next;
+	}
+}
+
+static struct net_device *find_lec_by_itfnum(int itf)
+{
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	sprintf(name, "lec%d", itf);
+	dev = dev_get_by_name(name);
+	
+	return dev;
+}
+
+static struct mpoa_client *alloc_mpc(void)
+{
+	struct mpoa_client *mpc;
+
+	mpc = kmalloc(sizeof (struct mpoa_client), GFP_KERNEL);
+	if (mpc == NULL)
+		return NULL;
+	memset(mpc, 0, sizeof(struct mpoa_client));
+	rwlock_init(&mpc->ingress_lock);
+	rwlock_init(&mpc->egress_lock);
+	mpc->next = mpcs;
+	atm_mpoa_init_cache(mpc);
+
+	mpc->parameters.mpc_p1 = MPC_P1;
+	mpc->parameters.mpc_p2 = MPC_P2;
+	memset(mpc->parameters.mpc_p3,0,sizeof(mpc->parameters.mpc_p3));
+	mpc->parameters.mpc_p4 = MPC_P4;
+	mpc->parameters.mpc_p5 = MPC_P5; 
+	mpc->parameters.mpc_p6 = MPC_P6;
+	
+	mpcs = mpc;
+	
+	return mpc;
+}
+
+/*
+ *
+ * start_mpc() puts the MPC on line. All the packets destined
+ * to the lec underneath us are now being monitored and 
+ * shortcuts will be established.
+ *
+ */
+static void start_mpc(struct mpoa_client *mpc, struct net_device *dev)
+{
+	
+	dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name); 
+	if (dev->hard_start_xmit == NULL) {
+		printk("mpoa: (%s) start_mpc: dev->hard_start_xmit == NULL, not starting\n",
+		       dev->name);
+		return;
+	}
+	mpc->old_hard_start_xmit = dev->hard_start_xmit;
+	dev->hard_start_xmit = mpc_send_packet;
+
+	return;
+}
+
+static void stop_mpc(struct mpoa_client *mpc)
+{
+	
+	dprintk("mpoa: (%s) stop_mpc:", mpc->dev->name); 
+
+	/* Lets not nullify lec device's dev->hard_start_xmit */
+	if (mpc->dev->hard_start_xmit != mpc_send_packet) {
+		dprintk(" mpc already stopped, not fatal\n");
+		return;
+	}
+	dprintk("\n");
+	mpc->dev->hard_start_xmit = mpc->old_hard_start_xmit;
+	mpc->old_hard_start_xmit = NULL;
+	/* close_shortcuts(mpc);    ??? FIXME */
+	
+	return;
+}
+
+static const char *mpoa_device_type_string(char type) __attribute__ ((unused));
+
+static const char *mpoa_device_type_string(char type)
+{
+	switch(type) {
+	case NON_MPOA:
+		return "non-MPOA device";
+		break;
+	case MPS:
+		return "MPS";
+		break;
+	case MPC:
+		return "MPC";
+		break;
+	case MPS_AND_MPC:
+		return "both MPS and MPC";
+		break;
+	default:
+		return "unspecified (non-MPOA) device";
+		break;
+	}
+
+	return ""; /* not reached */
+}
+
+/*
+ * lec device calls this via its dev->priv->lane2_ops->associate_indicator()
+ * when it sees a TLV in LE_ARP packet.
+ * We fill in the pointer above when we see a LANE2 lec initializing
+ * See LANE2 spec 3.1.5
+ *
+ * Quite a big and ugly function but when you look at it
+ * all it does is to try to locate and parse MPOA Device
+ * Type TLV.
+ * We give our lec a pointer to this function and when the
+ * lec sees a TLV it uses the pointer to call this function.
+ *
+ */
+static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr,
+			    uint8_t *tlvs, uint32_t sizeoftlvs)
+{
+	uint32_t type;
+	uint8_t length, mpoa_device_type, number_of_mps_macs;
+	uint8_t *end_of_tlvs;
+	struct mpoa_client *mpc;
+	
+	mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */
+	dprintk("mpoa: (%s) lane2_assoc_ind: received TLV(s), ", dev->name);
+	dprintk("total length of all TLVs %d\n", sizeoftlvs);
+	mpc = find_mpc_by_lec(dev); /* Sampo-Fix: moved here from below */
+	if (mpc == NULL) {
+		printk("mpoa: (%s) lane2_assoc_ind: no mpc\n", dev->name);
+		return;
+	}
+	end_of_tlvs = tlvs + sizeoftlvs;
+	while (end_of_tlvs - tlvs >= 5) {
+		type = (tlvs[0] << 24) | (tlvs[1] << 16) | (tlvs[2] << 8) | tlvs[3];
+		length = tlvs[4];
+		tlvs += 5;
+		dprintk("    type 0x%x length %02x\n", type, length);
+		if (tlvs + length > end_of_tlvs) {
+			printk("TLV value extends past its buffer, aborting parse\n");
+			return;
+		}
+		
+		if (type == 0) {
+			printk("mpoa: (%s) lane2_assoc_ind: TLV type was 0, returning\n", dev->name);
+			return;
+		}
+
+		if (type != TLV_MPOA_DEVICE_TYPE) {
+			tlvs += length;
+			continue;  /* skip other TLVs */
+		}
+		mpoa_device_type = *tlvs++;
+		number_of_mps_macs = *tlvs++;
+		dprintk("mpoa: (%s) MPOA device type '%s', ", dev->name, mpoa_device_type_string(mpoa_device_type));
+		if (mpoa_device_type == MPS_AND_MPC &&
+		    length < (42 + number_of_mps_macs*ETH_ALEN)) { /* :) */
+			printk("\nmpoa: (%s) lane2_assoc_ind: short MPOA Device Type TLV\n",
+			       dev->name);
+			continue;
+		}
+		if ((mpoa_device_type == MPS || mpoa_device_type == MPC)
+		    && length < 22 + number_of_mps_macs*ETH_ALEN) {
+			printk("\nmpoa: (%s) lane2_assoc_ind: short MPOA Device Type TLV\n",
+				dev->name);
+			continue;
+		}
+		if (mpoa_device_type != MPS && mpoa_device_type != MPS_AND_MPC) {
+			dprintk("ignoring non-MPS device\n");
+			if (mpoa_device_type == MPC) tlvs += 20;
+			continue;  /* we are only interested in MPSs */
+		}
+		if (number_of_mps_macs == 0 && mpoa_device_type == MPS_AND_MPC) {
+			printk("\nmpoa: (%s) lane2_assoc_ind: MPS_AND_MPC has zero MACs\n", dev->name);
+			continue;  /* someone should read the spec */
+		}
+		dprintk("this MPS has %d MAC addresses\n", number_of_mps_macs);
+		
+		/* ok, now we can go and tell our daemon the control address of MPS */
+		send_set_mps_ctrl_addr(tlvs, mpc);
+		
+		tlvs = copy_macs(mpc, mac_addr, tlvs, number_of_mps_macs, mpoa_device_type);
+		if (tlvs == NULL) return;
+	}
+	if (end_of_tlvs - tlvs != 0)
+		printk("mpoa: (%s) lane2_assoc_ind: ignoring %Zd bytes of trailing TLV carbage\n",
+		       dev->name, end_of_tlvs - tlvs);
+	return;
+}
+
+/*
+ * Store at least advertizing router's MAC address
+ * plus the possible MAC address(es) to mpc->mps_macs.
+ * For a freshly allocated MPOA client mpc->mps_macs == 0.
+ */
+static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac,
+			  uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type)
+{
+	int num_macs;
+	num_macs = (mps_macs > 1) ? mps_macs : 1;
+
+	if (mpc->number_of_mps_macs != num_macs) { /* need to reallocate? */
+		if (mpc->number_of_mps_macs != 0) kfree(mpc->mps_macs);
+		mpc->number_of_mps_macs = 0;
+		mpc->mps_macs = kmalloc(num_macs*ETH_ALEN, GFP_KERNEL);
+		if (mpc->mps_macs == NULL) {
+			printk("mpoa: (%s) copy_macs: out of mem\n", mpc->dev->name);
+			return NULL;
+		}
+	}
+	memcpy(mpc->mps_macs, router_mac, ETH_ALEN);
+	tlvs += 20; if (device_type == MPS_AND_MPC) tlvs += 20;
+	if (mps_macs > 0)
+		memcpy(mpc->mps_macs, tlvs, mps_macs*ETH_ALEN);
+	tlvs += mps_macs*ETH_ALEN;
+	mpc->number_of_mps_macs = num_macs;
+
+	return tlvs;
+}
+
+static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
+{
+	in_cache_entry *entry;
+	struct iphdr *iph;
+	char *buff;
+	uint32_t ipaddr = 0;
+
+	static struct {
+		struct llc_snap_hdr hdr;
+		uint32_t tag;
+	} tagged_llc_snap_hdr = {
+		{0xaa, 0xaa, 0x03, {0x00, 0x00, 0x00}, {0x88, 0x4c}},
+		0
+	};
+
+	buff = skb->data + mpc->dev->hard_header_len;
+	iph = (struct iphdr *)buff;
+	ipaddr = iph->daddr;
+
+	ddprintk("mpoa: (%s) send_via_shortcut: ipaddr 0x%x\n", mpc->dev->name, ipaddr);        
+
+	entry = mpc->in_ops->get(ipaddr, mpc);
+	if (entry == NULL) {
+		entry = mpc->in_ops->add_entry(ipaddr, mpc);
+		if (entry != NULL) mpc->in_ops->put(entry);
+		return 1;
+	}
+	if (mpc->in_ops->cache_hit(entry, mpc) != OPEN){   /* threshold not exceeded or VCC not ready */
+		ddprintk("mpoa: (%s) send_via_shortcut: cache_hit: returns != OPEN\n", mpc->dev->name);        
+		mpc->in_ops->put(entry);
+		return 1;
+	}
+
+	ddprintk("mpoa: (%s) send_via_shortcut: using shortcut\n", mpc->dev->name);        
+	/* MPOA spec A.1.4, MPOA client must decrement IP ttl at least by one */
+	if (iph->ttl <= 1) {
+		ddprintk("mpoa: (%s) send_via_shortcut: IP ttl = %u, using LANE\n", mpc->dev->name, iph->ttl);        
+		mpc->in_ops->put(entry);
+		return 1;
+	}
+	iph->ttl--;
+	iph->check = 0;
+	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	if (entry->ctrl_info.tag != 0) {
+		ddprintk("mpoa: (%s) send_via_shortcut: adding tag 0x%x\n", mpc->dev->name, entry->ctrl_info.tag);
+		tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
+		skb_pull(skb, ETH_HLEN);                       /* get rid of Eth header */
+		skb_push(skb, sizeof(tagged_llc_snap_hdr));    /* add LLC/SNAP header   */
+		memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr));
+	} else {
+		skb_pull(skb, ETH_HLEN);                        /* get rid of Eth header */
+		skb_push(skb, sizeof(struct llc_snap_hdr));     /* add LLC/SNAP header + tag  */
+		memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr));
+	}
+
+	atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+	ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
+	entry->shortcut->send(entry->shortcut, skb);
+	entry->packets_fwded++;
+	mpc->in_ops->put(entry);
+
+	return 0;
+}
+
+/*
+ * Probably needs some error checks and locking, not sure...
+ */
+static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
+{
+	int retval;
+	struct mpoa_client *mpc;
+	struct ethhdr *eth;
+	int i = 0;
+	
+	mpc = find_mpc_by_lec(dev); /* this should NEVER fail */
+	if(mpc == NULL) {
+		printk("mpoa: (%s) mpc_send_packet: no MPC found\n", dev->name);
+		goto non_ip;
+	}
+
+	eth = (struct ethhdr *)skb->data;
+	if (eth->h_proto != htons(ETH_P_IP))
+		goto non_ip; /* Multi-Protocol Over ATM :-) */
+
+	while (i < mpc->number_of_mps_macs) {
+		if (memcmp(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN), ETH_ALEN) == 0)
+			if ( send_via_shortcut(skb, mpc) == 0 )           /* try shortcut */
+				return 0;                                 /* success!     */
+		i++;
+	}
+
+ non_ip:
+	retval = mpc->old_hard_start_xmit(skb,dev);
+	
+	return retval;
+}
+
+static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
+{
+	int bytes_left;
+	struct mpoa_client *mpc;
+	struct atmmpc_ioc ioc_data;
+	in_cache_entry *in_entry;
+	uint32_t  ipaddr;
+	unsigned char *ip;
+
+	bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc));
+	if (bytes_left != 0) {
+		printk("mpoa: mpc_vcc_attach: Short read (missed %d bytes) from userland\n", bytes_left);
+		return -EFAULT;
+	}
+	ipaddr = ioc_data.ipaddr;
+	if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF)
+		return -EINVAL;
+	
+	mpc = find_mpc_by_itfnum(ioc_data.dev_num);
+	if (mpc == NULL)
+		return -EINVAL;
+	
+	if (ioc_data.type == MPC_SOCKET_INGRESS) {
+		in_entry = mpc->in_ops->get(ipaddr, mpc);
+		if (in_entry == NULL || in_entry->entry_state < INGRESS_RESOLVED) {
+			printk("mpoa: (%s) mpc_vcc_attach: did not find RESOLVED entry from ingress cache\n",
+				mpc->dev->name);
+			if (in_entry != NULL) mpc->in_ops->put(in_entry);
+			return -EINVAL;
+		}
+		ip = (unsigned char*)&in_entry->ctrl_info.in_dst_ip;
+		printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %u.%u.%u.%u\n",
+		       mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+		in_entry->shortcut = vcc;
+		mpc->in_ops->put(in_entry);
+	} else {
+		printk("mpoa: (%s) mpc_vcc_attach: attaching egress SVC\n", mpc->dev->name);
+	}
+
+	vcc->proto_data = mpc->dev;
+	vcc->push = mpc_push;
+
+	return 0;
+}
+
+/*
+ *
+ */
+static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev)
+{
+	struct mpoa_client *mpc;
+	in_cache_entry *in_entry;
+	eg_cache_entry *eg_entry;
+	
+	mpc = find_mpc_by_lec(dev);
+	if (mpc == NULL) {
+		printk("mpoa: (%s) mpc_vcc_close: close for unknown MPC\n", dev->name);
+		return;
+	}
+
+	dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name);
+	in_entry = mpc->in_ops->get_by_vcc(vcc, mpc);
+	if (in_entry) {
+		unsigned char *ip __attribute__ ((unused)) =
+		    (unsigned char *)&in_entry->ctrl_info.in_dst_ip;
+		dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %u.%u.%u.%u\n",
+		       mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+		in_entry->shortcut = NULL;
+		mpc->in_ops->put(in_entry);
+	}
+	eg_entry = mpc->eg_ops->get_by_vcc(vcc, mpc);
+	if (eg_entry) {
+		dprintk("mpoa: (%s) mpc_vcc_close: egress SVC closed\n", mpc->dev->name);
+		eg_entry->shortcut = NULL;
+		mpc->eg_ops->put(eg_entry);
+	}
+
+	if (in_entry == NULL && eg_entry == NULL)
+		dprintk("mpoa: (%s) mpc_vcc_close:  unused vcc closed\n", dev->name);
+
+	return;
+}
+
+static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	struct net_device *dev = (struct net_device *)vcc->proto_data;
+	struct sk_buff *new_skb;
+	eg_cache_entry *eg;
+	struct mpoa_client *mpc;
+	uint32_t tag;
+	char *tmp;
+	
+	ddprintk("mpoa: (%s) mpc_push:\n", dev->name);
+	if (skb == NULL) {
+		dprintk("mpoa: (%s) mpc_push: null skb, closing VCC\n", dev->name);
+		mpc_vcc_close(vcc, dev);
+		return;
+	}
+	
+	skb->dev = dev;
+	if (memcmp(skb->data, &llc_snap_mpoa_ctrl, sizeof(struct llc_snap_hdr)) == 0) {
+		struct sock *sk = sk_atm(vcc);
+
+		dprintk("mpoa: (%s) mpc_push: control packet arrived\n", dev->name);
+		/* Pass control packets to daemon */
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		sk->sk_data_ready(sk, skb->len);
+		return;
+	}
+
+	/* data coming over the shortcut */
+	atm_return(vcc, skb->truesize);
+
+	mpc = find_mpc_by_lec(dev);
+	if (mpc == NULL) {
+		printk("mpoa: (%s) mpc_push: unknown MPC\n", dev->name);
+		return;
+	}
+
+	if (memcmp(skb->data, &llc_snap_mpoa_data_tagged, sizeof(struct llc_snap_hdr)) == 0) { /* MPOA tagged data */
+		ddprintk("mpoa: (%s) mpc_push: tagged data packet arrived\n", dev->name);
+
+	} else if (memcmp(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr)) == 0) { /* MPOA data */
+		printk("mpoa: (%s) mpc_push: non-tagged data packet arrived\n", dev->name);
+		printk("           mpc_push: non-tagged data unsupported, purging\n");
+		dev_kfree_skb_any(skb);
+		return;
+	} else {
+		printk("mpoa: (%s) mpc_push: garbage arrived, purging\n", dev->name);
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	tmp = skb->data + sizeof(struct llc_snap_hdr);
+	tag = *(uint32_t *)tmp;
+
+	eg = mpc->eg_ops->get_by_tag(tag, mpc);
+	if (eg == NULL) {
+		printk("mpoa: (%s) mpc_push: Didn't find egress cache entry, tag = %u\n",
+		       dev->name,tag);
+		purge_egress_shortcut(vcc, NULL);
+		dev_kfree_skb_any(skb);
+		return;
+	}
+	
+	/*
+	 * See if ingress MPC is using shortcut we opened as a return channel.
+	 * This means we have a bi-directional vcc opened by us.
+	 */ 
+	if (eg->shortcut == NULL) {
+		eg->shortcut = vcc;
+		printk("mpoa: (%s) mpc_push: egress SVC in use\n", dev->name);
+	}
+
+	skb_pull(skb, sizeof(struct llc_snap_hdr) + sizeof(tag)); /* get rid of LLC/SNAP header */
+	new_skb = skb_realloc_headroom(skb, eg->ctrl_info.DH_length); /* LLC/SNAP is shorter than MAC header :( */
+	dev_kfree_skb_any(skb);
+	if (new_skb == NULL){
+		mpc->eg_ops->put(eg);
+		return;
+	}
+	skb_push(new_skb, eg->ctrl_info.DH_length);     /* add MAC header */
+	memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
+	new_skb->protocol = eth_type_trans(new_skb, dev);
+	new_skb->nh.raw = new_skb->data;
+
+	eg->latest_ip_addr = new_skb->nh.iph->saddr;
+	eg->packets_rcvd++;
+	mpc->eg_ops->put(eg);
+
+	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
+	netif_rx(new_skb);
+
+	return;
+}
+
+static struct atmdev_ops mpc_ops = { /* only send is required */
+	.close	= mpoad_close,
+	.send	= msg_from_mpoad
+};
+
+static struct atm_dev mpc_dev = {
+	.ops	= &mpc_ops,
+	.type	= "mpc",
+	.number	= 42,
+	.lock	= SPIN_LOCK_UNLOCKED
+	/* members not explicitly initialised will be 0 */
+};
+
+static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg)
+{
+	struct mpoa_client *mpc;
+	struct lec_priv *priv;
+	int err;
+	
+	if (mpcs == NULL) {
+		init_timer(&mpc_timer);
+		mpc_timer_refresh();
+
+		/* This lets us now how our LECs are doing */
+		err = register_netdevice_notifier(&mpoa_notifier);
+		if (err < 0) {
+			del_timer(&mpc_timer);
+			return err;
+		}
+	}
+	
+	mpc = find_mpc_by_itfnum(arg);
+	if (mpc == NULL) {
+		dprintk("mpoa: mpoad_attach: allocating new mpc for itf %d\n", arg);
+		mpc = alloc_mpc();
+		if (mpc == NULL)
+			return -ENOMEM;
+		mpc->dev_num = arg;
+		mpc->dev = find_lec_by_itfnum(arg); /* NULL if there was no lec */
+	}
+	if (mpc->mpoad_vcc) {
+		printk("mpoa: mpoad_attach: mpoad is already present for itf %d\n", arg);
+		return -EADDRINUSE;
+	}
+
+	if (mpc->dev) { /* check if the lec is LANE2 capable */
+		priv = (struct lec_priv *)mpc->dev->priv;
+		if (priv->lane_version < 2) {
+			dev_put(mpc->dev);
+			mpc->dev = NULL;
+		} else
+			priv->lane2_ops->associate_indicator = lane2_assoc_ind;  
+	}
+
+	mpc->mpoad_vcc = vcc;
+	vcc->dev = &mpc_dev;
+	vcc_insert_socket(sk_atm(vcc));
+	set_bit(ATM_VF_META,&vcc->flags);
+	set_bit(ATM_VF_READY,&vcc->flags);
+
+	if (mpc->dev) {
+		char empty[ATM_ESA_LEN];
+		memset(empty, 0, ATM_ESA_LEN);
+		
+		start_mpc(mpc, mpc->dev);
+		/* set address if mpcd e.g. gets killed and restarted.
+		 * If we do not do it now we have to wait for the next LE_ARP
+		 */
+		if ( memcmp(mpc->mps_ctrl_addr, empty, ATM_ESA_LEN) != 0 )
+			send_set_mps_ctrl_addr(mpc->mps_ctrl_addr, mpc);
+	}
+
+	__module_get(THIS_MODULE);
+	return arg;
+}
+
+static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc)
+{
+	struct k_message mesg;
+
+	memcpy (mpc->mps_ctrl_addr, addr, ATM_ESA_LEN);
+	
+	mesg.type = SET_MPS_CTRL_ADDR;
+	memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN);
+	msg_to_mpoad(&mesg, mpc);
+
+	return;
+}
+
+static void mpoad_close(struct atm_vcc *vcc)
+{
+	struct mpoa_client *mpc;
+	struct sk_buff *skb;
+
+	mpc = find_mpc_by_vcc(vcc);
+	if (mpc == NULL) {
+		printk("mpoa: mpoad_close: did not find MPC\n");
+		return;
+	}
+	if (!mpc->mpoad_vcc) {
+		printk("mpoa: mpoad_close: close for non-present mpoad\n");
+		return;
+	}
+	
+	mpc->mpoad_vcc = NULL;
+	if (mpc->dev) {
+		struct lec_priv *priv = (struct lec_priv *)mpc->dev->priv;
+		priv->lane2_ops->associate_indicator = NULL;
+		stop_mpc(mpc);
+		dev_put(mpc->dev);
+	}
+
+	mpc->in_ops->destroy_cache(mpc);
+	mpc->eg_ops->destroy_cache(mpc);
+
+	while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue))) {
+		atm_return(vcc, skb->truesize);
+		kfree_skb(skb);
+	}
+	
+	printk("mpoa: (%s) going down\n",
+		(mpc->dev) ? mpc->dev->name : "<unknown>");
+	module_put(THIS_MODULE);
+
+	return;
+}
+
+/*
+ *
+ */
+static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+	
+	struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
+	struct k_message *mesg = (struct k_message*)skb->data;
+	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	
+	if (mpc == NULL) {
+		printk("mpoa: msg_from_mpoad: no mpc found\n");
+		return 0;
+	}
+	dprintk("mpoa: (%s) msg_from_mpoad:", (mpc->dev) ? mpc->dev->name : "<unknown>");
+	switch(mesg->type) {
+	case MPOA_RES_REPLY_RCVD:
+		dprintk(" mpoa_res_reply_rcvd\n");
+		MPOA_res_reply_rcvd(mesg, mpc);
+		break;
+	case MPOA_TRIGGER_RCVD:
+		dprintk(" mpoa_trigger_rcvd\n");
+		MPOA_trigger_rcvd(mesg, mpc);
+		break;
+	case INGRESS_PURGE_RCVD:
+		dprintk(" nhrp_purge_rcvd\n");
+		ingress_purge_rcvd(mesg, mpc);
+		break;
+	case EGRESS_PURGE_RCVD:
+		dprintk(" egress_purge_reply_rcvd\n");
+		egress_purge_rcvd(mesg, mpc);
+		break;
+	case MPS_DEATH:
+		dprintk(" mps_death\n");
+		mps_death(mesg, mpc);
+		break;
+	case CACHE_IMPOS_RCVD:
+		dprintk(" cache_impos_rcvd\n");
+		MPOA_cache_impos_rcvd(mesg, mpc);
+		break;
+	case SET_MPC_CTRL_ADDR:
+		dprintk(" set_mpc_ctrl_addr\n");
+		set_mpc_ctrl_addr_rcvd(mesg, mpc);
+		break;
+	case SET_MPS_MAC_ADDR:
+		dprintk(" set_mps_mac_addr\n");
+		set_mps_mac_addr_rcvd(mesg, mpc);
+		break;
+	case CLEAN_UP_AND_EXIT:
+		dprintk(" clean_up_and_exit\n");
+		clean_up(mesg, mpc, DIE);
+		break;
+	case RELOAD:
+		dprintk(" reload\n");
+		clean_up(mesg, mpc, RELOAD);
+		break;
+	case SET_MPC_PARAMS:
+		dprintk(" set_mpc_params\n");
+		mpc->parameters = mesg->content.params;
+		break;
+	default:
+		dprintk(" unknown message %d\n", mesg->type);
+		break;
+	}
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/* Remember that this function may not do things that sleep */
+int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
+{
+	struct sk_buff *skb;
+	struct sock *sk;
+
+	if (mpc == NULL || !mpc->mpoad_vcc) {
+		printk("mpoa: msg_to_mpoad: mesg %d to a non-existent mpoad\n", mesg->type);
+		return -ENXIO;
+	}
+
+	skb = alloc_skb(sizeof(struct k_message), GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	skb_put(skb, sizeof(struct k_message));
+	memcpy(skb->data, mesg, sizeof(struct k_message));
+	atm_force_charge(mpc->mpoad_vcc, skb->truesize);
+	
+	sk = sk_atm(mpc->mpoad_vcc);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+
+	return 0;
+}
+
+static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev_ptr)
+{
+	struct net_device *dev;
+	struct mpoa_client *mpc;
+	struct lec_priv *priv;
+
+	dev = (struct net_device *)dev_ptr;
+	if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+		return NOTIFY_DONE; /* we are only interested in lec:s */
+	
+	switch (event) {
+	case NETDEV_REGISTER:       /* a new lec device was allocated */
+		priv = (struct lec_priv *)dev->priv;
+		if (priv->lane_version < 2)
+			break;
+		priv->lane2_ops->associate_indicator = lane2_assoc_ind;
+		mpc = find_mpc_by_itfnum(priv->itfnum);
+		if (mpc == NULL) {
+			dprintk("mpoa: mpoa_event_listener: allocating new mpc for %s\n",
+			       dev->name);
+			mpc = alloc_mpc();
+			if (mpc == NULL) {
+				printk("mpoa: mpoa_event_listener: no new mpc");
+				break;
+			}
+		}
+		mpc->dev_num = priv->itfnum;
+		mpc->dev = dev;
+		dev_hold(dev);
+		dprintk("mpoa: (%s) was initialized\n", dev->name);
+		break;
+	case NETDEV_UNREGISTER:
+		/* the lec device was deallocated */
+		mpc = find_mpc_by_lec(dev);
+		if (mpc == NULL)
+			break;
+		dprintk("mpoa: device (%s) was deallocated\n", dev->name);
+		stop_mpc(mpc);
+		dev_put(mpc->dev);
+		mpc->dev = NULL;
+		break;
+	case NETDEV_UP:
+		/* the dev was ifconfig'ed up */
+		mpc = find_mpc_by_lec(dev);
+		if (mpc == NULL)
+			break;
+		if (mpc->mpoad_vcc != NULL) {
+			start_mpc(mpc, dev);
+		}
+		break;
+	case NETDEV_DOWN:
+		/* the dev was ifconfig'ed down */
+		/* this means that the flow of packets from the
+		 * upper layer stops
+		 */
+		mpc = find_mpc_by_lec(dev);
+		if (mpc == NULL)
+			break;
+		if (mpc->mpoad_vcc != NULL) {
+			stop_mpc(mpc);
+		}
+		break;
+	case NETDEV_REBOOT:
+	case NETDEV_CHANGE:
+	case NETDEV_CHANGEMTU:
+	case NETDEV_CHANGEADDR:
+	case NETDEV_GOING_DOWN:
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Functions which are called after a message is received from mpcd.
+ * Msg is reused on purpose.
+ */
+
+
+static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
+{
+	uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+	in_cache_entry *entry;
+
+	entry = mpc->in_ops->get(dst_ip, mpc);
+	if(entry == NULL){
+		entry = mpc->in_ops->add_entry(dst_ip, mpc);
+		entry->entry_state = INGRESS_RESOLVING;
+		msg->type = SND_MPOA_RES_RQST;
+		msg->content.in_info = entry->ctrl_info;
+		msg_to_mpoad(msg, mpc);
+		do_gettimeofday(&(entry->reply_wait));
+		mpc->in_ops->put(entry);
+		return;
+	}
+	
+	if(entry->entry_state == INGRESS_INVALID){
+		entry->entry_state = INGRESS_RESOLVING;
+		msg->type = SND_MPOA_RES_RQST;
+		msg->content.in_info = entry->ctrl_info;
+		msg_to_mpoad(msg, mpc);
+		do_gettimeofday(&(entry->reply_wait));
+		mpc->in_ops->put(entry);
+		return;
+	}
+	
+	printk("mpoa: (%s) MPOA_trigger_rcvd: entry already in resolving state\n",
+		(mpc->dev) ? mpc->dev->name : "<unknown>");
+	mpc->in_ops->put(entry);
+	return;
+}
+
+/*
+ * Things get complicated because we have to check if there's an egress
+ * shortcut with suitable traffic parameters we could use. 
+ */
+static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry)
+{
+	uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+	unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip;
+	struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip);
+	eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client);
+
+	if(eg_entry && eg_entry->shortcut){
+		if(eg_entry->shortcut->qos.txtp.traffic_class &
+		   msg->qos.txtp.traffic_class &
+		   (qos ? qos->qos.txtp.traffic_class : ATM_UBR | ATM_CBR)){
+			    if(eg_entry->shortcut->qos.txtp.traffic_class == ATM_UBR)
+				    entry->shortcut = eg_entry->shortcut;
+			    else if(eg_entry->shortcut->qos.txtp.max_pcr > 0)
+				    entry->shortcut = eg_entry->shortcut;
+		}
+	 	if(entry->shortcut){
+			dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(ip));
+			client->eg_ops->put(eg_entry);
+			return;
+		}
+	}
+	if (eg_entry != NULL)
+		client->eg_ops->put(eg_entry);
+
+	/* No luck in the egress cache we must open an ingress SVC */
+	msg->type = OPEN_INGRESS_SVC;
+	if (qos && (qos->qos.txtp.traffic_class == msg->qos.txtp.traffic_class))
+	{
+		msg->qos = qos->qos;
+		printk("mpoa: (%s) trying to get a CBR shortcut\n",client->dev->name);
+    	}
+	else memset(&msg->qos,0,sizeof(struct atm_qos));
+	msg_to_mpoad(msg, client);
+	return;
+}
+
+static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
+{
+	unsigned char *ip;
+
+	uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+	in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc);
+	ip = (unsigned char *)&dst_ip;
+	dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(ip));
+	ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry);
+	if(entry == NULL){
+		printk("\nmpoa: (%s) ARGH, received res. reply for an entry that doesn't exist.\n", mpc->dev->name);
+		return;
+	}
+	ddprintk(" entry_state = %d ", entry->entry_state);	
+
+	if (entry->entry_state == INGRESS_RESOLVED) {
+		printk("\nmpoa: (%s) MPOA_res_reply_rcvd for RESOLVED entry!\n", mpc->dev->name);
+		mpc->in_ops->put(entry);
+		return;
+	}
+
+	entry->ctrl_info = msg->content.in_info;
+	do_gettimeofday(&(entry->tv));
+	do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */
+	entry->refresh_time = 0;
+	ddprintk("entry->shortcut = %p\n", entry->shortcut);
+
+	if(entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL){
+		entry->entry_state = INGRESS_RESOLVED; 
+		mpc->in_ops->put(entry);
+		return; /* Shortcut already open... */
+	}
+
+	if (entry->shortcut != NULL) {
+		printk("mpoa: (%s) MPOA_res_reply_rcvd: entry->shortcut != NULL, impossible!\n",
+		       mpc->dev->name);
+		mpc->in_ops->put(entry);
+		return;
+	}
+	
+	check_qos_and_open_shortcut(msg, mpc, entry);
+	entry->entry_state = INGRESS_RESOLVED;
+	mpc->in_ops->put(entry);
+
+	return;
+
+}
+
+static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
+{
+	uint32_t dst_ip = msg->content.in_info.in_dst_ip;
+	uint32_t mask = msg->ip_mask;
+	unsigned char *ip = (unsigned char *)&dst_ip;
+	in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
+
+	if(entry == NULL){
+		printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ", mpc->dev->name);
+		printk("ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]);
+		return;
+	}
+
+	do {
+		dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %u.%u.%u.%u\n" ,
+			mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+		write_lock_bh(&mpc->ingress_lock);
+		mpc->in_ops->remove_entry(entry, mpc);
+		write_unlock_bh(&mpc->ingress_lock);
+		mpc->in_ops->put(entry);
+		entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
+	} while (entry != NULL);
+
+	return;
+} 
+
+static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
+{
+	uint32_t cache_id = msg->content.eg_info.cache_id;
+	eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(cache_id, mpc);
+	
+	if (entry == NULL) {
+		dprintk("mpoa: (%s) egress_purge_rcvd: purge for a non-existing entry\n", mpc->dev->name);
+		return;
+	}
+
+	write_lock_irq(&mpc->egress_lock);
+	mpc->eg_ops->remove_entry(entry, mpc);
+	write_unlock_irq(&mpc->egress_lock);
+
+	mpc->eg_ops->put(entry);
+
+	return;
+} 
+
+static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
+{
+	struct sock *sk;
+	struct k_message *purge_msg;
+	struct sk_buff *skb;
+
+	dprintk("mpoa: purge_egress_shortcut: entering\n");
+	if (vcc == NULL) {
+		printk("mpoa: purge_egress_shortcut: vcc == NULL\n");
+		return;
+	}
+
+	skb = alloc_skb(sizeof(struct k_message), GFP_ATOMIC);
+	if (skb == NULL) {
+		 printk("mpoa: purge_egress_shortcut: out of memory\n");
+		return;
+	}
+
+	skb_put(skb, sizeof(struct k_message));
+	memset(skb->data, 0, sizeof(struct k_message));
+	purge_msg = (struct k_message *)skb->data;
+	purge_msg->type = DATA_PLANE_PURGE;
+	if (entry != NULL)
+		purge_msg->content.eg_info = entry->ctrl_info;
+
+	atm_force_charge(vcc, skb->truesize);
+
+	sk = sk_atm(vcc);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+	dprintk("mpoa: purge_egress_shortcut: exiting:\n");
+
+	return;
+}
+
+/*
+ * Our MPS died. Tell our daemon to send NHRP data plane purge to each
+ * of the egress shortcuts we have.
+ */
+static void mps_death( struct k_message * msg, struct mpoa_client * mpc )
+{
+	eg_cache_entry *entry;
+
+	dprintk("mpoa: (%s) mps_death:\n", mpc->dev->name);
+
+	if(memcmp(msg->MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN)){
+		printk("mpoa: (%s) mps_death: wrong MPS\n", mpc->dev->name);
+		return;
+	}
+
+	/* FIXME: This knows too much of the cache structure */
+	read_lock_irq(&mpc->egress_lock);
+	entry = mpc->eg_cache;
+	while (entry != NULL) {
+		purge_egress_shortcut(entry->shortcut, entry);
+		entry = entry->next;
+	}
+	read_unlock_irq(&mpc->egress_lock);
+
+	mpc->in_ops->destroy_cache(mpc);
+	mpc->eg_ops->destroy_cache(mpc);
+
+	return;
+}
+
+static void MPOA_cache_impos_rcvd( struct k_message * msg, struct mpoa_client * mpc)
+{
+	uint16_t holding_time;
+	eg_cache_entry *entry = mpc->eg_ops->get_by_cache_id(msg->content.eg_info.cache_id, mpc);
+	
+	holding_time = msg->content.eg_info.holding_time;
+	dprintk("mpoa: (%s) MPOA_cache_impos_rcvd: entry = %p, holding_time = %u\n",
+	       mpc->dev->name, entry, holding_time);
+	if(entry == NULL && holding_time) {
+		entry = mpc->eg_ops->add_entry(msg, mpc);
+		mpc->eg_ops->put(entry);
+		return;
+	}
+	if(holding_time){
+		mpc->eg_ops->update(entry, holding_time);
+		return;
+	}
+	
+	write_lock_irq(&mpc->egress_lock);
+	mpc->eg_ops->remove_entry(entry, mpc);
+	write_unlock_irq(&mpc->egress_lock);
+
+	mpc->eg_ops->put(entry);
+	
+	return;
+}
+
+static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc)
+{
+	struct lec_priv *priv;
+	int i, retval ;
+
+	uint8_t tlv[4 + 1 + 1 + 1 + ATM_ESA_LEN];
+
+	tlv[0] = 00; tlv[1] = 0xa0; tlv[2] = 0x3e; tlv[3] = 0x2a; /* type  */
+	tlv[4] = 1 + 1 + ATM_ESA_LEN;  /* length                           */
+	tlv[5] = 0x02;                 /* MPOA client                      */
+	tlv[6] = 0x00;                 /* number of MPS MAC addresses      */
+
+	memcpy(&tlv[7], mesg->MPS_ctrl, ATM_ESA_LEN); /* MPC ctrl ATM addr */
+	memcpy(mpc->our_ctrl_addr, mesg->MPS_ctrl, ATM_ESA_LEN);
+
+	dprintk("mpoa: (%s) setting MPC ctrl ATM address to ",
+	       (mpc->dev) ? mpc->dev->name : "<unknown>");
+	for (i = 7; i < sizeof(tlv); i++)
+		dprintk("%02x ", tlv[i]);
+	dprintk("\n");
+
+	if (mpc->dev) {
+		priv = (struct lec_priv *)mpc->dev->priv;
+		retval = priv->lane2_ops->associate_req(mpc->dev, mpc->dev->dev_addr, tlv, sizeof(tlv));
+		if (retval == 0)
+			printk("mpoa: (%s) MPOA device type TLV association failed\n", mpc->dev->name);
+		retval = priv->lane2_ops->resolve(mpc->dev, NULL, 1, NULL, NULL);
+		if (retval < 0)
+			printk("mpoa: (%s) targetless LE_ARP request failed\n", mpc->dev->name);
+	}
+
+	return;
+}
+
+static void set_mps_mac_addr_rcvd(struct k_message *msg, struct mpoa_client *client)
+{
+
+	if(client->number_of_mps_macs)
+		kfree(client->mps_macs);
+	client->number_of_mps_macs = 0;
+	client->mps_macs = kmalloc(ETH_ALEN,GFP_KERNEL);
+	if (client->mps_macs == NULL) {
+		printk("mpoa: set_mps_mac_addr_rcvd: out of memory\n");
+		return;
+	}
+	client->number_of_mps_macs = 1;
+	memcpy(client->mps_macs, msg->MPS_ctrl, ETH_ALEN);
+	
+	return;
+}
+
+/*
+ * purge egress cache and tell daemon to 'action' (DIE, RELOAD)
+ */
+static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
+{
+
+	eg_cache_entry *entry;
+	msg->type = SND_EGRESS_PURGE;
+
+
+	/* FIXME: This knows too much of the cache structure */
+	read_lock_irq(&mpc->egress_lock);
+	entry = mpc->eg_cache;
+	while (entry != NULL){
+		    msg->content.eg_info = entry->ctrl_info;
+		    dprintk("mpoa: cache_id %u\n", entry->ctrl_info.cache_id);
+		    msg_to_mpoad(msg, mpc);
+		    entry = entry->next;
+	}
+	read_unlock_irq(&mpc->egress_lock);
+
+	msg->type = action;
+	msg_to_mpoad(msg, mpc);
+	return;
+}
+
+static void mpc_timer_refresh(void)
+{
+	mpc_timer.expires = jiffies + (MPC_P2 * HZ);
+	mpc_timer.data = mpc_timer.expires;
+	mpc_timer.function = mpc_cache_check;
+	add_timer(&mpc_timer);
+	
+	return;
+}
+
+static void mpc_cache_check( unsigned long checking_time  )
+{
+	struct mpoa_client *mpc = mpcs;
+	static unsigned long previous_resolving_check_time;
+	static unsigned long previous_refresh_time;
+	
+	while( mpc != NULL ){
+		mpc->in_ops->clear_count(mpc);
+		mpc->eg_ops->clear_expired(mpc);
+		if(checking_time - previous_resolving_check_time > mpc->parameters.mpc_p4 * HZ ){
+			mpc->in_ops->check_resolving(mpc);
+			previous_resolving_check_time = checking_time;
+		}
+		if(checking_time - previous_refresh_time > mpc->parameters.mpc_p5 * HZ ){
+			mpc->in_ops->refresh(mpc);
+			previous_refresh_time = checking_time;
+		}
+		mpc = mpc->next;
+	}
+	mpc_timer_refresh();
+	
+	return;
+}
+
+static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct atm_vcc *vcc = ATM_SD(sock);
+
+	if (cmd != ATMMPC_CTRL && cmd != ATMMPC_DATA)
+		return -ENOIOCTLCMD;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+		case ATMMPC_CTRL:
+			err = atm_mpoa_mpoad_attach(vcc, (int)arg);
+			if (err >= 0)
+				sock->state = SS_CONNECTED;
+			break;
+		case ATMMPC_DATA:
+			err = atm_mpoa_vcc_attach(vcc, (void __user *)arg);
+			break;
+		default:
+			break;
+	}
+	return err;
+}
+
+
+static struct atm_ioctl atm_ioctl_ops = {
+	.owner	= THIS_MODULE,
+	.ioctl	= atm_mpoa_ioctl,
+};
+
+static __init int atm_mpoa_init(void)
+{
+	register_atm_ioctl(&atm_ioctl_ops);
+
+#ifdef CONFIG_PROC_FS
+	if (mpc_proc_init() != 0)
+		printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n");
+	else
+		printk(KERN_INFO "mpoa: /proc/mpoa initialized\n");
+#endif
+
+	printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
+
+	return 0;
+}
+
+static void __exit atm_mpoa_cleanup(void)
+{
+	struct mpoa_client *mpc, *tmp;
+	struct atm_mpoa_qos *qos, *nextqos;
+	struct lec_priv *priv;
+
+#ifdef CONFIG_PROC_FS
+	mpc_proc_clean();
+#endif
+
+	del_timer(&mpc_timer);
+	unregister_netdevice_notifier(&mpoa_notifier);
+	deregister_atm_ioctl(&atm_ioctl_ops);
+
+	mpc = mpcs;
+	mpcs = NULL;
+	while (mpc != NULL) {
+		tmp = mpc->next;
+		if (mpc->dev != NULL) {
+			stop_mpc(mpc);
+			priv = (struct lec_priv *)mpc->dev->priv;
+			if (priv->lane2_ops != NULL)
+				priv->lane2_ops->associate_indicator = NULL;
+		}
+		ddprintk("mpoa: cleanup_module: about to clear caches\n");
+		mpc->in_ops->destroy_cache(mpc);
+		mpc->eg_ops->destroy_cache(mpc);
+		ddprintk("mpoa: cleanup_module: caches cleared\n");
+		kfree(mpc->mps_macs);
+		memset(mpc, 0, sizeof(struct mpoa_client));
+		ddprintk("mpoa: cleanup_module: about to kfree %p\n", mpc);
+		kfree(mpc);
+		ddprintk("mpoa: cleanup_module: next mpc is at %p\n", tmp);
+		mpc = tmp;
+	}
+
+	qos = qos_head;
+	qos_head = NULL;
+	while (qos != NULL) {
+		nextqos = qos->next;
+		dprintk("mpoa: cleanup_module: freeing qos entry %p\n", qos);
+		kfree(qos);
+		qos = nextqos;
+	}
+
+	return;
+}
+
+module_init(atm_mpoa_init);
+module_exit(atm_mpoa_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
new file mode 100644
index 00000000000..863ddf6079e
--- /dev/null
+++ b/net/atm/mpc.h
@@ -0,0 +1,53 @@
+#ifndef _MPC_H_
+#define _MPC_H_
+
+#include <linux/types.h>
+#include <linux/atm.h>
+#include <linux/atmmpc.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include "mpoa_caches.h"
+
+/* kernel -> mpc-daemon */
+int msg_to_mpoad(struct k_message *msg, struct mpoa_client *mpc);
+
+struct mpoa_client {
+        struct mpoa_client *next;
+        struct net_device *dev;      /* lec in question                     */
+        int dev_num;                 /* e.g. 2 for lec2                     */
+        int (*old_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev);
+        struct atm_vcc *mpoad_vcc;   /* control channel to mpoad            */
+        uint8_t mps_ctrl_addr[ATM_ESA_LEN];  /* MPS control ATM address     */
+        uint8_t our_ctrl_addr[ATM_ESA_LEN];  /* MPC's control ATM address   */
+
+        rwlock_t ingress_lock;
+        struct in_cache_ops *in_ops; /* ingress cache operations            */
+        in_cache_entry *in_cache;    /* the ingress cache of this MPC       */
+
+        rwlock_t egress_lock;
+        struct eg_cache_ops *eg_ops; /* egress cache operations             */
+        eg_cache_entry *eg_cache;    /* the egress  cache of this MPC       */
+
+        uint8_t *mps_macs;           /* array of MPS MAC addresses, >=1     */
+        int number_of_mps_macs;      /* number of the above MAC addresses   */
+        struct mpc_parameters parameters;  /* parameters for this client    */
+};
+
+
+struct atm_mpoa_qos {
+        struct atm_mpoa_qos *next;
+        uint32_t ipaddr;
+        struct atm_qos qos;
+};
+
+
+/* MPOA QoS operations */
+struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos);
+struct atm_mpoa_qos *atm_mpoa_search_qos(uint32_t dst_ip);
+int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos);
+
+/* Display QoS entries. This is for the procfs */
+struct seq_file;
+void atm_mpoa_disp_qos(struct seq_file *m);
+
+#endif /* _MPC_H_ */
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
new file mode 100644
index 00000000000..64ddebb6406
--- /dev/null
+++ b/net/atm/mpoa_caches.c
@@ -0,0 +1,576 @@
+#include <linux/types.h>
+#include <linux/atmmpc.h>
+#include <linux/time.h>
+
+#include "mpoa_caches.h"
+#include "mpc.h"
+
+/*
+ * mpoa_caches.c: Implementation of ingress and egress cache
+ * handling functions
+ */
+
+#if 0
+#define dprintk printk    /* debug */
+#else
+#define dprintk(format,args...)
+#endif
+
+#if 0
+#define ddprintk printk  /* more debug */
+#else
+#define ddprintk(format,args...)
+#endif
+
+static in_cache_entry *in_cache_get(uint32_t dst_ip,
+				    struct mpoa_client *client)
+{
+	in_cache_entry *entry;
+
+	read_lock_bh(&client->ingress_lock);
+	entry = client->in_cache;
+	while(entry != NULL){
+		if( entry->ctrl_info.in_dst_ip == dst_ip ){
+			atomic_inc(&entry->use);
+			read_unlock_bh(&client->ingress_lock);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_bh(&client->ingress_lock);
+
+	return NULL;
+}
+
+static in_cache_entry *in_cache_get_with_mask(uint32_t dst_ip,
+					      struct mpoa_client *client,
+					      uint32_t mask)
+{
+	in_cache_entry *entry;
+
+	read_lock_bh(&client->ingress_lock);
+	entry = client->in_cache;
+	while(entry != NULL){
+		if((entry->ctrl_info.in_dst_ip & mask)  == (dst_ip & mask )){
+			atomic_inc(&entry->use);
+			read_unlock_bh(&client->ingress_lock);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_bh(&client->ingress_lock);
+
+	return NULL;
+
+}
+
+static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc,
+					   struct mpoa_client *client )
+{
+	in_cache_entry *entry;
+
+	read_lock_bh(&client->ingress_lock);
+	entry = client->in_cache;
+	while(entry != NULL){
+		if(entry->shortcut == vcc) {
+			atomic_inc(&entry->use);
+			read_unlock_bh(&client->ingress_lock);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_bh(&client->ingress_lock);
+
+	return NULL;
+}
+
+static in_cache_entry *in_cache_add_entry(uint32_t dst_ip,
+					  struct mpoa_client *client)
+{
+	unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip;
+	in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL);
+
+	if (entry == NULL) {
+		printk("mpoa: mpoa_caches.c: new_in_cache_entry: out of memory\n");
+		return NULL;
+	}
+
+	dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]);
+	memset(entry,0,sizeof(in_cache_entry));
+
+	atomic_set(&entry->use, 1);
+	dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n");
+	write_lock_bh(&client->ingress_lock);
+	entry->next = client->in_cache;
+	entry->prev = NULL;
+	if (client->in_cache != NULL)
+		client->in_cache->prev = entry;
+	client->in_cache = entry;
+
+	memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
+	entry->ctrl_info.in_dst_ip = dst_ip;
+	do_gettimeofday(&(entry->tv));
+	entry->retry_time = client->parameters.mpc_p4;
+	entry->count = 1;
+	entry->entry_state = INGRESS_INVALID;
+	entry->ctrl_info.holding_time = HOLDING_TIME_DEFAULT;
+	atomic_inc(&entry->use);
+
+	write_unlock_bh(&client->ingress_lock);
+	dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: unlocked\n");
+
+	return entry;
+}
+
+static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
+{
+	struct atm_mpoa_qos *qos;
+	struct k_message msg;
+
+	entry->count++;
+	if(entry->entry_state == INGRESS_RESOLVED && entry->shortcut != NULL)
+		return OPEN;
+
+	if(entry->entry_state == INGRESS_REFRESHING){
+		if(entry->count > mpc->parameters.mpc_p1){
+			msg.type = SND_MPOA_RES_RQST;
+			msg.content.in_info = entry->ctrl_info;
+			memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN);
+			qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip);
+			if (qos != NULL) msg.qos = qos->qos;
+			msg_to_mpoad(&msg, mpc);
+			do_gettimeofday(&(entry->reply_wait));
+			entry->entry_state = INGRESS_RESOLVING;
+		}
+		if(entry->shortcut != NULL)
+			return OPEN;
+		return CLOSED;
+	}
+
+	if(entry->entry_state == INGRESS_RESOLVING && entry->shortcut != NULL)
+		return OPEN;
+
+	if( entry->count > mpc->parameters.mpc_p1 &&
+	    entry->entry_state == INGRESS_INVALID){
+		unsigned char *ip __attribute__ ((unused)) =
+		    (unsigned char *)&entry->ctrl_info.in_dst_ip;
+
+		dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, ip[0], ip[1], ip[2], ip[3]);
+		entry->entry_state = INGRESS_RESOLVING;
+		msg.type =  SND_MPOA_RES_RQST;
+		memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN );
+		msg.content.in_info = entry->ctrl_info;
+		qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip);
+		if (qos != NULL) msg.qos = qos->qos;
+		msg_to_mpoad( &msg, mpc);
+		do_gettimeofday(&(entry->reply_wait));
+	}
+
+	return CLOSED;
+}
+
+static void in_cache_put(in_cache_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->use)) {
+		memset(entry, 0, sizeof(in_cache_entry));
+		kfree(entry);
+	}
+
+	return;
+}
+
+/*
+ * This should be called with write lock on
+ */
+static void in_cache_remove_entry(in_cache_entry *entry,
+				  struct mpoa_client *client)
+{
+	struct atm_vcc *vcc;
+	struct k_message msg;
+	unsigned char *ip;
+
+	vcc = entry->shortcut;
+	ip = (unsigned char *)&entry->ctrl_info.in_dst_ip;
+	dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",ip[0], ip[1], ip[2], ip[3]);
+
+	if (entry->prev != NULL)
+		entry->prev->next = entry->next;
+	else
+		client->in_cache = entry->next;
+	if (entry->next != NULL)
+		entry->next->prev = entry->prev;
+	client->in_ops->put(entry);
+	if(client->in_cache == NULL && client->eg_cache == NULL){
+		msg.type = STOP_KEEP_ALIVE_SM;
+		msg_to_mpoad(&msg,client);
+	}
+
+	/* Check if the egress side still uses this VCC */
+	if (vcc != NULL) {
+		eg_cache_entry *eg_entry = client->eg_ops->get_by_vcc(vcc, client);
+		if (eg_entry != NULL) {
+			client->eg_ops->put(eg_entry);
+			return;
+		}
+		vcc_release_async(vcc, -EPIPE);
+	}
+
+	return;
+}
+
+
+/* Call this every MPC-p2 seconds... Not exactly correct solution,
+   but an easy one... */
+static void clear_count_and_expired(struct mpoa_client *client)
+{
+	unsigned char *ip;
+	in_cache_entry *entry, *next_entry;
+	struct timeval now;
+
+	do_gettimeofday(&now);
+
+	write_lock_bh(&client->ingress_lock);
+	entry = client->in_cache;
+	while(entry != NULL){
+		entry->count=0;
+		next_entry = entry->next;
+		if((now.tv_sec - entry->tv.tv_sec)
+		   > entry->ctrl_info.holding_time){
+			ip = (unsigned char*)&entry->ctrl_info.in_dst_ip;
+			dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(ip));
+			client->in_ops->remove_entry(entry, client);
+		}
+		entry = next_entry;
+	}
+	write_unlock_bh(&client->ingress_lock);
+
+	return;
+}
+
+/* Call this every MPC-p4 seconds. */
+static void check_resolving_entries(struct mpoa_client *client)
+{
+
+	struct atm_mpoa_qos *qos;
+	in_cache_entry *entry;
+	struct timeval now;
+	struct k_message msg;
+
+	do_gettimeofday( &now );
+
+	read_lock_bh(&client->ingress_lock);
+	entry = client->in_cache;
+	while( entry != NULL ){
+		if(entry->entry_state == INGRESS_RESOLVING){
+			if(now.tv_sec - entry->hold_down.tv_sec < client->parameters.mpc_p6){
+				entry = entry->next;                      /* Entry in hold down */
+				continue;
+			}
+			if( (now.tv_sec - entry->reply_wait.tv_sec) >
+			    entry->retry_time ){
+				entry->retry_time = MPC_C1*( entry->retry_time );
+				if(entry->retry_time > client->parameters.mpc_p5){
+					/* Retry time maximum exceeded, put entry in hold down. */
+					do_gettimeofday(&(entry->hold_down));
+					entry->retry_time = client->parameters.mpc_p4;
+					entry = entry->next;
+					continue;
+				}
+				/* Ask daemon to send a resolution request. */
+				memset(&(entry->hold_down),0,sizeof(struct timeval));
+				msg.type = SND_MPOA_RES_RTRY;
+				memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN);
+				msg.content.in_info = entry->ctrl_info;
+				qos = atm_mpoa_search_qos(entry->ctrl_info.in_dst_ip);
+				if (qos != NULL) msg.qos = qos->qos;
+				msg_to_mpoad(&msg, client);
+				do_gettimeofday(&(entry->reply_wait));
+			}
+		}
+		entry = entry->next;
+	}
+	read_unlock_bh(&client->ingress_lock);
+}
+
+/* Call this every MPC-p5 seconds. */
+static void refresh_entries(struct mpoa_client *client)
+{
+	struct timeval now;
+	struct in_cache_entry *entry = client->in_cache;
+
+	ddprintk("mpoa: mpoa_caches.c: refresh_entries\n");
+	do_gettimeofday(&now);
+
+	read_lock_bh(&client->ingress_lock);
+	while( entry != NULL ){
+		if( entry->entry_state == INGRESS_RESOLVED ){
+			if(!(entry->refresh_time))
+				entry->refresh_time = (2*(entry->ctrl_info.holding_time))/3;
+			if( (now.tv_sec - entry->reply_wait.tv_sec) > entry->refresh_time ){
+				dprintk("mpoa: mpoa_caches.c: refreshing an entry.\n");
+				entry->entry_state = INGRESS_REFRESHING;
+
+			}
+		}
+		entry = entry->next;
+	}
+	read_unlock_bh(&client->ingress_lock);
+}
+
+static void in_destroy_cache(struct mpoa_client *mpc)
+{
+	write_lock_irq(&mpc->ingress_lock);
+	while(mpc->in_cache != NULL)
+		mpc->in_ops->remove_entry(mpc->in_cache, mpc);
+	write_unlock_irq(&mpc->ingress_lock);
+
+	return;
+}
+
+static eg_cache_entry *eg_cache_get_by_cache_id(uint32_t cache_id, struct mpoa_client *mpc)
+{
+	eg_cache_entry *entry;
+
+	read_lock_irq(&mpc->egress_lock);
+	entry = mpc->eg_cache;
+	while(entry != NULL){
+		if(entry->ctrl_info.cache_id == cache_id){
+			atomic_inc(&entry->use);
+			read_unlock_irq(&mpc->egress_lock);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_irq(&mpc->egress_lock);
+
+	return NULL;
+}
+
+/* This can be called from any context since it saves CPU flags */
+static eg_cache_entry *eg_cache_get_by_tag(uint32_t tag, struct mpoa_client *mpc)
+{
+	unsigned long flags;
+	eg_cache_entry *entry;
+
+	read_lock_irqsave(&mpc->egress_lock, flags);
+	entry = mpc->eg_cache;
+	while (entry != NULL){
+		if (entry->ctrl_info.tag == tag) {
+			atomic_inc(&entry->use);
+			read_unlock_irqrestore(&mpc->egress_lock, flags);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_irqrestore(&mpc->egress_lock, flags);
+
+	return NULL;
+}
+
+/* This can be called from any context since it saves CPU flags */
+static eg_cache_entry *eg_cache_get_by_vcc(struct atm_vcc *vcc, struct mpoa_client *mpc)
+{
+	unsigned long flags;
+	eg_cache_entry *entry;
+
+	read_lock_irqsave(&mpc->egress_lock, flags);
+	entry = mpc->eg_cache;
+	while (entry != NULL){
+		if (entry->shortcut == vcc) {
+			atomic_inc(&entry->use);
+	       		read_unlock_irqrestore(&mpc->egress_lock, flags);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_irqrestore(&mpc->egress_lock, flags);
+
+	return NULL;
+}
+
+static eg_cache_entry *eg_cache_get_by_src_ip(uint32_t ipaddr, struct mpoa_client *mpc)
+{
+	eg_cache_entry *entry;
+
+	read_lock_irq(&mpc->egress_lock);
+	entry = mpc->eg_cache;
+	while(entry != NULL){
+		if(entry->latest_ip_addr == ipaddr) {
+			atomic_inc(&entry->use);
+	       		read_unlock_irq(&mpc->egress_lock);
+			return entry;
+		}
+		entry = entry->next;
+	}
+	read_unlock_irq(&mpc->egress_lock);
+
+	return NULL;
+}
+
+static void eg_cache_put(eg_cache_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->use)) {
+		memset(entry, 0, sizeof(eg_cache_entry));
+		kfree(entry);
+	}
+
+	return;
+}
+
+/*
+ * This should be called with write lock on
+ */
+static void eg_cache_remove_entry(eg_cache_entry *entry,
+				  struct mpoa_client *client)
+{
+	struct atm_vcc *vcc;
+	struct k_message msg;
+
+	vcc = entry->shortcut;
+	dprintk("mpoa: mpoa_caches.c: removing an egress entry.\n");
+	if (entry->prev != NULL)
+		entry->prev->next = entry->next;
+	else
+		client->eg_cache = entry->next;
+	if (entry->next != NULL)
+		entry->next->prev = entry->prev;
+	client->eg_ops->put(entry);
+	if(client->in_cache == NULL && client->eg_cache == NULL){
+		msg.type = STOP_KEEP_ALIVE_SM;
+		msg_to_mpoad(&msg,client);
+	}
+
+	/* Check if the ingress side still uses this VCC */
+	if (vcc != NULL) {
+		in_cache_entry *in_entry = client->in_ops->get_by_vcc(vcc, client);
+		if (in_entry != NULL) {
+			client->in_ops->put(in_entry);
+			return;
+		}
+		vcc_release_async(vcc, -EPIPE);
+	}
+
+	return;
+}
+
+static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client)
+{
+	unsigned char *ip;
+	eg_cache_entry *entry = kmalloc(sizeof(eg_cache_entry), GFP_KERNEL);
+
+	if (entry == NULL) {
+		printk("mpoa: mpoa_caches.c: new_eg_cache_entry: out of memory\n");
+		return NULL;
+	}
+
+	ip = (unsigned char *)&msg->content.eg_info.eg_dst_ip;
+	dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(ip));
+	memset(entry, 0, sizeof(eg_cache_entry));
+
+	atomic_set(&entry->use, 1);
+	dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: about to lock\n");
+	write_lock_irq(&client->egress_lock);
+	entry->next = client->eg_cache;
+	entry->prev = NULL;
+	if (client->eg_cache != NULL)
+		client->eg_cache->prev = entry;
+	client->eg_cache = entry;
+
+	memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
+	entry->ctrl_info = msg->content.eg_info;
+	do_gettimeofday(&(entry->tv));
+	entry->entry_state = EGRESS_RESOLVED;
+	dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %lu\n", ntohl(entry->ctrl_info.cache_id));
+	ip = (unsigned char *)&entry->ctrl_info.mps_ip;
+	dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", NIPQUAD(ip));
+	atomic_inc(&entry->use);
+
+	write_unlock_irq(&client->egress_lock);
+	dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: unlocked\n");
+
+	return entry;
+}
+
+static void update_eg_cache_entry(eg_cache_entry * entry, uint16_t holding_time)
+{
+	do_gettimeofday(&(entry->tv));
+	entry->entry_state = EGRESS_RESOLVED;
+	entry->ctrl_info.holding_time = holding_time;
+
+	return;
+}
+
+static void clear_expired(struct mpoa_client *client)
+{
+	eg_cache_entry *entry, *next_entry;
+	struct timeval now;
+	struct k_message msg;
+
+	do_gettimeofday(&now);
+
+	write_lock_irq(&client->egress_lock);
+	entry = client->eg_cache;
+	while(entry != NULL){
+		next_entry = entry->next;
+		if((now.tv_sec - entry->tv.tv_sec)
+		   > entry->ctrl_info.holding_time){
+			msg.type = SND_EGRESS_PURGE;
+			msg.content.eg_info = entry->ctrl_info;
+			dprintk("mpoa: mpoa_caches.c: egress_cache: holding time expired, cache_id = %lu.\n",ntohl(entry->ctrl_info.cache_id));
+			msg_to_mpoad(&msg, client);
+			client->eg_ops->remove_entry(entry, client);
+		}
+		entry = next_entry;
+	}
+	write_unlock_irq(&client->egress_lock);
+
+	return;
+}
+
+static void eg_destroy_cache(struct mpoa_client *mpc)
+{
+	write_lock_irq(&mpc->egress_lock);
+	while(mpc->eg_cache != NULL)
+		mpc->eg_ops->remove_entry(mpc->eg_cache, mpc);
+	write_unlock_irq(&mpc->egress_lock);
+
+	return;
+}
+
+
+
+static struct in_cache_ops ingress_ops = {
+	in_cache_add_entry,               /* add_entry       */
+	in_cache_get,                     /* get             */
+	in_cache_get_with_mask,           /* get_with_mask   */
+	in_cache_get_by_vcc,              /* get_by_vcc      */
+	in_cache_put,                     /* put             */
+	in_cache_remove_entry,            /* remove_entry    */
+	cache_hit,                        /* cache_hit       */
+	clear_count_and_expired,          /* clear_count     */
+	check_resolving_entries,          /* check_resolving */
+	refresh_entries,                  /* refresh         */
+	in_destroy_cache                  /* destroy_cache   */
+};
+
+static struct eg_cache_ops egress_ops = {
+	eg_cache_add_entry,               /* add_entry        */
+	eg_cache_get_by_cache_id,         /* get_by_cache_id  */
+	eg_cache_get_by_tag,              /* get_by_tag       */
+	eg_cache_get_by_vcc,              /* get_by_vcc       */
+	eg_cache_get_by_src_ip,           /* get_by_src_ip    */
+	eg_cache_put,                     /* put              */
+	eg_cache_remove_entry,            /* remove_entry     */
+	update_eg_cache_entry,            /* update           */
+	clear_expired,                    /* clear_expired    */
+	eg_destroy_cache                  /* destroy_cache    */
+};
+
+
+void atm_mpoa_init_cache(struct mpoa_client *mpc)
+{
+	mpc->in_ops = &ingress_ops;
+	mpc->eg_ops = &egress_ops;
+
+	return;
+}
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
new file mode 100644
index 00000000000..6c9886a03d0
--- /dev/null
+++ b/net/atm/mpoa_caches.h
@@ -0,0 +1,96 @@
+#ifndef MPOA_CACHES_H
+#define MPOA_CACHES_H
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/atmmpc.h>
+
+struct mpoa_client;
+
+void atm_mpoa_init_cache(struct mpoa_client *mpc);
+
+typedef struct in_cache_entry {
+        struct in_cache_entry *next;
+        struct in_cache_entry *prev;
+        struct timeval  tv;
+        struct timeval  reply_wait;
+        struct timeval  hold_down;
+        uint32_t  packets_fwded;
+        uint16_t  entry_state; 
+        uint32_t retry_time;
+        uint32_t refresh_time;
+        uint32_t count;
+        struct   atm_vcc *shortcut;
+        uint8_t  MPS_ctrl_ATM_addr[ATM_ESA_LEN];
+        struct   in_ctrl_info ctrl_info;
+        atomic_t use;
+} in_cache_entry;
+
+struct in_cache_ops{
+        in_cache_entry *(*add_entry)(uint32_t dst_ip,
+                                      struct mpoa_client *client);
+        in_cache_entry *(*get)(uint32_t dst_ip, struct mpoa_client *client);
+        in_cache_entry *(*get_with_mask)(uint32_t dst_ip, 
+					 struct mpoa_client *client,
+					 uint32_t mask);
+        in_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, 
+                                      struct mpoa_client *client);
+        void            (*put)(in_cache_entry *entry);
+        void            (*remove_entry)(in_cache_entry *delEntry,
+					struct mpoa_client *client );
+        int             (*cache_hit)(in_cache_entry *entry,
+                                     struct mpoa_client *client);
+        void            (*clear_count)(struct mpoa_client *client);
+        void            (*check_resolving)(struct mpoa_client *client);
+        void            (*refresh)(struct mpoa_client *client);
+        void            (*destroy_cache)(struct mpoa_client *mpc);
+};
+
+typedef struct eg_cache_entry{
+        struct               eg_cache_entry *next;
+        struct               eg_cache_entry *prev;
+        struct               timeval  tv;
+        uint8_t              MPS_ctrl_ATM_addr[ATM_ESA_LEN];
+        struct atm_vcc       *shortcut;
+        uint32_t             packets_rcvd;
+        uint16_t             entry_state;
+        uint32_t             latest_ip_addr;    /* The src IP address of the last packet */
+        struct eg_ctrl_info  ctrl_info;
+        atomic_t             use;
+} eg_cache_entry;
+
+struct eg_cache_ops{
+        eg_cache_entry *(*add_entry)(struct k_message *msg, struct mpoa_client *client);
+        eg_cache_entry *(*get_by_cache_id)(uint32_t cache_id, struct mpoa_client *client);
+        eg_cache_entry *(*get_by_tag)(uint32_t cache_id, struct mpoa_client *client);
+        eg_cache_entry *(*get_by_vcc)(struct atm_vcc *vcc, struct mpoa_client *client);
+        eg_cache_entry *(*get_by_src_ip)(uint32_t ipaddr, struct mpoa_client *client);
+        void            (*put)(eg_cache_entry *entry);
+        void            (*remove_entry)(eg_cache_entry *entry, struct mpoa_client *client);
+        void            (*update)(eg_cache_entry *entry, uint16_t holding_time);
+        void            (*clear_expired)(struct mpoa_client *client);
+        void            (*destroy_cache)(struct mpoa_client *mpc);
+};
+
+
+/* Ingress cache entry states */
+
+#define INGRESS_REFRESHING 3
+#define INGRESS_RESOLVED   2
+#define INGRESS_RESOLVING  1
+#define INGRESS_INVALID    0
+
+/* VCC states */
+
+#define OPEN   1
+#define CLOSED 0 
+
+/* Egress cache entry states */
+
+#define EGRESS_RESOLVED 2
+#define EGRESS_PURGE    1
+#define EGRESS_INVALID  0
+
+#endif
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
new file mode 100644
index 00000000000..60834b5a14d
--- /dev/null
+++ b/net/atm/mpoa_proc.c
@@ -0,0 +1,305 @@
+#include <linux/config.h>
+
+#ifdef CONFIG_PROC_FS
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h> 
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/atmmpc.h>
+#include <linux/atm.h>
+#include "mpc.h"
+#include "mpoa_caches.h"
+
+/*
+ * mpoa_proc.c: Implementation MPOA client's proc
+ * file system statistics 
+ */
+
+#if 1
+#define dprintk printk   /* debug */
+#else
+#define dprintk(format,args...)
+#endif
+
+#define STAT_FILE_NAME "mpc"     /* Our statistic file's name */
+
+extern struct mpoa_client *mpcs;
+extern struct proc_dir_entry *atm_proc_root;  /* from proc.c. */
+
+static int proc_mpc_open(struct inode *inode, struct file *file);
+static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
+                              size_t nbytes, loff_t *ppos);
+
+static int parse_qos(const char *buff);
+
+/*
+ *   Define allowed FILE OPERATIONS
+ */
+static struct file_operations mpc_file_operations = {
+	.owner =	THIS_MODULE,
+	.open =		proc_mpc_open,
+	.read =		seq_read,
+	.llseek =	seq_lseek,
+	.write =	proc_mpc_write,
+	.release =	seq_release,
+};
+
+/*
+ * Returns the state of an ingress cache entry as a string
+ */
+static const char *ingress_state_string(int state){
+        switch(state) {
+	case INGRESS_RESOLVING:
+	        return "resolving  ";
+		break;
+	case INGRESS_RESOLVED:
+                return "resolved   ";
+		break;
+	case INGRESS_INVALID:
+	        return "invalid    ";
+		break;
+	case INGRESS_REFRESHING:
+	        return "refreshing ";
+		break;
+	default:
+	       return "";
+	}
+}
+
+/*
+ * Returns the state of an egress cache entry as a string
+ */
+static const char *egress_state_string(int state){
+        switch(state) {
+	case EGRESS_RESOLVED:
+	        return "resolved   ";
+		break;
+	case EGRESS_PURGE:
+                return "purge      ";
+		break;
+	case EGRESS_INVALID:
+	        return "invalid    ";
+		break;
+	default:
+	       return "";
+	}
+}
+
+/*
+ * FIXME: mpcs (and per-mpc lists) have no locking whatsoever.
+ */
+
+static void *mpc_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t l = *pos;
+	struct mpoa_client *mpc;
+
+	if (!l--)
+		return SEQ_START_TOKEN;
+	for (mpc = mpcs; mpc; mpc = mpc->next)
+		if (!l--)
+			return mpc;
+	return NULL;
+}
+
+static void *mpc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct mpoa_client *p = v;
+	(*pos)++;
+	return v == SEQ_START_TOKEN ? mpcs : p->next;
+}
+
+static void mpc_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * READING function - called when the /proc/atm/mpoa file is read from.
+ */
+static int mpc_show(struct seq_file *m, void *v)
+{
+	struct mpoa_client *mpc = v;
+	unsigned char *temp;
+	int i;
+	in_cache_entry *in_entry;
+	eg_cache_entry *eg_entry;
+	struct timeval now;
+	unsigned char ip_string[16];
+
+	if (v == SEQ_START_TOKEN) {
+		atm_mpoa_disp_qos(m);
+		return 0;
+	}
+
+	seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num);  
+	seq_printf(m, "Ingress Entries:\nIP address      State      Holding time  Packets fwded  VPI  VCI\n");
+	do_gettimeofday(&now);
+
+	for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) {
+		temp = (unsigned char *)&in_entry->ctrl_info.in_dst_ip;
+		sprintf(ip_string,"%d.%d.%d.%d", temp[0], temp[1], temp[2], temp[3]);
+		seq_printf(m, "%-16s%s%-14lu%-12u",
+			      ip_string,
+			      ingress_state_string(in_entry->entry_state),
+			      in_entry->ctrl_info.holding_time-(now.tv_sec-in_entry->tv.tv_sec),
+			      in_entry->packets_fwded);
+		if (in_entry->shortcut)
+			seq_printf(m, "   %-3d  %-3d",in_entry->shortcut->vpi,in_entry->shortcut->vci);
+		seq_printf(m, "\n");
+	}
+
+	seq_printf(m, "\n");
+	seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id        State      Holding time  Packets recvd  Latest IP addr   VPI VCI\n");
+	for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) {
+		unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr;
+		for(i = 0; i < ATM_ESA_LEN; i++)
+			seq_printf(m, "%02x", p[i]);
+		seq_printf(m, "\n%-16lu%s%-14lu%-15u",
+			   (unsigned long)ntohl(eg_entry->ctrl_info.cache_id),
+			   egress_state_string(eg_entry->entry_state),
+			   (eg_entry->ctrl_info.holding_time-(now.tv_sec-eg_entry->tv.tv_sec)),
+			   eg_entry->packets_rcvd);
+		
+		/* latest IP address */
+		temp = (unsigned char *)&eg_entry->latest_ip_addr;
+		sprintf(ip_string, "%d.%d.%d.%d", temp[0], temp[1], temp[2], temp[3]);
+		seq_printf(m, "%-16s", ip_string);
+
+		if (eg_entry->shortcut)
+			seq_printf(m, " %-3d %-3d",eg_entry->shortcut->vpi,eg_entry->shortcut->vci);
+		seq_printf(m, "\n");
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+
+static struct seq_operations mpc_op = {
+	.start =	mpc_start,
+	.next =		mpc_next,
+	.stop =		mpc_stop,
+	.show =		mpc_show
+};
+
+static int proc_mpc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &mpc_op);
+}
+
+static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
+                              size_t nbytes, loff_t *ppos)
+{
+        char *page, *p;
+	unsigned len;
+
+        if (nbytes == 0)
+		return 0;
+
+        if (nbytes >= PAGE_SIZE)
+		nbytes = PAGE_SIZE-1;
+
+        page = (char *)__get_free_page(GFP_KERNEL);
+        if (!page)
+		return -ENOMEM;
+
+        for (p = page, len = 0; len < nbytes; p++, len++) {
+                if (get_user(*p, buff++)) {
+			free_page((unsigned long)page);
+			return -EFAULT;
+		}
+                if (*p == '\0' || *p == '\n')
+                        break;
+        }
+
+        *p = '\0';
+
+	if (!parse_qos(page))
+                printk("mpoa: proc_mpc_write: could not parse '%s'\n", page);
+
+        free_page((unsigned long)page);
+        
+        return len;
+}
+
+static int parse_qos(const char *buff)
+{
+        /* possible lines look like this
+         * add 130.230.54.142 tx=max_pcr,max_sdu rx=max_pcr,max_sdu
+         */
+        unsigned char ip[4]; 
+	int tx_pcr, tx_sdu, rx_pcr, rx_sdu;
+        uint32_t ipaddr;
+	struct atm_qos qos; 
+        
+        memset(&qos, 0, sizeof(struct atm_qos));
+
+	if (sscanf(buff, "del %hhu.%hhu.%hhu.%hhu",
+			ip, ip+1, ip+2, ip+3) == 4) {
+		ipaddr = *(uint32_t *)ip;
+		return atm_mpoa_delete_qos(atm_mpoa_search_qos(ipaddr));
+	}
+
+	if (sscanf(buff, "add %hhu.%hhu.%hhu.%hhu tx=%d,%d rx=tx",
+			ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu) == 6) {
+		rx_pcr = tx_pcr;
+		rx_sdu = tx_sdu;
+	} else if (sscanf(buff, "add %hhu.%hhu.%hhu.%hhu tx=%d,%d rx=%d,%d",
+		ip, ip+1, ip+2, ip+3, &tx_pcr, &tx_sdu, &rx_pcr, &rx_sdu) != 8)
+		return 0;
+
+        ipaddr = *(uint32_t *)ip;
+	qos.txtp.traffic_class = ATM_CBR;
+	qos.txtp.max_pcr = tx_pcr;
+	qos.txtp.max_sdu = tx_sdu;
+	qos.rxtp.traffic_class = ATM_CBR;
+	qos.rxtp.max_pcr = rx_pcr;
+	qos.rxtp.max_sdu = rx_sdu;
+        qos.aal = ATM_AAL5;
+	dprintk("mpoa: mpoa_proc.c: parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n",
+		qos.txtp.max_pcr,
+		qos.txtp.max_sdu,
+		qos.rxtp.max_pcr,
+		qos.rxtp.max_sdu
+		);
+
+	atm_mpoa_add_qos(ipaddr, &qos);
+	return 1;
+}
+
+/*
+ * INITIALIZATION function - called when module is initialized/loaded.
+ */
+int mpc_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+        p = create_proc_entry(STAT_FILE_NAME, 0, atm_proc_root);
+	if (!p) {
+                printk(KERN_ERR "Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME);
+                return -ENOMEM;
+        }
+	p->proc_fops = &mpc_file_operations;
+	p->owner = THIS_MODULE;
+	return 0;
+}
+
+/*
+ * DELETING function - called when module is removed.
+ */
+void mpc_proc_clean(void)
+{
+	remove_proc_entry(STAT_FILE_NAME,atm_proc_root);
+}
+
+
+#endif /* CONFIG_PROC_FS */
+
+
+
+
+
+
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
new file mode 100644
index 00000000000..58f4a2b5aeb
--- /dev/null
+++ b/net/atm/pppoatm.c
@@ -0,0 +1,369 @@
+/* net/atm/pppoatm.c - RFC2364 PPP over ATM/AAL5 */
+
+/* Copyright 1999-2000 by Mitchell Blank Jr */
+/* Based on clip.c; 1995-1999 by Werner Almesberger, EPFL LRC/ICA */
+/* And on ppp_async.c; Copyright 1999 Paul Mackerras */
+/* And help from Jens Axboe */
+
+/*
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * This driver provides the encapsulation and framing for sending
+ * and receiving PPP frames in ATM AAL5 PDUs.
+ */
+
+/*
+ * One shortcoming of this driver is that it does not comply with
+ * section 8 of RFC2364 - we are supposed to detect a change
+ * in encapsulation and immediately abort the connection (in order
+ * to avoid a black-hole being created if our peer loses state
+ * and changes encapsulation unilaterally.  However, since the
+ * ppp_generic layer actually does the decapsulation, we need
+ * a way of notifying it when we _think_ there might be a problem)
+ * There's two cases:
+ *   1.	LLC-encapsulation was missing when it was enabled.  In
+ *	this case, we should tell the upper layer "tear down
+ *	this session if this skb looks ok to you"
+ *   2.	LLC-encapsulation was present when it was disabled.  Then
+ *	we need to tell the upper layer "this packet may be
+ *	ok, but if its in error tear down the session"
+ * These hooks are not yet available in ppp_generic
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/ppp_channel.h>
+#include <linux/atmppp.h>
+
+#include "common.h"
+
+#if 0
+#define DPRINTK(format, args...) \
+	printk(KERN_DEBUG "pppoatm: " format, ##args)
+#else
+#define DPRINTK(format, args...)
+#endif
+
+enum pppoatm_encaps {
+	e_autodetect = PPPOATM_ENCAPS_AUTODETECT,
+	e_vc = PPPOATM_ENCAPS_VC,
+	e_llc = PPPOATM_ENCAPS_LLC,
+};
+
+struct pppoatm_vcc {
+	struct atm_vcc	*atmvcc;	/* VCC descriptor */
+	void (*old_push)(struct atm_vcc *, struct sk_buff *);
+	void (*old_pop)(struct atm_vcc *, struct sk_buff *);
+					/* keep old push/pop for detaching */
+	enum pppoatm_encaps encaps;
+	int flags;			/* SC_COMP_PROT - compress protocol */
+	struct ppp_channel chan;	/* interface to generic ppp layer */
+	struct tasklet_struct wakeup_tasklet;
+};
+
+/*
+ * Header used for LLC Encapsulated PPP (4 bytes) followed by the LCP protocol
+ * ID (0xC021) used in autodetection
+ */
+static const unsigned char pppllc[6] = { 0xFE, 0xFE, 0x03, 0xCF, 0xC0, 0x21 };
+#define LLC_LEN		(4)
+
+static inline struct pppoatm_vcc *atmvcc_to_pvcc(const struct atm_vcc *atmvcc)
+{
+	return (struct pppoatm_vcc *) (atmvcc->user_back);
+}
+
+static inline struct pppoatm_vcc *chan_to_pvcc(const struct ppp_channel *chan)
+{
+	return (struct pppoatm_vcc *) (chan->private);
+}
+
+/*
+ * We can't do this directly from our _pop handler, since the ppp code
+ * doesn't want to be called in interrupt context, so we do it from
+ * a tasklet
+ */
+static void pppoatm_wakeup_sender(unsigned long arg)
+{
+	ppp_output_wakeup((struct ppp_channel *) arg);
+}
+
+/*
+ * This gets called every time the ATM card has finished sending our
+ * skb.  The ->old_pop will take care up normal atm flow control,
+ * but we also need to wake up the device if we blocked it
+ */
+static void pppoatm_pop(struct atm_vcc *atmvcc, struct sk_buff *skb)
+{
+	struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
+	pvcc->old_pop(atmvcc, skb);
+	/*
+	 * We don't really always want to do this since it's
+	 * really inefficient - it would be much better if we could
+	 * test if we had actually throttled the generic layer.
+	 * Unfortunately then there would be a nasty SMP race where
+	 * we could clear that flag just as we refuse another packet.
+	 * For now we do the safe thing.
+	 */
+	tasklet_schedule(&pvcc->wakeup_tasklet);
+}
+
+/*
+ * Unbind from PPP - currently we only do this when closing the socket,
+ * but we could put this into an ioctl if need be
+ */
+static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc)
+{
+	struct pppoatm_vcc *pvcc;
+	pvcc = atmvcc_to_pvcc(atmvcc);
+	atmvcc->push = pvcc->old_push;
+	atmvcc->pop = pvcc->old_pop;
+	tasklet_kill(&pvcc->wakeup_tasklet);
+	ppp_unregister_channel(&pvcc->chan);
+	atmvcc->user_back = NULL;
+	kfree(pvcc);
+	/* Gee, I hope we have the big kernel lock here... */
+	module_put(THIS_MODULE);
+}
+
+/* Called when an AAL5 PDU comes in */
+static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
+{
+	struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
+	DPRINTK("pppoatm push\n");
+	if (skb == NULL) {			/* VCC was closed */
+		DPRINTK("removing ATMPPP VCC %p\n", pvcc);
+		pppoatm_unassign_vcc(atmvcc);
+		atmvcc->push(atmvcc, NULL);	/* Pass along bad news */
+		return;
+	}
+	atm_return(atmvcc, skb->truesize);
+	switch (pvcc->encaps) {
+	case e_llc:
+		if (skb->len < LLC_LEN ||
+		    memcmp(skb->data, pppllc, LLC_LEN))
+			goto error;
+		skb_pull(skb, LLC_LEN);
+		break;
+	case e_autodetect:
+		if (pvcc->chan.ppp == NULL) {	/* Not bound yet! */
+			kfree_skb(skb);
+			return;
+		}
+		if (skb->len >= sizeof(pppllc) &&
+		    !memcmp(skb->data, pppllc, sizeof(pppllc))) {
+			pvcc->encaps = e_llc;
+			skb_pull(skb, LLC_LEN);
+			break;
+		}
+		if (skb->len >= (sizeof(pppllc) - LLC_LEN) &&
+		    !memcmp(skb->data, &pppllc[LLC_LEN],
+		    sizeof(pppllc) - LLC_LEN)) {
+			pvcc->encaps = e_vc;
+			pvcc->chan.mtu += LLC_LEN;
+			break;
+		}
+		DPRINTK("(unit %d): Couldn't autodetect yet "
+		    "(skb: %02X %02X %02X %02X %02X %02X)\n",
+		    pvcc->chan.unit,
+		    skb->data[0], skb->data[1], skb->data[2],
+		    skb->data[3], skb->data[4], skb->data[5]);
+		goto error;
+	case e_vc:
+		break;
+	}
+	ppp_input(&pvcc->chan, skb);
+	return;
+    error:
+	kfree_skb(skb);
+	ppp_input_error(&pvcc->chan, 0);
+}
+
+/*
+ * Called by the ppp_generic.c to send a packet - returns true if packet
+ * was accepted.  If we return false, then it's our job to call
+ * ppp_output_wakeup(chan) when we're feeling more up to it.
+ * Note that in the ENOMEM case (as opposed to the !atm_may_send case)
+ * we should really drop the packet, but the generic layer doesn't
+ * support this yet.  We just return 'DROP_PACKET' which we actually define
+ * as success, just to be clear what we're really doing.
+ */
+#define DROP_PACKET 1
+static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
+	ATM_SKB(skb)->vcc = pvcc->atmvcc;
+	DPRINTK("(unit %d): pppoatm_send (skb=0x%p, vcc=0x%p)\n",
+	    pvcc->chan.unit, skb, pvcc->atmvcc);
+	if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT))
+		(void) skb_pull(skb, 1);
+	switch (pvcc->encaps) {		/* LLC encapsulation needed */
+	case e_llc:
+		if (skb_headroom(skb) < LLC_LEN) {
+			struct sk_buff *n;
+			n = skb_realloc_headroom(skb, LLC_LEN);
+			if (n != NULL &&
+			    !atm_may_send(pvcc->atmvcc, n->truesize)) {
+				kfree_skb(n);
+				goto nospace;
+			}
+			kfree_skb(skb);
+			if ((skb = n) == NULL)
+				return DROP_PACKET;
+		} else if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+			goto nospace;
+		memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
+		break;
+	case e_vc:
+		if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+			goto nospace;
+		break;
+	case e_autodetect:
+		DPRINTK("(unit %d): Trying to send without setting encaps!\n",
+		    pvcc->chan.unit);
+		kfree_skb(skb);
+		return 1;
+	}
+
+	atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+	ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
+	DPRINTK("(unit %d): atm_skb(%p)->vcc(%p)->dev(%p)\n",
+	    pvcc->chan.unit, skb, ATM_SKB(skb)->vcc,
+	    ATM_SKB(skb)->vcc->dev);
+	return ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
+	    ? DROP_PACKET : 1;
+    nospace:
+	/*
+	 * We don't have space to send this SKB now, but we might have
+	 * already applied SC_COMP_PROT compression, so may need to undo
+	 */
+	if ((pvcc->flags & SC_COMP_PROT) && skb_headroom(skb) > 0 &&
+	    skb->data[-1] == '\0')
+		(void) skb_push(skb, 1);
+	return 0;
+}
+
+/* This handles ioctls sent to the /dev/ppp interface */
+static int pppoatm_devppp_ioctl(struct ppp_channel *chan, unsigned int cmd,
+	unsigned long arg)
+{
+	switch (cmd) {
+	case PPPIOCGFLAGS:
+		return put_user(chan_to_pvcc(chan)->flags, (int __user *) arg)
+		    ? -EFAULT : 0;
+	case PPPIOCSFLAGS:
+		return get_user(chan_to_pvcc(chan)->flags, (int __user *) arg)
+		    ? -EFAULT : 0;
+	}
+	return -ENOTTY;
+}
+
+static /*const*/ struct ppp_channel_ops pppoatm_ops = {
+	.start_xmit = pppoatm_send,
+	.ioctl = pppoatm_devppp_ioctl,
+};
+
+static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
+{
+	struct atm_backend_ppp be;
+	struct pppoatm_vcc *pvcc;
+	int err;
+	/*
+	 * Each PPPoATM instance has its own tasklet - this is just a
+	 * prototypical one used to initialize them
+	 */
+	static const DECLARE_TASKLET(tasklet_proto, pppoatm_wakeup_sender, 0);
+	if (copy_from_user(&be, arg, sizeof be))
+		return -EFAULT;
+	if (be.encaps != PPPOATM_ENCAPS_AUTODETECT &&
+	    be.encaps != PPPOATM_ENCAPS_VC && be.encaps != PPPOATM_ENCAPS_LLC)
+		return -EINVAL;
+	pvcc = kmalloc(sizeof(*pvcc), GFP_KERNEL);
+	if (pvcc == NULL)
+		return -ENOMEM;
+	memset(pvcc, 0, sizeof(*pvcc));
+	pvcc->atmvcc = atmvcc;
+	pvcc->old_push = atmvcc->push;
+	pvcc->old_pop = atmvcc->pop;
+	pvcc->encaps = (enum pppoatm_encaps) be.encaps;
+	pvcc->chan.private = pvcc;
+	pvcc->chan.ops = &pppoatm_ops;
+	pvcc->chan.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN -
+	    (be.encaps == e_vc ? 0 : LLC_LEN);
+	pvcc->wakeup_tasklet = tasklet_proto;
+	pvcc->wakeup_tasklet.data = (unsigned long) &pvcc->chan;
+	if ((err = ppp_register_channel(&pvcc->chan)) != 0) {
+		kfree(pvcc);
+		return err;
+	}
+	atmvcc->user_back = pvcc;
+	atmvcc->push = pppoatm_push;
+	atmvcc->pop = pppoatm_pop;
+	__module_get(THIS_MODULE);
+	return 0;
+}
+
+/*
+ * This handles ioctls actually performed on our vcc - we must return
+ * -ENOIOCTLCMD for any unrecognized ioctl
+ */
+static int pppoatm_ioctl(struct socket *sock, unsigned int cmd,
+	unsigned long arg)
+{
+	struct atm_vcc *atmvcc = ATM_SD(sock);
+	void __user *argp = (void __user *)arg;
+
+	if (cmd != ATM_SETBACKEND && atmvcc->push != pppoatm_push)
+		return -ENOIOCTLCMD;
+	switch (cmd) {
+	case ATM_SETBACKEND: {
+		atm_backend_t b;
+		if (get_user(b, (atm_backend_t __user *) argp))
+			return -EFAULT;
+		if (b != ATM_BACKEND_PPP)
+			return -ENOIOCTLCMD;
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		return pppoatm_assign_vcc(atmvcc, argp);
+		}
+	case PPPIOCGCHAN:
+		return put_user(ppp_channel_index(&atmvcc_to_pvcc(atmvcc)->
+		    chan), (int __user *) argp) ? -EFAULT : 0;
+	case PPPIOCGUNIT:
+		return put_user(ppp_unit_number(&atmvcc_to_pvcc(atmvcc)->
+		    chan), (int __user *) argp) ? -EFAULT : 0;
+	}
+	return -ENOIOCTLCMD;
+}
+
+static struct atm_ioctl pppoatm_ioctl_ops = {
+	.owner	= THIS_MODULE,
+	.ioctl	= pppoatm_ioctl,
+};
+
+static int __init pppoatm_init(void)
+{
+	register_atm_ioctl(&pppoatm_ioctl_ops);
+	return 0;
+}
+
+static void __exit pppoatm_exit(void)
+{
+	deregister_atm_ioctl(&pppoatm_ioctl_ops);
+}
+
+module_init(pppoatm_init);
+module_exit(pppoatm_exit);
+
+MODULE_AUTHOR("Mitchell Blank Jr <mitch@sfgoth.com>");
+MODULE_DESCRIPTION("RFC2364 PPP over ATM/AAL5");
+MODULE_LICENSE("GPL");
diff --git a/net/atm/proc.c b/net/atm/proc.c
new file mode 100644
index 00000000000..4041054e528
--- /dev/null
+++ b/net/atm/proc.c
@@ -0,0 +1,514 @@
+/* net/atm/proc.c - ATM /proc interface
+ *
+ * Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA
+ *
+ * seq_file api usage by romieu@fr.zoreil.com
+ *
+ * Evaluating the efficiency of the whole thing if left as an exercise to
+ * the reader.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h> /* for EXPORT_SYMBOL */
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/netdevice.h>
+#include <linux/atmclip.h>
+#include <linux/init.h> /* for __init */
+#include <net/atmclip.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/param.h> /* for HZ */
+#include "resources.h"
+#include "common.h" /* atm_proc_init prototype */
+#include "signaling.h" /* to get sigd - ugly too */
+
+static ssize_t proc_dev_atm_read(struct file *file,char __user *buf,size_t count,
+    loff_t *pos);
+
+static struct file_operations proc_atm_dev_ops = {
+	.owner =	THIS_MODULE,
+	.read =		proc_dev_atm_read,
+};
+
+static void add_stats(struct seq_file *seq, const char *aal,
+  const struct k_atm_aal_stats *stats)
+{
+	seq_printf(seq, "%s ( %d %d %d %d %d )", aal,
+	    atomic_read(&stats->tx),atomic_read(&stats->tx_err),
+	    atomic_read(&stats->rx),atomic_read(&stats->rx_err),
+	    atomic_read(&stats->rx_drop));
+}
+
+static void atm_dev_info(struct seq_file *seq, const struct atm_dev *dev)
+{
+	int i;
+
+	seq_printf(seq, "%3d %-8s", dev->number, dev->type);
+	for (i = 0; i < ESI_LEN; i++)
+		seq_printf(seq, "%02x", dev->esi[i]);
+	seq_puts(seq, "  ");
+	add_stats(seq, "0", &dev->stats.aal0);
+	seq_puts(seq, "  ");
+	add_stats(seq, "5", &dev->stats.aal5);
+	seq_printf(seq, "\t[%d]", atomic_read(&dev->refcnt));
+	seq_putc(seq, '\n');
+}
+
+struct vcc_state {
+	int bucket;
+	struct sock *sk;
+	int family;
+};
+
+static inline int compare_family(struct sock *sk, int family)
+{
+	return !family || (sk->sk_family == family);
+}
+
+static int __vcc_walk(struct sock **sock, int family, int *bucket, loff_t l)
+{
+	struct sock *sk = *sock;
+
+	if (sk == (void *)1) {
+		for (*bucket = 0; *bucket < VCC_HTABLE_SIZE; ++*bucket) {
+			struct hlist_head *head = &vcc_hash[*bucket];
+
+			sk = hlist_empty(head) ? NULL : __sk_head(head);
+			if (sk)
+				break;
+		}
+		l--;
+	} 
+try_again:
+	for (; sk; sk = sk_next(sk)) {
+		l -= compare_family(sk, family);
+		if (l < 0)
+			goto out;
+	}
+	if (!sk && ++*bucket < VCC_HTABLE_SIZE) {
+		sk = sk_head(&vcc_hash[*bucket]);
+		goto try_again;
+	}
+	sk = (void *)1;
+out:
+	*sock = sk;
+	return (l < 0);
+}
+
+static inline void *vcc_walk(struct vcc_state *state, loff_t l)
+{
+	return __vcc_walk(&state->sk, state->family, &state->bucket, l) ?
+	       state : NULL;
+}
+
+static int __vcc_seq_open(struct inode *inode, struct file *file,
+	int family, struct seq_operations *ops)
+{
+	struct vcc_state *state;
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		goto out;
+
+	rc = seq_open(file, ops);
+	if (rc)
+		goto out_kfree;
+
+	state->family = family;
+
+	seq = file->private_data;
+	seq->private = state;
+out:
+	return rc;
+out_kfree:
+	kfree(state);
+	goto out;
+}
+
+static int vcc_seq_release(struct inode *inode, struct file *file)
+{
+	return seq_release_private(inode, file);
+}
+
+static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct vcc_state *state = seq->private;
+	loff_t left = *pos;
+
+	read_lock(&vcc_sklist_lock);
+	state->sk = (void *)1;
+	return left ? vcc_walk(state, left) : (void *)1;
+}
+
+static void vcc_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&vcc_sklist_lock);
+}
+
+static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct vcc_state *state = seq->private;
+
+	v = vcc_walk(state, 1);
+	*pos += !!PTR_ERR(v);
+	return v;
+}
+
+static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc)
+{
+	static const char *class_name[] = { "off","UBR","CBR","VBR","ABR" };
+	static const char *aal_name[] = {
+		"---",	"1",	"2",	"3/4",	/*  0- 3 */
+		"???",	"5",	"???",	"???",	/*  4- 7 */
+		"???",	"???",	"???",	"???",	/*  8-11 */
+		"???",	"0",	"???",	"???"};	/* 12-15 */
+
+	seq_printf(seq, "%3d %3d %5d %-3s %7d %-5s %7d %-6s",
+	    vcc->dev->number,vcc->vpi,vcc->vci,
+	    vcc->qos.aal >= sizeof(aal_name)/sizeof(aal_name[0]) ? "err" :
+	    aal_name[vcc->qos.aal],vcc->qos.rxtp.min_pcr,
+	    class_name[vcc->qos.rxtp.traffic_class],vcc->qos.txtp.min_pcr,
+	    class_name[vcc->qos.txtp.traffic_class]);
+	if (test_bit(ATM_VF_IS_CLIP, &vcc->flags)) {
+		struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
+		struct net_device *dev;
+
+		dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : NULL;
+		seq_printf(seq, "CLIP, Itf:%s, Encap:",
+		    dev ? dev->name : "none?");
+		seq_printf(seq, "%s", clip_vcc->encap ? "LLC/SNAP" : "None");
+	}
+	seq_putc(seq, '\n');
+}
+
+static const char *vcc_state(struct atm_vcc *vcc)
+{
+	static const char *map[] = { ATM_VS2TXT_MAP };
+
+	return map[ATM_VF2VS(vcc->flags)];
+}
+
+static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc)
+{
+	struct sock *sk = sk_atm(vcc);
+
+	seq_printf(seq, "%p ", vcc);
+	if (!vcc->dev)
+		seq_printf(seq, "Unassigned    ");
+	else 
+		seq_printf(seq, "%3d %3d %5d ", vcc->dev->number, vcc->vpi,
+			vcc->vci);
+	switch (sk->sk_family) {
+		case AF_ATMPVC:
+			seq_printf(seq, "PVC");
+			break;
+		case AF_ATMSVC:
+			seq_printf(seq, "SVC");
+			break;
+		default:
+			seq_printf(seq, "%3d", sk->sk_family);
+	}
+	seq_printf(seq, " %04lx  %5d %7d/%7d %7d/%7d [%d]\n", vcc->flags, sk->sk_err,
+		  atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf,
+		  atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf,
+		  atomic_read(&sk->sk_refcnt));
+}
+
+static void svc_info(struct seq_file *seq, struct atm_vcc *vcc)
+{
+	if (!vcc->dev)
+		seq_printf(seq, sizeof(void *) == 4 ?
+			   "N/A@%p%10s" : "N/A@%p%2s", vcc, "");
+	else
+		seq_printf(seq, "%3d %3d %5d         ",
+			   vcc->dev->number, vcc->vpi, vcc->vci);
+	seq_printf(seq, "%-10s ", vcc_state(vcc));
+	seq_printf(seq, "%s%s", vcc->remote.sas_addr.pub,
+	    *vcc->remote.sas_addr.pub && *vcc->remote.sas_addr.prv ? "+" : "");
+	if (*vcc->remote.sas_addr.prv) {
+		int i;
+
+		for (i = 0; i < ATM_ESA_LEN; i++)
+			seq_printf(seq, "%02x", vcc->remote.sas_addr.prv[i]);
+	}
+	seq_putc(seq, '\n');
+}
+
+static int atm_dev_seq_show(struct seq_file *seq, void *v)
+{
+	static char atm_dev_banner[] =
+		"Itf Type    ESI/\"MAC\"addr "
+		"AAL(TX,err,RX,err,drop) ...               [refcnt]\n";
+ 
+	if (v == (void *)1)
+		seq_puts(seq, atm_dev_banner);
+	else {
+		struct atm_dev *dev = list_entry(v, struct atm_dev, dev_list);
+
+		atm_dev_info(seq, dev);
+	}
+ 	return 0;
+}
+ 
+static struct seq_operations atm_dev_seq_ops = {
+	.start	= atm_dev_seq_start,
+	.next	= atm_dev_seq_next,
+	.stop	= atm_dev_seq_stop,
+	.show	= atm_dev_seq_show,
+};
+ 
+static int atm_dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &atm_dev_seq_ops);
+}
+ 
+static struct file_operations devices_seq_fops = {
+	.open		= atm_dev_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int pvc_seq_show(struct seq_file *seq, void *v)
+{
+	static char atm_pvc_banner[] = 
+		"Itf VPI VCI   AAL RX(PCR,Class) TX(PCR,Class)\n";
+
+	if (v == (void *)1)
+		seq_puts(seq, atm_pvc_banner);
+	else {
+		struct vcc_state *state = seq->private;
+		struct atm_vcc *vcc = atm_sk(state->sk);
+
+		pvc_info(seq, vcc);
+	}
+	return 0;
+}
+
+static struct seq_operations pvc_seq_ops = {
+	.start	= vcc_seq_start,
+	.next	= vcc_seq_next,
+	.stop	= vcc_seq_stop,
+	.show	= pvc_seq_show,
+};
+
+static int pvc_seq_open(struct inode *inode, struct file *file)
+{
+	return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops);
+}
+
+static struct file_operations pvc_seq_fops = {
+	.open		= pvc_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= vcc_seq_release,
+};
+
+static int vcc_seq_show(struct seq_file *seq, void *v)
+{
+ 	if (v == (void *)1) {
+ 		seq_printf(seq, sizeof(void *) == 4 ? "%-8s%s" : "%-16s%s",
+ 			"Address ", "Itf VPI VCI   Fam Flags Reply "
+ 			"Send buffer     Recv buffer      [refcnt]\n");
+ 	} else {
+ 		struct vcc_state *state = seq->private;
+ 		struct atm_vcc *vcc = atm_sk(state->sk);
+  
+ 		vcc_info(seq, vcc);
+ 	}
+  	return 0;
+}
+  
+static struct seq_operations vcc_seq_ops = {
+ 	.start	= vcc_seq_start,
+ 	.next	= vcc_seq_next,
+ 	.stop	= vcc_seq_stop,
+ 	.show	= vcc_seq_show,
+};
+ 
+static int vcc_seq_open(struct inode *inode, struct file *file)
+{
+ 	return __vcc_seq_open(inode, file, 0, &vcc_seq_ops);
+}
+ 
+static struct file_operations vcc_seq_fops = {
+	.open		= vcc_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= vcc_seq_release,
+};
+
+static int svc_seq_show(struct seq_file *seq, void *v)
+{
+	static char atm_svc_banner[] = 
+		"Itf VPI VCI           State      Remote\n";
+
+	if (v == (void *)1)
+		seq_puts(seq, atm_svc_banner);
+	else {
+		struct vcc_state *state = seq->private;
+		struct atm_vcc *vcc = atm_sk(state->sk);
+
+		svc_info(seq, vcc);
+	}
+	return 0;
+}
+
+static struct seq_operations svc_seq_ops = {
+	.start	= vcc_seq_start,
+	.next	= vcc_seq_next,
+	.stop	= vcc_seq_stop,
+	.show	= svc_seq_show,
+};
+
+static int svc_seq_open(struct inode *inode, struct file *file)
+{
+	return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops);
+}
+
+static struct file_operations svc_seq_fops = {
+	.open		= svc_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= vcc_seq_release,
+};
+
+static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
+				 size_t count, loff_t *pos)
+{
+	struct atm_dev *dev;
+	unsigned long page;
+	int length;
+
+	if (count == 0) return 0;
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page) return -ENOMEM;
+	dev = PDE(file->f_dentry->d_inode)->data;
+	if (!dev->ops->proc_read)
+		length = -EINVAL;
+	else {
+		length = dev->ops->proc_read(dev,pos,(char *) page);
+		if (length > count) length = -EINVAL;
+	}
+	if (length >= 0) {
+		if (copy_to_user(buf,(char *) page,length)) length = -EFAULT;
+		(*pos)++;
+	}
+	free_page(page);
+	return length;
+}
+
+
+struct proc_dir_entry *atm_proc_root;
+EXPORT_SYMBOL(atm_proc_root);
+
+
+int atm_proc_dev_register(struct atm_dev *dev)
+{
+	int digits,num;
+	int error;
+
+	/* No proc info */
+	if (!dev->ops->proc_read)
+		return 0;
+
+	error = -ENOMEM;
+	digits = 0;
+	for (num = dev->number; num; num /= 10) digits++;
+	if (!digits) digits++;
+
+	dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL);
+	if (!dev->proc_name)
+		goto err_out;
+	sprintf(dev->proc_name,"%s:%d",dev->type, dev->number);
+
+	dev->proc_entry = create_proc_entry(dev->proc_name, 0, atm_proc_root);
+	if (!dev->proc_entry)
+		goto err_free_name;
+	dev->proc_entry->data = dev;
+	dev->proc_entry->proc_fops = &proc_atm_dev_ops;
+	dev->proc_entry->owner = THIS_MODULE;
+	return 0;
+err_free_name:
+	kfree(dev->proc_name);
+err_out:
+	return error;
+}
+
+
+void atm_proc_dev_deregister(struct atm_dev *dev)
+{
+	if (!dev->ops->proc_read)
+		return;
+
+	remove_proc_entry(dev->proc_name, atm_proc_root);
+	kfree(dev->proc_name);
+}
+
+static struct atm_proc_entry {
+	char *name;
+	struct file_operations *proc_fops;
+	struct proc_dir_entry *dirent;
+} atm_proc_ents[] = {
+	{ .name = "devices",	.proc_fops = &devices_seq_fops },
+	{ .name = "pvc",	.proc_fops = &pvc_seq_fops },
+	{ .name = "svc",	.proc_fops = &svc_seq_fops },
+	{ .name = "vc",		.proc_fops = &vcc_seq_fops },
+	{ .name = NULL,		.proc_fops = NULL }
+};
+
+static void atm_proc_dirs_remove(void)
+{
+	static struct atm_proc_entry *e;
+
+	for (e = atm_proc_ents; e->name; e++) {
+		if (e->dirent) 
+			remove_proc_entry(e->name, atm_proc_root);
+	}
+	remove_proc_entry("net/atm", NULL);
+}
+
+int __init atm_proc_init(void)
+{
+	static struct atm_proc_entry *e;
+	int ret;
+
+	atm_proc_root = proc_mkdir("net/atm",NULL);
+	if (!atm_proc_root)
+		goto err_out;
+	for (e = atm_proc_ents; e->name; e++) {
+		struct proc_dir_entry *dirent;
+
+		dirent = create_proc_entry(e->name, S_IRUGO, atm_proc_root);
+		if (!dirent)
+			goto err_out_remove;
+		dirent->proc_fops = e->proc_fops;
+		dirent->owner = THIS_MODULE;
+		e->dirent = dirent;
+	}
+	ret = 0;
+out:
+	return ret;
+
+err_out_remove:
+	atm_proc_dirs_remove();
+err_out:
+	ret = -ENOMEM;
+	goto out;
+}
+
+void __exit atm_proc_exit(void)
+{
+	atm_proc_dirs_remove();
+}
diff --git a/net/atm/protocols.h b/net/atm/protocols.h
new file mode 100644
index 00000000000..acdfc856222
--- /dev/null
+++ b/net/atm/protocols.h
@@ -0,0 +1,13 @@
+/* net/atm/protocols.h - ATM protocol handler entry points */
+
+/* Written 1995-1997 by Werner Almesberger, EPFL LRC */
+
+
+#ifndef NET_ATM_PROTOCOLS_H
+#define NET_ATM_PROTOCOLS_H
+
+int atm_init_aal0(struct atm_vcc *vcc);	/* "raw" AAL0 */
+int atm_init_aal34(struct atm_vcc *vcc);/* "raw" AAL3/4 transport */
+int atm_init_aal5(struct atm_vcc *vcc);	/* "raw" AAL5 transport */
+
+#endif
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
new file mode 100644
index 00000000000..2684a92da22
--- /dev/null
+++ b/net/atm/pvc.c
@@ -0,0 +1,155 @@
+/* net/atm/pvc.c - ATM PVC sockets */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/config.h>
+#include <linux/net.h>		/* struct socket, struct proto_ops */
+#include <linux/atm.h>		/* ATM stuff */
+#include <linux/atmdev.h>	/* ATM devices */
+#include <linux/errno.h>	/* error codes */
+#include <linux/kernel.h>	/* printk */
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/bitops.h>
+#include <net/sock.h>		/* for sock_no_* */
+
+#include "resources.h"		/* devs and vccs */
+#include "common.h"		/* common for PVCs and SVCs */
+
+
+static int pvc_shutdown(struct socket *sock,int how)
+{
+	return 0;
+}
+
+
+static int pvc_bind(struct socket *sock,struct sockaddr *sockaddr,
+    int sockaddr_len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_atmpvc *addr;
+	struct atm_vcc *vcc;
+	int error;
+
+	if (sockaddr_len != sizeof(struct sockaddr_atmpvc)) return -EINVAL;
+	addr = (struct sockaddr_atmpvc *) sockaddr;
+	if (addr->sap_family != AF_ATMPVC) return -EAFNOSUPPORT;
+	lock_sock(sk);
+	vcc = ATM_SD(sock);
+	if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) {
+		error = -EBADFD;
+		goto out;
+	}
+	if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) {
+		if (vcc->vpi != ATM_VPI_UNSPEC) addr->sap_addr.vpi = vcc->vpi;
+		if (vcc->vci != ATM_VCI_UNSPEC) addr->sap_addr.vci = vcc->vci;
+	}
+	error = vcc_connect(sock, addr->sap_addr.itf, addr->sap_addr.vpi,
+			    addr->sap_addr.vci);
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int pvc_connect(struct socket *sock,struct sockaddr *sockaddr,
+    int sockaddr_len,int flags)
+{
+	return pvc_bind(sock,sockaddr,sockaddr_len);
+}
+
+static int pvc_setsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int error;
+
+	lock_sock(sk);
+	error = vcc_setsockopt(sock, level, optname, optval, optlen);
+	release_sock(sk);
+	return error;
+}
+
+
+static int pvc_getsockopt(struct socket *sock, int level, int optname,
+		          char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	int error;
+
+	lock_sock(sk);
+	error = vcc_getsockopt(sock, level, optname, optval, optlen);
+	release_sock(sk);
+	return error;
+}
+
+
+static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr,
+    int *sockaddr_len,int peer)
+{
+	struct sockaddr_atmpvc *addr;
+	struct atm_vcc *vcc = ATM_SD(sock);
+
+	if (!vcc->dev || !test_bit(ATM_VF_ADDR,&vcc->flags)) return -ENOTCONN;
+        *sockaddr_len = sizeof(struct sockaddr_atmpvc);
+	addr = (struct sockaddr_atmpvc *) sockaddr;
+	addr->sap_family = AF_ATMPVC;
+	addr->sap_addr.itf = vcc->dev->number;
+	addr->sap_addr.vpi = vcc->vpi;
+	addr->sap_addr.vci = vcc->vci;
+	return 0;
+}
+
+
+static struct proto_ops pvc_proto_ops = {
+	.family =	PF_ATMPVC,
+	.owner =	THIS_MODULE,
+
+	.release =	vcc_release,
+	.bind =		pvc_bind,
+	.connect =	pvc_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	pvc_getname,
+	.poll =		vcc_poll,
+	.ioctl =	vcc_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	pvc_shutdown,
+	.setsockopt =	pvc_setsockopt,
+	.getsockopt =	pvc_getsockopt,
+	.sendmsg =	vcc_sendmsg,
+	.recvmsg =	vcc_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+
+static int pvc_create(struct socket *sock,int protocol)
+{
+	sock->ops = &pvc_proto_ops;
+	return vcc_create(sock, protocol, PF_ATMPVC);
+}
+
+
+static struct net_proto_family pvc_family_ops = {
+	.family = PF_ATMPVC,
+	.create = pvc_create,
+	.owner = THIS_MODULE,
+};
+
+
+/*
+ *	Initialize the ATM PVC protocol family
+ */
+
+
+int __init atmpvc_init(void)
+{
+	return sock_register(&pvc_family_ops);
+}
+
+void atmpvc_exit(void)
+{
+	sock_unregister(PF_ATMPVC);
+}
diff --git a/net/atm/raw.c b/net/atm/raw.c
new file mode 100644
index 00000000000..4a0466e91aa
--- /dev/null
+++ b/net/atm/raw.c
@@ -0,0 +1,98 @@
+/* net/atm/raw.c - Raw AAL0 and AAL5 transports */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/atmdev.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+
+#include "common.h"
+#include "protocols.h"
+
+
+#if 0
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+
+/*
+ * SKB == NULL indicates that the link is being closed
+ */
+
+static void atm_push_raw(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	if (skb) {
+		struct sock *sk = sk_atm(vcc);
+
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		sk->sk_data_ready(sk, skb->len);
+	}
+}
+
+
+static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct sock *sk = sk_atm(vcc);
+
+	DPRINTK("APopR (%d) %d -= %d\n", vcc->vci, sk->sk_wmem_alloc,
+		skb->truesize);
+	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+	dev_kfree_skb_any(skb);
+	sk->sk_write_space(sk);
+}
+
+
+static int atm_send_aal0(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	/*
+	 * Note that if vpi/vci are _ANY or _UNSPEC the below will
+	 * still work
+	 */
+	if (!capable(CAP_NET_ADMIN) &&
+            (((u32 *) skb->data)[0] & (ATM_HDR_VPI_MASK | ATM_HDR_VCI_MASK)) !=
+            ((vcc->vpi << ATM_HDR_VPI_SHIFT) | (vcc->vci << ATM_HDR_VCI_SHIFT)))
+	    {
+		kfree_skb(skb);
+		return -EADDRNOTAVAIL;
+        }
+	return vcc->dev->ops->send(vcc,skb);
+}
+
+
+int atm_init_aal0(struct atm_vcc *vcc)
+{
+	vcc->push = atm_push_raw;
+	vcc->pop = atm_pop_raw;
+	vcc->push_oam = NULL;
+	vcc->send = atm_send_aal0;
+	return 0;
+}
+
+
+int atm_init_aal34(struct atm_vcc *vcc)
+{
+	vcc->push = atm_push_raw;
+	vcc->pop = atm_pop_raw;
+	vcc->push_oam = NULL;
+	vcc->send = vcc->dev->ops->send;
+	return 0;
+}
+
+
+int atm_init_aal5(struct atm_vcc *vcc)
+{
+	vcc->push = atm_push_raw;
+	vcc->pop = atm_pop_raw;
+	vcc->push_oam = NULL;
+	vcc->send = vcc->dev->ops->send;
+	return 0;
+}
+
+
+EXPORT_SYMBOL(atm_init_aal5);
diff --git a/net/atm/resources.c b/net/atm/resources.c
new file mode 100644
index 00000000000..33f1685dbb7
--- /dev/null
+++ b/net/atm/resources.c
@@ -0,0 +1,432 @@
+/* net/atm/resources.c - Statically allocated resources */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+/* Fixes
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * 2002/01 - don't free the whole struct sock on sk->destruct time,
+ * 	     use the default destruct function initialized by sock_init_data */
+
+
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/atmdev.h>
+#include <linux/sonet.h>
+#include <linux/kernel.h> /* for barrier */
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <net/sock.h>	 /* for struct sock */
+
+#include "common.h"
+#include "resources.h"
+#include "addr.h"
+
+
+LIST_HEAD(atm_devs);
+DEFINE_SPINLOCK(atm_dev_lock);
+
+static struct atm_dev *__alloc_atm_dev(const char *type)
+{
+	struct atm_dev *dev;
+
+	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+	memset(dev, 0, sizeof(*dev));
+	dev->type = type;
+	dev->signal = ATM_PHY_SIG_UNKNOWN;
+	dev->link_rate = ATM_OC3_PCR;
+	spin_lock_init(&dev->lock);
+	INIT_LIST_HEAD(&dev->local);
+
+	return dev;
+}
+
+static void __free_atm_dev(struct atm_dev *dev)
+{
+	kfree(dev);
+}
+
+static struct atm_dev *__atm_dev_lookup(int number)
+{
+	struct atm_dev *dev;
+	struct list_head *p;
+
+	list_for_each(p, &atm_devs) {
+		dev = list_entry(p, struct atm_dev, dev_list);
+		if ((dev->ops) && (dev->number == number)) {
+			atm_dev_hold(dev);
+			return dev;
+		}
+	}
+	return NULL;
+}
+
+struct atm_dev *atm_dev_lookup(int number)
+{
+	struct atm_dev *dev;
+
+	spin_lock(&atm_dev_lock);
+	dev = __atm_dev_lookup(number);
+	spin_unlock(&atm_dev_lock);
+	return dev;
+}
+
+struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
+				 int number, unsigned long *flags)
+{
+	struct atm_dev *dev, *inuse;
+
+	dev = __alloc_atm_dev(type);
+	if (!dev) {
+		printk(KERN_ERR "atm_dev_register: no space for dev %s\n",
+		    type);
+		return NULL;
+	}
+	spin_lock(&atm_dev_lock);
+	if (number != -1) {
+		if ((inuse = __atm_dev_lookup(number))) {
+			atm_dev_put(inuse);
+			spin_unlock(&atm_dev_lock);
+			__free_atm_dev(dev);
+			return NULL;
+		}
+		dev->number = number;
+	} else {
+		dev->number = 0;
+		while ((inuse = __atm_dev_lookup(dev->number))) {
+			atm_dev_put(inuse);
+			dev->number++;
+		}
+	}
+
+	dev->ops = ops;
+	if (flags)
+		dev->flags = *flags;
+	else
+		memset(&dev->flags, 0, sizeof(dev->flags));
+	memset(&dev->stats, 0, sizeof(dev->stats));
+	atomic_set(&dev->refcnt, 1);
+	list_add_tail(&dev->dev_list, &atm_devs);
+	spin_unlock(&atm_dev_lock);
+
+	if (atm_proc_dev_register(dev) < 0) {
+		printk(KERN_ERR "atm_dev_register: "
+		       "atm_proc_dev_register failed for dev %s\n",
+		       type);
+		spin_lock(&atm_dev_lock);
+		list_del(&dev->dev_list);
+		spin_unlock(&atm_dev_lock);
+		__free_atm_dev(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+
+void atm_dev_deregister(struct atm_dev *dev)
+{
+	unsigned long warning_time;
+
+	atm_proc_dev_deregister(dev);
+
+	spin_lock(&atm_dev_lock);
+	list_del(&dev->dev_list);
+	spin_unlock(&atm_dev_lock);
+
+        warning_time = jiffies;
+        while (atomic_read(&dev->refcnt) != 1) {
+                msleep(250);
+                if ((jiffies - warning_time) > 10 * HZ) {
+                        printk(KERN_EMERG "atm_dev_deregister: waiting for "
+                               "dev %d to become free. Usage count = %d\n",
+                               dev->number, atomic_read(&dev->refcnt));
+                        warning_time = jiffies;
+                }
+        }
+
+	__free_atm_dev(dev);
+}
+
+void shutdown_atm_dev(struct atm_dev *dev)
+{
+	if (atomic_read(&dev->refcnt) > 1) {
+		set_bit(ATM_DF_CLOSE, &dev->flags);
+		return;
+	}
+	if (dev->ops->dev_close)
+		dev->ops->dev_close(dev);
+	atm_dev_deregister(dev);
+}
+
+
+static void copy_aal_stats(struct k_atm_aal_stats *from,
+    struct atm_aal_stats *to)
+{
+#define __HANDLE_ITEM(i) to->i = atomic_read(&from->i)
+	__AAL_STAT_ITEMS
+#undef __HANDLE_ITEM
+}
+
+
+static void subtract_aal_stats(struct k_atm_aal_stats *from,
+    struct atm_aal_stats *to)
+{
+#define __HANDLE_ITEM(i) atomic_sub(to->i, &from->i)
+	__AAL_STAT_ITEMS
+#undef __HANDLE_ITEM
+}
+
+
+static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, int zero)
+{
+	struct atm_dev_stats tmp;
+	int error = 0;
+
+	copy_aal_stats(&dev->stats.aal0, &tmp.aal0);
+	copy_aal_stats(&dev->stats.aal34, &tmp.aal34);
+	copy_aal_stats(&dev->stats.aal5, &tmp.aal5);
+	if (arg)
+		error = copy_to_user(arg, &tmp, sizeof(tmp));
+	if (zero && !error) {
+		subtract_aal_stats(&dev->stats.aal0, &tmp.aal0);
+		subtract_aal_stats(&dev->stats.aal34, &tmp.aal34);
+		subtract_aal_stats(&dev->stats.aal5, &tmp.aal5);
+	}
+	return error ? -EFAULT : 0;
+}
+
+
+int atm_dev_ioctl(unsigned int cmd, void __user *arg)
+{
+	void __user *buf;
+	int error, len, number, size = 0;
+	struct atm_dev *dev;
+	struct list_head *p;
+	int *tmp_buf, *tmp_p;
+	struct atm_iobuf __user *iobuf = arg;
+	struct atmif_sioc __user *sioc = arg;
+	switch (cmd) {
+		case ATM_GETNAMES:
+			if (get_user(buf, &iobuf->buffer))
+				return -EFAULT;
+			if (get_user(len, &iobuf->length))
+				return -EFAULT;
+			spin_lock(&atm_dev_lock);
+			list_for_each(p, &atm_devs)
+				size += sizeof(int);
+			if (size > len) {
+				spin_unlock(&atm_dev_lock);
+				return -E2BIG;
+			}
+			tmp_buf = kmalloc(size, GFP_ATOMIC);
+			if (!tmp_buf) {
+				spin_unlock(&atm_dev_lock);
+				return -ENOMEM;
+			}
+			tmp_p = tmp_buf;
+			list_for_each(p, &atm_devs) {
+				dev = list_entry(p, struct atm_dev, dev_list);
+				*tmp_p++ = dev->number;
+			}
+			spin_unlock(&atm_dev_lock);
+		        error = ((copy_to_user(buf, tmp_buf, size)) ||
+					put_user(size, &iobuf->length))
+						? -EFAULT : 0;
+			kfree(tmp_buf);
+			return error;
+		default:
+			break;
+	}
+
+	if (get_user(buf, &sioc->arg))
+		return -EFAULT;
+	if (get_user(len, &sioc->length))
+		return -EFAULT;
+	if (get_user(number, &sioc->number))
+		return -EFAULT;
+
+	if (!(dev = atm_dev_lookup(number)))
+		return -ENODEV;
+	
+	switch (cmd) {
+		case ATM_GETTYPE:
+			size = strlen(dev->type) + 1;
+			if (copy_to_user(buf, dev->type, size)) {
+				error = -EFAULT;
+				goto done;
+			}
+			break;
+		case ATM_GETESI:
+			size = ESI_LEN;
+			if (copy_to_user(buf, dev->esi, size)) {
+				error = -EFAULT;
+				goto done;
+			}
+			break;
+		case ATM_SETESI:
+			{
+				int i;
+
+				for (i = 0; i < ESI_LEN; i++)
+					if (dev->esi[i]) {
+						error = -EEXIST;
+						goto done;
+					}
+			}
+			/* fall through */
+		case ATM_SETESIF:
+			{
+				unsigned char esi[ESI_LEN];
+
+				if (!capable(CAP_NET_ADMIN)) {
+					error = -EPERM;
+					goto done;
+				}
+				if (copy_from_user(esi, buf, ESI_LEN)) {
+					error = -EFAULT;
+					goto done;
+				}
+				memcpy(dev->esi, esi, ESI_LEN);
+				error =  ESI_LEN;
+				goto done;
+			}
+		case ATM_GETSTATZ:
+			if (!capable(CAP_NET_ADMIN)) {
+				error = -EPERM;
+				goto done;
+			}
+			/* fall through */
+		case ATM_GETSTAT:
+			size = sizeof(struct atm_dev_stats);
+			error = fetch_stats(dev, buf, cmd == ATM_GETSTATZ);
+			if (error)
+				goto done;
+			break;
+		case ATM_GETCIRANGE:
+			size = sizeof(struct atm_cirange);
+			if (copy_to_user(buf, &dev->ci_range, size)) {
+				error = -EFAULT;
+				goto done;
+			}
+			break;
+		case ATM_GETLINKRATE:
+			size = sizeof(int);
+			if (copy_to_user(buf, &dev->link_rate, size)) {
+				error = -EFAULT;
+				goto done;
+			}
+			break;
+		case ATM_RSTADDR:
+			if (!capable(CAP_NET_ADMIN)) {
+				error = -EPERM;
+				goto done;
+			}
+			atm_reset_addr(dev);
+			break;
+		case ATM_ADDADDR:
+		case ATM_DELADDR:
+			if (!capable(CAP_NET_ADMIN)) {
+				error = -EPERM;
+				goto done;
+			}
+			{
+				struct sockaddr_atmsvc addr;
+
+				if (copy_from_user(&addr, buf, sizeof(addr))) {
+					error = -EFAULT;
+					goto done;
+				}
+				if (cmd == ATM_ADDADDR)
+					error = atm_add_addr(dev, &addr);
+				else
+					error = atm_del_addr(dev, &addr);
+				goto done;
+			}
+		case ATM_GETADDR:
+			error = atm_get_addr(dev, buf, len);
+			if (error < 0)
+				goto done;
+			size = error;
+			/* may return 0, but later on size == 0 means "don't
+			   write the length" */
+			error = put_user(size, &sioc->length)
+				? -EFAULT : 0;
+			goto done;
+		case ATM_SETLOOP:
+			if (__ATM_LM_XTRMT((int) (unsigned long) buf) &&
+			    __ATM_LM_XTLOC((int) (unsigned long) buf) >
+			    __ATM_LM_XTRMT((int) (unsigned long) buf)) {
+				error = -EINVAL;
+				goto done;
+			}
+			/* fall through */
+		case ATM_SETCIRANGE:
+		case SONET_GETSTATZ:
+		case SONET_SETDIAG:
+		case SONET_CLRDIAG:
+		case SONET_SETFRAMING:
+			if (!capable(CAP_NET_ADMIN)) {
+				error = -EPERM;
+				goto done;
+			}
+			/* fall through */
+		default:
+			if (!dev->ops->ioctl) {
+				error = -EINVAL;
+				goto done;
+			}
+			size = dev->ops->ioctl(dev, cmd, buf);
+			if (size < 0) {
+				error = (size == -ENOIOCTLCMD ? -EINVAL : size);
+				goto done;
+			}
+	}
+	
+	if (size)
+		error = put_user(size, &sioc->length)
+			? -EFAULT : 0;
+	else
+		error = 0;
+done:
+	atm_dev_put(dev);
+	return error;
+}
+
+static __inline__ void *dev_get_idx(loff_t left)
+{
+	struct list_head *p;
+
+	list_for_each(p, &atm_devs) {
+		if (!--left)
+			break;
+	}
+	return (p != &atm_devs) ? p : NULL;
+}
+
+void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ 	spin_lock(&atm_dev_lock);
+	return *pos ? dev_get_idx(*pos) : (void *) 1;
+}
+
+void atm_dev_seq_stop(struct seq_file *seq, void *v)
+{
+ 	spin_unlock(&atm_dev_lock);
+}
+ 
+void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	v = (v == (void *)1) ? atm_devs.next : ((struct list_head *)v)->next;
+	return (v == &atm_devs) ? NULL : v;
+}
+
+
+EXPORT_SYMBOL(atm_dev_register);
+EXPORT_SYMBOL(atm_dev_deregister);
+EXPORT_SYMBOL(atm_dev_lookup);
+EXPORT_SYMBOL(shutdown_atm_dev);
diff --git a/net/atm/resources.h b/net/atm/resources.h
new file mode 100644
index 00000000000..12910619dbb
--- /dev/null
+++ b/net/atm/resources.h
@@ -0,0 +1,46 @@
+/* net/atm/resources.h - ATM-related resources */
+
+/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#ifndef NET_ATM_RESOURCES_H
+#define NET_ATM_RESOURCES_H
+
+#include <linux/config.h>
+#include <linux/atmdev.h>
+
+
+extern struct list_head atm_devs;
+extern spinlock_t atm_dev_lock;
+
+
+int atm_dev_ioctl(unsigned int cmd, void __user *arg);
+
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/proc_fs.h>
+
+void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos);
+void atm_dev_seq_stop(struct seq_file *seq, void *v);
+void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+
+int atm_proc_dev_register(struct atm_dev *dev);
+void atm_proc_dev_deregister(struct atm_dev *dev);
+
+#else
+
+static inline int atm_proc_dev_register(struct atm_dev *dev)
+{
+	return 0;
+}
+
+static inline void atm_proc_dev_deregister(struct atm_dev *dev)
+{
+	/* nothing */
+}
+
+#endif /* CONFIG_PROC_FS */
+
+#endif
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
new file mode 100644
index 00000000000..6ff803154c0
--- /dev/null
+++ b/net/atm/signaling.c
@@ -0,0 +1,280 @@
+/* net/atm/signaling.c - ATM signaling */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/errno.h>	/* error codes */
+#include <linux/kernel.h>	/* printk */
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/sched.h>	/* jiffies and HZ */
+#include <linux/atm.h>		/* ATM stuff */
+#include <linux/atmsap.h>
+#include <linux/atmsvc.h>
+#include <linux/atmdev.h>
+#include <linux/bitops.h>
+
+#include "resources.h"
+#include "signaling.h"
+
+
+#undef WAIT_FOR_DEMON		/* #define this if system calls on SVC sockets
+				   should block until the demon runs.
+				   Danger: may cause nasty hangs if the demon
+				   crashes. */
+
+#if 0
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+
+struct atm_vcc *sigd = NULL;
+#ifdef WAIT_FOR_DEMON
+static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep);
+#endif
+
+
+static void sigd_put_skb(struct sk_buff *skb)
+{
+#ifdef WAIT_FOR_DEMON
+	static unsigned long silence;
+	DECLARE_WAITQUEUE(wait,current);
+
+	add_wait_queue(&sigd_sleep,&wait);
+	while (!sigd) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (time_after(jiffies, silence) || silence == 0) {
+			printk(KERN_INFO "atmsvc: waiting for signaling demon "
+			    "...\n");
+			silence = (jiffies+30*HZ)|1;
+		}
+		schedule();
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&sigd_sleep,&wait);
+#else
+	if (!sigd) {
+		printk(KERN_WARNING "atmsvc: no signaling demon\n");
+		kfree_skb(skb);
+		return;
+	}
+#endif
+	atm_force_charge(sigd,skb->truesize);
+	skb_queue_tail(&sk_atm(sigd)->sk_receive_queue,skb);
+	sk_atm(sigd)->sk_data_ready(sk_atm(sigd), skb->len);
+}
+
+
+static void modify_qos(struct atm_vcc *vcc,struct atmsvc_msg *msg)
+{
+	struct sk_buff *skb;
+
+	if (test_bit(ATM_VF_RELEASED,&vcc->flags) ||
+	    !test_bit(ATM_VF_READY,&vcc->flags))
+		return;
+	msg->type = as_error;
+	if (!vcc->dev->ops->change_qos) msg->reply = -EOPNOTSUPP;
+	else {
+		/* should lock VCC */
+		msg->reply = vcc->dev->ops->change_qos(vcc,&msg->qos,
+		    msg->reply);
+		if (!msg->reply) msg->type = as_okay;
+	}
+	/*
+	 * Should probably just turn around the old skb. But the, the buffer
+	 * space accounting needs to follow the change too. Maybe later.
+	 */
+	while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL)))
+		schedule();
+	*(struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)) = *msg;
+	sigd_put_skb(skb);
+}
+
+
+static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct atmsvc_msg *msg;
+	struct atm_vcc *session_vcc;
+	struct sock *sk;
+
+	msg = (struct atmsvc_msg *) skb->data;
+	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type,
+	  (unsigned long) msg->vcc);
+	vcc = *(struct atm_vcc **) &msg->vcc;
+	sk = sk_atm(vcc);
+
+	switch (msg->type) {
+		case as_okay:
+			sk->sk_err = -msg->reply;
+			clear_bit(ATM_VF_WAITING, &vcc->flags);
+			if (!*vcc->local.sas_addr.prv &&
+			    !*vcc->local.sas_addr.pub) {
+				vcc->local.sas_family = AF_ATMSVC;
+				memcpy(vcc->local.sas_addr.prv,
+				    msg->local.sas_addr.prv,ATM_ESA_LEN);
+				memcpy(vcc->local.sas_addr.pub,
+				    msg->local.sas_addr.pub,ATM_E164_LEN+1);
+			}
+			session_vcc = vcc->session ? vcc->session : vcc;
+			if (session_vcc->vpi || session_vcc->vci) break;
+			session_vcc->itf = msg->pvc.sap_addr.itf;
+			session_vcc->vpi = msg->pvc.sap_addr.vpi;
+			session_vcc->vci = msg->pvc.sap_addr.vci;
+			if (session_vcc->vpi || session_vcc->vci)
+				session_vcc->qos = msg->qos;
+			break;
+		case as_error:
+			clear_bit(ATM_VF_REGIS,&vcc->flags);
+			clear_bit(ATM_VF_READY,&vcc->flags);
+			sk->sk_err = -msg->reply;
+			clear_bit(ATM_VF_WAITING, &vcc->flags);
+			break;
+		case as_indicate:
+			vcc = *(struct atm_vcc **) &msg->listen_vcc;
+			DPRINTK("as_indicate!!!\n");
+			lock_sock(sk);
+			if (sk_acceptq_is_full(sk)) {
+				sigd_enq(NULL,as_reject,vcc,NULL,NULL);
+				dev_kfree_skb(skb);
+				goto as_indicate_complete;
+			}
+			sk->sk_ack_backlog++;
+			skb_queue_tail(&sk->sk_receive_queue, skb);
+			DPRINTK("waking sk->sk_sleep 0x%p\n", sk->sk_sleep);
+			sk->sk_state_change(sk);
+as_indicate_complete:
+			release_sock(sk);
+			return 0;
+		case as_close:
+			set_bit(ATM_VF_RELEASED,&vcc->flags);
+			vcc_release_async(vcc, msg->reply);
+			goto out;
+		case as_modify:
+			modify_qos(vcc,msg);
+			break;
+		case as_addparty:
+		case as_dropparty:
+			sk->sk_err_soft = msg->reply;	/* < 0 failure, otherwise ep_ref */
+			clear_bit(ATM_VF_WAITING, &vcc->flags);
+			break;
+		default:
+			printk(KERN_ALERT "sigd_send: bad message type %d\n",
+			    (int) msg->type);
+			return -EINVAL;
+	}
+	sk->sk_state_change(sk);
+out:
+	dev_kfree_skb(skb);
+	return 0;
+}
+
+
+void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type,
+    struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc,
+    const struct sockaddr_atmsvc *svc,const struct atm_qos *qos,int reply)
+{
+	struct sk_buff *skb;
+	struct atmsvc_msg *msg;
+	static unsigned session = 0;
+
+	DPRINTK("sigd_enq %d (0x%p)\n",(int) type,vcc);
+	while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL)))
+		schedule();
+	msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg));
+	memset(msg,0,sizeof(*msg));
+	msg->type = type;
+	*(struct atm_vcc **) &msg->vcc = vcc;
+	*(struct atm_vcc **) &msg->listen_vcc = listen_vcc;
+	msg->reply = reply;
+	if (qos) msg->qos = *qos;
+	if (vcc) msg->sap = vcc->sap;
+	if (svc) msg->svc = *svc;
+	if (vcc) msg->local = vcc->local;
+	if (pvc) msg->pvc = *pvc;
+	if (vcc) {
+		if (type == as_connect && test_bit(ATM_VF_SESSION, &vcc->flags))
+			msg->session = ++session;
+			/* every new pmp connect gets the next session number */
+	}
+	sigd_put_skb(skb);
+	if (vcc) set_bit(ATM_VF_REGIS,&vcc->flags);
+}
+
+
+void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type,
+    struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc,
+    const struct sockaddr_atmsvc *svc)
+{
+	sigd_enq2(vcc,type,listen_vcc,pvc,svc,vcc ? &vcc->qos : NULL,0);
+	/* other ISP applications may use "reply" */
+}
+
+
+static void purge_vcc(struct atm_vcc *vcc)
+{
+	if (sk_atm(vcc)->sk_family == PF_ATMSVC &&
+	    !test_bit(ATM_VF_META,&vcc->flags)) {
+		set_bit(ATM_VF_RELEASED,&vcc->flags);
+		vcc_release_async(vcc, -EUNATCH);
+	}
+}
+
+
+static void sigd_close(struct atm_vcc *vcc)
+{
+	struct hlist_node *node;
+	struct sock *s;
+	int i;
+
+	DPRINTK("sigd_close\n");
+	sigd = NULL;
+	if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
+		printk(KERN_ERR "sigd_close: closing with requests pending\n");
+	skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
+
+	read_lock(&vcc_sklist_lock);
+	for(i = 0; i < VCC_HTABLE_SIZE; ++i) {
+		struct hlist_head *head = &vcc_hash[i];
+
+		sk_for_each(s, node, head) {
+			struct atm_vcc *vcc = atm_sk(s);
+
+			if (vcc->dev)
+				purge_vcc(vcc);
+		}
+	}
+	read_unlock(&vcc_sklist_lock);
+}
+
+
+static struct atmdev_ops sigd_dev_ops = {
+	.close = sigd_close,
+	.send =	sigd_send
+};
+
+
+static struct atm_dev sigd_dev = {
+	.ops =		&sigd_dev_ops,
+	.type =		"sig",
+	.number =	999,
+	.lock =		SPIN_LOCK_UNLOCKED
+};
+
+
+int sigd_attach(struct atm_vcc *vcc)
+{
+	if (sigd) return -EADDRINUSE;
+	DPRINTK("sigd_attach\n");
+	sigd = vcc;
+	vcc->dev = &sigd_dev;
+	vcc_insert_socket(sk_atm(vcc));
+	set_bit(ATM_VF_META,&vcc->flags);
+	set_bit(ATM_VF_READY,&vcc->flags);
+#ifdef WAIT_FOR_DEMON
+	wake_up(&sigd_sleep);
+#endif
+	return 0;
+}
diff --git a/net/atm/signaling.h b/net/atm/signaling.h
new file mode 100644
index 00000000000..434ead45571
--- /dev/null
+++ b/net/atm/signaling.h
@@ -0,0 +1,30 @@
+/* net/atm/signaling.h - ATM signaling */
+ 
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+ 
+
+#ifndef NET_ATM_SIGNALING_H
+#define NET_ATM_SIGNALING_H
+
+#include <linux/atm.h>
+#include <linux/atmdev.h>
+#include <linux/atmsvc.h>
+
+
+extern struct atm_vcc *sigd; /* needed in svc_release */
+
+
+/*
+ * sigd_enq is a wrapper for sigd_enq2, covering the more common cases, and
+ * avoiding huge lists of null values.
+ */
+
+void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type,
+    struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc,
+    const struct sockaddr_atmsvc *svc,const struct atm_qos *qos,int reply);
+void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type,
+    struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc,
+    const struct sockaddr_atmsvc *svc);
+int sigd_attach(struct atm_vcc *vcc);
+
+#endif
diff --git a/net/atm/svc.c b/net/atm/svc.c
new file mode 100644
index 00000000000..02f5374a51f
--- /dev/null
+++ b/net/atm/svc.c
@@ -0,0 +1,674 @@
+/* net/atm/svc.c - ATM SVC sockets */
+
+/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
+
+
+#include <linux/string.h>
+#include <linux/net.h>		/* struct socket, struct proto_ops */
+#include <linux/errno.h>	/* error codes */
+#include <linux/kernel.h>	/* printk */
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/sched.h>	/* jiffies and HZ */
+#include <linux/fcntl.h>	/* O_NONBLOCK */
+#include <linux/init.h>
+#include <linux/atm.h>		/* ATM stuff */
+#include <linux/atmsap.h>
+#include <linux/atmsvc.h>
+#include <linux/atmdev.h>
+#include <linux/bitops.h>
+#include <net/sock.h>		/* for sock_no_* */
+#include <asm/uaccess.h>
+
+#include "resources.h"
+#include "common.h"		/* common for PVCs and SVCs */
+#include "signaling.h"
+#include "addr.h"
+
+
+#if 0
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+
+static int svc_create(struct socket *sock,int protocol);
+
+
+/*
+ * Note: since all this is still nicely synchronized with the signaling demon,
+ *       there's no need to protect sleep loops with clis. If signaling is
+ *       moved into the kernel, that would change.
+ */
+
+
+static int svc_shutdown(struct socket *sock,int how)
+{
+	return 0;
+}
+
+
+static void svc_disconnect(struct atm_vcc *vcc)
+{
+	DEFINE_WAIT(wait);
+	struct sk_buff *skb;
+	struct sock *sk = sk_atm(vcc);
+
+	DPRINTK("svc_disconnect %p\n",vcc);
+	if (test_bit(ATM_VF_REGIS,&vcc->flags)) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		sigd_enq(vcc,as_close,NULL,NULL,NULL);
+		while (!test_bit(ATM_VF_RELEASED,&vcc->flags) && sigd) {
+			schedule();
+			prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		}
+		finish_wait(sk->sk_sleep, &wait);
+	}
+	/* beware - socket is still in use by atmsigd until the last
+	   as_indicate has been answered */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		atm_return(vcc, skb->truesize);
+		DPRINTK("LISTEN REL\n");
+		sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0);
+		dev_kfree_skb(skb);
+	}
+	clear_bit(ATM_VF_REGIS, &vcc->flags);
+	/* ... may retry later */
+}
+
+
+static int svc_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc;
+
+	if (sk)  {
+		vcc = ATM_SD(sock);
+		DPRINTK("svc_release %p\n", vcc);
+		clear_bit(ATM_VF_READY, &vcc->flags);
+		/* VCC pointer is used as a reference, so we must not free it
+		   (thereby subjecting it to re-use) before all pending connections
+	           are closed */
+		svc_disconnect(vcc);
+		vcc_release(sock);
+	}
+	return 0;
+}
+
+
+static int svc_bind(struct socket *sock,struct sockaddr *sockaddr,
+    int sockaddr_len)
+{
+	DEFINE_WAIT(wait);
+	struct sock *sk = sock->sk;
+	struct sockaddr_atmsvc *addr;
+	struct atm_vcc *vcc;
+	int error;
+
+	if (sockaddr_len != sizeof(struct sockaddr_atmsvc))
+		return -EINVAL;
+	lock_sock(sk);
+	if (sock->state == SS_CONNECTED) {
+		error = -EISCONN;
+		goto out;
+	}
+	if (sock->state != SS_UNCONNECTED) {
+		error = -EINVAL;
+		goto out;
+	}
+	vcc = ATM_SD(sock);
+	if (test_bit(ATM_VF_SESSION, &vcc->flags)) {
+		error = -EINVAL;
+		goto out;
+	}
+	addr = (struct sockaddr_atmsvc *) sockaddr;
+	if (addr->sas_family != AF_ATMSVC) {
+		error = -EAFNOSUPPORT;
+		goto out;
+	}
+	clear_bit(ATM_VF_BOUND,&vcc->flags);
+	    /* failing rebind will kill old binding */
+	/* @@@ check memory (de)allocation on rebind */
+	if (!test_bit(ATM_VF_HASQOS,&vcc->flags)) {
+		error = -EBADFD;
+		goto out;
+	}
+	vcc->local = *addr;
+	set_bit(ATM_VF_WAITING, &vcc->flags);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	sigd_enq(vcc,as_bind,NULL,NULL,&vcc->local);
+	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+		schedule();
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	clear_bit(ATM_VF_REGIS,&vcc->flags); /* doesn't count */
+	if (!sigd) {
+		error = -EUNATCH;
+		goto out;
+	}
+        if (!sk->sk_err)
+		set_bit(ATM_VF_BOUND,&vcc->flags);
+	error = -sk->sk_err;
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_connect(struct socket *sock,struct sockaddr *sockaddr,
+    int sockaddr_len,int flags)
+{
+	DEFINE_WAIT(wait);
+	struct sock *sk = sock->sk;
+	struct sockaddr_atmsvc *addr;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int error;
+
+	DPRINTK("svc_connect %p\n",vcc);
+	lock_sock(sk);
+	if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	switch (sock->state) {
+	default:
+		error = -EINVAL;
+		goto out;
+	case SS_CONNECTED:
+		error = -EISCONN;
+		goto out;
+	case SS_CONNECTING:
+		if (test_bit(ATM_VF_WAITING, &vcc->flags)) {
+			error = -EALREADY;
+			goto out;
+		}
+		sock->state = SS_UNCONNECTED;
+		if (sk->sk_err) {
+			error = -sk->sk_err;
+			goto out;
+		}
+		break;
+	case SS_UNCONNECTED:
+		addr = (struct sockaddr_atmsvc *) sockaddr;
+		if (addr->sas_family != AF_ATMSVC) {
+			error = -EAFNOSUPPORT;
+			goto out;
+		}
+		if (!test_bit(ATM_VF_HASQOS, &vcc->flags)) {
+			error = -EBADFD;
+			goto out;
+		}
+		if (vcc->qos.txtp.traffic_class == ATM_ANYCLASS ||
+		    vcc->qos.rxtp.traffic_class == ATM_ANYCLASS) {
+			error = -EINVAL;
+			goto out;
+		}
+		if (!vcc->qos.txtp.traffic_class &&
+		    !vcc->qos.rxtp.traffic_class) {
+			error = -EINVAL;
+			goto out;
+		}
+		vcc->remote = *addr;
+		set_bit(ATM_VF_WAITING, &vcc->flags);
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		sigd_enq(vcc,as_connect,NULL,NULL,&vcc->remote);
+		if (flags & O_NONBLOCK) {
+			finish_wait(sk->sk_sleep, &wait);
+			sock->state = SS_CONNECTING;
+			error = -EINPROGRESS;
+			goto out;
+		}
+		error = 0;
+		while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+			schedule();
+			if (!signal_pending(current)) {
+				prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+				continue;
+			}
+			DPRINTK("*ABORT*\n");
+			/*
+			 * This is tricky:
+			 *   Kernel ---close--> Demon
+			 *   Kernel <--close--- Demon
+		         * or
+			 *   Kernel ---close--> Demon
+			 *   Kernel <--error--- Demon
+			 * or
+			 *   Kernel ---close--> Demon
+			 *   Kernel <--okay---- Demon
+			 *   Kernel <--close--- Demon
+			 */
+			sigd_enq(vcc,as_close,NULL,NULL,NULL);
+			while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+				prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+				schedule();
+			}
+			if (!sk->sk_err)
+				while (!test_bit(ATM_VF_RELEASED,&vcc->flags)
+				    && sigd) {
+					prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+					schedule();
+				}
+			clear_bit(ATM_VF_REGIS,&vcc->flags);
+			clear_bit(ATM_VF_RELEASED,&vcc->flags);
+			clear_bit(ATM_VF_CLOSE,&vcc->flags);
+			    /* we're gone now but may connect later */
+			error = -EINTR;
+			break;
+		}
+		finish_wait(sk->sk_sleep, &wait);
+		if (error)
+			goto out;
+		if (!sigd) {
+			error = -EUNATCH;
+			goto out;
+		}
+		if (sk->sk_err) {
+			error = -sk->sk_err;
+			goto out;
+		}
+	}
+/*
+ * Not supported yet
+ *
+ * #ifndef CONFIG_SINGLE_SIGITF
+ */
+	vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp);
+	vcc->qos.txtp.pcr = 0;
+	vcc->qos.txtp.min_pcr = 0;
+/*
+ * #endif
+ */
+	if (!(error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci)))
+		sock->state = SS_CONNECTED;
+	else
+		(void) svc_disconnect(vcc);
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_listen(struct socket *sock,int backlog)
+{
+	DEFINE_WAIT(wait);
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int error;
+
+	DPRINTK("svc_listen %p\n",vcc);
+	lock_sock(sk);
+	/* let server handle listen on unbound sockets */
+	if (test_bit(ATM_VF_SESSION,&vcc->flags)) {
+		error = -EINVAL;
+		goto out;
+	}
+	set_bit(ATM_VF_WAITING, &vcc->flags);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local);
+	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+		schedule();
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	if (!sigd) {
+		error = -EUNATCH;
+		goto out;
+	}
+	set_bit(ATM_VF_LISTEN,&vcc->flags);
+	sk->sk_max_ack_backlog = backlog > 0 ? backlog : ATM_BACKLOG_DEFAULT;
+	error = -sk->sk_err;
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_accept(struct socket *sock,struct socket *newsock,int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	struct atmsvc_msg *msg;
+	struct atm_vcc *old_vcc = ATM_SD(sock);
+	struct atm_vcc *new_vcc;
+	int error;
+
+	lock_sock(sk);
+
+	error = svc_create(newsock,0);
+	if (error)
+		goto out;
+
+	new_vcc = ATM_SD(newsock);
+
+	DPRINTK("svc_accept %p -> %p\n",old_vcc,new_vcc);
+	while (1) {
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		while (!(skb = skb_dequeue(&sk->sk_receive_queue)) &&
+		       sigd) {
+			if (test_bit(ATM_VF_RELEASED,&old_vcc->flags)) break;
+			if (test_bit(ATM_VF_CLOSE,&old_vcc->flags)) {
+				error = -sk->sk_err;
+				break;
+			}
+			if (flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+			}
+			release_sock(sk);
+			schedule();
+			lock_sock(sk);
+			if (signal_pending(current)) {
+				error = -ERESTARTSYS;
+				break;
+			}
+			prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		}
+		finish_wait(sk->sk_sleep, &wait);
+		if (error)
+			goto out;
+		if (!skb) {
+			error = -EUNATCH;
+			goto out;
+		}
+		msg = (struct atmsvc_msg *) skb->data;
+		new_vcc->qos = msg->qos;
+		set_bit(ATM_VF_HASQOS,&new_vcc->flags);
+		new_vcc->remote = msg->svc;
+		new_vcc->local = msg->local;
+		new_vcc->sap = msg->sap;
+		error = vcc_connect(newsock, msg->pvc.sap_addr.itf,
+				    msg->pvc.sap_addr.vpi, msg->pvc.sap_addr.vci);
+		dev_kfree_skb(skb);
+		sk->sk_ack_backlog--;
+		if (error) {
+			sigd_enq2(NULL,as_reject,old_vcc,NULL,NULL,
+			    &old_vcc->qos,error);
+			error = error == -EAGAIN ? -EBUSY : error;
+			goto out;
+		}
+		/* wait should be short, so we ignore the non-blocking flag */
+		set_bit(ATM_VF_WAITING, &new_vcc->flags);
+		prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		sigd_enq(new_vcc,as_accept,old_vcc,NULL,NULL);
+		while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) {
+			release_sock(sk);
+			schedule();
+			lock_sock(sk);
+			prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		}
+		finish_wait(sk_atm(new_vcc)->sk_sleep, &wait);
+		if (!sigd) {
+			error = -EUNATCH;
+			goto out;
+		}
+		if (!sk_atm(new_vcc)->sk_err)
+			break;
+		if (sk_atm(new_vcc)->sk_err != ERESTARTSYS) {
+			error = -sk_atm(new_vcc)->sk_err;
+			goto out;
+		}
+	}
+	newsock->state = SS_CONNECTED;
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_getname(struct socket *sock,struct sockaddr *sockaddr,
+    int *sockaddr_len,int peer)
+{
+	struct sockaddr_atmsvc *addr;
+
+	*sockaddr_len = sizeof(struct sockaddr_atmsvc);
+	addr = (struct sockaddr_atmsvc *) sockaddr;
+	memcpy(addr,peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
+	    sizeof(struct sockaddr_atmsvc));
+	return 0;
+}
+
+
+int svc_change_qos(struct atm_vcc *vcc,struct atm_qos *qos)
+{
+	struct sock *sk = sk_atm(vcc);
+	DEFINE_WAIT(wait);
+
+	set_bit(ATM_VF_WAITING, &vcc->flags);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	sigd_enq2(vcc,as_modify,NULL,NULL,&vcc->local,qos,0);
+	while (test_bit(ATM_VF_WAITING, &vcc->flags) &&
+	       !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
+		schedule();
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	if (!sigd) return -EUNATCH;
+	return -sk->sk_err;
+}
+
+
+static int svc_setsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int value, error = 0;
+
+	lock_sock(sk);
+	switch (optname) {
+		case SO_ATMSAP:
+			if (level != SOL_ATM || optlen != sizeof(struct atm_sap)) {
+				error = -EINVAL;
+				goto out;
+			}
+			if (copy_from_user(&vcc->sap, optval, optlen)) {
+				error = -EFAULT;
+				goto out;
+			}
+			set_bit(ATM_VF_HASSAP, &vcc->flags);
+			break;
+ 		case SO_MULTIPOINT:
+			if (level != SOL_ATM || optlen != sizeof(int)) {
+				error = -EINVAL;
+				goto out;
+			}
+ 			if (get_user(value, (int __user *) optval)) {
+ 				error = -EFAULT;
+				goto out;
+			}
+			if (value == 1) {
+				set_bit(ATM_VF_SESSION, &vcc->flags);
+			} else if (value == 0) {
+				clear_bit(ATM_VF_SESSION, &vcc->flags);
+			} else {
+				error = -EINVAL;
+			}
+  			break;
+		default:
+			error = vcc_setsockopt(sock, level, optname,
+					       optval, optlen);
+	}
+
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_getsockopt(struct socket *sock,int level,int optname,
+    char __user *optval,int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	int error = 0, len;
+
+	lock_sock(sk);
+	if (!__SO_LEVEL_MATCH(optname, level) || optname != SO_ATMSAP) {
+		error = vcc_getsockopt(sock, level, optname, optval, optlen);
+		goto out;
+	}
+	if (get_user(len, optlen)) {
+		error = -EFAULT;
+		goto out;
+	}
+	if (len != sizeof(struct atm_sap)) {
+		error = -EINVAL;
+		goto out;
+	}
+	if (copy_to_user(optval, &ATM_SD(sock)->sap, sizeof(struct atm_sap))) {
+		error = -EFAULT;
+		goto out;
+	}
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
+			int sockaddr_len, int flags)
+{
+	DEFINE_WAIT(wait);
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int error;
+
+	lock_sock(sk);
+	set_bit(ATM_VF_WAITING, &vcc->flags);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	sigd_enq(vcc, as_addparty, NULL, NULL,
+	         (struct sockaddr_atmsvc *) sockaddr);
+	if (flags & O_NONBLOCK) {
+		finish_wait(sk->sk_sleep, &wait);
+		error = -EINPROGRESS;
+		goto out;
+	}
+	DPRINTK("svc_addparty added wait queue\n");
+	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+		schedule();
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	error = xchg(&sk->sk_err_soft, 0);
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_dropparty(struct socket *sock, int ep_ref)
+{
+	DEFINE_WAIT(wait);
+	struct sock *sk = sock->sk;
+	struct atm_vcc *vcc = ATM_SD(sock);
+	int error;
+
+	lock_sock(sk);
+	set_bit(ATM_VF_WAITING, &vcc->flags);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
+	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
+		schedule();
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	if (!sigd) {
+		error = -EUNATCH;
+		goto out;
+	}
+	error = xchg(&sk->sk_err_soft, 0);
+out:
+	release_sock(sk);
+	return error;
+}
+
+
+static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ 	int error, ep_ref;
+ 	struct sockaddr_atmsvc sa;
+	struct atm_vcc *vcc = ATM_SD(sock);
+  
+	switch (cmd) {
+ 		case ATM_ADDPARTY:
+ 			if (!test_bit(ATM_VF_SESSION, &vcc->flags))
+ 				return -EINVAL;
+ 			if (copy_from_user(&sa, (void __user *) arg, sizeof(sa)))
+				return -EFAULT;
+ 			error = svc_addparty(sock, (struct sockaddr *) &sa, sizeof(sa), 0);
+ 			break;
+ 		case ATM_DROPPARTY:
+ 			if (!test_bit(ATM_VF_SESSION, &vcc->flags))
+ 				return -EINVAL;
+ 			if (copy_from_user(&ep_ref, (void __user *) arg, sizeof(int)))
+				return -EFAULT;
+ 			error = svc_dropparty(sock, ep_ref);
+ 			break;
+  		default:
+			error = vcc_ioctl(sock, cmd, arg);
+	}
+
+	return error;
+}
+
+static struct proto_ops svc_proto_ops = {
+	.family =	PF_ATMSVC,
+	.owner =	THIS_MODULE,
+
+	.release =	svc_release,
+	.bind =		svc_bind,
+	.connect =	svc_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	svc_accept,
+	.getname =	svc_getname,
+	.poll =		vcc_poll,
+	.ioctl =	svc_ioctl,
+	.listen =	svc_listen,
+	.shutdown =	svc_shutdown,
+	.setsockopt =	svc_setsockopt,
+	.getsockopt =	svc_getsockopt,
+	.sendmsg =	vcc_sendmsg,
+	.recvmsg =	vcc_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+
+static int svc_create(struct socket *sock,int protocol)
+{
+	int error;
+
+	sock->ops = &svc_proto_ops;
+	error = vcc_create(sock, protocol, AF_ATMSVC);
+	if (error) return error;
+	ATM_SD(sock)->local.sas_family = AF_ATMSVC;
+	ATM_SD(sock)->remote.sas_family = AF_ATMSVC;
+	return 0;
+}
+
+
+static struct net_proto_family svc_family_ops = {
+	.family = PF_ATMSVC,
+	.create = svc_create,
+	.owner = THIS_MODULE,
+};
+
+
+/*
+ *	Initialize the ATM SVC protocol family
+ */
+
+int __init atmsvc_init(void)
+{
+	return sock_register(&svc_family_ops);
+}
+
+void atmsvc_exit(void)
+{
+	sock_unregister(PF_ATMSVC);
+}
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
new file mode 100644
index 00000000000..a8993a04172
--- /dev/null
+++ b/net/ax25/Kconfig
@@ -0,0 +1,110 @@
+#
+# Amateur Radio protocols and AX.25 device configuration
+#
+# 19971130	Now in an own category to make correct compilation of the
+#		AX.25 stuff easier...
+#		Joerg Reuter DL1BKE <jreuter@yaina.de>
+# 19980129	Moved to net/ax25/Config.in, sourcing device drivers.
+
+menuconfig HAMRADIO
+	depends on NET
+	bool "Amateur Radio support"
+	help
+	  If you want to connect your Linux box to an amateur radio, answer Y
+	  here. You want to read <http://www.tapr.org/tapr/html/pkthome.html> and
+	  the AX25-HOWTO, available from <http://www.tldp.org/docs.html#howto>.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about amateur radio.
+
+comment "Packet Radio protocols"
+	depends on HAMRADIO && NET
+
+config AX25
+	tristate "Amateur Radio AX.25 Level 2 protocol"
+	depends on HAMRADIO && NET
+	---help---
+	  This is the protocol used for computer communication over amateur
+	  radio. It is either used by itself for point-to-point links, or to
+	  carry other protocols such as tcp/ip. To use it, you need a device
+	  that connects your Linux box to your amateur radio. You can either
+	  use a low speed TNC (a Terminal Node Controller acts as a kind of
+	  modem connecting your computer's serial port to your radio's
+	  microphone input and speaker output) supporting the KISS protocol or
+	  one of the various SCC cards that are supported by the generic Z8530
+	  or the DMA SCC driver. Another option are the Baycom modem serial
+	  and parallel port hacks or the sound card modem (supported by their
+	  own drivers). If you say Y here, you also have to say Y to one of
+	  those drivers.
+
+	  Information about where to get supporting software for Linux amateur
+	  radio as well as information about how to configure an AX.25 port is
+	  contained in the AX25-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>. You might also want to
+	  check out the file <file:Documentation/networking/ax25.txt> in the
+	  kernel source. More information about digital amateur radio in
+	  general is on the WWW at
+	  <http://www.tapr.org/tapr/html/pkthome.html>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ax25.
+
+config AX25_DAMA_SLAVE
+	bool "AX.25 DAMA Slave support"
+	depends on AX25
+	help
+	  DAMA is a mechanism to prevent collisions when doing AX.25
+	  networking. A DAMA server (called "master") accepts incoming traffic
+	  from clients (called "slaves") and redistributes it to other slaves.
+	  If you say Y here, your Linux box will act as a DAMA slave; this is
+	  transparent in that you don't have to do any special DAMA
+	  configuration. (Linux cannot yet act as a DAMA server.) If unsure,
+	  say N.
+
+#	 bool '    AX.25 DAMA Master support' CONFIG_AX25_DAMA_MASTER
+config NETROM
+	tristate "Amateur Radio NET/ROM protocol"
+	depends on AX25
+	---help---
+	  NET/ROM is a network layer protocol on top of AX.25 useful for
+	  routing.
+
+	  A comprehensive listing of all the software for Linux amateur radio
+	  users as well as information about how to configure an AX.25 port is
+	  contained in the AX25-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>. You also might want to
+	  check out the file <file:Documentation/networking/ax25.txt>. More
+	  information about digital amateur radio in general is on the WWW at
+	  <http://www.tapr.org/tapr/html/pkthome.html>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called netrom.
+
+config ROSE
+	tristate "Amateur Radio X.25 PLP (Rose)"
+	depends on AX25
+	---help---
+	  The Packet Layer Protocol (PLP) is a way to route packets over X.25
+	  connections in general and amateur radio AX.25 connections in
+	  particular, essentially an alternative to NET/ROM.
+
+	  A comprehensive listing of all the software for Linux amateur radio
+	  users as well as information about how to configure an AX.25 port is
+	  contained in the AX25-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.  You also might want to
+	  check out the file <file:Documentation/networking/ax25.txt>. More
+	  information about digital amateur radio in general is on the WWW at
+	  <http://www.tapr.org/tapr/html/pkthome.html>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rose.
+
+
+menu "AX.25 network device drivers"
+	depends on HAMRADIO && NET && AX25!=n
+
+source "drivers/net/hamradio/Kconfig"
+
+endmenu
+
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
new file mode 100644
index 00000000000..43c46d2cafb
--- /dev/null
+++ b/net/ax25/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the Linux AX.25 layer.
+#
+
+obj-$(CONFIG_AX25) += ax25.o
+
+ax25-y	 := ax25_addr.o ax25_dev.o ax25_iface.o ax25_in.o ax25_ip.o ax25_out.o \
+	    ax25_route.o ax25_std_in.o ax25_std_subr.o ax25_std_timer.o \
+	    ax25_subr.o ax25_timer.o ax25_uid.o af_ax25.o
+ax25-$(CONFIG_AX25_DAMA_SLAVE) += ax25_ds_in.o ax25_ds_subr.o ax25_ds_timer.o
+ax25-$(CONFIG_SYSCTL) += sysctl_net_ax25.o
diff --git a/net/ax25/TODO b/net/ax25/TODO
new file mode 100644
index 00000000000..4089c49e45c
--- /dev/null
+++ b/net/ax25/TODO
@@ -0,0 +1,24 @@
+Do the ax25_list_lock, ax25_dev_lock, linkfail_lockreally, ax25_frag_lock and
+listen_lock have to be bh-safe?
+
+Do the netrom and rose locks have to be bh-safe?
+
+A device might be deleted after lookup in the SIOCADDRT ioctl but before it's
+being used.
+
+Routes to a device being taken down might be deleted by ax25_rt_device_down
+but added by somebody else before the device has been deleted fully.
+
+Massive amounts of lock_kernel / unlock_kernel are just a temporary solution to
+get around the removal of SOCKOPS_WRAP.  A serious locking strategy has to be
+implemented.
+
+The ax25_rt_find_route synopsys is pervert but I somehow had to deal with
+the race caused by the static variable in it's previous implementation.
+
+Implement proper socket locking in netrom and rose.
+
+Check socket locking when ax25_rcv is sending to raw sockets.  In particular
+ax25_send_to_raw() seems fishy.  Heck - ax25_rcv is fishy.
+
+Handle XID and TEST frames properly.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
new file mode 100644
index 00000000000..33b1a376302
--- /dev/null
+++ b/net/ax25/af_ax25.c
@@ -0,0 +1,2050 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk)
+ * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
+ * Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl)
+ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/netfilter.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <net/tcp.h>
+#include <net/ip.h>
+#include <net/arp.h>
+
+
+
+HLIST_HEAD(ax25_list);
+DEFINE_SPINLOCK(ax25_list_lock);
+
+static struct proto_ops ax25_proto_ops;
+
+static void ax25_free_sock(struct sock *sk)
+{
+	ax25_cb_put(ax25_sk(sk));
+}
+
+/*
+ *	Socket removal during an interrupt is now safe.
+ */
+static void ax25_cb_del(ax25_cb *ax25)
+{
+	if (!hlist_unhashed(&ax25->ax25_node)) {
+		spin_lock_bh(&ax25_list_lock);
+		hlist_del_init(&ax25->ax25_node);
+		spin_unlock_bh(&ax25_list_lock);
+		ax25_cb_put(ax25);
+	}
+}
+
+/*
+ *	Kill all bound sockets on a dropped device.
+ */
+static void ax25_kill_by_device(struct net_device *dev)
+{
+	ax25_dev *ax25_dev;
+	ax25_cb *s;
+	struct hlist_node *node;
+
+	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+		return;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(s, node, &ax25_list) {
+		if (s->ax25_dev == ax25_dev) {
+			s->ax25_dev = NULL;
+			ax25_disconnect(s, ENETUNREACH);
+		}
+	}
+	spin_unlock_bh(&ax25_list_lock);
+}
+
+/*
+ *	Handle device status changes.
+ */
+static int ax25_device_event(struct notifier_block *this, unsigned long event,
+	void *ptr)
+{
+	struct net_device *dev = (struct net_device *)ptr;
+
+	/* Reject non AX.25 devices */
+	if (dev->type != ARPHRD_AX25)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		ax25_dev_device_up(dev);
+		break;
+	case NETDEV_DOWN:
+		ax25_kill_by_device(dev);
+		ax25_rt_device_down(dev);
+		ax25_dev_device_down(dev);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ *	Add a socket to the bound sockets list.
+ */
+void ax25_cb_add(ax25_cb *ax25)
+{
+	spin_lock_bh(&ax25_list_lock);
+	ax25_cb_hold(ax25);
+	hlist_add_head(&ax25->ax25_node, &ax25_list);
+	spin_unlock_bh(&ax25_list_lock);
+}
+
+/*
+ *	Find a socket that wants to accept the SABM we have just
+ *	received.
+ */
+struct sock *ax25_find_listener(ax25_address *addr, int digi,
+	struct net_device *dev, int type)
+{
+	ax25_cb *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(s, node, &ax25_list) {
+		if ((s->iamdigi && !digi) || (!s->iamdigi && digi))
+			continue;
+		if (s->sk && !ax25cmp(&s->source_addr, addr) &&
+		    s->sk->sk_type == type && s->sk->sk_state == TCP_LISTEN) {
+			/* If device is null we match any device */
+			if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) {
+				sock_hold(s->sk);
+				spin_unlock_bh(&ax25_list_lock);
+				return s->sk;
+			}
+		}
+	}
+	spin_unlock_bh(&ax25_list_lock);
+
+	return NULL;
+}
+
+/*
+ *	Find an AX.25 socket given both ends.
+ */
+struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
+	int type)
+{
+	struct sock *sk = NULL;
+	ax25_cb *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(s, node, &ax25_list) {
+		if (s->sk && !ax25cmp(&s->source_addr, my_addr) &&
+		    !ax25cmp(&s->dest_addr, dest_addr) &&
+		    s->sk->sk_type == type) {
+			sk = s->sk;
+			sock_hold(sk);
+			break;
+		}
+	}
+
+	spin_unlock_bh(&ax25_list_lock);
+
+	return sk;
+}
+
+/*
+ *	Find an AX.25 control block given both ends. It will only pick up
+ *	floating AX.25 control blocks or non Raw socket bound control blocks.
+ */
+ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr,
+	ax25_digi *digi, struct net_device *dev)
+{
+	ax25_cb *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(s, node, &ax25_list) {
+		if (s->sk && s->sk->sk_type != SOCK_SEQPACKET)
+			continue;
+		if (s->ax25_dev == NULL)
+			continue;
+		if (ax25cmp(&s->source_addr, src_addr) == 0 && ax25cmp(&s->dest_addr, dest_addr) == 0 && s->ax25_dev->dev == dev) {
+			if (digi != NULL && digi->ndigi != 0) {
+				if (s->digipeat == NULL)
+					continue;
+				if (ax25digicmp(s->digipeat, digi) != 0)
+					continue;
+			} else {
+				if (s->digipeat != NULL && s->digipeat->ndigi != 0)
+					continue;
+			}
+			ax25_cb_hold(s);
+			spin_unlock_bh(&ax25_list_lock);
+
+			return s;
+		}
+	}
+	spin_unlock_bh(&ax25_list_lock);
+
+	return NULL;
+}
+
+void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
+{
+	ax25_cb *s;
+	struct sk_buff *copy;
+	struct hlist_node *node;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(s, node, &ax25_list) {
+		if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 &&
+		    s->sk->sk_type == SOCK_RAW &&
+		    s->sk->sk_protocol == proto &&
+		    s->ax25_dev->dev == skb->dev &&
+		    atomic_read(&s->sk->sk_rmem_alloc) <= s->sk->sk_rcvbuf) {
+			if ((copy = skb_clone(skb, GFP_ATOMIC)) == NULL)
+				continue;
+			if (sock_queue_rcv_skb(s->sk, copy) != 0)
+				kfree_skb(copy);
+		}
+	}
+	spin_unlock_bh(&ax25_list_lock);
+}
+
+/*
+ *	Deferred destroy.
+ */
+void ax25_destroy_socket(ax25_cb *);
+
+/*
+ *	Handler for deferred kills.
+ */
+static void ax25_destroy_timer(unsigned long data)
+{
+	ax25_cb *ax25=(ax25_cb *)data;
+	struct sock *sk;
+	
+	sk=ax25->sk;
+	
+	bh_lock_sock(sk);
+	sock_hold(sk);
+	ax25_destroy_socket(ax25);
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ *	This is called from user mode and the timers. Thus it protects itself
+ *	against interrupt users but doesn't worry about being called during
+ *	work. Once it is removed from the queue no interrupt or bottom half
+ *	will touch it and we are (fairly 8-) ) safe.
+ */
+void ax25_destroy_socket(ax25_cb *ax25)
+{
+	struct sk_buff *skb;
+
+	ax25_cb_del(ax25);
+
+	ax25_stop_heartbeat(ax25);
+	ax25_stop_t1timer(ax25);
+	ax25_stop_t2timer(ax25);
+	ax25_stop_t3timer(ax25);
+	ax25_stop_idletimer(ax25);
+
+	ax25_clear_queues(ax25);	/* Flush the queues */
+
+	if (ax25->sk != NULL) {
+		while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) {
+			if (skb->sk != ax25->sk) {
+				/* A pending connection */
+				ax25_cb *sax25 = ax25_sk(skb->sk);
+
+				/* Queue the unaccepted socket for death */
+				sock_orphan(skb->sk);
+
+				ax25_start_heartbeat(sax25);
+				sax25->state = AX25_STATE_0;
+			}
+
+			kfree_skb(skb);
+		}
+		skb_queue_purge(&ax25->sk->sk_write_queue);
+	}
+
+	if (ax25->sk != NULL) {
+		if (atomic_read(&ax25->sk->sk_wmem_alloc) ||
+		    atomic_read(&ax25->sk->sk_rmem_alloc)) {
+			/* Defer: outstanding buffers */
+			init_timer(&ax25->dtimer);
+			ax25->dtimer.expires  = jiffies + 2 * HZ;
+			ax25->dtimer.function = ax25_destroy_timer;
+			ax25->dtimer.data     = (unsigned long)ax25;
+			add_timer(&ax25->dtimer);
+		} else {
+			struct sock *sk=ax25->sk;
+			ax25->sk=NULL;
+			sock_put(sk);
+		}
+	} else {
+		ax25_cb_put(ax25);
+	}
+}
+
+/*
+ * dl1bke 960311: set parameters for existing AX.25 connections,
+ *		  includes a KILL command to abort any connection.
+ *		  VERY useful for debugging ;-)
+ */
+static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
+{
+	struct ax25_ctl_struct ax25_ctl;
+	ax25_digi digi;
+	ax25_dev *ax25_dev;
+	ax25_cb *ax25;
+	unsigned int k;
+
+	if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
+		return -EFAULT;
+
+	if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
+		return -ENODEV;
+
+	if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
+		return -EINVAL;
+
+	digi.ndigi = ax25_ctl.digi_count;
+	for (k = 0; k < digi.ndigi; k++)
+		digi.calls[k] = ax25_ctl.digi_addr[k];
+
+	if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+		return -ENOTCONN;
+
+	switch (ax25_ctl.cmd) {
+	case AX25_KILL:
+		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+#ifdef CONFIG_AX25_DAMA_SLAVE
+		if (ax25_dev->dama.slave && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE)
+			ax25_dama_off(ax25);
+#endif
+		ax25_disconnect(ax25, ENETRESET);
+		break;
+
+  	case AX25_WINDOW:
+  		if (ax25->modulus == AX25_MODULUS) {
+  			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 7)
+  				return -EINVAL;
+  		} else {
+  			if (ax25_ctl.arg < 1 || ax25_ctl.arg > 63)
+  				return -EINVAL;
+  		}
+  		ax25->window = ax25_ctl.arg;
+  		break;
+
+  	case AX25_T1:
+		if (ax25_ctl.arg < 1)
+  			return -EINVAL;
+  		ax25->rtt = (ax25_ctl.arg * HZ) / 2;
+  		ax25->t1  = ax25_ctl.arg * HZ;
+  		break;
+
+  	case AX25_T2:
+  		if (ax25_ctl.arg < 1)
+  			return -EINVAL;
+  		ax25->t2 = ax25_ctl.arg * HZ;
+  		break;
+
+  	case AX25_N2:
+  		if (ax25_ctl.arg < 1 || ax25_ctl.arg > 31)
+			return -EINVAL;
+  		ax25->n2count = 0;
+  		ax25->n2 = ax25_ctl.arg;
+  		break;
+
+  	case AX25_T3:
+  		if (ax25_ctl.arg < 0)
+  			return -EINVAL;
+  		ax25->t3 = ax25_ctl.arg * HZ;
+  		break;
+
+  	case AX25_IDLE:
+  		if (ax25_ctl.arg < 0)
+  			return -EINVAL;
+  		ax25->idle = ax25_ctl.arg * 60 * HZ;
+  		break;
+
+  	case AX25_PACLEN:
+  		if (ax25_ctl.arg < 16 || ax25_ctl.arg > 65535)
+  			return -EINVAL;
+  		ax25->paclen = ax25_ctl.arg;
+  		break;
+
+  	default:
+  		return -EINVAL;
+	  }
+
+	return 0;
+}
+
+/*
+ *	Fill in a created AX.25 created control block with the default
+ *	values for a particular device.
+ */
+void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev)
+{
+	ax25->ax25_dev = ax25_dev;
+
+	if (ax25->ax25_dev != NULL) {
+		ax25->rtt     = ax25_dev->values[AX25_VALUES_T1] / 2;
+		ax25->t1      = ax25_dev->values[AX25_VALUES_T1];
+		ax25->t2      = ax25_dev->values[AX25_VALUES_T2];
+		ax25->t3      = ax25_dev->values[AX25_VALUES_T3];
+		ax25->n2      = ax25_dev->values[AX25_VALUES_N2];
+		ax25->paclen  = ax25_dev->values[AX25_VALUES_PACLEN];
+		ax25->idle    = ax25_dev->values[AX25_VALUES_IDLE];
+		ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF];
+
+		if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) {
+			ax25->modulus = AX25_EMODULUS;
+			ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
+		} else {
+			ax25->modulus = AX25_MODULUS;
+			ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
+		}
+	} else {
+		ax25->rtt     = AX25_DEF_T1 / 2;
+		ax25->t1      = AX25_DEF_T1;
+		ax25->t2      = AX25_DEF_T2;
+		ax25->t3      = AX25_DEF_T3;
+		ax25->n2      = AX25_DEF_N2;
+		ax25->paclen  = AX25_DEF_PACLEN;
+		ax25->idle    = AX25_DEF_IDLE;
+		ax25->backoff = AX25_DEF_BACKOFF;
+
+		if (AX25_DEF_AXDEFMODE) {
+			ax25->modulus = AX25_EMODULUS;
+			ax25->window  = AX25_DEF_EWINDOW;
+		} else {
+			ax25->modulus = AX25_MODULUS;
+			ax25->window  = AX25_DEF_WINDOW;
+		}
+	}
+}
+
+/*
+ * Create an empty AX.25 control block.
+ */
+ax25_cb *ax25_create_cb(void)
+{
+	ax25_cb *ax25;
+
+	if ((ax25 = kmalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL)
+		return NULL;
+
+	memset(ax25, 0x00, sizeof(*ax25));
+	atomic_set(&ax25->refcount, 1);
+
+	skb_queue_head_init(&ax25->write_queue);
+	skb_queue_head_init(&ax25->frag_queue);
+	skb_queue_head_init(&ax25->ack_queue);
+	skb_queue_head_init(&ax25->reseq_queue);
+
+	init_timer(&ax25->timer);
+	init_timer(&ax25->t1timer);
+	init_timer(&ax25->t2timer);
+	init_timer(&ax25->t3timer);
+	init_timer(&ax25->idletimer);
+
+	ax25_fillin_cb(ax25, NULL);
+
+	ax25->state = AX25_STATE_0;
+
+	return ax25;
+}
+
+/*
+ *	Handling for system calls applied via the various interfaces to an
+ *	AX25 socket object
+ */
+
+static int ax25_setsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	ax25_cb *ax25;
+	struct net_device *dev;
+	char devname[IFNAMSIZ];
+	int opt, res = 0;
+
+	if (level != SOL_AX25)
+		return -ENOPROTOOPT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(opt, (int __user *)optval))
+		return -EFAULT;
+
+	lock_sock(sk);
+	ax25 = ax25_sk(sk);
+
+	switch (optname) {
+	case AX25_WINDOW:
+		if (ax25->modulus == AX25_MODULUS) {
+			if (opt < 1 || opt > 7) {
+				res = -EINVAL;
+				break;
+			}
+		} else {
+			if (opt < 1 || opt > 63) {
+				res = -EINVAL;
+				break;
+			}
+		}
+		ax25->window = opt;
+		break;
+
+	case AX25_T1:
+		if (opt < 1) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->rtt = (opt * HZ) / 2;
+		ax25->t1  = opt * HZ;
+		break;
+
+	case AX25_T2:
+		if (opt < 1) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->t2 = opt * HZ;
+		break;
+
+	case AX25_N2:
+		if (opt < 1 || opt > 31) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->n2 = opt;
+		break;
+
+	case AX25_T3:
+		if (opt < 1) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->t3 = opt * HZ;
+		break;
+
+	case AX25_IDLE:
+		if (opt < 0) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->idle = opt * 60 * HZ;
+		break;
+
+	case AX25_BACKOFF:
+		if (opt < 0 || opt > 2) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->backoff = opt;
+		break;
+
+	case AX25_EXTSEQ:
+		ax25->modulus = opt ? AX25_EMODULUS : AX25_MODULUS;
+		break;
+
+	case AX25_PIDINCL:
+		ax25->pidincl = opt ? 1 : 0;
+		break;
+
+	case AX25_IAMDIGI:
+		ax25->iamdigi = opt ? 1 : 0;
+		break;
+
+	case AX25_PACLEN:
+		if (opt < 16 || opt > 65535) {
+			res = -EINVAL;
+			break;
+		}
+		ax25->paclen = opt;
+		break;
+
+	case SO_BINDTODEVICE:
+		if (optlen > IFNAMSIZ)
+			optlen=IFNAMSIZ;
+		if (copy_from_user(devname, optval, optlen)) {
+		res = -EFAULT;
+			break;
+		}
+
+		dev = dev_get_by_name(devname);
+		if (dev == NULL) {
+			res = -ENODEV;
+			break;
+		}
+
+		if (sk->sk_type == SOCK_SEQPACKET &&
+		   (sock->state != SS_UNCONNECTED ||
+		    sk->sk_state == TCP_LISTEN)) {
+			res = -EADDRNOTAVAIL;
+			dev_put(dev);
+			break;
+		}
+
+		ax25->ax25_dev = ax25_dev_ax25dev(dev);
+		ax25_fillin_cb(ax25, ax25->ax25_dev);
+		break;
+
+	default:
+		res = -ENOPROTOOPT;
+	}
+	release_sock(sk);
+
+	return res;
+}
+
+static int ax25_getsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	ax25_cb *ax25;
+	struct ax25_dev *ax25_dev;
+	char devname[IFNAMSIZ];
+	void *valptr;
+	int val = 0;
+	int maxlen, length;
+
+	if (level != SOL_AX25)
+		return -ENOPROTOOPT;
+
+	if (get_user(maxlen, optlen))
+		return -EFAULT;
+
+	if (maxlen < 1)
+		return -EFAULT;
+
+	valptr = (void *) &val;
+	length = min_t(unsigned int, maxlen, sizeof(int));
+
+	lock_sock(sk);
+	ax25 = ax25_sk(sk);
+
+	switch (optname) {
+	case AX25_WINDOW:
+		val = ax25->window;
+		break;
+
+	case AX25_T1:
+		val = ax25->t1 / HZ;
+		break;
+
+	case AX25_T2:
+		val = ax25->t2 / HZ;
+		break;
+
+	case AX25_N2:
+		val = ax25->n2;
+		break;
+
+	case AX25_T3:
+		val = ax25->t3 / HZ;
+		break;
+
+	case AX25_IDLE:
+		val = ax25->idle / (60 * HZ);
+		break;
+
+	case AX25_BACKOFF:
+		val = ax25->backoff;
+		break;
+
+	case AX25_EXTSEQ:
+		val = (ax25->modulus == AX25_EMODULUS);
+		break;
+
+	case AX25_PIDINCL:
+		val = ax25->pidincl;
+		break;
+
+	case AX25_IAMDIGI:
+		val = ax25->iamdigi;
+		break;
+
+	case AX25_PACLEN:
+		val = ax25->paclen;
+		break;
+
+	case SO_BINDTODEVICE:
+		ax25_dev = ax25->ax25_dev;
+
+		if (ax25_dev != NULL && ax25_dev->dev != NULL) {
+			strlcpy(devname, ax25_dev->dev->name, sizeof(devname));
+			length = strlen(devname) + 1;
+		} else {
+			*devname = '\0';
+			length = 1;
+		}
+
+		valptr = (void *) devname;
+		break;
+
+	default:
+		release_sock(sk);
+		return -ENOPROTOOPT;
+	}
+	release_sock(sk);
+
+	if (put_user(length, optlen))
+		return -EFAULT;
+
+	return copy_to_user(optval, valptr, length) ? -EFAULT : 0;
+}
+
+static int ax25_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int res = 0;
+
+	lock_sock(sk);
+	if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_LISTEN) {
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = TCP_LISTEN;
+		goto out;
+	}
+	res = -EOPNOTSUPP;
+
+out:
+	release_sock(sk);
+
+	return res;
+}
+
+/*
+ * XXX: when creating ax25_sock we should update the .obj_size setting
+ * below.
+ */
+static struct proto ax25_proto = {
+	.name	  = "AX25",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct sock),
+};
+
+static int ax25_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	ax25_cb *ax25;
+
+	switch (sock->type) {
+	case SOCK_DGRAM:
+		if (protocol == 0 || protocol == PF_AX25)
+			protocol = AX25_P_TEXT;
+		break;
+
+	case SOCK_SEQPACKET:
+		switch (protocol) {
+		case 0:
+		case PF_AX25:	/* For CLX */
+			protocol = AX25_P_TEXT;
+			break;
+		case AX25_P_SEGMENT:
+#ifdef CONFIG_INET
+		case AX25_P_ARP:
+		case AX25_P_IP:
+#endif
+#ifdef CONFIG_NETROM
+		case AX25_P_NETROM:
+#endif
+#ifdef CONFIG_ROSE
+		case AX25_P_ROSE:
+#endif
+			return -ESOCKTNOSUPPORT;
+#ifdef CONFIG_NETROM_MODULE
+		case AX25_P_NETROM:
+			if (ax25_protocol_is_registered(AX25_P_NETROM))
+				return -ESOCKTNOSUPPORT;
+#endif
+#ifdef CONFIG_ROSE_MODULE
+		case AX25_P_ROSE:
+			if (ax25_protocol_is_registered(AX25_P_ROSE))
+				return -ESOCKTNOSUPPORT;
+#endif
+		default:
+			break;
+		}
+		break;
+
+	case SOCK_RAW:
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL)
+		return -ENOMEM;
+
+	ax25 = sk->sk_protinfo = ax25_create_cb();
+	if (!ax25) {
+		sk_free(sk);
+		return -ENOMEM;
+	}
+
+	sock_init_data(sock, sk);
+
+	sk->sk_destruct = ax25_free_sock;
+	sock->ops    = &ax25_proto_ops;
+	sk->sk_protocol = protocol;
+
+	ax25->sk    = sk;
+
+	return 0;
+}
+
+struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
+{
+	struct sock *sk;
+	ax25_cb *ax25, *oax25;
+
+	if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+		return NULL;
+
+	if ((ax25 = ax25_create_cb()) == NULL) {
+		sk_free(sk);
+		return NULL;
+	}
+
+	switch (osk->sk_type) {
+	case SOCK_DGRAM:
+		break;
+	case SOCK_SEQPACKET:
+		break;
+	default:
+		sk_free(sk);
+		ax25_cb_put(ax25);
+		return NULL;
+	}
+
+	sock_init_data(NULL, sk);
+
+	sk->sk_destruct = ax25_free_sock;
+	sk->sk_type     = osk->sk_type;
+	sk->sk_socket   = osk->sk_socket;
+	sk->sk_priority = osk->sk_priority;
+	sk->sk_protocol = osk->sk_protocol;
+	sk->sk_rcvbuf   = osk->sk_rcvbuf;
+	sk->sk_sndbuf   = osk->sk_sndbuf;
+	sk->sk_state    = TCP_ESTABLISHED;
+	sk->sk_sleep    = osk->sk_sleep;
+
+	if (sock_flag(osk, SOCK_DBG))
+		sock_set_flag(sk, SOCK_DBG);
+
+	if (sock_flag(osk, SOCK_ZAPPED))
+		sock_set_flag(sk, SOCK_ZAPPED);
+
+	oax25 = ax25_sk(osk);
+
+	ax25->modulus = oax25->modulus;
+	ax25->backoff = oax25->backoff;
+	ax25->pidincl = oax25->pidincl;
+	ax25->iamdigi = oax25->iamdigi;
+	ax25->rtt     = oax25->rtt;
+	ax25->t1      = oax25->t1;
+	ax25->t2      = oax25->t2;
+	ax25->t3      = oax25->t3;
+	ax25->n2      = oax25->n2;
+	ax25->idle    = oax25->idle;
+	ax25->paclen  = oax25->paclen;
+	ax25->window  = oax25->window;
+
+	ax25->ax25_dev    = ax25_dev;
+	ax25->source_addr = oax25->source_addr;
+
+	if (oax25->digipeat != NULL) {
+		if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+			sk_free(sk);
+			ax25_cb_put(ax25);
+			return NULL;
+		}
+
+		memcpy(ax25->digipeat, oax25->digipeat, sizeof(ax25_digi));
+	}
+
+	sk->sk_protinfo = ax25;
+	ax25->sk    = sk;
+
+	return sk;
+}
+
+static int ax25_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	ax25_cb *ax25;
+
+	if (sk == NULL)
+		return 0;
+
+	sock_hold(sk);
+	sock_orphan(sk);
+	lock_sock(sk);
+	ax25 = ax25_sk(sk);
+
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		switch (ax25->state) {
+		case AX25_STATE_0:
+			release_sock(sk);
+			ax25_disconnect(ax25, 0);
+			lock_sock(sk);
+			ax25_destroy_socket(ax25);
+			break;
+
+		case AX25_STATE_1:
+		case AX25_STATE_2:
+			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+			release_sock(sk);
+			ax25_disconnect(ax25, 0);
+			lock_sock(sk);
+			ax25_destroy_socket(ax25);
+			break;
+
+		case AX25_STATE_3:
+		case AX25_STATE_4:
+			ax25_clear_queues(ax25);
+			ax25->n2count = 0;
+
+			switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+			case AX25_PROTO_STD_SIMPLEX:
+			case AX25_PROTO_STD_DUPLEX:
+				ax25_send_control(ax25,
+						  AX25_DISC,
+						  AX25_POLLON,
+						  AX25_COMMAND);
+				ax25_stop_t2timer(ax25);
+				ax25_stop_t3timer(ax25);
+				ax25_stop_idletimer(ax25);
+				break;
+#ifdef CONFIG_AX25_DAMA_SLAVE
+			case AX25_PROTO_DAMA_SLAVE:
+				ax25_stop_t3timer(ax25);
+				ax25_stop_idletimer(ax25);
+				break;
+#endif
+			}
+			ax25_calculate_t1(ax25);
+			ax25_start_t1timer(ax25);
+			ax25->state = AX25_STATE_2;
+			sk->sk_state                = TCP_CLOSE;
+			sk->sk_shutdown            |= SEND_SHUTDOWN;
+			sk->sk_state_change(sk);
+			sock_set_flag(sk, SOCK_DESTROY);
+			break;
+
+		default:
+			break;
+		}
+	} else {
+		sk->sk_state     = TCP_CLOSE;
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		sk->sk_state_change(sk);
+		ax25_destroy_socket(ax25);
+	}
+
+	sock->sk   = NULL;
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+/*
+ *	We support a funny extension here so you can (as root) give any callsign
+ *	digipeated via a local address as source. This hack is obsolete now
+ *	that we've implemented support for SO_BINDTODEVICE. It is however small
+ *	and trivially backward compatible.
+ */
+static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
+	ax25_dev *ax25_dev = NULL;
+	ax25_address *call;
+	ax25_cb *ax25;
+	int err = 0;
+
+	if (addr_len != sizeof(struct sockaddr_ax25) &&
+	    addr_len != sizeof(struct full_sockaddr_ax25)) {
+		/* support for old structure may go away some time */
+		if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
+		    (addr_len > sizeof(struct full_sockaddr_ax25))) {
+			return -EINVAL;
+	}
+
+		printk(KERN_WARNING "ax25_bind(): %s uses old (6 digipeater) socket structure.\n",
+			current->comm);
+	}
+
+	if (addr->fsa_ax25.sax25_family != AF_AX25)
+		return -EINVAL;
+
+	call = ax25_findbyuid(current->euid);
+	if (call == NULL && ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
+		return -EACCES;
+	}
+
+	lock_sock(sk);
+
+	ax25 = ax25_sk(sk);
+	if (!sock_flag(sk, SOCK_ZAPPED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (call == NULL)
+		ax25->source_addr = addr->fsa_ax25.sax25_call;
+	else
+		ax25->source_addr = *call;
+
+	/*
+	 * User already set interface with SO_BINDTODEVICE
+	 */
+	if (ax25->ax25_dev != NULL)
+		goto done;
+
+	if (addr_len > sizeof(struct sockaddr_ax25) && addr->fsa_ax25.sax25_ndigis == 1) {
+		if (ax25cmp(&addr->fsa_digipeater[0], &null_ax25_address) != 0 &&
+		    (ax25_dev = ax25_addr_ax25dev(&addr->fsa_digipeater[0])) == NULL) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
+	} else {
+		if ((ax25_dev = ax25_addr_ax25dev(&addr->fsa_ax25.sax25_call)) == NULL) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
+	}
+
+	if (ax25_dev != NULL)
+		ax25_fillin_cb(ax25, ax25_dev);
+
+done:
+	ax25_cb_add(ax25);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+out:
+	release_sock(sk);
+
+	return 0;
+}
+
+/*
+ *	FIXME: nonblock behaviour looks like it may have a bug.
+ */
+static int ax25_connect(struct socket *sock, struct sockaddr *uaddr,
+	int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	ax25_cb *ax25 = ax25_sk(sk), *ax25t;
+	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
+	ax25_digi *digi = NULL;
+	int ct = 0, err = 0;
+
+	/*
+	 * some sanity checks. code further down depends on this
+	 */
+
+	if (addr_len == sizeof(struct sockaddr_ax25)) {
+		/* support for this will go away in early 2.5.x */
+		printk(KERN_WARNING "ax25_connect(): %s uses obsolete socket structure\n",
+			current->comm);
+	}
+	else if (addr_len != sizeof(struct full_sockaddr_ax25)) {
+		/* support for old structure may go away some time */
+		if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
+		    (addr_len > sizeof(struct full_sockaddr_ax25))) {
+			return -EINVAL;
+		}
+
+		printk(KERN_WARNING "ax25_connect(): %s uses old (6 digipeater) socket structure.\n",
+			current->comm);
+	}
+
+	if (fsa->fsa_ax25.sax25_family != AF_AX25)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	/* deal with restarts */
+	if (sock->state == SS_CONNECTING) {
+		switch (sk->sk_state) {
+		case TCP_SYN_SENT: /* still trying */
+			err = -EINPROGRESS;
+			goto out;
+
+		case TCP_ESTABLISHED: /* connection established */
+			sock->state = SS_CONNECTED;
+			goto out;
+
+		case TCP_CLOSE: /* connection refused */
+			sock->state = SS_UNCONNECTED;
+			err = -ECONNREFUSED;
+			goto out;
+		}
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
+		err = -EISCONN;	/* No reconnect on a seqpacket socket */
+		goto out;
+	}
+
+	sk->sk_state   = TCP_CLOSE;
+	sock->state = SS_UNCONNECTED;
+
+	if (ax25->digipeat != NULL) {
+		kfree(ax25->digipeat);
+		ax25->digipeat = NULL;
+	}
+
+	/*
+	 *	Handle digi-peaters to be used.
+	 */
+	if (addr_len > sizeof(struct sockaddr_ax25) &&
+	    fsa->fsa_ax25.sax25_ndigis != 0) {
+		/* Valid number of digipeaters ? */
+		if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
+			err = -ENOBUFS;
+			goto out;
+		}
+
+		digi->ndigi      = fsa->fsa_ax25.sax25_ndigis;
+		digi->lastrepeat = -1;
+
+		while (ct < fsa->fsa_ax25.sax25_ndigis) {
+			if ((fsa->fsa_digipeater[ct].ax25_call[6] &
+			     AX25_HBIT) && ax25->iamdigi) {
+				digi->repeated[ct] = 1;
+				digi->lastrepeat   = ct;
+			} else {
+				digi->repeated[ct] = 0;
+			}
+			digi->calls[ct] = fsa->fsa_digipeater[ct];
+			ct++;
+		}
+	}
+
+	/*
+	 *	Must bind first - autobinding in this may or may not work. If
+	 *	the socket is already bound, check to see if the device has
+	 *	been filled in, error if it hasn't.
+	 */
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		/* check if we can remove this feature. It is broken. */
+		printk(KERN_WARNING "ax25_connect(): %s uses autobind, please contact jreuter@yaina.de\n",
+			current->comm);
+		if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
+			kfree(digi);
+			goto out;
+		}
+
+		ax25_fillin_cb(ax25, ax25->ax25_dev);
+		ax25_cb_add(ax25);
+	} else {
+		if (ax25->ax25_dev == NULL) {
+			kfree(digi);
+			err = -EHOSTUNREACH;
+			goto out;
+		}
+	}
+
+	if (sk->sk_type == SOCK_SEQPACKET &&
+	    (ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi,
+		    	 ax25->ax25_dev->dev))) {
+		kfree(digi);
+		err = -EADDRINUSE;		/* Already such a connection */
+		ax25_cb_put(ax25t);
+		goto out;
+	}
+
+	ax25->dest_addr = fsa->fsa_ax25.sax25_call;
+	ax25->digipeat  = digi;
+
+	/* First the easy one */
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		sock->state = SS_CONNECTED;
+		sk->sk_state   = TCP_ESTABLISHED;
+		goto out;
+	}
+
+	/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
+	sock->state        = SS_CONNECTING;
+	sk->sk_state          = TCP_SYN_SENT;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_establish_data_link(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		if (ax25->ax25_dev->dama.slave)
+			ax25_ds_establish_data_link(ax25);
+		else
+			ax25_std_establish_data_link(ax25);
+		break;
+#endif
+	}
+
+	ax25->state = AX25_STATE_1;
+
+	ax25_start_heartbeat(ax25);
+
+	/* Now the loop */
+	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
+		err = -EINPROGRESS;
+		goto out;
+	}
+
+	if (sk->sk_state == TCP_SYN_SENT) {
+		struct task_struct *tsk = current;
+		DECLARE_WAITQUEUE(wait, tsk);
+
+		add_wait_queue(sk->sk_sleep, &wait);
+		for (;;) {
+			if (sk->sk_state != TCP_SYN_SENT)
+				break;
+			set_current_state(TASK_INTERRUPTIBLE);
+			release_sock(sk);
+			if (!signal_pending(tsk)) {
+				schedule();
+				lock_sock(sk);
+				continue;
+			}
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -ERESTARTSYS;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &wait);
+	}
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		/* Not in ABM, not in WAIT_UA -> failed */
+		sock->state = SS_UNCONNECTED;
+		err = sock_error(sk);	/* Always set at this point */
+		goto out;
+	}
+
+	sock->state = SS_CONNECTED;
+
+	err=0;
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	struct sk_buff *skb;
+	struct sock *newsk;
+	struct sock *sk;
+	int err = 0;
+
+	if (sock->state != SS_UNCONNECTED)
+		return -EINVAL;
+
+	if ((sk = sock->sk) == NULL)
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sk->sk_state != TCP_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 *	The read queue this time is holding sockets ready to use
+	 *	hooked into the SABM we saved
+	 */
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb)
+			break;
+
+		release_sock(sk);
+		current->state = TASK_INTERRUPTIBLE;
+		if (flags & O_NONBLOCK) {
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -EWOULDBLOCK;
+		}
+		if (!signal_pending(tsk)) {
+			schedule();
+			lock_sock(sk);
+			continue;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &wait);
+		return -ERESTARTSYS;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	newsk		 = skb->sk;
+	newsk->sk_socket = newsock;
+	newsk->sk_sleep	 = &newsock->wait;
+
+	/* Now attach up the new socket */
+	kfree_skb(skb);
+	sk->sk_ack_backlog--;
+	newsock->sk    = newsk;
+	newsock->state = SS_CONNECTED;
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
+	int *uaddr_len, int peer)
+{
+	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
+	struct sock *sk = sock->sk;
+	unsigned char ndigi, i;
+	ax25_cb *ax25;
+	int err = 0;
+
+	lock_sock(sk);
+	ax25 = ax25_sk(sk);
+
+	if (peer != 0) {
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+
+		fsa->fsa_ax25.sax25_family = AF_AX25;
+		fsa->fsa_ax25.sax25_call   = ax25->dest_addr;
+		fsa->fsa_ax25.sax25_ndigis = 0;
+
+		if (ax25->digipeat != NULL) {
+			ndigi = ax25->digipeat->ndigi;
+			fsa->fsa_ax25.sax25_ndigis = ndigi;
+			for (i = 0; i < ndigi; i++)
+				fsa->fsa_digipeater[i] =
+						ax25->digipeat->calls[i];
+		}
+	} else {
+		fsa->fsa_ax25.sax25_family = AF_AX25;
+		fsa->fsa_ax25.sax25_call   = ax25->source_addr;
+		fsa->fsa_ax25.sax25_ndigis = 1;
+		if (ax25->ax25_dev != NULL) {
+			memcpy(&fsa->fsa_digipeater[0],
+			       ax25->ax25_dev->dev->dev_addr, AX25_ADDR_LEN);
+		} else {
+			fsa->fsa_digipeater[0] = null_ax25_address;
+		}
+	}
+	*uaddr_len = sizeof (struct full_sockaddr_ax25);
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+	struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)msg->msg_name;
+	struct sock *sk = sock->sk;
+	struct sockaddr_ax25 sax;
+	struct sk_buff *skb;
+	ax25_digi dtmp, *dp;
+	unsigned char *asmptr;
+	ax25_cb *ax25;
+	size_t size;
+	int lv, err, addr_len = msg->msg_namelen;
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	lock_sock(sk);
+	ax25 = ax25_sk(sk);
+
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		err = -EADDRNOTAVAIL;
+		goto out;
+	}
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (ax25->ax25_dev == NULL) {
+		err = -ENETUNREACH;
+		goto out;
+	}
+
+	if (len > ax25->ax25_dev->dev->mtu) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+		
+	if (usax != NULL) {
+		if (usax->sax25_family != AF_AX25) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (addr_len == sizeof(struct sockaddr_ax25)) {
+			printk(KERN_WARNING "ax25_sendmsg(): %s uses obsolete socket structure\n",
+				current->comm);
+		}
+		else if (addr_len != sizeof(struct full_sockaddr_ax25)) {
+			/* support for old structure may go away some time */
+			if ((addr_len < sizeof(struct sockaddr_ax25) + sizeof(ax25_address) * 6) ||
+		    	    (addr_len > sizeof(struct full_sockaddr_ax25))) {
+		    		err = -EINVAL;
+				goto out;
+			}
+
+			printk(KERN_WARNING "ax25_sendmsg(): %s uses old (6 digipeater) socket structure.\n",
+				current->comm);
+		}
+
+		if (addr_len > sizeof(struct sockaddr_ax25) && usax->sax25_ndigis != 0) {
+			int ct           = 0;
+			struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
+
+			/* Valid number of digipeaters ? */
+			if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			dtmp.ndigi      = usax->sax25_ndigis;
+
+			while (ct < usax->sax25_ndigis) {
+				dtmp.repeated[ct] = 0;
+				dtmp.calls[ct]    = fsa->fsa_digipeater[ct];
+				ct++;
+			}
+
+			dtmp.lastrepeat = 0;
+		}
+
+		sax = *usax;
+		if (sk->sk_type == SOCK_SEQPACKET &&
+		    ax25cmp(&ax25->dest_addr, &sax.sax25_call)) {
+			err = -EISCONN;
+			goto out;
+		}
+		if (usax->sax25_ndigis == 0)
+			dp = NULL;
+		else
+			dp = &dtmp;
+	} else {
+		/*
+		 *	FIXME: 1003.1g - if the socket is like this because
+		 *	it has become closed (not started closed) and is VC
+		 *	we ought to SIGPIPE, EPIPE
+		 */
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+		sax.sax25_family = AF_AX25;
+		sax.sax25_call   = ax25->dest_addr;
+		dp = ax25->digipeat;
+	}
+
+	SOCK_DEBUG(sk, "AX.25: sendto: Addresses built.\n");
+
+	/* Build a packet */
+	SOCK_DEBUG(sk, "AX.25: sendto: building packet.\n");
+
+	/* Assume the worst case */
+	size = len + ax25->ax25_dev->dev->hard_header_len;
+
+	skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto out;
+
+	skb_reserve(skb, size - len);
+
+	SOCK_DEBUG(sk, "AX.25: Appending user data\n");
+
+	/* User data follows immediately after the AX.25 data */
+	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
+		err = -EFAULT;
+		kfree_skb(skb);
+		goto out;
+	}
+
+	skb->nh.raw = skb->data;
+
+	/* Add the PID if one is not supplied by the user in the skb */
+	if (!ax25->pidincl) {
+		asmptr  = skb_push(skb, 1);
+		*asmptr = sk->sk_protocol;
+	}
+
+	SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
+
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		/* Connected mode sockets go via the LAPB machine */
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			kfree_skb(skb);
+			err = -ENOTCONN;
+			goto out;
+		}
+
+		/* Shove it onto the queue and kick */
+		ax25_output(ax25, ax25->paclen, skb);
+
+		err = len;
+		goto out;
+	}
+
+	asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+
+	SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
+
+	if (dp != NULL)
+		SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
+
+	/* Build an AX.25 header */
+	asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
+					&sax.sax25_call, dp,
+					AX25_COMMAND, AX25_MODULUS));
+
+	SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
+
+	skb->h.raw = asmptr;
+
+	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+
+	*asmptr = AX25_UI;
+
+	/* Datagram frames go straight out of the door as UI */
+	skb->dev = ax25->ax25_dev->dev;
+
+	ax25_queue_xmit(skb);
+
+	err = len;
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
+	struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied;
+	int err = 0;
+
+	lock_sock(sk);
+	/*
+	 * 	This works for seqpacket too. The receiver has ordered the
+	 *	queue for us! We do one quick check first though
+	 */
+	if (sk->sk_type == SOCK_SEQPACKET && sk->sk_state != TCP_ESTABLISHED) {
+		err =  -ENOTCONN;
+		goto out;
+	}
+
+	/* Now we can treat all alike */
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+	                        flags & MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto out;
+
+	if (!ax25_sk(sk)->pidincl)
+		skb_pull(skb, 1);		/* Remove PID */
+
+	skb->h.raw = skb->data;
+	copied     = skb->len;
+
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	if (msg->msg_namelen != 0) {
+		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
+		ax25_digi digi;
+		ax25_address src;
+
+		ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
+
+		sax->sax25_family = AF_AX25;
+		/* We set this correctly, even though we may not let the
+		   application know the digi calls further down (because it
+		   did NOT ask to know them).  This could get political... **/
+		sax->sax25_ndigis = digi.ndigi;
+		sax->sax25_call   = src;
+
+		if (sax->sax25_ndigis != 0) {
+			int ct;
+			struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)sax;
+
+			for (ct = 0; ct < digi.ndigi; ct++)
+				fsa->fsa_digipeater[ct] = digi.calls[ct];
+		}
+		msg->msg_namelen = sizeof(struct full_sockaddr_ax25);
+	}
+
+	skb_free_datagram(sk, skb);
+	err = copied;
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int ax25_shutdown(struct socket *sk, int how)
+{
+	/* FIXME - generate DM and RNR states */
+	return -EOPNOTSUPP;
+}
+
+static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+	int res = 0;
+
+	lock_sock(sk);
+	switch (cmd) {
+	case TIOCOUTQ: {
+		long amount;
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		res = put_user(amount, (int __user *)argp);
+		break;
+	}
+
+	case TIOCINQ: {
+		struct sk_buff *skb;
+		long amount = 0L;
+		/* These two are safe on a single CPU system as only user tasks fiddle here */
+		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+			amount = skb->len;
+		res = put_user(amount, (int __user *)argp);
+		break;
+	}
+
+	case SIOCGSTAMP:
+		if (sk != NULL) {
+			res = sock_get_timestamp(sk, argp);
+			break;
+	 	}
+		res = -EINVAL;
+		break;
+
+	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
+	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
+	case SIOCAX25GETUID: {
+		struct sockaddr_ax25 sax25;
+		if (copy_from_user(&sax25, argp, sizeof(sax25))) {
+			res = -EFAULT;
+			break;
+		}
+		res = ax25_uid_ioctl(cmd, &sax25);
+		break;
+	}
+
+	case SIOCAX25NOUID: {	/* Set the default policy (default/bar) */
+		long amount;
+		if (!capable(CAP_NET_ADMIN)) {
+			res = -EPERM;
+			break;
+		}
+		if (get_user(amount, (long __user *)argp)) {
+			res = -EFAULT;
+			break;
+		}
+		if (amount > AX25_NOUID_BLOCK) {
+			res = -EINVAL;
+			break;
+		}
+		ax25_uid_policy = amount;
+		res = 0;
+		break;
+	}
+
+	case SIOCADDRT:
+	case SIOCDELRT:
+	case SIOCAX25OPTRT:
+		if (!capable(CAP_NET_ADMIN)) {
+			res = -EPERM;
+			break;
+		}
+		res = ax25_rt_ioctl(cmd, argp);
+		break;
+
+	case SIOCAX25CTLCON:
+		if (!capable(CAP_NET_ADMIN)) {
+			res = -EPERM;
+			break;
+		}
+		res = ax25_ctl_ioctl(cmd, argp);
+		break;
+
+	case SIOCAX25GETINFO:
+	case SIOCAX25GETINFOOLD: {
+		ax25_cb *ax25 = ax25_sk(sk);
+		struct ax25_info_struct ax25_info;
+
+		ax25_info.t1        = ax25->t1   / HZ;
+		ax25_info.t2        = ax25->t2   / HZ;
+		ax25_info.t3        = ax25->t3   / HZ;
+		ax25_info.idle      = ax25->idle / (60 * HZ);
+		ax25_info.n2        = ax25->n2;
+		ax25_info.t1timer   = ax25_display_timer(&ax25->t1timer)   / HZ;
+		ax25_info.t2timer   = ax25_display_timer(&ax25->t2timer)   / HZ;
+		ax25_info.t3timer   = ax25_display_timer(&ax25->t3timer)   / HZ;
+		ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ);
+		ax25_info.n2count   = ax25->n2count;
+		ax25_info.state     = ax25->state;
+		ax25_info.rcv_q     = atomic_read(&sk->sk_rmem_alloc);
+		ax25_info.snd_q     = atomic_read(&sk->sk_wmem_alloc);
+		ax25_info.vs        = ax25->vs;
+		ax25_info.vr        = ax25->vr;
+		ax25_info.va        = ax25->va;
+		ax25_info.vs_max    = ax25->vs; /* reserved */
+		ax25_info.paclen    = ax25->paclen;
+		ax25_info.window    = ax25->window;
+
+		/* old structure? */
+		if (cmd == SIOCAX25GETINFOOLD) {
+			static int warned = 0;
+			if (!warned) {
+				printk(KERN_INFO "%s uses old SIOCAX25GETINFO\n",
+					current->comm);
+				warned=1;
+			}
+
+			if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct_deprecated))) {
+				res = -EFAULT;
+				break;
+			}
+		} else {
+			if (copy_to_user(argp, &ax25_info, sizeof(struct ax25_info_struct))) {
+				res = -EINVAL;
+				break;
+			}
+		}
+		res = 0;
+		break;
+	}
+
+	case SIOCAX25ADDFWD:
+	case SIOCAX25DELFWD: {
+		struct ax25_fwd_struct ax25_fwd;
+		if (!capable(CAP_NET_ADMIN)) {
+			res = -EPERM;
+			break;
+		}
+		if (copy_from_user(&ax25_fwd, argp, sizeof(ax25_fwd))) {
+			res = -EFAULT;
+			break;
+		}
+		res = ax25_fwd_ioctl(cmd, &ax25_fwd);
+		break;
+	}
+
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+	case SIOCGIFMETRIC:
+	case SIOCSIFMETRIC:
+		res = -EINVAL;
+		break;
+
+	default:
+		res = dev_ioctl(cmd, argp);
+		break;
+	}
+	release_sock(sk);
+
+	return res;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *ax25_info_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ax25_cb *ax25;
+	struct hlist_node *node;
+	int i = 0;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(ax25, node, &ax25_list) {
+		if (i == *pos)
+			return ax25;
+		++i;
+	}
+	return NULL;
+}
+
+static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return hlist_entry( ((struct ax25_cb *)v)->ax25_node.next,
+			    struct ax25_cb, ax25_node);
+}
+	
+static void ax25_info_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&ax25_list_lock);
+}
+
+static int ax25_info_show(struct seq_file *seq, void *v)
+{
+	ax25_cb *ax25 = v;
+	int k;
+
+
+	/*
+	 * New format:
+	 * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode
+	 */
+
+	seq_printf(seq, "%8.8lx %s %s%s ",
+		   (long) ax25,
+		   ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name,
+		   ax2asc(&ax25->source_addr),
+		   ax25->iamdigi? "*":"");
+	seq_printf(seq, "%s", ax2asc(&ax25->dest_addr));
+
+	for (k=0; (ax25->digipeat != NULL) && (k < ax25->digipeat->ndigi); k++) {
+		seq_printf(seq, ",%s%s",
+			   ax2asc(&ax25->digipeat->calls[k]),
+			   ax25->digipeat->repeated[k]? "*":"");
+	}
+
+	seq_printf(seq, " %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %d %d",
+		   ax25->state,
+		   ax25->vs, ax25->vr, ax25->va,
+		   ax25_display_timer(&ax25->t1timer) / HZ, ax25->t1 / HZ,
+		   ax25_display_timer(&ax25->t2timer) / HZ, ax25->t2 / HZ,
+		   ax25_display_timer(&ax25->t3timer) / HZ, ax25->t3 / HZ,
+		   ax25_display_timer(&ax25->idletimer) / (60 * HZ),
+		   ax25->idle / (60 * HZ),
+		   ax25->n2count, ax25->n2,
+		   ax25->rtt / HZ,
+		   ax25->window,
+		   ax25->paclen);
+
+	if (ax25->sk != NULL) {
+		bh_lock_sock(ax25->sk);
+		seq_printf(seq," %d %d %ld\n",
+			   atomic_read(&ax25->sk->sk_wmem_alloc),
+			   atomic_read(&ax25->sk->sk_rmem_alloc),
+			   ax25->sk->sk_socket != NULL ? SOCK_INODE(ax25->sk->sk_socket)->i_ino : 0L);
+		bh_unlock_sock(ax25->sk);
+	} else {
+		seq_puts(seq, " * * *\n");
+	}
+	return 0;
+}
+
+static struct seq_operations ax25_info_seqops = {
+	.start = ax25_info_start,
+	.next = ax25_info_next,
+	.stop = ax25_info_stop,
+	.show = ax25_info_show,
+};
+
+static int ax25_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ax25_info_seqops);
+}
+
+static struct file_operations ax25_info_fops = {
+	.owner = THIS_MODULE,
+	.open = ax25_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+static struct net_proto_family ax25_family_ops = {
+	.family =	PF_AX25,
+	.create =	ax25_create,
+	.owner	=	THIS_MODULE,
+};
+
+static struct proto_ops ax25_proto_ops = {
+	.family =	PF_AX25,
+	.owner =	THIS_MODULE,
+	.release =	ax25_release,
+	.bind =		ax25_bind,
+	.connect =	ax25_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	ax25_accept,
+	.getname =	ax25_getname,
+	.poll =		datagram_poll,
+	.ioctl =	ax25_ioctl,
+	.listen =	ax25_listen,
+	.shutdown =	ax25_shutdown,
+	.setsockopt =	ax25_setsockopt,
+	.getsockopt =	ax25_getsockopt,
+	.sendmsg =	ax25_sendmsg,
+	.recvmsg =	ax25_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+/*
+ *	Called by socket.c on kernel start up
+ */
+static struct packet_type ax25_packet_type = {
+	.type	=	__constant_htons(ETH_P_AX25),
+	.dev	=	NULL,				/* All devices */
+	.func	=	ax25_kiss_rcv,
+};
+
+static struct notifier_block ax25_dev_notifier = {
+	.notifier_call =ax25_device_event,
+};
+
+EXPORT_SYMBOL(ax25_encapsulate);
+EXPORT_SYMBOL(ax25_rebuild_header);
+EXPORT_SYMBOL(ax25_findbyuid);
+EXPORT_SYMBOL(ax25_find_cb);
+EXPORT_SYMBOL(ax25_linkfail_register);
+EXPORT_SYMBOL(ax25_linkfail_release);
+EXPORT_SYMBOL(ax25_listen_register);
+EXPORT_SYMBOL(ax25_listen_release);
+EXPORT_SYMBOL(ax25_protocol_register);
+EXPORT_SYMBOL(ax25_protocol_release);
+EXPORT_SYMBOL(ax25_send_frame);
+EXPORT_SYMBOL(ax25_uid_policy);
+EXPORT_SYMBOL(ax25cmp);
+EXPORT_SYMBOL(ax2asc);
+EXPORT_SYMBOL(asc2ax);
+EXPORT_SYMBOL(null_ax25_address);
+EXPORT_SYMBOL(ax25_display_timer);
+
+static int __init ax25_init(void)
+{
+	int rc = proto_register(&ax25_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	sock_register(&ax25_family_ops);
+	dev_add_pack(&ax25_packet_type);
+	register_netdevice_notifier(&ax25_dev_notifier);
+	ax25_register_sysctl();
+
+	proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops);
+	proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops);
+	proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops);
+out:
+	return rc;
+}
+module_init(ax25_init);
+
+
+MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
+MODULE_DESCRIPTION("The amateur radio AX.25 link layer protocol");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_AX25);
+
+static void __exit ax25_exit(void)
+{
+	proc_net_remove("ax25_route");
+	proc_net_remove("ax25");
+	proc_net_remove("ax25_calls");
+	ax25_rt_free();
+	ax25_uid_free();
+	ax25_dev_free();
+
+	ax25_unregister_sysctl();
+	unregister_netdevice_notifier(&ax25_dev_notifier);
+
+	dev_remove_pack(&ax25_packet_type);
+
+	sock_unregister(PF_AX25);
+	proto_unregister(&ax25_proto);
+}
+module_exit(ax25_exit);
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
new file mode 100644
index 00000000000..f4fa6dfb846
--- /dev/null
+++ b/net/ax25/ax25_addr.c
@@ -0,0 +1,290 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ *	The null address is defined as a callsign of all spaces with an
+ *	SSID of zero.
+ */
+ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}};
+
+/*
+ *	ax25 -> ascii conversion
+ */
+char *ax2asc(ax25_address *a)
+{
+	static char buf[11];
+	char c, *s;
+	int n;
+
+	for (n = 0, s = buf; n < 6; n++) {
+		c = (a->ax25_call[n] >> 1) & 0x7F;
+
+		if (c != ' ') *s++ = c;
+	}
+
+	*s++ = '-';
+
+	if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) {
+		*s++ = '1';
+		n -= 10;
+	}
+
+	*s++ = n + '0';
+	*s++ = '\0';
+
+	if (*buf == '\0' || *buf == '-')
+	   return "*";
+
+	return buf;
+
+}
+
+/*
+ *	ascii -> ax25 conversion
+ */
+ax25_address *asc2ax(char *callsign)
+{
+	static ax25_address addr;
+	char *s;
+	int n;
+
+	for (s = callsign, n = 0; n < 6; n++) {
+		if (*s != '\0' && *s != '-')
+			addr.ax25_call[n] = *s++;
+		else
+			addr.ax25_call[n] = ' ';
+		addr.ax25_call[n] <<= 1;
+		addr.ax25_call[n] &= 0xFE;
+	}
+
+	if (*s++ == '\0') {
+		addr.ax25_call[6] = 0x00;
+		return &addr;
+	}
+
+	addr.ax25_call[6] = *s++ - '0';
+
+	if (*s != '\0') {
+		addr.ax25_call[6] *= 10;
+		addr.ax25_call[6] += *s++ - '0';
+	}
+
+	addr.ax25_call[6] <<= 1;
+	addr.ax25_call[6] &= 0x1E;
+
+	return &addr;
+}
+
+/*
+ *	Compare two ax.25 addresses
+ */
+int ax25cmp(ax25_address *a, ax25_address *b)
+{
+	int ct = 0;
+
+	while (ct < 6) {
+		if ((a->ax25_call[ct] & 0xFE) != (b->ax25_call[ct] & 0xFE))	/* Clean off repeater bits */
+			return 1;
+		ct++;
+	}
+
+ 	if ((a->ax25_call[ct] & 0x1E) == (b->ax25_call[ct] & 0x1E))	/* SSID without control bit */
+ 		return 0;
+
+ 	return 2;			/* Partial match */
+}
+
+/*
+ *	Compare two AX.25 digipeater paths.
+ */
+int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2)
+{
+	int i;
+
+	if (digi1->ndigi != digi2->ndigi)
+		return 1;
+
+	if (digi1->lastrepeat != digi2->lastrepeat)
+		return 1;
+
+	for (i = 0; i < digi1->ndigi; i++)
+		if (ax25cmp(&digi1->calls[i], &digi2->calls[i]) != 0)
+			return 1;
+
+	return 0;
+}
+
+/*
+ *	Given an AX.25 address pull of to, from, digi list, command/response and the start of data
+ *
+ */
+unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, int *dama)
+{
+	int d = 0;
+
+	if (len < 14) return NULL;
+
+	if (flags != NULL) {
+		*flags = 0;
+
+		if (buf[6] & AX25_CBIT)
+			*flags = AX25_COMMAND;
+		if (buf[13] & AX25_CBIT)
+			*flags = AX25_RESPONSE;
+	}
+
+	if (dama != NULL)
+		*dama = ~buf[13] & AX25_DAMA_FLAG;
+
+	/* Copy to, from */
+	if (dest != NULL)
+		memcpy(dest, buf + 0, AX25_ADDR_LEN);
+	if (src != NULL)
+		memcpy(src,  buf + 7, AX25_ADDR_LEN);
+
+	buf += 2 * AX25_ADDR_LEN;
+	len -= 2 * AX25_ADDR_LEN;
+
+	digi->lastrepeat = -1;
+	digi->ndigi      = 0;
+
+	while (!(buf[-1] & AX25_EBIT)) {
+		if (d >= AX25_MAX_DIGIS)  return NULL;	/* Max of 6 digis */
+		if (len < 7) return NULL;	/* Short packet */
+
+		memcpy(&digi->calls[d], buf, AX25_ADDR_LEN);
+		digi->ndigi = d + 1;
+
+		if (buf[6] & AX25_HBIT) {
+			digi->repeated[d] = 1;
+			digi->lastrepeat  = d;
+		} else {
+			digi->repeated[d] = 0;
+		}
+
+		buf += AX25_ADDR_LEN;
+		len -= AX25_ADDR_LEN;
+		d++;
+	}
+
+	return buf;
+}
+
+/*
+ *	Assemble an AX.25 header from the bits
+ */
+int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, ax25_digi *d, int flag, int modulus)
+{
+	int len = 0;
+	int ct  = 0;
+
+	memcpy(buf, dest, AX25_ADDR_LEN);
+	buf[6] &= ~(AX25_EBIT | AX25_CBIT);
+	buf[6] |= AX25_SSSID_SPARE;
+
+	if (flag == AX25_COMMAND) buf[6] |= AX25_CBIT;
+
+	buf += AX25_ADDR_LEN;
+	len += AX25_ADDR_LEN;
+
+	memcpy(buf, src, AX25_ADDR_LEN);
+	buf[6] &= ~(AX25_EBIT | AX25_CBIT);
+	buf[6] &= ~AX25_SSSID_SPARE;
+
+	if (modulus == AX25_MODULUS)
+		buf[6] |= AX25_SSSID_SPARE;
+	else
+		buf[6] |= AX25_ESSID_SPARE;
+
+	if (flag == AX25_RESPONSE) buf[6] |= AX25_CBIT;
+
+	/*
+	 *	Fast path the normal digiless path
+	 */
+	if (d == NULL || d->ndigi == 0) {
+		buf[6] |= AX25_EBIT;
+		return 2 * AX25_ADDR_LEN;
+	}
+
+	buf += AX25_ADDR_LEN;
+	len += AX25_ADDR_LEN;
+
+	while (ct < d->ndigi) {
+		memcpy(buf, &d->calls[ct], AX25_ADDR_LEN);
+
+		if (d->repeated[ct])
+			buf[6] |= AX25_HBIT;
+		else
+			buf[6] &= ~AX25_HBIT;
+
+		buf[6] &= ~AX25_EBIT;
+		buf[6] |= AX25_SSSID_SPARE;
+
+		buf += AX25_ADDR_LEN;
+		len += AX25_ADDR_LEN;
+		ct++;
+	}
+
+	buf[-1] |= AX25_EBIT;
+
+	return len;
+}
+
+int ax25_addr_size(ax25_digi *dp)
+{
+	if (dp == NULL)
+		return 2 * AX25_ADDR_LEN;
+
+	return AX25_ADDR_LEN * (2 + dp->ndigi);
+}
+
+/*
+ *	Reverse Digipeat List. May not pass both parameters as same struct
+ */
+void ax25_digi_invert(ax25_digi *in, ax25_digi *out)
+{
+	int ct;
+
+	out->ndigi      = in->ndigi;
+	out->lastrepeat = in->ndigi - in->lastrepeat - 2;
+
+	/* Invert the digipeaters */
+	for (ct = 0; ct < in->ndigi; ct++) {
+		out->calls[ct] = in->calls[in->ndigi - ct - 1];
+
+		if (ct <= out->lastrepeat) {
+			out->calls[ct].ax25_call[6] |= AX25_HBIT;
+			out->repeated[ct]            = 1;
+		} else {
+			out->calls[ct].ax25_call[6] &= ~AX25_HBIT;
+			out->repeated[ct]            = 0;
+		}
+	}
+}
+
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
new file mode 100644
index 00000000000..dab77efe34a
--- /dev/null
+++ b/net/ax25/ax25_dev.c
@@ -0,0 +1,208 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/spinlock.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+ax25_dev *ax25_dev_list;
+DEFINE_SPINLOCK(ax25_dev_lock);
+
+ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
+{
+	ax25_dev *ax25_dev, *res = NULL;
+
+	spin_lock_bh(&ax25_dev_lock);
+	for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+		if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) {
+			res = ax25_dev;
+		}
+	spin_unlock_bh(&ax25_dev_lock);
+
+	return res;
+}
+
+/*
+ *	This is called when an interface is brought up. These are
+ *	reasonable defaults.
+ */
+void ax25_dev_device_up(struct net_device *dev)
+{
+	ax25_dev *ax25_dev;
+
+	if ((ax25_dev = kmalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) {
+		printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n");
+		return;
+	}
+
+	ax25_unregister_sysctl();
+
+	memset(ax25_dev, 0x00, sizeof(*ax25_dev));
+
+	dev->ax25_ptr     = ax25_dev;
+	ax25_dev->dev     = dev;
+	dev_hold(dev);
+	ax25_dev->forward = NULL;
+
+	ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE;
+	ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE;
+	ax25_dev->values[AX25_VALUES_BACKOFF]   = AX25_DEF_BACKOFF;
+	ax25_dev->values[AX25_VALUES_CONMODE]   = AX25_DEF_CONMODE;
+	ax25_dev->values[AX25_VALUES_WINDOW]    = AX25_DEF_WINDOW;
+	ax25_dev->values[AX25_VALUES_EWINDOW]   = AX25_DEF_EWINDOW;
+	ax25_dev->values[AX25_VALUES_T1]        = AX25_DEF_T1;
+	ax25_dev->values[AX25_VALUES_T2]        = AX25_DEF_T2;
+	ax25_dev->values[AX25_VALUES_T3]        = AX25_DEF_T3;
+	ax25_dev->values[AX25_VALUES_IDLE]	= AX25_DEF_IDLE;
+	ax25_dev->values[AX25_VALUES_N2]        = AX25_DEF_N2;
+	ax25_dev->values[AX25_VALUES_PACLEN]	= AX25_DEF_PACLEN;
+	ax25_dev->values[AX25_VALUES_PROTOCOL]  = AX25_DEF_PROTOCOL;
+	ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
+
+#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
+	init_timer(&ax25_dev->dama.slave_timer);
+#endif
+
+	spin_lock_bh(&ax25_dev_lock);
+	ax25_dev->next = ax25_dev_list;
+	ax25_dev_list  = ax25_dev;
+	spin_unlock_bh(&ax25_dev_lock);
+
+	ax25_register_sysctl();
+}
+
+void ax25_dev_device_down(struct net_device *dev)
+{
+	ax25_dev *s, *ax25_dev;
+
+	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+		return;
+
+	ax25_unregister_sysctl();
+
+	spin_lock_bh(&ax25_dev_lock);
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	ax25_ds_del_timer(ax25_dev);
+#endif
+
+	/*
+	 *	Remove any packet forwarding that points to this device.
+	 */
+	for (s = ax25_dev_list; s != NULL; s = s->next)
+		if (s->forward == dev)
+			s->forward = NULL;
+
+	if ((s = ax25_dev_list) == ax25_dev) {
+		ax25_dev_list = s->next;
+		spin_unlock_bh(&ax25_dev_lock);
+		dev_put(dev);
+		kfree(ax25_dev);
+		ax25_register_sysctl();
+		return;
+	}
+
+	while (s != NULL && s->next != NULL) {
+		if (s->next == ax25_dev) {
+			s->next = ax25_dev->next;
+			spin_unlock_bh(&ax25_dev_lock);
+			dev_put(dev);
+			kfree(ax25_dev);
+			ax25_register_sysctl();
+			return;
+		}
+
+		s = s->next;
+	}
+	spin_unlock_bh(&ax25_dev_lock);
+	dev->ax25_ptr = NULL;
+
+	ax25_register_sysctl();
+}
+
+int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
+{
+	ax25_dev *ax25_dev, *fwd_dev;
+
+	if ((ax25_dev = ax25_addr_ax25dev(&fwd->port_from)) == NULL)
+		return -EINVAL;
+
+	switch (cmd) {
+	case SIOCAX25ADDFWD:
+		if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+			return -EINVAL;
+		if (ax25_dev->forward != NULL)
+			return -EINVAL;
+		ax25_dev->forward = fwd_dev->dev;
+		break;
+
+	case SIOCAX25DELFWD:
+		if (ax25_dev->forward == NULL)
+			return -EINVAL;
+		ax25_dev->forward = NULL;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct net_device *ax25_fwd_dev(struct net_device *dev)
+{
+	ax25_dev *ax25_dev;
+
+	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+		return dev;
+
+	if (ax25_dev->forward == NULL)
+		return dev;
+
+	return ax25_dev->forward;
+}
+
+/*
+ *	Free all memory associated with device structures.
+ */
+void __exit ax25_dev_free(void)
+{
+	ax25_dev *s, *ax25_dev;
+
+	spin_lock_bh(&ax25_dev_lock);
+	ax25_dev = ax25_dev_list;
+	while (ax25_dev != NULL) {
+		s        = ax25_dev;
+		dev_put(ax25_dev->dev);
+		ax25_dev = ax25_dev->next;
+		kfree(s);
+	}
+	ax25_dev_list = NULL;
+	spin_unlock_bh(&ax25_dev_lock);
+}
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
new file mode 100644
index 00000000000..8adc0022cf5
--- /dev/null
+++ b/net/ax25/ax25_ds_in.c
@@ -0,0 +1,305 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/ip.h>			/* For ip_rcv */
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ *	State machine for state 1, Awaiting Connection State.
+ *	The handling of the timer(s) is in file ax25_ds_timer.c.
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
+{
+	switch (frametype) {
+	case AX25_SABM:
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_SABME:
+		ax25->modulus = AX25_EMODULUS;
+		ax25->window  =  ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_UA:
+		ax25_calculate_rtt(ax25);
+		ax25_stop_t1timer(ax25);
+		ax25_start_t3timer(ax25);
+		ax25_start_idletimer(ax25);
+		ax25->vs      = 0;
+		ax25->va      = 0;
+		ax25->vr      = 0;
+		ax25->state   = AX25_STATE_3;
+		ax25->n2count = 0;
+		if (ax25->sk != NULL) {
+			bh_lock_sock(ax25->sk);
+			ax25->sk->sk_state = TCP_ESTABLISHED;
+			/*
+			 * For WAIT_SABM connections we will produce an accept
+			 * ready socket here
+			 */
+			if (!sock_flag(ax25->sk, SOCK_DEAD))
+				ax25->sk->sk_state_change(ax25->sk);
+			bh_unlock_sock(ax25->sk);
+		}
+		ax25_dama_on(ax25);
+
+		/* according to DK4EG�s spec we are required to
+		 * send a RR RESPONSE FINAL NR=0.
+		 */
+
+		ax25_std_enquiry_response(ax25);
+		break;
+
+	case AX25_DM:
+		if (pf)
+			ax25_disconnect(ax25, ECONNREFUSED);
+		break;
+
+	default:
+		if (pf)
+			ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ *	State machine for state 2, Awaiting Release State.
+ *	The handling of the timer(s) is in file ax25_ds_timer.c
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_ds_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
+{
+	switch (frametype) {
+	case AX25_SABM:
+	case AX25_SABME:
+		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+		ax25_dama_off(ax25);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_dama_off(ax25);
+		ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_DM:
+	case AX25_UA:
+		if (pf) {
+			ax25_dama_off(ax25);
+			ax25_disconnect(ax25, 0);
+		}
+		break;
+
+	case AX25_I:
+	case AX25_REJ:
+	case AX25_RNR:
+	case AX25_RR:
+		if (pf) {
+			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+			ax25_dama_off(ax25);
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ *	State machine for state 3, Connected State.
+ *	The handling of the timer(s) is in file ax25_timer.c
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_ds_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
+{
+	int queued = 0;
+
+	switch (frametype) {
+	case AX25_SABM:
+	case AX25_SABME:
+		if (frametype == AX25_SABM) {
+			ax25->modulus   = AX25_MODULUS;
+			ax25->window    = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		} else {
+			ax25->modulus   = AX25_EMODULUS;
+			ax25->window    = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
+		}
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_stop_t1timer(ax25);
+		ax25_start_t3timer(ax25);
+		ax25_start_idletimer(ax25);
+		ax25->condition = 0x00;
+		ax25->vs        = 0;
+		ax25->va        = 0;
+		ax25->vr        = 0;
+		ax25_requeue_frames(ax25);
+		ax25_dama_on(ax25);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_dama_off(ax25);
+		ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_DM:
+		ax25_dama_off(ax25);
+		ax25_disconnect(ax25, ECONNRESET);
+		break;
+
+	case AX25_RR:
+	case AX25_RNR:
+		if (frametype == AX25_RR)
+			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+		else
+			ax25->condition |= AX25_COND_PEER_RX_BUSY;
+
+		if (ax25_validate_nr(ax25, nr)) {
+			if (ax25_check_iframes_acked(ax25, nr))
+				ax25->n2count=0;
+			if (type == AX25_COMMAND && pf)
+				ax25_ds_enquiry_response(ax25);
+		} else {
+			ax25_ds_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_REJ:
+		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+
+		if (ax25_validate_nr(ax25, nr)) {
+			if (ax25->va != nr)
+				ax25->n2count=0;
+
+			ax25_frames_acked(ax25, nr);
+			ax25_calculate_rtt(ax25);
+			ax25_stop_t1timer(ax25);
+			ax25_start_t3timer(ax25);
+			ax25_requeue_frames(ax25);
+
+			if (type == AX25_COMMAND && pf)
+				ax25_ds_enquiry_response(ax25);
+		} else {
+			ax25_ds_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_I:
+		if (!ax25_validate_nr(ax25, nr)) {
+			ax25_ds_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+		}
+		if (ax25->condition & AX25_COND_PEER_RX_BUSY) {
+			ax25_frames_acked(ax25, nr);
+			ax25->n2count = 0;
+		} else {
+			if (ax25_check_iframes_acked(ax25, nr))
+				ax25->n2count = 0;
+		}
+		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
+			if (pf) ax25_ds_enquiry_response(ax25);
+			break;
+		}
+		if (ns == ax25->vr) {
+			ax25->vr = (ax25->vr + 1) % ax25->modulus;
+			queued = ax25_rx_iframe(ax25, skb);
+			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+				ax25->vr = ns;	/* ax25->vr - 1 */
+			ax25->condition &= ~AX25_COND_REJECT;
+			if (pf) {
+				ax25_ds_enquiry_response(ax25);
+			} else {
+				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
+					ax25->condition |= AX25_COND_ACK_PENDING;
+					ax25_start_t2timer(ax25);
+				}
+			}
+		} else {
+			if (ax25->condition & AX25_COND_REJECT) {
+				if (pf) ax25_ds_enquiry_response(ax25);
+			} else {
+				ax25->condition |= AX25_COND_REJECT;
+				ax25_ds_enquiry_response(ax25);
+				ax25->condition &= ~AX25_COND_ACK_PENDING;
+			}
+		}
+		break;
+
+	case AX25_FRMR:
+	case AX25_ILLEGAL:
+		ax25_ds_establish_data_link(ax25);
+		ax25->state = AX25_STATE_1;
+		break;
+
+	default:
+		break;
+	}
+
+	return queued;
+}
+
+/*
+ *	Higher level upcall for a LAPB frame
+ */
+int ax25_ds_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type)
+{
+	int queued = 0, frametype, ns, nr, pf;
+
+	frametype = ax25_decode(ax25, skb, &ns, &nr, &pf);
+
+	switch (ax25->state) {
+	case AX25_STATE_1:
+		queued = ax25_ds_state1_machine(ax25, skb, frametype, pf, type);
+		break;
+	case AX25_STATE_2:
+		queued = ax25_ds_state2_machine(ax25, skb, frametype, pf, type);
+		break;
+	case AX25_STATE_3:
+		queued = ax25_ds_state3_machine(ax25, skb, frametype, ns, nr, pf, type);
+		break;
+	}
+
+	return queued;
+}
+
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
new file mode 100644
index 00000000000..10ffd2beba3
--- /dev/null
+++ b/net/ax25/ax25_ds_subr.c
@@ -0,0 +1,212 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/spinlock.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+void ax25_ds_nr_error_recovery(ax25_cb *ax25)
+{
+	ax25_ds_establish_data_link(ax25);
+}
+
+/*
+ *	dl1bke 960114: transmit I frames on DAMA poll
+ */
+void ax25_ds_enquiry_response(ax25_cb *ax25)
+{
+	ax25_cb *ax25o;
+	struct hlist_node *node;
+
+	/* Please note that neither DK4EG�s nor DG2FEF�s
+	 * DAMA spec mention the following behaviour as seen
+	 * with TheFirmware:
+	 *
+	 * 	DB0ACH->DL1BKE <RR C P R0> [DAMA]
+	 *	DL1BKE->DB0ACH <I NR=0 NS=0>
+	 *	DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5>
+	 *	DL1BKE->DB0ACH <RR R F R0>
+	 *
+	 * The Flexnet DAMA Master implementation apparently
+	 * insists on the "proper" AX.25 behaviour:
+	 *
+	 * 	DB0ACH->DL1BKE <RR C P R0> [DAMA]
+	 *	DL1BKE->DB0ACH <RR R F R0>
+	 *	DL1BKE->DB0ACH <I NR=0 NS=0>
+	 *	DL1BKE-7->DB0PRA-6 DB0ACH <I C S3 R5>
+	 *
+	 * Flexnet refuses to send us *any* I frame if we send
+	 * a REJ in case AX25_COND_REJECT is set. It is superfluous in
+	 * this mode anyway (a RR or RNR invokes the retransmission).
+	 * Is this a Flexnet bug?
+	 */
+
+	ax25_std_enquiry_response(ax25);
+
+	if (!(ax25->condition & AX25_COND_PEER_RX_BUSY)) {
+		ax25_requeue_frames(ax25);
+		ax25_kick(ax25);
+	}
+
+	if (ax25->state == AX25_STATE_1 || ax25->state == AX25_STATE_2 || skb_peek(&ax25->ack_queue) != NULL)
+		ax25_ds_t1_timeout(ax25);
+	else
+		ax25->n2count = 0;
+
+	ax25_start_t3timer(ax25);
+	ax25_ds_set_timer(ax25->ax25_dev);
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(ax25o, node, &ax25_list) {
+		if (ax25o == ax25)
+			continue;
+
+		if (ax25o->ax25_dev != ax25->ax25_dev)
+			continue;
+
+		if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2) {
+			ax25_ds_t1_timeout(ax25o);
+			continue;
+		}
+
+		if (!(ax25o->condition & AX25_COND_PEER_RX_BUSY) && ax25o->state == AX25_STATE_3) {
+			ax25_requeue_frames(ax25o);
+			ax25_kick(ax25o);
+		}
+
+		if (ax25o->state == AX25_STATE_1 || ax25o->state == AX25_STATE_2 || skb_peek(&ax25o->ack_queue) != NULL)
+			ax25_ds_t1_timeout(ax25o);
+
+		/* do not start T3 for listening sockets (tnx DD8NE) */
+
+		if (ax25o->state != AX25_STATE_0)
+			ax25_start_t3timer(ax25o);
+	}
+	spin_unlock_bh(&ax25_list_lock);
+}
+
+void ax25_ds_establish_data_link(ax25_cb *ax25)
+{
+	ax25->condition &= AX25_COND_DAMA_MODE;
+	ax25->n2count    = 0;
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+	ax25_stop_t2timer(ax25);
+	ax25_start_t3timer(ax25);
+}
+
+/*
+ *	:::FIXME:::
+ *	This is a kludge. Not all drivers recognize kiss commands.
+ *	We need a driver level  request to switch duplex mode, that does
+ *	either SCC changing, PI config or KISS as required. Currently
+ *	this request isn't reliable.
+ */
+static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char param)
+{
+	struct sk_buff *skb;
+	unsigned char *p;
+
+	if (ax25_dev->dev == NULL)
+		return;
+
+	if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb->nh.raw = skb->data;
+	p = skb_put(skb, 2);
+
+	*p++ = cmd;
+	*p++ = param;
+
+	skb->dev      = ax25_dev->dev;
+	skb->protocol = htons(ETH_P_AX25);
+
+	dev_queue_xmit(skb);
+}
+
+/*
+ *	A nasty problem arises if we count the number of DAMA connections
+ *	wrong, especially when connections on the device already existed
+ *	and our network node (or the sysop) decides to turn on DAMA Master
+ *	mode. We thus flag the 'real' slave connections with
+ *	ax25->dama_slave=1 and look on every disconnect if still slave
+ *	connections exist.
+ */
+static int ax25_check_dama_slave(ax25_dev *ax25_dev)
+{
+	ax25_cb *ax25;
+	int res = 0;
+	struct hlist_node *node;
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(ax25, node, &ax25_list)
+		if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) {
+			res = 1;
+			break;
+		}
+	spin_unlock_bh(&ax25_list_lock);
+
+	return res;
+}
+
+static void ax25_dev_dama_on(ax25_dev *ax25_dev)
+{
+	if (ax25_dev == NULL)
+		return;
+
+	if (ax25_dev->dama.slave == 0)
+		ax25_kiss_cmd(ax25_dev, 5, 1);
+
+	ax25_dev->dama.slave = 1;
+	ax25_ds_set_timer(ax25_dev);
+}
+
+void ax25_dev_dama_off(ax25_dev *ax25_dev)
+{
+	if (ax25_dev == NULL)
+		return;
+
+	if (ax25_dev->dama.slave && !ax25_check_dama_slave(ax25_dev)) {
+		ax25_kiss_cmd(ax25_dev, 5, 0);
+		ax25_dev->dama.slave = 0;
+		ax25_ds_del_timer(ax25_dev);
+	}
+}
+
+void ax25_dama_on(ax25_cb *ax25)
+{
+	ax25_dev_dama_on(ax25->ax25_dev);
+	ax25->condition |= AX25_COND_DAMA_MODE;
+}
+
+void ax25_dama_off(ax25_cb *ax25)
+{
+	ax25->condition &= ~AX25_COND_DAMA_MODE;
+	ax25_dev_dama_off(ax25->ax25_dev);
+}
+
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
new file mode 100644
index 00000000000..3a8b67316fc
--- /dev/null
+++ b/net/ax25/ax25_ds_timer.c
@@ -0,0 +1,241 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/tcp.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+static void ax25_ds_timeout(unsigned long);
+
+/*
+ *	Add DAMA slave timeout timer to timer list.
+ *	Unlike the connection based timers the timeout function gets
+ *	triggered every second. Please note that NET_AX25_DAMA_SLAVE_TIMEOUT
+ *	(aka /proc/sys/net/ax25/{dev}/dama_slave_timeout) is still in
+ *	1/10th of a second.
+ */
+
+static void ax25_ds_add_timer(ax25_dev *ax25_dev)
+{
+	struct timer_list *t = &ax25_dev->dama.slave_timer;
+	t->data		= (unsigned long) ax25_dev;
+	t->function	= &ax25_ds_timeout;
+	t->expires	= jiffies + HZ;
+	add_timer(t);
+}
+
+void ax25_ds_del_timer(ax25_dev *ax25_dev)
+{
+	if (ax25_dev)
+		del_timer(&ax25_dev->dama.slave_timer);
+}
+
+void ax25_ds_set_timer(ax25_dev *ax25_dev)
+{
+	if (ax25_dev == NULL)		/* paranoia */
+		return;
+
+	del_timer(&ax25_dev->dama.slave_timer);
+	ax25_dev->dama.slave_timeout = ax25_dev->values[AX25_VALUES_DS_TIMEOUT] / 10;
+	ax25_ds_add_timer(ax25_dev);
+}
+
+/*
+ *	DAMA Slave Timeout
+ *	Silently discard all (slave) connections in case our master forgot us...
+ */
+
+static void ax25_ds_timeout(unsigned long arg)
+{
+	ax25_dev *ax25_dev = (struct ax25_dev *) arg;
+	ax25_cb *ax25;
+	struct hlist_node *node;
+
+	if (ax25_dev == NULL || !ax25_dev->dama.slave)
+		return;			/* Yikes! */
+
+	if (!ax25_dev->dama.slave_timeout || --ax25_dev->dama.slave_timeout) {
+		ax25_ds_set_timer(ax25_dev);
+		return;
+	}
+
+	spin_lock_bh(&ax25_list_lock);
+	ax25_for_each(ax25, node, &ax25_list) {
+		if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE))
+			continue;
+
+		ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+		ax25_disconnect(ax25, ETIMEDOUT);
+	}
+	spin_unlock_bh(&ax25_list_lock);
+
+	ax25_dev_dama_off(ax25_dev);
+}
+
+void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
+{
+	struct sock *sk=ax25->sk;
+
+	if (sk)
+		bh_lock_sock(sk);
+
+	switch (ax25->state) {
+
+	case AX25_STATE_0:
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN &&
+		     sock_flag(sk, SOCK_DEAD))) {
+			if (sk) {
+				sock_hold(sk);
+				ax25_destroy_socket(ax25);
+				sock_put(sk);
+				bh_unlock_sock(sk);
+			} else
+				ax25_destroy_socket(ax25);
+			return;
+		}
+		break;
+
+	case AX25_STATE_3:
+		/*
+		 * Check the state of the receive buffer.
+		 */
+		if (sk != NULL) {
+			if (atomic_read(&sk->sk_rmem_alloc) <
+			    (sk->sk_rcvbuf / 2) &&
+			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
+				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
+				ax25->condition &= ~AX25_COND_ACK_PENDING;
+				break;
+			}
+		}
+		break;
+	}
+
+	if (sk)
+		bh_unlock_sock(sk);
+
+	ax25_start_heartbeat(ax25);
+}
+
+/* dl1bke 960114: T3 works much like the IDLE timeout, but
+ *                gets reloaded with every frame for this
+ *		  connection.
+ */
+void ax25_ds_t3timer_expiry(ax25_cb *ax25)
+{
+	ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+	ax25_dama_off(ax25);
+	ax25_disconnect(ax25, ETIMEDOUT);
+}
+
+/* dl1bke 960228: close the connection when IDLE expires.
+ *		  unlike T3 this timer gets reloaded only on
+ *		  I frames.
+ */
+void ax25_ds_idletimer_expiry(ax25_cb *ax25)
+{
+	ax25_clear_queues(ax25);
+
+	ax25->n2count = 0;
+	ax25->state = AX25_STATE_2;
+
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+	ax25_stop_t3timer(ax25);
+
+	if (ax25->sk != NULL) {
+		bh_lock_sock(ax25->sk);
+		ax25->sk->sk_state     = TCP_CLOSE;
+		ax25->sk->sk_err       = 0;
+		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
+		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+			ax25->sk->sk_state_change(ax25->sk);
+			sock_set_flag(ax25->sk, SOCK_DEAD);
+		}
+		bh_unlock_sock(ax25->sk);
+	}
+}
+
+/* dl1bke 960114: The DAMA protocol requires to send data and SABM/DISC
+ *                within the poll of any connected channel. Remember
+ *                that we are not allowed to send anything unless we
+ *                get polled by the Master.
+ *
+ *                Thus we'll have to do parts of our T1 handling in
+ *                ax25_enquiry_response().
+ */
+void ax25_ds_t1_timeout(ax25_cb *ax25)
+{
+	switch (ax25->state) {
+	case AX25_STATE_1:
+		if (ax25->n2count == ax25->n2) {
+			if (ax25->modulus == AX25_MODULUS) {
+				ax25_disconnect(ax25, ETIMEDOUT);
+				return;
+			} else {
+				ax25->modulus = AX25_MODULUS;
+				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+				ax25->n2count = 0;
+				ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND);
+			}
+		} else {
+			ax25->n2count++;
+			if (ax25->modulus == AX25_MODULUS)
+				ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND);
+			else
+				ax25_send_control(ax25, AX25_SABME, AX25_POLLOFF, AX25_COMMAND);
+		}
+		break;
+
+	case AX25_STATE_2:
+		if (ax25->n2count == ax25->n2) {
+			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+			ax25_disconnect(ax25, ETIMEDOUT);
+			return;
+		} else {
+			ax25->n2count++;
+		}
+		break;
+
+	case AX25_STATE_3:
+		if (ax25->n2count == ax25->n2) {
+			ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
+			ax25_disconnect(ax25, ETIMEDOUT);
+			return;
+		} else {
+			ax25->n2count++;
+		}
+		break;
+	}
+
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+}
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
new file mode 100644
index 00000000000..d68aff10072
--- /dev/null
+++ b/net/ax25/ax25_iface.c
@@ -0,0 +1,266 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+static struct protocol_struct {
+	struct protocol_struct *next;
+	unsigned int pid;
+	int (*func)(struct sk_buff *, ax25_cb *);
+} *protocol_list = NULL;
+static DEFINE_RWLOCK(protocol_list_lock);
+
+static struct linkfail_struct {
+	struct linkfail_struct *next;
+	void (*func)(ax25_cb *, int);
+} *linkfail_list = NULL;
+static DEFINE_SPINLOCK(linkfail_lock);
+
+static struct listen_struct {
+	struct listen_struct *next;
+	ax25_address  callsign;
+	struct net_device *dev;
+} *listen_list = NULL;
+static DEFINE_SPINLOCK(listen_lock);
+
+int ax25_protocol_register(unsigned int pid,
+	int (*func)(struct sk_buff *, ax25_cb *))
+{
+	struct protocol_struct *protocol;
+
+	if (pid == AX25_P_TEXT || pid == AX25_P_SEGMENT)
+		return 0;
+#ifdef CONFIG_INET
+	if (pid == AX25_P_IP || pid == AX25_P_ARP)
+		return 0;
+#endif
+	if ((protocol = kmalloc(sizeof(*protocol), GFP_ATOMIC)) == NULL)
+		return 0;
+
+	protocol->pid  = pid;
+	protocol->func = func;
+
+	write_lock(&protocol_list_lock);
+	protocol->next = protocol_list;
+	protocol_list  = protocol;
+	write_unlock(&protocol_list_lock);
+
+	return 1;
+}
+
+void ax25_protocol_release(unsigned int pid)
+{
+	struct protocol_struct *s, *protocol;
+
+	write_lock(&protocol_list_lock);
+	protocol = protocol_list;
+	if (protocol == NULL) {
+		write_unlock(&protocol_list_lock);
+		return;
+	}
+
+	if (protocol->pid == pid) {
+		protocol_list = protocol->next;
+		write_unlock(&protocol_list_lock);
+		kfree(protocol);
+		return;
+	}
+
+	while (protocol != NULL && protocol->next != NULL) {
+		if (protocol->next->pid == pid) {
+			s = protocol->next;
+			protocol->next = protocol->next->next;
+			write_unlock(&protocol_list_lock);
+			kfree(s);
+			return;
+		}
+
+		protocol = protocol->next;
+	}
+	write_unlock(&protocol_list_lock);
+}
+
+int ax25_linkfail_register(void (*func)(ax25_cb *, int))
+{
+	struct linkfail_struct *linkfail;
+
+	if ((linkfail = kmalloc(sizeof(*linkfail), GFP_ATOMIC)) == NULL)
+		return 0;
+
+	linkfail->func = func;
+
+	spin_lock_bh(&linkfail_lock);
+	linkfail->next = linkfail_list;
+	linkfail_list  = linkfail;
+	spin_unlock_bh(&linkfail_lock);
+
+	return 1;
+}
+
+void ax25_linkfail_release(void (*func)(ax25_cb *, int))
+{
+	struct linkfail_struct *s, *linkfail;
+
+	spin_lock_bh(&linkfail_lock);
+	linkfail = linkfail_list;
+	if (linkfail == NULL) {
+		spin_unlock_bh(&linkfail_lock);
+		return;
+	}
+
+	if (linkfail->func == func) {
+		linkfail_list = linkfail->next;
+		spin_unlock_bh(&linkfail_lock);
+		kfree(linkfail);
+		return;
+	}
+
+	while (linkfail != NULL && linkfail->next != NULL) {
+		if (linkfail->next->func == func) {
+			s = linkfail->next;
+			linkfail->next = linkfail->next->next;
+			spin_unlock_bh(&linkfail_lock);
+			kfree(s);
+			return;
+		}
+
+		linkfail = linkfail->next;
+	}
+	spin_unlock_bh(&linkfail_lock);
+}
+
+int ax25_listen_register(ax25_address *callsign, struct net_device *dev)
+{
+	struct listen_struct *listen;
+
+	if (ax25_listen_mine(callsign, dev))
+		return 0;
+
+	if ((listen = kmalloc(sizeof(*listen), GFP_ATOMIC)) == NULL)
+		return 0;
+
+	listen->callsign = *callsign;
+	listen->dev      = dev;
+
+	spin_lock_bh(&listen_lock);
+	listen->next = listen_list;
+	listen_list  = listen;
+	spin_unlock_bh(&listen_lock);
+
+	return 1;
+}
+
+void ax25_listen_release(ax25_address *callsign, struct net_device *dev)
+{
+	struct listen_struct *s, *listen;
+
+	spin_lock_bh(&listen_lock);
+	listen = listen_list;
+	if (listen == NULL) {
+		spin_unlock_bh(&listen_lock);
+		return;
+	}
+
+	if (ax25cmp(&listen->callsign, callsign) == 0 && listen->dev == dev) {
+		listen_list = listen->next;
+		spin_unlock_bh(&listen_lock);
+		kfree(listen);
+		return;
+	}
+
+	while (listen != NULL && listen->next != NULL) {
+		if (ax25cmp(&listen->next->callsign, callsign) == 0 && listen->next->dev == dev) {
+			s = listen->next;
+			listen->next = listen->next->next;
+			spin_unlock_bh(&listen_lock);
+			kfree(s);
+			return;
+		}
+
+		listen = listen->next;
+	}
+	spin_unlock_bh(&listen_lock);
+}
+
+int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *)
+{
+	int (*res)(struct sk_buff *, ax25_cb *) = NULL;
+	struct protocol_struct *protocol;
+
+	read_lock(&protocol_list_lock);
+	for (protocol = protocol_list; protocol != NULL; protocol = protocol->next)
+		if (protocol->pid == pid) {
+			res = protocol->func;
+			break;
+		}
+	read_unlock(&protocol_list_lock);
+
+	return res;
+}
+
+int ax25_listen_mine(ax25_address *callsign, struct net_device *dev)
+{
+	struct listen_struct *listen;
+
+	spin_lock_bh(&listen_lock);
+	for (listen = listen_list; listen != NULL; listen = listen->next)
+		if (ax25cmp(&listen->callsign, callsign) == 0 && (listen->dev == dev || listen->dev == NULL)) {
+			spin_unlock_bh(&listen_lock);
+			return 1;
+	}
+	spin_unlock_bh(&listen_lock);
+
+	return 0;
+}
+
+void ax25_link_failed(ax25_cb *ax25, int reason)
+{
+	struct linkfail_struct *linkfail;
+
+	spin_lock_bh(&linkfail_lock);
+	for (linkfail = linkfail_list; linkfail != NULL; linkfail = linkfail->next)
+		(linkfail->func)(ax25, reason);
+	spin_unlock_bh(&linkfail_lock);
+}
+
+int ax25_protocol_is_registered(unsigned int pid)
+{
+	struct protocol_struct *protocol;
+	int res = 0;
+
+	read_lock(&protocol_list_lock);
+	for (protocol = protocol_list; protocol != NULL; protocol = protocol->next)
+		if (protocol->pid == pid) {
+			res = 1;
+			break;
+		}
+	read_unlock(&protocol_list_lock);
+
+	return res;
+}
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
new file mode 100644
index 00000000000..3dc808fde33
--- /dev/null
+++ b/net/ax25/ax25_in.c
@@ -0,0 +1,470 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/sock.h>
+#include <net/ip.h>			/* For ip_rcv */
+#include <net/tcp.h>
+#include <net/arp.h>			/* For arp_rcv */
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ *	Given a fragment, queue it on the fragment queue and if the fragment
+ *	is complete, send it back to ax25_rx_iframe.
+ */
+static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
+{
+	struct sk_buff *skbn, *skbo;
+
+	if (ax25->fragno != 0) {
+		if (!(*skb->data & AX25_SEG_FIRST)) {
+			if ((ax25->fragno - 1) == (*skb->data & AX25_SEG_REM)) {
+				/* Enqueue fragment */
+				ax25->fragno = *skb->data & AX25_SEG_REM;
+				skb_pull(skb, 1);	/* skip fragno */
+				ax25->fraglen += skb->len;
+				skb_queue_tail(&ax25->frag_queue, skb);
+
+				/* Last fragment received ? */
+				if (ax25->fragno == 0) {
+					skbn = alloc_skb(AX25_MAX_HEADER_LEN +
+							 ax25->fraglen,
+							 GFP_ATOMIC);
+					if (!skbn) {
+						skb_queue_purge(&ax25->frag_queue);
+						return 1;
+					}
+
+					skb_reserve(skbn, AX25_MAX_HEADER_LEN);
+
+					skbn->dev   = ax25->ax25_dev->dev;
+					skbn->h.raw = skbn->data;
+					skbn->nh.raw = skbn->data;
+
+					/* Copy data from the fragments */
+					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
+						memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+						kfree_skb(skbo);
+					}
+
+					ax25->fraglen = 0;
+
+					if (ax25_rx_iframe(ax25, skbn) == 0)
+						kfree_skb(skbn);
+				}
+
+				return 1;
+			}
+		}
+	} else {
+		/* First fragment received */
+		if (*skb->data & AX25_SEG_FIRST) {
+			skb_queue_purge(&ax25->frag_queue);
+			ax25->fragno = *skb->data & AX25_SEG_REM;
+			skb_pull(skb, 1);		/* skip fragno */
+			ax25->fraglen = skb->len;
+			skb_queue_tail(&ax25->frag_queue, skb);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *	This is where all valid I frames are sent to, to be dispatched to
+ *	whichever protocol requires them.
+ */
+int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
+{
+	int (*func)(struct sk_buff *, ax25_cb *);
+	volatile int queued = 0;
+	unsigned char pid;
+
+	if (skb == NULL) return 0;
+
+	ax25_start_idletimer(ax25);
+
+	pid = *skb->data;
+
+#ifdef CONFIG_INET
+	if (pid == AX25_P_IP) {
+		/* working around a TCP bug to keep additional listeners
+		 * happy. TCP re-uses the buffer and destroys the original
+		 * content.
+		 */
+		struct sk_buff *skbn = skb_copy(skb, GFP_ATOMIC);
+		if (skbn != NULL) {
+			kfree_skb(skb);
+			skb = skbn;
+		}
+
+		skb_pull(skb, 1);	/* Remove PID */
+		skb->h.raw    = skb->data;
+		skb->nh.raw   = skb->data;
+		skb->dev      = ax25->ax25_dev->dev;
+		skb->pkt_type = PACKET_HOST;
+		skb->protocol = htons(ETH_P_IP);
+		ip_rcv(skb, skb->dev, NULL);	/* Wrong ptype */
+		return 1;
+	}
+#endif
+	if (pid == AX25_P_SEGMENT) {
+		skb_pull(skb, 1);	/* Remove PID */
+		return ax25_rx_fragment(ax25, skb);
+	}
+
+	if ((func = ax25_protocol_function(pid)) != NULL) {
+		skb_pull(skb, 1);	/* Remove PID */
+		return (*func)(skb, ax25);
+	}
+
+	if (ax25->sk != NULL && ax25->ax25_dev->values[AX25_VALUES_CONMODE] == 2) {
+		if ((!ax25->pidincl && ax25->sk->sk_protocol == pid) ||
+		    ax25->pidincl) {
+			if (sock_queue_rcv_skb(ax25->sk, skb) == 0)
+				queued = 1;
+			else
+				ax25->condition |= AX25_COND_OWN_RX_BUSY;
+		}
+	}
+
+	return queued;
+}
+
+/*
+ *	Higher level upcall for a LAPB frame
+ */
+static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, int dama)
+{
+	int queued = 0;
+
+	if (ax25->state == AX25_STATE_0)
+		return 0;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		queued = ax25_std_frame_in(ax25, skb, type);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (dama || ax25->ax25_dev->dama.slave)
+			queued = ax25_ds_frame_in(ax25, skb, type);
+		else
+			queued = ax25_std_frame_in(ax25, skb, type);
+		break;
+#endif
+	}
+
+	return queued;
+}
+
+static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
+	ax25_address *dev_addr, struct packet_type *ptype)
+{
+	ax25_address src, dest, *next_digi = NULL;
+	int type = 0, mine = 0, dama;
+	struct sock *make, *sk;
+	ax25_digi dp, reverse_dp;
+	ax25_cb *ax25;
+	ax25_dev *ax25_dev;
+
+	/*
+	 *	Process the AX.25/LAPB frame.
+	 */
+
+	skb->h.raw = skb->data;
+
+	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/*
+	 *	Parse the address header.
+	 */
+
+	if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/*
+	 *	Ours perhaps ?
+	 */
+	if (dp.lastrepeat + 1 < dp.ndigi)		/* Not yet digipeated completely */
+		next_digi = &dp.calls[dp.lastrepeat + 1];
+
+	/*
+	 *	Pull of the AX.25 headers leaving the CTRL/PID bytes
+	 */
+	skb_pull(skb, ax25_addr_size(&dp));
+
+	/* For our port addresses ? */
+	if (ax25cmp(&dest, dev_addr) == 0 && dp.lastrepeat + 1 == dp.ndigi)
+		mine = 1;
+
+	/* Also match on any registered callsign from L3/4 */
+	if (!mine && ax25_listen_mine(&dest, dev) && dp.lastrepeat + 1 == dp.ndigi)
+		mine = 1;
+
+	/* UI frame - bypass LAPB processing */
+	if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
+		skb->h.raw = skb->data + 2;		/* skip control and pid */
+
+		ax25_send_to_raw(&dest, skb, skb->data[1]);
+
+		if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		/* Now we are pointing at the pid byte */
+		switch (skb->data[1]) {
+#ifdef CONFIG_INET
+		case AX25_P_IP:
+			skb_pull(skb,2);		/* drop PID/CTRL */
+			skb->h.raw    = skb->data;
+			skb->nh.raw   = skb->data;
+			skb->dev      = dev;
+			skb->pkt_type = PACKET_HOST;
+			skb->protocol = htons(ETH_P_IP);
+			ip_rcv(skb, dev, ptype);	/* Note ptype here is the wrong one, fix me later */
+			break;
+
+		case AX25_P_ARP:
+			skb_pull(skb,2);
+			skb->h.raw    = skb->data;
+			skb->nh.raw   = skb->data;
+			skb->dev      = dev;
+			skb->pkt_type = PACKET_HOST;
+			skb->protocol = htons(ETH_P_ARP);
+			arp_rcv(skb, dev, ptype);	/* Note ptype here is wrong... */
+			break;
+#endif
+		case AX25_P_TEXT:
+			/* Now find a suitable dgram socket */
+			sk = ax25_get_socket(&dest, &src, SOCK_DGRAM);
+			if (sk != NULL) {
+				bh_lock_sock(sk);
+				if (atomic_read(&sk->sk_rmem_alloc) >=
+				    sk->sk_rcvbuf) {
+					kfree_skb(skb);
+				} else {
+					/*
+					 *	Remove the control and PID.
+					 */
+					skb_pull(skb, 2);
+					if (sock_queue_rcv_skb(sk, skb) != 0)
+						kfree_skb(skb);
+				}
+				bh_unlock_sock(sk);
+				sock_put(sk);
+			} else {
+				kfree_skb(skb);
+			}
+			break;
+
+		default:
+			kfree_skb(skb);	/* Will scan SOCK_AX25 RAW sockets */
+			break;
+		}
+
+		return 0;
+	}
+
+	/*
+	 *	Is connected mode supported on this device ?
+	 *	If not, should we DM the incoming frame (except DMs) or
+	 *	silently ignore them. For now we stay quiet.
+	 */
+	if (ax25_dev->values[AX25_VALUES_CONMODE] == 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/* LAPB */
+
+	/* AX.25 state 1-4 */
+
+	ax25_digi_invert(&dp, &reverse_dp);
+
+	if ((ax25 = ax25_find_cb(&dest, &src, &reverse_dp, dev)) != NULL) {
+		/*
+		 *	Process the frame. If it is queued up internally it
+		 *	returns one otherwise we free it immediately. This
+		 *	routine itself wakes the user context layers so we do
+		 *	no further work
+		 */
+		if (ax25_process_rx_frame(ax25, skb, type, dama) == 0)
+			kfree_skb(skb);
+
+		ax25_cb_put(ax25);
+		return 0;
+	}
+
+	/* AX.25 state 0 (disconnected) */
+
+	/* a) received not a SABM(E) */
+
+	if ((*skb->data & ~AX25_PF) != AX25_SABM &&
+	    (*skb->data & ~AX25_PF) != AX25_SABME) {
+		/*
+		 *	Never reply to a DM. Also ignore any connects for
+		 *	addresses that are not our interfaces and not a socket.
+		 */
+		if ((*skb->data & ~AX25_PF) != AX25_DM && mine)
+			ax25_return_dm(dev, &src, &dest, &dp);
+
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/* b) received SABM(E) */
+
+	if (dp.lastrepeat + 1 == dp.ndigi)
+		sk = ax25_find_listener(&dest, 0, dev, SOCK_SEQPACKET);
+	else
+		sk = ax25_find_listener(next_digi, 1, dev, SOCK_SEQPACKET);
+
+	if (sk != NULL) {
+		bh_lock_sock(sk);
+		if (sk_acceptq_is_full(sk) ||
+		    (make = ax25_make_new(sk, ax25_dev)) == NULL) {
+			if (mine)
+				ax25_return_dm(dev, &src, &dest, &dp);
+			kfree_skb(skb);
+			bh_unlock_sock(sk);
+			sock_put(sk);
+
+			return 0;
+		}
+
+		ax25 = ax25_sk(make);
+		skb_set_owner_r(skb, make);
+		skb_queue_head(&sk->sk_receive_queue, skb);
+
+		make->sk_state = TCP_ESTABLISHED;
+
+		sk->sk_ack_backlog++;
+		bh_unlock_sock(sk);
+	} else {
+		if (!mine) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if ((ax25 = ax25_create_cb()) == NULL) {
+			ax25_return_dm(dev, &src, &dest, &dp);
+			kfree_skb(skb);
+			return 0;
+		}
+
+		ax25_fillin_cb(ax25, ax25_dev);
+	}
+
+	ax25->source_addr = dest;
+	ax25->dest_addr   = src;
+
+	/*
+	 *	Sort out any digipeated paths.
+	 */
+	if (dp.ndigi && !ax25->digipeat &&
+	    (ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+		kfree_skb(skb);
+		ax25_destroy_socket(ax25);
+		if (sk)
+			sock_put(sk);
+		return 0;
+	}
+
+	if (dp.ndigi == 0) {
+		if (ax25->digipeat != NULL) {
+			kfree(ax25->digipeat);
+			ax25->digipeat = NULL;
+		}
+	} else {
+		/* Reverse the source SABM's path */
+		memcpy(ax25->digipeat, &reverse_dp, sizeof(ax25_digi));
+	}
+
+	if ((*skb->data & ~AX25_PF) == AX25_SABME) {
+		ax25->modulus = AX25_EMODULUS;
+		ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
+	} else {
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
+	}
+
+	ax25_send_control(ax25, AX25_UA, AX25_POLLON, AX25_RESPONSE);
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	if (dama && ax25->ax25_dev->values[AX25_VALUES_PROTOCOL] == AX25_PROTO_DAMA_SLAVE)
+		ax25_dama_on(ax25);
+#endif
+
+	ax25->state = AX25_STATE_3;
+
+	ax25_cb_add(ax25);
+
+	ax25_start_heartbeat(ax25);
+	ax25_start_t3timer(ax25);
+	ax25_start_idletimer(ax25);
+
+	if (sk) {
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, skb->len);
+		sock_put(sk);
+	} else
+		kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ *	Receive an AX.25 frame via a SLIP interface.
+ */
+int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
+		  struct packet_type *ptype)
+{
+	skb->sk = NULL;		/* Initially we don't know who it's for */
+	skb->destructor = NULL;	/* Who initializes this, dammit?! */
+
+	if ((*skb->data & 0x0F) != 0) {
+		kfree_skb(skb);	/* Not a KISS data frame */
+		return 0;
+	}
+
+	skb_pull(skb, AX25_KISS_HEADER_LEN);	/* Remove the KISS byte */
+
+	return ax25_rcv(skb, dev, (ax25_address *)dev->dev_addr, ptype);
+}
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
new file mode 100644
index 00000000000..04d711344d5
--- /dev/null
+++ b/net/ax25/ax25_ip.c
@@ -0,0 +1,225 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/netfilter.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+#include <net/arp.h>
+
+/*
+ *	IP over AX.25 encapsulation.
+ */
+
+/*
+ *	Shove an AX.25 UI header on an IP packet and handle ARP
+ */
+
+#ifdef CONFIG_INET
+
+int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len)
+{
+	unsigned char *buff;
+
+	/* they sometimes come back to us... */
+	if (type == ETH_P_AX25)
+		return 0;
+
+  	/* header is an AX.25 UI frame from us to them */
+ 	buff = skb_push(skb, AX25_HEADER_LEN);
+  	*buff++ = 0x00;	/* KISS DATA */
+
+	if (daddr != NULL)
+		memcpy(buff, daddr, dev->addr_len);	/* Address specified */
+
+  	buff[6] &= ~AX25_CBIT;
+  	buff[6] &= ~AX25_EBIT;
+  	buff[6] |= AX25_SSSID_SPARE;
+  	buff    += AX25_ADDR_LEN;
+
+  	if (saddr != NULL)
+  		memcpy(buff, saddr, dev->addr_len);
+  	else
+  		memcpy(buff, dev->dev_addr, dev->addr_len);
+
+  	buff[6] &= ~AX25_CBIT;
+  	buff[6] |= AX25_EBIT;
+  	buff[6] |= AX25_SSSID_SPARE;
+  	buff    += AX25_ADDR_LEN;
+
+  	*buff++  = AX25_UI;	/* UI */
+
+  	/* Append a suitable AX.25 PID */
+  	switch (type) {
+  	case ETH_P_IP:
+  		*buff++ = AX25_P_IP;
+ 		break;
+  	case ETH_P_ARP:
+  		*buff++ = AX25_P_ARP;
+  		break;
+  	default:
+  		printk(KERN_ERR "AX.25: ax25_encapsulate - wrong protocol type 0x%2.2x\n", type);
+  		*buff++ = 0;
+  		break;
+ 	}
+
+	if (daddr != NULL)
+	  	return AX25_HEADER_LEN;
+
+	return -AX25_HEADER_LEN;	/* Unfinished header */
+}
+
+int ax25_rebuild_header(struct sk_buff *skb)
+{
+	struct sk_buff *ourskb;
+	unsigned char *bp  = skb->data;
+	struct net_device *dev;
+	ax25_address *src, *dst;
+	ax25_dev *ax25_dev;
+	ax25_route _route, *route = &_route;
+	ax25_cb *ax25;
+
+	dst = (ax25_address *)(bp + 1);
+	src = (ax25_address *)(bp + 8);
+
+  	if (arp_find(bp + 1, skb))
+  		return 1;
+
+	route = ax25_rt_find_route(route, dst, NULL);
+	dev      = route->dev;
+
+	if (dev == NULL)
+		dev = skb->dev;
+
+        if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
+                goto put;
+	}
+
+	if (bp[16] == AX25_P_IP) {
+		if (route->ip_mode == 'V' || (route->ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
+			/*
+			 *	We copy the buffer and release the original thereby
+			 *	keeping it straight
+			 *
+			 *	Note: we report 1 back so the caller will
+			 *	not feed the frame direct to the physical device
+			 *	We don't want that to happen. (It won't be upset
+			 *	as we have pulled the frame from the queue by
+			 *	freeing it).
+			 *
+			 *	NB: TCP modifies buffers that are still
+			 *	on a device queue, thus we use skb_copy()
+			 *      instead of using skb_clone() unless this
+			 *	gets fixed.
+			 */
+
+			ax25_address src_c;
+			ax25_address dst_c;
+
+			if ((ourskb = skb_copy(skb, GFP_ATOMIC)) == NULL) {
+				kfree_skb(skb);
+				goto put;
+			}
+
+			if (skb->sk != NULL)
+				skb_set_owner_w(ourskb, skb->sk);
+
+			kfree_skb(skb);
+			/* dl9sau: bugfix
+			 * after kfree_skb(), dst and src which were pointer
+			 * to bp which is part of skb->data would not be valid
+			 * anymore hope that after skb_pull(ourskb, ..) our
+			 * dsc_c and src_c will not become invalid
+			 */
+			bp  = ourskb->data;
+			dst_c = *(ax25_address *)(bp + 1);
+			src_c = *(ax25_address *)(bp + 8);
+
+			skb_pull(ourskb, AX25_HEADER_LEN - 1);	/* Keep PID */
+			ourskb->nh.raw = ourskb->data;
+
+			ax25=ax25_send_frame(
+			    ourskb, 
+			    ax25_dev->values[AX25_VALUES_PACLEN], 
+			    &src_c,
+			    &dst_c, route->digipeat, dev);
+			if (ax25) {
+				ax25_cb_put(ax25);
+			}
+			goto put;
+		}
+	}
+
+  	bp[7]  &= ~AX25_CBIT;
+  	bp[7]  &= ~AX25_EBIT;
+  	bp[7]  |= AX25_SSSID_SPARE;
+
+  	bp[14] &= ~AX25_CBIT;
+  	bp[14] |= AX25_EBIT;
+  	bp[14] |= AX25_SSSID_SPARE;
+
+	skb_pull(skb, AX25_KISS_HEADER_LEN);
+
+	if (route->digipeat != NULL) {
+		if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
+			kfree_skb(skb);
+			goto put;
+		}
+
+		skb = ourskb;
+	}
+
+	skb->dev      = dev;
+
+	ax25_queue_xmit(skb);
+
+put:
+	ax25_put_route(route);
+
+  	return 1;
+}
+
+#else	/* INET */
+
+int ax25_encapsulate(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len)
+{
+	return -AX25_HEADER_LEN;
+}
+
+int ax25_rebuild_header(struct sk_buff *skb)
+{
+	return 1;
+}
+
+#endif
+
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
new file mode 100644
index 00000000000..3475a3ac934
--- /dev/null
+++ b/net/ax25/ax25_out.c
@@ -0,0 +1,383 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/spinlock.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+static DEFINE_SPINLOCK(ax25_frag_lock);
+
+ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev)
+{
+	ax25_dev *ax25_dev;
+	ax25_cb *ax25;
+
+	/*
+	 * Take the default packet length for the device if zero is
+	 * specified.
+	 */
+	if (paclen == 0) {
+		if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+			return NULL;
+
+		paclen = ax25_dev->values[AX25_VALUES_PACLEN];
+	}
+
+	/*
+	 * Look for an existing connection.
+	 */
+	if ((ax25 = ax25_find_cb(src, dest, digi, dev)) != NULL) {
+		ax25_output(ax25, paclen, skb);
+		return ax25;		/* It already existed */
+	}
+
+	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+		return NULL;
+
+	if ((ax25 = ax25_create_cb()) == NULL)
+		return NULL;
+
+	ax25_fillin_cb(ax25, ax25_dev);
+
+	ax25->source_addr = *src;
+	ax25->dest_addr   = *dest;
+
+	if (digi != NULL) {
+		if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+			ax25_cb_put(ax25);
+			return NULL;
+		}
+		memcpy(ax25->digipeat, digi, sizeof(ax25_digi));
+	}
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_establish_data_link(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (ax25_dev->dama.slave)
+			ax25_ds_establish_data_link(ax25);
+		else
+			ax25_std_establish_data_link(ax25);
+		break;
+#endif
+	}
+
+	ax25_cb_add(ax25);
+
+	ax25->state = AX25_STATE_1;
+
+	ax25_start_heartbeat(ax25);
+
+	ax25_output(ax25, paclen, skb);
+
+	return ax25;			/* We had to create it */
+}
+
+/*
+ *	All outgoing AX.25 I frames pass via this routine. Therefore this is
+ *	where the fragmentation of frames takes place. If fragment is set to
+ *	zero then we are not allowed to do fragmentation, even if the frame
+ *	is too large.
+ */
+void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
+{
+	struct sk_buff *skbn;
+	unsigned char *p;
+	int frontlen, len, fragno, ka9qfrag, first = 1;
+
+	if ((skb->len - 1) > paclen) {
+		if (*skb->data == AX25_P_TEXT) {
+			skb_pull(skb, 1); /* skip PID */
+			ka9qfrag = 0;
+		} else {
+			paclen -= 2;	/* Allow for fragment control info */
+			ka9qfrag = 1;
+		}
+
+		fragno = skb->len / paclen;
+		if (skb->len % paclen == 0) fragno--;
+
+		frontlen = skb_headroom(skb);	/* Address space + CTRL */
+
+		while (skb->len > 0) {
+			spin_lock_bh(&ax25_frag_lock);
+			if ((skbn = alloc_skb(paclen + 2 + frontlen, GFP_ATOMIC)) == NULL) {
+				spin_unlock_bh(&ax25_frag_lock);
+				printk(KERN_CRIT "AX.25: ax25_output - out of memory\n");
+				return;
+			}
+
+			if (skb->sk != NULL)
+				skb_set_owner_w(skbn, skb->sk);
+
+			spin_unlock_bh(&ax25_frag_lock);
+
+			len = (paclen > skb->len) ? skb->len : paclen;
+
+			if (ka9qfrag == 1) {
+				skb_reserve(skbn, frontlen + 2);
+				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
+				memcpy(skb_put(skbn, len), skb->data, len);
+				p = skb_push(skbn, 2);
+
+				*p++ = AX25_P_SEGMENT;
+
+				*p = fragno--;
+				if (first) {
+					*p |= AX25_SEG_FIRST;
+					first = 0;
+				}
+			} else {
+				skb_reserve(skbn, frontlen + 1);
+				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
+				memcpy(skb_put(skbn, len), skb->data, len);
+				p = skb_push(skbn, 1);
+				*p = AX25_P_TEXT;
+			}
+
+			skb_pull(skb, len);
+			skb_queue_tail(&ax25->write_queue, skbn); /* Throw it on the queue */
+		}
+
+		kfree_skb(skb);
+	} else {
+		skb_queue_tail(&ax25->write_queue, skb);	  /* Throw it on the queue */
+	}
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_kick(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	/*
+	 * A DAMA slave is _required_ to work as normal AX.25L2V2
+	 * if no DAMA master is available.
+	 */
+	case AX25_PROTO_DAMA_SLAVE:
+		if (!ax25->ax25_dev->dama.slave) ax25_kick(ax25);
+		break;
+#endif
+	}
+}
+
+/*
+ *  This procedure is passed a buffer descriptor for an iframe. It builds
+ *  the rest of the control part of the frame and then writes it out.
+ */
+static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
+{
+	unsigned char *frame;
+
+	if (skb == NULL)
+		return;
+
+	skb->nh.raw = skb->data;
+
+	if (ax25->modulus == AX25_MODULUS) {
+		frame = skb_push(skb, 1);
+
+		*frame = AX25_I;
+		*frame |= (poll_bit) ? AX25_PF : 0;
+		*frame |= (ax25->vr << 5);
+		*frame |= (ax25->vs << 1);
+	} else {
+		frame = skb_push(skb, 2);
+
+		frame[0] = AX25_I;
+		frame[0] |= (ax25->vs << 1);
+		frame[1] = (poll_bit) ? AX25_EPF : 0;
+		frame[1] |= (ax25->vr << 1);
+	}
+
+	ax25_start_idletimer(ax25);
+
+	ax25_transmit_buffer(ax25, skb, AX25_COMMAND);
+}
+
+void ax25_kick(ax25_cb *ax25)
+{
+	struct sk_buff *skb, *skbn;
+	int last = 1;
+	unsigned short start, end, next;
+
+	if (ax25->state != AX25_STATE_3 && ax25->state != AX25_STATE_4)
+		return;
+
+	if (ax25->condition & AX25_COND_PEER_RX_BUSY)
+		return;
+
+	if (skb_peek(&ax25->write_queue) == NULL)
+		return;
+
+	start = (skb_peek(&ax25->ack_queue) == NULL) ? ax25->va : ax25->vs;
+	end   = (ax25->va + ax25->window) % ax25->modulus;
+
+	if (start == end)
+		return;
+
+	ax25->vs = start;
+
+	/*
+	 * Transmit data until either we're out of data to send or
+	 * the window is full. Send a poll on the final I frame if
+	 * the window is filled.
+	 */
+
+	/*
+	 * Dequeue the frame and copy it.
+	 */
+	skb  = skb_dequeue(&ax25->write_queue);
+
+	do {
+		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+			skb_queue_head(&ax25->write_queue, skb);
+			break;
+		}
+
+		if (skb->sk != NULL)
+			skb_set_owner_w(skbn, skb->sk);
+
+		next = (ax25->vs + 1) % ax25->modulus;
+		last = (next == end);
+
+		/*
+		 * Transmit the frame copy.
+		 * bke 960114: do not set the Poll bit on the last frame
+		 * in DAMA mode.
+		 */
+		switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+		case AX25_PROTO_STD_SIMPLEX:
+		case AX25_PROTO_STD_DUPLEX:
+			ax25_send_iframe(ax25, skbn, (last) ? AX25_POLLON : AX25_POLLOFF);
+			break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+		case AX25_PROTO_DAMA_SLAVE:
+			ax25_send_iframe(ax25, skbn, AX25_POLLOFF);
+			break;
+#endif
+		}
+
+		ax25->vs = next;
+
+		/*
+		 * Requeue the original data frame.
+		 */
+		skb_queue_tail(&ax25->ack_queue, skb);
+
+	} while (!last && (skb = skb_dequeue(&ax25->write_queue)) != NULL);
+
+	ax25->condition &= ~AX25_COND_ACK_PENDING;
+
+	if (!ax25_t1timer_running(ax25)) {
+		ax25_stop_t3timer(ax25);
+		ax25_calculate_t1(ax25);
+		ax25_start_t1timer(ax25);
+	}
+}
+
+void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
+{
+	struct sk_buff *skbn;
+	unsigned char *ptr;
+	int headroom;
+
+	if (ax25->ax25_dev == NULL) {
+		ax25_disconnect(ax25, ENETUNREACH);
+		return;
+	}
+
+	headroom = ax25_addr_size(ax25->digipeat);
+
+	if (skb_headroom(skb) < headroom) {
+		if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+			printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
+			kfree_skb(skb);
+			return;
+		}
+
+		if (skb->sk != NULL)
+			skb_set_owner_w(skbn, skb->sk);
+
+		kfree_skb(skb);
+		skb = skbn;
+	}
+
+	ptr = skb_push(skb, headroom);
+
+	ax25_addr_build(ptr, &ax25->source_addr, &ax25->dest_addr, ax25->digipeat, type, ax25->modulus);
+
+	skb->dev = ax25->ax25_dev->dev;
+
+	ax25_queue_xmit(skb);
+}
+
+/*
+ *	A small shim to dev_queue_xmit to add the KISS control byte, and do
+ *	any packet forwarding in operation.
+ */
+void ax25_queue_xmit(struct sk_buff *skb)
+{
+	unsigned char *ptr;
+
+	skb->protocol = htons(ETH_P_AX25);
+	skb->dev      = ax25_fwd_dev(skb->dev);
+
+	ptr  = skb_push(skb, 1);
+	*ptr = 0x00;			/* KISS */
+
+	dev_queue_xmit(skb);
+}
+
+int ax25_check_iframes_acked(ax25_cb *ax25, unsigned short nr)
+{
+	if (ax25->vs == nr) {
+		ax25_frames_acked(ax25, nr);
+		ax25_calculate_rtt(ax25);
+		ax25_stop_t1timer(ax25);
+		ax25_start_t3timer(ax25);
+		return 1;
+	} else {
+		if (ax25->va != nr) {
+			ax25_frames_acked(ax25, nr);
+			ax25_calculate_t1(ax25);
+			ax25_start_t1timer(ax25);
+			return 1;
+		}
+	}
+	return 0;
+}
+
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
new file mode 100644
index 00000000000..44b99b1ff9f
--- /dev/null
+++ b/net/ax25/ax25_route.c
@@ -0,0 +1,534 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Steven Whitehouse GW7RRM (stevew@acm.org)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
+ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/timer.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+static ax25_route *ax25_route_list;
+static DEFINE_RWLOCK(ax25_route_lock);
+
+static ax25_route *ax25_get_route(ax25_address *, struct net_device *);
+
+void ax25_rt_device_down(struct net_device *dev)
+{
+	ax25_route *s, *t, *ax25_rt;
+
+	write_lock(&ax25_route_lock);
+	ax25_rt = ax25_route_list;
+	while (ax25_rt != NULL) {
+		s       = ax25_rt;
+		ax25_rt = ax25_rt->next;
+
+		if (s->dev == dev) {
+			if (ax25_route_list == s) {
+				ax25_route_list = s->next;
+				if (s->digipeat != NULL)
+					kfree(s->digipeat);
+				kfree(s);
+			} else {
+				for (t = ax25_route_list; t != NULL; t = t->next) {
+					if (t->next == s) {
+						t->next = s->next;
+						if (s->digipeat != NULL)
+							kfree(s->digipeat);
+						kfree(s);
+						break;
+					}
+				}
+			}
+		}
+	}
+	write_unlock(&ax25_route_lock);
+}
+
+static int ax25_rt_add(struct ax25_routes_struct *route)
+{
+	ax25_route *ax25_rt;
+	ax25_dev *ax25_dev;
+	int i;
+
+	if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
+		return -EINVAL;
+	if (route->digi_count > AX25_MAX_DIGIS)
+		return -EINVAL;
+
+	write_lock(&ax25_route_lock);
+
+	ax25_rt = ax25_route_list;
+	while (ax25_rt != NULL) {
+		if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 &&
+		            ax25_rt->dev == ax25_dev->dev) {
+			if (ax25_rt->digipeat != NULL) {
+				kfree(ax25_rt->digipeat);
+				ax25_rt->digipeat = NULL;
+			}
+			if (route->digi_count != 0) {
+				if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+					write_unlock(&ax25_route_lock);
+					return -ENOMEM;
+				}
+				ax25_rt->digipeat->lastrepeat = -1;
+				ax25_rt->digipeat->ndigi      = route->digi_count;
+				for (i = 0; i < route->digi_count; i++) {
+					ax25_rt->digipeat->repeated[i] = 0;
+					ax25_rt->digipeat->calls[i]    = route->digi_addr[i];
+				}
+			}
+			write_unlock(&ax25_route_lock);
+			return 0;
+		}
+		ax25_rt = ax25_rt->next;
+	}
+
+	if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
+		write_unlock(&ax25_route_lock);
+		return -ENOMEM;
+	}
+
+	atomic_set(&ax25_rt->ref, 0);
+	ax25_rt->callsign     = route->dest_addr;
+	ax25_rt->dev          = ax25_dev->dev;
+	ax25_rt->digipeat     = NULL;
+	ax25_rt->ip_mode      = ' ';
+	if (route->digi_count != 0) {
+		if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+			write_unlock(&ax25_route_lock);
+			kfree(ax25_rt);
+			return -ENOMEM;
+		}
+		ax25_rt->digipeat->lastrepeat = -1;
+		ax25_rt->digipeat->ndigi      = route->digi_count;
+		for (i = 0; i < route->digi_count; i++) {
+			ax25_rt->digipeat->repeated[i] = 0;
+			ax25_rt->digipeat->calls[i]    = route->digi_addr[i];
+		}
+	}
+	ax25_rt->next   = ax25_route_list;
+	ax25_route_list = ax25_rt;
+	write_unlock(&ax25_route_lock);
+
+	return 0;
+}
+
+static void ax25_rt_destroy(ax25_route *ax25_rt)
+{
+	if (atomic_read(&ax25_rt->ref) == 0) {
+		if (ax25_rt->digipeat != NULL)
+			kfree(ax25_rt->digipeat);
+		kfree(ax25_rt);
+		return;
+	}
+
+	/*
+	 * Uh...  Route is still in use; we can't yet destroy it.  Retry later.
+	 */
+	init_timer(&ax25_rt->timer);
+	ax25_rt->timer.data	= (unsigned long) ax25_rt;
+	ax25_rt->timer.function	= (void *) ax25_rt_destroy;
+	ax25_rt->timer.expires	= jiffies + 5 * HZ;
+
+	add_timer(&ax25_rt->timer);
+}
+
+static int ax25_rt_del(struct ax25_routes_struct *route)
+{
+	ax25_route *s, *t, *ax25_rt;
+	ax25_dev *ax25_dev;
+
+	if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
+		return -EINVAL;
+
+	write_lock(&ax25_route_lock);
+
+	ax25_rt = ax25_route_list;
+	while (ax25_rt != NULL) {
+		s       = ax25_rt;
+		ax25_rt = ax25_rt->next;
+		if (s->dev == ax25_dev->dev &&
+		    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
+			if (ax25_route_list == s) {
+				ax25_route_list = s->next;
+				ax25_rt_destroy(s);
+			} else {
+				for (t = ax25_route_list; t != NULL; t = t->next) {
+					if (t->next == s) {
+						t->next = s->next;
+						ax25_rt_destroy(s);
+						break;
+					}
+				}
+			}
+		}
+	}
+	write_unlock(&ax25_route_lock);
+
+	return 0;
+}
+
+static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
+{
+	ax25_route *ax25_rt;
+	ax25_dev *ax25_dev;
+	int err = 0;
+
+	if ((ax25_dev = ax25_addr_ax25dev(&rt_option->port_addr)) == NULL)
+		return -EINVAL;
+
+	write_lock(&ax25_route_lock);
+
+	ax25_rt = ax25_route_list;
+	while (ax25_rt != NULL) {
+		if (ax25_rt->dev == ax25_dev->dev &&
+		    ax25cmp(&rt_option->dest_addr, &ax25_rt->callsign) == 0) {
+			switch (rt_option->cmd) {
+			case AX25_SET_RT_IPMODE:
+				switch (rt_option->arg) {
+				case ' ':
+				case 'D':
+				case 'V':
+					ax25_rt->ip_mode = rt_option->arg;
+					break;
+				default:
+					err = -EINVAL;
+					goto out;
+				}
+				break;
+			default:
+				err = -EINVAL;
+				goto out;
+			}
+		}
+		ax25_rt = ax25_rt->next;
+	}
+
+out:
+	write_unlock(&ax25_route_lock);
+	return err;
+}
+
+int ax25_rt_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct ax25_route_opt_struct rt_option;
+	struct ax25_routes_struct route;
+
+	switch (cmd) {
+	case SIOCADDRT:
+		if (copy_from_user(&route, arg, sizeof(route)))
+			return -EFAULT;
+		return ax25_rt_add(&route);
+
+	case SIOCDELRT:
+		if (copy_from_user(&route, arg, sizeof(route)))
+			return -EFAULT;
+		return ax25_rt_del(&route);
+
+	case SIOCAX25OPTRT:
+		if (copy_from_user(&rt_option, arg, sizeof(rt_option)))
+			return -EFAULT;
+		return ax25_rt_opt(&rt_option);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ax25_route *ax25_rt;
+	int i = 1;
+ 
+ 	read_lock(&ax25_route_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
+		if (i == *pos)
+			return ax25_rt;
+		++i;
+	}
+
+	return NULL;
+}
+
+static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return (v == SEQ_START_TOKEN) ? ax25_route_list : 
+		((struct ax25_route *) v)->next;
+}
+
+static void ax25_rt_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&ax25_route_lock);
+}
+
+static int ax25_rt_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "callsign  dev  mode digipeaters\n");
+	else {
+		struct ax25_route *ax25_rt = v;
+		const char *callsign;
+		int i;
+
+		if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0)
+			callsign = "default";
+		else
+			callsign = ax2asc(&ax25_rt->callsign);
+
+		seq_printf(seq, "%-9s %-4s",
+			callsign,
+			ax25_rt->dev ? ax25_rt->dev->name : "???");
+
+		switch (ax25_rt->ip_mode) {
+		case 'V':
+			seq_puts(seq, "   vc");
+			break;
+		case 'D':
+			seq_puts(seq, "   dg");
+			break;
+		default:
+			seq_puts(seq, "    *");
+			break;
+		}
+
+		if (ax25_rt->digipeat != NULL)
+			for (i = 0; i < ax25_rt->digipeat->ndigi; i++)
+				seq_printf(seq, " %s", ax2asc(&ax25_rt->digipeat->calls[i]));
+
+		seq_puts(seq, "\n");
+	}
+	return 0;
+}
+
+static struct seq_operations ax25_rt_seqops = {
+	.start = ax25_rt_seq_start,
+	.next = ax25_rt_seq_next,
+	.stop = ax25_rt_seq_stop,
+	.show = ax25_rt_seq_show,
+};
+
+static int ax25_rt_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ax25_rt_seqops);
+}
+
+struct file_operations ax25_route_fops = {
+	.owner = THIS_MODULE,
+	.open = ax25_rt_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+/*
+ *	Find AX.25 route
+ *
+ *	Only routes with a refernce rout of zero can be destroyed.
+ */
+static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
+{
+	ax25_route *ax25_spe_rt = NULL;
+	ax25_route *ax25_def_rt = NULL;
+	ax25_route *ax25_rt;
+
+	read_lock(&ax25_route_lock);
+	/*
+	 *	Bind to the physical interface we heard them on, or the default
+	 *	route if none is found;
+	 */
+	for (ax25_rt = ax25_route_list; ax25_rt != NULL; ax25_rt = ax25_rt->next) {
+		if (dev == NULL) {
+			if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev != NULL)
+				ax25_spe_rt = ax25_rt;
+			if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev != NULL)
+				ax25_def_rt = ax25_rt;
+		} else {
+			if (ax25cmp(&ax25_rt->callsign, addr) == 0 && ax25_rt->dev == dev)
+				ax25_spe_rt = ax25_rt;
+			if (ax25cmp(&ax25_rt->callsign, &null_ax25_address) == 0 && ax25_rt->dev == dev)
+				ax25_def_rt = ax25_rt;
+		}
+	}
+
+	ax25_rt = ax25_def_rt;
+	if (ax25_spe_rt != NULL)
+		ax25_rt = ax25_spe_rt;
+
+	if (ax25_rt != NULL)
+		atomic_inc(&ax25_rt->ref);
+
+	read_unlock(&ax25_route_lock);
+
+	return ax25_rt;
+}
+
+/*
+ *	Adjust path: If you specify a default route and want to connect
+ *      a target on the digipeater path but w/o having a special route
+ *	set before, the path has to be truncated from your target on.
+ */
+static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat)
+{
+	int k;
+
+	for (k = 0; k < digipeat->ndigi; k++) {
+		if (ax25cmp(addr, &digipeat->calls[k]) == 0)
+			break;
+	}
+
+	digipeat->ndigi = k;
+}
+
+
+/*
+ *	Find which interface to use.
+ */
+int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
+{
+	ax25_route *ax25_rt;
+	ax25_address *call;
+	int err;
+
+	if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
+		return -EHOSTUNREACH;
+
+	if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
+		err = -EHOSTUNREACH;
+		goto put;
+	}
+
+	if ((call = ax25_findbyuid(current->euid)) == NULL) {
+		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+			err = -EPERM;
+			goto put;
+		}
+		call = (ax25_address *)ax25->ax25_dev->dev->dev_addr;
+	}
+
+	ax25->source_addr = *call;
+
+	if (ax25_rt->digipeat != NULL) {
+		if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+			err = -ENOMEM;
+			goto put;
+		}
+		memcpy(ax25->digipeat, ax25_rt->digipeat, sizeof(ax25_digi));
+		ax25_adjust_path(addr, ax25->digipeat);
+	}
+
+	if (ax25->sk != NULL) {
+		bh_lock_sock(ax25->sk);
+		sock_reset_flag(ax25->sk, SOCK_ZAPPED);
+		bh_unlock_sock(ax25->sk);
+	}
+
+put:
+	ax25_put_route(ax25_rt);
+
+	return 0;
+}
+
+ax25_route *ax25_rt_find_route(ax25_route * route, ax25_address *addr,
+	struct net_device *dev)
+{
+	ax25_route *ax25_rt;
+
+	if ((ax25_rt = ax25_get_route(addr, dev)))
+		return ax25_rt;
+
+	route->next     = NULL;
+	atomic_set(&route->ref, 1);
+	route->callsign = *addr;
+	route->dev      = dev;
+	route->digipeat = NULL;
+	route->ip_mode  = ' ';
+
+	return route;
+}
+
+struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
+	ax25_address *dest, ax25_digi *digi)
+{
+	struct sk_buff *skbn;
+	unsigned char *bp;
+	int len;
+
+	len = digi->ndigi * AX25_ADDR_LEN;
+
+	if (skb_headroom(skb) < len) {
+		if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+			printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
+			return NULL;
+		}
+
+		if (skb->sk != NULL)
+			skb_set_owner_w(skbn, skb->sk);
+
+		kfree_skb(skb);
+
+		skb = skbn;
+	}
+
+	bp = skb_push(skb, len);
+
+	ax25_addr_build(bp, src, dest, digi, AX25_COMMAND, AX25_MODULUS);
+
+	return skb;
+}
+
+/*
+ *	Free all memory associated with routing structures.
+ */
+void __exit ax25_rt_free(void)
+{
+	ax25_route *s, *ax25_rt = ax25_route_list;
+
+	write_lock(&ax25_route_lock);
+	while (ax25_rt != NULL) {
+		s       = ax25_rt;
+		ax25_rt = ax25_rt->next;
+
+		if (s->digipeat != NULL)
+			kfree(s->digipeat);
+
+		kfree(s);
+	}
+	write_unlock(&ax25_route_lock);
+}
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
new file mode 100644
index 00000000000..7131873322c
--- /dev/null
+++ b/net/ax25/ax25_std_in.c
@@ -0,0 +1,449 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
+ *
+ * Most of this code is based on the SDL diagrams published in the 7th ARRL
+ * Computer Networking Conference papers. The diagrams have mistakes in them,
+ * but are mostly correct. Before you modify the code could you read the SDL
+ * diagrams as the code is not obvious and probably very easy to break.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/ip.h>			/* For ip_rcv */
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ *	State machine for state 1, Awaiting Connection State.
+ *	The handling of the timer(s) is in file ax25_std_timer.c.
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_std_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
+{
+	switch (frametype) {
+	case AX25_SABM:
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_SABME:
+		ax25->modulus = AX25_EMODULUS;
+		ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_UA:
+		if (pf) {
+			ax25_calculate_rtt(ax25);
+			ax25_stop_t1timer(ax25);
+			ax25_start_t3timer(ax25);
+			ax25_start_idletimer(ax25);
+			ax25->vs      = 0;
+			ax25->va      = 0;
+			ax25->vr      = 0;
+			ax25->state   = AX25_STATE_3;
+			ax25->n2count = 0;
+			if (ax25->sk != NULL) {
+				bh_lock_sock(ax25->sk);
+				ax25->sk->sk_state = TCP_ESTABLISHED;
+				/* For WAIT_SABM connections we will produce an accept ready socket here */
+				if (!sock_flag(ax25->sk, SOCK_DEAD))
+					ax25->sk->sk_state_change(ax25->sk);
+				bh_unlock_sock(ax25->sk);
+			}
+		}
+		break;
+
+	case AX25_DM:
+		if (pf) {
+			if (ax25->modulus == AX25_MODULUS) {
+				ax25_disconnect(ax25, ECONNREFUSED);
+			} else {
+				ax25->modulus = AX25_MODULUS;
+				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ *	State machine for state 2, Awaiting Release State.
+ *	The handling of the timer(s) is in file ax25_std_timer.c
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_std_state2_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int pf, int type)
+{
+	switch (frametype) {
+	case AX25_SABM:
+	case AX25_SABME:
+		ax25_send_control(ax25, AX25_DM, pf, AX25_RESPONSE);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_DM:
+	case AX25_UA:
+		if (pf)
+			ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_I:
+	case AX25_REJ:
+	case AX25_RNR:
+	case AX25_RR:
+		if (pf) ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ *	State machine for state 3, Connected State.
+ *	The handling of the timer(s) is in file ax25_std_timer.c
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_std_state3_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
+{
+	int queued = 0;
+
+	switch (frametype) {
+	case AX25_SABM:
+	case AX25_SABME:
+		if (frametype == AX25_SABM) {
+			ax25->modulus = AX25_MODULUS;
+			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		} else {
+			ax25->modulus = AX25_EMODULUS;
+			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
+		}
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_stop_t1timer(ax25);
+		ax25_stop_t2timer(ax25);
+		ax25_start_t3timer(ax25);
+		ax25_start_idletimer(ax25);
+		ax25->condition = 0x00;
+		ax25->vs        = 0;
+		ax25->va        = 0;
+		ax25->vr        = 0;
+		ax25_requeue_frames(ax25);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_DM:
+		ax25_disconnect(ax25, ECONNRESET);
+		break;
+
+	case AX25_RR:
+	case AX25_RNR:
+		if (frametype == AX25_RR)
+			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+		else
+			ax25->condition |= AX25_COND_PEER_RX_BUSY;
+		if (type == AX25_COMMAND && pf)
+			ax25_std_enquiry_response(ax25);
+		if (ax25_validate_nr(ax25, nr)) {
+			ax25_check_iframes_acked(ax25, nr);
+		} else {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_REJ:
+		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+		if (type == AX25_COMMAND && pf)
+			ax25_std_enquiry_response(ax25);
+		if (ax25_validate_nr(ax25, nr)) {
+			ax25_frames_acked(ax25, nr);
+			ax25_calculate_rtt(ax25);
+			ax25_stop_t1timer(ax25);
+			ax25_start_t3timer(ax25);
+			ax25_requeue_frames(ax25);
+		} else {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_I:
+		if (!ax25_validate_nr(ax25, nr)) {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+		}
+		if (ax25->condition & AX25_COND_PEER_RX_BUSY) {
+			ax25_frames_acked(ax25, nr);
+		} else {
+			ax25_check_iframes_acked(ax25, nr);
+		}
+		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
+			if (pf) ax25_std_enquiry_response(ax25);
+			break;
+		}
+		if (ns == ax25->vr) {
+			ax25->vr = (ax25->vr + 1) % ax25->modulus;
+			queued = ax25_rx_iframe(ax25, skb);
+			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+				ax25->vr = ns;	/* ax25->vr - 1 */
+			ax25->condition &= ~AX25_COND_REJECT;
+			if (pf) {
+				ax25_std_enquiry_response(ax25);
+			} else {
+				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
+					ax25->condition |= AX25_COND_ACK_PENDING;
+					ax25_start_t2timer(ax25);
+				}
+			}
+		} else {
+			if (ax25->condition & AX25_COND_REJECT) {
+				if (pf) ax25_std_enquiry_response(ax25);
+			} else {
+				ax25->condition |= AX25_COND_REJECT;
+				ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE);
+				ax25->condition &= ~AX25_COND_ACK_PENDING;
+			}
+		}
+		break;
+
+	case AX25_FRMR:
+	case AX25_ILLEGAL:
+		ax25_std_establish_data_link(ax25);
+		ax25->state = AX25_STATE_1;
+		break;
+
+	default:
+		break;
+	}
+
+	return queued;
+}
+
+/*
+ *	State machine for state 4, Timer Recovery State.
+ *	The handling of the timer(s) is in file ax25_std_timer.c
+ *	Handling of state 0 and connection release is in ax25.c.
+ */
+static int ax25_std_state4_machine(ax25_cb *ax25, struct sk_buff *skb, int frametype, int ns, int nr, int pf, int type)
+{
+	int queued = 0;
+
+	switch (frametype) {
+	case AX25_SABM:
+	case AX25_SABME:
+		if (frametype == AX25_SABM) {
+			ax25->modulus = AX25_MODULUS;
+			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+		} else {
+			ax25->modulus = AX25_EMODULUS;
+			ax25->window  = ax25->ax25_dev->values[AX25_VALUES_EWINDOW];
+		}
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_stop_t1timer(ax25);
+		ax25_stop_t2timer(ax25);
+		ax25_start_t3timer(ax25);
+		ax25_start_idletimer(ax25);
+		ax25->condition = 0x00;
+		ax25->vs        = 0;
+		ax25->va        = 0;
+		ax25->vr        = 0;
+		ax25->state     = AX25_STATE_3;
+		ax25->n2count   = 0;
+		ax25_requeue_frames(ax25);
+		break;
+
+	case AX25_DISC:
+		ax25_send_control(ax25, AX25_UA, pf, AX25_RESPONSE);
+		ax25_disconnect(ax25, 0);
+		break;
+
+	case AX25_DM:
+		ax25_disconnect(ax25, ECONNRESET);
+		break;
+
+	case AX25_RR:
+	case AX25_RNR:
+		if (frametype == AX25_RR)
+			ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+		else
+			ax25->condition |= AX25_COND_PEER_RX_BUSY;
+		if (type == AX25_RESPONSE && pf) {
+			ax25_stop_t1timer(ax25);
+			ax25->n2count = 0;
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+				if (ax25->vs == ax25->va) {
+					ax25_start_t3timer(ax25);
+					ax25->state   = AX25_STATE_3;
+				} else {
+					ax25_requeue_frames(ax25);
+				}
+			} else {
+				ax25_std_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+		}
+		if (type == AX25_COMMAND && pf)
+			ax25_std_enquiry_response(ax25);
+		if (ax25_validate_nr(ax25, nr)) {
+			ax25_frames_acked(ax25, nr);
+		} else {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_REJ:
+		ax25->condition &= ~AX25_COND_PEER_RX_BUSY;
+		if (pf && type == AX25_RESPONSE) {
+			ax25_stop_t1timer(ax25);
+			ax25->n2count = 0;
+			if (ax25_validate_nr(ax25, nr)) {
+				ax25_frames_acked(ax25, nr);
+				if (ax25->vs == ax25->va) {
+					ax25_start_t3timer(ax25);
+					ax25->state   = AX25_STATE_3;
+				} else {
+					ax25_requeue_frames(ax25);
+				}
+			} else {
+				ax25_std_nr_error_recovery(ax25);
+				ax25->state = AX25_STATE_1;
+			}
+			break;
+		}
+		if (type == AX25_COMMAND && pf)
+			ax25_std_enquiry_response(ax25);
+		if (ax25_validate_nr(ax25, nr)) {
+			ax25_frames_acked(ax25, nr);
+			ax25_requeue_frames(ax25);
+		} else {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+		}
+		break;
+
+	case AX25_I:
+		if (!ax25_validate_nr(ax25, nr)) {
+			ax25_std_nr_error_recovery(ax25);
+			ax25->state = AX25_STATE_1;
+			break;
+		}
+		ax25_frames_acked(ax25, nr);
+		if (ax25->condition & AX25_COND_OWN_RX_BUSY) {
+			if (pf)
+				ax25_std_enquiry_response(ax25);
+			break;
+		}
+		if (ns == ax25->vr) {
+			ax25->vr = (ax25->vr + 1) % ax25->modulus;
+			queued = ax25_rx_iframe(ax25, skb);
+			if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+				ax25->vr = ns;	/* ax25->vr - 1 */
+			ax25->condition &= ~AX25_COND_REJECT;
+			if (pf) {
+				ax25_std_enquiry_response(ax25);
+			} else {
+				if (!(ax25->condition & AX25_COND_ACK_PENDING)) {
+					ax25->condition |= AX25_COND_ACK_PENDING;
+					ax25_start_t2timer(ax25);
+				}
+			}
+		} else {
+			if (ax25->condition & AX25_COND_REJECT) {
+				if (pf) ax25_std_enquiry_response(ax25);
+			} else {
+				ax25->condition |= AX25_COND_REJECT;
+				ax25_send_control(ax25, AX25_REJ, pf, AX25_RESPONSE);
+				ax25->condition &= ~AX25_COND_ACK_PENDING;
+			}
+		}
+		break;
+
+	case AX25_FRMR:
+	case AX25_ILLEGAL:
+		ax25_std_establish_data_link(ax25);
+		ax25->state = AX25_STATE_1;
+		break;
+
+	default:
+		break;
+	}
+
+	return queued;
+}
+
+/*
+ *	Higher level upcall for a LAPB frame
+ */
+int ax25_std_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type)
+{
+	int queued = 0, frametype, ns, nr, pf;
+
+	frametype = ax25_decode(ax25, skb, &ns, &nr, &pf);
+
+	switch (ax25->state) {
+	case AX25_STATE_1:
+		queued = ax25_std_state1_machine(ax25, skb, frametype, pf, type);
+		break;
+	case AX25_STATE_2:
+		queued = ax25_std_state2_machine(ax25, skb, frametype, pf, type);
+		break;
+	case AX25_STATE_3:
+		queued = ax25_std_state3_machine(ax25, skb, frametype, ns, nr, pf, type);
+		break;
+	case AX25_STATE_4:
+		queued = ax25_std_state4_machine(ax25, skb, frametype, ns, nr, pf, type);
+		break;
+	}
+
+	ax25_kick(ax25);
+
+	return queued;
+}
diff --git a/net/ax25/ax25_std_subr.c b/net/ax25/ax25_std_subr.c
new file mode 100644
index 00000000000..2b3c801ae48
--- /dev/null
+++ b/net/ax25/ax25_std_subr.c
@@ -0,0 +1,88 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ * The following routines are taken from page 170 of the 7th ARRL Computer
+ * Networking Conference paper, as is the whole state machine.
+ */
+
+void ax25_std_nr_error_recovery(ax25_cb *ax25)
+{
+	ax25_std_establish_data_link(ax25);
+}
+
+void ax25_std_establish_data_link(ax25_cb *ax25)
+{
+	ax25->condition = 0x00;
+	ax25->n2count   = 0;
+
+	if (ax25->modulus == AX25_MODULUS)
+		ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
+	else
+		ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND);
+
+	ax25_calculate_t1(ax25);
+	ax25_stop_idletimer(ax25);
+	ax25_stop_t3timer(ax25);
+	ax25_stop_t2timer(ax25);
+	ax25_start_t1timer(ax25);
+}
+
+void ax25_std_transmit_enquiry(ax25_cb *ax25)
+{
+	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+		ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_COMMAND);
+	else
+		ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_COMMAND);
+
+	ax25->condition &= ~AX25_COND_ACK_PENDING;
+
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+}
+
+void ax25_std_enquiry_response(ax25_cb *ax25)
+{
+	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+		ax25_send_control(ax25, AX25_RNR, AX25_POLLON, AX25_RESPONSE);
+	else
+		ax25_send_control(ax25, AX25_RR, AX25_POLLON, AX25_RESPONSE);
+
+	ax25->condition &= ~AX25_COND_ACK_PENDING;
+}
+
+void ax25_std_timeout_response(ax25_cb *ax25)
+{
+	if (ax25->condition & AX25_COND_OWN_RX_BUSY)
+		ax25_send_control(ax25, AX25_RNR, AX25_POLLOFF, AX25_RESPONSE);
+	else
+		ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE);
+
+	ax25->condition &= ~AX25_COND_ACK_PENDING;
+}
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
new file mode 100644
index 00000000000..066897bc074
--- /dev/null
+++ b/net/ax25/ax25_std_timer.c
@@ -0,0 +1,177 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+void ax25_std_heartbeat_expiry(ax25_cb *ax25)
+{
+	struct sock *sk=ax25->sk;
+	
+	if (sk)
+		bh_lock_sock(sk);
+
+	switch (ax25->state) {
+	case AX25_STATE_0:
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN &&
+		     sock_flag(sk, SOCK_DEAD))) {
+			if (sk) {
+				sock_hold(sk);
+				ax25_destroy_socket(ax25);
+				bh_unlock_sock(sk);
+				sock_put(sk);
+			} else
+				ax25_destroy_socket(ax25);
+			return;
+		}
+		break;
+
+	case AX25_STATE_3:
+	case AX25_STATE_4:
+		/*
+		 * Check the state of the receive buffer.
+		 */
+		if (sk != NULL) {
+			if (atomic_read(&sk->sk_rmem_alloc) <
+			    (sk->sk_rcvbuf / 2) &&
+			    (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
+				ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
+				ax25->condition &= ~AX25_COND_ACK_PENDING;
+				ax25_send_control(ax25, AX25_RR, AX25_POLLOFF, AX25_RESPONSE);
+				break;
+			}
+		}
+	}
+
+	if (sk)
+		bh_unlock_sock(sk);
+
+	ax25_start_heartbeat(ax25);
+}
+
+void ax25_std_t2timer_expiry(ax25_cb *ax25)
+{
+	if (ax25->condition & AX25_COND_ACK_PENDING) {
+		ax25->condition &= ~AX25_COND_ACK_PENDING;
+		ax25_std_timeout_response(ax25);
+	}
+}
+
+void ax25_std_t3timer_expiry(ax25_cb *ax25)
+{
+	ax25->n2count = 0;
+	ax25_std_transmit_enquiry(ax25);
+	ax25->state   = AX25_STATE_4;
+}
+
+void ax25_std_idletimer_expiry(ax25_cb *ax25)
+{
+	ax25_clear_queues(ax25);
+
+	ax25->n2count = 0;
+	ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+	ax25->state   = AX25_STATE_2;
+
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+	ax25_stop_t2timer(ax25);
+	ax25_stop_t3timer(ax25);
+
+	if (ax25->sk != NULL) {
+		bh_lock_sock(ax25->sk);
+		ax25->sk->sk_state     = TCP_CLOSE;
+		ax25->sk->sk_err       = 0;
+		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
+		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+			ax25->sk->sk_state_change(ax25->sk);
+			sock_set_flag(ax25->sk, SOCK_DEAD);
+		}
+		bh_unlock_sock(ax25->sk);
+	}
+}
+
+void ax25_std_t1timer_expiry(ax25_cb *ax25)
+{
+	switch (ax25->state) {
+	case AX25_STATE_1:
+		if (ax25->n2count == ax25->n2) {
+			if (ax25->modulus == AX25_MODULUS) {
+				ax25_disconnect(ax25, ETIMEDOUT);
+				return;
+			} else {
+				ax25->modulus = AX25_MODULUS;
+				ax25->window  = ax25->ax25_dev->values[AX25_VALUES_WINDOW];
+				ax25->n2count = 0;
+				ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
+			}
+		} else {
+			ax25->n2count++;
+			if (ax25->modulus == AX25_MODULUS)
+				ax25_send_control(ax25, AX25_SABM, AX25_POLLON, AX25_COMMAND);
+			else
+				ax25_send_control(ax25, AX25_SABME, AX25_POLLON, AX25_COMMAND);
+		}
+		break;
+
+	case AX25_STATE_2:
+		if (ax25->n2count == ax25->n2) {
+			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+			ax25_disconnect(ax25, ETIMEDOUT);
+			return;
+		} else {
+			ax25->n2count++;
+			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+		}
+		break;
+
+	case AX25_STATE_3:
+		ax25->n2count = 1;
+		ax25_std_transmit_enquiry(ax25);
+		ax25->state   = AX25_STATE_4;
+		break;
+
+	case AX25_STATE_4:
+		if (ax25->n2count == ax25->n2) {
+			ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE);
+			ax25_disconnect(ax25, ETIMEDOUT);
+			return;
+		} else {
+			ax25->n2count++;
+			ax25_std_transmit_enquiry(ax25);
+		}
+		break;
+	}
+
+	ax25_calculate_t1(ax25);
+	ax25_start_t1timer(ax25);
+}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
new file mode 100644
index 00000000000..8cf72707af8
--- /dev/null
+++ b/net/ax25/ax25_subr.c
@@ -0,0 +1,295 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+/*
+ *	This routine purges all the queues of frames.
+ */
+void ax25_clear_queues(ax25_cb *ax25)
+{
+	skb_queue_purge(&ax25->write_queue);
+	skb_queue_purge(&ax25->ack_queue);
+	skb_queue_purge(&ax25->reseq_queue);
+	skb_queue_purge(&ax25->frag_queue);
+}
+
+/*
+ * This routine purges the input queue of those frames that have been
+ * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
+ * SDL diagram.
+ */
+void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
+{
+	struct sk_buff *skb;
+
+	/*
+	 * Remove all the ack-ed frames from the ack queue.
+	 */
+	if (ax25->va != nr) {
+		while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) {
+		        skb = skb_dequeue(&ax25->ack_queue);
+			kfree_skb(skb);
+			ax25->va = (ax25->va + 1) % ax25->modulus;
+		}
+	}
+}
+
+void ax25_requeue_frames(ax25_cb *ax25)
+{
+        struct sk_buff *skb, *skb_prev = NULL;
+
+	/*
+	 * Requeue all the un-ack-ed frames on the output queue to be picked
+	 * up by ax25_kick called from the timer. This arrangement handles the
+	 * possibility of an empty output queue.
+	 */
+	while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) {
+		if (skb_prev == NULL)
+			skb_queue_head(&ax25->write_queue, skb);
+		else
+			skb_append(skb_prev, skb);
+		skb_prev = skb;
+	}
+}
+
+/*
+ *	Validate that the value of nr is between va and vs. Return true or
+ *	false for testing.
+ */
+int ax25_validate_nr(ax25_cb *ax25, unsigned short nr)
+{
+	unsigned short vc = ax25->va;
+
+	while (vc != ax25->vs) {
+		if (nr == vc) return 1;
+		vc = (vc + 1) % ax25->modulus;
+	}
+
+	if (nr == ax25->vs) return 1;
+
+	return 0;
+}
+
+/*
+ *	This routine is the centralised routine for parsing the control
+ *	information for the different frame formats.
+ */
+int ax25_decode(ax25_cb *ax25, struct sk_buff *skb, int *ns, int *nr, int *pf)
+{
+	unsigned char *frame;
+	int frametype = AX25_ILLEGAL;
+
+	frame = skb->data;
+	*ns = *nr = *pf = 0;
+
+	if (ax25->modulus == AX25_MODULUS) {
+		if ((frame[0] & AX25_S) == 0) {
+			frametype = AX25_I;			/* I frame - carries NR/NS/PF */
+			*ns = (frame[0] >> 1) & 0x07;
+			*nr = (frame[0] >> 5) & 0x07;
+			*pf = frame[0] & AX25_PF;
+		} else if ((frame[0] & AX25_U) == 1) { 	/* S frame - take out PF/NR */
+			frametype = frame[0] & 0x0F;
+			*nr = (frame[0] >> 5) & 0x07;
+			*pf = frame[0] & AX25_PF;
+		} else if ((frame[0] & AX25_U) == 3) { 	/* U frame - take out PF */
+			frametype = frame[0] & ~AX25_PF;
+			*pf = frame[0] & AX25_PF;
+		}
+		skb_pull(skb, 1);
+	} else {
+		if ((frame[0] & AX25_S) == 0) {
+			frametype = AX25_I;			/* I frame - carries NR/NS/PF */
+			*ns = (frame[0] >> 1) & 0x7F;
+			*nr = (frame[1] >> 1) & 0x7F;
+			*pf = frame[1] & AX25_EPF;
+			skb_pull(skb, 2);
+		} else if ((frame[0] & AX25_U) == 1) { 	/* S frame - take out PF/NR */
+			frametype = frame[0] & 0x0F;
+			*nr = (frame[1] >> 1) & 0x7F;
+			*pf = frame[1] & AX25_EPF;
+			skb_pull(skb, 2);
+		} else if ((frame[0] & AX25_U) == 3) { 	/* U frame - take out PF */
+			frametype = frame[0] & ~AX25_PF;
+			*pf = frame[0] & AX25_PF;
+			skb_pull(skb, 1);
+		}
+	}
+
+	return frametype;
+}
+
+/*
+ *	This routine is called when the HDLC layer internally  generates a
+ *	command or  response  for  the remote machine ( eg. RR, UA etc. ).
+ *	Only supervisory or unnumbered frames are processed.
+ */
+void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
+{
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+
+	if ((skb = alloc_skb(ax25->ax25_dev->dev->hard_header_len + 2, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
+
+	skb->nh.raw = skb->data;
+
+	/* Assume a response - address structure for DTE */
+	if (ax25->modulus == AX25_MODULUS) {
+		dptr = skb_put(skb, 1);
+		*dptr = frametype;
+		*dptr |= (poll_bit) ? AX25_PF : 0;
+		if ((frametype & AX25_U) == AX25_S)		/* S frames carry NR */
+			*dptr |= (ax25->vr << 5);
+	} else {
+		if ((frametype & AX25_U) == AX25_U) {
+			dptr = skb_put(skb, 1);
+			*dptr = frametype;
+			*dptr |= (poll_bit) ? AX25_PF : 0;
+		} else {
+			dptr = skb_put(skb, 2);
+			dptr[0] = frametype;
+			dptr[1] = (ax25->vr << 1);
+			dptr[1] |= (poll_bit) ? AX25_EPF : 0;
+		}
+	}
+
+	ax25_transmit_buffer(ax25, skb, type);
+}
+
+/*
+ *	Send a 'DM' to an unknown connection attempt, or an invalid caller.
+ *
+ *	Note: src here is the sender, thus it's the target of the DM
+ */
+void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *dest, ax25_digi *digi)
+{
+	struct sk_buff *skb;
+	char *dptr;
+	ax25_digi retdigi;
+
+	if (dev == NULL)
+		return;
+
+	if ((skb = alloc_skb(dev->hard_header_len + 1, GFP_ATOMIC)) == NULL)
+		return;	/* Next SABM will get DM'd */
+
+	skb_reserve(skb, dev->hard_header_len);
+	skb->nh.raw = skb->data;
+
+	ax25_digi_invert(digi, &retdigi);
+
+	dptr = skb_put(skb, 1);
+
+	*dptr = AX25_DM | AX25_PF;
+
+	/*
+	 *	Do the address ourselves
+	 */
+	dptr  = skb_push(skb, ax25_addr_size(digi));
+	dptr += ax25_addr_build(dptr, dest, src, &retdigi, AX25_RESPONSE, AX25_MODULUS);
+
+	skb->dev      = dev;
+
+	ax25_queue_xmit(skb);
+}
+
+/*
+ *	Exponential backoff for AX.25
+ */
+void ax25_calculate_t1(ax25_cb *ax25)
+{
+	int n, t = 2;
+
+	switch (ax25->backoff) {
+	case 0:
+		break;
+
+	case 1:
+		t += 2 * ax25->n2count;
+		break;
+
+	case 2:
+		for (n = 0; n < ax25->n2count; n++)
+			t *= 2;
+		if (t > 8) t = 8;
+		break;
+	}
+
+	ax25->t1 = t * ax25->rtt;
+}
+
+/*
+ *	Calculate the Round Trip Time
+ */
+void ax25_calculate_rtt(ax25_cb *ax25)
+{
+	if (ax25->backoff == 0)
+		return;
+
+	if (ax25_t1timer_running(ax25) && ax25->n2count == 0)
+		ax25->rtt = (9 * ax25->rtt + ax25->t1 - ax25_display_timer(&ax25->t1timer)) / 10;
+
+	if (ax25->rtt < AX25_T1CLAMPLO)
+		ax25->rtt = AX25_T1CLAMPLO;
+
+	if (ax25->rtt > AX25_T1CLAMPHI)
+		ax25->rtt = AX25_T1CLAMPHI;
+}
+
+void ax25_disconnect(ax25_cb *ax25, int reason)
+{
+	ax25_clear_queues(ax25);
+
+	ax25_stop_t1timer(ax25);
+	ax25_stop_t2timer(ax25);
+	ax25_stop_t3timer(ax25);
+	ax25_stop_idletimer(ax25);
+
+	ax25->state = AX25_STATE_0;
+
+	ax25_link_failed(ax25, reason);
+
+	if (ax25->sk != NULL) {
+		bh_lock_sock(ax25->sk);
+		ax25->sk->sk_state     = TCP_CLOSE;
+		ax25->sk->sk_err       = reason;
+		ax25->sk->sk_shutdown |= SEND_SHUTDOWN;
+		if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+			ax25->sk->sk_state_change(ax25->sk);
+			sock_set_flag(ax25->sk, SOCK_DEAD);
+		}
+		bh_unlock_sock(ax25->sk);
+	}
+}
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
new file mode 100644
index 00000000000..7a6b50a1455
--- /dev/null
+++ b/net/ax25/ax25_timer.c
@@ -0,0 +1,243 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
+ * Copyright (C) Darryl Miles G7LED (dlm@g7led.demon.co.uk)
+ * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
+ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
+ * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+static void ax25_heartbeat_expiry(unsigned long);
+static void ax25_t1timer_expiry(unsigned long);
+static void ax25_t2timer_expiry(unsigned long);
+static void ax25_t3timer_expiry(unsigned long);
+static void ax25_idletimer_expiry(unsigned long);
+
+void ax25_start_heartbeat(ax25_cb *ax25)
+{
+	del_timer(&ax25->timer);
+
+	ax25->timer.data     = (unsigned long)ax25;
+	ax25->timer.function = &ax25_heartbeat_expiry;
+	ax25->timer.expires  = jiffies + 5 * HZ;
+
+	add_timer(&ax25->timer);
+}
+
+void ax25_start_t1timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t1timer);
+
+	ax25->t1timer.data     = (unsigned long)ax25;
+	ax25->t1timer.function = &ax25_t1timer_expiry;
+	ax25->t1timer.expires  = jiffies + ax25->t1;
+
+	add_timer(&ax25->t1timer);
+}
+
+void ax25_start_t2timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t2timer);
+
+	ax25->t2timer.data     = (unsigned long)ax25;
+	ax25->t2timer.function = &ax25_t2timer_expiry;
+	ax25->t2timer.expires  = jiffies + ax25->t2;
+
+	add_timer(&ax25->t2timer);
+}
+
+void ax25_start_t3timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t3timer);
+
+	if (ax25->t3 > 0) {
+		ax25->t3timer.data     = (unsigned long)ax25;
+		ax25->t3timer.function = &ax25_t3timer_expiry;
+		ax25->t3timer.expires  = jiffies + ax25->t3;
+
+		add_timer(&ax25->t3timer);
+	}
+}
+
+void ax25_start_idletimer(ax25_cb *ax25)
+{
+	del_timer(&ax25->idletimer);
+
+	if (ax25->idle > 0) {
+		ax25->idletimer.data     = (unsigned long)ax25;
+		ax25->idletimer.function = &ax25_idletimer_expiry;
+		ax25->idletimer.expires  = jiffies + ax25->idle;
+
+		add_timer(&ax25->idletimer);
+	}
+}
+
+void ax25_stop_heartbeat(ax25_cb *ax25)
+{
+	del_timer(&ax25->timer);
+}
+
+void ax25_stop_t1timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t1timer);
+}
+
+void ax25_stop_t2timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t2timer);
+}
+
+void ax25_stop_t3timer(ax25_cb *ax25)
+{
+	del_timer(&ax25->t3timer);
+}
+
+void ax25_stop_idletimer(ax25_cb *ax25)
+{
+	del_timer(&ax25->idletimer);
+}
+
+int ax25_t1timer_running(ax25_cb *ax25)
+{
+	return timer_pending(&ax25->t1timer);
+}
+
+unsigned long ax25_display_timer(struct timer_list *timer)
+{
+	if (!timer_pending(timer))
+		return 0;
+
+	return timer->expires - jiffies;
+}
+
+static void ax25_heartbeat_expiry(unsigned long param)
+{
+	int proto = AX25_PROTO_STD_SIMPLEX;
+	ax25_cb *ax25 = (ax25_cb *)param;
+
+	if (ax25->ax25_dev)
+		proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
+
+	switch (proto) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_heartbeat_expiry(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (ax25->ax25_dev->dama.slave)
+			ax25_ds_heartbeat_expiry(ax25);
+		else
+			ax25_std_heartbeat_expiry(ax25);
+		break;
+#endif
+	}
+}
+
+static void ax25_t1timer_expiry(unsigned long param)
+{
+	ax25_cb *ax25 = (ax25_cb *)param;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_t1timer_expiry(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (!ax25->ax25_dev->dama.slave)
+			ax25_std_t1timer_expiry(ax25);
+		break;
+#endif
+	}
+}
+
+static void ax25_t2timer_expiry(unsigned long param)
+{
+	ax25_cb *ax25 = (ax25_cb *)param;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_t2timer_expiry(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (!ax25->ax25_dev->dama.slave)
+			ax25_std_t2timer_expiry(ax25);
+		break;
+#endif
+	}
+}
+
+static void ax25_t3timer_expiry(unsigned long param)
+{
+	ax25_cb *ax25 = (ax25_cb *)param;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_t3timer_expiry(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (ax25->ax25_dev->dama.slave)
+			ax25_ds_t3timer_expiry(ax25);
+		else
+			ax25_std_t3timer_expiry(ax25);
+		break;
+#endif
+	}
+}
+
+static void ax25_idletimer_expiry(unsigned long param)
+{
+	ax25_cb *ax25 = (ax25_cb *)param;
+
+	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
+	case AX25_PROTO_STD_SIMPLEX:
+	case AX25_PROTO_STD_DUPLEX:
+		ax25_std_idletimer_expiry(ax25);
+		break;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
+	case AX25_PROTO_DAMA_SLAVE:
+		if (ax25->ax25_dev->dama.slave)
+			ax25_ds_idletimer_expiry(ax25);
+		else
+			ax25_std_idletimer_expiry(ax25);
+		break;
+#endif
+	}
+}
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
new file mode 100644
index 00000000000..cea6b7d1972
--- /dev/null
+++ b/net/ax25/ax25_uid.c
@@ -0,0 +1,228 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/spinlock.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/netfilter.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+#include <net/arp.h>
+
+/*
+ *	Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
+ */
+
+static ax25_uid_assoc *ax25_uid_list;
+static DEFINE_RWLOCK(ax25_uid_lock);
+
+int ax25_uid_policy = 0;
+
+ax25_address *ax25_findbyuid(uid_t uid)
+{
+	ax25_uid_assoc *ax25_uid;
+	ax25_address *res = NULL;
+
+	read_lock(&ax25_uid_lock);
+	for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+		if (ax25_uid->uid == uid) {
+			res = &ax25_uid->call;
+			break;
+		}
+	}
+	read_unlock(&ax25_uid_lock);
+
+	return NULL;
+}
+
+int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
+{
+	ax25_uid_assoc *s, *ax25_uid;
+	unsigned long res;
+
+	switch (cmd) {
+	case SIOCAX25GETUID:
+		res = -ENOENT;
+		read_lock(&ax25_uid_lock);
+		for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
+				res = ax25_uid->uid;
+				break;
+			}
+		}
+		read_unlock(&ax25_uid_lock);
+
+		return res;
+
+	case SIOCAX25ADDUID:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (ax25_findbyuid(sax->sax25_uid))
+			return -EEXIST;
+		if (sax->sax25_uid == 0)
+			return -EINVAL;
+		if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL)
+			return -ENOMEM;
+
+		ax25_uid->uid  = sax->sax25_uid;
+		ax25_uid->call = sax->sax25_call;
+
+		write_lock(&ax25_uid_lock);
+		ax25_uid->next = ax25_uid_list;
+		ax25_uid_list  = ax25_uid;
+		write_unlock(&ax25_uid_lock);
+
+		return 0;
+
+	case SIOCAX25DELUID:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		write_lock(&ax25_uid_lock);
+		for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) {
+			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
+				break;
+			}
+		}
+		if (ax25_uid == NULL) {
+			write_unlock(&ax25_uid_lock);
+			return -ENOENT;
+		}
+		if ((s = ax25_uid_list) == ax25_uid) {
+			ax25_uid_list = s->next;
+			write_unlock(&ax25_uid_lock);
+			kfree(ax25_uid);
+			return 0;
+		}
+		while (s != NULL && s->next != NULL) {
+			if (s->next == ax25_uid) {
+				s->next = ax25_uid->next;
+				write_unlock(&ax25_uid_lock);
+				kfree(ax25_uid);
+				return 0;
+			}
+			s = s->next;
+		}
+		write_unlock(&ax25_uid_lock);
+
+		return -ENOENT;
+
+	default:
+		return -EINVAL;
+	}
+
+	return -EINVAL;	/*NOTREACHED */
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ax25_uid_assoc *pt;
+	int i = 1;
+
+	read_lock(&ax25_uid_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (pt = ax25_uid_list; pt != NULL; pt = pt->next) {
+		if (i == *pos)
+			return pt;
+		++i;
+	}
+	return NULL;
+}
+
+static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return (v == SEQ_START_TOKEN) ? ax25_uid_list : 
+		((struct ax25_uid_assoc *) v)->next;
+}
+
+static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&ax25_uid_lock);
+}
+
+static int ax25_uid_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "Policy: %d\n", ax25_uid_policy);
+	else {
+		struct ax25_uid_assoc *pt = v;
+		
+
+		seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call));
+	}
+	return 0;
+}
+
+static struct seq_operations ax25_uid_seqops = {
+	.start = ax25_uid_seq_start,
+	.next = ax25_uid_seq_next,
+	.stop = ax25_uid_seq_stop,
+	.show = ax25_uid_seq_show,
+};
+
+static int ax25_uid_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ax25_uid_seqops);
+}
+
+struct file_operations ax25_uid_fops = {
+	.owner = THIS_MODULE,
+	.open = ax25_uid_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+/*
+ *	Free all memory associated with UID/Callsign structures.
+ */
+void __exit ax25_uid_free(void)
+{
+	ax25_uid_assoc *s, *ax25_uid;
+
+	write_lock(&ax25_uid_lock);
+	ax25_uid = ax25_uid_list;
+	while (ax25_uid != NULL) {
+		s        = ax25_uid;
+		ax25_uid = ax25_uid->next;
+
+		kfree(s);
+	}
+	ax25_uid_list = NULL;
+	write_unlock(&ax25_uid_lock);
+}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
new file mode 100644
index 00000000000..f67711f2ee9
--- /dev/null
+++ b/net/ax25/sysctl_net_ax25.c
@@ -0,0 +1,262 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/spinlock.h>
+#include <net/ax25.h>
+
+static int min_ipdefmode[1],    	max_ipdefmode[] = {1};
+static int min_axdefmode[1],            max_axdefmode[] = {1};
+static int min_backoff[1],		max_backoff[] = {2};
+static int min_conmode[1],		max_conmode[] = {2};
+static int min_window[] = {1},		max_window[] = {7};
+static int min_ewindow[] = {1},		max_ewindow[] = {63};
+static int min_t1[] = {1},		max_t1[] = {30 * HZ};
+static int min_t2[] = {1},		max_t2[] = {20 * HZ};
+static int min_t3[1],   		max_t3[] = {3600 * HZ};
+static int min_idle[1],  		max_idle[] = {65535 * HZ};
+static int min_n2[] = {1},		max_n2[] = {31};
+static int min_paclen[] = {1},		max_paclen[] = {512};
+static int min_proto[1],		max_proto[] = {3};
+static int min_ds_timeout[1],   	max_ds_timeout[] = {65535 * HZ};
+
+static struct ctl_table_header *ax25_table_header;
+
+static ctl_table *ax25_table;
+static int ax25_table_size;
+
+static ctl_table ax25_dir_table[] = {
+	{
+		.ctl_name	= NET_AX25,
+		.procname	= "ax25",
+		.mode		= 0555,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ax25_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ax25_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static const ctl_table ax25_param_table[] = {
+	{
+		.ctl_name	= NET_AX25_IP_DEFAULT_MODE,
+		.procname	= "ip_default_mode",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_ipdefmode,
+		.extra2		= &max_ipdefmode
+	},
+	{
+		.ctl_name	= NET_AX25_DEFAULT_MODE,
+		.procname	= "ax25_default_mode",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_axdefmode,
+		.extra2		= &max_axdefmode
+	},
+	{
+		.ctl_name	= NET_AX25_BACKOFF_TYPE,
+		.procname	= "backoff_type",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_backoff,
+		.extra2		= &max_backoff
+	},
+	{
+		.ctl_name	= NET_AX25_CONNECT_MODE,
+		.procname	= "connect_mode",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_conmode,
+		.extra2		= &max_conmode
+	},
+	{
+		.ctl_name	= NET_AX25_STANDARD_WINDOW,
+		.procname	= "standard_window_size",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_window,
+		.extra2		= &max_window
+	},
+	{
+		.ctl_name	= NET_AX25_EXTENDED_WINDOW,
+		.procname	= "extended_window_size",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_ewindow,
+		.extra2		= &max_ewindow
+	},
+	{
+		.ctl_name	= NET_AX25_T1_TIMEOUT,
+		.procname	= "t1_timeout",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t1,
+		.extra2		= &max_t1
+	},
+	{
+		.ctl_name	= NET_AX25_T2_TIMEOUT,
+		.procname	= "t2_timeout",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t2,
+		.extra2		= &max_t2
+	},
+	{
+		.ctl_name	= NET_AX25_T3_TIMEOUT,
+		.procname	= "t3_timeout",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t3,
+		.extra2		= &max_t3
+	},
+	{
+		.ctl_name	= NET_AX25_IDLE_TIMEOUT,
+		.procname	= "idle_timeout",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_idle,
+		.extra2		= &max_idle
+	},
+	{
+		.ctl_name	= NET_AX25_N2,
+		.procname	= "maximum_retry_count",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_n2,
+		.extra2		= &max_n2
+	},
+	{
+		.ctl_name	= NET_AX25_PACLEN,
+		.procname	= "maximum_packet_length",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_paclen,
+		.extra2		= &max_paclen
+	},
+	{
+		.ctl_name	= NET_AX25_PROTOCOL,
+		.procname	= "protocol",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_proto,
+		.extra2		= &max_proto
+	},
+	{
+		.ctl_name	= NET_AX25_DAMA_SLAVE_TIMEOUT,
+		.procname	= "dama_slave_timeout",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_ds_timeout,
+		.extra2		= &max_ds_timeout
+	},
+	{ .ctl_name = 0 }	/* that's all, folks! */
+};
+
+void ax25_register_sysctl(void)
+{
+	ax25_dev *ax25_dev;
+	int n, k;
+
+	spin_lock_bh(&ax25_dev_lock);
+	for (ax25_table_size = sizeof(ctl_table), ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+		ax25_table_size += sizeof(ctl_table);
+
+	if ((ax25_table = kmalloc(ax25_table_size, GFP_ATOMIC)) == NULL) {
+		spin_unlock_bh(&ax25_dev_lock);
+		return;
+	}
+
+	memset(ax25_table, 0x00, ax25_table_size);
+
+	for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) {
+		ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC);
+		if (!child) {
+			while (n--)
+				kfree(ax25_table[n].child);
+			kfree(ax25_table);
+			spin_unlock_bh(&ax25_dev_lock);
+			return;
+		}
+		memcpy(child, ax25_param_table, sizeof(ax25_param_table));
+		ax25_table[n].child = ax25_dev->systable = child;
+		ax25_table[n].ctl_name     = n + 1;
+		ax25_table[n].procname     = ax25_dev->dev->name;
+		ax25_table[n].mode         = 0555;
+
+#ifndef CONFIG_AX25_DAMA_SLAVE
+		/*
+		 * We do not wish to have a representation of this parameter
+		 * in /proc/sys/ when configured *not* to include the
+		 * AX.25 DAMA slave code, do we?
+		 */
+
+		child[AX25_VALUES_DS_TIMEOUT].procname = NULL;
+#endif
+
+		child[AX25_MAX_VALUES].ctl_name = 0;	/* just in case... */
+
+		for (k = 0; k < AX25_MAX_VALUES; k++)
+			child[k].data = &ax25_dev->values[k];
+
+		n++;
+	}
+	spin_unlock_bh(&ax25_dev_lock);
+
+	ax25_dir_table[0].child = ax25_table;
+
+	ax25_table_header = register_sysctl_table(ax25_root_table, 1);
+}
+
+void ax25_unregister_sysctl(void)
+{
+	ctl_table *p;
+	unregister_sysctl_table(ax25_table_header);
+
+	ax25_dir_table[0].child = NULL;
+	for (p = ax25_table; p->ctl_name; p++)
+		kfree(p->child);
+	kfree(ax25_table);
+}
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
new file mode 100644
index 00000000000..6929490d095
--- /dev/null
+++ b/net/bluetooth/Kconfig
@@ -0,0 +1,63 @@
+#
+# Bluetooth subsystem configuration
+#
+
+menuconfig BT
+	depends on NET
+	tristate "Bluetooth subsystem support"
+	help
+	  Bluetooth is low-cost, low-power, short-range wireless technology.
+	  It was designed as a replacement for cables and other short-range
+	  technologies like IrDA.  Bluetooth operates in personal area range
+	  that typically extends up to 10 meters.  More information about
+	  Bluetooth can be found at <http://www.bluetooth.com/>.
+
+	  Linux Bluetooth subsystem consist of several layers:
+	     Bluetooth Core (HCI device and connection manager, scheduler)
+	     HCI Device drivers (Interface to the hardware)
+	     SCO Module (SCO audio links)
+	     L2CAP Module (Logical Link Control and Adaptation Protocol)
+	     RFCOMM Module (RFCOMM Protocol)  
+	     BNEP Module (Bluetooth Network Encapsulation Protocol)
+	     CMTP Module (CAPI Message Transport Protocol)
+	     HIDP Module (Human Interface Device Protocol)
+
+	  Say Y here to compile Bluetooth support into the kernel or say M to
+	  compile it as module (bluetooth).
+
+	  To use Linux Bluetooth subsystem, you will need several user-space
+	  utilities like hciconfig and hcid.  These utilities and updates to
+	  Bluetooth kernel modules are provided in the BlueZ packages.
+	  For more information, see <http://www.bluez.org/>.
+
+config BT_L2CAP
+	tristate "L2CAP protocol support"
+	depends on BT
+	help
+	  L2CAP (Logical Link Control and Adaptation Protocol) provides
+	  connection oriented and connection-less data transport.  L2CAP
+	  support is required for most Bluetooth applications.
+
+	  Say Y here to compile L2CAP support into the kernel or say M to
+	  compile it as module (l2cap).
+
+config BT_SCO
+	tristate "SCO links support"
+	depends on BT
+	help
+	  SCO link provides voice transport over Bluetooth.  SCO support is
+	  required for voice applications like Headset and Audio.
+
+	  Say Y here to compile SCO support into the kernel or say M to
+	  compile it as module (sco).
+
+source "net/bluetooth/rfcomm/Kconfig"
+
+source "net/bluetooth/bnep/Kconfig"
+
+source "net/bluetooth/cmtp/Kconfig"
+
+source "net/bluetooth/hidp/Kconfig"
+
+source "drivers/bluetooth/Kconfig"
+
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
new file mode 100644
index 00000000000..d1e433f7d67
--- /dev/null
+++ b/net/bluetooth/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for the Linux Bluetooth subsystem.
+#
+
+obj-$(CONFIG_BT)	+= bluetooth.o
+obj-$(CONFIG_BT_L2CAP)	+= l2cap.o
+obj-$(CONFIG_BT_SCO)	+= sco.o
+obj-$(CONFIG_BT_RFCOMM)	+= rfcomm/
+obj-$(CONFIG_BT_BNEP)	+= bnep/
+obj-$(CONFIG_BT_CMTP)	+= cmtp/
+obj-$(CONFIG_BT_HIDP)	+= hidp/
+
+bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
new file mode 100644
index 00000000000..1650c6bf699
--- /dev/null
+++ b/net/bluetooth/af_bluetooth.c
@@ -0,0 +1,355 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth address family and sockets. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+
+#if defined(CONFIG_KMOD)
+#include <linux/kmod.h>
+#endif
+
+#include <net/bluetooth/bluetooth.h>
+
+#ifndef CONFIG_BT_SOCK_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "2.7"
+
+struct proc_dir_entry *proc_bt;
+EXPORT_SYMBOL(proc_bt);
+
+/* Bluetooth sockets */
+#define BT_MAX_PROTO	8
+static struct net_proto_family *bt_proto[BT_MAX_PROTO];
+
+int bt_sock_register(int proto, struct net_proto_family *ops)
+{
+	if (proto < 0 || proto >= BT_MAX_PROTO)
+		return -EINVAL;
+
+	if (bt_proto[proto])
+		return -EEXIST;
+
+	bt_proto[proto] = ops;
+	return 0;
+}
+EXPORT_SYMBOL(bt_sock_register);
+
+int bt_sock_unregister(int proto)
+{
+	if (proto < 0 || proto >= BT_MAX_PROTO)
+		return -EINVAL;
+
+	if (!bt_proto[proto])
+		return -ENOENT;
+
+	bt_proto[proto] = NULL;
+	return 0;
+}
+EXPORT_SYMBOL(bt_sock_unregister);
+
+static int bt_sock_create(struct socket *sock, int proto)
+{
+	int err = 0;
+
+	if (proto < 0 || proto >= BT_MAX_PROTO)
+		return -EINVAL;
+
+#if defined(CONFIG_KMOD)
+	if (!bt_proto[proto]) {
+		request_module("bt-proto-%d", proto);
+	}
+#endif
+	err = -EPROTONOSUPPORT;
+	if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
+		err = bt_proto[proto]->create(sock, proto);
+		module_put(bt_proto[proto]->owner);
+	}
+	return err; 
+}
+
+void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_add_node(sk, &l->head);
+	write_unlock_bh(&l->lock);
+}
+EXPORT_SYMBOL(bt_sock_link);
+
+void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&l->lock);
+}
+EXPORT_SYMBOL(bt_sock_unlink);
+
+void bt_accept_enqueue(struct sock *parent, struct sock *sk)
+{
+	BT_DBG("parent %p, sk %p", parent, sk);
+
+	sock_hold(sk);
+	list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
+	bt_sk(sk)->parent = parent;
+	parent->sk_ack_backlog++;
+}
+EXPORT_SYMBOL(bt_accept_enqueue);
+
+void bt_accept_unlink(struct sock *sk)
+{
+	BT_DBG("sk %p state %d", sk, sk->sk_state);
+
+	list_del_init(&bt_sk(sk)->accept_q);
+	bt_sk(sk)->parent->sk_ack_backlog--;
+	bt_sk(sk)->parent = NULL;
+	sock_put(sk);
+}
+EXPORT_SYMBOL(bt_accept_unlink);
+
+struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
+{
+	struct list_head *p, *n;
+	struct sock *sk;
+
+	BT_DBG("parent %p", parent);
+
+	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
+		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
+
+		lock_sock(sk);
+
+		/* FIXME: Is this check still needed */
+		if (sk->sk_state == BT_CLOSED) {
+			release_sock(sk);
+			bt_accept_unlink(sk);
+			continue;
+		}
+
+		if (sk->sk_state == BT_CONNECTED || !newsock) {
+			bt_accept_unlink(sk);
+			if (newsock)
+				sock_graft(sk, newsock);
+			release_sock(sk);
+			return sk;
+		}
+
+		release_sock(sk);
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(bt_accept_dequeue);
+
+int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+	struct msghdr *msg, size_t len, int flags)
+{
+	int noblock = flags & MSG_DONTWAIT;
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	size_t copied;
+	int err;
+
+	BT_DBG("sock %p sk %p len %d", sock, sk, len);
+
+	if (flags & (MSG_OOB))
+		return -EOPNOTSUPP;
+
+	if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) {
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			return 0;
+		return err;
+	}
+
+	msg->msg_namelen = 0;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	skb->h.raw = skb->data;
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	skb_free_datagram(sk, skb);
+
+	return err ? : copied;
+}
+EXPORT_SYMBOL(bt_sock_recvmsg);
+
+static inline unsigned int bt_accept_poll(struct sock *parent)
+{
+	struct list_head *p, *n;
+	struct sock *sk;
+
+	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
+		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
+		if (sk->sk_state == BT_CONNECTED)
+			return POLLIN | POLLRDNORM;
+	}
+
+	return 0;
+}
+
+unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	if (sk->sk_state == BT_LISTEN)
+		return bt_accept_poll(sk);
+
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue) || 
+			(sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	if (sk->sk_state == BT_CLOSED)
+		mask |= POLLHUP;
+
+	if (sk->sk_state == BT_CONNECT ||
+			sk->sk_state == BT_CONNECT2 ||
+			sk->sk_state == BT_CONFIG)
+		return mask;
+
+	if (sock_writeable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	return mask;
+}
+EXPORT_SYMBOL(bt_sock_poll);
+
+int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	while (sk->sk_state != state) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		if (sk->sk_err) {
+			err = sock_error(sk);
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return err;
+}
+EXPORT_SYMBOL(bt_sock_wait_state);
+
+static struct net_proto_family bt_sock_family_ops = {
+	.owner	= THIS_MODULE,
+	.family	= PF_BLUETOOTH,
+	.create	= bt_sock_create,
+};
+
+extern int hci_sock_init(void);
+extern int hci_sock_cleanup(void);
+
+extern int bt_sysfs_init(void);
+extern int bt_sysfs_cleanup(void);
+
+static int __init bt_init(void)
+{
+	BT_INFO("Core ver %s", VERSION);
+
+	proc_bt = proc_mkdir("bluetooth", NULL);
+	if (proc_bt)
+		proc_bt->owner = THIS_MODULE;
+
+	sock_register(&bt_sock_family_ops);
+
+	BT_INFO("HCI device and connection manager initialized");
+
+	bt_sysfs_init();
+
+	hci_sock_init();
+
+	return 0;
+}
+
+static void __exit bt_exit(void)
+{
+	hci_sock_cleanup();
+
+	bt_sysfs_cleanup();
+
+	sock_unregister(PF_BLUETOOTH);
+
+	remove_proc_entry("bluetooth", NULL);
+}
+
+subsys_initcall(bt_init);
+module_exit(bt_exit);
+
+MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth Core ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_BLUETOOTH);
diff --git a/net/bluetooth/bnep/Kconfig b/net/bluetooth/bnep/Kconfig
new file mode 100644
index 00000000000..35158b036d5
--- /dev/null
+++ b/net/bluetooth/bnep/Kconfig
@@ -0,0 +1,24 @@
+config BT_BNEP
+	tristate "BNEP protocol support"
+	depends on BT && BT_L2CAP
+	select CRC32
+	help
+	  BNEP (Bluetooth Network Encapsulation Protocol) is Ethernet
+	  emulation layer on top of Bluetooth.  BNEP is required for
+	  Bluetooth PAN (Personal Area Network).
+
+	  Say Y here to compile BNEP support into the kernel or say M to
+	  compile it as module (bnep).
+
+config BT_BNEP_MC_FILTER
+	bool "Multicast filter support"
+	depends on BT_BNEP
+	help
+	  This option enables the multicast filter support for BNEP.
+
+config BT_BNEP_PROTO_FILTER
+	bool "Protocol filter support"
+	depends on BT_BNEP
+	help
+	  This option enables the protocol filter support for BNEP.
+
diff --git a/net/bluetooth/bnep/Makefile b/net/bluetooth/bnep/Makefile
new file mode 100644
index 00000000000..c7821e76ca5
--- /dev/null
+++ b/net/bluetooth/bnep/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux Bluetooth BNEP layer.
+#
+
+obj-$(CONFIG_BT_BNEP) += bnep.o
+
+bnep-objs := core.o sock.o netdev.o
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
new file mode 100644
index 00000000000..bbb1ed7097a
--- /dev/null
+++ b/net/bluetooth/bnep/bnep.h
@@ -0,0 +1,184 @@
+/*
+  BNEP protocol definition for Linux Bluetooth stack (BlueZ).
+  Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+	
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License, version 2, as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+*/
+
+/*
+ * $Id: bnep.h,v 1.5 2002/08/04 21:23:58 maxk Exp $
+ */
+
+#ifndef _BNEP_H
+#define _BNEP_H
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include <net/bluetooth/bluetooth.h>
+
+// Limits
+#define BNEP_MAX_PROTO_FILTERS     5
+#define BNEP_MAX_MULTICAST_FILTERS 20
+
+// UUIDs
+#define BNEP_BASE_UUID 0x0000000000001000800000805F9B34FB
+#define BNEP_UUID16    0x02
+#define BNEP_UUID32    0x04
+#define BNEP_UUID128   0x16
+
+#define BNEP_SVC_PANU  0x1115
+#define BNEP_SVC_NAP   0x1116
+#define BNEP_SVC_GN    0x1117
+
+// Packet types
+#define BNEP_GENERAL               0x00
+#define BNEP_CONTROL               0x01
+#define BNEP_COMPRESSED            0x02
+#define BNEP_COMPRESSED_SRC_ONLY   0x03
+#define BNEP_COMPRESSED_DST_ONLY   0x04
+
+// Control types
+#define BNEP_CMD_NOT_UNDERSTOOD    0x00
+#define BNEP_SETUP_CONN_REQ        0x01
+#define BNEP_SETUP_CONN_RSP        0x02
+#define BNEP_FILTER_NET_TYPE_SET   0x03
+#define BNEP_FILTER_NET_TYPE_RSP   0x04
+#define BNEP_FILTER_MULTI_ADDR_SET 0x05
+#define BNEP_FILTER_MULTI_ADDR_RSP 0x06
+
+// Extension types
+#define BNEP_EXT_CONTROL           0x00
+
+// Response messages 
+#define BNEP_SUCCESS               0x00
+
+#define BNEP_CONN_INVALID_DST      0x01
+#define BNEP_CONN_INVALID_SRC      0x02
+#define BNEP_CONN_INVALID_SVC      0x03
+#define BNEP_CONN_NOT_ALLOWED      0x04
+
+#define BNEP_FILTER_UNSUPPORTED_REQ    0x01
+#define BNEP_FILTER_INVALID_RANGE      0x02
+#define BNEP_FILTER_INVALID_MCADDR     0x02
+#define BNEP_FILTER_LIMIT_REACHED      0x03
+#define BNEP_FILTER_DENIED_SECURITY    0x04
+
+// L2CAP settings
+#define BNEP_MTU         1691
+#define BNEP_PSM	 0x0f
+#define BNEP_FLUSH_TO    0xffff
+#define BNEP_CONNECT_TO  15
+#define BNEP_FILTER_TO   15
+
+// Headers 
+#define BNEP_TYPE_MASK	 0x7f
+#define BNEP_EXT_HEADER	 0x80
+
+struct bnep_setup_conn_req {
+	__u8  type;
+	__u8  ctrl;
+	__u8  uuid_size;
+	__u8  service[0];
+} __attribute__((packed));
+
+struct bnep_set_filter_req {
+	__u8  type;
+	__u8  ctrl;
+	__u16 len;
+	__u8  list[0];
+} __attribute__((packed));
+
+struct bnep_control_rsp {
+	__u8  type;
+	__u8  ctrl;
+	__u16 resp;
+} __attribute__((packed));
+
+struct bnep_ext_hdr {
+	__u8  type;
+	__u8  len;
+	__u8  data[0];
+} __attribute__((packed));
+
+/* BNEP ioctl defines */
+#define BNEPCONNADD	_IOW('B', 200, int)
+#define BNEPCONNDEL	_IOW('B', 201, int)
+#define BNEPGETCONNLIST	_IOR('B', 210, int)
+#define BNEPGETCONNINFO	_IOR('B', 211, int)
+
+struct bnep_connadd_req {
+	int   sock;       // Connected socket
+	__u32 flags;
+	__u16 role;
+	char  device[16]; // Name of the Ethernet device
+};
+
+struct bnep_conndel_req {
+	__u32 flags;
+	__u8  dst[ETH_ALEN];
+};
+
+struct bnep_conninfo {
+	__u32 flags;
+	__u16 role;
+	__u16 state;	
+	__u8  dst[ETH_ALEN];
+	char  device[16];
+};
+
+struct bnep_connlist_req {
+	__u32  cnum;
+	struct bnep_conninfo __user *ci;
+};
+
+struct bnep_proto_filter {
+	__u16 start;
+	__u16 end;
+};
+
+int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock);
+int bnep_del_connection(struct bnep_conndel_req *req);
+int bnep_get_connlist(struct bnep_connlist_req *req);
+int bnep_get_conninfo(struct bnep_conninfo *ci);
+
+// BNEP sessions
+struct bnep_session {
+	struct list_head list;
+	
+	unsigned int  role;
+        unsigned long state;
+        unsigned long flags;
+	atomic_t      killed;
+
+	struct ethhdr eh;
+	struct msghdr msg;
+
+	struct bnep_proto_filter proto_filter[BNEP_MAX_PROTO_FILTERS];
+	u64    mc_filter;
+	
+	struct socket    *sock;
+	struct net_device *dev;
+	struct net_device_stats stats;
+};
+
+void bnep_net_setup(struct net_device *dev);
+int bnep_sock_init(void);
+int bnep_sock_cleanup(void);
+
+static inline int bnep_mc_hash(__u8 *addr)
+{
+        return (crc32_be(~0, addr, ETH_ALEN) >> 26);
+}
+
+#endif
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
new file mode 100644
index 00000000000..682bf20af52
--- /dev/null
+++ b/net/bluetooth/bnep/core.c
@@ -0,0 +1,713 @@
+/* 
+   BNEP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2001-2002 Inventel Systemes
+   Written 2001-2002 by
+	Cl�ment Moreau <clement.moreau@inventel.fr>
+	David Libault  <david.libault@inventel.fr>
+
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $
+ */ 
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/net.h>
+#include <net/sock.h>
+
+#include <linux/socket.h>
+#include <linux/file.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/l2cap.h>
+
+#include "bnep.h"
+
+#ifndef CONFIG_BT_BNEP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "1.2"
+
+static LIST_HEAD(bnep_session_list);
+static DECLARE_RWSEM(bnep_session_sem);
+
+static struct bnep_session *__bnep_get_session(u8 *dst)
+{
+	struct bnep_session *s;
+	struct list_head *p;
+
+	BT_DBG("");
+
+	list_for_each(p, &bnep_session_list) {
+		s = list_entry(p, struct bnep_session, list);	
+		if (!memcmp(dst, s->eh.h_source, ETH_ALEN))
+			return s;
+	}
+	return NULL;
+}
+
+static void __bnep_link_session(struct bnep_session *s)
+{
+	/* It's safe to call __module_get() here because sessions are added
+	   by the socket layer which has to hold the refference to this module.
+	 */
+	__module_get(THIS_MODULE);
+	list_add(&s->list, &bnep_session_list);	
+}
+
+static void __bnep_unlink_session(struct bnep_session *s)
+{
+	list_del(&s->list);
+	module_put(THIS_MODULE);
+}
+
+static int bnep_send(struct bnep_session *s, void *data, size_t len)
+{
+	struct socket *sock = s->sock;
+	struct kvec iv = { data, len };
+
+	return kernel_sendmsg(sock, &s->msg, &iv, 1, len);
+}
+
+static int bnep_send_rsp(struct bnep_session *s, u8 ctrl, u16 resp)
+{
+	struct bnep_control_rsp rsp;
+	rsp.type = BNEP_CONTROL;
+	rsp.ctrl = ctrl;
+	rsp.resp = htons(resp);
+	return bnep_send(s, &rsp, sizeof(rsp));
+}
+
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+static inline void bnep_set_default_proto_filter(struct bnep_session *s)
+{
+	/* (IPv4, ARP)  */
+	s->proto_filter[0].start = htons(0x0800);
+	s->proto_filter[0].end   = htons(0x0806);
+	/* (RARP, AppleTalk) */
+	s->proto_filter[1].start = htons(0x8035);
+	s->proto_filter[1].end   = htons(0x80F3);
+	/* (IPX, IPv6) */
+	s->proto_filter[2].start = htons(0x8137);
+	s->proto_filter[2].end   = htons(0x86DD);
+}
+#endif
+
+static int bnep_ctrl_set_netfilter(struct bnep_session *s, u16 *data, int len)
+{
+	int n;
+
+	if (len < 2)
+		return -EILSEQ;
+
+	n = ntohs(get_unaligned(data));
+	data++; len -= 2;
+
+	if (len < n)
+		return -EILSEQ;
+
+	BT_DBG("filter len %d", n);
+
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+	n /= 4;
+	if (n <= BNEP_MAX_PROTO_FILTERS) {
+		struct bnep_proto_filter *f = s->proto_filter;
+		int i;
+
+		for (i = 0; i < n; i++) {
+			f[i].start = get_unaligned(data++);
+			f[i].end   = get_unaligned(data++);
+
+			BT_DBG("proto filter start %d end %d",
+				f[i].start, f[i].end);
+		}
+
+		if (i < BNEP_MAX_PROTO_FILTERS)
+			memset(f + i, 0, sizeof(*f));
+
+		if (n == 0)
+			bnep_set_default_proto_filter(s);
+
+		bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_SUCCESS);
+	} else {
+		bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_LIMIT_REACHED);
+	}
+#else
+	bnep_send_rsp(s, BNEP_FILTER_NET_TYPE_RSP, BNEP_FILTER_UNSUPPORTED_REQ);
+#endif
+	return 0;
+}
+
+static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len)
+{
+	int n;
+
+	if (len < 2)
+		return -EILSEQ;
+
+	n = ntohs(get_unaligned((u16 *) data)); 
+	data += 2; len -= 2;
+
+	if (len < n)
+		return -EILSEQ;
+
+	BT_DBG("filter len %d", n);
+
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+	n /= (ETH_ALEN * 2);
+
+	if (n > 0) {
+		s->mc_filter = 0;
+
+		/* Always send broadcast */
+		set_bit(bnep_mc_hash(s->dev->broadcast), (ulong *) &s->mc_filter);
+
+		/* Add address ranges to the multicast hash */
+		for (; n > 0; n--) {
+			u8 a1[6], *a2;
+
+			memcpy(a1, data, ETH_ALEN); data += ETH_ALEN;
+			a2 = data; data += ETH_ALEN;
+	
+			BT_DBG("mc filter %s -> %s",
+				batostr((void *) a1), batostr((void *) a2));
+
+			#define INCA(a) { int i = 5; while (i >=0 && ++a[i--] == 0); }
+
+			/* Iterate from a1 to a2 */
+			set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter);
+			while (memcmp(a1, a2, 6) < 0 && s->mc_filter != ~0LL) {
+				INCA(a1);
+				set_bit(bnep_mc_hash(a1), (ulong *) &s->mc_filter);
+			}
+		}
+	}
+
+	BT_DBG("mc filter hash 0x%llx", s->mc_filter);
+
+	bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_SUCCESS);
+#else
+	bnep_send_rsp(s, BNEP_FILTER_MULTI_ADDR_RSP, BNEP_FILTER_UNSUPPORTED_REQ);
+#endif
+	return 0;
+}
+
+static int bnep_rx_control(struct bnep_session *s, void *data, int len)
+{
+	u8  cmd = *(u8 *)data;
+	int err = 0;
+
+	data++; len--;
+
+	switch (cmd) {
+	case BNEP_CMD_NOT_UNDERSTOOD:
+	case BNEP_SETUP_CONN_REQ:
+	case BNEP_SETUP_CONN_RSP:
+	case BNEP_FILTER_NET_TYPE_RSP:
+	case BNEP_FILTER_MULTI_ADDR_RSP:
+		/* Ignore these for now */
+		break;
+
+	case BNEP_FILTER_NET_TYPE_SET:
+		err = bnep_ctrl_set_netfilter(s, data, len);
+		break;
+
+	case BNEP_FILTER_MULTI_ADDR_SET:
+		err = bnep_ctrl_set_mcfilter(s, data, len);
+		break;
+
+	default: {
+			u8 pkt[3];
+			pkt[0] = BNEP_CONTROL;
+			pkt[1] = BNEP_CMD_NOT_UNDERSTOOD;
+			pkt[2] = cmd;
+			bnep_send(s, pkt, sizeof(pkt));
+		}
+		break;
+	}
+
+	return err;
+}
+
+static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb)
+{
+	struct bnep_ext_hdr *h;
+	int err = 0;
+
+	do {
+		h = (void *) skb->data;
+		if (!skb_pull(skb, sizeof(*h))) {
+			err = -EILSEQ;
+			break;
+		}
+
+		BT_DBG("type 0x%x len %d", h->type, h->len);
+	
+		switch (h->type & BNEP_TYPE_MASK) {
+		case BNEP_EXT_CONTROL:
+			bnep_rx_control(s, skb->data, skb->len);
+			break;
+
+		default:
+			/* Unknown extension, skip it. */
+			break;
+		}
+
+		if (!skb_pull(skb, h->len)) {
+			err = -EILSEQ;
+			break;
+		}
+	} while (!err && (h->type & BNEP_EXT_HEADER));
+	
+	return err;
+}
+
+static u8 __bnep_rx_hlen[] = {
+	ETH_HLEN,     /* BNEP_GENERAL */
+	0,            /* BNEP_CONTROL */
+	2,            /* BNEP_COMPRESSED */
+	ETH_ALEN + 2, /* BNEP_COMPRESSED_SRC_ONLY */
+	ETH_ALEN + 2  /* BNEP_COMPRESSED_DST_ONLY */
+};
+#define BNEP_RX_TYPES	(sizeof(__bnep_rx_hlen) - 1)
+
+static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
+{
+	struct net_device *dev = s->dev;
+	struct sk_buff *nskb;
+	u8 type;
+
+	dev->last_rx = jiffies;
+	s->stats.rx_bytes += skb->len;
+
+	type = *(u8 *) skb->data; skb_pull(skb, 1);
+
+	if ((type & BNEP_TYPE_MASK) > BNEP_RX_TYPES)
+		goto badframe;
+	
+	if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) {
+		bnep_rx_control(s, skb->data, skb->len);
+		kfree_skb(skb);
+		return 0;
+	}
+
+	skb->mac.raw = skb->data;
+
+	/* Verify and pull out header */
+	if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
+		goto badframe;
+
+	s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2));
+
+	if (type & BNEP_EXT_HEADER) {
+		if (bnep_rx_extension(s, skb) < 0)
+			goto badframe;
+	}
+
+	/* Strip 802.1p header */
+	if (ntohs(s->eh.h_proto) == 0x8100) {
+		if (!skb_pull(skb, 4))
+			goto badframe;
+		s->eh.h_proto = get_unaligned((u16 *) (skb->data - 2));
+	}
+	
+	/* We have to alloc new skb and copy data here :(. Because original skb
+	 * may not be modified and because of the alignment requirements. */
+	nskb = alloc_skb(2 + ETH_HLEN + skb->len, GFP_KERNEL);
+	if (!nskb) {
+		s->stats.rx_dropped++;
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	skb_reserve(nskb, 2);
+
+	/* Decompress header and construct ether frame */
+	switch (type & BNEP_TYPE_MASK) {
+	case BNEP_COMPRESSED:
+		memcpy(__skb_put(nskb, ETH_HLEN), &s->eh, ETH_HLEN);
+		break;
+	
+	case BNEP_COMPRESSED_SRC_ONLY:
+		memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+		put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2));
+		break;
+
+	case BNEP_COMPRESSED_DST_ONLY:
+		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+		break;
+
+	case BNEP_GENERAL:
+		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+		put_unaligned(s->eh.h_proto, (u16 *) __skb_put(nskb, 2));
+		break;
+	}
+
+	memcpy(__skb_put(nskb, skb->len), skb->data, skb->len);
+	kfree_skb(skb);
+	
+	s->stats.rx_packets++;
+	nskb->dev       = dev;
+	nskb->ip_summed = CHECKSUM_NONE;
+	nskb->protocol  = eth_type_trans(nskb, dev);
+	netif_rx_ni(nskb);
+	return 0;
+
+badframe:
+	s->stats.rx_errors++;
+	kfree_skb(skb);
+	return 0;
+}
+
+static u8 __bnep_tx_types[] = {
+	BNEP_GENERAL,
+	BNEP_COMPRESSED_SRC_ONLY,
+	BNEP_COMPRESSED_DST_ONLY,
+	BNEP_COMPRESSED
+};
+
+static inline int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
+{
+	struct ethhdr *eh = (void *) skb->data;
+	struct socket *sock = s->sock;
+	struct kvec iv[3];
+	int len = 0, il = 0;
+	u8 type = 0;
+
+	BT_DBG("skb %p dev %p type %d", skb, skb->dev, skb->pkt_type);
+
+	if (!skb->dev) {
+		/* Control frame sent by us */
+		goto send;
+	}
+
+	iv[il++] = (struct kvec) { &type, 1 };
+	len++;
+
+	if (!memcmp(eh->h_dest, s->eh.h_source, ETH_ALEN))
+		type |= 0x01;
+
+	if (!memcmp(eh->h_source, s->eh.h_dest, ETH_ALEN))
+		type |= 0x02;
+
+	if (type)
+		skb_pull(skb, ETH_ALEN * 2);
+
+	type = __bnep_tx_types[type];
+	switch (type) {
+	case BNEP_COMPRESSED_SRC_ONLY:
+		iv[il++] = (struct kvec) { eh->h_source, ETH_ALEN };
+		len += ETH_ALEN;
+		break;
+		
+	case BNEP_COMPRESSED_DST_ONLY:
+		iv[il++] = (struct kvec) { eh->h_dest, ETH_ALEN };
+		len += ETH_ALEN;
+		break;
+	}
+
+send:
+	iv[il++] = (struct kvec) { skb->data, skb->len };
+	len += skb->len;
+	
+	/* FIXME: linearize skb */
+	{
+		len = kernel_sendmsg(sock, &s->msg, iv, il, len);
+	}
+	kfree_skb(skb);
+
+	if (len > 0) {
+		s->stats.tx_bytes += len;
+		s->stats.tx_packets++;
+		return 0;
+	}
+
+	return len;
+}
+
+static int bnep_session(void *arg)
+{
+	struct bnep_session *s = arg;
+	struct net_device *dev = s->dev;
+	struct sock *sk = s->sock->sk;
+	struct sk_buff *skb;
+	wait_queue_t wait;
+
+	BT_DBG("");
+
+        daemonize("kbnepd %s", dev->name);
+	set_user_nice(current, -15);
+	current->flags |= PF_NOFREEZE;
+
+	init_waitqueue_entry(&wait, current);
+	add_wait_queue(sk->sk_sleep, &wait);
+	while (!atomic_read(&s->killed)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		// RX
+		while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			bnep_rx_frame(s, skb);
+		}
+
+		if (sk->sk_state != BT_CONNECTED)
+			break;
+	
+		// TX
+		while ((skb = skb_dequeue(&sk->sk_write_queue)))
+			if (bnep_tx_frame(s, skb))
+				break;
+		netif_wake_queue(dev);
+	
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	/* Cleanup session */
+	down_write(&bnep_session_sem);
+
+	/* Delete network device */
+	unregister_netdev(dev);
+
+	/* Release the socket */
+	fput(s->sock->file);
+
+	__bnep_unlink_session(s);
+
+	up_write(&bnep_session_sem);
+	free_netdev(dev);
+	return 0;
+}
+
+int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
+{
+	struct net_device *dev;
+	struct bnep_session *s, *ss;
+	u8 dst[ETH_ALEN], src[ETH_ALEN];
+	int err;
+
+	BT_DBG("");
+
+	baswap((void *) dst, &bt_sk(sock->sk)->dst);
+	baswap((void *) src, &bt_sk(sock->sk)->src);
+
+	/* session struct allocated as private part of net_device */
+	dev = alloc_netdev(sizeof(struct bnep_session),
+			   (*req->device) ? req->device : "bnep%d",
+			   bnep_net_setup);
+	if (!dev) 
+		return ENOMEM;
+
+
+	down_write(&bnep_session_sem);
+
+	ss = __bnep_get_session(dst);
+	if (ss && ss->state == BT_CONNECTED) {
+		err = -EEXIST;
+		goto failed;
+	}
+
+	s = dev->priv;
+
+	/* This is rx header therefore addresses are swapped.
+	 * ie eh.h_dest is our local address. */
+	memcpy(s->eh.h_dest,   &src, ETH_ALEN);
+	memcpy(s->eh.h_source, &dst, ETH_ALEN);
+	memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN);
+
+	s->dev = dev;
+	s->sock  = sock;
+	s->role  = req->role;
+	s->state = BT_CONNECTED;
+	
+	s->msg.msg_flags = MSG_NOSIGNAL;
+
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+	/* Set default mc filter */
+	set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter);
+#endif
+
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+	/* Set default protocol filter */
+	bnep_set_default_proto_filter(s);
+#endif
+
+	err = register_netdev(dev);
+	if (err) {
+		goto failed;
+	}
+
+	__bnep_link_session(s);
+	
+	err = kernel_thread(bnep_session, s, CLONE_KERNEL);
+	if (err < 0) {
+		/* Session thread start failed, gotta cleanup. */
+		unregister_netdev(dev);
+		__bnep_unlink_session(s);
+		goto failed;
+	}
+
+	up_write(&bnep_session_sem);
+	strcpy(req->device, dev->name);
+	return 0;
+
+failed:
+	up_write(&bnep_session_sem);
+	free_netdev(dev);
+	return err;
+}
+
+int bnep_del_connection(struct bnep_conndel_req *req)
+{
+	struct bnep_session *s;
+	int  err = 0;
+
+	BT_DBG("");
+
+	down_read(&bnep_session_sem);
+
+	s = __bnep_get_session(req->dst);
+	if (s) {
+		/* Wakeup user-space which is polling for socket errors.
+		 * This is temporary hack untill we have shutdown in L2CAP */
+		s->sock->sk->sk_err = EUNATCH;
+		
+		/* Kill session thread */
+		atomic_inc(&s->killed);
+		wake_up_interruptible(s->sock->sk->sk_sleep);
+	} else
+		err = -ENOENT;
+
+	up_read(&bnep_session_sem);
+	return err;
+}
+
+static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s)
+{
+	memcpy(ci->dst, s->eh.h_source, ETH_ALEN);
+	strcpy(ci->device, s->dev->name);
+	ci->flags = s->flags;
+	ci->state = s->state;
+	ci->role  = s->role;
+}
+
+int bnep_get_connlist(struct bnep_connlist_req *req)
+{
+	struct list_head *p;
+	int err = 0, n = 0;
+
+	down_read(&bnep_session_sem);
+
+	list_for_each(p, &bnep_session_list) {
+		struct bnep_session *s;
+		struct bnep_conninfo ci;
+
+		s = list_entry(p, struct bnep_session, list);
+
+		__bnep_copy_ci(&ci, s);
+		
+		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (++n >= req->cnum)
+			break;
+
+		req->ci++;
+	}
+	req->cnum = n;
+
+	up_read(&bnep_session_sem);
+	return err;
+}
+
+int bnep_get_conninfo(struct bnep_conninfo *ci)
+{
+	struct bnep_session *s;
+	int err = 0;
+
+	down_read(&bnep_session_sem);
+
+	s = __bnep_get_session(ci->dst);
+	if (s)
+		__bnep_copy_ci(ci, s);
+	else
+		err = -ENOENT;
+
+	up_read(&bnep_session_sem);
+	return err;
+}
+
+static int __init bnep_init(void)
+{	
+	char flt[50] = "";
+
+	l2cap_load();
+
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+	strcat(flt, "protocol ");
+#endif
+
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+	strcat(flt, "multicast");
+#endif
+
+	BT_INFO("BNEP (Ethernet Emulation) ver %s", VERSION);
+	if (flt[0])
+		BT_INFO("BNEP filters: %s", flt);
+
+	bnep_sock_init();
+	return 0;
+}
+
+static void __exit bnep_exit(void)
+{
+	bnep_sock_cleanup();
+}
+
+module_init(bnep_init);
+module_exit(bnep_exit);
+
+MODULE_AUTHOR("David Libault <david.libault@inventel.fr>, Maxim Krasnyansky <maxk@qualcomm.com>");
+MODULE_DESCRIPTION("Bluetooth BNEP ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-4");
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
new file mode 100644
index 00000000000..921204f95f4
--- /dev/null
+++ b/net/bluetooth/bnep/netdev.c
@@ -0,0 +1,247 @@
+/* 
+   BNEP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2001-2002 Inventel Systemes
+   Written 2001-2002 by
+	Cl�ment Moreau <clement.moreau@inventel.fr>
+	David Libault  <david.libault@inventel.fr>
+
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $
+ */ 
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+
+#include "bnep.h"
+
+#ifndef CONFIG_BT_BNEP_DEBUG
+#undef  BT_DBG
+#define BT_DBG( A... )
+#endif
+
+#define BNEP_TX_QUEUE_LEN 20
+
+static int bnep_net_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	return 0;
+}
+
+static int bnep_net_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static struct net_device_stats *bnep_net_get_stats(struct net_device *dev)
+{
+	struct bnep_session *s = dev->priv;
+	return &s->stats;
+}
+
+static void bnep_net_set_mc_list(struct net_device *dev)
+{
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+	struct bnep_session *s = dev->priv;
+	struct sock *sk = s->sock->sk;
+	struct bnep_set_filter_req *r;
+	struct sk_buff *skb;
+	int size;
+
+	BT_DBG("%s mc_count %d", dev->name, dev->mc_count);
+
+	size = sizeof(*r) + (BNEP_MAX_MULTICAST_FILTERS + 1) * ETH_ALEN * 2;
+	skb  = alloc_skb(size, GFP_ATOMIC);
+	if (!skb) {
+		BT_ERR("%s Multicast list allocation failed", dev->name);
+		return;
+	}
+
+	r = (void *) skb->data;
+	__skb_put(skb, sizeof(*r));
+
+	r->type = BNEP_CONTROL;
+	r->ctrl = BNEP_FILTER_MULTI_ADDR_SET;
+
+        if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		u8 start[ETH_ALEN] = { 0x01 };
+
+		/* Request all addresses */
+		memcpy(__skb_put(skb, ETH_ALEN), start, ETH_ALEN);
+		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+		r->len = htons(ETH_ALEN * 2);
+	} else {
+                struct dev_mc_list *dmi = dev->mc_list;
+		int i, len = skb->len;
+
+		if (dev->flags & IFF_BROADCAST) {
+			memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
+		}	
+		
+		/* FIXME: We should group addresses here. */
+
+		for (i = 0; i < dev->mc_count && i < BNEP_MAX_MULTICAST_FILTERS; i++) {
+			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
+			dmi = dmi->next;
+		}
+		r->len = htons(skb->len - len);
+	}
+
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	wake_up_interruptible(sk->sk_sleep);
+#endif
+}
+
+static int bnep_net_set_mac_addr(struct net_device *dev, void *arg)
+{
+	BT_DBG("%s", dev->name);
+	return 0;
+}
+
+static void bnep_net_timeout(struct net_device *dev)
+{
+	BT_DBG("net_timeout");
+	netif_wake_queue(dev);
+}
+
+static int bnep_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	return -EINVAL;
+}
+
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+static inline int bnep_net_mc_filter(struct sk_buff *skb, struct bnep_session *s)
+{
+	struct ethhdr *eh = (void *) skb->data;
+
+	if ((eh->h_dest[0] & 1) && !test_bit(bnep_mc_hash(eh->h_dest), (ulong *) &s->mc_filter))
+		return 1;
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+/* Determine ether protocol. Based on eth_type_trans. */
+static inline u16 bnep_net_eth_proto(struct sk_buff *skb)
+{
+	struct ethhdr *eh = (void *) skb->data;
+	
+	if (ntohs(eh->h_proto) >= 1536)
+		return eh->h_proto;
+		
+	if (get_unaligned((u16 *) skb->data) == 0xFFFF)
+		return htons(ETH_P_802_3);
+		
+	return htons(ETH_P_802_2);
+}
+
+static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session *s)
+{
+	u16 proto = bnep_net_eth_proto(skb);
+	struct bnep_proto_filter *f = s->proto_filter;
+	int i;
+	
+	for (i = 0; i < BNEP_MAX_PROTO_FILTERS && f[i].end; i++) {
+		if (proto >= f[i].start && proto <= f[i].end)
+			return 0;
+	}
+
+	BT_DBG("BNEP: filtered skb %p, proto 0x%.4x", skb, proto);
+	return 1;
+}
+#endif
+
+static int bnep_net_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bnep_session *s = dev->priv;
+	struct sock *sk = s->sock->sk;
+
+	BT_DBG("skb %p, dev %p", skb, dev);
+
+#ifdef CONFIG_BT_BNEP_MC_FILTER
+	if (bnep_net_mc_filter(skb, s)) {
+		kfree_skb(skb);
+		return 0;
+	}
+#endif
+	
+#ifdef CONFIG_BT_BNEP_PROTO_FILTER
+	if (bnep_net_proto_filter(skb, s)) {
+		kfree_skb(skb);
+		return 0;
+	}
+#endif
+	
+	/*
+	 * We cannot send L2CAP packets from here as we are potentially in a bh.
+	 * So we have to queue them and wake up session thread which is sleeping
+	 * on the sk->sk_sleep.
+	 */
+	dev->trans_start = jiffies;
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	wake_up_interruptible(sk->sk_sleep);
+
+	if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) {
+		BT_DBG("tx queue is full");
+
+		/* Stop queuing.
+		 * Session thread will do netif_wake_queue() */
+		netif_stop_queue(dev);
+	}
+
+	return 0;
+}
+
+void bnep_net_setup(struct net_device *dev)
+{
+
+	memset(dev->broadcast, 0xff, ETH_ALEN);
+	dev->addr_len = ETH_ALEN;
+
+	ether_setup(dev);
+
+	dev->open            = bnep_net_open;
+	dev->stop            = bnep_net_close;
+	dev->hard_start_xmit = bnep_net_xmit;
+	dev->get_stats       = bnep_net_get_stats;
+	dev->do_ioctl        = bnep_net_ioctl;
+	dev->set_mac_address = bnep_net_set_mac_addr;
+	dev->set_multicast_list = bnep_net_set_mc_list;
+
+	dev->watchdog_timeo  = HZ * 2;
+	dev->tx_timeout      = bnep_net_timeout;
+}
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
new file mode 100644
index 00000000000..9a8d99a39b6
--- /dev/null
+++ b/net/bluetooth/bnep/sock.c
@@ -0,0 +1,237 @@
+/* 
+   BNEP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2001-2002 Inventel Systemes
+   Written 2001-2002 by
+	David Libault  <david.libault@inventel.fr>
+
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $
+ */ 
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "bnep.h"
+
+#ifndef CONFIG_BT_BNEP_DEBUG
+#undef  BT_DBG
+#define BT_DBG( A... )
+#endif
+
+static int bnep_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	sock_orphan(sk);
+	sock_put(sk);
+	return 0;
+}
+
+static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct bnep_connlist_req cl;
+	struct bnep_connadd_req  ca;
+	struct bnep_conndel_req  cd;
+	struct bnep_conninfo ci;
+	struct socket *nsock;
+	void __user *argp = (void __user *)arg;
+	int err;
+
+	BT_DBG("cmd %x arg %lx", cmd, arg);
+
+	switch (cmd) {
+	case BNEPCONNADD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&ca, argp, sizeof(ca)))
+			return -EFAULT;
+	
+		nsock = sockfd_lookup(ca.sock, &err);
+		if (!nsock)
+			return err;
+
+		if (nsock->sk->sk_state != BT_CONNECTED) {
+			fput(nsock->file);
+			return -EBADFD;
+		}
+
+		err = bnep_add_connection(&ca, nsock);
+		if (!err) {
+    			if (copy_to_user(argp, &ca, sizeof(ca)))
+				err = -EFAULT;
+		} else
+			fput(nsock->file);
+
+		return err;
+	
+	case BNEPCONNDEL:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			return -EFAULT;
+	
+		return bnep_del_connection(&cd);
+
+	case BNEPGETCONNLIST:
+		if (copy_from_user(&cl, argp, sizeof(cl)))
+			return -EFAULT;
+
+		if (cl.cnum <= 0)
+			return -EINVAL;
+	
+		err = bnep_get_connlist(&cl);
+		if (!err && copy_to_user(argp, &cl, sizeof(cl)))
+			return -EFAULT;
+
+		return err;
+
+	case BNEPGETCONNINFO:
+		if (copy_from_user(&ci, argp, sizeof(ci)))
+			return -EFAULT;
+
+		err = bnep_get_conninfo(&ci);
+		if (!err && copy_to_user(argp, &ci, sizeof(ci)))
+			return -EFAULT;
+
+		return err;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct proto_ops bnep_sock_ops = {
+	.family     = PF_BLUETOOTH,
+	.owner      = THIS_MODULE,
+	.release    = bnep_sock_release,
+	.ioctl      = bnep_sock_ioctl,
+	.bind       = sock_no_bind,
+	.getname    = sock_no_getname,
+	.sendmsg    = sock_no_sendmsg,
+	.recvmsg    = sock_no_recvmsg,
+	.poll       = sock_no_poll,
+	.listen     = sock_no_listen,
+	.shutdown   = sock_no_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.connect    = sock_no_connect,
+	.socketpair = sock_no_socketpair,
+	.accept     = sock_no_accept,
+	.mmap       = sock_no_mmap
+};
+
+static struct proto bnep_proto = {
+	.name		= "BNEP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct bt_sock)
+};
+
+static int bnep_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sk = sk_alloc(PF_BLUETOOTH, GFP_KERNEL, &bnep_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	sock->ops = &bnep_sock_ops;
+
+	sock->state = SS_UNCONNECTED;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = protocol;
+	sk->sk_state	= BT_OPEN;
+
+	return 0;
+}
+
+static struct net_proto_family bnep_sock_family_ops = {
+	.family = PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create = bnep_sock_create
+};
+
+int __init bnep_sock_init(void)
+{
+	int err;
+
+	err = proto_register(&bnep_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	return 0;
+
+error:
+	BT_ERR("Can't register BNEP socket");
+	proto_unregister(&bnep_proto);
+	return err;
+}
+
+int __exit bnep_sock_cleanup(void)
+{
+	if (bt_sock_unregister(BTPROTO_BNEP) < 0)
+		BT_ERR("Can't unregister BNEP socket");
+
+	proto_unregister(&bnep_proto);
+
+	return 0;
+}
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
new file mode 100644
index 00000000000..d6b0382f6f3
--- /dev/null
+++ b/net/bluetooth/cmtp/Kconfig
@@ -0,0 +1,11 @@
+config BT_CMTP
+	tristate "CMTP protocol support"
+	depends on BT && BT_L2CAP && ISDN_CAPI
+	help
+	  CMTP (CAPI Message Transport Protocol) is a transport layer
+	  for CAPI messages.  CMTP is required for the Bluetooth Common
+	  ISDN Access Profile.
+
+	  Say Y here to compile CMTP support into the kernel or say M to
+	  compile it as module (cmtp).
+
diff --git a/net/bluetooth/cmtp/Makefile b/net/bluetooth/cmtp/Makefile
new file mode 100644
index 00000000000..890a9a5a686
--- /dev/null
+++ b/net/bluetooth/cmtp/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux Bluetooth CMTP layer
+#
+
+obj-$(CONFIG_BT_CMTP) += cmtp.o
+
+cmtp-objs := core.o sock.o capi.o
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
new file mode 100644
index 00000000000..1e5c030b72a
--- /dev/null
+++ b/net/bluetooth/cmtp/capi.c
@@ -0,0 +1,600 @@
+/* 
+   CMTP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+#include <linux/isdn/capilli.h>
+#include <linux/isdn/capicmd.h>
+#include <linux/isdn/capiutil.h>
+
+#include "cmtp.h"
+
+#ifndef CONFIG_BT_CMTP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define CAPI_INTEROPERABILITY		0x20
+
+#define CAPI_INTEROPERABILITY_REQ	CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ)
+#define CAPI_INTEROPERABILITY_CONF	CAPICMD(CAPI_INTEROPERABILITY, CAPI_CONF)
+#define CAPI_INTEROPERABILITY_IND	CAPICMD(CAPI_INTEROPERABILITY, CAPI_IND)
+#define CAPI_INTEROPERABILITY_RESP	CAPICMD(CAPI_INTEROPERABILITY, CAPI_RESP)
+
+#define CAPI_INTEROPERABILITY_REQ_LEN	(CAPI_MSG_BASELEN + 2)
+#define CAPI_INTEROPERABILITY_CONF_LEN	(CAPI_MSG_BASELEN + 4)
+#define CAPI_INTEROPERABILITY_IND_LEN	(CAPI_MSG_BASELEN + 2)
+#define CAPI_INTEROPERABILITY_RESP_LEN	(CAPI_MSG_BASELEN + 2)
+
+#define CAPI_FUNCTION_REGISTER		0
+#define CAPI_FUNCTION_RELEASE		1
+#define CAPI_FUNCTION_GET_PROFILE	2
+#define CAPI_FUNCTION_GET_MANUFACTURER	3
+#define CAPI_FUNCTION_GET_VERSION	4
+#define CAPI_FUNCTION_GET_SERIAL_NUMBER	5
+#define CAPI_FUNCTION_MANUFACTURER	6
+#define CAPI_FUNCTION_LOOPBACK		7
+
+
+#define CMTP_MSGNUM	1
+#define CMTP_APPLID	2
+#define CMTP_MAPPING	3
+
+static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl)
+{
+	struct cmtp_application *app = kmalloc(sizeof(*app), GFP_KERNEL);
+
+	BT_DBG("session %p application %p appl %d", session, app, appl);
+
+	if (!app)
+		return NULL;
+
+	memset(app, 0, sizeof(*app));
+
+	app->state = BT_OPEN;
+	app->appl = appl;
+
+	list_add_tail(&app->list, &session->applications);
+
+	return app;
+}
+
+static void cmtp_application_del(struct cmtp_session *session, struct cmtp_application *app)
+{
+	BT_DBG("session %p application %p", session, app);
+
+	if (app) {
+		list_del(&app->list);
+		kfree(app);
+	}
+}
+
+static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value)
+{
+	struct cmtp_application *app;
+	struct list_head *p, *n;
+
+	list_for_each_safe(p, n, &session->applications) {
+		app = list_entry(p, struct cmtp_application, list);
+		switch (pattern) {
+		case CMTP_MSGNUM:
+			if (app->msgnum == value)
+				return app;
+			break;
+		case CMTP_APPLID:
+			if (app->appl == value)
+				return app;
+			break;
+		case CMTP_MAPPING:
+			if (app->mapping == value)
+				return app;
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+static int cmtp_msgnum_get(struct cmtp_session *session)
+{
+	session->msgnum++;
+
+	if ((session->msgnum & 0xff) > 200)
+		session->msgnum = CMTP_INITIAL_MSGNUM + 1;
+
+	return session->msgnum;
+}
+
+static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb)
+{
+	struct cmtp_scb *scb = (void *) skb->cb;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	scb->id = -1;
+	scb->data = (CAPIMSG_COMMAND(skb->data) == CAPI_DATA_B3);
+
+	skb_queue_tail(&session->transmit, skb);
+
+	cmtp_schedule(session);
+}
+
+static void cmtp_send_interopmsg(struct cmtp_session *session,
+					__u8 subcmd, __u16 appl, __u16 msgnum,
+					__u16 function, unsigned char *buf, int len)
+{
+	struct sk_buff *skb;
+	unsigned char *s;
+
+	BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
+
+	if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) {
+		BT_ERR("Can't allocate memory for interoperability packet");
+		return;
+	}
+
+	s = skb_put(skb, CAPI_MSG_BASELEN + 6 + len);
+
+	capimsg_setu16(s, 0, CAPI_MSG_BASELEN + 6 + len);
+	capimsg_setu16(s, 2, appl);
+	capimsg_setu8 (s, 4, CAPI_INTEROPERABILITY);
+	capimsg_setu8 (s, 5, subcmd);
+	capimsg_setu16(s, 6, msgnum);
+
+	/* Interoperability selector (Bluetooth Device Management) */
+	capimsg_setu16(s, 8, 0x0001);
+
+	capimsg_setu8 (s, 10, 3 + len);
+	capimsg_setu16(s, 11, function);
+	capimsg_setu8 (s, 13, len);
+
+	if (len > 0)
+		memcpy(s + 14, buf, len);
+
+	cmtp_send_capimsg(session, skb);
+}
+
+static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *skb)
+{
+	struct capi_ctr *ctrl = &session->ctrl;
+	struct cmtp_application *application;
+	__u16 appl, msgnum, func, info;
+	__u32 controller;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	switch (CAPIMSG_SUBCOMMAND(skb->data)) {
+	case CAPI_CONF:
+		func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 5);
+		info = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 8);
+
+		switch (func) {
+		case CAPI_FUNCTION_REGISTER:
+			msgnum = CAPIMSG_MSGID(skb->data);
+
+			application = cmtp_application_get(session, CMTP_MSGNUM, msgnum);
+			if (application) {
+				application->state = BT_CONNECTED;
+				application->msgnum = 0;
+				application->mapping = CAPIMSG_APPID(skb->data);
+				wake_up_interruptible(&session->wait);
+			}
+
+			break;
+
+		case CAPI_FUNCTION_RELEASE:
+			appl = CAPIMSG_APPID(skb->data);
+
+			application = cmtp_application_get(session, CMTP_MAPPING, appl);
+			if (application) {
+				application->state = BT_CLOSED;
+				application->msgnum = 0;
+				wake_up_interruptible(&session->wait);
+			}
+
+			break;
+
+		case CAPI_FUNCTION_GET_PROFILE:
+			controller = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 11);
+			msgnum = CAPIMSG_MSGID(skb->data);
+
+			if (!info && (msgnum == CMTP_INITIAL_MSGNUM)) {
+				session->ncontroller = controller;
+				wake_up_interruptible(&session->wait);
+				break;
+			}
+
+			if (!info && ctrl) {
+				memcpy(&ctrl->profile,
+					skb->data + CAPI_MSG_BASELEN + 11,
+					sizeof(capi_profile));
+				session->state = BT_CONNECTED;
+				capi_ctr_ready(ctrl);
+			}
+
+			break;
+
+		case CAPI_FUNCTION_GET_MANUFACTURER:
+			controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10);
+
+			if (!info && ctrl) {
+				strncpy(ctrl->manu,
+					skb->data + CAPI_MSG_BASELEN + 15,
+					skb->data[CAPI_MSG_BASELEN + 14]);
+			}
+
+			break;
+
+		case CAPI_FUNCTION_GET_VERSION:
+			controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12);
+
+			if (!info && ctrl) {
+				ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16);
+				ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20);
+				ctrl->version.majormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 24);
+				ctrl->version.minormanuversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 28);
+			}
+
+			break;
+
+		case CAPI_FUNCTION_GET_SERIAL_NUMBER:
+			controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12);
+
+			if (!info && ctrl) {
+				memset(ctrl->serial, 0, CAPI_SERIAL_LEN);
+				strncpy(ctrl->serial,
+					skb->data + CAPI_MSG_BASELEN + 17,
+					skb->data[CAPI_MSG_BASELEN + 16]);
+			}
+
+			break;
+		}
+
+		break;
+
+	case CAPI_IND:
+		func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 3);
+
+		if (func == CAPI_FUNCTION_LOOPBACK) {
+			appl = CAPIMSG_APPID(skb->data);
+			msgnum = CAPIMSG_MSGID(skb->data);
+			cmtp_send_interopmsg(session, CAPI_RESP, appl, msgnum, func,
+						skb->data + CAPI_MSG_BASELEN + 6,
+						skb->data[CAPI_MSG_BASELEN + 5]);
+		}
+
+		break;
+	}
+
+	kfree_skb(skb);
+}
+
+void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
+{
+	struct capi_ctr *ctrl = &session->ctrl;
+	struct cmtp_application *application;
+	__u16 cmd, appl;
+	__u32 contr;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	if (CAPIMSG_COMMAND(skb->data) == CAPI_INTEROPERABILITY) {
+		cmtp_recv_interopmsg(session, skb);
+		return;
+	}
+
+	if (session->flags & (1 << CMTP_LOOPBACK)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	cmd = CAPICMD(CAPIMSG_COMMAND(skb->data), CAPIMSG_SUBCOMMAND(skb->data));
+	appl = CAPIMSG_APPID(skb->data);
+	contr = CAPIMSG_CONTROL(skb->data);
+
+	application = cmtp_application_get(session, CMTP_MAPPING, appl);
+	if (application) {
+		appl = application->appl;
+		CAPIMSG_SETAPPID(skb->data, appl);
+	} else {
+		BT_ERR("Can't find application with id %d", appl);
+		kfree_skb(skb);
+		return;
+	}
+
+	if ((contr & 0x7f) == 0x01) {
+		contr = (contr & 0xffffff80) | session->num;
+		CAPIMSG_SETCONTROL(skb->data, contr);
+	}
+
+	if (!ctrl) {
+		BT_ERR("Can't find controller %d for message", session->num);
+		kfree_skb(skb);
+		return;
+	}
+
+	capi_ctr_handle_message(ctrl, appl, skb);
+}
+
+static int cmtp_load_firmware(struct capi_ctr *ctrl, capiloaddata *data)
+{
+	BT_DBG("ctrl %p data %p", ctrl, data);
+
+	return 0;
+}
+
+static void cmtp_reset_ctr(struct capi_ctr *ctrl)
+{
+	struct cmtp_session *session = ctrl->driverdata;
+
+	BT_DBG("ctrl %p", ctrl);
+
+	capi_ctr_reseted(ctrl);
+
+	atomic_inc(&session->terminate);
+	cmtp_schedule(session);
+}
+
+static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct cmtp_session *session = ctrl->driverdata;
+	struct cmtp_application *application;
+	unsigned long timeo = CMTP_INTEROP_TIMEOUT;
+	unsigned char buf[8];
+	int err = 0, nconn, want = rp->level3cnt;
+
+	BT_DBG("ctrl %p appl %d level3cnt %d datablkcnt %d datablklen %d",
+		ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
+
+	application = cmtp_application_add(session, appl);
+	if (!application) {
+		BT_ERR("Can't allocate memory for new application");
+		return;
+	}
+
+	if (want < 0)
+		nconn = ctrl->profile.nbchannel * -want;
+	else
+		nconn = want;
+
+	if (nconn == 0)
+		nconn = ctrl->profile.nbchannel;
+
+	capimsg_setu16(buf, 0, nconn);
+	capimsg_setu16(buf, 2, rp->datablkcnt);
+	capimsg_setu16(buf, 4, rp->datablklen);
+
+	application->state = BT_CONFIG;
+	application->msgnum = cmtp_msgnum_get(session);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0x0000, application->msgnum,
+				CAPI_FUNCTION_REGISTER, buf, 6);
+
+	add_wait_queue(&session->wait, &wait);
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		if (application->state == BT_CLOSED) {
+			err = -application->err;
+			break;
+		}
+
+		if (application->state == BT_CONNECTED)
+			break;
+
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+
+		timeo = schedule_timeout(timeo);
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&session->wait, &wait);
+
+	if (err) {
+		cmtp_application_del(session, application);
+		return;
+	}
+}
+
+static void cmtp_release_appl(struct capi_ctr *ctrl, __u16 appl)
+{
+	struct cmtp_session *session = ctrl->driverdata;
+	struct cmtp_application *application;
+
+	BT_DBG("ctrl %p appl %d", ctrl, appl);
+
+	application = cmtp_application_get(session, CMTP_APPLID, appl);
+	if (!application) {
+		BT_ERR("Can't find application");
+		return;
+	}
+
+	application->msgnum = cmtp_msgnum_get(session);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, application->mapping, application->msgnum,
+				CAPI_FUNCTION_RELEASE, NULL, 0);
+
+	wait_event_interruptible_timeout(session->wait,
+			(application->state == BT_CLOSED), CMTP_INTEROP_TIMEOUT);
+
+	cmtp_application_del(session, application);
+}
+
+static u16 cmtp_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
+{
+	struct cmtp_session *session = ctrl->driverdata;
+	struct cmtp_application *application;
+	__u16 appl;
+	__u32 contr;
+
+	BT_DBG("ctrl %p skb %p", ctrl, skb);
+
+	appl = CAPIMSG_APPID(skb->data);
+	contr = CAPIMSG_CONTROL(skb->data);
+
+	application = cmtp_application_get(session, CMTP_APPLID, appl);
+	if ((!application) || (application->state != BT_CONNECTED)) {
+		BT_ERR("Can't find application with id %d", appl);
+		return CAPI_ILLAPPNR;
+	}
+
+	CAPIMSG_SETAPPID(skb->data, application->mapping);
+
+	if ((contr & 0x7f) == session->num) {
+		contr = (contr & 0xffffff80) | 0x01;
+		CAPIMSG_SETCONTROL(skb->data, contr);
+	}
+
+	cmtp_send_capimsg(session, skb);
+
+	return CAPI_NOERROR;
+}
+
+static char *cmtp_procinfo(struct capi_ctr *ctrl)
+{
+	return "CAPI Message Transport Protocol";
+}
+
+static int cmtp_ctr_read_proc(char *page, char **start, off_t off, int count, int *eof, struct capi_ctr *ctrl)
+{
+	struct cmtp_session *session = ctrl->driverdata;
+	struct cmtp_application *app;
+	struct list_head *p, *n;
+	int len = 0;
+
+	len += sprintf(page + len, "%s\n\n", cmtp_procinfo(ctrl));
+	len += sprintf(page + len, "addr %s\n", session->name);
+	len += sprintf(page + len, "ctrl %d\n", session->num);
+
+	list_for_each_safe(p, n, &session->applications) {
+		app = list_entry(p, struct cmtp_application, list);
+		len += sprintf(page + len, "appl %d -> %d\n", app->appl, app->mapping);
+	}
+
+	if (off + count >= len)
+		*eof = 1;
+
+	if (len < off)
+		return 0;
+
+	*start = page + off;
+
+	return ((count < len - off) ? count : len - off);
+}
+
+
+int cmtp_attach_device(struct cmtp_session *session)
+{
+	unsigned char buf[4];
+	long ret;
+
+	BT_DBG("session %p", session);
+
+	capimsg_setu32(buf, 0, 0);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, CMTP_INITIAL_MSGNUM,
+				CAPI_FUNCTION_GET_PROFILE, buf, 4);
+
+	ret = wait_event_interruptible_timeout(session->wait,
+			session->ncontroller, CMTP_INTEROP_TIMEOUT);
+	
+	BT_INFO("Found %d CAPI controller(s) on device %s", session->ncontroller, session->name);
+
+	if (!ret)
+		return -ETIMEDOUT;
+
+	if (!session->ncontroller)
+		return -ENODEV;
+
+	if (session->ncontroller > 1)
+		BT_INFO("Setting up only CAPI controller 1");
+
+	session->ctrl.owner      = THIS_MODULE;
+	session->ctrl.driverdata = session;
+	strcpy(session->ctrl.name, session->name);
+
+	session->ctrl.driver_name   = "cmtp";
+	session->ctrl.load_firmware = cmtp_load_firmware;
+	session->ctrl.reset_ctr     = cmtp_reset_ctr;
+	session->ctrl.register_appl = cmtp_register_appl;
+	session->ctrl.release_appl  = cmtp_release_appl;
+	session->ctrl.send_message  = cmtp_send_message;
+
+	session->ctrl.procinfo      = cmtp_procinfo;
+	session->ctrl.ctr_read_proc = cmtp_ctr_read_proc;
+
+	if (attach_capi_ctr(&session->ctrl) < 0) {
+		BT_ERR("Can't attach new controller");
+		return -EBUSY;
+	}
+
+	session->num = session->ctrl.cnr;
+
+	BT_DBG("session %p num %d", session, session->num);
+
+	capimsg_setu32(buf, 0, 1);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
+				CAPI_FUNCTION_GET_MANUFACTURER, buf, 4);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
+				CAPI_FUNCTION_GET_VERSION, buf, 4);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
+				CAPI_FUNCTION_GET_SERIAL_NUMBER, buf, 4);
+
+	cmtp_send_interopmsg(session, CAPI_REQ, 0xffff, cmtp_msgnum_get(session),
+				CAPI_FUNCTION_GET_PROFILE, buf, 4);
+
+	return 0;
+}
+
+void cmtp_detach_device(struct cmtp_session *session)
+{
+	BT_DBG("session %p", session);
+
+	detach_capi_ctr(&session->ctrl);
+}
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
new file mode 100644
index 00000000000..40e3dfec0cc
--- /dev/null
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -0,0 +1,135 @@
+/* 
+   CMTP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#ifndef __CMTP_H
+#define __CMTP_H
+
+#include <linux/types.h>
+#include <net/bluetooth/bluetooth.h>
+
+#define BTNAMSIZ 18
+
+/* CMTP ioctl defines */
+#define CMTPCONNADD	_IOW('C', 200, int)
+#define CMTPCONNDEL	_IOW('C', 201, int)
+#define CMTPGETCONNLIST	_IOR('C', 210, int)
+#define CMTPGETCONNINFO	_IOR('C', 211, int)
+
+#define CMTP_LOOPBACK	0
+
+struct cmtp_connadd_req {
+	int   sock;	// Connected socket
+	__u32 flags;
+};
+
+struct cmtp_conndel_req {
+	bdaddr_t bdaddr;
+	__u32    flags;
+};
+
+struct cmtp_conninfo {
+	bdaddr_t bdaddr;
+	__u32    flags;
+	__u16    state;
+	int      num;
+};
+
+struct cmtp_connlist_req {
+	__u32  cnum;
+	struct cmtp_conninfo __user *ci;
+};
+
+int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock);
+int cmtp_del_connection(struct cmtp_conndel_req *req);
+int cmtp_get_connlist(struct cmtp_connlist_req *req);
+int cmtp_get_conninfo(struct cmtp_conninfo *ci);
+
+/* CMTP session defines */
+#define CMTP_INTEROP_TIMEOUT	(HZ * 5)
+#define CMTP_INITIAL_MSGNUM	0xff00
+
+struct cmtp_session {
+	struct list_head list;
+
+	struct socket *sock;
+
+	bdaddr_t bdaddr;
+
+	unsigned long state;
+	unsigned long flags;
+
+	uint mtu;
+
+	char name[BTNAMSIZ];
+
+	atomic_t terminate;
+
+	wait_queue_head_t wait;
+
+	int ncontroller;
+	int num;
+	struct capi_ctr ctrl;
+
+	struct list_head applications;
+
+	unsigned long blockids;
+	int msgnum;
+
+	struct sk_buff_head transmit;
+
+	struct sk_buff *reassembly[16];
+};
+
+struct cmtp_application {
+	struct list_head list;
+
+	unsigned long state;
+	int err;
+
+	__u16 appl;
+	__u16 mapping;
+
+	__u16 msgnum;
+};
+
+struct cmtp_scb {
+	int id;
+	int data;
+};
+
+int  cmtp_attach_device(struct cmtp_session *session);
+void cmtp_detach_device(struct cmtp_session *session);
+
+void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb);
+
+static inline void cmtp_schedule(struct cmtp_session *session)
+{
+	struct sock *sk = session->sock->sk;
+
+	wake_up_interruptible(sk->sk_sleep);
+}
+
+/* CMTP init defines */
+int cmtp_init_sockets(void);
+void cmtp_cleanup_sockets(void);
+
+#endif /* __CMTP_H */
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
new file mode 100644
index 00000000000..20ce04f2be8
--- /dev/null
+++ b/net/bluetooth/cmtp/core.c
@@ -0,0 +1,504 @@
+/* 
+   CMTP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <net/sock.h>
+
+#include <linux/isdn/capilli.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/l2cap.h>
+
+#include "cmtp.h"
+
+#ifndef CONFIG_BT_CMTP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "1.0"
+
+static DECLARE_RWSEM(cmtp_session_sem);
+static LIST_HEAD(cmtp_session_list);
+
+static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr)
+{
+	struct cmtp_session *session;
+	struct list_head *p;
+
+	BT_DBG("");
+
+	list_for_each(p, &cmtp_session_list) {
+		session = list_entry(p, struct cmtp_session, list);
+		if (!bacmp(bdaddr, &session->bdaddr))
+			return session;
+	}
+	return NULL;
+}
+
+static void __cmtp_link_session(struct cmtp_session *session)
+{
+	__module_get(THIS_MODULE);
+	list_add(&session->list, &cmtp_session_list);
+}
+
+static void __cmtp_unlink_session(struct cmtp_session *session)
+{
+	list_del(&session->list);
+	module_put(THIS_MODULE);
+}
+
+static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci)
+{
+	bacpy(&ci->bdaddr, &session->bdaddr);
+
+	ci->flags = session->flags;
+	ci->state = session->state;
+
+	ci->num = session->num;
+}
+
+
+static inline int cmtp_alloc_block_id(struct cmtp_session *session)
+{
+	int i, id = -1;
+
+	for (i = 0; i < 16; i++)
+		if (!test_and_set_bit(i, &session->blockids)) {
+			id = i;
+			break;
+		}
+
+	return id;
+}
+
+static inline void cmtp_free_block_id(struct cmtp_session *session, int id)
+{
+	clear_bit(id, &session->blockids);
+}
+
+static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const unsigned char *buf, int count)
+{
+	struct sk_buff *skb = session->reassembly[id], *nskb;
+	int size;
+
+	BT_DBG("session %p buf %p count %d", session, buf, count);
+
+	size = (skb) ? skb->len + count : count;
+
+	if (!(nskb = alloc_skb(size, GFP_ATOMIC))) {
+		BT_ERR("Can't allocate memory for CAPI message");
+		return;
+	}
+
+	if (skb && (skb->len > 0))
+		memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+
+	memcpy(skb_put(nskb, count), buf, count);
+
+	session->reassembly[id] = nskb;
+
+	if (skb)
+		kfree_skb(skb);
+}
+
+static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb)
+{
+	__u8 hdr, hdrlen, id;
+	__u16 len;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	while (skb->len > 0) {
+		hdr = skb->data[0];
+
+		switch (hdr & 0xc0) {
+		case 0x40:
+			hdrlen = 2;
+			len = skb->data[1];
+			break;
+		case 0x80:
+			hdrlen = 3;
+			len = skb->data[1] | (skb->data[2] << 8);
+			break;
+		default:
+			hdrlen = 1;
+			len = 0;
+			break;
+		}
+
+		id = (hdr & 0x3c) >> 2;
+
+		BT_DBG("hdr 0x%02x hdrlen %d len %d id %d", hdr, hdrlen, len, id);
+
+		if (hdrlen + len > skb->len) {
+			BT_ERR("Wrong size or header information in CMTP frame");
+			break;
+		}
+
+		if (len == 0) {
+			skb_pull(skb, hdrlen);
+			continue;
+		}
+
+		switch (hdr & 0x03) {
+		case 0x00:
+			cmtp_add_msgpart(session, id, skb->data + hdrlen, len);
+			cmtp_recv_capimsg(session, session->reassembly[id]);
+			session->reassembly[id] = NULL;
+			break;
+		case 0x01:
+			cmtp_add_msgpart(session, id, skb->data + hdrlen, len);
+			break;
+		default:
+			if (session->reassembly[id] != NULL)
+				kfree_skb(session->reassembly[id]);
+			session->reassembly[id] = NULL;
+			break;
+		}
+
+		skb_pull(skb, hdrlen + len);
+	}
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int cmtp_send_frame(struct cmtp_session *session, unsigned char *data, int len)
+{
+	struct socket *sock = session->sock;
+	struct kvec iv = { data, len };
+	struct msghdr msg;
+
+	BT_DBG("session %p data %p len %d", session, data, len);
+
+	if (!len)
+		return 0;
+
+	memset(&msg, 0, sizeof(msg));
+
+	return kernel_sendmsg(sock, &msg, &iv, 1, len);
+}
+
+static int cmtp_process_transmit(struct cmtp_session *session)
+{
+	struct sk_buff *skb, *nskb;
+	unsigned char *hdr;
+	unsigned int size, tail;
+
+	BT_DBG("session %p", session);
+
+	if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) {
+		BT_ERR("Can't allocate memory for new frame");
+		return -ENOMEM;
+	}
+
+	while ((skb = skb_dequeue(&session->transmit))) {
+		struct cmtp_scb *scb = (void *) skb->cb;
+
+		if ((tail = (session->mtu - nskb->len)) < 5) {
+			cmtp_send_frame(session, nskb->data, nskb->len);
+			skb_trim(nskb, 0);
+			tail = session->mtu;
+		}
+
+		size = min_t(uint, ((tail < 258) ? (tail - 2) : (tail - 3)), skb->len);
+
+		if ((scb->id < 0) && ((scb->id = cmtp_alloc_block_id(session)) < 0)) {
+			skb_queue_head(&session->transmit, skb);
+			break;
+		}
+
+		if (size < 256) {
+			hdr = skb_put(nskb, 2);
+			hdr[0] = 0x40
+				| ((scb->id << 2) & 0x3c)
+				| ((skb->len == size) ? 0x00 : 0x01);
+			hdr[1] = size;
+		} else {
+			hdr = skb_put(nskb, 3);
+			hdr[0] = 0x80
+				| ((scb->id << 2) & 0x3c)
+				| ((skb->len == size) ? 0x00 : 0x01);
+			hdr[1] = size & 0xff;
+			hdr[2] = size >> 8;
+		}
+
+		memcpy(skb_put(nskb, size), skb->data, size);
+		skb_pull(skb, size);
+
+		if (skb->len > 0) {
+			skb_queue_head(&session->transmit, skb);
+		} else {
+			cmtp_free_block_id(session, scb->id);
+			if (scb->data) {
+				cmtp_send_frame(session, nskb->data, nskb->len);
+				skb_trim(nskb, 0);
+			}
+			kfree_skb(skb);
+		}
+	}
+
+	cmtp_send_frame(session, nskb->data, nskb->len);
+
+	kfree_skb(nskb);
+
+	return skb_queue_len(&session->transmit);
+}
+
+static int cmtp_session(void *arg)
+{
+	struct cmtp_session *session = arg;
+	struct sock *sk = session->sock->sk;
+	struct sk_buff *skb;
+	wait_queue_t wait;
+
+	BT_DBG("session %p", session);
+
+	daemonize("kcmtpd_ctr_%d", session->num);
+	set_user_nice(current, -15);
+	current->flags |= PF_NOFREEZE;
+
+	init_waitqueue_entry(&wait, current);
+	add_wait_queue(sk->sk_sleep, &wait);
+	while (!atomic_read(&session->terminate)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (sk->sk_state != BT_CONNECTED)
+			break;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			cmtp_recv_frame(session, skb);
+		}
+
+		cmtp_process_transmit(session);
+
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	down_write(&cmtp_session_sem);
+
+	if (!(session->flags & (1 << CMTP_LOOPBACK)))
+		cmtp_detach_device(session);
+
+	fput(session->sock->file);
+
+	__cmtp_unlink_session(session);
+
+	up_write(&cmtp_session_sem);
+
+	kfree(session);
+	return 0;
+}
+
+int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
+{
+	struct cmtp_session *session, *s;
+	bdaddr_t src, dst;
+	int i, err;
+
+	BT_DBG("");
+
+	baswap(&src, &bt_sk(sock->sk)->src);
+	baswap(&dst, &bt_sk(sock->sk)->dst);
+
+	session = kmalloc(sizeof(struct cmtp_session), GFP_KERNEL);
+	if (!session) 
+		return -ENOMEM;
+	memset(session, 0, sizeof(struct cmtp_session));
+
+	down_write(&cmtp_session_sem);
+
+	s = __cmtp_get_session(&bt_sk(sock->sk)->dst);
+	if (s && s->state == BT_CONNECTED) {
+		err = -EEXIST;
+		goto failed;
+	}
+
+	bacpy(&session->bdaddr, &bt_sk(sock->sk)->dst);
+
+	session->mtu = min_t(uint, l2cap_pi(sock->sk)->omtu, l2cap_pi(sock->sk)->imtu);
+
+	BT_DBG("mtu %d", session->mtu);
+
+	sprintf(session->name, "%s", batostr(&dst));
+
+	session->sock  = sock;
+	session->state = BT_CONFIG;
+
+	init_waitqueue_head(&session->wait);
+
+	session->msgnum = CMTP_INITIAL_MSGNUM;
+
+	INIT_LIST_HEAD(&session->applications);
+
+	skb_queue_head_init(&session->transmit);
+
+	for (i = 0; i < 16; i++)
+		session->reassembly[i] = NULL;
+
+	session->flags = req->flags;
+
+	__cmtp_link_session(session);
+
+	err = kernel_thread(cmtp_session, session, CLONE_KERNEL);
+	if (err < 0)
+		goto unlink;
+
+	if (!(session->flags & (1 << CMTP_LOOPBACK))) {
+		err = cmtp_attach_device(session);
+		if (err < 0)
+			goto detach;
+	}
+
+	up_write(&cmtp_session_sem);
+	return 0;
+
+detach:
+	cmtp_detach_device(session);
+
+unlink:
+	__cmtp_unlink_session(session);
+
+failed:
+	up_write(&cmtp_session_sem);
+	kfree(session);
+	return err;
+}
+
+int cmtp_del_connection(struct cmtp_conndel_req *req)
+{
+	struct cmtp_session *session;
+	int err = 0;
+
+	BT_DBG("");
+
+	down_read(&cmtp_session_sem);
+
+	session = __cmtp_get_session(&req->bdaddr);
+	if (session) {
+		/* Flush the transmit queue */
+		skb_queue_purge(&session->transmit);
+
+		/* Kill session thread */
+		atomic_inc(&session->terminate);
+		cmtp_schedule(session);
+	} else
+		err = -ENOENT;
+
+	up_read(&cmtp_session_sem);
+	return err;
+}
+
+int cmtp_get_connlist(struct cmtp_connlist_req *req)
+{
+	struct list_head *p;
+	int err = 0, n = 0;
+
+	BT_DBG("");
+
+	down_read(&cmtp_session_sem);
+
+	list_for_each(p, &cmtp_session_list) {
+		struct cmtp_session *session;
+		struct cmtp_conninfo ci;
+
+		session = list_entry(p, struct cmtp_session, list);
+
+		__cmtp_copy_session(session, &ci);
+
+		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (++n >= req->cnum)
+			break;
+
+		req->ci++;
+	}
+	req->cnum = n;
+
+	up_read(&cmtp_session_sem);
+	return err;
+}
+
+int cmtp_get_conninfo(struct cmtp_conninfo *ci)
+{
+	struct cmtp_session *session;
+	int err = 0;
+
+	down_read(&cmtp_session_sem);
+
+	session = __cmtp_get_session(&ci->bdaddr);
+	if (session)
+		__cmtp_copy_session(session, ci);
+	else
+		err = -ENOENT;
+
+	up_read(&cmtp_session_sem);
+	return err;
+}
+
+
+static int __init cmtp_init(void)
+{
+	l2cap_load();
+
+	BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
+
+	cmtp_init_sockets();
+
+	return 0;
+}
+
+static void __exit cmtp_exit(void)
+{
+	cmtp_cleanup_sockets();
+}
+
+module_init(cmtp_init);
+module_exit(cmtp_exit);
+
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth CMTP ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-5");
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
new file mode 100644
index 00000000000..4c7f9e20dad
--- /dev/null
+++ b/net/bluetooth/cmtp/sock.c
@@ -0,0 +1,226 @@
+/* 
+   CMTP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002-2003 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <net/sock.h>
+
+#include <linux/isdn/capilli.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "cmtp.h"
+
+#ifndef CONFIG_BT_CMTP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static int cmtp_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	sock_orphan(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct cmtp_connadd_req ca;
+	struct cmtp_conndel_req cd;
+	struct cmtp_connlist_req cl;
+	struct cmtp_conninfo ci;
+	struct socket *nsock;
+	void __user *argp = (void __user *)arg;
+	int err;
+
+	BT_DBG("cmd %x arg %lx", cmd, arg);
+
+	switch (cmd) {
+	case CMTPCONNADD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&ca, argp, sizeof(ca)))
+			return -EFAULT;
+
+		nsock = sockfd_lookup(ca.sock, &err);
+		if (!nsock)
+			return err;
+
+		if (nsock->sk->sk_state != BT_CONNECTED) {
+			fput(nsock->file);
+			return -EBADFD;
+		}
+
+		err = cmtp_add_connection(&ca, nsock);
+		if (!err) {
+			if (copy_to_user(argp, &ca, sizeof(ca)))
+				err = -EFAULT;
+		} else
+			fput(nsock->file);
+
+		return err;
+
+	case CMTPCONNDEL:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			return -EFAULT;
+
+		return cmtp_del_connection(&cd);
+
+	case CMTPGETCONNLIST:
+		if (copy_from_user(&cl, argp, sizeof(cl)))
+			return -EFAULT;
+
+		if (cl.cnum <= 0)
+			return -EINVAL;
+
+		err = cmtp_get_connlist(&cl);
+		if (!err && copy_to_user(argp, &cl, sizeof(cl)))
+			return -EFAULT;
+
+		return err;
+
+	case CMTPGETCONNINFO:
+		if (copy_from_user(&ci, argp, sizeof(ci)))
+			return -EFAULT;
+
+		err = cmtp_get_conninfo(&ci);
+		if (!err && copy_to_user(argp, &ci, sizeof(ci)))
+			return -EFAULT;
+
+		return err;
+	}
+
+	return -EINVAL;
+}
+
+static struct proto_ops cmtp_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= cmtp_sock_release,
+	.ioctl		= cmtp_sock_ioctl,
+	.bind		= sock_no_bind,
+	.getname	= sock_no_getname,
+	.sendmsg	= sock_no_sendmsg,
+	.recvmsg	= sock_no_recvmsg,
+	.poll		= sock_no_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.connect	= sock_no_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.mmap		= sock_no_mmap
+};
+
+static struct proto cmtp_proto = {
+	.name		= "CMTP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct bt_sock)
+};
+
+static int cmtp_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sk = sk_alloc(PF_BLUETOOTH, GFP_KERNEL, &cmtp_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	sock->ops = &cmtp_sock_ops;
+
+	sock->state = SS_UNCONNECTED;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = protocol;
+	sk->sk_state    = BT_OPEN;
+
+	return 0;
+}
+
+static struct net_proto_family cmtp_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= cmtp_sock_create
+};
+
+int cmtp_init_sockets(void)
+{
+	int err;
+
+	err = proto_register(&cmtp_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	return 0;
+
+error:
+	BT_ERR("Can't register CMTP socket");
+	proto_unregister(&cmtp_proto);
+	return err;
+}
+
+void cmtp_cleanup_sockets(void)
+{
+	if (bt_sock_unregister(BTPROTO_CMTP) < 0)
+		BT_ERR("Can't unregister CMTP socket");
+
+	proto_unregister(&cmtp_proto);
+}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
new file mode 100644
index 00000000000..71762d7e997
--- /dev/null
+++ b/net/bluetooth/hci_conn.c
@@ -0,0 +1,471 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth HCI connection handling. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCI_CORE_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static void hci_acl_connect(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct inquiry_entry *ie;
+	struct hci_cp_create_conn cp;
+
+	BT_DBG("%p", conn);
+
+	conn->state = BT_CONNECT;
+	conn->out   = 1;
+	conn->link_mode = HCI_LM_MASTER;
+
+	memset(&cp, 0, sizeof(cp));
+	bacpy(&cp.bdaddr, &conn->dst);
+	cp.pscan_rep_mode = 0x02;
+
+	if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)) &&
+			inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
+		cp.pscan_rep_mode = ie->data.pscan_rep_mode;
+		cp.pscan_mode     = ie->data.pscan_mode;
+		cp.clock_offset   = ie->data.clock_offset | __cpu_to_le16(0x8000);
+		memcpy(conn->dev_class, ie->data.dev_class, 3);
+	}
+
+	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+	if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
+		cp.role_switch	= 0x01;
+	else
+		cp.role_switch	= 0x00;
+		
+	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp);
+}
+
+void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
+{
+	struct hci_cp_disconnect cp;
+
+	BT_DBG("%p", conn);
+
+	conn->state = BT_DISCONN;
+
+	cp.handle = __cpu_to_le16(conn->handle);
+	cp.reason = reason;
+	hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_DISCONNECT, sizeof(cp), &cp);
+}
+
+void hci_add_sco(struct hci_conn *conn, __u16 handle)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct hci_cp_add_sco cp;
+
+	BT_DBG("%p", conn);
+
+	conn->state = BT_CONNECT;
+	conn->out = 1;
+
+	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+	cp.handle   = __cpu_to_le16(handle);
+
+	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
+}
+
+static void hci_conn_timeout(unsigned long arg)
+{
+	struct hci_conn *conn = (void *)arg;
+	struct hci_dev  *hdev = conn->hdev;
+
+	BT_DBG("conn %p state %d", conn, conn->state);
+
+	if (atomic_read(&conn->refcnt))
+		return;
+
+	hci_dev_lock(hdev);
+ 	if (conn->state == BT_CONNECTED)
+		hci_acl_disconn(conn, 0x13);
+	else
+		conn->state = BT_CLOSED;
+	hci_dev_unlock(hdev);
+	return;
+}
+
+static void hci_conn_init_timer(struct hci_conn *conn)
+{
+	init_timer(&conn->timer);
+	conn->timer.function = hci_conn_timeout;
+	conn->timer.data = (unsigned long)conn;
+}
+
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
+{
+	struct hci_conn *conn;
+
+	BT_DBG("%s dst %s", hdev->name, batostr(dst));
+
+	if (!(conn = kmalloc(sizeof(struct hci_conn), GFP_ATOMIC)))
+		return NULL;
+	memset(conn, 0, sizeof(struct hci_conn));
+
+	bacpy(&conn->dst, dst);
+	conn->type   = type;
+	conn->hdev   = hdev;
+	conn->state  = BT_OPEN;
+
+	skb_queue_head_init(&conn->data_q);
+	hci_conn_init_timer(conn);
+
+	atomic_set(&conn->refcnt, 0);
+
+	hci_dev_hold(hdev);
+
+	tasklet_disable(&hdev->tx_task);
+
+	hci_conn_hash_add(hdev, conn);
+	if (hdev->notify)
+		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
+
+	tasklet_enable(&hdev->tx_task);
+
+	return conn;
+}
+
+int hci_conn_del(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+
+	BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle);
+
+	hci_conn_del_timer(conn);
+
+	if (conn->type == SCO_LINK) {
+		struct hci_conn *acl = conn->link;
+		if (acl) {
+			acl->link = NULL;
+			hci_conn_put(acl);
+		}
+	} else {
+		struct hci_conn *sco = conn->link;
+		if (sco)
+			sco->link = NULL;
+
+		/* Unacked frames */
+		hdev->acl_cnt += conn->sent;
+	}
+
+	tasklet_disable(&hdev->tx_task);
+
+	hci_conn_hash_del(hdev, conn);
+	if (hdev->notify)
+		hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
+
+	tasklet_enable(&hdev->tx_task);
+
+	skb_queue_purge(&conn->data_q);
+
+	hci_dev_put(hdev);
+
+	kfree(conn);
+	return 0;
+}
+
+struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
+{
+	int use_src = bacmp(src, BDADDR_ANY);
+	struct hci_dev *hdev = NULL;
+	struct list_head *p;
+
+	BT_DBG("%s -> %s", batostr(src), batostr(dst));
+
+	read_lock_bh(&hci_dev_list_lock);
+
+	list_for_each(p, &hci_dev_list) {
+		struct hci_dev *d = list_entry(p, struct hci_dev, list);
+
+		if (!test_bit(HCI_UP, &d->flags) || test_bit(HCI_RAW, &d->flags))
+			continue;
+
+		/* Simple routing: 
+		 *   No source address - find interface with bdaddr != dst
+		 *   Source address    - find interface with bdaddr == src
+		 */
+
+		if (use_src) {
+			if (!bacmp(&d->bdaddr, src)) {
+				hdev = d; break;
+			}
+		} else {
+			if (bacmp(&d->bdaddr, dst)) {
+				hdev = d; break;
+			}
+		}
+	}
+
+	if (hdev)
+		hdev = hci_dev_hold(hdev);
+
+	read_unlock_bh(&hci_dev_list_lock);
+	return hdev;
+}
+EXPORT_SYMBOL(hci_get_route);
+
+/* Create SCO or ACL connection.
+ * Device _must_ be locked */
+struct hci_conn * hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
+{
+	struct hci_conn *acl;
+
+	BT_DBG("%s dst %s", hdev->name, batostr(dst));
+
+	if (!(acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst))) {
+		if (!(acl = hci_conn_add(hdev, ACL_LINK, dst)))
+			return NULL;
+	}
+
+	hci_conn_hold(acl);
+
+	if (acl->state == BT_OPEN || acl->state == BT_CLOSED)
+		hci_acl_connect(acl);
+
+	if (type == SCO_LINK) {
+		struct hci_conn *sco;
+
+		if (!(sco = hci_conn_hash_lookup_ba(hdev, SCO_LINK, dst))) {
+			if (!(sco = hci_conn_add(hdev, SCO_LINK, dst))) {
+				hci_conn_put(acl);
+				return NULL;
+			}
+		}
+		acl->link = sco;
+		sco->link = acl;
+
+		hci_conn_hold(sco);
+
+		if (acl->state == BT_CONNECTED && 
+				(sco->state == BT_OPEN || sco->state == BT_CLOSED))
+			hci_add_sco(sco, acl->handle);
+
+		return sco;
+	} else {
+		return acl;
+	}
+}
+EXPORT_SYMBOL(hci_connect);
+
+/* Authenticate remote device */
+int hci_conn_auth(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	if (conn->link_mode & HCI_LM_AUTH)
+		return 1;
+
+	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = __cpu_to_le16(conn->handle);
+		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hci_conn_auth);
+
+/* Enable encryption */
+int hci_conn_encrypt(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	if (conn->link_mode & HCI_LM_ENCRYPT)
+		return 1;
+
+	if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
+		return 0;
+
+	if (hci_conn_auth(conn)) {
+		struct hci_cp_set_conn_encrypt cp;
+		cp.handle  = __cpu_to_le16(conn->handle);
+		cp.encrypt = 1; 
+		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hci_conn_encrypt);
+
+/* Change link key */
+int hci_conn_change_link_key(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
+
+	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
+		struct hci_cp_change_conn_link_key cp;
+		cp.handle = __cpu_to_le16(conn->handle);
+		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hci_conn_change_link_key);
+
+/* Switch role */
+int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
+{
+	BT_DBG("conn %p", conn);
+
+	if (!role && conn->link_mode & HCI_LM_MASTER)
+		return 1;
+
+	if (!test_and_set_bit(HCI_CONN_RSWITCH_PEND, &conn->pend)) {
+		struct hci_cp_switch_role cp;
+		bacpy(&cp.bdaddr, &conn->dst);
+		cp.role = role;
+		hci_send_cmd(conn->hdev, OGF_LINK_POLICY, OCF_SWITCH_ROLE, sizeof(cp), &cp);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hci_conn_switch_role);
+
+/* Drop all connection on the device */
+void hci_conn_hash_flush(struct hci_dev *hdev)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct list_head *p;
+
+	BT_DBG("hdev %s", hdev->name);
+
+	p = h->list.next;
+	while (p != &h->list) {
+		struct hci_conn *c;
+
+		c = list_entry(p, struct hci_conn, list);
+		p = p->next;
+
+		c->state = BT_CLOSED;
+
+		hci_proto_disconn_ind(c, 0x16);
+		hci_conn_del(c);
+	}
+}
+
+int hci_get_conn_list(void __user *arg)
+{
+	struct hci_conn_list_req req, *cl;
+	struct hci_conn_info *ci;
+	struct hci_dev *hdev;
+	struct list_head *p;
+	int n = 0, size, err;
+
+	if (copy_from_user(&req, arg, sizeof(req)))
+		return -EFAULT;
+
+	if (!req.conn_num || req.conn_num > (PAGE_SIZE * 2) / sizeof(*ci))
+		return -EINVAL;
+
+	size = sizeof(req) + req.conn_num * sizeof(*ci);
+
+	if (!(cl = (void *) kmalloc(size, GFP_KERNEL)))
+		return -ENOMEM;
+
+	if (!(hdev = hci_dev_get(req.dev_id))) {
+		kfree(cl);
+		return -ENODEV;
+	}
+
+	ci = cl->conn_info;
+
+	hci_dev_lock_bh(hdev);
+	list_for_each(p, &hdev->conn_hash.list) {
+		register struct hci_conn *c;
+		c = list_entry(p, struct hci_conn, list);
+
+		bacpy(&(ci + n)->bdaddr, &c->dst);
+		(ci + n)->handle = c->handle;
+		(ci + n)->type  = c->type;
+		(ci + n)->out   = c->out;
+		(ci + n)->state = c->state;
+		(ci + n)->link_mode = c->link_mode;
+		if (++n >= req.conn_num)
+			break;
+	}
+	hci_dev_unlock_bh(hdev);
+
+	cl->dev_id = hdev->id;
+	cl->conn_num = n;
+	size = sizeof(req) + n * sizeof(*ci);
+
+	hci_dev_put(hdev);
+
+	err = copy_to_user(arg, cl, size);
+	kfree(cl);
+
+	return err ? -EFAULT : 0;
+}
+
+int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)
+{
+	struct hci_conn_info_req req;
+	struct hci_conn_info ci;
+	struct hci_conn *conn;
+	char __user *ptr = arg + sizeof(req);
+
+	if (copy_from_user(&req, arg, sizeof(req)))
+		return -EFAULT;
+
+	hci_dev_lock_bh(hdev);
+	conn = hci_conn_hash_lookup_ba(hdev, req.type, &req.bdaddr);
+	if (conn) {
+		bacpy(&ci.bdaddr, &conn->dst);
+		ci.handle = conn->handle;
+		ci.type  = conn->type;
+		ci.out   = conn->out;
+		ci.state = conn->state;
+		ci.link_mode = conn->link_mode;
+	}
+	hci_dev_unlock_bh(hdev);
+
+	if (!conn)
+		return -ENOENT;
+
+	return copy_to_user(ptr, &ci, sizeof(ci)) ? -EFAULT : 0;
+}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
new file mode 100644
index 00000000000..860dba7bdd8
--- /dev/null
+++ b/net/bluetooth/hci_core.c
@@ -0,0 +1,1434 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth HCI core. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCI_CORE_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static void hci_cmd_task(unsigned long arg);
+static void hci_rx_task(unsigned long arg);
+static void hci_tx_task(unsigned long arg);
+static void hci_notify(struct hci_dev *hdev, int event);
+
+static DEFINE_RWLOCK(hci_task_lock);
+
+/* HCI device list */
+LIST_HEAD(hci_dev_list);
+DEFINE_RWLOCK(hci_dev_list_lock);
+
+/* HCI callback list */
+LIST_HEAD(hci_cb_list);
+DEFINE_RWLOCK(hci_cb_list_lock);
+
+/* HCI protocols */
+#define HCI_MAX_PROTO	2
+struct hci_proto *hci_proto[HCI_MAX_PROTO];
+
+/* HCI notifiers list */
+static struct notifier_block *hci_notifier;
+
+/* ---- HCI notifications ---- */
+
+int hci_register_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_register(&hci_notifier, nb);
+}
+
+int hci_unregister_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&hci_notifier, nb);
+}
+
+void hci_notify(struct hci_dev *hdev, int event)
+{
+	notifier_call_chain(&hci_notifier, event, hdev);
+}
+
+/* ---- HCI requests ---- */
+
+void hci_req_complete(struct hci_dev *hdev, int result)
+{
+	BT_DBG("%s result 0x%2.2x", hdev->name, result);
+
+	if (hdev->req_status == HCI_REQ_PEND) {
+		hdev->req_result = result;
+		hdev->req_status = HCI_REQ_DONE;
+		wake_up_interruptible(&hdev->req_wait_q);
+	}
+}
+
+static void hci_req_cancel(struct hci_dev *hdev, int err)
+{
+	BT_DBG("%s err 0x%2.2x", hdev->name, err);
+
+	if (hdev->req_status == HCI_REQ_PEND) {
+		hdev->req_result = err;
+		hdev->req_status = HCI_REQ_CANCELED;
+		wake_up_interruptible(&hdev->req_wait_q);
+	}
+}
+
+/* Execute request and wait for completion. */
+static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), 
+				unsigned long opt, __u32 timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int err = 0;
+
+	BT_DBG("%s start", hdev->name);
+
+	hdev->req_status = HCI_REQ_PEND;
+
+	add_wait_queue(&hdev->req_wait_q, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	req(hdev, opt);
+	schedule_timeout(timeout);
+
+	remove_wait_queue(&hdev->req_wait_q, &wait);
+
+	if (signal_pending(current))
+		return -EINTR;
+
+	switch (hdev->req_status) {
+	case HCI_REQ_DONE:
+		err = -bt_err(hdev->req_result);
+		break;
+
+	case HCI_REQ_CANCELED:
+		err = -hdev->req_result;
+		break;
+
+	default:
+		err = -ETIMEDOUT;
+		break;
+	};
+
+	hdev->req_status = hdev->req_result = 0;
+
+	BT_DBG("%s end: err %d", hdev->name, err);
+
+	return err;
+}
+
+static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
+				unsigned long opt, __u32 timeout)
+{
+	int ret;
+
+	/* Serialize all requests */
+	hci_req_lock(hdev);
+	ret = __hci_request(hdev, req, opt, timeout);
+	hci_req_unlock(hdev);
+
+	return ret;
+}
+
+static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
+{
+	BT_DBG("%s %ld", hdev->name, opt);
+
+	/* Reset device */
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL);
+}
+
+static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
+{
+	struct sk_buff *skb;
+	__u16 param;
+
+	BT_DBG("%s %ld", hdev->name, opt);
+
+	/* Driver initialization */
+
+	/* Special commands */
+	while ((skb = skb_dequeue(&hdev->driver_init))) {
+		skb->pkt_type = HCI_COMMAND_PKT;
+		skb->dev = (void *) hdev;
+		skb_queue_tail(&hdev->cmd_q, skb);
+		hci_sched_cmd(hdev);
+	}
+	skb_queue_purge(&hdev->driver_init);
+
+	/* Mandatory initialization */
+
+	/* Reset */
+	if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks))
+			hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL);
+
+	/* Read Local Supported Features */
+	hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL);
+
+	/* Read Buffer Size (ACL mtu, max pkt, etc.) */
+	hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL);
+
+#if 0
+	/* Host buffer size */
+	{
+		struct hci_cp_host_buffer_size cp;
+		cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE);
+		cp.sco_mtu = HCI_MAX_SCO_SIZE;
+		cp.acl_max_pkt = __cpu_to_le16(0xffff);
+		cp.sco_max_pkt = __cpu_to_le16(0xffff);
+		hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
+	}
+#endif
+
+	/* Read BD Address */
+	hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BD_ADDR, 0, NULL);
+
+	/* Read Voice Setting */
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_READ_VOICE_SETTING, 0, NULL);
+
+	/* Optional initialization */
+
+	/* Clear Event Filters */
+	{
+		struct hci_cp_set_event_flt cp;
+		cp.flt_type  = HCI_FLT_CLEAR_ALL;
+		hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, sizeof(cp), &cp);
+	}
+
+	/* Page timeout ~20 secs */
+	param = __cpu_to_le16(0x8000);
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
+
+	/* Connection accept timeout ~20 secs */
+	param = __cpu_to_le16(0x7d00);
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
+}
+
+static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
+{
+	__u8 scan = opt;
+
+	BT_DBG("%s %x", hdev->name, scan);
+
+	/* Inquiry and Page scans */
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE, 1, &scan);
+}
+
+static void hci_auth_req(struct hci_dev *hdev, unsigned long opt)
+{
+	__u8 auth = opt;
+
+	BT_DBG("%s %x", hdev->name, auth);
+
+	/* Authentication */
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE, 1, &auth);
+}
+
+static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
+{
+	__u8 encrypt = opt;
+
+	BT_DBG("%s %x", hdev->name, encrypt);
+
+	/* Authentication */
+	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE, 1, &encrypt);
+}
+
+/* Get HCI device by index. 
+ * Device is held on return. */
+struct hci_dev *hci_dev_get(int index)
+{
+	struct hci_dev *hdev = NULL;
+	struct list_head *p;
+
+	BT_DBG("%d", index);
+
+	if (index < 0)
+		return NULL;
+
+	read_lock(&hci_dev_list_lock);
+	list_for_each(p, &hci_dev_list) {
+		struct hci_dev *d = list_entry(p, struct hci_dev, list);
+		if (d->id == index) {
+			hdev = hci_dev_hold(d);
+			break;
+		}
+	}
+	read_unlock(&hci_dev_list_lock);
+	return hdev;
+}
+EXPORT_SYMBOL(hci_dev_get);
+
+/* ---- Inquiry support ---- */
+static void inquiry_cache_flush(struct hci_dev *hdev)
+{
+	struct inquiry_cache *cache = &hdev->inq_cache;
+	struct inquiry_entry *next  = cache->list, *e;
+
+	BT_DBG("cache %p", cache);
+
+	cache->list = NULL;
+	while ((e = next)) {
+		next = e->next;
+		kfree(e);
+	}
+}
+
+struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct inquiry_cache *cache = &hdev->inq_cache;
+	struct inquiry_entry *e;
+
+	BT_DBG("cache %p, %s", cache, batostr(bdaddr));
+
+	for (e = cache->list; e; e = e->next)
+		if (!bacmp(&e->data.bdaddr, bdaddr))
+			break;
+	return e;
+}
+
+void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data)
+{
+	struct inquiry_cache *cache = &hdev->inq_cache;
+	struct inquiry_entry *e;
+
+	BT_DBG("cache %p, %s", cache, batostr(&data->bdaddr));
+
+	if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) {
+		/* Entry not in the cache. Add new one. */
+		if (!(e = kmalloc(sizeof(struct inquiry_entry), GFP_ATOMIC)))
+			return;
+		memset(e, 0, sizeof(struct inquiry_entry));
+		e->next     = cache->list;
+		cache->list = e;
+	}
+
+	memcpy(&e->data, data, sizeof(*data));
+	e->timestamp = jiffies;
+	cache->timestamp = jiffies;
+}
+
+static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
+{
+	struct inquiry_cache *cache = &hdev->inq_cache;
+	struct inquiry_info *info = (struct inquiry_info *) buf;
+	struct inquiry_entry *e;
+	int copied = 0;
+
+	for (e = cache->list; e && copied < num; e = e->next, copied++) {
+		struct inquiry_data *data = &e->data;
+		bacpy(&info->bdaddr, &data->bdaddr);
+		info->pscan_rep_mode	= data->pscan_rep_mode;
+		info->pscan_period_mode	= data->pscan_period_mode;
+		info->pscan_mode	= data->pscan_mode;
+		memcpy(info->dev_class, data->dev_class, 3);
+		info->clock_offset	= data->clock_offset;
+		info++;
+	}
+
+	BT_DBG("cache %p, copied %d", cache, copied);
+	return copied;
+}
+
+static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
+{
+	struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
+	struct hci_cp_inquiry cp;
+
+	BT_DBG("%s", hdev->name);
+
+	if (test_bit(HCI_INQUIRY, &hdev->flags))
+		return;
+
+	/* Start Inquiry */
+	memcpy(&cp.lap, &ir->lap, 3);
+	cp.length  = ir->length;
+	cp.num_rsp = ir->num_rsp;
+	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_INQUIRY, sizeof(cp), &cp);
+}
+
+int hci_inquiry(void __user *arg)
+{
+	__u8 __user *ptr = arg;
+	struct hci_inquiry_req ir;
+	struct hci_dev *hdev;
+	int err = 0, do_inquiry = 0, max_rsp;
+	long timeo;
+	__u8 *buf;
+
+	if (copy_from_user(&ir, ptr, sizeof(ir)))
+		return -EFAULT;
+
+	if (!(hdev = hci_dev_get(ir.dev_id)))
+		return -ENODEV;
+
+	hci_dev_lock_bh(hdev);
+	if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || 
+					inquiry_cache_empty(hdev) ||
+					ir.flags & IREQ_CACHE_FLUSH) {
+		inquiry_cache_flush(hdev);
+		do_inquiry = 1;
+	}
+	hci_dev_unlock_bh(hdev);
+
+	timeo = ir.length * 2 * HZ;
+	if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0)
+		goto done;
+
+	/* for unlimited number of responses we will use buffer with 255 entries */
+	max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp;
+
+	/* cache_dump can't sleep. Therefore we allocate temp buffer and then
+	 * copy it to the user space.
+	 */
+	if (!(buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	hci_dev_lock_bh(hdev);
+	ir.num_rsp = inquiry_cache_dump(hdev, max_rsp, buf);
+	hci_dev_unlock_bh(hdev);
+
+	BT_DBG("num_rsp %d", ir.num_rsp);
+
+	if (!copy_to_user(ptr, &ir, sizeof(ir))) {
+		ptr += sizeof(ir);
+		if (copy_to_user(ptr, buf, sizeof(struct inquiry_info) *
+					ir.num_rsp))
+			err = -EFAULT;
+	} else 
+		err = -EFAULT;
+
+	kfree(buf);
+
+done:
+	hci_dev_put(hdev);
+	return err;
+}
+
+/* ---- HCI ioctl helpers ---- */
+
+int hci_dev_open(__u16 dev)
+{
+	struct hci_dev *hdev;
+	int ret = 0;
+
+	if (!(hdev = hci_dev_get(dev)))
+		return -ENODEV;
+
+	BT_DBG("%s %p", hdev->name, hdev);
+
+	hci_req_lock(hdev);
+
+	if (test_bit(HCI_UP, &hdev->flags)) {
+		ret = -EALREADY;
+		goto done;
+	}
+
+	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+		set_bit(HCI_RAW, &hdev->flags);
+
+	if (hdev->open(hdev)) {
+		ret = -EIO;
+		goto done;
+	}
+
+	if (!test_bit(HCI_RAW, &hdev->flags)) {
+		atomic_set(&hdev->cmd_cnt, 1);
+		set_bit(HCI_INIT, &hdev->flags);
+
+		//__hci_request(hdev, hci_reset_req, 0, HZ);
+		ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
+
+		clear_bit(HCI_INIT, &hdev->flags);
+	}
+
+	if (!ret) {
+		hci_dev_hold(hdev);
+		set_bit(HCI_UP, &hdev->flags);
+		hci_notify(hdev, HCI_DEV_UP);
+	} else {	
+		/* Init failed, cleanup */
+		tasklet_kill(&hdev->rx_task);
+		tasklet_kill(&hdev->tx_task);
+		tasklet_kill(&hdev->cmd_task);
+
+		skb_queue_purge(&hdev->cmd_q);
+		skb_queue_purge(&hdev->rx_q);
+
+		if (hdev->flush)
+			hdev->flush(hdev);
+
+		if (hdev->sent_cmd) {
+			kfree_skb(hdev->sent_cmd);
+			hdev->sent_cmd = NULL;
+		}
+
+		hdev->close(hdev);
+		hdev->flags = 0;
+	}
+
+done:
+	hci_req_unlock(hdev);
+	hci_dev_put(hdev);
+	return ret;
+}
+
+static int hci_dev_do_close(struct hci_dev *hdev)
+{
+	BT_DBG("%s %p", hdev->name, hdev);
+
+	hci_req_cancel(hdev, ENODEV);
+	hci_req_lock(hdev);
+
+	if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
+		hci_req_unlock(hdev);
+		return 0;
+	}
+
+	/* Kill RX and TX tasks */
+	tasklet_kill(&hdev->rx_task);
+	tasklet_kill(&hdev->tx_task);
+
+	hci_dev_lock_bh(hdev);
+	inquiry_cache_flush(hdev);
+	hci_conn_hash_flush(hdev);
+	hci_dev_unlock_bh(hdev);
+
+	hci_notify(hdev, HCI_DEV_DOWN);
+
+	if (hdev->flush)
+		hdev->flush(hdev);
+
+	/* Reset device */
+	skb_queue_purge(&hdev->cmd_q);
+	atomic_set(&hdev->cmd_cnt, 1);
+	if (!test_bit(HCI_RAW, &hdev->flags)) {
+		set_bit(HCI_INIT, &hdev->flags);
+		__hci_request(hdev, hci_reset_req, 0, HZ/4);
+		clear_bit(HCI_INIT, &hdev->flags);
+	}
+
+	/* Kill cmd task */
+	tasklet_kill(&hdev->cmd_task);
+
+	/* Drop queues */
+	skb_queue_purge(&hdev->rx_q);
+	skb_queue_purge(&hdev->cmd_q);
+	skb_queue_purge(&hdev->raw_q);
+
+	/* Drop last sent command */
+	if (hdev->sent_cmd) {
+		kfree_skb(hdev->sent_cmd);
+		hdev->sent_cmd = NULL;
+	}
+
+	/* After this point our queues are empty
+	 * and no tasks are scheduled. */
+	hdev->close(hdev);
+
+	/* Clear flags */
+	hdev->flags = 0;
+
+	hci_req_unlock(hdev);
+
+	hci_dev_put(hdev);
+	return 0;
+}
+
+int hci_dev_close(__u16 dev)
+{
+	struct hci_dev *hdev;
+	int err;
+
+	if (!(hdev = hci_dev_get(dev)))
+		return -ENODEV;
+	err = hci_dev_do_close(hdev);
+	hci_dev_put(hdev);
+	return err;
+}
+
+int hci_dev_reset(__u16 dev)
+{
+	struct hci_dev *hdev;
+	int ret = 0;
+
+	if (!(hdev = hci_dev_get(dev)))
+		return -ENODEV;
+
+	hci_req_lock(hdev);
+	tasklet_disable(&hdev->tx_task);
+
+	if (!test_bit(HCI_UP, &hdev->flags))
+		goto done;
+
+	/* Drop queues */
+	skb_queue_purge(&hdev->rx_q);
+	skb_queue_purge(&hdev->cmd_q);
+
+	hci_dev_lock_bh(hdev);
+	inquiry_cache_flush(hdev);
+	hci_conn_hash_flush(hdev);
+	hci_dev_unlock_bh(hdev);
+
+	if (hdev->flush)
+		hdev->flush(hdev);
+
+	atomic_set(&hdev->cmd_cnt, 1); 
+	hdev->acl_cnt = 0; hdev->sco_cnt = 0;
+
+	if (!test_bit(HCI_RAW, &hdev->flags))
+		ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+
+done:
+	tasklet_enable(&hdev->tx_task);
+	hci_req_unlock(hdev);
+	hci_dev_put(hdev);
+	return ret;
+}
+
+int hci_dev_reset_stat(__u16 dev)
+{
+	struct hci_dev *hdev;
+	int ret = 0;
+
+	if (!(hdev = hci_dev_get(dev)))
+		return -ENODEV;
+
+	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
+
+	hci_dev_put(hdev);
+
+	return ret;
+}
+
+int hci_dev_cmd(unsigned int cmd, void __user *arg)
+{
+	struct hci_dev *hdev;
+	struct hci_dev_req dr;
+	int err = 0;
+
+	if (copy_from_user(&dr, arg, sizeof(dr)))
+		return -EFAULT;
+
+	if (!(hdev = hci_dev_get(dr.dev_id)))
+		return -ENODEV;
+
+	switch (cmd) {
+	case HCISETAUTH:
+		err = hci_request(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+		break;
+
+	case HCISETENCRYPT:
+		if (!lmp_encrypt_capable(hdev)) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+
+		if (!test_bit(HCI_AUTH, &hdev->flags)) {
+			/* Auth must be enabled first */
+			err = hci_request(hdev, hci_auth_req,
+					dr.dev_opt, HCI_INIT_TIMEOUT);
+			if (err)
+				break;
+		}
+
+		err = hci_request(hdev, hci_encrypt_req,
+					dr.dev_opt, HCI_INIT_TIMEOUT);
+		break;
+
+	case HCISETSCAN:
+		err = hci_request(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+		break;
+
+	case HCISETPTYPE:
+		hdev->pkt_type = (__u16) dr.dev_opt;
+		break;
+
+	case HCISETLINKPOL:
+		hdev->link_policy = (__u16) dr.dev_opt;
+		break;
+
+	case HCISETLINKMODE:
+		hdev->link_mode = ((__u16) dr.dev_opt) & (HCI_LM_MASTER | HCI_LM_ACCEPT);
+		break;
+
+	case HCISETACLMTU:
+		hdev->acl_mtu  = *((__u16 *)&dr.dev_opt + 1);
+		hdev->acl_pkts = *((__u16 *)&dr.dev_opt + 0);
+		break;
+
+	case HCISETSCOMTU:
+		hdev->sco_mtu  = *((__u16 *)&dr.dev_opt + 1);
+		hdev->sco_pkts = *((__u16 *)&dr.dev_opt + 0);
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+	hci_dev_put(hdev);
+	return err;
+}
+
+int hci_get_dev_list(void __user *arg)
+{
+	struct hci_dev_list_req *dl;
+	struct hci_dev_req *dr;
+	struct list_head *p;
+	int n = 0, size, err;
+	__u16 dev_num;
+
+	if (get_user(dev_num, (__u16 __user *) arg))
+		return -EFAULT;
+
+	if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr))
+		return -EINVAL;
+
+	size = sizeof(*dl) + dev_num * sizeof(*dr);
+
+	if (!(dl = kmalloc(size, GFP_KERNEL)))
+		return -ENOMEM;
+
+	dr = dl->dev_req;
+
+	read_lock_bh(&hci_dev_list_lock);
+	list_for_each(p, &hci_dev_list) {
+		struct hci_dev *hdev;
+		hdev = list_entry(p, struct hci_dev, list);
+		(dr + n)->dev_id  = hdev->id;
+		(dr + n)->dev_opt = hdev->flags;
+		if (++n >= dev_num)
+			break;
+	}
+	read_unlock_bh(&hci_dev_list_lock);
+
+	dl->dev_num = n;
+	size = sizeof(*dl) + n * sizeof(*dr);
+
+	err = copy_to_user(arg, dl, size);
+	kfree(dl);
+
+	return err ? -EFAULT : 0;
+}
+
+int hci_get_dev_info(void __user *arg)
+{
+	struct hci_dev *hdev;
+	struct hci_dev_info di;
+	int err = 0;
+
+	if (copy_from_user(&di, arg, sizeof(di)))
+		return -EFAULT;
+
+	if (!(hdev = hci_dev_get(di.dev_id)))
+		return -ENODEV;
+
+	strcpy(di.name, hdev->name);
+	di.bdaddr   = hdev->bdaddr;
+	di.type     = hdev->type;
+	di.flags    = hdev->flags;
+	di.pkt_type = hdev->pkt_type;
+	di.acl_mtu  = hdev->acl_mtu;
+	di.acl_pkts = hdev->acl_pkts;
+	di.sco_mtu  = hdev->sco_mtu;
+	di.sco_pkts = hdev->sco_pkts;
+	di.link_policy = hdev->link_policy;
+	di.link_mode   = hdev->link_mode;
+
+	memcpy(&di.stat, &hdev->stat, sizeof(di.stat));
+	memcpy(&di.features, &hdev->features, sizeof(di.features));
+
+	if (copy_to_user(arg, &di, sizeof(di)))
+		err = -EFAULT;
+
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+/* ---- Interface to HCI drivers ---- */
+
+/* Alloc HCI device */
+struct hci_dev *hci_alloc_dev(void)
+{
+	struct hci_dev *hdev;
+
+	hdev = kmalloc(sizeof(struct hci_dev), GFP_KERNEL);
+	if (!hdev)
+		return NULL;
+
+	memset(hdev, 0, sizeof(struct hci_dev));
+
+	skb_queue_head_init(&hdev->driver_init);
+
+	return hdev;
+}
+EXPORT_SYMBOL(hci_alloc_dev);
+
+/* Free HCI device */
+void hci_free_dev(struct hci_dev *hdev)
+{
+	skb_queue_purge(&hdev->driver_init);
+
+	/* will free via class release */
+	class_device_put(&hdev->class_dev);
+}
+EXPORT_SYMBOL(hci_free_dev);
+
+/* Register HCI device */
+int hci_register_dev(struct hci_dev *hdev)
+{
+	struct list_head *head = &hci_dev_list, *p;
+	int id = 0;
+
+	BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner);
+
+	if (!hdev->open || !hdev->close || !hdev->destruct)
+		return -EINVAL;
+
+	write_lock_bh(&hci_dev_list_lock);
+
+	/* Find first available device id */
+	list_for_each(p, &hci_dev_list) {
+		if (list_entry(p, struct hci_dev, list)->id != id)
+			break;
+		head = p; id++;
+	}
+	
+	sprintf(hdev->name, "hci%d", id);
+	hdev->id = id;
+	list_add(&hdev->list, head);
+
+	atomic_set(&hdev->refcnt, 1);
+	spin_lock_init(&hdev->lock);
+
+	hdev->flags = 0;
+	hdev->pkt_type  = (HCI_DM1 | HCI_DH1 | HCI_HV1);
+	hdev->link_mode = (HCI_LM_ACCEPT);
+
+	tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev);
+	tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev);
+	tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev);
+
+	skb_queue_head_init(&hdev->rx_q);
+	skb_queue_head_init(&hdev->cmd_q);
+	skb_queue_head_init(&hdev->raw_q);
+
+	init_waitqueue_head(&hdev->req_wait_q);
+	init_MUTEX(&hdev->req_lock);
+
+	inquiry_cache_init(hdev);
+
+	hci_conn_hash_init(hdev);
+
+	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
+
+	atomic_set(&hdev->promisc, 0);
+
+	write_unlock_bh(&hci_dev_list_lock);
+
+	hci_register_sysfs(hdev);
+
+	hci_notify(hdev, HCI_DEV_REG);
+
+	return id;
+}
+EXPORT_SYMBOL(hci_register_dev);
+
+/* Unregister HCI device */
+int hci_unregister_dev(struct hci_dev *hdev)
+{
+	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
+
+	hci_unregister_sysfs(hdev);
+
+	write_lock_bh(&hci_dev_list_lock);
+	list_del(&hdev->list);
+	write_unlock_bh(&hci_dev_list_lock);
+
+	hci_dev_do_close(hdev);
+
+	hci_notify(hdev, HCI_DEV_UNREG);
+
+	__hci_dev_put(hdev);
+	return 0;
+}
+EXPORT_SYMBOL(hci_unregister_dev);
+
+/* Suspend HCI device */
+int hci_suspend_dev(struct hci_dev *hdev)
+{
+	hci_notify(hdev, HCI_DEV_SUSPEND);
+	return 0;
+}
+EXPORT_SYMBOL(hci_suspend_dev);
+
+/* Resume HCI device */
+int hci_resume_dev(struct hci_dev *hdev)
+{
+	hci_notify(hdev, HCI_DEV_RESUME);
+	return 0;
+}
+EXPORT_SYMBOL(hci_resume_dev);
+
+/* ---- Interface to upper protocols ---- */
+
+/* Register/Unregister protocols.
+ * hci_task_lock is used to ensure that no tasks are running. */
+int hci_register_proto(struct hci_proto *hp)
+{
+	int err = 0;
+
+	BT_DBG("%p name %s id %d", hp, hp->name, hp->id);
+
+	if (hp->id >= HCI_MAX_PROTO)
+		return -EINVAL;
+
+	write_lock_bh(&hci_task_lock);
+
+	if (!hci_proto[hp->id])
+		hci_proto[hp->id] = hp;
+	else
+		err = -EEXIST;
+
+	write_unlock_bh(&hci_task_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(hci_register_proto);
+
+int hci_unregister_proto(struct hci_proto *hp)
+{
+	int err = 0;
+
+	BT_DBG("%p name %s id %d", hp, hp->name, hp->id);
+
+	if (hp->id >= HCI_MAX_PROTO)
+		return -EINVAL;
+
+	write_lock_bh(&hci_task_lock);
+
+	if (hci_proto[hp->id])
+		hci_proto[hp->id] = NULL;
+	else
+		err = -ENOENT;
+
+	write_unlock_bh(&hci_task_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(hci_unregister_proto);
+
+int hci_register_cb(struct hci_cb *cb)
+{
+	BT_DBG("%p name %s", cb, cb->name);
+
+	write_lock_bh(&hci_cb_list_lock);
+	list_add(&cb->list, &hci_cb_list);
+	write_unlock_bh(&hci_cb_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(hci_register_cb);
+
+int hci_unregister_cb(struct hci_cb *cb)
+{
+	BT_DBG("%p name %s", cb, cb->name);
+
+	write_lock_bh(&hci_cb_list_lock);
+	list_del(&cb->list);
+	write_unlock_bh(&hci_cb_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(hci_unregister_cb);
+
+static int hci_send_frame(struct sk_buff *skb)
+{
+	struct hci_dev *hdev = (struct hci_dev *) skb->dev;
+
+	if (!hdev) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len);
+
+	if (atomic_read(&hdev->promisc)) {
+		/* Time stamp */
+		do_gettimeofday(&skb->stamp);
+
+		hci_send_to_sock(hdev, skb);
+	}
+
+	/* Get rid of skb owner, prior to sending to the driver. */
+	skb_orphan(skb);
+
+	return hdev->send(skb);
+}
+
+/* Send HCI command */
+int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *param)
+{
+	int len = HCI_COMMAND_HDR_SIZE + plen;
+	struct hci_command_hdr *hdr;
+	struct sk_buff *skb;
+
+	BT_DBG("%s ogf 0x%x ocf 0x%x plen %d", hdev->name, ogf, ocf, plen);
+
+	skb = bt_skb_alloc(len, GFP_ATOMIC);
+	if (!skb) {
+		BT_ERR("%s Can't allocate memory for HCI command", hdev->name);
+		return -ENOMEM;
+	}
+
+	hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
+	hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf));
+	hdr->plen   = plen;
+
+	if (plen)
+		memcpy(skb_put(skb, plen), param, plen);
+
+	BT_DBG("skb len %d", skb->len);
+
+	skb->pkt_type = HCI_COMMAND_PKT;
+	skb->dev = (void *) hdev;
+	skb_queue_tail(&hdev->cmd_q, skb);
+	hci_sched_cmd(hdev);
+
+	return 0;
+}
+EXPORT_SYMBOL(hci_send_cmd);
+
+/* Get data from the previously sent command */
+void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
+{
+	struct hci_command_hdr *hdr;
+
+	if (!hdev->sent_cmd)
+		return NULL;
+
+	hdr = (void *) hdev->sent_cmd->data;
+
+	if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+		return NULL;
+
+	BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
+
+	return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
+}
+
+/* Send ACL data */
+static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
+{
+	struct hci_acl_hdr *hdr;
+	int len = skb->len;
+
+	hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
+	hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags));
+	hdr->dlen   = __cpu_to_le16(len);
+
+	skb->h.raw = (void *) hdr;
+}
+
+int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct sk_buff *list;
+
+	BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags);
+
+	skb->dev = (void *) hdev;
+	skb->pkt_type = HCI_ACLDATA_PKT;
+	hci_add_acl_hdr(skb, conn->handle, flags | ACL_START);
+
+	if (!(list = skb_shinfo(skb)->frag_list)) {
+		/* Non fragmented */
+		BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len);
+
+		skb_queue_tail(&conn->data_q, skb);
+	} else {
+		/* Fragmented */
+		BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
+
+		skb_shinfo(skb)->frag_list = NULL;
+
+		/* Queue all fragments atomically */
+		spin_lock_bh(&conn->data_q.lock);
+
+		__skb_queue_tail(&conn->data_q, skb);
+		do {
+			skb = list; list = list->next;
+			
+			skb->dev = (void *) hdev;
+			skb->pkt_type = HCI_ACLDATA_PKT;
+			hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT);
+
+			BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
+
+			__skb_queue_tail(&conn->data_q, skb);
+		} while (list);
+
+		spin_unlock_bh(&conn->data_q.lock);
+	}
+
+	hci_sched_tx(hdev);
+	return 0;
+}
+EXPORT_SYMBOL(hci_send_acl);
+
+/* Send SCO data */
+int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct hci_sco_hdr hdr;
+
+	BT_DBG("%s len %d", hdev->name, skb->len);
+
+	if (skb->len > hdev->sco_mtu) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	hdr.handle = __cpu_to_le16(conn->handle);
+	hdr.dlen   = skb->len;
+
+	skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
+	memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+
+	skb->dev = (void *) hdev;
+	skb->pkt_type = HCI_SCODATA_PKT;
+	skb_queue_tail(&conn->data_q, skb);
+	hci_sched_tx(hdev);
+	return 0;
+}
+EXPORT_SYMBOL(hci_send_sco);
+
+/* ---- HCI TX task (outgoing data) ---- */
+
+/* HCI Connection scheduler */
+static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct hci_conn  *conn = NULL;
+	int num = 0, min = ~0;
+	struct list_head *p;
+
+	/* We don't have to lock device here. Connections are always 
+	 * added and removed with TX task disabled. */
+	list_for_each(p, &h->list) {
+		struct hci_conn *c;
+		c = list_entry(p, struct hci_conn, list);
+
+		if (c->type != type || c->state != BT_CONNECTED
+				|| skb_queue_empty(&c->data_q))
+			continue;
+		num++;
+
+		if (c->sent < min) {
+			min  = c->sent;
+			conn = c;
+		}
+	}
+
+	if (conn) {
+		int cnt = (type == ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt);
+		int q = cnt / num;
+		*quote = q ? q : 1;
+	} else
+		*quote = 0;
+
+	BT_DBG("conn %p quote %d", conn, *quote);
+	return conn;
+}
+
+static inline void hci_acl_tx_to(struct hci_dev *hdev)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct list_head *p;
+	struct hci_conn  *c;
+
+	BT_ERR("%s ACL tx timeout", hdev->name);
+
+	/* Kill stalled connections */
+	list_for_each(p, &h->list) {
+		c = list_entry(p, struct hci_conn, list);
+		if (c->type == ACL_LINK && c->sent) {
+			BT_ERR("%s killing stalled ACL connection %s",
+				hdev->name, batostr(&c->dst));
+			hci_acl_disconn(c, 0x13);
+		}
+	}
+}
+
+static inline void hci_sched_acl(struct hci_dev *hdev)
+{
+	struct hci_conn *conn;
+	struct sk_buff *skb;
+	int quote;
+
+	BT_DBG("%s", hdev->name);
+
+	if (!test_bit(HCI_RAW, &hdev->flags)) {
+		/* ACL tx timeout must be longer than maximum
+		 * link supervision timeout (40.9 seconds) */
+		if (!hdev->acl_cnt && (jiffies - hdev->acl_last_tx) > (HZ * 45))
+			hci_acl_tx_to(hdev);
+	}
+
+	while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
+		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+			BT_DBG("skb %p len %d", skb, skb->len);
+			hci_send_frame(skb);
+			hdev->acl_last_tx = jiffies;
+
+			hdev->acl_cnt--;
+			conn->sent++;
+		}
+	}
+}
+
+/* Schedule SCO */
+static inline void hci_sched_sco(struct hci_dev *hdev)
+{
+	struct hci_conn *conn;
+	struct sk_buff *skb;
+	int quote;
+
+	BT_DBG("%s", hdev->name);
+
+	while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) {
+		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+			BT_DBG("skb %p len %d", skb, skb->len);
+			hci_send_frame(skb);
+
+			conn->sent++;
+			if (conn->sent == ~0)
+				conn->sent = 0;
+		}
+	}
+}
+
+static void hci_tx_task(unsigned long arg)
+{
+	struct hci_dev *hdev = (struct hci_dev *) arg;
+	struct sk_buff *skb;
+
+	read_lock(&hci_task_lock);
+
+	BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt);
+
+	/* Schedule queues and send stuff to HCI driver */
+
+	hci_sched_acl(hdev);
+
+	hci_sched_sco(hdev);
+
+	/* Send next queued raw (unknown type) packet */
+	while ((skb = skb_dequeue(&hdev->raw_q)))
+		hci_send_frame(skb);
+
+	read_unlock(&hci_task_lock);
+}
+
+/* ----- HCI RX task (incoming data proccessing) ----- */
+
+/* ACL data packet */
+static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_acl_hdr *hdr = (void *) skb->data;
+	struct hci_conn *conn;
+	__u16 handle, flags;
+
+	skb_pull(skb, HCI_ACL_HDR_SIZE);
+
+	handle = __le16_to_cpu(hdr->handle);
+	flags  = hci_flags(handle);
+	handle = hci_handle(handle);
+
+	BT_DBG("%s len %d handle 0x%x flags 0x%x", hdev->name, skb->len, handle, flags);
+
+	hdev->stat.acl_rx++;
+
+	hci_dev_lock(hdev);
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	hci_dev_unlock(hdev);
+	
+	if (conn) {
+		register struct hci_proto *hp;
+
+		/* Send to upper protocol */
+		if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) {
+			hp->recv_acldata(conn, skb, flags);
+			return;
+		}
+	} else {
+		BT_ERR("%s ACL packet for unknown connection handle %d", 
+			hdev->name, handle);
+	}
+
+	kfree_skb(skb);
+}
+
+/* SCO data packet */
+static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_sco_hdr *hdr = (void *) skb->data;
+	struct hci_conn *conn;
+	__u16 handle;
+
+	skb_pull(skb, HCI_SCO_HDR_SIZE);
+
+	handle = __le16_to_cpu(hdr->handle);
+
+	BT_DBG("%s len %d handle 0x%x", hdev->name, skb->len, handle);
+
+	hdev->stat.sco_rx++;
+
+	hci_dev_lock(hdev);
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	hci_dev_unlock(hdev);
+
+	if (conn) {
+		register struct hci_proto *hp;
+
+		/* Send to upper protocol */
+		if ((hp = hci_proto[HCI_PROTO_SCO]) && hp->recv_scodata) {
+			hp->recv_scodata(conn, skb);
+			return;
+		}
+	} else {
+		BT_ERR("%s SCO packet for unknown connection handle %d", 
+			hdev->name, handle);
+	}
+
+	kfree_skb(skb);
+}
+
+void hci_rx_task(unsigned long arg)
+{
+	struct hci_dev *hdev = (struct hci_dev *) arg;
+	struct sk_buff *skb;
+
+	BT_DBG("%s", hdev->name);
+
+	read_lock(&hci_task_lock);
+
+	while ((skb = skb_dequeue(&hdev->rx_q))) {
+		if (atomic_read(&hdev->promisc)) {
+			/* Send copy to the sockets */
+			hci_send_to_sock(hdev, skb);
+		}
+
+		if (test_bit(HCI_RAW, &hdev->flags)) {
+			kfree_skb(skb);
+			continue;
+		}
+
+		if (test_bit(HCI_INIT, &hdev->flags)) {
+			/* Don't process data packets in this states. */
+			switch (skb->pkt_type) {
+			case HCI_ACLDATA_PKT:
+			case HCI_SCODATA_PKT:
+				kfree_skb(skb);
+				continue;
+			};
+		}
+
+		/* Process frame */
+		switch (skb->pkt_type) {
+		case HCI_EVENT_PKT:
+			hci_event_packet(hdev, skb);
+			break;
+
+		case HCI_ACLDATA_PKT:
+			BT_DBG("%s ACL data packet", hdev->name);
+			hci_acldata_packet(hdev, skb);
+			break;
+
+		case HCI_SCODATA_PKT:
+			BT_DBG("%s SCO data packet", hdev->name);
+			hci_scodata_packet(hdev, skb);
+			break;
+
+		default:
+			kfree_skb(skb);
+			break;
+		}
+	}
+
+	read_unlock(&hci_task_lock);
+}
+
+static void hci_cmd_task(unsigned long arg)
+{
+	struct hci_dev *hdev = (struct hci_dev *) arg;
+	struct sk_buff *skb;
+
+	BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt));
+
+	if (!atomic_read(&hdev->cmd_cnt) && (jiffies - hdev->cmd_last_tx) > HZ) {
+		BT_ERR("%s command tx timeout", hdev->name);
+		atomic_set(&hdev->cmd_cnt, 1);
+	}
+
+	/* Send queued commands */
+	if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
+		if (hdev->sent_cmd)
+			kfree_skb(hdev->sent_cmd);
+
+		if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) {
+			atomic_dec(&hdev->cmd_cnt);
+			hci_send_frame(skb);
+			hdev->cmd_last_tx = jiffies;
+		} else {
+			skb_queue_head(&hdev->cmd_q, skb);
+			hci_sched_cmd(hdev);
+		}
+	}
+}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
new file mode 100644
index 00000000000..8ccba8ee997
--- /dev/null
+++ b/net/bluetooth/hci_event.c
@@ -0,0 +1,1044 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth HCI event handling. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCI_CORE_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+/* Handle HCI Event packets */
+
+/* Command Complete OGF LINK_CTL  */
+static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
+{
+	__u8 status;
+
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	case OCF_INQUIRY_CANCEL:
+		status = *((__u8 *) skb->data);
+
+		if (status) {
+			BT_DBG("%s Inquiry cancel error: status 0x%x", hdev->name, status);
+		} else {
+			clear_bit(HCI_INQUIRY, &hdev->flags);
+			hci_req_complete(hdev, status);
+		}
+		break;
+
+	default:
+		BT_DBG("%s Command complete: ogf LINK_CTL ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Complete OGF LINK_POLICY  */
+static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
+{
+	struct hci_conn *conn;
+	struct hci_rp_role_discovery *rd;
+
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	case OCF_ROLE_DISCOVERY: 
+		rd = (void *) skb->data;
+
+		if (rd->status)
+			break;
+
+		hci_dev_lock(hdev);
+
+		conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rd->handle));
+		if (conn) {
+			if (rd->role)
+				conn->link_mode &= ~HCI_LM_MASTER;
+			else
+				conn->link_mode |= HCI_LM_MASTER;
+		}
+
+		hci_dev_unlock(hdev);
+		break;
+
+	default:
+		BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", 
+				hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Complete OGF HOST_CTL  */
+static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
+{
+	__u8 status, param;
+	__u16 setting;
+	struct hci_rp_read_voice_setting *vs;
+	void *sent;
+
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	case OCF_RESET:
+		status = *((__u8 *) skb->data);
+		hci_req_complete(hdev, status);
+		break;
+
+	case OCF_SET_EVENT_FLT:
+		status = *((__u8 *) skb->data);
+		if (status) {
+			BT_DBG("%s SET_EVENT_FLT failed %d", hdev->name, status);
+		} else {
+			BT_DBG("%s SET_EVENT_FLT succeseful", hdev->name);
+		}
+		break;
+
+	case OCF_WRITE_AUTH_ENABLE:
+		sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE);
+		if (!sent)
+			break;
+
+		status = *((__u8 *) skb->data);
+		param  = *((__u8 *) sent);
+
+		if (!status) {
+			if (param == AUTH_ENABLED)
+				set_bit(HCI_AUTH, &hdev->flags);
+			else
+				clear_bit(HCI_AUTH, &hdev->flags);
+		}
+		hci_req_complete(hdev, status);
+		break;
+
+	case OCF_WRITE_ENCRYPT_MODE:
+		sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE);
+		if (!sent)
+			break;
+
+		status = *((__u8 *) skb->data);
+		param  = *((__u8 *) sent);
+
+		if (!status) {
+			if (param)
+				set_bit(HCI_ENCRYPT, &hdev->flags);
+			else
+				clear_bit(HCI_ENCRYPT, &hdev->flags);
+		}
+		hci_req_complete(hdev, status);
+		break;
+
+	case OCF_WRITE_CA_TIMEOUT:
+		status = *((__u8 *) skb->data);
+		if (status) {
+			BT_DBG("%s OCF_WRITE_CA_TIMEOUT failed %d", hdev->name, status);
+		} else {
+			BT_DBG("%s OCF_WRITE_CA_TIMEOUT succeseful", hdev->name);
+		}
+		break;
+
+	case OCF_WRITE_PG_TIMEOUT:
+		status = *((__u8 *) skb->data);
+		if (status) {
+			BT_DBG("%s OCF_WRITE_PG_TIMEOUT failed %d", hdev->name, status);
+		} else {
+			BT_DBG("%s: OCF_WRITE_PG_TIMEOUT succeseful", hdev->name);
+		}
+		break;
+
+	case OCF_WRITE_SCAN_ENABLE:
+		sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE);
+		if (!sent)
+			break;
+
+		status = *((__u8 *) skb->data);
+		param  = *((__u8 *) sent);
+
+		BT_DBG("param 0x%x", param);
+
+		if (!status) {
+			clear_bit(HCI_PSCAN, &hdev->flags);
+			clear_bit(HCI_ISCAN, &hdev->flags);
+			if (param & SCAN_INQUIRY) 
+				set_bit(HCI_ISCAN, &hdev->flags);
+
+			if (param & SCAN_PAGE) 
+				set_bit(HCI_PSCAN, &hdev->flags);
+		}
+		hci_req_complete(hdev, status);
+		break;
+
+	case OCF_READ_VOICE_SETTING:
+		vs = (struct hci_rp_read_voice_setting *) skb->data;
+
+		if (vs->status) {
+			BT_DBG("%s READ_VOICE_SETTING failed %d", hdev->name, vs->status);
+			break;
+		}
+
+		setting = __le16_to_cpu(vs->voice_setting);
+
+		if (hdev->voice_setting != setting ) {
+			hdev->voice_setting = setting;
+
+			BT_DBG("%s: voice setting 0x%04x", hdev->name, setting);
+
+			if (hdev->notify) {
+				tasklet_disable(&hdev->tx_task);
+				hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
+				tasklet_enable(&hdev->tx_task);
+			}
+		}
+		break;
+
+	case OCF_WRITE_VOICE_SETTING:
+		sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_VOICE_SETTING);
+		if (!sent)
+			break;
+
+		status = *((__u8 *) skb->data);
+		setting = __le16_to_cpu(get_unaligned((__u16 *) sent));
+
+		if (!status && hdev->voice_setting != setting) {
+			hdev->voice_setting = setting;
+
+			BT_DBG("%s: voice setting 0x%04x", hdev->name, setting);
+
+			if (hdev->notify) {
+				tasklet_disable(&hdev->tx_task);
+				hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
+				tasklet_enable(&hdev->tx_task);
+			}
+		}
+		hci_req_complete(hdev, status);
+		break;
+
+	case OCF_HOST_BUFFER_SIZE:
+		status = *((__u8 *) skb->data);
+		if (status) {
+			BT_DBG("%s OCF_BUFFER_SIZE failed %d", hdev->name, status);
+			hci_req_complete(hdev, status);
+		}
+		break;
+
+	default:
+		BT_DBG("%s Command complete: ogf HOST_CTL ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Complete OGF INFO_PARAM  */
+static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
+{
+	struct hci_rp_read_loc_features *lf;
+	struct hci_rp_read_buffer_size *bs;
+	struct hci_rp_read_bd_addr *ba;
+
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	case OCF_READ_LOCAL_FEATURES:
+		lf = (struct hci_rp_read_loc_features *) skb->data;
+
+		if (lf->status) {
+			BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status);
+			break;
+		}
+
+		memcpy(hdev->features, lf->features, sizeof(hdev->features));
+
+		/* Adjust default settings according to features 
+		 * supported by device. */
+		if (hdev->features[0] & LMP_3SLOT)
+			hdev->pkt_type |= (HCI_DM3 | HCI_DH3);
+
+		if (hdev->features[0] & LMP_5SLOT)
+			hdev->pkt_type |= (HCI_DM5 | HCI_DH5);
+
+		if (hdev->features[1] & LMP_HV2)
+			hdev->pkt_type |= (HCI_HV2);
+
+		if (hdev->features[1] & LMP_HV3)
+			hdev->pkt_type |= (HCI_HV3);
+
+		BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]);
+
+		break;
+
+	case OCF_READ_BUFFER_SIZE:
+		bs = (struct hci_rp_read_buffer_size *) skb->data;
+
+		if (bs->status) {
+			BT_DBG("%s READ_BUFFER_SIZE failed %d", hdev->name, bs->status);
+			hci_req_complete(hdev, bs->status);
+			break;
+		}
+
+		hdev->acl_mtu  = __le16_to_cpu(bs->acl_mtu);
+		hdev->sco_mtu  = bs->sco_mtu ? bs->sco_mtu : 64;
+		hdev->acl_pkts = hdev->acl_cnt = __le16_to_cpu(bs->acl_max_pkt);
+		hdev->sco_pkts = hdev->sco_cnt = __le16_to_cpu(bs->sco_max_pkt);
+
+		BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name,
+			hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts);
+		break;
+
+	case OCF_READ_BD_ADDR:
+		ba = (struct hci_rp_read_bd_addr *) skb->data;
+
+		if (!ba->status) {
+			bacpy(&hdev->bdaddr, &ba->bdaddr);
+		} else {
+			BT_DBG("%s: READ_BD_ADDR failed %d", hdev->name, ba->status);
+		}
+
+		hci_req_complete(hdev, ba->status);
+		break;
+
+	default:
+		BT_DBG("%s Command complete: ogf INFO_PARAM ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Status OGF LINK_CTL  */
+static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_conn *conn;
+	struct hci_cp_create_conn *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_CREATE_CONN);
+
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
+
+	BT_DBG("%s status 0x%x bdaddr %s conn %p", hdev->name,
+			status, batostr(&cp->bdaddr), conn);
+
+	if (status) {
+		if (conn && conn->state == BT_CONNECT) {
+			conn->state = BT_CLOSED;
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_del(conn);
+		}
+	} else {
+		if (!conn) {
+			conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr);
+			if (conn) {
+				conn->out = 1;
+				conn->link_mode |= HCI_LM_MASTER;
+			} else
+				BT_ERR("No memmory for new connection");
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+static void hci_cs_link_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status)
+{
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	case OCF_CREATE_CONN:
+		hci_cs_create_conn(hdev, status);
+		break;
+
+	case OCF_ADD_SCO:
+		if (status) {
+			struct hci_conn *acl, *sco;
+			struct hci_cp_add_sco *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_ADD_SCO);
+			__u16 handle;
+
+			if (!cp)
+				break;
+
+			handle = __le16_to_cpu(cp->handle);
+
+			BT_DBG("%s Add SCO error: handle %d status 0x%x", hdev->name, handle, status);
+
+			hci_dev_lock(hdev);
+
+			acl = hci_conn_hash_lookup_handle(hdev, handle);
+			if (acl && (sco = acl->link)) {
+				sco->state = BT_CLOSED;
+
+				hci_proto_connect_cfm(sco, status);
+				hci_conn_del(sco);
+			}
+
+			hci_dev_unlock(hdev);
+		}
+		break;
+
+	case OCF_INQUIRY:
+		if (status) {
+			BT_DBG("%s Inquiry error: status 0x%x", hdev->name, status);
+			hci_req_complete(hdev, status);
+		} else {
+			set_bit(HCI_INQUIRY, &hdev->flags);
+		}
+		break;
+
+	default:
+		BT_DBG("%s Command status: ogf LINK_CTL ocf %x status %d", 
+			hdev->name, ocf, status);
+		break;
+	}
+}
+
+/* Command Status OGF LINK_POLICY */
+static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status)
+{
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	default:
+		BT_DBG("%s Command status: ogf HOST_POLICY ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Status OGF HOST_CTL */
+static void hci_cs_host_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status)
+{
+	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	default:
+		BT_DBG("%s Command status: ogf HOST_CTL ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Command Status OGF INFO_PARAM  */
+static void hci_cs_info_param(struct hci_dev *hdev, __u16 ocf, __u8 status)
+{
+	BT_DBG("%s: hci_cs_info_param: ocf 0x%x", hdev->name, ocf);
+
+	switch (ocf) {
+	default:
+		BT_DBG("%s Command status: ogf INFO_PARAM ocf %x", hdev->name, ocf);
+		break;
+	}
+}
+
+/* Inquiry Complete */
+static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status %d", hdev->name, status);
+
+	clear_bit(HCI_INQUIRY, &hdev->flags);
+	hci_req_complete(hdev, status);
+}
+
+/* Inquiry Result */
+static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1);
+	int num_rsp = *((__u8 *) skb->data);
+
+	BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
+
+	hci_dev_lock(hdev);
+	for (; num_rsp; num_rsp--) {
+		struct inquiry_data data;
+		bacpy(&data.bdaddr, &info->bdaddr);
+		data.pscan_rep_mode	= info->pscan_rep_mode;
+		data.pscan_period_mode	= info->pscan_period_mode;
+		data.pscan_mode		= info->pscan_mode;
+		memcpy(data.dev_class, info->dev_class, 3);
+		data.clock_offset	= info->clock_offset;
+		data.rssi		= 0x00;
+		info++;
+		hci_inquiry_cache_update(hdev, &data);
+	}
+	hci_dev_unlock(hdev);
+}
+
+/* Inquiry Result With RSSI */
+static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct inquiry_info_with_rssi *info = (struct inquiry_info_with_rssi *) (skb->data + 1);
+	int num_rsp = *((__u8 *) skb->data);
+
+	BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
+
+	hci_dev_lock(hdev);
+	for (; num_rsp; num_rsp--) {
+		struct inquiry_data data;
+		bacpy(&data.bdaddr, &info->bdaddr);
+		data.pscan_rep_mode	= info->pscan_rep_mode;
+		data.pscan_period_mode	= info->pscan_period_mode;
+		data.pscan_mode		= 0x00;
+		memcpy(data.dev_class, info->dev_class, 3);
+		data.clock_offset	= info->clock_offset;
+		data.rssi		= info->rssi;
+		info++;
+		hci_inquiry_cache_update(hdev, &data);
+	}
+	hci_dev_unlock(hdev);
+}
+
+/* Connect Request */
+static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_conn_request *ev = (struct hci_ev_conn_request *) skb->data;
+	int mask = hdev->link_mode;
+
+	BT_DBG("%s Connection request: %s type 0x%x", hdev->name,
+			batostr(&ev->bdaddr), ev->link_type);
+
+	mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
+
+	if (mask & HCI_LM_ACCEPT) {
+		/* Connection accepted */
+		struct hci_conn *conn;
+		struct hci_cp_accept_conn_req cp;
+
+		hci_dev_lock(hdev);
+		conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
+		if (!conn) {
+			if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
+				BT_ERR("No memmory for new connection");
+				hci_dev_unlock(hdev);
+				return;
+			}
+		}
+		memcpy(conn->dev_class, ev->dev_class, 3);
+		conn->state = BT_CONNECT;
+		hci_dev_unlock(hdev);
+
+		bacpy(&cp.bdaddr, &ev->bdaddr);
+
+		if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
+			cp.role = 0x00; /* Become master */
+		else
+			cp.role = 0x01; /* Remain slave */
+
+		hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp);
+	} else {
+		/* Connection rejected */
+		struct hci_cp_reject_conn_req cp;
+
+		bacpy(&cp.bdaddr, &ev->bdaddr);
+		cp.reason = 0x0f;
+		hci_send_cmd(hdev, OGF_LINK_CTL, OCF_REJECT_CONN_REQ, sizeof(cp), &cp);
+	}
+}
+
+/* Connect Complete */
+static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data;
+	struct hci_conn *conn = NULL;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
+	if (!conn) {
+		hci_dev_unlock(hdev);
+		return;
+	}
+
+	if (!ev->status) {
+		conn->handle = __le16_to_cpu(ev->handle);
+		conn->state  = BT_CONNECTED;
+
+		if (test_bit(HCI_AUTH, &hdev->flags))
+			conn->link_mode |= HCI_LM_AUTH;
+
+		if (test_bit(HCI_ENCRYPT, &hdev->flags))
+			conn->link_mode |= HCI_LM_ENCRYPT;
+
+		/* Set link policy */
+		if (conn->type == ACL_LINK && hdev->link_policy) {
+			struct hci_cp_write_link_policy cp;
+			cp.handle = ev->handle;
+			cp.policy = __cpu_to_le16(hdev->link_policy);
+			hci_send_cmd(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
+		}
+
+		/* Set packet type for incoming connection */
+		if (!conn->out) {
+			struct hci_cp_change_conn_ptype cp;
+			cp.handle = ev->handle;
+			cp.pkt_type = (conn->type == ACL_LINK) ? 
+				__cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
+				__cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+
+			hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
+		}
+	} else
+		conn->state = BT_CLOSED;
+
+	if (conn->type == ACL_LINK) {
+		struct hci_conn *sco = conn->link;
+		if (sco) {
+			if (!ev->status)
+				hci_add_sco(sco, conn->handle);
+			else {
+				hci_proto_connect_cfm(sco, ev->status);
+				hci_conn_del(sco);
+			}
+		}
+	}
+
+	hci_proto_connect_cfm(conn, ev->status);
+	if (ev->status)
+		hci_conn_del(conn);
+
+	hci_dev_unlock(hdev);
+}
+
+/* Disconnect Complete */
+static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data;
+	struct hci_conn *conn = NULL;
+	__u16 handle = __le16_to_cpu(ev->handle);
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	if (ev->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (conn) {
+		conn->state = BT_CLOSED;
+		hci_proto_disconn_ind(conn, ev->reason);
+		hci_conn_del(conn);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+/* Number of completed packets */
+static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_num_comp_pkts *ev = (struct hci_ev_num_comp_pkts *) skb->data;
+	__u16 *ptr;
+	int i;
+
+	skb_pull(skb, sizeof(*ev));
+
+	BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl);
+
+	if (skb->len < ev->num_hndl * 4) {
+		BT_DBG("%s bad parameters", hdev->name);
+		return;
+	}
+
+	tasklet_disable(&hdev->tx_task);
+
+	for (i = 0, ptr = (__u16 *) skb->data; i < ev->num_hndl; i++) {
+		struct hci_conn *conn;
+		__u16  handle, count;
+
+		handle = __le16_to_cpu(get_unaligned(ptr++));
+		count  = __le16_to_cpu(get_unaligned(ptr++));
+
+		conn = hci_conn_hash_lookup_handle(hdev, handle);
+		if (conn) {
+			conn->sent -= count;
+
+			if (conn->type == SCO_LINK) {
+				if ((hdev->sco_cnt += count) > hdev->sco_pkts)
+					hdev->sco_cnt = hdev->sco_pkts;
+			} else {
+				if ((hdev->acl_cnt += count) > hdev->acl_pkts)
+					hdev->acl_cnt = hdev->acl_pkts;
+			}
+		}
+	}
+	hci_sched_tx(hdev);
+
+	tasklet_enable(&hdev->tx_task);
+}
+
+/* Role Change */
+static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data;
+	struct hci_conn *conn = NULL;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn) {
+		if (!ev->status) {
+			if (ev->role)
+				conn->link_mode &= ~HCI_LM_MASTER;
+			else
+				conn->link_mode |= HCI_LM_MASTER;
+		}
+
+		clear_bit(HCI_CONN_RSWITCH_PEND, &conn->pend);
+
+		hci_role_switch_cfm(conn, ev->status, ev->role);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+/* Authentication Complete */
+static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data;
+	struct hci_conn *conn = NULL;
+	__u16 handle = __le16_to_cpu(ev->handle);
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (conn) {
+		if (!ev->status)
+			conn->link_mode |= HCI_LM_AUTH;
+
+		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
+
+		hci_auth_cfm(conn, ev->status);
+
+		if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
+			if (!ev->status) {
+				struct hci_cp_set_conn_encrypt cp;
+				cp.handle  = __cpu_to_le16(conn->handle);
+				cp.encrypt = 1;
+				hci_send_cmd(conn->hdev, OGF_LINK_CTL,
+						OCF_SET_CONN_ENCRYPT,
+						sizeof(cp), &cp);
+			} else {
+				clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
+				hci_encrypt_cfm(conn, ev->status, 0x00);
+			}
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+/* Encryption Change */
+static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data;
+	struct hci_conn *conn = NULL;
+	__u16 handle = __le16_to_cpu(ev->handle);
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (conn) {
+		if (!ev->status) {
+			if (ev->encrypt)
+				conn->link_mode |= HCI_LM_ENCRYPT;
+			else
+				conn->link_mode &= ~HCI_LM_ENCRYPT;
+		}
+
+		clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
+
+		hci_encrypt_cfm(conn, ev->status, ev->encrypt);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+/* Change Connection Link Key Complete */
+static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data;
+	struct hci_conn *conn = NULL;
+	__u16 handle = __le16_to_cpu(ev->handle);
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (conn) {
+		if (!ev->status)
+			conn->link_mode |= HCI_LM_SECURE;
+
+		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
+
+		hci_key_change_cfm(conn, ev->status);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+/* Pin Code Request*/
+static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+}
+
+/* Link Key Request */
+static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+}
+
+/* Link Key Notification */
+static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+}
+
+/* Clock Offset */
+static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data;
+	struct hci_conn *conn = NULL;
+	__u16 handle = __le16_to_cpu(ev->handle);
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (conn && !ev->status) {
+		struct inquiry_entry *ie;
+
+		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) {
+			ie->data.clock_offset = ev->clock_offset;
+			ie->timestamp = jiffies;
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
+void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data;
+	struct hci_ev_cmd_complete *ec;
+	struct hci_ev_cmd_status *cs;
+	u16 opcode, ocf, ogf;
+
+	skb_pull(skb, HCI_EVENT_HDR_SIZE);
+
+	BT_DBG("%s evt 0x%x", hdev->name, hdr->evt);
+
+	switch (hdr->evt) {
+	case HCI_EV_NUM_COMP_PKTS:
+		hci_num_comp_pkts_evt(hdev, skb);
+		break;
+
+	case HCI_EV_INQUIRY_COMPLETE:
+		hci_inquiry_complete_evt(hdev, skb);
+		break;
+
+	case HCI_EV_INQUIRY_RESULT:
+		hci_inquiry_result_evt(hdev, skb);
+		break;
+
+	case HCI_EV_INQUIRY_RESULT_WITH_RSSI:
+		hci_inquiry_result_with_rssi_evt(hdev, skb);
+		break;
+
+	case HCI_EV_CONN_REQUEST:
+		hci_conn_request_evt(hdev, skb);
+		break;
+
+	case HCI_EV_CONN_COMPLETE:
+		hci_conn_complete_evt(hdev, skb);
+		break;
+
+	case HCI_EV_DISCONN_COMPLETE:
+		hci_disconn_complete_evt(hdev, skb);
+		break;
+
+	case HCI_EV_ROLE_CHANGE:
+		hci_role_change_evt(hdev, skb);
+		break;
+
+	case HCI_EV_AUTH_COMPLETE:
+		hci_auth_complete_evt(hdev, skb);
+		break;
+
+	case HCI_EV_ENCRYPT_CHANGE:
+		hci_encrypt_change_evt(hdev, skb);
+		break;
+
+	case HCI_EV_CHANGE_CONN_LINK_KEY_COMPLETE:
+		hci_change_conn_link_key_complete_evt(hdev, skb);
+		break;
+
+	case HCI_EV_PIN_CODE_REQ:
+		hci_pin_code_request_evt(hdev, skb);
+		break;
+
+	case HCI_EV_LINK_KEY_REQ:
+		hci_link_key_request_evt(hdev, skb);
+		break;
+
+	case HCI_EV_LINK_KEY_NOTIFY:
+		hci_link_key_notify_evt(hdev, skb);
+		break;
+
+	case HCI_EV_CLOCK_OFFSET:
+		hci_clock_offset_evt(hdev, skb);
+		break;
+
+	case HCI_EV_CMD_STATUS:
+		cs = (struct hci_ev_cmd_status *) skb->data;
+		skb_pull(skb, sizeof(cs));
+
+		opcode = __le16_to_cpu(cs->opcode);
+		ogf = hci_opcode_ogf(opcode);
+		ocf = hci_opcode_ocf(opcode);
+
+		switch (ogf) {
+		case OGF_INFO_PARAM:
+			hci_cs_info_param(hdev, ocf, cs->status);
+			break;
+
+		case OGF_HOST_CTL:
+			hci_cs_host_ctl(hdev, ocf, cs->status);
+			break;
+
+		case OGF_LINK_CTL:
+			hci_cs_link_ctl(hdev, ocf, cs->status);
+			break;
+
+		case OGF_LINK_POLICY:
+			hci_cs_link_policy(hdev, ocf, cs->status);
+			break;
+
+		default:
+			BT_DBG("%s Command Status OGF %x", hdev->name, ogf);
+			break;
+		}
+
+		if (cs->ncmd) {
+			atomic_set(&hdev->cmd_cnt, 1);
+			if (!skb_queue_empty(&hdev->cmd_q))
+				hci_sched_cmd(hdev);
+		}
+		break;
+
+	case HCI_EV_CMD_COMPLETE:
+		ec = (struct hci_ev_cmd_complete *) skb->data;
+		skb_pull(skb, sizeof(*ec));
+
+		opcode = __le16_to_cpu(ec->opcode);
+		ogf = hci_opcode_ogf(opcode);
+		ocf = hci_opcode_ocf(opcode);
+
+		switch (ogf) {
+		case OGF_INFO_PARAM:
+			hci_cc_info_param(hdev, ocf, skb);
+			break;
+
+		case OGF_HOST_CTL:
+			hci_cc_host_ctl(hdev, ocf, skb);
+			break;
+
+		case OGF_LINK_CTL:
+			hci_cc_link_ctl(hdev, ocf, skb);
+			break;
+
+		case OGF_LINK_POLICY:
+			hci_cc_link_policy(hdev, ocf, skb);
+			break;
+
+		default:
+			BT_DBG("%s Command Completed OGF %x", hdev->name, ogf);
+			break;
+		}
+
+		if (ec->ncmd) {
+			atomic_set(&hdev->cmd_cnt, 1);
+			if (!skb_queue_empty(&hdev->cmd_q))
+				hci_sched_cmd(hdev);
+		}
+		break;
+	}
+
+	kfree_skb(skb);
+	hdev->stat.evt_rx++;
+}
+
+/* Generate internal stack event */
+void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
+{
+	struct hci_event_hdr *hdr;
+	struct hci_ev_stack_internal *ev;
+	struct sk_buff *skb;
+
+	skb = bt_skb_alloc(HCI_EVENT_HDR_SIZE + sizeof(*ev) + dlen, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	hdr = (void *) skb_put(skb, HCI_EVENT_HDR_SIZE);
+	hdr->evt  = HCI_EV_STACK_INTERNAL;
+	hdr->plen = sizeof(*ev) + dlen;
+
+	ev  = (void *) skb_put(skb, sizeof(*ev) + dlen);
+	ev->type = type;
+	memcpy(ev->data, data, dlen);
+
+	skb->pkt_type = HCI_EVENT_PKT;
+	skb->dev = (void *) hdev;
+	hci_send_to_sock(hdev, skb);
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL(hci_si_event);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
new file mode 100644
index 00000000000..c9792ba7512
--- /dev/null
+++ b/net/bluetooth/hci_sock.c
@@ -0,0 +1,707 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth HCI sockets. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCI_SOCK_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+/* ----- HCI socket interface ----- */
+
+static inline int hci_test_bit(int nr, void *addr)
+{
+	return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
+}
+
+/* Security filter */
+static struct hci_sec_filter hci_sec_filter = {
+	/* Packet types */
+	0x10,
+	/* Events */
+	{ 0x1000d9fe, 0x0000300c },
+	/* Commands */
+	{
+		{ 0x0 },
+		/* OGF_LINK_CTL */
+		{ 0xbe000006, 0x00000001, 0x0000, 0x00 },
+		/* OGF_LINK_POLICY */
+		{ 0x00005200, 0x00000000, 0x0000, 0x00 },
+		/* OGF_HOST_CTL */
+		{ 0xaab00200, 0x2b402aaa, 0x0154, 0x00 },
+		/* OGF_INFO_PARAM */
+		{ 0x000002be, 0x00000000, 0x0000, 0x00 },
+		/* OGF_STATUS_PARAM */
+		{ 0x000000ea, 0x00000000, 0x0000, 0x00 }
+	}
+};
+
+static struct bt_sock_list hci_sk_list = {
+	.lock = RW_LOCK_UNLOCKED
+};
+
+/* Send frame to RAW socket */
+void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	BT_DBG("hdev %p len %d", hdev, skb->len);
+
+	read_lock(&hci_sk_list.lock);
+	sk_for_each(sk, node, &hci_sk_list.head) {
+		struct hci_filter *flt;
+		struct sk_buff *nskb;
+
+		if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev)
+			continue;
+
+		/* Don't send frame to the socket it came from */
+		if (skb->sk == sk)
+			continue;
+
+		/* Apply filter */
+		flt = &hci_pi(sk)->filter;
+
+		if (!test_bit((skb->pkt_type == HCI_VENDOR_PKT) ?
+				0 : (skb->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask))
+			continue;
+
+		if (skb->pkt_type == HCI_EVENT_PKT) {
+			register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS);
+
+			if (!hci_test_bit(evt, &flt->event_mask))
+				continue;
+
+			if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE && 
+					flt->opcode != *(__u16 *)(skb->data + 3)) ||
+					(evt == HCI_EV_CMD_STATUS && 
+					flt->opcode != *(__u16 *)(skb->data + 4))))
+				continue;
+		}
+
+		if (!(nskb = skb_clone(skb, GFP_ATOMIC)))
+			continue;
+
+		/* Put type byte before the data */
+		memcpy(skb_push(nskb, 1), &nskb->pkt_type, 1);
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+	read_unlock(&hci_sk_list.lock);
+}
+
+static int hci_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	bt_sock_unlink(&hci_sk_list, sk);
+
+	if (hdev) {
+		atomic_dec(&hdev->promisc);
+		hci_dev_put(hdev);
+	}
+
+	sock_orphan(sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	sock_put(sk);
+	return 0;
+}
+
+/* Ioctls that require bound socket */ 
+static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
+{
+	struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+	if (!hdev)
+		return -EBADFD;
+
+	switch (cmd) {
+	case HCISETRAW:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+			return -EPERM;
+
+		if (arg)
+			set_bit(HCI_RAW, &hdev->flags);
+		else
+			clear_bit(HCI_RAW, &hdev->flags);
+
+		return 0;
+
+	case HCISETSECMGR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (arg)
+			set_bit(HCI_SECMGR, &hdev->flags);
+		else
+			clear_bit(HCI_SECMGR, &hdev->flags);
+
+		return 0;
+
+	case HCIGETCONNINFO:
+		return hci_get_conn_info(hdev, (void __user *)arg);
+
+	default:
+		if (hdev->ioctl)
+			return hdev->ioctl(hdev, cmd, arg);
+		return -EINVAL;
+	}
+}
+
+static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+	int err;
+
+	BT_DBG("cmd %x arg %lx", cmd, arg);
+
+	switch (cmd) {
+	case HCIGETDEVLIST:
+		return hci_get_dev_list(argp);
+
+	case HCIGETDEVINFO:
+		return hci_get_dev_info(argp);
+
+	case HCIGETCONNLIST:
+		return hci_get_conn_list(argp);
+
+	case HCIDEVUP:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_dev_open(arg);
+
+	case HCIDEVDOWN:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_dev_close(arg);
+
+	case HCIDEVRESET:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_dev_reset(arg);
+
+	case HCIDEVRESTAT:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_dev_reset_stat(arg);
+
+	case HCISETSCAN:
+	case HCISETAUTH:
+	case HCISETENCRYPT:
+	case HCISETPTYPE:
+	case HCISETLINKPOL:
+	case HCISETLINKMODE:
+	case HCISETACLMTU:
+	case HCISETSCOMTU:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_dev_cmd(cmd, argp);
+
+	case HCIINQUIRY:
+		return hci_inquiry(argp);
+
+	default:
+		lock_sock(sk);
+		err = hci_sock_bound_ioctl(sk, cmd, arg);
+		release_sock(sk);
+		return err;
+	}
+}
+
+static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr;
+	struct sock *sk = sock->sk;
+	struct hci_dev *hdev = NULL;
+	int err = 0;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (!haddr || haddr->hci_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (hci_pi(sk)->hdev) {
+		err = -EALREADY;
+		goto done;
+	}
+
+	if (haddr->hci_dev != HCI_DEV_NONE) {
+		if (!(hdev = hci_dev_get(haddr->hci_dev))) {
+			err = -ENODEV;
+			goto done;
+		}
+
+		atomic_inc(&hdev->promisc);
+	}
+
+	hci_pi(sk)->hdev = hdev;
+	sk->sk_state = BT_BOUND;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
+{
+	struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr;
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	lock_sock(sk);
+
+	*addr_len = sizeof(*haddr);
+	haddr->hci_family = AF_BLUETOOTH;
+	haddr->hci_dev    = hci_pi(sk)->hdev->id;
+
+	release_sock(sk);
+	return 0;
+}
+
+static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+{
+	__u32 mask = hci_pi(sk)->cmsg_mask;
+
+	if (mask & HCI_CMSG_DIR)
+		put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(int), &bt_cb(skb)->incoming);
+
+	if (mask & HCI_CMSG_TSTAMP)
+		put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp);
+}
+ 
+static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
+				struct msghdr *msg, size_t len, int flags)
+{
+	int noblock = flags & MSG_DONTWAIT;
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied, err;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (flags & (MSG_OOB))
+		return -EOPNOTSUPP;
+
+	if (sk->sk_state == BT_CLOSED)
+		return 0;
+
+	if (!(skb = skb_recv_datagram(sk, flags, noblock, &err)))
+		return err;
+
+	msg->msg_namelen = 0;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	skb->h.raw = skb->data;
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	hci_sock_cmsg(sk, msg, skb);
+
+	skb_free_datagram(sk, skb);
+
+	return err ? : copied;
+}
+
+static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+			    struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct hci_dev *hdev;
+	struct sk_buff *skb;
+	int err;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE))
+		return -EINVAL;
+
+	if (len < 4 || len > HCI_MAX_FRAME_SIZE)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (!(hdev = hci_pi(sk)->hdev)) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	if (!(skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err)))
+		goto done;
+
+	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
+		err = -EFAULT;
+		goto drop;
+	}
+
+	skb->pkt_type = *((unsigned char *) skb->data);
+	skb_pull(skb, 1);
+	skb->dev = (void *) hdev;
+
+	if (skb->pkt_type == HCI_COMMAND_PKT) {
+		u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data));
+		u16 ogf = hci_opcode_ogf(opcode);
+		u16 ocf = hci_opcode_ocf(opcode);
+
+		if (((ogf > HCI_SFLT_MAX_OGF) ||
+				!hci_test_bit(ocf & HCI_FLT_OCF_BITS, &hci_sec_filter.ocf_mask[ogf])) &&
+					!capable(CAP_NET_RAW)) {
+			err = -EPERM;
+			goto drop;
+		}
+
+		if (test_bit(HCI_RAW, &hdev->flags) || (ogf == OGF_VENDOR_CMD)) {
+			skb_queue_tail(&hdev->raw_q, skb);
+			hci_sched_tx(hdev);
+		} else {
+			skb_queue_tail(&hdev->cmd_q, skb);
+			hci_sched_cmd(hdev);
+		}
+	} else {
+		if (!capable(CAP_NET_RAW)) {
+			err = -EPERM;
+			goto drop;
+		}
+
+		skb_queue_tail(&hdev->raw_q, skb);
+		hci_sched_tx(hdev);
+	}
+
+	err = len;
+
+done:
+	release_sock(sk);
+	return err;
+
+drop:
+	kfree_skb(skb);
+	goto done;
+}
+
+static int hci_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int len)
+{
+	struct hci_ufilter uf = { .opcode = 0 };
+	struct sock *sk = sock->sk;
+	int err = 0, opt = 0;
+
+	BT_DBG("sk %p, opt %d", sk, optname);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case HCI_DATA_DIR:
+		if (get_user(opt, (int __user *)optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt)
+			hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
+		else
+			hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_DIR;
+		break;
+
+	case HCI_TIME_STAMP:
+		if (get_user(opt, (int __user *)optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt)
+			hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
+		else
+			hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_TSTAMP;
+		break;
+
+	case HCI_FILTER:
+		len = min_t(unsigned int, len, sizeof(uf));
+		if (copy_from_user(&uf, optval, len)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (!capable(CAP_NET_RAW)) {
+			uf.type_mask &= hci_sec_filter.type_mask;
+			uf.event_mask[0] &= *((u32 *) hci_sec_filter.event_mask + 0);
+			uf.event_mask[1] &= *((u32 *) hci_sec_filter.event_mask + 1);
+		}
+
+		{
+			struct hci_filter *f = &hci_pi(sk)->filter;
+
+			f->type_mask = uf.type_mask;
+			f->opcode    = uf.opcode;
+			*((u32 *) f->event_mask + 0) = uf.event_mask[0];
+			*((u32 *) f->event_mask + 1) = uf.event_mask[1];
+		}
+		break; 
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct hci_ufilter uf;
+	struct sock *sk = sock->sk;
+	int len, opt; 
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	switch (optname) {
+	case HCI_DATA_DIR:
+		if (hci_pi(sk)->cmsg_mask & HCI_CMSG_DIR)
+			opt = 1;
+		else 
+			opt = 0;
+
+		if (put_user(opt, optval))
+			return -EFAULT;
+		break;
+
+	case HCI_TIME_STAMP:
+		if (hci_pi(sk)->cmsg_mask & HCI_CMSG_TSTAMP)
+			opt = 1;
+		else 
+			opt = 0;
+
+		if (put_user(opt, optval))
+			return -EFAULT;
+		break;
+
+	case HCI_FILTER:
+		{
+			struct hci_filter *f = &hci_pi(sk)->filter;
+
+			uf.type_mask = f->type_mask;
+			uf.opcode    = f->opcode;
+			uf.event_mask[0] = *((u32 *) f->event_mask + 0);
+			uf.event_mask[1] = *((u32 *) f->event_mask + 1);
+		}
+
+		len = min_t(unsigned int, len, sizeof(uf));
+		if (copy_to_user(optval, &uf, len))
+			return -EFAULT;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+		break;
+	}
+
+	return 0;
+}
+
+static struct proto_ops hci_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= hci_sock_release,
+	.bind		= hci_sock_bind,
+	.getname	= hci_sock_getname,
+	.sendmsg	= hci_sock_sendmsg,
+	.recvmsg	= hci_sock_recvmsg,
+	.ioctl		= hci_sock_ioctl,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= hci_sock_setsockopt,
+	.getsockopt	= hci_sock_getsockopt,
+	.connect	= sock_no_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.mmap		= sock_no_mmap
+};
+
+static struct proto hci_sk_proto = {
+	.name		= "HCI",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct hci_pinfo)
+};
+
+static int hci_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &hci_sock_ops;
+
+	sk = sk_alloc(PF_BLUETOOTH, GFP_KERNEL, &hci_sk_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = protocol;
+
+	sock->state = SS_UNCONNECTED;
+	sk->sk_state = BT_OPEN;
+
+	bt_sock_link(&hci_sk_list, sk);
+	return 0;
+}
+
+static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct hci_dev *hdev = (struct hci_dev *) ptr;
+	struct hci_ev_si_device ev;
+
+	BT_DBG("hdev %s event %ld", hdev->name, event);
+
+	/* Send event to sockets */
+	ev.event  = event;
+	ev.dev_id = hdev->id;
+	hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev);
+
+	if (event == HCI_DEV_UNREG) {
+		struct sock *sk;
+		struct hlist_node *node;
+
+		/* Detach sockets from device */
+		read_lock(&hci_sk_list.lock);
+		sk_for_each(sk, node, &hci_sk_list.head) {
+			bh_lock_sock(sk);
+			if (hci_pi(sk)->hdev == hdev) {
+				hci_pi(sk)->hdev = NULL;
+				sk->sk_err = EPIPE;
+				sk->sk_state = BT_OPEN;
+				sk->sk_state_change(sk);
+
+				hci_dev_put(hdev);
+			}
+			bh_unlock_sock(sk);
+		}
+		read_unlock(&hci_sk_list.lock);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct net_proto_family hci_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= hci_sock_create,
+};
+
+static struct notifier_block hci_sock_nblock = {
+	.notifier_call = hci_sock_dev_event
+};
+
+int __init hci_sock_init(void)
+{
+	int err;
+
+	err = proto_register(&hci_sk_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	hci_register_notifier(&hci_sock_nblock);
+
+	BT_INFO("HCI socket layer initialized");
+
+	return 0;
+
+error:
+	BT_ERR("HCI socket registration failed");
+	proto_unregister(&hci_sk_proto);
+	return err;
+}
+
+int __exit hci_sock_cleanup(void)
+{
+	if (bt_sock_unregister(BTPROTO_HCI) < 0)
+		BT_ERR("HCI socket unregistration failed");
+
+	hci_unregister_notifier(&hci_sock_nblock);
+
+	proto_unregister(&hci_sk_proto);
+
+	return 0;
+}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
new file mode 100644
index 00000000000..7856bc26acc
--- /dev/null
+++ b/net/bluetooth/hci_sysfs.c
@@ -0,0 +1,153 @@
+/* Bluetooth HCI driver model support. */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#ifndef CONFIG_BT_HCI_CORE_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static ssize_t show_name(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "%s\n", hdev->name);
+}
+
+static ssize_t show_type(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "%d\n", hdev->type);
+}
+
+static ssize_t show_address(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	bdaddr_t bdaddr;
+	baswap(&bdaddr, &hdev->bdaddr);
+	return sprintf(buf, "%s\n", batostr(&bdaddr));
+}
+
+static ssize_t show_flags(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "0x%lx\n", hdev->flags);
+}
+
+static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct inquiry_cache *cache = &hdev->inq_cache;
+	struct inquiry_entry *e;
+	int n = 0;
+
+	hci_dev_lock_bh(hdev);
+
+	for (e = cache->list; e; e = e->next) {
+		struct inquiry_data *data = &e->data;
+		bdaddr_t bdaddr;
+		baswap(&bdaddr, &data->bdaddr);
+		n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %u\n",
+				batostr(&bdaddr),
+				data->pscan_rep_mode, data->pscan_period_mode, data->pscan_mode,
+				data->dev_class[2], data->dev_class[1], data->dev_class[0],
+				__le16_to_cpu(data->clock_offset), data->rssi, e->timestamp);
+	}
+
+	hci_dev_unlock_bh(hdev);
+	return n;
+}
+
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
+static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
+static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
+
+static struct class_device_attribute *bt_attrs[] = {
+	&class_device_attr_name,
+	&class_device_attr_type,
+	&class_device_attr_address,
+	&class_device_attr_flags,
+	&class_device_attr_inquiry_cache,
+	NULL
+};
+
+#ifdef CONFIG_HOTPLUG
+static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	int n, i = 0;
+
+	envp[i++] = buf;
+	n = snprintf(buf, size, "INTERFACE=%s", hdev->name) + 1;
+	buf += n;
+	size -= n;
+
+	if ((size <= 0) || (i >= num_envp))
+		return -ENOMEM;
+
+	envp[i] = NULL;
+	return 0;
+}
+#endif
+
+static void bt_release(struct class_device *cdev)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+
+	kfree(hdev);
+}
+
+static struct class bt_class = {
+	.name		= "bluetooth",
+	.release	= bt_release,
+#ifdef CONFIG_HOTPLUG
+	.hotplug	= bt_hotplug,
+#endif
+};
+
+int hci_register_sysfs(struct hci_dev *hdev)
+{
+	struct class_device *cdev = &hdev->class_dev;
+	unsigned int i;
+	int err;
+
+	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
+
+	cdev->class = &bt_class;
+	class_set_devdata(cdev, hdev);
+
+	strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE);
+	err = class_device_register(cdev);
+	if (err < 0)
+		return err;
+
+	for (i = 0; bt_attrs[i]; i++)
+		class_device_create_file(cdev, bt_attrs[i]);
+
+	return 0;
+}
+
+void hci_unregister_sysfs(struct hci_dev *hdev)
+{
+	struct class_device * cdev = &hdev->class_dev;
+
+	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
+
+	class_device_del(cdev);
+}
+
+int __init bt_sysfs_init(void)
+{
+	return class_register(&bt_class);
+}
+
+void __exit bt_sysfs_cleanup(void)
+{
+	class_unregister(&bt_class);
+}
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
new file mode 100644
index 00000000000..4e958f7d941
--- /dev/null
+++ b/net/bluetooth/hidp/Kconfig
@@ -0,0 +1,12 @@
+config BT_HIDP
+	tristate "HIDP protocol support"
+	depends on BT && BT_L2CAP
+	select INPUT
+	help
+	  HIDP (Human Interface Device Protocol) is a transport layer
+	  for HID reports.  HIDP is required for the Bluetooth Human
+	  Interface Device Profile.
+
+	  Say Y here to compile HIDP support into the kernel or say M to
+	  compile it as module (hidp).
+
diff --git a/net/bluetooth/hidp/Makefile b/net/bluetooth/hidp/Makefile
new file mode 100644
index 00000000000..a9ee115696a
--- /dev/null
+++ b/net/bluetooth/hidp/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux Bluetooth HIDP layer
+#
+
+obj-$(CONFIG_BT_HIDP) += hidp.o
+
+hidp-objs := core.o sock.o
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
new file mode 100644
index 00000000000..2cf98ceabcc
--- /dev/null
+++ b/net/bluetooth/hidp/core.c
@@ -0,0 +1,772 @@
+/* 
+   HIDP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+#include <linux/input.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/l2cap.h>
+
+#include "hidp.h"
+
+#ifndef CONFIG_BT_HIDP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "1.1"
+
+static DECLARE_RWSEM(hidp_session_sem);
+static LIST_HEAD(hidp_session_list);
+
+static unsigned char hidp_keycode[256] = {
+	  0,  0,  0,  0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38,
+	 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44,  2,  3,
+	  4,  5,  6,  7,  8,  9, 10, 11, 28,  1, 14, 15, 57, 12, 13, 26,
+	 27, 43, 43, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64,
+	 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106,
+	105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71,
+	 72, 73, 82, 83, 86,127,116,117,183,184,185,186,187,188,189,190,
+	191,192,193,194,134,138,130,132,128,129,131,137,133,135,136,113,
+	115,114,  0,  0,  0,121,  0, 89, 93,124, 92, 94, 95,  0,  0,  0,
+	122,123, 90, 91, 85,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113,
+	150,158,159,128,136,177,178,176,142,152,173,140
+};
+
+static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 };
+
+static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr)
+{
+	struct hidp_session *session;
+	struct list_head *p;
+
+	BT_DBG("");
+
+	list_for_each(p, &hidp_session_list) {
+		session = list_entry(p, struct hidp_session, list);
+		if (!bacmp(bdaddr, &session->bdaddr))
+			return session;
+	}
+	return NULL;
+}
+
+static void __hidp_link_session(struct hidp_session *session)
+{
+	__module_get(THIS_MODULE);
+	list_add(&session->list, &hidp_session_list);
+}
+
+static void __hidp_unlink_session(struct hidp_session *session)
+{
+	list_del(&session->list);
+	module_put(THIS_MODULE);
+}
+
+static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
+{
+	bacpy(&ci->bdaddr, &session->bdaddr);
+
+	ci->flags = session->flags;
+	ci->state = session->state;
+
+	ci->vendor  = 0x0000;
+	ci->product = 0x0000;
+	ci->version = 0x0000;
+	memset(ci->name, 0, 128);
+
+	if (session->input) {
+		ci->vendor  = session->input->id.vendor;
+		ci->product = session->input->id.product;
+		ci->version = session->input->id.version;
+		if (session->input->name)
+			strncpy(ci->name, session->input->name, 128);
+		else
+			strncpy(ci->name, "HID Boot Device", 128);
+	}
+}
+
+static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+{
+	struct hidp_session *session = dev->private;
+	struct sk_buff *skb;
+	unsigned char newleds;
+
+	BT_DBG("input %p type %d code %d value %d", dev, type, code, value);
+
+	if (type != EV_LED)
+		return -1;
+
+	newleds = (!!test_bit(LED_KANA,    dev->led) << 3) |
+		  (!!test_bit(LED_COMPOSE, dev->led) << 3) |
+		  (!!test_bit(LED_SCROLLL, dev->led) << 2) |
+		  (!!test_bit(LED_CAPSL,   dev->led) << 1) |
+		  (!!test_bit(LED_NUML,    dev->led));
+
+	if (session->leds == newleds)
+		return 0;
+
+	session->leds = newleds;
+
+	if (!(skb = alloc_skb(3, GFP_ATOMIC))) {
+		BT_ERR("Can't allocate memory for new frame");
+		return -ENOMEM;
+	}
+
+	*skb_put(skb, 1) = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+	*skb_put(skb, 1) = 0x01;
+	*skb_put(skb, 1) = newleds;
+
+	skb_queue_tail(&session->intr_transmit, skb);
+
+	hidp_schedule(session);
+
+	return 0;
+}
+
+static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
+{
+	struct input_dev *dev = session->input;
+	unsigned char *keys = session->keys;
+	unsigned char *udata = skb->data + 1;
+	signed char *sdata = skb->data + 1;
+	int i, size = skb->len - 1;
+
+	switch (skb->data[0]) {
+	case 0x01:	/* Keyboard report */
+		for (i = 0; i < 8; i++)
+			input_report_key(dev, hidp_keycode[i + 224], (udata[0] >> i) & 1);
+
+		/* If all the key codes have been set to 0x01, it means
+		 * too many keys were pressed at the same time. */
+		if (!memcmp(udata + 2, hidp_mkeyspat, 6))
+			break;
+
+		for (i = 2; i < 8; i++) {
+			if (keys[i] > 3 && memscan(udata + 2, keys[i], 6) == udata + 8) {
+				if (hidp_keycode[keys[i]])
+					input_report_key(dev, hidp_keycode[keys[i]], 0);
+				else
+					BT_ERR("Unknown key (scancode %#x) released.", keys[i]);
+			}
+
+			if (udata[i] > 3 && memscan(keys + 2, udata[i], 6) == keys + 8) {
+				if (hidp_keycode[udata[i]])
+					input_report_key(dev, hidp_keycode[udata[i]], 1);
+				else
+					BT_ERR("Unknown key (scancode %#x) pressed.", udata[i]);
+			}
+		}
+
+		memcpy(keys, udata, 8);
+		break;
+
+	case 0x02:	/* Mouse report */
+		input_report_key(dev, BTN_LEFT,   sdata[0] & 0x01);
+		input_report_key(dev, BTN_RIGHT,  sdata[0] & 0x02);
+		input_report_key(dev, BTN_MIDDLE, sdata[0] & 0x04);
+		input_report_key(dev, BTN_SIDE,   sdata[0] & 0x08);
+		input_report_key(dev, BTN_EXTRA,  sdata[0] & 0x10);
+
+		input_report_rel(dev, REL_X, sdata[1]);
+		input_report_rel(dev, REL_Y, sdata[2]);
+
+		if (size > 3)
+			input_report_rel(dev, REL_WHEEL, sdata[3]);
+		break;
+	}
+
+	input_sync(dev);
+}
+
+static void hidp_idle_timeout(unsigned long arg)
+{
+	struct hidp_session *session = (struct hidp_session *) arg;
+
+	atomic_inc(&session->terminate);
+	hidp_schedule(session);
+}
+
+static inline void hidp_set_timer(struct hidp_session *session)
+{
+	if (session->idle_to > 0)
+		mod_timer(&session->timer, jiffies + HZ * session->idle_to);
+}
+
+static inline void hidp_del_timer(struct hidp_session *session)
+{
+	if (session->idle_to > 0)
+		del_timer(&session->timer);
+}
+
+static int __hidp_send_ctrl_message(struct hidp_session *session,
+			unsigned char hdr, unsigned char *data, int size)
+{
+	struct sk_buff *skb;
+
+	BT_DBG("session %p data %p size %d", session, data, size);
+
+	if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) {
+		BT_ERR("Can't allocate memory for new frame");
+		return -ENOMEM;
+	}
+
+	*skb_put(skb, 1) = hdr;
+	if (data && size > 0)
+		memcpy(skb_put(skb, size), data, size);
+
+	skb_queue_tail(&session->ctrl_transmit, skb);
+
+	return 0;
+}
+
+static int inline hidp_send_ctrl_message(struct hidp_session *session,
+			unsigned char hdr, unsigned char *data, int size)
+{
+	int err;
+
+	err = __hidp_send_ctrl_message(session, hdr, data, size);
+
+	hidp_schedule(session);
+
+	return err;
+}
+
+static inline void hidp_process_handshake(struct hidp_session *session, unsigned char param)
+{
+	BT_DBG("session %p param 0x%02x", session, param);
+
+	switch (param) {
+	case HIDP_HSHK_SUCCESSFUL:
+		/* FIXME: Call into SET_ GET_ handlers here */
+		break;
+
+	case HIDP_HSHK_NOT_READY:
+	case HIDP_HSHK_ERR_INVALID_REPORT_ID:
+	case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST:
+	case HIDP_HSHK_ERR_INVALID_PARAMETER:
+		/* FIXME: Call into SET_ GET_ handlers here */
+		break;
+
+	case HIDP_HSHK_ERR_UNKNOWN:
+		break;
+
+	case HIDP_HSHK_ERR_FATAL:
+		/* Device requests a reboot, as this is the only way this error
+ 		 * can be recovered. */
+		__hidp_send_ctrl_message(session,
+			HIDP_TRANS_HID_CONTROL | HIDP_CTRL_SOFT_RESET, NULL, 0);
+		break;
+
+	default:
+		__hidp_send_ctrl_message(session,
+			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
+		break;
+	}
+}
+
+static inline void hidp_process_hid_control(struct hidp_session *session, unsigned char param)
+{
+	BT_DBG("session %p param 0x%02x", session, param);
+
+	switch (param) {
+	case HIDP_CTRL_NOP:
+		break;
+
+	case HIDP_CTRL_VIRTUAL_CABLE_UNPLUG:
+		/* Flush the transmit queues */
+		skb_queue_purge(&session->ctrl_transmit);
+		skb_queue_purge(&session->intr_transmit);
+
+		/* Kill session thread */
+		atomic_inc(&session->terminate);
+		break;
+
+	case HIDP_CTRL_HARD_RESET:
+	case HIDP_CTRL_SOFT_RESET:
+	case HIDP_CTRL_SUSPEND:
+	case HIDP_CTRL_EXIT_SUSPEND:
+		/* FIXME: We have to parse these and return no error */
+		break;
+
+	default:
+		__hidp_send_ctrl_message(session,
+			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
+		break;
+	}
+}
+
+static inline void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, unsigned char param)
+{
+	BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
+
+	switch (param) {
+	case HIDP_DATA_RTYPE_INPUT:
+		hidp_set_timer(session);
+
+		if (session->input)
+			hidp_input_report(session, skb);
+		break;
+
+	case HIDP_DATA_RTYPE_OTHER:
+	case HIDP_DATA_RTYPE_OUPUT:
+	case HIDP_DATA_RTYPE_FEATURE:
+		break;
+
+	default:
+		__hidp_send_ctrl_message(session,
+			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
+	}
+}
+
+static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_buff *skb)
+{
+	unsigned char hdr, type, param;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	hdr = skb->data[0];
+	skb_pull(skb, 1);
+
+	type = hdr & HIDP_HEADER_TRANS_MASK;
+	param = hdr & HIDP_HEADER_PARAM_MASK;
+
+	switch (type) {
+	case HIDP_TRANS_HANDSHAKE:
+		hidp_process_handshake(session, param);
+		break;
+
+	case HIDP_TRANS_HID_CONTROL:
+		hidp_process_hid_control(session, param);
+		break;
+
+	case HIDP_TRANS_DATA:
+		hidp_process_data(session, skb, param);
+		break;
+
+	default:
+		__hidp_send_ctrl_message(session,
+			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_UNSUPPORTED_REQUEST, NULL, 0);
+		break;
+	}
+
+	kfree_skb(skb);
+}
+
+static inline void hidp_recv_intr_frame(struct hidp_session *session, struct sk_buff *skb)
+{
+	unsigned char hdr;
+
+	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+
+	hdr = skb->data[0];
+	skb_pull(skb, 1);
+
+	if (hdr == (HIDP_TRANS_DATA | HIDP_DATA_RTYPE_INPUT)) {
+		hidp_set_timer(session);
+		if (session->input)
+			hidp_input_report(session, skb);
+	} else {
+		BT_DBG("Unsupported protocol header 0x%02x", hdr);
+	}
+
+	kfree_skb(skb);
+}
+
+static int hidp_send_frame(struct socket *sock, unsigned char *data, int len)
+{
+	struct kvec iv = { data, len };
+	struct msghdr msg;
+
+	BT_DBG("sock %p data %p len %d", sock, data, len);
+
+	if (!len)
+		return 0;
+
+	memset(&msg, 0, sizeof(msg));
+
+	return kernel_sendmsg(sock, &msg, &iv, 1, len);
+}
+
+static int hidp_process_transmit(struct hidp_session *session)
+{
+	struct sk_buff *skb;
+
+	BT_DBG("session %p", session);
+
+	while ((skb = skb_dequeue(&session->ctrl_transmit))) {
+		if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) {
+			skb_queue_head(&session->ctrl_transmit, skb);
+			break;
+		}
+
+		hidp_set_timer(session);
+		kfree_skb(skb);
+	}
+
+	while ((skb = skb_dequeue(&session->intr_transmit))) {
+		if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) {
+			skb_queue_head(&session->intr_transmit, skb);
+			break;
+		}
+
+		hidp_set_timer(session);
+		kfree_skb(skb);
+	}
+
+	return skb_queue_len(&session->ctrl_transmit) +
+				skb_queue_len(&session->intr_transmit);
+}
+
+static int hidp_session(void *arg)
+{
+	struct hidp_session *session = arg;
+	struct sock *ctrl_sk = session->ctrl_sock->sk;
+	struct sock *intr_sk = session->intr_sock->sk;
+	struct sk_buff *skb;
+	int vendor = 0x0000, product = 0x0000;
+	wait_queue_t ctrl_wait, intr_wait;
+
+	BT_DBG("session %p", session);
+
+	if (session->input) {
+		vendor  = session->input->id.vendor;
+		product = session->input->id.product;
+	}
+
+	daemonize("khidpd_%04x%04x", vendor, product);
+	set_user_nice(current, -15);
+	current->flags |= PF_NOFREEZE;
+
+	init_waitqueue_entry(&ctrl_wait, current);
+	init_waitqueue_entry(&intr_wait, current);
+	add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait);
+	add_wait_queue(intr_sk->sk_sleep, &intr_wait);
+	while (!atomic_read(&session->terminate)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (ctrl_sk->sk_state != BT_CONNECTED || intr_sk->sk_state != BT_CONNECTED)
+			break;
+
+		while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			hidp_recv_ctrl_frame(session, skb);
+		}
+
+		while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			hidp_recv_intr_frame(session, skb);
+		}
+
+		hidp_process_transmit(session);
+
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(intr_sk->sk_sleep, &intr_wait);
+	remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait);
+
+	down_write(&hidp_session_sem);
+
+	hidp_del_timer(session);
+
+	if (intr_sk->sk_state != BT_CONNECTED)
+		wait_event_timeout(*(ctrl_sk->sk_sleep), (ctrl_sk->sk_state == BT_CLOSED), HZ);
+
+	fput(session->ctrl_sock->file);
+
+	wait_event_timeout(*(intr_sk->sk_sleep), (intr_sk->sk_state == BT_CLOSED), HZ);
+
+	fput(session->intr_sock->file);
+
+	__hidp_unlink_session(session);
+
+	if (session->input) {
+		input_unregister_device(session->input);
+		kfree(session->input);
+	}
+
+	up_write(&hidp_session_sem);
+
+	kfree(session);
+	return 0;
+}
+
+static inline void hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req)
+{
+	struct input_dev *input = session->input;
+	int i;
+
+	input->private = session;
+
+	input->id.bustype = BUS_BLUETOOTH;
+	input->id.vendor  = req->vendor;
+	input->id.product = req->product;
+	input->id.version = req->version;
+
+	if (req->subclass & 0x40) {
+		set_bit(EV_KEY, input->evbit);
+		set_bit(EV_LED, input->evbit);
+		set_bit(EV_REP, input->evbit);
+
+		set_bit(LED_NUML,    input->ledbit);
+		set_bit(LED_CAPSL,   input->ledbit);
+		set_bit(LED_SCROLLL, input->ledbit);
+		set_bit(LED_COMPOSE, input->ledbit);
+		set_bit(LED_KANA,    input->ledbit);
+
+		for (i = 0; i < sizeof(hidp_keycode); i++)
+			set_bit(hidp_keycode[i], input->keybit);
+		clear_bit(0, input->keybit);
+	}
+
+	if (req->subclass & 0x80) {
+		input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+		input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
+		input->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+		input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
+		input->relbit[0] |= BIT(REL_WHEEL);
+	}
+
+	input->event = hidp_input_event;
+
+	input_register_device(input);
+}
+
+int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
+{
+	struct hidp_session *session, *s;
+	int err;
+
+	BT_DBG("");
+
+	if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) ||
+			bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst))
+		return -ENOTUNIQ;
+
+	session = kmalloc(sizeof(struct hidp_session), GFP_KERNEL);
+	if (!session) 
+		return -ENOMEM;
+	memset(session, 0, sizeof(struct hidp_session));
+
+	session->input = kmalloc(sizeof(struct input_dev), GFP_KERNEL);
+	if (!session->input) {
+		kfree(session);
+		return -ENOMEM;
+	}
+	memset(session->input, 0, sizeof(struct input_dev));
+
+	down_write(&hidp_session_sem);
+
+	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
+	if (s && s->state == BT_CONNECTED) {
+		err = -EEXIST;
+		goto failed;
+	}
+
+	bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst);
+
+	session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->omtu, l2cap_pi(ctrl_sock->sk)->imtu);
+	session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->omtu, l2cap_pi(intr_sock->sk)->imtu);
+
+	BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu);
+
+	session->ctrl_sock = ctrl_sock;
+	session->intr_sock = intr_sock;
+	session->state     = BT_CONNECTED;
+
+	init_timer(&session->timer);
+
+	session->timer.function = hidp_idle_timeout;
+	session->timer.data     = (unsigned long) session;
+
+	skb_queue_head_init(&session->ctrl_transmit);
+	skb_queue_head_init(&session->intr_transmit);
+
+	session->flags   = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
+	session->idle_to = req->idle_to;
+
+	if (session->input)
+		hidp_setup_input(session, req);
+
+	__hidp_link_session(session);
+
+	hidp_set_timer(session);
+
+	err = kernel_thread(hidp_session, session, CLONE_KERNEL);
+	if (err < 0)
+		goto unlink;
+
+	if (session->input) {
+		hidp_send_ctrl_message(session,
+			HIDP_TRANS_SET_PROTOCOL | HIDP_PROTO_BOOT, NULL, 0);
+		session->flags |= (1 << HIDP_BOOT_PROTOCOL_MODE);
+
+		session->leds = 0xff;
+		hidp_input_event(session->input, EV_LED, 0, 0);
+	}
+
+	up_write(&hidp_session_sem);
+	return 0;
+
+unlink:
+	hidp_del_timer(session);
+
+	__hidp_unlink_session(session);
+
+	if (session->input)
+		input_unregister_device(session->input);
+
+failed:
+	up_write(&hidp_session_sem);
+
+	if (session->input)
+		kfree(session->input);
+
+	kfree(session);
+	return err;
+}
+
+int hidp_del_connection(struct hidp_conndel_req *req)
+{
+	struct hidp_session *session;
+	int err = 0;
+
+	BT_DBG("");
+
+	down_read(&hidp_session_sem);
+
+	session = __hidp_get_session(&req->bdaddr);
+	if (session) {
+		if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) {
+			hidp_send_ctrl_message(session,
+				HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0);
+		} else {
+			/* Flush the transmit queues */
+			skb_queue_purge(&session->ctrl_transmit);
+			skb_queue_purge(&session->intr_transmit);
+
+			/* Kill session thread */
+			atomic_inc(&session->terminate);
+			hidp_schedule(session);
+		}
+	} else
+		err = -ENOENT;
+
+	up_read(&hidp_session_sem);
+	return err;
+}
+
+int hidp_get_connlist(struct hidp_connlist_req *req)
+{
+	struct list_head *p;
+	int err = 0, n = 0;
+
+	BT_DBG("");
+
+	down_read(&hidp_session_sem);
+
+	list_for_each(p, &hidp_session_list) {
+		struct hidp_session *session;
+		struct hidp_conninfo ci;
+
+		session = list_entry(p, struct hidp_session, list);
+
+		__hidp_copy_session(session, &ci);
+
+		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (++n >= req->cnum)
+			break;
+
+		req->ci++;
+	}
+	req->cnum = n;
+
+	up_read(&hidp_session_sem);
+	return err;
+}
+
+int hidp_get_conninfo(struct hidp_conninfo *ci)
+{
+	struct hidp_session *session;
+	int err = 0;
+
+	down_read(&hidp_session_sem);
+
+	session = __hidp_get_session(&ci->bdaddr);
+	if (session)
+		__hidp_copy_session(session, ci);
+	else
+		err = -ENOENT;
+
+	up_read(&hidp_session_sem);
+	return err;
+}
+
+static int __init hidp_init(void)
+{
+	l2cap_load();
+
+	BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
+
+	return hidp_init_sockets();
+}
+
+static void __exit hidp_exit(void)
+{
+	hidp_cleanup_sockets();
+}
+
+module_init(hidp_init);
+module_exit(hidp_exit);
+
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth HIDP ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-6");
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
new file mode 100644
index 00000000000..c2775f587d2
--- /dev/null
+++ b/net/bluetooth/hidp/hidp.h
@@ -0,0 +1,167 @@
+/* 
+   HIDP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#ifndef __HIDP_H
+#define __HIDP_H
+
+#include <linux/types.h>
+#include <net/bluetooth/bluetooth.h>
+
+/* HIDP header masks */
+#define HIDP_HEADER_TRANS_MASK			0xf0
+#define HIDP_HEADER_PARAM_MASK			0x0f
+
+/* HIDP transaction types */
+#define HIDP_TRANS_HANDSHAKE			0x00
+#define HIDP_TRANS_HID_CONTROL			0x10
+#define HIDP_TRANS_GET_REPORT			0x40
+#define HIDP_TRANS_SET_REPORT			0x50
+#define HIDP_TRANS_GET_PROTOCOL			0x60
+#define HIDP_TRANS_SET_PROTOCOL			0x70
+#define HIDP_TRANS_GET_IDLE			0x80
+#define HIDP_TRANS_SET_IDLE			0x90
+#define HIDP_TRANS_DATA				0xa0
+#define HIDP_TRANS_DATC				0xb0
+
+/* HIDP handshake results */
+#define HIDP_HSHK_SUCCESSFUL			0x00
+#define HIDP_HSHK_NOT_READY			0x01
+#define HIDP_HSHK_ERR_INVALID_REPORT_ID		0x02
+#define HIDP_HSHK_ERR_UNSUPPORTED_REQUEST	0x03
+#define HIDP_HSHK_ERR_INVALID_PARAMETER		0x04
+#define HIDP_HSHK_ERR_UNKNOWN			0x0e
+#define HIDP_HSHK_ERR_FATAL			0x0f
+
+/* HIDP control operation parameters */
+#define HIDP_CTRL_NOP				0x00
+#define HIDP_CTRL_HARD_RESET			0x01
+#define HIDP_CTRL_SOFT_RESET			0x02
+#define HIDP_CTRL_SUSPEND			0x03
+#define HIDP_CTRL_EXIT_SUSPEND			0x04
+#define HIDP_CTRL_VIRTUAL_CABLE_UNPLUG		0x05
+
+/* HIDP data transaction headers */
+#define HIDP_DATA_RTYPE_MASK			0x03
+#define HIDP_DATA_RSRVD_MASK			0x0c
+#define HIDP_DATA_RTYPE_OTHER			0x00
+#define HIDP_DATA_RTYPE_INPUT			0x01
+#define HIDP_DATA_RTYPE_OUPUT			0x02
+#define HIDP_DATA_RTYPE_FEATURE			0x03
+
+/* HIDP protocol header parameters */
+#define HIDP_PROTO_BOOT				0x00
+#define HIDP_PROTO_REPORT			0x01
+
+/* HIDP ioctl defines */
+#define HIDPCONNADD	_IOW('H', 200, int)
+#define HIDPCONNDEL	_IOW('H', 201, int)
+#define HIDPGETCONNLIST	_IOR('H', 210, int)
+#define HIDPGETCONNINFO	_IOR('H', 211, int)
+
+#define HIDP_VIRTUAL_CABLE_UNPLUG	0
+#define HIDP_BOOT_PROTOCOL_MODE		1
+#define HIDP_BLUETOOTH_VENDOR_ID	9
+
+struct hidp_connadd_req {
+	int   ctrl_sock;	// Connected control socket
+	int   intr_sock;	// Connteted interrupt socket
+	__u16 parser;
+	__u16 rd_size;
+	__u8 *rd_data;
+	__u8  country;
+	__u8  subclass;
+	__u16 vendor;
+	__u16 product;
+	__u16 version;
+	__u32 flags;
+	__u32 idle_to;
+	char  name[128];
+};
+
+struct hidp_conndel_req {
+	bdaddr_t bdaddr;
+	__u32    flags;
+};
+
+struct hidp_conninfo {
+	bdaddr_t bdaddr;
+	__u32    flags;
+	__u16    state;
+	__u16    vendor;
+	__u16    product;
+	__u16    version;
+	char     name[128];
+};
+
+struct hidp_connlist_req {
+	__u32  cnum;
+	struct hidp_conninfo __user *ci;
+};
+
+int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_del_connection(struct hidp_conndel_req *req);
+int hidp_get_connlist(struct hidp_connlist_req *req);
+int hidp_get_conninfo(struct hidp_conninfo *ci);
+
+/* HIDP session defines */
+struct hidp_session {
+	struct list_head list;
+
+	struct socket *ctrl_sock;
+	struct socket *intr_sock;
+
+	bdaddr_t bdaddr;
+
+	unsigned long state;
+	unsigned long flags;
+	unsigned long idle_to;
+
+	uint ctrl_mtu;
+	uint intr_mtu;
+
+	atomic_t terminate;
+
+	unsigned char keys[8];
+	unsigned char leds;
+
+	struct input_dev *input;
+
+	struct timer_list timer;
+
+	struct sk_buff_head ctrl_transmit;
+	struct sk_buff_head intr_transmit;
+};
+
+static inline void hidp_schedule(struct hidp_session *session)
+{
+	struct sock *ctrl_sk = session->ctrl_sock->sk;
+	struct sock *intr_sk = session->intr_sock->sk;
+
+	wake_up_interruptible(ctrl_sk->sk_sleep);
+	wake_up_interruptible(intr_sk->sk_sleep);
+}
+
+/* HIDP init defines */
+extern int __init hidp_init_sockets(void);
+extern void __exit hidp_cleanup_sockets(void);
+
+#endif /* __HIDP_H */
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
new file mode 100644
index 00000000000..fabb36d4666
--- /dev/null
+++ b/net/bluetooth/hidp/sock.c
@@ -0,0 +1,232 @@
+/* 
+   HIDP implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/ioctl.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <net/sock.h>
+
+#include "hidp.h"
+
+#ifndef CONFIG_BT_HIDP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static int hidp_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	sock_orphan(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *) arg;
+	struct hidp_connadd_req ca;
+	struct hidp_conndel_req cd;
+	struct hidp_connlist_req cl;
+	struct hidp_conninfo ci;
+	struct socket *csock;
+	struct socket *isock;
+	int err;
+
+	BT_DBG("cmd %x arg %lx", cmd, arg);
+
+	switch (cmd) {
+	case HIDPCONNADD:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&ca, argp, sizeof(ca)))
+			return -EFAULT;
+
+		csock = sockfd_lookup(ca.ctrl_sock, &err);
+		if (!csock)
+			return err;
+
+		isock = sockfd_lookup(ca.intr_sock, &err);
+		if (!isock) {
+			fput(csock->file);
+			return err;
+		}
+
+		if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) {
+			fput(csock->file);
+			fput(isock->file);
+			return -EBADFD;
+		}
+
+		err = hidp_add_connection(&ca, csock, isock);
+		if (!err) {
+			if (copy_to_user(argp, &ca, sizeof(ca)))
+				err = -EFAULT;
+		} else {
+			fput(csock->file);
+			fput(isock->file);
+		}
+
+		return err;
+
+	case HIDPCONNDEL:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			return -EFAULT;
+
+		return hidp_del_connection(&cd);
+
+	case HIDPGETCONNLIST:
+		if (copy_from_user(&cl, argp, sizeof(cl)))
+			return -EFAULT;
+
+		if (cl.cnum <= 0)
+			return -EINVAL;
+
+		err = hidp_get_connlist(&cl);
+		if (!err && copy_to_user(argp, &cl, sizeof(cl)))
+			return -EFAULT;
+
+		return err;
+
+	case HIDPGETCONNINFO:
+		if (copy_from_user(&ci, argp, sizeof(ci)))
+			return -EFAULT;
+
+		err = hidp_get_conninfo(&ci);
+		if (!err && copy_to_user(argp, &ci, sizeof(ci)))
+			return -EFAULT;
+
+		return err;
+	}
+
+	return -EINVAL;
+}
+
+static struct proto_ops hidp_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= hidp_sock_release,
+	.ioctl		= hidp_sock_ioctl,
+	.bind		= sock_no_bind,
+	.getname	= sock_no_getname,
+	.sendmsg	= sock_no_sendmsg,
+	.recvmsg	= sock_no_recvmsg,
+	.poll		= sock_no_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sock_no_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.connect	= sock_no_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.mmap		= sock_no_mmap
+};
+
+static struct proto hidp_proto = {
+	.name		= "HIDP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct bt_sock)
+};
+
+static int hidp_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sk = sk_alloc(PF_BLUETOOTH, GFP_KERNEL, &hidp_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	sock->ops = &hidp_sock_ops;
+
+	sock->state = SS_UNCONNECTED;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = protocol;
+	sk->sk_state	= BT_OPEN;
+
+	return 0;
+}
+
+static struct net_proto_family hidp_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= hidp_sock_create
+};
+
+int __init hidp_init_sockets(void)
+{
+	int err;
+
+	err = proto_register(&hidp_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	return 0;
+
+error:
+	BT_ERR("Can't register HIDP socket");
+	proto_unregister(&hidp_proto);
+	return err;
+}
+
+void __exit hidp_cleanup_sockets(void)
+{
+	if (bt_sock_unregister(BTPROTO_HIDP) < 0)
+		BT_ERR("Can't unregister HIDP socket");
+
+	proto_unregister(&hidp_proto);
+}
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
new file mode 100644
index 00000000000..c12babcf0b3
--- /dev/null
+++ b/net/bluetooth/l2cap.c
@@ -0,0 +1,2329 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth L2CAP core and sockets. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+
+#ifndef CONFIG_BT_L2CAP_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "2.7"
+
+static struct proto_ops l2cap_sock_ops;
+
+static struct bt_sock_list l2cap_sk_list = {
+	.lock = RW_LOCK_UNLOCKED
+};
+
+static int l2cap_conn_del(struct hci_conn *conn, int err);
+
+static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent);
+static void l2cap_chan_del(struct sock *sk, int err);
+
+static void __l2cap_sock_close(struct sock *sk, int reason);
+static void l2cap_sock_close(struct sock *sk);
+static void l2cap_sock_kill(struct sock *sk);
+
+static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
+				u8 code, u8 ident, u16 dlen, void *data);
+
+/* ---- L2CAP timers ---- */
+static void l2cap_sock_timeout(unsigned long arg)
+{
+	struct sock *sk = (struct sock *) arg;
+
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+
+	bh_lock_sock(sk);
+	__l2cap_sock_close(sk, ETIMEDOUT);
+	bh_unlock_sock(sk);
+
+	l2cap_sock_kill(sk);
+	sock_put(sk);
+}
+
+static void l2cap_sock_set_timer(struct sock *sk, long timeout)
+{
+	BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+}
+
+static void l2cap_sock_clear_timer(struct sock *sk)
+{
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static void l2cap_sock_init_timer(struct sock *sk)
+{
+	init_timer(&sk->sk_timer);
+	sk->sk_timer.function = l2cap_sock_timeout;
+	sk->sk_timer.data = (unsigned long)sk;
+}
+
+/* ---- L2CAP connections ---- */
+static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
+{
+	struct l2cap_conn *conn;
+
+	if ((conn = hcon->l2cap_data))
+		return conn;
+
+	if (status)
+		return conn;
+
+	if (!(conn = kmalloc(sizeof(struct l2cap_conn), GFP_ATOMIC)))
+		return NULL;
+	memset(conn, 0, sizeof(struct l2cap_conn));
+
+	hcon->l2cap_data = conn;
+	conn->hcon = hcon;
+
+	conn->mtu = hcon->hdev->acl_mtu;
+	conn->src = &hcon->hdev->bdaddr;
+	conn->dst = &hcon->dst;
+
+	spin_lock_init(&conn->lock);
+	rwlock_init(&conn->chan_list.lock);
+
+	BT_DBG("hcon %p conn %p", hcon, conn);
+	return conn;
+}
+
+static int l2cap_conn_del(struct hci_conn *hcon, int err)
+{
+	struct l2cap_conn *conn;
+	struct sock *sk;
+
+	if (!(conn = hcon->l2cap_data)) 
+		return 0;
+
+	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+
+	if (conn->rx_skb)
+		kfree_skb(conn->rx_skb);
+
+	/* Kill channels */
+	while ((sk = conn->chan_list.head)) {
+		bh_lock_sock(sk);
+		l2cap_chan_del(sk, err);
+		bh_unlock_sock(sk);
+		l2cap_sock_kill(sk);
+	}
+
+	hcon->l2cap_data = NULL;
+	kfree(conn);
+	return 0;
+}
+
+static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+	write_lock(&l->lock);
+	__l2cap_chan_add(conn, sk, parent);
+	write_unlock(&l->lock);
+}
+
+static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
+{
+	u8 id;
+
+	/* Get next available identificator.
+	 *    1 - 128 are used by kernel.
+	 *  129 - 199 are reserved.
+	 *  200 - 254 are used by utilities like l2ping, etc.
+	 */
+
+	spin_lock(&conn->lock);
+
+	if (++conn->tx_ident > 128)
+		conn->tx_ident = 1;
+
+	id = conn->tx_ident;
+
+	spin_unlock(&conn->lock);
+
+	return id;
+}
+
+static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
+{
+	struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
+
+	BT_DBG("code 0x%2.2x", code);
+
+	if (!skb)
+		return -ENOMEM;
+
+	return hci_send_acl(conn->hcon, skb, 0);
+}
+
+/* ---- Socket interface ---- */
+static struct sock *__l2cap_get_sock_by_addr(u16 psm, bdaddr_t *src)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	sk_for_each(sk, node, &l2cap_sk_list.head)
+		if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
+			goto found;
+	sk = NULL;
+found:
+	return sk;
+}
+
+/* Find socket with psm and source bdaddr.
+ * Returns closest match.
+ */
+static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src)
+{
+	struct sock *sk = NULL, *sk1 = NULL;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &l2cap_sk_list.head) {
+		if (state && sk->sk_state != state)
+			continue;
+
+		if (l2cap_pi(sk)->psm == psm) {
+			/* Exact match. */
+			if (!bacmp(&bt_sk(sk)->src, src))
+				break;
+
+			/* Closest match */
+			if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
+				sk1 = sk;
+		}
+	}
+	return node ? sk : sk1;
+}
+
+/* Find socket with given address (psm, src).
+ * Returns locked socket */
+static inline struct sock *l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src)
+{
+	struct sock *s;
+	read_lock(&l2cap_sk_list.lock);
+	s = __l2cap_get_sock_by_psm(state, psm, src);
+	if (s) bh_lock_sock(s);
+	read_unlock(&l2cap_sk_list.lock);
+	return s;
+}
+
+static void l2cap_sock_destruct(struct sock *sk)
+{
+	BT_DBG("sk %p", sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+}
+
+static void l2cap_sock_cleanup_listen(struct sock *parent)
+{
+	struct sock *sk;
+
+	BT_DBG("parent %p", parent);
+
+	/* Close not yet accepted channels */
+	while ((sk = bt_accept_dequeue(parent, NULL)))
+		l2cap_sock_close(sk);
+
+	parent->sk_state  = BT_CLOSED;
+	sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+/* Kill socket (only if zapped and orphan)
+ * Must be called on unlocked socket.
+ */
+static void l2cap_sock_kill(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+		return;
+
+	BT_DBG("sk %p state %d", sk, sk->sk_state);
+
+	/* Kill poor orphan */
+	bt_sock_unlink(&l2cap_sk_list, sk);
+	sock_set_flag(sk, SOCK_DEAD);
+	sock_put(sk);
+}
+
+static void __l2cap_sock_close(struct sock *sk, int reason)
+{
+	BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
+
+	switch (sk->sk_state) {
+	case BT_LISTEN:
+		l2cap_sock_cleanup_listen(sk);
+		break;
+
+	case BT_CONNECTED:
+	case BT_CONFIG:
+	case BT_CONNECT2:
+		if (sk->sk_type == SOCK_SEQPACKET) {
+			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+			struct l2cap_disconn_req req;
+
+			sk->sk_state = BT_DISCONN;
+			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
+
+			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			l2cap_send_cmd(conn, l2cap_get_ident(conn),
+					L2CAP_DISCONN_REQ, sizeof(req), &req);
+		} else {
+			l2cap_chan_del(sk, reason);
+		}
+		break;
+
+	case BT_CONNECT:
+	case BT_DISCONN:
+		l2cap_chan_del(sk, reason);
+		break;
+
+	default:
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+	}
+}
+
+/* Must be called on unlocked socket. */
+static void l2cap_sock_close(struct sock *sk)
+{
+	l2cap_sock_clear_timer(sk);
+	lock_sock(sk);
+	__l2cap_sock_close(sk, ECONNRESET);
+	release_sock(sk);
+	l2cap_sock_kill(sk);
+}
+
+static void l2cap_sock_init(struct sock *sk, struct sock *parent)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+
+	BT_DBG("sk %p", sk);
+
+	if (parent) {
+		sk->sk_type = parent->sk_type;
+		pi->imtu = l2cap_pi(parent)->imtu;
+		pi->omtu = l2cap_pi(parent)->omtu;
+		pi->link_mode = l2cap_pi(parent)->link_mode;
+	} else {
+		pi->imtu = L2CAP_DEFAULT_MTU;
+		pi->omtu = 0;
+		pi->link_mode = 0;
+	}
+
+	/* Default config options */
+	pi->conf_mtu = L2CAP_DEFAULT_MTU;
+	pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+}
+
+static struct proto l2cap_proto = {
+	.name		= "L2CAP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct l2cap_pinfo)
+};
+
+static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, int prio)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+	sk->sk_destruct = l2cap_sock_destruct;
+	sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = proto;
+	sk->sk_state    = BT_OPEN;
+
+	l2cap_sock_init_timer(sk);
+
+	bt_sock_link(&l2cap_sk_list, sk);
+	return sk;
+}
+
+static int l2cap_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	sock->state = SS_UNCONNECTED;
+
+	if (sock->type != SOCK_SEQPACKET &&
+			sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW))
+		return -EPERM;
+
+	sock->ops = &l2cap_sock_ops;
+
+	sk = l2cap_sock_alloc(sock, protocol, GFP_KERNEL);
+	if (!sk)
+		return -ENOMEM;
+
+	l2cap_sock_init(sk, NULL);
+	return 0;
+}
+
+static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p, %s %d", sk, batostr(&la->l2_bdaddr), la->l2_psm);
+
+	if (!addr || addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_OPEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	write_lock_bh(&l2cap_sk_list.lock);
+
+	if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) {
+		err = -EADDRINUSE;
+	} else {
+		/* Save source address */
+		bacpy(&bt_sk(sk)->src, &la->l2_bdaddr);
+		l2cap_pi(sk)->psm   = la->l2_psm;
+		l2cap_pi(sk)->sport = la->l2_psm;
+		sk->sk_state = BT_BOUND;
+	}
+
+	write_unlock_bh(&l2cap_sk_list.lock);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_do_connect(struct sock *sk)
+{
+	bdaddr_t *src = &bt_sk(sk)->src;
+	bdaddr_t *dst = &bt_sk(sk)->dst;
+	struct l2cap_conn *conn;
+	struct hci_conn *hcon;
+	struct hci_dev *hdev;
+	int err = 0;
+
+	BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm);
+
+	if (!(hdev = hci_get_route(dst, src)))
+		return -EHOSTUNREACH;
+
+	hci_dev_lock_bh(hdev);
+
+	err = -ENOMEM;
+
+	hcon = hci_connect(hdev, ACL_LINK, dst);
+	if (!hcon)
+		goto done;
+
+	conn = l2cap_conn_add(hcon, 0);
+	if (!conn) {
+		hci_conn_put(hcon);
+		goto done;
+	}
+
+	err = 0;
+
+	/* Update source addr of the socket */
+	bacpy(src, conn->src);
+
+	l2cap_chan_add(conn, sk, NULL);
+
+	sk->sk_state = BT_CONNECT;
+	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
+
+	if (hcon->state == BT_CONNECTED) {
+		if (sk->sk_type == SOCK_SEQPACKET) {
+			struct l2cap_conn_req req;
+			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.psm  = l2cap_pi(sk)->psm;
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_REQ, sizeof(req), &req);
+		} else {
+			l2cap_sock_clear_timer(sk);
+			sk->sk_state = BT_CONNECTED;
+		}
+	}
+
+done:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+	return err;
+}
+
+static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
+{
+	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	lock_sock(sk);
+
+	BT_DBG("sk %p", sk);
+
+	if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_l2)) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	if (sk->sk_type == SOCK_SEQPACKET && !la->l2_psm) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	switch(sk->sk_state) {
+	case BT_CONNECT:
+	case BT_CONNECT2:
+	case BT_CONFIG:
+		/* Already connecting */
+		goto wait;
+
+	case BT_CONNECTED:
+		/* Already connected */
+		goto done;
+
+	case BT_OPEN:
+	case BT_BOUND:
+		/* Can connect */
+		break;
+
+	default:
+		err = -EBADFD;
+		goto done;
+	}
+
+	/* Set destination address and psm */
+	bacpy(&bt_sk(sk)->dst, &la->l2_bdaddr);
+	l2cap_pi(sk)->psm = la->l2_psm;
+
+	if ((err = l2cap_do_connect(sk)))
+		goto done;
+
+wait:
+	err = bt_sock_wait_state(sk, BT_CONNECTED,
+			sock_sndtimeo(sk, flags & O_NONBLOCK));
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p backlog %d", sk, backlog);
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_BOUND || sock->type != SOCK_SEQPACKET) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	if (!l2cap_pi(sk)->psm) {
+		bdaddr_t *src = &bt_sk(sk)->src;
+		u16 psm;
+
+		err = -EINVAL;
+
+		write_lock_bh(&l2cap_sk_list.lock);
+
+		for (psm = 0x1001; psm < 0x1100; psm += 2)
+			if (!__l2cap_get_sock_by_addr(psm, src)) {
+				l2cap_pi(sk)->psm   = htobs(psm);
+				l2cap_pi(sk)->sport = htobs(psm);
+				err = 0;
+				break;
+			}
+
+		write_unlock_bh(&l2cap_sk_list.lock);
+
+		if (err < 0)
+			goto done;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+	sk->sk_state = BT_LISTEN;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *nsk;
+	long timeo;
+	int err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_LISTEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	BT_DBG("sk %p timeo %ld", sk, timeo);
+
+	/* Wait for an incoming connection. (wake-one). */
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		if (sk->sk_state != BT_LISTEN) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	if (err)
+		goto done;
+
+	newsock->state = SS_CONNECTED;
+
+	BT_DBG("new socket %p", nsk);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+{
+	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	addr->sa_family = AF_BLUETOOTH;
+	*len = sizeof(struct sockaddr_l2);
+
+	if (peer)
+		bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
+	else
+		bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
+
+	la->l2_psm = l2cap_pi(sk)->psm;
+	return 0;
+}
+
+static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	struct sk_buff *skb, **frag;
+	int err, hlen, count, sent=0;
+	struct l2cap_hdr *lh;
+
+	BT_DBG("sk %p len %d", sk, len);
+
+	/* First fragment (with L2CAP header) */
+	if (sk->sk_type == SOCK_DGRAM)
+		hlen = L2CAP_HDR_SIZE + 2;
+	else
+		hlen = L2CAP_HDR_SIZE;
+
+	count = min_t(unsigned int, (conn->mtu - hlen), len);
+
+	skb = bt_skb_send_alloc(sk, hlen + count,
+			msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return err;
+
+	/* Create L2CAP header */
+	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+	lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
+
+	if (sk->sk_type == SOCK_DGRAM)
+		put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
+
+	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	sent += count;
+	len  -= count;
+
+	/* Continuation fragments (no L2CAP header) */
+	frag = &skb_shinfo(skb)->frag_list;
+	while (len) {
+		count = min_t(unsigned int, conn->mtu, len);
+
+		*frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
+		if (!*frag)
+			goto fail;
+		
+		if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) {
+			err = -EFAULT;
+			goto fail;
+		}
+
+		sent += count;
+		len  -= count;
+
+		frag = &(*frag)->next;
+	}
+
+	if ((err = hci_send_acl(conn->hcon, skb, 0)) < 0)
+		goto fail;
+
+	return sent;
+
+fail:
+	kfree_skb(skb);
+	return err;
+}
+
+static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (sk->sk_err)
+		return sock_error(sk);
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	/* Check outgoing MTU */
+	if (sk->sk_type != SOCK_RAW && len > l2cap_pi(sk)->omtu)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state == BT_CONNECTED)
+		err = l2cap_do_send(sk, msg, len);
+	else
+		err = -ENOTCONN;
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2cap_options opts;
+	int err = 0, len;
+	u32 opt;
+
+	BT_DBG("sk %p", sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case L2CAP_OPTIONS:
+		len = min_t(unsigned int, sizeof(opts), optlen);
+		if (copy_from_user((char *) &opts, optval, len)) {
+			err = -EFAULT;
+			break;
+		}
+		l2cap_pi(sk)->imtu  = opts.imtu;
+		l2cap_pi(sk)->omtu  = opts.omtu;
+		break;
+
+	case L2CAP_LM:
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		l2cap_pi(sk)->link_mode = opt;
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2cap_options opts;
+	struct l2cap_conninfo cinfo;
+	int len, err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case L2CAP_OPTIONS:
+		opts.imtu     = l2cap_pi(sk)->imtu;
+		opts.omtu     = l2cap_pi(sk)->omtu;
+		opts.flush_to = l2cap_pi(sk)->flush_to;
+		opts.mode     = 0x00;
+
+		len = min_t(unsigned int, len, sizeof(opts));
+		if (copy_to_user(optval, (char *) &opts, len))
+			err = -EFAULT;
+
+		break;
+
+	case L2CAP_LM:
+		if (put_user(l2cap_pi(sk)->link_mode, (u32 __user *) optval))
+			err = -EFAULT;
+		break;
+
+	case L2CAP_CONNINFO:
+		if (sk->sk_state != BT_CONNECTED) {
+			err = -ENOTCONN;
+			break;
+		}
+
+		cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
+		memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
+
+		len = min_t(unsigned int, len, sizeof(cinfo));
+		if (copy_to_user(optval, (char *) &cinfo, len))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	if (!sk->sk_shutdown) {
+		sk->sk_shutdown = SHUTDOWN_MASK;
+		l2cap_sock_clear_timer(sk);
+		__l2cap_sock_close(sk, 0);
+
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+			err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+	}
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	err = l2cap_sock_shutdown(sock, 2);
+
+	sock_orphan(sk);
+	l2cap_sock_kill(sk);
+	return err;
+}
+
+/* ---- L2CAP channels ---- */
+static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->dcid == cid)
+			break;
+	}
+	return s;
+}
+
+static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->scid == cid)
+			break;
+	}
+	return s;
+}
+
+/* Find channel with given SCID.
+ * Returns locked socket */
+static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	read_lock(&l->lock);
+	s = __l2cap_get_chan_by_scid(l, cid);
+	if (s) bh_lock_sock(s);
+	read_unlock(&l->lock);
+	return s;
+}
+
+static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->ident == ident)
+			break;
+	}
+	return s;
+}
+
+static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+	struct sock *s;
+	read_lock(&l->lock);
+	s = __l2cap_get_chan_by_ident(l, ident);
+	if (s) bh_lock_sock(s);
+	read_unlock(&l->lock);
+	return s;
+}
+
+static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
+{
+	u16 cid = 0x0040;
+
+	for (; cid < 0xffff; cid++) {
+		if(!__l2cap_get_chan_by_scid(l, cid))
+			return cid;
+	}
+
+	return 0;
+}
+
+static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
+{
+	sock_hold(sk);
+
+	if (l->head)
+		l2cap_pi(l->head)->prev_c = sk;
+
+	l2cap_pi(sk)->next_c = l->head;
+	l2cap_pi(sk)->prev_c = NULL;
+	l->head = sk;
+}
+
+static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
+{
+	struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
+
+	write_lock(&l->lock);
+	if (sk == l->head)
+		l->head = next;
+
+	if (next)
+		l2cap_pi(next)->prev_c = prev;
+	if (prev)
+		l2cap_pi(prev)->next_c = next;
+	write_unlock(&l->lock);
+
+	__sock_put(sk);
+}
+
+static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+
+	BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
+
+	l2cap_pi(sk)->conn = conn;
+
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		/* Alloc CID for connection-oriented socket */
+		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
+	} else if (sk->sk_type == SOCK_DGRAM) {
+		/* Connectionless socket */
+		l2cap_pi(sk)->scid = 0x0002;
+		l2cap_pi(sk)->dcid = 0x0002;
+		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+	} else {
+		/* Raw socket can send/recv signalling messages only */
+		l2cap_pi(sk)->scid = 0x0001;
+		l2cap_pi(sk)->dcid = 0x0001;
+		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+	}
+
+	__l2cap_chan_link(l, sk);
+
+	if (parent)
+		bt_accept_enqueue(parent, sk);
+}
+
+/* Delete channel. 
+ * Must be called on the locked socket. */
+static void l2cap_chan_del(struct sock *sk, int err)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	struct sock *parent = bt_sk(sk)->parent;
+
+	l2cap_sock_clear_timer(sk);
+
+	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+	if (conn) { 
+		/* Unlink from channel list */
+		l2cap_chan_unlink(&conn->chan_list, sk);
+		l2cap_pi(sk)->conn = NULL;
+		hci_conn_put(conn->hcon);
+	}
+
+	sk->sk_state  = BT_CLOSED;
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (err)
+		sk->sk_err = err;
+
+	if (parent) {
+		bt_accept_unlink(sk);
+		parent->sk_data_ready(parent, 0);
+	} else
+		sk->sk_state_change(sk);
+}
+
+static void l2cap_conn_ready(struct l2cap_conn *conn)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock *sk;
+
+	BT_DBG("conn %p", conn);
+
+	read_lock(&l->lock);
+
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		bh_lock_sock(sk);
+
+		if (sk->sk_type != SOCK_SEQPACKET) {
+			l2cap_sock_clear_timer(sk);
+			sk->sk_state = BT_CONNECTED;
+			sk->sk_state_change(sk);
+		} else if (sk->sk_state == BT_CONNECT) {
+			struct l2cap_conn_req req;
+			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.psm  = l2cap_pi(sk)->psm;
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
+		}
+
+		bh_unlock_sock(sk);
+	}
+
+	read_unlock(&l->lock);
+}
+
+/* Notify sockets that we cannot guaranty reliability anymore */
+static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock *sk;
+
+	BT_DBG("conn %p", conn);
+
+	read_lock(&l->lock);
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE)
+			sk->sk_err = err;
+	}
+	read_unlock(&l->lock);
+}
+
+static void l2cap_chan_ready(struct sock *sk)
+{
+	struct sock *parent = bt_sk(sk)->parent;
+
+	BT_DBG("sk %p, parent %p", sk, parent);
+
+	l2cap_pi(sk)->conf_state = 0;
+	l2cap_sock_clear_timer(sk);
+
+	if (!parent) {
+		/* Outgoing channel.
+		 * Wake up socket sleeping on connect.
+		 */
+		sk->sk_state = BT_CONNECTED;
+		sk->sk_state_change(sk);
+	} else {
+		/* Incoming channel.
+		 * Wake up socket sleeping on accept.
+		 */
+		parent->sk_data_ready(parent, 0);
+	}
+}
+
+/* Copy frame to all raw sockets on that connection */
+static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sk_buff *nskb;
+	struct sock * sk;
+
+	BT_DBG("conn %p", conn);
+
+	read_lock(&l->lock);
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		if (sk->sk_type != SOCK_RAW)
+			continue;
+
+		/* Don't send frame to the socket it came from */
+		if (skb->sk == sk)
+			continue;
+
+		if (!(nskb = skb_clone(skb, GFP_ATOMIC)))
+			continue;
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+	read_unlock(&l->lock);
+}
+
+/* ---- L2CAP signalling commands ---- */
+static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
+				u8 code, u8 ident, u16 dlen, void *data)
+{
+	struct sk_buff *skb, **frag;
+	struct l2cap_cmd_hdr *cmd;
+	struct l2cap_hdr *lh;
+	int len, count;
+
+	BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %d", conn, code, ident, dlen);
+
+	len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen;
+	count = min_t(unsigned int, conn->mtu, len);
+
+	skb = bt_skb_alloc(count, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
+	lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
+	lh->cid = __cpu_to_le16(0x0001);
+
+	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
+	cmd->code  = code;
+	cmd->ident = ident;
+	cmd->len   = __cpu_to_le16(dlen);
+
+	if (dlen) {
+		count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
+		memcpy(skb_put(skb, count), data, count);
+		data += count;
+	}
+
+	len -= skb->len;
+
+	/* Continuation fragments (no L2CAP header) */
+	frag = &skb_shinfo(skb)->frag_list;
+	while (len) {
+		count = min_t(unsigned int, conn->mtu, len);
+
+		*frag = bt_skb_alloc(count, GFP_ATOMIC);
+		if (!*frag)
+			goto fail;
+
+		memcpy(skb_put(*frag, count), data, count);
+
+		len  -= count;
+		data += count;
+
+		frag = &(*frag)->next;
+	}
+
+	return skb;
+
+fail:
+	kfree_skb(skb);
+	return NULL;
+}
+
+static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val)
+{
+	struct l2cap_conf_opt *opt = *ptr;
+	int len;
+
+	len = L2CAP_CONF_OPT_SIZE + opt->len;
+	*ptr += len;
+
+	*type = opt->type;
+	*olen = opt->len;
+
+	switch (opt->len) {
+	case 1:
+		*val = *((u8 *) opt->val);
+		break;
+
+	case 2:
+		*val = __le16_to_cpu(*((u16 *)opt->val));
+		break;
+
+	case 4:
+		*val = __le32_to_cpu(*((u32 *)opt->val));
+		break;
+
+	default:
+		*val = (unsigned long) opt->val;
+		break;
+	}
+
+	BT_DBG("type 0x%2.2x len %d val 0x%lx", *type, opt->len, *val);
+	return len;
+}
+
+static inline void l2cap_parse_conf_req(struct sock *sk, void *data, int len)
+{
+	int type, hint, olen; 
+	unsigned long val;
+	void *ptr = data;
+
+	BT_DBG("sk %p len %d", sk, len);
+
+	while (len >= L2CAP_CONF_OPT_SIZE) {
+		len -= l2cap_get_conf_opt(&ptr, &type, &olen, &val);
+
+		hint  = type & 0x80;
+		type &= 0x7f;
+
+		switch (type) {
+		case L2CAP_CONF_MTU:
+			l2cap_pi(sk)->conf_mtu = val;
+			break;
+
+		case L2CAP_CONF_FLUSH_TO:
+			l2cap_pi(sk)->flush_to = val;
+			break;
+
+		case L2CAP_CONF_QOS:
+			break;
+
+		default:
+			if (hint)
+				break;
+
+			/* FIXME: Reject unknown option */
+			break;
+		}
+	}
+}
+
+static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
+{
+	struct l2cap_conf_opt *opt = *ptr;
+
+	BT_DBG("type 0x%2.2x len %d val 0x%lx", type, len, val);
+
+	opt->type = type;
+	opt->len  = len;
+
+	switch (len) {
+	case 1:
+		*((u8 *) opt->val)  = val;
+		break;
+
+	case 2:
+		*((u16 *) opt->val) = __cpu_to_le16(val);
+		break;
+
+	case 4:
+		*((u32 *) opt->val) = __cpu_to_le32(val);
+		break;
+
+	default:
+		memcpy(opt->val, (void *) val, len);
+		break;
+	}
+
+	*ptr += L2CAP_CONF_OPT_SIZE + len;
+}
+
+static int l2cap_build_conf_req(struct sock *sk, void *data)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct l2cap_conf_req *req = data;
+	void *ptr = req->data;
+
+	BT_DBG("sk %p", sk);
+
+	if (pi->imtu != L2CAP_DEFAULT_MTU)
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
+
+	/* FIXME: Need actual value of the flush timeout */
+	//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
+	//   l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
+
+	req->dcid  = __cpu_to_le16(pi->dcid);
+	req->flags = __cpu_to_le16(0);
+
+	return ptr - data;
+}
+
+static inline int l2cap_conf_output(struct sock *sk, void **ptr)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int result = 0;
+
+	/* Configure output options and let the other side know
+	 * which ones we don't like. */
+	if (pi->conf_mtu < pi->omtu) {
+		l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu);
+		result = L2CAP_CONF_UNACCEPT;
+	} else {
+		pi->omtu = pi->conf_mtu;
+	}
+
+	BT_DBG("sk %p result %d", sk, result);
+	return result;
+}
+
+static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
+{
+	struct l2cap_conf_rsp *rsp = data;
+	void *ptr = rsp->data;
+	u16 flags = 0;
+
+	BT_DBG("sk %p complete %d", sk, result ? 1 : 0);
+
+	if (result)
+		*result = l2cap_conf_output(sk, &ptr);
+	else
+		flags = 0x0001;
+
+	rsp->scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
+	rsp->result = __cpu_to_le16(result ? *result : 0);
+	rsp->flags  = __cpu_to_le16(flags);
+
+	return ptr - data;
+}
+
+static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_chan_list *list = &conn->chan_list;
+	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
+	struct l2cap_conn_rsp rsp;
+	struct sock *sk, *parent;
+	int result = 0, status = 0;
+
+	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
+	u16 psm  = req->psm;
+
+	BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid);
+
+	/* Check if we have socket listening on psm */
+	parent = l2cap_get_sock_by_psm(BT_LISTEN, psm, conn->src);
+	if (!parent) {
+		result = L2CAP_CR_BAD_PSM;
+		goto sendresp;
+	}
+
+	result = L2CAP_CR_NO_MEM;
+
+	/* Check for backlog size */
+	if (sk_acceptq_is_full(parent)) {
+		BT_DBG("backlog full %d", parent->sk_ack_backlog); 
+		goto response;
+	}
+
+	sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC);
+	if (!sk)
+		goto response;
+
+	write_lock(&list->lock);
+
+	/* Check if we already have channel with that dcid */
+	if (__l2cap_get_chan_by_dcid(list, scid)) {
+		write_unlock(&list->lock);
+		sock_set_flag(sk, SOCK_ZAPPED);
+		l2cap_sock_kill(sk);
+		goto response;
+	}
+
+	hci_conn_hold(conn->hcon);
+
+	l2cap_sock_init(sk, parent);
+	bacpy(&bt_sk(sk)->src, conn->src);
+	bacpy(&bt_sk(sk)->dst, conn->dst);
+	l2cap_pi(sk)->psm  = psm;
+	l2cap_pi(sk)->dcid = scid;
+
+	__l2cap_chan_add(conn, sk, parent);
+	dcid = l2cap_pi(sk)->scid;
+
+	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
+
+	/* Service level security */
+	result = L2CAP_CR_PEND;
+	status = L2CAP_CS_AUTHEN_PEND;
+	sk->sk_state = BT_CONNECT2;
+	l2cap_pi(sk)->ident = cmd->ident;
+
+	if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
+			(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) {
+		if (!hci_conn_encrypt(conn->hcon))
+			goto done;
+	} else if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) {
+		if (!hci_conn_auth(conn->hcon))
+			goto done;
+	}
+
+	sk->sk_state = BT_CONFIG;
+	result = status = 0;
+
+done:
+	write_unlock(&list->lock);
+
+response:
+	bh_unlock_sock(parent);
+
+sendresp:
+	rsp.scid   = __cpu_to_le16(scid);
+	rsp.dcid   = __cpu_to_le16(dcid);
+	rsp.result = __cpu_to_le16(result);
+	rsp.status = __cpu_to_le16(status);
+	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+	return 0;
+}
+
+static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data;
+	u16 scid, dcid, result, status;
+	struct sock *sk;
+	u8 req[128];
+
+	scid   = __le16_to_cpu(rsp->scid);
+	dcid   = __le16_to_cpu(rsp->dcid);
+	result = __le16_to_cpu(rsp->result);
+	status = __le16_to_cpu(rsp->status);
+
+	BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status);
+
+	if (scid) {
+		if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, scid)))
+			return 0;
+	} else {
+		if (!(sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident)))
+			return 0;
+	}
+
+	switch (result) {
+	case L2CAP_CR_SUCCESS:
+		sk->sk_state = BT_CONFIG;
+		l2cap_pi(sk)->ident = 0;
+		l2cap_pi(sk)->dcid = dcid;
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+					l2cap_build_conf_req(sk, req), req);
+		break;
+
+	case L2CAP_CR_PEND:
+		break;
+
+	default:
+		l2cap_chan_del(sk, ECONNREFUSED);
+		break;
+	}
+
+	bh_unlock_sock(sk);
+	return 0;
+}
+
+static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_conf_req *req = (struct l2cap_conf_req *) data;
+	u16 dcid, flags;
+	u8 rsp[64];
+	struct sock *sk;
+	int result;
+
+	dcid  = __le16_to_cpu(req->dcid);
+	flags = __le16_to_cpu(req->flags);
+
+	BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags);
+
+	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
+		return -ENOENT;
+
+	l2cap_parse_conf_req(sk, req->data, cmd->len - sizeof(*req));
+
+	if (flags & 0x0001) {
+		/* Incomplete config. Send empty response. */
+		l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
+				l2cap_build_conf_rsp(sk, rsp, NULL), rsp);
+		goto unlock;
+	}
+
+	/* Complete config. */
+	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
+			l2cap_build_conf_rsp(sk, rsp, &result), rsp);
+
+	if (result)
+		goto unlock;
+
+	/* Output config done */
+	l2cap_pi(sk)->conf_state |= L2CAP_CONF_OUTPUT_DONE;
+
+	if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) {
+		sk->sk_state = BT_CONNECTED;
+		l2cap_chan_ready(sk);
+	} else if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
+		u8 req[64];
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+					l2cap_build_conf_req(sk, req), req);
+	}
+
+unlock:
+	bh_unlock_sock(sk);
+	return 0;
+}
+
+static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
+	u16 scid, flags, result;
+	struct sock *sk;
+
+	scid   = __le16_to_cpu(rsp->scid);
+	flags  = __le16_to_cpu(rsp->flags);
+	result = __le16_to_cpu(rsp->result);
+
+	BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x", scid, flags, result);
+
+	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, scid)))
+		return 0;
+
+	switch (result) {
+	case L2CAP_CONF_SUCCESS:
+		break;
+
+	case L2CAP_CONF_UNACCEPT:
+		if (++l2cap_pi(sk)->conf_retry < L2CAP_CONF_MAX_RETRIES) {
+			char req[128];
+			/* It does not make sense to adjust L2CAP parameters
+			 * that are currently defined in the spec. We simply
+			 * resend config request that we sent earlier. It is
+			 * stupid, but it helps qualification testing which
+			 * expects at least some response from us. */
+			l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+						l2cap_build_conf_req(sk, req), req);
+			goto done;
+		}
+
+	default: 
+		sk->sk_state = BT_DISCONN;
+		sk->sk_err   = ECONNRESET;
+		l2cap_sock_set_timer(sk, HZ * 5);
+		{
+			struct l2cap_disconn_req req;
+			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			l2cap_send_cmd(conn, l2cap_get_ident(conn),
+					L2CAP_DISCONN_REQ, sizeof(req), &req);
+		}
+		goto done;
+	}
+
+	if (flags & 0x01)
+		goto done;
+
+	/* Input config done */
+	l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE;
+
+	if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) {
+		sk->sk_state = BT_CONNECTED;
+		l2cap_chan_ready(sk);
+	}
+
+done:
+	bh_unlock_sock(sk);
+	return 0;
+}
+
+static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data;
+	struct l2cap_disconn_rsp rsp;
+	u16 dcid, scid;
+	struct sock *sk;
+
+	scid = __le16_to_cpu(req->scid);
+	dcid = __le16_to_cpu(req->dcid);
+
+	BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
+
+	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
+		return 0;
+
+	rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
+	rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+	l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
+
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	l2cap_chan_del(sk, ECONNRESET);
+	bh_unlock_sock(sk);
+
+	l2cap_sock_kill(sk);
+	return 0;
+}
+
+static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data;
+	u16 dcid, scid;
+	struct sock *sk;
+
+	scid = __le16_to_cpu(rsp->scid);
+	dcid = __le16_to_cpu(rsp->dcid);
+
+	BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
+
+	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, scid)))
+		return 0;
+
+	l2cap_chan_del(sk, 0);
+	bh_unlock_sock(sk);
+
+	l2cap_sock_kill(sk);
+	return 0;
+}
+
+static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_info_req *req = (struct l2cap_info_req *) data;
+	struct l2cap_info_rsp rsp;
+	u16 type;
+
+	type = __le16_to_cpu(req->type);
+
+	BT_DBG("type 0x%4.4x", type);
+
+	rsp.type   = __cpu_to_le16(type);
+	rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP);
+	l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
+
+	return 0;
+}
+
+static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data;
+	u16 type, result;
+
+	type   = __le16_to_cpu(rsp->type);
+	result = __le16_to_cpu(rsp->result);
+
+	BT_DBG("type 0x%4.4x result 0x%2.2x", type, result);
+
+	return 0;
+}
+
+static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+	u8 *data = skb->data;
+	int len = skb->len;
+	struct l2cap_cmd_hdr cmd;
+	int err = 0;
+
+	l2cap_raw_recv(conn, skb);
+
+	while (len >= L2CAP_CMD_HDR_SIZE) {
+		memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE);
+		data += L2CAP_CMD_HDR_SIZE;
+		len  -= L2CAP_CMD_HDR_SIZE;
+
+		cmd.len = __le16_to_cpu(cmd.len);
+
+		BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd.len, cmd.ident);
+
+		if (cmd.len > len || !cmd.ident) {
+			BT_DBG("corrupted command");
+			break;
+		}
+
+		switch (cmd.code) {
+		case L2CAP_COMMAND_REJ:
+			/* FIXME: We should process this */
+			break;
+
+		case L2CAP_CONN_REQ:
+			err = l2cap_connect_req(conn, &cmd, data);
+			break;
+
+		case L2CAP_CONN_RSP:
+			err = l2cap_connect_rsp(conn, &cmd, data);
+			break;
+
+		case L2CAP_CONF_REQ:
+			err = l2cap_config_req(conn, &cmd, data);
+			break;
+
+		case L2CAP_CONF_RSP:
+			err = l2cap_config_rsp(conn, &cmd, data);
+			break;
+
+		case L2CAP_DISCONN_REQ:
+			err = l2cap_disconnect_req(conn, &cmd, data);
+			break;
+
+		case L2CAP_DISCONN_RSP:
+			err = l2cap_disconnect_rsp(conn, &cmd, data);
+			break;
+
+		case L2CAP_ECHO_REQ:
+			l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd.len, data);
+			break;
+
+		case L2CAP_ECHO_RSP:
+			break;
+
+		case L2CAP_INFO_REQ:
+			err = l2cap_information_req(conn, &cmd, data);
+			break;
+
+		case L2CAP_INFO_RSP:
+			err = l2cap_information_rsp(conn, &cmd, data);
+			break;
+
+		default:
+			BT_ERR("Unknown signaling command 0x%2.2x", cmd.code);
+			err = -EINVAL;
+			break;
+		}
+
+		if (err) {
+			struct l2cap_cmd_rej rej;
+			BT_DBG("error %d", err);
+
+			/* FIXME: Map err to a valid reason */
+			rej.reason = __cpu_to_le16(0);
+			l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
+		}
+
+		data += cmd.len;
+		len  -= cmd.len;
+	}
+
+	kfree_skb(skb);
+}
+
+static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
+{
+	struct sock *sk;
+
+	sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
+	if (!sk) {
+		BT_DBG("unknown cid 0x%4.4x", cid);
+		goto drop;
+	}
+
+	BT_DBG("sk %p, len %d", sk, skb->len);
+
+	if (sk->sk_state != BT_CONNECTED)
+		goto drop;
+
+	if (l2cap_pi(sk)->imtu < skb->len)
+		goto drop;
+
+	/* If socket recv buffers overflows we drop data here
+	 * which is *bad* because L2CAP has to be reliable.
+	 * But we don't have any other choice. L2CAP doesn't
+	 * provide flow control mechanism. */
+
+	if (!sock_queue_rcv_skb(sk, skb))
+		goto done;
+
+drop:
+	kfree_skb(skb);
+
+done:
+	if (sk) bh_unlock_sock(sk);
+	return 0;
+}
+
+static inline int l2cap_conless_channel(struct l2cap_conn *conn, u16 psm, struct sk_buff *skb)
+{
+	struct sock *sk;
+
+	sk = l2cap_get_sock_by_psm(0, psm, conn->src);
+	if (!sk)
+		goto drop;
+
+	BT_DBG("sk %p, len %d", sk, skb->len);
+
+	if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED)
+		goto drop;
+
+	if (l2cap_pi(sk)->imtu < skb->len)
+		goto drop;
+
+	if (!sock_queue_rcv_skb(sk, skb))
+		goto done;
+
+drop:
+	kfree_skb(skb);
+
+done:
+	if (sk) bh_unlock_sock(sk);
+	return 0;
+}
+
+static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+	struct l2cap_hdr *lh = (void *) skb->data;
+	u16 cid, psm, len;
+
+	skb_pull(skb, L2CAP_HDR_SIZE);
+	cid = __le16_to_cpu(lh->cid);
+	len = __le16_to_cpu(lh->len);
+
+	BT_DBG("len %d, cid 0x%4.4x", len, cid);
+
+	switch (cid) {
+	case 0x0001:
+		l2cap_sig_channel(conn, skb);
+		break;
+
+	case 0x0002:
+		psm = get_unaligned((u16 *) skb->data);
+		skb_pull(skb, 2);
+		l2cap_conless_channel(conn, psm, skb);
+		break;
+
+	default:
+		l2cap_data_channel(conn, cid, skb);
+		break;
+	}
+}
+
+/* ---- L2CAP interface with lower layer (HCI) ---- */
+
+static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+{
+	int exact = 0, lm1 = 0, lm2 = 0;
+	register struct sock *sk;
+	struct hlist_node *node;
+
+	if (type != ACL_LINK)
+		return 0;
+
+	BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
+
+	/* Find listening sockets and check their link_mode */
+	read_lock(&l2cap_sk_list.lock);
+	sk_for_each(sk, node, &l2cap_sk_list.head) {
+		if (sk->sk_state != BT_LISTEN)
+			continue;
+
+		if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) {
+			lm1 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode);
+			exact++;
+		} else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
+			lm2 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode);
+	}
+	read_unlock(&l2cap_sk_list.lock);
+
+	return exact ? lm1 : lm2;
+}
+
+static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+{
+	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
+
+	if (hcon->type != ACL_LINK)
+		return 0;
+
+	if (!status) {
+		struct l2cap_conn *conn;
+
+		conn = l2cap_conn_add(hcon, status);
+		if (conn)
+			l2cap_conn_ready(conn);
+	} else 
+		l2cap_conn_del(hcon, bt_err(status));
+
+	return 0;
+}
+
+static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason)
+{
+	BT_DBG("hcon %p reason %d", hcon, reason);
+
+	if (hcon->type != ACL_LINK)
+		return 0;
+
+	l2cap_conn_del(hcon, bt_err(reason));
+	return 0;
+}
+
+static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
+{
+	struct l2cap_chan_list *l;
+	struct l2cap_conn *conn;
+	struct l2cap_conn_rsp rsp;
+	struct sock *sk;
+	int result;
+
+	if (!(conn = hcon->l2cap_data))
+		return 0;
+	l = &conn->chan_list;
+
+	BT_DBG("conn %p", conn);
+
+	read_lock(&l->lock);
+
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		bh_lock_sock(sk);
+
+		if (sk->sk_state != BT_CONNECT2 ||
+				(l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
+				(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) {
+			bh_unlock_sock(sk);
+			continue;
+		}
+
+		if (!status) {
+			sk->sk_state = BT_CONFIG;
+			result = 0;
+		} else {
+			sk->sk_state = BT_DISCONN;
+			l2cap_sock_set_timer(sk, HZ/10);
+			result = L2CAP_CR_SEC_BLOCK;
+		}
+
+		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = __cpu_to_le16(result);
+		rsp.status = __cpu_to_le16(0);
+		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+
+		bh_unlock_sock(sk);
+	}
+
+	read_unlock(&l->lock);
+	return 0;
+}
+
+static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
+{
+	struct l2cap_chan_list *l;
+	struct l2cap_conn *conn;
+	struct l2cap_conn_rsp rsp;
+	struct sock *sk;
+	int result;
+
+	if (!(conn = hcon->l2cap_data))
+		return 0;
+	l = &conn->chan_list;
+
+	BT_DBG("conn %p", conn);
+
+	read_lock(&l->lock);
+
+	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
+		bh_lock_sock(sk);
+
+		if (sk->sk_state != BT_CONNECT2) {
+			bh_unlock_sock(sk);
+			continue;
+		}
+
+		if (!status) {
+			sk->sk_state = BT_CONFIG;
+			result = 0;
+		} else {
+			sk->sk_state = BT_DISCONN;
+			l2cap_sock_set_timer(sk, HZ/10);
+			result = L2CAP_CR_SEC_BLOCK;
+		}
+
+		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = __cpu_to_le16(result);
+		rsp.status = __cpu_to_le16(0);
+		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+
+		if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)
+			hci_conn_change_link_key(hcon);
+
+		bh_unlock_sock(sk);
+	}
+
+	read_unlock(&l->lock);
+	return 0;
+}
+
+static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+{
+	struct l2cap_conn *conn = hcon->l2cap_data;
+
+	if (!conn && !(conn = l2cap_conn_add(hcon, 0)))
+		goto drop;
+
+	BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
+
+	if (flags & ACL_START) {
+		struct l2cap_hdr *hdr;
+		int len;
+
+		if (conn->rx_len) {
+			BT_ERR("Unexpected start frame (len %d)", skb->len);
+			kfree_skb(conn->rx_skb);
+			conn->rx_skb = NULL;
+			conn->rx_len = 0;
+			l2cap_conn_unreliable(conn, ECOMM);
+		}
+
+		if (skb->len < 2) {
+			BT_ERR("Frame is too short (len %d)", skb->len);
+			l2cap_conn_unreliable(conn, ECOMM);
+			goto drop;
+		}
+
+		hdr = (struct l2cap_hdr *) skb->data;
+		len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
+
+		if (len == skb->len) {
+			/* Complete frame received */
+			l2cap_recv_frame(conn, skb);
+			return 0;
+		}
+
+		BT_DBG("Start: total len %d, frag len %d", len, skb->len);
+
+		if (skb->len > len) {
+			BT_ERR("Frame is too long (len %d, expected len %d)",
+				skb->len, len);
+			l2cap_conn_unreliable(conn, ECOMM);
+			goto drop;
+		}
+
+		/* Allocate skb for the complete frame (with header) */
+		if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
+			goto drop;
+
+		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		conn->rx_len = len - skb->len;
+	} else {
+		BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
+
+		if (!conn->rx_len) {
+			BT_ERR("Unexpected continuation frame (len %d)", skb->len);
+			l2cap_conn_unreliable(conn, ECOMM);
+			goto drop;
+		}
+
+		if (skb->len > conn->rx_len) {
+			BT_ERR("Fragment is too long (len %d, expected %d)",
+					skb->len, conn->rx_len);
+			kfree_skb(conn->rx_skb);
+			conn->rx_skb = NULL;
+			conn->rx_len = 0;
+			l2cap_conn_unreliable(conn, ECOMM);
+			goto drop;
+		}
+
+		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		conn->rx_len -= skb->len;
+
+		if (!conn->rx_len) {
+			/* Complete frame received */
+			l2cap_recv_frame(conn, conn->rx_skb);
+			conn->rx_skb = NULL;
+		}
+	}
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* ---- Proc fs support ---- */
+#ifdef CONFIG_PROC_FS
+static void *l2cap_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	loff_t l = *pos;
+
+	read_lock_bh(&l2cap_sk_list.lock);
+
+	sk_for_each(sk, node, &l2cap_sk_list.head)
+		if (!l--)
+			goto found;
+	sk = NULL;
+found:
+	return sk;
+}
+
+static void *l2cap_seq_next(struct seq_file *seq, void *e, loff_t *pos)
+{
+	(*pos)++;
+	return sk_next(e);
+}
+
+static void l2cap_seq_stop(struct seq_file *seq, void *e)
+{
+	read_unlock_bh(&l2cap_sk_list.lock);
+}
+
+static int  l2cap_seq_show(struct seq_file *seq, void *e)
+{
+	struct sock *sk = e;
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+
+	seq_printf(seq, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n",
+			batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), 
+			sk->sk_state, pi->psm, pi->scid, pi->dcid, pi->imtu,
+			pi->omtu, pi->link_mode);
+	return 0;
+}
+
+static struct seq_operations l2cap_seq_ops = {
+	.start	= l2cap_seq_start,
+	.next	= l2cap_seq_next,
+	.stop	= l2cap_seq_stop,
+	.show	= l2cap_seq_show 
+};
+
+static int l2cap_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &l2cap_seq_ops);
+}
+
+static struct file_operations l2cap_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= l2cap_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init l2cap_proc_init(void)
+{
+	struct proc_dir_entry *p = create_proc_entry("l2cap", S_IRUGO, proc_bt);
+	if (!p)
+		return -ENOMEM;
+	p->owner     = THIS_MODULE;
+	p->proc_fops = &l2cap_seq_fops;
+	return 0;
+}
+
+static void __exit l2cap_proc_cleanup(void)
+{
+	remove_proc_entry("l2cap", proc_bt);
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int __init l2cap_proc_init(void)
+{
+	return 0;
+}
+
+static void __exit l2cap_proc_cleanup(void)
+{
+	return;
+}
+#endif /* CONFIG_PROC_FS */
+
+static struct proto_ops l2cap_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= l2cap_sock_release,
+	.bind		= l2cap_sock_bind,
+	.connect	= l2cap_sock_connect,
+	.listen		= l2cap_sock_listen,
+	.accept		= l2cap_sock_accept,
+	.getname	= l2cap_sock_getname,
+	.sendmsg	= l2cap_sock_sendmsg,
+	.recvmsg	= bt_sock_recvmsg,
+	.poll		= bt_sock_poll,
+	.mmap		= sock_no_mmap,
+	.socketpair	= sock_no_socketpair,
+	.ioctl		= sock_no_ioctl,
+	.shutdown	= l2cap_sock_shutdown,
+	.setsockopt	= l2cap_sock_setsockopt,
+	.getsockopt	= l2cap_sock_getsockopt
+};
+
+static struct net_proto_family l2cap_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= l2cap_sock_create,
+};
+
+static struct hci_proto l2cap_hci_proto = {
+	.name		= "L2CAP",
+	.id		= HCI_PROTO_L2CAP,
+	.connect_ind	= l2cap_connect_ind,
+	.connect_cfm	= l2cap_connect_cfm,
+	.disconn_ind	= l2cap_disconn_ind,
+	.auth_cfm	= l2cap_auth_cfm,
+	.encrypt_cfm	= l2cap_encrypt_cfm,
+	.recv_acldata	= l2cap_recv_acldata
+};
+
+static int __init l2cap_init(void)
+{
+	int err;
+	
+	err = proto_register(&l2cap_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
+	if (err < 0) {
+		BT_ERR("L2CAP socket registration failed");
+		goto error;
+	}
+
+	err = hci_register_proto(&l2cap_hci_proto);
+	if (err < 0) {
+		BT_ERR("L2CAP protocol registration failed");
+		bt_sock_unregister(BTPROTO_L2CAP);
+		goto error;
+	}
+
+	l2cap_proc_init();
+
+	BT_INFO("L2CAP ver %s", VERSION);
+	BT_INFO("L2CAP socket layer initialized");
+
+	return 0;
+
+error:
+	proto_unregister(&l2cap_proto);
+	return err;
+}
+
+static void __exit l2cap_exit(void)
+{
+	l2cap_proc_cleanup();
+
+	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
+		BT_ERR("L2CAP socket unregistration failed");
+
+	if (hci_unregister_proto(&l2cap_hci_proto) < 0)
+		BT_ERR("L2CAP protocol unregistration failed");
+
+	proto_unregister(&l2cap_proto);
+}
+
+void l2cap_load(void)
+{
+	/* Dummy function to trigger automatic L2CAP module loading by
+	 * other modules that use L2CAP sockets but don't use any other
+	 * symbols from it. */
+	return;
+}
+EXPORT_SYMBOL(l2cap_load);
+
+module_init(l2cap_init);
+module_exit(l2cap_exit);
+
+MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-0");
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
new file mode 100644
index 00000000000..9efb0a09361
--- /dev/null
+++ b/net/bluetooth/lib.c
@@ -0,0 +1,178 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth kernel library. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <asm/errno.h>
+
+#include <net/bluetooth/bluetooth.h>
+
+void bt_dump(char *pref, __u8 *buf, int count)
+{
+	char *ptr;
+	char line[100];
+	unsigned int i;
+
+	printk(KERN_INFO "%s: dump, len %d\n", pref, count);
+
+	ptr = line;
+	*ptr = 0;
+	for (i = 0; i < count; i++) {
+		ptr += sprintf(ptr, " %2.2X", buf[i]);
+
+		if (i && !((i + 1) % 20)) {
+			printk(KERN_INFO "%s:%s\n", pref, line);
+			ptr = line;
+			*ptr = 0;
+		}
+	}
+
+	if (line[0])
+		printk(KERN_INFO "%s:%s\n", pref, line);
+}
+EXPORT_SYMBOL(bt_dump);
+
+void baswap(bdaddr_t *dst, bdaddr_t *src)
+{
+	unsigned char *d = (unsigned char *) dst;
+	unsigned char *s = (unsigned char *) src;
+	unsigned int i;
+
+	for (i = 0; i < 6; i++)
+		d[i] = s[5 - i];
+}
+EXPORT_SYMBOL(baswap);
+
+char *batostr(bdaddr_t *ba)
+{
+	static char str[2][18];
+	static int i = 1;
+
+	i ^= 1;
+	sprintf(str[i], "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X",
+		ba->b[0], ba->b[1], ba->b[2],
+		ba->b[3], ba->b[4], ba->b[5]);
+
+	return str[i];
+}
+EXPORT_SYMBOL(batostr);
+
+/* Bluetooth error codes to Unix errno mapping */
+int bt_err(__u16 code)
+{
+	switch (code) {
+	case 0:
+		return 0;
+
+	case 0x01:
+		return EBADRQC;
+
+	case 0x02:
+		return ENOTCONN;
+
+	case 0x03:
+		return EIO;
+
+	case 0x04:
+		return EHOSTDOWN;
+
+	case 0x05:
+		return EACCES;
+
+	case 0x06:
+		return EBADE;
+
+	case 0x07:
+		return ENOMEM;
+
+	case 0x08:
+		return ETIMEDOUT;
+
+	case 0x09:
+		return EMLINK;
+
+	case 0x0a:
+		return EMLINK;
+
+	case 0x0b:
+		return EALREADY;
+
+	case 0x0c:
+		return EBUSY;
+
+	case 0x0d:
+	case 0x0e:
+	case 0x0f:
+		return ECONNREFUSED;
+
+	case 0x10:
+		return ETIMEDOUT;
+
+	case 0x11:
+	case 0x27:
+	case 0x29:
+	case 0x20:
+		return EOPNOTSUPP;
+
+	case 0x12:
+		return EINVAL;
+
+	case 0x13:
+	case 0x14:
+	case 0x15:
+		return ECONNRESET;
+
+	case 0x16:
+		return ECONNABORTED;
+
+	case 0x17:
+		return ELOOP;
+
+	case 0x18:
+		return EACCES;
+
+	case 0x1a:
+		return EPROTONOSUPPORT;
+
+	case 0x1b:
+		return ECONNREFUSED;
+
+	case 0x19:
+	case 0x1e:
+	case 0x23:
+	case 0x24:
+	case 0x25:
+		return EPROTO;
+
+	default:
+		return ENOSYS;
+	}
+}
+EXPORT_SYMBOL(bt_err);
diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig
new file mode 100644
index 00000000000..405a0e61e7d
--- /dev/null
+++ b/net/bluetooth/rfcomm/Kconfig
@@ -0,0 +1,17 @@
+config BT_RFCOMM
+	tristate "RFCOMM protocol support"
+	depends on BT && BT_L2CAP
+	help
+	  RFCOMM provides connection oriented stream transport.  RFCOMM
+	  support is required for Dialup Networking, OBEX and other Bluetooth
+	  applications.
+
+	  Say Y here to compile RFCOMM support into the kernel or say M to
+	  compile it as module (rfcomm).
+
+config BT_RFCOMM_TTY
+	bool "RFCOMM TTY support"
+	depends on BT_RFCOMM
+	help
+	  This option enables TTY emulation support for RFCOMM channels.
+
diff --git a/net/bluetooth/rfcomm/Makefile b/net/bluetooth/rfcomm/Makefile
new file mode 100644
index 00000000000..aecec45ec68
--- /dev/null
+++ b/net/bluetooth/rfcomm/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux Bluetooth RFCOMM layer.
+#
+
+obj-$(CONFIG_BT_RFCOMM) += rfcomm.o
+
+rfcomm-y			:= core.o sock.o crc.o
+rfcomm-$(CONFIG_BT_RFCOMM_TTY)	+= tty.o
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
new file mode 100644
index 00000000000..e9e6fda66f1
--- /dev/null
+++ b/net/bluetooth/rfcomm/core.c
@@ -0,0 +1,2127 @@
+/* 
+   RFCOMM implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* 
+   RPN support    -    Dirk Husemann <hud@zurich.ibm.com>
+*/
+
+/*
+ * Bluetooth RFCOMM core.
+ *
+ * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+#include <net/bluetooth/rfcomm.h>
+
+#define VERSION "1.5"
+
+#ifndef CONFIG_BT_RFCOMM_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry *proc_bt_rfcomm;
+#endif
+
+static struct task_struct *rfcomm_thread;
+
+static DECLARE_MUTEX(rfcomm_sem);
+#define rfcomm_lock()	down(&rfcomm_sem);
+#define rfcomm_unlock()	up(&rfcomm_sem);
+
+static unsigned long rfcomm_event;
+
+static LIST_HEAD(session_list);
+static atomic_t terminate, running;
+
+static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len);
+static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci);
+static int rfcomm_send_disc(struct rfcomm_session *s, u8 dlci);
+static int rfcomm_queue_disc(struct rfcomm_dlc *d);
+static int rfcomm_send_nsc(struct rfcomm_session *s, int cr, u8 type);
+static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d);
+static int rfcomm_send_msc(struct rfcomm_session *s, int cr, u8 dlci, u8 v24_sig);
+static int rfcomm_send_test(struct rfcomm_session *s, int cr, u8 *pattern, int len);
+static int rfcomm_send_credits(struct rfcomm_session *s, u8 addr, u8 credits);
+static void rfcomm_make_uih(struct sk_buff *skb, u8 addr);
+
+static void rfcomm_process_connect(struct rfcomm_session *s);
+
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err);
+static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
+static void rfcomm_session_del(struct rfcomm_session *s);
+
+/* ---- RFCOMM frame parsing macros ---- */
+#define __get_dlci(b)     ((b & 0xfc) >> 2)
+#define __get_channel(b)  ((b & 0xf8) >> 3)
+#define __get_dir(b)      ((b & 0x04) >> 2)
+#define __get_type(b)     ((b & 0xef))
+
+#define __test_ea(b)      ((b & 0x01))
+#define __test_cr(b)      ((b & 0x02))
+#define __test_pf(b)      ((b & 0x10))
+
+#define __addr(cr, dlci)       (((dlci & 0x3f) << 2) | (cr << 1) | 0x01)
+#define __ctrl(type, pf)       (((type & 0xef) | (pf << 4)))
+#define __dlci(dir, chn)       (((chn & 0x1f) << 1) | dir)
+#define __srv_channel(dlci)    (dlci >> 1)
+#define __dir(dlci)            (dlci & 0x01)
+
+#define __len8(len)       (((len) << 1) | 1)
+#define __len16(len)      ((len) << 1)
+
+/* MCC macros */
+#define __mcc_type(cr, type)   (((type << 2) | (cr << 1) | 0x01))
+#define __get_mcc_type(b) ((b & 0xfc) >> 2)
+#define __get_mcc_len(b)  ((b & 0xfe) >> 1)
+
+/* RPN macros */
+#define __rpn_line_settings(data, stop, parity)  ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x3) << 3))
+#define __get_rpn_data_bits(line) ((line) & 0x3)
+#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
+#define __get_rpn_parity(line)    (((line) >> 3) & 0x3)
+
+static inline void rfcomm_schedule(uint event)
+{
+	if (!rfcomm_thread)
+		return;
+	//set_bit(event, &rfcomm_event);
+	set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
+	wake_up_process(rfcomm_thread);
+}
+
+static inline void rfcomm_session_put(struct rfcomm_session *s)
+{
+	if (atomic_dec_and_test(&s->refcnt))
+		rfcomm_session_del(s);
+}
+
+/* ---- RFCOMM FCS computation ---- */
+
+/* CRC on 2 bytes */
+#define __crc(data) (rfcomm_crc_table[rfcomm_crc_table[0xff ^ data[0]] ^ data[1]])
+
+/* FCS on 2 bytes */ 
+static inline u8 __fcs(u8 *data)
+{
+	return (0xff - __crc(data));
+}
+
+/* FCS on 3 bytes */ 
+static inline u8 __fcs2(u8 *data)
+{
+	return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]);
+}
+
+/* Check FCS */
+static inline int __check_fcs(u8 *data, int type, u8 fcs)
+{
+	u8 f = __crc(data);
+
+	if (type != RFCOMM_UIH)
+		f = rfcomm_crc_table[f ^ data[2]];
+
+	return rfcomm_crc_table[f ^ fcs] != 0xcf;
+}
+
+/* ---- L2CAP callbacks ---- */
+static void rfcomm_l2state_change(struct sock *sk)
+{
+	BT_DBG("%p state %d", sk, sk->sk_state);
+	rfcomm_schedule(RFCOMM_SCHED_STATE);
+}
+
+static void rfcomm_l2data_ready(struct sock *sk, int bytes)
+{
+	BT_DBG("%p bytes %d", sk, bytes);
+	rfcomm_schedule(RFCOMM_SCHED_RX);
+}
+
+static int rfcomm_l2sock_create(struct socket **sock)
+{
+	int err;
+
+	BT_DBG("");
+
+	err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
+	if (!err) {
+		struct sock *sk = (*sock)->sk;
+		sk->sk_data_ready   = rfcomm_l2data_ready;
+		sk->sk_state_change = rfcomm_l2state_change;
+	}
+	return err;
+}
+
+/* ---- RFCOMM DLCs ---- */
+static void rfcomm_dlc_timeout(unsigned long arg)
+{
+	struct rfcomm_dlc *d = (void *) arg;
+
+	BT_DBG("dlc %p state %ld", d, d->state);
+
+	set_bit(RFCOMM_TIMED_OUT, &d->flags);
+	rfcomm_dlc_put(d);
+	rfcomm_schedule(RFCOMM_SCHED_TIMEO);
+}
+
+static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout)
+{
+	BT_DBG("dlc %p state %ld timeout %ld", d, d->state, timeout);
+
+	if (!mod_timer(&d->timer, jiffies + timeout))
+		rfcomm_dlc_hold(d);
+}
+
+static void rfcomm_dlc_clear_timer(struct rfcomm_dlc *d)
+{
+	BT_DBG("dlc %p state %ld", d, d->state);
+
+	if (timer_pending(&d->timer) && del_timer(&d->timer))
+		rfcomm_dlc_put(d);
+}
+
+static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
+{
+	BT_DBG("%p", d);
+
+	d->state      = BT_OPEN;
+	d->flags      = 0;
+	d->mscex      = 0;
+	d->mtu        = RFCOMM_DEFAULT_MTU;
+	d->v24_sig    = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
+
+	d->cfc        = RFCOMM_CFC_DISABLED;
+	d->rx_credits = RFCOMM_DEFAULT_CREDITS;
+}
+
+struct rfcomm_dlc *rfcomm_dlc_alloc(int prio)
+{
+	struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio);
+	if (!d)
+		return NULL;
+	memset(d, 0, sizeof(*d));
+
+	init_timer(&d->timer);
+	d->timer.function = rfcomm_dlc_timeout;
+	d->timer.data = (unsigned long) d;
+
+	skb_queue_head_init(&d->tx_queue);
+	spin_lock_init(&d->lock);
+	atomic_set(&d->refcnt, 1);
+
+	rfcomm_dlc_clear_state(d);
+	
+	BT_DBG("%p", d);
+	return d;
+}
+
+void rfcomm_dlc_free(struct rfcomm_dlc *d)
+{
+	BT_DBG("%p", d);
+
+	skb_queue_purge(&d->tx_queue);
+	kfree(d);
+}
+
+static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)
+{
+	BT_DBG("dlc %p session %p", d, s);
+
+	rfcomm_session_hold(s);
+
+	rfcomm_dlc_hold(d);
+	list_add(&d->list, &s->dlcs);
+	d->session = s;
+}
+
+static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
+{
+	struct rfcomm_session *s = d->session;
+
+	BT_DBG("dlc %p refcnt %d session %p", d, atomic_read(&d->refcnt), s);
+
+	list_del(&d->list);
+	d->session = NULL;
+	rfcomm_dlc_put(d);
+
+	rfcomm_session_put(s);
+}
+
+static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_dlc *d;
+	struct list_head *p;
+
+	list_for_each(p, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+		if (d->dlci == dlci)
+			return d;
+	}
+	return NULL;
+}
+
+static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel)
+{
+	struct rfcomm_session *s;
+	int err = 0;
+	u8 dlci;
+
+	BT_DBG("dlc %p state %ld %s %s channel %d", 
+			d, d->state, batostr(src), batostr(dst), channel);
+
+	if (channel < 1 || channel > 30)
+		return -EINVAL;
+
+	if (d->state != BT_OPEN && d->state != BT_CLOSED)
+		return 0;
+
+	s = rfcomm_session_get(src, dst);
+	if (!s) {
+		s = rfcomm_session_create(src, dst, &err);
+		if (!s)
+			return err;
+	}
+
+	dlci = __dlci(!s->initiator, channel);
+
+	/* Check if DLCI already exists */
+	if (rfcomm_dlc_get(s, dlci))
+		return -EBUSY;
+
+	rfcomm_dlc_clear_state(d);
+
+	d->dlci     = dlci;
+	d->addr     = __addr(s->initiator, dlci);
+	d->priority = 7;
+
+	d->state    = BT_CONFIG;
+	rfcomm_dlc_link(s, d);
+
+	d->mtu = s->mtu;
+	d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc;
+
+	if (s->state == BT_CONNECTED)
+		rfcomm_send_pn(s, 1, d);
+	rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
+	return 0;
+}
+
+int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel)
+{
+	int r;
+
+	rfcomm_lock();
+
+	r = __rfcomm_dlc_open(d, src, dst, channel);
+
+	rfcomm_unlock();
+	return r;
+}
+
+static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
+{
+	struct rfcomm_session *s = d->session;
+	if (!s)
+		return 0;
+
+	BT_DBG("dlc %p state %ld dlci %d err %d session %p",
+			d, d->state, d->dlci, err, s);
+
+	switch (d->state) {
+	case BT_CONNECTED:
+	case BT_CONFIG:
+	case BT_CONNECT:
+		d->state = BT_DISCONN;
+		if (skb_queue_empty(&d->tx_queue)) {
+			rfcomm_send_disc(s, d->dlci);
+			rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT);
+		} else {
+			rfcomm_queue_disc(d);
+			rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT * 2);
+		}
+		break;
+
+	default:
+		rfcomm_dlc_clear_timer(d);
+
+		rfcomm_dlc_lock(d);
+		d->state = BT_CLOSED;
+		d->state_change(d, err);
+		rfcomm_dlc_unlock(d);
+
+		skb_queue_purge(&d->tx_queue);
+		rfcomm_session_put(s);
+
+		rfcomm_dlc_unlink(d);
+	}
+
+	return 0;
+}
+
+int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
+{
+	int r;
+
+	rfcomm_lock();
+
+	r = __rfcomm_dlc_close(d, err);
+
+	rfcomm_unlock();
+	return r;
+}
+
+int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
+{
+	int len = skb->len;
+
+	if (d->state != BT_CONNECTED)
+		return -ENOTCONN;
+
+	BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+
+	if (len > d->mtu)
+		return -EINVAL;
+
+	rfcomm_make_uih(skb, d->addr);
+	skb_queue_tail(&d->tx_queue, skb);
+
+	if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+		rfcomm_schedule(RFCOMM_SCHED_TX);
+	return len;
+}
+
+void fastcall __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
+{
+	BT_DBG("dlc %p state %ld", d, d->state);
+
+	if (!d->cfc) {
+		d->v24_sig |= RFCOMM_V24_FC;
+		set_bit(RFCOMM_MSC_PENDING, &d->flags);
+	}
+	rfcomm_schedule(RFCOMM_SCHED_TX);
+}
+
+void fastcall __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
+{
+	BT_DBG("dlc %p state %ld", d, d->state);
+
+	if (!d->cfc) {
+		d->v24_sig &= ~RFCOMM_V24_FC;
+		set_bit(RFCOMM_MSC_PENDING, &d->flags);
+	}
+	rfcomm_schedule(RFCOMM_SCHED_TX);
+}
+
+/* 
+   Set/get modem status functions use _local_ status i.e. what we report
+   to the other side.
+   Remote status is provided by dlc->modem_status() callback.
+ */
+int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig)
+{
+	BT_DBG("dlc %p state %ld v24_sig 0x%x", 
+			d, d->state, v24_sig);
+
+	if (test_bit(RFCOMM_RX_THROTTLED, &d->flags))
+		v24_sig |= RFCOMM_V24_FC;
+	else
+		v24_sig &= ~RFCOMM_V24_FC;
+	
+	d->v24_sig = v24_sig;
+
+	if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags))
+		rfcomm_schedule(RFCOMM_SCHED_TX);
+
+	return 0;
+}
+
+int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig)
+{
+	BT_DBG("dlc %p state %ld v24_sig 0x%x", 
+			d, d->state, d->v24_sig);
+
+	*v24_sig = d->v24_sig;
+	return 0;
+}
+
+/* ---- RFCOMM sessions ---- */
+static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
+{
+	struct rfcomm_session *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+	memset(s, 0, sizeof(*s));
+
+	BT_DBG("session %p sock %p", s, sock);
+
+	INIT_LIST_HEAD(&s->dlcs);
+	s->state = state;
+	s->sock  = sock;
+
+	s->mtu = RFCOMM_DEFAULT_MTU;
+	s->cfc = RFCOMM_CFC_UNKNOWN;
+
+	/* Do not increment module usage count for listening sessions.
+	 * Otherwise we won't be able to unload the module. */
+	if (state != BT_LISTEN)
+		if (!try_module_get(THIS_MODULE)) {
+			kfree(s);
+			return NULL;
+		}
+
+	list_add(&s->list, &session_list);
+
+	return s;
+}
+
+static void rfcomm_session_del(struct rfcomm_session *s)
+{
+	int state = s->state;
+
+	BT_DBG("session %p state %ld", s, s->state);
+
+	list_del(&s->list);
+
+	if (state == BT_CONNECTED)
+		rfcomm_send_disc(s, 0);
+
+	sock_release(s->sock);
+	kfree(s);
+
+	if (state != BT_LISTEN)
+		module_put(THIS_MODULE);
+}
+
+static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
+{
+	struct rfcomm_session *s;
+	struct list_head *p, *n;
+	struct bt_sock *sk;
+	list_for_each_safe(p, n, &session_list) {
+		s = list_entry(p, struct rfcomm_session, list);
+		sk = bt_sk(s->sock->sk); 
+
+		if ((!bacmp(src, BDADDR_ANY) || !bacmp(&sk->src, src)) &&
+				!bacmp(&sk->dst, dst))
+			return s;
+	}
+	return NULL;
+}
+
+static void rfcomm_session_close(struct rfcomm_session *s, int err)
+{
+	struct rfcomm_dlc *d;
+	struct list_head *p, *n;
+
+	BT_DBG("session %p state %ld err %d", s, s->state, err);
+
+	rfcomm_session_hold(s);
+
+	s->state = BT_CLOSED;
+
+	/* Close all dlcs */
+	list_for_each_safe(p, n, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+		d->state = BT_CLOSED;
+		__rfcomm_dlc_close(d, err);
+	}
+
+	rfcomm_session_put(s);
+}
+
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err)
+{
+	struct rfcomm_session *s = NULL;
+	struct sockaddr_l2 addr;
+	struct socket *sock;
+	struct sock *sk;
+
+	BT_DBG("%s %s", batostr(src), batostr(dst));
+
+	*err = rfcomm_l2sock_create(&sock);
+	if (*err < 0)
+		return NULL;
+
+	bacpy(&addr.l2_bdaddr, src);
+	addr.l2_family = AF_BLUETOOTH;
+	addr.l2_psm    = 0;
+	*err = sock->ops->bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+	if (*err < 0)
+		goto failed;
+
+	/* Set L2CAP options */
+	sk = sock->sk;
+	lock_sock(sk);
+	l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU;
+	release_sock(sk);
+
+	s = rfcomm_session_add(sock, BT_BOUND);
+	if (!s) {
+		*err = -ENOMEM;
+		goto failed;
+	}
+
+	rfcomm_session_hold(s);
+
+	s->initiator = 1;
+
+	bacpy(&addr.l2_bdaddr, dst);
+	addr.l2_family = AF_BLUETOOTH;
+	addr.l2_psm    = htobs(RFCOMM_PSM);
+	*err = sock->ops->connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
+	if (*err == 0 || *err == -EAGAIN)
+		return s;
+
+	rfcomm_session_del(s);
+	return NULL;
+
+failed:
+	sock_release(sock);
+	return NULL;
+}
+
+void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, bdaddr_t *dst)
+{
+	struct sock *sk = s->sock->sk;
+	if (src)
+		bacpy(src, &bt_sk(sk)->src);
+	if (dst)
+		bacpy(dst, &bt_sk(sk)->dst);
+}
+
+/* ---- RFCOMM frame sending ---- */
+static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len)
+{
+	struct socket *sock = s->sock;
+	struct kvec iv = { data, len };
+	struct msghdr msg;
+
+	BT_DBG("session %p len %d", s, len);
+
+	memset(&msg, 0, sizeof(msg));
+
+	return kernel_sendmsg(sock, &msg, &iv, 1, len);
+}
+
+static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_cmd cmd;
+
+	BT_DBG("%p dlci %d", s, dlci);
+
+	cmd.addr = __addr(s->initiator, dlci);
+	cmd.ctrl = __ctrl(RFCOMM_SABM, 1);
+	cmd.len  = __len8(0);
+	cmd.fcs  = __fcs2((u8 *) &cmd);
+
+	return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd));
+}
+
+static int rfcomm_send_ua(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_cmd cmd;
+
+	BT_DBG("%p dlci %d", s, dlci);
+
+	cmd.addr = __addr(!s->initiator, dlci);
+	cmd.ctrl = __ctrl(RFCOMM_UA, 1);
+	cmd.len  = __len8(0);
+	cmd.fcs  = __fcs2((u8 *) &cmd);
+
+	return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd));
+}
+
+static int rfcomm_send_disc(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_cmd cmd;
+
+	BT_DBG("%p dlci %d", s, dlci);
+
+	cmd.addr = __addr(s->initiator, dlci);
+	cmd.ctrl = __ctrl(RFCOMM_DISC, 1);
+	cmd.len  = __len8(0);
+	cmd.fcs  = __fcs2((u8 *) &cmd);
+
+	return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd));
+}
+
+static int rfcomm_queue_disc(struct rfcomm_dlc *d)
+{
+	struct rfcomm_cmd *cmd;
+	struct sk_buff *skb;
+
+	BT_DBG("dlc %p dlci %d", d, d->dlci);
+
+	skb = alloc_skb(sizeof(*cmd), GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (void *) __skb_put(skb, sizeof(*cmd));
+	cmd->addr = d->addr;
+	cmd->ctrl = __ctrl(RFCOMM_DISC, 1);
+	cmd->len  = __len8(0);
+	cmd->fcs  = __fcs2((u8 *) cmd);
+
+	skb_queue_tail(&d->tx_queue, skb);
+	rfcomm_schedule(RFCOMM_SCHED_TX);
+	return 0;
+}
+
+static int rfcomm_send_dm(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_cmd cmd;
+
+	BT_DBG("%p dlci %d", s, dlci);
+
+	cmd.addr = __addr(!s->initiator, dlci);
+	cmd.ctrl = __ctrl(RFCOMM_DM, 1);
+	cmd.len  = __len8(0);
+	cmd.fcs  = __fcs2((u8 *) &cmd);
+
+	return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd));
+}
+
+static int rfcomm_send_nsc(struct rfcomm_session *s, int cr, u8 type)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d type %d", s, cr, type);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc) + 1);
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_NSC);
+	mcc->len  = __len8(1);
+
+	/* Type that we didn't like */
+	*ptr = __mcc_type(cr, type); ptr++;
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	struct rfcomm_pn  *pn;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d dlci %d mtu %d", s, cr, d->dlci, d->mtu);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc) + sizeof(*pn));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_PN);
+	mcc->len  = __len8(sizeof(*pn));
+
+	pn = (void *) ptr; ptr += sizeof(*pn);
+	pn->dlci        = d->dlci;
+	pn->priority    = d->priority;
+	pn->ack_timer   = 0;
+	pn->max_retrans = 0;
+
+	if (s->cfc) {
+		pn->flow_ctrl = cr ? 0xf0 : 0xe0;
+		pn->credits = RFCOMM_DEFAULT_CREDITS;
+	} else {
+		pn->flow_ctrl = 0;
+		pn->credits   = 0;
+	}
+
+	pn->mtu = htobs(d->mtu);
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
+			   u8 bit_rate, u8 data_bits, u8 stop_bits,
+			   u8 parity, u8 flow_ctrl_settings, 
+			   u8 xon_char, u8 xoff_char, u16 param_mask)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	struct rfcomm_rpn *rpn;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x"
+	       "flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", 
+			s, cr, dlci, bit_rate, data_bits, stop_bits, parity, 
+			flow_ctrl_settings, xon_char, xoff_char, param_mask);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc) + sizeof(*rpn));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_RPN);
+	mcc->len  = __len8(sizeof(*rpn));
+
+	rpn = (void *) ptr; ptr += sizeof(*rpn);
+	rpn->dlci          = __addr(1, dlci);
+	rpn->bit_rate      = bit_rate;
+	rpn->line_settings = __rpn_line_settings(data_bits, stop_bits, parity);
+	rpn->flow_ctrl     = flow_ctrl_settings;
+	rpn->xon_char      = xon_char;
+	rpn->xoff_char     = xoff_char;
+	rpn->param_mask    = param_mask;
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_rls(struct rfcomm_session *s, int cr, u8 dlci, u8 status)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	struct rfcomm_rls *rls;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d status 0x%x", s, cr, status);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc) + sizeof(*rls));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_RLS);
+	mcc->len  = __len8(sizeof(*rls));
+
+	rls = (void *) ptr; ptr += sizeof(*rls);
+	rls->dlci   = __addr(1, dlci);
+	rls->status = status;
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_msc(struct rfcomm_session *s, int cr, u8 dlci, u8 v24_sig)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	struct rfcomm_msc *msc;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d v24 0x%x", s, cr, v24_sig);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc) + sizeof(*msc));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_MSC);
+	mcc->len  = __len8(sizeof(*msc));
+
+	msc = (void *) ptr; ptr += sizeof(*msc);
+	msc->dlci    = __addr(1, dlci);
+	msc->v24_sig = v24_sig | 0x01;
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_fcoff(struct rfcomm_session *s, int cr)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d", s, cr);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_FCOFF);
+	mcc->len  = __len8(0);
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_fcon(struct rfcomm_session *s, int cr)
+{
+	struct rfcomm_hdr *hdr;
+	struct rfcomm_mcc *mcc;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p cr %d", s, cr);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = __addr(s->initiator, 0);
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+	hdr->len  = __len8(sizeof(*mcc));
+
+	mcc = (void *) ptr; ptr += sizeof(*mcc);
+	mcc->type = __mcc_type(cr, RFCOMM_FCON);
+	mcc->len  = __len8(0);
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static int rfcomm_send_test(struct rfcomm_session *s, int cr, u8 *pattern, int len)
+{
+	struct socket *sock = s->sock;
+	struct kvec iv[3];
+	struct msghdr msg;
+	unsigned char hdr[5], crc[1];
+
+	if (len > 125)
+		return -EINVAL;
+
+	BT_DBG("%p cr %d", s, cr);
+
+	hdr[0] = __addr(s->initiator, 0);
+	hdr[1] = __ctrl(RFCOMM_UIH, 0);
+	hdr[2] = 0x01 | ((len + 2) << 1);
+	hdr[3] = 0x01 | ((cr & 0x01) << 1) | (RFCOMM_TEST << 2);
+	hdr[4] = 0x01 | (len << 1);
+
+	crc[0] = __fcs(hdr);
+
+	iv[0].iov_base = hdr;
+	iv[0].iov_len  = 5;
+	iv[1].iov_base = pattern;
+	iv[1].iov_len  = len;
+	iv[2].iov_base = crc;
+	iv[2].iov_len  = 1;
+
+	memset(&msg, 0, sizeof(msg));
+
+	return kernel_sendmsg(sock, &msg, iv, 3, 6 + len);
+}
+
+static int rfcomm_send_credits(struct rfcomm_session *s, u8 addr, u8 credits)
+{
+	struct rfcomm_hdr *hdr;
+	u8 buf[16], *ptr = buf;
+
+	BT_DBG("%p addr %d credits %d", s, addr, credits);
+
+	hdr = (void *) ptr; ptr += sizeof(*hdr);
+	hdr->addr = addr;
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 1);
+	hdr->len  = __len8(0);
+
+	*ptr = credits; ptr++;
+
+	*ptr = __fcs(buf); ptr++;
+
+	return rfcomm_send_frame(s, buf, ptr - buf);
+}
+
+static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
+{
+	struct rfcomm_hdr *hdr;
+	int len = skb->len;
+	u8 *crc;
+
+	if (len > 127) {
+		hdr = (void *) skb_push(skb, 4);
+		put_unaligned(htobs(__len16(len)), (u16 *) &hdr->len);
+	} else {
+		hdr = (void *) skb_push(skb, 3);
+		hdr->len = __len8(len);
+	}
+	hdr->addr = addr;
+	hdr->ctrl = __ctrl(RFCOMM_UIH, 0);
+
+	crc = skb_put(skb, 1);
+	*crc = __fcs((void *) hdr);
+}
+
+/* ---- RFCOMM frame reception ---- */
+static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
+{
+	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
+
+	if (dlci) {
+		/* Data channel */
+		struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
+		if (!d) {
+			rfcomm_send_dm(s, dlci);
+			return 0;
+		}
+
+		switch (d->state) {
+		case BT_CONNECT:
+			rfcomm_dlc_clear_timer(d);
+
+			rfcomm_dlc_lock(d);
+			d->state = BT_CONNECTED;
+			d->state_change(d, 0);
+			rfcomm_dlc_unlock(d);
+
+			rfcomm_send_msc(s, 1, dlci, d->v24_sig);
+			break;
+
+		case BT_DISCONN:
+			d->state = BT_CLOSED;
+			__rfcomm_dlc_close(d, 0);
+			break;
+		}
+	} else {
+		/* Control channel */
+		switch (s->state) {
+		case BT_CONNECT:
+			s->state = BT_CONNECTED;
+			rfcomm_process_connect(s);
+			break;
+		}
+	}
+	return 0;
+}
+
+static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
+{
+	int err = 0;
+
+	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
+
+	if (dlci) {
+		/* Data DLC */
+		struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
+		if (d) {
+			if (d->state == BT_CONNECT || d->state == BT_CONFIG)
+				err = ECONNREFUSED;
+			else
+				err = ECONNRESET;
+
+			d->state = BT_CLOSED;
+			__rfcomm_dlc_close(d, err);
+		}
+	} else {
+		if (s->state == BT_CONNECT)
+			err = ECONNREFUSED;
+		else
+			err = ECONNRESET;
+
+		s->state = BT_CLOSED;
+		rfcomm_session_close(s, err);
+	}
+	return 0;
+}
+
+static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
+{
+	int err = 0;
+
+	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
+
+	if (dlci) {
+		struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
+		if (d) {
+			rfcomm_send_ua(s, dlci);
+
+			if (d->state == BT_CONNECT || d->state == BT_CONFIG)
+				err = ECONNREFUSED;
+			else
+				err = ECONNRESET;
+
+			d->state = BT_CLOSED;
+			__rfcomm_dlc_close(d, err);
+		} else 
+			rfcomm_send_dm(s, dlci);
+			
+	} else {
+		rfcomm_send_ua(s, 0);
+
+		if (s->state == BT_CONNECT)
+			err = ECONNREFUSED;
+		else
+			err = ECONNRESET;
+
+		s->state = BT_CLOSED;
+		rfcomm_session_close(s, err);
+	}
+
+	return 0;
+}
+
+static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d)
+{
+	struct sock *sk = d->session->sock->sk;
+
+	if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) {
+		if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon))
+			return 1;
+	} else if (d->link_mode & RFCOMM_LM_AUTH) {
+		if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon))
+			return 1;
+	}
+
+	return 0;
+}
+
+static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
+{
+	BT_DBG("dlc %p", d);
+
+	rfcomm_send_ua(d->session, d->dlci);
+
+	rfcomm_dlc_lock(d);
+	d->state = BT_CONNECTED;
+	d->state_change(d, 0);
+	rfcomm_dlc_unlock(d);
+
+	rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
+}
+
+static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
+{
+	struct rfcomm_dlc *d;
+	u8 channel;
+
+	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
+
+	if (!dlci) {
+		rfcomm_send_ua(s, 0);
+
+		if (s->state == BT_OPEN) {
+			s->state = BT_CONNECTED;
+			rfcomm_process_connect(s);
+		}
+		return 0;
+	}
+
+	/* Check if DLC exists */
+	d = rfcomm_dlc_get(s, dlci);
+	if (d) {
+		if (d->state == BT_OPEN) {
+			/* DLC was previously opened by PN request */
+			if (rfcomm_check_link_mode(d)) {
+				set_bit(RFCOMM_AUTH_PENDING, &d->flags);
+				rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
+				return 0;
+			}
+
+			rfcomm_dlc_accept(d);
+		}
+		return 0;
+	}
+
+	/* Notify socket layer about incoming connection */
+	channel = __srv_channel(dlci);
+	if (rfcomm_connect_ind(s, channel, &d)) {
+		d->dlci = dlci;
+		d->addr = __addr(s->initiator, dlci);
+		rfcomm_dlc_link(s, d);
+
+		if (rfcomm_check_link_mode(d)) {
+			set_bit(RFCOMM_AUTH_PENDING, &d->flags);
+			rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
+			return 0;
+		}
+
+		rfcomm_dlc_accept(d);
+	} else {
+		rfcomm_send_dm(s, dlci);
+	}
+
+	return 0;
+}
+
+static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
+{
+	struct rfcomm_session *s = d->session;
+
+	BT_DBG("dlc %p state %ld dlci %d mtu %d fc 0x%x credits %d", 
+			d, d->state, d->dlci, pn->mtu, pn->flow_ctrl, pn->credits);
+
+	if (pn->flow_ctrl == 0xf0 || pn->flow_ctrl == 0xe0) {
+		d->cfc = s->cfc = RFCOMM_CFC_ENABLED;
+		d->tx_credits = pn->credits;
+	} else {
+		d->cfc = s->cfc = RFCOMM_CFC_DISABLED;
+		set_bit(RFCOMM_TX_THROTTLED, &d->flags);
+	}
+
+	d->priority = pn->priority;
+
+	d->mtu = s->mtu = btohs(pn->mtu);
+
+	return 0;
+}
+
+static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb)
+{
+	struct rfcomm_pn *pn = (void *) skb->data;
+	struct rfcomm_dlc *d;
+	u8 dlci = pn->dlci;
+
+	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
+
+	if (!dlci)
+		return 0;
+
+	d = rfcomm_dlc_get(s, dlci);
+	if (d) {
+		if (cr) {
+			/* PN request */
+			rfcomm_apply_pn(d, cr, pn);
+			rfcomm_send_pn(s, 0, d);
+		} else {
+			/* PN response */
+			switch (d->state) {
+			case BT_CONFIG:
+				rfcomm_apply_pn(d, cr, pn);
+
+				d->state = BT_CONNECT;
+				rfcomm_send_sabm(s, d->dlci);
+				break;
+			}
+		}
+	} else {
+		u8 channel = __srv_channel(dlci);
+
+		if (!cr)
+			return 0;
+
+		/* PN request for non existing DLC.
+		 * Assume incoming connection. */
+		if (rfcomm_connect_ind(s, channel, &d)) {
+			d->dlci = dlci;
+			d->addr = __addr(s->initiator, dlci);
+			rfcomm_dlc_link(s, d);
+
+			rfcomm_apply_pn(d, cr, pn);
+
+			d->state = BT_OPEN;
+			rfcomm_send_pn(s, 0, d);
+		} else {
+			rfcomm_send_dm(s, dlci);
+		}
+	}
+	return 0;
+}
+
+static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_buff *skb)
+{
+	struct rfcomm_rpn *rpn = (void *) skb->data;
+	u8 dlci = __get_dlci(rpn->dlci);
+
+	u8 bit_rate  = 0;
+	u8 data_bits = 0;
+	u8 stop_bits = 0;
+	u8 parity    = 0;
+	u8 flow_ctrl = 0;
+	u8 xon_char  = 0;
+	u8 xoff_char = 0;
+	u16 rpn_mask = RFCOMM_RPN_PM_ALL;
+	
+	BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", 
+	       dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
+	       rpn->xon_char, rpn->xoff_char, rpn->param_mask);
+	
+	if (!cr) 
+		return 0;
+	
+	if (len == 1) {
+		/* request: return default setting */
+		bit_rate  = RFCOMM_RPN_BR_115200;
+		data_bits = RFCOMM_RPN_DATA_8;
+		stop_bits = RFCOMM_RPN_STOP_1;
+		parity    = RFCOMM_RPN_PARITY_NONE;
+		flow_ctrl = RFCOMM_RPN_FLOW_NONE;
+		xon_char  = RFCOMM_RPN_XON_CHAR;
+		xoff_char = RFCOMM_RPN_XOFF_CHAR;
+
+		goto rpn_out;
+	}
+	/* check for sane values: ignore/accept bit_rate, 8 bits, 1 stop bit, no parity,
+	                          no flow control lines, normal XON/XOFF chars */
+	if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) {
+		bit_rate = rpn->bit_rate;
+		if (bit_rate != RFCOMM_RPN_BR_115200) {
+			BT_DBG("RPN bit rate mismatch 0x%x", bit_rate);
+			bit_rate = RFCOMM_RPN_BR_115200;
+			rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_DATA) {
+		data_bits = __get_rpn_data_bits(rpn->line_settings);
+		if (data_bits != RFCOMM_RPN_DATA_8) {
+			BT_DBG("RPN data bits mismatch 0x%x", data_bits);
+			data_bits = RFCOMM_RPN_DATA_8;
+			rpn_mask ^= RFCOMM_RPN_PM_DATA;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_STOP) {
+		stop_bits = __get_rpn_stop_bits(rpn->line_settings);
+		if (stop_bits != RFCOMM_RPN_STOP_1) {
+			BT_DBG("RPN stop bits mismatch 0x%x", stop_bits);
+			stop_bits = RFCOMM_RPN_STOP_1;
+			rpn_mask ^= RFCOMM_RPN_PM_STOP;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) {
+		parity = __get_rpn_parity(rpn->line_settings);
+		if (parity != RFCOMM_RPN_PARITY_NONE) {
+			BT_DBG("RPN parity mismatch 0x%x", parity);
+			parity = RFCOMM_RPN_PARITY_NONE;
+			rpn_mask ^= RFCOMM_RPN_PM_PARITY;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) {
+		flow_ctrl = rpn->flow_ctrl;
+		if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) {
+			BT_DBG("RPN flow ctrl mismatch 0x%x", flow_ctrl);
+			flow_ctrl = RFCOMM_RPN_FLOW_NONE;
+			rpn_mask ^= RFCOMM_RPN_PM_FLOW;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_XON) {
+		xon_char = rpn->xon_char;
+		if (xon_char != RFCOMM_RPN_XON_CHAR) {
+			BT_DBG("RPN XON char mismatch 0x%x", xon_char);
+			xon_char = RFCOMM_RPN_XON_CHAR;
+			rpn_mask ^= RFCOMM_RPN_PM_XON;
+		}
+	}
+	if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) {
+		xoff_char = rpn->xoff_char;
+		if (xoff_char != RFCOMM_RPN_XOFF_CHAR) {
+			BT_DBG("RPN XOFF char mismatch 0x%x", xoff_char);
+			xoff_char = RFCOMM_RPN_XOFF_CHAR;
+			rpn_mask ^= RFCOMM_RPN_PM_XOFF;
+		}
+	}
+
+rpn_out:
+	rfcomm_send_rpn(s, 0, dlci, 
+			bit_rate, data_bits, stop_bits, parity, flow_ctrl,
+			xon_char, xoff_char, rpn_mask);
+
+	return 0;
+}
+
+static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb)
+{
+	struct rfcomm_rls *rls = (void *) skb->data;
+	u8 dlci = __get_dlci(rls->dlci);
+
+	BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status);
+	
+	if (!cr)
+		return 0;
+
+	/* FIXME: We should probably do something with this
+	   information here. But for now it's sufficient just
+	   to reply -- Bluetooth 1.1 says it's mandatory to 
+	   recognise and respond to RLS */
+
+	rfcomm_send_rls(s, 0, dlci, rls->status);
+
+	return 0;
+}
+
+static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb)
+{
+	struct rfcomm_msc *msc = (void *) skb->data;
+	struct rfcomm_dlc *d;
+	u8 dlci = __get_dlci(msc->dlci);
+
+	BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig);
+
+	d = rfcomm_dlc_get(s, dlci);
+	if (!d) 
+		return 0;
+
+	if (cr) {
+		if (msc->v24_sig & RFCOMM_V24_FC && !d->cfc)
+			set_bit(RFCOMM_TX_THROTTLED, &d->flags);
+		else
+			clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
+		
+		rfcomm_dlc_lock(d);
+		if (d->modem_status)
+			d->modem_status(d, msc->v24_sig);
+		rfcomm_dlc_unlock(d);
+		
+		rfcomm_send_msc(s, 0, dlci, msc->v24_sig);
+
+		d->mscex |= RFCOMM_MSCEX_RX;
+	} else 
+		d->mscex |= RFCOMM_MSCEX_TX;
+
+	return 0;
+}
+
+static int rfcomm_recv_mcc(struct rfcomm_session *s, struct sk_buff *skb)
+{
+	struct rfcomm_mcc *mcc = (void *) skb->data;
+	u8 type, cr, len;
+
+	cr   = __test_cr(mcc->type);
+	type = __get_mcc_type(mcc->type);
+	len  = __get_mcc_len(mcc->len);
+
+	BT_DBG("%p type 0x%x cr %d", s, type, cr);
+
+	skb_pull(skb, 2);
+
+	switch (type) {
+	case RFCOMM_PN:
+		rfcomm_recv_pn(s, cr, skb);
+		break;
+
+	case RFCOMM_RPN:
+		rfcomm_recv_rpn(s, cr, len, skb);
+		break;
+
+	case RFCOMM_RLS:
+		rfcomm_recv_rls(s, cr, skb);
+		break;
+
+	case RFCOMM_MSC:
+		rfcomm_recv_msc(s, cr, skb);
+		break;
+
+	case RFCOMM_FCOFF:
+		if (cr) {
+			set_bit(RFCOMM_TX_THROTTLED, &s->flags);
+			rfcomm_send_fcoff(s, 0);
+		}
+		break;
+
+	case RFCOMM_FCON:
+		if (cr) {
+			clear_bit(RFCOMM_TX_THROTTLED, &s->flags);
+			rfcomm_send_fcon(s, 0);
+		}
+		break;
+
+	case RFCOMM_TEST:
+		if (cr)
+			rfcomm_send_test(s, 0, skb->data, skb->len);
+		break;
+
+	case RFCOMM_NSC:
+		break;
+
+	default:
+		BT_ERR("Unknown control type 0x%02x", type);
+		rfcomm_send_nsc(s, cr, type);
+		break;
+	}
+	return 0;
+}
+
+static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk_buff *skb)
+{
+	struct rfcomm_dlc *d;
+
+	BT_DBG("session %p state %ld dlci %d pf %d", s, s->state, dlci, pf);
+
+	d = rfcomm_dlc_get(s, dlci);
+	if (!d) {
+		rfcomm_send_dm(s, dlci);
+		goto drop;
+	}
+
+	if (pf && d->cfc) {
+		u8 credits = *(u8 *) skb->data; skb_pull(skb, 1);
+
+		d->tx_credits += credits;
+		if (d->tx_credits)
+			clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
+	}
+
+	if (skb->len && d->state == BT_CONNECTED) {
+		rfcomm_dlc_lock(d);
+		d->rx_credits--;
+		d->data_ready(d, skb);
+		rfcomm_dlc_unlock(d);
+		return 0;
+	}
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
+{
+	struct rfcomm_hdr *hdr = (void *) skb->data;
+	u8 type, dlci, fcs;
+
+	dlci = __get_dlci(hdr->addr);
+	type = __get_type(hdr->ctrl);
+
+	/* Trim FCS */
+	skb->len--; skb->tail--;
+	fcs = *(u8 *) skb->tail;
+
+	if (__check_fcs(skb->data, type, fcs)) {
+		BT_ERR("bad checksum in packet");
+		kfree_skb(skb);
+		return -EILSEQ;
+	}
+
+	if (__test_ea(hdr->len))
+		skb_pull(skb, 3);
+	else
+		skb_pull(skb, 4);
+
+	switch (type) {
+	case RFCOMM_SABM:
+		if (__test_pf(hdr->ctrl))
+			rfcomm_recv_sabm(s, dlci);
+		break;
+
+	case RFCOMM_DISC:
+		if (__test_pf(hdr->ctrl))
+			rfcomm_recv_disc(s, dlci);
+		break;
+
+	case RFCOMM_UA:
+		if (__test_pf(hdr->ctrl))
+			rfcomm_recv_ua(s, dlci);
+		break;
+
+	case RFCOMM_DM:
+		rfcomm_recv_dm(s, dlci);
+		break;
+
+	case RFCOMM_UIH:
+		if (dlci)
+			return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
+
+		rfcomm_recv_mcc(s, skb);
+		break;
+
+	default:
+		BT_ERR("Unknown packet type 0x%02x\n", type);
+		break;
+	}
+	kfree_skb(skb);
+	return 0;
+}
+
+/* ---- Connection and data processing ---- */
+
+static void rfcomm_process_connect(struct rfcomm_session *s)
+{
+	struct rfcomm_dlc *d;
+	struct list_head *p, *n;
+
+	BT_DBG("session %p state %ld", s, s->state);
+
+	list_for_each_safe(p, n, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+		if (d->state == BT_CONFIG) {
+			d->mtu = s->mtu;
+			rfcomm_send_pn(s, 1, d);
+		}
+	}
+}
+
+/* Send data queued for the DLC.
+ * Return number of frames left in the queue.
+ */
+static inline int rfcomm_process_tx(struct rfcomm_dlc *d)
+{
+	struct sk_buff *skb;
+	int err;
+
+	BT_DBG("dlc %p state %ld cfc %d rx_credits %d tx_credits %d", 
+			d, d->state, d->cfc, d->rx_credits, d->tx_credits);
+
+	/* Send pending MSC */
+	if (test_and_clear_bit(RFCOMM_MSC_PENDING, &d->flags))
+		rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); 
+
+	if (d->cfc) {
+		/* CFC enabled. 
+		 * Give them some credits */
+		if (!test_bit(RFCOMM_RX_THROTTLED, &d->flags) &&
+			       	d->rx_credits <= (d->cfc >> 2)) {
+			rfcomm_send_credits(d->session, d->addr, d->cfc - d->rx_credits);
+			d->rx_credits = d->cfc;
+		}
+	} else {
+		/* CFC disabled.
+		 * Give ourselves some credits */
+		d->tx_credits = 5;
+	}
+
+	if (test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+		return skb_queue_len(&d->tx_queue);
+
+	while (d->tx_credits && (skb = skb_dequeue(&d->tx_queue))) {
+		err = rfcomm_send_frame(d->session, skb->data, skb->len);
+		if (err < 0) {
+			skb_queue_head(&d->tx_queue, skb);
+			break;
+		}
+		kfree_skb(skb);
+		d->tx_credits--;
+	}
+
+	if (d->cfc && !d->tx_credits) {
+		/* We're out of TX credits.
+		 * Set TX_THROTTLED flag to avoid unnesary wakeups by dlc_send. */
+		set_bit(RFCOMM_TX_THROTTLED, &d->flags);
+	}
+
+	return skb_queue_len(&d->tx_queue);
+}
+
+static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
+{
+	struct rfcomm_dlc *d;
+	struct list_head *p, *n;
+
+	BT_DBG("session %p state %ld", s, s->state);
+
+	list_for_each_safe(p, n, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+
+		if (test_bit(RFCOMM_TIMED_OUT, &d->flags)) {
+			__rfcomm_dlc_close(d, ETIMEDOUT);
+			continue;
+		}
+
+		if (test_and_clear_bit(RFCOMM_AUTH_ACCEPT, &d->flags)) {
+			rfcomm_dlc_clear_timer(d);
+			rfcomm_dlc_accept(d);
+			if (d->link_mode & RFCOMM_LM_SECURE) {
+				struct sock *sk = s->sock->sk;
+				hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon);
+			}
+			continue;
+		} else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) {
+			rfcomm_dlc_clear_timer(d);
+			rfcomm_send_dm(s, d->dlci);
+			__rfcomm_dlc_close(d, ECONNREFUSED);
+			continue;
+		}
+
+		if (test_bit(RFCOMM_TX_THROTTLED, &s->flags))
+			continue;
+
+		if ((d->state == BT_CONNECTED || d->state == BT_DISCONN) &&
+				d->mscex == RFCOMM_MSCEX_OK)
+			rfcomm_process_tx(d);
+	}
+}
+
+static inline void rfcomm_process_rx(struct rfcomm_session *s)
+{
+	struct socket *sock = s->sock;
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+
+	BT_DBG("session %p state %ld qlen %d", s, s->state, skb_queue_len(&sk->sk_receive_queue));
+
+	/* Get data directly from socket receive queue without copying it. */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
+		skb_orphan(skb);
+		rfcomm_recv_frame(s, skb);
+	}
+
+	if (sk->sk_state == BT_CLOSED) {
+		if (!s->initiator)
+			rfcomm_session_put(s);
+
+		rfcomm_session_close(s, sk->sk_err);
+	}
+}
+
+static inline void rfcomm_accept_connection(struct rfcomm_session *s)
+{
+	struct socket *sock = s->sock, *nsock;
+	int err;
+
+	/* Fast check for a new connection.
+	 * Avoids unnesesary socket allocations. */
+	if (list_empty(&bt_sk(sock->sk)->accept_q))
+		return;
+
+	BT_DBG("session %p", s);
+
+	if (sock_create_lite(PF_BLUETOOTH, sock->type, BTPROTO_L2CAP, &nsock))
+		return;
+
+	nsock->ops  = sock->ops;
+
+	__module_get(nsock->ops->owner);
+
+	err = sock->ops->accept(sock, nsock, O_NONBLOCK);
+	if (err < 0) {
+		sock_release(nsock);
+		return;
+	}
+
+	/* Set our callbacks */
+	nsock->sk->sk_data_ready   = rfcomm_l2data_ready;
+	nsock->sk->sk_state_change = rfcomm_l2state_change;
+
+	s = rfcomm_session_add(nsock, BT_OPEN);
+	if (s) {
+		rfcomm_session_hold(s);
+		rfcomm_schedule(RFCOMM_SCHED_RX);
+	} else
+		sock_release(nsock);
+}
+
+static inline void rfcomm_check_connection(struct rfcomm_session *s)
+{
+	struct sock *sk = s->sock->sk;
+
+	BT_DBG("%p state %ld", s, s->state);
+
+	switch(sk->sk_state) {
+	case BT_CONNECTED:
+		s->state = BT_CONNECT;
+
+		/* We can adjust MTU on outgoing sessions.
+		 * L2CAP MTU minus UIH header and FCS. */
+		s->mtu = min(l2cap_pi(sk)->omtu, l2cap_pi(sk)->imtu) - 5;
+
+		rfcomm_send_sabm(s, 0);
+		break;
+
+	case BT_CLOSED:
+		s->state = BT_CLOSED;
+		rfcomm_session_close(s, sk->sk_err);
+		break;
+	}
+}
+
+static inline void rfcomm_process_sessions(void)
+{
+	struct list_head *p, *n;
+
+	rfcomm_lock();
+
+	list_for_each_safe(p, n, &session_list) {
+		struct rfcomm_session *s;
+		s = list_entry(p, struct rfcomm_session, list);
+
+		if (s->state == BT_LISTEN) {
+			rfcomm_accept_connection(s);
+			continue;
+		}
+
+		rfcomm_session_hold(s);
+
+		switch (s->state) {
+		case BT_BOUND:
+			rfcomm_check_connection(s);
+			break;
+
+		default:
+			rfcomm_process_rx(s);
+			break;
+		}
+
+		rfcomm_process_dlcs(s);
+
+		rfcomm_session_put(s);
+	}
+
+	rfcomm_unlock();
+}
+
+static void rfcomm_worker(void)
+{
+	BT_DBG("");
+
+	while (!atomic_read(&terminate)) {
+		if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
+			/* No pending events. Let's sleep.
+			 * Incoming connections and data will wake us up. */
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		}
+
+		/* Process stuff */
+		clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
+		rfcomm_process_sessions();
+	}
+	set_current_state(TASK_RUNNING);
+	return;
+}
+
+static int rfcomm_add_listener(bdaddr_t *ba)
+{
+	struct sockaddr_l2 addr;
+	struct socket *sock;
+	struct sock *sk;
+	struct rfcomm_session *s;
+	int    err = 0;
+
+	/* Create socket */
+	err = rfcomm_l2sock_create(&sock);
+	if (err < 0) { 
+		BT_ERR("Create socket failed %d", err);
+		return err;
+	}
+
+	/* Bind socket */
+	bacpy(&addr.l2_bdaddr, ba);
+	addr.l2_family = AF_BLUETOOTH;
+	addr.l2_psm    = htobs(RFCOMM_PSM);
+	err = sock->ops->bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+	if (err < 0) {
+		BT_ERR("Bind failed %d", err);
+		goto failed;
+	}
+
+	/* Set L2CAP options */
+	sk = sock->sk;
+	lock_sock(sk);
+	l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU;
+	release_sock(sk);
+
+	/* Start listening on the socket */
+	err = sock->ops->listen(sock, 10);
+	if (err) {
+		BT_ERR("Listen failed %d", err);
+		goto failed;
+	}
+
+	/* Add listening session */
+	s = rfcomm_session_add(sock, BT_LISTEN);
+	if (!s)
+		goto failed;
+
+	rfcomm_session_hold(s);
+	return 0;
+failed:
+	sock_release(sock);
+	return err;
+}
+
+static void rfcomm_kill_listener(void)
+{
+	struct rfcomm_session *s;
+	struct list_head *p, *n;
+
+	BT_DBG("");
+
+	list_for_each_safe(p, n, &session_list) {
+		s = list_entry(p, struct rfcomm_session, list);
+		rfcomm_session_del(s);
+	}
+}
+
+static int rfcomm_run(void *unused)
+{
+	rfcomm_thread = current;
+
+	atomic_inc(&running);
+
+	daemonize("krfcommd");
+	set_user_nice(current, -10);
+	current->flags |= PF_NOFREEZE;
+
+	BT_DBG("");
+
+	rfcomm_add_listener(BDADDR_ANY);
+
+	rfcomm_worker();
+
+	rfcomm_kill_listener();
+
+	atomic_dec(&running);
+	return 0;
+}
+
+static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status)
+{
+	struct rfcomm_session *s;
+	struct rfcomm_dlc *d;
+	struct list_head *p, *n;
+
+	BT_DBG("conn %p status 0x%02x", conn, status);
+
+	s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst);
+	if (!s)
+		return;
+
+	rfcomm_session_hold(s);
+
+	list_for_each_safe(p, n, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+
+		if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE))
+			continue;
+
+		if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
+			continue;
+
+		if (!status)
+			set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
+		else
+			set_bit(RFCOMM_AUTH_REJECT, &d->flags);
+	}
+
+	rfcomm_session_put(s);
+
+	rfcomm_schedule(RFCOMM_SCHED_AUTH);
+}
+
+static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
+{
+	struct rfcomm_session *s;
+	struct rfcomm_dlc *d;
+	struct list_head *p, *n;
+
+	BT_DBG("conn %p status 0x%02x encrypt 0x%02x", conn, status, encrypt);
+
+	s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst);
+	if (!s)
+		return;
+
+	rfcomm_session_hold(s);
+
+	list_for_each_safe(p, n, &s->dlcs) {
+		d = list_entry(p, struct rfcomm_dlc, list);
+
+		if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
+			continue;
+
+		if (!status && encrypt)
+			set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
+		else
+			set_bit(RFCOMM_AUTH_REJECT, &d->flags);
+	}
+
+	rfcomm_session_put(s);
+
+	rfcomm_schedule(RFCOMM_SCHED_AUTH);
+}
+
+static struct hci_cb rfcomm_cb = {
+	.name		= "RFCOMM",
+	.auth_cfm	= rfcomm_auth_cfm,
+	.encrypt_cfm	= rfcomm_encrypt_cfm
+};
+
+/* ---- Proc fs support ---- */
+#ifdef CONFIG_PROC_FS
+static void *rfcomm_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct rfcomm_session *s;
+	struct list_head *pp, *p;
+	loff_t l = *pos;
+
+	rfcomm_lock();
+
+	list_for_each(p, &session_list) {
+		s = list_entry(p, struct rfcomm_session, list);
+		list_for_each(pp, &s->dlcs)
+			if (!l--) {
+				seq->private = s;
+				return pp;
+			}
+	}
+	return NULL;
+}
+
+static void *rfcomm_seq_next(struct seq_file *seq, void *e, loff_t *pos)
+{
+	struct rfcomm_session *s = seq->private;
+	struct list_head *pp, *p = e;
+	(*pos)++;
+
+	if (p->next != &s->dlcs)
+		return p->next;
+
+	list_for_each(p, &session_list) {
+		s = list_entry(p, struct rfcomm_session, list);
+		__list_for_each(pp, &s->dlcs) {
+			seq->private = s;
+			return pp;
+		}
+	}
+	return NULL;
+}
+
+static void rfcomm_seq_stop(struct seq_file *seq, void *e)
+{
+	rfcomm_unlock();
+}
+
+static int  rfcomm_seq_show(struct seq_file *seq, void *e)
+{
+	struct rfcomm_session *s = seq->private;
+	struct sock *sk = s->sock->sk;
+	struct rfcomm_dlc *d = list_entry(e, struct rfcomm_dlc, list);
+
+	seq_printf(seq, "%s %s %ld %d %d %d %d\n",
+			batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst),
+			d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits);
+	return 0;
+}
+
+static struct seq_operations rfcomm_seq_ops = {
+	.start  = rfcomm_seq_start,
+	.next   = rfcomm_seq_next,
+	.stop   = rfcomm_seq_stop,
+	.show   = rfcomm_seq_show 
+};
+
+static int rfcomm_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rfcomm_seq_ops);
+}
+
+static struct file_operations rfcomm_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = rfcomm_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static int  __init rfcomm_proc_init(void)
+{
+        struct proc_dir_entry *p;
+
+	proc_bt_rfcomm = proc_mkdir("rfcomm", proc_bt);
+	if (proc_bt_rfcomm) {
+		proc_bt_rfcomm->owner = THIS_MODULE;
+
+        	p = create_proc_entry("dlc", S_IRUGO, proc_bt_rfcomm);
+		if (p)
+        		p->proc_fops = &rfcomm_seq_fops;
+	}
+        return 0;
+}
+
+static void __exit rfcomm_proc_cleanup(void)
+{
+        remove_proc_entry("dlc", proc_bt_rfcomm);
+
+	remove_proc_entry("rfcomm", proc_bt);
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int  __init rfcomm_proc_init(void)
+{
+        return 0;
+}
+
+static void __exit rfcomm_proc_cleanup(void)
+{
+        return;
+}
+#endif /* CONFIG_PROC_FS */
+
+/* ---- Initialization ---- */
+static int __init rfcomm_init(void)
+{
+	l2cap_load();
+
+	hci_register_cb(&rfcomm_cb);
+
+	kernel_thread(rfcomm_run, NULL, CLONE_KERNEL);
+
+	BT_INFO("RFCOMM ver %s", VERSION);
+
+	rfcomm_proc_init();
+
+	rfcomm_init_sockets();
+
+#ifdef CONFIG_BT_RFCOMM_TTY
+	rfcomm_init_ttys();
+#endif
+
+	return 0;
+}
+
+static void __exit rfcomm_exit(void)
+{
+	hci_unregister_cb(&rfcomm_cb);
+
+	/* Terminate working thread.
+	 * ie. Set terminate flag and wake it up */
+	atomic_inc(&terminate);
+	rfcomm_schedule(RFCOMM_SCHED_STATE);
+
+	/* Wait until thread is running */
+	while (atomic_read(&running))
+		schedule();
+
+#ifdef CONFIG_BT_RFCOMM_TTY
+	rfcomm_cleanup_ttys();
+#endif
+
+	rfcomm_cleanup_sockets();
+
+	rfcomm_proc_cleanup();
+}
+
+module_init(rfcomm_init);
+module_exit(rfcomm_exit);
+
+MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-3");
diff --git a/net/bluetooth/rfcomm/crc.c b/net/bluetooth/rfcomm/crc.c
new file mode 100644
index 00000000000..1011bc4a869
--- /dev/null
+++ b/net/bluetooth/rfcomm/crc.c
@@ -0,0 +1,71 @@
+/* 
+   RFCOMM implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * RFCOMM FCS calculation.
+ *
+ * $Id: crc.c,v 1.2 2002/09/21 09:54:32 holtmann Exp $
+ */
+
+/* reversed, 8-bit, poly=0x07 */
+unsigned char rfcomm_crc_table[256] = { 
+	0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75,
+	0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b,
+	0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69,
+	0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67,
+
+	0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d,
+	0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43,
+	0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51,
+	0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f,
+
+	0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05,
+	0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b,
+	0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19,
+	0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17,
+
+	0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d,
+	0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33,
+	0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21,
+	0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f,
+
+	0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95,
+	0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b,
+	0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89,
+	0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87,
+
+	0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad,
+	0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3,
+	0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1,
+	0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf,
+
+	0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5,
+	0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb,
+	0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9,
+	0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7,
+
+	0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd,
+	0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3,
+	0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1,
+	0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf
+};
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
new file mode 100644
index 00000000000..640028a2183
--- /dev/null
+++ b/net/bluetooth/rfcomm/sock.c
@@ -0,0 +1,1010 @@
+/* 
+   RFCOMM implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * RFCOMM sockets.
+ *
+ * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+#include <net/bluetooth/rfcomm.h>
+
+#ifndef CONFIG_BT_RFCOMM_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+static struct proto_ops rfcomm_sock_ops;
+
+static struct bt_sock_list rfcomm_sk_list = {
+	.lock = RW_LOCK_UNLOCKED
+};
+
+static void rfcomm_sock_close(struct sock *sk);
+static void rfcomm_sock_kill(struct sock *sk);
+
+/* ---- DLC callbacks ----
+ *
+ * called under rfcomm_dlc_lock()
+ */
+static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
+{
+	struct sock *sk = d->owner;
+	if (!sk)
+		return;
+
+	atomic_add(skb->len, &sk->sk_rmem_alloc);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+		rfcomm_dlc_throttle(d);
+}
+
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+{
+	struct sock *sk = d->owner, *parent;
+	if (!sk)
+		return;
+
+	BT_DBG("dlc %p state %ld err %d", d, d->state, err);
+
+	bh_lock_sock(sk);
+
+	if (err)
+		sk->sk_err = err;
+
+	sk->sk_state = d->state;
+
+	parent = bt_sk(sk)->parent;
+	if (parent) {
+		if (d->state == BT_CLOSED) {
+			sock_set_flag(sk, SOCK_ZAPPED);
+			bt_accept_unlink(sk);
+		}
+		parent->sk_data_ready(parent, 0);
+	} else {
+		if (d->state == BT_CONNECTED)
+			rfcomm_session_getaddr(d->session, &bt_sk(sk)->src, NULL);
+		sk->sk_state_change(sk);
+	}
+
+	bh_unlock_sock(sk);
+
+	if (parent && sock_flag(sk, SOCK_ZAPPED)) {
+		/* We have to drop DLC lock here, otherwise
+		 * rfcomm_sock_destruct() will dead lock. */
+		rfcomm_dlc_unlock(d);
+		rfcomm_sock_kill(sk);
+		rfcomm_dlc_lock(d);
+	}
+}
+
+/* ---- Socket functions ---- */
+static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src)
+{
+	struct sock *sk = NULL;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &rfcomm_sk_list.head) {
+		if (rfcomm_pi(sk)->channel == channel && 
+				!bacmp(&bt_sk(sk)->src, src))
+			break;
+	}
+
+	return node ? sk : NULL;
+}
+
+/* Find socket with channel and source bdaddr.
+ * Returns closest match.
+ */
+static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
+{
+	struct sock *sk = NULL, *sk1 = NULL;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &rfcomm_sk_list.head) {
+		if (state && sk->sk_state != state)
+			continue;
+
+		if (rfcomm_pi(sk)->channel == channel) {
+			/* Exact match. */
+			if (!bacmp(&bt_sk(sk)->src, src))
+				break;
+
+			/* Closest match */
+			if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
+				sk1 = sk;
+		}
+	}
+	return node ? sk : sk1;
+}
+
+/* Find socket with given address (channel, src).
+ * Returns locked socket */
+static inline struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
+{
+	struct sock *s;
+	read_lock(&rfcomm_sk_list.lock);
+	s = __rfcomm_get_sock_by_channel(state, channel, src);
+	if (s) bh_lock_sock(s);
+	read_unlock(&rfcomm_sk_list.lock);
+	return s;
+}
+
+static void rfcomm_sock_destruct(struct sock *sk)
+{
+	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
+
+	BT_DBG("sk %p dlc %p", sk, d);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	rfcomm_dlc_lock(d);
+	rfcomm_pi(sk)->dlc = NULL;
+
+	/* Detach DLC if it's owned by this socket */
+	if (d->owner == sk)
+		d->owner = NULL;
+	rfcomm_dlc_unlock(d);
+
+	rfcomm_dlc_put(d);
+}
+
+static void rfcomm_sock_cleanup_listen(struct sock *parent)
+{
+	struct sock *sk;
+
+	BT_DBG("parent %p", parent);
+
+	/* Close not yet accepted dlcs */
+	while ((sk = bt_accept_dequeue(parent, NULL))) {
+		rfcomm_sock_close(sk);
+		rfcomm_sock_kill(sk);
+	}
+
+	parent->sk_state  = BT_CLOSED;
+	sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+/* Kill socket (only if zapped and orphan)
+ * Must be called on unlocked socket.
+ */
+static void rfcomm_sock_kill(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+		return;
+
+	BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+
+	/* Kill poor orphan */
+	bt_sock_unlink(&rfcomm_sk_list, sk);
+	sock_set_flag(sk, SOCK_DEAD);
+	sock_put(sk);
+}
+
+static void __rfcomm_sock_close(struct sock *sk)
+{
+	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
+
+	BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
+
+	switch (sk->sk_state) {
+	case BT_LISTEN:
+		rfcomm_sock_cleanup_listen(sk);
+		break;
+
+	case BT_CONNECT:
+	case BT_CONNECT2:
+	case BT_CONFIG:
+	case BT_CONNECTED:
+		rfcomm_dlc_close(d, 0);
+
+	default:
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+	}
+}
+
+/* Close socket.
+ * Must be called on unlocked socket.
+ */
+static void rfcomm_sock_close(struct sock *sk)
+{
+	lock_sock(sk);
+	__rfcomm_sock_close(sk);
+	release_sock(sk);
+}
+
+static void rfcomm_sock_init(struct sock *sk, struct sock *parent)
+{
+	struct rfcomm_pinfo *pi = rfcomm_pi(sk);
+
+	BT_DBG("sk %p", sk);
+
+	if (parent) {
+		sk->sk_type = parent->sk_type;
+		pi->link_mode = rfcomm_pi(parent)->link_mode;
+	} else {
+		pi->link_mode = 0;
+	}
+
+	pi->dlc->link_mode = pi->link_mode;
+}
+
+static struct proto rfcomm_proto = {
+	.name		= "RFCOMM",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct rfcomm_pinfo)
+};
+
+static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, int prio)
+{
+	struct rfcomm_dlc *d;
+	struct sock *sk;
+
+	sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+	d = rfcomm_dlc_alloc(prio);
+	if (!d) {
+		sk_free(sk);
+		return NULL;
+	}
+
+	d->data_ready   = rfcomm_sk_data_ready;
+	d->state_change = rfcomm_sk_state_change;
+
+	rfcomm_pi(sk)->dlc = d;
+	d->owner = sk;
+
+	sk->sk_destruct = rfcomm_sock_destruct;
+	sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT;
+
+	sk->sk_sndbuf   = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
+	sk->sk_rcvbuf   = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = proto;
+	sk->sk_state	= BT_OPEN;
+
+	bt_sock_link(&rfcomm_sk_list, sk);
+
+	BT_DBG("sk %p", sk);
+	return sk;
+}
+
+static int rfcomm_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	sock->state = SS_UNCONNECTED;
+
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &rfcomm_sock_ops;
+
+	if (!(sk = rfcomm_sock_alloc(sock, protocol, GFP_KERNEL)))
+		return -ENOMEM;
+
+	rfcomm_sock_init(sk, NULL);
+	return 0;
+}
+
+static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p %s", sk, batostr(&sa->rc_bdaddr));
+
+	if (!addr || addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_OPEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	write_lock_bh(&rfcomm_sk_list.lock);
+
+	if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) {
+		err = -EADDRINUSE;
+	} else {
+		/* Save source address */
+		bacpy(&bt_sk(sk)->src, &sa->rc_bdaddr);
+		rfcomm_pi(sk)->channel = sa->rc_channel;
+		sk->sk_state = BT_BOUND;
+	}
+
+	write_unlock_bh(&rfcomm_sk_list.lock);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
+{
+	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
+	struct sock *sk = sock->sk;
+	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
+	int err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_rc))
+		return -EINVAL;
+
+	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+		return -EBADFD;
+
+	if (sk->sk_type != SOCK_STREAM)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	sk->sk_state = BT_CONNECT;
+	bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr);
+	rfcomm_pi(sk)->channel = sa->rc_channel;
+
+	err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel);
+	if (!err)
+		err = bt_sock_wait_state(sk, BT_CONNECTED,
+				sock_sndtimeo(sk, flags & O_NONBLOCK));
+
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p backlog %d", sk, backlog);
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_BOUND) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	if (!rfcomm_pi(sk)->channel) {
+		bdaddr_t *src = &bt_sk(sk)->src;
+		u8 channel;
+
+		err = -EINVAL;
+
+		write_lock_bh(&rfcomm_sk_list.lock);
+
+		for (channel = 1; channel < 31; channel++)
+			if (!__rfcomm_get_sock_by_addr(channel, src)) {
+				rfcomm_pi(sk)->channel = channel;
+				err = 0;
+				break;
+			}
+
+		write_unlock_bh(&rfcomm_sk_list.lock);
+
+		if (err < 0)
+			goto done;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+	sk->sk_state = BT_LISTEN;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *nsk;
+	long timeo;
+	int err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_LISTEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	BT_DBG("sk %p timeo %ld", sk, timeo);
+
+	/* Wait for an incoming connection. (wake-one). */
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		if (sk->sk_state != BT_LISTEN) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	if (err)
+		goto done;
+
+	newsock->state = SS_CONNECTED;
+
+	BT_DBG("new socket %p", nsk);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+{
+	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	sa->rc_family  = AF_BLUETOOTH;
+	sa->rc_channel = rfcomm_pi(sk)->channel;
+	if (peer)
+		bacpy(&sa->rc_bdaddr, &bt_sk(sk)->dst);
+	else
+		bacpy(&sa->rc_bdaddr, &bt_sk(sk)->src);
+
+	*len = sizeof(struct sockaddr_rc);
+	return 0;
+}
+
+static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
+	struct sk_buff *skb;
+	int err;
+	int sent = 0;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN)
+		return -EPIPE;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	lock_sock(sk);
+
+	while (len) {
+		size_t size = min_t(size_t, len, d->mtu);
+		
+		skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
+				msg->msg_flags & MSG_DONTWAIT, &err);
+		if (!skb)
+			break;
+		skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
+
+		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+		if (err) {
+			kfree_skb(skb);
+			sent = err;
+			break;
+		}
+
+		err = rfcomm_dlc_send(d, skb);
+		if (err < 0) {
+			kfree_skb(skb);
+			break;
+		}
+
+		sent += size;
+		len  -= size;
+	}
+
+	release_sock(sk);
+
+	return sent ? sent : err;
+}
+
+static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (skb_queue_len(&sk->sk_receive_queue) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) ||
+				signal_pending(current) || !timeo)
+			break;
+
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+			       struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+	size_t target, copied = 0;
+	long timeo;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	msg->msg_namelen = 0;
+
+	BT_DBG("sk %p size %d", sk, size);
+
+	lock_sock(sk);
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo  = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	do {
+		struct sk_buff *skb;
+		int chunk;
+
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (!skb) {
+			if (copied >= target)
+				break;
+
+			if ((err = sock_error(sk)) != 0)
+				break;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			err = -EAGAIN;
+			if (!timeo)
+				break;
+
+			timeo = rfcomm_sock_data_wait(sk, timeo);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			continue;
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (!copied)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size   -= chunk;
+
+		if (!(flags & MSG_PEEK)) {
+			atomic_sub(chunk, &sk->sk_rmem_alloc);
+
+			skb_pull(skb, chunk);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
+
+		} else {
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+
+out:
+	if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2))
+		rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc);
+
+	release_sock(sk);
+	return copied ? : err;
+}
+
+static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+	u32 opt;
+
+	BT_DBG("sk %p", sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case RFCOMM_LM:
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		rfcomm_pi(sk)->link_mode = opt;
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct sock *l2cap_sk;
+	struct rfcomm_conninfo cinfo;
+	int len, err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case RFCOMM_LM:
+		if (put_user(rfcomm_pi(sk)->link_mode, (u32 __user *) optval))
+			err = -EFAULT;
+		break;
+
+	case RFCOMM_CONNINFO:
+		if (sk->sk_state != BT_CONNECTED) {
+			err = -ENOTCONN;
+			break;
+		}
+
+		l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk;
+
+		cinfo.hci_handle = l2cap_pi(l2cap_sk)->conn->hcon->handle;
+		memcpy(cinfo.dev_class, l2cap_pi(l2cap_sk)->conn->hcon->dev_class, 3);
+
+		len = min_t(unsigned int, len, sizeof(cinfo));
+		if (copy_to_user(optval, (char *) &cinfo, len))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	lock_sock(sk);
+
+#ifdef CONFIG_BT_RFCOMM_TTY
+	err = rfcomm_dev_ioctl(sk, cmd, (void __user *)arg);
+#else
+	err = -EOPNOTSUPP;
+#endif
+
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk) return 0;
+
+	lock_sock(sk);
+	if (!sk->sk_shutdown) {
+		sk->sk_shutdown = SHUTDOWN_MASK;
+		__rfcomm_sock_close(sk);
+
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+			err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+	}
+	release_sock(sk);
+	return err;
+}
+
+static int rfcomm_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	err = rfcomm_sock_shutdown(sock, 2);
+
+	sock_orphan(sk);
+	rfcomm_sock_kill(sk);
+	return err;
+}
+
+/* ---- RFCOMM core layer callbacks ---- 
+ *
+ * called under rfcomm_lock()
+ */
+int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc **d)
+{
+	struct sock *sk, *parent;
+	bdaddr_t src, dst;
+	int result = 0;
+
+	BT_DBG("session %p channel %d", s, channel);
+
+	rfcomm_session_getaddr(s, &src, &dst);
+
+	/* Check if we have socket listening on channel */
+	parent = rfcomm_get_sock_by_channel(BT_LISTEN, channel, &src);
+	if (!parent)
+		return 0;
+
+	/* Check for backlog size */
+	if (sk_acceptq_is_full(parent)) {
+		BT_DBG("backlog full %d", parent->sk_ack_backlog); 
+		goto done;
+	}
+
+	sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+	if (!sk)
+		goto done;
+
+	rfcomm_sock_init(sk, parent);
+	bacpy(&bt_sk(sk)->src, &src);
+	bacpy(&bt_sk(sk)->dst, &dst);
+	rfcomm_pi(sk)->channel = channel;
+
+	sk->sk_state = BT_CONFIG;
+	bt_accept_enqueue(parent, sk);
+
+	/* Accept connection and return socket DLC */
+	*d = rfcomm_pi(sk)->dlc;
+	result = 1;
+
+done:
+	bh_unlock_sock(parent);
+	return result;
+}
+
+/* ---- Proc fs support ---- */
+#ifdef CONFIG_PROC_FS
+static void *rfcomm_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	loff_t l = *pos;
+
+	read_lock_bh(&rfcomm_sk_list.lock);
+
+	sk_for_each(sk, node, &rfcomm_sk_list.head)
+		if (!l--)
+			return sk;
+	return NULL;
+}
+
+static void *rfcomm_seq_next(struct seq_file *seq, void *e, loff_t *pos)
+{
+	struct sock *sk = e;
+	(*pos)++;
+	return sk_next(sk);
+}
+
+static void rfcomm_seq_stop(struct seq_file *seq, void *e)
+{
+	read_unlock_bh(&rfcomm_sk_list.lock);
+}
+
+static int  rfcomm_seq_show(struct seq_file *seq, void *e)
+{
+	struct sock *sk = e;
+	seq_printf(seq, "%s %s %d %d\n",
+			batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst),
+			sk->sk_state, rfcomm_pi(sk)->channel);
+	return 0;
+}
+
+static struct seq_operations rfcomm_seq_ops = {
+	.start  = rfcomm_seq_start,
+	.next   = rfcomm_seq_next,
+	.stop   = rfcomm_seq_stop,
+	.show   = rfcomm_seq_show 
+};
+
+static int rfcomm_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rfcomm_seq_ops);
+}
+
+static struct file_operations rfcomm_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = rfcomm_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static int  __init rfcomm_sock_proc_init(void)
+{
+        struct proc_dir_entry *p = create_proc_entry("sock", S_IRUGO, proc_bt_rfcomm);
+        if (!p)
+                return -ENOMEM;
+        p->proc_fops = &rfcomm_seq_fops;
+        return 0;
+}
+
+static void __exit rfcomm_sock_proc_cleanup(void)
+{
+        remove_proc_entry("sock", proc_bt_rfcomm);
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int  __init rfcomm_sock_proc_init(void)
+{
+        return 0;
+}
+
+static void __exit rfcomm_sock_proc_cleanup(void)
+{
+        return;
+}
+#endif /* CONFIG_PROC_FS */
+
+static struct proto_ops rfcomm_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= rfcomm_sock_release,
+	.bind		= rfcomm_sock_bind,
+	.connect	= rfcomm_sock_connect,
+	.listen		= rfcomm_sock_listen,
+	.accept		= rfcomm_sock_accept,
+	.getname	= rfcomm_sock_getname,
+	.sendmsg	= rfcomm_sock_sendmsg,
+	.recvmsg	= rfcomm_sock_recvmsg,
+	.shutdown	= rfcomm_sock_shutdown,
+	.setsockopt	= rfcomm_sock_setsockopt,
+	.getsockopt	= rfcomm_sock_getsockopt,
+	.ioctl		= rfcomm_sock_ioctl,
+	.poll		= bt_sock_poll,
+	.socketpair	= sock_no_socketpair,
+	.mmap		= sock_no_mmap
+};
+
+static struct net_proto_family rfcomm_sock_family_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.create		= rfcomm_sock_create
+};
+
+int  __init rfcomm_init_sockets(void)
+{
+	int err;
+
+	err = proto_register(&rfcomm_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	rfcomm_sock_proc_init();
+
+	BT_INFO("RFCOMM socket layer initialized");
+
+	return 0;
+
+error:
+	BT_ERR("RFCOMM socket layer registration failed");
+	proto_unregister(&rfcomm_proto);
+	return err;
+}
+
+void __exit rfcomm_cleanup_sockets(void)
+{
+	rfcomm_sock_proc_cleanup();
+
+	if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
+		BT_ERR("RFCOMM socket layer unregistration failed");
+
+	proto_unregister(&rfcomm_proto);
+}
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
new file mode 100644
index 00000000000..6d689200bcf
--- /dev/null
+++ b/net/bluetooth/rfcomm/tty.c
@@ -0,0 +1,930 @@
+/* 
+   RFCOMM implementation for Linux Bluetooth stack (BlueZ).
+   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
+   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * RFCOMM TTY.
+ *
+ * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/rfcomm.h>
+
+#ifndef CONFIG_BT_RFCOMM_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define RFCOMM_TTY_MAGIC 0x6d02		/* magic number for rfcomm struct */
+#define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV	/* whole lotta rfcomm devices */
+#define RFCOMM_TTY_MAJOR 216		/* device node major id of the usb/bluetooth.c driver */
+#define RFCOMM_TTY_MINOR 0
+
+static struct tty_driver *rfcomm_tty_driver;
+
+struct rfcomm_dev {
+	struct list_head	list;
+	atomic_t		refcnt;
+
+	char			name[12];
+	int			id;
+	unsigned long		flags;
+	int			opened;
+	int			err;
+
+	bdaddr_t		src;
+	bdaddr_t		dst;
+	u8 			channel;
+
+	uint 			modem_status;
+
+	struct rfcomm_dlc	*dlc;
+	struct tty_struct	*tty;
+	wait_queue_head_t       wait;
+	struct tasklet_struct   wakeup_task;
+
+	atomic_t 		wmem_alloc;
+};
+
+static LIST_HEAD(rfcomm_dev_list);
+static DEFINE_RWLOCK(rfcomm_dev_lock);
+
+static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
+static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
+static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig);
+
+static void rfcomm_tty_wakeup(unsigned long arg);
+
+/* ---- Device functions ---- */
+static void rfcomm_dev_destruct(struct rfcomm_dev *dev)
+{
+	struct rfcomm_dlc *dlc = dev->dlc;
+
+	BT_DBG("dev %p dlc %p", dev, dlc);
+
+	rfcomm_dlc_lock(dlc);
+	/* Detach DLC if it's owned by this dev */
+	if (dlc->owner == dev)
+		dlc->owner = NULL;
+	rfcomm_dlc_unlock(dlc);
+
+	rfcomm_dlc_put(dlc);
+
+	tty_unregister_device(rfcomm_tty_driver, dev->id);
+
+	/* Refcount should only hit zero when called from rfcomm_dev_del()
+	   which will have taken us off the list. Everything else are
+	   refcounting bugs. */
+	BUG_ON(!list_empty(&dev->list));
+
+	kfree(dev);
+
+	/* It's safe to call module_put() here because socket still 
+	   holds reference to this module. */
+	module_put(THIS_MODULE);
+}
+
+static inline void rfcomm_dev_hold(struct rfcomm_dev *dev)
+{
+	atomic_inc(&dev->refcnt);
+}
+
+static inline void rfcomm_dev_put(struct rfcomm_dev *dev)
+{
+	/* The reason this isn't actually a race, as you no
+	   doubt have a little voice screaming at you in your
+	   head, is that the refcount should never actually
+	   reach zero unless the device has already been taken
+	   off the list, in rfcomm_dev_del(). And if that's not
+	   true, we'll hit the BUG() in rfcomm_dev_destruct()
+	   anyway. */
+	if (atomic_dec_and_test(&dev->refcnt))
+		rfcomm_dev_destruct(dev);
+}
+
+static struct rfcomm_dev *__rfcomm_dev_get(int id)
+{
+	struct rfcomm_dev *dev;
+	struct list_head  *p;
+
+	list_for_each(p, &rfcomm_dev_list) {
+		dev = list_entry(p, struct rfcomm_dev, list);
+		if (dev->id == id)
+			return dev;
+	}
+
+	return NULL;
+}
+
+static inline struct rfcomm_dev *rfcomm_dev_get(int id)
+{
+	struct rfcomm_dev *dev;
+
+	read_lock(&rfcomm_dev_lock);
+
+	dev = __rfcomm_dev_get(id);
+	if (dev)
+		rfcomm_dev_hold(dev);
+
+	read_unlock(&rfcomm_dev_lock);
+
+	return dev;
+}
+
+static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
+{
+	struct rfcomm_dev *dev;
+	struct list_head *head = &rfcomm_dev_list, *p;
+	int err = 0;
+
+	BT_DBG("id %d channel %d", req->dev_id, req->channel);
+	
+	dev = kmalloc(sizeof(struct rfcomm_dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	memset(dev, 0, sizeof(struct rfcomm_dev));
+
+	write_lock_bh(&rfcomm_dev_lock);
+
+	if (req->dev_id < 0) {
+		dev->id = 0;
+
+		list_for_each(p, &rfcomm_dev_list) {
+			if (list_entry(p, struct rfcomm_dev, list)->id != dev->id)
+				break;
+
+			dev->id++;
+			head = p;
+		}
+	} else {
+		dev->id = req->dev_id;
+
+		list_for_each(p, &rfcomm_dev_list) {
+			struct rfcomm_dev *entry = list_entry(p, struct rfcomm_dev, list);
+
+			if (entry->id == dev->id) {
+				err = -EADDRINUSE;
+				goto out;
+			}
+
+			if (entry->id > dev->id - 1)
+				break;
+
+			head = p;
+		}
+	}
+
+	if ((dev->id < 0) || (dev->id > RFCOMM_MAX_DEV - 1)) {
+		err = -ENFILE;
+		goto out;
+	}
+
+	sprintf(dev->name, "rfcomm%d", dev->id);
+
+	list_add(&dev->list, head);
+	atomic_set(&dev->refcnt, 1);
+
+	bacpy(&dev->src, &req->src);
+	bacpy(&dev->dst, &req->dst);
+	dev->channel = req->channel;
+
+	dev->flags = req->flags & 
+		((1 << RFCOMM_RELEASE_ONHUP) | (1 << RFCOMM_REUSE_DLC));
+
+	init_waitqueue_head(&dev->wait);
+	tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev);
+
+	rfcomm_dlc_lock(dlc);
+	dlc->data_ready   = rfcomm_dev_data_ready;
+	dlc->state_change = rfcomm_dev_state_change;
+	dlc->modem_status = rfcomm_dev_modem_status;
+
+	dlc->owner = dev;
+	dev->dlc   = dlc;
+	rfcomm_dlc_unlock(dlc);
+
+	/* It's safe to call __module_get() here because socket already 
+	   holds reference to this module. */
+	__module_get(THIS_MODULE);
+
+out:
+	write_unlock_bh(&rfcomm_dev_lock);
+
+	if (err) {
+		kfree(dev);
+		return err;
+	}
+
+	tty_register_device(rfcomm_tty_driver, dev->id, NULL);
+
+	return dev->id;
+}
+
+static void rfcomm_dev_del(struct rfcomm_dev *dev)
+{
+	BT_DBG("dev %p", dev);
+
+	write_lock_bh(&rfcomm_dev_lock);
+	list_del_init(&dev->list);
+	write_unlock_bh(&rfcomm_dev_lock);
+
+	rfcomm_dev_put(dev);
+}
+
+/* ---- Send buffer ---- */
+static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
+{
+	/* We can't let it be zero, because we don't get a callback
+	   when tx_credits becomes nonzero, hence we'd never wake up */
+	return dlc->mtu * (dlc->tx_credits?:1);
+}
+
+static void rfcomm_wfree(struct sk_buff *skb)
+{
+	struct rfcomm_dev *dev = (void *) skb->sk;
+	atomic_sub(skb->truesize, &dev->wmem_alloc);
+	if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags))
+		tasklet_schedule(&dev->wakeup_task);
+	rfcomm_dev_put(dev);
+}
+
+static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *dev)
+{
+	rfcomm_dev_hold(dev);
+	atomic_add(skb->truesize, &dev->wmem_alloc);
+	skb->sk = (void *) dev;
+	skb->destructor = rfcomm_wfree;
+}
+
+static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, int priority)
+{
+	if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) {
+		struct sk_buff *skb = alloc_skb(size, priority);
+		if (skb) {
+			rfcomm_set_owner_w(skb, dev);
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+/* ---- Device IOCTLs ---- */
+
+#define NOCAP_FLAGS ((1 << RFCOMM_REUSE_DLC) | (1 << RFCOMM_RELEASE_ONHUP))
+
+static int rfcomm_create_dev(struct sock *sk, void __user *arg)
+{
+	struct rfcomm_dev_req req;
+	struct rfcomm_dlc *dlc;
+	int id;
+
+	if (copy_from_user(&req, arg, sizeof(req)))
+		return -EFAULT;
+
+	BT_DBG("sk %p dev_id %id flags 0x%x", sk, req.dev_id, req.flags);
+
+	if (req.flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (req.flags & (1 << RFCOMM_REUSE_DLC)) {
+		/* Socket must be connected */
+		if (sk->sk_state != BT_CONNECTED)
+			return -EBADFD;
+
+		dlc = rfcomm_pi(sk)->dlc;
+		rfcomm_dlc_hold(dlc);
+	} else {
+		dlc = rfcomm_dlc_alloc(GFP_KERNEL);
+		if (!dlc)
+			return -ENOMEM;
+	}
+
+	id = rfcomm_dev_add(&req, dlc);
+	if (id < 0) {
+		rfcomm_dlc_put(dlc);
+		return id;
+	}
+
+	if (req.flags & (1 << RFCOMM_REUSE_DLC)) {
+		/* DLC is now used by device.
+		 * Socket must be disconnected */
+		sk->sk_state = BT_CLOSED;
+	}
+
+	return id;
+}
+
+static int rfcomm_release_dev(void __user *arg)
+{
+	struct rfcomm_dev_req req;
+	struct rfcomm_dev *dev;
+
+	if (copy_from_user(&req, arg, sizeof(req)))
+		return -EFAULT;
+
+	BT_DBG("dev_id %id flags 0x%x", req.dev_id, req.flags);
+
+	if (!(dev = rfcomm_dev_get(req.dev_id)))
+		return -ENODEV;
+
+	if (dev->flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) {
+		rfcomm_dev_put(dev);
+		return -EPERM;
+	}
+
+	if (req.flags & (1 << RFCOMM_HANGUP_NOW))
+		rfcomm_dlc_close(dev->dlc, 0);
+
+	rfcomm_dev_del(dev);
+	rfcomm_dev_put(dev);
+	return 0;
+}
+
+static int rfcomm_get_dev_list(void __user *arg)
+{
+	struct rfcomm_dev_list_req *dl;
+	struct rfcomm_dev_info *di;
+	struct list_head *p;
+	int n = 0, size, err;
+	u16 dev_num;
+
+	BT_DBG("");
+
+	if (get_user(dev_num, (u16 __user *) arg))
+		return -EFAULT;
+
+	if (!dev_num || dev_num > (PAGE_SIZE * 4) / sizeof(*di))
+		return -EINVAL;
+
+	size = sizeof(*dl) + dev_num * sizeof(*di);
+
+	if (!(dl = kmalloc(size, GFP_KERNEL)))
+		return -ENOMEM;
+
+	di = dl->dev_info;
+
+	read_lock_bh(&rfcomm_dev_lock);
+
+	list_for_each(p, &rfcomm_dev_list) {
+		struct rfcomm_dev *dev = list_entry(p, struct rfcomm_dev, list);
+		(di + n)->id      = dev->id;
+		(di + n)->flags   = dev->flags;
+		(di + n)->state   = dev->dlc->state;
+		(di + n)->channel = dev->channel;
+		bacpy(&(di + n)->src, &dev->src);
+		bacpy(&(di + n)->dst, &dev->dst);
+		if (++n >= dev_num)
+			break;
+	}
+
+	read_unlock_bh(&rfcomm_dev_lock);
+
+	dl->dev_num = n;
+	size = sizeof(*dl) + n * sizeof(*di);
+
+	err = copy_to_user(arg, dl, size);
+	kfree(dl);
+
+	return err ? -EFAULT : 0;
+}
+
+static int rfcomm_get_dev_info(void __user *arg)
+{
+	struct rfcomm_dev *dev;
+	struct rfcomm_dev_info di;
+	int err = 0;
+
+	BT_DBG("");
+
+	if (copy_from_user(&di, arg, sizeof(di)))
+		return -EFAULT;
+
+	if (!(dev = rfcomm_dev_get(di.id)))
+		return -ENODEV;
+
+	di.flags   = dev->flags;
+	di.channel = dev->channel;
+	di.state   = dev->dlc->state;
+	bacpy(&di.src, &dev->src);
+	bacpy(&di.dst, &dev->dst);
+
+	if (copy_to_user(arg, &di, sizeof(di)))
+		err = -EFAULT;
+
+	rfcomm_dev_put(dev);
+	return err;
+}
+
+int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+	BT_DBG("cmd %d arg %p", cmd, arg);
+
+	switch (cmd) {
+	case RFCOMMCREATEDEV:
+		return rfcomm_create_dev(sk, arg);
+
+	case RFCOMMRELEASEDEV:
+		return rfcomm_release_dev(arg);
+
+	case RFCOMMGETDEVLIST:
+		return rfcomm_get_dev_list(arg);
+
+	case RFCOMMGETDEVINFO:
+		return rfcomm_get_dev_info(arg);
+	}
+
+	return -EINVAL;
+}
+
+/* ---- DLC callbacks ---- */
+static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
+{
+	struct rfcomm_dev *dev = dlc->owner;
+	struct tty_struct *tty;
+       
+	if (!dev || !(tty = dev->tty)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
+
+	if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+		register int i;
+		for (i = 0; i < skb->len; i++) {
+			if (tty->flip.count >= TTY_FLIPBUF_SIZE)
+				tty_flip_buffer_push(tty);
+
+			tty_insert_flip_char(tty, skb->data[i], 0);
+		}
+		tty_flip_buffer_push(tty);
+	} else
+		tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len);
+
+	kfree_skb(skb);
+}
+
+static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
+{
+	struct rfcomm_dev *dev = dlc->owner;
+	if (!dev)
+		return;
+	
+	BT_DBG("dlc %p dev %p err %d", dlc, dev, err);
+
+	dev->err = err;
+	wake_up_interruptible(&dev->wait);
+
+	if (dlc->state == BT_CLOSED) {
+		if (!dev->tty) {
+			if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
+				rfcomm_dev_hold(dev);
+				rfcomm_dev_del(dev);
+
+				/* We have to drop DLC lock here, otherwise
+				   rfcomm_dev_put() will dead lock if it's
+				   the last reference. */
+				rfcomm_dlc_unlock(dlc);
+				rfcomm_dev_put(dev);
+				rfcomm_dlc_lock(dlc);
+			}
+		} else 
+			tty_hangup(dev->tty);
+	}
+}
+
+static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
+{
+	struct rfcomm_dev *dev = dlc->owner;
+	if (!dev)
+		return;
+	
+	BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
+
+	dev->modem_status = 
+		((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
+		((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) |
+		((v24_sig & RFCOMM_V24_IC)  ? TIOCM_RI : 0) |
+		((v24_sig & RFCOMM_V24_DV)  ? TIOCM_CD : 0);
+}
+
+/* ---- TTY functions ---- */
+static void rfcomm_tty_wakeup(unsigned long arg)
+{
+	struct rfcomm_dev *dev = (void *) arg;
+	struct tty_struct *tty = dev->tty;
+	if (!tty)
+		return;
+
+	BT_DBG("dev %p tty %p", dev, tty);
+
+	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup)
+                (tty->ldisc.write_wakeup)(tty);
+
+	wake_up_interruptible(&tty->write_wait);
+#ifdef SERIAL_HAVE_POLL_WAIT
+	wake_up_interruptible(&tty->poll_wait);
+#endif
+}
+
+static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct rfcomm_dev *dev;
+	struct rfcomm_dlc *dlc;
+	int err, id;
+
+        id = tty->index;
+
+	BT_DBG("tty %p id %d", tty, id);
+
+	/* We don't leak this refcount. For reasons which are not entirely
+	   clear, the TTY layer will call our ->close() method even if the
+	   open fails. We decrease the refcount there, and decreasing it
+	   here too would cause breakage. */
+	dev = rfcomm_dev_get(id);
+	if (!dev)
+		return -ENODEV;
+
+	BT_DBG("dev %p dst %s channel %d opened %d", dev, batostr(&dev->dst), dev->channel, dev->opened);
+
+	if (dev->opened++ != 0)
+		return 0;
+
+	dlc = dev->dlc;
+
+	/* Attach TTY and open DLC */
+
+	rfcomm_dlc_lock(dlc);
+	tty->driver_data = dev;
+	dev->tty = tty;
+	rfcomm_dlc_unlock(dlc);
+	set_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
+
+	err = rfcomm_dlc_open(dlc, &dev->src, &dev->dst, dev->channel);
+	if (err < 0)
+		return err;
+
+	/* Wait for DLC to connect */
+	add_wait_queue(&dev->wait, &wait);
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (dlc->state == BT_CLOSED) {
+			err = -dev->err;
+			break;
+		}
+
+		if (dlc->state == BT_CONNECTED)
+			break;
+
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&dev->wait, &wait);
+
+	return err;
+}
+
+static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	if (!dev)
+		return;
+
+	BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->opened);
+
+	if (--dev->opened == 0) {
+		/* Close DLC and dettach TTY */
+		rfcomm_dlc_close(dev->dlc, 0);
+
+		clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
+		tasklet_kill(&dev->wakeup_task);
+
+		rfcomm_dlc_lock(dev->dlc);
+		tty->driver_data = NULL;
+		dev->tty = NULL;
+		rfcomm_dlc_unlock(dev->dlc);
+	}
+
+	rfcomm_dev_put(dev);
+}
+
+static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	struct rfcomm_dlc *dlc = dev->dlc;
+	struct sk_buff *skb;
+	int err = 0, sent = 0, size;
+
+	BT_DBG("tty %p count %d", tty, count);
+
+	while (count) {
+		size = min_t(uint, count, dlc->mtu);
+
+		skb = rfcomm_wmalloc(dev, size + RFCOMM_SKB_RESERVE, GFP_ATOMIC);
+		
+		if (!skb)
+			break;
+
+		skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
+
+		memcpy(skb_put(skb, size), buf + sent, size);
+
+		if ((err = rfcomm_dlc_send(dlc, skb)) < 0) {
+			kfree_skb(skb);
+			break;
+		}
+
+		sent  += size;
+		count -= size;
+	}
+
+	return sent ? sent : err;
+}
+
+static int rfcomm_tty_write_room(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	int room;
+
+	BT_DBG("tty %p", tty);
+
+	room = rfcomm_room(dev->dlc) - atomic_read(&dev->wmem_alloc);
+	if (room < 0)
+		room = 0;
+	return room;
+}
+
+static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	BT_DBG("tty %p cmd 0x%02x", tty, cmd);
+
+	switch (cmd) {
+	case TCGETS:
+		BT_DBG("TCGETS is not supported");
+		return -ENOIOCTLCMD;
+
+	case TCSETS:
+		BT_DBG("TCSETS is not supported");
+		return -ENOIOCTLCMD;
+
+	case TIOCMIWAIT:
+		BT_DBG("TIOCMIWAIT");
+		break;
+
+	case TIOCGICOUNT:
+		BT_DBG("TIOCGICOUNT");
+		break;
+
+	case TIOCGSERIAL:
+		BT_ERR("TIOCGSERIAL is not supported");
+		return -ENOIOCTLCMD;
+
+	case TIOCSSERIAL:
+		BT_ERR("TIOCSSERIAL is not supported");
+		return -ENOIOCTLCMD;
+
+	case TIOCSERGSTRUCT:
+		BT_ERR("TIOCSERGSTRUCT is not supported");
+		return -ENOIOCTLCMD;
+
+	case TIOCSERGETLSR:
+		BT_ERR("TIOCSERGETLSR is not supported");
+		return -ENOIOCTLCMD;
+
+	case TIOCSERCONFIG:
+		BT_ERR("TIOCSERCONFIG is not supported");
+		return -ENOIOCTLCMD;
+
+	default:
+		return -ENOIOCTLCMD;	/* ioctls which we must ignore */
+
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
+static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old)
+{
+	BT_DBG("tty %p", tty);
+
+	if ((tty->termios->c_cflag == old->c_cflag) &&
+		(RELEVANT_IFLAG(tty->termios->c_iflag) == RELEVANT_IFLAG(old->c_iflag)))
+		return;
+
+	/* handle turning off CRTSCTS */
+	if ((old->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) {
+		BT_DBG("turning off CRTSCTS");
+	}
+}
+
+static void rfcomm_tty_throttle(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+	
+	rfcomm_dlc_throttle(dev->dlc);
+}
+
+static void rfcomm_tty_unthrottle(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+	
+	rfcomm_dlc_unthrottle(dev->dlc);
+}
+
+static int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	struct rfcomm_dlc *dlc = dev->dlc;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+
+	if (skb_queue_len(&dlc->tx_queue))
+		return dlc->mtu;
+
+	return 0;
+}
+
+static void rfcomm_tty_flush_buffer(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	if (!dev)
+		return;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+
+	skb_queue_purge(&dev->dlc->tx_queue);
+
+	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup)
+		tty->ldisc.write_wakeup(tty);
+}
+
+static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch)
+{
+	BT_DBG("tty %p ch %c", tty, ch);
+}
+
+static void rfcomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	BT_DBG("tty %p timeout %d", tty, timeout);
+}
+
+static void rfcomm_tty_hangup(struct tty_struct *tty)
+{
+	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+	if (!dev)
+		return;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+
+	rfcomm_tty_flush_buffer(tty);
+
+	if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
+		rfcomm_dev_del(dev);
+}
+
+static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused)
+{
+	return 0;
+}
+
+static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
+{
+ 	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+
+	BT_DBG("tty %p dev %p", tty, dev);
+
+ 	return dev->modem_status;
+}
+
+static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear)
+{
+ 	struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ 	struct rfcomm_dlc *dlc = dev->dlc;
+ 	u8 v24_sig;
+
+	BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear);
+
+ 	rfcomm_dlc_get_modem_status(dlc, &v24_sig);
+
+ 	if (set & TIOCM_DSR || set & TIOCM_DTR)
+ 		v24_sig |= RFCOMM_V24_RTC;
+ 	if (set & TIOCM_RTS || set & TIOCM_CTS)
+ 		v24_sig |= RFCOMM_V24_RTR;
+ 	if (set & TIOCM_RI)
+ 		v24_sig |= RFCOMM_V24_IC;
+ 	if (set & TIOCM_CD)
+ 		v24_sig |= RFCOMM_V24_DV;
+
+ 	if (clear & TIOCM_DSR || clear & TIOCM_DTR)
+ 		v24_sig &= ~RFCOMM_V24_RTC;
+ 	if (clear & TIOCM_RTS || clear & TIOCM_CTS)
+ 		v24_sig &= ~RFCOMM_V24_RTR;
+ 	if (clear & TIOCM_RI)
+ 		v24_sig &= ~RFCOMM_V24_IC;
+ 	if (clear & TIOCM_CD)
+ 		v24_sig &= ~RFCOMM_V24_DV;
+
+ 	rfcomm_dlc_set_modem_status(dlc, v24_sig);
+
+ 	return 0;
+}
+
+/* ---- TTY structure ---- */
+
+static struct tty_operations rfcomm_ops = {
+	.open			= rfcomm_tty_open,
+	.close			= rfcomm_tty_close,
+	.write			= rfcomm_tty_write,
+	.write_room		= rfcomm_tty_write_room,
+	.chars_in_buffer	= rfcomm_tty_chars_in_buffer,
+	.flush_buffer		= rfcomm_tty_flush_buffer,
+	.ioctl			= rfcomm_tty_ioctl,
+	.throttle		= rfcomm_tty_throttle,
+	.unthrottle		= rfcomm_tty_unthrottle,
+	.set_termios		= rfcomm_tty_set_termios,
+	.send_xchar		= rfcomm_tty_send_xchar,
+	.hangup			= rfcomm_tty_hangup,
+	.wait_until_sent	= rfcomm_tty_wait_until_sent,
+	.read_proc		= rfcomm_tty_read_proc,
+	.tiocmget		= rfcomm_tty_tiocmget,
+	.tiocmset		= rfcomm_tty_tiocmset,
+};
+
+int rfcomm_init_ttys(void)
+{
+	rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS);
+	if (!rfcomm_tty_driver)
+		return -1;
+
+	rfcomm_tty_driver->owner	= THIS_MODULE;
+	rfcomm_tty_driver->driver_name	= "rfcomm";
+	rfcomm_tty_driver->devfs_name	= "bluetooth/rfcomm/";
+	rfcomm_tty_driver->name		= "rfcomm";
+	rfcomm_tty_driver->major	= RFCOMM_TTY_MAJOR;
+	rfcomm_tty_driver->minor_start	= RFCOMM_TTY_MINOR;
+	rfcomm_tty_driver->type		= TTY_DRIVER_TYPE_SERIAL;
+	rfcomm_tty_driver->subtype	= SERIAL_TYPE_NORMAL;
+	rfcomm_tty_driver->flags	= TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS;
+	rfcomm_tty_driver->init_termios	= tty_std_termios;
+	rfcomm_tty_driver->init_termios.c_cflag	= B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
+
+	if (tty_register_driver(rfcomm_tty_driver)) {
+		BT_ERR("Can't register RFCOMM TTY driver");
+		put_tty_driver(rfcomm_tty_driver);
+		return -1;
+	}
+
+	BT_INFO("RFCOMM TTY layer initialized");
+
+	return 0;
+}
+
+void rfcomm_cleanup_ttys(void)
+{
+	tty_unregister_driver(rfcomm_tty_driver);
+	put_tty_driver(rfcomm_tty_driver);
+}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
new file mode 100644
index 00000000000..3e750ef09e6
--- /dev/null
+++ b/net/bluetooth/sco.c
@@ -0,0 +1,1071 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth SCO sockets. */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+#include <net/sock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/sco.h>
+
+#ifndef CONFIG_BT_SCO_DEBUG
+#undef  BT_DBG
+#define BT_DBG(D...)
+#endif
+
+#define VERSION "0.4"
+
+static struct proto_ops sco_sock_ops;
+
+static struct bt_sock_list sco_sk_list = {
+	.lock = RW_LOCK_UNLOCKED
+};
+
+static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent);
+static void sco_chan_del(struct sock *sk, int err);
+
+static int  sco_conn_del(struct hci_conn *conn, int err);
+
+static void sco_sock_close(struct sock *sk);
+static void sco_sock_kill(struct sock *sk);
+
+/* ---- SCO timers ---- */
+static void sco_sock_timeout(unsigned long arg)
+{
+	struct sock *sk = (struct sock *) arg;
+
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+
+	bh_lock_sock(sk);
+	sk->sk_err = ETIMEDOUT;
+	sk->sk_state_change(sk);
+	bh_unlock_sock(sk);
+
+	sco_sock_kill(sk);
+	sock_put(sk);
+}
+
+static void sco_sock_set_timer(struct sock *sk, long timeout)
+{
+	BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+}
+
+static void sco_sock_clear_timer(struct sock *sk)
+{
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static void sco_sock_init_timer(struct sock *sk)
+{
+	init_timer(&sk->sk_timer);
+	sk->sk_timer.function = sco_sock_timeout;
+	sk->sk_timer.data = (unsigned long)sk;
+}
+
+/* ---- SCO connections ---- */
+static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
+{
+	struct hci_dev *hdev = hcon->hdev;
+	struct sco_conn *conn;
+
+	if ((conn = hcon->sco_data))
+		return conn;
+
+	if (status)
+		return conn;
+
+	if (!(conn = kmalloc(sizeof(struct sco_conn), GFP_ATOMIC)))
+		return NULL;
+	memset(conn, 0, sizeof(struct sco_conn));
+
+	spin_lock_init(&conn->lock);
+
+	hcon->sco_data = conn;
+	conn->hcon = hcon;
+
+	conn->src = &hdev->bdaddr;
+	conn->dst = &hcon->dst;
+
+	if (hdev->sco_mtu > 0)
+		conn->mtu = hdev->sco_mtu;
+	else
+		conn->mtu = 60;
+
+	BT_DBG("hcon %p conn %p", hcon, conn);
+	return conn;
+}
+
+static inline struct sock *sco_chan_get(struct sco_conn *conn)
+{
+	struct sock *sk = NULL;
+	sco_conn_lock(conn);
+	sk = conn->sk;
+	sco_conn_unlock(conn);
+	return sk;
+}
+
+static int sco_conn_del(struct hci_conn *hcon, int err)
+{
+	struct sco_conn *conn;
+	struct sock *sk;
+
+	if (!(conn = hcon->sco_data)) 
+		return 0;
+
+	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+
+	/* Kill socket */
+	if ((sk = sco_chan_get(conn))) {
+		bh_lock_sock(sk);
+		sco_sock_clear_timer(sk);
+		sco_chan_del(sk, err);
+		bh_unlock_sock(sk);
+		sco_sock_kill(sk);
+	}
+
+	hcon->sco_data = NULL;
+	kfree(conn);
+	return 0;
+}
+
+static inline int sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent)
+{
+	int err = 0;
+
+	sco_conn_lock(conn);
+	if (conn->sk) {
+		err = -EBUSY;
+	} else {
+		__sco_chan_add(conn, sk, parent);
+	}
+	sco_conn_unlock(conn);
+	return err;
+}
+
+static int sco_connect(struct sock *sk)
+{
+	bdaddr_t *src = &bt_sk(sk)->src;
+	bdaddr_t *dst = &bt_sk(sk)->dst;
+	struct sco_conn *conn;
+	struct hci_conn *hcon;
+	struct hci_dev  *hdev;
+	int err = 0;
+
+	BT_DBG("%s -> %s", batostr(src), batostr(dst));
+
+	if (!(hdev = hci_get_route(dst, src)))
+		return -EHOSTUNREACH;
+
+	hci_dev_lock_bh(hdev);
+
+	err = -ENOMEM;
+
+	hcon = hci_connect(hdev, SCO_LINK, dst);
+	if (!hcon)
+		goto done;
+
+	conn = sco_conn_add(hcon, 0);
+	if (!conn) {
+		hci_conn_put(hcon);
+		goto done;
+	}
+
+	/* Update source addr of the socket */
+	bacpy(src, conn->src);
+
+	err = sco_chan_add(conn, sk, NULL);
+	if (err)
+		goto done;
+
+	if (hcon->state == BT_CONNECTED) {
+		sco_sock_clear_timer(sk);
+		sk->sk_state = BT_CONNECTED;
+	} else {
+		sk->sk_state = BT_CONNECT;
+		sco_sock_set_timer(sk, sk->sk_sndtimeo);
+	}
+done:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+	return err;
+}
+
+static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sco_conn *conn = sco_pi(sk)->conn;
+	struct sk_buff *skb;
+	int err, count;
+
+	/* Check outgoing MTU */
+	if (len > conn->mtu)
+		return -EINVAL;
+
+	BT_DBG("sk %p len %d", sk, len);
+
+	count = min_t(unsigned int, conn->mtu, len);
+	if (!(skb = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err)))
+		return err;
+
+	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	if ((err = hci_send_sco(conn->hcon, skb)) < 0)
+		goto fail;
+
+	return count;
+
+fail:
+	kfree_skb(skb);
+	return err;
+}
+
+static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
+{
+	struct sock *sk = sco_chan_get(conn);
+
+	if (!sk)
+		goto drop;
+
+	BT_DBG("sk %p len %d", sk, skb->len);
+
+	if (sk->sk_state != BT_CONNECTED)
+		goto drop;
+
+	if (!sock_queue_rcv_skb(sk, skb))
+		return;
+
+drop:
+	kfree_skb(skb);
+	return;
+}
+
+/* -------- Socket interface ---------- */
+static struct sock *__sco_get_sock_by_addr(bdaddr_t *ba)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &sco_sk_list.head)
+		if (!bacmp(&bt_sk(sk)->src, ba))
+			goto found;
+	sk = NULL;
+found:
+	return sk;
+}
+
+/* Find socket listening on source bdaddr.
+ * Returns closest match.
+ */
+static struct sock *sco_get_sock_listen(bdaddr_t *src)
+{
+	struct sock *sk = NULL, *sk1 = NULL;
+	struct hlist_node *node;
+
+	read_lock(&sco_sk_list.lock);
+
+	sk_for_each(sk, node, &sco_sk_list.head) {
+		if (sk->sk_state != BT_LISTEN)
+			continue;
+
+		/* Exact match. */
+		if (!bacmp(&bt_sk(sk)->src, src))
+			break;
+
+		/* Closest match */
+		if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
+			sk1 = sk;
+	}
+
+	read_unlock(&sco_sk_list.lock);
+
+	return node ? sk : sk1;
+}
+
+static void sco_sock_destruct(struct sock *sk)
+{
+	BT_DBG("sk %p", sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+}
+
+static void sco_sock_cleanup_listen(struct sock *parent)
+{
+	struct sock *sk;
+
+	BT_DBG("parent %p", parent);
+
+	/* Close not yet accepted channels */
+	while ((sk = bt_accept_dequeue(parent, NULL))) {
+		sco_sock_close(sk);
+		sco_sock_kill(sk);
+	}
+
+	parent->sk_state  = BT_CLOSED;
+	sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+/* Kill socket (only if zapped and orphan)
+ * Must be called on unlocked socket.
+ */
+static void sco_sock_kill(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+		return;
+
+	BT_DBG("sk %p state %d", sk, sk->sk_state);
+
+	/* Kill poor orphan */
+	bt_sock_unlink(&sco_sk_list, sk);
+	sock_set_flag(sk, SOCK_DEAD);
+	sock_put(sk);
+}
+
+/* Close socket.
+ * Must be called on unlocked socket.
+ */
+static void sco_sock_close(struct sock *sk)
+{
+	struct sco_conn *conn;
+
+	sco_sock_clear_timer(sk);
+
+	lock_sock(sk);
+
+	conn = sco_pi(sk)->conn;
+
+	BT_DBG("sk %p state %d conn %p socket %p", sk, sk->sk_state, conn, sk->sk_socket);
+
+	switch (sk->sk_state) {
+	case BT_LISTEN:
+		sco_sock_cleanup_listen(sk);
+		break;
+
+	case BT_CONNECTED:
+	case BT_CONFIG:
+	case BT_CONNECT:
+	case BT_DISCONN:
+		sco_chan_del(sk, ECONNRESET);
+		break;
+
+	default:
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+	};
+
+	release_sock(sk);
+
+	sco_sock_kill(sk);
+}
+
+static void sco_sock_init(struct sock *sk, struct sock *parent)
+{
+	BT_DBG("sk %p", sk);
+
+	if (parent) 
+		sk->sk_type = parent->sk_type;
+}
+
+static struct proto sco_proto = {
+	.name		= "SCO",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct sco_pinfo)
+};
+
+static struct sock *sco_sock_alloc(struct socket *sock, int proto, int prio)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+	sk->sk_destruct = sco_sock_destruct;
+	sk->sk_sndtimeo = SCO_CONN_TIMEOUT;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = proto;
+	sk->sk_state    = BT_OPEN;
+
+	sco_sock_init_timer(sk);
+
+	bt_sock_link(&sco_sk_list, sk);
+	return sk;
+}
+
+static int sco_sock_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	sock->state = SS_UNCONNECTED;
+
+	if (sock->type != SOCK_SEQPACKET)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &sco_sock_ops;
+
+	if (!(sk = sco_sock_alloc(sock, protocol, GFP_KERNEL)))
+		return -ENOMEM;
+
+	sco_sock_init(sk, NULL);
+	return 0;
+}
+
+static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
+	struct sock *sk = sock->sk;
+	bdaddr_t *src = &sa->sco_bdaddr;
+	int err = 0;
+
+	BT_DBG("sk %p %s", sk, batostr(&sa->sco_bdaddr));
+
+	if (!addr || addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_OPEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	write_lock_bh(&sco_sk_list.lock);
+
+	if (bacmp(src, BDADDR_ANY) && __sco_get_sock_by_addr(src)) {
+		err = -EADDRINUSE;
+	} else {
+		/* Save source address */
+		bacpy(&bt_sk(sk)->src, &sa->sco_bdaddr);
+		sk->sk_state = BT_BOUND;
+	}
+
+	write_unlock_bh(&sco_sk_list.lock);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
+{
+	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+
+	BT_DBG("sk %p", sk);
+
+	if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_sco))
+		return -EINVAL;
+
+	if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+		return -EBADFD;
+
+	if (sk->sk_type != SOCK_SEQPACKET)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	/* Set destination address and psm */
+	bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr);
+
+	if ((err = sco_connect(sk)))
+		goto done;
+
+	err = bt_sock_wait_state(sk, BT_CONNECTED, 
+			sock_sndtimeo(sk, flags & O_NONBLOCK));
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p backlog %d", sk, backlog);
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_BOUND || sock->type != SOCK_SEQPACKET) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+	sk->sk_state = BT_LISTEN;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *ch;
+	long timeo;
+	int err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_LISTEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	BT_DBG("sk %p timeo %ld", sk, timeo);
+
+	/* Wait for an incoming connection. (wake-one). */
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	while (!(ch = bt_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		if (sk->sk_state != BT_LISTEN) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	if (err)
+		goto done;
+
+	newsock->state = SS_CONNECTED;
+
+	BT_DBG("new socket %p", ch);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+{
+	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	addr->sa_family = AF_BLUETOOTH;
+	*len = sizeof(struct sockaddr_sco);
+
+	if (peer)
+		bacpy(&sa->sco_bdaddr, &bt_sk(sk)->dst);
+	else
+		bacpy(&sa->sco_bdaddr, &bt_sk(sk)->src);
+
+	return 0;
+}
+
+static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+			    struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (sk->sk_err)
+		return sock_error(sk);
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (sk->sk_state == BT_CONNECTED)
+		err = sco_send_frame(sk, msg, len);
+	else
+		err = -ENOTCONN;
+
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct sco_options opts;
+	struct sco_conninfo cinfo;
+	int len, err = 0; 
+
+	BT_DBG("sk %p", sk);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case SCO_OPTIONS:
+		if (sk->sk_state != BT_CONNECTED) {
+			err = -ENOTCONN;
+			break;
+		}
+
+		opts.mtu = sco_pi(sk)->conn->mtu;
+
+		BT_DBG("mtu %d", opts.mtu);
+
+		len = min_t(unsigned int, len, sizeof(opts));
+		if (copy_to_user(optval, (char *)&opts, len))
+			err = -EFAULT;
+
+		break;
+
+	case SCO_CONNINFO:
+		if (sk->sk_state != BT_CONNECTED) {
+			err = -ENOTCONN;
+			break;
+		}
+
+		cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
+		memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
+
+		len = min_t(unsigned int, len, sizeof(cinfo));
+		if (copy_to_user(optval, (char *)&cinfo, len))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int sco_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	sco_sock_close(sk);
+
+	if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) {
+		lock_sock(sk);
+		err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+		release_sock(sk);
+	}
+
+	sock_orphan(sk);
+	sco_sock_kill(sk);
+	return err;
+}
+
+static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent)
+{
+	BT_DBG("conn %p", conn);
+
+	sco_pi(sk)->conn = conn;
+	conn->sk = sk;
+
+	if (parent)
+		bt_accept_enqueue(parent, sk);
+}
+
+/* Delete channel. 
+ * Must be called on the locked socket. */
+static void sco_chan_del(struct sock *sk, int err)
+{
+	struct sco_conn *conn;
+
+	conn = sco_pi(sk)->conn;
+
+	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+	if (conn) { 
+		sco_conn_lock(conn);
+		conn->sk = NULL;
+		sco_pi(sk)->conn = NULL;
+		sco_conn_unlock(conn);
+		hci_conn_put(conn->hcon);
+	}
+
+	sk->sk_state = BT_CLOSED;
+	sk->sk_err   = err;
+	sk->sk_state_change(sk);
+
+	sock_set_flag(sk, SOCK_ZAPPED);
+}
+
+static void sco_conn_ready(struct sco_conn *conn)
+{
+	struct sock *parent, *sk;
+
+	BT_DBG("conn %p", conn);
+
+	sco_conn_lock(conn);
+
+	if ((sk = conn->sk)) {
+		sco_sock_clear_timer(sk);
+		bh_lock_sock(sk);
+		sk->sk_state = BT_CONNECTED;
+		sk->sk_state_change(sk);
+		bh_unlock_sock(sk);
+	} else {
+		parent = sco_get_sock_listen(conn->src);
+		if (!parent)
+			goto done;
+
+		bh_lock_sock(parent);
+
+		sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC);
+		if (!sk) {
+			bh_unlock_sock(parent);
+			goto done;
+		}
+
+		sco_sock_init(sk, parent);
+
+		bacpy(&bt_sk(sk)->src, conn->src);
+		bacpy(&bt_sk(sk)->dst, conn->dst);
+
+		hci_conn_hold(conn->hcon);
+		__sco_chan_add(conn, sk, parent);
+
+		sk->sk_state = BT_CONNECTED;
+
+		/* Wake up parent */
+		parent->sk_data_ready(parent, 1);
+
+		bh_unlock_sock(parent);
+	}
+
+done:
+	sco_conn_unlock(conn);
+}
+
+/* ----- SCO interface with lower layer (HCI) ----- */
+static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type)
+{
+	BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
+
+	/* Always accept connection */
+	return HCI_LM_ACCEPT;
+}
+
+static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+{
+	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
+
+	if (hcon->type != SCO_LINK)
+		return 0;
+
+	if (!status) {
+		struct sco_conn *conn;
+
+		conn = sco_conn_add(hcon, status);
+		if (conn)
+			sco_conn_ready(conn);
+	} else 
+		sco_conn_del(hcon, bt_err(status));
+
+	return 0;
+}
+
+static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason)
+{
+	BT_DBG("hcon %p reason %d", hcon, reason);
+
+	if (hcon->type != SCO_LINK)
+		return 0;
+
+	sco_conn_del(hcon, bt_err(reason));
+	return 0;
+}
+
+static int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
+{
+	struct sco_conn *conn = hcon->sco_data;
+
+	if (!conn)
+		goto drop;
+
+	BT_DBG("conn %p len %d", conn, skb->len);
+
+	if (skb->len) {
+		sco_recv_frame(conn, skb);
+		return 0;
+	}
+
+drop:
+	kfree_skb(skb);	
+	return 0;
+}
+
+/* ---- Proc fs support ---- */
+#ifdef CONFIG_PROC_FS
+static void *sco_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	loff_t l = *pos;
+
+	read_lock_bh(&sco_sk_list.lock);
+
+	sk_for_each(sk, node, &sco_sk_list.head)
+		if (!l--)
+			goto found;
+	sk = NULL;
+found:
+	return sk;
+}
+
+static void *sco_seq_next(struct seq_file *seq, void *e, loff_t *pos)
+{
+	struct sock *sk = e;
+	(*pos)++;
+	return sk_next(sk);
+}
+
+static void sco_seq_stop(struct seq_file *seq, void *e)
+{
+	read_unlock_bh(&sco_sk_list.lock);
+}
+
+static int  sco_seq_show(struct seq_file *seq, void *e)
+{
+	struct sock *sk = e;
+	seq_printf(seq, "%s %s %d\n",
+			batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state);
+	return 0;
+}
+
+static struct seq_operations sco_seq_ops = {
+	.start	= sco_seq_start,
+	.next	= sco_seq_next,
+	.stop	= sco_seq_stop,
+	.show	= sco_seq_show 
+};
+
+static int sco_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &sco_seq_ops);
+}
+
+static struct file_operations sco_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= sco_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init sco_proc_init(void)
+{
+	struct proc_dir_entry *p = create_proc_entry("sco", S_IRUGO, proc_bt);
+	if (!p)
+		return -ENOMEM;
+	p->owner     = THIS_MODULE;
+	p->proc_fops = &sco_seq_fops;
+	return 0;
+}
+
+static void __exit sco_proc_cleanup(void)
+{
+	remove_proc_entry("sco", proc_bt);
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int __init sco_proc_init(void)
+{
+	return 0;
+}
+
+static void __exit sco_proc_cleanup(void)
+{
+	return;
+}
+#endif /* CONFIG_PROC_FS */
+
+static struct proto_ops sco_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= sco_sock_release,
+	.bind		= sco_sock_bind,
+	.connect	= sco_sock_connect,
+	.listen		= sco_sock_listen,
+	.accept		= sco_sock_accept,
+	.getname	= sco_sock_getname,
+	.sendmsg	= sco_sock_sendmsg,
+	.recvmsg	= bt_sock_recvmsg,
+	.poll		= bt_sock_poll,
+	.ioctl		= sock_no_ioctl,
+	.mmap		= sock_no_mmap,
+	.socketpair	= sock_no_socketpair,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= sco_sock_setsockopt,
+	.getsockopt	= sco_sock_getsockopt
+};
+
+static struct net_proto_family sco_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= sco_sock_create,
+};
+
+static struct hci_proto sco_hci_proto = {
+	.name		= "SCO",
+	.id		= HCI_PROTO_SCO,
+	.connect_ind	= sco_connect_ind,
+	.connect_cfm	= sco_connect_cfm,
+	.disconn_ind	= sco_disconn_ind,
+	.recv_scodata	= sco_recv_scodata
+};
+
+static int __init sco_init(void)
+{
+	int err;
+
+	err = proto_register(&sco_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_SCO, &sco_sock_family_ops);
+	if (err < 0) {
+		BT_ERR("SCO socket registration failed");
+		goto error;
+	}
+
+	err = hci_register_proto(&sco_hci_proto);
+	if (err < 0) {
+		BT_ERR("SCO protocol registration failed");
+		bt_sock_unregister(BTPROTO_SCO);
+		goto error;
+	}
+
+	sco_proc_init();
+
+	BT_INFO("SCO (Voice Link) ver %s", VERSION);
+	BT_INFO("SCO socket layer initialized");
+
+	return 0;
+
+error:
+	proto_unregister(&sco_proto);
+	return err;
+}
+
+static void __exit sco_exit(void)
+{
+	sco_proc_cleanup();
+
+	if (bt_sock_unregister(BTPROTO_SCO) < 0)
+		BT_ERR("SCO socket unregistration failed");
+
+	if (hci_unregister_proto(&sco_hci_proto) < 0)
+		BT_ERR("SCO protocol unregistration failed");
+
+	proto_unregister(&sco_proto);
+}
+
+module_init(sco_init);
+module_exit(sco_exit);
+
+MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("bt-proto-2");
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
new file mode 100644
index 00000000000..59556e40e14
--- /dev/null
+++ b/net/bridge/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the IEEE 802.1d ethernet bridging layer.
+#
+
+obj-$(CONFIG_BRIDGE) += bridge.o
+
+bridge-y	:= br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
+			br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+			br_stp_if.o br_stp_timer.o
+
+bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
+
+bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+
+obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
diff --git a/net/bridge/br.c b/net/bridge/br.c
new file mode 100644
index 00000000000..f8f184942aa
--- /dev/null
+++ b/net/bridge/br.c
@@ -0,0 +1,69 @@
+/*
+ *	Generic parts
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+
+#include "br_private.h"
+
+int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;
+
+static int __init br_init(void)
+{
+	br_fdb_init();
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (br_netfilter_init())
+		return 1;
+#endif
+	brioctl_set(br_ioctl_deviceless_stub);
+	br_handle_frame_hook = br_handle_frame;
+
+	br_fdb_get_hook = br_fdb_get;
+	br_fdb_put_hook = br_fdb_put;
+
+	register_netdevice_notifier(&br_device_notifier);
+
+	return 0;
+}
+
+static void __exit br_deinit(void)
+{
+#ifdef CONFIG_BRIDGE_NETFILTER
+	br_netfilter_fini();
+#endif
+	unregister_netdevice_notifier(&br_device_notifier);
+	brioctl_set(NULL);
+
+	br_cleanup_bridges();
+
+	synchronize_net();
+
+	br_fdb_get_hook = NULL;
+	br_fdb_put_hook = NULL;
+
+	br_handle_frame_hook = NULL;
+	br_fdb_fini();
+}
+
+EXPORT_SYMBOL(br_should_route_hook);
+
+module_init(br_init)
+module_exit(br_deinit)
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
new file mode 100644
index 00000000000..d9b72fde433
--- /dev/null
+++ b/net/bridge/br_device.c
@@ -0,0 +1,104 @@
+/*
+ *	Device handling code
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include "br_private.h"
+
+static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
+{
+	struct net_bridge *br;
+
+	br = dev->priv;
+
+	return &br->statistics;
+}
+
+int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	const unsigned char *dest = skb->data;
+	struct net_bridge_fdb_entry *dst;
+
+	br->statistics.tx_packets++;
+	br->statistics.tx_bytes += skb->len;
+
+	skb->mac.raw = skb->data;
+	skb_pull(skb, ETH_HLEN);
+
+	rcu_read_lock();
+	if (dest[0] & 1) 
+		br_flood_deliver(br, skb, 0);
+	else if ((dst = __br_fdb_get(br, dest)) != NULL)
+		br_deliver(dst->dst, skb);
+	else
+		br_flood_deliver(br, skb, 0);
+
+	rcu_read_unlock();
+	return 0;
+}
+
+static int br_dev_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+
+	br_stp_enable_bridge(dev->priv);
+
+	return 0;
+}
+
+static void br_dev_set_multicast_list(struct net_device *dev)
+{
+}
+
+static int br_dev_stop(struct net_device *dev)
+{
+	br_stp_disable_bridge(dev->priv);
+
+	netif_stop_queue(dev);
+
+	return 0;
+}
+
+static int br_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv))
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+void br_dev_setup(struct net_device *dev)
+{
+	memset(dev->dev_addr, 0, ETH_ALEN);
+
+	ether_setup(dev);
+
+	dev->do_ioctl = br_dev_ioctl;
+	dev->get_stats = br_dev_get_stats;
+	dev->hard_start_xmit = br_dev_xmit;
+	dev->open = br_dev_open;
+	dev->set_multicast_list = br_dev_set_multicast_list;
+	dev->change_mtu = br_change_mtu;
+	dev->destructor = free_netdev;
+	SET_MODULE_OWNER(dev);
+	dev->stop = br_dev_stop;
+	dev->tx_queue_len = 0;
+	dev->set_mac_address = NULL;
+	dev->priv_flags = IFF_EBRIDGE;
+}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
new file mode 100644
index 00000000000..e6c2200b7ca
--- /dev/null
+++ b/net/bridge/br_fdb.c
@@ -0,0 +1,368 @@
+/*
+ *	Forwarding database
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/times.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/jhash.h>
+#include <asm/atomic.h>
+#include "br_private.h"
+
+static kmem_cache_t *br_fdb_cache;
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		      const unsigned char *addr);
+
+void __init br_fdb_init(void)
+{
+	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
+					 sizeof(struct net_bridge_fdb_entry),
+					 0,
+					 SLAB_HWCACHE_ALIGN, NULL, NULL);
+}
+
+void __exit br_fdb_fini(void)
+{
+	kmem_cache_destroy(br_fdb_cache);
+}
+
+
+/* if topology_changing then use forward_delay (default 15 sec)
+ * otherwise keep longer (default 5 minutes)
+ */
+static __inline__ unsigned long hold_time(const struct net_bridge *br)
+{
+	return br->topology_change ? br->forward_delay : br->ageing_time;
+}
+
+static __inline__ int has_expired(const struct net_bridge *br,
+				  const struct net_bridge_fdb_entry *fdb)
+{
+	return !fdb->is_static 
+		&& time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
+}
+
+static __inline__ int br_mac_hash(const unsigned char *mac)
+{
+	return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1);
+}
+
+static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+{
+	hlist_del_rcu(&f->hlist);
+	br_fdb_put(f);
+}
+
+void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
+{
+	struct net_bridge *br = p->br;
+	int i;
+	
+	spin_lock_bh(&br->hash_lock);
+
+	/* Search all chains since old address/hash is unknown */
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct hlist_node *h;
+		hlist_for_each(h, &br->hash[i]) {
+			struct net_bridge_fdb_entry *f;
+
+			f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
+			if (f->dst == p && f->is_local) {
+				/* maybe another port has same hw addr? */
+				struct net_bridge_port *op;
+				list_for_each_entry(op, &br->port_list, list) {
+					if (op != p && 
+					    !memcmp(op->dev->dev_addr,
+						    f->addr.addr, ETH_ALEN)) {
+						f->dst = op;
+						goto insert;
+					}
+				}
+
+				/* delete old one */
+				fdb_delete(f);
+				goto insert;
+			}
+		}
+	}
+ insert:
+	/* insert new address,  may fail if invalid address or dup. */
+	fdb_insert(br, p, newaddr);
+
+	spin_unlock_bh(&br->hash_lock);
+}
+
+void br_fdb_cleanup(unsigned long _data)
+{
+	struct net_bridge *br = (struct net_bridge *)_data;
+	unsigned long delay = hold_time(br);
+	int i;
+
+	spin_lock_bh(&br->hash_lock);
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct net_bridge_fdb_entry *f;
+		struct hlist_node *h, *n;
+
+		hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+			if (!f->is_static && 
+			    time_before_eq(f->ageing_timer + delay, jiffies)) 
+				fdb_delete(f);
+		}
+	}
+	spin_unlock_bh(&br->hash_lock);
+
+	mod_timer(&br->gc_timer, jiffies + HZ/10);
+}
+
+void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p)
+{
+	int i;
+
+	spin_lock_bh(&br->hash_lock);
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct hlist_node *h, *g;
+		
+		hlist_for_each_safe(h, g, &br->hash[i]) {
+			struct net_bridge_fdb_entry *f
+				= hlist_entry(h, struct net_bridge_fdb_entry, hlist);
+			if (f->dst != p) 
+				continue;
+
+			/*
+			 * if multiple ports all have the same device address
+			 * then when one port is deleted, assign
+			 * the local entry to other port
+			 */
+			if (f->is_local) {
+				struct net_bridge_port *op;
+				list_for_each_entry(op, &br->port_list, list) {
+					if (op != p && 
+					    !memcmp(op->dev->dev_addr,
+						    f->addr.addr, ETH_ALEN)) {
+						f->dst = op;
+						goto skip_delete;
+					}
+				}
+			}
+
+			fdb_delete(f);
+		skip_delete: ;
+		}
+	}
+	spin_unlock_bh(&br->hash_lock);
+}
+
+/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */
+struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
+					  const unsigned char *addr)
+{
+	struct hlist_node *h;
+	struct net_bridge_fdb_entry *fdb;
+
+	hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
+		if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) {
+			if (unlikely(has_expired(br, fdb)))
+				break;
+			return fdb;
+		}
+	}
+
+	return NULL;
+}
+
+/* Interface used by ATM hook that keeps a ref count */
+struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, 
+					unsigned char *addr)
+{
+	struct net_bridge_fdb_entry *fdb;
+
+	rcu_read_lock();
+	fdb = __br_fdb_get(br, addr);
+	if (fdb) 
+		atomic_inc(&fdb->use_count);
+	rcu_read_unlock();
+	return fdb;
+}
+
+static void fdb_rcu_free(struct rcu_head *head)
+{
+	struct net_bridge_fdb_entry *ent
+		= container_of(head, struct net_bridge_fdb_entry, rcu);
+	kmem_cache_free(br_fdb_cache, ent);
+}
+
+/* Set entry up for deletion with RCU  */
+void br_fdb_put(struct net_bridge_fdb_entry *ent)
+{
+	if (atomic_dec_and_test(&ent->use_count))
+		call_rcu(&ent->rcu, fdb_rcu_free);
+}
+
+/*
+ * Fill buffer with forwarding table records in 
+ * the API format.
+ */
+int br_fdb_fillbuf(struct net_bridge *br, void *buf,
+		   unsigned long maxnum, unsigned long skip)
+{
+	struct __fdb_entry *fe = buf;
+	int i, num = 0;
+	struct hlist_node *h;
+	struct net_bridge_fdb_entry *f;
+
+	memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
+
+	rcu_read_lock();
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
+			if (num >= maxnum)
+				goto out;
+
+			if (has_expired(br, f)) 
+				continue;
+
+			if (skip) {
+				--skip;
+				continue;
+			}
+
+			/* convert from internal format to API */
+			memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN);
+			fe->port_no = f->dst->port_no;
+			fe->is_local = f->is_local;
+			if (!f->is_static)
+				fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer);
+			++fe;
+			++num;
+		}
+	}
+
+ out:
+	rcu_read_unlock();
+
+	return num;
+}
+
+static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
+						    const unsigned char *addr)
+{
+	struct hlist_node *h;
+	struct net_bridge_fdb_entry *fdb;
+
+	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
+		if (!memcmp(fdb->addr.addr, addr, ETH_ALEN))
+			return fdb;
+	}
+	return NULL;
+}
+
+static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
+					       struct net_bridge_port *source,
+					       const unsigned char *addr, 
+					       int is_local)
+{
+	struct net_bridge_fdb_entry *fdb;
+
+	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
+	if (fdb) {
+		memcpy(fdb->addr.addr, addr, ETH_ALEN);
+		atomic_set(&fdb->use_count, 1);
+		hlist_add_head_rcu(&fdb->hlist, head);
+
+		fdb->dst = source;
+		fdb->is_local = is_local;
+		fdb->is_static = is_local;
+		fdb->ageing_timer = jiffies;
+	}
+	return fdb;
+}
+
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		  const unsigned char *addr)
+{
+	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct net_bridge_fdb_entry *fdb;
+
+	if (!is_valid_ether_addr(addr))
+		return -EINVAL;
+
+	fdb = fdb_find(head, addr);
+	if (fdb) {
+		/* it is okay to have multiple ports with same 
+		 * address, just use the first one.
+		 */
+		if (fdb->is_local) 
+			return 0;
+
+		printk(KERN_WARNING "%s adding interface with same address "
+		       "as a received packet\n",
+		       source->dev->name);
+		fdb_delete(fdb);
+ 	}
+
+	if (!fdb_create(head, source, addr, 1))
+		return -ENOMEM;
+
+	return 0;
+}
+
+int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+		  const unsigned char *addr)
+{
+	int ret;
+
+	spin_lock_bh(&br->hash_lock);
+	ret = fdb_insert(br, source, addr);
+	spin_unlock_bh(&br->hash_lock);
+	return ret;
+}
+
+void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
+		   const unsigned char *addr)
+{
+	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+	struct net_bridge_fdb_entry *fdb;
+
+	/* some users want to always flood. */
+	if (hold_time(br) == 0)
+		return;
+
+	rcu_read_lock();
+	fdb = fdb_find(head, addr);
+	if (likely(fdb)) {
+		/* attempt to update an entry for a local interface */
+		if (unlikely(fdb->is_local)) {
+			if (net_ratelimit()) 
+				printk(KERN_WARNING "%s: received packet with "
+				       " own address as source address\n",
+				       source->dev->name);
+		} else {
+			/* fastpath: update of existing entry */
+			fdb->dst = source;
+			fdb->ageing_timer = jiffies;
+		}
+	} else {
+		spin_lock_bh(&br->hash_lock);
+		if (!fdb_find(head, addr))
+			fdb_create(head, source, addr, 0);
+		/* else  we lose race and someone else inserts
+		 * it first, don't bother updating
+		 */
+		spin_unlock_bh(&br->hash_lock);
+	}
+	rcu_read_unlock();
+}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
new file mode 100644
index 00000000000..ef9f2095f96
--- /dev/null
+++ b/net/bridge/br_forward.c
@@ -0,0 +1,159 @@
+/*
+ *	Forwarding decision
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_bridge.h>
+#include "br_private.h"
+
+static inline int should_deliver(const struct net_bridge_port *p, 
+				 const struct sk_buff *skb)
+{
+	if (skb->dev == p->dev ||
+	    p->state != BR_STATE_FORWARDING)
+		return 0;
+
+	return 1;
+}
+
+int br_dev_queue_push_xmit(struct sk_buff *skb)
+{
+	if (skb->len > skb->dev->mtu) 
+		kfree_skb(skb);
+	else {
+#ifdef CONFIG_BRIDGE_NETFILTER
+		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
+		nf_bridge_maybe_copy_header(skb);
+#endif
+		skb_push(skb, ETH_HLEN);
+
+		dev_queue_xmit(skb);
+	}
+
+	return 0;
+}
+
+int br_forward_finish(struct sk_buff *skb)
+{
+	NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
+			br_dev_queue_push_xmit);
+
+	return 0;
+}
+
+static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
+{
+	skb->dev = to->dev;
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+			br_forward_finish);
+}
+
+static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
+{
+	struct net_device *indev;
+
+	indev = skb->dev;
+	skb->dev = to->dev;
+	skb->ip_summed = CHECKSUM_NONE;
+
+	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
+			br_forward_finish);
+}
+
+/* called with rcu_read_lock */
+void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
+{
+	if (should_deliver(to, skb)) {
+		__br_deliver(to, skb);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+/* called with rcu_read_lock */
+void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
+{
+	if (should_deliver(to, skb)) {
+		__br_forward(to, skb);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+/* called under bridge lock */
+static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
+	void (*__packet_hook)(const struct net_bridge_port *p, 
+			      struct sk_buff *skb))
+{
+	struct net_bridge_port *p;
+	struct net_bridge_port *prev;
+
+	if (clone) {
+		struct sk_buff *skb2;
+
+		if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+			br->statistics.tx_dropped++;
+			return;
+		}
+
+		skb = skb2;
+	}
+
+	prev = NULL;
+
+	list_for_each_entry_rcu(p, &br->port_list, list) {
+		if (should_deliver(p, skb)) {
+			if (prev != NULL) {
+				struct sk_buff *skb2;
+
+				if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+					br->statistics.tx_dropped++;
+					kfree_skb(skb);
+					return;
+				}
+
+				__packet_hook(prev, skb2);
+			}
+
+			prev = p;
+		}
+	}
+
+	if (prev != NULL) {
+		__packet_hook(prev, skb);
+		return;
+	}
+
+	kfree_skb(skb);
+}
+
+
+/* called with rcu_read_lock */
+void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone)
+{
+	br_flood(br, skb, clone, __br_deliver);
+}
+
+/* called under bridge lock */
+void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
+{
+	br_flood(br, skb, clone, __br_forward);
+}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
new file mode 100644
index 00000000000..69872bf3b87
--- /dev/null
+++ b/net/bridge/br_if.c
@@ -0,0 +1,388 @@
+/*
+ *	Userspace interface
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+#include "br_private.h"
+
+/*
+ * Determine initial path cost based on speed.
+ * using recommendations from 802.1d standard
+ *
+ * Need to simulate user ioctl because not all device's that support
+ * ethtool, use ethtool_ops.  Also, since driver might sleep need to
+ * not be holding any locks.
+ */
+static int br_initial_port_cost(struct net_device *dev)
+{
+
+	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
+	struct ifreq ifr;
+	mm_segment_t old_fs;
+	int err;
+
+	strncpy(ifr.ifr_name, dev->name, IFNAMSIZ);
+	ifr.ifr_data = (void __user *) &ecmd;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = dev_ethtool(&ifr);
+	set_fs(old_fs);
+	
+	if (!err) {
+		switch(ecmd.speed) {
+		case SPEED_100:
+			return 19;
+		case SPEED_1000:
+			return 4;
+		case SPEED_10000:
+			return 2;
+		case SPEED_10:
+			return 100;
+		default:
+			pr_info("bridge: can't decode speed from %s: %d\n",
+				dev->name, ecmd.speed);
+			return 100;
+		}
+	}
+
+	/* Old silly heuristics based on name */
+	if (!strncmp(dev->name, "lec", 3))
+		return 7;
+
+	if (!strncmp(dev->name, "plip", 4))
+		return 2500;
+
+	return 100;	/* assume old 10Mbps */
+}
+
+static void destroy_nbp(struct net_bridge_port *p)
+{
+	struct net_device *dev = p->dev;
+
+	dev->br_port = NULL;
+	p->br = NULL;
+	p->dev = NULL;
+	dev_put(dev);
+
+	br_sysfs_freeif(p);
+}
+
+static void destroy_nbp_rcu(struct rcu_head *head)
+{
+	struct net_bridge_port *p =
+			container_of(head, struct net_bridge_port, rcu);
+	destroy_nbp(p);
+}
+
+/* called with RTNL */
+static void del_nbp(struct net_bridge_port *p)
+{
+	struct net_bridge *br = p->br;
+	struct net_device *dev = p->dev;
+
+	dev_set_promiscuity(dev, -1);
+
+	spin_lock_bh(&br->lock);
+	br_stp_disable_port(p);
+	spin_unlock_bh(&br->lock);
+
+	br_fdb_delete_by_port(br, p);
+
+	list_del_rcu(&p->list);
+
+	del_timer_sync(&p->message_age_timer);
+	del_timer_sync(&p->forward_delay_timer);
+	del_timer_sync(&p->hold_timer);
+	
+	call_rcu(&p->rcu, destroy_nbp_rcu);
+}
+
+/* called with RTNL */
+static void del_br(struct net_bridge *br)
+{
+	struct net_bridge_port *p, *n;
+
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		br_sysfs_removeif(p);
+		del_nbp(p);
+	}
+
+	del_timer_sync(&br->gc_timer);
+
+	br_sysfs_delbr(br->dev);
+ 	unregister_netdevice(br->dev);
+}
+
+static struct net_device *new_bridge_dev(const char *name)
+{
+	struct net_bridge *br;
+	struct net_device *dev;
+
+	dev = alloc_netdev(sizeof(struct net_bridge), name,
+			   br_dev_setup);
+	
+	if (!dev)
+		return NULL;
+
+	br = netdev_priv(dev);
+	br->dev = dev;
+
+	spin_lock_init(&br->lock);
+	INIT_LIST_HEAD(&br->port_list);
+	spin_lock_init(&br->hash_lock);
+
+	br->bridge_id.prio[0] = 0x80;
+	br->bridge_id.prio[1] = 0x00;
+	memset(br->bridge_id.addr, 0, ETH_ALEN);
+
+	br->stp_enabled = 0;
+	br->designated_root = br->bridge_id;
+	br->root_path_cost = 0;
+	br->root_port = 0;
+	br->bridge_max_age = br->max_age = 20 * HZ;
+	br->bridge_hello_time = br->hello_time = 2 * HZ;
+	br->bridge_forward_delay = br->forward_delay = 15 * HZ;
+	br->topology_change = 0;
+	br->topology_change_detected = 0;
+	br->ageing_time = 300 * HZ;
+	INIT_LIST_HEAD(&br->age_list);
+
+	br_stp_timer_init(br);
+
+	return dev;
+}
+
+/* find an available port number */
+static int find_portno(struct net_bridge *br)
+{
+	int index;
+	struct net_bridge_port *p;
+	unsigned long *inuse;
+
+	inuse = kmalloc(BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long),
+			GFP_KERNEL);
+	if (!inuse)
+		return -ENOMEM;
+
+	memset(inuse, 0, BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long));
+	set_bit(0, inuse);	/* zero is reserved */
+	list_for_each_entry(p, &br->port_list, list) {
+		set_bit(p->port_no, inuse);
+	}
+	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
+	kfree(inuse);
+
+	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
+}
+
+/* called with RTNL */
+static struct net_bridge_port *new_nbp(struct net_bridge *br, 
+				       struct net_device *dev,
+				       unsigned long cost)
+{
+	int index;
+	struct net_bridge_port *p;
+	
+	index = find_portno(br);
+	if (index < 0)
+		return ERR_PTR(index);
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(p, 0, sizeof(*p));
+	p->br = br;
+	dev_hold(dev);
+	p->dev = dev;
+	p->path_cost = cost;
+ 	p->priority = 0x8000 >> BR_PORT_BITS;
+	dev->br_port = p;
+	p->port_no = index;
+	br_init_port(p);
+	p->state = BR_STATE_DISABLED;
+	kobject_init(&p->kobj);
+
+	return p;
+}
+
+int br_add_bridge(const char *name)
+{
+	struct net_device *dev;
+	int ret;
+
+	dev = new_bridge_dev(name);
+	if (!dev) 
+		return -ENOMEM;
+
+	rtnl_lock();
+	if (strchr(dev->name, '%')) {
+		ret = dev_alloc_name(dev, dev->name);
+		if (ret < 0)
+			goto err1;
+	}
+
+	ret = register_netdevice(dev);
+	if (ret)
+		goto err2;
+
+	/* network device kobject is not setup until
+	 * after rtnl_unlock does it's hotplug magic.
+	 * so hold reference to avoid race.
+	 */
+	dev_hold(dev);
+	rtnl_unlock();
+
+	ret = br_sysfs_addbr(dev);
+	dev_put(dev);
+
+	if (ret) 
+		unregister_netdev(dev);
+ out:
+	return ret;
+
+ err2:
+	free_netdev(dev);
+ err1:
+	rtnl_unlock();
+	goto out;
+}
+
+int br_del_bridge(const char *name)
+{
+	struct net_device *dev;
+	int ret = 0;
+
+	rtnl_lock();
+	dev = __dev_get_by_name(name);
+	if (dev == NULL) 
+		ret =  -ENXIO; 	/* Could not find device */
+
+	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
+		/* Attempt to delete non bridge device! */
+		ret = -EPERM;
+	}
+
+	else if (dev->flags & IFF_UP) {
+		/* Not shutdown yet. */
+		ret = -EBUSY;
+	} 
+
+	else 
+		del_br(netdev_priv(dev));
+
+	rtnl_unlock();
+	return ret;
+}
+
+/* Mtu of the bridge pseudo-device 1500 or the minimum of the ports */
+int br_min_mtu(const struct net_bridge *br)
+{
+	const struct net_bridge_port *p;
+	int mtu = 0;
+
+	ASSERT_RTNL();
+
+	if (list_empty(&br->port_list))
+		mtu = 1500;
+	else {
+		list_for_each_entry(p, &br->port_list, list) {
+			if (!mtu  || p->dev->mtu < mtu)
+				mtu = p->dev->mtu;
+		}
+	}
+	return mtu;
+}
+
+/* called with RTNL */
+int br_add_if(struct net_bridge *br, struct net_device *dev)
+{
+	struct net_bridge_port *p;
+	int err = 0;
+
+	if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
+		return -EINVAL;
+
+	if (dev->hard_start_xmit == br_dev_xmit)
+		return -ELOOP;
+
+	if (dev->br_port != NULL)
+		return -EBUSY;
+
+	if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev))))
+		return PTR_ERR(p);
+
+ 	if ((err = br_fdb_insert(br, p, dev->dev_addr)))
+		destroy_nbp(p);
+ 
+	else if ((err = br_sysfs_addif(p)))
+		del_nbp(p);
+	else {
+		dev_set_promiscuity(dev, 1);
+
+		list_add_rcu(&p->list, &br->port_list);
+
+		spin_lock_bh(&br->lock);
+		br_stp_recalculate_bridge_id(br);
+		if ((br->dev->flags & IFF_UP) 
+		    && (dev->flags & IFF_UP) && netif_carrier_ok(dev))
+			br_stp_enable_port(p);
+		spin_unlock_bh(&br->lock);
+
+		dev_set_mtu(br->dev, br_min_mtu(br));
+	}
+
+	return err;
+}
+
+/* called with RTNL */
+int br_del_if(struct net_bridge *br, struct net_device *dev)
+{
+	struct net_bridge_port *p = dev->br_port;
+	
+	if (!p || p->br != br) 
+		return -EINVAL;
+
+	br_sysfs_removeif(p);
+	del_nbp(p);
+
+	spin_lock_bh(&br->lock);
+	br_stp_recalculate_bridge_id(br);
+	spin_unlock_bh(&br->lock);
+
+	return 0;
+}
+
+void __exit br_cleanup_bridges(void)
+{
+	struct net_device *dev, *nxt;
+
+	rtnl_lock();
+	for (dev = dev_base; dev; dev = nxt) {
+		nxt = dev->next;
+		if (dev->priv_flags & IFF_EBRIDGE)
+			del_br(dev->priv);
+	}
+	rtnl_unlock();
+
+}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
new file mode 100644
index 00000000000..2b1cce46cab
--- /dev/null
+++ b/net/bridge/br_input.c
@@ -0,0 +1,144 @@
+/*
+ *	Handle incoming frames
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/netfilter_bridge.h>
+#include "br_private.h"
+
+const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+static int br_pass_frame_up_finish(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug = 0;
+#endif
+	netif_rx(skb);
+
+	return 0;
+}
+
+static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
+{
+	struct net_device *indev;
+
+	br->statistics.rx_packets++;
+	br->statistics.rx_bytes += skb->len;
+
+	indev = skb->dev;
+	skb->dev = br->dev;
+
+	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
+			br_pass_frame_up_finish);
+}
+
+/* note: already called with rcu_read_lock (preempt_disabled) */
+int br_handle_frame_finish(struct sk_buff *skb)
+{
+	const unsigned char *dest = eth_hdr(skb)->h_dest;
+	struct net_bridge_port *p = skb->dev->br_port;
+	struct net_bridge *br = p->br;
+	struct net_bridge_fdb_entry *dst;
+	int passedup = 0;
+
+	if (br->dev->flags & IFF_PROMISC) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (skb2 != NULL) {
+			passedup = 1;
+			br_pass_frame_up(br, skb2);
+		}
+	}
+
+	if (dest[0] & 1) {
+		br_flood_forward(br, skb, !passedup);
+		if (!passedup)
+			br_pass_frame_up(br, skb);
+		goto out;
+	}
+
+	dst = __br_fdb_get(br, dest);
+	if (dst != NULL && dst->is_local) {
+		if (!passedup)
+			br_pass_frame_up(br, skb);
+		else
+			kfree_skb(skb);
+		goto out;
+	}
+
+	if (dst != NULL) {
+		br_forward(dst->dst, skb);
+		goto out;
+	}
+
+	br_flood_forward(br, skb, 0);
+
+out:
+	return 0;
+}
+
+/*
+ * Called via br_handle_frame_hook.
+ * Return 0 if *pskb should be processed furthur
+ *	  1 if *pskb is handled
+ * note: already called with rcu_read_lock (preempt_disabled) 
+ */
+int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	const unsigned char *dest = eth_hdr(skb)->h_dest;
+
+	if (p->state == BR_STATE_DISABLED)
+		goto err;
+
+	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
+		goto err;
+
+	if (p->state == BR_STATE_LEARNING ||
+	    p->state == BR_STATE_FORWARDING)
+		br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+
+	if (p->br->stp_enabled &&
+	    !memcmp(dest, bridge_ula, 5) &&
+	    !(dest[5] & 0xF0)) {
+		if (!dest[5]) {
+			NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 
+				NULL, br_stp_handle_bpdu);
+			return 1;
+		}
+	}
+
+	else if (p->state == BR_STATE_FORWARDING) {
+		if (br_should_route_hook) {
+			if (br_should_route_hook(pskb)) 
+				return 0;
+			skb = *pskb;
+			dest = eth_hdr(skb)->h_dest;
+		}
+
+		if (!memcmp(p->br->dev->dev_addr, dest, ETH_ALEN))
+			skb->pkt_type = PACKET_HOST;
+
+		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+			br_handle_frame_finish);
+		return 1;
+	}
+
+err:
+	kfree_skb(skb);
+	return 1;
+}
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
new file mode 100644
index 00000000000..b8ce14b2218
--- /dev/null
+++ b/net/bridge/br_ioctl.c
@@ -0,0 +1,410 @@
+/*
+ *	Ioctl handler
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/if_bridge.h>
+#include <linux/netdevice.h>
+#include <linux/times.h>
+#include <asm/uaccess.h>
+#include "br_private.h"
+
+/* called with RTNL */
+static int get_bridge_ifindices(int *indices, int num)
+{
+	struct net_device *dev;
+	int i = 0;
+
+	for (dev = dev_base; dev && i < num; dev = dev->next) {
+		if (dev->priv_flags & IFF_EBRIDGE) 
+			indices[i++] = dev->ifindex;
+	}
+
+	return i;
+}
+
+/* called with RTNL */
+static void get_port_ifindices(struct net_bridge *br, int *ifindices, int num)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->port_no < num)
+			ifindices[p->port_no] = p->dev->ifindex;
+	}
+}
+
+/*
+ * Format up to a page worth of forwarding table entries
+ * userbuf -- where to copy result
+ * maxnum  -- maximum number of entries desired
+ *            (limited to a page for sanity)
+ * offset  -- number of records to skip
+ */
+static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, 
+			   unsigned long maxnum, unsigned long offset)
+{
+	int num;
+	void *buf;
+	size_t size = maxnum * sizeof(struct __fdb_entry);
+
+	if (size > PAGE_SIZE) {
+		size = PAGE_SIZE;
+		maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
+	}
+
+	buf = kmalloc(size, GFP_USER);
+	if (!buf)
+		return -ENOMEM;
+	
+	num = br_fdb_fillbuf(br, buf, maxnum, offset);
+	if (num > 0) {
+		if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry)))
+			num = -EFAULT;
+	}
+	kfree(buf);
+
+	return num;
+}
+
+static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
+{
+	struct net_device *dev;
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	dev = dev_get_by_index(ifindex);
+	if (dev == NULL)
+		return -EINVAL;
+	
+	if (isadd)
+		ret = br_add_if(br, dev);
+	else
+		ret = br_del_if(br, dev);
+
+	dev_put(dev);
+	return ret;
+}
+
+/*
+ * Legacy ioctl's through SIOCDEVPRIVATE
+ * This interface is deprecated because it was too difficult to
+ * to do the translation for 32/64bit ioctl compatability.
+ */
+static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	unsigned long args[4];
+	
+	if (copy_from_user(args, rq->ifr_data, sizeof(args)))
+		return -EFAULT;
+
+	switch (args[0]) {
+	case BRCTL_ADD_IF:
+	case BRCTL_DEL_IF:
+		return add_del_if(br, args[1], args[0] == BRCTL_ADD_IF);
+
+	case BRCTL_GET_BRIDGE_INFO:
+	{
+		struct __bridge_info b;
+
+		memset(&b, 0, sizeof(struct __bridge_info));
+		rcu_read_lock();
+		memcpy(&b.designated_root, &br->designated_root, 8);
+		memcpy(&b.bridge_id, &br->bridge_id, 8);
+		b.root_path_cost = br->root_path_cost;
+		b.max_age = jiffies_to_clock_t(br->max_age);
+		b.hello_time = jiffies_to_clock_t(br->hello_time);
+		b.forward_delay = br->forward_delay;
+		b.bridge_max_age = br->bridge_max_age;
+		b.bridge_hello_time = br->bridge_hello_time;
+		b.bridge_forward_delay = jiffies_to_clock_t(br->bridge_forward_delay);
+		b.topology_change = br->topology_change;
+		b.topology_change_detected = br->topology_change_detected;
+		b.root_port = br->root_port;
+		b.stp_enabled = br->stp_enabled;
+		b.ageing_time = jiffies_to_clock_t(br->ageing_time);
+		b.hello_timer_value = br_timer_value(&br->hello_timer);
+		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
+		b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
+		b.gc_timer_value = br_timer_value(&br->gc_timer);
+	        rcu_read_unlock();
+
+		if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case BRCTL_GET_PORT_LIST:
+	{
+		int num, *indices;
+
+		num = args[2];
+		if (num < 0)
+			return -EINVAL;
+		if (num == 0)
+			num = 256;
+		if (num > BR_MAX_PORTS)
+			num = BR_MAX_PORTS;
+
+		indices = kmalloc(num*sizeof(int), GFP_KERNEL);
+		if (indices == NULL)
+			return -ENOMEM;
+
+		memset(indices, 0, num*sizeof(int));
+
+		get_port_ifindices(br, indices, num);
+		if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
+			num =  -EFAULT;
+		kfree(indices);
+		return num;
+	}
+
+	case BRCTL_SET_BRIDGE_FORWARD_DELAY:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		spin_lock_bh(&br->lock);
+		br->bridge_forward_delay = clock_t_to_jiffies(args[1]);
+		if (br_is_root_bridge(br))
+			br->forward_delay = br->bridge_forward_delay;
+		spin_unlock_bh(&br->lock);
+		return 0;
+
+	case BRCTL_SET_BRIDGE_HELLO_TIME:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		spin_lock_bh(&br->lock);
+		br->bridge_hello_time = clock_t_to_jiffies(args[1]);
+		if (br_is_root_bridge(br))
+			br->hello_time = br->bridge_hello_time;
+		spin_unlock_bh(&br->lock);
+		return 0;
+
+	case BRCTL_SET_BRIDGE_MAX_AGE:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		spin_lock_bh(&br->lock);
+		br->bridge_max_age = clock_t_to_jiffies(args[1]);
+		if (br_is_root_bridge(br))
+			br->max_age = br->bridge_max_age;
+		spin_unlock_bh(&br->lock);
+		return 0;
+
+	case BRCTL_SET_AGEING_TIME:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		br->ageing_time = clock_t_to_jiffies(args[1]);
+		return 0;
+
+	case BRCTL_GET_PORT_INFO:
+	{
+		struct __port_info p;
+		struct net_bridge_port *pt;
+
+		rcu_read_lock();
+		if ((pt = br_get_port(br, args[2])) == NULL) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+
+		memset(&p, 0, sizeof(struct __port_info));
+		memcpy(&p.designated_root, &pt->designated_root, 8);
+		memcpy(&p.designated_bridge, &pt->designated_bridge, 8);
+		p.port_id = pt->port_id;
+		p.designated_port = pt->designated_port;
+		p.path_cost = pt->path_cost;
+		p.designated_cost = pt->designated_cost;
+		p.state = pt->state;
+		p.top_change_ack = pt->topology_change_ack;
+		p.config_pending = pt->config_pending;
+		p.message_age_timer_value = br_timer_value(&pt->message_age_timer);
+		p.forward_delay_timer_value = br_timer_value(&pt->forward_delay_timer);
+		p.hold_timer_value = br_timer_value(&pt->hold_timer);
+
+		rcu_read_unlock();
+
+		if (copy_to_user((void __user *)args[1], &p, sizeof(p)))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case BRCTL_SET_BRIDGE_STP_STATE:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		br->stp_enabled = args[1]?1:0;
+		return 0;
+
+	case BRCTL_SET_BRIDGE_PRIORITY:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		spin_lock_bh(&br->lock);
+		br_stp_set_bridge_priority(br, args[1]);
+		spin_unlock_bh(&br->lock);
+		return 0;
+
+	case BRCTL_SET_PORT_PRIORITY:
+	{
+		struct net_bridge_port *p;
+		int ret = 0;
+
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		if (args[2] >= (1<<(16-BR_PORT_BITS)))
+			return -ERANGE;
+
+		spin_lock_bh(&br->lock);
+		if ((p = br_get_port(br, args[1])) == NULL) 
+			ret = -EINVAL;
+		else
+			br_stp_set_port_priority(p, args[2]);
+		spin_unlock_bh(&br->lock);
+		return ret;
+	}
+
+	case BRCTL_SET_PATH_COST:
+	{
+		struct net_bridge_port *p;
+		int ret = 0;
+
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		spin_lock_bh(&br->lock);
+		if ((p = br_get_port(br, args[1])) == NULL)
+			ret = -EINVAL;
+		else
+			br_stp_set_path_cost(p, args[2]);
+		spin_unlock_bh(&br->lock);
+		return ret;
+	}
+
+	case BRCTL_GET_FDB_ENTRIES:
+		return get_fdb_entries(br, (void __user *)args[1], 
+				       args[2], args[3]);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int old_deviceless(void __user *uarg)
+{
+	unsigned long args[3];
+
+	if (copy_from_user(args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	switch (args[0]) {
+	case BRCTL_GET_VERSION:
+		return BRCTL_VERSION;
+
+	case BRCTL_GET_BRIDGES:
+	{
+		int *indices;
+		int ret = 0;
+
+		if (args[2] >= 2048)
+			return -ENOMEM;
+		indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL);
+		if (indices == NULL)
+			return -ENOMEM;
+
+		memset(indices, 0, args[2]*sizeof(int));
+		args[2] = get_bridge_ifindices(indices, args[2]);
+
+		ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
+			? -EFAULT : args[2];
+
+		kfree(indices);
+		return ret;
+	}
+
+	case BRCTL_ADD_BRIDGE:
+	case BRCTL_DEL_BRIDGE:
+	{
+		char buf[IFNAMSIZ];
+
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
+			return -EFAULT;
+
+		buf[IFNAMSIZ-1] = 0;
+
+		if (args[0] == BRCTL_ADD_BRIDGE)
+			return br_add_bridge(buf);
+
+		return br_del_bridge(buf);
+	}
+	}
+
+	return -EOPNOTSUPP;
+}
+
+int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
+{
+	switch (cmd) {
+	case SIOCGIFBR:
+	case SIOCSIFBR:
+		return old_deviceless(uarg);
+		
+	case SIOCBRADDBR:
+	case SIOCBRDELBR:
+	{
+		char buf[IFNAMSIZ];
+
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(buf, uarg, IFNAMSIZ))
+			return -EFAULT;
+
+		buf[IFNAMSIZ-1] = 0;
+		if (cmd == SIOCBRADDBR)
+			return br_add_bridge(buf);
+
+		return br_del_bridge(buf);
+	}
+	}
+	return -EOPNOTSUPP;
+}
+
+int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	switch(cmd) {
+	case SIOCDEVPRIVATE:
+		return old_dev_ioctl(dev, rq, cmd);
+
+	case SIOCBRADDIF:
+	case SIOCBRDELIF:
+		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
+
+	}
+
+	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
+	return -EOPNOTSUPP;
+}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
new file mode 100644
index 00000000000..be03d3ad264
--- /dev/null
+++ b/net/bridge/br_netfilter.c
@@ -0,0 +1,1087 @@
+/*
+ *	Handle firewalling
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek               <buytenh@gnu.org>
+ *	Bart De Schuymer (maintainer)	<bdschuym@pandora.be>
+ *
+ *	Changes:
+ *	Apr 29 2003: physdev module support (bdschuym)
+ *	Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
+ *	Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
+ *	             (bdschuym)
+ *	Sep 01 2004: add IPv6 filtering (bdschuym)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_arp.h>
+#include <linux/in_route.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
+				 (skb->nf_bridge->data))->daddr.ipv4)
+#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = (skb)->nh.iph->daddr)
+#define dnat_took_place(skb)	 (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+
+#define has_bridge_parent(device)	((device)->br_port != NULL)
+#define bridge_parent(device)		((device)->br_port->br->dev)
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *brnf_sysctl_header;
+static int brnf_call_iptables = 1;
+static int brnf_call_ip6tables = 1;
+static int brnf_call_arptables = 1;
+static int brnf_filter_vlan_tagged = 1;
+#else
+#define brnf_filter_vlan_tagged 1
+#endif
+
+#define IS_VLAN_IP (skb->protocol == __constant_htons(ETH_P_8021Q) &&    \
+	hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IP) &&  \
+	brnf_filter_vlan_tagged)
+#define IS_VLAN_IPV6 (skb->protocol == __constant_htons(ETH_P_8021Q) &&    \
+	hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IPV6) &&  \
+	brnf_filter_vlan_tagged)
+#define IS_VLAN_ARP (skb->protocol == __constant_htons(ETH_P_8021Q) &&   \
+	hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_ARP) && \
+	brnf_filter_vlan_tagged)
+
+/* We need these fake structures to make netfilter happy --
+ * lots of places assume that skb->dst != NULL, which isn't
+ * all that unreasonable.
+ *
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it.  Future netfilter modules might
+ * require us to fill additional fields. */
+static struct net_device __fake_net_device = {
+	.hard_header_len	= ETH_HLEN
+};
+
+static struct rtable __fake_rtable = {
+	.u = {
+		.dst = {
+			.__refcnt		= ATOMIC_INIT(1),
+			.dev			= &__fake_net_device,
+			.path			= &__fake_rtable.u.dst,
+			.metrics		= {[RTAX_MTU - 1] = 1500},
+		}
+	},
+	.rt_flags	= 0,
+};
+
+
+/* PF_BRIDGE/PRE_ROUTING *********************************************/
+/* Undo the changes made for ip6tables PREROUTING and continue the
+ * bridge PRE_ROUTING hook. */
+static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
+#endif
+
+	if (nf_bridge->mask & BRNF_PKT_TYPE) {
+		skb->pkt_type = PACKET_OTHERHOST;
+		nf_bridge->mask ^= BRNF_PKT_TYPE;
+	}
+	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
+
+	skb->dst = (struct dst_entry *)&__fake_rtable;
+	dst_hold(skb->dst);
+
+	skb->dev = nf_bridge->physindev;
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->nh.raw -= VLAN_HLEN;
+	}
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+		       br_handle_frame_finish, 1);
+
+	return 0;
+}
+
+static void __br_dnat_complain(void)
+{
+	static unsigned long last_complaint;
+
+	if (jiffies - last_complaint >= 5 * HZ) {
+		printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
+			"forwarding to be enabled\n");
+		last_complaint = jiffies;
+	}
+}
+
+/* This requires some explaining. If DNAT has taken place,
+ * we will need to fix up the destination Ethernet address,
+ * and this is a tricky process.
+ *
+ * There are two cases to consider:
+ * 1. The packet was DNAT'ed to a device in the same bridge
+ *    port group as it was received on. We can still bridge
+ *    the packet.
+ * 2. The packet was DNAT'ed to a different device, either
+ *    a non-bridged device or another bridge port group.
+ *    The packet will need to be routed.
+ *
+ * The correct way of distinguishing between these two cases is to
+ * call ip_route_input() and to look at skb->dst->dev, which is
+ * changed to the destination device if ip_route_input() succeeds.
+ *
+ * Let us first consider the case that ip_route_input() succeeds:
+ *
+ * If skb->dst->dev equals the logical bridge device the packet
+ * came in on, we can consider this bridging. We then call
+ * skb->dst->output() which will make the packet enter br_nf_local_out()
+ * not much later. In that function it is assured that the iptables
+ * FORWARD chain is traversed for the packet.
+ *
+ * Otherwise, the packet is considered to be routed and we just
+ * change the destination MAC address so that the packet will
+ * later be passed up to the IP stack to be routed.
+ *
+ * Let us now consider the case that ip_route_input() fails:
+ *
+ * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
+ * will fail, while __ip_route_output_key() will return success. The source
+ * address for __ip_route_output_key() is set to zero, so __ip_route_output_key
+ * thinks we're handling a locally generated packet and won't care
+ * if IP forwarding is allowed. We send a warning message to the users's
+ * log telling her to put IP forwarding on.
+ *
+ * ip_route_input() will also fail if there is no route available.
+ * In that case we just drop the packet.
+ *
+ * --Lennert, 20020411
+ * --Bart, 20020416 (updated)
+ * --Bart, 20021007 (updated) */
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD);
+#endif
+
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		skb->pkt_type = PACKET_HOST;
+		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
+	}
+	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
+
+	skb->dev = bridge_parent(skb->dev);
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		skb->nh.raw += VLAN_HLEN;
+	}
+	skb->dst->output(skb);
+	return 0;
+}
+
+static int br_nf_pre_routing_finish(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct iphdr *iph = skb->nh.iph;
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
+#endif
+
+	if (nf_bridge->mask & BRNF_PKT_TYPE) {
+		skb->pkt_type = PACKET_OTHERHOST;
+		nf_bridge->mask ^= BRNF_PKT_TYPE;
+	}
+	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
+
+	if (dnat_took_place(skb)) {
+		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+		    dev)) {
+			struct rtable *rt;
+			struct flowi fl = { .nl_u = 
+			{ .ip4_u = { .daddr = iph->daddr, .saddr = 0 ,
+				     .tos = RT_TOS(iph->tos)} }, .proto = 0};
+
+			if (!ip_route_output_key(&rt, &fl)) {
+				/* Bridged-and-DNAT'ed traffic doesn't
+				 * require ip_forwarding. */
+				if (((struct dst_entry *)rt)->dev == dev) {
+					skb->dst = (struct dst_entry *)rt;
+					goto bridged_dnat;
+				}
+				__br_dnat_complain();
+				dst_release((struct dst_entry *)rt);
+			}
+			kfree_skb(skb);
+			return 0;
+		} else {
+			if (skb->dst->dev == dev) {
+bridged_dnat:
+				/* Tell br_nf_local_out this is a
+				 * bridged frame */
+				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+				skb->dev = nf_bridge->physindev;
+				if (skb->protocol ==
+				    __constant_htons(ETH_P_8021Q)) {
+					skb_push(skb, VLAN_HLEN);
+					skb->nh.raw -= VLAN_HLEN;
+				}
+				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
+					       skb, skb->dev, NULL,
+					       br_nf_pre_routing_finish_bridge,
+					       1);
+				return 0;
+			}
+			memcpy(eth_hdr(skb)->h_dest, dev->dev_addr,
+			       ETH_ALEN);
+			skb->pkt_type = PACKET_HOST;
+		}
+	} else {
+		skb->dst = (struct dst_entry *)&__fake_rtable;
+		dst_hold(skb->dst);
+	}
+
+	skb->dev = nf_bridge->physindev;
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->nh.raw -= VLAN_HLEN;
+	}
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+		       br_handle_frame_finish, 1);
+
+	return 0;
+}
+
+/* Some common code for IPv4/IPv6 */
+static void setup_pre_routing(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		skb->pkt_type = PACKET_HOST;
+		nf_bridge->mask |= BRNF_PKT_TYPE;
+	}
+
+	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
+	nf_bridge->physindev = skb->dev;
+	skb->dev = bridge_parent(skb->dev);
+}
+
+/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
+static int check_hbh_len(struct sk_buff *skb)
+{
+	unsigned char *raw = (u8*)(skb->nh.ipv6h+1);
+	u32 pkt_len;
+	int off = raw - skb->nh.raw;
+	int len = (raw[1]+1)<<3;
+
+	if ((raw + len) - skb->data > skb_headlen(skb))
+		goto bad;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int optlen = raw[off+1]+2;
+
+		switch (skb->nh.raw[off]) {
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+
+		case IPV6_TLV_PADN:
+			break;
+
+		case IPV6_TLV_JUMBO:
+			if (skb->nh.raw[off+1] != 4 || (off&3) != 2)
+				goto bad;
+
+			pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2));
+
+			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
+				goto bad;
+			if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
+				if (__pskb_trim(skb,
+				    pkt_len + sizeof(struct ipv6hdr)))
+					goto bad;
+				if (skb->ip_summed == CHECKSUM_HW)
+					skb->ip_summed = CHECKSUM_NONE;
+			}
+			break;
+		default:
+			if (optlen > len)
+				goto bad;
+			break;
+		}
+		off += optlen;
+		len -= optlen;
+	}
+	if (len == 0)
+		return 0;
+bad:
+	return -1;
+
+}
+
+/* Replicate the checks that IPv6 does on packet reception and pass the packet
+ * to ip6tables, which doesn't support NAT, so things are fairly simple. */
+static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
+   struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+	struct ipv6hdr *hdr;
+	u32 pkt_len;
+	struct nf_bridge_info *nf_bridge;
+
+	if (skb->len < sizeof(struct ipv6hdr))
+		goto inhdr_error;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto inhdr_error;
+
+	hdr = skb->nh.ipv6h;
+
+	if (hdr->version != 6)
+		goto inhdr_error;
+
+	pkt_len = ntohs(hdr->payload_len);
+
+	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+			goto inhdr_error;
+		if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
+			if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr)))
+				goto inhdr_error;
+			if (skb->ip_summed == CHECKSUM_HW)
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
+			goto inhdr_error;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_IP6_PRE_ROUTING);
+#endif
+	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
+		return NF_DROP;
+	setup_pre_routing(skb);
+
+	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+		br_nf_pre_routing_finish_ipv6);
+
+	return NF_STOLEN;
+
+inhdr_error:
+	return NF_DROP;
+}
+
+/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
+ * Replicate the checks that IPv4 does on packet reception.
+ * Set skb->dev to the bridge device (i.e. parent of the
+ * receiving device) to make netfilter happy, the REDIRECT
+ * target in particular.  Save the original destination IP
+ * address to be able to detect DNAT afterwards. */
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct iphdr *iph;
+	__u32 len;
+	struct sk_buff *skb = *pskb;
+	struct nf_bridge_info *nf_bridge;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(*pskb);
+
+	if (skb->protocol == __constant_htons(ETH_P_IPV6) || IS_VLAN_IPV6) {
+#ifdef CONFIG_SYSCTL
+		if (!brnf_call_ip6tables)
+			return NF_ACCEPT;
+#endif
+		if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
+			goto out;
+
+		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+			skb_pull(skb, VLAN_HLEN);
+			(skb)->nh.raw += VLAN_HLEN;
+		}
+		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
+	}
+#ifdef CONFIG_SYSCTL
+	if (!brnf_call_iptables)
+		return NF_ACCEPT;
+#endif
+
+	if (skb->protocol != __constant_htons(ETH_P_IP) && !IS_VLAN_IP)
+		return NF_ACCEPT;
+
+	if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
+		goto out;
+
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		(skb)->nh.raw += VLAN_HLEN;
+	}
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto inhdr_error;
+
+	iph = skb->nh.iph;
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error;
+
+	if (!pskb_may_pull(skb, 4*iph->ihl))
+		goto inhdr_error;
+
+	iph = skb->nh.iph;
+	if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0)
+		goto inhdr_error;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < 4*iph->ihl)
+		goto inhdr_error;
+
+	if (skb->len > len) {
+		__pskb_trim(skb, len);
+		if (skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+#endif
+	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
+		return NF_DROP;
+	setup_pre_routing(skb);
+	store_orig_dstaddr(skb);
+
+	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+		br_nf_pre_routing_finish);
+
+	return NF_STOLEN;
+
+inhdr_error:
+//	IP_INC_STATS_BH(IpInHdrErrors);
+out:
+	return NF_DROP;
+}
+
+
+/* PF_BRIDGE/LOCAL_IN ************************************************/
+/* The packet is locally destined, which requires a real
+ * dst_entry, so detach the fake one.  On the way up, the
+ * packet would pass through PRE_ROUTING again (which already
+ * took place when the packet entered the bridge), but we
+ * register an IPv4 PRE_ROUTING 'sabotage' hook that will
+ * prevent this from happening. */
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+
+	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+		dst_release(skb->dst);
+		skb->dst = NULL;
+	}
+
+	return NF_ACCEPT;
+}
+
+
+/* PF_BRIDGE/FORWARD *************************************************/
+static int br_nf_forward_finish(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct net_device *in;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_BR_FORWARD);
+#endif
+
+	if (skb->protocol != __constant_htons(ETH_P_ARP) && !IS_VLAN_ARP) {
+		in = nf_bridge->physindev;
+		if (nf_bridge->mask & BRNF_PKT_TYPE) {
+			skb->pkt_type = PACKET_OTHERHOST;
+			nf_bridge->mask ^= BRNF_PKT_TYPE;
+		}
+	} else {
+		in = *((struct net_device **)(skb->cb));
+	}
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->nh.raw -= VLAN_HLEN;
+	}
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
+			skb->dev, br_forward_finish, 1);
+	return 0;
+}
+
+/* This is the 'purely bridged' case.  For IP, we pass the packet to
+ * netfilter with indev and outdev set to the bridge device,
+ * but we are still able to filter on the 'real' indev/outdev
+ * because of the physdev module. For ARP, indev and outdev are the
+ * bridge ports. */
+static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+	struct nf_bridge_info *nf_bridge;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+	int pf;
+
+	if (!skb->nf_bridge)
+		return NF_ACCEPT;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP)
+		pf = PF_INET;
+	else
+		pf = PF_INET6;
+
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_pull(*pskb, VLAN_HLEN);
+		(*pskb)->nh.raw += VLAN_HLEN;
+	}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_BR_FORWARD);
+#endif
+	nf_bridge = skb->nf_bridge;
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		skb->pkt_type = PACKET_HOST;
+		nf_bridge->mask |= BRNF_PKT_TYPE;
+	}
+
+	/* The physdev module checks on this */
+	nf_bridge->mask |= BRNF_BRIDGED;
+	nf_bridge->physoutdev = skb->dev;
+
+	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in),
+		bridge_parent(out), br_nf_forward_finish);
+
+	return NF_STOLEN;
+}
+
+static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+	struct net_device **d = (struct net_device **)(skb->cb);
+
+#ifdef CONFIG_SYSCTL
+	if (!brnf_call_arptables)
+		return NF_ACCEPT;
+#endif
+
+	if (skb->protocol != __constant_htons(ETH_P_ARP)) {
+		if (!IS_VLAN_ARP)
+			return NF_ACCEPT;
+		skb_pull(*pskb, VLAN_HLEN);
+		(*pskb)->nh.raw += VLAN_HLEN;
+	}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug ^= (1 << NF_BR_FORWARD);
+#endif
+
+	if (skb->nh.arph->ar_pln != 4) {
+		if (IS_VLAN_ARP) {
+			skb_push(*pskb, VLAN_HLEN);
+			(*pskb)->nh.raw -= VLAN_HLEN;
+		}
+		return NF_ACCEPT;
+	}
+	*d = (struct net_device *)in;
+	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+		(struct net_device *)out, br_nf_forward_finish);
+
+	return NF_STOLEN;
+}
+
+
+/* PF_BRIDGE/LOCAL_OUT ***********************************************/
+static int br_nf_local_out_finish(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+	skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT);
+#endif
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_push(skb, VLAN_HLEN);
+		skb->nh.raw -= VLAN_HLEN;
+	}
+
+	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+			br_forward_finish, NF_BR_PRI_FIRST + 1);
+
+	return 0;
+}
+
+/* This function sees both locally originated IP packets and forwarded
+ * IP packets (in both cases the destination device is a bridge
+ * device). It also sees bridged-and-DNAT'ed packets.
+ * To be able to filter on the physical bridge devices (with the physdev
+ * module), we steal packets destined to a bridge device away from the
+ * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later,
+ * when we have determined the real output device. This is done in here.
+ *
+ * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
+ * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
+ * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
+ * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
+ * will be executed.
+ * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched
+ * this packet before, and so the packet was locally originated. We fake
+ * the PF_INET/LOCAL_OUT hook.
+ * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed,
+ * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure
+ * even routed packets that didn't arrive on a bridge interface have their
+ * nf_bridge->physindev set. */
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct net_device *realindev, *realoutdev;
+	struct sk_buff *skb = *pskb;
+	struct nf_bridge_info *nf_bridge;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+	int pf;
+
+	if (!skb->nf_bridge)
+		return NF_ACCEPT;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP)
+		pf = PF_INET;
+	else
+		pf = PF_INET6;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Sometimes we get packets with NULL ->dst here (for example,
+	 * running a dhcp client daemon triggers this). This should now
+	 * be fixed, but let's keep the check around. */
+	if (skb->dst == NULL) {
+		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+		return NF_ACCEPT;
+	}
+#endif
+
+	nf_bridge = skb->nf_bridge;
+	nf_bridge->physoutdev = skb->dev;
+	realindev = nf_bridge->physindev;
+
+	/* Bridged, take PF_BRIDGE/FORWARD.
+	 * (see big note in front of br_nf_pre_routing_finish) */
+	if (nf_bridge->mask & BRNF_BRIDGED_DNAT) {
+		if (nf_bridge->mask & BRNF_PKT_TYPE) {
+			skb->pkt_type = PACKET_OTHERHOST;
+			nf_bridge->mask ^= BRNF_PKT_TYPE;
+		}
+		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+			skb_push(skb, VLAN_HLEN);
+			skb->nh.raw -= VLAN_HLEN;
+		}
+
+		NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev,
+			skb->dev, br_forward_finish);
+		goto out;
+	}
+	realoutdev = bridge_parent(skb->dev);
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	/* iptables should match -o br0.x */
+	if (nf_bridge->netoutdev)
+		realoutdev = nf_bridge->netoutdev;
+#endif
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		(*pskb)->nh.raw += VLAN_HLEN;
+	}
+	/* IP forwarded traffic has a physindev, locally
+	 * generated traffic hasn't. */
+	if (realindev != NULL) {
+		if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) &&
+		    has_bridge_parent(realindev))
+			realindev = bridge_parent(realindev);
+
+		NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
+	} else {
+#ifdef CONFIG_NETFILTER_DEBUG
+		skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+#endif
+
+		NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
+			       realoutdev, br_nf_local_out_finish,
+			       NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
+	}
+
+out:
+	return NF_STOLEN;
+}
+
+
+/* PF_BRIDGE/POST_ROUTING ********************************************/
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+	struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+	struct net_device *realoutdev = bridge_parent(skb->dev);
+	int pf;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Be very paranoid. This probably won't happen anymore, but let's
+	 * keep the check just to be sure... */
+	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
+				 "bad mac.raw pointer.");
+		goto print_error;
+	}
+#endif
+
+	if (!nf_bridge)
+		return NF_ACCEPT;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP)
+		pf = PF_INET;
+	else
+		pf = PF_INET6;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	if (skb->dst == NULL) {
+		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+		goto print_error;
+	}
+
+	skb->nf_debug ^= (1 << NF_IP_POST_ROUTING);
+#endif
+
+	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
+	 * about the value of skb->pkt_type. */
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		skb->pkt_type = PACKET_HOST;
+		nf_bridge->mask |= BRNF_PKT_TYPE;
+	}
+
+	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+		skb_pull(skb, VLAN_HLEN);
+		skb->nh.raw += VLAN_HLEN;
+	}
+
+	nf_bridge_save_header(skb);
+
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	if (nf_bridge->netoutdev)
+		realoutdev = nf_bridge->netoutdev;
+#endif
+	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
+	        br_dev_queue_push_xmit);
+
+	return NF_STOLEN;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+print_error:
+	if (skb->dev != NULL) {
+		printk("[%s]", skb->dev->name);
+		if (has_bridge_parent(skb->dev))
+			printk("[%s]", bridge_parent(skb->dev)->name);
+	}
+	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+					      skb->data);
+	return NF_ACCEPT;
+#endif
+}
+
+
+/* IP/SABOTAGE *****************************************************/
+/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
+ * for the second time. */
+static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	if ((*pskb)->nf_bridge &&
+	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+		return NF_STOP;
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT
+ * and PF_INET(6)/POST_ROUTING until we have done the forwarding
+ * decision in the bridge code and have determined nf_bridge->physoutdev. */
+static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *skb = *pskb;
+
+	if ((out->hard_start_xmit == br_dev_xmit &&
+	    okfn != br_nf_forward_finish &&
+	    okfn != br_nf_local_out_finish &&
+	    okfn != br_dev_queue_push_xmit)
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+	    || ((out->priv_flags & IFF_802_1Q_VLAN) &&
+	    VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
+#endif
+	    ) {
+		struct nf_bridge_info *nf_bridge;
+
+		if (!skb->nf_bridge) {
+#ifdef CONFIG_SYSCTL
+			/* This code is executed while in the IP(v6) stack,
+			   the version should be 4 or 6. We can't use
+			   skb->protocol because that isn't set on
+			   PF_INET(6)/LOCAL_OUT. */
+			struct iphdr *ip = skb->nh.iph;
+
+			if (ip->version == 4 && !brnf_call_iptables)
+				return NF_ACCEPT;
+			else if (ip->version == 6 && !brnf_call_ip6tables)
+				return NF_ACCEPT;
+#endif
+			if (hook == NF_IP_POST_ROUTING)
+				return NF_ACCEPT;
+			if (!nf_bridge_alloc(skb))
+				return NF_DROP;
+		}
+
+		nf_bridge = skb->nf_bridge;
+
+		/* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we
+		 * will need the indev then. For a brouter, the real indev
+		 * can be a bridge port, so we make sure br_nf_local_out()
+		 * doesn't use the bridge parent of the indev by using
+		 * the BRNF_DONT_TAKE_PARENT mask. */
+		if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) {
+			nf_bridge->mask &= BRNF_DONT_TAKE_PARENT;
+			nf_bridge->physindev = (struct net_device *)in;
+		}
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+		/* the iptables outdev is br0.x, not br0 */
+		if (out->priv_flags & IFF_802_1Q_VLAN)
+			nf_bridge->netoutdev = (struct net_device *)out;
+#endif
+		return NF_STOP;
+	}
+
+	return NF_ACCEPT;
+}
+
+/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
+ * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
+ * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
+ * ip_refrag() can return NF_STOLEN. */
+static struct nf_hook_ops br_nf_ops[] = {
+	{ .hook = br_nf_pre_routing, 
+	  .owner = THIS_MODULE, 
+	  .pf = PF_BRIDGE, 
+	  .hooknum = NF_BR_PRE_ROUTING, 
+	  .priority = NF_BR_PRI_BRNF, },
+	{ .hook = br_nf_local_in,
+	  .owner = THIS_MODULE,
+	  .pf = PF_BRIDGE,
+	  .hooknum = NF_BR_LOCAL_IN,
+	  .priority = NF_BR_PRI_BRNF, },
+	{ .hook = br_nf_forward_ip,
+	  .owner = THIS_MODULE,
+	  .pf = PF_BRIDGE,
+	  .hooknum = NF_BR_FORWARD,
+	  .priority = NF_BR_PRI_BRNF - 1, },
+	{ .hook = br_nf_forward_arp,
+	  .owner = THIS_MODULE,
+	  .pf = PF_BRIDGE,
+	  .hooknum = NF_BR_FORWARD,
+	  .priority = NF_BR_PRI_BRNF, },
+	{ .hook = br_nf_local_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_BRIDGE,
+	  .hooknum = NF_BR_LOCAL_OUT,
+	  .priority = NF_BR_PRI_FIRST, },
+	{ .hook = br_nf_post_routing,
+	  .owner = THIS_MODULE,
+	  .pf = PF_BRIDGE,
+	  .hooknum = NF_BR_POST_ROUTING,
+	  .priority = NF_BR_PRI_LAST, },
+	{ .hook = ip_sabotage_in,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_PRE_ROUTING,
+	  .priority = NF_IP_PRI_FIRST, },
+	{ .hook = ip_sabotage_in,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_PRE_ROUTING,
+	  .priority = NF_IP6_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_FORWARD,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_FORWARD,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_LOCAL_OUT,
+	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_LOCAL_OUT,
+	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET,
+	  .hooknum = NF_IP_POST_ROUTING,
+	  .priority = NF_IP_PRI_FIRST, },
+	{ .hook = ip_sabotage_out,
+	  .owner = THIS_MODULE,
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_POST_ROUTING,
+	  .priority = NF_IP6_PRI_FIRST, },
+};
+
+#ifdef CONFIG_SYSCTL
+static
+int brnf_sysctl_call_tables(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && *(int *)(ctl->data))
+		*(int *)(ctl->data) = 1;
+	return ret;
+}
+
+static ctl_table brnf_table[] = {
+	{
+		.ctl_name	= NET_BRIDGE_NF_CALL_ARPTABLES,
+		.procname	= "bridge-nf-call-arptables",
+		.data		= &brnf_call_arptables,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
+	{
+		.ctl_name	= NET_BRIDGE_NF_CALL_IPTABLES,
+		.procname	= "bridge-nf-call-iptables",
+		.data		= &brnf_call_iptables,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
+	{
+		.ctl_name	= NET_BRIDGE_NF_CALL_IP6TABLES,
+		.procname	= "bridge-nf-call-ip6tables",
+		.data		= &brnf_call_ip6tables,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
+	{
+		.ctl_name	= NET_BRIDGE_NF_FILTER_VLAN_TAGGED,
+		.procname	= "bridge-nf-filter-vlan-tagged",
+		.data		= &brnf_filter_vlan_tagged,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table brnf_bridge_table[] = {
+	{
+		.ctl_name	= NET_BRIDGE,
+		.procname	= "bridge",
+		.mode		= 0555,
+		.child		= brnf_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table brnf_net_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= brnf_bridge_table,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+int br_netfilter_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) {
+		int ret;
+
+		if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0)
+			continue;
+
+		while (i--)
+			nf_unregister_hook(&br_nf_ops[i]);
+
+		return ret;
+	}
+
+#ifdef CONFIG_SYSCTL
+	brnf_sysctl_header = register_sysctl_table(brnf_net_table, 0);
+	if (brnf_sysctl_header == NULL) {
+		printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n");
+		for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++)
+			nf_unregister_hook(&br_nf_ops[i]);
+		return -EFAULT;
+	}
+#endif
+
+	printk(KERN_NOTICE "Bridge firewalling registered\n");
+
+	return 0;
+}
+
+void br_netfilter_fini(void)
+{
+	int i;
+
+	for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--)
+		nf_unregister_hook(&br_nf_ops[i]);
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(brnf_sysctl_header);
+#endif
+}
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
new file mode 100644
index 00000000000..f8fb49e3476
--- /dev/null
+++ b/net/bridge/br_notify.c
@@ -0,0 +1,87 @@
+/*
+ *	Device event handling
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+
+#include "br_private.h"
+
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr);
+
+struct notifier_block br_device_notifier = {
+	.notifier_call = br_device_event
+};
+
+/*
+ * Handle changes in state of network devices enslaved to a bridge.
+ * 
+ * Note: don't care about up/down if bridge itself is down, because
+ *     port state is checked when bridge is brought up.
+ */
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge *br;
+
+	/* not a port of a bridge */
+	if (p == NULL)
+		return NOTIFY_DONE;
+
+	br = p->br;
+
+	spin_lock_bh(&br->lock);
+	switch (event) {
+	case NETDEV_CHANGEMTU:
+		dev_set_mtu(br->dev, br_min_mtu(br));
+		break;
+
+	case NETDEV_CHANGEADDR:
+		br_fdb_changeaddr(p, dev->dev_addr);
+		br_stp_recalculate_bridge_id(br);
+		break;
+
+	case NETDEV_CHANGE:	/* device is up but carrier changed */
+		if (!(br->dev->flags & IFF_UP))
+			break;
+
+		if (netif_carrier_ok(dev)) {
+			if (p->state == BR_STATE_DISABLED)
+				br_stp_enable_port(p);
+		} else {
+			if (p->state != BR_STATE_DISABLED)
+				br_stp_disable_port(p);
+		}
+		break;
+
+	case NETDEV_DOWN:
+		if (br->dev->flags & IFF_UP)
+			br_stp_disable_port(p);
+		break;
+
+	case NETDEV_UP:
+		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) 
+			br_stp_enable_port(p);
+		break;
+
+	case NETDEV_UNREGISTER:
+		spin_unlock_bh(&br->lock);
+		br_del_if(br, dev);
+		goto done;
+	} 
+	spin_unlock_bh(&br->lock);
+
+ done:
+	return NOTIFY_DONE;
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
new file mode 100644
index 00000000000..54d63f1372a
--- /dev/null
+++ b/net/bridge/br_private.h
@@ -0,0 +1,244 @@
+/*
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _BR_PRIVATE_H
+#define _BR_PRIVATE_H
+
+#include <linux/netdevice.h>
+#include <linux/miscdevice.h>
+#include <linux/if_bridge.h>
+
+#define BR_HASH_BITS 8
+#define BR_HASH_SIZE (1 << BR_HASH_BITS)
+
+#define BR_HOLD_TIME (1*HZ)
+
+#define BR_PORT_BITS	10
+#define BR_MAX_PORTS	(1<<BR_PORT_BITS)
+
+typedef struct bridge_id bridge_id;
+typedef struct mac_addr mac_addr;
+typedef __u16 port_id;
+
+struct bridge_id
+{
+	unsigned char	prio[2];
+	unsigned char	addr[6];
+};
+
+struct mac_addr
+{
+	unsigned char	addr[6];
+};
+
+struct net_bridge_fdb_entry
+{
+	struct hlist_node		hlist;
+	struct net_bridge_port		*dst;
+
+	struct rcu_head			rcu;
+	atomic_t			use_count;
+	unsigned long			ageing_timer;
+	mac_addr			addr;
+	unsigned char			is_local;
+	unsigned char			is_static;
+};
+
+struct net_bridge_port
+{
+	struct net_bridge		*br;
+	struct net_device		*dev;
+	struct list_head		list;
+
+	/* STP */
+	u8				priority;
+	u8				state;
+	u16				port_no;
+	unsigned char			topology_change_ack;
+	unsigned char			config_pending;
+	port_id				port_id;
+	port_id				designated_port;
+	bridge_id			designated_root;
+	bridge_id			designated_bridge;
+	u32				path_cost;
+	u32				designated_cost;
+
+	struct timer_list		forward_delay_timer;
+	struct timer_list		hold_timer;
+	struct timer_list		message_age_timer;
+	struct kobject			kobj;
+	struct rcu_head			rcu;
+};
+
+struct net_bridge
+{
+	spinlock_t			lock;
+	struct list_head		port_list;
+	struct net_device		*dev;
+	struct net_device_stats		statistics;
+	spinlock_t			hash_lock;
+	struct hlist_head		hash[BR_HASH_SIZE];
+	struct list_head		age_list;
+
+	/* STP */
+	bridge_id			designated_root;
+	bridge_id			bridge_id;
+	u32				root_path_cost;
+	unsigned long			max_age;
+	unsigned long			hello_time;
+	unsigned long			forward_delay;
+	unsigned long			bridge_max_age;
+	unsigned long			ageing_time;
+	unsigned long			bridge_hello_time;
+	unsigned long			bridge_forward_delay;
+
+	u16				root_port;
+	unsigned char			stp_enabled;
+	unsigned char			topology_change;
+	unsigned char			topology_change_detected;
+
+	struct timer_list		hello_timer;
+	struct timer_list		tcn_timer;
+	struct timer_list		topology_change_timer;
+	struct timer_list		gc_timer;
+	struct kobject			ifobj;
+};
+
+extern struct notifier_block br_device_notifier;
+extern const unsigned char bridge_ula[6];
+
+/* called under bridge lock */
+static inline int br_is_root_bridge(const struct net_bridge *br)
+{
+	return !memcmp(&br->bridge_id, &br->designated_root, 8);
+}
+
+
+/* br_device.c */
+extern void br_dev_setup(struct net_device *dev);
+extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
+
+/* br_fdb.c */
+extern void br_fdb_init(void);
+extern void br_fdb_fini(void);
+extern void br_fdb_changeaddr(struct net_bridge_port *p,
+			      const unsigned char *newaddr);
+extern void br_fdb_cleanup(unsigned long arg);
+extern void br_fdb_delete_by_port(struct net_bridge *br,
+			   struct net_bridge_port *p);
+extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
+						 const unsigned char *addr);
+extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br,
+					       unsigned char *addr);
+extern void br_fdb_put(struct net_bridge_fdb_entry *ent);
+extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, 
+			  unsigned long count, unsigned long off);
+extern int br_fdb_insert(struct net_bridge *br,
+			 struct net_bridge_port *source,
+			 const unsigned char *addr);
+extern void br_fdb_update(struct net_bridge *br,
+			  struct net_bridge_port *source,
+			  const unsigned char *addr);
+
+/* br_forward.c */
+extern void br_deliver(const struct net_bridge_port *to,
+		struct sk_buff *skb);
+extern int br_dev_queue_push_xmit(struct sk_buff *skb);
+extern void br_forward(const struct net_bridge_port *to,
+		struct sk_buff *skb);
+extern int br_forward_finish(struct sk_buff *skb);
+extern void br_flood_deliver(struct net_bridge *br,
+		      struct sk_buff *skb,
+		      int clone);
+extern void br_flood_forward(struct net_bridge *br,
+		      struct sk_buff *skb,
+		      int clone);
+
+/* br_if.c */
+extern int br_add_bridge(const char *name);
+extern int br_del_bridge(const char *name);
+extern void br_cleanup_bridges(void);
+extern int br_add_if(struct net_bridge *br,
+	      struct net_device *dev);
+extern int br_del_if(struct net_bridge *br,
+	      struct net_device *dev);
+extern int br_min_mtu(const struct net_bridge *br);
+
+/* br_input.c */
+extern int br_handle_frame_finish(struct sk_buff *skb);
+extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb);
+
+/* br_ioctl.c */
+extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg);
+
+/* br_netfilter.c */
+extern int br_netfilter_init(void);
+extern void br_netfilter_fini(void);
+
+/* br_stp.c */
+extern void br_log_state(const struct net_bridge_port *p);
+extern struct net_bridge_port *br_get_port(struct net_bridge *br,
+				    	   u16 port_no);
+extern void br_init_port(struct net_bridge_port *p);
+extern void br_become_designated_port(struct net_bridge_port *p);
+
+/* br_stp_if.c */
+extern void br_stp_enable_bridge(struct net_bridge *br);
+extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_enable_port(struct net_bridge_port *p);
+extern void br_stp_disable_port(struct net_bridge_port *p);
+extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
+extern void br_stp_set_bridge_priority(struct net_bridge *br,
+				       u16 newprio);
+extern void br_stp_set_port_priority(struct net_bridge_port *p,
+				     u8 newprio);
+extern void br_stp_set_path_cost(struct net_bridge_port *p,
+				 u32 path_cost);
+extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
+
+/* br_stp_bpdu.c */
+extern int br_stp_handle_bpdu(struct sk_buff *skb);
+
+/* br_stp_timer.c */
+extern void br_stp_timer_init(struct net_bridge *br);
+extern void br_stp_port_timer_init(struct net_bridge_port *p);
+extern unsigned long br_timer_value(const struct timer_list *timer);
+
+/* br.c */
+extern struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
+						       unsigned char *addr);
+extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+
+
+#ifdef CONFIG_SYSFS
+/* br_sysfs_if.c */
+extern int br_sysfs_addif(struct net_bridge_port *p);
+extern void br_sysfs_removeif(struct net_bridge_port *p);
+extern void br_sysfs_freeif(struct net_bridge_port *p);
+
+/* br_sysfs_br.c */
+extern int br_sysfs_addbr(struct net_device *dev);
+extern void br_sysfs_delbr(struct net_device *dev);
+
+#else
+
+#define br_sysfs_addif(p)	(0)
+#define br_sysfs_removeif(p)	do { } while(0)
+#define br_sysfs_freeif(p)	kfree(p)
+#define br_sysfs_addbr(dev)	(0)
+#define br_sysfs_delbr(dev)	do { } while(0)
+#endif /* CONFIG_SYSFS */
+
+#endif
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
new file mode 100644
index 00000000000..e29f01ac1ad
--- /dev/null
+++ b/net/bridge/br_private_stp.h
@@ -0,0 +1,58 @@
+/*
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _BR_PRIVATE_STP_H
+#define _BR_PRIVATE_STP_H
+
+#define BPDU_TYPE_CONFIG 0
+#define BPDU_TYPE_TCN 0x80
+
+struct br_config_bpdu
+{
+	unsigned	topology_change:1;
+	unsigned	topology_change_ack:1;
+	bridge_id	root;
+	int		root_path_cost;
+	bridge_id	bridge_id;
+	port_id		port_id;
+	int		message_age;
+	int		max_age;
+	int		hello_time;
+	int		forward_delay;
+};
+
+/* called under bridge lock */
+static inline int br_is_designated_port(const struct net_bridge_port *p)
+{
+	return !memcmp(&p->designated_bridge, &p->br->bridge_id, 8) &&
+		(p->designated_port == p->port_id);
+}
+
+
+/* br_stp.c */
+extern void br_become_root_bridge(struct net_bridge *br);
+extern void br_config_bpdu_generation(struct net_bridge *);
+extern void br_configuration_update(struct net_bridge *);
+extern void br_port_state_selection(struct net_bridge *);
+extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu);
+extern void br_received_tcn_bpdu(struct net_bridge_port *p);
+extern void br_transmit_config(struct net_bridge_port *p);
+extern void br_transmit_tcn(struct net_bridge *br);
+extern void br_topology_change_detection(struct net_bridge *br);
+
+/* br_stp_bpdu.c */
+extern void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
+extern void br_send_tcn_bpdu(struct net_bridge_port *);
+
+#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
new file mode 100644
index 00000000000..04ca0639a95
--- /dev/null
+++ b/net/bridge/br_stp.c
@@ -0,0 +1,459 @@
+/*
+ *	Spanning tree protocol; generic parts
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+
+#include "br_private.h"
+#include "br_private_stp.h"
+
+/* since time values in bpdu are in jiffies and then scaled (1/256)
+ * before sending, make sure that is at least one.
+ */
+#define MESSAGE_AGE_INCR	((HZ < 256) ? 1 : (HZ/256))
+
+static const char *br_port_state_names[] = {
+	[BR_STATE_DISABLED] = "disabled", 
+	[BR_STATE_LISTENING] = "listening",
+	[BR_STATE_LEARNING] = "learning", 
+	[BR_STATE_FORWARDING] = "forwarding", 
+	[BR_STATE_BLOCKING] = "blocking",
+};
+
+void br_log_state(const struct net_bridge_port *p)
+{
+	pr_info("%s: port %d(%s) entering %s state\n",
+		p->br->dev->name, p->port_no, p->dev->name, 
+		br_port_state_names[p->state]);
+
+}
+
+/* called under bridge lock */
+struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry_rcu(p, &br->port_list, list) {
+		if (p->port_no == port_no)
+			return p;
+	}
+
+	return NULL;
+}
+
+/* called under bridge lock */
+static int br_should_become_root_port(const struct net_bridge_port *p, 
+				      u16 root_port)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *rp;
+	int t;
+
+	br = p->br;
+	if (p->state == BR_STATE_DISABLED ||
+	    br_is_designated_port(p))
+		return 0;
+
+	if (memcmp(&br->bridge_id, &p->designated_root, 8) <= 0)
+		return 0;
+
+	if (!root_port)
+		return 1;
+
+	rp = br_get_port(br, root_port);
+
+	t = memcmp(&p->designated_root, &rp->designated_root, 8);
+	if (t < 0)
+		return 1;
+	else if (t > 0)
+		return 0;
+
+	if (p->designated_cost + p->path_cost <
+	    rp->designated_cost + rp->path_cost)
+		return 1;
+	else if (p->designated_cost + p->path_cost >
+		 rp->designated_cost + rp->path_cost)
+		return 0;
+
+	t = memcmp(&p->designated_bridge, &rp->designated_bridge, 8);
+	if (t < 0)
+		return 1;
+	else if (t > 0)
+		return 0;
+
+	if (p->designated_port < rp->designated_port)
+		return 1;
+	else if (p->designated_port > rp->designated_port)
+		return 0;
+
+	if (p->port_id < rp->port_id)
+		return 1;
+
+	return 0;
+}
+
+/* called under bridge lock */
+static void br_root_selection(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	u16 root_port = 0;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (br_should_become_root_port(p, root_port))
+			root_port = p->port_no;
+
+	}
+
+	br->root_port = root_port;
+
+	if (!root_port) {
+		br->designated_root = br->bridge_id;
+		br->root_path_cost = 0;
+	} else {
+		p = br_get_port(br, root_port);
+		br->designated_root = p->designated_root;
+		br->root_path_cost = p->designated_cost + p->path_cost;
+	}
+}
+
+/* called under bridge lock */
+void br_become_root_bridge(struct net_bridge *br)
+{
+	br->max_age = br->bridge_max_age;
+	br->hello_time = br->bridge_hello_time;
+	br->forward_delay = br->bridge_forward_delay;
+	br_topology_change_detection(br);
+	del_timer(&br->tcn_timer);
+
+	if (br->dev->flags & IFF_UP) {
+		br_config_bpdu_generation(br);
+		mod_timer(&br->hello_timer, jiffies + br->hello_time);
+	}
+}
+
+/* called under bridge lock */
+void br_transmit_config(struct net_bridge_port *p)
+{
+	struct br_config_bpdu bpdu;
+	struct net_bridge *br;
+
+
+	if (timer_pending(&p->hold_timer)) {
+		p->config_pending = 1;
+		return;
+	}
+
+	br = p->br;
+
+	bpdu.topology_change = br->topology_change;
+	bpdu.topology_change_ack = p->topology_change_ack;
+	bpdu.root = br->designated_root;
+	bpdu.root_path_cost = br->root_path_cost;
+	bpdu.bridge_id = br->bridge_id;
+	bpdu.port_id = p->port_id;
+	if (br_is_root_bridge(br))
+		bpdu.message_age = 0;
+	else {
+		struct net_bridge_port *root
+			= br_get_port(br, br->root_port);
+		bpdu.message_age = br->max_age
+			- (root->message_age_timer.expires - jiffies)
+			+ MESSAGE_AGE_INCR;
+	}
+	bpdu.max_age = br->max_age;
+	bpdu.hello_time = br->hello_time;
+	bpdu.forward_delay = br->forward_delay;
+
+	if (bpdu.message_age < br->max_age) {
+		br_send_config_bpdu(p, &bpdu);
+		p->topology_change_ack = 0;
+		p->config_pending = 0;
+		mod_timer(&p->hold_timer, jiffies + BR_HOLD_TIME);
+	}
+}
+
+/* called under bridge lock */
+static inline void br_record_config_information(struct net_bridge_port *p, 
+						const struct br_config_bpdu *bpdu)
+{
+	p->designated_root = bpdu->root;
+	p->designated_cost = bpdu->root_path_cost;
+	p->designated_bridge = bpdu->bridge_id;
+	p->designated_port = bpdu->port_id;
+
+	mod_timer(&p->message_age_timer, jiffies 
+		  + (p->br->max_age - bpdu->message_age));
+}
+
+/* called under bridge lock */
+static inline void br_record_config_timeout_values(struct net_bridge *br, 
+					    const struct br_config_bpdu *bpdu)
+{
+	br->max_age = bpdu->max_age;
+	br->hello_time = bpdu->hello_time;
+	br->forward_delay = bpdu->forward_delay;
+	br->topology_change = bpdu->topology_change;
+}
+
+/* called under bridge lock */
+void br_transmit_tcn(struct net_bridge *br)
+{
+	br_send_tcn_bpdu(br_get_port(br, br->root_port));
+}
+
+/* called under bridge lock */
+static int br_should_become_designated_port(const struct net_bridge_port *p)
+{
+	struct net_bridge *br;
+	int t;
+
+	br = p->br;
+	if (br_is_designated_port(p))
+		return 1;
+
+	if (memcmp(&p->designated_root, &br->designated_root, 8))
+		return 1;
+
+	if (br->root_path_cost < p->designated_cost)
+		return 1;
+	else if (br->root_path_cost > p->designated_cost)
+		return 0;
+
+	t = memcmp(&br->bridge_id, &p->designated_bridge, 8);
+	if (t < 0)
+		return 1;
+	else if (t > 0)
+		return 0;
+
+	if (p->port_id < p->designated_port)
+		return 1;
+
+	return 0;
+}
+
+/* called under bridge lock */
+static void br_designated_port_selection(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED &&
+		    br_should_become_designated_port(p))
+			br_become_designated_port(p);
+
+	}
+}
+
+/* called under bridge lock */
+static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
+{
+	int t;
+
+	t = memcmp(&bpdu->root, &p->designated_root, 8);
+	if (t < 0)
+		return 1;
+	else if (t > 0)
+		return 0;
+
+	if (bpdu->root_path_cost < p->designated_cost)
+		return 1;
+	else if (bpdu->root_path_cost > p->designated_cost)
+		return 0;
+
+	t = memcmp(&bpdu->bridge_id, &p->designated_bridge, 8);
+	if (t < 0)
+		return 1;
+	else if (t > 0)
+		return 0;
+
+	if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8))
+		return 1;
+
+	if (bpdu->port_id <= p->designated_port)
+		return 1;
+
+	return 0;
+}
+
+/* called under bridge lock */
+static inline void br_topology_change_acknowledged(struct net_bridge *br)
+{
+	br->topology_change_detected = 0;
+	del_timer(&br->tcn_timer);
+}
+
+/* called under bridge lock */
+void br_topology_change_detection(struct net_bridge *br)
+{
+	int isroot = br_is_root_bridge(br);
+
+	pr_info("%s: topology change detected, %s\n", br->dev->name,
+		isroot ? "propagating" : "sending tcn bpdu");
+
+	if (isroot) {
+		br->topology_change = 1;
+		mod_timer(&br->topology_change_timer, jiffies
+			  + br->bridge_forward_delay + br->bridge_max_age);
+	} else if (!br->topology_change_detected) {
+		br_transmit_tcn(br);
+		mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time);
+	}
+
+	br->topology_change_detected = 1;
+}
+
+/* called under bridge lock */
+void br_config_bpdu_generation(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED &&
+		    br_is_designated_port(p))
+			br_transmit_config(p);
+	}
+}
+
+/* called under bridge lock */
+static inline void br_reply(struct net_bridge_port *p)
+{
+	br_transmit_config(p);
+}
+
+/* called under bridge lock */
+void br_configuration_update(struct net_bridge *br)
+{
+	br_root_selection(br);
+	br_designated_port_selection(br);
+}
+
+/* called under bridge lock */
+void br_become_designated_port(struct net_bridge_port *p)
+{
+	struct net_bridge *br;
+
+	br = p->br;
+	p->designated_root = br->designated_root;
+	p->designated_cost = br->root_path_cost;
+	p->designated_bridge = br->bridge_id;
+	p->designated_port = p->port_id;
+}
+
+
+/* called under bridge lock */
+static void br_make_blocking(struct net_bridge_port *p)
+{
+	if (p->state != BR_STATE_DISABLED &&
+	    p->state != BR_STATE_BLOCKING) {
+		if (p->state == BR_STATE_FORWARDING ||
+		    p->state == BR_STATE_LEARNING)
+			br_topology_change_detection(p->br);
+
+		p->state = BR_STATE_BLOCKING;
+		br_log_state(p);
+		del_timer(&p->forward_delay_timer);
+	}
+}
+
+/* called under bridge lock */
+static void br_make_forwarding(struct net_bridge_port *p)
+{
+	if (p->state == BR_STATE_BLOCKING) {
+		if (p->br->stp_enabled) {
+			p->state = BR_STATE_LISTENING;
+		} else {
+			p->state = BR_STATE_LEARNING;
+		}
+		br_log_state(p);
+		mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay);	}
+}
+
+/* called under bridge lock */
+void br_port_state_selection(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED) {
+			if (p->port_no == br->root_port) {
+				p->config_pending = 0;
+				p->topology_change_ack = 0;
+				br_make_forwarding(p);
+			} else if (br_is_designated_port(p)) {
+				del_timer(&p->message_age_timer);
+				br_make_forwarding(p);
+			} else {
+				p->config_pending = 0;
+				p->topology_change_ack = 0;
+				br_make_blocking(p);
+			}
+		}
+
+	}
+}
+
+/* called under bridge lock */
+static inline void br_topology_change_acknowledge(struct net_bridge_port *p)
+{
+	p->topology_change_ack = 1;
+	br_transmit_config(p);
+}
+
+/* called under bridge lock */
+void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
+{
+	struct net_bridge *br;
+	int was_root;
+ 
+	br = p->br;
+	was_root = br_is_root_bridge(br);
+
+	if (br_supersedes_port_info(p, bpdu)) {
+		br_record_config_information(p, bpdu);
+		br_configuration_update(br);
+		br_port_state_selection(br);
+
+		if (!br_is_root_bridge(br) && was_root) {
+			del_timer(&br->hello_timer);
+			if (br->topology_change_detected) {
+				del_timer(&br->topology_change_timer);
+				br_transmit_tcn(br);
+
+				mod_timer(&br->tcn_timer, 
+					  jiffies + br->bridge_hello_time);
+			}
+		}
+
+		if (p->port_no == br->root_port) {
+			br_record_config_timeout_values(br, bpdu);
+			br_config_bpdu_generation(br);
+			if (bpdu->topology_change_ack)
+				br_topology_change_acknowledged(br);
+		}
+	} else if (br_is_designated_port(p)) {		
+		br_reply(p);		
+	}
+}
+
+/* called under bridge lock */
+void br_received_tcn_bpdu(struct net_bridge_port *p)
+{
+	if (br_is_designated_port(p)) {
+		pr_info("%s: received tcn bpdu on port %i(%s)\n",
+		       p->br->dev->name, p->port_no, p->dev->name);
+
+		br_topology_change_detection(p->br);
+		br_topology_change_acknowledge(p);
+	}
+}
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
new file mode 100644
index 00000000000..b91a875aca0
--- /dev/null
+++ b/net/bridge/br_stp_bpdu.c
@@ -0,0 +1,205 @@
+/*
+ *	Spanning tree protocol; BPDU handling
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netfilter_bridge.h>
+
+#include "br_private.h"
+#include "br_private_stp.h"
+
+#define JIFFIES_TO_TICKS(j) (((j) << 8) / HZ)
+#define TICKS_TO_JIFFIES(j) (((j) * HZ) >> 8)
+
+static void br_send_bpdu(struct net_bridge_port *p, unsigned char *data, int length)
+{
+	struct net_device *dev;
+	struct sk_buff *skb;
+	int size;
+
+	if (!p->br->stp_enabled)
+		return;
+
+	size = length + 2*ETH_ALEN + 2;
+	if (size < 60)
+		size = 60;
+
+	dev = p->dev;
+
+	if ((skb = dev_alloc_skb(size)) == NULL) {
+		printk(KERN_INFO "br: memory squeeze!\n");
+		return;
+	}
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_802_2);
+	skb->mac.raw = skb_put(skb, size);
+	memcpy(skb->mac.raw, bridge_ula, ETH_ALEN);
+	memcpy(skb->mac.raw+ETH_ALEN, dev->dev_addr, ETH_ALEN);
+	skb->mac.raw[2*ETH_ALEN] = 0;
+	skb->mac.raw[2*ETH_ALEN+1] = length;
+	skb->nh.raw = skb->mac.raw + 2*ETH_ALEN + 2;
+	memcpy(skb->nh.raw, data, length);
+	memset(skb->nh.raw + length, 0xa5, size - length - 2*ETH_ALEN - 2);
+
+	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+		dev_queue_xmit);
+}
+
+static __inline__ void br_set_ticks(unsigned char *dest, int jiff)
+{
+	__u16 ticks;
+
+	ticks = JIFFIES_TO_TICKS(jiff);
+	dest[0] = (ticks >> 8) & 0xFF;
+	dest[1] = ticks & 0xFF;
+}
+
+static __inline__ int br_get_ticks(unsigned char *dest)
+{
+	return TICKS_TO_JIFFIES((dest[0] << 8) | dest[1]);
+}
+
+/* called under bridge lock */
+void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
+{
+	unsigned char buf[38];
+
+	buf[0] = 0x42;
+	buf[1] = 0x42;
+	buf[2] = 0x03;
+	buf[3] = 0;
+	buf[4] = 0;
+	buf[5] = 0;
+	buf[6] = BPDU_TYPE_CONFIG;
+	buf[7] = (bpdu->topology_change ? 0x01 : 0) |
+		(bpdu->topology_change_ack ? 0x80 : 0);
+	buf[8] = bpdu->root.prio[0];
+	buf[9] = bpdu->root.prio[1];
+	buf[10] = bpdu->root.addr[0];
+	buf[11] = bpdu->root.addr[1];
+	buf[12] = bpdu->root.addr[2];
+	buf[13] = bpdu->root.addr[3];
+	buf[14] = bpdu->root.addr[4];
+	buf[15] = bpdu->root.addr[5];
+	buf[16] = (bpdu->root_path_cost >> 24) & 0xFF;
+	buf[17] = (bpdu->root_path_cost >> 16) & 0xFF;
+	buf[18] = (bpdu->root_path_cost >> 8) & 0xFF;
+	buf[19] = bpdu->root_path_cost & 0xFF;
+	buf[20] = bpdu->bridge_id.prio[0];
+	buf[21] = bpdu->bridge_id.prio[1];
+	buf[22] = bpdu->bridge_id.addr[0];
+	buf[23] = bpdu->bridge_id.addr[1];
+	buf[24] = bpdu->bridge_id.addr[2];
+	buf[25] = bpdu->bridge_id.addr[3];
+	buf[26] = bpdu->bridge_id.addr[4];
+	buf[27] = bpdu->bridge_id.addr[5];
+	buf[28] = (bpdu->port_id >> 8) & 0xFF;
+	buf[29] = bpdu->port_id & 0xFF;
+
+	br_set_ticks(buf+30, bpdu->message_age);
+	br_set_ticks(buf+32, bpdu->max_age);
+	br_set_ticks(buf+34, bpdu->hello_time);
+	br_set_ticks(buf+36, bpdu->forward_delay);
+
+	br_send_bpdu(p, buf, 38);
+}
+
+/* called under bridge lock */
+void br_send_tcn_bpdu(struct net_bridge_port *p)
+{
+	unsigned char buf[7];
+
+	buf[0] = 0x42;
+	buf[1] = 0x42;
+	buf[2] = 0x03;
+	buf[3] = 0;
+	buf[4] = 0;
+	buf[5] = 0;
+	buf[6] = BPDU_TYPE_TCN;
+	br_send_bpdu(p, buf, 7);
+}
+
+static const unsigned char header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
+
+/* NO locks */
+int br_stp_handle_bpdu(struct sk_buff *skb)
+{
+	struct net_bridge_port *p = skb->dev->br_port;
+	struct net_bridge *br = p->br;
+	unsigned char *buf;
+
+	/* need at least the 802 and STP headers */
+	if (!pskb_may_pull(skb, sizeof(header)+1) ||
+	    memcmp(skb->data, header, sizeof(header)))
+		goto err;
+
+	buf = skb_pull(skb, sizeof(header));
+
+	spin_lock_bh(&br->lock);
+	if (p->state == BR_STATE_DISABLED 
+	    || !(br->dev->flags & IFF_UP)
+	    || !br->stp_enabled)
+		goto out;
+
+	if (buf[0] == BPDU_TYPE_CONFIG) {
+		struct br_config_bpdu bpdu;
+
+		if (!pskb_may_pull(skb, 32))
+		    goto out;
+
+		buf = skb->data;
+		bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0;
+		bpdu.topology_change_ack = (buf[1] & 0x80) ? 1 : 0;
+
+		bpdu.root.prio[0] = buf[2];
+		bpdu.root.prio[1] = buf[3];
+		bpdu.root.addr[0] = buf[4];
+		bpdu.root.addr[1] = buf[5];
+		bpdu.root.addr[2] = buf[6];
+		bpdu.root.addr[3] = buf[7];
+		bpdu.root.addr[4] = buf[8];
+		bpdu.root.addr[5] = buf[9];
+		bpdu.root_path_cost =
+			(buf[10] << 24) |
+			(buf[11] << 16) |
+			(buf[12] << 8) |
+			buf[13];
+		bpdu.bridge_id.prio[0] = buf[14];
+		bpdu.bridge_id.prio[1] = buf[15];
+		bpdu.bridge_id.addr[0] = buf[16];
+		bpdu.bridge_id.addr[1] = buf[17];
+		bpdu.bridge_id.addr[2] = buf[18];
+		bpdu.bridge_id.addr[3] = buf[19];
+		bpdu.bridge_id.addr[4] = buf[20];
+		bpdu.bridge_id.addr[5] = buf[21];
+		bpdu.port_id = (buf[22] << 8) | buf[23];
+
+		bpdu.message_age = br_get_ticks(buf+24);
+		bpdu.max_age = br_get_ticks(buf+26);
+		bpdu.hello_time = br_get_ticks(buf+28);
+		bpdu.forward_delay = br_get_ticks(buf+30);
+
+		br_received_config_bpdu(p, &bpdu);
+	}
+
+	else if (buf[0] == BPDU_TYPE_TCN) {
+		br_received_tcn_bpdu(p);
+	}
+ out:
+	spin_unlock_bh(&br->lock);
+ err:
+	kfree_skb(skb);
+	return 0;
+}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
new file mode 100644
index 00000000000..0da11ff05fa
--- /dev/null
+++ b/net/bridge/br_stp_if.c
@@ -0,0 +1,225 @@
+/*
+ *	Spanning tree protocol; interface code
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+
+#include "br_private.h"
+#include "br_private_stp.h"
+
+
+/* Port id is composed of priority and port number.
+ * NB: least significant bits of priority are dropped to
+ *     make room for more ports.
+ */
+static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
+{
+	return ((u16)priority << BR_PORT_BITS) 
+		| (port_no & ((1<<BR_PORT_BITS)-1));
+}
+
+/* called under bridge lock */
+void br_init_port(struct net_bridge_port *p)
+{
+	p->port_id = br_make_port_id(p->priority, p->port_no);
+	br_become_designated_port(p);
+	p->state = BR_STATE_BLOCKING;
+	p->topology_change_ack = 0;
+	p->config_pending = 0;
+
+	br_stp_port_timer_init(p);
+}
+
+/* called under bridge lock */
+void br_stp_enable_bridge(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	spin_lock_bh(&br->lock);
+	mod_timer(&br->hello_timer, jiffies + br->hello_time);
+	mod_timer(&br->gc_timer, jiffies + HZ/10);
+	
+	br_config_bpdu_generation(br);
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev))
+			br_stp_enable_port(p);
+
+	}
+	spin_unlock_bh(&br->lock);
+}
+
+/* NO locks held */
+void br_stp_disable_bridge(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	spin_lock(&br->lock);
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED)
+			br_stp_disable_port(p);
+
+	}
+
+	br->topology_change = 0;
+	br->topology_change_detected = 0;
+	spin_unlock(&br->lock);
+
+	del_timer_sync(&br->hello_timer);
+	del_timer_sync(&br->topology_change_timer);
+	del_timer_sync(&br->tcn_timer);
+	del_timer_sync(&br->gc_timer);
+}
+
+/* called under bridge lock */
+void br_stp_enable_port(struct net_bridge_port *p)
+{
+	br_init_port(p);
+	br_port_state_selection(p->br);
+}
+
+/* called under bridge lock */
+void br_stp_disable_port(struct net_bridge_port *p)
+{
+	struct net_bridge *br;
+	int wasroot;
+
+	br = p->br;
+	printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
+	       br->dev->name, p->port_no, p->dev->name, "disabled");
+
+	wasroot = br_is_root_bridge(br);
+	br_become_designated_port(p);
+	p->state = BR_STATE_DISABLED;
+	p->topology_change_ack = 0;
+	p->config_pending = 0;
+
+	del_timer(&p->message_age_timer);
+	del_timer(&p->forward_delay_timer);
+	del_timer(&p->hold_timer);
+
+	br_configuration_update(br);
+
+	br_port_state_selection(br);
+
+	if (br_is_root_bridge(br) && !wasroot)
+		br_become_root_bridge(br);
+}
+
+/* called under bridge lock */
+static void br_stp_change_bridge_id(struct net_bridge *br, 
+				    const unsigned char *addr)
+{
+	unsigned char oldaddr[6];
+	struct net_bridge_port *p;
+	int wasroot;
+
+	wasroot = br_is_root_bridge(br);
+
+	memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
+	memcpy(br->bridge_id.addr, addr, ETH_ALEN);
+	memcpy(br->dev->dev_addr, addr, ETH_ALEN);
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (!memcmp(p->designated_bridge.addr, oldaddr, ETH_ALEN))
+			memcpy(p->designated_bridge.addr, addr, ETH_ALEN);
+
+		if (!memcmp(p->designated_root.addr, oldaddr, ETH_ALEN))
+			memcpy(p->designated_root.addr, addr, ETH_ALEN);
+
+	}
+
+	br_configuration_update(br);
+	br_port_state_selection(br);
+	if (br_is_root_bridge(br) && !wasroot)
+		br_become_root_bridge(br);
+}
+
+static const unsigned char br_mac_zero[6];
+
+/* called under bridge lock */
+void br_stp_recalculate_bridge_id(struct net_bridge *br)
+{
+	const unsigned char *addr = br_mac_zero;
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (addr == br_mac_zero ||
+		    memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
+			addr = p->dev->dev_addr;
+
+	}
+
+	if (memcmp(br->bridge_id.addr, addr, ETH_ALEN))
+		br_stp_change_bridge_id(br, addr);
+}
+
+/* called under bridge lock */
+void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
+{
+	struct net_bridge_port *p;
+	int wasroot;
+
+	wasroot = br_is_root_bridge(br);
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED &&
+		    br_is_designated_port(p)) {
+			p->designated_bridge.prio[0] = (newprio >> 8) & 0xFF;
+			p->designated_bridge.prio[1] = newprio & 0xFF;
+		}
+
+	}
+
+	br->bridge_id.prio[0] = (newprio >> 8) & 0xFF;
+	br->bridge_id.prio[1] = newprio & 0xFF;
+	br_configuration_update(br);
+	br_port_state_selection(br);
+	if (br_is_root_bridge(br) && !wasroot)
+		br_become_root_bridge(br);
+}
+
+/* called under bridge lock */
+void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio)
+{
+	port_id new_port_id = br_make_port_id(newprio, p->port_no);
+
+	if (br_is_designated_port(p))
+		p->designated_port = new_port_id;
+
+	p->port_id = new_port_id;
+	p->priority = newprio;
+	if (!memcmp(&p->br->bridge_id, &p->designated_bridge, 8) &&
+	    p->port_id < p->designated_port) {
+		br_become_designated_port(p);
+		br_port_state_selection(p->br);
+	}
+}
+
+/* called under bridge lock */
+void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost)
+{
+	p->path_cost = path_cost;
+	br_configuration_update(p->br);
+	br_port_state_selection(p->br);
+}
+
+ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id)
+{
+	return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n",
+	       id->prio[0], id->prio[1],
+	       id->addr[0], id->addr[1], id->addr[2],
+	       id->addr[3], id->addr[4], id->addr[5]);
+}
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
new file mode 100644
index 00000000000..9bef55f5642
--- /dev/null
+++ b/net/bridge/br_stp_timer.c
@@ -0,0 +1,188 @@
+/*
+ *	Spanning tree protocol; timer-related code
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *
+ *	$Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/times.h>
+#include <linux/smp_lock.h>
+
+#include "br_private.h"
+#include "br_private_stp.h"
+
+/* called under bridge lock */
+static int br_is_designated_for_some_port(const struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (p->state != BR_STATE_DISABLED &&
+		    !memcmp(&p->designated_bridge, &br->bridge_id, 8)) 
+			return 1;
+	}
+
+	return 0;
+}
+
+static void br_hello_timer_expired(unsigned long arg)
+{
+	struct net_bridge *br = (struct net_bridge *)arg;
+	
+	pr_debug("%s: hello timer expired\n", br->dev->name);
+	spin_lock_bh(&br->lock);
+	if (br->dev->flags & IFF_UP) {
+		br_config_bpdu_generation(br);
+
+		mod_timer(&br->hello_timer, jiffies + br->hello_time);
+	}
+	spin_unlock_bh(&br->lock);
+}
+
+static void br_message_age_timer_expired(unsigned long arg)
+{
+	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+	struct net_bridge *br = p->br;
+	const bridge_id *id = &p->designated_bridge;
+	int was_root;
+
+	if (p->state == BR_STATE_DISABLED)
+		return;
+
+	
+	pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n",
+		br->dev->name, 
+		id->prio[0], id->prio[1], 
+		id->addr[0], id->addr[1], id->addr[2], 
+		id->addr[3], id->addr[4], id->addr[5],
+		p->port_no, p->dev->name);
+
+	/*
+	 * According to the spec, the message age timer cannot be
+	 * running when we are the root bridge. So..  this was_root
+	 * check is redundant. I'm leaving it in for now, though.
+	 */
+	spin_lock_bh(&br->lock);
+	if (p->state == BR_STATE_DISABLED)
+		goto unlock;
+	was_root = br_is_root_bridge(br);
+
+	br_become_designated_port(p);
+	br_configuration_update(br);
+	br_port_state_selection(br);
+	if (br_is_root_bridge(br) && !was_root)
+		br_become_root_bridge(br);
+ unlock:
+	spin_unlock_bh(&br->lock);
+}
+
+static void br_forward_delay_timer_expired(unsigned long arg)
+{
+	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+	struct net_bridge *br = p->br;
+
+	pr_debug("%s: %d(%s) forward delay timer\n",
+		 br->dev->name, p->port_no, p->dev->name);
+	spin_lock_bh(&br->lock);
+	if (p->state == BR_STATE_LISTENING) {
+		p->state = BR_STATE_LEARNING;
+		mod_timer(&p->forward_delay_timer,
+			  jiffies + br->forward_delay);
+	} else if (p->state == BR_STATE_LEARNING) {
+		p->state = BR_STATE_FORWARDING;
+		if (br_is_designated_for_some_port(br))
+			br_topology_change_detection(br);
+	}
+	br_log_state(p);
+	spin_unlock_bh(&br->lock);
+}
+
+static void br_tcn_timer_expired(unsigned long arg)
+{
+	struct net_bridge *br = (struct net_bridge *) arg;
+
+	pr_debug("%s: tcn timer expired\n", br->dev->name);
+	spin_lock_bh(&br->lock);
+	if (br->dev->flags & IFF_UP) {
+		br_transmit_tcn(br);
+	
+		mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time);
+	}
+	spin_unlock_bh(&br->lock);
+}
+
+static void br_topology_change_timer_expired(unsigned long arg)
+{
+	struct net_bridge *br = (struct net_bridge *) arg;
+
+	pr_debug("%s: topo change timer expired\n", br->dev->name);
+	spin_lock_bh(&br->lock);
+	br->topology_change_detected = 0;
+	br->topology_change = 0;
+	spin_unlock_bh(&br->lock);
+}
+
+static void br_hold_timer_expired(unsigned long arg)
+{
+	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+
+	pr_debug("%s: %d(%s) hold timer expired\n", 
+		 p->br->dev->name,  p->port_no, p->dev->name);
+
+	spin_lock_bh(&p->br->lock);
+	if (p->config_pending)
+		br_transmit_config(p);
+	spin_unlock_bh(&p->br->lock);
+}
+
+static inline void br_timer_init(struct timer_list *timer,
+			  void (*_function)(unsigned long),
+			  unsigned long _data)
+{
+	init_timer(timer);
+	timer->function = _function;
+	timer->data = _data;
+}
+
+void br_stp_timer_init(struct net_bridge *br)
+{
+	br_timer_init(&br->hello_timer, br_hello_timer_expired,
+		      (unsigned long) br);
+
+	br_timer_init(&br->tcn_timer, br_tcn_timer_expired, 
+		      (unsigned long) br);
+
+	br_timer_init(&br->topology_change_timer,
+		      br_topology_change_timer_expired,
+		      (unsigned long) br);
+
+	br_timer_init(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
+}
+
+void br_stp_port_timer_init(struct net_bridge_port *p)
+{
+	br_timer_init(&p->message_age_timer, br_message_age_timer_expired,
+		      (unsigned long) p);
+
+	br_timer_init(&p->forward_delay_timer, br_forward_delay_timer_expired,
+		      (unsigned long) p);
+		      
+	br_timer_init(&p->hold_timer, br_hold_timer_expired,
+		      (unsigned long) p);
+}	
+
+/* Report ticks left (in USER_HZ) used for API */
+unsigned long br_timer_value(const struct timer_list *timer)
+{
+	return timer_pending(timer)
+		? jiffies_to_clock_t(timer->expires - jiffies) : 0;
+}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
new file mode 100644
index 00000000000..98cf53c81fa
--- /dev/null
+++ b/net/bridge/br_sysfs_br.c
@@ -0,0 +1,364 @@
+/*
+ *	Sysfs attributes of bridge ports
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Stephen Hemminger		<shemminger@osdl.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+#include <linux/times.h>
+
+#include "br_private.h"
+
+#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define to_bridge(cd)	((struct net_bridge *)(to_net_dev(cd)->priv))
+
+/*
+ * Common code for storing bridge parameters.
+ */
+static ssize_t store_bridge_parm(struct class_device *cd,
+				 const char *buf, size_t len,
+				 void (*set)(struct net_bridge *, unsigned long))
+{
+	struct net_bridge *br = to_bridge(cd);
+	char *endp;
+	unsigned long val;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+
+	spin_lock_bh(&br->lock);
+	(*set)(br, val);
+	spin_unlock_bh(&br->lock);
+	return len;
+}
+
+
+static ssize_t show_forward_delay(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
+}
+
+static void set_forward_delay(struct net_bridge *br, unsigned long val)
+{
+	unsigned long delay = clock_t_to_jiffies(val);
+	br->forward_delay = delay;
+	if (br_is_root_bridge(br))
+		br->bridge_forward_delay = delay;
+}
+
+static ssize_t store_forward_delay(struct class_device *cd, const char *buf,
+				   size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_forward_delay);
+}
+static CLASS_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR,
+			 show_forward_delay, store_forward_delay);
+
+static ssize_t show_hello_time(struct class_device *cd, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       jiffies_to_clock_t(to_bridge(cd)->hello_time));
+}
+
+static void set_hello_time(struct net_bridge *br, unsigned long val)
+{
+	unsigned long t = clock_t_to_jiffies(val);
+	br->hello_time = t;
+	if (br_is_root_bridge(br))
+		br->bridge_hello_time = t;
+}
+
+static ssize_t store_hello_time(struct class_device *cd, const char *buf,
+				size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_hello_time);
+}
+
+static CLASS_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time,
+			 store_hello_time);
+
+static ssize_t show_max_age(struct class_device *cd, char *buf)
+{
+	return sprintf(buf, "%lu\n",
+		       jiffies_to_clock_t(to_bridge(cd)->max_age));
+}
+
+static void set_max_age(struct net_bridge *br, unsigned long val)
+{
+	unsigned long t = clock_t_to_jiffies(val);
+	br->max_age = t;
+	if (br_is_root_bridge(br))
+		br->bridge_max_age = t;
+}
+
+static ssize_t store_max_age(struct class_device *cd, const char *buf,
+				size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_max_age);
+}
+
+static CLASS_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age,
+			 store_max_age);
+
+static ssize_t show_ageing_time(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time));
+}
+
+static void set_ageing_time(struct net_bridge *br, unsigned long val)
+{
+	br->ageing_time = clock_t_to_jiffies(val);
+}
+
+static ssize_t store_ageing_time(struct class_device *cd, const char *buf,
+				 size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_ageing_time);
+}
+
+static CLASS_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time,
+			 store_ageing_time);
+static ssize_t show_stp_state(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%d\n", br->stp_enabled);
+}
+
+static void set_stp_state(struct net_bridge *br, unsigned long val)
+{
+	br->stp_enabled = val;
+}
+
+static ssize_t store_stp_state(struct class_device *cd,
+			       const char *buf, size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_stp_state);
+}
+
+static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
+			 store_stp_state);
+
+static ssize_t show_priority(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%d\n",
+		       (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]);
+}
+
+static void set_priority(struct net_bridge *br, unsigned long val)
+{
+	br_stp_set_bridge_priority(br, (u16) val);
+}
+
+static ssize_t store_priority(struct class_device *cd,
+			       const char *buf, size_t len)
+{
+	return store_bridge_parm(cd, buf, len, set_priority);
+}
+static CLASS_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority,
+			 store_priority);
+
+static ssize_t show_root_id(struct class_device *cd, char *buf)
+{
+	return br_show_bridge_id(buf, &to_bridge(cd)->designated_root);
+}
+static CLASS_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL);
+
+static ssize_t show_bridge_id(struct class_device *cd, char *buf)
+{
+	return br_show_bridge_id(buf, &to_bridge(cd)->bridge_id);
+}
+static CLASS_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL);
+
+static ssize_t show_root_port(struct class_device *cd, char *buf)
+{
+	return sprintf(buf, "%d\n", to_bridge(cd)->root_port);
+}
+static CLASS_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL);
+
+static ssize_t show_root_path_cost(struct class_device *cd, char *buf)
+{
+	return sprintf(buf, "%d\n", to_bridge(cd)->root_path_cost);
+}
+static CLASS_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL);
+
+static ssize_t show_topology_change(struct class_device *cd, char *buf)
+{
+	return sprintf(buf, "%d\n", to_bridge(cd)->topology_change);
+}
+static CLASS_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL);
+
+static ssize_t show_topology_change_detected(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%d\n", br->topology_change_detected);
+}
+static CLASS_DEVICE_ATTR(topology_change_detected, S_IRUGO, show_topology_change_detected, NULL);
+
+static ssize_t show_hello_timer(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer));
+}
+static CLASS_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL);
+
+static ssize_t show_tcn_timer(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer));
+}
+static CLASS_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL);
+
+static ssize_t show_topology_change_timer(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer));
+}
+static CLASS_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, NULL);
+
+static ssize_t show_gc_timer(struct class_device *cd, char *buf)
+{
+	struct net_bridge *br = to_bridge(cd);
+	return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer));
+}
+static CLASS_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL);
+
+static struct attribute *bridge_attrs[] = {
+	&class_device_attr_forward_delay.attr,
+	&class_device_attr_hello_time.attr,
+	&class_device_attr_max_age.attr,
+	&class_device_attr_ageing_time.attr,
+	&class_device_attr_stp_state.attr,
+	&class_device_attr_priority.attr,
+	&class_device_attr_bridge_id.attr,
+	&class_device_attr_root_id.attr,
+	&class_device_attr_root_path_cost.attr,
+	&class_device_attr_root_port.attr,
+	&class_device_attr_topology_change.attr,
+	&class_device_attr_topology_change_detected.attr,
+	&class_device_attr_hello_timer.attr,
+	&class_device_attr_tcn_timer.attr,
+	&class_device_attr_topology_change_timer.attr,
+	&class_device_attr_gc_timer.attr,
+	NULL
+};
+
+static struct attribute_group bridge_group = {
+	.name = SYSFS_BRIDGE_ATTR,
+	.attrs = bridge_attrs,
+};
+
+/*
+ * Export the forwarding information table as a binary file
+ * The records are struct __fdb_entry.
+ *
+ * Returns the number of bytes read.
+ */
+static ssize_t brforward_read(struct kobject *kobj, char *buf,
+			   loff_t off, size_t count)
+{
+	struct class_device *cdev = to_class_dev(kobj);
+	struct net_bridge *br = to_bridge(cdev);
+	int n;
+
+	/* must read whole records */
+	if (off % sizeof(struct __fdb_entry) != 0)
+		return -EINVAL;
+
+	n =  br_fdb_fillbuf(br, buf, 
+			    count / sizeof(struct __fdb_entry),
+			    off / sizeof(struct __fdb_entry));
+
+	if (n > 0)
+		n *= sizeof(struct __fdb_entry);
+	
+	return n;
+}
+
+static struct bin_attribute bridge_forward = {
+	.attr = { .name = SYSFS_BRIDGE_FDB,
+		  .mode = S_IRUGO, 
+		  .owner = THIS_MODULE, },
+	.read = brforward_read,
+};
+
+/*
+ * Add entries in sysfs onto the existing network class device
+ * for the bridge.
+ *   Adds a attribute group "bridge" containing tuning parameters.
+ *   Binary attribute containing the forward table
+ *   Sub directory to hold links to interfaces.
+ *
+ * Note: the ifobj exists only to be a subdirectory
+ *   to hold links.  The ifobj exists in same data structure
+ *   as it's parent the bridge so reference counting works.
+ */
+int br_sysfs_addbr(struct net_device *dev)
+{
+	struct kobject *brobj = &dev->class_dev.kobj;
+	struct net_bridge *br = netdev_priv(dev);
+	int err;
+
+	err = sysfs_create_group(brobj, &bridge_group);
+	if (err) {
+		pr_info("%s: can't create group %s/%s\n",
+			__FUNCTION__, dev->name, bridge_group.name);
+		goto out1;
+	}
+
+	err = sysfs_create_bin_file(brobj, &bridge_forward);
+	if (err) {
+		pr_info("%s: can't create attribue file %s/%s\n",
+			__FUNCTION__, dev->name, bridge_forward.attr.name);
+		goto out2;
+	}
+
+	
+	kobject_set_name(&br->ifobj, SYSFS_BRIDGE_PORT_SUBDIR);
+	br->ifobj.ktype = NULL;
+	br->ifobj.kset = NULL;
+	br->ifobj.parent = brobj;
+
+	err = kobject_register(&br->ifobj);
+	if (err) {
+		pr_info("%s: can't add kobject (directory) %s/%s\n",
+			__FUNCTION__, dev->name, br->ifobj.name);
+		goto out3;
+	}
+	return 0;
+ out3:
+	sysfs_remove_bin_file(&dev->class_dev.kobj, &bridge_forward);
+ out2:
+	sysfs_remove_group(&dev->class_dev.kobj, &bridge_group);
+ out1:
+	return err;
+
+}
+
+void br_sysfs_delbr(struct net_device *dev)
+{
+	struct kobject *kobj = &dev->class_dev.kobj;
+	struct net_bridge *br = netdev_priv(dev);
+
+	kobject_unregister(&br->ifobj);
+	sysfs_remove_bin_file(kobj, &bridge_forward);
+	sysfs_remove_group(kobj, &bridge_group);
+}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
new file mode 100644
index 00000000000..567249bf933
--- /dev/null
+++ b/net/bridge/br_sysfs_if.c
@@ -0,0 +1,269 @@
+/*
+ *	Sysfs attributes of bridge ports
+ *	Linux ethernet bridge
+ *
+ *	Authors:
+ *	Stephen Hemminger		<shemminger@osdl.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+
+#include "br_private.h"
+
+struct brport_attribute {
+	struct attribute	attr;
+	ssize_t (*show)(struct net_bridge_port *, char *);
+	ssize_t (*store)(struct net_bridge_port *, unsigned long);
+};
+
+#define BRPORT_ATTR(_name,_mode,_show,_store)		        \
+struct brport_attribute brport_attr_##_name = { 	        \
+	.attr = {.name = __stringify(_name), 			\
+		 .mode = _mode, 				\
+		 .owner = THIS_MODULE, },			\
+	.show	= _show,					\
+	.store	= _store,					\
+};
+
+static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->path_cost);
+}
+static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v)
+{
+	br_stp_set_path_cost(p, v);
+	return 0;
+}
+static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+		   show_path_cost, store_path_cost);
+
+static ssize_t show_priority(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->priority);
+}
+static ssize_t store_priority(struct net_bridge_port *p, unsigned long v)
+{
+	if (v >= (1<<(16-BR_PORT_BITS)))
+		return -ERANGE;
+	br_stp_set_port_priority(p, v);
+	return 0;
+}
+static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+			 show_priority, store_priority);
+
+static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
+{
+	return br_show_bridge_id(buf, &p->designated_root);
+}
+static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+
+static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
+{
+	return br_show_bridge_id(buf, &p->designated_bridge);
+}
+static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+
+static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->designated_port);
+}
+static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+
+static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->designated_cost);
+}
+static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+
+static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "0x%x\n", p->port_id);
+}
+static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+
+static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "0x%x\n", p->port_no);
+}
+
+static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+
+static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->topology_change_ack);
+}
+static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+
+static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->config_pending);
+}
+static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+
+static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%d\n", p->state);
+}
+static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+
+static ssize_t show_message_age_timer(struct net_bridge_port *p,
+					    char *buf)
+{
+	return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
+}
+static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+
+static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
+					    char *buf)
+{
+	return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
+}
+static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+
+static ssize_t show_hold_timer(struct net_bridge_port *p,
+					    char *buf)
+{
+	return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
+}
+static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+
+static struct brport_attribute *brport_attrs[] = {
+	&brport_attr_path_cost,
+	&brport_attr_priority,
+	&brport_attr_port_id,
+	&brport_attr_port_no,
+	&brport_attr_designated_root,
+	&brport_attr_designated_bridge,
+	&brport_attr_designated_port,
+	&brport_attr_designated_cost,
+	&brport_attr_state,
+	&brport_attr_change_ack,
+	&brport_attr_config_pending,
+	&brport_attr_message_age_timer,
+	&brport_attr_forward_delay_timer,
+	&brport_attr_hold_timer,
+	NULL
+};
+
+#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr)
+#define to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
+
+static ssize_t brport_show(struct kobject * kobj,
+			   struct attribute * attr, char * buf)
+{
+	struct brport_attribute * brport_attr = to_brport_attr(attr);
+	struct net_bridge_port * p = to_brport(kobj);
+
+	return brport_attr->show(p, buf);
+}
+
+static ssize_t brport_store(struct kobject * kobj,
+			    struct attribute * attr,
+			    const char * buf, size_t count)
+{
+	struct brport_attribute * brport_attr = to_brport_attr(attr);
+	struct net_bridge_port * p = to_brport(kobj);
+	ssize_t ret = -EINVAL;
+	char *endp;
+	unsigned long val;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp != buf) {
+		rtnl_lock();
+		if (p->dev && p->br && brport_attr->store) {
+			spin_lock_bh(&p->br->lock);
+			ret = brport_attr->store(p, val);
+			spin_unlock_bh(&p->br->lock);
+			if (ret == 0)
+				ret = count;
+		}
+		rtnl_unlock();
+	}
+	return ret;
+}
+
+/* called from kobject_put when port ref count goes to zero. */
+static void brport_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct net_bridge_port, kobj));
+}
+
+static struct sysfs_ops brport_sysfs_ops = {
+	.show = brport_show,
+	.store = brport_store,
+};
+
+static struct kobj_type brport_ktype = {
+	.sysfs_ops = &brport_sysfs_ops,
+	.release = brport_release,
+};
+
+
+/*
+ * Add sysfs entries to ethernet device added to a bridge.
+ * Creates a brport subdirectory with bridge attributes.
+ * Puts symlink in bridge's brport subdirectory
+ */
+int br_sysfs_addif(struct net_bridge_port *p)
+{
+	struct net_bridge *br = p->br;
+	struct brport_attribute **a;
+	int err;
+
+	ASSERT_RTNL();
+
+	kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR);
+	p->kobj.ktype = &brport_ktype;
+	p->kobj.parent = &(p->dev->class_dev.kobj);
+	p->kobj.kset = NULL;
+
+	err = kobject_add(&p->kobj);
+	if(err)
+		goto out1;
+
+	err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, 
+				SYSFS_BRIDGE_PORT_LINK);
+	if (err)
+		goto out2;
+
+	for (a = brport_attrs; *a; ++a) {
+		err = sysfs_create_file(&p->kobj, &((*a)->attr));
+		if (err)
+			goto out2;
+	}
+
+	err = sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name);
+	if (err)
+		goto out2;
+
+	return 0;
+ out2:
+	kobject_del(&p->kobj);
+ out1:
+	return err;
+}
+
+void br_sysfs_removeif(struct net_bridge_port *p)
+{
+	pr_debug("br_sysfs_removeif\n");
+	sysfs_remove_link(&p->br->ifobj, p->dev->name);
+	kobject_del(&p->kobj);
+}
+
+void br_sysfs_freeif(struct net_bridge_port *p)
+{
+	pr_debug("br_sysfs_freeif\n");
+	kobject_put(&p->kobj);
+}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
new file mode 100644
index 00000000000..68ccef507b4
--- /dev/null
+++ b/net/bridge/netfilter/Kconfig
@@ -0,0 +1,211 @@
+#
+# Bridge netfilter configuration
+#
+
+menu "Bridge: Netfilter Configuration"
+	depends on BRIDGE && NETFILTER
+
+config BRIDGE_NF_EBTABLES
+	tristate "Ethernet Bridge tables (ebtables) support"
+	help
+	  ebtables is a general, extensible frame/packet identification
+	  framework. Say 'Y' or 'M' here if you want to do Ethernet
+	  filtering/NAT/brouting on the Ethernet bridge.
+#
+# tables
+#
+config BRIDGE_EBT_BROUTE
+	tristate "ebt: broute table support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  The ebtables broute table is used to define rules that decide between
+	  bridging and routing frames, giving Linux the functionality of a
+	  brouter. See the man page for ebtables(8) and examples on the ebtables
+	  website.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_T_FILTER
+	tristate "ebt: filter table support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  The ebtables filter table is used to define frame filtering rules at
+	  local input, forwarding and local output. See the man page for
+	  ebtables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_T_NAT
+	tristate "ebt: nat table support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  The ebtables nat table is used to define rules that alter the MAC
+	  source address (MAC SNAT) or the MAC destination address (MAC DNAT).
+	  See the man page for ebtables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+#
+# matches
+#
+config BRIDGE_EBT_802_3
+	tristate "ebt: 802.3 filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds matching support for 802.3 Ethernet frames.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_AMONG
+	tristate "ebt: among filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the among match, which allows matching the MAC source
+	  and/or destination address on a list of addresses. Optionally,
+	  MAC/IP address pairs can be matched, f.e. for anti-spoofing rules.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_ARP
+	tristate "ebt: ARP filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the ARP match, which allows ARP and RARP header field
+	  filtering.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_IP
+	tristate "ebt: IP filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the IP match, which allows basic IP header field
+	  filtering.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_LIMIT
+	tristate "ebt: limit match support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the limit match, which allows you to control
+	  the rate at which a rule can be matched. This match is the
+	  equivalent of the iptables limit match.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
+config BRIDGE_EBT_MARK
+	tristate "ebt: mark filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the mark match, which allows matching frames based on
+	  the 'nfmark' value in the frame. This can be set by the mark target.
+	  This value is the same as the one used in the iptables mark match and
+	  target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_PKTTYPE
+	tristate "ebt: packet type filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the packet type match, which allows matching on the
+	  type of packet based on its Ethernet "class" (as determined by
+	  the generic networking code): broadcast, multicast,
+	  for this host alone or for another host.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_STP
+	tristate "ebt: STP filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the Spanning Tree Protocol match, which
+	  allows STP header field filtering.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_VLAN
+	tristate "ebt: 802.1Q VLAN filter support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the 802.1Q vlan match, which allows the filtering of
+	  802.1Q vlan fields.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+#
+# targets
+#
+config BRIDGE_EBT_ARPREPLY
+	tristate "ebt: arp reply target support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the arp reply target, which allows
+	  automatically sending arp replies to arp requests.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_DNAT
+	tristate "ebt: dnat target support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the MAC DNAT target, which allows altering the MAC
+	  destination address of frames.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_MARK_T
+	tristate "ebt: mark target support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the mark target, which allows marking frames by
+	  setting the 'nfmark' value in the frame.
+	  This value is the same as the one used in the iptables mark match and
+	  target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_REDIRECT
+	tristate "ebt: redirect target support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the MAC redirect target, which allows altering the MAC
+	  destination address of a frame to that of the device it arrived on.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_SNAT
+	tristate "ebt: snat target support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the MAC SNAT target, which allows altering the MAC
+	  source address of frames.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+#
+# watchers
+#
+config BRIDGE_EBT_LOG
+	tristate "ebt: log support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the log watcher, that you can use in any rule
+	  in any ebtables table. It records info about the frame header
+	  to the syslog.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config BRIDGE_EBT_ULOG
+	tristate "ebt: ulog support"
+	depends on BRIDGE_NF_EBTABLES
+	help
+	  This option adds the ulog watcher, that you can use in any rule
+	  in any ebtables table. The packet is passed to a userspace
+	  logging daemon using netlink multicast sockets. This differs
+	  from the log watcher in the sense that the complete packet is
+	  sent to userspace instead of a descriptive text and that
+	  netlink multicast sockets are used instead of the syslog.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+endmenu
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
new file mode 100644
index 00000000000..8bf6d9f6e9d
--- /dev/null
+++ b/net/bridge/netfilter/Makefile
@@ -0,0 +1,32 @@
+#
+# Makefile for the netfilter modules for Link Layer filtering on a bridge.
+#
+
+obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
+
+# tables
+obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o
+obj-$(CONFIG_BRIDGE_EBT_T_FILTER) += ebtable_filter.o
+obj-$(CONFIG_BRIDGE_EBT_T_NAT) += ebtable_nat.o
+
+#matches
+obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o
+obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o
+obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o
+obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o
+obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o
+obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o
+obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o
+obj-$(CONFIG_BRIDGE_EBT_STP) += ebt_stp.o
+obj-$(CONFIG_BRIDGE_EBT_VLAN) += ebt_vlan.o
+
+# targets
+obj-$(CONFIG_BRIDGE_EBT_ARPREPLY) += ebt_arpreply.o
+obj-$(CONFIG_BRIDGE_EBT_MARK_T) += ebt_mark.o
+obj-$(CONFIG_BRIDGE_EBT_DNAT) += ebt_dnat.o
+obj-$(CONFIG_BRIDGE_EBT_REDIRECT) += ebt_redirect.o
+obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o
+
+# watchers
+obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o
+obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_ulog.o
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
new file mode 100644
index 00000000000..468ebdf4bc1
--- /dev/null
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -0,0 +1,73 @@
+/*
+ * 802_3
+ *
+ * Author:
+ * Chris Vitale csv@bluetail.com
+ *
+ * May 2003
+ * 
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_802_3.h>
+#include <linux/module.h>
+
+static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const void *data, unsigned int datalen)
+{
+	struct ebt_802_3_info *info = (struct ebt_802_3_info *)data;
+	struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
+	uint16_t type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
+
+	if (info->bitmask & EBT_802_3_SAP) {
+		if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) 
+				return EBT_NOMATCH;
+		if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP))
+				return EBT_NOMATCH;
+	}
+
+	if (info->bitmask & EBT_802_3_TYPE) {
+		if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE))
+			return EBT_NOMATCH;
+		if (FWINV(info->type != type, EBT_802_3_TYPE)) 
+			return EBT_NOMATCH;
+	}
+
+	return EBT_MATCH;
+}
+
+static struct ebt_match filter_802_3;
+static int ebt_802_3_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_802_3_info *info = (struct ebt_802_3_info *)data;
+
+	if (datalen < sizeof(struct ebt_802_3_info))
+		return -EINVAL;
+	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct ebt_match filter_802_3 =
+{
+	.name		= EBT_802_3_MATCH,
+	.match		= ebt_filter_802_3,
+	.check		= ebt_802_3_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_802_3);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_802_3);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
new file mode 100644
index 00000000000..5a1f5e3bff1
--- /dev/null
+++ b/net/bridge/netfilter/ebt_among.c
@@ -0,0 +1,228 @@
+/*
+ *  ebt_among
+ *
+ *	Authors:
+ *	Grzegorz Borowiak <grzes@gnu.univ.gda.pl>
+ *
+ *  August, 2003
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_among.h>
+#include <linux/ip.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+
+static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
+				     const char *mac, uint32_t ip)
+{
+	/* You may be puzzled as to how this code works.
+	 * Some tricks were used, refer to 
+	 * 	include/linux/netfilter_bridge/ebt_among.h
+	 * as there you can find a solution of this mystery.
+	 */
+	const struct ebt_mac_wormhash_tuple *p;
+	int start, limit, i;
+	uint32_t cmp[2] = { 0, 0 };
+	int key = (const unsigned char) mac[5];
+
+	memcpy(((char *) cmp) + 2, mac, 6);
+	start = wh->table[key];
+	limit = wh->table[key + 1];
+	if (ip) {
+		for (i = start; i < limit; i++) {
+			p = &wh->pool[i];
+			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) {
+				if (p->ip == 0 || p->ip == ip) {
+					return 1;
+				}
+			}
+		}
+	} else {
+		for (i = start; i < limit; i++) {
+			p = &wh->pool[i];
+			if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) {
+				if (p->ip == 0) {
+					return 1;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash
+					    *wh)
+{
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		if (wh->table[i] > wh->table[i + 1])
+			return -0x100 - i;
+		if (wh->table[i] < 0)
+			return -0x200 - i;
+		if (wh->table[i] > wh->poolsize)
+			return -0x300 - i;
+	}
+	if (wh->table[256] > wh->poolsize)
+		return -0xc00;
+	return 0;
+}
+
+static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr)
+{
+	if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
+		struct iphdr _iph, *ih;
+
+		ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+		if (ih == NULL)
+			return -1;
+		*addr = ih->daddr;
+	} else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
+		struct arphdr _arph, *ah;
+		uint32_t buf, *bp;
+
+		ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+		if (ah == NULL ||
+		    ah->ar_pln != sizeof(uint32_t) ||
+		    ah->ar_hln != ETH_ALEN)
+			return -1;
+		bp = skb_header_pointer(skb, sizeof(struct arphdr) +
+					2 * ETH_ALEN + sizeof(uint32_t),
+					sizeof(uint32_t), &buf);
+		if (bp == NULL)
+			return -1;
+		*addr = *bp;
+	}
+	return 0;
+}
+
+static int get_ip_src(const struct sk_buff *skb, uint32_t *addr)
+{
+	if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) {
+		struct iphdr _iph, *ih;
+
+		ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+		if (ih == NULL)
+			return -1;
+		*addr = ih->saddr;
+	} else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
+		struct arphdr _arph, *ah;
+		uint32_t buf, *bp;
+
+		ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+		if (ah == NULL ||
+		    ah->ar_pln != sizeof(uint32_t) ||
+		    ah->ar_hln != ETH_ALEN)
+			return -1;
+		bp = skb_header_pointer(skb, sizeof(struct arphdr) +
+					ETH_ALEN, sizeof(uint32_t), &buf);
+		if (bp == NULL)
+			return -1;
+		*addr = *bp;
+	}
+	return 0;
+}
+
+static int ebt_filter_among(const struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out, const void *data,
+			    unsigned int datalen)
+{
+	struct ebt_among_info *info = (struct ebt_among_info *) data;
+	const char *dmac, *smac;
+	const struct ebt_mac_wormhash *wh_dst, *wh_src;
+	uint32_t dip = 0, sip = 0;
+
+	wh_dst = ebt_among_wh_dst(info);
+	wh_src = ebt_among_wh_src(info);
+
+	if (wh_src) {
+		smac = eth_hdr(skb)->h_source;
+		if (get_ip_src(skb, &sip))
+			return EBT_NOMATCH;
+		if (!(info->bitmask & EBT_AMONG_SRC_NEG)) {
+			/* we match only if it contains */
+			if (!ebt_mac_wormhash_contains(wh_src, smac, sip))
+				return EBT_NOMATCH;
+		} else {
+			/* we match only if it DOES NOT contain */
+			if (ebt_mac_wormhash_contains(wh_src, smac, sip))
+				return EBT_NOMATCH;
+		}
+	}
+
+	if (wh_dst) {
+		dmac = eth_hdr(skb)->h_dest;
+		if (get_ip_dst(skb, &dip))
+			return EBT_NOMATCH;
+		if (!(info->bitmask & EBT_AMONG_DST_NEG)) {
+			/* we match only if it contains */
+			if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip))
+				return EBT_NOMATCH;
+		} else {
+			/* we match only if it DOES NOT contain */
+			if (ebt_mac_wormhash_contains(wh_dst, dmac, dip))
+				return EBT_NOMATCH;
+		}
+	}
+
+	return EBT_MATCH;
+}
+
+static int ebt_among_check(const char *tablename, unsigned int hookmask,
+			   const struct ebt_entry *e, void *data,
+			   unsigned int datalen)
+{
+	struct ebt_among_info *info = (struct ebt_among_info *) data;
+	int expected_length = sizeof(struct ebt_among_info);
+	const struct ebt_mac_wormhash *wh_dst, *wh_src;
+	int err;
+
+	wh_dst = ebt_among_wh_dst(info);
+	wh_src = ebt_among_wh_src(info);
+	expected_length += ebt_mac_wormhash_size(wh_dst);
+	expected_length += ebt_mac_wormhash_size(wh_src);
+
+	if (datalen != EBT_ALIGN(expected_length)) {
+		printk(KERN_WARNING
+		       "ebtables: among: wrong size: %d"
+		       "against expected %d, rounded to %Zd\n",
+		       datalen, expected_length,
+		       EBT_ALIGN(expected_length));
+		return -EINVAL;
+	}
+	if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
+		printk(KERN_WARNING
+		       "ebtables: among: dst integrity fail: %x\n", -err);
+		return -EINVAL;
+	}
+	if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
+		printk(KERN_WARNING
+		       "ebtables: among: src integrity fail: %x\n", -err);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct ebt_match filter_among = {
+	.name		= EBT_AMONG_MATCH, 
+	.match		= ebt_filter_among, 
+	.check		= ebt_among_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_among);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_among);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
new file mode 100644
index 00000000000..b94c48cb6e4
--- /dev/null
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -0,0 +1,140 @@
+/*
+ *  ebt_arp
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *	Tim Gardner <timg@tpi.com>
+ *
+ *  April, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_arp.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/module.h>
+
+static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const void *data, unsigned int datalen)
+{
+	struct ebt_arp_info *info = (struct ebt_arp_info *)data;
+	struct arphdr _arph, *ah;
+
+	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+	if (ah == NULL)
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
+	   ah->ar_op, EBT_ARP_OPCODE))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
+	   ah->ar_hrd, EBT_ARP_HTYPE))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
+	   ah->ar_pro, EBT_ARP_PTYPE))
+		return EBT_NOMATCH;
+
+	if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
+		uint32_t _addr, *ap;
+
+		/* IPv4 addresses are always 4 bytes */
+		if (ah->ar_pln != sizeof(uint32_t))
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_SRC_IP) {
+			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
+						ah->ar_hln, sizeof(_addr),
+						&_addr);
+			if (ap == NULL)
+				return EBT_NOMATCH;
+			if (FWINV(info->saddr != (*ap & info->smsk),
+			   EBT_ARP_SRC_IP))
+				return EBT_NOMATCH;
+		}
+
+		if (info->bitmask & EBT_ARP_DST_IP) {
+			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
+						2*ah->ar_hln+sizeof(uint32_t),
+						sizeof(_addr), &_addr);
+			if (ap == NULL)
+				return EBT_NOMATCH;
+			if (FWINV(info->daddr != (*ap & info->dmsk),
+			   EBT_ARP_DST_IP))
+				return EBT_NOMATCH;
+		}
+	}
+
+	if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
+		unsigned char _mac[ETH_ALEN], *mp;
+		uint8_t verdict, i;
+
+		/* MAC addresses are 6 bytes */
+		if (ah->ar_hln != ETH_ALEN)
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_SRC_MAC) {
+			mp = skb_header_pointer(skb, sizeof(struct arphdr),
+						sizeof(_mac), &_mac);
+			if (mp == NULL)
+				return EBT_NOMATCH;
+			verdict = 0;
+			for (i = 0; i < 6; i++)
+				verdict |= (mp[i] ^ info->smaddr[i]) &
+				       info->smmsk[i];
+			if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
+				return EBT_NOMATCH;
+		}
+
+		if (info->bitmask & EBT_ARP_DST_MAC) {
+			mp = skb_header_pointer(skb, sizeof(struct arphdr) +
+						ah->ar_hln + ah->ar_pln,
+						sizeof(_mac), &_mac);
+			if (mp == NULL)
+				return EBT_NOMATCH;
+			verdict = 0;
+			for (i = 0; i < 6; i++)
+				verdict |= (mp[i] ^ info->dmaddr[i]) &
+					info->dmmsk[i];
+			if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
+				return EBT_NOMATCH;
+		}
+	}
+
+	return EBT_MATCH;
+}
+
+static int ebt_arp_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_arp_info *info = (struct ebt_arp_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info)))
+		return -EINVAL;
+	if ((e->ethproto != htons(ETH_P_ARP) &&
+	   e->ethproto != htons(ETH_P_RARP)) ||
+	   e->invflags & EBT_IPROTO)
+		return -EINVAL;
+	if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_match filter_arp =
+{
+	.name		= EBT_ARP_MATCH,
+	.match		= ebt_filter_arp,
+	.check		= ebt_arp_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_arp);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_arp);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
new file mode 100644
index 00000000000..b934de90f7c
--- /dev/null
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -0,0 +1,97 @@
+/*
+ *  ebt_arpreply
+ *
+ *	Authors:
+ *	Grzegorz Borowiak <grzes@gnu.univ.gda.pl>
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  August, 2003
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_arpreply.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
+#include <linux/module.h>
+
+static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data;
+	u32 _sip, *siptr, _dip, *diptr;
+	struct arphdr _ah, *ap;
+	unsigned char _sha[ETH_ALEN], *shp;
+	struct sk_buff *skb = *pskb;
+
+	ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
+	if (ap == NULL)
+		return EBT_DROP;
+
+	if (ap->ar_op != htons(ARPOP_REQUEST) ||
+	    ap->ar_hln != ETH_ALEN ||
+	    ap->ar_pro != htons(ETH_P_IP) ||
+	    ap->ar_pln != 4)
+		return EBT_CONTINUE;
+
+	shp = skb_header_pointer(skb, sizeof(_ah), ETH_ALEN, &_sha);
+	if (shp == NULL)
+		return EBT_DROP;
+
+	siptr = skb_header_pointer(skb, sizeof(_ah) + ETH_ALEN,
+				   sizeof(_sip), &_sip);
+	if (siptr == NULL)
+		return EBT_DROP;
+
+	diptr = skb_header_pointer(skb,
+				   sizeof(_ah) + 2 * ETH_ALEN + sizeof(_sip),
+				   sizeof(_dip), &_dip);
+	if (diptr == NULL)
+		return EBT_DROP;
+
+	arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in,
+	         *diptr, shp, info->mac, shp);
+
+	return info->target;
+}
+
+static int ebt_target_reply_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info)))
+		return -EINVAL;
+	if (BASE_CHAIN && info->target == EBT_RETURN)
+		return -EINVAL;
+	if (e->ethproto != htons(ETH_P_ARP) ||
+	    e->invflags & EBT_IPROTO)
+		return -EINVAL;
+	CLEAR_BASE_CHAIN_BIT;
+	if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING))
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_target reply_target =
+{
+	.name		= EBT_ARPREPLY_TARGET,
+	.target		= ebt_target_reply,
+	.check		= ebt_target_reply_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_target(&reply_target);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_target(&reply_target);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
new file mode 100644
index 00000000000..f5463086c7b
--- /dev/null
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -0,0 +1,76 @@
+/*
+ *  ebt_dnat
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  June, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_nat.h>
+#include <linux/module.h>
+#include <net/sock.h>
+
+static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_nat_info *info = (struct ebt_nat_info *)data;
+
+	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
+		struct sk_buff *nskb;
+
+		nskb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!nskb)
+			return NF_DROP;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN);
+	return info->target;
+}
+
+static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_nat_info *info = (struct ebt_nat_info *)data;
+
+	if (BASE_CHAIN && info->target == EBT_RETURN)
+		return -EINVAL;
+	CLEAR_BASE_CHAIN_BIT;
+	if ( (strcmp(tablename, "nat") ||
+	   (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) &&
+	   (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
+		return -EINVAL;
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
+		return -EINVAL;
+	if (INVALID_TARGET)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_target dnat =
+{
+	.name		= EBT_DNAT_TARGET,
+	.target		= ebt_target_dnat,
+	.check		= ebt_target_dnat_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_target(&dnat);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_target(&dnat);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
new file mode 100644
index 00000000000..7323805b972
--- /dev/null
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -0,0 +1,122 @@
+/*
+ *  ebt_ip
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ *  Changes:
+ *    added ip-sport and ip-dport
+ *    Innominate Security Technologies AG <mhopf@innominate.com>
+ *    September, 2002
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ip.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/module.h>
+
+struct tcpudphdr {
+	uint16_t src;
+	uint16_t dst;
+};
+
+static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const void *data,
+   unsigned int datalen)
+{
+	struct ebt_ip_info *info = (struct ebt_ip_info *)data;
+	struct iphdr _iph, *ih;
+	struct tcpudphdr _ports, *pptr;
+
+	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_IP_TOS &&
+	   FWINV(info->tos != ih->tos, EBT_IP_TOS))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_IP_SOURCE &&
+	   FWINV((ih->saddr & info->smsk) !=
+	   info->saddr, EBT_IP_SOURCE))
+		return EBT_NOMATCH;
+	if ((info->bitmask & EBT_IP_DEST) &&
+	   FWINV((ih->daddr & info->dmsk) !=
+	   info->daddr, EBT_IP_DEST))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_IP_PROTO) {
+		if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
+			return EBT_NOMATCH;
+		if (!(info->bitmask & EBT_IP_DPORT) &&
+		    !(info->bitmask & EBT_IP_SPORT))
+			return EBT_MATCH;
+		pptr = skb_header_pointer(skb, ih->ihl*4,
+					  sizeof(_ports), &_ports);
+		if (pptr == NULL)
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_IP_DPORT) {
+			u32 dst = ntohs(pptr->dst);
+			if (FWINV(dst < info->dport[0] ||
+			          dst > info->dport[1],
+			          EBT_IP_DPORT))
+			return EBT_NOMATCH;
+		}
+		if (info->bitmask & EBT_IP_SPORT) {
+			u32 src = ntohs(pptr->src);
+			if (FWINV(src < info->sport[0] ||
+			          src > info->sport[1],
+			          EBT_IP_SPORT))
+			return EBT_NOMATCH;
+		}
+	}
+	return EBT_MATCH;
+}
+
+static int ebt_ip_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_ip_info *info = (struct ebt_ip_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info)))
+		return -EINVAL;
+	if (e->ethproto != htons(ETH_P_IP) ||
+	   e->invflags & EBT_IPROTO)
+		return -EINVAL;
+	if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK)
+		return -EINVAL;
+	if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) {
+		if (info->invflags & EBT_IP_PROTO)
+			return -EINVAL;
+		if (info->protocol != IPPROTO_TCP &&
+		    info->protocol != IPPROTO_UDP)
+			 return -EINVAL;
+	}
+	if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1])
+		return -EINVAL;
+	if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_match filter_ip =
+{
+	.name		= EBT_IP_MATCH,
+	.match		= ebt_filter_ip,
+	.check		= ebt_ip_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_ip);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_ip);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
new file mode 100644
index 00000000000..637c8844cd5
--- /dev/null
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -0,0 +1,113 @@
+/*
+ *  ebt_limit
+ *
+ *	Authors:
+ *	Tom Marshall <tommy@home.tig-grr.com>
+ *
+ *	Mostly copied from netfilter's ipt_limit.c, see that file for
+ *	more explanation
+ *
+ *  September, 2003
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_limit.h>
+#include <linux/module.h>
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(limit_lock);
+
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+static int ebt_limit_match(const struct sk_buff *skb,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_limit_info *info = (struct ebt_limit_info *)data;
+	unsigned long now = jiffies;
+
+	spin_lock_bh(&limit_lock);
+	info->credit += (now - xchg(&info->prev, now)) * CREDITS_PER_JIFFY;
+	if (info->credit > info->credit_cap)
+		info->credit = info->credit_cap;
+
+	if (info->credit >= info->cost) {
+		/* We're not limited. */
+		info->credit -= info->cost;
+		spin_unlock_bh(&limit_lock);
+		return EBT_MATCH;
+	}
+
+	spin_unlock_bh(&limit_lock);
+	return EBT_NOMATCH;
+}
+
+/* Precision saver. */
+static u_int32_t
+user2credits(u_int32_t user)
+{
+	/* If multiplying would overflow... */
+	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		/* Divide first. */
+		return (user / EBT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+
+	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
+}
+
+static int ebt_limit_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_limit_info *info = (struct ebt_limit_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info)))
+		return -EINVAL;
+
+	/* Check for overflow. */
+	if (info->burst == 0 ||
+	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
+		printk("Overflow in ebt_limit, try lower: %u/%u\n",
+			info->avg, info->burst);
+		return -EINVAL;
+	}
+
+	/* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */
+	info->prev = jiffies;
+	info->credit = user2credits(info->avg * info->burst);
+	info->credit_cap = user2credits(info->avg * info->burst);
+	info->cost = user2credits(info->avg);
+	return 0;
+}
+
+static struct ebt_match ebt_limit_reg =
+{
+	.name		= EBT_LIMIT_MATCH,
+	.match		= ebt_limit_match,
+	.check		= ebt_limit_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&ebt_limit_reg);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&ebt_limit_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
new file mode 100644
index 00000000000..e4ae34b8892
--- /dev/null
+++ b/net/bridge/netfilter/ebt_log.c
@@ -0,0 +1,171 @@
+/*
+ *  ebt_log
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_log.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/if_arp.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(ebt_log_lock);
+
+static int ebt_log_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_log_info *info = (struct ebt_log_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info)))
+		return -EINVAL;
+	if (info->bitmask & ~EBT_LOG_MASK)
+		return -EINVAL;
+	if (info->loglevel >= 8)
+		return -EINVAL;
+	info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0';
+	return 0;
+}
+
+struct tcpudphdr
+{
+	uint16_t src;
+	uint16_t dst;
+};
+
+struct arppayload
+{
+	unsigned char mac_src[ETH_ALEN];
+	unsigned char ip_src[4];
+	unsigned char mac_dst[ETH_ALEN];
+	unsigned char ip_dst[4];
+};
+
+static void print_MAC(unsigned char *p)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++, p++)
+		printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':');
+}
+
+#define myNIPQUAD(a) a[0], a[1], a[2], a[3]
+static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_log_info *info = (struct ebt_log_info *)data;
+	char level_string[4] = "< >";
+	union {struct iphdr iph; struct tcpudphdr ports;
+	       struct arphdr arph; struct arppayload arpp;} u;
+
+	level_string[1] = '0' + info->loglevel;
+	spin_lock_bh(&ebt_log_lock);
+	printk(level_string);
+	printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "",
+	   out ? out->name : "");
+
+	printk("MAC source = ");
+	print_MAC(eth_hdr(skb)->h_source);
+	printk("MAC dest = ");
+	print_MAC(eth_hdr(skb)->h_dest);
+
+	printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto));
+
+	if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
+	   htons(ETH_P_IP)){
+		struct iphdr _iph, *ih;
+
+		ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+		if (ih == NULL) {
+			printk(" INCOMPLETE IP header");
+			goto out;
+		}
+		printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,",
+		   NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
+		printk(" IP tos=0x%02X, IP proto=%d", u.iph.tos,
+		       ih->protocol);
+		if (ih->protocol == IPPROTO_TCP ||
+		    ih->protocol == IPPROTO_UDP) {
+			struct tcpudphdr _ports, *pptr;
+
+			pptr = skb_header_pointer(skb, ih->ihl*4,
+						  sizeof(_ports), &_ports);
+			if (pptr == NULL) {
+				printk(" INCOMPLETE TCP/UDP header");
+				goto out;
+			}
+			printk(" SPT=%u DPT=%u", ntohs(pptr->src),
+			   ntohs(pptr->dst));
+		}
+		goto out;
+	}
+
+	if ((info->bitmask & EBT_LOG_ARP) &&
+	    ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
+	     (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) {
+		struct arphdr _arph, *ah;
+
+		ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+		if (ah == NULL) {
+			printk(" INCOMPLETE ARP header");
+			goto out;
+		}
+		printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+		       ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+		       ntohs(ah->ar_op));
+
+		/* If it's for Ethernet and the lengths are OK,
+		 * then log the ARP payload */
+		if (ah->ar_hrd == htons(1) &&
+		    ah->ar_hln == ETH_ALEN &&
+		    ah->ar_pln == sizeof(uint32_t)) {
+			struct arppayload _arpp, *ap;
+
+			ap = skb_header_pointer(skb, sizeof(u.arph),
+						sizeof(_arpp), &_arpp);
+			if (ap == NULL) {
+				printk(" INCOMPLETE ARP payload");
+				goto out;
+			}
+			printk(" ARP MAC SRC=");
+			print_MAC(ap->mac_src);
+			printk(" ARP IP SRC=%u.%u.%u.%u",
+			       myNIPQUAD(ap->ip_src));
+			printk(" ARP MAC DST=");
+			print_MAC(ap->mac_dst);
+			printk(" ARP IP DST=%u.%u.%u.%u",
+			       myNIPQUAD(ap->ip_dst));
+		}
+	}
+out:
+	printk("\n");
+	spin_unlock_bh(&ebt_log_lock);
+}
+
+static struct ebt_watcher log =
+{
+	.name		= EBT_LOG_WATCHER,
+	.watcher	= ebt_log,
+	.check		= ebt_log_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_watcher(&log);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_watcher(&log);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
new file mode 100644
index 00000000000..02c632b4d32
--- /dev/null
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -0,0 +1,68 @@
+/*
+ *  ebt_mark
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  July, 2002
+ *
+ */
+
+/* The mark target can be used in any chain,
+ * I believe adding a mangle table just for marking is total overkill.
+ * Marking a frame doesn't really change anything in the frame anyway.
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_mark_t.h>
+#include <linux/module.h>
+
+static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+
+	if ((*pskb)->nfmark != info->mark) {
+		(*pskb)->nfmark = info->mark;
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return info->target;
+}
+
+static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
+		return -EINVAL;
+	if (BASE_CHAIN && info->target == EBT_RETURN)
+		return -EINVAL;
+	CLEAR_BASE_CHAIN_BIT;
+	if (INVALID_TARGET)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_target mark_target =
+{
+	.name		= EBT_MARK_TARGET,
+	.target		= ebt_target_mark,
+	.check		= ebt_target_mark_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_target(&mark_target);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_target(&mark_target);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
new file mode 100644
index 00000000000..625102de149
--- /dev/null
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -0,0 +1,62 @@
+/*
+ *  ebt_mark_m
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  July, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_mark_m.h>
+#include <linux/module.h>
+
+static int ebt_filter_mark(const struct sk_buff *skb,
+   const struct net_device *in, const struct net_device *out, const void *data,
+   unsigned int datalen)
+{
+	struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data;
+
+	if (info->bitmask & EBT_MARK_OR)
+		return !(!!(skb->nfmark & info->mask) ^ info->invert);
+	return !(((skb->nfmark & info->mask) == info->mark) ^ info->invert);
+}
+
+static int ebt_mark_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+        struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info)))
+		return -EINVAL;
+	if (info->bitmask & ~EBT_MARK_MASK)
+		return -EINVAL;
+	if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND))
+		return -EINVAL;
+	if (!info->bitmask)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_match filter_mark =
+{
+	.name		= EBT_MARK_MATCH,
+	.match		= ebt_filter_mark,
+	.check		= ebt_mark_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_mark);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_mark);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
new file mode 100644
index 00000000000..ecd3b42b19b
--- /dev/null
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -0,0 +1,59 @@
+/*
+ *  ebt_pkttype
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2003
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_pkttype.h>
+#include <linux/module.h>
+
+static int ebt_filter_pkttype(const struct sk_buff *skb,
+   const struct net_device *in,
+   const struct net_device *out,
+   const void *data,
+   unsigned int datalen)
+{
+	struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data;
+
+	return (skb->pkt_type != info->pkt_type) ^ info->invert;
+}
+
+static int ebt_pkttype_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info)))
+		return -EINVAL;
+	if (info->invert != 0 && info->invert != 1)
+		return -EINVAL;
+	/* Allow any pkt_type value */
+	return 0;
+}
+
+static struct ebt_match filter_pkttype =
+{
+	.name		= EBT_PKTTYPE_MATCH,
+	.match		= ebt_filter_pkttype,
+	.check		= ebt_pkttype_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_pkttype);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_pkttype);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
new file mode 100644
index 00000000000..1538b438666
--- /dev/null
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -0,0 +1,81 @@
+/*
+ *  ebt_redirect
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_redirect.h>
+#include <linux/module.h>
+#include <net/sock.h>
+#include "../br_private.h"
+
+static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
+
+	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
+		struct sk_buff *nskb;
+
+		nskb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!nskb)
+			return NF_DROP;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	if (hooknr != NF_BR_BROUTING)
+		memcpy(eth_hdr(*pskb)->h_dest,
+		       in->br_port->br->dev->dev_addr, ETH_ALEN);
+	else
+		memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN);
+	(*pskb)->pkt_type = PACKET_HOST;
+	return info->target;
+}
+
+static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info)))
+		return -EINVAL;
+	if (BASE_CHAIN && info->target == EBT_RETURN)
+		return -EINVAL;
+	CLEAR_BASE_CHAIN_BIT;
+	if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) &&
+	     (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) )
+		return -EINVAL;
+	if (INVALID_TARGET)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_target redirect_target =
+{
+	.name		= EBT_REDIRECT_TARGET,
+	.target		= ebt_target_redirect,
+	.check		= ebt_target_redirect_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_target(&redirect_target);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_target(&redirect_target);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
new file mode 100644
index 00000000000..1529bdcb9a4
--- /dev/null
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -0,0 +1,76 @@
+/*
+ *  ebt_snat
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  June, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_nat.h>
+#include <linux/module.h>
+#include <net/sock.h>
+
+static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	struct ebt_nat_info *info = (struct ebt_nat_info *) data;
+
+	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
+		struct sk_buff *nskb;
+
+		nskb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!nskb)
+			return NF_DROP;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
+	return info->target;
+}
+
+static int ebt_target_snat_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_nat_info *info = (struct ebt_nat_info *) data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info)))
+		return -EINVAL;
+	if (BASE_CHAIN && info->target == EBT_RETURN)
+		return -EINVAL;
+	CLEAR_BASE_CHAIN_BIT;
+	if (strcmp(tablename, "nat"))
+		return -EINVAL;
+	if (hookmask & ~(1 << NF_BR_POST_ROUTING))
+		return -EINVAL;
+	if (INVALID_TARGET)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_target snat =
+{
+	.name		= EBT_SNAT_TARGET,
+	.target		= ebt_target_snat,
+	.check		= ebt_target_snat_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_target(&snat);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_target(&snat);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
new file mode 100644
index 00000000000..f8a8cdec16e
--- /dev/null
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -0,0 +1,194 @@
+/*
+ *  ebt_stp
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *	Stephen Hemminger <shemminger@osdl.org>
+ *
+ *  July, 2003
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_stp.h>
+#include <linux/module.h>
+
+#define BPDU_TYPE_CONFIG 0
+#define BPDU_TYPE_TCN 0x80
+
+struct stp_header {
+	uint8_t dsap;
+	uint8_t ssap;
+	uint8_t ctrl;
+	uint8_t pid;
+	uint8_t vers;
+	uint8_t type;
+};
+
+struct stp_config_pdu {
+	uint8_t flags;
+	uint8_t root[8];
+	uint8_t root_cost[4];
+	uint8_t sender[8];
+	uint8_t port[2];
+	uint8_t msg_age[2];
+	uint8_t max_age[2];
+	uint8_t hello_time[2];
+	uint8_t forward_delay[2];
+};
+
+#define NR16(p) (p[0] << 8 | p[1])
+#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
+
+static int ebt_filter_config(struct ebt_stp_info *info,
+   struct stp_config_pdu *stpc)
+{
+	struct ebt_stp_config_info *c;
+	uint16_t v16;
+	uint32_t v32;
+	int verdict, i;
+
+	c = &info->config;
+	if ((info->bitmask & EBT_STP_FLAGS) &&
+	    FWINV(c->flags != stpc->flags, EBT_STP_FLAGS))
+		return EBT_NOMATCH;
+	if (info->bitmask & EBT_STP_ROOTPRIO) {
+		v16 = NR16(stpc->root);
+		if (FWINV(v16 < c->root_priol ||
+		    v16 > c->root_priou, EBT_STP_ROOTPRIO))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_ROOTADDR) {
+		verdict = 0;
+		for (i = 0; i < 6; i++)
+			verdict |= (stpc->root[2+i] ^ c->root_addr[i]) &
+			           c->root_addrmsk[i];
+		if (FWINV(verdict != 0, EBT_STP_ROOTADDR))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_ROOTCOST) {
+		v32 = NR32(stpc->root_cost);
+		if (FWINV(v32 < c->root_costl ||
+		    v32 > c->root_costu, EBT_STP_ROOTCOST))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_SENDERPRIO) {
+		v16 = NR16(stpc->sender);
+		if (FWINV(v16 < c->sender_priol ||
+		    v16 > c->sender_priou, EBT_STP_SENDERPRIO))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_SENDERADDR) {
+		verdict = 0;
+		for (i = 0; i < 6; i++)
+			verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) &
+			           c->sender_addrmsk[i];
+		if (FWINV(verdict != 0, EBT_STP_SENDERADDR))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_PORT) {
+		v16 = NR16(stpc->port);
+		if (FWINV(v16 < c->portl ||
+		    v16 > c->portu, EBT_STP_PORT))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_MSGAGE) {
+		v16 = NR16(stpc->msg_age);
+		if (FWINV(v16 < c->msg_agel ||
+		    v16 > c->msg_ageu, EBT_STP_MSGAGE))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_MAXAGE) {
+		v16 = NR16(stpc->max_age);
+		if (FWINV(v16 < c->max_agel ||
+		    v16 > c->max_ageu, EBT_STP_MAXAGE))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_HELLOTIME) {
+		v16 = NR16(stpc->hello_time);
+		if (FWINV(v16 < c->hello_timel ||
+		    v16 > c->hello_timeu, EBT_STP_HELLOTIME))
+			return EBT_NOMATCH;
+	}
+	if (info->bitmask & EBT_STP_FWDD) {
+		v16 = NR16(stpc->forward_delay);
+		if (FWINV(v16 < c->forward_delayl ||
+		    v16 > c->forward_delayu, EBT_STP_FWDD))
+			return EBT_NOMATCH;
+	}
+	return EBT_MATCH;
+}
+
+static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, const void *data, unsigned int datalen)
+{
+	struct ebt_stp_info *info = (struct ebt_stp_info *)data;
+	struct stp_header _stph, *sp;
+	uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
+
+	sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
+	if (sp == NULL)
+		return EBT_NOMATCH;
+
+	/* The stp code only considers these */
+	if (memcmp(sp, header, sizeof(header)))
+		return EBT_NOMATCH;
+
+	if (info->bitmask & EBT_STP_TYPE
+	    && FWINV(info->type != sp->type, EBT_STP_TYPE))
+		return EBT_NOMATCH;
+
+	if (sp->type == BPDU_TYPE_CONFIG &&
+	    info->bitmask & EBT_STP_CONFIG_MASK) {
+		struct stp_config_pdu _stpc, *st;
+
+		st = skb_header_pointer(skb, sizeof(_stph),
+					sizeof(_stpc), &_stpc);
+		if (st == NULL)
+			return EBT_NOMATCH;
+		return ebt_filter_config(info, st);
+	}
+	return EBT_MATCH;
+}
+
+static int ebt_stp_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_stp_info *info = (struct ebt_stp_info *)data;
+	int len = EBT_ALIGN(sizeof(struct ebt_stp_info));
+	uint8_t bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+	uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
+	    !(info->bitmask & EBT_STP_MASK))
+		return -EINVAL;
+	if (datalen != len)
+		return -EINVAL;
+	/* Make sure the match only receives stp frames */
+	if (memcmp(e->destmac, bridge_ula, ETH_ALEN) ||
+	    memcmp(e->destmsk, msk, ETH_ALEN) || !(e->bitmask & EBT_DESTMAC))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct ebt_match filter_stp =
+{
+	.name		= EBT_STP_MATCH,
+	.match		= ebt_filter_stp,
+	.check		= ebt_stp_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ebt_register_match(&filter_stp);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_stp);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
new file mode 100644
index 00000000000..01af4fcef26
--- /dev/null
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -0,0 +1,295 @@
+/*
+ * netfilter module for userspace bridged Ethernet frames logging daemons
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  November, 2004
+ *
+ * Based on ipt_ULOG.c, which is
+ * (C) 2000-2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This module accepts two parameters: 
+ * 
+ * nlbufsiz:
+ *   The parameter specifies how big the buffer for each netlink multicast
+ * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
+ * get accumulated in the kernel until they are sent to userspace. It is
+ * NOT possible to allocate more than 128kB, and it is strongly discouraged,
+ * because atomically allocating 128kB inside the network rx softirq is not
+ * reliable. Please also keep in mind that this buffer size is allocated for
+ * each nlgroup you are using, so the total kernel memory usage increases
+ * by that factor.
+ *
+ * flushtimeout:
+ *   Specify, after how many hundredths of a second the queue should be
+ *   flushed even if it is not full yet.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ulog.h>
+#include <net/sock.h>
+#include "../br_private.h"
+
+#define PRINTR(format, args...) do { if (net_ratelimit()) \
+                                printk(format , ## args); } while (0)
+
+static unsigned int nlbufsiz = 4096;
+module_param(nlbufsiz, uint, 0600);
+MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
+                           "(defaults to 4096)");
+
+static unsigned int flushtimeout = 10;
+module_param(flushtimeout, uint, 0600);
+MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) "
+                               "(defaults to 10)");
+
+typedef struct {
+	unsigned int qlen;		/* number of nlmsgs' in the skb */
+	struct nlmsghdr *lastnlh;	/* netlink header of last msg in skb */
+	struct sk_buff *skb;		/* the pre-allocated skb */
+	struct timer_list timer;	/* the timer function */
+	spinlock_t lock;		/* the per-queue lock */
+} ebt_ulog_buff_t;
+
+static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
+static struct sock *ebtulognl;
+
+/* send one ulog_buff_t to userspace */
+static void ulog_send(unsigned int nlgroup)
+{
+	ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
+
+	if (timer_pending(&ub->timer))
+		del_timer(&ub->timer);
+
+	/* last nlmsg needs NLMSG_DONE */
+	if (ub->qlen > 1)
+		ub->lastnlh->nlmsg_type = NLMSG_DONE;
+
+	NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup;
+	netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC);
+
+	ub->qlen = 0;
+	ub->skb = NULL;
+}
+
+/* timer function to flush queue in flushtimeout time */
+static void ulog_timer(unsigned long data)
+{
+	spin_lock_bh(&ulog_buffers[data].lock);
+	if (ulog_buffers[data].skb)
+		ulog_send(data);
+	spin_unlock_bh(&ulog_buffers[data].lock);
+}
+
+static struct sk_buff *ulog_alloc_skb(unsigned int size)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(nlbufsiz, GFP_ATOMIC);
+	if (!skb) {
+		PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer "
+		       "of size %ub!\n", nlbufsiz);
+		if (size < nlbufsiz) {
+			/* try to allocate only as much as we need for
+			 * current packet */
+			skb = alloc_skb(size, GFP_ATOMIC);
+			if (!skb)
+				PRINTR(KERN_ERR "ebt_ulog: can't even allocate "
+				       "buffer of size %ub\n", size);
+		}
+	}
+
+	return skb;
+}
+
+static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
+   const struct net_device *in, const struct net_device *out,
+   const void *data, unsigned int datalen)
+{
+	ebt_ulog_packet_msg_t *pm;
+	size_t size, copy_len;
+	struct nlmsghdr *nlh;
+	struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
+	unsigned int group = uloginfo->nlgroup;
+	ebt_ulog_buff_t *ub = &ulog_buffers[group];
+	spinlock_t *lock = &ub->lock;
+
+	if ((uloginfo->cprange == 0) ||
+	    (uloginfo->cprange > skb->len + ETH_HLEN))
+		copy_len = skb->len + ETH_HLEN;
+	else
+		copy_len = uloginfo->cprange;
+
+	size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+	if (size > nlbufsiz) {
+		PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n",
+		       size, nlbufsiz);
+		return;
+	}
+
+	spin_lock_bh(lock);
+
+	if (!ub->skb) {
+		if (!(ub->skb = ulog_alloc_skb(size)))
+			goto alloc_failure;
+	} else if (size > skb_tailroom(ub->skb)) {
+		ulog_send(group);
+
+		if (!(ub->skb = ulog_alloc_skb(size)))
+			goto alloc_failure;
+	}
+
+	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, 0,
+	                size - NLMSG_ALIGN(sizeof(*nlh)));
+	ub->qlen++;
+
+	pm = NLMSG_DATA(nlh);
+
+	/* Fill in the ulog data */
+	pm->version = EBT_ULOG_VERSION;
+	do_gettimeofday(&pm->stamp);
+	if (ub->qlen == 1)
+		ub->skb->stamp = pm->stamp;
+	pm->data_len = copy_len;
+	pm->mark = skb->nfmark;
+	pm->hook = hooknr;
+	if (uloginfo->prefix != NULL)
+		strcpy(pm->prefix, uloginfo->prefix);
+	else
+		*(pm->prefix) = '\0';
+
+	if (in) {
+		strcpy(pm->physindev, in->name);
+		/* If in isn't a bridge, then physindev==indev */
+		if (in->br_port)
+			strcpy(pm->indev, in->br_port->br->dev->name);
+		else
+			strcpy(pm->indev, in->name);
+	} else
+		pm->indev[0] = pm->physindev[0] = '\0';
+
+	if (out) {
+		/* If out exists, then out is a bridge port */
+		strcpy(pm->physoutdev, out->name);
+		strcpy(pm->outdev, out->br_port->br->dev->name);
+	} else
+		pm->outdev[0] = pm->physoutdev[0] = '\0';
+
+	if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0)
+		BUG();
+
+	if (ub->qlen > 1)
+		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
+
+	ub->lastnlh = nlh;
+
+	if (ub->qlen >= uloginfo->qthreshold)
+		ulog_send(group);
+	else if (!timer_pending(&ub->timer)) {
+		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
+		add_timer(&ub->timer);
+	}
+
+unlock:
+	spin_unlock_bh(lock);
+
+	return;
+
+nlmsg_failure:
+	printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should "
+	       "not happen, please report to author.\n");
+	goto unlock;
+alloc_failure:
+	goto unlock;
+}
+
+static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
+   const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
+
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) ||
+	    uloginfo->nlgroup > 31)
+		return -EINVAL;
+
+	uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
+
+	if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN)
+		uloginfo->qthreshold = EBT_ULOG_MAX_QLEN;
+
+	return 0;
+}
+
+static struct ebt_watcher ulog = {
+	.name		= EBT_ULOG_WATCHER,
+	.watcher	= ebt_ulog,
+	.check		= ebt_ulog_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int i, ret = 0;
+
+	if (nlbufsiz >= 128*1024) {
+		printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB,"
+		       " please try a smaller nlbufsiz parameter.\n");
+		return -EINVAL;
+	}
+
+	/* initialize ulog_buffers */
+	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
+		init_timer(&ulog_buffers[i].timer);
+		ulog_buffers[i].timer.function = ulog_timer;
+		ulog_buffers[i].timer.data = i;
+		spin_lock_init(&ulog_buffers[i].lock);
+	}
+
+	ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+	if (!ebtulognl)
+		ret = -ENOMEM;
+	else if ((ret = ebt_register_watcher(&ulog)))
+		sock_release(ebtulognl->sk_socket);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ebt_ulog_buff_t *ub;
+	int i;
+
+	ebt_unregister_watcher(&ulog);
+	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
+		ub = &ulog_buffers[i];
+		if (timer_pending(&ub->timer))
+			del_timer(&ub->timer);
+		spin_lock_bh(&ub->lock);
+		if (ub->skb) {
+			kfree_skb(ub->skb);
+			ub->skb = NULL;
+		}
+		spin_unlock_bh(&ub->lock);
+	}
+	sock_release(ebtulognl->sk_socket);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("ebtables userspace logging module for bridged Ethernet"
+                   " frames");
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
new file mode 100644
index 00000000000..db60d734908
--- /dev/null
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -0,0 +1,195 @@
+/*
+ * Description: EBTables 802.1Q match extension kernelspace module.
+ * Authors: Nick Fedchik <nick@fedchik.org.ua>
+ *          Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_vlan.h>
+
+static int debug;
+#define MODULE_VERS "0.6"
+
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages");
+MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>");
+MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v"
+		   MODULE_VERS);
+MODULE_LICENSE("GPL");
+
+
+#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
+#define INV_FLAG(_inv_flag_) (info->invflags & _inv_flag_) ? "!" : ""
+#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
+#define SET_BITMASK(_BIT_MASK_) info->bitmask |= _BIT_MASK_
+#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;}
+
+static int
+ebt_filter_vlan(const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const void *data, unsigned int datalen)
+{
+	struct ebt_vlan_info *info = (struct ebt_vlan_info *) data;
+	struct vlan_hdr _frame, *fp;
+
+	unsigned short TCI;	/* Whole TCI, given from parsed frame */
+	unsigned short id;	/* VLAN ID, given from frame TCI */
+	unsigned char prio;	/* user_priority, given from frame TCI */
+	/* VLAN encapsulated Type/Length field, given from orig frame */
+	unsigned short encap;
+
+	fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
+	if (fp == NULL)
+		return EBT_NOMATCH;
+
+	/* Tag Control Information (TCI) consists of the following elements:
+	 * - User_priority. The user_priority field is three bits in length,
+	 * interpreted as a binary number.
+	 * - Canonical Format Indicator (CFI). The Canonical Format Indicator
+	 * (CFI) is a single bit flag value. Currently ignored.
+	 * - VLAN Identifier (VID). The VID is encoded as
+	 * an unsigned binary number. */
+	TCI = ntohs(fp->h_vlan_TCI);
+	id = TCI & VLAN_VID_MASK;
+	prio = (TCI >> 13) & 0x7;
+	encap = fp->h_vlan_encapsulated_proto;
+
+	/* Checking VLAN Identifier (VID) */
+	if (GET_BITMASK(EBT_VLAN_ID))
+		EXIT_ON_MISMATCH(id, EBT_VLAN_ID);
+
+	/* Checking user_priority */
+	if (GET_BITMASK(EBT_VLAN_PRIO))
+		EXIT_ON_MISMATCH(prio, EBT_VLAN_PRIO);
+
+	/* Checking Encapsulated Proto (Length/Type) field */
+	if (GET_BITMASK(EBT_VLAN_ENCAP))
+		EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP);
+
+	return EBT_MATCH;
+}
+
+static int
+ebt_check_vlan(const char *tablename,
+	       unsigned int hooknr,
+	       const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+	struct ebt_vlan_info *info = (struct ebt_vlan_info *) data;
+
+	/* Parameters buffer overflow check */
+	if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) {
+		DEBUG_MSG
+		    ("passed size %d is not eq to ebt_vlan_info (%Zd)\n",
+		     datalen, sizeof(struct ebt_vlan_info));
+		return -EINVAL;
+	}
+
+	/* Is it 802.1Q frame checked? */
+	if (e->ethproto != htons(ETH_P_8021Q)) {
+		DEBUG_MSG
+		    ("passed entry proto %2.4X is not 802.1Q (8100)\n",
+		     (unsigned short) ntohs(e->ethproto));
+		return -EINVAL;
+	}
+
+	/* Check for bitmask range
+	 * True if even one bit is out of mask */
+	if (info->bitmask & ~EBT_VLAN_MASK) {
+		DEBUG_MSG("bitmask %2X is out of mask (%2X)\n",
+			  info->bitmask, EBT_VLAN_MASK);
+		return -EINVAL;
+	}
+
+	/* Check for inversion flags range */
+	if (info->invflags & ~EBT_VLAN_MASK) {
+		DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n",
+			  info->invflags, EBT_VLAN_MASK);
+		return -EINVAL;
+	}
+
+	/* Reserved VLAN ID (VID) values
+	 * -----------------------------
+	 * 0 - The null VLAN ID. 
+	 * 1 - The default Port VID (PVID)
+	 * 0x0FFF - Reserved for implementation use. 
+	 * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
+	if (GET_BITMASK(EBT_VLAN_ID)) {
+		if (!!info->id) { /* if id!=0 => check vid range */
+			if (info->id > VLAN_GROUP_ARRAY_LEN) {
+				DEBUG_MSG
+				    ("id %d is out of range (1-4096)\n",
+				     info->id);
+				return -EINVAL;
+			}
+			/* Note: This is valid VLAN-tagged frame point.
+			 * Any value of user_priority are acceptable, 
+			 * but should be ignored according to 802.1Q Std.
+			 * So we just drop the prio flag. */
+			info->bitmask &= ~EBT_VLAN_PRIO;
+		}
+		/* Else, id=0 (null VLAN ID)  => user_priority range (any?) */
+	}
+
+	if (GET_BITMASK(EBT_VLAN_PRIO)) {
+		if ((unsigned char) info->prio > 7) {
+			DEBUG_MSG("prio %d is out of range (0-7)\n",
+			     info->prio);
+			return -EINVAL;
+		}
+	}
+	/* Check for encapsulated proto range - it is possible to be
+	 * any value for u_short range.
+	 * if_ether.h:  ETH_ZLEN        60   -  Min. octets in frame sans FCS */
+	if (GET_BITMASK(EBT_VLAN_ENCAP)) {
+		if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
+			DEBUG_MSG
+			    ("encap frame length %d is less than minimal\n",
+			     ntohs(info->encap));
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static struct ebt_match filter_vlan = {
+	.name		= EBT_VLAN_MATCH,
+	.match		= ebt_filter_vlan,
+	.check		= ebt_check_vlan,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	DEBUG_MSG("ebtables 802.1Q extension module v"
+		  MODULE_VERS "\n");
+	DEBUG_MSG("module debug=%d\n", !!debug);
+	return ebt_register_match(&filter_vlan);
+}
+
+static void __exit fini(void)
+{
+	ebt_unregister_match(&filter_vlan);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
new file mode 100644
index 00000000000..1767c94cd3d
--- /dev/null
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -0,0 +1,86 @@
+/*
+ *  ebtable_broute
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ *  This table lets you choose between routing and bridging for frames
+ *  entering on a bridge enslaved nic. This table is traversed before any
+ *  other ebtables table. See net/bridge/br_input.c.
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/module.h>
+#include <linux/if_bridge.h>
+
+/* EBT_ACCEPT means the frame will be bridged
+ * EBT_DROP means the frame will be routed
+ */
+static struct ebt_entries initial_chain = {
+	.name		= "BROUTING",
+	.policy		= EBT_ACCEPT,
+};
+
+static struct ebt_replace initial_table =
+{
+	.name		= "broute",
+	.valid_hooks	= 1 << NF_BR_BROUTING,
+	.entries_size	= sizeof(struct ebt_entries),
+	.hook_entry	= {
+		[NF_BR_BROUTING]	= &initial_chain,
+	},
+	.entries	= (char *)&initial_chain,
+};
+
+static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+{
+	if (valid_hooks & ~(1 << NF_BR_BROUTING))
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_table broute_table =
+{
+	.name		= "broute",
+	.table		= &initial_table,
+	.valid_hooks	= 1 << NF_BR_BROUTING,
+	.lock		= RW_LOCK_UNLOCKED,
+	.check		= check,
+	.me		= THIS_MODULE,
+};
+
+static int ebt_broute(struct sk_buff **pskb)
+{
+	int ret;
+
+	ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL,
+	   &broute_table);
+	if (ret == NF_DROP)
+		return 1; /* route it */
+	return 0; /* bridge it */
+}
+
+static int __init init(void)
+{
+	int ret;
+
+	ret = ebt_register_table(&broute_table);
+	if (ret < 0)
+		return ret;
+	/* see br_input.c */
+	br_should_route_hook = ebt_broute;
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	br_should_route_hook = NULL;
+	synchronize_net();
+	ebt_unregister_table(&broute_table);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
new file mode 100644
index 00000000000..c18666e0392
--- /dev/null
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -0,0 +1,123 @@
+/*
+ *  ebtable_filter
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/module.h>
+
+#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
+   (1 << NF_BR_LOCAL_OUT))
+
+static struct ebt_entries initial_chains[] =
+{
+	{
+		.name	= "INPUT",
+		.policy	= EBT_ACCEPT,
+	},
+	{
+		.name	= "FORWARD",
+		.policy	= EBT_ACCEPT,
+	},
+	{
+		.name	= "OUTPUT",
+		.policy	= EBT_ACCEPT,
+	},
+};
+
+static struct ebt_replace initial_table =
+{
+	.name		= "filter",
+	.valid_hooks	= FILTER_VALID_HOOKS,
+	.entries_size	= 3 * sizeof(struct ebt_entries),
+	.hook_entry	= {
+		[NF_BR_LOCAL_IN]	= &initial_chains[0],
+		[NF_BR_FORWARD]		= &initial_chains[1],
+		[NF_BR_LOCAL_OUT]	= &initial_chains[2],
+	},
+	.entries	= (char *)initial_chains,
+};
+
+static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+{
+	if (valid_hooks & ~FILTER_VALID_HOOKS)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_table frame_filter =
+{ 
+	.name		= "filter",
+	.table		= &initial_table,
+	.valid_hooks	= FILTER_VALID_HOOKS, 
+	.lock		= RW_LOCK_UNLOCKED,
+	.check		= check,
+	.me		= THIS_MODULE,
+};
+
+static unsigned int
+ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+   const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+	return ebt_do_table(hook, pskb, in, out, &frame_filter);
+}
+
+static struct nf_hook_ops ebt_ops_filter[] = {
+	{
+		.hook		= ebt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_LOCAL_IN,
+		.priority	= NF_BR_PRI_FILTER_BRIDGED,
+	},
+	{
+		.hook		= ebt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_FORWARD,
+		.priority	= NF_BR_PRI_FILTER_BRIDGED,
+	},
+	{
+		.hook		= ebt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_LOCAL_OUT,
+		.priority	= NF_BR_PRI_FILTER_OTHER,
+	},
+};
+
+static int __init init(void)
+{
+	int i, j, ret;
+
+	ret = ebt_register_table(&frame_filter);
+	if (ret < 0)
+		return ret;
+	for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++)
+		if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0)
+			goto cleanup;
+	return ret;
+cleanup:
+	for (j = 0; j < i; j++)
+		nf_unregister_hook(&ebt_ops_filter[j]);
+	ebt_unregister_table(&frame_filter);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++)
+		nf_unregister_hook(&ebt_ops_filter[i]);
+	ebt_unregister_table(&frame_filter);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
new file mode 100644
index 00000000000..828cac2cc4a
--- /dev/null
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -0,0 +1,130 @@
+/*
+ *  ebtable_nat
+ *
+ *	Authors:
+ *	Bart De Schuymer <bdschuym@pandora.be>
+ *
+ *  April, 2002
+ *
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/module.h>
+
+#define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \
+   (1 << NF_BR_POST_ROUTING))
+
+static struct ebt_entries initial_chains[] =
+{
+	{
+		.name	= "PREROUTING",
+		.policy	= EBT_ACCEPT,
+	},
+	{
+		.name	= "OUTPUT",
+		.policy	= EBT_ACCEPT,
+	},
+	{
+		.name	= "POSTROUTING",
+		.policy	= EBT_ACCEPT,
+	}
+};
+
+static struct ebt_replace initial_table =
+{
+	.name		= "nat",
+	.valid_hooks	= NAT_VALID_HOOKS,
+	.entries_size	= 3 * sizeof(struct ebt_entries),
+	.hook_entry	= {
+		[NF_BR_PRE_ROUTING]	= &initial_chains[0],
+		[NF_BR_LOCAL_OUT]	= &initial_chains[1],
+		[NF_BR_POST_ROUTING]	= &initial_chains[2],
+	},
+	.entries	= (char *)initial_chains,
+};
+
+static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+{
+	if (valid_hooks & ~NAT_VALID_HOOKS)
+		return -EINVAL;
+	return 0;
+}
+
+static struct ebt_table frame_nat =
+{
+	.name		= "nat",
+	.table		= &initial_table,
+	.valid_hooks	= NAT_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.check		= check,
+	.me		= THIS_MODULE,
+};
+
+static unsigned int
+ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+}
+
+static unsigned int
+ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+}
+
+static struct nf_hook_ops ebt_ops_nat[] = {
+	{
+		.hook		= ebt_nat_dst,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_LOCAL_OUT,
+		.priority	= NF_BR_PRI_NAT_DST_OTHER,
+	},
+	{
+		.hook		= ebt_nat_src,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_POST_ROUTING,
+		.priority	= NF_BR_PRI_NAT_SRC,
+	},
+	{
+		.hook		= ebt_nat_dst,
+		.owner		= THIS_MODULE,
+		.pf		= PF_BRIDGE,
+		.hooknum	= NF_BR_PRE_ROUTING,
+		.priority	= NF_BR_PRI_NAT_DST_BRIDGED,
+	},
+};
+
+static int __init init(void)
+{
+	int i, ret, j;
+
+	ret = ebt_register_table(&frame_nat);
+	if (ret < 0)
+		return ret;
+	for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++)
+		if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0)
+			goto cleanup;
+	return ret;
+cleanup:
+	for (j = 0; j < i; j++)
+		nf_unregister_hook(&ebt_ops_nat[j]);
+	ebt_unregister_table(&frame_nat);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++)
+		nf_unregister_hook(&ebt_ops_nat[i]);
+	ebt_unregister_table(&frame_nat);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
new file mode 100644
index 00000000000..18ebc664769
--- /dev/null
+++ b/net/bridge/netfilter/ebtables.c
@@ -0,0 +1,1507 @@
+/*
+ *  ebtables
+ *
+ *  Author:
+ *  Bart De Schuymer		<bdschuym@pandora.be>
+ *
+ *  ebtables.c,v 2.0, July, 2002
+ *
+ *  This code is stongly inspired on the iptables code which is
+ *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+/* used for print_string */
+#include <linux/sched.h>
+#include <linux/tty.h>
+
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+#include <linux/smp.h>
+#include <net/sock.h>
+/* needed for logical [in,out]-dev filtering */
+#include "../br_private.h"
+
+/* list_named_find */
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* use this for remote debugging
+ * Copyright (C) 1998 by Ori Pomerantz
+ * Print the string to the appropriate tty, the one
+ * the current task uses
+ */
+static void print_string(char *str)
+{
+	struct tty_struct *my_tty;
+
+	/* The tty for the current task */
+	my_tty = current->signal->tty;
+	if (my_tty != NULL) {
+		my_tty->driver->write(my_tty, 0, str, strlen(str));
+		my_tty->driver->write(my_tty, 0, "\015\012", 2);
+	}
+}
+
+#define BUGPRINT(args) print_string(args);
+#else
+#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
+                                         "report to author: "format, ## args)
+/* #define BUGPRINT(format, args...) */
+#endif
+#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
+                                         ": out of memory: "format, ## args)
+/* #define MEMPRINT(format, args...) */
+
+
+
+/*
+ * Each cpu has its own set of counters, so there is no need for write_lock in
+ * the softirq
+ * For reading or updating the counters, the user context needs to
+ * get a write_lock
+ */
+
+/* The size of each set of counters is altered to get cache alignment */
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+#define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter)))
+#define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \
+   COUNTER_OFFSET(n) * cpu))
+
+
+
+static DECLARE_MUTEX(ebt_mutex);
+static LIST_HEAD(ebt_tables);
+static LIST_HEAD(ebt_targets);
+static LIST_HEAD(ebt_matches);
+static LIST_HEAD(ebt_watchers);
+
+static struct ebt_target ebt_standard_target =
+{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL};
+
+static inline int ebt_do_watcher (struct ebt_entry_watcher *w,
+   const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in,
+   const struct net_device *out)
+{
+	w->u.watcher->watcher(skb, hooknr, in, out, w->data,
+	   w->watcher_size);
+	/* watchers don't give a verdict */
+	return 0;
+}
+
+static inline int ebt_do_match (struct ebt_entry_match *m,
+   const struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out)
+{
+	return m->u.match->match(skb, in, out, m->data,
+	   m->match_size);
+}
+
+static inline int ebt_dev_check(char *entry, const struct net_device *device)
+{
+	int i = 0;
+	char *devname = device->name;
+
+	if (*entry == '\0')
+		return 0;
+	if (!device)
+		return 1;
+	/* 1 is the wildcard token */
+	while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i])
+		i++;
+	return (devname[i] != entry[i] && entry[i] != 1);
+}
+
+#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
+/* process standard matches */
+static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
+   const struct net_device *in, const struct net_device *out)
+{
+	int verdict, i;
+
+	if (e->bitmask & EBT_802_3) {
+		if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO))
+			return 1;
+	} else if (!(e->bitmask & EBT_NOPROTO) &&
+	   FWINV2(e->ethproto != h->h_proto, EBT_IPROTO))
+		return 1;
+
+	if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
+		return 1;
+	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
+		return 1;
+	if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check(
+	   e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN))
+		return 1;
+	if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check(
+	   e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT))
+		return 1;
+
+	if (e->bitmask & EBT_SOURCEMAC) {
+		verdict = 0;
+		for (i = 0; i < 6; i++)
+			verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
+			   e->sourcemsk[i];
+		if (FWINV2(verdict != 0, EBT_ISOURCE) )
+			return 1;
+	}
+	if (e->bitmask & EBT_DESTMAC) {
+		verdict = 0;
+		for (i = 0; i < 6; i++)
+			verdict |= (h->h_dest[i] ^ e->destmac[i]) &
+			   e->destmsk[i];
+		if (FWINV2(verdict != 0, EBT_IDEST) )
+			return 1;
+	}
+	return 0;
+}
+
+/* Do some firewalling */
+unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
+   const struct net_device *in, const struct net_device *out,
+   struct ebt_table *table)
+{
+	int i, nentries;
+	struct ebt_entry *point;
+	struct ebt_counter *counter_base, *cb_base;
+	struct ebt_entry_target *t;
+	int verdict, sp = 0;
+	struct ebt_chainstack *cs;
+	struct ebt_entries *chaininfo;
+	char *base;
+	struct ebt_table_info *private;
+
+	read_lock_bh(&table->lock);
+	private = table->private;
+	cb_base = COUNTER_BASE(private->counters, private->nentries,
+	   smp_processor_id());
+	if (private->chainstack)
+		cs = private->chainstack[smp_processor_id()];
+	else
+		cs = NULL;
+	chaininfo = private->hook_entry[hook];
+	nentries = private->hook_entry[hook]->nentries;
+	point = (struct ebt_entry *)(private->hook_entry[hook]->data);
+	counter_base = cb_base + private->hook_entry[hook]->counter_offset;
+	/* base for chain jumps */
+	base = private->entries;
+	i = 0;
+	while (i < nentries) {
+		if (ebt_basic_match(point, eth_hdr(*pskb), in, out))
+			goto letscontinue;
+
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0)
+			goto letscontinue;
+
+		/* increase counter */
+		(*(counter_base + i)).pcnt++;
+		(*(counter_base + i)).bcnt+=(**pskb).len;
+
+		/* these should only watch: not modify, nor tell us
+		   what to do with the packet */
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in,
+		   out);
+
+		t = (struct ebt_entry_target *)
+		   (((char *)point) + point->target_offset);
+		/* standard target */
+		if (!t->u.target->target)
+			verdict = ((struct ebt_standard_target *)t)->verdict;
+		else
+			verdict = t->u.target->target(pskb, hook,
+			   in, out, t->data, t->target_size);
+		if (verdict == EBT_ACCEPT) {
+			read_unlock_bh(&table->lock);
+			return NF_ACCEPT;
+		}
+		if (verdict == EBT_DROP) {
+			read_unlock_bh(&table->lock);
+			return NF_DROP;
+		}
+		if (verdict == EBT_RETURN) {
+letsreturn:
+#ifdef CONFIG_NETFILTER_DEBUG
+			if (sp == 0) {
+				BUGPRINT("RETURN on base chain");
+				/* act like this is EBT_CONTINUE */
+				goto letscontinue;
+			}
+#endif
+			sp--;
+			/* put all the local variables right */
+			i = cs[sp].n;
+			chaininfo = cs[sp].chaininfo;
+			nentries = chaininfo->nentries;
+			point = cs[sp].e;
+			counter_base = cb_base +
+			   chaininfo->counter_offset;
+			continue;
+		}
+		if (verdict == EBT_CONTINUE)
+			goto letscontinue;
+#ifdef CONFIG_NETFILTER_DEBUG
+		if (verdict < 0) {
+			BUGPRINT("bogus standard verdict\n");
+			read_unlock_bh(&table->lock);
+			return NF_DROP;
+		}
+#endif
+		/* jump to a udc */
+		cs[sp].n = i + 1;
+		cs[sp].chaininfo = chaininfo;
+		cs[sp].e = (struct ebt_entry *)
+		   (((char *)point) + point->next_offset);
+		i = 0;
+		chaininfo = (struct ebt_entries *) (base + verdict);
+#ifdef CONFIG_NETFILTER_DEBUG
+		if (chaininfo->distinguisher) {
+			BUGPRINT("jump to non-chain\n");
+			read_unlock_bh(&table->lock);
+			return NF_DROP;
+		}
+#endif
+		nentries = chaininfo->nentries;
+		point = (struct ebt_entry *)chaininfo->data;
+		counter_base = cb_base + chaininfo->counter_offset;
+		sp++;
+		continue;
+letscontinue:
+		point = (struct ebt_entry *)
+		   (((char *)point) + point->next_offset);
+		i++;
+	}
+
+	/* I actually like this :) */
+	if (chaininfo->policy == EBT_RETURN)
+		goto letsreturn;
+	if (chaininfo->policy == EBT_ACCEPT) {
+		read_unlock_bh(&table->lock);
+		return NF_ACCEPT;
+	}
+	read_unlock_bh(&table->lock);
+	return NF_DROP;
+}
+
+/* If it succeeds, returns element and locks mutex */
+static inline void *
+find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
+   struct semaphore *mutex)
+{
+	void *ret;
+
+	*error = down_interruptible(mutex);
+	if (*error != 0)
+		return NULL;
+
+	ret = list_named_find(head, name);
+	if (!ret) {
+		*error = -ENOENT;
+		up(mutex);
+	}
+	return ret;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+static void *
+find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
+   int *error, struct semaphore *mutex)
+{
+	void *ret;
+
+	ret = find_inlist_lock_noload(head, name, error, mutex);
+	if (!ret) {
+		request_module("%s%s", prefix, name);
+		ret = find_inlist_lock_noload(head, name, error, mutex);
+	}
+	return ret;
+}
+#endif
+
+static inline struct ebt_table *
+find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex);
+}
+
+static inline struct ebt_match *
+find_match_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex);
+}
+
+static inline struct ebt_watcher *
+find_watcher_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex);
+}
+
+static inline struct ebt_target *
+find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex);
+}
+
+static inline int
+ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e,
+   const char *name, unsigned int hookmask, unsigned int *cnt)
+{
+	struct ebt_match *match;
+	int ret;
+
+	if (((char *)m) + m->match_size + sizeof(struct ebt_entry_match) >
+	   ((char *)e) + e->watchers_offset)
+		return -EINVAL;
+	match = find_match_lock(m->u.name, &ret, &ebt_mutex);
+	if (!match)
+		return ret;
+	m->u.match = match;
+	if (!try_module_get(match->me)) {
+		up(&ebt_mutex);
+		return -ENOENT;
+	}
+	up(&ebt_mutex);
+	if (match->check &&
+	   match->check(name, hookmask, e, m->data, m->match_size) != 0) {
+		BUGPRINT("match->check failed\n");
+		module_put(match->me);
+		return -EINVAL;
+	}
+	(*cnt)++;
+	return 0;
+}
+
+static inline int
+ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e,
+   const char *name, unsigned int hookmask, unsigned int *cnt)
+{
+	struct ebt_watcher *watcher;
+	int ret;
+
+	if (((char *)w) + w->watcher_size + sizeof(struct ebt_entry_watcher) >
+	   ((char *)e) + e->target_offset)
+		return -EINVAL;
+	watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex);
+	if (!watcher)
+		return ret;
+	w->u.watcher = watcher;
+	if (!try_module_get(watcher->me)) {
+		up(&ebt_mutex);
+		return -ENOENT;
+	}
+	up(&ebt_mutex);
+	if (watcher->check &&
+	   watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) {
+		BUGPRINT("watcher->check failed\n");
+		module_put(watcher->me);
+		return -EINVAL;
+	}
+	(*cnt)++;
+	return 0;
+}
+
+/*
+ * this one is very careful, as it is the first function
+ * to parse the userspace data
+ */
+static inline int
+ebt_check_entry_size_and_hooks(struct ebt_entry *e,
+   struct ebt_table_info *newinfo, char *base, char *limit,
+   struct ebt_entries **hook_entries, unsigned int *n, unsigned int *cnt,
+   unsigned int *totalcnt, unsigned int *udc_cnt, unsigned int valid_hooks)
+{
+	int i;
+
+	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+		if ((valid_hooks & (1 << i)) == 0)
+			continue;
+		if ( (char *)hook_entries[i] - base ==
+		   (char *)e - newinfo->entries)
+			break;
+	}
+	/* beginning of a new chain
+	   if i == NF_BR_NUMHOOKS it must be a user defined chain */
+	if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
+		if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) != 0) {
+			/* we make userspace set this right,
+			   so there is no misunderstanding */
+			BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
+			         "in distinguisher\n");
+			return -EINVAL;
+		}
+		/* this checks if the previous chain has as many entries
+		   as it said it has */
+		if (*n != *cnt) {
+			BUGPRINT("nentries does not equal the nr of entries "
+		                 "in the chain\n");
+			return -EINVAL;
+		}
+		/* before we look at the struct, be sure it is not too big */
+		if ((char *)hook_entries[i] + sizeof(struct ebt_entries)
+		   > limit) {
+			BUGPRINT("entries_size too small\n");
+			return -EINVAL;
+		}
+		if (((struct ebt_entries *)e)->policy != EBT_DROP &&
+		   ((struct ebt_entries *)e)->policy != EBT_ACCEPT) {
+			/* only RETURN from udc */
+			if (i != NF_BR_NUMHOOKS ||
+			   ((struct ebt_entries *)e)->policy != EBT_RETURN) {
+				BUGPRINT("bad policy\n");
+				return -EINVAL;
+			}
+		}
+		if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */
+			(*udc_cnt)++;
+		else
+			newinfo->hook_entry[i] = (struct ebt_entries *)e;
+		if (((struct ebt_entries *)e)->counter_offset != *totalcnt) {
+			BUGPRINT("counter_offset != totalcnt");
+			return -EINVAL;
+		}
+		*n = ((struct ebt_entries *)e)->nentries;
+		*cnt = 0;
+		return 0;
+	}
+	/* a plain old entry, heh */
+	if (sizeof(struct ebt_entry) > e->watchers_offset ||
+	   e->watchers_offset > e->target_offset ||
+	   e->target_offset >= e->next_offset) {
+		BUGPRINT("entry offsets not in right order\n");
+		return -EINVAL;
+	}
+	/* this is not checked anywhere else */
+	if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) {
+		BUGPRINT("target size too small\n");
+		return -EINVAL;
+	}
+
+	(*cnt)++;
+	(*totalcnt)++;
+	return 0;
+}
+
+struct ebt_cl_stack
+{
+	struct ebt_chainstack cs;
+	int from;
+	unsigned int hookmask;
+};
+
+/*
+ * we need these positions to check that the jumps to a different part of the
+ * entries is a jump to the beginning of a new chain.
+ */
+static inline int
+ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
+   struct ebt_entries **hook_entries, unsigned int *n, unsigned int valid_hooks,
+   struct ebt_cl_stack *udc)
+{
+	int i;
+
+	/* we're only interested in chain starts */
+	if (e->bitmask & EBT_ENTRY_OR_ENTRIES)
+		return 0;
+	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+		if ((valid_hooks & (1 << i)) == 0)
+			continue;
+		if (newinfo->hook_entry[i] == (struct ebt_entries *)e)
+			break;
+	}
+	/* only care about udc */
+	if (i != NF_BR_NUMHOOKS)
+		return 0;
+
+	udc[*n].cs.chaininfo = (struct ebt_entries *)e;
+	/* these initialisations are depended on later in check_chainloops() */
+	udc[*n].cs.n = 0;
+	udc[*n].hookmask = 0;
+
+	(*n)++;
+	return 0;
+}
+
+static inline int
+ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+	if (m->u.match->destroy)
+		m->u.match->destroy(m->data, m->match_size);
+	module_put(m->u.match->me);
+
+	return 0;
+}
+
+static inline int
+ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+	if (w->u.watcher->destroy)
+		w->u.watcher->destroy(w->data, w->watcher_size);
+	module_put(w->u.watcher->me);
+
+	return 0;
+}
+
+static inline int
+ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
+{
+	struct ebt_entry_target *t;
+
+	if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+		return 0;
+	/* we're done */
+	if (cnt && (*cnt)-- == 0)
+		return 1;
+	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL);
+	EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL);
+	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	if (t->u.target->destroy)
+		t->u.target->destroy(t->data, t->target_size);
+	module_put(t->u.target->me);
+
+	return 0;
+}
+
+static inline int
+ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
+   const char *name, unsigned int *cnt, unsigned int valid_hooks,
+   struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
+{
+	struct ebt_entry_target *t;
+	struct ebt_target *target;
+	unsigned int i, j, hook = 0, hookmask = 0;
+	int ret;
+
+	/* don't mess with the struct ebt_entries */
+	if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+		return 0;
+
+	if (e->bitmask & ~EBT_F_MASK) {
+		BUGPRINT("Unknown flag for bitmask\n");
+		return -EINVAL;
+	}
+	if (e->invflags & ~EBT_INV_MASK) {
+		BUGPRINT("Unknown flag for inv bitmask\n");
+		return -EINVAL;
+	}
+	if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) {
+		BUGPRINT("NOPROTO & 802_3 not allowed\n");
+		return -EINVAL;
+	}
+	/* what hook do we belong to? */
+	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+		if ((valid_hooks & (1 << i)) == 0)
+			continue;
+		if ((char *)newinfo->hook_entry[i] < (char *)e)
+			hook = i;
+		else
+			break;
+	}
+	/* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on
+	   a base chain */
+	if (i < NF_BR_NUMHOOKS)
+		hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS);
+	else {
+		for (i = 0; i < udc_cnt; i++)
+			if ((char *)(cl_s[i].cs.chaininfo) > (char *)e)
+				break;
+		if (i == 0)
+			hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS);
+		else
+			hookmask = cl_s[i - 1].hookmask;
+	}
+	i = 0;
+	ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i);
+	if (ret != 0)
+		goto cleanup_matches;
+	j = 0;
+	ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j);
+	if (ret != 0)
+		goto cleanup_watchers;
+	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	target = find_target_lock(t->u.name, &ret, &ebt_mutex);
+	if (!target)
+		goto cleanup_watchers;
+	if (!try_module_get(target->me)) {
+		up(&ebt_mutex);
+		ret = -ENOENT;
+		goto cleanup_watchers;
+	}
+	up(&ebt_mutex);
+
+	t->u.target = target;
+	if (t->u.target == &ebt_standard_target) {
+		if (e->target_offset + sizeof(struct ebt_standard_target) >
+		   e->next_offset) {
+			BUGPRINT("Standard target size too big\n");
+			ret = -EFAULT;
+			goto cleanup_watchers;
+		}
+		if (((struct ebt_standard_target *)t)->verdict <
+		   -NUM_STANDARD_TARGETS) {
+			BUGPRINT("Invalid standard target\n");
+			ret = -EFAULT;
+			goto cleanup_watchers;
+		}
+	} else if ((e->target_offset + t->target_size +
+	   sizeof(struct ebt_entry_target) > e->next_offset) ||
+	   (t->u.target->check &&
+	   t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){
+		module_put(t->u.target->me);
+		ret = -EFAULT;
+		goto cleanup_watchers;
+	}
+	(*cnt)++;
+	return 0;
+cleanup_watchers:
+	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j);
+cleanup_matches:
+	EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i);
+	return ret;
+}
+
+/*
+ * checks for loops and sets the hook mask for udc
+ * the hook mask for udc tells us from which base chains the udc can be
+ * accessed. This mask is a parameter to the check() functions of the extensions
+ */
+static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s,
+   unsigned int udc_cnt, unsigned int hooknr, char *base)
+{
+	int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict;
+	struct ebt_entry *e = (struct ebt_entry *)chain->data;
+	struct ebt_entry_target *t;
+
+	while (pos < nentries || chain_nr != -1) {
+		/* end of udc, go back one 'recursion' step */
+		if (pos == nentries) {
+			/* put back values of the time when this chain was called */
+			e = cl_s[chain_nr].cs.e;
+			if (cl_s[chain_nr].from != -1)
+				nentries =
+				cl_s[cl_s[chain_nr].from].cs.chaininfo->nentries;
+			else
+				nentries = chain->nentries;
+			pos = cl_s[chain_nr].cs.n;
+			/* make sure we won't see a loop that isn't one */
+			cl_s[chain_nr].cs.n = 0;
+			chain_nr = cl_s[chain_nr].from;
+			if (pos == nentries)
+				continue;
+		}
+		t = (struct ebt_entry_target *)
+		   (((char *)e) + e->target_offset);
+		if (strcmp(t->u.name, EBT_STANDARD_TARGET))
+			goto letscontinue;
+		if (e->target_offset + sizeof(struct ebt_standard_target) >
+		   e->next_offset) {
+			BUGPRINT("Standard target size too big\n");
+			return -1;
+		}
+		verdict = ((struct ebt_standard_target *)t)->verdict;
+		if (verdict >= 0) { /* jump to another chain */
+			struct ebt_entries *hlp2 =
+			   (struct ebt_entries *)(base + verdict);
+			for (i = 0; i < udc_cnt; i++)
+				if (hlp2 == cl_s[i].cs.chaininfo)
+					break;
+			/* bad destination or loop */
+			if (i == udc_cnt) {
+				BUGPRINT("bad destination\n");
+				return -1;
+			}
+			if (cl_s[i].cs.n) {
+				BUGPRINT("loop\n");
+				return -1;
+			}
+			/* this can't be 0, so the above test is correct */
+			cl_s[i].cs.n = pos + 1;
+			pos = 0;
+			cl_s[i].cs.e = ((void *)e + e->next_offset);
+			e = (struct ebt_entry *)(hlp2->data);
+			nentries = hlp2->nentries;
+			cl_s[i].from = chain_nr;
+			chain_nr = i;
+			/* this udc is accessible from the base chain for hooknr */
+			cl_s[i].hookmask |= (1 << hooknr);
+			continue;
+		}
+letscontinue:
+		e = (void *)e + e->next_offset;
+		pos++;
+	}
+	return 0;
+}
+
+/* do the parsing of the table/chains/entries/matches/watchers/targets, heh */
+static int translate_table(struct ebt_replace *repl,
+   struct ebt_table_info *newinfo)
+{
+	unsigned int i, j, k, udc_cnt;
+	int ret;
+	struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */
+
+	i = 0;
+	while (i < NF_BR_NUMHOOKS && !(repl->valid_hooks & (1 << i)))
+		i++;
+	if (i == NF_BR_NUMHOOKS) {
+		BUGPRINT("No valid hooks specified\n");
+		return -EINVAL;
+	}
+	if (repl->hook_entry[i] != (struct ebt_entries *)repl->entries) {
+		BUGPRINT("Chains don't start at beginning\n");
+		return -EINVAL;
+	}
+	/* make sure chains are ordered after each other in same order
+	   as their corresponding hooks */
+	for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
+		if (!(repl->valid_hooks & (1 << j)))
+			continue;
+		if ( repl->hook_entry[j] <= repl->hook_entry[i] ) {
+			BUGPRINT("Hook order must be followed\n");
+			return -EINVAL;
+		}
+		i = j;
+	}
+
+	for (i = 0; i < NF_BR_NUMHOOKS; i++)
+		newinfo->hook_entry[i] = NULL;
+
+	newinfo->entries_size = repl->entries_size;
+	newinfo->nentries = repl->nentries;
+
+	/* do some early checkings and initialize some things */
+	i = 0; /* holds the expected nr. of entries for the chain */
+	j = 0; /* holds the up to now counted entries for the chain */
+	k = 0; /* holds the total nr. of entries, should equal
+	          newinfo->nentries afterwards */
+	udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */
+	ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
+	   ebt_check_entry_size_and_hooks, newinfo, repl->entries,
+	   repl->entries + repl->entries_size, repl->hook_entry, &i, &j, &k,
+	   &udc_cnt, repl->valid_hooks);
+
+	if (ret != 0)
+		return ret;
+
+	if (i != j) {
+		BUGPRINT("nentries does not equal the nr of entries in the "
+		         "(last) chain\n");
+		return -EINVAL;
+	}
+	if (k != newinfo->nentries) {
+		BUGPRINT("Total nentries is wrong\n");
+		return -EINVAL;
+	}
+
+	/* check if all valid hooks have a chain */
+	for (i = 0; i < NF_BR_NUMHOOKS; i++) {
+		if (newinfo->hook_entry[i] == NULL &&
+		   (repl->valid_hooks & (1 << i))) {
+			BUGPRINT("Valid hook without chain\n");
+			return -EINVAL;
+		}
+	}
+
+	/* get the location of the udc, put them in an array
+	   while we're at it, allocate the chainstack */
+	if (udc_cnt) {
+		/* this will get free'd in do_replace()/ebt_register_table()
+		   if an error occurs */
+		newinfo->chainstack = (struct ebt_chainstack **)
+		   vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack));
+		if (!newinfo->chainstack)
+			return -ENOMEM;
+		for (i = 0; i < num_possible_cpus(); i++) {
+			newinfo->chainstack[i] =
+			   vmalloc(udc_cnt * sizeof(struct ebt_chainstack));
+			if (!newinfo->chainstack[i]) {
+				while (i)
+					vfree(newinfo->chainstack[--i]);
+				vfree(newinfo->chainstack);
+				newinfo->chainstack = NULL;
+				return -ENOMEM;
+			}
+		}
+
+		cl_s = (struct ebt_cl_stack *)
+		   vmalloc(udc_cnt * sizeof(struct ebt_cl_stack));
+		if (!cl_s)
+			return -ENOMEM;
+		i = 0; /* the i'th udc */
+		EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
+		   ebt_get_udc_positions, newinfo, repl->hook_entry, &i,
+		   repl->valid_hooks, cl_s);
+		/* sanity check */
+		if (i != udc_cnt) {
+			BUGPRINT("i != udc_cnt\n");
+			vfree(cl_s);
+			return -EFAULT;
+		}
+	}
+
+	/* Check for loops */
+	for (i = 0; i < NF_BR_NUMHOOKS; i++)
+		if (repl->valid_hooks & (1 << i))
+			if (check_chainloops(newinfo->hook_entry[i],
+			   cl_s, udc_cnt, i, newinfo->entries)) {
+				if (cl_s)
+					vfree(cl_s);
+				return -EINVAL;
+			}
+
+	/* we now know the following (along with E=mc�):
+	   - the nr of entries in each chain is right
+	   - the size of the allocated space is right
+	   - all valid hooks have a corresponding chain
+	   - there are no loops
+	   - wrong data can still be on the level of a single entry
+	   - could be there are jumps to places that are not the
+	     beginning of a chain. This can only occur in chains that
+	     are not accessible from any base chains, so we don't care. */
+
+	/* used to know what we need to clean up if something goes wrong */
+	i = 0;
+	ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
+	   ebt_check_entry, newinfo, repl->name, &i, repl->valid_hooks,
+	   cl_s, udc_cnt);
+	if (ret != 0) {
+		EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
+		   ebt_cleanup_entry, &i);
+	}
+	if (cl_s)
+		vfree(cl_s);
+	return ret;
+}
+
+/* called under write_lock */
+static void get_counters(struct ebt_counter *oldcounters,
+   struct ebt_counter *counters, unsigned int nentries)
+{
+	int i, cpu;
+	struct ebt_counter *counter_base;
+
+	/* counters of cpu 0 */
+	memcpy(counters, oldcounters,
+	   sizeof(struct ebt_counter) * nentries);
+	/* add other counters to those of cpu 0 */
+	for (cpu = 1; cpu < num_possible_cpus(); cpu++) {
+		counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
+		for (i = 0; i < nentries; i++) {
+			counters[i].pcnt += counter_base[i].pcnt;
+			counters[i].bcnt += counter_base[i].bcnt;
+		}
+	}
+}
+
+/* replace the table */
+static int do_replace(void __user *user, unsigned int len)
+{
+	int ret, i, countersize;
+	struct ebt_table_info *newinfo;
+	struct ebt_replace tmp;
+	struct ebt_table *t;
+	struct ebt_counter *counterstmp = NULL;
+	/* used to be able to unlock earlier */
+	struct ebt_table_info *table;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	if (len != sizeof(tmp) + tmp.entries_size) {
+		BUGPRINT("Wrong len argument\n");
+		return -EINVAL;
+	}
+
+	if (tmp.entries_size == 0) {
+		BUGPRINT("Entries_size never zero\n");
+		return -EINVAL;
+	}
+	countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus();
+	newinfo = (struct ebt_table_info *)
+	   vmalloc(sizeof(struct ebt_table_info) + countersize);
+	if (!newinfo)
+		return -ENOMEM;
+
+	if (countersize)
+		memset(newinfo->counters, 0, countersize);
+
+	newinfo->entries = (char *)vmalloc(tmp.entries_size);
+	if (!newinfo->entries) {
+		ret = -ENOMEM;
+		goto free_newinfo;
+	}
+	if (copy_from_user(
+	   newinfo->entries, tmp.entries, tmp.entries_size) != 0) {
+		BUGPRINT("Couldn't copy entries from userspace\n");
+		ret = -EFAULT;
+		goto free_entries;
+	}
+
+	/* the user wants counters back
+	   the check on the size is done later, when we have the lock */
+	if (tmp.num_counters) {
+		counterstmp = (struct ebt_counter *)
+		   vmalloc(tmp.num_counters * sizeof(struct ebt_counter));
+		if (!counterstmp) {
+			ret = -ENOMEM;
+			goto free_entries;
+		}
+	}
+	else
+		counterstmp = NULL;
+
+	/* this can get initialized by translate_table() */
+	newinfo->chainstack = NULL;
+	ret = translate_table(&tmp, newinfo);
+
+	if (ret != 0)
+		goto free_counterstmp;
+
+	t = find_table_lock(tmp.name, &ret, &ebt_mutex);
+	if (!t) {
+		ret = -ENOENT;
+		goto free_iterate;
+	}
+
+	/* the table doesn't like it */
+	if (t->check && (ret = t->check(newinfo, tmp.valid_hooks)))
+		goto free_unlock;
+
+	if (tmp.num_counters && tmp.num_counters != t->private->nentries) {
+		BUGPRINT("Wrong nr. of counters requested\n");
+		ret = -EINVAL;
+		goto free_unlock;
+	}
+
+	/* we have the mutex lock, so no danger in reading this pointer */
+	table = t->private;
+	/* make sure the table can only be rmmod'ed if it contains no rules */
+	if (!table->nentries && newinfo->nentries && !try_module_get(t->me)) {
+		ret = -ENOENT;
+		goto free_unlock;
+	} else if (table->nentries && !newinfo->nentries)
+		module_put(t->me);
+	/* we need an atomic snapshot of the counters */
+	write_lock_bh(&t->lock);
+	if (tmp.num_counters)
+		get_counters(t->private->counters, counterstmp,
+		   t->private->nentries);
+
+	t->private = newinfo;
+	write_unlock_bh(&t->lock);
+	up(&ebt_mutex);
+	/* so, a user can change the chains while having messed up her counter
+	   allocation. Only reason why this is done is because this way the lock
+	   is held only once, while this doesn't bring the kernel into a
+	   dangerous state. */
+	if (tmp.num_counters &&
+	   copy_to_user(tmp.counters, counterstmp,
+	   tmp.num_counters * sizeof(struct ebt_counter))) {
+		BUGPRINT("Couldn't copy counters to userspace\n");
+		ret = -EFAULT;
+	}
+	else
+		ret = 0;
+
+	/* decrease module count and free resources */
+	EBT_ENTRY_ITERATE(table->entries, table->entries_size,
+	   ebt_cleanup_entry, NULL);
+
+	vfree(table->entries);
+	if (table->chainstack) {
+		for (i = 0; i < num_possible_cpus(); i++)
+			vfree(table->chainstack[i]);
+		vfree(table->chainstack);
+	}
+	vfree(table);
+
+	if (counterstmp)
+		vfree(counterstmp);
+	return ret;
+
+free_unlock:
+	up(&ebt_mutex);
+free_iterate:
+	EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
+	   ebt_cleanup_entry, NULL);
+free_counterstmp:
+	if (counterstmp)
+		vfree(counterstmp);
+	/* can be initialized in translate_table() */
+	if (newinfo->chainstack) {
+		for (i = 0; i < num_possible_cpus(); i++)
+			vfree(newinfo->chainstack[i]);
+		vfree(newinfo->chainstack);
+	}
+free_entries:
+	if (newinfo->entries)
+		vfree(newinfo->entries);
+free_newinfo:
+	if (newinfo)
+		vfree(newinfo);
+	return ret;
+}
+
+int ebt_register_target(struct ebt_target *target)
+{
+	int ret;
+
+	ret = down_interruptible(&ebt_mutex);
+	if (ret != 0)
+		return ret;
+	if (!list_named_insert(&ebt_targets, target)) {
+		up(&ebt_mutex);
+		return -EEXIST;
+	}
+	up(&ebt_mutex);
+
+	return 0;
+}
+
+void ebt_unregister_target(struct ebt_target *target)
+{
+	down(&ebt_mutex);
+	LIST_DELETE(&ebt_targets, target);
+	up(&ebt_mutex);
+}
+
+int ebt_register_match(struct ebt_match *match)
+{
+	int ret;
+
+	ret = down_interruptible(&ebt_mutex);
+	if (ret != 0)
+		return ret;
+	if (!list_named_insert(&ebt_matches, match)) {
+		up(&ebt_mutex);
+		return -EEXIST;
+	}
+	up(&ebt_mutex);
+
+	return 0;
+}
+
+void ebt_unregister_match(struct ebt_match *match)
+{
+	down(&ebt_mutex);
+	LIST_DELETE(&ebt_matches, match);
+	up(&ebt_mutex);
+}
+
+int ebt_register_watcher(struct ebt_watcher *watcher)
+{
+	int ret;
+
+	ret = down_interruptible(&ebt_mutex);
+	if (ret != 0)
+		return ret;
+	if (!list_named_insert(&ebt_watchers, watcher)) {
+		up(&ebt_mutex);
+		return -EEXIST;
+	}
+	up(&ebt_mutex);
+
+	return 0;
+}
+
+void ebt_unregister_watcher(struct ebt_watcher *watcher)
+{
+	down(&ebt_mutex);
+	LIST_DELETE(&ebt_watchers, watcher);
+	up(&ebt_mutex);
+}
+
+int ebt_register_table(struct ebt_table *table)
+{
+	struct ebt_table_info *newinfo;
+	int ret, i, countersize;
+
+	if (!table || !table->table ||!table->table->entries ||
+	    table->table->entries_size == 0 ||
+	    table->table->counters || table->private) {
+		BUGPRINT("Bad table data for ebt_register_table!!!\n");
+		return -EINVAL;
+	}
+
+	countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus();
+	newinfo = (struct ebt_table_info *)
+	   vmalloc(sizeof(struct ebt_table_info) + countersize);
+	ret = -ENOMEM;
+	if (!newinfo)
+		return -ENOMEM;
+
+	newinfo->entries = (char *)vmalloc(table->table->entries_size);
+	if (!(newinfo->entries))
+		goto free_newinfo;
+
+	memcpy(newinfo->entries, table->table->entries,
+	   table->table->entries_size);
+
+	if (countersize)
+		memset(newinfo->counters, 0, countersize);
+
+	/* fill in newinfo and parse the entries */
+	newinfo->chainstack = NULL;
+	ret = translate_table(table->table, newinfo);
+	if (ret != 0) {
+		BUGPRINT("Translate_table failed\n");
+		goto free_chainstack;
+	}
+
+	if (table->check && table->check(newinfo, table->valid_hooks)) {
+		BUGPRINT("The table doesn't like its own initial data, lol\n");
+		return -EINVAL;
+	}
+
+	table->private = newinfo;
+	rwlock_init(&table->lock);
+	ret = down_interruptible(&ebt_mutex);
+	if (ret != 0)
+		goto free_chainstack;
+
+	if (list_named_find(&ebt_tables, table->name)) {
+		ret = -EEXIST;
+		BUGPRINT("Table name already exists\n");
+		goto free_unlock;
+	}
+
+	/* Hold a reference count if the chains aren't empty */
+	if (newinfo->nentries && !try_module_get(table->me)) {
+		ret = -ENOENT;
+		goto free_unlock;
+	}
+	list_prepend(&ebt_tables, table);
+	up(&ebt_mutex);
+	return 0;
+free_unlock:
+	up(&ebt_mutex);
+free_chainstack:
+	if (newinfo->chainstack) {
+		for (i = 0; i < num_possible_cpus(); i++)
+			vfree(newinfo->chainstack[i]);
+		vfree(newinfo->chainstack);
+	}
+	vfree(newinfo->entries);
+free_newinfo:
+	vfree(newinfo);
+	return ret;
+}
+
+void ebt_unregister_table(struct ebt_table *table)
+{
+	int i;
+
+	if (!table) {
+		BUGPRINT("Request to unregister NULL table!!!\n");
+		return;
+	}
+	down(&ebt_mutex);
+	LIST_DELETE(&ebt_tables, table);
+	up(&ebt_mutex);
+	if (table->private->entries)
+		vfree(table->private->entries);
+	if (table->private->chainstack) {
+		for (i = 0; i < num_possible_cpus(); i++)
+			vfree(table->private->chainstack[i]);
+		vfree(table->private->chainstack);
+	}
+	vfree(table->private);
+}
+
+/* userspace just supplied us with counters */
+static int update_counters(void __user *user, unsigned int len)
+{
+	int i, ret;
+	struct ebt_counter *tmp;
+	struct ebt_replace hlp;
+	struct ebt_table *t;
+
+	if (copy_from_user(&hlp, user, sizeof(hlp)))
+		return -EFAULT;
+
+	if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
+		return -EINVAL;
+	if (hlp.num_counters == 0)
+		return -EINVAL;
+
+	if ( !(tmp = (struct ebt_counter *)
+	   vmalloc(hlp.num_counters * sizeof(struct ebt_counter))) ){
+		MEMPRINT("Update_counters && nomemory\n");
+		return -ENOMEM;
+	}
+
+	t = find_table_lock(hlp.name, &ret, &ebt_mutex);
+	if (!t)
+		goto free_tmp;
+
+	if (hlp.num_counters != t->private->nentries) {
+		BUGPRINT("Wrong nr of counters\n");
+		ret = -EINVAL;
+		goto unlock_mutex;
+	}
+
+	if ( copy_from_user(tmp, hlp.counters,
+	   hlp.num_counters * sizeof(struct ebt_counter)) ) {
+		BUGPRINT("Updata_counters && !cfu\n");
+		ret = -EFAULT;
+		goto unlock_mutex;
+	}
+
+	/* we want an atomic add of the counters */
+	write_lock_bh(&t->lock);
+
+	/* we add to the counters of the first cpu */
+	for (i = 0; i < hlp.num_counters; i++) {
+		t->private->counters[i].pcnt += tmp[i].pcnt;
+		t->private->counters[i].bcnt += tmp[i].bcnt;
+	}
+
+	write_unlock_bh(&t->lock);
+	ret = 0;
+unlock_mutex:
+	up(&ebt_mutex);
+free_tmp:
+	vfree(tmp);
+	return ret;
+}
+
+static inline int ebt_make_matchname(struct ebt_entry_match *m,
+   char *base, char *ubase)
+{
+	char *hlp = ubase - base + (char *)m;
+	if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN))
+		return -EFAULT;
+	return 0;
+}
+
+static inline int ebt_make_watchername(struct ebt_entry_watcher *w,
+   char *base, char *ubase)
+{
+	char *hlp = ubase - base + (char *)w;
+	if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN))
+		return -EFAULT;
+	return 0;
+}
+
+static inline int ebt_make_names(struct ebt_entry *e, char *base, char *ubase)
+{
+	int ret;
+	char *hlp;
+	struct ebt_entry_target *t;
+
+	if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0)
+		return 0;
+
+	hlp = ubase - base + (char *)e + e->target_offset;
+	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	
+	ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase);
+	if (ret != 0)
+		return ret;
+	ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
+	if (ret != 0)
+		return ret;
+	if (copy_to_user(hlp, t->u.target->name, EBT_FUNCTION_MAXNAMELEN))
+		return -EFAULT;
+	return 0;
+}
+
+/* called with ebt_mutex down */
+static int copy_everything_to_user(struct ebt_table *t, void __user *user,
+   int *len, int cmd)
+{
+	struct ebt_replace tmp;
+	struct ebt_counter *counterstmp, *oldcounters;
+	unsigned int entries_size, nentries;
+	char *entries;
+
+	if (cmd == EBT_SO_GET_ENTRIES) {
+		entries_size = t->private->entries_size;
+		nentries = t->private->nentries;
+		entries = t->private->entries;
+		oldcounters = t->private->counters;
+	} else {
+		entries_size = t->table->entries_size;
+		nentries = t->table->nentries;
+		entries = t->table->entries;
+		oldcounters = t->table->counters;
+	}
+
+	if (copy_from_user(&tmp, user, sizeof(tmp))) {
+		BUGPRINT("Cfu didn't work\n");
+		return -EFAULT;
+	}
+
+	if (*len != sizeof(struct ebt_replace) + entries_size +
+	   (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) {
+		BUGPRINT("Wrong size\n");
+		return -EINVAL;
+	}
+
+	if (tmp.nentries != nentries) {
+		BUGPRINT("Nentries wrong\n");
+		return -EINVAL;
+	}
+
+	if (tmp.entries_size != entries_size) {
+		BUGPRINT("Wrong size\n");
+		return -EINVAL;
+	}
+
+	/* userspace might not need the counters */
+	if (tmp.num_counters) {
+		if (tmp.num_counters != nentries) {
+			BUGPRINT("Num_counters wrong\n");
+			return -EINVAL;
+		}
+		counterstmp = (struct ebt_counter *)
+		   vmalloc(nentries * sizeof(struct ebt_counter));
+		if (!counterstmp) {
+			MEMPRINT("Couldn't copy counters, out of memory\n");
+			return -ENOMEM;
+		}
+		write_lock_bh(&t->lock);
+		get_counters(oldcounters, counterstmp, nentries);
+		write_unlock_bh(&t->lock);
+
+		if (copy_to_user(tmp.counters, counterstmp,
+		   nentries * sizeof(struct ebt_counter))) {
+			BUGPRINT("Couldn't copy counters to userspace\n");
+			vfree(counterstmp);
+			return -EFAULT;
+		}
+		vfree(counterstmp);
+	}
+
+	if (copy_to_user(tmp.entries, entries, entries_size)) {
+		BUGPRINT("Couldn't copy entries to userspace\n");
+		return -EFAULT;
+	}
+	/* set the match/watcher/target names right */
+	return EBT_ENTRY_ITERATE(entries, entries_size,
+	   ebt_make_names, entries, tmp.entries);
+}
+
+static int do_ebt_set_ctl(struct sock *sk,
+	int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+
+	switch(cmd) {
+	case EBT_SO_SET_ENTRIES:
+		ret = do_replace(user, len);
+		break;
+	case EBT_SO_SET_COUNTERS:
+		ret = update_counters(user, len);
+		break;
+	default:
+		ret = -EINVAL;
+  }
+	return ret;
+}
+
+static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+	struct ebt_replace tmp;
+	struct ebt_table *t;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)))
+		return -EFAULT;
+
+	t = find_table_lock(tmp.name, &ret, &ebt_mutex);
+	if (!t)
+		return ret;
+
+	switch(cmd) {
+	case EBT_SO_GET_INFO:
+	case EBT_SO_GET_INIT_INFO:
+		if (*len != sizeof(struct ebt_replace)){
+			ret = -EINVAL;
+			up(&ebt_mutex);
+			break;
+		}
+		if (cmd == EBT_SO_GET_INFO) {
+			tmp.nentries = t->private->nentries;
+			tmp.entries_size = t->private->entries_size;
+			tmp.valid_hooks = t->valid_hooks;
+		} else {
+			tmp.nentries = t->table->nentries;
+			tmp.entries_size = t->table->entries_size;
+			tmp.valid_hooks = t->table->valid_hooks;
+		}
+		up(&ebt_mutex);
+		if (copy_to_user(user, &tmp, *len) != 0){
+			BUGPRINT("c2u Didn't work\n");
+			ret = -EFAULT;
+			break;
+		}
+		ret = 0;
+		break;
+
+	case EBT_SO_GET_ENTRIES:
+	case EBT_SO_GET_INIT_ENTRIES:
+		ret = copy_everything_to_user(t, user, len, cmd);
+		up(&ebt_mutex);
+		break;
+
+	default:
+		up(&ebt_mutex);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static struct nf_sockopt_ops ebt_sockopts =
+{ { NULL, NULL }, PF_INET, EBT_BASE_CTL, EBT_SO_SET_MAX + 1, do_ebt_set_ctl,
+    EBT_BASE_CTL, EBT_SO_GET_MAX + 1, do_ebt_get_ctl, 0, NULL
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	down(&ebt_mutex);
+	list_named_insert(&ebt_targets, &ebt_standard_target);
+	up(&ebt_mutex);
+	if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
+		return ret;
+
+	printk(KERN_NOTICE "Ebtables v2.0 registered\n");
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	nf_unregister_sockopt(&ebt_sockopts);
+	printk(KERN_NOTICE "Ebtables v2.0 unregistered\n");
+}
+
+EXPORT_SYMBOL(ebt_register_table);
+EXPORT_SYMBOL(ebt_unregister_table);
+EXPORT_SYMBOL(ebt_register_match);
+EXPORT_SYMBOL(ebt_unregister_match);
+EXPORT_SYMBOL(ebt_register_watcher);
+EXPORT_SYMBOL(ebt_unregister_watcher);
+EXPORT_SYMBOL(ebt_register_target);
+EXPORT_SYMBOL(ebt_unregister_target);
+EXPORT_SYMBOL(ebt_do_table);
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/compat.c b/net/compat.c
new file mode 100644
index 00000000000..be5d936dc42
--- /dev/null
+++ b/net/compat.c
@@ -0,0 +1,605 @@
+/* 
+ * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c.
+ *
+ * Copyright (C) 2000		VA Linux Co
+ * Copyright (C) 2000		Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999 		Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 	Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 		David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000		Hewlett-Packard Co.
+ * Copyright (C) 2000		David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000,2001	Andi Kleen, SuSE Labs 
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/icmpv6.h>
+#include <linux/socket.h>
+#include <linux/syscalls.h>
+#include <linux/filter.h>
+#include <linux/compat.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/security.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <net/compat.h>
+
+static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
+					  struct compat_iovec __user *uiov32,
+					  int niov)
+{
+	int tot_len = 0;
+
+	while(niov > 0) {
+		compat_uptr_t buf;
+		compat_size_t len;
+
+		if(get_user(len, &uiov32->iov_len) ||
+		   get_user(buf, &uiov32->iov_base)) {
+			tot_len = -EFAULT;
+			break;
+		}
+		tot_len += len;
+		kiov->iov_base = compat_ptr(buf);
+		kiov->iov_len = (__kernel_size_t) len;
+		uiov32++;
+		kiov++;
+		niov--;
+	}
+	return tot_len;
+}
+
+int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
+{
+	compat_uptr_t tmp1, tmp2, tmp3;
+
+	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
+	    __get_user(tmp1, &umsg->msg_name) ||
+	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
+	    __get_user(tmp2, &umsg->msg_iov) ||
+	    __get_user(kmsg->msg_iovlen, &umsg->msg_iovlen) ||
+	    __get_user(tmp3, &umsg->msg_control) ||
+	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
+	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
+		return -EFAULT;
+	kmsg->msg_name = compat_ptr(tmp1);
+	kmsg->msg_iov = compat_ptr(tmp2);
+	kmsg->msg_control = compat_ptr(tmp3);
+	return 0;
+}
+
+/* I've named the args so it is easy to tell whose space the pointers are in. */
+int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
+		   char *kern_address, int mode)
+{
+	int tot_len;
+
+	if(kern_msg->msg_namelen) {
+		if(mode==VERIFY_READ) {
+			int err = move_addr_to_kernel(kern_msg->msg_name,
+						      kern_msg->msg_namelen,
+						      kern_address);
+			if(err < 0)
+				return err;
+		}
+		kern_msg->msg_name = kern_address;
+	} else
+		kern_msg->msg_name = NULL;
+
+	if(kern_msg->msg_iovlen > UIO_FASTIOV) {
+		kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
+				   GFP_KERNEL);
+		if(!kern_iov)
+			return -ENOMEM;
+	}
+
+	tot_len = iov_from_user_compat_to_kern(kern_iov,
+					  (struct compat_iovec __user *)kern_msg->msg_iov,
+					  kern_msg->msg_iovlen);
+	if(tot_len >= 0)
+		kern_msg->msg_iov = kern_iov;
+	else if(kern_msg->msg_iovlen > UIO_FASTIOV)
+		kfree(kern_iov);
+
+	return tot_len;
+}
+
+/* Bleech... */
+#define CMSG_COMPAT_ALIGN(len)	ALIGN((len), sizeof(s32))
+
+#define CMSG_COMPAT_DATA(cmsg)				\
+	((void __user *)((char __user *)(cmsg) + CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))))
+#define CMSG_COMPAT_SPACE(len)				\
+	(CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)) + CMSG_COMPAT_ALIGN(len))
+#define CMSG_COMPAT_LEN(len)				\
+	(CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)) + (len))
+
+#define CMSG_COMPAT_FIRSTHDR(msg)			\
+	(((msg)->msg_controllen) >= sizeof(struct compat_cmsghdr) ?	\
+	 (struct compat_cmsghdr __user *)((msg)->msg_control) :		\
+	 (struct compat_cmsghdr __user *)NULL)
+
+#define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \
+	((ucmlen) >= sizeof(struct compat_cmsghdr) && \
+	 (ucmlen) <= (unsigned long) \
+	 ((mhdr)->msg_controllen - \
+	  ((char *)(ucmsg) - (char *)(mhdr)->msg_control)))
+
+static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg,
+		struct compat_cmsghdr __user *cmsg, int cmsg_len)
+{
+	char __user *ptr = (char __user *)cmsg + CMSG_COMPAT_ALIGN(cmsg_len);
+	if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control) >
+			msg->msg_controllen)
+		return NULL;
+	return (struct compat_cmsghdr __user *)ptr;
+}
+
+/* There is a lot of hair here because the alignment rules (and
+ * thus placement) of cmsg headers and length are different for
+ * 32-bit apps.  -DaveM
+ */
+int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg,
+			       unsigned char *stackbuf, int stackbuf_size)
+{
+	struct compat_cmsghdr __user *ucmsg;
+	struct cmsghdr *kcmsg, *kcmsg_base;
+	compat_size_t ucmlen;
+	__kernel_size_t kcmlen, tmp;
+
+	kcmlen = 0;
+	kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
+	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
+	while(ucmsg != NULL) {
+		if(get_user(ucmlen, &ucmsg->cmsg_len))
+			return -EFAULT;
+
+		/* Catch bogons. */
+		if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
+			return -EINVAL;
+
+		tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) +
+		       CMSG_ALIGN(sizeof(struct cmsghdr)));
+		kcmlen += tmp;
+		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+	}
+	if(kcmlen == 0)
+		return -EINVAL;
+
+	/* The kcmlen holds the 64-bit version of the control length.
+	 * It may not be modified as we do not stick it into the kmsg
+	 * until we have successfully copied over all of the data
+	 * from the user.
+	 */
+	if(kcmlen > stackbuf_size)
+		kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL);
+	if(kcmsg == NULL)
+		return -ENOBUFS;
+
+	/* Now copy them over neatly. */
+	memset(kcmsg, 0, kcmlen);
+	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
+	while(ucmsg != NULL) {
+		__get_user(ucmlen, &ucmsg->cmsg_len);
+		tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) +
+		       CMSG_ALIGN(sizeof(struct cmsghdr)));
+		kcmsg->cmsg_len = tmp;
+		__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level);
+		__get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type);
+
+		/* Copy over the data. */
+		if(copy_from_user(CMSG_DATA(kcmsg),
+				  CMSG_COMPAT_DATA(ucmsg),
+				  (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg)))))
+			goto out_free_efault;
+
+		/* Advance. */
+		kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp));
+		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
+	}
+
+	/* Ok, looks like we made it.  Hook it up and return success. */
+	kmsg->msg_control = kcmsg_base;
+	kmsg->msg_controllen = kcmlen;
+	return 0;
+
+out_free_efault:
+	if(kcmsg_base != (struct cmsghdr *)stackbuf)
+		kfree(kcmsg_base);
+	return -EFAULT;
+}
+
+int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
+{
+	struct compat_timeval ctv;
+	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
+	struct compat_cmsghdr cmhdr;
+	int cmlen;
+
+	if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+		kmsg->msg_flags |= MSG_CTRUNC;
+		return 0; /* XXX: return error? check spec. */
+	}
+
+	if (level == SOL_SOCKET && type == SO_TIMESTAMP) { 
+		struct timeval *tv = (struct timeval *)data;
+		ctv.tv_sec = tv->tv_sec;
+		ctv.tv_usec = tv->tv_usec;
+		data = &ctv;
+		len = sizeof(struct compat_timeval);
+	} 
+	
+	cmlen = CMSG_COMPAT_LEN(len);
+	if(kmsg->msg_controllen < cmlen) {
+		kmsg->msg_flags |= MSG_CTRUNC;
+		cmlen = kmsg->msg_controllen;
+	}
+	cmhdr.cmsg_level = level;
+	cmhdr.cmsg_type = type;
+	cmhdr.cmsg_len = cmlen;
+
+	if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+		return -EFAULT;
+	if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+		return -EFAULT;
+	cmlen = CMSG_COMPAT_SPACE(len);
+	kmsg->msg_control += cmlen;
+	kmsg->msg_controllen -= cmlen;
+	return 0;
+}
+
+void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
+{
+	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
+	int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
+	int fdnum = scm->fp->count;
+	struct file **fp = scm->fp->fp;
+	int __user *cmfptr;
+	int err = 0, i;
+
+	if (fdnum < fdmax)
+		fdmax = fdnum;
+
+	for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) {
+		int new_fd;
+		err = security_file_receive(fp[i]);
+		if (err)
+			break;
+		err = get_unused_fd();
+		if (err < 0)
+			break;
+		new_fd = err;
+		err = put_user(new_fd, cmfptr);
+		if (err) {
+			put_unused_fd(new_fd);
+			break;
+		}
+		/* Bump the usage count and install the file. */
+		get_file(fp[i]);
+		fd_install(new_fd, fp[i]);
+	}
+
+	if (i > 0) {
+		int cmlen = CMSG_COMPAT_LEN(i * sizeof(int));
+		if (!err)
+			err = put_user(SOL_SOCKET, &cm->cmsg_level);
+		if (!err)
+			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+		if (!err)
+			err = put_user(cmlen, &cm->cmsg_len);
+		if (!err) {
+			cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
+			kmsg->msg_control += cmlen;
+			kmsg->msg_controllen -= cmlen;
+		}
+	}
+	if (i < fdnum)
+		kmsg->msg_flags |= MSG_CTRUNC;
+
+	/*
+	 * All of the files that fit in the message have had their
+	 * usage counts incremented, so we just free the list.
+	 */
+	__scm_destroy(scm);
+}
+
+/*
+ * For now, we assume that the compatibility and native version
+ * of struct ipt_entry are the same - sfr.  FIXME
+ */
+struct compat_ipt_replace {
+	char			name[IPT_TABLE_MAXNAMELEN];
+	u32			valid_hooks;
+	u32			num_entries;
+	u32			size;
+	u32			hook_entry[NF_IP_NUMHOOKS];
+	u32			underflow[NF_IP_NUMHOOKS];
+	u32			num_counters;
+	compat_uptr_t		counters;	/* struct ipt_counters * */
+	struct ipt_entry	entries[0];
+};
+
+static int do_netfilter_replace(int fd, int level, int optname,
+				char __user *optval, int optlen)
+{
+	struct compat_ipt_replace __user *urepl;
+	struct ipt_replace __user *repl_nat;
+	char name[IPT_TABLE_MAXNAMELEN];
+	u32 origsize, tmp32, num_counters;
+	unsigned int repl_nat_size;
+	int ret;
+	int i;
+	compat_uptr_t ucntrs;
+
+	urepl = (struct compat_ipt_replace __user *)optval;
+	if (get_user(origsize, &urepl->size))
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (optlen != sizeof(*urepl) + origsize)
+		return -ENOPROTOOPT;
+
+	/* XXX Assumes that size of ipt_entry is the same both in
+	 *     native and compat environments.
+	 */
+	repl_nat_size = sizeof(*repl_nat) + origsize;
+	repl_nat = compat_alloc_user_space(repl_nat_size);
+
+	ret = -EFAULT;
+	if (put_user(origsize, &repl_nat->size))
+		goto out;
+
+	if (!access_ok(VERIFY_READ, urepl, optlen) ||
+	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
+		goto out;
+
+	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
+	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
+		goto out;
+
+	if (__get_user(tmp32, &urepl->valid_hooks) ||
+	    __put_user(tmp32, &repl_nat->valid_hooks))
+		goto out;
+
+	if (__get_user(tmp32, &urepl->num_entries) ||
+	    __put_user(tmp32, &repl_nat->num_entries))
+		goto out;
+
+	if (__get_user(num_counters, &urepl->num_counters) ||
+	    __put_user(num_counters, &repl_nat->num_counters))
+		goto out;
+
+	if (__get_user(ucntrs, &urepl->counters) ||
+	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
+		goto out;
+
+	if (__copy_in_user(&repl_nat->entries[0],
+			   &urepl->entries[0],
+			   origsize))
+		goto out;
+
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
+		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
+		    __get_user(tmp32, &urepl->underflow[i]) ||
+		    __put_user(tmp32, &repl_nat->underflow[i]))
+			goto out;
+	}
+
+	/*
+	 * Since struct ipt_counters just contains two u_int64_t members
+	 * we can just do the access_ok check here and pass the (converted)
+	 * pointer into the standard syscall.  We hope that the pointer is
+	 * not misaligned ...
+	 */
+	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
+		       num_counters * sizeof(struct ipt_counters)))
+		goto out;
+
+
+	ret = sys_setsockopt(fd, level, optname,
+			     (char __user *)repl_nat, repl_nat_size);
+
+out:
+	return ret;
+}
+
+/*
+ * A struct sock_filter is architecture independent.
+ */
+struct compat_sock_fprog {
+	u16		len;
+	compat_uptr_t	filter;		/* struct sock_filter * */
+};
+
+static int do_set_attach_filter(int fd, int level, int optname,
+				char __user *optval, int optlen)
+{
+	struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval;
+	struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); 
+	compat_uptr_t ptr;
+	u16 len;
+
+	if (!access_ok(VERIFY_READ, fprog32, sizeof(*fprog32)) ||
+	    !access_ok(VERIFY_WRITE, kfprog, sizeof(struct sock_fprog)) ||
+	    __get_user(len, &fprog32->len) ||
+	    __get_user(ptr, &fprog32->filter) ||
+	    __put_user(len, &kfprog->len) ||
+	    __put_user(compat_ptr(ptr), &kfprog->filter))
+		return -EFAULT;
+
+	return sys_setsockopt(fd, level, optname, (char __user *)kfprog, 
+			      sizeof(struct sock_fprog));
+}
+
+static int do_set_sock_timeout(int fd, int level, int optname, char __user *optval, int optlen)
+{
+	struct compat_timeval __user *up = (struct compat_timeval __user *) optval;
+	struct timeval ktime;
+	mm_segment_t old_fs;
+	int err;
+
+	if (optlen < sizeof(*up))
+		return -EINVAL;
+	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+	    __get_user(ktime.tv_sec, &up->tv_sec) ||
+	    __get_user(ktime.tv_usec, &up->tv_usec))
+		return -EFAULT;
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_setsockopt(fd, level, optname, (char *) &ktime, sizeof(ktime));
+	set_fs(old_fs);
+
+	return err;
+}
+
+asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
+				char __user *optval, int optlen)
+{
+	/* SO_SET_REPLACE seems to be the same in all levels */
+	if (optname == IPT_SO_SET_REPLACE)
+		return do_netfilter_replace(fd, level, optname,
+					    optval, optlen);
+	if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
+		return do_set_attach_filter(fd, level, optname,
+					    optval, optlen);
+	if (level == SOL_SOCKET &&
+	    (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
+		return do_set_sock_timeout(fd, level, optname, optval, optlen);
+
+	return sys_setsockopt(fd, level, optname, optval, optlen);
+}
+
+static int do_get_sock_timeout(int fd, int level, int optname,
+		char __user *optval, int __user *optlen)
+{
+	struct compat_timeval __user *up;
+	struct timeval ktime;
+	mm_segment_t old_fs;
+	int len, err;
+
+	up = (struct compat_timeval __user *) optval;
+	if (get_user(len, optlen))
+		return -EFAULT;
+	if (len < sizeof(*up))
+		return -EINVAL;
+	len = sizeof(ktime);
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_getsockopt(fd, level, optname, (char *) &ktime, &len);
+	set_fs(old_fs);
+
+	if (!err) {
+		if (put_user(sizeof(*up), optlen) ||
+		    !access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+		    __put_user(ktime.tv_sec, &up->tv_sec) ||
+		    __put_user(ktime.tv_usec, &up->tv_usec))
+			err = -EFAULT;
+	}
+	return err;
+}
+
+asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
+				char __user *optval, int __user *optlen)
+{
+	if (level == SOL_SOCKET &&
+	    (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
+		return do_get_sock_timeout(fd, level, optname, optval, optlen);
+	return sys_getsockopt(fd, level, optname, optval, optlen);
+}
+
+/* Argument list sizes for compat_sys_socketcall */
+#define AL(x) ((x) * sizeof(u32))
+static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+#undef AL
+
+asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
+{
+	return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+}
+
+asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
+{
+	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+}
+
+asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
+{
+	int ret;
+	u32 a[6];
+	u32 a0, a1;
+				 
+	if (call < SYS_SOCKET || call > SYS_RECVMSG)
+		return -EINVAL;
+	if (copy_from_user(a, args, nas[call]))
+		return -EFAULT;
+	a0 = a[0];
+	a1 = a[1];
+	
+	switch(call) {
+	case SYS_SOCKET:
+		ret = sys_socket(a0, a1, a[2]);
+		break;
+	case SYS_BIND:
+		ret = sys_bind(a0, compat_ptr(a1), a[2]);
+		break;
+	case SYS_CONNECT:
+		ret = sys_connect(a0, compat_ptr(a1), a[2]);
+		break;
+	case SYS_LISTEN:
+		ret = sys_listen(a0, a1);
+		break;
+	case SYS_ACCEPT:
+		ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2]));
+		break;
+	case SYS_GETSOCKNAME:
+		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
+		break;
+	case SYS_GETPEERNAME:
+		ret = sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2]));
+		break;
+	case SYS_SOCKETPAIR:
+		ret = sys_socketpair(a0, a1, a[2], compat_ptr(a[3]));
+		break;
+	case SYS_SEND:
+		ret = sys_send(a0, compat_ptr(a1), a[2], a[3]);
+		break;
+	case SYS_SENDTO:
+		ret = sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]);
+		break;
+	case SYS_RECV:
+		ret = sys_recv(a0, compat_ptr(a1), a[2], a[3]);
+		break;
+	case SYS_RECVFROM:
+		ret = sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5]));
+		break;
+	case SYS_SHUTDOWN:
+		ret = sys_shutdown(a0,a1);
+		break;
+	case SYS_SETSOCKOPT:
+		ret = compat_sys_setsockopt(a0, a1, a[2],
+				compat_ptr(a[3]), a[4]);
+		break;
+	case SYS_GETSOCKOPT:
+		ret = compat_sys_getsockopt(a0, a1, a[2],
+				compat_ptr(a[3]), compat_ptr(a[4]));
+		break;
+	case SYS_SENDMSG:
+		ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
+		break;
+	case SYS_RECVMSG:
+		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
diff --git a/net/core/Makefile b/net/core/Makefile
new file mode 100644
index 00000000000..81f03243fe2
--- /dev/null
+++ b/net/core/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the Linux networking core.
+#
+
+obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+
+obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+obj-y		     += flow.o dev.o ethtool.o dev_mcast.o dst.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+
+obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-$(CONFIG_NETFILTER) += netfilter.o
+obj-$(CONFIG_NET_DIVERT) += dv.o
+obj-$(CONFIG_NET_PKTGEN) += pktgen.o
+obj-$(CONFIG_NET_RADIO) += wireless.o
+obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
new file mode 100644
index 00000000000..d1bfd279cc1
--- /dev/null
+++ b/net/core/datagram.c
@@ -0,0 +1,482 @@
+/*
+ *	SUCS NET3:
+ *
+ *	Generic datagram handling routines. These are generic for all
+ *	protocols. Possibly a generic IP version on top of these would
+ *	make sense. Not tonight however 8-).
+ *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
+ *	NetROM layer all have identical poll code and mostly
+ *	identical recvmsg() code. So we share it here. The poll was
+ *	shared before but buried in udp.c so I moved it.
+ *
+ *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *						     udp.c code)
+ *
+ *	Fixes:
+ *		Alan Cox	:	NULL return from skb_peek_copy()
+ *					understood
+ *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
+ *					skb_peek_copy stuff.
+ *		Alan Cox	:	Added support for SOCK_SEQPACKET.
+ *					IPX can no longer use the SO_TYPE hack
+ *					but AX.25 now works right, and SPX is
+ *					feasible.
+ *		Alan Cox	:	Fixed write poll of non IP protocol
+ *					crash.
+ *		Florian  La Roche:	Changed for my new skbuff handling.
+ *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
+ *		Linus Torvalds	:	BSD semantic fixes.
+ *		Alan Cox	:	Datagram iovec handling
+ *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
+ *		Alan Cox	:	POSIXisms
+ *		Pete Wyckoff    :       Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+
+/*
+ *	Is a socket 'connection oriented' ?
+ */
+static inline int connection_based(struct sock *sk)
+{
+	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
+}
+
+/*
+ * Wait for a packet..
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+	int error;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+	/* Socket errors? */
+	error = sock_error(sk);
+	if (error)
+		goto out_err;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto out;
+
+	/* Socket shut down? */
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		goto out_noerr;
+
+	/* Sequenced packets can come disconnected.
+	 * If so we report the problem
+	 */
+	error = -ENOTCONN;
+	if (connection_based(sk) &&
+	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+		goto out_err;
+
+	/* handle signals */
+	if (signal_pending(current))
+		goto interrupted;
+
+	error = 0;
+	*timeo_p = schedule_timeout(*timeo_p);
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return error;
+interrupted:
+	error = sock_intr_errno(*timeo_p);
+out_err:
+	*err = error;
+	goto out;
+out_noerr:
+	*err = 0;
+	error = 1;
+	goto out;
+}
+
+/**
+ *	skb_recv_datagram - Receive a datagram skbuff
+ *	@sk - socket
+ *	@flags - MSG_ flags
+ *	@noblock - blocking operation?
+ *	@err - error code returned
+ *
+ *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ *	and possible races. This replaces identical code in packet, raw and
+ *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
+ *	the long standing peek and read race for datagram sockets. If you
+ *	alter this routine remember it must be re-entrant.
+ *
+ *	This function will lock the socket if a skb is returned, so the caller
+ *	needs to unlock the socket in that case (usually by calling
+ *	skb_free_datagram)
+ *
+ *	* It does not lock socket since today. This function is
+ *	* free of race conditions. This measure should/can improve
+ *	* significantly datagram socket latencies at high loads,
+ *	* when data copying to user space takes lots of time.
+ *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ *	*  8) Great win.)
+ *	*			                    --ANK (980729)
+ *
+ *	The order of the tests when we find no data waiting are specified
+ *	quite explicitly by POSIX 1003.1g, don't change them without having
+ *	the standard around please.
+ */
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	struct sk_buff *skb;
+	long timeo;
+	/*
+	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+	 */
+	int error = sock_error(sk);
+
+	if (error)
+		goto no_packet;
+
+	timeo = sock_rcvtimeo(sk, noblock);
+
+	do {
+		/* Again only user level code calls this function, so nothing
+		 * interrupt level will suddenly eat the receive_queue.
+		 *
+		 * Look at current nfs client by the way...
+		 * However, this function was corrent in any case. 8)
+		 */
+		if (flags & MSG_PEEK) {
+			unsigned long cpu_flags;
+
+			spin_lock_irqsave(&sk->sk_receive_queue.lock,
+					  cpu_flags);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb)
+				atomic_inc(&skb->users);
+			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+					       cpu_flags);
+		} else
+			skb = skb_dequeue(&sk->sk_receive_queue);
+
+		if (skb)
+			return skb;
+
+		/* User doesn't want to wait */
+		error = -EAGAIN;
+		if (!timeo)
+			goto no_packet;
+
+	} while (!wait_for_packet(sk, err, &timeo));
+
+	return NULL;
+
+no_packet:
+	*err = error;
+	return NULL;
+}
+
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+}
+
+/**
+ *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+			    struct iovec *to, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovec(to, skb->data + offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset +
+					     offset - start, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_datagram_iovec(list,
+							    offset - start,
+							    to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+				      u8 __user *to, int len,
+				      unsigned int *csump)
+{
+	int start = skb_headlen(skb);
+	int pos = 0;
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		int err = 0;
+		if (copy > len)
+			copy = len;
+		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+					       *csump, &err);
+		if (err)
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			int err = 0;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			csum2 = csum_and_copy_to_user(vaddr +
+							frag->page_offset +
+							offset - start,
+						      to, copy, 0, &err);
+			kunmap(page);
+			if (err)
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				unsigned int csum2 = 0;
+				if (copy > len)
+					copy = len;
+				if (skb_copy_and_csum_datagram(list,
+							       offset - start,
+							       to, copy,
+							       &csum2))
+					goto fault;
+				*csump = csum_block_add(*csump, csum2, pos);
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/**
+ *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ *	@skb - skbuff
+ *	@hlen - hardware length
+ *	@iovec - io vector
+ * 
+ *	Caller _must_ check that skb will fit to this iovec.
+ *
+ *	Returns: 0       - success.
+ *		 -EINVAL - checksum failure.
+ *		 -EFAULT - fault during copy. Beware, in this case iovec
+ *			   can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
+				     int hlen, struct iovec *iov)
+{
+	unsigned int csum;
+	int chunk = skb->len - hlen;
+
+	/* Skip filled elements.
+	 * Pretty silly, look at memcpy_toiovec, though 8)
+	 */
+	while (!iov->iov_len)
+		iov++;
+
+	if (iov->iov_len < chunk) {
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
+							   skb->csum)))
+			goto csum_error;
+		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+			goto fault;
+	} else {
+		csum = csum_partial(skb->data, hlen, skb->csum);
+		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+					       chunk, &csum))
+			goto fault;
+		if ((unsigned short)csum_fold(csum))
+			goto csum_error;
+		iov->iov_len -= chunk;
+		iov->iov_base += chunk;
+	}
+	return 0;
+csum_error:
+	return -EINVAL;
+fault:
+	return -EFAULT;
+}
+
+/**
+ * 	datagram_poll - generic datagram poll
+ *	@file - file struct
+ *	@sock - socket
+ *	@wait - poll table
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Note: when you _don't_ use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (connection_based(sk)) {
+		if (sk->sk_state == TCP_CLOSE)
+			mask |= POLLHUP;
+		/* connection hasn't started yet? */
+		if (sk->sk_state == TCP_SYN_SENT)
+			return mask;
+	}
+
+	/* writable? */
+	if (sock_writeable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	return mask;
+}
+
+EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+EXPORT_SYMBOL(skb_free_datagram);
+EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
new file mode 100644
index 00000000000..42344d90369
--- /dev/null
+++ b/net/core/dev.c
@@ -0,0 +1,3359 @@
+/*
+ * 	NET3	Protocol independent device support routines.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Derived from the non IP parts of dev.c 1.0.19
+ * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ *	Additional Authors:
+ *		Florian la Roche <rzsfl@rz.uni-sb.de>
+ *		Alan Cox <gw4pts@gw4pts.ampr.org>
+ *		David Hinds <dahinds@users.sourceforge.net>
+ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Adam Sulmicki <adam@cfar.umd.edu>
+ *              Pekka Riikonen <priikone@poesidon.pspt.fi>
+ *
+ *	Changes:
+ *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
+ *              			to 2 if register_netdev gets called
+ *              			before net_dev_init & also removed a
+ *              			few lines of code in the process.
+ *		Alan Cox	:	device private ioctl copies fields back.
+ *		Alan Cox	:	Transmit queue code does relevant
+ *					stunts to keep the queue safe.
+ *		Alan Cox	:	Fixed double lock.
+ *		Alan Cox	:	Fixed promisc NULL pointer trap
+ *		????????	:	Support the full private ioctl range
+ *		Alan Cox	:	Moved ioctl permission check into
+ *					drivers
+ *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
+ *		Alan Cox	:	100 backlog just doesn't cut it when
+ *					you start doing multicast video 8)
+ *		Alan Cox	:	Rewrote net_bh and list manager.
+ *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
+ *		Alan Cox	:	Took out transmit every packet pass
+ *					Saved a few bytes in the ioctl handler
+ *		Alan Cox	:	Network driver sets packet type before
+ *					calling netif_rx. Saves a function
+ *					call a packet.
+ *		Alan Cox	:	Hashed net_bh()
+ *		Richard Kooijman:	Timestamp fixes.
+ *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
+ *		Alan Cox	:	Device lock protection.
+ *		Alan Cox	: 	Fixed nasty side effect of device close
+ *					changes.
+ *		Rudi Cilibrasi	:	Pass the right thing to
+ *					set_mac_address()
+ *		Dave Miller	:	32bit quantity for the device lock to
+ *					make it work out on a Sparc.
+ *		Bjorn Ekwall	:	Added KERNELD hack.
+ *		Alan Cox	:	Cleaned up the backlog initialise.
+ *		Craig Metz	:	SIOCGIFCONF fix if space for under
+ *					1 device.
+ *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
+ *					is no device open function.
+ *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
+ *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
+ *		Cyrus Durgin	:	Cleaned for KMOD
+ *		Adam Sulmicki   :	Bug Fix : Network Device Unload
+ *					A network device unload needs to purge
+ *					the backlog queue.
+ *	Paul Rusty Russell	:	SIOCSIFNAME
+ *              Pekka Riikonen  :	Netdev boot-time settings code
+ *              Andrew Morton   :       Make unregister_netdevice wait
+ *              			indefinitely on dev->refcnt
+ * 		J Hadi Salim	:	- Backlog queue sampling
+ *				        - netif_rx() feedback
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/if_bridge.h>
+#include <linux/divert.h>
+#include <net/dst.h>
+#include <net/pkt_sched.h>
+#include <net/checksum.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/netpoll.h>
+#include <linux/rcupdate.h>
+#include <linux/delay.h>
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
+#include <net/iw_handler.h>
+#endif	/* CONFIG_NET_RADIO */
+#include <asm/current.h>
+
+/* This define, if set, will randomly drop a packet when congestion
+ * is more than moderate.  It helps fairness in the multi-interface
+ * case when one of them is a hog, but it kills performance for the
+ * single interface case so it is off now by default.
+ */
+#undef RAND_LIE
+
+/* Setting this will sample the queue lengths and thus congestion
+ * via a timer instead of as each packet is received.
+ */
+#undef OFFLINE_SAMPLE
+
+/*
+ *	The list of packet types we will receive (as opposed to discard)
+ *	and the routines to invoke.
+ *
+ *	Why 16. Because with 16 the only overlap we get on a hash of the
+ *	low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
+ *             sure which should go first, but I bet it won't make much
+ *             difference if we are running VLANs.  The good news is that
+ *             this protocol won't be in the list unless compiled in, so
+ *             the average user (w/out VLANs) will not be adversly affected.
+ *             --BLG
+ *
+ *		0800	IP
+ *		8100    802.1Q VLAN
+ *		0001	802.3
+ *		0002	AX.25
+ *		0004	802.2
+ *		8035	RARP
+ *		0005	SNAP
+ *		0805	X.25
+ *		0806	ARP
+ *		8137	IPX
+ *		0009	Localtalk
+ *		86DD	IPv6
+ */
+
+static DEFINE_SPINLOCK(ptype_lock);
+static struct list_head ptype_base[16];	/* 16 way hashed list */
+static struct list_head ptype_all;		/* Taps */
+
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy);
+static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
+#endif
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtln
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates.  This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+struct net_device *dev_base;
+static struct net_device **dev_tail = &dev_base;
+DEFINE_RWLOCK(dev_base_lock);
+
+EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_lock);
+
+#define NETDEV_HASHBITS	8
+static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+
+static inline struct hlist_head *dev_name_hash(const char *name)
+{
+	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+}
+
+static inline struct hlist_head *dev_index_hash(int ifindex)
+{
+	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+}
+
+/*
+ *	Our notifier list
+ */
+
+static struct notifier_block *netdev_chain;
+
+/*
+ *	Device drivers call our routines to queue packets here. We empty the
+ *	queue in the local softnet handler.
+ */
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+
+#ifdef CONFIG_SYSFS
+extern int netdev_sysfs_init(void);
+extern int netdev_register_sysfs(struct net_device *);
+extern void netdev_unregister_sysfs(struct net_device *);
+#else
+#define netdev_sysfs_init()	 	(0)
+#define netdev_register_sysfs(dev)	(0)
+#define	netdev_unregister_sysfs(dev)	do { } while(0)
+#endif
+
+
+/*******************************************************************************
+
+		Protocol management and registration routines
+
+*******************************************************************************/
+
+/*
+ *	For efficiency
+ */
+
+int netdev_nit;
+
+/*
+ *	Add a protocol ID to the list. Now that the input handler is
+ *	smarter we can dispense with all the messy stuff that used to be
+ *	here.
+ *
+ *	BEWARE!!! Protocol handlers, mangling input packets,
+ *	MUST BE last in hash buckets and checking protocol handlers
+ *	MUST start from promiscuous ptype_all chain in net_bh.
+ *	It is true now, do not change it.
+ *	Explanation follows: if protocol handler, mangling packet, will
+ *	be the first on list, it is not able to sense, that packet
+ *	is cloned and should be copied-on-write, so that it will
+ *	change it and subsequent readers will get broken packet.
+ *							--ANK (980803)
+ */
+
+/**
+ *	dev_add_pack - add packet handler
+ *	@pt: packet type declaration
+ *
+ *	Add a protocol handler to the networking stack. The passed &packet_type
+ *	is linked into kernel lists and may not be freed until it has been
+ *	removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not 
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new packet type (until the next received packet).
+ */
+
+void dev_add_pack(struct packet_type *pt)
+{
+	int hash;
+
+	spin_lock_bh(&ptype_lock);
+	if (pt->type == htons(ETH_P_ALL)) {
+		netdev_nit++;
+		list_add_rcu(&pt->list, &ptype_all);
+	} else {
+		hash = ntohs(pt->type) & 15;
+		list_add_rcu(&pt->list, &ptype_base[hash]);
+	}
+	spin_unlock_bh(&ptype_lock);
+}
+
+extern void linkwatch_run_queue(void);
+
+
+
+/**
+ *	__dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *	from the kernel lists and can be freed or reused once this function
+ *	returns. 
+ *
+ *      The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_pack(struct packet_type *pt)
+{
+	struct list_head *head;
+	struct packet_type *pt1;
+
+	spin_lock_bh(&ptype_lock);
+
+	if (pt->type == htons(ETH_P_ALL)) {
+		netdev_nit--;
+		head = &ptype_all;
+	} else
+		head = &ptype_base[ntohs(pt->type) & 15];
+
+	list_for_each_entry(pt1, head, list) {
+		if (pt == pt1) {
+			list_del_rcu(&pt->list);
+			goto out;
+		}
+	}
+
+	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
+out:
+	spin_unlock_bh(&ptype_lock);
+}
+/**
+ *	dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ *
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ *	from the kernel lists and can be freed or reused once this function
+ *	returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+	__dev_remove_pack(pt);
+	
+	synchronize_net();
+}
+
+/******************************************************************************
+
+		      Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *	netdev_boot_setup_add	- add new setup entry
+ *	@name: name of the device
+ *	@map: configured settings for the device
+ *
+ *	Adds new setup entry to the dev_boot_setup list.  The function
+ *	returns 0 on error and 1 on success.  This is a generic routine to
+ *	all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+	struct netdev_boot_setup *s;
+	int i;
+
+	s = dev_boot_setup;
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+			memset(s[i].name, 0, sizeof(s[i].name));
+			strcpy(s[i].name, name);
+			memcpy(&s[i].map, map, sizeof(s[i].map));
+			break;
+		}
+	}
+
+	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ *	netdev_boot_setup_check	- check boot time settings
+ *	@dev: the netdevice
+ *
+ * 	Check boot time settings for the device.
+ *	The found settings are set for the device to be used
+ *	later in the device probing.
+ *	Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+	struct netdev_boot_setup *s = dev_boot_setup;
+	int i;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
+			dev->irq 	= s[i].map.irq;
+			dev->base_addr 	= s[i].map.base_addr;
+			dev->mem_start 	= s[i].map.mem_start;
+			dev->mem_end 	= s[i].map.mem_end;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
+/**
+ *	netdev_boot_base	- get address from boot time settings
+ *	@prefix: prefix for network device
+ *	@unit: id for network device
+ *
+ * 	Check boot time settings for the base address of device.
+ *	The found settings are set for the device to be used
+ *	later in the device probing.
+ *	Returns 0 if no settings found.
+ */
+unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+	const struct netdev_boot_setup *s = dev_boot_setup;
+	char name[IFNAMSIZ];
+	int i;
+
+	sprintf(name, "%s%d", prefix, unit);
+
+	/*
+	 * If device already registered then return base of 1
+	 * to indicate not to probe for this interface
+	 */
+	if (__dev_get_by_name(name))
+		return 1;
+
+	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+		if (!strcmp(name, s[i].name))
+			return s[i].map.base_addr;
+	return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+int __init netdev_boot_setup(char *str)
+{
+	int ints[5];
+	struct ifmap map;
+
+	str = get_options(str, ARRAY_SIZE(ints), ints);
+	if (!str || !*str)
+		return 0;
+
+	/* Save settings */
+	memset(&map, 0, sizeof(map));
+	if (ints[0] > 0)
+		map.irq = ints[1];
+	if (ints[0] > 1)
+		map.base_addr = ints[2];
+	if (ints[0] > 2)
+		map.mem_start = ints[3];
+	if (ints[0] > 3)
+		map.mem_end = ints[4];
+
+	/* Add new entry to the list */
+	return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+/*******************************************************************************
+
+			    Device Interface Subroutines
+
+*******************************************************************************/
+
+/**
+ *	__dev_get_by_name	- find a device by its name
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under RTNL semaphore
+ *	or @dev_base_lock. If the name is found a pointer to the device
+ *	is returned. If the name is not found then %NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
+ */
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+	struct hlist_node *p;
+
+	hlist_for_each(p, dev_name_hash(name)) {
+		struct net_device *dev
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+	}
+	return NULL;
+}
+
+/**
+ *	dev_get_by_name		- find a device by its name
+ *	@name: name to find
+ *
+ *	Find an interface by name. This can be called from any
+ *	context and does its own locking. The returned handle has
+ *	the usage count incremented and the caller must use dev_put() to
+ *	release it when it is no longer needed. %NULL is returned if no
+ *	matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_name(name);
+	if (dev)
+		dev_hold(dev);
+	read_unlock(&dev_base_lock);
+	return dev;
+}
+
+/**
+ *	__dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold either the RTNL semaphore
+ *	or @dev_base_lock.
+ */
+
+struct net_device *__dev_get_by_index(int ifindex)
+{
+	struct hlist_node *p;
+
+	hlist_for_each(p, dev_index_hash(ifindex)) {
+		struct net_device *dev
+			= hlist_entry(p, struct net_device, index_hlist);
+		if (dev->ifindex == ifindex)
+			return dev;
+	}
+	return NULL;
+}
+
+
+/**
+ *	dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns NULL if the device
+ *	is not found or a pointer to the device. The device returned has
+ *	had a reference added and the pointer is safe until the user calls
+ *	dev_put to indicate they have finished with it.
+ */
+
+struct net_device *dev_get_by_index(int ifindex)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_index(ifindex);
+	if (dev)
+		dev_hold(dev);
+	read_unlock(&dev_base_lock);
+	return dev;
+}
+
+/**
+ *	dev_getbyhwaddr - find a device by its hardware address
+ *	@type: media type of device
+ *	@ha: hardware address
+ *
+ *	Search for an interface by MAC address. Returns NULL if the device
+ *	is not found or a pointer to the device. The caller must hold the
+ *	rtnl semaphore. The returned device has not had its ref count increased
+ *	and the caller must therefore be careful about locking
+ *
+ *	BUGS:
+ *	If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	for (dev = dev_base; dev; dev = dev->next)
+		if (dev->type == type &&
+		    !memcmp(dev->dev_addr, ha, dev->addr_len))
+			break;
+	return dev;
+}
+
+struct net_device *dev_getfirstbyhwtype(unsigned short type)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (dev->type == type) {
+			dev_hold(dev);
+			break;
+		}
+	}
+	rtnl_unlock();
+	return dev;
+}
+
+EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
+/**
+ *	dev_get_by_flags - find any device with given flags
+ *	@if_flags: IFF_* values
+ *	@mask: bitmask of bits in if_flags to check
+ *
+ *	Search for any interface with the given flags. Returns NULL if a device
+ *	is not found or a pointer to the device. The device returned has 
+ *	had a reference added and the pointer is safe until the user calls
+ *	dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if (((dev->flags ^ if_flags) & mask) == 0) {
+			dev_hold(dev);
+			break;
+		}
+	}
+	read_unlock(&dev_base_lock);
+	return dev;
+}
+
+/**
+ *	dev_valid_name - check if name is okay for network device
+ *	@name: name string
+ *
+ *	Network device names need to be valid file names to
+ *	to allow sysfs to work
+ */
+static int dev_valid_name(const char *name)
+{
+	return !(*name == '\0' 
+		 || !strcmp(name, ".")
+		 || !strcmp(name, "..")
+		 || strchr(name, '/'));
+}
+
+/**
+ *	dev_alloc_name - allocate a name for a device
+ *	@dev: device
+ *	@name: name format string
+ *
+ *	Passed a format string - eg "lt%d" it will try and find a suitable
+ *	id. Not efficient for many devices, not called a lot. The caller
+ *	must hold the dev_base or rtnl lock while allocating the name and
+ *	adding the device in order to avoid duplicates. Returns the number
+ *	of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+	int i = 0;
+	char buf[IFNAMSIZ];
+	const char *p;
+	const int max_netdevices = 8*PAGE_SIZE;
+	long *inuse;
+	struct net_device *d;
+
+	p = strnchr(name, IFNAMSIZ-1, '%');
+	if (p) {
+		/*
+		 * Verify the string as this thing may have come from
+		 * the user.  There must be either one "%d" and no other "%"
+		 * characters.
+		 */
+		if (p[1] != 'd' || strchr(p + 2, '%'))
+			return -EINVAL;
+
+		/* Use one page as a bit array of possible slots */
+		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+		if (!inuse)
+			return -ENOMEM;
+
+		for (d = dev_base; d; d = d->next) {
+			if (!sscanf(d->name, name, &i))
+				continue;
+			if (i < 0 || i >= max_netdevices)
+				continue;
+
+			/*  avoid cases where sscanf is not exact inverse of printf */
+			snprintf(buf, sizeof(buf), name, i);
+			if (!strncmp(buf, d->name, IFNAMSIZ))
+				set_bit(i, inuse);
+		}
+
+		i = find_first_zero_bit(inuse, max_netdevices);
+		free_page((unsigned long) inuse);
+	}
+
+	snprintf(buf, sizeof(buf), name, i);
+	if (!__dev_get_by_name(buf)) {
+		strlcpy(dev->name, buf, IFNAMSIZ);
+		return i;
+	}
+
+	/* It is possible to run out of possible slots
+	 * when the name is long and there isn't enough space left
+	 * for the digits, or if all bits are used.
+	 */
+	return -ENFILE;
+}
+
+
+/**
+ *	dev_change_name - change name of a device
+ *	@dev: device
+ *	@newname: name (or format string) must be at least IFNAMSIZ
+ *
+ *	Change name of a device, can pass format strings "eth%d".
+ *	for wildcarding.
+ */
+int dev_change_name(struct net_device *dev, char *newname)
+{
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	if (!dev_valid_name(newname))
+		return -EINVAL;
+
+	if (strchr(newname, '%')) {
+		err = dev_alloc_name(dev, newname);
+		if (err < 0)
+			return err;
+		strcpy(newname, dev->name);
+	}
+	else if (__dev_get_by_name(newname))
+		return -EEXIST;
+	else
+		strlcpy(dev->name, newname, IFNAMSIZ);
+
+	err = class_device_rename(&dev->class_dev, dev->name);
+	if (!err) {
+		hlist_del(&dev->name_hlist);
+		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+	}
+
+	return err;
+}
+
+/**
+ *	netdev_state_change - device changes state
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed state. This function calls
+ *	the notifier chains for netdev_chain and sends a NEWLINK message
+ *	to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+	if (dev->flags & IFF_UP) {
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+	}
+}
+
+/**
+ *	dev_load 	- load a network module
+ *	@name: name of interface
+ *
+ *	If a network interface is not present and the process has suitable
+ *	privileges this function loads the module. If module loading is not
+ *	available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+	struct net_device *dev;  
+
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_name(name);
+	read_unlock(&dev_base_lock);
+
+	if (!dev && capable(CAP_SYS_MODULE))
+		request_module("%s", name);
+}
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
+	       skb->dev ? skb->dev->name : "NULL!!!");
+	kfree_skb(skb);
+	return 1;
+}
+
+
+/**
+ *	dev_open	- prepare an interface for use.
+ *	@dev:	device to open
+ *
+ *	Takes a device from down to up state. The device's private open
+ *	function is invoked and then the multicast lists are loaded. Finally
+ *	the device is moved into the up state and a %NETDEV_UP message is
+ *	sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop. On a failure
+ *	a negative errno code is returned.
+ */
+int dev_open(struct net_device *dev)
+{
+	int ret = 0;
+
+	/*
+	 *	Is it already up?
+	 */
+
+	if (dev->flags & IFF_UP)
+		return 0;
+
+	/*
+	 *	Is it even present?
+	 */
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	/*
+	 *	Call device private open method
+	 */
+	set_bit(__LINK_STATE_START, &dev->state);
+	if (dev->open) {
+		ret = dev->open(dev);
+		if (ret)
+			clear_bit(__LINK_STATE_START, &dev->state);
+	}
+
+ 	/*
+	 *	If it went open OK then:
+	 */
+
+	if (!ret) {
+		/*
+		 *	Set the flags.
+		 */
+		dev->flags |= IFF_UP;
+
+		/*
+		 *	Initialize multicasting status
+		 */
+		dev_mc_upload(dev);
+
+		/*
+		 *	Wakeup transmit queue engine
+		 */
+		dev_activate(dev);
+
+		/*
+		 *	... and announce new interface.
+		 */
+		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+	}
+	return ret;
+}
+
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A
+ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *	chain.
+ */
+int dev_close(struct net_device *dev)
+{
+	if (!(dev->flags & IFF_UP))
+		return 0;
+
+	/*
+	 *	Tell people we are going down, so that they can
+	 *	prepare to death, when device is still operating.
+	 */
+	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+	dev_deactivate(dev);
+
+	clear_bit(__LINK_STATE_START, &dev->state);
+
+	/* Synchronize to scheduled poll. We cannot touch poll list,
+	 * it can be even on different cpu. So just clear netif_running(),
+	 * and wait when poll really will happen. Actually, the best place
+	 * for this is inside dev->stop() after device stopped its irq
+	 * engine, but this requires more changes in devices. */
+
+	smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+		/* No hurry. */
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(1);
+	}
+
+	/*
+	 *	Call the device specific close. This cannot fail.
+	 *	Only if device is UP
+	 *
+	 *	We allow it to be called even after a DETACH hot-plug
+	 *	event.
+	 */
+	if (dev->stop)
+		dev->stop(dev);
+
+	/*
+	 *	Device is now down.
+	 */
+
+	dev->flags &= ~IFF_UP;
+
+	/*
+	 * Tell people we are down
+	 */
+	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+	return 0;
+}
+
+
+/*
+ *	Device change register/unregister. These are not inline or static
+ *	as we export them to the world.
+ */
+
+/**
+ *	register_netdevice_notifier - register a network notifier block
+ *	@nb: notifier
+ *
+ *	Register a notifier to be called when network device events occur.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. A negative errno code
+ *	is returned on a failure.
+ *
+ * 	When registered all registration and up events are replayed
+ *	to the new notifier to allow device to have a race free 
+ *	view of the network device list.
+ */
+
+int register_netdevice_notifier(struct notifier_block *nb)
+{
+	struct net_device *dev;
+	int err;
+
+	rtnl_lock();
+	err = notifier_chain_register(&netdev_chain, nb);
+	if (!err) {
+		for (dev = dev_base; dev; dev = dev->next) {
+			nb->notifier_call(nb, NETDEV_REGISTER, dev);
+
+			if (dev->flags & IFF_UP) 
+				nb->notifier_call(nb, NETDEV_UP, dev);
+		}
+	}
+	rtnl_unlock();
+	return err;
+}
+
+/**
+ *	unregister_netdevice_notifier - unregister a network notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by
+ *	register_netdevice_notifier(). The notifier is unlinked into the
+ *	kernel structures and may then be reused. A negative errno code
+ *	is returned on a failure.
+ */
+
+int unregister_netdevice_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&netdev_chain, nb);
+}
+
+/**
+ *	call_netdevice_notifiers - call all network notifier blocks
+ *      @val: value passed unmodified to notifier function
+ *      @v:   pointer passed unmodified to notifier function
+ *
+ *	Call all network notifier blocks.  Parameters and return value
+ *	are as for notifier_call_chain().
+ */
+
+int call_netdevice_notifiers(unsigned long val, void *v)
+{
+	return notifier_call_chain(&netdev_chain, val, v);
+}
+
+/* When > 0 there are consumers of rx skb time stamps */
+static atomic_t netstamp_needed = ATOMIC_INIT(0);
+
+void net_enable_timestamp(void)
+{
+	atomic_inc(&netstamp_needed);
+}
+
+void net_disable_timestamp(void)
+{
+	atomic_dec(&netstamp_needed);
+}
+
+static inline void net_timestamp(struct timeval *stamp)
+{
+	if (atomic_read(&netstamp_needed))
+		do_gettimeofday(stamp);
+	else {
+		stamp->tv_sec = 0;
+		stamp->tv_usec = 0;
+	}
+}
+
+/*
+ *	Support routine. Sends outgoing frames to any network
+ *	taps currently in use.
+ */
+
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct packet_type *ptype;
+	net_timestamp(&skb->stamp);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		/* Never send packets back to the socket
+		 * they originated from - MvS (miquels@drinkel.ow.org)
+		 */
+		if ((ptype->dev == dev || !ptype->dev) &&
+		    (ptype->af_packet_priv == NULL ||
+		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+			if (!skb2)
+				break;
+
+			/* skb->nh should be correctly
+			   set by sender, so that the second statement is
+			   just protection against buggy protocols.
+			 */
+			skb2->mac.raw = skb2->data;
+
+			if (skb2->nh.raw < skb2->data ||
+			    skb2->nh.raw > skb2->tail) {
+				if (net_ratelimit())
+					printk(KERN_CRIT "protocol %04x is "
+					       "buggy, dev %s\n",
+					       skb2->protocol, dev->name);
+				skb2->nh.raw = skb2->data;
+			}
+
+			skb2->h.raw = skb2->nh.raw;
+			skb2->pkt_type = PACKET_OUTGOING;
+			ptype->func(skb2, skb->dev, ptype);
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate hardware checksum when packet is to be mangled, and
+ * complete checksum manually on outgoing path.
+ */
+int skb_checksum_help(struct sk_buff *skb, int inward)
+{
+	unsigned int csum;
+	int ret = 0, offset = skb->h.raw - skb->data;
+
+	if (inward) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto out;
+	}
+
+	if (skb_cloned(skb)) {
+		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (ret)
+			goto out;
+	}
+
+	if (offset > (int)skb->len)
+		BUG();
+	csum = skb_checksum(skb, offset, skb->len-offset, 0);
+
+	offset = skb->tail - skb->h.raw;
+	if (offset <= 0)
+		BUG();
+	if (skb->csum + 2 > offset)
+		BUG();
+
+	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
+	skb->ip_summed = CHECKSUM_NONE;
+out:	
+	return ret;
+}
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know, that:
+ * 1. IOMMU is present and allows to map all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+	int i;
+
+	if (dev->features & NETIF_F_HIGHDMA)
+		return 0;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+			return 1;
+
+	return 0;
+}
+#else
+#define illegal_highdma(dev, skb)	(0)
+#endif
+
+extern void skb_release_data(struct sk_buff *);
+
+/* Keep head the same: replace data */
+int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+	unsigned int size;
+	u8 *data;
+	long offset;
+	struct skb_shared_info *ninfo;
+	int headerlen = skb->data - skb->head;
+	int expand = (skb->tail + skb->data_len) - skb->end;
+
+	if (skb_shared(skb))
+		BUG();
+
+	if (expand <= 0)
+		expand = 0;
+
+	size = skb->end - skb->head + expand;
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		return -ENOMEM;
+
+	/* Copy entire thing */
+	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
+		BUG();
+
+	/* Set up shinfo */
+	ninfo = (struct skb_shared_info*)(data + size);
+	atomic_set(&ninfo->dataref, 1);
+	ninfo->tso_size = skb_shinfo(skb)->tso_size;
+	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+	ninfo->nr_frags = 0;
+	ninfo->frag_list = NULL;
+
+	/* Offset between the two in bytes */
+	offset = data - skb->head;
+
+	/* Free old data. */
+	skb_release_data(skb);
+
+	skb->head = data;
+	skb->end  = data + size;
+
+	/* Set up new pointers */
+	skb->h.raw   += offset;
+	skb->nh.raw  += offset;
+	skb->mac.raw += offset;
+	skb->tail    += offset;
+	skb->data    += offset;
+
+	/* We are no longer a clone, even if we were. */
+	skb->cloned    = 0;
+
+	skb->tail     += skb->data_len;
+	skb->data_len  = 0;
+	return 0;
+}
+
+#define HARD_TX_LOCK(dev, cpu) {			\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		spin_lock(&dev->xmit_lock);		\
+		dev->xmit_lock_owner = cpu;		\
+	}						\
+}
+
+#define HARD_TX_UNLOCK(dev) {				\
+	if ((dev->features & NETIF_F_LLTX) == 0) {	\
+		dev->xmit_lock_owner = -1;		\
+		spin_unlock(&dev->xmit_lock);		\
+	}						\
+}
+
+/**
+ *	dev_queue_xmit - transmit a buffer
+ *	@skb: buffer to transmit
+ *
+ *	Queue a buffer for transmission to a network device. The caller must
+ *	have set the device and priority and built the buffer before calling
+ *	this function. The function can be called from an interrupt.
+ *
+ *	A negative errno code is returned on a failure. A success does not
+ *	guarantee the frame will be transmitted as it may be dropped due
+ *	to congestion or traffic shaping.
+ */
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct Qdisc *q;
+	int rc = -ENOMEM;
+
+	if (skb_shinfo(skb)->frag_list &&
+	    !(dev->features & NETIF_F_FRAGLIST) &&
+	    __skb_linearize(skb, GFP_ATOMIC))
+		goto out_kfree_skb;
+
+	/* Fragmented skb is linearized if device does not support SG,
+	 * or if at least one of fragments is in highmem and device
+	 * does not support DMA from it.
+	 */
+	if (skb_shinfo(skb)->nr_frags &&
+	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+	    __skb_linearize(skb, GFP_ATOMIC))
+		goto out_kfree_skb;
+
+	/* If packet is not checksummed and device does not support
+	 * checksumming for this protocol, complete checksumming here.
+	 */
+	if (skb->ip_summed == CHECKSUM_HW &&
+	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+	     (!(dev->features & NETIF_F_IP_CSUM) ||
+	      skb->protocol != htons(ETH_P_IP))))
+	      	if (skb_checksum_help(skb, 0))
+	      		goto out_kfree_skb;
+
+	/* Disable soft irqs for various locks below. Also 
+	 * stops preemption for RCU. 
+	 */
+	local_bh_disable(); 
+
+	/* Updates of qdisc are serialized by queue_lock. 
+	 * The struct Qdisc which is pointed to by qdisc is now a 
+	 * rcu structure - it may be accessed without acquiring 
+	 * a lock (but the structure may be stale.) The freeing of the
+	 * qdisc will be deferred until it's known that there are no 
+	 * more references to it.
+	 * 
+	 * If the qdisc has an enqueue function, we still need to 
+	 * hold the queue_lock before calling it, since queue_lock
+	 * also serializes access to the device queue.
+	 */
+
+	q = rcu_dereference(dev->qdisc);
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+#endif
+	if (q->enqueue) {
+		/* Grab device queue */
+		spin_lock(&dev->queue_lock);
+
+		rc = q->enqueue(skb, q);
+
+		qdisc_run(dev);
+
+		spin_unlock(&dev->queue_lock);
+		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+		goto out;
+	}
+
+	/* The device has no queue. Common case for software devices:
+	   loopback, all the sorts of tunnels...
+
+	   Really, it is unlikely that xmit_lock protection is necessary here.
+	   (f.e. loopback and IP tunnels are clean ignoring statistics
+	   counters.)
+	   However, it is possible, that they rely on protection
+	   made by us here.
+
+	   Check this and shot the lock. It is not prone from deadlocks.
+	   Either shot noqueue qdisc, it is even simpler 8)
+	 */
+	if (dev->flags & IFF_UP) {
+		int cpu = smp_processor_id(); /* ok because BHs are off */
+
+		if (dev->xmit_lock_owner != cpu) {
+
+			HARD_TX_LOCK(dev, cpu);
+
+			if (!netif_queue_stopped(dev)) {
+				if (netdev_nit)
+					dev_queue_xmit_nit(skb, dev);
+
+				rc = 0;
+				if (!dev->hard_start_xmit(skb, dev)) {
+					HARD_TX_UNLOCK(dev);
+					goto out;
+				}
+			}
+			HARD_TX_UNLOCK(dev);
+			if (net_ratelimit())
+				printk(KERN_CRIT "Virtual device %s asks to "
+				       "queue packet!\n", dev->name);
+		} else {
+			/* Recursion is detected! It is possible,
+			 * unfortunately */
+			if (net_ratelimit())
+				printk(KERN_CRIT "Dead loop on virtual device "
+				       "%s, fix it urgently!\n", dev->name);
+		}
+	}
+
+	rc = -ENETDOWN;
+	local_bh_enable();
+
+out_kfree_skb:
+	kfree_skb(skb);
+	return rc;
+out:
+	local_bh_enable();
+	return rc;
+}
+
+
+/*=======================================================================
+			Receiver routines
+  =======================================================================*/
+
+int netdev_max_backlog = 300;
+int weight_p = 64;            /* old backlog weight */
+/* These numbers are selected based on intuition and some
+ * experimentatiom, if you have more scientific way of doing this
+ * please go ahead and fix things.
+ */
+int no_cong_thresh = 10;
+int no_cong = 20;
+int lo_cong = 100;
+int mod_cong = 290;
+
+DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+
+
+static void get_sample_stats(int cpu)
+{
+#ifdef RAND_LIE
+	unsigned long rd;
+	int rq;
+#endif
+	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+	int blog = sd->input_pkt_queue.qlen;
+	int avg_blog = sd->avg_blog;
+
+	avg_blog = (avg_blog >> 1) + (blog >> 1);
+
+	if (avg_blog > mod_cong) {
+		/* Above moderate congestion levels. */
+		sd->cng_level = NET_RX_CN_HIGH;
+#ifdef RAND_LIE
+		rd = net_random();
+		rq = rd % netdev_max_backlog;
+		if (rq < avg_blog) /* unlucky bastard */
+			sd->cng_level = NET_RX_DROP;
+#endif
+	} else if (avg_blog > lo_cong) {
+		sd->cng_level = NET_RX_CN_MOD;
+#ifdef RAND_LIE
+		rd = net_random();
+		rq = rd % netdev_max_backlog;
+			if (rq < avg_blog) /* unlucky bastard */
+				sd->cng_level = NET_RX_CN_HIGH;
+#endif
+	} else if (avg_blog > no_cong)
+		sd->cng_level = NET_RX_CN_LOW;
+	else  /* no congestion */
+		sd->cng_level = NET_RX_SUCCESS;
+
+	sd->avg_blog = avg_blog;
+}
+
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy)
+{
+/* 10 ms 0r 1ms -- i don't care -- JHS */
+	int next_tick = 1;
+	int cpu = smp_processor_id();
+
+	get_sample_stats(cpu);
+	next_tick += jiffies;
+	mod_timer(&samp_timer, next_tick);
+}
+#endif
+
+
+/**
+ *	netif_rx	-	post buffer to the network code
+ *	@skb: buffer to post
+ *
+ *	This function receives a packet from a device driver and queues it for
+ *	the upper (protocol) levels to process.  It always succeeds. The buffer
+ *	may be dropped during processing for congestion control or by the
+ *	protocol layers.
+ *
+ *	return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_CN_LOW   (low congestion)
+ *	NET_RX_CN_MOD   (moderate congestion)
+ *	NET_RX_CN_HIGH  (high congestion)
+ *	NET_RX_DROP     (packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+	int this_cpu;
+	struct softnet_data *queue;
+	unsigned long flags;
+
+	/* if netpoll wants it, pretend we never saw it */
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
+	if (!skb->stamp.tv_sec)
+		net_timestamp(&skb->stamp);
+
+	/*
+	 * The code is rearranged so that the path is the most
+	 * short when CPU is congested, but is still operating.
+	 */
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	queue = &__get_cpu_var(softnet_data);
+
+	__get_cpu_var(netdev_rx_stat).total++;
+	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (queue->input_pkt_queue.qlen) {
+			if (queue->throttle)
+				goto drop;
+
+enqueue:
+			dev_hold(skb->dev);
+			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifndef OFFLINE_SAMPLE
+			get_sample_stats(this_cpu);
+#endif
+			local_irq_restore(flags);
+			return queue->cng_level;
+		}
+
+		if (queue->throttle)
+			queue->throttle = 0;
+
+		netif_rx_schedule(&queue->backlog_dev);
+		goto enqueue;
+	}
+
+	if (!queue->throttle) {
+		queue->throttle = 1;
+		__get_cpu_var(netdev_rx_stat).throttled++;
+	}
+
+drop:
+	__get_cpu_var(netdev_rx_stat).dropped++;
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+int netif_rx_ni(struct sk_buff *skb)
+{
+	int err;
+
+	preempt_disable();
+	err = netif_rx(skb);
+	if (local_softirq_pending())
+		do_softirq();
+	preempt_enable();
+
+	return err;
+}
+
+EXPORT_SYMBOL(netif_rx_ni);
+
+static __inline__ void skb_bond(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	if (dev->master) {
+		skb->real_dev = skb->dev;
+		skb->dev = dev->master;
+	}
+}
+
+static void net_tx_action(struct softirq_action *h)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {
+		struct sk_buff *clist;
+
+		local_irq_disable();
+		clist = sd->completion_queue;
+		sd->completion_queue = NULL;
+		local_irq_enable();
+
+		while (clist) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+
+			BUG_TRAP(!atomic_read(&skb->users));
+			__kfree_skb(skb);
+		}
+	}
+
+	if (sd->output_queue) {
+		struct net_device *head;
+
+		local_irq_disable();
+		head = sd->output_queue;
+		sd->output_queue = NULL;
+		local_irq_enable();
+
+		while (head) {
+			struct net_device *dev = head;
+			head = head->next_sched;
+
+			smp_mb__before_clear_bit();
+			clear_bit(__LINK_STATE_SCHED, &dev->state);
+
+			if (spin_trylock(&dev->queue_lock)) {
+				qdisc_run(dev);
+				spin_unlock(&dev->queue_lock);
+			} else {
+				netif_schedule(dev);
+			}
+		}
+	}
+}
+
+static __inline__ int deliver_skb(struct sk_buff *skb,
+				  struct packet_type *pt_prev)
+{
+	atomic_inc(&skb->users);
+	return pt_prev->func(skb, skb->dev, pt_prev);
+}
+
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+struct net_bridge;
+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
+						unsigned char *addr);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+
+static __inline__ int handle_bridge(struct sk_buff **pskb,
+				    struct packet_type **pt_prev, int *ret)
+{
+	struct net_bridge_port *port;
+
+	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
+	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
+		return 0;
+
+	if (*pt_prev) {
+		*ret = deliver_skb(*pskb, *pt_prev);
+		*pt_prev = NULL;
+	} 
+	
+	return br_handle_frame_hook(port, pskb);
+}
+#else
+#define handle_bridge(skb, pt_prev, ret)	(0)
+#endif
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
+ * a compare and 2 stores extra right now if we dont have it on
+ * but have CONFIG_NET_CLS_ACT
+ * NOTE: This doesnt stop any functionality; if you dont have 
+ * the ingress scheduler, you just cant add policies on ingress.
+ *
+ */
+static int ing_filter(struct sk_buff *skb) 
+{
+	struct Qdisc *q;
+	struct net_device *dev = skb->dev;
+	int result = TC_ACT_OK;
+	
+	if (dev->qdisc_ingress) {
+		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+		if (MAX_RED_LOOP < ttl++) {
+			printk("Redir loop detected Dropping packet (%s->%s)\n",
+				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+			return TC_ACT_SHOT;
+		}
+
+		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+
+		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+		if (NULL == skb->input_dev) {
+			skb->input_dev = skb->dev;
+			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
+		}
+		spin_lock(&dev->ingress_lock);
+		if ((q = dev->qdisc_ingress) != NULL)
+			result = q->enqueue(skb, q);
+		spin_unlock(&dev->ingress_lock);
+
+	}
+
+	return result;
+}
+#endif
+
+int netif_receive_skb(struct sk_buff *skb)
+{
+	struct packet_type *ptype, *pt_prev;
+	int ret = NET_RX_DROP;
+	unsigned short type;
+
+	/* if we've gotten here through NAPI, check netpoll */
+	if (skb->dev->poll && netpoll_rx(skb))
+		return NET_RX_DROP;
+
+	if (!skb->stamp.tv_sec)
+		net_timestamp(&skb->stamp);
+
+	skb_bond(skb);
+
+	__get_cpu_var(netdev_rx_stat).total++;
+
+	skb->h.raw = skb->nh.raw = skb->data;
+	skb->mac_len = skb->nh.raw - skb->mac.raw;
+
+	pt_prev = NULL;
+
+	rcu_read_lock();
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (skb->tc_verd & TC_NCLS) {
+		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+		goto ncls;
+	}
+#endif
+
+	list_for_each_entry_rcu(ptype, &ptype_all, list) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
+			if (pt_prev) 
+				ret = deliver_skb(skb, pt_prev);
+			pt_prev = ptype;
+		}
+	}
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (pt_prev) {
+		ret = deliver_skb(skb, pt_prev);
+		pt_prev = NULL; /* noone else should process this after*/
+	} else {
+		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+	}
+
+	ret = ing_filter(skb);
+
+	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	skb->tc_verd = 0;
+ncls:
+#endif
+
+	handle_diverter(skb);
+
+	if (handle_bridge(&skb, &pt_prev, &ret))
+		goto out;
+
+	type = skb->protocol;
+	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+		if (ptype->type == type &&
+		    (!ptype->dev || ptype->dev == skb->dev)) {
+			if (pt_prev) 
+				ret = deliver_skb(skb, pt_prev);
+			pt_prev = ptype;
+		}
+	}
+
+	if (pt_prev) {
+		ret = pt_prev->func(skb, skb->dev, pt_prev);
+	} else {
+		kfree_skb(skb);
+		/* Jamal, now you will not able to escape explaining
+		 * me how you were going to use this. :-)
+		 */
+		ret = NET_RX_DROP;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static int process_backlog(struct net_device *backlog_dev, int *budget)
+{
+	int work = 0;
+	int quota = min(backlog_dev->quota, *budget);
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	unsigned long start_time = jiffies;
+
+	for (;;) {
+		struct sk_buff *skb;
+		struct net_device *dev;
+
+		local_irq_disable();
+		skb = __skb_dequeue(&queue->input_pkt_queue);
+		if (!skb)
+			goto job_done;
+		local_irq_enable();
+
+		dev = skb->dev;
+
+		netif_receive_skb(skb);
+
+		dev_put(dev);
+
+		work++;
+
+		if (work >= quota || jiffies - start_time > 1)
+			break;
+
+	}
+
+	backlog_dev->quota -= work;
+	*budget -= work;
+	return -1;
+
+job_done:
+	backlog_dev->quota -= work;
+	*budget -= work;
+
+	list_del(&backlog_dev->poll_list);
+	smp_mb__before_clear_bit();
+	netif_poll_enable(backlog_dev);
+
+	if (queue->throttle)
+		queue->throttle = 0;
+	local_irq_enable();
+	return 0;
+}
+
+static void net_rx_action(struct softirq_action *h)
+{
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	unsigned long start_time = jiffies;
+	int budget = netdev_max_backlog;
+
+	
+	local_irq_disable();
+
+	while (!list_empty(&queue->poll_list)) {
+		struct net_device *dev;
+
+		if (budget <= 0 || jiffies - start_time > 1)
+			goto softnet_break;
+
+		local_irq_enable();
+
+		dev = list_entry(queue->poll_list.next,
+				 struct net_device, poll_list);
+		netpoll_poll_lock(dev);
+
+		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+			netpoll_poll_unlock(dev);
+			local_irq_disable();
+			list_del(&dev->poll_list);
+			list_add_tail(&dev->poll_list, &queue->poll_list);
+			if (dev->quota < 0)
+				dev->quota += dev->weight;
+			else
+				dev->quota = dev->weight;
+		} else {
+			netpoll_poll_unlock(dev);
+			dev_put(dev);
+			local_irq_disable();
+		}
+	}
+out:
+	local_irq_enable();
+	return;
+
+softnet_break:
+	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	goto out;
+}
+
+static gifconf_func_t * gifconf_list [NPROTO];
+
+/**
+ *	register_gifconf	-	register a SIOCGIF handler
+ *	@family: Address family
+ *	@gifconf: Function handler
+ *
+ *	Register protocol dependent address dumping routines. The handler
+ *	that is passed must not be freed or reused until it has been replaced
+ *	by another handler.
+ */
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
+{
+	if (family >= NPROTO)
+		return -EINVAL;
+	gifconf_list[family] = gifconf;
+	return 0;
+}
+
+
+/*
+ *	Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ *	We need this ioctl for efficient implementation of the
+ *	if_indextoname() function required by the IPv6 API.  Without
+ *	it, we would have to search all the interfaces to find a
+ *	match.  --pb
+ */
+
+static int dev_ifname(struct ifreq __user *arg)
+{
+	struct net_device *dev;
+	struct ifreq ifr;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_index(ifr.ifr_ifindex);
+	if (!dev) {
+		read_unlock(&dev_base_lock);
+		return -ENODEV;
+	}
+
+	strcpy(ifr.ifr_name, dev->name);
+	read_unlock(&dev_base_lock);
+
+	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	Perform a SIOCGIFCONF call. This structure will change
+ *	size eventually, and there is nothing I can do about it.
+ *	Thus we will need a 'compatibility mode'.
+ */
+
+static int dev_ifconf(char __user *arg)
+{
+	struct ifconf ifc;
+	struct net_device *dev;
+	char __user *pos;
+	int len;
+	int total;
+	int i;
+
+	/*
+	 *	Fetch the caller's info block.
+	 */
+
+	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+		return -EFAULT;
+
+	pos = ifc.ifc_buf;
+	len = ifc.ifc_len;
+
+	/*
+	 *	Loop over the interfaces, and write an info block for each.
+	 */
+
+	total = 0;
+	for (dev = dev_base; dev; dev = dev->next) {
+		for (i = 0; i < NPROTO; i++) {
+			if (gifconf_list[i]) {
+				int done;
+				if (!pos)
+					done = gifconf_list[i](dev, NULL, 0);
+				else
+					done = gifconf_list[i](dev, pos + total,
+							       len - total);
+				if (done < 0)
+					return -EFAULT;
+				total += done;
+			}
+		}
+  	}
+
+	/*
+	 *	All done.  Write the updated control block back to the caller.
+	 */
+	ifc.ifc_len = total;
+
+	/*
+	 * 	Both BSD and Solaris return 0 here, so we do too.
+	 */
+	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	This is invoked by the /proc filesystem handler to display a device
+ *	in detail.
+ */
+static __inline__ struct net_device *dev_get_idx(loff_t pos)
+{
+	struct net_device *dev;
+	loff_t i;
+
+	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
+
+	return i == pos ? dev : NULL;
+}
+
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
+}
+
+void dev_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+	if (dev->get_stats) {
+		struct net_device_stats *stats = dev->get_stats(dev);
+
+		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+			   dev->name, stats->rx_bytes, stats->rx_packets,
+			   stats->rx_errors,
+			   stats->rx_dropped + stats->rx_missed_errors,
+			   stats->rx_fifo_errors,
+			   stats->rx_length_errors + stats->rx_over_errors +
+			     stats->rx_crc_errors + stats->rx_frame_errors,
+			   stats->rx_compressed, stats->multicast,
+			   stats->tx_bytes, stats->tx_packets,
+			   stats->tx_errors, stats->tx_dropped,
+			   stats->tx_fifo_errors, stats->collisions,
+			   stats->tx_carrier_errors +
+			     stats->tx_aborted_errors +
+			     stats->tx_window_errors +
+			     stats->tx_heartbeat_errors,
+			   stats->tx_compressed);
+	} else
+		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+}
+
+/*
+ *	Called from the PROCfs module. This now uses the new arbitrary sized
+ *	/proc/net interface to create /proc/net/dev
+ */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Inter-|   Receive                            "
+			      "                    |  Transmit\n"
+			      " face |bytes    packets errs drop fifo frame "
+			      "compressed multicast|bytes    packets errs "
+			      "drop fifo colls carrier compressed\n");
+	else
+		dev_seq_printf_stats(seq, v);
+	return 0;
+}
+
+static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+{
+	struct netif_rx_stats *rc = NULL;
+
+	while (*pos < NR_CPUS)
+	       	if (cpu_online(*pos)) {
+			rc = &per_cpu(netdev_rx_stat, *pos);
+			break;
+		} else
+			++*pos;
+	return rc;
+}
+
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return softnet_get_online(pos);
+}
+
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return softnet_get_online(pos);
+}
+
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+	struct netif_rx_stats *s = v;
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   s->total, s->dropped, s->time_squeeze, s->throttled,
+		   s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
+		   s->fastroute_deferred_out,
+#if 0
+		   s->fastroute_latency_reduction
+#else
+		   s->cpu_collision
+#endif
+		  );
+	return 0;
+}
+
+static struct seq_operations dev_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_seq_show,
+};
+
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dev_seq_ops);
+}
+
+static struct file_operations dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static struct seq_operations softnet_seq_ops = {
+	.start = softnet_seq_start,
+	.next  = softnet_seq_next,
+	.stop  = softnet_seq_stop,
+	.show  = softnet_seq_show,
+};
+
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &softnet_seq_ops);
+}
+
+static struct file_operations softnet_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = softnet_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#ifdef WIRELESS_EXT
+extern int wireless_proc_init(void);
+#else
+#define wireless_proc_init() 0
+#endif
+
+static int __init dev_proc_init(void)
+{
+	int rc = -ENOMEM;
+
+	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+		goto out;
+	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+		goto out_dev;
+	if (wireless_proc_init())
+		goto out_softnet;
+	rc = 0;
+out:
+	return rc;
+out_softnet:
+	proc_net_remove("softnet_stat");
+out_dev:
+	proc_net_remove("dev");
+	goto out;
+}
+#else
+#define dev_proc_init() 0
+#endif	/* CONFIG_PROC_FS */
+
+
+/**
+ *	netdev_set_master	-	set up master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success the reference counts
+ *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ *	function returns zero.
+ */
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+	struct net_device *old = slave->master;
+
+	ASSERT_RTNL();
+
+	if (master) {
+		if (old)
+			return -EBUSY;
+		dev_hold(master);
+	}
+
+	slave->master = master;
+	
+	synchronize_net();
+
+	if (old)
+		dev_put(old);
+
+	if (master)
+		slave->flags |= IFF_SLAVE;
+	else
+		slave->flags &= ~IFF_SLAVE;
+
+	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+	return 0;
+}
+
+/**
+ *	dev_set_promiscuity	- update promiscuity count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove promsicuity from a device. While the count in the device
+ *	remains above zero the interface remains promiscuous. Once it hits zero
+ *	the device reverts back to normal filtering operation. A negative inc
+ *	value is used to drop promiscuity on the device.
+ */
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+	unsigned short old_flags = dev->flags;
+
+	dev->flags |= IFF_PROMISC;
+	if ((dev->promiscuity += inc) == 0)
+		dev->flags &= ~IFF_PROMISC;
+	if (dev->flags ^ old_flags) {
+		dev_mc_upload(dev);
+		printk(KERN_INFO "device %s %s promiscuous mode\n",
+		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
+		       					       "left");
+	}
+}
+
+/**
+ *	dev_set_allmulti	- update allmulti count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove reception of all multicast frames to a device. While the
+ *	count in the device remains above zero the interface remains listening
+ *	to all interfaces. Once it hits zero the device reverts back to normal
+ *	filtering operation. A negative @inc value is used to drop the counter
+ *	when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+	unsigned short old_flags = dev->flags;
+
+	dev->flags |= IFF_ALLMULTI;
+	if ((dev->allmulti += inc) == 0)
+		dev->flags &= ~IFF_ALLMULTI;
+	if (dev->flags ^ old_flags)
+		dev_mc_upload(dev);
+}
+
+unsigned dev_get_flags(const struct net_device *dev)
+{
+	unsigned flags;
+
+	flags = (dev->flags & ~(IFF_PROMISC |
+				IFF_ALLMULTI |
+				IFF_RUNNING)) | 
+		(dev->gflags & (IFF_PROMISC |
+				IFF_ALLMULTI));
+
+	if (netif_running(dev) && netif_carrier_ok(dev))
+		flags |= IFF_RUNNING;
+
+	return flags;
+}
+
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+	int ret;
+	int old_flags = dev->flags;
+
+	/*
+	 *	Set the flags on our device.
+	 */
+
+	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
+			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
+			       IFF_AUTOMEDIA)) |
+		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
+				    IFF_ALLMULTI));
+
+	/*
+	 *	Load in the correct multicast list now the flags have changed.
+	 */
+
+	dev_mc_upload(dev);
+
+	/*
+	 *	Have we downed the interface. We handle IFF_UP ourselves
+	 *	according to user attempts to set it, rather than blindly
+	 *	setting it.
+	 */
+
+	ret = 0;
+	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
+		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+		if (!ret)
+			dev_mc_upload(dev);
+	}
+
+	if (dev->flags & IFF_UP &&
+	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
+					  IFF_VOLATILE)))
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+	if ((flags ^ dev->gflags) & IFF_PROMISC) {
+		int inc = (flags & IFF_PROMISC) ? +1 : -1;
+		dev->gflags ^= IFF_PROMISC;
+		dev_set_promiscuity(dev, inc);
+	}
+
+	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+	   is important. Some (broken) drivers set IFF_PROMISC, when
+	   IFF_ALLMULTI is requested not asking us and not reporting.
+	 */
+	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
+		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
+		dev->gflags ^= IFF_ALLMULTI;
+		dev_set_allmulti(dev, inc);
+	}
+
+	if (old_flags ^ dev->flags)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
+
+	return ret;
+}
+
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	int err;
+
+	if (new_mtu == dev->mtu)
+		return 0;
+
+	/*	MTU must be positive.	 */
+	if (new_mtu < 0)
+		return -EINVAL;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	err = 0;
+	if (dev->change_mtu)
+		err = dev->change_mtu(dev, new_mtu);
+	else
+		dev->mtu = new_mtu;
+	if (!err && dev->flags & IFF_UP)
+		notifier_call_chain(&netdev_chain,
+				    NETDEV_CHANGEMTU, dev);
+	return err;
+}
+
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+{
+	int err;
+
+	if (!dev->set_mac_address)
+		return -EOPNOTSUPP;
+	if (sa->sa_family != dev->type)
+		return -EINVAL;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	err = dev->set_mac_address(dev, sa);
+	if (!err)
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+	return err;
+}
+
+/*
+ *	Perform the SIOCxIFxxx calls.
+ */
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (cmd) {
+		case SIOCGIFFLAGS:	/* Get interface flags */
+			ifr->ifr_flags = dev_get_flags(dev);
+			return 0;
+
+		case SIOCSIFFLAGS:	/* Set interface flags */
+			return dev_change_flags(dev, ifr->ifr_flags);
+
+		case SIOCGIFMETRIC:	/* Get the metric on the interface
+					   (currently unused) */
+			ifr->ifr_metric = 0;
+			return 0;
+
+		case SIOCSIFMETRIC:	/* Set the metric on the interface
+					   (currently unused) */
+			return -EOPNOTSUPP;
+
+		case SIOCGIFMTU:	/* Get the MTU of a device */
+			ifr->ifr_mtu = dev->mtu;
+			return 0;
+
+		case SIOCSIFMTU:	/* Set the MTU of a device */
+			return dev_set_mtu(dev, ifr->ifr_mtu);
+
+		case SIOCGIFHWADDR:
+			if (!dev->addr_len)
+				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+			else
+				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			ifr->ifr_hwaddr.sa_family = dev->type;
+			return 0;
+
+		case SIOCSIFHWADDR:
+			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+		case SIOCSIFHWBROADCAST:
+			if (ifr->ifr_hwaddr.sa_family != dev->type)
+				return -EINVAL;
+			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_CHANGEADDR, dev);
+			return 0;
+
+		case SIOCGIFMAP:
+			ifr->ifr_map.mem_start = dev->mem_start;
+			ifr->ifr_map.mem_end   = dev->mem_end;
+			ifr->ifr_map.base_addr = dev->base_addr;
+			ifr->ifr_map.irq       = dev->irq;
+			ifr->ifr_map.dma       = dev->dma;
+			ifr->ifr_map.port      = dev->if_port;
+			return 0;
+
+		case SIOCSIFMAP:
+			if (dev->set_config) {
+				if (!netif_device_present(dev))
+					return -ENODEV;
+				return dev->set_config(dev, &ifr->ifr_map);
+			}
+			return -EOPNOTSUPP;
+
+		case SIOCADDMULTI:
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
+					  dev->addr_len, 1);
+
+		case SIOCDELMULTI:
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
+					     dev->addr_len, 1);
+
+		case SIOCGIFINDEX:
+			ifr->ifr_ifindex = dev->ifindex;
+			return 0;
+
+		case SIOCGIFTXQLEN:
+			ifr->ifr_qlen = dev->tx_queue_len;
+			return 0;
+
+		case SIOCSIFTXQLEN:
+			if (ifr->ifr_qlen < 0)
+				return -EINVAL;
+			dev->tx_queue_len = ifr->ifr_qlen;
+			return 0;
+
+		case SIOCSIFNAME:
+			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+			return dev_change_name(dev, ifr->ifr_newname);
+
+		/*
+		 *	Unknown or private ioctl
+		 */
+
+		default:
+			if ((cmd >= SIOCDEVPRIVATE &&
+			    cmd <= SIOCDEVPRIVATE + 15) ||
+			    cmd == SIOCBONDENSLAVE ||
+			    cmd == SIOCBONDRELEASE ||
+			    cmd == SIOCBONDSETHWADDR ||
+			    cmd == SIOCBONDSLAVEINFOQUERY ||
+			    cmd == SIOCBONDINFOQUERY ||
+			    cmd == SIOCBONDCHANGEACTIVE ||
+			    cmd == SIOCGMIIPHY ||
+			    cmd == SIOCGMIIREG ||
+			    cmd == SIOCSMIIREG ||
+			    cmd == SIOCBRADDIF ||
+			    cmd == SIOCBRDELIF ||
+			    cmd == SIOCWANDEV) {
+				err = -EOPNOTSUPP;
+				if (dev->do_ioctl) {
+					if (netif_device_present(dev))
+						err = dev->do_ioctl(dev, ifr,
+								    cmd);
+					else
+						err = -ENODEV;
+				}
+			} else
+				err = -EINVAL;
+
+	}
+	return err;
+}
+
+/*
+ *	This function handles all "interface"-type I/O control requests. The actual
+ *	'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ */
+
+int dev_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	int ret;
+	char *colon;
+
+	/* One special case: SIOCGIFCONF takes ifconf argument
+	   and requires shared lock, because it sleeps writing
+	   to user space.
+	 */
+
+	if (cmd == SIOCGIFCONF) {
+		rtnl_shlock();
+		ret = dev_ifconf((char __user *) arg);
+		rtnl_shunlock();
+		return ret;
+	}
+	if (cmd == SIOCGIFNAME)
+		return dev_ifname((struct ifreq __user *)arg);
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	/*
+	 *	See which interface the caller is talking about.
+	 */
+
+	switch (cmd) {
+		/*
+		 *	These ioctl calls:
+		 *	- can be done by all.
+		 *	- atomic and do not require locking.
+		 *	- return a value
+		 */
+		case SIOCGIFFLAGS:
+		case SIOCGIFMETRIC:
+		case SIOCGIFMTU:
+		case SIOCGIFHWADDR:
+		case SIOCGIFSLAVE:
+		case SIOCGIFMAP:
+		case SIOCGIFINDEX:
+		case SIOCGIFTXQLEN:
+			dev_load(ifr.ifr_name);
+			read_lock(&dev_base_lock);
+			ret = dev_ifsioc(&ifr, cmd);
+			read_unlock(&dev_base_lock);
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		case SIOCETHTOOL:
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ethtool(&ifr);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- return a value
+		 */
+		case SIOCGMIIPHY:
+		case SIOCGMIIREG:
+		case SIOCSIFNAME:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- do not return a value
+		 */
+		case SIOCSIFFLAGS:
+		case SIOCSIFMETRIC:
+		case SIOCSIFMTU:
+		case SIOCSIFMAP:
+		case SIOCSIFHWADDR:
+		case SIOCSIFSLAVE:
+		case SIOCADDMULTI:
+		case SIOCDELMULTI:
+		case SIOCSIFHWBROADCAST:
+		case SIOCSIFTXQLEN:
+		case SIOCSMIIREG:
+		case SIOCBONDENSLAVE:
+		case SIOCBONDRELEASE:
+		case SIOCBONDSETHWADDR:
+		case SIOCBONDSLAVEINFOQUERY:
+		case SIOCBONDINFOQUERY:
+		case SIOCBONDCHANGEACTIVE:
+		case SIOCBRADDIF:
+		case SIOCBRDELIF:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			return ret;
+
+		case SIOCGIFMEM:
+			/* Get the per device memory space. We can add this but
+			 * currently do not support it */
+		case SIOCSIFMEM:
+			/* Set the per device memory buffer space.
+			 * Not applicable in our case */
+		case SIOCSIFLINK:
+			return -EINVAL;
+
+		/*
+		 *	Unknown or private ioctl.
+		 */
+		default:
+			if (cmd == SIOCWANDEV ||
+			    (cmd >= SIOCDEVPRIVATE &&
+			     cmd <= SIOCDEVPRIVATE + 15)) {
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				ret = dev_ifsioc(&ifr, cmd);
+				rtnl_unlock();
+				if (!ret && copy_to_user(arg, &ifr,
+							 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#ifdef WIRELESS_EXT
+			/* Take care of Wireless Extensions */
+			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+				/* If command is `set a parameter', or
+				 * `get the encoding parameters', check if
+				 * the user has the right to do it */
+				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+					if (!capable(CAP_NET_ADMIN))
+						return -EPERM;
+				}
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				/* Follow me in net/core/wireless.c */
+				ret = wireless_process_ioctl(&ifr, cmd);
+				rtnl_unlock();
+				if (IW_IS_GET(cmd) &&
+				    copy_to_user(arg, &ifr,
+					    	 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#endif	/* WIRELESS_EXT */
+			return -EINVAL;
+	}
+}
+
+
+/**
+ *	dev_new_index	-	allocate an ifindex
+ *
+ *	Returns a suitable unique value for a new device interface
+ *	number.  The caller must hold the rtnl semaphore or the
+ *	dev_base_lock to be sure it remains unique.
+ */
+static int dev_new_index(void)
+{
+	static int ifindex;
+	for (;;) {
+		if (++ifindex <= 0)
+			ifindex = 1;
+		if (!__dev_get_by_index(ifindex))
+			return ifindex;
+	}
+}
+
+static int dev_boot_phase = 1;
+
+/* Delayed registration/unregisteration */
+static DEFINE_SPINLOCK(net_todo_list_lock);
+static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+
+static inline void net_set_todo(struct net_device *dev)
+{
+	spin_lock(&net_todo_list_lock);
+	list_add_tail(&dev->todo_list, &net_todo_list);
+	spin_unlock(&net_todo_list_lock);
+}
+
+/**
+ *	register_netdevice	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	Callers must hold the rtnl semaphore. You may want
+ *	register_netdev() instead of this.
+ *
+ *	BUGS:
+ *	The locking appears insufficient to guarantee two parallel registers
+ *	will not get the same name.
+ */
+
+int register_netdevice(struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *p;
+	int ret;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* When net_device's are persistent, this will be fatal. */
+	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
+	spin_lock_init(&dev->queue_lock);
+	spin_lock_init(&dev->xmit_lock);
+	dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_CLS_ACT
+	spin_lock_init(&dev->ingress_lock);
+#endif
+
+	ret = alloc_divert_blk(dev);
+	if (ret)
+		goto out;
+
+	dev->iflink = -1;
+
+	/* Init, if this function is available */
+	if (dev->init) {
+		ret = dev->init(dev);
+		if (ret) {
+			if (ret > 0)
+				ret = -EIO;
+			goto out_err;
+		}
+	}
+ 
+	if (!dev_valid_name(dev->name)) {
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	dev->ifindex = dev_new_index();
+	if (dev->iflink == -1)
+		dev->iflink = dev->ifindex;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			ret = -EEXIST;
+ 			goto out_err;
+		}
+ 	}
+
+	/* Fix illegal SG+CSUM combinations. */
+	if ((dev->features & NETIF_F_SG) &&
+	    !(dev->features & (NETIF_F_IP_CSUM |
+			       NETIF_F_NO_CSUM |
+			       NETIF_F_HW_CSUM))) {
+		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_SG;
+	}
+
+	/* TSO requires that SG is present as well. */
+	if ((dev->features & NETIF_F_TSO) &&
+	    !(dev->features & NETIF_F_SG)) {
+		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_TSO;
+	}
+
+	/*
+	 *	nil rebuild_header routine,
+	 *	that should be never called and used as just bug trap.
+	 */
+
+	if (!dev->rebuild_header)
+		dev->rebuild_header = default_rebuild_header;
+
+	/*
+	 *	Default initial state at registry is that the
+	 *	device is present.
+	 */
+
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+	dev->next = NULL;
+	dev_init_scheduler(dev);
+	write_lock_bh(&dev_base_lock);
+	*dev_tail = dev;
+	dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, head);
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	dev_hold(dev);
+	dev->reg_state = NETREG_REGISTERING;
+	write_unlock_bh(&dev_base_lock);
+
+	/* Notify protocols, that a new device appeared. */
+	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+	/* Finish registration after unlock */
+	net_set_todo(dev);
+	ret = 0;
+
+out:
+	return ret;
+out_err:
+	free_divert_blk(dev);
+	goto out;
+}
+
+/**
+ *	register_netdev	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	This is a wrapper around register_netdev that takes the rtnl semaphore
+ *	and expands the device name if you passed a format string to
+ *	alloc_netdev.
+ */
+int register_netdev(struct net_device *dev)
+{
+	int err;
+
+	rtnl_lock();
+
+	/*
+	 * If the name is a format string the caller wants us to do a
+	 * name allocation.
+	 */
+	if (strchr(dev->name, '%')) {
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto out;
+	}
+	
+	/*
+	 * Back compatibility hook. Kill this one in 2.5
+	 */
+	if (dev->name[0] == 0 || dev->name[0] == ' ') {
+		err = dev_alloc_name(dev, "eth%d");
+		if (err < 0)
+			goto out;
+	}
+
+	err = register_netdevice(dev);
+out:
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_netdev);
+
+/*
+ * netdev_wait_allrefs - wait until all references are gone.
+ *
+ * This is called when unregistering network devices.
+ *
+ * Any protocol or device that holds a reference should register
+ * for netdevice notification, and cleanup and put back the
+ * reference if they receive an UNREGISTER event.
+ * We can get stuck here if buggy protocols don't correctly
+ * call dev_put. 
+ */
+static void netdev_wait_allrefs(struct net_device *dev)
+{
+	unsigned long rebroadcast_time, warning_time;
+
+	rebroadcast_time = warning_time = jiffies;
+	while (atomic_read(&dev->refcnt) != 0) {
+		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+			rtnl_shlock();
+
+			/* Rebroadcast unregister notification */
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_UNREGISTER, dev);
+
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+				     &dev->state)) {
+				/* We must not have linkwatch events
+				 * pending on unregister. If this
+				 * happens, we simply run the queue
+				 * unscheduled, resulting in a noop
+				 * for this device.
+				 */
+				linkwatch_run_queue();
+			}
+
+			rtnl_shunlock();
+
+			rebroadcast_time = jiffies;
+		}
+
+		msleep(250);
+
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			printk(KERN_EMERG "unregister_netdevice: "
+			       "waiting for %s to become free. Usage "
+			       "count = %d\n",
+			       dev->name, atomic_read(&dev->refcnt));
+			warning_time = jiffies;
+		}
+	}
+}
+
+/* The sequence is:
+ *
+ *	rtnl_lock();
+ *	...
+ *	register_netdevice(x1);
+ *	register_netdevice(x2);
+ *	...
+ *	unregister_netdevice(y1);
+ *	unregister_netdevice(y2);
+ *      ...
+ *	rtnl_unlock();
+ *	free_netdev(y1);
+ *	free_netdev(y2);
+ *
+ * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * This allows us to deal with problems:
+ * 1) We can create/delete sysfs objects which invoke hotplug
+ *    without deadlocking with linkwatch via keventd.
+ * 2) Since we run with the RTNL semaphore not held, we can sleep
+ *    safely in order to wait for the netdev refcnt to drop to zero.
+ */
+static DECLARE_MUTEX(net_todo_run_mutex);
+void netdev_run_todo(void)
+{
+	struct list_head list = LIST_HEAD_INIT(list);
+	int err;
+
+
+	/* Need to guard against multiple cpu's getting out of order. */
+	down(&net_todo_run_mutex);
+
+	/* Not safe to do outside the semaphore.  We must not return
+	 * until all unregister events invoked by the local processor
+	 * have been completed (either by this todo run, or one on
+	 * another cpu).
+	 */
+	if (list_empty(&net_todo_list))
+		goto out;
+
+	/* Snapshot list, allow later requests */
+	spin_lock(&net_todo_list_lock);
+	list_splice_init(&net_todo_list, &list);
+	spin_unlock(&net_todo_list_lock);
+		
+	while (!list_empty(&list)) {
+		struct net_device *dev
+			= list_entry(list.next, struct net_device, todo_list);
+		list_del(&dev->todo_list);
+
+		switch(dev->reg_state) {
+		case NETREG_REGISTERING:
+			err = netdev_register_sysfs(dev);
+			if (err)
+				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
+				       dev->name, err);
+			dev->reg_state = NETREG_REGISTERED;
+			break;
+
+		case NETREG_UNREGISTERING:
+			netdev_unregister_sysfs(dev);
+			dev->reg_state = NETREG_UNREGISTERED;
+
+			netdev_wait_allrefs(dev);
+
+			/* paranoia */
+			BUG_ON(atomic_read(&dev->refcnt));
+			BUG_TRAP(!dev->ip_ptr);
+			BUG_TRAP(!dev->ip6_ptr);
+			BUG_TRAP(!dev->dn_ptr);
+
+
+			/* It must be the very last action, 
+			 * after this 'dev' may point to freed up memory.
+			 */
+			if (dev->destructor)
+				dev->destructor(dev);
+			break;
+
+		default:
+			printk(KERN_ERR "network todo '%s' but state %d\n",
+			       dev->name, dev->reg_state);
+			break;
+		}
+	}
+
+out:
+	up(&net_todo_run_mutex);
+}
+
+/**
+ *	alloc_netdev - allocate network device
+ *	@sizeof_priv:	size of private data to allocate space for
+ *	@name:		device name format string
+ *	@setup:		callback to initialize device
+ *
+ *	Allocates a struct net_device with private data area for driver use
+ *	and performs basic initialization.
+ */
+struct net_device *alloc_netdev(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *))
+{
+	void *p;
+	struct net_device *dev;
+	int alloc_size;
+
+	/* ensure 32-byte alignment of both the device and private area */
+	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+	p = kmalloc(alloc_size, GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		return NULL;
+	}
+	memset(p, 0, alloc_size);
+
+	dev = (struct net_device *)
+		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev->padded = (char *)dev - (char *)p;
+
+	if (sizeof_priv)
+		dev->priv = netdev_priv(dev);
+
+	setup(dev);
+	strcpy(dev->name, name);
+	return dev;
+}
+EXPORT_SYMBOL(alloc_netdev);
+
+/**
+ *	free_netdev - free network device
+ *	@dev: device
+ *
+ *	This function does the last stage of destroying an allocated device 
+ * 	interface. The reference to the device object is released.  
+ *	If this is the last reference then it will be freed.
+ */
+void free_netdev(struct net_device *dev)
+{
+#ifdef CONFIG_SYSFS
+	/*  Compatiablity with error handling in drivers */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		kfree((char *)dev - dev->padded);
+		return;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
+	dev->reg_state = NETREG_RELEASED;
+
+	/* will free via class release */
+	class_device_put(&dev->class_dev);
+#else
+	kfree((char *)dev - dev->padded);
+#endif
+}
+ 
+/* Synchronize with packet receive processing. */
+void synchronize_net(void) 
+{
+	might_sleep();
+	synchronize_kernel();
+}
+
+/**
+ *	unregister_netdevice - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. On success 0 is returned, on a failure
+ *	a negative errno code is returned.
+ *
+ *	Callers must hold the rtnl semaphore.  You may want
+ *	unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+	struct net_device *d, **dp;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* Some devices call without registering for initialization unwind. */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
+				  "was registered\n", dev->name, dev);
+		return -ENODEV;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+
+	/* If device is running, close it first. */
+	if (dev->flags & IFF_UP)
+		dev_close(dev);
+
+	/* And unlink it from device chain. */
+	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
+		if (d == dev) {
+			write_lock_bh(&dev_base_lock);
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (dev_tail == &dev->next)
+				dev_tail = dp;
+			*dp = d->next;
+			write_unlock_bh(&dev_base_lock);
+			break;
+		}
+	}
+	if (!d) {
+		printk(KERN_ERR "unregister net_device: '%s' not found\n",
+		       dev->name);
+		return -ENODEV;
+	}
+
+	dev->reg_state = NETREG_UNREGISTERING;
+
+	synchronize_net();
+
+	/* Shutdown queueing discipline. */
+	dev_shutdown(dev);
+
+	
+	/* Notify protocols, that we are about to destroy
+	   this device. They should clean all the things.
+	*/
+	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+	
+	/*
+	 *	Flush the multicast chain
+	 */
+	dev_mc_discard(dev);
+
+	if (dev->uninit)
+		dev->uninit(dev);
+
+	/* Notifier chain MUST detach us from master device. */
+	BUG_TRAP(!dev->master);
+
+	free_divert_blk(dev);
+
+	/* Finish processing unregister after unlock */
+	net_set_todo(dev);
+
+	synchronize_net();
+
+	dev_put(dev);
+	return 0;
+}
+
+/**
+ *	unregister_netdev - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. On success 0 is returned, on a failure
+ *	a negative errno code is returned.
+ *
+ *	This is just a wrapper for unregister_netdevice that takes
+ *	the rtnl semaphore.  In general you want to use this and not
+ *	unregister_netdevice.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+	rtnl_lock();
+	unregister_netdevice(dev);
+	rtnl_unlock();
+}
+
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int dev_cpu_callback(struct notifier_block *nfb,
+			    unsigned long action,
+			    void *ocpu)
+{
+	struct sk_buff **list_skb;
+	struct net_device **list_net;
+	struct sk_buff *skb;
+	unsigned int cpu, oldcpu = (unsigned long)ocpu;
+	struct softnet_data *sd, *oldsd;
+
+	if (action != CPU_DEAD)
+		return NOTIFY_OK;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+	sd = &per_cpu(softnet_data, cpu);
+	oldsd = &per_cpu(softnet_data, oldcpu);
+
+	/* Find end of our completion_queue. */
+	list_skb = &sd->completion_queue;
+	while (*list_skb)
+		list_skb = &(*list_skb)->next;
+	/* Append completion queue from offline CPU. */
+	*list_skb = oldsd->completion_queue;
+	oldsd->completion_queue = NULL;
+
+	/* Find end of our output_queue. */
+	list_net = &sd->output_queue;
+	while (*list_net)
+		list_net = &(*list_net)->next_sched;
+	/* Append output queue from offline CPU. */
+	*list_net = oldsd->output_queue;
+	oldsd->output_queue = NULL;
+
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_enable();
+
+	/* Process offline CPU's input_pkt_queue */
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+		netif_rx(skb);
+
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+
+/*
+ *	Initialize the DEV module. At boot time this walks the device list and
+ *	unhooks any devices that fail to initialise (normally hardware not
+ *	present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+/*
+ *       This is called single threaded during boot, so no need
+ *       to take the rtnl semaphore.
+ */
+static int __init net_dev_init(void)
+{
+	int i, rc = -ENOMEM;
+
+	BUG_ON(!dev_boot_phase);
+
+	net_random_init();
+
+	if (dev_proc_init())
+		goto out;
+
+	if (netdev_sysfs_init())
+		goto out;
+
+	INIT_LIST_HEAD(&ptype_all);
+	for (i = 0; i < 16; i++) 
+		INIT_LIST_HEAD(&ptype_base[i]);
+
+	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
+		INIT_HLIST_HEAD(&dev_name_head[i]);
+
+	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
+		INIT_HLIST_HEAD(&dev_index_head[i]);
+
+	/*
+	 *	Initialise the packet receive queues.
+	 */
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct softnet_data *queue;
+
+		queue = &per_cpu(softnet_data, i);
+		skb_queue_head_init(&queue->input_pkt_queue);
+		queue->throttle = 0;
+		queue->cng_level = 0;
+		queue->avg_blog = 10; /* arbitrary non-zero */
+		queue->completion_queue = NULL;
+		INIT_LIST_HEAD(&queue->poll_list);
+		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
+		queue->backlog_dev.weight = weight_p;
+		queue->backlog_dev.poll = process_backlog;
+		atomic_set(&queue->backlog_dev.refcnt, 1);
+	}
+
+#ifdef OFFLINE_SAMPLE
+	samp_timer.expires = jiffies + (10 * HZ);
+	add_timer(&samp_timer);
+#endif
+
+	dev_boot_phase = 0;
+
+	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
+	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+	hotcpu_notifier(dev_cpu_callback, 0);
+	dst_init();
+	dev_mcast_init();
+	rc = 0;
+out:
+	return rc;
+}
+
+subsys_initcall(net_dev_init);
+
+EXPORT_SYMBOL(__dev_get_by_index);
+EXPORT_SYMBOL(__dev_get_by_name);
+EXPORT_SYMBOL(__dev_remove_pack);
+EXPORT_SYMBOL(__skb_linearize);
+EXPORT_SYMBOL(dev_add_pack);
+EXPORT_SYMBOL(dev_alloc_name);
+EXPORT_SYMBOL(dev_close);
+EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_index);
+EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_ioctl);
+EXPORT_SYMBOL(dev_open);
+EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_remove_pack);
+EXPORT_SYMBOL(dev_set_allmulti);
+EXPORT_SYMBOL(dev_set_promiscuity);
+EXPORT_SYMBOL(dev_change_flags);
+EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(dev_set_mac_address);
+EXPORT_SYMBOL(free_netdev);
+EXPORT_SYMBOL(netdev_boot_setup_check);
+EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_state_change);
+EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_rx);
+EXPORT_SYMBOL(register_gifconf);
+EXPORT_SYMBOL(register_netdevice);
+EXPORT_SYMBOL(register_netdevice_notifier);
+EXPORT_SYMBOL(skb_checksum_help);
+EXPORT_SYMBOL(synchronize_net);
+EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_notifier);
+EXPORT_SYMBOL(net_enable_timestamp);
+EXPORT_SYMBOL(net_disable_timestamp);
+EXPORT_SYMBOL(dev_get_flags);
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_fdb_get_hook);
+EXPORT_SYMBOL(br_fdb_put_hook);
+#endif
+
+#ifdef CONFIG_KMOD
+EXPORT_SYMBOL(dev_load);
+#endif
+
+EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
new file mode 100644
index 00000000000..db098ff3cd6
--- /dev/null
+++ b/net/core/dev_mcast.c
@@ -0,0 +1,299 @@
+/*
+ *	Linux NET3:	Multicast List maintenance. 
+ *
+ *	Authors:
+ *		Tim Kordas <tjk@nostromo.eeap.cwru.edu> 
+ *		Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ *	Stir fried together from the IP multicast and CAP patches above
+ *		Alan Cox <Alan.Cox@linux.org>	
+ *
+ *	Fixes:
+ *		Alan Cox	:	Update the device on a real delete
+ *					rather than any time but...
+ *		Alan Cox	:	IFF_ALLMULTI support.
+ *		Alan Cox	: 	New format set_multicast_list() calls.
+ *		Gleb Natapov    :       Remove dev_mc_lock.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h> 
+#include <linux/module.h> 
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+
+
+/*
+ *	Device multicast list maintenance. 
+ *
+ *	This is used both by IP and by the user level maintenance functions. 
+ *	Unlike BSD we maintain a usage count on a given multicast address so 
+ *	that a casual user application can add/delete multicasts used by 
+ *	protocols without doing damage to the protocols when it deletes the
+ *	entries. It also helps IP as it tracks overlapping maps.
+ *
+ *	Device mc lists are changed by bh at least if IPv6 is enabled,
+ *	so that it must be bh protected.
+ *
+ *	We block accesses to device mc filters with dev->xmit_lock.
+ */
+
+/*
+ *	Update the multicast list into the physical NIC controller.
+ */
+ 
+static void __dev_mc_upload(struct net_device *dev)
+{
+	/* Don't do anything till we up the interface
+	 * [dev_open will call this function so the list will
+	 * stay sane]
+	 */
+
+	if (!(dev->flags&IFF_UP))
+		return;
+
+	/*
+	 *	Devices with no set multicast or which have been
+	 *	detached don't get set.
+	 */
+
+	if (dev->set_multicast_list == NULL ||
+	    !netif_device_present(dev))
+		return;
+
+	dev->set_multicast_list(dev);
+}
+
+void dev_mc_upload(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	__dev_mc_upload(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+/*
+ *	Delete a device level multicast
+ */
+ 
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = 0;
+	struct dev_mc_list *dmi, **dmip;
+
+	spin_lock_bh(&dev->xmit_lock);
+
+	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+		/*
+		 *	Find the entry we want to delete. The device could
+		 *	have variable length entries so check these too.
+		 */
+		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+		    alen == dmi->dmi_addrlen) {
+			if (glbl) {
+				int old_glbl = dmi->dmi_gusers;
+				dmi->dmi_gusers = 0;
+				if (old_glbl == 0)
+					break;
+			}
+			if (--dmi->dmi_users)
+				goto done;
+
+			/*
+			 *	Last user. So delete the entry.
+			 */
+			*dmip = dmi->next;
+			dev->mc_count--;
+
+			kfree(dmi);
+
+			/*
+			 *	We have altered the list, so the card
+			 *	loaded filter is now wrong. Fix it
+			 */
+			__dev_mc_upload(dev);
+			
+			spin_unlock_bh(&dev->xmit_lock);
+			return 0;
+		}
+	}
+	err = -ENOENT;
+done:
+	spin_unlock_bh(&dev->xmit_lock);
+	return err;
+}
+
+/*
+ *	Add a device level multicast
+ */
+ 
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = 0;
+	struct dev_mc_list *dmi, *dmi1;
+
+	dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+	spin_lock_bh(&dev->xmit_lock);
+	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+		    dmi->dmi_addrlen == alen) {
+			if (glbl) {
+				int old_glbl = dmi->dmi_gusers;
+				dmi->dmi_gusers = 1;
+				if (old_glbl)
+					goto done;
+			}
+			dmi->dmi_users++;
+			goto done;
+		}
+	}
+
+	if ((dmi = dmi1) == NULL) {
+		spin_unlock_bh(&dev->xmit_lock);
+		return -ENOMEM;
+	}
+	memcpy(dmi->dmi_addr, addr, alen);
+	dmi->dmi_addrlen = alen;
+	dmi->next = dev->mc_list;
+	dmi->dmi_users = 1;
+	dmi->dmi_gusers = glbl ? 1 : 0;
+	dev->mc_list = dmi;
+	dev->mc_count++;
+
+	__dev_mc_upload(dev);
+	
+	spin_unlock_bh(&dev->xmit_lock);
+	return 0;
+
+done:
+	spin_unlock_bh(&dev->xmit_lock);
+	if (dmi1)
+		kfree(dmi1);
+	return err;
+}
+
+/*
+ *	Discard multicast list when a device is downed
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	
+	while (dev->mc_list != NULL) {
+		struct dev_mc_list *tmp = dev->mc_list;
+		dev->mc_list = tmp->next;
+		if (tmp->dmi_users > tmp->dmi_gusers)
+			printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+		kfree(tmp);
+	}
+	dev->mc_count = 0;
+
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	loff_t off = 0;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (off++ == *pos) 
+			return dev;
+	}
+	return NULL;
+}
+
+static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net_device *dev = v;
+	++*pos;
+	return dev->next;
+}
+
+static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct dev_mc_list *m;
+	struct net_device *dev = v;
+
+	spin_lock_bh(&dev->xmit_lock);
+	for (m = dev->mc_list; m; m = m->next) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, m->dmi_users, m->dmi_gusers);
+
+		for (i = 0; i < m->dmi_addrlen; i++)
+			seq_printf(seq, "%02x", m->dmi_addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	spin_unlock_bh(&dev->xmit_lock);
+	return 0;
+}
+
+static struct seq_operations dev_mc_seq_ops = {
+	.start = dev_mc_seq_start,
+	.next  = dev_mc_seq_next,
+	.stop  = dev_mc_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dev_mc_seq_ops);
+}
+
+static struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+void __init dev_mcast_init(void)
+{
+	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+}
+
+EXPORT_SYMBOL(dev_mc_add);
+EXPORT_SYMBOL(dev_mc_delete);
+EXPORT_SYMBOL(dev_mc_upload);
diff --git a/net/core/dst.c b/net/core/dst.c
new file mode 100644
index 00000000000..3bf6cc43481
--- /dev/null
+++ b/net/core/dst.c
@@ -0,0 +1,276 @@
+/*
+ * net/core/dst.c	Protocol independent destination cache.
+ *
+ * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <net/dst.h>
+
+/* Locking strategy:
+ * 1) Garbage collection state of dead destination cache
+ *    entries is protected by dst_lock.
+ * 2) GC is run only from BH context, and is the only remover
+ *    of entries.
+ * 3) Entries are added to the garbage list from both BH
+ *    and non-BH context, so local BH disabling is needed.
+ * 4) All operations modify state, so a spinlock is used.
+ */
+static struct dst_entry 	*dst_garbage_list;
+#if RT_CACHE_DEBUG >= 2 
+static atomic_t			 dst_total = ATOMIC_INIT(0);
+#endif
+static DEFINE_SPINLOCK(dst_lock);
+
+static unsigned long dst_gc_timer_expires;
+static unsigned long dst_gc_timer_inc = DST_GC_MAX;
+static void dst_run_gc(unsigned long);
+static void ___dst_free(struct dst_entry * dst);
+
+static struct timer_list dst_gc_timer =
+	TIMER_INITIALIZER(dst_run_gc, DST_GC_MIN, 0);
+
+static void dst_run_gc(unsigned long dummy)
+{
+	int    delayed = 0;
+	struct dst_entry * dst, **dstp;
+
+	if (!spin_trylock(&dst_lock)) {
+		mod_timer(&dst_gc_timer, jiffies + HZ/10);
+		return;
+	}
+
+
+	del_timer(&dst_gc_timer);
+	dstp = &dst_garbage_list;
+	while ((dst = *dstp) != NULL) {
+		if (atomic_read(&dst->__refcnt)) {
+			dstp = &dst->next;
+			delayed++;
+			continue;
+		}
+		*dstp = dst->next;
+
+		dst = dst_destroy(dst);
+		if (dst) {
+			/* NOHASH and still referenced. Unless it is already
+			 * on gc list, invalidate it and add to gc list.
+			 *
+			 * Note: this is temporary. Actually, NOHASH dst's
+			 * must be obsoleted when parent is obsoleted.
+			 * But we do not have state "obsoleted, but
+			 * referenced by parent", so it is right.
+			 */
+			if (dst->obsolete > 1)
+				continue;
+
+			___dst_free(dst);
+			dst->next = *dstp;
+			*dstp = dst;
+			dstp = &dst->next;
+		}
+	}
+	if (!dst_garbage_list) {
+		dst_gc_timer_inc = DST_GC_MAX;
+		goto out;
+	}
+	if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+		dst_gc_timer_expires = DST_GC_MAX;
+	dst_gc_timer_inc += DST_GC_INC;
+	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
+#if RT_CACHE_DEBUG >= 2
+	printk("dst_total: %d/%d %ld\n",
+	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
+#endif
+	add_timer(&dst_gc_timer);
+
+out:
+	spin_unlock(&dst_lock);
+}
+
+static int dst_discard_in(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+static int dst_discard_out(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+void * dst_alloc(struct dst_ops * ops)
+{
+	struct dst_entry * dst;
+
+	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+		if (ops->gc())
+			return NULL;
+	}
+	dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
+	if (!dst)
+		return NULL;
+	memset(dst, 0, ops->entry_size);
+	atomic_set(&dst->__refcnt, 0);
+	dst->ops = ops;
+	dst->lastuse = jiffies;
+	dst->path = dst;
+	dst->input = dst_discard_in;
+	dst->output = dst_discard_out;
+#if RT_CACHE_DEBUG >= 2 
+	atomic_inc(&dst_total);
+#endif
+	atomic_inc(&ops->entries);
+	return dst;
+}
+
+static void ___dst_free(struct dst_entry * dst)
+{
+	/* The first case (dev==NULL) is required, when
+	   protocol module is unloaded.
+	 */
+	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	}
+	dst->obsolete = 2;
+}
+
+void __dst_free(struct dst_entry * dst)
+{
+	spin_lock_bh(&dst_lock);
+	___dst_free(dst);
+	dst->next = dst_garbage_list;
+	dst_garbage_list = dst;
+	if (dst_gc_timer_inc > DST_GC_INC) {
+		dst_gc_timer_inc = DST_GC_INC;
+		dst_gc_timer_expires = DST_GC_MIN;
+		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+	}
+	spin_unlock_bh(&dst_lock);
+}
+
+struct dst_entry *dst_destroy(struct dst_entry * dst)
+{
+	struct dst_entry *child;
+	struct neighbour *neigh;
+	struct hh_cache *hh;
+
+	smp_rmb();
+
+again:
+	neigh = dst->neighbour;
+	hh = dst->hh;
+	child = dst->child;
+
+	dst->hh = NULL;
+	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+		kfree(hh);
+
+	if (neigh) {
+		dst->neighbour = NULL;
+		neigh_release(neigh);
+	}
+
+	atomic_dec(&dst->ops->entries);
+
+	if (dst->ops->destroy)
+		dst->ops->destroy(dst);
+	if (dst->dev)
+		dev_put(dst->dev);
+#if RT_CACHE_DEBUG >= 2 
+	atomic_dec(&dst_total);
+#endif
+	kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+	dst = child;
+	if (dst) {
+		if (atomic_dec_and_test(&dst->__refcnt)) {
+			/* We were real parent of this dst, so kill child. */
+			if (dst->flags&DST_NOHASH)
+				goto again;
+		} else {
+			/* Child is still referenced, return it for freeing. */
+			if (dst->flags&DST_NOHASH)
+				return dst;
+			/* Child is still in his hash table */
+		}
+	}
+	return NULL;
+}
+
+/* Dirty hack. We did it in 2.2 (in __dst_free),
+ * we have _very_ good reasons not to repeat
+ * this mistake in 2.3, but we have no choice
+ * now. _It_ _is_ _explicit_ _deliberate_
+ * _race_ _condition_.
+ *
+ * Commented and originally written by Alexey.
+ */
+static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			      int unregister)
+{
+	if (dst->ops->ifdown)
+		dst->ops->ifdown(dst, dev, unregister);
+
+	if (dev != dst->dev)
+		return;
+
+	if (!unregister) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	} else {
+		dst->dev = &loopback_dev;
+		dev_hold(&loopback_dev);
+		dev_put(dev);
+		if (dst->neighbour && dst->neighbour->dev == dev) {
+			dst->neighbour->dev = &loopback_dev;
+			dev_put(dev);
+			dev_hold(&loopback_dev);
+		}
+	}
+}
+
+static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dst_entry *dst;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+	case NETDEV_DOWN:
+		spin_lock_bh(&dst_lock);
+		for (dst = dst_garbage_list; dst; dst = dst->next) {
+			dst_ifdown(dst, dev, event != NETDEV_DOWN);
+		}
+		spin_unlock_bh(&dst_lock);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dst_dev_notifier = {
+	.notifier_call	= dst_dev_event,
+};
+
+void __init dst_init(void)
+{
+	register_netdevice_notifier(&dst_dev_notifier);
+}
+
+EXPORT_SYMBOL(__dst_free);
+EXPORT_SYMBOL(dst_alloc);
+EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/dv.c b/net/core/dv.c
new file mode 100644
index 00000000000..3f25f4aa4e6
--- /dev/null
+++ b/net/core/dv.c
@@ -0,0 +1,548 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Generic frame diversion
+ *
+ * Authors:	
+ * 		Benoit LOCHER:	initial integration within the kernel with support for ethernet
+ * 		Dave Miller:	improvement on the code (correctness, performance and source files)
+ *
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <net/dst.h>
+#include <net/arp.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/checksum.h>
+#include <linux/divert.h>
+#include <linux/sockios.h>
+
+const char sysctl_divert_version[32]="0.46";	/* Current version */
+
+static int __init dv_init(void)
+{
+	return 0;
+}
+module_init(dv_init);
+
+/*
+ * Allocate a divert_blk for a device. This must be an ethernet nic.
+ */
+int alloc_divert_blk(struct net_device *dev)
+{
+	int alloc_size = (sizeof(struct divert_blk) + 3) & ~3;
+
+	dev->divert = NULL;
+	if (dev->type == ARPHRD_ETHER) {
+		dev->divert = (struct divert_blk *)
+			kmalloc(alloc_size, GFP_KERNEL);
+		if (dev->divert == NULL) {
+			printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n",
+			       dev->name);
+			return -ENOMEM;
+		}
+
+		memset(dev->divert, 0, sizeof(struct divert_blk));
+		dev_hold(dev);
+	}
+
+	return 0;
+} 
+
+/*
+ * Free a divert_blk allocated by the above function, if it was 
+ * allocated on that device.
+ */
+void free_divert_blk(struct net_device *dev)
+{
+	if (dev->divert) {
+		kfree(dev->divert);
+		dev->divert=NULL;
+		dev_put(dev);
+	}
+}
+
+/*
+ * Adds a tcp/udp (source or dest) port to an array
+ */
+static int add_port(u16 ports[], u16 port)
+{
+	int i;
+
+	if (port == 0)
+		return -EINVAL;
+
+	/* Storing directly in network format for performance,
+	 * thanks Dave :)
+	 */
+	port = htons(port);
+
+	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+		if (ports[i] == port)
+			return -EALREADY;
+	}
+	
+	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+		if (ports[i] == 0) {
+			ports[i] = port;
+			return 0;
+		}
+	}
+
+	return -ENOBUFS;
+}
+
+/*
+ * Removes a port from an array tcp/udp (source or dest)
+ */
+static int remove_port(u16 ports[], u16 port)
+{
+	int i;
+
+	if (port == 0)
+		return -EINVAL;
+	
+	/* Storing directly in network format for performance,
+	 * thanks Dave !
+	 */
+	port = htons(port);
+
+	for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+		if (ports[i] == port) {
+			ports[i] = 0;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/* Some basic sanity checks on the arguments passed to divert_ioctl() */
+static int check_args(struct divert_cf *div_cf, struct net_device **dev)
+{
+	char devname[32];
+	int ret;
+
+	if (dev == NULL)
+		return -EFAULT;
+	
+	/* GETVERSION: all other args are unused */
+	if (div_cf->cmd == DIVCMD_GETVERSION)
+		return 0;
+	
+	/* Network device index should reasonably be between 0 and 1000 :) */
+	if (div_cf->dev_index < 0 || div_cf->dev_index > 1000) 
+		return -EINVAL;
+			
+	/* Let's try to find the ifname */
+	sprintf(devname, "eth%d", div_cf->dev_index);
+	*dev = dev_get_by_name(devname);
+	
+	/* dev should NOT be null */
+	if (*dev == NULL)
+		return -EINVAL;
+
+	ret = 0;
+
+	/* user issuing the ioctl must be a super one :) */
+	if (!capable(CAP_SYS_ADMIN)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/* Device must have a divert_blk member NOT null */
+	if ((*dev)->divert == NULL)
+		ret = -EINVAL;
+out:
+	dev_put(*dev);
+	return ret;
+}
+
+/*
+ * control function of the diverter
+ */
+#if 0
+#define	DVDBG(a)	\
+	printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a))
+#else
+#define	DVDBG(a)
+#endif
+
+int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg)
+{
+	struct divert_cf	div_cf;
+	struct divert_blk	*div_blk;
+	struct net_device	*dev;
+	int			ret;
+
+	switch (cmd) {
+	case SIOCGIFDIVERT:
+		DVDBG("SIOCGIFDIVERT, copy_from_user");
+		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
+			return -EFAULT;
+		DVDBG("before check_args");
+		ret = check_args(&div_cf, &dev);
+		if (ret)
+			return ret;
+		DVDBG("after checkargs");
+		div_blk = dev->divert;
+			
+		DVDBG("befre switch()");
+		switch (div_cf.cmd) {
+		case DIVCMD_GETSTATUS:
+			/* Now, just give the user the raw divert block
+			 * for him to play with :)
+			 */
+			if (copy_to_user(div_cf.arg1.ptr, dev->divert,
+					 sizeof(struct divert_blk)))
+				return -EFAULT;
+			break;
+
+		case DIVCMD_GETVERSION:
+			DVDBG("GETVERSION: checking ptr");
+			if (div_cf.arg1.ptr == NULL)
+				return -EINVAL;
+			DVDBG("GETVERSION: copying data to userland");
+			if (copy_to_user(div_cf.arg1.ptr,
+					 sysctl_divert_version, 32))
+				return -EFAULT;
+			DVDBG("GETVERSION: data copied");
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		break;
+
+	case SIOCSIFDIVERT:
+		if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf)))
+			return -EFAULT;
+
+		ret = check_args(&div_cf, &dev);
+		if (ret)
+			return ret;
+
+		div_blk = dev->divert;
+
+		switch(div_cf.cmd) {
+		case DIVCMD_RESET:
+			div_blk->divert = 0;
+			div_blk->protos = DIVERT_PROTO_NONE;
+			memset(div_blk->tcp_dst, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->tcp_src, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->udp_dst, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			memset(div_blk->udp_src, 0,
+			       MAX_DIVERT_PORTS * sizeof(u16));
+			return 0;
+				
+		case DIVCMD_DIVERT:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->divert)
+					return -EALREADY;
+				div_blk->divert = 1;
+				break;
+
+			case DIVARG1_DISABLE:
+				if (!div_blk->divert)
+					return -EALREADY;
+				div_blk->divert = 0;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_IP:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->protos & DIVERT_PROTO_IP)
+					return -EALREADY;
+				div_blk->protos |= DIVERT_PROTO_IP;
+				break;
+
+			case DIVARG1_DISABLE:
+				if (!(div_blk->protos & DIVERT_PROTO_IP))
+					return -EALREADY;
+				div_blk->protos &= ~DIVERT_PROTO_IP;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_TCP:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->protos & DIVERT_PROTO_TCP)
+					return -EALREADY;
+				div_blk->protos |= DIVERT_PROTO_TCP;
+				break;
+
+			case DIVARG1_DISABLE:
+				if (!(div_blk->protos & DIVERT_PROTO_TCP))
+					return -EALREADY;
+				div_blk->protos &= ~DIVERT_PROTO_TCP;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_TCPDST:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ADD:
+				return add_port(div_blk->tcp_dst,
+						div_cf.arg2.uint16);
+				
+			case DIVARG1_REMOVE:
+				return remove_port(div_blk->tcp_dst,
+						   div_cf.arg2.uint16);
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_TCPSRC:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ADD:
+				return add_port(div_blk->tcp_src,
+						div_cf.arg2.uint16);
+
+			case DIVARG1_REMOVE:
+				return remove_port(div_blk->tcp_src,
+						   div_cf.arg2.uint16);
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_UDP:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->protos & DIVERT_PROTO_UDP)
+					return -EALREADY;
+				div_blk->protos |= DIVERT_PROTO_UDP;
+				break;
+
+			case DIVARG1_DISABLE:
+				if (!(div_blk->protos & DIVERT_PROTO_UDP))
+					return -EALREADY;
+				div_blk->protos &= ~DIVERT_PROTO_UDP;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_UDPDST:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ADD:
+				return add_port(div_blk->udp_dst,
+						div_cf.arg2.uint16);
+
+			case DIVARG1_REMOVE:
+				return remove_port(div_blk->udp_dst,
+						   div_cf.arg2.uint16);
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_UDPSRC:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ADD:
+				return add_port(div_blk->udp_src,
+						div_cf.arg2.uint16);
+
+			case DIVARG1_REMOVE:
+				return remove_port(div_blk->udp_src,
+						   div_cf.arg2.uint16);
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		case DIVCMD_ICMP:
+			switch(div_cf.arg1.int32) {
+			case DIVARG1_ENABLE:
+				if (div_blk->protos & DIVERT_PROTO_ICMP)
+					return -EALREADY;
+				div_blk->protos |= DIVERT_PROTO_ICMP;
+				break;
+
+			case DIVARG1_DISABLE:
+				if (!(div_blk->protos & DIVERT_PROTO_ICMP))
+					return -EALREADY;
+				div_blk->protos &= ~DIVERT_PROTO_ICMP;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Check if packet should have its dest mac address set to the box itself
+ * for diversion
+ */
+
+#define	ETH_DIVERT_FRAME(skb) \
+	memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \
+	skb->pkt_type=PACKET_HOST
+		
+void divert_frame(struct sk_buff *skb)
+{
+	struct ethhdr			*eth = eth_hdr(skb);
+	struct iphdr			*iph;
+	struct tcphdr			*tcph;
+	struct udphdr			*udph;
+	struct divert_blk		*divert = skb->dev->divert;
+	int				i, src, dst;
+	unsigned char			*skb_data_end = skb->data + skb->len;
+
+	/* Packet is already aimed at us, return */
+	if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN))
+		return;
+	
+	/* proto is not IP, do nothing */
+	if (eth->h_proto != htons(ETH_P_IP))
+		return;
+	
+	/* Divert all IP frames ? */
+	if (divert->protos & DIVERT_PROTO_IP) {
+		ETH_DIVERT_FRAME(skb);
+		return;
+	}
+	
+	/* Check for possible (maliciously) malformed IP frame (thanks Dave) */
+	iph = (struct iphdr *) skb->data;
+	if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) {
+		printk(KERN_INFO "divert: malformed IP packet !\n");
+		return;
+	}
+
+	switch (iph->protocol) {
+	/* Divert all ICMP frames ? */
+	case IPPROTO_ICMP:
+		if (divert->protos & DIVERT_PROTO_ICMP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+		break;
+
+	/* Divert all TCP frames ? */
+	case IPPROTO_TCP:
+		if (divert->protos & DIVERT_PROTO_TCP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+
+		/* Check for possible (maliciously) malformed IP
+		 * frame (thanx Dave)
+		 */
+		tcph = (struct tcphdr *)
+			(((unsigned char *)iph) + (iph->ihl<<2));
+		if (((unsigned char *)(tcph+1)) >= skb_data_end) {
+			printk(KERN_INFO "divert: malformed TCP packet !\n");
+			return;
+		}
+
+		/* Divert some tcp dst/src ports only ?*/
+		for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+			dst = divert->tcp_dst[i];
+			src = divert->tcp_src[i];
+			if ((dst && dst == tcph->dest) ||
+			    (src && src == tcph->source)) {
+				ETH_DIVERT_FRAME(skb);
+				return;
+			}
+		}
+		break;
+
+	/* Divert all UDP frames ? */
+	case IPPROTO_UDP:
+		if (divert->protos & DIVERT_PROTO_UDP) {
+			ETH_DIVERT_FRAME(skb);
+			return;
+		}
+
+		/* Check for possible (maliciously) malformed IP
+		 * packet (thanks Dave)
+		 */
+		udph = (struct udphdr *)
+			(((unsigned char *)iph) + (iph->ihl<<2));
+		if (((unsigned char *)(udph+1)) >= skb_data_end) {
+			printk(KERN_INFO
+			       "divert: malformed UDP packet !\n");
+			return;
+		}
+
+		/* Divert some udp dst/src ports only ? */
+		for (i = 0; i < MAX_DIVERT_PORTS; i++) {
+			dst = divert->udp_dst[i];
+			src = divert->udp_src[i];
+			if ((dst && dst == udph->dest) ||
+			    (src && src == udph->source)) {
+				ETH_DIVERT_FRAME(skb);
+				return;
+			}
+		}
+		break;
+	}
+}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
new file mode 100644
index 00000000000..f05fde97c43
--- /dev/null
+++ b/net/core/ethtool.c
@@ -0,0 +1,819 @@
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs.  We fall back to calling do_ioctl()
+ * for drivers which haven't been converted to ethtool_ops yet.
+ *
+ * It's GPL, stupid.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+
+/* 
+ * Some useful ethtool_ops methods that're device independent.
+ * If we find that all drivers want to do the same thing here,
+ * we can turn these into dev_() function calls.
+ */
+
+u32 ethtool_op_get_link(struct net_device *dev)
+{
+	return netif_carrier_ok(dev) ? 1 : 0;
+}
+
+u32 ethtool_op_get_tx_csum(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_IP_CSUM) != 0;
+}
+
+int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_IP_CSUM;
+	else
+		dev->features &= ~NETIF_F_IP_CSUM;
+
+	return 0;
+}
+
+u32 ethtool_op_get_sg(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_SG) != 0;
+}
+
+int ethtool_op_set_sg(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_SG;
+	else
+		dev->features &= ~NETIF_F_SG;
+
+	return 0;
+}
+
+u32 ethtool_op_get_tso(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_TSO) != 0;
+}
+
+int ethtool_op_set_tso(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_TSO;
+	else
+		dev->features &= ~NETIF_F_TSO;
+
+	return 0;
+}
+
+/* Handlers for each ethtool command */
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	int err;
+
+	if (!dev->ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+
+	err = dev->ethtool_ops->get_settings(dev, &cmd);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd;
+
+	if (!dev->ethtool_ops->set_settings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_settings(dev, &cmd);
+}
+
+static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_drvinfo info;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!ops->get_drvinfo)
+		return -EOPNOTSUPP;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GDRVINFO;
+	ops->get_drvinfo(dev, &info);
+
+	if (ops->self_test_count)
+		info.testinfo_len = ops->self_test_count(dev);
+	if (ops->get_stats_count)
+		info.n_stats = ops->get_stats_count(dev);
+	if (ops->get_regs_len)
+		info.regdump_len = ops->get_regs_len(dev);
+	if (ops->get_eeprom_len)
+		info.eedump_len = ops->get_eeprom_len(dev);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_regs regs;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	void *regbuf;
+	int reglen, ret;
+
+	if (!ops->get_regs || !ops->get_regs_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&regs, useraddr, sizeof(regs)))
+		return -EFAULT;
+
+	reglen = ops->get_regs_len(dev);
+	if (regs.len > reglen)
+		regs.len = reglen;
+
+	regbuf = kmalloc(reglen, GFP_USER);
+	if (!regbuf)
+		return -ENOMEM;
+
+	ops->get_regs(dev, &regs, regbuf);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &regs, sizeof(regs)))
+		goto out;
+	useraddr += offsetof(struct ethtool_regs, data);
+	if (copy_to_user(useraddr, regbuf, regs.len))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(regbuf);
+	return ret;
+}
+
+static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+	if (!dev->ethtool_ops->get_wol)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_wol(dev, &wol);
+
+	if (copy_to_user(useraddr, &wol, sizeof(wol)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol;
+
+	if (!dev->ethtool_ops->set_wol)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&wol, useraddr, sizeof(wol)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_wol(dev, &wol);
+}
+
+static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+
+	if (!dev->ethtool_ops->get_msglevel)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_msglevel(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_msglevel)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	dev->ethtool_ops->set_msglevel(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_nway_reset(struct net_device *dev)
+{
+	if (!dev->ethtool_ops->nway_reset)
+		return -EOPNOTSUPP;
+
+	return dev->ethtool_ops->nway_reset(dev);
+}
+
+static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->get_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
+		goto out;
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->set_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->set_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		ret = -EFAULT;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
+
+	if (!dev->ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_coalesce(dev, &coalesce);
+
+	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce;
+
+	if (!dev->ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
+
+	if (!dev->ethtool_ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_ringparam(dev, &ringparam);
+
+	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam;
+
+	if (!dev->ethtool_ops->set_ringparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_ringparam(dev, &ringparam);
+}
+
+static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+
+	if (!dev->ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
+
+	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam;
+
+	if (!dev->ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+	if (!dev->ethtool_ops->get_rx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_rx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_rx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	dev->ethtool_ops->set_rx_csum(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+	if (!dev->ethtool_ops->get_tx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_tx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int __ethtool_set_sg(struct net_device *dev, u32 data)
+{
+	int err;
+
+	if (!data && dev->ethtool_ops->set_tso) {
+		err = dev->ethtool_ops->set_tso(dev, 0);
+		if (err)
+			return err;
+	}
+
+	return dev->ethtool_ops->set_sg(dev, data);
+}
+
+static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+	int err;
+
+	if (!dev->ethtool_ops->set_tx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (!edata.data && dev->ethtool_ops->set_sg) {
+		err = __ethtool_set_sg(dev, 0);
+		if (err)
+			return err;
+	}
+
+	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
+}
+
+static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GSG };
+
+	if (!dev->ethtool_ops->get_sg)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_sg(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (edata.data && 
+	    !(dev->features & (NETIF_F_IP_CSUM |
+			       NETIF_F_NO_CSUM |
+			       NETIF_F_HW_CSUM)))
+		return -EINVAL;
+
+	return __ethtool_set_sg(dev, edata.data);
+}
+
+static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTSO };
+
+	if (!dev->ethtool_ops->get_tso)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_tso(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_tso)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (edata.data && !(dev->features & NETIF_F_SG))
+		return -EINVAL;
+
+	return dev->ethtool_ops->set_tso(dev, edata.data);
+}
+
+static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_test test;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->self_test || !ops->self_test_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&test, useraddr, sizeof(test)))
+		return -EFAULT;
+
+	test.len = ops->self_test_count(dev);
+	data = kmalloc(test.len * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->self_test(dev, &test, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &test, sizeof(test)))
+		goto out;
+	useraddr += sizeof(test);
+	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_strings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	switch (gstrings.string_set) {
+	case ETH_SS_TEST:
+		if (!ops->self_test_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->self_test_count(dev);
+		break;
+	case ETH_SS_STATS:
+		if (!ops->get_stats_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->get_stats_count(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_strings(dev, gstrings.string_set, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_value id;
+
+	if (!dev->ethtool_ops->phys_id)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&id, useraddr, sizeof(id)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->phys_id(dev, id.data);
+}
+
+static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_stats stats;
+	struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->get_ethtool_stats || !ops->get_stats_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&stats, useraddr, sizeof(stats)))
+		return -EFAULT;
+
+	stats.n_stats = ops->get_stats_count(dev);
+	data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_ethtool_stats(dev, &stats, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &stats, sizeof(stats)))
+		goto out;
+	useraddr += sizeof(stats);
+	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+/* The main entry point in this file.  Called from net/core/dev.c */
+
+int dev_ethtool(struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+	void __user *useraddr = ifr->ifr_data;
+	u32 ethcmd;
+	int rc;
+
+	/*
+	 * XXX: This can be pushed down into the ethtool_* handlers that
+	 * need it.  Keep existing behaviour for the moment.
+	 */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	if (!dev->ethtool_ops)
+		goto ioctl;
+
+	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+		return -EFAULT;
+
+	if(dev->ethtool_ops->begin)
+		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+			return rc;
+
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+		rc = ethtool_get_settings(dev, useraddr);
+		break;
+	case ETHTOOL_SSET:
+		rc = ethtool_set_settings(dev, useraddr);
+		break;
+	case ETHTOOL_GDRVINFO:
+		rc = ethtool_get_drvinfo(dev, useraddr);
+
+		break;
+	case ETHTOOL_GREGS:
+		rc = ethtool_get_regs(dev, useraddr);
+		break;
+	case ETHTOOL_GWOL:
+		rc = ethtool_get_wol(dev, useraddr);
+		break;
+	case ETHTOOL_SWOL:
+		rc = ethtool_set_wol(dev, useraddr);
+		break;
+	case ETHTOOL_GMSGLVL:
+		rc = ethtool_get_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_SMSGLVL:
+		rc = ethtool_set_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_NWAY_RST:
+		rc = ethtool_nway_reset(dev);
+		break;
+	case ETHTOOL_GLINK:
+		rc = ethtool_get_link(dev, useraddr);
+		break;
+	case ETHTOOL_GEEPROM:
+		rc = ethtool_get_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_SEEPROM:
+		rc = ethtool_set_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_GCOALESCE:
+		rc = ethtool_get_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_SCOALESCE:
+		rc = ethtool_set_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_GRINGPARAM:
+		rc = ethtool_get_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_SRINGPARAM:
+		rc = ethtool_set_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_GPAUSEPARAM:
+		rc = ethtool_get_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_SPAUSEPARAM:
+		rc = ethtool_set_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_GRXCSUM:
+		rc = ethtool_get_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_SRXCSUM:
+		rc = ethtool_set_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+		rc = ethtool_get_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_STXCSUM:
+		rc = ethtool_set_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GSG:
+		rc = ethtool_get_sg(dev, useraddr);
+		break;
+	case ETHTOOL_SSG:
+		rc = ethtool_set_sg(dev, useraddr);
+		break;
+	case ETHTOOL_GTSO:
+		rc = ethtool_get_tso(dev, useraddr);
+		break;
+	case ETHTOOL_STSO:
+		rc = ethtool_set_tso(dev, useraddr);
+		break;
+	case ETHTOOL_TEST:
+		rc = ethtool_self_test(dev, useraddr);
+		break;
+	case ETHTOOL_GSTRINGS:
+		rc = ethtool_get_strings(dev, useraddr);
+		break;
+	case ETHTOOL_PHYS_ID:
+		rc = ethtool_phys_id(dev, useraddr);
+		break;
+	case ETHTOOL_GSTATS:
+		rc = ethtool_get_stats(dev, useraddr);
+		break;
+	default:
+		rc =  -EOPNOTSUPP;
+	}
+	
+	if(dev->ethtool_ops->complete)
+		dev->ethtool_ops->complete(dev);
+	return rc;
+
+ ioctl:
+	if (dev->do_ioctl)
+		return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
+	return -EOPNOTSUPP;
+}
+
+EXPORT_SYMBOL(dev_ethtool);
+EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL(ethtool_op_get_sg);
+EXPORT_SYMBOL(ethtool_op_get_tso);
+EXPORT_SYMBOL(ethtool_op_get_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_sg);
+EXPORT_SYMBOL(ethtool_op_set_tso);
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
diff --git a/net/core/filter.c b/net/core/filter.c
new file mode 100644
index 00000000000..f3b88205ace
--- /dev/null
+++ b/net/core/filter.c
@@ -0,0 +1,432 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ *     Jay Schulist <jschlst@samba.org>
+ *
+ * Based on the design of:
+ *     - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/filter.h>
+
+/* No hurry in this branch */
+static u8 *load_pointer(struct sk_buff *skb, int k)
+{
+	u8 *ptr = NULL;
+
+	if (k >= SKF_NET_OFF)
+		ptr = skb->nh.raw + k - SKF_NET_OFF;
+	else if (k >= SKF_LL_OFF)
+		ptr = skb->mac.raw + k - SKF_LL_OFF;
+
+	if (ptr >= skb->head && ptr < skb->tail)
+		return ptr;
+	return NULL;
+}
+
+/**
+ *	sk_run_filter	- 	run a filter on a socket
+ *	@skb: buffer to run the filter on
+ *	@filter: filter to apply
+ *	@flen: length of filter
+ *
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * len is the number of filter blocks in the array.
+ */
+ 
+int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+{
+	unsigned char *data = skb->data;
+	/* len is UNSIGNED. Byte wide insns relies only on implicit
+	   type casts to prevent reading arbitrary memory locations.
+	 */
+	unsigned int len = skb->len-skb->data_len;
+	struct sock_filter *fentry;	/* We walk down these */
+	u32 A = 0;	   		/* Accumulator */
+	u32 X = 0;   			/* Index Register */
+	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	int k;
+	int pc;
+
+	/*
+	 * Process array of filter instructions.
+	 */
+	for (pc = 0; pc < flen; pc++) {
+		fentry = &filter[pc];
+			
+		switch (fentry->code) {
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += fentry->k;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= fentry->k;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= fentry->k;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_K:
+			if (fentry->k == 0)
+				return 0;
+			A /= fentry->k;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= fentry->k;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= fentry->k;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= fentry->k;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= fentry->k;
+			continue;
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+		case BPF_JMP|BPF_JA:
+			pc += fentry->k;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = fentry->k;
+ load_w:
+			if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
+				A = ntohl(*(u32*)&data[k]);
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = ntohl(*(u32*)ptr);
+					continue;
+				}
+			} else {
+				u32 _tmp, *p;
+				p = skb_header_pointer(skb, k, 4, &_tmp);
+				if (p != NULL) {
+					A = ntohl(*p);
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = fentry->k;
+ load_h:
+			if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
+				A = ntohs(*(u16*)&data[k]);
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = ntohs(*(u16*)ptr);
+					continue;
+				}
+			} else {
+				u16 _tmp, *p;
+				p = skb_header_pointer(skb, k, 2, &_tmp);
+				if (p != NULL) {
+					A = ntohs(*p);
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = fentry->k;
+load_b:
+			if (k >= 0 && (unsigned int)k < len) {
+				A = data[k];
+				continue;
+			}
+			if (k < 0) {
+				u8 *ptr;
+
+				if (k >= SKF_AD_OFF)
+					break;
+				ptr = load_pointer(skb, k);
+				if (ptr) {
+					A = *ptr;
+					continue;
+				}
+			} else {
+				u8 _tmp, *p;
+				p = skb_header_pointer(skb, k, 1, &_tmp);
+				if (p != NULL) {
+					A = *p;
+					continue;
+				}
+			}
+			return 0;
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = len;
+			continue;
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = len;
+			continue;
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + fentry->k;
+			goto load_w;
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + fentry->k;
+			goto load_h;
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + fentry->k;
+			goto load_b;
+		case BPF_LDX|BPF_B|BPF_MSH:
+			if (fentry->k >= len)
+				return 0;
+			X = (data[fentry->k] & 0xf) << 2;
+			continue;
+		case BPF_LD|BPF_IMM:
+			A = fentry->k;
+			continue;
+		case BPF_LDX|BPF_IMM:
+			X = fentry->k;
+			continue;
+		case BPF_LD|BPF_MEM:
+			A = mem[fentry->k];
+			continue;
+		case BPF_LDX|BPF_MEM:
+			X = mem[fentry->k];
+			continue;
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		case BPF_RET|BPF_K:
+			return ((unsigned int)fentry->k);
+		case BPF_RET|BPF_A:
+			return ((unsigned int)A);
+		case BPF_ST:
+			mem[fentry->k] = A;
+			continue;
+		case BPF_STX:
+			mem[fentry->k] = X;
+			continue;
+		default:
+			/* Invalid instruction counts as RET */
+			return 0;
+		}
+
+		/*
+		 * Handle ancillary data, which are impossible
+		 * (or very difficult) to get parsing packet contents.
+		 */
+		switch (k-SKF_AD_OFF) {
+		case SKF_AD_PROTOCOL:
+			A = htons(skb->protocol);
+			continue;
+		case SKF_AD_PKTTYPE:
+			A = skb->pkt_type;
+			continue;
+		case SKF_AD_IFINDEX:
+			A = skb->dev->ifindex;
+			continue;
+		default:
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	sk_chk_filter - verify socket filter code
+ *	@filter: filter to verify
+ *	@flen: length of filter
+ *
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through kaboom! The filter must contain
+ * no references or jumps that are out of range, no illegal instructions
+ * and no backward jumps. It must end with a RET instruction
+ *
+ * Returns 0 if the rule set is legal or a negative errno code if not.
+ */
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+	struct sock_filter *ftest;
+	int pc;
+
+	if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+		return -EINVAL;
+
+	/* check the filter code now */
+	for (pc = 0; pc < flen; pc++) {
+		/* all jumps are forward as they are not signed */
+		ftest = &filter[pc];
+		if (BPF_CLASS(ftest->code) == BPF_JMP) {
+			/* but they mustn't jump off the end */
+			if (BPF_OP(ftest->code) == BPF_JA) {
+				/*
+				 * Note, the large ftest->k might cause loops.
+				 * Compare this with conditional jumps below,
+				 * where offsets are limited. --ANK (981016)
+				 */
+				if (ftest->k >= (unsigned)(flen-pc-1))
+					return -EINVAL;
+			} else {
+				/* for conditionals both must be safe */
+ 				if (pc + ftest->jt +1 >= flen ||
+				    pc + ftest->jf +1 >= flen)
+					return -EINVAL;
+			}
+		}
+
+		/* check that memory operations use valid addresses. */
+		if (ftest->k >= BPF_MEMWORDS) {
+			/* but it might not be a memory operation... */
+			switch (ftest->code) {
+			case BPF_ST:	
+			case BPF_STX:	
+			case BPF_LD|BPF_MEM:	
+			case BPF_LDX|BPF_MEM:	
+				return -EINVAL;
+			}
+		}
+	}
+
+	/*
+	 * The program must end with a return. We don't care where they
+	 * jumped within the script (its always forwards) but in the end
+	 * they _will_ hit this.
+	 */
+        return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+}
+
+/**
+ *	sk_attach_filter - attach a socket filter
+ *	@fprog: the filter program
+ *	@sk: the socket to use
+ *
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	struct sk_filter *fp; 
+	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	int err;
+
+	/* Make sure new filter is there and in the right amounts. */
+        if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+                return -EINVAL;
+
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	if (!fp)
+		return -ENOMEM;
+	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+		sock_kfree_s(sk, fp, fsize+sizeof(*fp)); 
+		return -EFAULT;
+	}
+
+	atomic_set(&fp->refcnt, 1);
+	fp->len = fprog->len;
+
+	err = sk_chk_filter(fp->insns, fp->len);
+	if (!err) {
+		struct sk_filter *old_fp;
+
+		spin_lock_bh(&sk->sk_lock.slock);
+		old_fp = sk->sk_filter;
+		sk->sk_filter = fp;
+		spin_unlock_bh(&sk->sk_lock.slock);
+		fp = old_fp;
+	}
+
+	if (fp)
+		sk_filter_release(sk, fp);
+	return err;
+}
+
+EXPORT_SYMBOL(sk_chk_filter);
+EXPORT_SYMBOL(sk_run_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
new file mode 100644
index 00000000000..f289570b15a
--- /dev/null
+++ b/net/core/flow.c
@@ -0,0 +1,371 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <net/flow.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+struct flow_cache_entry {
+	struct flow_cache_entry	*next;
+	u16			family;
+	u8			dir;
+	struct flowi		key;
+	u32			genid;
+	void			*object;
+	atomic_t		*object_ref;
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+
+static u32 flow_hash_shift;
+#define flow_hash_size	(1 << flow_hash_shift)
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
+
+#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+
+static kmem_cache_t *flow_cachep;
+
+static int flow_lwm, flow_hwm;
+
+struct flow_percpu_info {
+	int hash_rnd_recalc;
+	u32 hash_rnd;
+	int count;
+} ____cacheline_aligned;
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
+
+#define flow_hash_rnd_recalc(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+#define flow_hash_rnd(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd)
+#define flow_count(cpu) \
+	(per_cpu(flow_hash_info, cpu).count)
+
+static struct timer_list flow_hash_rnd_timer;
+
+#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+
+struct flow_flush_info {
+	atomic_t cpuleft;
+	struct completion completion;
+};
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+
+#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	int i;
+
+	for_each_cpu(i)
+		flow_hash_rnd_recalc(i) = 1;
+
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+}
+
+static void __flow_cache_shrink(int cpu, int shrink_to)
+{
+	struct flow_cache_entry *fle, **flp;
+	int i;
+
+	for (i = 0; i < flow_hash_size; i++) {
+		int k = 0;
+
+		flp = &flow_table(cpu)[i];
+		while ((fle = *flp) != NULL && k < shrink_to) {
+			k++;
+			flp = &fle->next;
+		}
+		while ((fle = *flp) != NULL) {
+			*flp = fle->next;
+			if (fle->object)
+				atomic_dec(fle->object_ref);
+			kmem_cache_free(flow_cachep, fle);
+			flow_count(cpu)--;
+		}
+	}
+}
+
+static void flow_cache_shrink(int cpu)
+{
+	int shrink_to = flow_lwm / flow_hash_size;
+
+	__flow_cache_shrink(cpu, shrink_to);
+}
+
+static void flow_new_hash_rnd(int cpu)
+{
+	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+	flow_hash_rnd_recalc(cpu) = 0;
+
+	__flow_cache_shrink(cpu, 0);
+}
+
+static u32 flow_hash_code(struct flowi *key, int cpu)
+{
+	u32 *k = (u32 *) key;
+
+	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+		(flow_hash_size - 1));
+}
+
+#if (BITS_PER_LONG == 64)
+typedef u64 flow_compare_t;
+#else
+typedef u32 flow_compare_t;
+#endif
+
+extern void flowi_is_missized(void);
+
+/* I hear what you're saying, use memcmp.  But memcmp cannot make
+ * important assumptions that we can here, such as alignment and
+ * constant size.
+ */
+static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+{
+	flow_compare_t *k1, *k1_lim, *k2;
+	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
+
+	if (sizeof(struct flowi) % sizeof(flow_compare_t))
+		flowi_is_missized();
+
+	k1 = (flow_compare_t *) key1;
+	k1_lim = k1 + n_elem;
+
+	k2 = (flow_compare_t *) key2;
+
+	do {
+		if (*k1++ != *k2++)
+			return 1;
+	} while (k1 < k1_lim);
+
+	return 0;
+}
+
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			flow_resolve_t resolver)
+{
+	struct flow_cache_entry *fle, **head;
+	unsigned int hash;
+	int cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+
+	fle = NULL;
+	/* Packet really early in init?  Making flow_cache_init a
+	 * pre-smp initcall would solve this.  --RR */
+	if (!flow_table(cpu))
+		goto nocache;
+
+	if (flow_hash_rnd_recalc(cpu))
+		flow_new_hash_rnd(cpu);
+	hash = flow_hash_code(key, cpu);
+
+	head = &flow_table(cpu)[hash];
+	for (fle = *head; fle; fle = fle->next) {
+		if (fle->family == family &&
+		    fle->dir == dir &&
+		    flow_key_compare(key, &fle->key) == 0) {
+			if (fle->genid == atomic_read(&flow_cache_genid)) {
+				void *ret = fle->object;
+
+				if (ret)
+					atomic_inc(fle->object_ref);
+				local_bh_enable();
+
+				return ret;
+			}
+			break;
+		}
+	}
+
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
+		if (fle) {
+			fle->next = *head;
+			*head = fle;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			fle->object = NULL;
+			flow_count(cpu)++;
+		}
+	}
+
+nocache:
+	{
+		void *obj;
+		atomic_t *obj_ref;
+
+		resolver(key, family, dir, &obj, &obj_ref);
+
+		if (fle) {
+			fle->genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object)
+				atomic_dec(fle->object_ref);
+
+			fle->object = obj;
+			fle->object_ref = obj_ref;
+			if (obj)
+				atomic_inc(fle->object_ref);
+		}
+		local_bh_enable();
+
+		return obj;
+	}
+}
+
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	int i;
+	int cpu;
+
+	cpu = smp_processor_id();
+	for (i = 0; i < flow_hash_size; i++) {
+		struct flow_cache_entry *fle;
+
+		fle = flow_table(cpu)[i];
+		for (; fle; fle = fle->next) {
+			unsigned genid = atomic_read(&flow_cache_genid);
+
+			if (!fle->object || fle->genid == genid)
+				continue;
+
+			fle->object = NULL;
+			atomic_dec(fle->object_ref);
+		}
+	}
+
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	int cpu;
+	struct tasklet_struct *tasklet;
+
+	cpu = smp_processor_id();
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet->data = (unsigned long)info;
+	tasklet_schedule(tasklet);
+}
+
+void flow_cache_flush(void)
+{
+	struct flow_flush_info info;
+	static DECLARE_MUTEX(flow_flush_sem);
+
+	/* Don't want cpus going down or up during this. */
+	lock_cpu_hotplug();
+	down(&flow_flush_sem);
+	atomic_set(&info.cpuleft, num_online_cpus());
+	init_completion(&info.completion);
+
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+	flow_cache_flush_tasklet((unsigned long)&info);
+	local_bh_enable();
+
+	wait_for_completion(&info.completion);
+	up(&flow_flush_sem);
+	unlock_cpu_hotplug();
+}
+
+static void __devinit flow_cache_cpu_prepare(int cpu)
+{
+	struct tasklet_struct *tasklet;
+	unsigned long order;
+
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
+	     order++)
+		/* NOTHING */;
+
+	flow_table(cpu) = (struct flow_cache_entry **)
+		__get_free_pages(GFP_KERNEL, order);
+	if (!flow_table(cpu))
+		panic("NET: failed to allocate flow cache order %lu\n", order);
+
+	memset(flow_table(cpu), 0, PAGE_SIZE << order);
+
+	flow_hash_rnd_recalc(cpu) = 1;
+	flow_count(cpu) = 0;
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int flow_cache_cpu(struct notifier_block *nfb,
+			  unsigned long action,
+			  void *hcpu)
+{
+	if (action == CPU_DEAD)
+		__flow_cache_shrink((unsigned long)hcpu, 0);
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __init flow_cache_init(void)
+{
+	int i;
+
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+
+	if (!flow_cachep)
+		panic("NET: failed to allocate flow cache slab\n");
+
+	flow_hash_shift = 10;
+	flow_lwm = 2 * flow_hash_size;
+	flow_hwm = 4 * flow_hash_size;
+
+	init_timer(&flow_hash_rnd_timer);
+	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+
+	for_each_cpu(i)
+		flow_cache_cpu_prepare(i);
+
+	hotcpu_notifier(flow_cache_cpu, 0);
+	return 0;
+}
+
+module_init(flow_cache_init);
+
+EXPORT_SYMBOL(flow_cache_genid);
+EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
new file mode 100644
index 00000000000..b07c029e821
--- /dev/null
+++ b/net/core/gen_estimator.c
@@ -0,0 +1,250 @@
+/*
+ * net/sched/gen_estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *              Jamal Hadi Salim - moved it to net/core and reshulfed
+ *              names to make it usable in general net subsystem.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/gen_stats.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to built an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * The stored value for avbps is scaled by 2^5, so that maximal
+     rate is ~1Gbit, avpps is scaled by 2^10.
+
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct gen_estimator
+{
+	struct gen_estimator	*next;
+	struct gnet_stats_basic	*bstats;
+	struct gnet_stats_rate_est	*rate_est;
+	spinlock_t		*stats_lock;
+	unsigned		interval;
+	int			ewma_log;
+	u64			last_bytes;
+	u32			last_packets;
+	u32			avpps;
+	u32			avbps;
+};
+
+struct gen_estimator_head
+{
+	struct timer_list	timer;
+	struct gen_estimator	*list;
+};
+
+static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
+
+/* Estimator array lock */
+static DEFINE_RWLOCK(est_lock);
+
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct gen_estimator *e;
+
+	read_lock(&est_lock);
+	for (e = elist[idx].list; e; e = e->next) {
+		u64 nbytes;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		nbytes = e->bstats->bytes;
+		npackets = e->bstats->packets;
+		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = nbytes;
+		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		e->rate_est->bps = (e->avbps+0xF)>>5;
+
+		rate = (npackets - e->last_packets)<<(12 - idx);
+		e->last_packets = npackets;
+		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+		e->rate_est->pps = (e->avpps+0x1FF)>>10;
+		spin_unlock(e->stats_lock);
+	}
+
+	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	read_unlock(&est_lock);
+}
+
+/**
+ * gen_new_estimator - create a new rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+ * as destination. A new timer with the interval specified in the
+ * configuration TLV is created. Upon each interval, the latest statistics
+ * will be read from &bstats and the estimated rate will be stored in
+ * &rate_est with the statistics lock grabed during this period.
+ * 
+ * Returns 0 on success or a negative error code.
+ */
+int gen_new_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
+{
+	struct gen_estimator *est;
+	struct gnet_estimator *parm = RTA_DATA(opt);
+
+	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOBUFS;
+
+	memset(est, 0, sizeof(*est));
+	est->interval = parm->interval + 2;
+	est->bstats = bstats;
+	est->rate_est = rate_est;
+	est->stats_lock = stats_lock;
+	est->ewma_log = parm->ewma_log;
+	est->last_bytes = bstats->bytes;
+	est->avbps = rate_est->bps<<5;
+	est->last_packets = bstats->packets;
+	est->avpps = rate_est->pps<<10;
+
+	est->next = elist[est->interval].list;
+	if (est->next == NULL) {
+		init_timer(&elist[est->interval].timer);
+		elist[est->interval].timer.data = est->interval;
+		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
+		elist[est->interval].timer.function = est_timer;
+		add_timer(&elist[est->interval].timer);
+	}
+	write_lock_bh(&est_lock);
+	elist[est->interval].list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+/**
+ * gen_kill_estimator - remove a rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Removes the rate estimator specified by &bstats and &rate_est
+ * and deletes the timer.
+ */
+void gen_kill_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est)
+{
+	int idx;
+	struct gen_estimator *est, **pest;
+
+	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
+		int killed = 0;
+		pest = &elist[idx].list;
+		while ((est=*pest) != NULL) {
+			if (est->rate_est != rate_est || est->bstats != bstats) {
+				pest = &est->next;
+				continue;
+			}
+
+			write_lock_bh(&est_lock);
+			*pest = est->next;
+			write_unlock_bh(&est_lock);
+
+			kfree(est);
+			killed++;
+		}
+		if (killed && elist[idx].list == NULL)
+			del_timer(&elist[idx].timer);
+	}
+}
+
+/**
+ * gen_replace_estimator - replace rate estimator configruation
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Replaces the configuration of a rate estimator by calling
+ * gen_kill_estimator() and gen_new_estimator().
+ * 
+ * Returns 0 on success or a negative error code.
+ */
+int
+gen_replace_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
+	struct rtattr *opt)
+{
+    gen_kill_estimator(bstats, rate_est);
+    return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+}
+    
+
+EXPORT_SYMBOL(gen_kill_estimator);
+EXPORT_SYMBOL(gen_new_estimator);
+EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
new file mode 100644
index 00000000000..8f21490355f
--- /dev/null
+++ b/net/core/gen_stats.c
@@ -0,0 +1,239 @@
+/*
+ * net/core/gen_stats.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:  Thomas Graf <tgraf@suug.ch>
+ *           Jamal Hadi Salim
+ *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * See Documentation/networking/gen_stats.txt
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h>
+#include <linux/gen_stats.h>
+#include <net/gen_stats.h>
+
+
+static inline int
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+{
+	RTA_PUT(d->skb, type, size, buf);
+	return 0;
+
+rtattr_failure:
+	spin_unlock_bh(d->lock);
+	return -1;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
+ * @xstats_type: TLV type for backward compatibility xstats TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * The dumping handle is marked to be in backward compatibility mode telling
+ * all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
+	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+{
+	memset(d, 0, sizeof(*d));
+	
+	spin_lock_bh(lock);
+	d->lock = lock;
+	if (type)
+		d->tail = (struct rtattr *) skb->tail;
+	d->skb = skb;
+	d->compat_tc_stats = tc_stats_type;
+	d->compat_xstats = xstats_type;
+
+	if (d->tail)
+		return gnet_stats_copy(d, type, NULL, 0);
+
+	return 0;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+	struct gnet_dump *d)
+{
+	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+}
+
+/**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @d: dumping handle
+ * @b: basic statistics
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.bytes = b->bytes;
+		d->tc_stats.packets = b->packets;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
+ * @d: dumping handle
+ * @r: rate estimator statistics
+ *
+ * Appends the rate estimator statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.bps = r->bps;
+		d->tc_stats.pps = r->pps;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_queue - copy queue statistics into statistics TLV
+ * @d: dumping handle
+ * @q: queue statistics
+ *
+ * Appends the queue statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.drops = q->drops;
+		d->tc_stats.qlen = q->qlen;
+		d->tc_stats.backlog = q->backlog;
+		d->tc_stats.overlimits = q->overlimits;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_app - copy application specific statistics into statistics TLV
+ * @d: dumping handle
+ * @st: application specific statistics data
+ * @len: length of data
+ *
+ * Appends the application sepecific statistics to the top level TLV created by
+ * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
+ * handle is in backward compatibility mode.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
+{
+	if (d->compat_xstats) {
+		d->xstats = st;
+		d->xstats_len = len;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+
+	return 0;
+}
+
+/**
+ * gnet_stats_finish_copy - finish dumping procedure
+ * @d: dumping handle
+ *
+ * Corrects the length of the top level TLV to include all TLVs added
+ * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
+ * if gnet_stats_start_copy_compat() was used and releases the statistics
+ * lock.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_finish_copy(struct gnet_dump *d)
+{
+	if (d->tail)
+		d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+
+	if (d->compat_tc_stats)
+		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
+			sizeof(d->tc_stats)) < 0)
+			return -1;
+
+	if (d->compat_xstats && d->xstats) {
+		if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
+			d->xstats_len) < 0)
+			return -1;
+	}
+
+	spin_unlock_bh(d->lock);
+	return 0;
+}
+
+
+EXPORT_SYMBOL(gnet_stats_start_copy);
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
+EXPORT_SYMBOL(gnet_stats_copy_basic);
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+EXPORT_SYMBOL(gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_copy_app);
+EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
new file mode 100644
index 00000000000..d57ace949ab
--- /dev/null
+++ b/net/core/iovec.c
@@ -0,0 +1,239 @@
+/*
+ *	iovec manipulation routines.
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *		Andrew Lunn	:	Errors in iovec copying.
+ *		Pedro Roque	:	Added memcpy_fromiovecend and
+ *					csum_..._fromiovecend.
+ *		Andi Kleen	:	fixed error handling for 2.1
+ *		Alexey Kuznetsov:	2.1 optimisations
+ *		Andi Kleen	:	Fix csum*fromiovecend for IPv6.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+
+/*
+ *	Verify iovec. The caller must ensure that the iovec is big enough
+ *	to hold the message iovec.
+ *
+ *	Save time not doing verify_area. copy_*_user will make this work
+ *	in any case.
+ */
+
+int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+{
+	int size, err, ct;
+	
+	if (m->msg_namelen) {
+		if (mode == VERIFY_READ) {
+			err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+						  address);
+			if (err < 0)
+				return err;
+		}
+		m->msg_name = address;
+	} else {
+		m->msg_name = NULL;
+	}
+
+	size = m->msg_iovlen * sizeof(struct iovec);
+	if (copy_from_user(iov, m->msg_iov, size))
+		return -EFAULT;
+
+	m->msg_iov = iov;
+	err = 0;
+
+	for (ct = 0; ct < m->msg_iovlen; ct++) {
+		err += iov[ct].iov_len;
+		/*
+		 * Goal is not to verify user data, but to prevent returning
+		 * negative value, which is interpreted as errno.
+		 * Overflow is still possible, but it is harmless.
+		 */
+		if (err < 0)
+			return -EMSGSIZE;
+	}
+
+	return err;
+}
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+ 
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, iov->iov_len, len);
+			if (copy_to_user(iov->iov_base, kdata, copy))
+				return -EFAULT;
+			kdata += copy;
+			len -= copy;
+			iov->iov_len -= copy;
+			iov->iov_base += copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+ 
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, len, iov->iov_len);
+			if (copy_from_user(kdata, iov->iov_base, copy))
+				return -EFAULT;
+			len -= copy;
+			kdata += copy;
+			iov->iov_base += copy;
+			iov->iov_len -= copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	For use with ip_build_xmit
+ */
+int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
+			int len)
+{
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	And now for the all-in-one: copy and checksum from a user iovec
+ *	directly to a datagram
+ *	Calls to csum_partial but the last must be in 32 bit chunks
+ *
+ *	ip_build_xmit must ensure that when fragmenting only the last
+ *	call to this function will be unaligned also.
+ */
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+				 int offset, unsigned int len, int *csump)
+{
+	int csum = *csump;
+	int partial_cnt = 0, err = 0;
+
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+
+		/* There is a remnant from previous iov. */
+		if (partial_cnt) {
+			int par_len = 4 - partial_cnt;
+
+			/* iov component is too short ... */
+			if (par_len > copy) {
+				if (copy_from_user(kdata, base, copy))
+					goto out_fault;
+				kdata += copy;
+				base += copy;
+				partial_cnt += copy;
+				len -= copy;
+				iov++;
+				if (len)
+					continue;
+				*csump = csum_partial(kdata - partial_cnt,
+							 partial_cnt, csum);
+				goto out;
+			}
+			if (copy_from_user(kdata, base, par_len))
+				goto out_fault;
+			csum = csum_partial(kdata - partial_cnt, 4, csum);
+			kdata += par_len;
+			base  += par_len;
+			copy  -= par_len;
+			len   -= par_len;
+			partial_cnt = 0;
+		}
+
+		if (len > copy) {
+			partial_cnt = copy % 4;
+			if (partial_cnt) {
+				copy -= partial_cnt;
+				if (copy_from_user(kdata + copy, base + copy,
+				 		partial_cnt))
+					goto out_fault;
+			}
+		}
+
+		if (copy) {
+			csum = csum_and_copy_from_user(base, kdata, copy,
+							csum, &err);
+			if (err)
+				goto out;
+		}
+		len   -= copy + partial_cnt;
+		kdata += copy + partial_cnt;
+		iov++;
+	}
+        *csump = csum;
+out:
+	return err;
+
+out_fault:
+	err = -EFAULT;
+	goto out;
+}
+
+EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+EXPORT_SYMBOL(memcpy_fromiovec);
+EXPORT_SYMBOL(memcpy_fromiovecend);
+EXPORT_SYMBOL(memcpy_toiovec);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
new file mode 100644
index 00000000000..4859b7446c6
--- /dev/null
+++ b/net/core/link_watch.c
@@ -0,0 +1,137 @@
+/*
+ * Linux network device link state notification
+ *
+ * Author:
+ *     Stefan Rompf <sux@loplof.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {
+	LW_RUNNING = 0,
+	LW_SE_USED
+};
+
+static unsigned long linkwatch_flags;
+static unsigned long linkwatch_nextevent;
+
+static void linkwatch_event(void *dummy);
+static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL);
+
+static LIST_HEAD(lweventlist);
+static DEFINE_SPINLOCK(lweventlist_lock);
+
+struct lw_event {
+	struct list_head list;
+	struct net_device *dev;
+};
+
+/* Avoid kmalloc() for most systems */
+static struct lw_event singleevent;
+
+/* Must be called with the rtnl semaphore held */
+void linkwatch_run_queue(void)
+{
+	LIST_HEAD(head);
+	struct list_head *n, *next;
+
+	spin_lock_irq(&lweventlist_lock);
+	list_splice_init(&lweventlist, &head);
+	spin_unlock_irq(&lweventlist_lock);
+
+	list_for_each_safe(n, next, &head) {
+		struct lw_event *event = list_entry(n, struct lw_event, list);
+		struct net_device *dev = event->dev;
+
+		if (event == &singleevent) {
+			clear_bit(LW_SE_USED, &linkwatch_flags);
+		} else {
+			kfree(event);
+		}
+
+		/* We are about to handle this device,
+		 * so new events can be accepted
+		 */
+		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+		if (dev->flags & IFF_UP) {
+			netdev_state_change(dev);
+		}
+
+		dev_put(dev);
+	}
+}       
+
+
+static void linkwatch_event(void *dummy)
+{
+	/* Limit the number of linkwatch events to one
+	 * per second so that a runaway driver does not
+	 * cause a storm of messages on the netlink
+	 * socket
+	 */	
+	linkwatch_nextevent = jiffies + HZ;
+	clear_bit(LW_RUNNING, &linkwatch_flags);
+
+	rtnl_shlock();
+	linkwatch_run_queue();
+	rtnl_shunlock();
+}
+
+
+void linkwatch_fire_event(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+		unsigned long flags;
+		struct lw_event *event;
+
+		if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
+			event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
+
+			if (unlikely(event == NULL)) {
+				clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+				return;
+			}
+		} else {
+			event = &singleevent;
+		}
+
+		dev_hold(dev);
+		event->dev = dev;
+
+		spin_lock_irqsave(&lweventlist_lock, flags);
+		list_add_tail(&event->list, &lweventlist);
+		spin_unlock_irqrestore(&lweventlist_lock, flags);
+
+		if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
+			unsigned long thisevent = jiffies;
+
+			if (thisevent >= linkwatch_nextevent) {
+				schedule_work(&linkwatch_work);
+			} else {
+				schedule_delayed_work(&linkwatch_work, linkwatch_nextevent - thisevent);
+			}
+		}
+	}
+}
+
+EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
new file mode 100644
index 00000000000..0a2f67bbef2
--- /dev/null
+++ b/net/core/neighbour.c
@@ -0,0 +1,2362 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
+ *	Harald Welte		Add neighbour cache statistics like rtstat
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/times.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+
+#define NEIGH_DEBUG 1
+
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
+
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
+
+#define PNEIGH_HASHMASK		0xF
+
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
+
+static struct neigh_table *neigh_tables;
+static struct file_operations neigh_stat_seq_fops;
+
+/*
+   Neighbour hash table buckets are protected with rwlock tbl->lock.
+
+   - All the scans/updates to hash buckets MUST be made under this lock.
+   - NOTHING clever should be made under this lock: no callbacks
+     to protocol backends, no attempts to send something to network.
+     It will result in deadlocks, if backend/driver wants to use neighbour
+     cache.
+   - If the entry requires some non-trivial actions, increase
+     its reference count and release table lock.
+
+   Neighbour entries are protected:
+   - with reference count.
+   - with rwlock neigh->lock
+
+   Reference count prevents destruction.
+
+   neigh->lock mainly serializes ll address data and its validity state.
+   However, the same lock is used to protect another entry fields:
+    - timer
+    - resolution queue
+
+   Again, nothing clever shall be made under neigh->lock,
+   the most complicated procedure, which we allow is dev->hard_header.
+   It is supposed, that dev->hard_header is simplistic and does
+   not make callbacks to neighbour tables.
+
+   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
+   list of neighbour tables. This list is used only in process context,
+ */
+
+static DEFINE_RWLOCK(neigh_tbl_lock);
+
+static int neigh_blackhole(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+/*
+ * It is random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to default IPv6 settings and is not overridable,
+ * because it is really reasonable choice.
+ */
+
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+	return (base ? (net_random() % base) + (base >> 1) : 0);
+}
+
+
+static int neigh_forced_gc(struct neigh_table *tbl)
+{
+	int shrunk = 0;
+	int i;
+
+	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	for (i = 0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np;
+
+		np = &tbl->hash_buckets[i];
+		while ((n = *np) != NULL) {
+			/* Neighbour record may be discarded if:
+			 * - nobody refers to it.
+			 * - it is not permanent
+			 */
+			write_lock(&n->lock);
+			if (atomic_read(&n->refcnt) == 1 &&
+			    !(n->nud_state & NUD_PERMANENT)) {
+				*np	= n->next;
+				n->dead = 1;
+				shrunk	= 1;
+				write_unlock(&n->lock);
+				neigh_release(n);
+				continue;
+			}
+			write_unlock(&n->lock);
+			np = &n->next;
+		}
+	}
+
+	tbl->last_flush = jiffies;
+
+	write_unlock_bh(&tbl->lock);
+
+	return shrunk;
+}
+
+static int neigh_del_timer(struct neighbour *n)
+{
+	if ((n->nud_state & NUD_IN_TIMER) &&
+	    del_timer(&n->timer)) {
+		neigh_release(n);
+		return 1;
+	}
+	return 0;
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+	int i;
+
+	write_lock_bh(&tbl->lock);
+
+	for (i=0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np;
+
+		np = &tbl->hash_buckets[i];
+		while ((n = *np) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+			*np = n->next;
+			write_lock_bh(&n->lock);
+			n->dead = 1;
+			neigh_del_timer(n);
+			write_unlock_bh(&n->lock);
+			neigh_release(n);
+		}
+	}
+
+        write_unlock_bh(&tbl->lock);
+}
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	int i;
+
+	write_lock_bh(&tbl->lock);
+
+	for (i = 0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np = &tbl->hash_buckets[i];
+
+		while ((n = *np) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+			*np = n->next;
+			write_lock(&n->lock);
+			neigh_del_timer(n);
+			n->dead = 1;
+
+			if (atomic_read(&n->refcnt) != 1) {
+				/* The most unpleasant situation.
+				   We must destroy neighbour entry,
+				   but someone still uses it.
+
+				   The destroy will be delayed until
+				   the last user releases us, but
+				   we must kill timers etc. and move
+				   it to safe state.
+				 */
+				skb_queue_purge(&n->arp_queue);
+				n->output = neigh_blackhole;
+				if (n->nud_state & NUD_VALID)
+					n->nud_state = NUD_NOARP;
+				else
+					n->nud_state = NUD_NONE;
+				NEIGH_PRINTK2("neigh %p is stray.\n", n);
+			}
+			write_unlock(&n->lock);
+			neigh_release(n);
+		}
+	}
+
+	pneigh_ifdown(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	return 0;
+}
+
+static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+{
+	struct neighbour *n = NULL;
+	unsigned long now = jiffies;
+	int entries;
+
+	entries = atomic_inc_return(&tbl->entries) - 1;
+	if (entries >= tbl->gc_thresh3 ||
+	    (entries >= tbl->gc_thresh2 &&
+	     time_after(now, tbl->last_flush + 5 * HZ))) {
+		if (!neigh_forced_gc(tbl) &&
+		    entries >= tbl->gc_thresh3)
+			goto out_entries;
+	}
+
+	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
+	if (!n)
+		goto out_entries;
+
+	memset(n, 0, tbl->entry_size);
+
+	skb_queue_head_init(&n->arp_queue);
+	rwlock_init(&n->lock);
+	n->updated	  = n->used = now;
+	n->nud_state	  = NUD_NONE;
+	n->output	  = neigh_blackhole;
+	n->parms	  = neigh_parms_clone(&tbl->parms);
+	init_timer(&n->timer);
+	n->timer.function = neigh_timer_handler;
+	n->timer.data	  = (unsigned long)n;
+
+	NEIGH_CACHE_STAT_INC(tbl, allocs);
+	n->tbl		  = tbl;
+	atomic_set(&n->refcnt, 1);
+	n->dead		  = 1;
+out:
+	return n;
+
+out_entries:
+	atomic_dec(&tbl->entries);
+	goto out;
+}
+
+static struct neighbour **neigh_hash_alloc(unsigned int entries)
+{
+	unsigned long size = entries * sizeof(struct neighbour *);
+	struct neighbour **ret;
+
+	if (size <= PAGE_SIZE) {
+		ret = kmalloc(size, GFP_ATOMIC);
+	} else {
+		ret = (struct neighbour **)
+			__get_free_pages(GFP_ATOMIC, get_order(size));
+	}
+	if (ret)
+		memset(ret, 0, size);
+
+	return ret;
+}
+
+static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+{
+	unsigned long size = entries * sizeof(struct neighbour *);
+
+	if (size <= PAGE_SIZE)
+		kfree(hash);
+	else
+		free_pages((unsigned long)hash, get_order(size));
+}
+
+static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+{
+	struct neighbour **new_hash, **old_hash;
+	unsigned int i, new_hash_mask, old_entries;
+
+	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
+
+	BUG_ON(new_entries & (new_entries - 1));
+	new_hash = neigh_hash_alloc(new_entries);
+	if (!new_hash)
+		return;
+
+	old_entries = tbl->hash_mask + 1;
+	new_hash_mask = new_entries - 1;
+	old_hash = tbl->hash_buckets;
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+	for (i = 0; i < old_entries; i++) {
+		struct neighbour *n, *next;
+
+		for (n = old_hash[i]; n; n = next) {
+			unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
+
+			hash_val &= new_hash_mask;
+			next = n->next;
+
+			n->next = new_hash[hash_val];
+			new_hash[hash_val] = n;
+		}
+	}
+	tbl->hash_buckets = new_hash;
+	tbl->hash_mask = new_hash_mask;
+
+	neigh_hash_free(old_hash, old_entries);
+}
+
+struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+	
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+		if (!memcmp(n->primary_key, pkey, key_len)) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	u32 hash_val;
+	int key_len = tbl->key_len;
+	int error;
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+
+	if (!n) {
+		rc = ERR_PTR(-ENOBUFS);
+		goto out;
+	}
+
+	memcpy(n->primary_key, pkey, key_len);
+	n->dev = dev;
+	dev_hold(dev);
+
+	/* Protocol specific setup. */
+	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
+
+	write_lock_bh(&tbl->lock);
+
+	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
+		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+
+	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+
+	if (n->parms->dead) {
+		rc = ERR_PTR(-EINVAL);
+		goto out_tbl_unlock;
+	}
+
+	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+			neigh_hold(n1);
+			rc = n1;
+			goto out_tbl_unlock;
+		}
+	}
+
+	n->next = tbl->hash_buckets[hash_val];
+	tbl->hash_buckets[hash_val] = n;
+	n->dead = 0;
+	neigh_hold(n);
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK2("neigh %p is created.\n", n);
+	rc = n;
+out:
+	return rc;
+out_tbl_unlock:
+	write_unlock_bh(&tbl->lock);
+out_neigh_release:
+	neigh_release(n);
+	goto out;
+}
+
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+				    struct net_device *dev, int creat)
+{
+	struct pneigh_entry *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	read_lock_bh(&tbl->lock);
+
+	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+		if (!memcmp(n->key, pkey, key_len) &&
+		    (n->dev == dev || !n->dev)) {
+			read_unlock_bh(&tbl->lock);
+			goto out;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	n = NULL;
+	if (!creat)
+		goto out;
+
+	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+	if (!n)
+		goto out;
+
+	memcpy(n->key, pkey, key_len);
+	n->dev = dev;
+	if (dev)
+		dev_hold(dev);
+
+	if (tbl->pconstructor && tbl->pconstructor(n)) {
+		if (dev)
+			dev_put(dev);
+		kfree(n);
+		n = NULL;
+		goto out;
+	}
+
+	write_lock_bh(&tbl->lock);
+	n->next = tbl->phash_buckets[hash_val];
+	tbl->phash_buckets[hash_val] = n;
+	write_unlock_bh(&tbl->lock);
+out:
+	return n;
+}
+
+
+int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+		  struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	int key_len = tbl->key_len;
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	write_lock_bh(&tbl->lock);
+	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+	     np = &n->next) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
+			*np = n->next;
+			write_unlock_bh(&tbl->lock);
+			if (tbl->pdestructor)
+				tbl->pdestructor(n);
+			if (n->dev)
+				dev_put(n->dev);
+			kfree(n);
+			return 0;
+		}
+	}
+	write_unlock_bh(&tbl->lock);
+	return -ENOENT;
+}
+
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	u32 h;
+
+	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
+		np = &tbl->phash_buckets[h];
+		while ((n = *np) != NULL) {
+			if (!dev || n->dev == dev) {
+				*np = n->next;
+				if (tbl->pdestructor)
+					tbl->pdestructor(n);
+				if (n->dev)
+					dev_put(n->dev);
+				kfree(n);
+				continue;
+			}
+			np = &n->next;
+		}
+	}
+	return -ENOENT;
+}
+
+
+/*
+ *	neighbour must already be out of the table;
+ *
+ */
+void neigh_destroy(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
+
+	if (!neigh->dead) {
+		printk(KERN_WARNING
+		       "Destroying alive neighbour %p\n", neigh);
+		dump_stack();
+		return;
+	}
+
+	if (neigh_del_timer(neigh))
+		printk(KERN_WARNING "Impossible event.\n");
+
+	while ((hh = neigh->hh) != NULL) {
+		neigh->hh = hh->hh_next;
+		hh->hh_next = NULL;
+		write_lock_bh(&hh->hh_lock);
+		hh->hh_output = neigh_blackhole;
+		write_unlock_bh(&hh->hh_lock);
+		if (atomic_dec_and_test(&hh->hh_refcnt))
+			kfree(hh);
+	}
+
+	if (neigh->ops && neigh->ops->destructor)
+		(neigh->ops->destructor)(neigh);
+
+	skb_queue_purge(&neigh->arp_queue);
+
+	dev_put(neigh->dev);
+	neigh_parms_put(neigh->parms);
+
+	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+
+	atomic_dec(&neigh->tbl->entries);
+	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
+
+/* Neighbour state is suspicious;
+   disable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_suspect(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+
+	neigh->output = neigh->ops->output;
+
+	for (hh = neigh->hh; hh; hh = hh->hh_next)
+		hh->hh_output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+   enable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+	neigh->output = neigh->ops->connected_output;
+
+	for (hh = neigh->hh; hh; hh = hh->hh_next)
+		hh->hh_output = neigh->ops->hh_output;
+}
+
+static void neigh_periodic_timer(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	struct neighbour *n, **np;
+	unsigned long expire, now = jiffies;
+
+	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
+
+	write_lock(&tbl->lock);
+
+	/*
+	 *	periodically recompute ReachableTime from random function
+	 */
+
+	if (time_after(now, tbl->last_rand + 300 * HZ)) {
+		struct neigh_parms *p;
+		tbl->last_rand = now;
+		for (p = &tbl->parms; p; p = p->next)
+			p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+	}
+
+	np = &tbl->hash_buckets[tbl->hash_chain_gc];
+	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
+
+	while ((n = *np) != NULL) {
+		unsigned int state;
+
+		write_lock(&n->lock);
+
+		state = n->nud_state;
+		if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+			write_unlock(&n->lock);
+			goto next_elt;
+		}
+
+		if (time_before(n->used, n->confirmed))
+			n->used = n->confirmed;
+
+		if (atomic_read(&n->refcnt) == 1 &&
+		    (state == NUD_FAILED ||
+		     time_after(now, n->used + n->parms->gc_staletime))) {
+			*np = n->next;
+			n->dead = 1;
+			write_unlock(&n->lock);
+			neigh_release(n);
+			continue;
+		}
+		write_unlock(&n->lock);
+
+next_elt:
+		np = &n->next;
+	}
+
+ 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
+ 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
+ 	 * base_reachable_time.
+	 */
+	expire = tbl->parms.base_reachable_time >> 1;
+	expire /= (tbl->hash_mask + 1);
+	if (!expire)
+		expire = 1;
+
+ 	mod_timer(&tbl->gc_timer, now + expire);
+
+	write_unlock(&tbl->lock);
+}
+
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+	struct neigh_parms *p = n->parms;
+	return (n->nud_state & NUD_PROBE ?
+		p->ucast_probes :
+		p->ucast_probes + p->app_probes + p->mcast_probes);
+}
+
+
+/* Called when a timer expires for a neighbour entry. */
+
+static void neigh_timer_handler(unsigned long arg)
+{
+	unsigned long now, next;
+	struct neighbour *neigh = (struct neighbour *)arg;
+	unsigned state;
+	int notify = 0;
+
+	write_lock(&neigh->lock);
+
+	state = neigh->nud_state;
+	now = jiffies;
+	next = now + HZ;
+
+	if (!(state & NUD_IN_TIMER)) {
+#ifndef CONFIG_SMP
+		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
+#endif
+		goto out;
+	}
+
+	if (state & NUD_REACHABLE) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->reachable_time)) {
+			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else if (time_before_eq(now,
+					  neigh->used + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+			neigh->nud_state = NUD_DELAY;
+			neigh_suspect(neigh);
+			next = now + neigh->parms->delay_probe_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+			neigh->nud_state = NUD_STALE;
+			neigh_suspect(neigh);
+		}
+	} else if (state & NUD_DELAY) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+			neigh->nud_state = NUD_REACHABLE;
+			neigh_connect(neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+			neigh->nud_state = NUD_PROBE;
+			atomic_set(&neigh->probes, 0);
+			next = now + neigh->parms->retrans_time;
+		}
+	} else {
+		/* NUD_PROBE|NUD_INCOMPLETE */
+		next = now + neigh->parms->retrans_time;
+	}
+
+	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
+	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
+		struct sk_buff *skb;
+
+		neigh->nud_state = NUD_FAILED;
+		notify = 1;
+		NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
+		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+		/* It is very thin place. report_unreachable is very complicated
+		   routine. Particularly, it can hit the same neighbour entry!
+
+		   So that, we try to be accurate and avoid dead loop. --ANK
+		 */
+		while (neigh->nud_state == NUD_FAILED &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			write_unlock(&neigh->lock);
+			neigh->ops->error_report(neigh, skb);
+			write_lock(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+
+	if (neigh->nud_state & NUD_IN_TIMER) {
+		neigh_hold(neigh);
+		if (time_before(next, jiffies + HZ/2))
+			next = jiffies + HZ/2;
+		neigh->timer.expires = next;
+		add_timer(&neigh->timer);
+	}
+	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
+		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+		/* keep skb alive even if arp_queue overflows */
+		if (skb)
+			skb_get(skb);
+		write_unlock(&neigh->lock);
+		neigh->ops->solicit(neigh, skb);
+		atomic_inc(&neigh->probes);
+		if (skb)
+			kfree_skb(skb);
+	} else {
+out:
+		write_unlock(&neigh->lock);
+	}
+
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	neigh_release(neigh);
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+	int rc;
+	unsigned long now;
+
+	write_lock_bh(&neigh->lock);
+
+	rc = 0;
+	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
+		goto out_unlock_bh;
+
+	now = jiffies;
+	
+	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
+		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
+			neigh->nud_state     = NUD_INCOMPLETE;
+			neigh_hold(neigh);
+			neigh->timer.expires = now + 1;
+			add_timer(&neigh->timer);
+		} else {
+			neigh->nud_state = NUD_FAILED;
+			write_unlock_bh(&neigh->lock);
+
+			if (skb)
+				kfree_skb(skb);
+			return 1;
+		}
+	} else if (neigh->nud_state & NUD_STALE) {
+		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+		neigh_hold(neigh);
+		neigh->nud_state = NUD_DELAY;
+		neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+		add_timer(&neigh->timer);
+	}
+
+	if (neigh->nud_state == NUD_INCOMPLETE) {
+		if (skb) {
+			if (skb_queue_len(&neigh->arp_queue) >=
+			    neigh->parms->queue_len) {
+				struct sk_buff *buff;
+				buff = neigh->arp_queue.next;
+				__skb_unlink(buff, &neigh->arp_queue);
+				kfree_skb(buff);
+			}
+			__skb_queue_tail(&neigh->arp_queue, skb);
+		}
+		rc = 1;
+	}
+out_unlock_bh:
+	write_unlock_bh(&neigh->lock);
+	return rc;
+}
+
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
+		neigh->dev->header_cache_update;
+
+	if (update) {
+		for (hh = neigh->hh; hh; hh = hh->hh_next) {
+			write_lock_bh(&hh->hh_lock);
+			update(hh, neigh->dev, neigh->ha);
+			write_unlock_bh(&hh->hh_lock);
+		}
+	}
+}
+
+
+
+/* Generic update routine.
+   -- lladdr is new lladdr or NULL, if it is not supplied.
+   -- new    is new state.
+   -- flags
+	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
+				if it is different.
+	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
+				lladdr instead of overriding it 
+				if it is different.
+				It also allows to retain current state
+				if lladdr is unchanged.
+	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
+
+	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing 
+				NTF_ROUTER flag.
+	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
+				a router.
+
+   Caller MUST hold reference count on the entry.
+ */
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags)
+{
+	u8 old;
+	int err;
+#ifdef CONFIG_ARPD
+	int notify = 0;
+#endif
+	struct net_device *dev;
+	int update_isrouter = 0;
+
+	write_lock_bh(&neigh->lock);
+
+	dev    = neigh->dev;
+	old    = neigh->nud_state;
+	err    = -EPERM;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN) && 
+	    (old & (NUD_NOARP | NUD_PERMANENT)))
+		goto out;
+
+	if (!(new & NUD_VALID)) {
+		neigh_del_timer(neigh);
+		if (old & NUD_CONNECTED)
+			neigh_suspect(neigh);
+		neigh->nud_state = new;
+		err = 0;
+#ifdef CONFIG_ARPD
+		notify = old & NUD_VALID;
+#endif
+		goto out;
+	}
+
+	/* Compare new lladdr with cached one */
+	if (!dev->addr_len) {
+		/* First case: device needs no address. */
+		lladdr = neigh->ha;
+	} else if (lladdr) {
+		/* The second case: if something is already cached
+		   and a new address is proposed:
+		   - compare new & old
+		   - if they are different, check override flag
+		 */
+		if ((old & NUD_VALID) && 
+		    !memcmp(lladdr, neigh->ha, dev->addr_len))
+			lladdr = neigh->ha;
+	} else {
+		/* No address is supplied; if we know something,
+		   use it, otherwise discard the request.
+		 */
+		err = -EINVAL;
+		if (!(old & NUD_VALID))
+			goto out;
+		lladdr = neigh->ha;
+	}
+
+	if (new & NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+	neigh->updated = jiffies;
+
+	/* If entry was valid and address is not changed,
+	   do not change entry state, if new one is STALE.
+	 */
+	err = 0;
+	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+	if (old & NUD_VALID) {
+		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
+			update_isrouter = 0;
+			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
+			    (old & NUD_CONNECTED)) {
+				lladdr = neigh->ha;
+				new = NUD_STALE;
+			} else
+				goto out;
+		} else {
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
+			     (old & NUD_CONNECTED))
+			    )
+				new = old;
+		}
+	}
+
+	if (new != old) {
+		neigh_del_timer(neigh);
+		if (new & NUD_IN_TIMER) {
+			neigh_hold(neigh);
+			neigh->timer.expires = jiffies + 
+						((new & NUD_REACHABLE) ? 
+						 neigh->parms->reachable_time : 0);
+			add_timer(&neigh->timer);
+		}
+		neigh->nud_state = new;
+	}
+
+	if (lladdr != neigh->ha) {
+		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		neigh_update_hhs(neigh);
+		if (!(new & NUD_CONNECTED))
+			neigh->confirmed = jiffies -
+				      (neigh->parms->base_reachable_time << 1);
+#ifdef CONFIG_ARPD
+		notify = 1;
+#endif
+	}
+	if (new == old)
+		goto out;
+	if (new & NUD_CONNECTED)
+		neigh_connect(neigh);
+	else
+		neigh_suspect(neigh);
+	if (!(old & NUD_VALID)) {
+		struct sk_buff *skb;
+
+		/* Again: avoid dead loop if something went wrong */
+
+		while (neigh->nud_state & NUD_VALID &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			struct neighbour *n1 = neigh;
+			write_unlock_bh(&neigh->lock);
+			/* On shaper/eql skb->dst->neighbour != neigh :( */
+			if (skb->dst && skb->dst->neighbour)
+				n1 = skb->dst->neighbour;
+			n1->output(skb);
+			write_lock_bh(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+out:
+	if (update_isrouter) {
+		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
+			(neigh->flags | NTF_ROUTER) :
+			(neigh->flags & ~NTF_ROUTER);
+	}
+	write_unlock_bh(&neigh->lock);
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	return err;
+}
+
+struct neighbour *neigh_event_ns(struct neigh_table *tbl,
+				 u8 *lladdr, void *saddr,
+				 struct net_device *dev)
+{
+	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
+						 lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE, 
+			     NEIGH_UPDATE_F_OVERRIDE);
+	return neigh;
+}
+
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
+			  u16 protocol)
+{
+	struct hh_cache	*hh;
+	struct net_device *dev = dst->dev;
+
+	for (hh = n->hh; hh; hh = hh->hh_next)
+		if (hh->hh_type == protocol)
+			break;
+
+	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+		memset(hh, 0, sizeof(struct hh_cache));
+		rwlock_init(&hh->hh_lock);
+		hh->hh_type = protocol;
+		atomic_set(&hh->hh_refcnt, 0);
+		hh->hh_next = NULL;
+		if (dev->hard_header_cache(n, hh)) {
+			kfree(hh);
+			hh = NULL;
+		} else {
+			atomic_inc(&hh->hh_refcnt);
+			hh->hh_next = n->hh;
+			n->hh	    = hh;
+			if (n->nud_state & NUD_CONNECTED)
+				hh->hh_output = n->ops->hh_output;
+			else
+				hh->hh_output = n->ops->output;
+		}
+	}
+	if (hh)	{
+		atomic_inc(&hh->hh_refcnt);
+		dst->hh = hh;
+	}
+}
+
+/* This function can be used in contexts, where only old dev_queue_xmit
+   worked, f.e. if you want to override normal output path (eql, shaper),
+   but resolution is not made yet.
+ */
+
+int neigh_compat_output(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (dev->hard_header &&
+	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+		    	     skb->len) < 0 &&
+	    dev->rebuild_header(skb))
+		return 0;
+
+	return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh;
+	int rc = 0;
+
+	if (!dst || !(neigh = dst->neighbour))
+		goto discard;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (!neigh_event_send(neigh, skb)) {
+		int err;
+		struct net_device *dev = neigh->dev;
+		if (dev->hard_header_cache && !dst->hh) {
+			write_lock_bh(&neigh->lock);
+			if (!dst->hh)
+				neigh_hh_init(neigh, dst, dst->ops->protocol);
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+					       neigh->ha, NULL, skb->len);
+			write_unlock_bh(&neigh->lock);
+		} else {
+			read_lock_bh(&neigh->lock);
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+					       neigh->ha, NULL, skb->len);
+			read_unlock_bh(&neigh->lock);
+		}
+		if (err >= 0)
+			rc = neigh->ops->queue_xmit(skb);
+		else
+			goto out_kfree_skb;
+	}
+out:
+	return rc;
+discard:
+	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
+		      dst, dst ? dst->neighbour : NULL);
+out_kfree_skb:
+	rc = -EINVAL;
+	kfree_skb(skb);
+	goto out;
+}
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	read_lock_bh(&neigh->lock);
+	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+			       neigh->ha, NULL, skb->len);
+	read_unlock_bh(&neigh->lock);
+	if (err >= 0)
+		err = neigh->ops->queue_xmit(skb);
+	else {
+		err = -EINVAL;
+		kfree_skb(skb);
+	}
+	return err;
+}
+
+static void neigh_proxy_process(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	long sched_next = 0;
+	unsigned long now = jiffies;
+	struct sk_buff *skb;
+
+	spin_lock(&tbl->proxy_queue.lock);
+
+	skb = tbl->proxy_queue.next;
+
+	while (skb != (struct sk_buff *)&tbl->proxy_queue) {
+		struct sk_buff *back = skb;
+		long tdif = back->stamp.tv_usec - now;
+
+		skb = skb->next;
+		if (tdif <= 0) {
+			struct net_device *dev = back->dev;
+			__skb_unlink(back, &tbl->proxy_queue);
+			if (tbl->proxy_redo && netif_running(dev))
+				tbl->proxy_redo(back);
+			else
+				kfree_skb(back);
+
+			dev_put(dev);
+		} else if (!sched_next || tdif < sched_next)
+			sched_next = tdif;
+	}
+	del_timer(&tbl->proxy_timer);
+	if (sched_next)
+		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+		    struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	unsigned long sched_next = now + (net_random() % p->proxy_delay);
+
+	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+		kfree_skb(skb);
+		return;
+	}
+	skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
+	skb->stamp.tv_usec = sched_next;
+
+	spin_lock(&tbl->proxy_queue.lock);
+	if (del_timer(&tbl->proxy_timer)) {
+		if (time_before(tbl->proxy_timer.expires, sched_next))
+			sched_next = tbl->proxy_timer.expires;
+	}
+	dst_release(skb->dst);
+	skb->dst = NULL;
+	dev_hold(skb->dev);
+	__skb_queue_tail(&tbl->proxy_queue, skb);
+	mod_timer(&tbl->proxy_timer, sched_next);
+	spin_unlock(&tbl->proxy_queue.lock);
+}
+
+
+struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
+				      struct neigh_table *tbl)
+{
+	struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+	if (p) {
+		memcpy(p, &tbl->parms, sizeof(*p));
+		p->tbl		  = tbl;
+		atomic_set(&p->refcnt, 1);
+		INIT_RCU_HEAD(&p->rcu_head);
+		p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+		if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
+			kfree(p);
+			return NULL;
+		}
+		p->sysctl_table = NULL;
+		write_lock_bh(&tbl->lock);
+		p->next		= tbl->parms.next;
+		tbl->parms.next = p;
+		write_unlock_bh(&tbl->lock);
+	}
+	return p;
+}
+
+static void neigh_rcu_free_parms(struct rcu_head *head)
+{
+	struct neigh_parms *parms =
+		container_of(head, struct neigh_parms, rcu_head);
+
+	neigh_parms_put(parms);
+}
+
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+	struct neigh_parms **p;
+
+	if (!parms || parms == &tbl->parms)
+		return;
+	write_lock_bh(&tbl->lock);
+	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
+		if (*p == parms) {
+			*p = parms->next;
+			parms->dead = 1;
+			write_unlock_bh(&tbl->lock);
+			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
+			return;
+		}
+	}
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK1("neigh_parms_release: not found\n");
+}
+
+void neigh_parms_destroy(struct neigh_parms *parms)
+{
+	kfree(parms);
+}
+
+
+void neigh_table_init(struct neigh_table *tbl)
+{
+	unsigned long now = jiffies;
+	unsigned long phsize;
+
+	atomic_set(&tbl->parms.refcnt, 1);
+	INIT_RCU_HEAD(&tbl->parms.rcu_head);
+	tbl->parms.reachable_time =
+			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+	if (!tbl->kmem_cachep)
+		tbl->kmem_cachep = kmem_cache_create(tbl->id,
+						     tbl->entry_size,
+						     0, SLAB_HWCACHE_ALIGN,
+						     NULL, NULL);
+
+	if (!tbl->kmem_cachep)
+		panic("cannot create neighbour cache");
+
+	tbl->stats = alloc_percpu(struct neigh_statistics);
+	if (!tbl->stats)
+		panic("cannot create neighbour cache statistics");
+	
+#ifdef CONFIG_PROC_FS
+	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
+	if (!tbl->pde) 
+		panic("cannot create neighbour proc dir entry");
+	tbl->pde->proc_fops = &neigh_stat_seq_fops;
+	tbl->pde->data = tbl;
+#endif
+
+	tbl->hash_mask = 1;
+	tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+
+	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
+	tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL);
+
+	if (!tbl->hash_buckets || !tbl->phash_buckets)
+		panic("cannot allocate neighbour cache hashes");
+
+	memset(tbl->phash_buckets, 0, phsize);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
+	rwlock_init(&tbl->lock);
+	init_timer(&tbl->gc_timer);
+	tbl->gc_timer.data     = (unsigned long)tbl;
+	tbl->gc_timer.function = neigh_periodic_timer;
+	tbl->gc_timer.expires  = now + 1;
+	add_timer(&tbl->gc_timer);
+
+	init_timer(&tbl->proxy_timer);
+	tbl->proxy_timer.data	  = (unsigned long)tbl;
+	tbl->proxy_timer.function = neigh_proxy_process;
+	skb_queue_head_init(&tbl->proxy_queue);
+
+	tbl->last_flush = now;
+	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
+	write_lock(&neigh_tbl_lock);
+	tbl->next	= neigh_tables;
+	neigh_tables	= tbl;
+	write_unlock(&neigh_tbl_lock);
+}
+
+int neigh_table_clear(struct neigh_table *tbl)
+{
+	struct neigh_table **tp;
+
+	/* It is not clean... Fix it to unload IPv6 module safely */
+	del_timer_sync(&tbl->gc_timer);
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	neigh_ifdown(tbl, NULL);
+	if (atomic_read(&tbl->entries))
+		printk(KERN_CRIT "neighbour leakage\n");
+	write_lock(&neigh_tbl_lock);
+	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+		if (*tp == tbl) {
+			*tp = tbl->next;
+			break;
+		}
+	}
+	write_unlock(&neigh_tbl_lock);
+
+	neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+	tbl->hash_buckets = NULL;
+
+	kfree(tbl->phash_buckets);
+	tbl->phash_buckets = NULL;
+
+	return 0;
+}
+
+int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct rtattr **nda = arg;
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err = -ENODEV;
+
+	if (ndm->ndm_ifindex &&
+	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+		goto out;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		struct rtattr *dst_attr = nda[NDA_DST - 1];
+		struct neighbour *n;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		read_unlock(&neigh_tbl_lock);
+
+		err = -EINVAL;
+		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+			goto out_dev_put;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			goto out_dev_put;
+		}
+
+		if (!dev)
+			goto out;
+
+		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
+		if (n) {
+			err = neigh_update(n, NULL, NUD_FAILED, 
+					   NEIGH_UPDATE_F_OVERRIDE|
+					   NEIGH_UPDATE_F_ADMIN);
+			neigh_release(n);
+		}
+		goto out_dev_put;
+	}
+	read_unlock(&neigh_tbl_lock);
+	err = -EADDRNOTAVAIL;
+out_dev_put:
+	if (dev)
+		dev_put(dev);
+out:
+	return err;
+}
+
+int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct rtattr **nda = arg;
+	struct neigh_table *tbl;
+	struct net_device *dev = NULL;
+	int err = -ENODEV;
+
+	if (ndm->ndm_ifindex &&
+	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+		goto out;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
+		struct rtattr *dst_attr = nda[NDA_DST - 1];
+		int override = 1;
+		struct neighbour *n;
+
+		if (tbl->family != ndm->ndm_family)
+			continue;
+		read_unlock(&neigh_tbl_lock);
+
+		err = -EINVAL;
+		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+			goto out_dev_put;
+
+		if (ndm->ndm_flags & NTF_PROXY) {
+			err = -ENOBUFS;
+			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
+				err = 0;
+			goto out_dev_put;
+		}
+
+		err = -EINVAL;
+		if (!dev)
+			goto out;
+		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+			goto out_dev_put;
+	
+		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
+		if (n) {
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(n);
+				goto out_dev_put;
+			}
+			
+			override = nlh->nlmsg_flags & NLM_F_REPLACE;
+		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+			err = -ENOENT;
+			goto out_dev_put;
+		} else {
+			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
+			if (IS_ERR(n)) {
+				err = PTR_ERR(n);
+				goto out_dev_put;
+			}
+		}
+
+		err = neigh_update(n,
+				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
+				   ndm->ndm_state,
+				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
+				   NEIGH_UPDATE_F_ADMIN);
+
+		neigh_release(n);
+		goto out_dev_put;
+	}
+
+	read_unlock(&neigh_tbl_lock);
+	err = -EADDRNOTAVAIL;
+out_dev_put:
+	if (dev)
+		dev_put(dev);
+out:
+	return err;
+}
+
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+			   u32 pid, u32 seq, int event)
+{
+	unsigned long now = jiffies;
+	unsigned char *b = skb->tail;
+	struct nda_cacheinfo ci;
+	int locked = 0;
+	u32 probes;
+	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
+					 sizeof(struct ndmsg));
+	struct ndmsg *ndm = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
+	ndm->ndm_family	 = n->ops->family;
+	ndm->ndm_flags	 = n->flags;
+	ndm->ndm_type	 = n->type;
+	ndm->ndm_ifindex = n->dev->ifindex;
+	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+	read_lock_bh(&n->lock);
+	locked		 = 1;
+	ndm->ndm_state	 = n->nud_state;
+	if (n->nud_state & NUD_VALID)
+		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+	ci.ndm_used	 = now - n->used;
+	ci.ndm_confirmed = now - n->confirmed;
+	ci.ndm_updated	 = now - n->updated;
+	ci.ndm_refcnt	 = atomic_read(&n->refcnt) - 1;
+	probes = atomic_read(&n->probes);
+	read_unlock_bh(&n->lock);
+	locked		 = 0;
+	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+	RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
+	nlh->nlmsg_len	 = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	if (locked)
+		read_unlock_bh(&n->lock);
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+			    struct netlink_callback *cb)
+{
+	struct neighbour *n;
+	int rc, h, s_h = cb->args[1];
+	int idx, s_idx = idx = cb->args[2];
+
+	for (h = 0; h <= tbl->hash_mask; h++) {
+		if (h < s_h)
+			continue;
+		if (h > s_h)
+			s_idx = 0;
+		read_lock_bh(&tbl->lock);
+		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
+			if (idx < s_idx)
+				continue;
+			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH) <= 0) {
+				read_unlock_bh(&tbl->lock);
+				rc = -1;
+				goto out;
+			}
+		}
+		read_unlock_bh(&tbl->lock);
+	}
+	rc = skb->len;
+out:
+	cb->args[1] = h;
+	cb->args[2] = idx;
+	return rc;
+}
+
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct neigh_table *tbl;
+	int t, family, s_t;
+
+	read_lock(&neigh_tbl_lock);
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	s_t = cb->args[0];
+
+	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+		if (t < s_t || (family && tbl->family != family))
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args) -
+						sizeof(cb->args[0]));
+		if (neigh_dump_table(tbl, skb, cb) < 0)
+			break;
+	}
+	read_unlock(&neigh_tbl_lock);
+
+	cb->args[0] = t;
+	return skb->len;
+}
+
+void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
+{
+	int chain;
+
+	read_lock_bh(&tbl->lock);
+	for (chain = 0; chain <= tbl->hash_mask; chain++) {
+		struct neighbour *n;
+
+		for (n = tbl->hash_buckets[chain]; n; n = n->next)
+			cb(n, cookie);
+	}
+	read_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_for_each);
+
+/* The tbl->lock must be held as a writer and BH disabled. */
+void __neigh_for_each_release(struct neigh_table *tbl,
+			      int (*cb)(struct neighbour *))
+{
+	int chain;
+
+	for (chain = 0; chain <= tbl->hash_mask; chain++) {
+		struct neighbour *n, **np;
+
+		np = &tbl->hash_buckets[chain];
+		while ((n = *np) != NULL) {
+			int release;
+
+			write_lock(&n->lock);
+			release = cb(n);
+			if (release) {
+				*np = n->next;
+				n->dead = 1;
+			} else
+				np = &n->next;
+			write_unlock(&n->lock);
+			if (release)
+				neigh_release(n);
+		}
+	}
+}
+EXPORT_SYMBOL(__neigh_for_each_release);
+
+#ifdef CONFIG_PROC_FS
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+	struct neighbour *n = NULL;
+	int bucket = state->bucket;
+
+	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+		n = tbl->hash_buckets[bucket];
+
+		while (n) {
+			if (state->neigh_sub_iter) {
+				loff_t fakep = 0;
+				void *v;
+
+				v = state->neigh_sub_iter(state, n, &fakep);
+				if (!v)
+					goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+		next:
+			n = n->next;
+		}
+
+		if (n)
+			break;
+	}
+	state->bucket = bucket;
+
+	return n;
+}
+
+static struct neighbour *neigh_get_next(struct seq_file *seq,
+					struct neighbour *n,
+					loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	if (state->neigh_sub_iter) {
+		void *v = state->neigh_sub_iter(state, n, pos);
+		if (v)
+			return n;
+	}
+	n = n->next;
+
+	while (1) {
+		while (n) {
+			if (state->neigh_sub_iter) {
+				void *v = state->neigh_sub_iter(state, n, pos);
+				if (v)
+					return n;
+				goto next;
+			}
+			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+				break;
+
+			if (n->nud_state & ~NUD_NOARP)
+				break;
+		next:
+			n = n->next;
+		}
+
+		if (n)
+			break;
+
+		if (++state->bucket > tbl->hash_mask)
+			break;
+
+		n = tbl->hash_buckets[state->bucket];
+	}
+
+	if (n && pos)
+		--(*pos);
+	return n;
+}
+
+static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct neighbour *n = neigh_get_first(seq);
+
+	if (n) {
+		while (*pos) {
+			n = neigh_get_next(seq, n, pos);
+			if (!n)
+				break;
+		}
+	}
+	return *pos ? NULL : n;
+}
+
+static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+	struct pneigh_entry *pn = NULL;
+	int bucket = state->bucket;
+
+	state->flags |= NEIGH_SEQ_IS_PNEIGH;
+	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+		pn = tbl->phash_buckets[bucket];
+		if (pn)
+			break;
+	}
+	state->bucket = bucket;
+
+	return pn;
+}
+
+static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
+					    struct pneigh_entry *pn,
+					    loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	pn = pn->next;
+	while (!pn) {
+		if (++state->bucket > PNEIGH_HASHMASK)
+			break;
+		pn = tbl->phash_buckets[state->bucket];
+		if (pn)
+			break;
+	}
+
+	if (pn && pos)
+		--(*pos);
+
+	return pn;
+}
+
+static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct pneigh_entry *pn = pneigh_get_first(seq);
+
+	if (pn) {
+		while (*pos) {
+			pn = pneigh_get_next(seq, pn, pos);
+			if (!pn)
+				break;
+		}
+	}
+	return *pos ? NULL : pn;
+}
+
+static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	void *rc;
+
+	rc = neigh_get_idx(seq, pos);
+	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+		rc = pneigh_get_idx(seq, pos);
+
+	return rc;
+}
+
+void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+{
+	struct neigh_seq_state *state = seq->private;
+	loff_t pos_minus_one;
+
+	state->tbl = tbl;
+	state->bucket = 0;
+	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
+
+	read_lock_bh(&tbl->lock);
+
+	pos_minus_one = *pos - 1;
+	return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
+}
+EXPORT_SYMBOL(neigh_seq_start);
+
+void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct neigh_seq_state *state;
+	void *rc;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = neigh_get_idx(seq, pos);
+		goto out;
+	}
+
+	state = seq->private;
+	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
+		rc = neigh_get_next(seq, v, NULL);
+		if (rc)
+			goto out;
+		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+			rc = pneigh_get_first(seq);
+	} else {
+		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
+		rc = pneigh_get_next(seq, v, NULL);
+	}
+out:
+	++(*pos);
+	return rc;
+}
+EXPORT_SYMBOL(neigh_seq_next);
+
+void neigh_seq_stop(struct seq_file *seq, void *v)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	read_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_seq_stop);
+
+/* statistics via seq_file */
+
+static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+	
+	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu;
+
+	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+
+static int neigh_stat_seq_show(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	struct neigh_statistics *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs forced_gc_goal_miss\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
+			"%08lx %08lx  %08lx %08lx\n",
+		   atomic_read(&tbl->entries),
+
+		   st->allocs,
+		   st->destroys,
+		   st->hash_grows,
+
+		   st->lookups,
+		   st->hits,
+
+		   st->res_failed,
+
+		   st->rcv_probes_mcast,
+		   st->rcv_probes_ucast,
+
+		   st->periodic_gc_runs,
+		   st->forced_gc_runs
+		   );
+
+	return 0;
+}
+
+static struct seq_operations neigh_stat_seq_ops = {
+	.start	= neigh_stat_seq_start,
+	.next	= neigh_stat_seq_next,
+	.stop	= neigh_stat_seq_stop,
+	.show	= neigh_stat_seq_show,
+};
+
+static int neigh_stat_seq_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &neigh_stat_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		sf->private = PDE(inode);
+	}
+	return ret;
+};
+
+static struct file_operations neigh_stat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open 	 = neigh_stat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
+{
+	struct nlmsghdr  *nlh;
+	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
+	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	nlh			   = (struct nlmsghdr *)skb->data;
+	nlh->nlmsg_flags	   = NLM_F_REQUEST;
+	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+static void neigh_app_notify(struct neighbour *n)
+{
+	struct nlmsghdr *nlh;
+	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
+	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	nlh			   = (struct nlmsghdr *)skb->data;
+	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+#endif /* CONFIG_ARPD */
+
+#ifdef CONFIG_SYSCTL
+
+static struct neigh_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	ctl_table		neigh_vars[__NET_NEIGH_MAX];
+	ctl_table		neigh_dev[2];
+	ctl_table		neigh_neigh_dir[2];
+	ctl_table		neigh_proto_dir[2];
+	ctl_table		neigh_root_dir[2];
+} neigh_sysctl_template = {
+	.neigh_vars = {
+		{
+			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
+			.procname	= "mcast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_UCAST_SOLICIT,
+			.procname	= "ucast_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_APP_SOLICIT,
+			.procname	= "app_solicit",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_RETRANS_TIME,
+			.procname	= "retrans_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_REACHABLE_TIME,
+			.procname	= "base_reachable_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_DELAY_PROBE_TIME,
+			.procname	= "delay_first_probe_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_GC_STALE_TIME,
+			.procname	= "gc_stale_time",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_UNRES_QLEN,
+			.procname	= "unres_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_PROXY_QLEN,
+			.procname	= "proxy_qlen",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_ANYCAST_DELAY,
+			.procname	= "anycast_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_PROXY_DELAY,
+			.procname	= "proxy_delay",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_LOCKTIME,
+			.procname	= "locktime",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_userhz_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_GC_INTERVAL,
+			.procname	= "gc_interval",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_jiffies,
+			.strategy	= &sysctl_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_GC_THRESH1,
+			.procname	= "gc_thresh1",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_GC_THRESH2,
+			.procname	= "gc_thresh2",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_GC_THRESH3,
+			.procname	= "gc_thresh3",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_NEIGH_RETRANS_TIME_MS,
+			.procname	= "retrans_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_ms_jiffies,
+			.strategy	= &sysctl_ms_jiffies,
+		},
+		{
+			.ctl_name	= NET_NEIGH_REACHABLE_TIME_MS,
+			.procname	= "base_reachable_time_ms",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec_ms_jiffies,
+			.strategy	= &sysctl_ms_jiffies,
+		},
+	},
+	.neigh_dev = {
+		{
+			.ctl_name	= NET_PROTO_CONF_DEFAULT,
+			.procname	= "default",
+			.mode		= 0555,
+		},
+	},
+	.neigh_neigh_dir = {
+		{
+			.procname	= "neigh",
+			.mode		= 0555,
+		},
+	},
+	.neigh_proto_dir = {
+		{
+			.mode		= 0555,
+		},
+	},
+	.neigh_root_dir = {
+		{
+			.ctl_name	= CTL_NET,
+			.procname	= "net",
+			.mode		= 0555,
+		},
+	},
+};
+
+int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
+			  int p_id, int pdev_id, char *p_name, 
+			  proc_handler *handler, ctl_handler *strategy)
+{
+	struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	const char *dev_name_source = NULL;
+	char *dev_name = NULL;
+	int err = 0;
+
+	if (!t)
+		return -ENOBUFS;
+	memcpy(t, &neigh_sysctl_template, sizeof(*t));
+	t->neigh_vars[0].data  = &p->mcast_probes;
+	t->neigh_vars[1].data  = &p->ucast_probes;
+	t->neigh_vars[2].data  = &p->app_probes;
+	t->neigh_vars[3].data  = &p->retrans_time;
+	t->neigh_vars[4].data  = &p->base_reachable_time;
+	t->neigh_vars[5].data  = &p->delay_probe_time;
+	t->neigh_vars[6].data  = &p->gc_staletime;
+	t->neigh_vars[7].data  = &p->queue_len;
+	t->neigh_vars[8].data  = &p->proxy_qlen;
+	t->neigh_vars[9].data  = &p->anycast_delay;
+	t->neigh_vars[10].data = &p->proxy_delay;
+	t->neigh_vars[11].data = &p->locktime;
+
+	if (dev) {
+		dev_name_source = dev->name;
+		t->neigh_dev[0].ctl_name = dev->ifindex;
+		t->neigh_vars[12].procname = NULL;
+		t->neigh_vars[13].procname = NULL;
+		t->neigh_vars[14].procname = NULL;
+		t->neigh_vars[15].procname = NULL;
+	} else {
+ 		dev_name_source = t->neigh_dev[0].procname;
+		t->neigh_vars[12].data = (int *)(p + 1);
+		t->neigh_vars[13].data = (int *)(p + 1) + 1;
+		t->neigh_vars[14].data = (int *)(p + 1) + 2;
+		t->neigh_vars[15].data = (int *)(p + 1) + 3;
+	}
+
+	t->neigh_vars[16].data  = &p->retrans_time;
+	t->neigh_vars[17].data  = &p->base_reachable_time;
+
+	if (handler || strategy) {
+		/* RetransTime */
+		t->neigh_vars[3].proc_handler = handler;
+		t->neigh_vars[3].strategy = strategy;
+		t->neigh_vars[3].extra1 = dev;
+		/* ReachableTime */
+		t->neigh_vars[4].proc_handler = handler;
+		t->neigh_vars[4].strategy = strategy;
+		t->neigh_vars[4].extra1 = dev;
+		/* RetransTime (in milliseconds)*/
+		t->neigh_vars[16].proc_handler = handler;
+		t->neigh_vars[16].strategy = strategy;
+		t->neigh_vars[16].extra1 = dev;
+		/* ReachableTime (in milliseconds) */
+		t->neigh_vars[17].proc_handler = handler;
+		t->neigh_vars[17].strategy = strategy;
+		t->neigh_vars[17].extra1 = dev;
+	}
+
+	dev_name = net_sysctl_strdup(dev_name_source);
+	if (!dev_name) {
+		err = -ENOBUFS;
+		goto free;
+	}
+
+ 	t->neigh_dev[0].procname = dev_name;
+
+	t->neigh_neigh_dir[0].ctl_name = pdev_id;
+
+	t->neigh_proto_dir[0].procname = p_name;
+	t->neigh_proto_dir[0].ctl_name = p_id;
+
+	t->neigh_dev[0].child	       = t->neigh_vars;
+	t->neigh_neigh_dir[0].child    = t->neigh_dev;
+	t->neigh_proto_dir[0].child    = t->neigh_neigh_dir;
+	t->neigh_root_dir[0].child     = t->neigh_proto_dir;
+
+	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
+	if (!t->sysctl_header) {
+		err = -ENOBUFS;
+		goto free_procname;
+	}
+	p->sysctl_table = t;
+	return 0;
+
+	/* error path */
+ free_procname:
+	kfree(dev_name);
+ free:
+	kfree(t);
+
+	return err;
+}
+
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+	if (p->sysctl_table) {
+		struct neigh_sysctl_table *t = p->sysctl_table;
+		p->sysctl_table = NULL;
+		unregister_sysctl_table(t->sysctl_header);
+		kfree(t->neigh_dev[0].procname);
+		kfree(t);
+	}
+}
+
+#endif	/* CONFIG_SYSCTL */
+
+EXPORT_SYMBOL(__neigh_event_send);
+EXPORT_SYMBOL(neigh_add);
+EXPORT_SYMBOL(neigh_changeaddr);
+EXPORT_SYMBOL(neigh_compat_output);
+EXPORT_SYMBOL(neigh_connected_output);
+EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(neigh_delete);
+EXPORT_SYMBOL(neigh_destroy);
+EXPORT_SYMBOL(neigh_dump_info);
+EXPORT_SYMBOL(neigh_event_ns);
+EXPORT_SYMBOL(neigh_ifdown);
+EXPORT_SYMBOL(neigh_lookup);
+EXPORT_SYMBOL(neigh_lookup_nodev);
+EXPORT_SYMBOL(neigh_parms_alloc);
+EXPORT_SYMBOL(neigh_parms_release);
+EXPORT_SYMBOL(neigh_rand_reach_time);
+EXPORT_SYMBOL(neigh_resolve_output);
+EXPORT_SYMBOL(neigh_table_clear);
+EXPORT_SYMBOL(neigh_table_init);
+EXPORT_SYMBOL(neigh_update);
+EXPORT_SYMBOL(neigh_update_hhs);
+EXPORT_SYMBOL(pneigh_enqueue);
+EXPORT_SYMBOL(pneigh_lookup);
+
+#ifdef CONFIG_ARPD
+EXPORT_SYMBOL(neigh_app_ns);
+#endif
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(neigh_sysctl_register);
+EXPORT_SYMBOL(neigh_sysctl_unregister);
+#endif
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
new file mode 100644
index 00000000000..060f703659e
--- /dev/null
+++ b/net/core/net-sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * net-sysfs.c - network device class and attributes
+ *
+ * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
+ * 
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/wireless.h>
+
+#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+
+static const char fmt_hex[] = "%#x\n";
+static const char fmt_dec[] = "%d\n";
+static const char fmt_ulong[] = "%lu\n";
+
+static inline int dev_isalive(const struct net_device *dev) 
+{
+	return dev->reg_state == NETREG_REGISTERED;
+}
+
+/* use same locking rules as GIF* ioctl's */
+static ssize_t netdev_show(const struct class_device *cd, char *buf,
+			   ssize_t (*format)(const struct net_device *, char *))
+{
+	struct net_device *net = to_net_dev(cd);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+		ret = (*format)(net, buf);
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* generate a show function for simple field */
+#define NETDEVICE_SHOW(field, format_string)				\
+static ssize_t format_##field(const struct net_device *net, char *buf)	\
+{									\
+	return sprintf(buf, format_string, net->field);			\
+}									\
+static ssize_t show_##field(struct class_device *cd, char *buf)		\
+{									\
+	return netdev_show(cd, buf, format_##field);			\
+}
+
+
+/* use same locking and permission rules as SIF* ioctl's */
+static ssize_t netdev_store(struct class_device *dev,
+			    const char *buf, size_t len,
+			    int (*set)(struct net_device *, unsigned long))
+{
+	struct net_device *net = to_net_dev(dev);
+	char *endp;
+	unsigned long new;
+	int ret = -EINVAL;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	new = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		goto err;
+
+	rtnl_lock();
+	if (dev_isalive(net)) {
+		if ((ret = (*set)(net, new)) == 0)
+			ret = len;
+	}
+	rtnl_unlock();
+ err:
+	return ret;
+}
+
+/* generate a read-only network device class attribute */
+#define NETDEVICE_ATTR(field, format_string)				\
+NETDEVICE_SHOW(field, format_string)					\
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)		\
+
+NETDEVICE_ATTR(addr_len, fmt_dec);
+NETDEVICE_ATTR(iflink, fmt_dec);
+NETDEVICE_ATTR(ifindex, fmt_dec);
+NETDEVICE_ATTR(features, fmt_hex);
+NETDEVICE_ATTR(type, fmt_dec);
+
+/* use same locking rules as GIFHWADDR ioctl's */
+static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
+{
+	int i;
+	char *cp = buf;
+
+	for (i = 0; i < len; i++)
+		cp += sprintf(cp, "%02x%c", addr[i],
+			      i == (len - 1) ? '\n' : ':');
+	return cp - buf;
+}
+
+static ssize_t show_address(struct class_device *dev, char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+	    ret = format_addr(buf, net->dev_addr, net->addr_len);
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+static ssize_t show_broadcast(struct class_device *dev, char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	if (dev_isalive(net))
+		return format_addr(buf, net->broadcast, net->addr_len);
+	return -EINVAL;
+}
+
+static ssize_t show_carrier(struct class_device *dev, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	if (netif_running(netdev)) {
+		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
+	}
+	return -EINVAL;
+}
+
+static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
+static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
+
+/* read-write attributes */
+NETDEVICE_SHOW(mtu, fmt_dec);
+
+static int change_mtu(struct net_device *net, unsigned long new_mtu)
+{
+	return dev_set_mtu(net, (int) new_mtu);
+}
+
+static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_mtu);
+}
+
+static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu);
+
+NETDEVICE_SHOW(flags, fmt_hex);
+
+static int change_flags(struct net_device *net, unsigned long new_flags)
+{
+	return dev_change_flags(net, (unsigned) new_flags);
+}
+
+static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_flags);
+}
+
+static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
+
+NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+
+static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
+{
+	net->tx_queue_len = new_len;
+	return 0;
+}
+
+static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_tx_queue_len);
+}
+
+static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 
+			 store_tx_queue_len);
+
+
+static struct class_device_attribute *net_class_attributes[] = {
+	&class_device_attr_ifindex,
+	&class_device_attr_iflink,
+	&class_device_attr_addr_len,
+	&class_device_attr_tx_queue_len,
+	&class_device_attr_features,
+	&class_device_attr_mtu,
+	&class_device_attr_flags,
+	&class_device_attr_type,
+	&class_device_attr_address,
+	&class_device_attr_broadcast,
+	&class_device_attr_carrier,
+	NULL
+};
+
+/* Show a given an attribute in the statistics group */
+static ssize_t netstat_show(const struct class_device *cd, char *buf, 
+			    unsigned long offset)
+{
+	struct net_device *dev = to_net_dev(cd);
+	struct net_device_stats *stats;
+	ssize_t ret = -EINVAL;
+
+	if (offset > sizeof(struct net_device_stats) ||
+	    offset % sizeof(unsigned long) != 0)
+		WARN_ON(1);
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev) && dev->get_stats &&
+	    (stats = (*dev->get_stats)(dev))) 
+		ret = sprintf(buf, fmt_ulong,
+			      *(unsigned long *)(((u8 *) stats) + offset));
+
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+/* generate a read-only statistics attribute */
+#define NETSTAT_ENTRY(name)						\
+static ssize_t show_##name(struct class_device *cd, char *buf) 		\
+{									\
+	return netstat_show(cd, buf, 					\
+			    offsetof(struct net_device_stats, name));	\
+}									\
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+NETSTAT_ENTRY(rx_packets);
+NETSTAT_ENTRY(tx_packets);
+NETSTAT_ENTRY(rx_bytes);
+NETSTAT_ENTRY(tx_bytes);
+NETSTAT_ENTRY(rx_errors);
+NETSTAT_ENTRY(tx_errors);
+NETSTAT_ENTRY(rx_dropped);
+NETSTAT_ENTRY(tx_dropped);
+NETSTAT_ENTRY(multicast);
+NETSTAT_ENTRY(collisions);
+NETSTAT_ENTRY(rx_length_errors);
+NETSTAT_ENTRY(rx_over_errors);
+NETSTAT_ENTRY(rx_crc_errors);
+NETSTAT_ENTRY(rx_frame_errors);
+NETSTAT_ENTRY(rx_fifo_errors);
+NETSTAT_ENTRY(rx_missed_errors);
+NETSTAT_ENTRY(tx_aborted_errors);
+NETSTAT_ENTRY(tx_carrier_errors);
+NETSTAT_ENTRY(tx_fifo_errors);
+NETSTAT_ENTRY(tx_heartbeat_errors);
+NETSTAT_ENTRY(tx_window_errors);
+NETSTAT_ENTRY(rx_compressed);
+NETSTAT_ENTRY(tx_compressed);
+
+static struct attribute *netstat_attrs[] = {
+	&class_device_attr_rx_packets.attr,
+	&class_device_attr_tx_packets.attr,
+	&class_device_attr_rx_bytes.attr,
+	&class_device_attr_tx_bytes.attr,
+	&class_device_attr_rx_errors.attr,
+	&class_device_attr_tx_errors.attr,
+	&class_device_attr_rx_dropped.attr,
+	&class_device_attr_tx_dropped.attr,
+	&class_device_attr_multicast.attr,
+	&class_device_attr_collisions.attr,
+	&class_device_attr_rx_length_errors.attr,
+	&class_device_attr_rx_over_errors.attr,
+	&class_device_attr_rx_crc_errors.attr,
+	&class_device_attr_rx_frame_errors.attr,
+	&class_device_attr_rx_fifo_errors.attr,
+	&class_device_attr_rx_missed_errors.attr,
+	&class_device_attr_tx_aborted_errors.attr,
+	&class_device_attr_tx_carrier_errors.attr,
+	&class_device_attr_tx_fifo_errors.attr,
+	&class_device_attr_tx_heartbeat_errors.attr,
+	&class_device_attr_tx_window_errors.attr,
+	&class_device_attr_rx_compressed.attr,
+	&class_device_attr_tx_compressed.attr,
+	NULL
+};
+
+
+static struct attribute_group netstat_group = {
+	.name  = "statistics",
+	.attrs  = netstat_attrs,
+};
+
+#ifdef WIRELESS_EXT
+/* helper function that does all the locking etc for wireless stats */
+static ssize_t wireless_show(struct class_device *cd, char *buf,
+			     ssize_t (*format)(const struct iw_statistics *,
+					       char *))
+{
+	struct net_device *dev = to_net_dev(cd);
+	const struct iw_statistics *iw;
+	ssize_t ret = -EINVAL;
+	
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev) && dev->get_wireless_stats 
+	    && (iw = dev->get_wireless_stats(dev)) != NULL) 
+		ret = (*format)(iw, buf);
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* show function template for wireless fields */
+#define WIRELESS_SHOW(name, field, format_string)			\
+static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
+{									\
+	return sprintf(buf, format_string, iw->field);			\
+}									\
+static ssize_t show_iw_##name(struct class_device *cd, char *buf)	\
+{									\
+	return wireless_show(cd, buf, format_iw_##name);		\
+}									\
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
+
+WIRELESS_SHOW(status, status, fmt_hex);
+WIRELESS_SHOW(link, qual.qual, fmt_dec);
+WIRELESS_SHOW(level, qual.level, fmt_dec);
+WIRELESS_SHOW(noise, qual.noise, fmt_dec);
+WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
+WIRELESS_SHOW(crypt, discard.code, fmt_dec);
+WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
+WIRELESS_SHOW(misc, discard.misc, fmt_dec);
+WIRELESS_SHOW(retries, discard.retries, fmt_dec);
+WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
+
+static struct attribute *wireless_attrs[] = {
+	&class_device_attr_status.attr,
+	&class_device_attr_link.attr,
+	&class_device_attr_level.attr,
+	&class_device_attr_noise.attr,
+	&class_device_attr_nwid.attr,
+	&class_device_attr_crypt.attr,
+	&class_device_attr_fragment.attr,
+	&class_device_attr_retries.attr,
+	&class_device_attr_misc.attr,
+	&class_device_attr_beacon.attr,
+	NULL
+};
+
+static struct attribute_group wireless_group = {
+	.name = "wireless",
+	.attrs = wireless_attrs,
+};
+#endif
+
+#ifdef CONFIG_HOTPLUG
+static int netdev_hotplug(struct class_device *cd, char **envp,
+			  int num_envp, char *buf, int size)
+{
+	struct net_device *dev = to_net_dev(cd);
+	int i = 0;
+	int n;
+
+	/* pass interface in env to hotplug. */
+	envp[i++] = buf;
+	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
+	buf += n;
+	size -= n;
+
+	if ((size <= 0) || (i >= num_envp))
+		return -ENOMEM;
+
+	envp[i] = NULL;
+	return 0;
+}
+#endif
+
+/*
+ *	netdev_release -- destroy and free a dead device. 
+ *	Called when last reference to class_device kobject is gone.
+ */
+static void netdev_release(struct class_device *cd)
+{
+	struct net_device *dev 
+		= container_of(cd, struct net_device, class_dev);
+
+	BUG_ON(dev->reg_state != NETREG_RELEASED);
+
+	kfree((char *)dev - dev->padded);
+}
+
+static struct class net_class = {
+	.name = "net",
+	.release = netdev_release,
+#ifdef CONFIG_HOTPLUG
+	.hotplug = netdev_hotplug,
+#endif
+};
+
+void netdev_unregister_sysfs(struct net_device * net)
+{
+	struct class_device * class_dev = &(net->class_dev);
+
+	if (net->get_stats)
+		sysfs_remove_group(&class_dev->kobj, &netstat_group);
+
+#ifdef WIRELESS_EXT
+	if (net->get_wireless_stats)
+		sysfs_remove_group(&class_dev->kobj, &wireless_group);
+#endif
+	class_device_del(class_dev);
+
+}
+
+/* Create sysfs entries for network device. */
+int netdev_register_sysfs(struct net_device *net)
+{
+	struct class_device *class_dev = &(net->class_dev);
+	int i;
+	struct class_device_attribute *attr;
+	int ret;
+
+	class_dev->class = &net_class;
+	class_dev->class_data = net;
+
+	strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
+	if ((ret = class_device_register(class_dev)))
+		goto out;
+
+	for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) {
+		if ((ret = class_device_create_file(class_dev, attr)))
+		    goto out_unreg;
+	}
+
+
+	if (net->get_stats &&
+	    (ret = sysfs_create_group(&class_dev->kobj, &netstat_group)))
+		goto out_unreg; 
+
+#ifdef WIRELESS_EXT
+	if (net->get_wireless_stats &&
+	    (ret = sysfs_create_group(&class_dev->kobj, &wireless_group)))
+		goto out_cleanup; 
+
+	return 0;
+out_cleanup:
+	if (net->get_stats)
+		sysfs_remove_group(&class_dev->kobj, &netstat_group);
+#else
+	return 0;
+#endif
+
+out_unreg:
+	printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
+	       net->name, ret);
+	class_device_unregister(class_dev);
+out:
+	return ret;
+}
+
+int netdev_sysfs_init(void)
+{
+	return class_register(&net_class);
+}
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
new file mode 100644
index 00000000000..e51cfa46950
--- /dev/null
+++ b/net/core/netfilter.c
@@ -0,0 +1,799 @@
+/* netfilter.c: look after the filters for various protocols. 
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)2000 -- This code is GPL.
+ *
+ * February 2000: Modified by James Morris to have 1 queue per protocol.
+ * 15-Mar-2000:   Added NF_REPEAT --RR.
+ * 08-May-2003:	  Internal logging interface added by Jozsef Kadlecsik.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+
+/* In this code, we can be waiting indefinitely for userspace to
+ * service a packet if a hook returns NF_QUEUE.  We could keep a count
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks.  Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...)  printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+/* Sockopts only registered and called from user context, so
+   net locking would be overkill.  Also, [gs]etsockopt calls may
+   sleep. */
+static DECLARE_MUTEX(nf_sockopt_mutex);
+
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+static LIST_HEAD(nf_sockopts);
+static DEFINE_SPINLOCK(nf_hook_lock);
+
+/* 
+ * A queue handler may be registered for each protocol.  Each is protected by
+ * long term mutex.  The handler must provide an an outfn() to accept packets
+ * for queueing and must reinject all packets it receives, no matter what.
+ */
+static struct nf_queue_handler_t {
+	nf_queue_outfn_t outfn;
+	void *data;
+} queue_handler[NPROTO];
+static DEFINE_RWLOCK(queue_handler_lock);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+	struct list_head *i;
+
+	spin_lock_bh(&nf_hook_lock);
+	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
+		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+			break;
+	}
+	list_add_rcu(&reg->list, i->prev);
+	spin_unlock_bh(&nf_hook_lock);
+
+	synchronize_net();
+	return 0;
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+	spin_lock_bh(&nf_hook_lock);
+	list_del_rcu(&reg->list);
+	spin_unlock_bh(&nf_hook_lock);
+
+	synchronize_net();
+}
+
+/* Do exclusive ranges overlap? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+	return max1 > min2 && min1 < max2;
+}
+
+/* Functions to register sockopt ranges (exclusive). */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+	struct list_head *i;
+	int ret = 0;
+
+	if (down_interruptible(&nf_sockopt_mutex) != 0)
+		return -EINTR;
+
+	list_for_each(i, &nf_sockopts) {
+		struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+		if (ops->pf == reg->pf
+		    && (overlap(ops->set_optmin, ops->set_optmax, 
+				reg->set_optmin, reg->set_optmax)
+			|| overlap(ops->get_optmin, ops->get_optmax, 
+				   reg->get_optmin, reg->get_optmax))) {
+			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+				ops->set_optmin, ops->set_optmax, 
+				ops->get_optmin, ops->get_optmax, 
+				reg->set_optmin, reg->set_optmax,
+				reg->get_optmin, reg->get_optmax);
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_add(&reg->list, &nf_sockopts);
+out:
+	up(&nf_sockopt_mutex);
+	return ret;
+}
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+	/* No point being interruptible: we're probably in cleanup_module() */
+ restart:
+	down(&nf_sockopt_mutex);
+	if (reg->use != 0) {
+		/* To be woken by nf_sockopt call... */
+		/* FIXME: Stuart Young's name appears gratuitously. */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		reg->cleanup_task = current;
+		up(&nf_sockopt_mutex);
+		schedule();
+		goto restart;
+	}
+	list_del(&reg->list);
+	up(&nf_sockopt_mutex);
+}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4.h>
+
+static void debug_print_hooks_ip(unsigned int nf_debug)
+{
+	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
+		printk("PRE_ROUTING ");
+		nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+	}
+	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
+		printk("LOCAL_IN ");
+		nf_debug ^= (1 << NF_IP_LOCAL_IN);
+	}
+	if (nf_debug & (1 << NF_IP_FORWARD)) {
+		printk("FORWARD ");
+		nf_debug ^= (1 << NF_IP_FORWARD);
+	}
+	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
+		printk("LOCAL_OUT ");
+		nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+	}
+	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
+		printk("POST_ROUTING ");
+		nf_debug ^= (1 << NF_IP_POST_ROUTING);
+	}
+	if (nf_debug)
+		printk("Crap bits: 0x%04X", nf_debug);
+	printk("\n");
+}
+
+static void nf_dump_skb(int pf, struct sk_buff *skb)
+{
+	printk("skb: pf=%i %s dev=%s len=%u\n", 
+	       pf,
+	       skb->sk ? "(owned)" : "(unowned)",
+	       skb->dev ? skb->dev->name : "(no dev)",
+	       skb->len);
+	switch (pf) {
+	case PF_INET: {
+		const struct iphdr *ip = skb->nh.iph;
+		__u32 *opt = (__u32 *) (ip + 1);
+		int opti;
+		__u16 src_port = 0, dst_port = 0;
+
+		if (ip->protocol == IPPROTO_TCP
+		    || ip->protocol == IPPROTO_UDP) {
+			struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+			src_port = ntohs(tcp->source);
+			dst_port = ntohs(tcp->dest);
+		}
+	
+		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
+		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+		       ip->protocol, NIPQUAD(ip->saddr),
+		       src_port, NIPQUAD(ip->daddr),
+		       dst_port,
+		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+		       ntohs(ip->frag_off), ip->ttl);
+
+		for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+			printk(" O=0x%8.8X", *opt++);
+		printk("\n");
+	}
+	}
+}
+
+void nf_debug_ip_local_deliver(struct sk_buff *skb)
+{
+	/* If it's a loopback packet, it must have come through
+	 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
+	 * NF_IP_LOCAL_IN.  Otherwise, must have gone through
+	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
+	if (!skb->dev) {
+		printk("ip_local_deliver: skb->dev is NULL.\n");
+	}
+	else if (strcmp(skb->dev->name, "lo") == 0) {
+		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				      | (1 << NF_IP_POST_ROUTING)
+				      | (1 << NF_IP_PRE_ROUTING)
+				      | (1 << NF_IP_LOCAL_IN))) {
+			printk("ip_local_deliver: bad loopback skb: ");
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	}
+	else {
+		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
+				      | (1<<NF_IP_LOCAL_IN))) {
+			printk("ip_local_deliver: bad non-lo skb: ");
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	}
+}
+
+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
+{
+	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				 | (1 << NF_IP_POST_ROUTING))) {
+		printk("ip_dev_loopback_xmit: bad owned skb = %p: ", 
+		       newskb);
+		debug_print_hooks_ip(newskb->nf_debug);
+		nf_dump_skb(PF_INET, newskb);
+	}
+	/* Clear to avoid confusing input check */
+	newskb->nf_debug = 0;
+}
+
+void nf_debug_ip_finish_output2(struct sk_buff *skb)
+{
+	/* If it's owned, it must have gone through the
+	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
+	 * Otherwise, must have gone through
+	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
+	 */
+	if (skb->sk) {
+		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+				      | (1 << NF_IP_POST_ROUTING))) {
+			printk("ip_finish_output: bad owned skb = %p: ", skb);
+			debug_print_hooks_ip(skb->nf_debug);
+			nf_dump_skb(PF_INET, skb);
+		}
+	} else {
+		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
+				      | (1 << NF_IP_FORWARD)
+				      | (1 << NF_IP_POST_ROUTING))) {
+			/* Fragments, entunnelled packets, TCP RSTs
+                           generated by ipt_REJECT will have no
+                           owners, but still may be local */
+			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+					      | (1 << NF_IP_POST_ROUTING))){
+				printk("ip_finish_output:"
+				       " bad unowned skb = %p: ",skb);
+				debug_print_hooks_ip(skb->nf_debug);
+				nf_dump_skb(PF_INET, skb);
+			}
+		}
+	}
+}
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+/* Call get/setsockopt() */
+static int nf_sockopt(struct sock *sk, int pf, int val, 
+		      char __user *opt, int *len, int get)
+{
+	struct list_head *i;
+	struct nf_sockopt_ops *ops;
+	int ret;
+
+	if (down_interruptible(&nf_sockopt_mutex) != 0)
+		return -EINTR;
+
+	list_for_each(i, &nf_sockopts) {
+		ops = (struct nf_sockopt_ops *)i;
+		if (ops->pf == pf) {
+			if (get) {
+				if (val >= ops->get_optmin
+				    && val < ops->get_optmax) {
+					ops->use++;
+					up(&nf_sockopt_mutex);
+					ret = ops->get(sk, val, opt, len);
+					goto out;
+				}
+			} else {
+				if (val >= ops->set_optmin
+				    && val < ops->set_optmax) {
+					ops->use++;
+					up(&nf_sockopt_mutex);
+					ret = ops->set(sk, val, opt, *len);
+					goto out;
+				}
+			}
+		}
+	}
+	up(&nf_sockopt_mutex);
+	return -ENOPROTOOPT;
+	
+ out:
+	down(&nf_sockopt_mutex);
+	ops->use--;
+	if (ops->cleanup_task)
+		wake_up_process(ops->cleanup_task);
+	up(&nf_sockopt_mutex);
+	return ret;
+}
+
+int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
+		  int len)
+{
+	return nf_sockopt(sk, pf, val, opt, &len, 0);
+}
+
+int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
+{
+	return nf_sockopt(sk, pf, val, opt, len, 1);
+}
+
+static unsigned int nf_iterate(struct list_head *head,
+			       struct sk_buff **skb,
+			       int hook,
+			       const struct net_device *indev,
+			       const struct net_device *outdev,
+			       struct list_head **i,
+			       int (*okfn)(struct sk_buff *),
+			       int hook_thresh)
+{
+	unsigned int verdict;
+
+	/*
+	 * The caller must not block between calls to this
+	 * function because of risk of continuing from deleted element.
+	 */
+	list_for_each_continue_rcu(*i, head) {
+		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+
+		if (hook_thresh > elem->priority)
+			continue;
+
+		/* Optimization: we don't need to hold module
+                   reference here, since function can't sleep. --RR */
+		verdict = elem->hook(hook, skb, indev, outdev, okfn);
+		if (verdict != NF_ACCEPT) {
+#ifdef CONFIG_NETFILTER_DEBUG
+			if (unlikely(verdict > NF_MAX_VERDICT)) {
+				NFDEBUG("Evil return from %p(%u).\n",
+				        elem->hook, hook);
+				continue;
+			}
+#endif
+			if (verdict != NF_REPEAT)
+				return verdict;
+			*i = (*i)->prev;
+		}
+	}
+	return NF_ACCEPT;
+}
+
+int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
+{      
+	int ret;
+
+	write_lock_bh(&queue_handler_lock);
+	if (queue_handler[pf].outfn)
+		ret = -EBUSY;
+	else {
+		queue_handler[pf].outfn = outfn;
+		queue_handler[pf].data = data;
+		ret = 0;
+	}
+	write_unlock_bh(&queue_handler_lock);
+
+	return ret;
+}
+
+/* The caller must flush their queue before this */
+int nf_unregister_queue_handler(int pf)
+{
+	write_lock_bh(&queue_handler_lock);
+	queue_handler[pf].outfn = NULL;
+	queue_handler[pf].data = NULL;
+	write_unlock_bh(&queue_handler_lock);
+	
+	return 0;
+}
+
+/* 
+ * Any packet that leaves via this function must come back 
+ * through nf_reinject().
+ */
+static int nf_queue(struct sk_buff *skb, 
+		    struct list_head *elem, 
+		    int pf, unsigned int hook,
+		    struct net_device *indev,
+		    struct net_device *outdev,
+		    int (*okfn)(struct sk_buff *))
+{
+	int status;
+	struct nf_info *info;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	struct net_device *physindev = NULL;
+	struct net_device *physoutdev = NULL;
+#endif
+
+	/* QUEUE == DROP if noone is waiting, to be safe. */
+	read_lock(&queue_handler_lock);
+	if (!queue_handler[pf].outfn) {
+		read_unlock(&queue_handler_lock);
+		kfree_skb(skb);
+		return 1;
+	}
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info) {
+		if (net_ratelimit())
+			printk(KERN_ERR "OOM queueing packet %p\n",
+			       skb);
+		read_unlock(&queue_handler_lock);
+		kfree_skb(skb);
+		return 1;
+	}
+
+	*info = (struct nf_info) { 
+		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+
+	/* If it's going away, ignore hook. */
+	if (!try_module_get(info->elem->owner)) {
+		read_unlock(&queue_handler_lock);
+		kfree(info);
+		return 0;
+	}
+
+	/* Bump dev refs so they don't vanish while packet is out */
+	if (indev) dev_hold(indev);
+	if (outdev) dev_hold(outdev);
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
+		if (physindev) dev_hold(physindev);
+		physoutdev = skb->nf_bridge->physoutdev;
+		if (physoutdev) dev_hold(physoutdev);
+	}
+#endif
+
+	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
+	read_unlock(&queue_handler_lock);
+
+	if (status < 0) {
+		/* James M doesn't say fuck enough. */
+		if (indev) dev_put(indev);
+		if (outdev) dev_put(outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+		if (physindev) dev_put(physindev);
+		if (physoutdev) dev_put(physoutdev);
+#endif
+		module_put(info->elem->owner);
+		kfree(info);
+		kfree_skb(skb);
+		return 1;
+	}
+	return 1;
+}
+
+/* Returns 1 if okfn() needs to be executed by the caller,
+ * -EPERM for NF_DROP, 0 otherwise. */
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+		 struct net_device *indev,
+		 struct net_device *outdev,
+		 int (*okfn)(struct sk_buff *),
+		 int hook_thresh)
+{
+	struct list_head *elem;
+	unsigned int verdict;
+	int ret = 0;
+
+	/* We may already have this, but read-locks nest anyway */
+	rcu_read_lock();
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	if (unlikely((*pskb)->nf_debug & (1 << hook))) {
+		printk("nf_hook: hook %i already set.\n", hook);
+		nf_dump_skb(pf, *pskb);
+	}
+	(*pskb)->nf_debug |= (1 << hook);
+#endif
+
+	elem = &nf_hooks[pf][hook];
+next_hook:
+	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+			     outdev, &elem, okfn, hook_thresh);
+	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+		ret = 1;
+		goto unlock;
+	} else if (verdict == NF_DROP) {
+		kfree_skb(*pskb);
+		ret = -EPERM;
+	} else if (verdict == NF_QUEUE) {
+		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
+			goto next_hook;
+	}
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+void nf_reinject(struct sk_buff *skb, struct nf_info *info,
+		 unsigned int verdict)
+{
+	struct list_head *elem = &info->elem->list;
+	struct list_head *i;
+
+	rcu_read_lock();
+
+	/* Release those devices we held, or Alexey will kill me. */
+	if (info->indev) dev_put(info->indev);
+	if (info->outdev) dev_put(info->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge) {
+		if (skb->nf_bridge->physindev)
+			dev_put(skb->nf_bridge->physindev);
+		if (skb->nf_bridge->physoutdev)
+			dev_put(skb->nf_bridge->physoutdev);
+	}
+#endif
+
+	/* Drop reference to owner of hook which queued us. */
+	module_put(info->elem->owner);
+
+	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
+		if (i == elem) 
+  			break;
+  	}
+  
+	if (elem == &nf_hooks[info->pf][info->hook]) {
+		/* The module which sent it to userspace is gone. */
+		NFDEBUG("%s: module disappeared, dropping packet.\n",
+			__FUNCTION__);
+		verdict = NF_DROP;
+	}
+
+	/* Continue traversal iff userspace said ok... */
+	if (verdict == NF_REPEAT) {
+		elem = elem->prev;
+		verdict = NF_ACCEPT;
+	}
+
+	if (verdict == NF_ACCEPT) {
+	next_hook:
+		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+				     &skb, info->hook, 
+				     info->indev, info->outdev, &elem,
+				     info->okfn, INT_MIN);
+	}
+
+	switch (verdict) {
+	case NF_ACCEPT:
+		info->okfn(skb);
+		break;
+
+	case NF_QUEUE:
+		if (!nf_queue(skb, elem, info->pf, info->hook, 
+			      info->indev, info->outdev, info->okfn))
+			goto next_hook;
+		break;
+	}
+	rcu_read_unlock();
+
+	if (verdict == NF_DROP)
+		kfree_skb(skb);
+
+	kfree(info);
+	return;
+}
+
+#ifdef CONFIG_INET
+/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+int ip_route_me_harder(struct sk_buff **pskb)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct rtable *rt;
+	struct flowi fl = {};
+	struct dst_entry *odst;
+	unsigned int hh_len;
+
+	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
+	 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
+	 */
+	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
+#endif
+		fl.proto = iph->protocol;
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return -1;
+
+		/* Drop old route. */
+		dst_release((*pskb)->dst);
+		(*pskb)->dst = &rt->u.dst;
+	} else {
+		/* non-local src, find valid iif to satisfy
+		 * rp-filter when calling ip_route_input. */
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return -1;
+
+		odst = (*pskb)->dst;
+		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+			dst_release(&rt->u.dst);
+			return -1;
+		}
+		dst_release(&rt->u.dst);
+		dst_release(odst);
+	}
+	
+	if ((*pskb)->dst->error)
+		return -1;
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = (*pskb)->dst->dev->hard_header_len;
+	if (skb_headroom(*pskb) < hh_len) {
+		struct sk_buff *nskb;
+
+		nskb = skb_realloc_headroom(*pskb, hh_len);
+		if (!nskb) 
+			return -1;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ip_route_me_harder);
+
+int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+{
+	struct sk_buff *nskb;
+
+	if (writable_len > (*pskb)->len)
+		return 0;
+
+	/* Not exclusive use of packet?  Must copy. */
+	if (skb_shared(*pskb) || skb_cloned(*pskb))
+		goto copy_skb;
+
+	return pskb_may_pull(*pskb, writable_len);
+
+copy_skb:
+	nskb = skb_copy(*pskb, GFP_ATOMIC);
+	if (!nskb)
+		return 0;
+	BUG_ON(skb_is_nonlinear(nskb));
+
+	/* Rest of kernel will get very unhappy if we pass it a
+	   suddenly-orphaned skbuff */
+	if ((*pskb)->sk)
+		skb_set_owner_w(nskb, (*pskb)->sk);
+	kfree_skb(*pskb);
+	*pskb = nskb;
+	return 1;
+}
+EXPORT_SYMBOL(skb_ip_make_writable);
+#endif /*CONFIG_INET*/
+
+/* Internal logging interface, which relies on the real 
+   LOG target modules */
+
+#define NF_LOG_PREFIXLEN		128
+
+static nf_logfn *nf_logging[NPROTO]; /* = NULL */
+static int reported = 0;
+static DEFINE_SPINLOCK(nf_log_lock);
+
+int nf_log_register(int pf, nf_logfn *logfn)
+{
+	int ret = -EBUSY;
+
+	/* Any setup of logging members must be done before
+	 * substituting pointer. */
+	spin_lock(&nf_log_lock);
+	if (!nf_logging[pf]) {
+		rcu_assign_pointer(nf_logging[pf], logfn);
+		ret = 0;
+	}
+	spin_unlock(&nf_log_lock);
+	return ret;
+}		
+
+void nf_log_unregister(int pf, nf_logfn *logfn)
+{
+	spin_lock(&nf_log_lock);
+	if (nf_logging[pf] == logfn)
+		nf_logging[pf] = NULL;
+	spin_unlock(&nf_log_lock);
+
+	/* Give time to concurrent readers. */
+	synchronize_net();
+}		
+
+void nf_log_packet(int pf,
+		   unsigned int hooknum,
+		   const struct sk_buff *skb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   const char *fmt, ...)
+{
+	va_list args;
+	char prefix[NF_LOG_PREFIXLEN];
+	nf_logfn *logfn;
+	
+	rcu_read_lock();
+	logfn = rcu_dereference(nf_logging[pf]);
+	if (logfn) {
+		va_start(args, fmt);
+		vsnprintf(prefix, sizeof(prefix), fmt, args);
+		va_end(args);
+		/* We must read logging before nf_logfn[pf] */
+		logfn(hooknum, skb, in, out, prefix);
+	} else if (!reported) {
+		printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
+		       "no backend logging module loaded in!\n");
+		reported++;
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_register);
+EXPORT_SYMBOL(nf_log_unregister);
+EXPORT_SYMBOL(nf_log_packet);
+
+/* This does not belong here, but locally generated errors need it if connection
+   tracking in use: without this, connection may not be in hash table, and hence
+   manufactured ICMP or RST packets will not be associated with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+
+void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+{
+	void (*attach)(struct sk_buff *, struct sk_buff *);
+
+	if (skb->nfct && (attach = ip_ct_attach) != NULL) {
+		mb(); /* Just to be sure: must be read before executing this */
+		attach(new, skb);
+	}
+}
+
+void __init netfilter_init(void)
+{
+	int i, h;
+
+	for (i = 0; i < NPROTO; i++) {
+		for (h = 0; h < NF_MAX_HOOKS; h++)
+			INIT_LIST_HEAD(&nf_hooks[i][h]);
+	}
+}
+
+EXPORT_SYMBOL(ip_ct_attach);
+EXPORT_SYMBOL(nf_ct_attach);
+EXPORT_SYMBOL(nf_getsockopt);
+EXPORT_SYMBOL(nf_hook_slow);
+EXPORT_SYMBOL(nf_hooks);
+EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(nf_register_queue_handler);
+EXPORT_SYMBOL(nf_register_sockopt);
+EXPORT_SYMBOL(nf_reinject);
+EXPORT_SYMBOL(nf_setsockopt);
+EXPORT_SYMBOL(nf_unregister_hook);
+EXPORT_SYMBOL(nf_unregister_queue_handler);
+EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
new file mode 100644
index 00000000000..a119696d552
--- /dev/null
+++ b/net/core/netpoll.c
@@ -0,0 +1,735 @@
+/*
+ * Common framework for low-level network console, dump, and debugger code
+ *
+ * Sep 8 2003  Matt Mackall <mpm@selenic.com>
+ *
+ * based on the netconsole code from:
+ *
+ * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002  Red Hat, Inc.
+ */
+
+#include <linux/smp_lock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/unaligned.h>
+
+/*
+ * We maintain a small pool of fully-sized skbs, to make sure the
+ * message gets out even in extreme OOM situations.
+ */
+
+#define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+
+static DEFINE_SPINLOCK(skb_list_lock);
+static int nr_skbs;
+static struct sk_buff *skbs;
+
+static DEFINE_SPINLOCK(queue_lock);
+static int queue_depth;
+static struct sk_buff *queue_head, *queue_tail;
+
+static atomic_t trapped;
+
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
+
+#define MAX_SKB_SIZE \
+		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
+				sizeof(struct iphdr) + sizeof(struct ethhdr))
+
+static void zap_completion_queue(void);
+
+static void queue_process(void *p)
+{
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	while (queue_head) {
+		spin_lock_irqsave(&queue_lock, flags);
+
+		skb = queue_head;
+		queue_head = skb->next;
+		if (skb == queue_tail)
+			queue_head = NULL;
+
+		queue_depth--;
+
+		spin_unlock_irqrestore(&queue_lock, flags);
+
+		dev_queue_xmit(skb);
+	}
+}
+
+static DECLARE_WORK(send_queue, queue_process, NULL);
+
+void netpoll_queue(struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	if (queue_depth == MAX_QUEUE_DEPTH) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&queue_lock, flags);
+	if (!queue_head)
+		queue_head = skb;
+	else
+		queue_tail->next = skb;
+	queue_tail = skb;
+	queue_depth++;
+	spin_unlock_irqrestore(&queue_lock, flags);
+
+	schedule_work(&send_queue);
+}
+
+static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+			     unsigned short ulen, u32 saddr, u32 daddr)
+{
+	if (uh->check == 0)
+		return 0;
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		return csum_tcpudp_magic(
+			saddr, daddr, ulen, IPPROTO_UDP, skb->csum);
+
+	skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+	return csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+}
+
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ *
+ * In cases where there is bi-directional communications, reading only
+ * one message at a time can lead to packets being dropped by the
+ * network adapter, forcing superfluous retries and possibly timeouts.
+ * Thus, we set our budget to greater than 1.
+ */
+static void poll_napi(struct netpoll *np)
+{
+	int budget = 16;
+
+	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
+	    np->poll_owner != smp_processor_id() &&
+	    spin_trylock(&np->poll_lock)) {
+		np->rx_flags |= NETPOLL_RX_DROP;
+		atomic_inc(&trapped);
+
+		np->dev->poll(np->dev, &budget);
+
+		atomic_dec(&trapped);
+		np->rx_flags &= ~NETPOLL_RX_DROP;
+		spin_unlock(&np->poll_lock);
+	}
+}
+
+void netpoll_poll(struct netpoll *np)
+{
+	if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+		return;
+
+	/* Process pending work on NIC */
+	np->dev->poll_controller(np->dev);
+	if (np->dev->poll)
+		poll_napi(np);
+
+	zap_completion_queue();
+}
+
+static void refill_skbs(void)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&skb_list_lock, flags);
+	while (nr_skbs < MAX_SKBS) {
+		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+		if (!skb)
+			break;
+
+		skb->next = skbs;
+		skbs = skb;
+		nr_skbs++;
+	}
+	spin_unlock_irqrestore(&skb_list_lock, flags);
+}
+
+static void zap_completion_queue(void)
+{
+	unsigned long flags;
+	struct softnet_data *sd = &get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {
+		struct sk_buff *clist;
+
+		local_irq_save(flags);
+		clist = sd->completion_queue;
+		sd->completion_queue = NULL;
+		local_irq_restore(flags);
+
+		while (clist != NULL) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+			if(skb->destructor)
+				dev_kfree_skb_any(skb); /* put this one back */
+			else
+				__kfree_skb(skb);
+		}
+	}
+
+	put_cpu_var(softnet_data);
+}
+
+static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve)
+{
+	int once = 1, count = 0;
+	unsigned long flags;
+	struct sk_buff *skb = NULL;
+
+	zap_completion_queue();
+repeat:
+	if (nr_skbs < MAX_SKBS)
+		refill_skbs();
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (!skb) {
+		spin_lock_irqsave(&skb_list_lock, flags);
+		skb = skbs;
+		if (skb) {
+			skbs = skb->next;
+			skb->next = NULL;
+			nr_skbs--;
+		}
+		spin_unlock_irqrestore(&skb_list_lock, flags);
+	}
+
+	if(!skb) {
+		count++;
+		if (once && (count == 1000000)) {
+			printk("out of netpoll skbs!\n");
+			once = 0;
+		}
+		netpoll_poll(np);
+		goto repeat;
+	}
+
+	atomic_set(&skb->users, 1);
+	skb_reserve(skb, reserve);
+	return skb;
+}
+
+static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+	int status;
+
+repeat:
+	if(!np || !np->dev || !netif_running(np->dev)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* avoid recursion */
+	if(np->poll_owner == smp_processor_id() ||
+	   np->dev->xmit_lock_owner == smp_processor_id()) {
+		if (np->drop)
+			np->drop(skb);
+		else
+			__kfree_skb(skb);
+		return;
+	}
+
+	spin_lock(&np->dev->xmit_lock);
+	np->dev->xmit_lock_owner = smp_processor_id();
+
+	/*
+	 * network drivers do not expect to be called if the queue is
+	 * stopped.
+	 */
+	if (netif_queue_stopped(np->dev)) {
+		np->dev->xmit_lock_owner = -1;
+		spin_unlock(&np->dev->xmit_lock);
+
+		netpoll_poll(np);
+		goto repeat;
+	}
+
+	status = np->dev->hard_start_xmit(skb, np->dev);
+	np->dev->xmit_lock_owner = -1;
+	spin_unlock(&np->dev->xmit_lock);
+
+	/* transmit busy */
+	if(status) {
+		netpoll_poll(np);
+		goto repeat;
+	}
+}
+
+void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+{
+	int total_len, eth_len, ip_len, udp_len;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct iphdr *iph;
+	struct ethhdr *eth;
+
+	udp_len = len + sizeof(*udph);
+	ip_len = eth_len = udp_len + sizeof(*iph);
+	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+
+	skb = find_skb(np, total_len, total_len - len);
+	if (!skb)
+		return;
+
+	memcpy(skb->data, msg, len);
+	skb->len += len;
+
+	udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+	udph->source = htons(np->local_port);
+	udph->dest = htons(np->remote_port);
+	udph->len = htons(udp_len);
+	udph->check = 0;
+
+	iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+
+	/* iph->version = 4; iph->ihl = 5; */
+	put_unaligned(0x45, (unsigned char *)iph);
+	iph->tos      = 0;
+	put_unaligned(htons(ip_len), &(iph->tot_len));
+	iph->id       = 0;
+	iph->frag_off = 0;
+	iph->ttl      = 64;
+	iph->protocol = IPPROTO_UDP;
+	iph->check    = 0;
+	put_unaligned(htonl(np->local_ip), &(iph->saddr));
+	put_unaligned(htonl(np->remote_ip), &(iph->daddr));
+	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+	eth->h_proto = htons(ETH_P_IP);
+	memcpy(eth->h_source, np->local_mac, 6);
+	memcpy(eth->h_dest, np->remote_mac, 6);
+
+	skb->dev = np->dev;
+
+	netpoll_send_skb(np, skb);
+}
+
+static void arp_reply(struct sk_buff *skb)
+{
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
+	u32 sip, tip;
+	struct sk_buff *send_skb;
+	struct netpoll *np = skb->dev->np;
+
+	if (!np) return;
+
+	/* No arp on this interface */
+	if (skb->dev->flags & IFF_NOARP)
+		return;
+
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * skb->dev->addr_len) +
+				 (2 * sizeof(u32)))))
+		return;
+
+	skb->h.raw = skb->nh.raw = skb->data;
+	arp = skb->nh.arph;
+
+	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+	    arp->ar_pro != htons(ETH_P_IP) ||
+	    arp->ar_op != htons(ARPOP_REQUEST))
+		return;
+
+	arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4 + skb->dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+
+	/* Should we ignore arp? */
+	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
+		return;
+
+	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
+	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+			    LL_RESERVED_SPACE(np->dev));
+
+	if (!send_skb)
+		return;
+
+	send_skb->nh.raw = send_skb->data;
+	arp = (struct arphdr *) skb_put(send_skb, size);
+	send_skb->dev = skb->dev;
+	send_skb->protocol = htons(ETH_P_ARP);
+
+	/* Fill the device header for the ARP frame */
+
+	if (np->dev->hard_header &&
+	    np->dev->hard_header(send_skb, skb->dev, ptype,
+				       np->remote_mac, np->local_mac,
+				       send_skb->len) < 0) {
+		kfree_skb(send_skb);
+		return;
+	}
+
+	/*
+	 * Fill out the arp protocol part.
+	 *
+	 * we only support ethernet device type,
+	 * which (according to RFC 1390) should always equal 1 (Ethernet).
+	 */
+
+	arp->ar_hrd = htons(np->dev->type);
+	arp->ar_pro = htons(ETH_P_IP);
+	arp->ar_hln = np->dev->addr_len;
+	arp->ar_pln = 4;
+	arp->ar_op = htons(type);
+
+	arp_ptr=(unsigned char *)(arp + 1);
+	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &tip, 4);
+	arp_ptr += 4;
+	memcpy(arp_ptr, np->remote_mac, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &sip, 4);
+
+	netpoll_send_skb(np, send_skb);
+}
+
+int __netpoll_rx(struct sk_buff *skb)
+{
+	int proto, len, ulen;
+	struct iphdr *iph;
+	struct udphdr *uh;
+	struct netpoll *np = skb->dev->np;
+
+	if (!np->rx_hook)
+		goto out;
+	if (skb->dev->type != ARPHRD_ETHER)
+		goto out;
+
+	/* check if netpoll clients need ARP */
+	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	    atomic_read(&trapped)) {
+		arp_reply(skb);
+		return 1;
+	}
+
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto != ETH_P_IP)
+		goto out;
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto out;
+	if (skb_shared(skb))
+		goto out;
+
+	iph = (struct iphdr *)skb->data;
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+	if (iph->ihl < 5 || iph->version != 4)
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto out;
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		goto out;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < iph->ihl*4)
+		goto out;
+
+	if (iph->protocol != IPPROTO_UDP)
+		goto out;
+
+	len -= iph->ihl*4;
+	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+	ulen = ntohs(uh->len);
+
+	if (ulen != len)
+		goto out;
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr) < 0)
+		goto out;
+	if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+		goto out;
+	if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+		goto out;
+	if (np->local_port && np->local_port != ntohs(uh->dest))
+		goto out;
+
+	np->rx_hook(np, ntohs(uh->source),
+		    (char *)(uh+1),
+		    ulen - sizeof(struct udphdr));
+
+	kfree_skb(skb);
+	return 1;
+
+out:
+	if (atomic_read(&trapped)) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	return 0;
+}
+
+int netpoll_parse_options(struct netpoll *np, char *opt)
+{
+	char *cur=opt, *delim;
+
+	if(*cur != '@') {
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->local_port=simple_strtol(cur, NULL, 10);
+		cur=delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
+
+	if(*cur != '/') {
+		if ((delim = strchr(cur, '/')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->local_ip=ntohl(in_aton(cur));
+		cur=delim;
+
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+	cur++;
+
+	if ( *cur != ',') {
+		/* parse out dev name */
+		if ((delim = strchr(cur, ',')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+		cur=delim;
+	}
+	cur++;
+
+	printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
+
+	if ( *cur != '@' ) {
+		/* dst port */
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_port=simple_strtol(cur, NULL, 10);
+		cur=delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
+
+	/* dst ip */
+	if ((delim = strchr(cur, '/')) == NULL)
+		goto parse_failed;
+	*delim=0;
+	np->remote_ip=ntohl(in_aton(cur));
+	cur=delim+1;
+
+	printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->remote_ip));
+
+	if( *cur != 0 )
+	{
+		/* MAC address */
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_mac[0]=simple_strtol(cur, NULL, 16);
+		cur=delim+1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_mac[1]=simple_strtol(cur, NULL, 16);
+		cur=delim+1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_mac[2]=simple_strtol(cur, NULL, 16);
+		cur=delim+1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_mac[3]=simple_strtol(cur, NULL, 16);
+		cur=delim+1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim=0;
+		np->remote_mac[4]=simple_strtol(cur, NULL, 16);
+		cur=delim+1;
+		np->remote_mac[5]=simple_strtol(cur, NULL, 16);
+	}
+
+	printk(KERN_INFO "%s: remote ethernet address "
+	       "%02x:%02x:%02x:%02x:%02x:%02x\n",
+	       np->name,
+	       np->remote_mac[0],
+	       np->remote_mac[1],
+	       np->remote_mac[2],
+	       np->remote_mac[3],
+	       np->remote_mac[4],
+	       np->remote_mac[5]);
+
+	return 0;
+
+ parse_failed:
+	printk(KERN_INFO "%s: couldn't parse config at %s!\n",
+	       np->name, cur);
+	return -1;
+}
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+
+	np->poll_lock = SPIN_LOCK_UNLOCKED;
+	np->poll_owner = -1;
+
+	if (np->dev_name)
+		ndev = dev_get_by_name(np->dev_name);
+	if (!ndev) {
+		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+		       np->name, np->dev_name);
+		return -1;
+	}
+
+	np->dev = ndev;
+	ndev->np = np;
+
+	if (!ndev->poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		goto release;
+	}
+
+	if (!netif_running(ndev)) {
+		unsigned long atmost, atleast;
+
+		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
+		       np->name, np->dev_name);
+
+		rtnl_shlock();
+		if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) {
+			printk(KERN_ERR "%s: failed to open %s\n",
+			       np->name, np->dev_name);
+			rtnl_shunlock();
+			goto release;
+		}
+		rtnl_shunlock();
+
+		atleast = jiffies + HZ/10;
+ 		atmost = jiffies + 4*HZ;
+		while (!netif_carrier_ok(ndev)) {
+			if (time_after(jiffies, atmost)) {
+				printk(KERN_NOTICE
+				       "%s: timeout waiting for carrier\n",
+				       np->name);
+				break;
+			}
+			cond_resched();
+		}
+
+		/* If carrier appears to come up instantly, we don't
+		 * trust it and pause so that we don't pump all our
+		 * queued console messages into the bitbucket.
+		 */
+
+		if (time_before(jiffies, atleast)) {
+			printk(KERN_NOTICE "%s: carrier detect appears"
+			       " untrustworthy, waiting 4 seconds\n",
+			       np->name);
+			msleep(4000);
+		}
+	}
+
+	if (!memcmp(np->local_mac, "\0\0\0\0\0\0", 6) && ndev->dev_addr)
+		memcpy(np->local_mac, ndev->dev_addr, 6);
+
+	if (!np->local_ip) {
+		rcu_read_lock();
+		in_dev = __in_dev_get(ndev);
+
+		if (!in_dev || !in_dev->ifa_list) {
+			rcu_read_unlock();
+			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
+			       np->name, np->dev_name);
+			goto release;
+		}
+
+		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
+		rcu_read_unlock();
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+
+	if(np->rx_hook)
+		np->rx_flags = NETPOLL_RX_ENABLED;
+
+	return 0;
+
+ release:
+	ndev->np = NULL;
+	np->dev = NULL;
+	dev_put(ndev);
+	return -1;
+}
+
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (np->dev)
+		np->dev->np = NULL;
+	dev_put(np->dev);
+	np->dev = NULL;
+}
+
+int netpoll_trap(void)
+{
+	return atomic_read(&trapped);
+}
+
+void netpoll_set_trap(int trap)
+{
+	if (trap)
+		atomic_inc(&trapped);
+	else
+		atomic_dec(&trapped);
+}
+
+EXPORT_SYMBOL(netpoll_set_trap);
+EXPORT_SYMBOL(netpoll_trap);
+EXPORT_SYMBOL(netpoll_parse_options);
+EXPORT_SYMBOL(netpoll_setup);
+EXPORT_SYMBOL(netpoll_cleanup);
+EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll);
+EXPORT_SYMBOL(netpoll_queue);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
new file mode 100644
index 00000000000..c57b06bc79f
--- /dev/null
+++ b/net/core/pktgen.c
@@ -0,0 +1,3132 @@
+/*
+ * Authors:
+ * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se>
+ *                             Uppsala University and
+ *                             Swedish University of Agricultural Sciences
+ *
+ * Alexey Kuznetsov  <kuznet@ms2.inr.ac.ru>
+ * Ben Greear <greearb@candelatech.com>
+ * Jens L��s <jens.laas@data.slu.se>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * A tool for loading the network with preconfigurated packets.
+ * The tool is implemented as a linux module.  Parameters are output 
+ * device, delay (to hard_xmit), number of packets, and whether
+ * to use multiple SKBs or just the same one.
+ * pktgen uses the installed interface's output routine.
+ *
+ * Additional hacking by:
+ *
+ * Jens.Laas@data.slu.se
+ * Improved by ANK. 010120.
+ * Improved by ANK even more. 010212.
+ * MAC address typo fixed. 010417 --ro
+ * Integrated.  020301 --DaveM
+ * Added multiskb option 020301 --DaveM
+ * Scaling of results. 020417--sigurdur@linpro.no
+ * Significant re-work of the module:
+ *   *  Convert to threaded model to more efficiently be able to transmit
+ *       and receive on multiple interfaces at once.
+ *   *  Converted many counters to __u64 to allow longer runs.
+ *   *  Allow configuration of ranges, like min/max IP address, MACs,
+ *       and UDP-ports, for both source and destination, and can
+ *       set to use a random distribution or sequentially walk the range.
+ *   *  Can now change most values after starting.
+ *   *  Place 12-byte packet in UDP payload with magic number,
+ *       sequence number, and timestamp.
+ *   *  Add receiver code that detects dropped pkts, re-ordered pkts, and
+ *       latencies (with micro-second) precision.
+ *   *  Add IOCTL interface to easily get counters & configuration.
+ *   --Ben Greear <greearb@candelatech.com>
+ *
+ * Renamed multiskb to clone_skb and cleaned up sending core for two distinct 
+ * skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0 
+ * as a "fastpath" with a configurable number of clones after alloc's.
+ * clone_skb=0 means all packets are allocated this also means ranges time 
+ * stamps etc can be used. clone_skb=100 means 1 malloc is followed by 100 
+ * clones.
+ *
+ * Also moved to /proc/net/pktgen/ 
+ * --ro
+ *
+ * Sept 10:  Fixed threading/locking.  Lots of bone-headed and more clever
+ *    mistakes.  Also merged in DaveM's patch in the -pre6 patch.
+ * --Ben Greear <greearb@candelatech.com>
+ *
+ * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
+ *
+ * 
+ * 021124 Finished major redesign and rewrite for new functionality.
+ * See Documentation/networking/pktgen.txt for how to use this.
+ *
+ * The new operation:
+ * For each CPU one thread/process is created at start. This process checks 
+ * for running devices in the if_list and sends packets until count is 0 it 
+ * also the thread checks the thread->control which is used for inter-process 
+ * communication. controlling process "posts" operations to the threads this 
+ * way. The if_lock should be possible to remove when add/rem_device is merged
+ * into this too.
+ *
+ * By design there should only be *one* "controlling" process. In practice 
+ * multiple write accesses gives unpredictable result. Understood by "write" 
+ * to /proc gives result code thats should be read be the "writer".
+ * For pratical use this should be no problem.
+ *
+ * Note when adding devices to a specific CPU there good idea to also assign 
+ * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. 
+ * --ro
+ *
+ * Fix refcount off by one if first packet fails, potential null deref, 
+ * memleak 030710- KJP
+ *
+ * First "ranges" functionality for ipv6 030726 --ro
+ *
+ * Included flow support. 030802 ANK.
+ *
+ * Fixed unaligned access on IA-64 Grant Grundler <grundler@parisc-linux.org>
+ * 
+ * Remove if fix from added Harald Welte <laforge@netfilter.org> 040419
+ * ia64 compilation fix from  Aron Griffis <aron@hp.com> 040604
+ *
+ * New xmit() return, do_div and misc clean up by Stephen Hemminger 
+ * <shemminger@osdl.org> 040923
+ *
+ * Rany Dunlap fixed u64 printk compiler waring 
+ *
+ * Remove FCS from BW calculation.  Lennert Buytenhek <buytenh@wantstofly.org>
+ * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
+ *
+ * Corrections from Nikolai Malykh (nmalykh@bilim.com) 
+ * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230
+ *
+ * interruptible_sleep_on_timeout() replaced Nishanth Aravamudan <nacc@us.ibm.com> 
+ * 050103
+ */
+#include <linux/sys.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/proc_fs.h>
+#include <linux/wait.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <asm/byteorder.h>
+#include <linux/rcupdate.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h> /* do_div */
+#include <asm/timex.h>
+
+
+#define VERSION  "pktgen v2.61: Packet Generator for packet performance testing.\n"
+
+/* #define PG_DEBUG(a) a */
+#define PG_DEBUG(a) 
+
+/* The buckets are exponential in 'width' */
+#define LAT_BUCKETS_MAX 32
+#define IP_NAME_SZ 32
+
+/* Device flag bits */
+#define F_IPSRC_RND   (1<<0)  /* IP-Src Random  */
+#define F_IPDST_RND   (1<<1)  /* IP-Dst Random  */
+#define F_UDPSRC_RND  (1<<2)  /* UDP-Src Random */
+#define F_UDPDST_RND  (1<<3)  /* UDP-Dst Random */
+#define F_MACSRC_RND  (1<<4)  /* MAC-Src Random */
+#define F_MACDST_RND  (1<<5)  /* MAC-Dst Random */
+#define F_TXSIZE_RND  (1<<6)  /* Transmit size is random */
+#define F_IPV6        (1<<7)  /* Interface in IPV6 Mode */
+
+/* Thread control flag bits */
+#define T_TERMINATE   (1<<0)  
+#define T_STOP        (1<<1)  /* Stop run */
+#define T_RUN         (1<<2)  /* Start run */
+#define T_REMDEV      (1<<3)  /* Remove all devs */
+
+/* Locks */
+#define   thread_lock()        spin_lock(&_thread_lock)
+#define   thread_unlock()      spin_unlock(&_thread_lock)
+
+/* If lock -- can be removed after some work */
+#define   if_lock(t)           spin_lock(&(t->if_lock));
+#define   if_unlock(t)           spin_unlock(&(t->if_lock));
+
+/* Used to help with determining the pkts on receive */
+#define PKTGEN_MAGIC 0xbe9be955
+#define PG_PROC_DIR "pktgen"
+
+#define MAX_CFLOWS  65536
+
+struct flow_state
+{
+	__u32		cur_daddr;
+	int		count;
+};
+
+struct pktgen_dev {
+
+	/*
+	 * Try to keep frequent/infrequent used vars. separated.
+	 */
+
+        char ifname[32];
+        struct proc_dir_entry *proc_ent;
+        char result[512];
+        /* proc file names */
+        char fname[80];
+
+        struct pktgen_thread* pg_thread; /* the owner */
+        struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */
+
+        int running;  /* if this changes to false, the test will stop */
+        
+        /* If min != max, then we will either do a linear iteration, or
+         * we will do a random selection from within the range.
+         */
+        __u32 flags;     
+
+        int min_pkt_size;    /* = ETH_ZLEN; */
+        int max_pkt_size;    /* = ETH_ZLEN; */
+        int nfrags;
+        __u32 delay_us;    /* Default delay */
+        __u32 delay_ns;
+        __u64 count;  /* Default No packets to send */
+        __u64 sofar;  /* How many pkts we've sent so far */
+        __u64 tx_bytes; /* How many bytes we've transmitted */
+        __u64 errors;    /* Errors when trying to transmit, pkts will be re-sent */
+
+        /* runtime counters relating to clone_skb */
+        __u64 next_tx_us;          /* timestamp of when to tx next */
+        __u32 next_tx_ns;
+        
+        __u64 allocated_skbs;
+        __u32 clone_count;
+	int last_ok;           /* Was last skb sent? 
+	                        * Or a failed transmit of some sort?  This will keep
+                                * sequence numbers in order, for example.
+				*/
+        __u64 started_at; /* micro-seconds */
+        __u64 stopped_at; /* micro-seconds */
+        __u64 idle_acc; /* micro-seconds */
+        __u32 seq_num;
+        
+        int clone_skb; /* Use multiple SKBs during packet gen.  If this number
+                          * is greater than 1, then that many coppies of the same
+                          * packet will be sent before a new packet is allocated.
+                          * For instance, if you want to send 1024 identical packets
+                          * before creating a new packet, set clone_skb to 1024.
+                          */
+        
+        char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+        char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */
+
+	struct in6_addr  in6_saddr;
+	struct in6_addr  in6_daddr;
+	struct in6_addr  cur_in6_daddr;
+	struct in6_addr  cur_in6_saddr;
+	/* For ranges */
+	struct in6_addr  min_in6_daddr;
+	struct in6_addr  max_in6_daddr;
+	struct in6_addr  min_in6_saddr;
+	struct in6_addr  max_in6_saddr;
+
+        /* If we're doing ranges, random or incremental, then this
+         * defines the min/max for those ranges.
+         */
+        __u32 saddr_min; /* inclusive, source IP address */
+        __u32 saddr_max; /* exclusive, source IP address */
+        __u32 daddr_min; /* inclusive, dest IP address */
+        __u32 daddr_max; /* exclusive, dest IP address */
+
+        __u16 udp_src_min; /* inclusive, source UDP port */
+        __u16 udp_src_max; /* exclusive, source UDP port */
+        __u16 udp_dst_min; /* inclusive, dest UDP port */
+        __u16 udp_dst_max; /* exclusive, dest UDP port */
+
+        __u32 src_mac_count; /* How many MACs to iterate through */
+        __u32 dst_mac_count; /* How many MACs to iterate through */
+        
+        unsigned char dst_mac[6];
+        unsigned char src_mac[6];
+        
+        __u32 cur_dst_mac_offset;
+        __u32 cur_src_mac_offset;
+        __u32 cur_saddr;
+        __u32 cur_daddr;
+        __u16 cur_udp_dst;
+        __u16 cur_udp_src;
+        __u32 cur_pkt_size;
+        
+        __u8 hh[14];
+        /* = { 
+           0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, 
+           
+           We fill in SRC address later
+           0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x08, 0x00
+           };
+        */
+        __u16 pad; /* pad out the hh struct to an even 16 bytes */
+
+        struct sk_buff* skb; /* skb we are to transmit next, mainly used for when we
+                              * are transmitting the same one multiple times
+                              */
+        struct net_device* odev; /* The out-going device.  Note that the device should
+                                  * have it's pg_info pointer pointing back to this
+                                  * device.  This will be set when the user specifies
+                                  * the out-going device name (not when the inject is
+                                  * started as it used to do.)
+                                  */
+	struct flow_state *flows;
+	unsigned cflows;         /* Concurrent flows (config) */
+	unsigned lflow;          /* Flow length  (config) */
+	unsigned nflows;         /* accumulated flows (stats) */
+};
+
+struct pktgen_hdr {
+        __u32 pgh_magic;
+        __u32 seq_num;
+	__u32 tv_sec;
+	__u32 tv_usec;
+};
+
+struct pktgen_thread {
+        spinlock_t if_lock;
+        struct pktgen_dev *if_list;           /* All device here */
+        struct pktgen_thread* next;
+        char name[32];
+        char fname[128]; /* name of proc file */
+        struct proc_dir_entry *proc_ent;
+        char result[512];
+        u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
+        
+	/* Field for thread to receive "posted" events terminate, stop ifs etc.*/
+
+        u32 control;
+	int pid;
+	int cpu;
+
+        wait_queue_head_t queue;
+};
+
+#define REMOVE 1
+#define FIND   0
+
+/*  This code works around the fact that do_div cannot handle two 64-bit
+    numbers, and regular 64-bit division doesn't work on x86 kernels.
+    --Ben
+*/
+
+#define PG_DIV 0
+
+/* This was emailed to LMKL by: Chris Caputo <ccaputo@alt.net>
+ * Function copied/adapted/optimized from:
+ *
+ *  nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
+ *
+ * Copyright 1994, University of Cambridge Computer Laboratory
+ * All Rights Reserved.
+ *
+ */
+inline static s64 divremdi3(s64 x, s64 y, int type) 
+{
+        u64 a = (x < 0) ? -x : x;
+        u64 b = (y < 0) ? -y : y;
+        u64 res = 0, d = 1;
+
+        if (b > 0) {
+                while (b < a) {
+                        b <<= 1;
+                        d <<= 1;
+                }
+        }
+        
+        do {
+                if ( a >= b ) {
+                        a -= b;
+                        res += d;
+                }
+                b >>= 1;
+                d >>= 1;
+        }
+        while (d);
+
+        if (PG_DIV == type) {
+                return (((x ^ y) & (1ll<<63)) == 0) ? res : -(s64)res;
+        }
+        else {
+                return ((x & (1ll<<63)) == 0) ? a : -(s64)a;
+        }
+}
+
+/* End of hacks to deal with 64-bit math on x86 */
+
+/** Convert to miliseconds */
+static inline __u64 tv_to_ms(const struct timeval* tv) 
+{
+        __u64 ms = tv->tv_usec / 1000;
+        ms += (__u64)tv->tv_sec * (__u64)1000;
+        return ms;
+}
+
+
+/** Convert to micro-seconds */
+static inline __u64 tv_to_us(const struct timeval* tv) 
+{
+        __u64 us = tv->tv_usec;
+        us += (__u64)tv->tv_sec * (__u64)1000000;
+        return us;
+}
+
+static inline __u64 pg_div(__u64 n, __u32 base) {
+        __u64 tmp = n;
+        do_div(tmp, base);
+        /* printk("pktgen: pg_div, n: %llu  base: %d  rv: %llu\n",
+                  n, base, tmp); */
+        return tmp;
+}
+
+static inline __u64 pg_div64(__u64 n, __u64 base) 
+{
+        __u64 tmp = n;
+/*
+ * How do we know if the architectrure we are running on
+ * supports division with 64 bit base?
+ * 
+ */
+#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) 
+
+		do_div(tmp, base);
+#else
+		tmp = divremdi3(n, base, PG_DIV);
+#endif
+        return tmp;
+}
+
+static inline u32 pktgen_random(void)
+{
+#if 0
+	__u32 n;
+	get_random_bytes(&n, 4);
+	return n;
+#else
+	return net_random();
+#endif
+}
+
+static inline __u64 getCurMs(void) 
+{
+        struct timeval tv;
+        do_gettimeofday(&tv);
+        return tv_to_ms(&tv);
+}
+
+static inline __u64 getCurUs(void) 
+{
+        struct timeval tv;
+        do_gettimeofday(&tv);
+        return tv_to_us(&tv);
+}
+
+static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) 
+{
+        return tv_to_us(a) - tv_to_us(b);
+}
+
+
+/* old include end */
+
+static char version[] __initdata = VERSION;
+
+static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos);
+static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos);
+static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
+
+static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data);
+static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
+static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data);
+static int create_proc_dir(void);
+static int remove_proc_dir(void);
+
+static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
+static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
+static struct pktgen_thread* pktgen_find_thread(const char* name);
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname);
+static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
+static void pktgen_run_all_threads(void);
+static void pktgen_stop_all_threads_ifs(void);
+static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
+static void pktgen_stop(struct pktgen_thread* t);
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+static struct pktgen_dev *pktgen_NN_threads(const char* dev_name, int remove);
+static unsigned int scan_ip6(const char *s,char ip[16]);
+static unsigned int fmt_ip6(char *s,const char ip[16]);
+
+/* Module parameters, defaults. */
+static int pg_count_d = 1000; /* 1000 pkts by default */
+static int pg_delay_d = 0;
+static int pg_clone_skb_d = 0;
+static int debug = 0;
+
+static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED;
+static struct pktgen_thread *pktgen_threads = NULL;
+
+static char module_fname[128];
+static struct proc_dir_entry *module_proc_ent = NULL;
+
+static struct notifier_block pktgen_notifier_block = {
+	.notifier_call = pktgen_device_event,
+};
+
+static struct file_operations pktgen_fops = {
+        .read     = proc_pgctrl_read,
+        .write    = proc_pgctrl_write,
+	/*  .ioctl    = pktgen_ioctl, later maybe */
+};
+
+/*
+ * /proc handling functions 
+ *
+ */
+
+static struct proc_dir_entry *pg_proc_dir = NULL;
+static int proc_pgctrl_read_eof=0;
+
+static ssize_t proc_pgctrl_read(struct file* file, char __user * buf,
+                                 size_t count, loff_t *ppos)
+{ 
+	char data[200];
+	int len = 0;
+
+	if(proc_pgctrl_read_eof) {
+		proc_pgctrl_read_eof=0;
+		len = 0;
+		goto out;
+	}
+
+	sprintf(data, "%s", VERSION); 
+
+	len = strlen(data);
+
+	if(len > count) {
+		len =-EFAULT;
+		goto out;
+	}  	
+
+	if (copy_to_user(buf, data, len)) {
+		len =-EFAULT;
+		goto out;
+	}  
+
+	*ppos += len;
+	proc_pgctrl_read_eof=1; /* EOF next call */
+
+ out:
+	return len;
+}
+
+static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf,
+				 size_t count, loff_t *ppos)
+{
+	char *data = NULL;
+	int err = 0;
+
+        if (!capable(CAP_NET_ADMIN)){
+                err = -EPERM;
+		goto out;
+        }
+
+	data = (void*)vmalloc ((unsigned int)count);
+
+	if(!data) {
+		err = -ENOMEM;
+		goto out;
+	}
+	if (copy_from_user(data, buf, count)) {
+		err =-EFAULT;
+		goto out_free;
+	}  
+	data[count-1] = 0; /* Make string */
+
+	if (!strcmp(data, "stop")) 
+		pktgen_stop_all_threads_ifs();
+
+        else if (!strcmp(data, "start")) 
+		pktgen_run_all_threads();
+
+	else 
+		printk("pktgen: Unknown command: %s\n", data);
+
+	err = count;
+
+ out_free:
+	vfree (data);
+ out:
+        return err;
+}
+
+static int proc_if_read(char *buf , char **start, off_t offset,
+                           int len, int *eof, void *data)
+{
+	char *p;
+	int i;
+        struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
+        __u64 sa;
+        __u64 stopped;
+        __u64 now = getCurUs();
+        
+	p = buf;
+	p += sprintf(p, "Params: count %llu  min_pkt_size: %u  max_pkt_size: %u\n",
+		     (unsigned long long) pkt_dev->count,
+		     pkt_dev->min_pkt_size, pkt_dev->max_pkt_size);
+
+	p += sprintf(p, "     frags: %d  delay: %u  clone_skb: %d  ifname: %s\n",
+                     pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname);
+
+	p += sprintf(p, "     flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow);
+
+
+	if(pkt_dev->flags & F_IPV6) {
+		char b1[128], b2[128], b3[128];
+		fmt_ip6(b1,  pkt_dev->in6_saddr.s6_addr);
+		fmt_ip6(b2,  pkt_dev->min_in6_saddr.s6_addr);
+		fmt_ip6(b3,  pkt_dev->max_in6_saddr.s6_addr);
+		p += sprintf(p, "     saddr: %s  min_saddr: %s  max_saddr: %s\n", b1, b2, b3);
+
+		fmt_ip6(b1,  pkt_dev->in6_daddr.s6_addr);
+		fmt_ip6(b2,  pkt_dev->min_in6_daddr.s6_addr);
+		fmt_ip6(b3,  pkt_dev->max_in6_daddr.s6_addr);
+		p += sprintf(p, "     daddr: %s  min_daddr: %s  max_daddr: %s\n", b1, b2, b3);
+
+	} 
+	else 
+		p += sprintf(p, "     dst_min: %s  dst_max: %s\n     src_min: %s  src_max: %s\n",
+                     pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max);
+
+        p += sprintf(p, "     src_mac: ");
+
+	if ((pkt_dev->src_mac[0] == 0) && 
+	    (pkt_dev->src_mac[1] == 0) && 
+	    (pkt_dev->src_mac[2] == 0) && 
+	    (pkt_dev->src_mac[3] == 0) && 
+	    (pkt_dev->src_mac[4] == 0) && 
+	    (pkt_dev->src_mac[5] == 0)) 
+
+		for (i = 0; i < 6; i++) 
+			p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? "  " : ":");
+
+	else 
+		for (i = 0; i < 6; i++) 
+			p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? "  " : ":");
+
+        p += sprintf(p, "dst_mac: ");
+	for (i = 0; i < 6; i++) 
+		p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":");
+
+        p += sprintf(p, "     udp_src_min: %d  udp_src_max: %d  udp_dst_min: %d  udp_dst_max: %d\n",
+                     pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min,
+                     pkt_dev->udp_dst_max);
+
+        p += sprintf(p, "     src_mac_count: %d  dst_mac_count: %d \n     Flags: ",
+                     pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
+
+
+        if (pkt_dev->flags &  F_IPV6) 
+                p += sprintf(p, "IPV6  ");
+
+        if (pkt_dev->flags &  F_IPSRC_RND) 
+                p += sprintf(p, "IPSRC_RND  ");
+
+        if (pkt_dev->flags & F_IPDST_RND) 
+                p += sprintf(p, "IPDST_RND  ");
+        
+        if (pkt_dev->flags & F_TXSIZE_RND) 
+                p += sprintf(p, "TXSIZE_RND  ");
+        
+        if (pkt_dev->flags & F_UDPSRC_RND) 
+                p += sprintf(p, "UDPSRC_RND  ");
+        
+        if (pkt_dev->flags & F_UDPDST_RND) 
+                p += sprintf(p, "UDPDST_RND  ");
+        
+        if (pkt_dev->flags & F_MACSRC_RND) 
+                p += sprintf(p, "MACSRC_RND  ");
+        
+        if (pkt_dev->flags & F_MACDST_RND) 
+                p += sprintf(p, "MACDST_RND  ");
+
+        
+        p += sprintf(p, "\n");
+        
+        sa = pkt_dev->started_at;
+        stopped = pkt_dev->stopped_at;
+        if (pkt_dev->running) 
+                stopped = now; /* not really stopped, more like last-running-at */
+        
+        p += sprintf(p, "Current:\n     pkts-sofar: %llu  errors: %llu\n     started: %lluus  stopped: %lluus idle: %lluus\n",
+		     (unsigned long long) pkt_dev->sofar,
+		     (unsigned long long) pkt_dev->errors,
+		     (unsigned long long) sa,
+		     (unsigned long long) stopped, 
+		     (unsigned long long) pkt_dev->idle_acc);
+
+        p += sprintf(p, "     seq_num: %d  cur_dst_mac_offset: %d  cur_src_mac_offset: %d\n",
+                     pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset);
+
+	if(pkt_dev->flags & F_IPV6) {
+		char b1[128], b2[128];
+		fmt_ip6(b1,  pkt_dev->cur_in6_daddr.s6_addr);
+		fmt_ip6(b2,  pkt_dev->cur_in6_saddr.s6_addr);
+		p += sprintf(p, "     cur_saddr: %s  cur_daddr: %s\n", b2, b1);
+	} 
+	else 
+		p += sprintf(p, "     cur_saddr: 0x%x  cur_daddr: 0x%x\n",
+                     pkt_dev->cur_saddr, pkt_dev->cur_daddr);
+
+
+	p += sprintf(p, "     cur_udp_dst: %d  cur_udp_src: %d\n",
+                     pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+
+	p += sprintf(p, "     flows: %u\n", pkt_dev->nflows);
+
+	if (pkt_dev->result[0])
+		p += sprintf(p, "Result: %s\n", pkt_dev->result);
+	else
+		p += sprintf(p, "Result: Idle\n");
+	*eof = 1;
+
+	return p - buf;
+}
+
+
+static int count_trail_chars(const char __user *user_buffer, unsigned int maxlen)
+{
+	int i;
+
+	for (i = 0; i < maxlen; i++) {
+                char c;
+                if (get_user(c, &user_buffer[i]))
+                        return -EFAULT;
+                switch (c) {
+		case '\"':
+		case '\n':
+		case '\r':
+		case '\t':
+		case ' ':
+		case '=':
+			break;
+		default:
+			goto done;
+		};
+	}
+done:
+	return i;
+}
+
+static unsigned long num_arg(const char __user *user_buffer, unsigned long maxlen, 
+			     unsigned long *num)
+{
+	int i = 0;
+	*num = 0;
+  
+	for(; i < maxlen; i++) {
+                char c;
+                if (get_user(c, &user_buffer[i]))
+                        return -EFAULT;
+                if ((c >= '0') && (c <= '9')) {
+			*num *= 10;
+			*num += c -'0';
+		} else
+			break;
+	}
+	return i;
+}
+
+static int strn_len(const char __user *user_buffer, unsigned int maxlen)
+{
+	int i = 0;
+
+	for(; i < maxlen; i++) {
+                char c;
+                if (get_user(c, &user_buffer[i]))
+                        return -EFAULT;
+                switch (c) {
+		case '\"':
+		case '\n':
+		case '\r':
+		case '\t':
+		case ' ':
+			goto done_str;
+			break;
+		default:
+			break;
+		};
+	}
+done_str:
+
+	return i;
+}
+
+static int proc_if_write(struct file *file, const char __user *user_buffer,
+                            unsigned long count, void *data)
+{
+	int i = 0, max, len;
+	char name[16], valstr[32];
+	unsigned long value = 0;
+        struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data);
+        char* pg_result = NULL;
+        int tmp = 0;
+	char buf[128];
+        
+        pg_result = &(pkt_dev->result[0]);
+        
+	if (count < 1) {
+		printk("pktgen: wrong command format\n");
+		return -EINVAL;
+	}
+  
+	max = count - i;
+	tmp = count_trail_chars(&user_buffer[i], max);
+        if (tmp < 0) { 
+		printk("pktgen: illegal format\n");
+		return tmp; 
+	}
+        i += tmp;
+        
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+        if (len < 0) { return len; }
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len) )
+		return -EFAULT;
+	i += len;
+  
+	max = count -i;
+	len = count_trail_chars(&user_buffer[i], max);
+        if (len < 0) 
+                return len;
+        
+	i += len;
+
+	if (debug) {
+                char tb[count + 1];
+                if (copy_from_user(tb, user_buffer, count))
+			return -EFAULT;
+                tb[count] = 0;
+		printk("pktgen: %s,%lu  buffer -:%s:-\n", name, count, tb);
+        }
+
+	if (!strcmp(name, "min_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (value < 14+20+8)
+			value = 14+20+8;
+                if (value != pkt_dev->min_pkt_size) {
+                        pkt_dev->min_pkt_size = value;
+                        pkt_dev->cur_pkt_size = value;
+                }
+		sprintf(pg_result, "OK: min_pkt_size=%u", pkt_dev->min_pkt_size);
+		return count;
+	}
+
+        if (!strcmp(name, "max_pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (value < 14+20+8)
+			value = 14+20+8;
+                if (value != pkt_dev->max_pkt_size) {
+                        pkt_dev->max_pkt_size = value;
+                        pkt_dev->cur_pkt_size = value;
+                }
+		sprintf(pg_result, "OK: max_pkt_size=%u", pkt_dev->max_pkt_size);
+		return count;
+	}
+
+        /* Shortcut for min = max */
+
+	if (!strcmp(name, "pkt_size")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (value < 14+20+8)
+			value = 14+20+8;
+                if (value != pkt_dev->min_pkt_size) {
+                        pkt_dev->min_pkt_size = value;
+                        pkt_dev->max_pkt_size = value;
+                        pkt_dev->cur_pkt_size = value;
+                }
+		sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size);
+		return count;
+	}
+
+        if (!strcmp(name, "debug")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                debug = value;
+		sprintf(pg_result, "OK: debug=%u", debug);
+		return count;
+	}
+
+        if (!strcmp(name, "frags")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		pkt_dev->nfrags = value;
+		sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags);
+		return count;
+	}
+	if (!strcmp(name, "delay")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (value == 0x7FFFFFFF) {
+			pkt_dev->delay_us = 0x7FFFFFFF;
+			pkt_dev->delay_ns = 0;
+		} else {
+			pkt_dev->delay_us = value / 1000;
+			pkt_dev->delay_ns = value % 1000;
+		}
+		sprintf(pg_result, "OK: delay=%u", 1000*pkt_dev->delay_us+pkt_dev->delay_ns);
+		return count;
+	}
+ 	if (!strcmp(name, "udp_src_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                if (value != pkt_dev->udp_src_min) {
+                        pkt_dev->udp_src_min = value;
+                        pkt_dev->cur_udp_src = value;
+                }       
+		sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min);
+		return count;
+	}
+ 	if (!strcmp(name, "udp_dst_min")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                if (value != pkt_dev->udp_dst_min) {
+                        pkt_dev->udp_dst_min = value;
+                        pkt_dev->cur_udp_dst = value;
+                }
+		sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min);
+		return count;
+	}
+ 	if (!strcmp(name, "udp_src_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                if (value != pkt_dev->udp_src_max) {
+                        pkt_dev->udp_src_max = value;
+                        pkt_dev->cur_udp_src = value;
+                }
+		sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max);
+		return count;
+	}
+ 	if (!strcmp(name, "udp_dst_max")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                if (value != pkt_dev->udp_dst_max) {
+                        pkt_dev->udp_dst_max = value;
+                        pkt_dev->cur_udp_dst = value;
+                }
+		sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max);
+		return count;
+	}
+	if (!strcmp(name, "clone_skb")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+                pkt_dev->clone_skb = value;
+	
+		sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb);
+		return count;
+	}
+	if (!strcmp(name, "count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		pkt_dev->count = value;
+		sprintf(pg_result, "OK: count=%llu",
+			(unsigned long long) pkt_dev->count);
+		return count;
+	}
+	if (!strcmp(name, "src_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (pkt_dev->src_mac_count != value) {
+                        pkt_dev->src_mac_count = value;
+                        pkt_dev->cur_src_mac_offset = 0;
+                }
+		sprintf(pg_result, "OK: src_mac_count=%d", pkt_dev->src_mac_count);
+		return count;
+	}
+	if (!strcmp(name, "dst_mac_count")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (pkt_dev->dst_mac_count != value) {
+                        pkt_dev->dst_mac_count = value;
+                        pkt_dev->cur_dst_mac_offset = 0;
+                }
+		sprintf(pg_result, "OK: dst_mac_count=%d", pkt_dev->dst_mac_count);
+		return count;
+	}
+	if (!strcmp(name, "flag")) {
+                char f[32];
+                memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+                if (len < 0) { return len; }
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+		i += len;
+                if (strcmp(f, "IPSRC_RND") == 0) 
+                        pkt_dev->flags |= F_IPSRC_RND;
+                
+                else if (strcmp(f, "!IPSRC_RND") == 0) 
+                        pkt_dev->flags &= ~F_IPSRC_RND;
+                
+                else if (strcmp(f, "TXSIZE_RND") == 0) 
+                        pkt_dev->flags |= F_TXSIZE_RND;
+                
+                else if (strcmp(f, "!TXSIZE_RND") == 0) 
+                        pkt_dev->flags &= ~F_TXSIZE_RND;
+                
+                else if (strcmp(f, "IPDST_RND") == 0) 
+                        pkt_dev->flags |= F_IPDST_RND;
+                
+                else if (strcmp(f, "!IPDST_RND") == 0) 
+                        pkt_dev->flags &= ~F_IPDST_RND;
+                
+                else if (strcmp(f, "UDPSRC_RND") == 0) 
+                        pkt_dev->flags |= F_UDPSRC_RND;
+                
+                else if (strcmp(f, "!UDPSRC_RND") == 0) 
+                        pkt_dev->flags &= ~F_UDPSRC_RND;
+                
+                else if (strcmp(f, "UDPDST_RND") == 0) 
+                        pkt_dev->flags |= F_UDPDST_RND;
+                
+                else if (strcmp(f, "!UDPDST_RND") == 0) 
+                        pkt_dev->flags &= ~F_UDPDST_RND;
+                
+                else if (strcmp(f, "MACSRC_RND") == 0) 
+                        pkt_dev->flags |= F_MACSRC_RND;
+                
+                else if (strcmp(f, "!MACSRC_RND") == 0) 
+                        pkt_dev->flags &= ~F_MACSRC_RND;
+                
+                else if (strcmp(f, "MACDST_RND") == 0) 
+                        pkt_dev->flags |= F_MACDST_RND;
+                
+                else if (strcmp(f, "!MACDST_RND") == 0) 
+                        pkt_dev->flags &= ~F_MACDST_RND;
+                
+                else {
+                        sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
+                                f,
+                                "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n");
+                        return count;
+                }
+		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
+		return count;
+	}
+	if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1);
+                if (len < 0) { return len; }
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+                if (strcmp(buf, pkt_dev->dst_min) != 0) {
+                        memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
+                        strncpy(pkt_dev->dst_min, buf, len);
+                        pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+                        pkt_dev->cur_daddr = pkt_dev->daddr_min;
+                }
+                if(debug)
+                        printk("pktgen: dst_min set to: %s\n", pkt_dev->dst_min);
+                i += len;
+		sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
+		return count;
+	}
+	if (!strcmp(name, "dst_max")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1);
+                if (len < 0) { return len; }
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+
+                buf[len] = 0;
+                if (strcmp(buf, pkt_dev->dst_max) != 0) {
+                        memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
+                        strncpy(pkt_dev->dst_max, buf, len);
+                        pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+                        pkt_dev->cur_daddr = pkt_dev->daddr_max;
+                }
+		if(debug)
+			printk("pktgen: dst_max set to: %s\n", pkt_dev->dst_max);
+		i += len;
+		sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
+		return count;
+	}
+	if (!strcmp(name, "dst6")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+                if (len < 0) return len; 
+
+		pkt_dev->flags |= F_IPV6;
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
+		fmt_ip6(buf,  pkt_dev->in6_daddr.s6_addr);
+
+		ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
+
+                if(debug) 
+			printk("pktgen: dst6 set to: %s\n", buf);
+
+                i += len;
+		sprintf(pg_result, "OK: dst6=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "dst6_min")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+                if (len < 0) return len; 
+
+		pkt_dev->flags |= F_IPV6;
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
+		fmt_ip6(buf,  pkt_dev->min_in6_daddr.s6_addr);
+
+		ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr);
+                if(debug) 
+			printk("pktgen: dst6_min set to: %s\n", buf);
+
+                i += len;
+		sprintf(pg_result, "OK: dst6_min=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "dst6_max")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+                if (len < 0) return len; 
+
+		pkt_dev->flags |= F_IPV6;
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
+		fmt_ip6(buf,  pkt_dev->max_in6_daddr.s6_addr);
+
+                if(debug) 
+			printk("pktgen: dst6_max set to: %s\n", buf);
+
+                i += len;
+		sprintf(pg_result, "OK: dst6_max=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "src6")) {
+		len = strn_len(&user_buffer[i], sizeof(buf) - 1);
+                if (len < 0) return len; 
+
+		pkt_dev->flags |= F_IPV6;
+
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+
+		scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
+		fmt_ip6(buf,  pkt_dev->in6_saddr.s6_addr);
+
+		ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
+
+                if(debug) 
+			printk("pktgen: src6 set to: %s\n", buf);
+		
+                i += len;
+		sprintf(pg_result, "OK: src6=%s", buf);
+		return count;
+	}
+	if (!strcmp(name, "src_min")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1);
+                if (len < 0) { return len; }
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+                if (strcmp(buf, pkt_dev->src_min) != 0) {
+                        memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
+                        strncpy(pkt_dev->src_min, buf, len);
+                        pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+                        pkt_dev->cur_saddr = pkt_dev->saddr_min;
+                }
+		if(debug)
+			printk("pktgen: src_min set to: %s\n", pkt_dev->src_min);
+		i += len;
+		sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
+		return count;
+	}
+	if (!strcmp(name, "src_max")) {
+		len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1);
+                if (len < 0) { return len; }
+                if (copy_from_user(buf, &user_buffer[i], len))
+			return -EFAULT;
+                buf[len] = 0;
+                if (strcmp(buf, pkt_dev->src_max) != 0) {
+                        memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
+                        strncpy(pkt_dev->src_max, buf, len);
+                        pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+                        pkt_dev->cur_saddr = pkt_dev->saddr_max;
+                }
+		if(debug)
+			printk("pktgen: src_max set to: %s\n", pkt_dev->src_max);
+		i += len;
+		sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
+		return count;
+	}
+	if (!strcmp(name, "dst_mac")) {
+		char *v = valstr;
+                unsigned char old_dmac[6];
+		unsigned char *m = pkt_dev->dst_mac;
+                memcpy(old_dmac, pkt_dev->dst_mac, 6);
+                
+		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
+                if (len < 0) { return len; }
+		memset(valstr, 0, sizeof(valstr));
+		if( copy_from_user(valstr, &user_buffer[i], len))
+			return -EFAULT;
+		i += len;
+
+		for(*m = 0;*v && m < pkt_dev->dst_mac + 6; v++) {
+			if (*v >= '0' && *v <= '9') {
+				*m *= 16;
+				*m += *v - '0';
+			}
+			if (*v >= 'A' && *v <= 'F') {
+				*m *= 16;
+				*m += *v - 'A' + 10;
+			}
+			if (*v >= 'a' && *v <= 'f') {
+				*m *= 16;
+				*m += *v - 'a' + 10;
+			}
+			if (*v == ':') {
+				m++;
+				*m = 0;
+			}
+		}
+
+		/* Set up Dest MAC */
+                if (memcmp(old_dmac, pkt_dev->dst_mac, 6) != 0) 
+                        memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+                
+		sprintf(pg_result, "OK: dstmac");
+		return count;
+	}
+	if (!strcmp(name, "src_mac")) {
+		char *v = valstr;
+		unsigned char *m = pkt_dev->src_mac;
+
+		len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
+                if (len < 0) { return len; }
+		memset(valstr, 0, sizeof(valstr));
+		if( copy_from_user(valstr, &user_buffer[i], len)) 
+			return -EFAULT;
+		i += len;
+
+		for(*m = 0;*v && m < pkt_dev->src_mac + 6; v++) {
+			if (*v >= '0' && *v <= '9') {
+				*m *= 16;
+				*m += *v - '0';
+			}
+			if (*v >= 'A' && *v <= 'F') {
+				*m *= 16;
+				*m += *v - 'A' + 10;
+			}
+			if (*v >= 'a' && *v <= 'f') {
+				*m *= 16;
+				*m += *v - 'a' + 10;
+			}
+			if (*v == ':') {
+				m++;
+				*m = 0;
+			}
+		}	  
+
+                sprintf(pg_result, "OK: srcmac");
+		return count;
+	}
+
+        if (!strcmp(name, "clear_counters")) {
+                pktgen_clear_counters(pkt_dev);
+                sprintf(pg_result, "OK: Clearing counters.\n");
+                return count;
+        }
+
+	if (!strcmp(name, "flows")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		if (value > MAX_CFLOWS)
+			value = MAX_CFLOWS;
+
+		pkt_dev->cflows = value;
+		sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows);
+		return count;
+	}
+
+	if (!strcmp(name, "flowlen")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+                if (len < 0) { return len; }
+		i += len;
+		pkt_dev->lflow = value;
+		sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow);
+		return count;
+	}
+        
+	sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
+	return -EINVAL;
+}
+
+static int proc_thread_read(char *buf , char **start, off_t offset,
+                               int len, int *eof, void *data)
+{
+	char *p;
+        struct pktgen_thread *t = (struct pktgen_thread*)(data);
+        struct pktgen_dev *pkt_dev = NULL;
+
+
+        if (!t) {
+                printk("pktgen: ERROR: could not find thread in proc_thread_read\n");
+                return -EINVAL;
+        }
+
+	p = buf;
+	p += sprintf(p, "Name: %s  max_before_softirq: %d\n",
+                     t->name, t->max_before_softirq);
+
+        p += sprintf(p, "Running: ");
+        
+        if_lock(t);
+        for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) 
+		if(pkt_dev->running)
+			p += sprintf(p, "%s ", pkt_dev->ifname);
+        
+        p += sprintf(p, "\nStopped: ");
+
+        for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) 
+		if(!pkt_dev->running)
+			p += sprintf(p, "%s ", pkt_dev->ifname);
+
+	if (t->result[0])
+		p += sprintf(p, "\nResult: %s\n", t->result);
+	else
+		p += sprintf(p, "\nResult: NA\n");
+
+	*eof = 1;
+
+        if_unlock(t);
+
+	return p - buf;
+}
+
+static int proc_thread_write(struct file *file, const char __user *user_buffer,
+                                unsigned long count, void *data)
+{
+	int i = 0, max, len, ret;
+	char name[40];
+        struct pktgen_thread *t;
+        char *pg_result;
+        unsigned long value = 0;
+        
+	if (count < 1) {
+		//	sprintf(pg_result, "Wrong command format");
+		return -EINVAL;
+	}
+  
+	max = count - i;
+        len = count_trail_chars(&user_buffer[i], max);
+        if (len < 0) 
+		return len; 
+     
+	i += len;
+  
+	/* Read variable name */
+
+	len = strn_len(&user_buffer[i], sizeof(name) - 1);
+        if (len < 0)  
+		return len; 
+	
+	memset(name, 0, sizeof(name));
+	if (copy_from_user(name, &user_buffer[i], len))
+		return -EFAULT;
+	i += len;
+  
+	max = count -i;
+	len = count_trail_chars(&user_buffer[i], max);
+        if (len < 0)  
+		return len; 
+	
+	i += len;
+
+	if (debug) 
+		printk("pktgen: t=%s, count=%lu\n", name, count);
+        
+
+        t = (struct pktgen_thread*)(data);
+	if(!t) {
+		printk("pktgen: ERROR: No thread\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pg_result = &(t->result[0]);
+
+        if (!strcmp(name, "add_device")) {
+                char f[32];
+                memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+                if (len < 0) { 
+			ret = len; 
+			goto out;
+		}
+		if( copy_from_user(f, &user_buffer[i], len) )
+			return -EFAULT;
+		i += len;
+		thread_lock();
+                pktgen_add_device(t, f);
+		thread_unlock();
+                ret = count;
+                sprintf(pg_result, "OK: add_device=%s", f);
+		goto out;
+	}
+
+        if (!strcmp(name, "rem_device_all")) {
+		thread_lock();
+		t->control |= T_REMDEV;
+		thread_unlock();
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(HZ/8);  /* Propagate thread->control  */
+		ret = count;
+                sprintf(pg_result, "OK: rem_device_all");
+		goto out;
+	}
+
+        if (!strcmp(name, "max_before_softirq")) {
+                len = num_arg(&user_buffer[i], 10, &value);
+		thread_lock();
+                t->max_before_softirq = value;
+		thread_unlock();
+                ret = count;
+                sprintf(pg_result, "OK: max_before_softirq=%lu", value);
+		goto out;
+	}
+
+	ret = -EINVAL;
+ out:
+
+	return ret;
+}
+
+static int create_proc_dir(void)
+{
+        int     len;
+        /*  does proc_dir already exists */
+        len = strlen(PG_PROC_DIR);
+
+        for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) {
+                if ((pg_proc_dir->namelen == len) &&
+		    (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) 
+                        break;
+        }
+        
+        if (!pg_proc_dir) 
+                pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net);
+        
+        if (!pg_proc_dir) 
+                return -ENODEV;
+        
+        return 0;
+}
+
+static int remove_proc_dir(void)
+{
+        remove_proc_entry(PG_PROC_DIR, proc_net);
+        return 0;
+}
+
+/* Think find or remove for NN */
+static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) 
+{
+	struct pktgen_thread *t;
+	struct pktgen_dev *pkt_dev = NULL;
+
+        t = pktgen_threads;
+                
+	while (t) {
+		pkt_dev = pktgen_find_dev(t, ifname);
+		if (pkt_dev) {
+		                if(remove) { 
+				        if_lock(t);
+				        pktgen_remove_device(t, pkt_dev);
+				        if_unlock(t);
+				}
+			break;
+		}
+		t = t->next;
+	}
+        return pkt_dev;
+}
+
+static struct pktgen_dev *pktgen_NN_threads(const char* ifname, int remove) 
+{
+	struct pktgen_dev *pkt_dev = NULL;
+	thread_lock();
+	pkt_dev = __pktgen_NN_threads(ifname, remove);
+        thread_unlock();
+	return pkt_dev;
+}
+
+static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 
+{
+	struct net_device *dev = (struct net_device *)(ptr);
+
+	/* It is OK that we do not hold the group lock right now,
+	 * as we run under the RTNL lock.
+	 */
+
+	switch (event) {
+	case NETDEV_CHANGEADDR:
+	case NETDEV_GOING_DOWN:
+	case NETDEV_DOWN:
+	case NETDEV_UP:
+		/* Ignore for now */
+		break;
+		
+	case NETDEV_UNREGISTER:
+                pktgen_NN_threads(dev->name, REMOVE);
+		break;
+	};
+
+	return NOTIFY_DONE;
+}
+
+/* Associate pktgen_dev with a device. */
+
+static struct net_device* pktgen_setup_dev(struct pktgen_dev *pkt_dev) {
+	struct net_device *odev;
+
+	/* Clean old setups */
+
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+                pkt_dev->odev = NULL;
+        }
+
+	odev = dev_get_by_name(pkt_dev->ifname);
+
+	if (!odev) {
+		printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
+		goto out;
+	}
+	if (odev->type != ARPHRD_ETHER) {
+		printk("pktgen: not an ethernet device: \"%s\"\n", pkt_dev->ifname);
+		goto out_put;
+	}
+	if (!netif_running(odev)) {
+		printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
+		goto out_put;
+	}
+	pkt_dev->odev = odev;
+	
+        return pkt_dev->odev;
+
+out_put:
+	dev_put(odev);
+out:
+ 	return NULL;
+
+}
+
+/* Read pkt_dev from the interface and set up internal pktgen_dev
+ * structure to have the right information to create/send packets
+ */
+static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+{
+	/* Try once more, just in case it works now. */
+        if (!pkt_dev->odev) 
+                pktgen_setup_dev(pkt_dev);
+        
+        if (!pkt_dev->odev) {
+                printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+                sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n");
+                return;
+        }
+        
+        /* Default to the interface's mac if not explicitly set. */
+
+	if ((pkt_dev->src_mac[0] == 0) && 
+	    (pkt_dev->src_mac[1] == 0) && 
+	    (pkt_dev->src_mac[2] == 0) && 
+	    (pkt_dev->src_mac[3] == 0) && 
+	    (pkt_dev->src_mac[4] == 0) && 
+	    (pkt_dev->src_mac[5] == 0)) {
+
+	       memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, 6);
+       }
+        /* Set up Dest MAC */
+        memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6);
+
+        /* Set up pkt size */
+        pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
+	
+	if(pkt_dev->flags & F_IPV6) {
+		/*
+		 * Skip this automatic address setting until locks or functions 
+		 * gets exported
+		 */
+
+#ifdef NOTNOW
+		int i, set = 0, err=1;
+		struct inet6_dev *idev;
+
+		for(i=0; i< IN6_ADDR_HSIZE; i++)
+			if(pkt_dev->cur_in6_saddr.s6_addr[i]) {
+				set = 1;
+				break;
+			}
+
+		if(!set) {
+			
+			/*
+			 * Use linklevel address if unconfigured.
+			 *
+			 * use ipv6_get_lladdr if/when it's get exported
+			 */
+
+
+			read_lock(&addrconf_lock);
+			if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
+				struct inet6_ifaddr *ifp;
+
+				read_lock_bh(&idev->lock);
+				for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+					if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+						ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &ifp->addr);
+						err = 0;
+						break;
+					}
+				}
+				read_unlock_bh(&idev->lock);
+			}
+			read_unlock(&addrconf_lock);
+			if(err)	printk("pktgen: ERROR: IPv6 link address not availble.\n");
+		}
+#endif
+	} 
+	else {
+		pkt_dev->saddr_min = 0;
+		pkt_dev->saddr_max = 0;
+		if (strlen(pkt_dev->src_min) == 0) {
+			
+			struct in_device *in_dev; 
+
+			rcu_read_lock();
+			in_dev = __in_dev_get(pkt_dev->odev);
+			if (in_dev) {
+				if (in_dev->ifa_list) {
+					pkt_dev->saddr_min = in_dev->ifa_list->ifa_address;
+					pkt_dev->saddr_max = pkt_dev->saddr_min;
+				}
+				__in_dev_put(in_dev);	
+			}
+			rcu_read_unlock();
+		}
+		else {
+			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
+			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
+		}
+
+		pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
+		pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
+	}
+        /* Initialize current values. */
+        pkt_dev->cur_dst_mac_offset = 0;
+        pkt_dev->cur_src_mac_offset = 0;
+        pkt_dev->cur_saddr = pkt_dev->saddr_min;
+        pkt_dev->cur_daddr = pkt_dev->daddr_min;
+        pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+        pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+	pkt_dev->nflows = 0;
+}
+
+static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
+{
+	__u64 start;
+	__u64 now;
+
+	start = now = getCurUs();
+	printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now));
+	while (now < spin_until_us) {
+		/* TODO: optimise sleeping behavior */
+		if (spin_until_us - now > (1000000/HZ)+1) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(1);
+		} else if (spin_until_us - now > 100) {
+			do_softirq();
+			if (!pkt_dev->running)
+				return;
+			if (need_resched())
+				schedule();
+		}
+
+		now = getCurUs();
+	}
+
+	pkt_dev->idle_acc += now - start;
+}
+
+
+/* Increment/randomize headers according to flags and current values
+ * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
+ */
+static void mod_cur_headers(struct pktgen_dev *pkt_dev) {        
+        __u32 imn;
+        __u32 imx;
+	int  flow = 0;
+
+	if(pkt_dev->cflows)  {
+		flow = pktgen_random() % pkt_dev->cflows;
+		
+		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
+			pkt_dev->flows[flow].count = 0;
+	}						
+
+
+	/*  Deal with source MAC */
+        if (pkt_dev->src_mac_count > 1) {
+                __u32 mc;
+                __u32 tmp;
+
+                if (pkt_dev->flags & F_MACSRC_RND) 
+                        mc = pktgen_random() % (pkt_dev->src_mac_count);
+                else {
+                        mc = pkt_dev->cur_src_mac_offset++;
+                        if (pkt_dev->cur_src_mac_offset > pkt_dev->src_mac_count) 
+                                pkt_dev->cur_src_mac_offset = 0;
+                }
+
+                tmp = pkt_dev->src_mac[5] + (mc & 0xFF);
+                pkt_dev->hh[11] = tmp;
+                tmp = (pkt_dev->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[10] = tmp;
+                tmp = (pkt_dev->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[9] = tmp;
+                tmp = (pkt_dev->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[8] = tmp;
+                tmp = (pkt_dev->src_mac[1] + (tmp >> 8));
+                pkt_dev->hh[7] = tmp;        
+        }
+
+        /*  Deal with Destination MAC */
+        if (pkt_dev->dst_mac_count > 1) {
+                __u32 mc;
+                __u32 tmp;
+
+                if (pkt_dev->flags & F_MACDST_RND) 
+                        mc = pktgen_random() % (pkt_dev->dst_mac_count);
+
+                else {
+                        mc = pkt_dev->cur_dst_mac_offset++;
+                        if (pkt_dev->cur_dst_mac_offset > pkt_dev->dst_mac_count) {
+                                pkt_dev->cur_dst_mac_offset = 0;
+                        }
+                }
+
+                tmp = pkt_dev->dst_mac[5] + (mc & 0xFF);
+                pkt_dev->hh[5] = tmp;
+                tmp = (pkt_dev->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[4] = tmp;
+                tmp = (pkt_dev->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[3] = tmp;
+                tmp = (pkt_dev->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8));
+                pkt_dev->hh[2] = tmp;
+                tmp = (pkt_dev->dst_mac[1] + (tmp >> 8));
+                pkt_dev->hh[1] = tmp;        
+        }
+
+        if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
+                if (pkt_dev->flags & F_UDPSRC_RND) 
+                        pkt_dev->cur_udp_src = ((pktgen_random() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) + pkt_dev->udp_src_min);
+
+                else {
+			pkt_dev->cur_udp_src++;
+			if (pkt_dev->cur_udp_src >= pkt_dev->udp_src_max)
+				pkt_dev->cur_udp_src = pkt_dev->udp_src_min;
+                }
+        }
+
+        if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
+                if (pkt_dev->flags & F_UDPDST_RND) {
+                        pkt_dev->cur_udp_dst = ((pktgen_random() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) + pkt_dev->udp_dst_min);
+                }
+                else {
+			pkt_dev->cur_udp_dst++;
+			if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) 
+				pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min;
+                }
+        }
+
+	if (!(pkt_dev->flags & F_IPV6)) {
+
+		if ((imn = ntohl(pkt_dev->saddr_min)) < (imx = ntohl(pkt_dev->saddr_max))) {
+			__u32 t;
+			if (pkt_dev->flags & F_IPSRC_RND) 
+				t = ((pktgen_random() % (imx - imn)) + imn);
+			else {
+				t = ntohl(pkt_dev->cur_saddr);
+				t++;
+				if (t > imx) {
+					t = imn;
+				}
+			}
+			pkt_dev->cur_saddr = htonl(t);
+		}
+		
+		if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) {
+			pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr;
+		} else {
+
+			if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = ntohl(pkt_dev->daddr_max))) {
+				__u32 t;
+				if (pkt_dev->flags & F_IPDST_RND) {
+
+					t = ((pktgen_random() % (imx - imn)) + imn);
+					t = htonl(t);
+
+					while( LOOPBACK(t) || MULTICAST(t) || BADCLASS(t) || ZERONET(t) ||  LOCAL_MCAST(t) ) {
+						t = ((pktgen_random() % (imx - imn)) + imn);
+						t = htonl(t);
+					}
+					pkt_dev->cur_daddr = t;
+				}
+				
+				else {
+					t = ntohl(pkt_dev->cur_daddr);
+					t++;
+					if (t > imx) {
+						t = imn;
+					}
+					pkt_dev->cur_daddr = htonl(t);
+				}
+			}
+			if(pkt_dev->cflows) {	
+				pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr;
+				pkt_dev->nflows++;
+			}
+		}
+	}
+	else /* IPV6 * */
+	{
+		if(pkt_dev->min_in6_daddr.s6_addr32[0] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
+		   pkt_dev->min_in6_daddr.s6_addr32[3] == 0);
+		else {
+			int i;
+
+			/* Only random destinations yet */
+
+			for(i=0; i < 4; i++) {
+				pkt_dev->cur_in6_daddr.s6_addr32[i] =
+					((pktgen_random() |
+					  pkt_dev->min_in6_daddr.s6_addr32[i]) &
+					 pkt_dev->max_in6_daddr.s6_addr32[i]);
+			}
+ 		}
+	}
+
+        if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
+                __u32 t;
+                if (pkt_dev->flags & F_TXSIZE_RND) {
+                        t = ((pktgen_random() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
+                             + pkt_dev->min_pkt_size);
+                }
+                else {
+			t = pkt_dev->cur_pkt_size + 1;
+			if (t > pkt_dev->max_pkt_size) 
+				t = pkt_dev->min_pkt_size;
+                }
+                pkt_dev->cur_pkt_size = t;
+        }
+
+	pkt_dev->flows[flow].count++;
+}
+
+
+static struct sk_buff *fill_packet_ipv4(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen, iplen;
+	struct iphdr *iph;
+        struct pktgen_hdr *pgh = NULL;
+        
+	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+
+	skb_reserve(skb, 16);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
+	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+
+        /* Update any of the values, used when we're incrementing various
+         * fields.
+         */
+        mod_cur_headers(pkt_dev);
+
+	memcpy(eth, pkt_dev->hh, 12);
+	*(u16*)&eth[12] = __constant_htons(ETH_P_IP);
+
+	datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */
+	if (datalen < sizeof(struct pktgen_hdr)) 
+		datalen = sizeof(struct pktgen_hdr);
+        
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + 8); /* DATA + udphdr */
+	udph->check = 0;  /* No checksum */
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 32;
+	iph->tos = 0;
+	iph->protocol = IPPROTO_UDP; /* UDP */
+	iph->saddr = pkt_dev->cur_saddr;
+	iph->daddr = pkt_dev->cur_daddr;
+	iph->frag_off = 0;
+	iplen = 20 + 8 + datalen;
+	iph->tot_len = htons(iplen);
+	iph->check = 0;
+	iph->check = ip_fast_csum((void *) iph, iph->ihl);
+	skb->protocol = __constant_htons(ETH_P_IP);
+	skb->mac.raw = ((u8 *)iph) - 14;
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+
+	if (pkt_dev->nfrags <= 0) 
+                pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+	else {
+		int frags = pkt_dev->nfrags;
+		int i;
+
+                pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);
+                
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		if (datalen > frags*PAGE_SIZE) {
+			skb_put(skb, datalen-frags*PAGE_SIZE);
+			datalen = frags*PAGE_SIZE;
+		}
+
+		i = 0;
+		while (datalen > 0) {
+			struct page *page = alloc_pages(GFP_KERNEL, 0);
+			skb_shinfo(skb)->frags[i].page = page;
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+				(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page;
+			skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+        /* Stamp the time, and sequence number, convert them to network byte order */
+
+        if (pgh) {
+              struct timeval timestamp;
+	      
+	      pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	      pgh->seq_num   = htonl(pkt_dev->seq_num);
+	      
+	      do_gettimeofday(&timestamp);
+	      pgh->tv_sec    = htonl(timestamp.tv_sec);
+	      pgh->tv_usec   = htonl(timestamp.tv_usec);
+        }
+        pkt_dev->seq_num++;
+        
+	return skb;
+}
+
+/*
+ * scan_ip6, fmt_ip taken from dietlibc-0.21 
+ * Author Felix von Leitner <felix-dietlibc@fefe.de>
+ *
+ * Slightly modified for kernel. 
+ * Should be candidate for net/ipv4/utils.c
+ * --ro
+ */
+
+static unsigned int scan_ip6(const char *s,char ip[16])
+{
+	unsigned int i;
+	unsigned int len=0;
+	unsigned long u;
+	char suffix[16];
+	unsigned int prefixlen=0;
+	unsigned int suffixlen=0;
+	__u32 tmp;
+
+	for (i=0; i<16; i++) ip[i]=0;
+
+	for (;;) {
+		if (*s == ':') {
+			len++;
+			if (s[1] == ':') {        /* Found "::", skip to part 2 */
+				s+=2;
+				len++;
+				break;
+			}
+			s++;
+		}
+		{
+			char *tmp;
+			u=simple_strtoul(s,&tmp,16);
+			i=tmp-s;
+		}
+
+		if (!i) return 0;
+		if (prefixlen==12 && s[i]=='.') {
+
+			/* the last 4 bytes may be written as IPv4 address */
+
+			tmp = in_aton(s);
+			memcpy((struct in_addr*)(ip+12), &tmp, sizeof(tmp));
+			return i+len;
+		}
+		ip[prefixlen++] = (u >> 8);
+		ip[prefixlen++] = (u & 255);
+		s += i; len += i;
+		if (prefixlen==16)
+			return len;
+	}
+
+/* part 2, after "::" */
+	for (;;) {
+		if (*s == ':') {
+			if (suffixlen==0)
+				break;
+			s++;
+			len++;
+		} else if (suffixlen!=0)
+			break;
+		{
+			char *tmp;
+			u=simple_strtol(s,&tmp,16);
+			i=tmp-s;
+		}
+		if (!i) {
+			if (*s) len--;
+			break;
+		}
+		if (suffixlen+prefixlen<=12 && s[i]=='.') {
+			tmp = in_aton(s);
+			memcpy((struct in_addr*)(suffix+suffixlen), &tmp, sizeof(tmp));
+			suffixlen+=4;
+			len+=strlen(s);
+			break;
+		}
+		suffix[suffixlen++] = (u >> 8);
+		suffix[suffixlen++] = (u & 255);
+		s += i; len += i;
+		if (prefixlen+suffixlen==16)
+			break;
+	}
+	for (i=0; i<suffixlen; i++)
+		ip[16-suffixlen+i] = suffix[i];
+	return len;
+}
+
+static char tohex(char hexdigit) {
+	return hexdigit>9?hexdigit+'a'-10:hexdigit+'0';
+}
+
+static int fmt_xlong(char* s,unsigned int i) {
+	char* bak=s;
+	*s=tohex((i>>12)&0xf); if (s!=bak || *s!='0') ++s;
+	*s=tohex((i>>8)&0xf); if (s!=bak || *s!='0') ++s;
+	*s=tohex((i>>4)&0xf); if (s!=bak || *s!='0') ++s;
+	*s=tohex(i&0xf);
+	return s-bak+1;
+}
+
+static unsigned int fmt_ip6(char *s,const char ip[16]) {
+	unsigned int len;
+	unsigned int i;
+	unsigned int temp;
+	unsigned int compressing;
+	int j;
+
+	len = 0; compressing = 0;
+	for (j=0; j<16; j+=2) {
+
+#ifdef V4MAPPEDPREFIX
+		if (j==12 && !memcmp(ip,V4mappedprefix,12)) {
+			inet_ntoa_r(*(struct in_addr*)(ip+12),s);
+			temp=strlen(s);
+			return len+temp;
+		}
+#endif
+		temp = ((unsigned long) (unsigned char) ip[j] << 8) +
+			(unsigned long) (unsigned char) ip[j+1];
+		if (temp == 0) {
+			if (!compressing) {
+				compressing=1;
+				if (j==0) {
+					*s++=':'; ++len;
+				}
+			}
+		} else {
+			if (compressing) {
+				compressing=0;
+				*s++=':'; ++len;
+			}
+			i = fmt_xlong(s,temp); len += i; s += i;
+			if (j<14) {
+				*s++ = ':';
+				++len;
+			}
+		}
+	}
+	if (compressing) {
+		*s++=':'; ++len;
+	}
+	*s=0;
+	return len;
+}
+
+static struct sk_buff *fill_packet_ipv6(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	struct sk_buff *skb = NULL;
+	__u8 *eth;
+	struct udphdr *udph;
+	int datalen;
+	struct ipv6hdr *iph;
+        struct pktgen_hdr *pgh = NULL;
+        
+	skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
+	if (!skb) {
+		sprintf(pkt_dev->result, "No memory");
+		return NULL;
+	}
+
+	skb_reserve(skb, 16);
+
+	/*  Reserve for ethernet and IP header  */
+	eth = (__u8 *) skb_push(skb, 14);
+	iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
+	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+
+
+        /* Update any of the values, used when we're incrementing various
+         * fields.
+         */
+	mod_cur_headers(pkt_dev);
+
+	
+	memcpy(eth, pkt_dev->hh, 12);
+	*(u16*)&eth[12] = __constant_htons(ETH_P_IPV6);
+	
+        
+	datalen = pkt_dev->cur_pkt_size-14- 
+		sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */
+
+	if (datalen < sizeof(struct pktgen_hdr)) { 
+		datalen = sizeof(struct pktgen_hdr);
+		if (net_ratelimit())
+			printk(KERN_INFO "pktgen: increased datalen to %d\n", datalen);
+	}
+
+	udph->source = htons(pkt_dev->cur_udp_src);
+	udph->dest = htons(pkt_dev->cur_udp_dst);
+	udph->len = htons(datalen + sizeof(struct udphdr)); 
+	udph->check = 0;  /* No checksum */
+
+	 *(u32*)iph = __constant_htonl(0x60000000); /* Version + flow */
+
+	iph->hop_limit = 32;
+
+	iph->payload_len = htons(sizeof(struct udphdr) + datalen);
+	iph->nexthdr = IPPROTO_UDP;
+
+	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
+	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+
+	skb->mac.raw = ((u8 *)iph) - 14;
+	skb->protocol = __constant_htons(ETH_P_IPV6);
+	skb->dev = odev;
+	skb->pkt_type = PACKET_HOST;
+
+	if (pkt_dev->nfrags <= 0) 
+                pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+	else {
+		int frags = pkt_dev->nfrags;
+		int i;
+
+                pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8);
+                
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		if (datalen > frags*PAGE_SIZE) {
+			skb_put(skb, datalen-frags*PAGE_SIZE);
+			datalen = frags*PAGE_SIZE;
+		}
+
+		i = 0;
+		while (datalen > 0) {
+			struct page *page = alloc_pages(GFP_KERNEL, 0);
+			skb_shinfo(skb)->frags[i].page = page;
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+				(datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page;
+			skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+        /* Stamp the time, and sequence number, convert them to network byte order */
+	/* should we update cloned packets too ? */
+        if (pgh) {
+              struct timeval timestamp;
+	      
+	      pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	      pgh->seq_num   = htonl(pkt_dev->seq_num);
+	      
+	      do_gettimeofday(&timestamp);
+	      pgh->tv_sec    = htonl(timestamp.tv_sec);
+	      pgh->tv_usec   = htonl(timestamp.tv_usec);
+        }
+        pkt_dev->seq_num++;
+        
+	return skb;
+}
+
+static inline struct sk_buff *fill_packet(struct net_device *odev, 
+				   struct pktgen_dev *pkt_dev)
+{
+	if(pkt_dev->flags & F_IPV6) 
+		return fill_packet_ipv6(odev, pkt_dev);
+	else
+		return fill_packet_ipv4(odev, pkt_dev);
+}
+
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) 
+{
+        pkt_dev->seq_num = 1;
+        pkt_dev->idle_acc = 0;
+	pkt_dev->sofar = 0;
+        pkt_dev->tx_bytes = 0;
+        pkt_dev->errors = 0;
+}
+
+/* Set up structure for sending pkts, clear counters */
+
+static void pktgen_run(struct pktgen_thread *t)
+{
+        struct pktgen_dev *pkt_dev = NULL;
+	int started = 0;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+
+	if_lock(t);
+        for (pkt_dev = t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) {
+
+		/*
+		 * setup odev and create initial packet.
+		 */
+		pktgen_setup_inject(pkt_dev);
+
+		if(pkt_dev->odev) { 
+			pktgen_clear_counters(pkt_dev);
+			pkt_dev->running = 1; /* Cranke yeself! */
+			pkt_dev->skb = NULL;
+			pkt_dev->started_at = getCurUs();
+			pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */
+			pkt_dev->next_tx_ns = 0;
+			
+			strcpy(pkt_dev->result, "Starting");
+			started++;
+		}
+		else 
+			strcpy(pkt_dev->result, "Error starting");
+	}
+	if_unlock(t);
+	if(started) t->control &= ~(T_STOP);
+}
+
+static void pktgen_stop_all_threads_ifs(void)
+{
+        struct pktgen_thread *t = pktgen_threads;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads.\n"));
+
+	thread_lock();
+	while(t) {
+		pktgen_stop(t);
+		t = t->next;
+	}
+       thread_unlock();
+}
+
+static int thread_is_running(struct pktgen_thread *t )
+{
+        struct pktgen_dev *next;
+        int res = 0;
+
+        for(next=t->if_list; next; next=next->next) { 
+		if(next->running) {
+			res = 1;
+			break;
+		}
+        }
+        return res;
+}
+
+static int pktgen_wait_thread_run(struct pktgen_thread *t )
+{
+        if_lock(t);
+
+        while(thread_is_running(t)) {
+
+                if_unlock(t);
+
+		msleep_interruptible(100); 
+
+                if (signal_pending(current)) 
+                        goto signal;
+                if_lock(t);
+        }
+        if_unlock(t);
+        return 1;
+ signal:
+        return 0;
+}
+
+static int pktgen_wait_all_threads_run(void)
+{
+	struct pktgen_thread *t = pktgen_threads;
+	int sig = 1;
+	
+	while (t) {
+		sig = pktgen_wait_thread_run(t);
+		if( sig == 0 ) break;
+		thread_lock();
+		t=t->next;
+		thread_unlock();
+	}
+	if(sig == 0) {
+		thread_lock();
+		while (t) {
+			t->control |= (T_STOP);
+			t=t->next;
+		}
+		thread_unlock();
+	}
+	return sig;
+}
+
+static void pktgen_run_all_threads(void)
+{
+        struct pktgen_thread *t = pktgen_threads;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+
+	thread_lock();
+
+	while(t) {
+		t->control |= (T_RUN);
+		t = t->next;
+	}
+	thread_unlock();
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule_timeout(HZ/8);  /* Propagate thread->control  */
+			
+	pktgen_wait_all_threads_run();
+}
+
+
+static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
+{
+       __u64 total_us, bps, mbps, pps, idle;
+       char *p = pkt_dev->result;
+
+       total_us = pkt_dev->stopped_at - pkt_dev->started_at;
+
+       idle = pkt_dev->idle_acc;
+
+       p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
+                    (unsigned long long) total_us, 
+		    (unsigned long long)(total_us - idle), 
+		    (unsigned long long) idle,
+                    (unsigned long long) pkt_dev->sofar, 
+		    pkt_dev->cur_pkt_size, nr_frags);
+
+       pps = pkt_dev->sofar * USEC_PER_SEC;
+
+       while ((total_us >> 32) != 0) {
+               pps >>= 1;
+               total_us >>= 1;
+       }
+
+       do_div(pps, total_us);
+       
+       bps = pps * 8 * pkt_dev->cur_pkt_size;
+
+       mbps = bps;
+       do_div(mbps, 1000000);
+       p += sprintf(p, "  %llupps %lluMb/sec (%llubps) errors: %llu",
+                    (unsigned long long) pps, 
+		    (unsigned long long) mbps, 
+		    (unsigned long long) bps, 
+		    (unsigned long long) pkt_dev->errors);
+}
+ 
+
+/* Set stopped-at timer, remove from running list, do counters & statistics */
+
+static int pktgen_stop_device(struct pktgen_dev *pkt_dev) 
+{
+	
+        if (!pkt_dev->running) {
+                printk("pktgen: interface: %s is already stopped\n", pkt_dev->ifname);
+                return -EINVAL;
+        }
+
+        pkt_dev->stopped_at = getCurUs();
+        pkt_dev->running = 0;
+
+	show_results(pkt_dev, skb_shinfo(pkt_dev->skb)->nr_frags);
+
+	if (pkt_dev->skb) 
+		kfree_skb(pkt_dev->skb);
+
+	pkt_dev->skb = NULL;
+	
+        return 0;
+}
+
+static struct pktgen_dev *next_to_run(struct pktgen_thread *t )
+{
+	struct pktgen_dev *next, *best = NULL;
+        
+	if_lock(t);
+
+	for(next=t->if_list; next ; next=next->next) {
+		if(!next->running) continue;
+		if(best == NULL) best=next;
+		else if ( next->next_tx_us < best->next_tx_us) 
+			best =  next;
+	}
+	if_unlock(t);
+        return best;
+}
+
+static void pktgen_stop(struct pktgen_thread *t) {
+        struct pktgen_dev *next = NULL;
+
+	PG_DEBUG(printk("pktgen: entering pktgen_stop.\n"));
+
+        if_lock(t);
+
+        for(next=t->if_list; next; next=next->next)
+                pktgen_stop_device(next);
+
+        if_unlock(t);
+}
+
+static void pktgen_rem_all_ifs(struct pktgen_thread *t) 
+{
+        struct pktgen_dev *cur, *next = NULL;
+        
+        /* Remove all devices, free mem */
+ 
+        if_lock(t);
+
+        for(cur=t->if_list; cur; cur=next) { 
+		next = cur->next;
+		pktgen_remove_device(t, cur);
+	}
+
+        if_unlock(t);
+}
+
+static void pktgen_rem_thread(struct pktgen_thread *t) 
+{
+        /* Remove from the thread list */
+
+	struct pktgen_thread *tmp = pktgen_threads;
+
+        if (strlen(t->fname))
+                remove_proc_entry(t->fname, NULL);
+
+       thread_lock();
+
+	if (tmp == t)
+		pktgen_threads = tmp->next;
+	else {
+		while (tmp) {
+			if (tmp->next == t) {
+				tmp->next = t->next;
+				t->next = NULL;
+				break;
+			}
+			tmp = tmp->next;
+		}
+	}
+        thread_unlock();
+}
+
+static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
+{
+	struct net_device *odev = NULL;
+	__u64 idle_start = 0;
+	int ret;
+
+	odev = pkt_dev->odev;
+	
+	if (pkt_dev->delay_us || pkt_dev->delay_ns) {
+		u64 now;
+
+		now = getCurUs();
+		if (now < pkt_dev->next_tx_us)
+			spin(pkt_dev, pkt_dev->next_tx_us);
+
+		/* This is max DELAY, this has special meaning of
+		 * "never transmit"
+		 */
+		if (pkt_dev->delay_us == 0x7FFFFFFF) {
+			pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us;
+			pkt_dev->next_tx_ns = pkt_dev->delay_ns;
+			goto out;
+		}
+	}
+	
+	if (netif_queue_stopped(odev) || need_resched()) {
+		idle_start = getCurUs();
+		
+		if (!netif_running(odev)) {
+			pktgen_stop_device(pkt_dev);
+			goto out;
+		}
+		if (need_resched()) 
+			schedule();
+		
+		pkt_dev->idle_acc += getCurUs() - idle_start;
+		
+		if (netif_queue_stopped(odev)) {
+			pkt_dev->next_tx_us = getCurUs(); /* TODO */
+			pkt_dev->next_tx_ns = 0;
+			goto out; /* Try the next interface */
+		}
+	}
+	
+	if (pkt_dev->last_ok || !pkt_dev->skb) {
+		if ((++pkt_dev->clone_count >= pkt_dev->clone_skb ) || (!pkt_dev->skb)) {
+			/* build a new pkt */
+			if (pkt_dev->skb) 
+				kfree_skb(pkt_dev->skb);
+			
+			pkt_dev->skb = fill_packet(odev, pkt_dev);
+			if (pkt_dev->skb == NULL) {
+				printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n");
+				schedule();
+				pkt_dev->clone_count--; /* back out increment, OOM */
+				goto out;
+			}
+			pkt_dev->allocated_skbs++;
+			pkt_dev->clone_count = 0; /* reset counter */
+		}
+	}
+	
+	spin_lock_bh(&odev->xmit_lock);
+	if (!netif_queue_stopped(odev)) {
+
+		atomic_inc(&(pkt_dev->skb->users));
+retry_now:
+		ret = odev->hard_start_xmit(pkt_dev->skb, odev);
+		if (likely(ret == NETDEV_TX_OK)) {
+			pkt_dev->last_ok = 1;    
+			pkt_dev->sofar++;
+			pkt_dev->seq_num++;
+			pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
+			
+		} else if (ret == NETDEV_TX_LOCKED 
+			   && (odev->features & NETIF_F_LLTX)) {
+			cpu_relax();
+			goto retry_now;
+		} else {  /* Retry it next time */
+			
+			atomic_dec(&(pkt_dev->skb->users));
+			
+			if (debug && net_ratelimit())
+				printk(KERN_INFO "pktgen: Hard xmit error\n");
+			
+			pkt_dev->errors++;
+			pkt_dev->last_ok = 0;
+		}
+
+		pkt_dev->next_tx_us = getCurUs();
+		pkt_dev->next_tx_ns = 0;
+
+		pkt_dev->next_tx_us += pkt_dev->delay_us;
+		pkt_dev->next_tx_ns += pkt_dev->delay_ns;
+
+		if (pkt_dev->next_tx_ns > 1000) {
+			pkt_dev->next_tx_us++;
+			pkt_dev->next_tx_ns -= 1000;
+		}
+	} 
+
+	else {  /* Retry it next time */
+                pkt_dev->last_ok = 0;
+                pkt_dev->next_tx_us = getCurUs(); /* TODO */
+		pkt_dev->next_tx_ns = 0;
+        }
+
+	spin_unlock_bh(&odev->xmit_lock);
+	
+	/* If pkt_dev->count is zero, then run forever */
+	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
+		if (atomic_read(&(pkt_dev->skb->users)) != 1) {
+			idle_start = getCurUs();
+			while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+				if (signal_pending(current)) {
+					break;
+				}
+				schedule();
+			}
+			pkt_dev->idle_acc += getCurUs() - idle_start;
+		}
+                
+		/* Done with this */
+		pktgen_stop_device(pkt_dev);
+	} 
+ out:;
+ }
+
+/* 
+ * Main loop of the thread goes here
+ */
+
+static void pktgen_thread_worker(struct pktgen_thread *t) 
+{
+	DEFINE_WAIT(wait);
+        struct pktgen_dev *pkt_dev = NULL;
+	int cpu = t->cpu;
+	sigset_t tmpsig;
+	u32 max_before_softirq;
+        u32 tx_since_softirq = 0;
+
+	daemonize("pktgen/%d", cpu);
+
+        /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */
+
+        spin_lock_irq(&current->sighand->siglock);
+        tmpsig = current->blocked;
+        siginitsetinv(&current->blocked, 
+                      sigmask(SIGKILL) | 
+                      sigmask(SIGSTOP)| 
+                      sigmask(SIGTERM));
+
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+
+	/* Migrate to the right CPU */
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+        if (smp_processor_id() != cpu)
+                BUG();
+
+	init_waitqueue_head(&t->queue);
+
+	t->control &= ~(T_TERMINATE);
+	t->control &= ~(T_RUN);
+	t->control &= ~(T_STOP);
+	t->control &= ~(T_REMDEV);
+
+        t->pid = current->pid;        
+
+        PG_DEBUG(printk("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid));
+
+	max_before_softirq = t->max_before_softirq;
+        
+        __set_current_state(TASK_INTERRUPTIBLE);
+        mb();
+
+        while (1) {
+		
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * Get next dev to xmit -- if any.
+		 */
+
+                pkt_dev = next_to_run(t);
+                
+                if (pkt_dev) {
+
+			pktgen_xmit(pkt_dev);
+
+			/*
+			 * We like to stay RUNNING but must also give
+			 * others fair share.
+			 */
+
+			tx_since_softirq += pkt_dev->last_ok;
+
+			if (tx_since_softirq > max_before_softirq) {
+				if (local_softirq_pending())
+					do_softirq();
+				tx_since_softirq = 0;
+			}
+		} else {
+			prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE);
+			schedule_timeout(HZ/10);
+			finish_wait(&(t->queue), &wait);
+		}
+
+                /* 
+		 * Back from sleep, either due to the timeout or signal.
+		 * We check if we have any "posted" work for us.
+		 */
+
+                if (t->control & T_TERMINATE || signal_pending(current)) 
+                        /* we received a request to terminate ourself */
+                        break;
+		
+
+		if(t->control & T_STOP) {
+			pktgen_stop(t);
+			t->control &= ~(T_STOP);
+		}
+
+		if(t->control & T_RUN) {
+			pktgen_run(t);
+			t->control &= ~(T_RUN);
+		}
+
+		if(t->control & T_REMDEV) {
+			pktgen_rem_all_ifs(t);
+			t->control &= ~(T_REMDEV);
+		}
+
+		if (need_resched()) 
+			schedule();
+        } 
+
+        PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name));
+        pktgen_stop(t);
+
+        PG_DEBUG(printk("pktgen: %s removing all device\n", t->name));
+        pktgen_rem_all_ifs(t);
+
+        PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name));
+        pktgen_rem_thread(t);
+}
+
+static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* ifname) 
+{
+        struct pktgen_dev *pkt_dev = NULL;
+        if_lock(t);
+
+        for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) {
+                if (strcmp(pkt_dev->ifname, ifname) == 0) {
+                        break;
+                }
+        }
+
+        if_unlock(t);
+	PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname,pkt_dev));
+        return pkt_dev;
+}
+
+/* 
+ * Adds a dev at front of if_list. 
+ */
+
+static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+	int rv = 0;
+	
+        if_lock(t);
+
+        if (pkt_dev->pg_thread) {
+                printk("pktgen: ERROR:  already assigned to a thread.\n");
+                rv = -EBUSY;
+                goto out;
+        }
+	pkt_dev->next =t->if_list; t->if_list=pkt_dev;
+        pkt_dev->pg_thread = t;
+	pkt_dev->running = 0;
+
+ out:
+        if_unlock(t);        
+        return rv;
+}
+
+/* Called under thread lock */
+
+static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) 
+{
+        struct pktgen_dev *pkt_dev;
+	
+	/* We don't allow a device to be on several threads */
+
+	if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) {
+						   
+		pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL);
+                if (!pkt_dev) 
+                        return -ENOMEM;
+
+                memset(pkt_dev, 0, sizeof(struct pktgen_dev));
+
+		pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state));
+		if (pkt_dev->flows == NULL) {
+			kfree(pkt_dev);
+			return -ENOMEM;
+		}
+		memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state));
+
+		pkt_dev->min_pkt_size = ETH_ZLEN;
+                pkt_dev->max_pkt_size = ETH_ZLEN;
+                pkt_dev->nfrags = 0;
+                pkt_dev->clone_skb = pg_clone_skb_d;
+                pkt_dev->delay_us = pg_delay_d / 1000;
+                pkt_dev->delay_ns = pg_delay_d % 1000;
+                pkt_dev->count = pg_count_d;
+                pkt_dev->sofar = 0;
+                pkt_dev->udp_src_min = 9; /* sink port */
+                pkt_dev->udp_src_max = 9;
+                pkt_dev->udp_dst_min = 9;
+                pkt_dev->udp_dst_max = 9;
+
+                strncpy(pkt_dev->ifname, ifname, 31);
+                sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname);
+
+                if (! pktgen_setup_dev(pkt_dev)) {
+                        printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
+			if (pkt_dev->flows)
+				vfree(pkt_dev->flows);
+                        kfree(pkt_dev);
+                        return -ENODEV;
+                }
+
+                pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL);
+                if (!pkt_dev->proc_ent) {
+                        printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname);
+			if (pkt_dev->flows)
+				vfree(pkt_dev->flows);
+                        kfree(pkt_dev);
+                        return -EINVAL;
+                }
+                pkt_dev->proc_ent->read_proc = proc_if_read;
+                pkt_dev->proc_ent->write_proc = proc_if_write;
+                pkt_dev->proc_ent->data = (void*)(pkt_dev);
+		pkt_dev->proc_ent->owner = THIS_MODULE;
+
+                return add_dev_to_thread(t, pkt_dev);
+        }
+        else {
+                printk("pktgen: ERROR: interface already used.\n");
+                return -EBUSY;
+        }
+}
+
+static struct pktgen_thread *pktgen_find_thread(const char* name) 
+{
+        struct pktgen_thread *t = NULL;
+
+       thread_lock();
+
+        t = pktgen_threads;
+        while (t) {
+                if (strcmp(t->name, name) == 0) 
+                        break;
+
+                t = t->next;
+        }
+        thread_unlock();
+        return t;
+}
+
+static int pktgen_create_thread(const char* name, int cpu) 
+{
+        struct pktgen_thread *t = NULL;
+
+        if (strlen(name) > 31) {
+                printk("pktgen: ERROR:  Thread name cannot be more than 31 characters.\n");
+                return -EINVAL;
+        }
+        
+        if (pktgen_find_thread(name)) {
+                printk("pktgen: ERROR: thread: %s already exists\n", name);
+                return -EINVAL;
+        }
+
+        t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL));
+        if (!t) {
+                printk("pktgen: ERROR: out of memory, can't create new thread.\n");
+                return -ENOMEM;
+        }
+
+        memset(t, 0, sizeof(struct pktgen_thread));
+        strcpy(t->name, name);
+        spin_lock_init(&t->if_lock);
+	t->cpu = cpu;
+        
+        sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name);
+        t->proc_ent = create_proc_entry(t->fname, 0600, NULL);
+        if (!t->proc_ent) {
+                printk("pktgen: cannot create %s procfs entry.\n", t->fname);
+                kfree(t);
+                return -EINVAL;
+        }
+        t->proc_ent->read_proc = proc_thread_read;
+        t->proc_ent->write_proc = proc_thread_write;
+        t->proc_ent->data = (void*)(t);
+        t->proc_ent->owner = THIS_MODULE;
+
+        t->next = pktgen_threads;
+        pktgen_threads = t;
+
+	if (kernel_thread((void *) pktgen_thread_worker, (void *) t, 
+			  CLONE_FS | CLONE_FILES | CLONE_SIGHAND) < 0)
+		printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
+
+	return 0;
+}
+
+/* 
+ * Removes a device from the thread if_list. 
+ */
+static void _rem_dev_from_if_list(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+	struct pktgen_dev *i, *prev = NULL;
+
+	i = t->if_list;
+
+	while(i) {
+		if(i == pkt_dev) {
+			if(prev) prev->next = i->next;
+			else t->if_list = NULL;
+			break;
+		}
+		prev = i;
+		i=i->next;
+	}
+}
+
+static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) 
+{
+
+	PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+
+        if (pkt_dev->running) { 
+                printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
+                pktgen_stop_device(pkt_dev);
+        }
+        
+        /* Dis-associate from the interface */
+
+	if (pkt_dev->odev) {
+		dev_put(pkt_dev->odev);
+                pkt_dev->odev = NULL;
+        }
+        
+	/* And update the thread if_list */
+
+	_rem_dev_from_if_list(t, pkt_dev);
+
+        /* Clean up proc file system */
+
+        if (strlen(pkt_dev->fname)) 
+                remove_proc_entry(pkt_dev->fname, NULL);
+
+	if (pkt_dev->flows)
+		vfree(pkt_dev->flows);
+	kfree(pkt_dev);
+        return 0;
+}
+
+static int __init pg_init(void) 
+{
+	int cpu;
+	printk(version);
+
+        module_fname[0] = 0;
+
+	create_proc_dir();
+
+        sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR);
+        module_proc_ent = create_proc_entry(module_fname, 0600, NULL);
+        if (!module_proc_ent) {
+                printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname);
+                return -EINVAL;
+        }
+
+        module_proc_ent->proc_fops =  &pktgen_fops;
+        module_proc_ent->data = NULL;
+
+	/* Register us to receive netdevice events */
+	register_netdevice_notifier(&pktgen_notifier_block);
+        
+	for (cpu = 0; cpu < NR_CPUS ; cpu++) {
+		char buf[30];
+
+		if (!cpu_online(cpu))
+			continue;
+
+                sprintf(buf, "kpktgend_%i", cpu);
+                pktgen_create_thread(buf, cpu);
+        }
+        return 0;        
+}
+
+static void __exit pg_cleanup(void)
+{
+	wait_queue_head_t queue;
+	init_waitqueue_head(&queue);
+
+        /* Stop all interfaces & threads */        
+
+        while (pktgen_threads) {
+                struct pktgen_thread *t = pktgen_threads;
+                pktgen_threads->control |= (T_TERMINATE);
+
+		wait_event_interruptible_timeout(queue, (t != pktgen_threads), HZ);
+        }
+
+        /* Un-register us from receiving netdevice events */
+	unregister_netdevice_notifier(&pktgen_notifier_block);
+
+        /* Clean up proc file system */
+
+        remove_proc_entry(module_fname, NULL);
+        
+	remove_proc_dir();
+}
+
+
+module_init(pg_init);
+module_exit(pg_cleanup);
+
+MODULE_AUTHOR("Robert Olsson <robert.olsson@its.uu.se");
+MODULE_DESCRIPTION("Packet Generator tool");
+MODULE_LICENSE("GPL");
+module_param(pg_count_d, int, 0);
+module_param(pg_delay_d, int, 0);
+module_param(pg_clone_skb_d, int, 0);
+module_param(debug, int, 0);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
new file mode 100644
index 00000000000..d69ad90e581
--- /dev/null
+++ b/net/core/rtnetlink.c
@@ -0,0 +1,711 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Routing netlink socket interface: protocol independent part.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/security.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/string.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+DECLARE_MUTEX(rtnl_sem);
+
+void rtnl_lock(void)
+{
+	rtnl_shlock();
+}
+
+int rtnl_lock_interruptible(void)
+{
+	return down_interruptible(&rtnl_sem);
+}
+ 
+void rtnl_unlock(void)
+{
+	rtnl_shunlock();
+
+	netdev_run_todo();
+}
+
+int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
+{
+	memset(tb, 0, sizeof(struct rtattr*)*maxattr);
+
+	while (RTA_OK(rta, len)) {
+		unsigned flavor = rta->rta_type;
+		if (flavor && flavor <= maxattr)
+			tb[flavor-1] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+	return 0;
+}
+
+struct sock *rtnl;
+
+struct rtnetlink_link * rtnetlink_links[NPROTO];
+
+static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
+{
+	NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+	NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+	NLMSG_LENGTH(sizeof(struct rtmsg)),
+	NLMSG_LENGTH(sizeof(struct ndmsg)),
+	NLMSG_LENGTH(sizeof(struct rtmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcmsg)),
+	NLMSG_LENGTH(sizeof(struct tcamsg))
+};
+
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
+{
+	IFLA_MAX,
+	IFA_MAX,
+	RTA_MAX,
+	NDA_MAX,
+	RTA_MAX,
+	TCA_MAX,
+	TCA_MAX,
+	TCA_MAX,
+	TCAA_MAX
+};
+
+void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+	struct rtattr *rta;
+	int size = RTA_LENGTH(attrlen);
+
+	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
+	rta->rta_type = attrtype;
+	rta->rta_len = size;
+	memcpy(RTA_DATA(rta), data, attrlen);
+}
+
+size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
+{
+	size_t ret = RTA_PAYLOAD(rta);
+	char *src = RTA_DATA(rta);
+
+	if (ret > 0 && src[ret - 1] == '\0')
+		ret--;
+	if (size > 0) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memset(dest, 0, size);
+		memcpy(dest, src, len);
+	}
+	return ret;
+}
+
+int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+{
+	int err = 0;
+
+	NETLINK_CB(skb).dst_groups = group;
+	if (echo)
+		atomic_inc(&skb->users);
+	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+	if (echo)
+		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+	return err;
+}
+
+int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
+{
+	struct rtattr *mx = (struct rtattr*)skb->tail;
+	int i;
+
+	RTA_PUT(skb, RTA_METRICS, 0, NULL);
+	for (i=0; i<RTAX_MAX; i++) {
+		if (metrics[i])
+			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+	}
+	mx->rta_len = skb->tail - (u8*)mx;
+	if (mx->rta_len == RTA_LENGTH(0))
+		skb_trim(skb, (u8*)mx - skb->data);
+	return 0;
+
+rtattr_failure:
+	skb_trim(skb, (u8*)mx - skb->data);
+	return -1;
+}
+
+
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+				 int type, u32 pid, u32 seq, u32 change)
+{
+	struct ifinfomsg *r;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	r = NLMSG_DATA(nlh);
+	r->ifi_family = AF_UNSPEC;
+	r->ifi_type = dev->type;
+	r->ifi_index = dev->ifindex;
+	r->ifi_flags = dev_get_flags(dev);
+	r->ifi_change = change;
+
+	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+
+	if (1) {
+		u32 txqlen = dev->tx_queue_len;
+		RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
+	}
+
+	if (1) {
+		u32 weight = dev->weight;
+		RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
+	}
+
+	if (1) {
+		struct rtnl_link_ifmap map = {
+			.mem_start   = dev->mem_start,
+			.mem_end     = dev->mem_end,
+			.base_addr   = dev->base_addr,
+			.irq         = dev->irq,
+			.dma         = dev->dma,
+			.port        = dev->if_port,
+		};
+		RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+	}
+
+	if (dev->addr_len) {
+		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+	}
+
+	if (1) {
+		u32 mtu = dev->mtu;
+		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+	}
+
+	if (dev->ifindex != dev->iflink) {
+		u32 iflink = dev->iflink;
+		RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
+	}
+
+	if (dev->qdisc_sleeping)
+		RTA_PUT(skb, IFLA_QDISC,
+			strlen(dev->qdisc_sleeping->ops->id) + 1,
+			dev->qdisc_sleeping->ops->id);
+	
+	if (dev->master) {
+		u32 master = dev->master->ifindex;
+		RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+	}
+
+	if (dev->get_stats) {
+		unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+		if (stats) {
+			struct rtattr  *a;
+			__u32	       *s;
+			int		i;
+			int		n = sizeof(struct rtnl_link_stats)/4;
+
+			a = __RTA_PUT(skb, IFLA_STATS, n*4);
+			s = RTA_DATA(a);
+			for (i=0; i<n; i++)
+				s[i] = stats[i];
+		}
+	}
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->args[0];
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+			break;
+	}
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
+	struct rtattr    **ida = arg;
+	struct net_device *dev;
+	int err, send_addr_notify = 0;
+
+	if (ifm->ifi_index >= 0)
+		dev = dev_get_by_index(ifm->ifi_index);
+	else if (ida[IFLA_IFNAME - 1]) {
+		char ifname[IFNAMSIZ];
+
+		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
+		                   IFNAMSIZ) >= IFNAMSIZ)
+			return -EINVAL;
+		dev = dev_get_by_name(ifname);
+	} else
+		return -EINVAL;
+
+	if (!dev)
+		return -ENODEV;
+
+	err = -EINVAL;
+
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
+
+	if (ida[IFLA_MAP - 1]) {
+		struct rtnl_link_ifmap *u_map;
+		struct ifmap k_map;
+
+		if (!dev->set_config) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto out;
+		}
+		
+		if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
+			goto out;
+
+		u_map = RTA_DATA(ida[IFLA_MAP - 1]);
+
+		k_map.mem_start = (unsigned long) u_map->mem_start;
+		k_map.mem_end = (unsigned long) u_map->mem_end;
+		k_map.base_addr = (unsigned short) u_map->base_addr;
+		k_map.irq = (unsigned char) u_map->irq;
+		k_map.dma = (unsigned char) u_map->dma;
+		k_map.port = (unsigned char) u_map->port;
+
+		err = dev->set_config(dev, &k_map);
+
+		if (err)
+			goto out;
+	}
+
+	if (ida[IFLA_ADDRESS - 1]) {
+		if (!dev->set_mac_address) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto out;
+		}
+		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
+			goto out;
+
+		err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+		if (err)
+			goto out;
+		send_addr_notify = 1;
+	}
+
+	if (ida[IFLA_BROADCAST - 1]) {
+		if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
+			goto out;
+		memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
+		       dev->addr_len);
+		send_addr_notify = 1;
+	}
+
+	if (ida[IFLA_MTU - 1]) {
+		if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+		err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
+
+		if (err)
+			goto out;
+
+	}
+
+	if (ida[IFLA_TXQLEN - 1]) {
+		if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+
+		dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
+	}
+
+	if (ida[IFLA_WEIGHT - 1]) {
+		if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
+			goto out;
+
+		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
+	}
+
+	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
+		char ifname[IFNAMSIZ];
+
+		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
+		                   IFNAMSIZ) >= IFNAMSIZ)
+			goto out;
+		err = dev_change_name(dev, ifname);
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+
+out:
+	if (send_addr_notify)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
+	dev_put(dev);
+	return err;
+}
+
+static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->family;
+
+	if (s_idx == 0)
+		s_idx = 1;
+	for (idx=1; idx<NPROTO; idx++) {
+		int type = cb->nlh->nlmsg_type-RTM_BASE;
+		if (idx < s_idx || idx == PF_PACKET)
+			continue;
+		if (rtnetlink_links[idx] == NULL ||
+		    rtnetlink_links[idx][type].dumpit == NULL)
+			continue;
+		if (idx > s_idx)
+			memset(&cb->args[0], 0, sizeof(cb->args));
+		if (rtnetlink_links[idx][type].dumpit(skb, cb))
+			break;
+	}
+	cb->family = idx;
+
+	return skb->len;
+}
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
+			       sizeof(struct rtnl_link_ifmap) +
+			       sizeof(struct rtnl_link_stats) + 128);
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return;
+
+	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL);
+}
+
+static int rtnetlink_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+/* Protected by RTNL sempahore.  */
+static struct rtattr **rta_buf;
+static int rtattr_max;
+
+/* Process one rtnetlink message. */
+
+static __inline__ int
+rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtnetlink_link *link;
+	struct rtnetlink_link *link_tab;
+	int sz_idx, kind;
+	int min_len;
+	int family;
+	int type;
+	int err;
+
+	/* Only requests are handled by kernel now */
+	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < RTM_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > RTM_MAX)
+		goto err_inval;
+
+	type -= RTM_BASE;
+
+	/* All the messages must have at least 1 byte length */
+	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+		return 0;
+
+	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+	if (family >= NPROTO) {
+		*errp = -EAFNOSUPPORT;
+		return -1;
+	}
+
+	link_tab = rtnetlink_links[family];
+	if (link_tab == NULL)
+		link_tab = rtnetlink_links[PF_UNSPEC];
+	link = &link_tab[type];
+
+	sz_idx = type>>2;
+	kind = type&3;
+
+	if (kind != 2 && security_netlink_recv(skb)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+		u32 rlen;
+
+		if (link->dumpit == NULL)
+			link = &(rtnetlink_links[PF_UNSPEC][type]);
+
+		if (link->dumpit == NULL)
+			goto err_inval;
+
+		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
+						link->dumpit,
+						rtnetlink_done)) != 0) {
+			return -1;
+		}
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;
+	}
+
+	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
+
+	min_len = rtm_min[sz_idx];
+	if (nlh->nlmsg_len < min_len)
+		goto err_inval;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > rta_max[sz_idx])
+					goto err_inval;
+				rta_buf[flavor-1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		link = &(rtnetlink_links[PF_UNSPEC][type]);
+	if (link->doit == NULL)
+		goto err_inval;
+	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+
+	*errp = err;
+	return err;
+
+err_inval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+/* 
+ * Process one packet of messages.
+ * Malformed skbs with wrong lengths of messages are discarded silently.
+ */
+
+static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr * nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *)skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+			return 0;
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (rtnetlink_rcv_msg(skb, nlh, &err)) {
+			/* Not error, but we must interrupt processing here:
+			 *   Note, that in this case we do not pull message
+			 *   from skb, it will be processed later.
+			 */
+			if (err == 0)
+				return -1;
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags&NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+
+	return 0;
+}
+
+/*
+ *  rtnetlink input queue processing routine:
+ *	- try to acquire shared lock. If it is failed, defer processing.
+ *	- feed skbs to rtnetlink_rcv_skb, until it refuse a message,
+ *	  that will occur, when a dump started and/or acquisition of
+ *	  exclusive lock failed.
+ */
+
+static void rtnetlink_rcv(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		if (rtnl_shlock_nowait())
+			return;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			if (rtnetlink_rcv_skb(skb)) {
+				if (skb->len)
+					skb_queue_head(&sk->sk_receive_queue,
+						       skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&rtnl_sem);
+
+		netdev_run_todo();
+	} while (rtnl && rtnl->sk_receive_queue.qlen);
+}
+
+static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+	[RTM_GETLINK  - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
+	[RTM_SETLINK  - RTM_BASE] = { .doit   = do_setlink	      },
+	[RTM_GETADDR  - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_NEWNEIGH - RTM_BASE] = { .doit   = neigh_add	      },
+	[RTM_DELNEIGH - RTM_BASE] = { .doit   = neigh_delete	      },
+	[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info	      }
+};
+
+static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+		break;
+	case NETDEV_REGISTER:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		break;
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		break;
+	case NETDEV_CHANGE:
+	case NETDEV_GOING_DOWN:
+		break;
+	default:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block rtnetlink_dev_notifier = {
+	.notifier_call	= rtnetlink_event,
+};
+
+void __init rtnetlink_init(void)
+{
+	int i;
+
+	rtattr_max = 0;
+	for (i = 0; i < ARRAY_SIZE(rta_max); i++)
+		if (rta_max[i] > rtattr_max)
+			rtattr_max = rta_max[i];
+	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
+	if (!rta_buf)
+		panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+	rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+	if (rtnl == NULL)
+		panic("rtnetlink_init: cannot initialize rtnetlink\n");
+	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+	register_netdevice_notifier(&rtnetlink_dev_notifier);
+	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
+	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+}
+
+EXPORT_SYMBOL(__rta_fill);
+EXPORT_SYMBOL(rtattr_strlcpy);
+EXPORT_SYMBOL(rtattr_parse);
+EXPORT_SYMBOL(rtnetlink_links);
+EXPORT_SYMBOL(rtnetlink_put_metrics);
+EXPORT_SYMBOL(rtnl);
+EXPORT_SYMBOL(rtnl_lock);
+EXPORT_SYMBOL(rtnl_lock_interruptible);
+EXPORT_SYMBOL(rtnl_sem);
+EXPORT_SYMBOL(rtnl_unlock);
diff --git a/net/core/scm.c b/net/core/scm.c
new file mode 100644
index 00000000000..a2ebf30f6aa
--- /dev/null
+++ b/net/core/scm.c
@@ -0,0 +1,291 @@
+/* scm.c - Socket level control messages processing.
+ *
+ * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Alignment and value checking mods by Craig Metz
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/compat.h>
+#include <net/scm.h>
+
+
+/*
+ *	Only allow a user to send credentials, that they could set with 
+ *	setu(g)id.
+ */
+
+static __inline__ int scm_check_creds(struct ucred *creds)
+{
+	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	    ((creds->uid == current->uid || creds->uid == current->euid ||
+	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
+	    ((creds->gid == current->gid || creds->gid == current->egid ||
+	      creds->gid == current->sgid) || capable(CAP_SETGID))) {
+	       return 0;
+	}
+	return -EPERM;
+}
+
+static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+{
+	int *fdp = (int*)CMSG_DATA(cmsg);
+	struct scm_fp_list *fpl = *fplp;
+	struct file **fpp;
+	int i, num;
+
+	num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+
+	if (num <= 0)
+		return 0;
+
+	if (num > SCM_MAX_FD)
+		return -EINVAL;
+
+	if (!fpl)
+	{
+		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		if (!fpl)
+			return -ENOMEM;
+		*fplp = fpl;
+		fpl->count = 0;
+	}
+	fpp = &fpl->fp[fpl->count];
+
+	if (fpl->count + num > SCM_MAX_FD)
+		return -EINVAL;
+	
+	/*
+	 *	Verify the descriptors and increment the usage count.
+	 */
+	 
+	for (i=0; i< num; i++)
+	{
+		int fd = fdp[i];
+		struct file *file;
+
+		if (fd < 0 || !(file = fget(fd)))
+			return -EBADF;
+		*fpp++ = file;
+		fpl->count++;
+	}
+	return num;
+}
+
+void __scm_destroy(struct scm_cookie *scm)
+{
+	struct scm_fp_list *fpl = scm->fp;
+	int i;
+
+	if (fpl) {
+		scm->fp = NULL;
+		for (i=fpl->count-1; i>=0; i--)
+			fput(fpl->fp[i]);
+		kfree(fpl);
+	}
+}
+
+int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
+{
+	struct cmsghdr *cmsg;
+	int err;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
+	{
+		err = -EINVAL;
+
+		/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
+		/* The first check was omitted in <= 2.2.5. The reasoning was
+		   that parser checks cmsg_len in any case, so that
+		   additional check would be work duplication.
+		   But if cmsg_level is not SOL_SOCKET, we do not check 
+		   for too short ancillary data object at all! Oops.
+		   OK, let's add it...
+		 */
+		if (!CMSG_OK(msg, cmsg))
+			goto error;
+
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			continue;
+
+		switch (cmsg->cmsg_type)
+		{
+		case SCM_RIGHTS:
+			err=scm_fp_copy(cmsg, &p->fp);
+			if (err<0)
+				goto error;
+			break;
+		case SCM_CREDENTIALS:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
+				goto error;
+			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+			err = scm_check_creds(&p->creds);
+			if (err)
+				goto error;
+			break;
+		default:
+			goto error;
+		}
+	}
+
+	if (p->fp && !p->fp->count)
+	{
+		kfree(p->fp);
+		p->fp = NULL;
+	}
+	return 0;
+	
+error:
+	scm_destroy(p);
+	return err;
+}
+
+int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
+	struct cmsghdr cmhdr;
+	int cmlen = CMSG_LEN(len);
+	int err;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags)
+		return put_cmsg_compat(msg, level, type, len, data);
+
+	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return 0; /* XXX: return error? check spec. */
+	}
+	if (msg->msg_controllen < cmlen) {
+		msg->msg_flags |= MSG_CTRUNC;
+		cmlen = msg->msg_controllen;
+	}
+	cmhdr.cmsg_level = level;
+	cmhdr.cmsg_type = type;
+	cmhdr.cmsg_len = cmlen;
+
+	err = -EFAULT;
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+		goto out; 
+	if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+		goto out;
+	cmlen = CMSG_SPACE(len);
+	msg->msg_control += cmlen;
+	msg->msg_controllen -= cmlen;
+	err = 0;
+out:
+	return err;
+}
+
+void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
+
+	int fdmax = 0;
+	int fdnum = scm->fp->count;
+	struct file **fp = scm->fp->fp;
+	int __user *cmfptr;
+	int err = 0, i;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags) {
+		scm_detach_fds_compat(msg, scm);
+		return;
+	}
+
+	if (msg->msg_controllen > sizeof(struct cmsghdr))
+		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
+			 / sizeof(int));
+
+	if (fdnum < fdmax)
+		fdmax = fdnum;
+
+	for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+	{
+		int new_fd;
+		err = security_file_receive(fp[i]);
+		if (err)
+			break;
+		err = get_unused_fd();
+		if (err < 0)
+			break;
+		new_fd = err;
+		err = put_user(new_fd, cmfptr);
+		if (err) {
+			put_unused_fd(new_fd);
+			break;
+		}
+		/* Bump the usage count and install the file. */
+		get_file(fp[i]);
+		fd_install(new_fd, fp[i]);
+	}
+
+	if (i > 0)
+	{
+		int cmlen = CMSG_LEN(i*sizeof(int));
+		if (!err)
+			err = put_user(SOL_SOCKET, &cm->cmsg_level);
+		if (!err)
+			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+		if (!err)
+			err = put_user(cmlen, &cm->cmsg_len);
+		if (!err) {
+			cmlen = CMSG_SPACE(i*sizeof(int));
+			msg->msg_control += cmlen;
+			msg->msg_controllen -= cmlen;
+		}
+	}
+	if (i < fdnum || (fdnum && fdmax <= 0))
+		msg->msg_flags |= MSG_CTRUNC;
+
+	/*
+	 * All of the files that fit in the message have had their
+	 * usage counts incremented, so we just free the list.
+	 */
+	__scm_destroy(scm);
+}
+
+struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
+{
+	struct scm_fp_list *new_fpl;
+	int i;
+
+	if (!fpl)
+		return NULL;
+
+	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	if (new_fpl) {
+		for (i=fpl->count-1; i>=0; i--)
+			get_file(fpl->fp[i]);
+		memcpy(new_fpl, fpl, sizeof(*fpl));
+	}
+	return new_fpl;
+}
+
+EXPORT_SYMBOL(__scm_destroy);
+EXPORT_SYMBOL(__scm_send);
+EXPORT_SYMBOL(put_cmsg);
+EXPORT_SYMBOL(scm_detach_fds);
+EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
new file mode 100644
index 00000000000..bf02ca9f80a
--- /dev/null
+++ b/net/core/skbuff.c
@@ -0,0 +1,1460 @@
+/*
+ *	Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
+ *			Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
+ *
+ *	Fixes:
+ *		Alan Cox	:	Fixed the worst of the load
+ *					balancer bugs.
+ *		Dave Platt	:	Interrupt stacking fix.
+ *	Richard Kooijman	:	Timestamp fixes.
+ *		Alan Cox	:	Changed buffer format.
+ *		Alan Cox	:	destructor hook for AF_UNIX etc.
+ *		Linus Torvalds	:	Better skb_clone.
+ *		Alan Cox	:	Added skb_copy.
+ *		Alan Cox	:	Added all the changed routines Linus
+ *					only put in the headers
+ *		Ray VanTassle	:	Fixed --skb->lock in free
+ *		Alan Cox	:	skb_copy copy arp field
+ *		Andi Kleen	:	slabified it.
+ *		Robert Olsson	:	Removed skb_head_pool
+ *
+ *	NOTE:
+ *		The __skb_ routines should be called with interrupts
+ *	disabled, or you better be *real* sure that the operation is atomic
+ *	with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *	or via disabling bottom half handlers, etc).
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_NET_CLS_ACT
+#include <net/pkt_sched.h>
+#endif
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static kmem_cache_t *skbuff_head_cache;
+
+/*
+ *	Keep out-of-line to prevent kernel bloat.
+ *	__builtin_return_address is not used because it is not always
+ *	reliable.
+ */
+
+/**
+ *	skb_over_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
+		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/**
+ *	skb_under_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
+               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *	'private' fields and also do memory statistics to find all the
+ *	[BEEP] leaks.
+ *
+ */
+
+/**
+ *	alloc_skb	-	allocate a network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	/* Get the DATA. Size must match skb_add_mtu(). */
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end  = data + size;
+
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+out:
+	return skb;
+nodata:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
+/**
+ *	alloc_skb_from_cache	-	allocate a network buffer
+ *	@cp: kmem_cache from which to allocate the data area
+ *           (object size must be big enough for @size bytes + skb overheads)
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new &sk_buff. The returned buffer has no headroom and
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is %NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a @gfp_mask of
+ *	%GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+				     unsigned int size, int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	/* Get the DATA. */
+	size = SKB_DATA_ALIGN(size);
+	data = kmem_cache_alloc(cp, gfp_mask);
+	if (!data)
+		goto nodata;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end  = data + size;
+
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+out:
+	return skb;
+nodata:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
+
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+	skb_shinfo(skb)->frag_list = NULL;
+
+	do {
+		struct sk_buff *this = list;
+		list = list->next;
+		kfree_skb(this);
+	} while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list;
+
+	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
+		skb_get(list);
+}
+
+void skb_release_data(struct sk_buff *skb)
+{
+	if (!skb->cloned ||
+	    !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+			       &skb_shinfo(skb)->dataref)) {
+		if (skb_shinfo(skb)->nr_frags) {
+			int i;
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				put_page(skb_shinfo(skb)->frags[i].page);
+		}
+
+		if (skb_shinfo(skb)->frag_list)
+			skb_drop_fraglist(skb);
+
+		kfree(skb->head);
+	}
+}
+
+/*
+ *	Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+	skb_release_data(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
+/**
+ *	__kfree_skb - private function
+ *	@skb: buffer
+ *
+ *	Free an sk_buff. Release anything attached to the buffer.
+ *	Clean the state. This is an internal helper function. Users should
+ *	always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+	if (skb->list) {
+	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+		       "on a list (from %p).\n", NET_CALLER(skb));
+		BUG();
+	}
+
+	dst_release(skb->dst);
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+#endif
+	if(skb->destructor) {
+		if (in_irq())
+			printk(KERN_WARNING "Warning: kfree_skb on "
+					    "hard IRQ %p\n", NET_CALLER(skb));
+		skb->destructor(skb);
+	}
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(skb->nf_bridge);
+#endif
+#endif
+/* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = 0;
+	skb->tc_classid = 0;
+#endif
+#endif
+
+	kfree_skbmem(skb);
+}
+
+/**
+ *	skb_clone	-	duplicate an sk_buff
+ *	@skb: buffer to clone
+ *	@gfp_mask: allocation priority
+ *
+ *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *	copies share the same packet data but not structure. The new
+ *	buffer has a reference count of 1. If the allocation fails the
+ *	function returns %NULL otherwise the new buffer is returned.
+ *
+ *	If this function is called from an interrupt gfp_mask() must be
+ *	%GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+
+	if (!n) 
+		return NULL;
+
+#define C(x) n->x = skb->x
+
+	n->next = n->prev = NULL;
+	n->list = NULL;
+	n->sk = NULL;
+	C(stamp);
+	C(dev);
+	C(real_dev);
+	C(h);
+	C(nh);
+	C(mac);
+	C(dst);
+	dst_clone(skb->dst);
+	C(sp);
+#ifdef CONFIG_INET
+	secpath_get(skb->sp);
+#endif
+	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+	C(len);
+	C(data_len);
+	C(csum);
+	C(local_df);
+	n->cloned = 1;
+	n->nohdr = 0;
+	C(pkt_type);
+	C(ip_summed);
+	C(priority);
+	C(protocol);
+	C(security);
+	n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+	C(nfmark);
+	C(nfcache);
+	C(nfct);
+	nf_conntrack_get(skb->nfct);
+	C(nfctinfo);
+#ifdef CONFIG_NETFILTER_DEBUG
+	C(nf_debug);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	C(nf_bridge);
+	nf_bridge_get(skb->nf_bridge);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+	C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+	C(tc_index);
+#ifdef CONFIG_NET_CLS_ACT
+	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+	C(input_dev);
+	C(tc_classid);
+#endif
+
+#endif
+	C(truesize);
+	atomic_set(&n->users, 1);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+
+	return n;
+}
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	/*
+	 *	Shift between the two data areas in bytes
+	 */
+	unsigned long offset = new->data - old->data;
+
+	new->list	= NULL;
+	new->sk		= NULL;
+	new->dev	= old->dev;
+	new->real_dev	= old->real_dev;
+	new->priority	= old->priority;
+	new->protocol	= old->protocol;
+	new->dst	= dst_clone(old->dst);
+#ifdef CONFIG_INET
+	new->sp		= secpath_get(old->sp);
+#endif
+	new->h.raw	= old->h.raw + offset;
+	new->nh.raw	= old->nh.raw + offset;
+	new->mac.raw	= old->mac.raw + offset;
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	new->local_df	= old->local_df;
+	new->pkt_type	= old->pkt_type;
+	new->stamp	= old->stamp;
+	new->destructor = NULL;
+	new->security	= old->security;
+#ifdef CONFIG_NETFILTER
+	new->nfmark	= old->nfmark;
+	new->nfcache	= old->nfcache;
+	new->nfct	= old->nfct;
+	nf_conntrack_get(old->nfct);
+	new->nfctinfo	= old->nfctinfo;
+#ifdef CONFIG_NETFILTER_DEBUG
+	new->nf_debug	= old->nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	new->nf_bridge	= old->nf_bridge;
+	nf_bridge_get(old->nf_bridge);
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd = old->tc_verd;
+#endif
+	new->tc_index	= old->tc_index;
+#endif
+	atomic_set(&new->users, 1);
+	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
+	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+}
+
+/**
+ *	skb_copy	-	create private copy of an sk_buff
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data. This is used when the
+ *	caller wishes to modify the data and needs a private copy of the
+ *	data to alter. Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	As by-product this function converts non-linear &sk_buff to linear
+ *	one, so that &sk_buff becomes completely private and caller is allowed
+ *	to modify all the data of returned buffer. This means that this
+ *	function is not recommended for use in circumstances when only
+ *	header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+	int headerlen = skb->data - skb->head;
+	/*
+	 *	Allocate the copy buffer
+	 */
+	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
+				      gfp_mask);
+	if (!n)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+	n->csum	     = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+	return n;
+}
+
+
+/**
+ *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and part of its data, located
+ *	in header. Fragmented data remain shared. This is used when
+ *	the caller wishes to modify only header of &sk_buff and needs
+ *	private copy of the header to alter. Returns %NULL on failure
+ *	or the pointer to the buffer on success.
+ *	The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+	/*
+	 *	Allocate the copy buffer
+	 */
+	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
+
+	if (!n)
+		goto out;
+
+	/* Set the data pointer */
+	skb_reserve(n, skb->data - skb->head);
+	/* Set the tail pointer and length */
+	skb_put(n, skb_headlen(skb));
+	/* Copy the bytes */
+	memcpy(n->data, skb->data, n->len);
+	n->csum	     = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	n->data_len  = skb->data_len;
+	n->len	     = skb->len;
+
+	if (skb_shinfo(skb)->nr_frags) {
+		int i;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+			get_page(skb_shinfo(n)->frags[i].page);
+		}
+		skb_shinfo(n)->nr_frags = i;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(n);
+	}
+
+	copy_skb_header(n, skb);
+out:
+	return n;
+}
+
+/**
+ *	pskb_expand_head - reallocate header of &sk_buff
+ *	@skb: buffer to reallocate
+ *	@nhead: room to add at head
+ *	@ntail: room to add at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Expands (or creates identical copy, if &nhead and &ntail are zero)
+ *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	reference count of 1. Returns zero in the case of success or error,
+ *	if expansion failed. In the last case, &sk_buff is not changed.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+	int i;
+	u8 *data;
+	int size = nhead + (skb->end - skb->head) + ntail;
+	long off;
+
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void. */
+	memcpy(data + nhead, skb->head, skb->tail - skb->head);
+	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		get_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_clone_fraglist(skb);
+
+	skb_release_data(skb);
+
+	off = (data + nhead) - skb->head;
+
+	skb->head     = data;
+	skb->end      = data + size;
+	skb->data    += off;
+	skb->tail    += off;
+	skb->mac.raw += off;
+	skb->h.raw   += off;
+	skb->nh.raw  += off;
+	skb->cloned   = 0;
+	skb->nohdr    = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nodata:
+	return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *skb2;
+	int delta = headroom - skb_headroom(skb);
+
+	if (delta <= 0)
+		skb2 = pskb_copy(skb, GFP_ATOMIC);
+	else {
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
+					     GFP_ATOMIC)) {
+			kfree_skb(skb2);
+			skb2 = NULL;
+		}
+	}
+	return skb2;
+}
+
+
+/**
+ *	skb_copy_expand	-	copy and expand sk_buff
+ *	@skb: buffer to copy
+ *	@newheadroom: new free bytes at head
+ *	@newtailroom: new free bytes at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an &sk_buff and its data and while doing so
+ *	allocate additional space.
+ *
+ *	This is used when the caller wishes to modify the data and needs a
+ *	private copy of the data to alter as well as more space for new fields.
+ *	Returns %NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass %GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
+ *
+ *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
+ *	only by netfilter in the cases when checksum is recalculated? --ANK
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+				int newheadroom, int newtailroom, int gfp_mask)
+{
+	/*
+	 *	Allocate the copy buffer
+	 */
+	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
+				      gfp_mask);
+	int head_copy_len, head_copy_off;
+
+	if (!n)
+		return NULL;
+
+	skb_reserve(n, newheadroom);
+
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	head_copy_len = skb_headroom(skb);
+	head_copy_off = 0;
+	if (newheadroom <= head_copy_len)
+		head_copy_len = newheadroom;
+	else
+		head_copy_off = newheadroom - head_copy_len;
+
+	/* Copy the linear header and data. */
+	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+			  skb->len + head_copy_len))
+		BUG();
+
+	copy_skb_header(n, skb);
+
+	return n;
+}
+
+/**
+ *	skb_pad			-	zero pad the tail of an skb
+ *	@skb: buffer to pad
+ *	@pad: space to pad
+ *
+ *	Ensure that a buffer is followed by a padding area that is zero
+ *	filled. Used by network drivers which may DMA or transfer data
+ *	beyond the buffer end onto the wire.
+ *
+ *	May return NULL in out of memory cases.
+ */
+ 
+struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+{
+	struct sk_buff *nskb;
+	
+	/* If the skbuff is non linear tailroom is always zero.. */
+	if (skb_tailroom(skb) >= pad) {
+		memset(skb->data+skb->len, 0, pad);
+		return skb;
+	}
+	
+	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+	kfree_skb(skb);
+	if (nskb)
+		memset(nskb->data+nskb->len, 0, pad);
+	return nskb;
+}	
+ 
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i = 0; i < nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				skb_shinfo(skb)->frags[i].size = len - offset;
+			}
+		}
+		offset = end;
+	}
+
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len       = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			skb->len      = len;
+			skb->data_len = 0;
+			skb->tail     = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len       = len;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	__pskb_pull_tail - advance tail of skb header
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes a sense only on a fragmented &sk_buff,
+ *	it expands header moving its tail forward and copying necessary
+ *	data from fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	int i, k, eat = (skb->tail + delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+				     GFP_ATOMIC))
+			return NULL;
+	}
+
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (!skb_shinfo(skb)->frag_list)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need update frag list, we are in troubles.
+	 * Certainly, it possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be very rare operation, it is worth to fight against
+	 * further bloating skb head and crucify ourselves here instead.
+	 * Pure masohism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (!list)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (!clone)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (!pskb_pull(list, eat)) {
+					if (clone)
+						kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	eat = delta;
+	k = 0;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail     += delta;
+	skb->data_len -= delta;
+
+	return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int i, copy;
+	int start = skb_headlen(skb);
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		memcpy(to, skb->data + offset, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to     += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to,
+			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
+			       offset - start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_bits(list, offset - start,
+						  to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to     += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/* Checksum skb data. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+			  int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int pos = 0;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data + offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset - start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				unsigned int csum2;
+				if (copy > len)
+					copy = len;
+				csum2 = skb_checksum(list, offset - start,
+						     copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+
+	return csum;
+}
+
+/* Both of above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+				    u8 *to, int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int pos = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data + offset, to,
+						 copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to     += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr +
+							  frag->page_offset +
+							  offset - start, to,
+							  copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to     += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			unsigned int csum2;
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				csum2 = skb_copy_and_csum_bits(list,
+							       offset - start,
+							       to, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				to     += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return csum;
+}
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+	unsigned int csum;
+	long csstart;
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		csstart = skb->h.raw - skb->data;
+	else
+		csstart = skb_headlen(skb);
+
+	if (csstart > skb_headlen(skb))
+		BUG();
+
+	memcpy(to, skb->data, csstart);
+
+	csum = 0;
+	if (csstart != skb->len)
+		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
+					      skb->len - csstart, 0);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		long csstuff = csstart + skb->csum;
+
+		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
+	}
+}
+
+/**
+ *	skb_dequeue - remove from the head of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the head of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The head item is
+ *	returned or %NULL if the list is empty.
+ */
+
+struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+/**
+ *	skb_dequeue_tail - remove from the tail of the queue
+ *	@list: list to dequeue from
+ *
+ *	Remove the tail of the list. The list lock is taken so the function
+ *	may be used safely with other locking list functions. The tail item is
+ *	returned or %NULL if the list is empty.
+ */
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue_tail(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+/**
+ *	skb_queue_purge - empty a list
+ *	@list: list to empty
+ *
+ *	Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *	the list and one reference dropped. This function takes the list
+ *	lock and is atomic with respect to other list locking functions.
+ */
+void skb_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(list)) != NULL)
+		kfree_skb(skb);
+}
+
+/**
+ *	skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the start of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_head(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *	skb_queue_tail - queue a buffer at the list tail
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the tail of the list. This function takes the
+ *	list lock and can be used safely with other locking &sk_buff functions
+ *	safely.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_tail(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+/**
+ *	skb_unlink	-	remove a buffer from a list
+ *	@skb: buffer to remove
+ *
+ *	Place a packet after a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls
+ *
+ *	Works even without knowing the list it is sitting on, which can be
+ *	handy at times. It also means that THE LIST MUST EXIST when you
+ *	unlink. Thus a list must have its contents unlinked before it is
+ *	destroyed.
+ */
+void skb_unlink(struct sk_buff *skb)
+{
+	struct sk_buff_head *list = skb->list;
+
+	if (list) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&list->lock, flags);
+		if (skb->list == list)
+			__skb_unlink(skb, skb->list);
+		spin_unlock_irqrestore(&list->lock, flags);
+	}
+}
+
+
+/**
+ *	skb_append	-	append a buffer
+ *	@old: buffer to insert after
+ *	@newsk: buffer to insert
+ *
+ *	Place a packet after a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls.
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&old->list->lock, flags);
+	__skb_append(old, newsk);
+	spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+
+/**
+ *	skb_insert	-	insert a buffer
+ *	@old: buffer to insert before
+ *	@newsk: buffer to insert
+ *
+ *	Place a packet before a given packet in a list. The list locks are taken
+ *	and this function is atomic with respect to other list locked calls
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&old->list->lock, flags);
+	__skb_insert(newsk, old->prev, old, old->list);
+	spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+#if 0
+/*
+ * 	Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+	/* Must match allocation in alloc_skb */
+	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+	kmem_add_cache_size(mtu);
+}
+#endif
+
+static inline void skb_split_inside_header(struct sk_buff *skb,
+					   struct sk_buff* skb1,
+					   const u32 len, const int pos)
+{
+	int i;
+
+	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
+
+	/* And move data appendix as is. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb1->data_len		   = skb->data_len;
+	skb1->len		   += skb1->data_len;
+	skb->data_len		   = 0;
+	skb->len		   = len;
+	skb->tail		   = skb->data + len;
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+				       struct sk_buff* skb1,
+				       const u32 len, int pos)
+{
+	int i, k = 0;
+	const int nfrags = skb_shinfo(skb)->nr_frags;
+
+	skb_shinfo(skb)->nr_frags = 0;
+	skb1->len		  = skb1->data_len = skb->len - len;
+	skb->len		  = len;
+	skb->data_len		  = len - pos;
+
+	for (i = 0; i < nfrags; i++) {
+		int size = skb_shinfo(skb)->frags[i].size;
+
+		if (pos + size > len) {
+			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+			if (pos < len) {
+				/* Split frag.
+				 * We have two variants in this case:
+				 * 1. Move all the frag to the second
+				 *    part, if it is possible. F.e.
+				 *    this approach is mandatory for TUX,
+				 *    where splitting is expensive.
+				 * 2. Split is accurately. We make this.
+				 */
+				get_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_shinfo(skb1)->frags[0].size -= len - pos;
+				skb_shinfo(skb)->frags[i].size	= len - pos;
+				skb_shinfo(skb)->nr_frags++;
+			}
+			k++;
+		} else
+			skb_shinfo(skb)->nr_frags++;
+		pos += size;
+	}
+	skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - Split fragmented skb to two parts at length len.
+ * @skb: the buffer to split
+ * @skb1: the buffer to receive the second part
+ * @len: new length for skb
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+	int pos = skb_headlen(skb);
+
+	if (len < pos)	/* Split line is inside header. */
+		skb_split_inside_header(skb, skb1, len, pos);
+	else		/* Second chunk has no header, nothing to copy. */
+		skb_split_no_header(skb, skb1, len, pos);
+}
+
+void __init skb_init(void)
+{
+	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+					      sizeof(struct sk_buff),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      NULL, NULL);
+	if (!skbuff_head_cache)
+		panic("cannot create skbuff cache");
+}
+
+EXPORT_SYMBOL(___pskb_trim);
+EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(__pskb_pull_tail);
+EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(pskb_expand_head);
+EXPORT_SYMBOL(skb_checksum);
+EXPORT_SYMBOL(skb_clone);
+EXPORT_SYMBOL(skb_clone_fraglist);
+EXPORT_SYMBOL(skb_copy);
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
+EXPORT_SYMBOL(skb_copy_bits);
+EXPORT_SYMBOL(skb_copy_expand);
+EXPORT_SYMBOL(skb_over_panic);
+EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(skb_realloc_headroom);
+EXPORT_SYMBOL(skb_under_panic);
+EXPORT_SYMBOL(skb_dequeue);
+EXPORT_SYMBOL(skb_dequeue_tail);
+EXPORT_SYMBOL(skb_insert);
+EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_head);
+EXPORT_SYMBOL(skb_queue_tail);
+EXPORT_SYMBOL(skb_unlink);
+EXPORT_SYMBOL(skb_append);
+EXPORT_SYMBOL(skb_split);
diff --git a/net/core/sock.c b/net/core/sock.c
new file mode 100644
index 00000000000..629ab4a5b45
--- /dev/null
+++ b/net/core/sock.c
@@ -0,0 +1,1565 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Generic socket support routines. Memory allocators, socket lock/release
+ *		handler for protocols to use and generic option handler.
+ *
+ *
+ * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Fixes:
+ *		Alan Cox	: 	Numerous verify_area() problems
+ *		Alan Cox	:	Connecting on a connecting socket
+ *					now returns an error for tcp.
+ *		Alan Cox	:	sock->protocol is set correctly.
+ *					and is not sometimes left as 0.
+ *		Alan Cox	:	connect handles icmp errors on a
+ *					connect properly. Unfortunately there
+ *					is a restart syscall nasty there. I
+ *					can't match BSD without hacking the C
+ *					library. Ideas urgently sought!
+ *		Alan Cox	:	Disallow bind() to addresses that are
+ *					not ours - especially broadcast ones!!
+ *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
+ *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
+ *					instead they leave that for the DESTROY timer.
+ *		Alan Cox	:	Clean up error flag in accept
+ *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
+ *					was buggy. Put a remove_sock() in the handler
+ *					for memory when we hit 0. Also altered the timer
+ *					code. The ACK stuff can wait and needs major 
+ *					TCP layer surgery.
+ *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
+ *					and fixed timer/inet_bh race.
+ *		Alan Cox	:	Added zapped flag for TCP
+ *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
+ *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
+ *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
+ *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
+ *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
+ *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
+ *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
+ *	Pauline Middelink	:	identd support
+ *		Alan Cox	:	Fixed connect() taking signals I think.
+ *		Alan Cox	:	SO_LINGER supported
+ *		Alan Cox	:	Error reporting fixes
+ *		Anonymous	:	inet_create tidied up (sk->reuse setting)
+ *		Alan Cox	:	inet sockets don't set sk->type!
+ *		Alan Cox	:	Split socket option code
+ *		Alan Cox	:	Callbacks
+ *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
+ *		Alex		:	Removed restriction on inet fioctl
+ *		Alan Cox	:	Splitting INET from NET core
+ *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
+ *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
+ *		Alan Cox	:	Split IP from generic code
+ *		Alan Cox	:	New kfree_skbmem()
+ *		Alan Cox	:	Make SO_DEBUG superuser only.
+ *		Alan Cox	:	Allow anyone to clear SO_DEBUG
+ *					(compatibility fix)
+ *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
+ *		Alan Cox	:	Allocator for a socket is settable.
+ *		Alan Cox	:	SO_ERROR includes soft errors.
+ *		Alan Cox	:	Allow NULL arguments on some SO_ opts
+ *		Alan Cox	: 	Generic socket allocation to make hooks
+ *					easier (suggested by Craig Metz).
+ *		Michael Pall	:	SO_ERROR returns positive errno again
+ *              Steve Whitehouse:       Added default destructor to free
+ *                                      protocol private data.
+ *              Steve Whitehouse:       Added various other default routines
+ *                                      common to several socket families.
+ *              Chris Evans     :       Call suser() check last on F_SETOWN
+ *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
+ *		Andi Kleen	:	Fix write_space callback
+ *		Chris Evans	:	Security fixes - signedness again
+ *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
+ *
+ * To Fix:
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/poll.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <linux/ipsec.h>
+
+#include <linux/filter.h>
+
+#ifdef CONFIG_INET
+#include <net/tcp.h>
+#endif
+
+/* Take into consideration the size of the struct sk_buff overhead in the
+ * determination of these values, since that is non-constant across
+ * platforms.  This makes socket queueing behavior and performance
+ * not depend upon such differences.
+ */
+#define _SK_MEM_PACKETS		256
+#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
+#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+
+/* Run time adjustable parameters. */
+__u32 sysctl_wmem_max = SK_WMEM_MAX;
+__u32 sysctl_rmem_max = SK_RMEM_MAX;
+__u32 sysctl_wmem_default = SK_WMEM_MAX;
+__u32 sysctl_rmem_default = SK_RMEM_MAX;
+
+/* Maximal space eaten by iovec or ancilliary data plus some space */
+int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+
+static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+{
+	struct timeval tv;
+
+	if (optlen < sizeof(tv))
+		return -EINVAL;
+	if (copy_from_user(&tv, optval, sizeof(tv)))
+		return -EFAULT;
+
+	*timeo_p = MAX_SCHEDULE_TIMEOUT;
+	if (tv.tv_sec == 0 && tv.tv_usec == 0)
+		return 0;
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
+		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
+	return 0;
+}
+
+static void sock_warn_obsolete_bsdism(const char *name)
+{
+	static int warned;
+	static char warncomm[TASK_COMM_LEN];
+	if (strcmp(warncomm, current->comm) && warned < 5) { 
+		strcpy(warncomm,  current->comm); 
+		printk(KERN_WARNING "process `%s' is using obsolete "
+		       "%s SO_BSDCOMPAT\n", warncomm, name);
+		warned++;
+	}
+}
+
+static void sock_disable_timestamp(struct sock *sk)
+{	
+	if (sock_flag(sk, SOCK_TIMESTAMP)) { 
+		sock_reset_flag(sk, SOCK_TIMESTAMP);
+		net_disable_timestamp();
+	}
+}
+
+
+/*
+ *	This is meant for all protocols to use and covers goings on
+ *	at the socket level. Everything here is generic.
+ */
+
+int sock_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	struct sock *sk=sock->sk;
+	struct sk_filter *filter;
+	int val;
+	int valbool;
+	struct linger ling;
+	int ret = 0;
+	
+	/*
+	 *	Options without arguments
+	 */
+
+#ifdef SO_DONTLINGER		/* Compatibility item... */
+	switch (optname) {
+		case SO_DONTLINGER:
+			sock_reset_flag(sk, SOCK_LINGER);
+			return 0;
+	}
+#endif	
+		
+  	if(optlen<sizeof(int))
+  		return(-EINVAL);
+  	
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+	
+  	valbool = val?1:0;
+
+	lock_sock(sk);
+
+  	switch(optname) 
+  	{
+		case SO_DEBUG:	
+			if(val && !capable(CAP_NET_ADMIN))
+			{
+				ret = -EACCES;
+			}
+			else if (valbool)
+				sock_set_flag(sk, SOCK_DBG);
+			else
+				sock_reset_flag(sk, SOCK_DBG);
+			break;
+		case SO_REUSEADDR:
+			sk->sk_reuse = valbool;
+			break;
+		case SO_TYPE:
+		case SO_ERROR:
+			ret = -ENOPROTOOPT;
+		  	break;
+		case SO_DONTROUTE:
+			if (valbool)
+				sock_set_flag(sk, SOCK_LOCALROUTE);
+			else
+				sock_reset_flag(sk, SOCK_LOCALROUTE);
+			break;
+		case SO_BROADCAST:
+			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+			break;
+		case SO_SNDBUF:
+			/* Don't error on this BSD doesn't and if you think
+			   about it this is right. Otherwise apps have to
+			   play 'guess the biggest size' games. RCVBUF/SNDBUF
+			   are treated in BSD as hints */
+			   
+			if (val > sysctl_wmem_max)
+				val = sysctl_wmem_max;
+
+			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+			if ((val * 2) < SOCK_MIN_SNDBUF)
+				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+			else
+				sk->sk_sndbuf = val * 2;
+
+			/*
+			 *	Wake up sending tasks if we
+			 *	upped the value.
+			 */
+			sk->sk_write_space(sk);
+			break;
+
+		case SO_RCVBUF:
+			/* Don't error on this BSD doesn't and if you think
+			   about it this is right. Otherwise apps have to
+			   play 'guess the biggest size' games. RCVBUF/SNDBUF
+			   are treated in BSD as hints */
+			  
+			if (val > sysctl_rmem_max)
+				val = sysctl_rmem_max;
+
+			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+			/* FIXME: is this lower bound the right one? */
+			if ((val * 2) < SOCK_MIN_RCVBUF)
+				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+			else
+				sk->sk_rcvbuf = val * 2;
+			break;
+
+		case SO_KEEPALIVE:
+#ifdef CONFIG_INET
+			if (sk->sk_protocol == IPPROTO_TCP)
+				tcp_set_keepalive(sk, valbool);
+#endif
+			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+			break;
+
+	 	case SO_OOBINLINE:
+			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+			break;
+
+	 	case SO_NO_CHECK:
+			sk->sk_no_check = valbool;
+			break;
+
+		case SO_PRIORITY:
+			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) 
+				sk->sk_priority = val;
+			else
+				ret = -EPERM;
+			break;
+
+		case SO_LINGER:
+			if(optlen<sizeof(ling)) {
+				ret = -EINVAL;	/* 1003.1g */
+				break;
+			}
+			if (copy_from_user(&ling,optval,sizeof(ling))) {
+				ret = -EFAULT;
+				break;
+			}
+			if (!ling.l_onoff)
+				sock_reset_flag(sk, SOCK_LINGER);
+			else {
+#if (BITS_PER_LONG == 32)
+				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+				else
+#endif
+					sk->sk_lingertime = ling.l_linger * HZ;
+				sock_set_flag(sk, SOCK_LINGER);
+			}
+			break;
+
+		case SO_BSDCOMPAT:
+			sock_warn_obsolete_bsdism("setsockopt");
+			break;
+
+		case SO_PASSCRED:
+			if (valbool)
+				set_bit(SOCK_PASSCRED, &sock->flags);
+			else
+				clear_bit(SOCK_PASSCRED, &sock->flags);
+			break;
+
+		case SO_TIMESTAMP:
+			if (valbool)  {
+				sock_set_flag(sk, SOCK_RCVTSTAMP);
+				sock_enable_timestamp(sk);
+			} else
+				sock_reset_flag(sk, SOCK_RCVTSTAMP);
+			break;
+
+		case SO_RCVLOWAT:
+			if (val < 0)
+				val = INT_MAX;
+			sk->sk_rcvlowat = val ? : 1;
+			break;
+
+		case SO_RCVTIMEO:
+			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+			break;
+
+		case SO_SNDTIMEO:
+			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+			break;
+
+#ifdef CONFIG_NETDEVICES
+		case SO_BINDTODEVICE:
+		{
+			char devname[IFNAMSIZ]; 
+
+			/* Sorry... */ 
+			if (!capable(CAP_NET_RAW)) {
+				ret = -EPERM;
+				break;
+			}
+
+			/* Bind this socket to a particular device like "eth0",
+			 * as specified in the passed interface name. If the
+			 * name is "" or the option length is zero the socket 
+			 * is not bound. 
+			 */ 
+
+			if (!valbool) {
+				sk->sk_bound_dev_if = 0;
+			} else {
+				if (optlen > IFNAMSIZ) 
+					optlen = IFNAMSIZ; 
+				if (copy_from_user(devname, optval, optlen)) {
+					ret = -EFAULT;
+					break;
+				}
+
+				/* Remove any cached route for this socket. */
+				sk_dst_reset(sk);
+
+				if (devname[0] == '\0') {
+					sk->sk_bound_dev_if = 0;
+				} else {
+					struct net_device *dev = dev_get_by_name(devname);
+					if (!dev) {
+						ret = -ENODEV;
+						break;
+					}
+					sk->sk_bound_dev_if = dev->ifindex;
+					dev_put(dev);
+				}
+			}
+			break;
+		}
+#endif
+
+
+		case SO_ATTACH_FILTER:
+			ret = -EINVAL;
+			if (optlen == sizeof(struct sock_fprog)) {
+				struct sock_fprog fprog;
+
+				ret = -EFAULT;
+				if (copy_from_user(&fprog, optval, sizeof(fprog)))
+					break;
+
+				ret = sk_attach_filter(&fprog, sk);
+			}
+			break;
+
+		case SO_DETACH_FILTER:
+			spin_lock_bh(&sk->sk_lock.slock);
+			filter = sk->sk_filter;
+                        if (filter) {
+				sk->sk_filter = NULL;
+				spin_unlock_bh(&sk->sk_lock.slock);
+				sk_filter_release(sk, filter);
+				break;
+			}
+			spin_unlock_bh(&sk->sk_lock.slock);
+			ret = -ENONET;
+			break;
+
+		/* We implement the SO_SNDLOWAT etc to
+		   not be settable (1003.1g 5.3) */
+		default:
+		  	ret = -ENOPROTOOPT;
+			break;
+  	}
+	release_sock(sk);
+	return ret;
+}
+
+
+int sock_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	
+	union
+	{
+  		int val;
+  		struct linger ling;
+		struct timeval tm;
+	} v;
+	
+	unsigned int lv = sizeof(int);
+	int len;
+  	
+  	if(get_user(len,optlen))
+  		return -EFAULT;
+	if(len < 0)
+		return -EINVAL;
+		
+  	switch(optname) 
+  	{
+		case SO_DEBUG:		
+			v.val = sock_flag(sk, SOCK_DBG);
+			break;
+		
+		case SO_DONTROUTE:
+			v.val = sock_flag(sk, SOCK_LOCALROUTE);
+			break;
+		
+		case SO_BROADCAST:
+			v.val = !!sock_flag(sk, SOCK_BROADCAST);
+			break;
+
+		case SO_SNDBUF:
+			v.val = sk->sk_sndbuf;
+			break;
+		
+		case SO_RCVBUF:
+			v.val = sk->sk_rcvbuf;
+			break;
+
+		case SO_REUSEADDR:
+			v.val = sk->sk_reuse;
+			break;
+
+		case SO_KEEPALIVE:
+			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+			break;
+
+		case SO_TYPE:
+			v.val = sk->sk_type;		  		
+			break;
+
+		case SO_ERROR:
+			v.val = -sock_error(sk);
+			if(v.val==0)
+				v.val = xchg(&sk->sk_err_soft, 0);
+			break;
+
+		case SO_OOBINLINE:
+			v.val = !!sock_flag(sk, SOCK_URGINLINE);
+			break;
+	
+		case SO_NO_CHECK:
+			v.val = sk->sk_no_check;
+			break;
+
+		case SO_PRIORITY:
+			v.val = sk->sk_priority;
+			break;
+		
+		case SO_LINGER:	
+			lv		= sizeof(v.ling);
+			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+ 			v.ling.l_linger	= sk->sk_lingertime / HZ;
+			break;
+					
+		case SO_BSDCOMPAT:
+			sock_warn_obsolete_bsdism("getsockopt");
+			break;
+
+		case SO_TIMESTAMP:
+			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
+			break;
+
+		case SO_RCVTIMEO:
+			lv=sizeof(struct timeval);
+			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+				v.tm.tv_sec = 0;
+				v.tm.tv_usec = 0;
+			} else {
+				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+			}
+			break;
+
+		case SO_SNDTIMEO:
+			lv=sizeof(struct timeval);
+			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+				v.tm.tv_sec = 0;
+				v.tm.tv_usec = 0;
+			} else {
+				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+			}
+			break;
+
+		case SO_RCVLOWAT:
+			v.val = sk->sk_rcvlowat;
+			break;
+
+		case SO_SNDLOWAT:
+			v.val=1;
+			break; 
+
+		case SO_PASSCRED:
+			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+			break;
+
+		case SO_PEERCRED:
+			if (len > sizeof(sk->sk_peercred))
+				len = sizeof(sk->sk_peercred);
+			if (copy_to_user(optval, &sk->sk_peercred, len))
+				return -EFAULT;
+			goto lenout;
+
+		case SO_PEERNAME:
+		{
+			char address[128];
+
+			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+				return -ENOTCONN;
+			if (lv < len)
+				return -EINVAL;
+			if (copy_to_user(optval, address, len))
+				return -EFAULT;
+			goto lenout;
+		}
+
+		/* Dubious BSD thing... Probably nobody even uses it, but
+		 * the UNIX standard wants it for whatever reason... -DaveM
+		 */
+		case SO_ACCEPTCONN:
+			v.val = sk->sk_state == TCP_LISTEN;
+			break;
+
+		case SO_PEERSEC:
+			return security_socket_getpeersec(sock, optval, optlen, len);
+
+		default:
+			return(-ENOPROTOOPT);
+	}
+	if (len > lv)
+		len = lv;
+	if (copy_to_user(optval, &v, len))
+		return -EFAULT;
+lenout:
+  	if (put_user(len, optlen))
+  		return -EFAULT;
+  	return 0;
+}
+
+/**
+ *	sk_alloc - All socket objects are allocated here
+ *	@family - protocol family
+ *	@priority - for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *	@prot - struct proto associated with this new sock instance
+ *	@zero_it - if we should zero the newly allocated sock
+ */
+struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
+{
+	struct sock *sk = NULL;
+	kmem_cache_t *slab = prot->slab;
+
+	if (slab != NULL)
+		sk = kmem_cache_alloc(slab, priority);
+	else
+		sk = kmalloc(prot->obj_size, priority);
+
+	if (sk) {
+		if (zero_it) {
+			memset(sk, 0, prot->obj_size);
+			sk->sk_family = family;
+			sk->sk_prot = prot;
+			sock_lock_init(sk);
+		}
+		
+		if (security_sk_alloc(sk, family, priority)) {
+			kmem_cache_free(slab, sk);
+			sk = NULL;
+		} else
+			__module_get(prot->owner);
+	}
+	return sk;
+}
+
+void sk_free(struct sock *sk)
+{
+	struct sk_filter *filter;
+	struct module *owner = sk->sk_prot->owner;
+
+	if (sk->sk_destruct)
+		sk->sk_destruct(sk);
+
+	filter = sk->sk_filter;
+	if (filter) {
+		sk_filter_release(sk, filter);
+		sk->sk_filter = NULL;
+	}
+
+	sock_disable_timestamp(sk);
+
+	if (atomic_read(&sk->sk_omem_alloc))
+		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
+		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+
+	security_sk_free(sk);
+	if (sk->sk_prot->slab != NULL)
+		kmem_cache_free(sk->sk_prot->slab, sk);
+	else
+		kfree(sk);
+	module_put(owner);
+}
+
+void __init sk_init(void)
+{
+	if (num_physpages <= 4096) {
+		sysctl_wmem_max = 32767;
+		sysctl_rmem_max = 32767;
+		sysctl_wmem_default = 32767;
+		sysctl_rmem_default = 32767;
+	} else if (num_physpages >= 131072) {
+		sysctl_wmem_max = 131071;
+		sysctl_rmem_max = 131071;
+	}
+}
+
+/*
+ *	Simple resource managers for sockets.
+ */
+
+
+/* 
+ * Write buffer destructor automatically called from kfree_skb. 
+ */
+void sock_wfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	/* In case it might be waiting for more memory. */
+	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+		sk->sk_write_space(sk);
+	sock_put(sk);
+}
+
+/* 
+ * Read buffer destructor automatically called from kfree_skb. 
+ */
+void sock_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+
+int sock_i_uid(struct sock *sk)
+{
+	int uid;
+
+	read_lock(&sk->sk_callback_lock);
+	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
+	read_unlock(&sk->sk_callback_lock);
+	return uid;
+}
+
+unsigned long sock_i_ino(struct sock *sk)
+{
+	unsigned long ino;
+
+	read_lock(&sk->sk_callback_lock);
+	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
+	read_unlock(&sk->sk_callback_lock);
+	return ino;
+}
+
+/*
+ * Allocate a skb from the socket's send buffer.
+ */
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+		struct sk_buff * skb = alloc_skb(size, priority);
+		if (skb) {
+			skb_set_owner_w(skb, sk);
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Allocate a skb from the socket's receive buffer.
+ */ 
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
+{
+	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
+		struct sk_buff *skb = alloc_skb(size, priority);
+		if (skb) {
+			skb_set_owner_r(skb, sk);
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+/* 
+ * Allocate a memory block from the socket's option memory buffer.
+ */ 
+void *sock_kmalloc(struct sock *sk, int size, int priority)
+{
+	if ((unsigned)size <= sysctl_optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+		void *mem;
+		/* First do the add, to avoid the race if kmalloc
+ 		 * might sleep.
+		 */
+		atomic_add(size, &sk->sk_omem_alloc);
+		mem = kmalloc(size, priority);
+		if (mem)
+			return mem;
+		atomic_sub(size, &sk->sk_omem_alloc);
+	}
+	return NULL;
+}
+
+/*
+ * Free an option memory block.
+ */
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+	kfree(mem);
+	atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
+   I think, these locks should be removed for datagram sockets.
+ */
+static long sock_wait_for_wmem(struct sock * sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	for (;;) {
+		if (!timeo)
+			break;
+		if (signal_pending(current))
+			break;
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+			break;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			break;
+		if (sk->sk_err)
+			break;
+		timeo = schedule_timeout(timeo);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+
+/*
+ *	Generic send/receive buffer handlers
+ */
+
+static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
+					    unsigned long header_len,
+					    unsigned long data_len,
+					    int noblock, int *errcode)
+{
+	struct sk_buff *skb;
+	unsigned int gfp_mask;
+	long timeo;
+	int err;
+
+	gfp_mask = sk->sk_allocation;
+	if (gfp_mask & __GFP_WAIT)
+		gfp_mask |= __GFP_REPEAT;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	while (1) {
+		err = sock_error(sk);
+		if (err != 0)
+			goto failure;
+
+		err = -EPIPE;
+		if (sk->sk_shutdown & SEND_SHUTDOWN)
+			goto failure;
+
+		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			skb = alloc_skb(header_len, sk->sk_allocation);
+			if (skb) {
+				int npages;
+				int i;
+
+				/* No pages, we're done... */
+				if (!data_len)
+					break;
+
+				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+				skb->truesize += data_len;
+				skb_shinfo(skb)->nr_frags = npages;
+				for (i = 0; i < npages; i++) {
+					struct page *page;
+					skb_frag_t *frag;
+
+					page = alloc_pages(sk->sk_allocation, 0);
+					if (!page) {
+						err = -ENOBUFS;
+						skb_shinfo(skb)->nr_frags = i;
+						kfree_skb(skb);
+						goto failure;
+					}
+
+					frag = &skb_shinfo(skb)->frags[i];
+					frag->page = page;
+					frag->page_offset = 0;
+					frag->size = (data_len >= PAGE_SIZE ?
+						      PAGE_SIZE :
+						      data_len);
+					data_len -= PAGE_SIZE;
+				}
+
+				/* Full success... */
+				break;
+			}
+			err = -ENOBUFS;
+			goto failure;
+		}
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		err = -EAGAIN;
+		if (!timeo)
+			goto failure;
+		if (signal_pending(current))
+			goto interrupted;
+		timeo = sock_wait_for_wmem(sk, timeo);
+	}
+
+	skb_set_owner_w(skb, sk);
+	return skb;
+
+interrupted:
+	err = sock_intr_errno(timeo);
+failure:
+	*errcode = err;
+	return NULL;
+}
+
+struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 
+				    int noblock, int *errcode)
+{
+	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+}
+
+static void __lock_sock(struct sock *sk)
+{
+	DEFINE_WAIT(wait);
+
+	for(;;) {
+		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
+					TASK_UNINTERRUPTIBLE);
+		spin_unlock_bh(&sk->sk_lock.slock);
+		schedule();
+		spin_lock_bh(&sk->sk_lock.slock);
+		if(!sock_owned_by_user(sk))
+			break;
+	}
+	finish_wait(&sk->sk_lock.wq, &wait);
+}
+
+static void __release_sock(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_backlog.head;
+
+	do {
+		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+		bh_unlock_sock(sk);
+
+		do {
+			struct sk_buff *next = skb->next;
+
+			skb->next = NULL;
+			sk->sk_backlog_rcv(sk, skb);
+
+			/*
+			 * We are in process context here with softirqs
+			 * disabled, use cond_resched_softirq() to preempt.
+			 * This is safe to do because we've taken the backlog
+			 * queue private:
+			 */
+			cond_resched_softirq();
+
+			skb = next;
+		} while (skb != NULL);
+
+		bh_lock_sock(sk);
+	} while((skb = sk->sk_backlog.head) != NULL);
+}
+
+/**
+ * sk_wait_data - wait for data to arrive at sk_receive_queue
+ * sk - sock to wait on
+ * timeo - for how long
+ *
+ * Now socket state including sk->sk_err is changed only under lock,
+ * hence we may omit checks after joining wait queue.
+ * We check receive queue before schedule() only as optimization;
+ * it is very likely that release_sock() added new data.
+ */
+int sk_wait_data(struct sock *sk, long *timeo)
+{
+	int rc;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
+	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	finish_wait(sk->sk_sleep, &wait);
+	return rc;
+}
+
+EXPORT_SYMBOL(sk_wait_data);
+
+/*
+ * Set of default routines for initialising struct proto_ops when
+ * the protocol does not support a particular function. In certain
+ * cases where it makes no sense for a protocol to have a "do nothing"
+ * function, some default processing is provided.
+ */
+
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 
+		    int len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 
+		    int *len, int peer)
+{
+	return -EOPNOTSUPP;
+}
+
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+{
+	return 0;
+}
+
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_listen(struct socket *sock, int backlog)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_shutdown(struct socket *sock, int how)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+		    size_t len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+	/* Mirror missing mmap method error code */
+	return -ENODEV;
+}
+
+ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+{
+	ssize_t res;
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	char *kaddr = kmap(page);
+	iov.iov_base = kaddr + offset;
+	iov.iov_len = size;
+	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
+	kunmap(page);
+	return res;
+}
+
+/*
+ *	Default Socket Callbacks
+ */
+
+static void sock_def_wakeup(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_all(sk->sk_sleep);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void sock_def_error_report(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+	sk_wake_async(sk,0,POLL_ERR); 
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void sock_def_readable(struct sock *sk, int len)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+	sk_wake_async(sk,1,POLL_IN);
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void sock_def_write_space(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+
+	/* Do not wake up a writer until he can make "significant"
+	 * progress.  --DaveM
+	 */
+	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+
+		/* Should agree with poll, otherwise some programs break */
+		if (sock_writeable(sk))
+			sk_wake_async(sk, 2, POLL_OUT);
+	}
+
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void sock_def_destruct(struct sock *sk)
+{
+	if (sk->sk_protinfo)
+		kfree(sk->sk_protinfo);
+}
+
+void sk_send_sigurg(struct sock *sk)
+{
+	if (sk->sk_socket && sk->sk_socket->file)
+		if (send_sigurg(&sk->sk_socket->file->f_owner))
+			sk_wake_async(sk, 3, POLL_PRI);
+}
+
+void sk_reset_timer(struct sock *sk, struct timer_list* timer,
+		    unsigned long expires)
+{
+	if (!mod_timer(timer, expires))
+		sock_hold(sk);
+}
+
+EXPORT_SYMBOL(sk_reset_timer);
+
+void sk_stop_timer(struct sock *sk, struct timer_list* timer)
+{
+	if (timer_pending(timer) && del_timer(timer))
+		__sock_put(sk);
+}
+
+EXPORT_SYMBOL(sk_stop_timer);
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+	skb_queue_head_init(&sk->sk_receive_queue);
+	skb_queue_head_init(&sk->sk_write_queue);
+	skb_queue_head_init(&sk->sk_error_queue);
+
+	sk->sk_send_head	=	NULL;
+
+	init_timer(&sk->sk_timer);
+	
+	sk->sk_allocation	=	GFP_KERNEL;
+	sk->sk_rcvbuf		=	sysctl_rmem_default;
+	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_state		=	TCP_CLOSE;
+	sk->sk_socket		=	sock;
+
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	if(sock)
+	{
+		sk->sk_type	=	sock->type;
+		sk->sk_sleep	=	&sock->wait;
+		sock->sk	=	sk;
+	} else
+		sk->sk_sleep	=	NULL;
+
+	rwlock_init(&sk->sk_dst_lock);
+	rwlock_init(&sk->sk_callback_lock);
+
+	sk->sk_state_change	=	sock_def_wakeup;
+	sk->sk_data_ready	=	sock_def_readable;
+	sk->sk_write_space	=	sock_def_write_space;
+	sk->sk_error_report	=	sock_def_error_report;
+	sk->sk_destruct		=	sock_def_destruct;
+
+	sk->sk_sndmsg_page	=	NULL;
+	sk->sk_sndmsg_off	=	0;
+
+	sk->sk_peercred.pid 	=	0;
+	sk->sk_peercred.uid	=	-1;
+	sk->sk_peercred.gid	=	-1;
+	sk->sk_write_pending	=	0;
+	sk->sk_rcvlowat		=	1;
+	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
+	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
+
+	sk->sk_stamp.tv_sec     = -1L;
+	sk->sk_stamp.tv_usec    = -1L;
+
+	atomic_set(&sk->sk_refcnt, 1);
+}
+
+void fastcall lock_sock(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&(sk->sk_lock.slock));
+	if (sk->sk_lock.owner)
+		__lock_sock(sk);
+	sk->sk_lock.owner = (void *)1;
+	spin_unlock_bh(&(sk->sk_lock.slock));
+}
+
+EXPORT_SYMBOL(lock_sock);
+
+void fastcall release_sock(struct sock *sk)
+{
+	spin_lock_bh(&(sk->sk_lock.slock));
+	if (sk->sk_backlog.tail)
+		__release_sock(sk);
+	sk->sk_lock.owner = NULL;
+        if (waitqueue_active(&(sk->sk_lock.wq)))
+		wake_up(&(sk->sk_lock.wq));
+	spin_unlock_bh(&(sk->sk_lock.slock));
+}
+EXPORT_SYMBOL(release_sock);
+
+int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+{ 
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+	if (sk->sk_stamp.tv_sec == -1) 
+		return -ENOENT;
+	if (sk->sk_stamp.tv_sec == 0)
+		do_gettimeofday(&sk->sk_stamp);
+	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
+		-EFAULT : 0; 
+} 
+EXPORT_SYMBOL(sock_get_timestamp);
+
+void sock_enable_timestamp(struct sock *sk)
+{	
+	if (!sock_flag(sk, SOCK_TIMESTAMP)) { 
+		sock_set_flag(sk, SOCK_TIMESTAMP);
+		net_enable_timestamp();
+	}
+}
+EXPORT_SYMBOL(sock_enable_timestamp); 
+
+/*
+ *	Get a socket option on an socket.
+ *
+ *	FIX: POSIX 1003.1g is very ambiguous here. It states that
+ *	asynchronous errors should be reported by getsockopt. We assume
+ *	this means if you specify SO_ERROR (otherwise whats the point of it).
+ */
+int sock_common_getsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+
+	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL(sock_common_getsockopt);
+
+int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+
+EXPORT_SYMBOL(sock_common_recvmsg);
+
+/*
+ *	Set socket options on an inet socket.
+ */
+int sock_common_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+
+	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL(sock_common_setsockopt);
+
+void sk_common_release(struct sock *sk)
+{
+	if (sk->sk_prot->destroy)
+		sk->sk_prot->destroy(sk);
+
+	/*
+	 * Observation: when sock_common_release is called, processes have
+	 * no access to socket. But net still has.
+	 * Step one, detach it from networking:
+	 *
+	 * A. Remove from hash tables.
+	 */
+
+	sk->sk_prot->unhash(sk);
+
+	/*
+	 * In this point socket cannot receive new packets, but it is possible
+	 * that some packets are in flight because some CPU runs receiver and
+	 * did hash table lookup before we unhashed socket. They will achieve
+	 * receive queue and will be purged by socket destructor.
+	 *
+	 * Also we still have packets pending on receive queue and probably,
+	 * our own packets waiting in device queues. sock_destroy will drain
+	 * receive queue, but transmitted packets will delay socket destruction
+	 * until the last reference will be released.
+	 */
+
+	sock_orphan(sk);
+
+	xfrm_sk_free_policy(sk);
+
+#ifdef INET_REFCNT_DEBUG
+	if (atomic_read(&sk->sk_refcnt) != 1)
+		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
+		       sk, atomic_read(&sk->sk_refcnt));
+#endif
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(sk_common_release);
+
+static DEFINE_RWLOCK(proto_list_lock);
+static LIST_HEAD(proto_list);
+
+int proto_register(struct proto *prot, int alloc_slab)
+{
+	int rc = -ENOBUFS;
+
+	write_lock(&proto_list_lock);
+
+	if (alloc_slab) {
+		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+		if (prot->slab == NULL) {
+			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
+			       prot->name);
+			goto out_unlock;
+		}
+	}
+
+	list_add(&prot->node, &proto_list);
+	rc = 0;
+out_unlock:
+	write_unlock(&proto_list_lock);
+	return rc;
+}
+
+EXPORT_SYMBOL(proto_register);
+
+void proto_unregister(struct proto *prot)
+{
+	write_lock(&proto_list_lock);
+
+	if (prot->slab != NULL) {
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
+
+	list_del(&prot->node);
+	write_unlock(&proto_list_lock);
+}
+
+EXPORT_SYMBOL(proto_unregister);
+
+#ifdef CONFIG_PROC_FS
+static inline struct proto *__proto_head(void)
+{
+	return list_entry(proto_list.next, struct proto, node);
+}
+
+static inline struct proto *proto_head(void)
+{
+	return list_empty(&proto_list) ? NULL : __proto_head();
+}
+
+static inline struct proto *proto_next(struct proto *proto)
+{
+	return proto->node.next == &proto_list ? NULL :
+		list_entry(proto->node.next, struct proto, node);
+}
+
+static inline struct proto *proto_get_idx(loff_t pos)
+{
+	struct proto *proto;
+	loff_t i = 0;
+
+	list_for_each_entry(proto, &proto_list, node)
+		if (i++ == pos)
+			goto out;
+
+	proto = NULL;
+out:
+	return proto;
+}
+
+static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&proto_list_lock);
+	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
+}
+
+static void proto_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&proto_list_lock);
+}
+
+static char proto_method_implemented(const void *method)
+{
+	return method == NULL ? 'n' : 'y';
+}
+
+static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
+{
+	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
+		   proto->name,
+		   proto->obj_size,
+		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
+		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
+		   proto->max_header,
+		   proto->slab == NULL ? "no" : "yes",
+		   module_name(proto->owner),
+		   proto_method_implemented(proto->close),
+		   proto_method_implemented(proto->connect),
+		   proto_method_implemented(proto->disconnect),
+		   proto_method_implemented(proto->accept),
+		   proto_method_implemented(proto->ioctl),
+		   proto_method_implemented(proto->init),
+		   proto_method_implemented(proto->destroy),
+		   proto_method_implemented(proto->shutdown),
+		   proto_method_implemented(proto->setsockopt),
+		   proto_method_implemented(proto->getsockopt),
+		   proto_method_implemented(proto->sendmsg),
+		   proto_method_implemented(proto->recvmsg),
+		   proto_method_implemented(proto->sendpage),
+		   proto_method_implemented(proto->bind),
+		   proto_method_implemented(proto->backlog_rcv),
+		   proto_method_implemented(proto->hash),
+		   proto_method_implemented(proto->unhash),
+		   proto_method_implemented(proto->get_port),
+		   proto_method_implemented(proto->enter_memory_pressure));
+}
+
+static int proto_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
+			   "protocol",
+			   "size",
+			   "sockets",
+			   "memory",
+			   "press",
+			   "maxhdr",
+			   "slab",
+			   "module",
+			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
+	else
+		proto_seq_printf(seq, v);
+	return 0;
+}
+
+static struct seq_operations proto_seq_ops = {
+	.start  = proto_seq_start,
+	.next   = proto_seq_next,
+	.stop   = proto_seq_stop,
+	.show   = proto_seq_show,
+};
+
+static int proto_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &proto_seq_ops);
+}
+
+static struct file_operations proto_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= proto_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init proto_init(void)
+{
+	/* register /proc/net/protocols */
+	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+}
+
+subsys_initcall(proto_init);
+
+#endif /* PROC_FS */
+
+EXPORT_SYMBOL(sk_alloc);
+EXPORT_SYMBOL(sk_free);
+EXPORT_SYMBOL(sk_send_sigurg);
+EXPORT_SYMBOL(sock_alloc_send_skb);
+EXPORT_SYMBOL(sock_init_data);
+EXPORT_SYMBOL(sock_kfree_s);
+EXPORT_SYMBOL(sock_kmalloc);
+EXPORT_SYMBOL(sock_no_accept);
+EXPORT_SYMBOL(sock_no_bind);
+EXPORT_SYMBOL(sock_no_connect);
+EXPORT_SYMBOL(sock_no_getname);
+EXPORT_SYMBOL(sock_no_getsockopt);
+EXPORT_SYMBOL(sock_no_ioctl);
+EXPORT_SYMBOL(sock_no_listen);
+EXPORT_SYMBOL(sock_no_mmap);
+EXPORT_SYMBOL(sock_no_poll);
+EXPORT_SYMBOL(sock_no_recvmsg);
+EXPORT_SYMBOL(sock_no_sendmsg);
+EXPORT_SYMBOL(sock_no_sendpage);
+EXPORT_SYMBOL(sock_no_setsockopt);
+EXPORT_SYMBOL(sock_no_shutdown);
+EXPORT_SYMBOL(sock_no_socketpair);
+EXPORT_SYMBOL(sock_rfree);
+EXPORT_SYMBOL(sock_setsockopt);
+EXPORT_SYMBOL(sock_wfree);
+EXPORT_SYMBOL(sock_wmalloc);
+EXPORT_SYMBOL(sock_i_uid);
+EXPORT_SYMBOL(sock_i_ino);
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(sysctl_optmem_max);
+EXPORT_SYMBOL(sysctl_rmem_max);
+EXPORT_SYMBOL(sysctl_wmem_max);
+#endif
diff --git a/net/core/stream.c b/net/core/stream.c
new file mode 100644
index 00000000000..1e27a57b5a9
--- /dev/null
+++ b/net/core/stream.c
@@ -0,0 +1,287 @@
+/*
+ *     SUCS NET3:
+ *
+ *     Generic stream handling routines. These are generic for most
+ *     protocols. Even IP. Tonight 8-).
+ *     This is used because TCP, LLC (others too) layer all have mostly
+ *     identical sendmsg() and recvmsg() code.
+ *     So we (will) share it here.
+ *
+ *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *                     (from old tcp.c code)
+ *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/signal.h>
+#include <linux/tcp.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+/**
+ * sk_stream_write_space - stream socket write_space callback.
+ * sk - socket
+ *
+ * FIXME: write proper description
+ */
+void sk_stream_write_space(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+			sock_wake_async(sock, 2, POLL_OUT);
+	}
+}
+
+EXPORT_SYMBOL(sk_stream_write_space);
+
+/**
+ * sk_stream_wait_connect - Wait for a socket to get into the connected state
+ * @sk - sock to wait on
+ * @timeo_p - for how long to wait
+ *
+ * Must be called with the socket locked.
+ */
+int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
+{
+	struct task_struct *tsk = current;
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		if (sk->sk_err)
+			return sock_error(sk);
+		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
+			return -EPIPE;
+		if (!*timeo_p)
+			return -EAGAIN;
+		if (signal_pending(tsk))
+			return sock_intr_errno(*timeo_p);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		sk->sk_write_pending++;
+		if (sk_wait_event(sk, timeo_p,
+				  !((1 << sk->sk_state) & 
+				    ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))))
+			break;
+		finish_wait(sk->sk_sleep, &wait);
+		sk->sk_write_pending--;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_connect);
+
+/**
+ * sk_stream_closing - Return 1 if we still have things to send in our buffers.
+ * @sk - socket to verify
+ */
+static inline int sk_stream_closing(struct sock *sk)
+{
+	return (1 << sk->sk_state) &
+	       (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
+}
+
+void sk_stream_wait_close(struct sock *sk, long timeout)
+{
+	if (timeout) {
+		DEFINE_WAIT(wait);
+
+		do {
+			prepare_to_wait(sk->sk_sleep, &wait,
+					TASK_INTERRUPTIBLE);
+			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+				break;
+		} while (!signal_pending(current) && timeout);
+
+		finish_wait(sk->sk_sleep, &wait);
+	}
+}
+
+EXPORT_SYMBOL(sk_stream_wait_close);
+
+/**
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk - socket to wait for memory
+ * @timeo_p - for how long
+ */
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	int err = 0;
+	long vm_wait = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	if (sk_stream_memory_free(sk))
+		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
+
+	while (1) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (signal_pending(current))
+			goto do_interrupted;
+		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		if (sk_stream_memory_free(sk) && !vm_wait)
+			break;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		sk->sk_write_pending++;
+		sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) &&
+						  vm_wait);
+		sk->sk_write_pending--;
+
+		if (vm_wait) {
+			vm_wait -= current_timeo;
+			current_timeo = *timeo_p;
+			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
+			    (current_timeo -= vm_wait) < 0)
+				current_timeo = 0;
+			vm_wait = 0;
+		}
+		*timeo_p = current_timeo;
+	}
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return err;
+
+do_error:
+	err = -EPIPE;
+	goto out;
+do_nonblock:
+	err = -EAGAIN;
+	goto out;
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_memory);
+
+void sk_stream_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+	sk->sk_forward_alloc += skb->truesize;
+}
+
+EXPORT_SYMBOL(sk_stream_rfree);
+
+int sk_stream_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE)
+		err = sock_error(sk) ? : -EPIPE;
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+
+EXPORT_SYMBOL(sk_stream_error);
+
+void __sk_stream_mem_reclaim(struct sock *sk)
+{
+	if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) {
+		atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+			   sk->sk_prot->memory_allocated);
+		sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
+		if (*sk->sk_prot->memory_pressure &&
+		    (atomic_read(sk->sk_prot->memory_allocated) <
+		     sk->sk_prot->sysctl_mem[0]))
+			*sk->sk_prot->memory_pressure = 0;
+	}
+}
+
+EXPORT_SYMBOL(__sk_stream_mem_reclaim);
+
+int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
+{
+	int amt = sk_stream_pages(size);
+
+	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
+	atomic_add(amt, sk->sk_prot->memory_allocated);
+
+	/* Under limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
+		if (*sk->sk_prot->memory_pressure)
+			*sk->sk_prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Over hard limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
+		sk->sk_prot->enter_memory_pressure();
+		goto suppress_allocation;
+	}
+
+	/* Under pressure. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
+		sk->sk_prot->enter_memory_pressure();
+
+	if (kind) {
+		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
+			return 1;
+	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
+		return 1;
+
+	if (!*sk->sk_prot->memory_pressure ||
+	    sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
+				sk_stream_pages(sk->sk_wmem_queued +
+						atomic_read(&sk->sk_rmem_alloc) +
+						sk->sk_forward_alloc))
+		return 1;
+
+suppress_allocation:
+
+	if (!kind) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
+	atomic_sub(amt, sk->sk_prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_mem_schedule);
+
+void sk_stream_kill_queues(struct sock *sk)
+{
+	/* First the read buffer. */
+	__skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Next, the error queue. */
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	/* Next, the write queue. */
+	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
+
+	/* Account for returned memory. */
+	sk_stream_mem_reclaim(sk);
+
+	BUG_TRAP(!sk->sk_wmem_queued);
+	BUG_TRAP(!sk->sk_forward_alloc);
+
+	/* It is _impossible_ for the backlog to contain anything
+	 * when we get here.  All user references to this socket
+	 * have gone away, only the net layer knows can touch it.
+	 */
+}
+
+EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
new file mode 100644
index 00000000000..c8be646cb19
--- /dev/null
+++ b/net/core/sysctl_net_core.c
@@ -0,0 +1,182 @@
+/* -*- linux-c -*-
+ * sysctl_net_core.c: sysctl interface to net core subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/core directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_SYSCTL
+
+extern int netdev_max_backlog;
+extern int weight_p;
+extern int no_cong_thresh;
+extern int no_cong;
+extern int lo_cong;
+extern int mod_cong;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
+extern int sysctl_core_destroy_delay;
+extern int sysctl_optmem_max;
+extern int sysctl_somaxconn;
+
+#ifdef CONFIG_NET_DIVERT
+extern char sysctl_divert_version[];
+#endif /* CONFIG_NET_DIVERT */
+
+/*
+ * This strdup() is used for creating copies of network 
+ * device names to be handed over to sysctl.
+ */
+ 
+char *net_sysctl_strdup(const char *s)
+{
+	char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
+	if (rv)
+		strcpy(rv, s);
+	return rv;
+}
+
+ctl_table core_table[] = {
+#ifdef CONFIG_NET
+	{
+		.ctl_name	= NET_CORE_WMEM_MAX,
+		.procname	= "wmem_max",
+		.data		= &sysctl_wmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_MAX,
+		.procname	= "rmem_max",
+		.data		= &sysctl_rmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_WMEM_DEFAULT,
+		.procname	= "wmem_default",
+		.data		= &sysctl_wmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_DEFAULT,
+		.procname	= "rmem_default",
+		.data		= &sysctl_rmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_DEV_WEIGHT,
+		.procname	= "dev_weight",
+		.data		= &weight_p,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MAX_BACKLOG,
+		.procname	= "netdev_max_backlog",
+		.data		= &netdev_max_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_NO_CONG_THRESH,
+		.procname	= "no_cong_thresh",
+		.data		= &no_cong_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_NO_CONG,
+		.procname	= "no_cong",
+		.data		= &no_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_LO_CONG,
+		.procname	= "lo_cong",
+		.data		= &lo_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MOD_CONG,
+		.procname	= "mod_cong",
+		.data		= &mod_cong,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MSG_COST,
+		.procname	= "message_cost",
+		.data		= &net_msg_cost,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_CORE_MSG_BURST,
+		.procname	= "message_burst",
+		.data		= &net_msg_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CORE_OPTMEM_MAX,
+		.procname	= "optmem_max",
+		.data		= &sysctl_optmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#ifdef CONFIG_NET_DIVERT
+	{
+		.ctl_name	= NET_CORE_DIVERT_VERSION,
+		.procname	= "divert_version",
+		.data		= (void *)sysctl_divert_version,
+		.maxlen		= 32,
+		.mode		= 0444,
+		.proc_handler	= &proc_dostring
+	},
+#endif /* CONFIG_NET_DIVERT */
+#endif /* CONFIG_NET */
+	{
+		.ctl_name	= NET_CORE_SOMAXCONN,
+		.procname	= "somaxconn",
+		.data		= &sysctl_somaxconn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+EXPORT_SYMBOL(net_sysctl_strdup);
+
+#endif
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 00000000000..e11a8654f36
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,155 @@
+/*
+ *	Generic address resultion entity
+ *
+ *	Authors:
+ *	net_random Alan Cox
+ *	net_ratelimit Andy Kleen
+ *
+ *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+
+/*
+  This is a maximally equidistributed combined Tausworthe generator
+  based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+
+   x_n = (s1_n ^ s2_n ^ s3_n) 
+
+   s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
+   s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
+   s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+
+   The period of this generator is about 2^88.
+
+   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+   Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
+
+   This is available on the net from L'Ecuyer's home page,
+
+   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+   ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps 
+
+   There is an erratum in the paper "Tables of Maximally
+   Equidistributed Combined LFSR Generators", Mathematics of
+   Computation, 68, 225 (1999), 261--269:
+   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+
+        ... the k_j most significant bits of z_j must be non-
+        zero, for each j. (Note: this restriction also applies to the 
+        computer code given in [4], but was mistakenly not mentioned in
+        that paper.)
+   
+   This affects the seeding procedure by imposing the requirement
+   s1 > 1, s2 > 7, s3 > 15.
+
+*/
+struct nrnd_state {
+	u32 s1, s2, s3;
+};
+
+static DEFINE_PER_CPU(struct nrnd_state, net_rand_state);
+
+static u32 __net_random(struct nrnd_state *state)
+{
+#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
+
+	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+	return (state->s1 ^ state->s2 ^ state->s3);
+}
+
+static void __net_srandom(struct nrnd_state *state, unsigned long s)
+{
+	if (s == 0)
+		s = 1;      /* default seed is 1 */
+
+#define LCG(n) (69069 * n)
+	state->s1 = LCG(s);
+	state->s2 = LCG(state->s1);
+	state->s3 = LCG(state->s2);
+
+	/* "warm it up" */
+	__net_random(state);
+	__net_random(state);
+	__net_random(state);
+	__net_random(state);
+	__net_random(state);
+	__net_random(state);
+}
+
+
+unsigned long net_random(void)
+{
+	unsigned long r;
+	struct nrnd_state *state = &get_cpu_var(net_rand_state);
+	r = __net_random(state);
+	put_cpu_var(state);
+	return r;
+}
+
+
+void net_srandom(unsigned long entropy)
+{
+	struct nrnd_state *state = &get_cpu_var(net_rand_state);
+	__net_srandom(state, state->s1^entropy);
+	put_cpu_var(state);
+}
+
+void __init net_random_init(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct nrnd_state *state = &per_cpu(net_rand_state,i);
+		__net_srandom(state, i+jiffies);
+	}
+}
+
+static int net_random_reseed(void)
+{
+	int i;
+	unsigned long seed[NR_CPUS];
+
+	get_random_bytes(seed, sizeof(seed));
+	for (i = 0; i < NR_CPUS; i++) {
+		struct nrnd_state *state = &per_cpu(net_rand_state,i);
+		__net_srandom(state, seed[i]);
+	}
+	return 0;
+}
+late_initcall(net_random_reseed);
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10;
+
+/* 
+ * All net warning printk()s should be guarded by this function.
+ */ 
+int net_ratelimit(void)
+{
+	return __printk_ratelimit(net_msg_cost, net_msg_burst);
+}
+
+EXPORT_SYMBOL(net_random);
+EXPORT_SYMBOL(net_ratelimit);
+EXPORT_SYMBOL(net_srandom);
diff --git a/net/core/wireless.c b/net/core/wireless.c
new file mode 100644
index 00000000000..750cc5daeb0
--- /dev/null
+++ b/net/core/wireless.c
@@ -0,0 +1,1459 @@
+/*
+ * This file implement the Wireless Extensions APIs.
+ *
+ * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
+ * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ *
+ * (As all part of the Linux kernel, this file is GPL)
+ */
+
+/************************** DOCUMENTATION **************************/
+/*
+ * API definition :
+ * --------------
+ * See <linux/wireless.h> for details of the APIs and the rest.
+ *
+ * History :
+ * -------
+ *
+ * v1 - 5.12.01 - Jean II
+ *	o Created this file.
+ *
+ * v2 - 13.12.01 - Jean II
+ *	o Move /proc/net/wireless stuff from net/core/dev.c to here
+ *	o Make Wireless Extension IOCTLs go through here
+ *	o Added iw_handler handling ;-)
+ *	o Added standard ioctl description
+ *	o Initial dumb commit strategy based on orinoco.c
+ *
+ * v3 - 19.12.01 - Jean II
+ *	o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call
+ *	o Add event dispatcher function
+ *	o Add event description
+ *	o Propagate events as rtnetlink IFLA_WIRELESS option
+ *	o Generate event on selected SET requests
+ *
+ * v4 - 18.04.02 - Jean II
+ *	o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1
+ *
+ * v5 - 21.06.02 - Jean II
+ *	o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup)
+ *	o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes
+ *	o Add IWEVCUSTOM for driver specific event/scanning token
+ *	o Turn on WE_STRICT_WRITE by default + kernel warning
+ *	o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num)
+ *	o Fix off-by-one in test (extra_size <= IFNAMSIZ)
+ *
+ * v6 - 9.01.03 - Jean II
+ *	o Add common spy support : iw_handler_set_spy(), wireless_spy_update()
+ *	o Add enhanced spy support : iw_handler_set_thrspy() and event.
+ *	o Add WIRELESS_EXT version display in /proc/net/wireless
+ *
+ * v6 - 18.06.04 - Jean II
+ *	o Change get_spydata() method for added safety
+ *	o Remove spy #ifdef, they are always on -> cleaner code
+ *	o Allow any size GET request if user specifies length > max
+ *		and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV
+ *	o Start migrating get_wireless_stats to struct iw_handler_def
+ *	o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus
+ * Based on patch from Pavel Roskin <proski@gnu.org> :
+ *	o Fix kernel data leak to user space in private handler handling
+ */
+
+/***************************** INCLUDES *****************************/
+
+#include <linux/config.h>		/* Not needed ??? */
+#include <linux/module.h>
+#include <linux/types.h>		/* off_t */
+#include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
+#include <linux/proc_fs.h>
+#include <linux/rtnetlink.h>		/* rtnetlink stuff */
+#include <linux/seq_file.h>
+#include <linux/init.h>			/* for __init */
+#include <linux/if_arp.h>		/* ARPHRD_ETHER */
+
+#include <linux/wireless.h>		/* Pretty obvious */
+#include <net/iw_handler.h>		/* New driver API */
+
+#include <asm/uaccess.h>		/* copy_to_user() */
+
+/**************************** CONSTANTS ****************************/
+
+/* Debugging stuff */
+#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
+#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
+#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
+
+/* Options */
+#define WE_EVENT_NETLINK	/* Propagate events using rtnetlink */
+#define WE_SET_EVENT		/* Generate an event on some set commands */
+
+/************************* GLOBAL VARIABLES *************************/
+/*
+ * You should not use global variables, because of re-entrancy.
+ * On our case, it's only const, so it's OK...
+ */
+/*
+ * Meta-data about all the standard Wireless Extension request we
+ * know about.
+ */
+static const struct iw_ioctl_description standard_ioctl[] = {
+	[SIOCSIWCOMMIT	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWNAME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_CHAR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_range),
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWPRIV	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWPRIV	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCSIWSTATS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWSTATS	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_NULL,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCGIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCSIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,
+		.max_tokens	= 1,
+	},
+	[SIOCGIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,
+		.max_tokens	= 1,
+	},
+	[SIOCSIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[SIOCGIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_AP,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_SCAN_MAX_DATA,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+	},
+	[SIOCGIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE + 1,
+	},
+	[SIOCSIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
+	},
+	[SIOCGIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
+	},
+	[SIOCSIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+};
+static const int standard_ioctl_num = (sizeof(standard_ioctl) /
+				       sizeof(struct iw_ioctl_description));
+
+/*
+ * Meta-data about all the additional standard Wireless Extension events
+ * we know about.
+ */
+static const struct iw_ioctl_description standard_event[] = {
+	[IWEVTXDROP	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[IWEVQUAL	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_QUAL,
+	},
+	[IWEVCUSTOM	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_CUSTOM_MAX,
+	},
+	[IWEVREGISTERED	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[IWEVEXPIRED	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR, 
+	},
+};
+static const int standard_event_num = (sizeof(standard_event) /
+				       sizeof(struct iw_ioctl_description));
+
+/* Size (in bytes) of the various private data types */
+static const char iw_priv_type_size[] = {
+	0,				/* IW_PRIV_TYPE_NONE */
+	1,				/* IW_PRIV_TYPE_BYTE */
+	1,				/* IW_PRIV_TYPE_CHAR */
+	0,				/* Not defined */
+	sizeof(__u32),			/* IW_PRIV_TYPE_INT */
+	sizeof(struct iw_freq),		/* IW_PRIV_TYPE_FLOAT */
+	sizeof(struct sockaddr),	/* IW_PRIV_TYPE_ADDR */
+	0,				/* Not defined */
+};
+
+/* Size (in bytes) of various events */
+static const int event_type_size[] = {
+	IW_EV_LCP_LEN,			/* IW_HEADER_TYPE_NULL */
+	0,
+	IW_EV_CHAR_LEN,			/* IW_HEADER_TYPE_CHAR */
+	0,
+	IW_EV_UINT_LEN,			/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_LEN,			/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_LEN,			/* IW_HEADER_TYPE_ADDR */
+	0,
+	IW_EV_POINT_LEN,		/* Without variable payload */
+	IW_EV_PARAM_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
+};
+
+/************************ COMMON SUBROUTINES ************************/
+/*
+ * Stuff that may be used in various place or doesn't fit in one
+ * of the section below.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Return the driver handler associated with a specific Wireless Extension.
+ * Called from various place, so make sure it remains efficient.
+ */
+static inline iw_handler get_handler(struct net_device *dev,
+				     unsigned int cmd)
+{
+	/* Don't "optimise" the following variable, it will crash */
+	unsigned int	index;		/* *MUST* be unsigned */
+
+	/* Check if we have some wireless handlers defined */
+	if(dev->wireless_handlers == NULL)
+		return NULL;
+
+	/* Try as a standard command */
+	index = cmd - SIOCIWFIRST;
+	if(index < dev->wireless_handlers->num_standard)
+		return dev->wireless_handlers->standard[index];
+
+	/* Try as a private command */
+	index = cmd - SIOCIWFIRSTPRIV;
+	if(index < dev->wireless_handlers->num_private)
+		return dev->wireless_handlers->private[index];
+
+	/* Not found */
+	return NULL;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Get statistics out of the driver
+ */
+static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+{
+	/* New location */
+	if((dev->wireless_handlers != NULL) &&
+	   (dev->wireless_handlers->get_wireless_stats != NULL))
+		return dev->wireless_handlers->get_wireless_stats(dev);
+
+	/* Old location, will be phased out in next WE */
+	return (dev->get_wireless_stats ?
+		dev->get_wireless_stats(dev) :
+		(struct iw_statistics *) NULL);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call the commit handler in the driver
+ * (if exist and if conditions are right)
+ *
+ * Note : our current commit strategy is currently pretty dumb,
+ * but we will be able to improve on that...
+ * The goal is to try to agreagate as many changes as possible
+ * before doing the commit. Drivers that will define a commit handler
+ * are usually those that need a reset after changing parameters, so
+ * we want to minimise the number of reset.
+ * A cool idea is to use a timer : at each "set" command, we re-set the
+ * timer, when the timer eventually fires, we call the driver.
+ * Hopefully, more on that later.
+ *
+ * Also, I'm waiting to see how many people will complain about the
+ * netif_running(dev) test. I'm open on that one...
+ * Hopefully, the driver will remember to do a commit in "open()" ;-)
+ */
+static inline int call_commit_handler(struct net_device *	dev)
+{
+	if((netif_running(dev)) &&
+	   (dev->wireless_handlers->standard[0] != NULL)) {
+		/* Call the commit handler on the driver */
+		return dev->wireless_handlers->standard[0](dev, NULL,
+							   NULL, NULL);
+	} else
+		return 0;		/* Command completed successfully */
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Calculate size of private arguments
+ */
+static inline int get_priv_size(__u16	args)
+{
+	int	num = args & IW_PRIV_SIZE_MASK;
+	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
+
+	return num * iw_priv_type_size[type];
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Re-calculate the size of private arguments
+ */
+static inline int adjust_priv_size(__u16		args,
+				   union iwreq_data *	wrqu)
+{
+	int	num = wrqu->data.length;
+	int	max = args & IW_PRIV_SIZE_MASK;
+	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
+
+	/* Make sure the driver doesn't goof up */
+	if (max < num)
+		num = max;
+
+	return num * iw_priv_type_size[type];
+}
+
+
+/******************** /proc/net/wireless SUPPORT ********************/
+/*
+ * The /proc/net/wireless file is a human readable user-space interface
+ * exporting various wireless specific statistics from the wireless devices.
+ * This is the most popular part of the Wireless Extensions ;-)
+ *
+ * This interface is a pure clone of /proc/net/dev (in net/core/dev.c).
+ * The content of the file is basically the content of "struct iw_statistics".
+ */
+
+#ifdef CONFIG_PROC_FS
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print one entry (line) of /proc/net/wireless
+ */
+static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
+						 struct net_device *dev)
+{
+	/* Get stats from the driver */
+	struct iw_statistics *stats = get_wireless_stats(dev);
+
+	if (stats) {
+		seq_printf(seq, "%6s: %04x  %3d%c  %3d%c  %3d%c  %6d %6d %6d "
+				"%6d %6d   %6d\n",
+			   dev->name, stats->status, stats->qual.qual,
+			   stats->qual.updated & IW_QUAL_QUAL_UPDATED
+			   ? '.' : ' ',
+			   ((__u8) stats->qual.level),
+			   stats->qual.updated & IW_QUAL_LEVEL_UPDATED
+			   ? '.' : ' ',
+			   ((__u8) stats->qual.noise),
+			   stats->qual.updated & IW_QUAL_NOISE_UPDATED
+			   ? '.' : ' ',
+			   stats->discard.nwid, stats->discard.code,
+			   stats->discard.fragment, stats->discard.retries,
+			   stats->discard.misc, stats->miss.beacon);
+		stats->qual.updated = 0;
+	}
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print info for /proc/net/wireless (print all entries)
+ */
+static int wireless_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "Inter-| sta-|   Quality        |   Discarded "
+				"packets               | Missed | WE\n"
+				" face | tus | link level noise |  nwid  "
+				"crypt   frag  retry   misc | beacon | %d\n",
+			   WIRELESS_EXT);
+	else
+		wireless_seq_printf_stats(seq, v);
+	return 0;
+}
+
+extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
+extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+extern void dev_seq_stop(struct seq_file *seq, void *v);
+
+static struct seq_operations wireless_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = wireless_seq_show,
+};
+
+static int wireless_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &wireless_seq_ops);
+}
+
+static struct file_operations wireless_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = wireless_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+int __init wireless_proc_init(void)
+{
+	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+#endif	/* CONFIG_PROC_FS */
+
+/************************** IOCTL SUPPORT **************************/
+/*
+ * The original user space API to configure all those Wireless Extensions
+ * is through IOCTLs.
+ * In there, we check if we need to call the new driver API (iw_handler)
+ * or just call the driver ioctl handler.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ *	Allow programatic access to /proc/net/wireless even if /proc
+ *	doesn't exist... Also more efficient...
+ */
+static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr)
+{
+	/* Get stats from the driver */
+	struct iw_statistics *stats;
+
+	stats = get_wireless_stats(dev);
+	if (stats != (struct iw_statistics *) NULL) {
+		struct iwreq *	wrq = (struct iwreq *)ifr;
+
+		/* Copy statistics to the user buffer */
+		if(copy_to_user(wrq->u.data.pointer, stats,
+				sizeof(struct iw_statistics)))
+			return -EFAULT;
+
+		/* Check if we need to clear the update flag */
+		if(wrq->u.data.flags != 0)
+			stats->qual.updated = 0;
+		return 0;
+	} else
+		return -EOPNOTSUPP;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Export the driver private handler definition
+ * They will be picked up by tools like iwpriv...
+ */
+static inline int ioctl_export_private(struct net_device *	dev,
+				       struct ifreq *		ifr)
+{
+	struct iwreq *				iwr = (struct iwreq *) ifr;
+
+	/* Check if the driver has something to export */
+	if((dev->wireless_handlers->num_private_args == 0) ||
+	   (dev->wireless_handlers->private_args == NULL))
+		return -EOPNOTSUPP;
+
+	/* Check NULL pointer */
+	if(iwr->u.data.pointer == NULL)
+		return -EFAULT;
+
+	/* Check if there is enough buffer up there */
+	if(iwr->u.data.length < dev->wireless_handlers->num_private_args) {
+		/* User space can't know in advance how large the buffer
+		 * needs to be. Give it a hint, so that we can support
+		 * any size buffer we want somewhat efficiently... */
+		iwr->u.data.length = dev->wireless_handlers->num_private_args;
+		return -E2BIG;
+	}
+
+	/* Set the number of available ioctls. */
+	iwr->u.data.length = dev->wireless_handlers->num_private_args;
+
+	/* Copy structure to the user buffer. */
+	if (copy_to_user(iwr->u.data.pointer,
+			 dev->wireless_handlers->private_args,
+			 sizeof(struct iw_priv_args) * iwr->u.data.length))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a standard Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ */
+static inline int ioctl_standard_call(struct net_device *	dev,
+				      struct ifreq *		ifr,
+				      unsigned int		cmd,
+				      iw_handler		handler)
+{
+	struct iwreq *				iwr = (struct iwreq *) ifr;
+	const struct iw_ioctl_description *	descr;
+	struct iw_request_info			info;
+	int					ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+		return -EOPNOTSUPP;
+	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not */
+	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), NULL);
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT)))
+			wireless_send_event(dev, cmd, &(iwr->u), NULL);
+#endif	/* WE_SET_EVENT */
+	} else {
+		char *	extra;
+		int	extra_size;
+		int	user_length = 0;
+		int	err;
+
+		/* Calculate space needed by arguments. Always allocate
+		 * for max space. Easier, and won't last long... */
+		extra_size = descr->max_tokens * descr->token_size;
+
+		/* Check what user space is giving us */
+		if(IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+			/* Check if number of token fits within bounds */
+			if(iwr->u.data.length > descr->max_tokens)
+				return -E2BIG;
+			if(iwr->u.data.length < descr->min_tokens)
+				return -EINVAL;
+		} else {
+			/* Check NULL pointer */
+			if(iwr->u.data.pointer == NULL)
+				return -EFAULT;
+			/* Save user space buffer size for checking */
+			user_length = iwr->u.data.length;
+
+			/* Don't check if user_length > max to allow forward
+			 * compatibility. The test user_length < min is
+			 * implied by the test at the end. */
+
+			/* Support for very large requests */
+			if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+			   (user_length > descr->max_tokens)) {
+				/* Allow userspace to GET more than max so
+				 * we can support any size GET requests.
+				 * There is still a limit : -ENOMEM. */
+				extra_size = user_length * descr->token_size;
+				/* Note : user_length is originally a __u16,
+				 * and token_size is controlled by us,
+				 * so extra_size won't get negative and
+				 * won't overflow... */
+			}
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Create the kernel buffer */
+		extra = kmalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+
+		/* If it is a SET, get all the extra data in here */
+		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     iwr->u.data.length *
+					     descr->token_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+			/* Check if there is enough buffer up there */
+			if(user_length < iwr->u.data.length) {
+				kfree(extra);
+				return -E2BIG;
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   iwr->u.data.length *
+					   descr->token_size);
+			if (err)
+				ret =  -EFAULT;				   
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT))) {
+			if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+				/* If the event is restricted, don't
+				 * export the payload */
+				wireless_send_event(dev, cmd, &(iwr->u), NULL);
+			else
+				wireless_send_event(dev, cmd, &(iwr->u),
+						    extra);
+		}
+#endif	/* WE_SET_EVENT */
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+	/* Call commit handler if needed and defined */
+	if(ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	/* Here, we will generate the appropriate event if needed */
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a private Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ * It's not as nice and slimline as the standard wrapper. The cause
+ * is struct iw_priv_args, which was not really designed for the
+ * job we are going here.
+ *
+ * IMPORTANT : This function prevent to set and get data on the same
+ * IOCTL and enforce the SET/GET convention. Not doing it would be
+ * far too hairy...
+ * If you need to set and get data at the same time, please don't use
+ * a iw_handler but process it in your ioctl handler (i.e. use the
+ * old driver API).
+ */
+static inline int ioctl_private_call(struct net_device *	dev,
+				     struct ifreq *		ifr,
+				     unsigned int		cmd,
+				     iw_handler		handler)
+{
+	struct iwreq *			iwr = (struct iwreq *) ifr;
+	const struct iw_priv_args *	descr = NULL;
+	struct iw_request_info		info;
+	int				extra_size = 0;
+	int				i;
+	int				ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+			descr = &(dev->wireless_handlers->private_args[i]);
+			break;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	if(descr) {
+		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
+		       dev->name, descr->name,
+		       descr->set_args, descr->get_args);
+	}
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Compute the size of the set/get arguments */
+	if(descr != NULL) {
+		if(IW_IS_SET(cmd)) {
+			int	offset = 0;	/* For sub-ioctls */
+			/* Check for sub-ioctl handler */
+			if(descr->name[0] == '\0')
+				/* Reserve one int for sub-ioctl index */
+				offset = sizeof(__u32);
+
+			/* Size of set arguments */
+			extra_size = get_priv_size(descr->set_args);
+
+			/* Does it fits in iwr ? */
+			if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+			   ((extra_size + offset) <= IFNAMSIZ))
+				extra_size = 0;
+		} else {
+			/* Size of get arguments */
+			extra_size = get_priv_size(descr->get_args);
+
+			/* Does it fits in iwr ? */
+			if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+			   (extra_size <= IFNAMSIZ))
+				extra_size = 0;
+		}
+	}
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not. */
+	if(extra_size == 0) {
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+	} else {
+		char *	extra;
+		int	err;
+
+		/* Check what user space is giving us */
+		if(IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+
+			/* Does it fits within bounds ? */
+			if(iwr->u.data.length > (descr->set_args &
+						 IW_PRIV_SIZE_MASK))
+				return -E2BIG;
+		} else {
+			/* Check NULL pointer */
+			if(iwr->u.data.pointer == NULL)
+				return -EFAULT;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Always allocate for max space. Easier, and won't last
+		 * long... */
+		extra = kmalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+
+		/* If it is a SET, get all the extra data in here */
+		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     extra_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+
+			/* Adjust for the actual length if it's variable,
+			 * avoid leaking kernel bits outside. */
+			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
+				extra_size = adjust_priv_size(descr->get_args,
+							      &(iwr->u));
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   extra_size);
+			if (err)
+				ret =  -EFAULT;				   
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+
+	/* Call commit handler if needed and defined */
+	if(ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main IOCTl dispatcher. Called from the main networking code
+ * (dev_ioctl() in net/core/dev.c).
+ * Check the type of IOCTL and call the appropriate wrapper...
+ */
+int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+{
+	struct net_device *dev;
+	iw_handler	handler;
+
+	/* Permissions are already checked in dev_ioctl() before calling us.
+	 * The copy_to/from_user() of ifr is also dealt with in there */
+
+	/* Make sure the device exist */
+	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
+		return -ENODEV;
+
+	/* A bunch of special cases, then the generic case...
+	 * Note that 'cmd' is already filtered in dev_ioctl() with
+	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
+	switch(cmd) 
+	{
+		case SIOCGIWSTATS:
+			/* Get Wireless Stats */
+			return dev_iwstats(dev, ifr);
+
+		case SIOCGIWPRIV:
+			/* Check if we have some wireless handlers defined */
+			if(dev->wireless_handlers != NULL) {
+				/* We export to user space the definition of
+				 * the private handler ourselves */
+				return ioctl_export_private(dev, ifr);
+			}
+			// ## Fall-through for old API ##
+		default:
+			/* Generic IOCTL */
+			/* Basic check */
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			/* New driver API : try to find the handler */
+			handler = get_handler(dev, cmd);
+			if(handler != NULL) {
+				/* Standard and private are not the same */
+				if(cmd < SIOCIWFIRSTPRIV)
+					return ioctl_standard_call(dev,
+								   ifr,
+								   cmd,
+								   handler);
+				else
+					return ioctl_private_call(dev,
+								  ifr,
+								  cmd,
+								  handler);
+			}
+			/* Old driver API : call driver ioctl handler */
+			if (dev->do_ioctl) {
+				return dev->do_ioctl(dev, ifr, cmd);
+			}
+			return -EOPNOTSUPP;
+	}
+	/* Not reached */
+	return -EINVAL;
+}
+
+/************************* EVENT PROCESSING *************************/
+/*
+ * Process events generated by the wireless layer or the driver.
+ * Most often, the event will be propagated through rtnetlink
+ */
+
+#ifdef WE_EVENT_NETLINK
+/* "rtnl" is defined in net/core/rtnetlink.c, but we need it here.
+ * It is declared in <linux/rtnetlink.h> */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Fill a rtnetlink message with our event data.
+ * Note that we propage only the specified event and don't dump the
+ * current wireless config. Dumping the wireless config is far too
+ * expensive (for each parameter, the driver need to query the hardware).
+ */
+static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
+					struct net_device *	dev,
+					int			type,
+					char *			event,
+					int			event_len)
+{
+	struct ifinfomsg *r;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
+	r = NLMSG_DATA(nlh);
+	r->ifi_family = AF_UNSPEC;
+	r->ifi_type = dev->type;
+	r->ifi_index = dev->ifindex;
+	r->ifi_flags = dev->flags;
+	r->ifi_change = 0;	/* Wireless changes don't affect those flags */
+
+	/* Add the wireless events in the netlink packet */
+	RTA_PUT(skb, IFLA_WIRELESS,
+		event_len, event);
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Create and broadcast and send it on the standard rtnetlink socket
+ * This is a pure clone rtmsg_ifinfo() in net/core/rtnetlink.c
+ * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
+ * within a RTM_NEWLINK event.
+ */
+static inline void rtmsg_iwinfo(struct net_device *	dev,
+				char *			event,
+				int			event_len)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_GOODSIZE;
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK,
+				  event, event_len) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+}
+#endif	/* WE_EVENT_NETLINK */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main event dispatcher. Called from other parts and drivers.
+ * Send the event on the appropriate channels.
+ * May be called from interrupt context.
+ */
+void wireless_send_event(struct net_device *	dev,
+			 unsigned int		cmd,
+			 union iwreq_data *	wrqu,
+			 char *			extra)
+{
+	const struct iw_ioctl_description *	descr = NULL;
+	int extra_len = 0;
+	struct iw_event  *event;		/* Mallocated whole event */
+	int event_len;				/* Its size */
+	int hdr_len;				/* Size of the event header */
+	/* Don't "optimise" the following variable, it will crash */
+	unsigned	cmd_index;		/* *MUST* be unsigned */
+
+	/* Get the description of the IOCTL */
+	if(cmd <= SIOCIWLAST) {
+		cmd_index = cmd - SIOCIWFIRST;
+		if(cmd_index < standard_ioctl_num)
+			descr = &(standard_ioctl[cmd_index]);
+	} else {
+		cmd_index = cmd - IWEVFIRST;
+		if(cmd_index < standard_event_num)
+			descr = &(standard_event[cmd_index]);
+	}
+	/* Don't accept unknown events */
+	if(descr == NULL) {
+		/* Note : we don't return an error to the driver, because
+		 * the driver would not know what to do about it. It can't
+		 * return an error to the user, because the event is not
+		 * initiated by a user request.
+		 * The best the driver could do is to log an error message.
+		 * We will do it ourselves instead...
+		 */
+	  	printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
+		       dev->name, cmd);
+		return;
+	}
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
+	       dev->name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Check extra parameters and set extra_len */
+	if(descr->header_type == IW_HEADER_TYPE_POINT) {
+		/* Check if number of token fits within bounds */
+		if(wrqu->data.length > descr->max_tokens) {
+		  	printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		if(wrqu->data.length < descr->min_tokens) {
+		  	printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		/* Calculate extra_len - extra is NULL for restricted events */
+		if(extra != NULL)
+			extra_len = wrqu->data.length * descr->token_size;
+#ifdef WE_EVENT_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
+#endif	/* WE_EVENT_DEBUG */
+	}
+
+	/* Total length of the event */
+	hdr_len = event_type_size[descr->header_type];
+	event_len = hdr_len + extra_len;
+
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Create temporary buffer to hold the event */
+	event = kmalloc(event_len, GFP_ATOMIC);
+	if(event == NULL)
+		return;
+
+	/* Fill event */
+	event->len = event_len;
+	event->cmd = cmd;
+	memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN);
+	if(extra != NULL)
+		memcpy(((char *) event) + hdr_len, extra, extra_len);
+
+#ifdef WE_EVENT_NETLINK
+	/* rtnetlink event channel */
+	rtmsg_iwinfo(dev, (char *) event, event_len);
+#endif	/* WE_EVENT_NETLINK */
+
+	/* Cleanup */
+	kfree(event);
+
+	return;		/* Always success, I guess ;-) */
+}
+
+/********************** ENHANCED IWSPY SUPPORT **********************/
+/*
+ * In the old days, the driver was handling spy support all by itself.
+ * Now, the driver can delegate this task to Wireless Extensions.
+ * It needs to use those standard spy iw_handler in struct iw_handler_def,
+ * push data to us via wireless_spy_update() and include struct iw_spy_data
+ * in its private part (and advertise it in iw_handler_def->spy_offset).
+ * One of the main advantage of centralising spy support here is that
+ * it becomes much easier to improve and extend it without having to touch
+ * the drivers. One example is the addition of the Spy-Threshold events.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Return the pointer to the spy data in the driver.
+ * Because this is called on the Rx path via wireless_spy_update(),
+ * we want it to be efficient...
+ */
+static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+{
+	/* This is the new way */
+	if(dev->wireless_data)
+		return(dev->wireless_data->spy_data);
+
+	/* This is the old way. Doesn't work for multi-headed drivers.
+	 * It will be removed in the next version of WE. */
+	return (dev->priv + dev->wireless_handlers->spy_offset);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set Spy List
+ */
+int iw_handler_set_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct sockaddr *	address = (struct sockaddr *) extra;
+
+	if(!dev->wireless_data)
+		/* Help user know that driver needs updating */
+		printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n",
+		       dev->name);
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return -EOPNOTSUPP;
+
+	/* Disable spy collection while we copy the addresses.
+	 * While we copy addresses, any call to wireless_spy_update()
+	 * will NOP. This is OK, as anyway the addresses are changing. */
+	spydata->spy_number = 0;
+
+	/* We want to operate without locking, because wireless_spy_update()
+	 * most likely will happen in the interrupt handler, and therefore
+	 * have its own locking constraints and needs performance.
+	 * The rtnl_lock() make sure we don't race with the other iw_handlers.
+	 * This make sure wireless_spy_update() "see" that the spy list
+	 * is temporarily disabled. */
+	wmb();
+
+	/* Are there are addresses to copy? */
+	if(wrqu->data.length > 0) {
+		int i;
+
+		/* Copy addresses */
+		for(i = 0; i < wrqu->data.length; i++)
+			memcpy(spydata->spy_address[i], address[i].sa_data,
+			       ETH_ALEN);
+		/* Reset stats */
+		memset(spydata->spy_stat, 0,
+		       sizeof(struct iw_quality) * IW_MAX_SPY);
+
+#ifdef WE_SPY_DEBUG
+		printk(KERN_DEBUG "iw_handler_set_spy() :  offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length);
+		for (i = 0; i < wrqu->data.length; i++)
+			printk(KERN_DEBUG
+			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
+			       spydata->spy_address[i][0],
+			       spydata->spy_address[i][1],
+			       spydata->spy_address[i][2],
+			       spydata->spy_address[i][3],
+			       spydata->spy_address[i][4],
+			       spydata->spy_address[i][5]);
+#endif	/* WE_SPY_DEBUG */
+	}
+
+	/* Make sure above is updated before re-enabling */
+	wmb();
+
+	/* Enable addresses */
+	spydata->spy_number = wrqu->data.length;
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get Spy List
+ */
+int iw_handler_get_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct sockaddr *	address = (struct sockaddr *) extra;
+	int			i;
+
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return -EOPNOTSUPP;
+
+	wrqu->data.length = spydata->spy_number;
+
+	/* Copy addresses. */
+	for(i = 0; i < spydata->spy_number; i++) 	{
+		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
+		address[i].sa_family = AF_UNIX;
+	}
+	/* Copy stats to the user buffer (just after). */
+	if(spydata->spy_number > 0)
+		memcpy(extra  + (sizeof(struct sockaddr) *spydata->spy_number),
+		       spydata->spy_stat,
+		       sizeof(struct iw_quality) * spydata->spy_number);
+	/* Reset updated flags. */
+	for(i = 0; i < spydata->spy_number; i++)
+		spydata->spy_stat[i].updated = 0;
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set spy threshold
+ */
+int iw_handler_set_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
+
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return -EOPNOTSUPP;
+
+	/* Just do it */
+	memcpy(&(spydata->spy_thr_low), &(threshold->low),
+	       2 * sizeof(struct iw_quality));
+
+	/* Clear flag */
+	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
+#endif	/* WE_SPY_DEBUG */
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get spy threshold
+ */
+int iw_handler_get_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
+
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return -EOPNOTSUPP;
+
+	/* Just do it */
+	memcpy(&(threshold->low), &(spydata->spy_thr_low),
+	       2 * sizeof(struct iw_quality));
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Prepare and send a Spy Threshold event
+ */
+static void iw_send_thrspy_event(struct net_device *	dev,
+				 struct iw_spy_data *	spydata,
+				 unsigned char *	address,
+				 struct iw_quality *	wstats)
+{
+	union iwreq_data	wrqu;
+	struct iw_thrspy	threshold;
+
+	/* Init */
+	wrqu.data.length = 1;
+	wrqu.data.flags = 0;
+	/* Copy address */
+	memcpy(threshold.addr.sa_data, address, ETH_ALEN);
+	threshold.addr.sa_family = ARPHRD_ETHER;
+	/* Copy stats */
+	memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+	/* Copy also thresholds */
+	memcpy(&(threshold.low), &(spydata->spy_thr_low),
+	       2 * sizeof(struct iw_quality));
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
+	       threshold.addr.sa_data[0],
+	       threshold.addr.sa_data[1],
+	       threshold.addr.sa_data[2],
+	       threshold.addr.sa_data[3],
+	       threshold.addr.sa_data[4],
+	       threshold.addr.sa_data[5], threshold.qual.level);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Send event to user space */
+	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call for the driver to update the spy data.
+ * For now, the spy data is a simple array. As the size of the array is
+ * small, this is good enough. If we wanted to support larger number of
+ * spy addresses, we should use something more efficient...
+ */
+void wireless_spy_update(struct net_device *	dev,
+			 unsigned char *	address,
+			 struct iw_quality *	wstats)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	int			i;
+	int			match = -1;
+
+	/* Make sure driver is not buggy or using the old API */
+	if(!spydata)
+		return;
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "wireless_spy_update() :  offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Update all records that match */
+	for(i = 0; i < spydata->spy_number; i++)
+		if(!memcmp(address, spydata->spy_address[i], ETH_ALEN)) {
+			memcpy(&(spydata->spy_stat[i]), wstats,
+			       sizeof(struct iw_quality));
+			match = i;
+		}
+
+	/* Generate an event if we cross the spy threshold.
+	 * To avoid event storms, we have a simple hysteresis : we generate
+	 * event only when we go under the low threshold or above the
+	 * high threshold. */
+	if(match >= 0) {
+		if(spydata->spy_thr_under[match]) {
+			if(wstats->level > spydata->spy_thr_high.level) {
+				spydata->spy_thr_under[match] = 0;
+				iw_send_thrspy_event(dev, spydata,
+						     address, wstats);
+			}
+		} else {
+			if(wstats->level < spydata->spy_thr_low.level) {
+				spydata->spy_thr_under[match] = 1;
+				iw_send_thrspy_event(dev, spydata,
+						     address, wstats);
+			}
+		}
+	}
+}
+
+EXPORT_SYMBOL(iw_handler_get_spy);
+EXPORT_SYMBOL(iw_handler_get_thrspy);
+EXPORT_SYMBOL(iw_handler_set_spy);
+EXPORT_SYMBOL(iw_handler_set_thrspy);
+EXPORT_SYMBOL(wireless_send_event);
+EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
new file mode 100644
index 00000000000..2101da542ba
--- /dev/null
+++ b/net/decnet/Kconfig
@@ -0,0 +1,27 @@
+#
+# DECnet configuration
+#
+config DECNET_ROUTER
+	bool "DECnet: router support (EXPERIMENTAL)"
+	depends on DECNET && EXPERIMENTAL
+	---help---
+	  Add support for turning your DECnet Endnode into a level 1 or 2
+	  router.  This is an experimental, but functional option.  If you
+	  do say Y here, then make sure that you also say Y to "Kernel/User
+	  network link driver", "Routing messages" and "Network packet
+	  filtering".  The first two are required to allow configuration via
+	  rtnetlink (you will need Alexey Kuznetsov's iproute2 package
+	  from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet
+	  filtering" option will be required for the forthcoming routing daemon
+	  to work.
+
+	  See <file:Documentation/networking/decnet.txt> for more information.
+
+config DECNET_ROUTE_FWMARK
+	bool "DECnet: use FWMARK value as routing key (EXPERIMENTAL)"
+	depends on DECNET_ROUTER && NETFILTER
+	help
+	  If you say Y here, you will be able to specify different routes for
+	  packets with different FWMARK ("firewalling mark") values
+	  (see ipchains(8), "-m" argument).
+
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
new file mode 100644
index 00000000000..e44003af71f
--- /dev/null
+++ b/net/decnet/Makefile
@@ -0,0 +1,10 @@
+
+obj-$(CONFIG_DECNET) += decnet.o
+
+decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \
+	    dn_route.o dn_dev.o dn_neigh.o dn_timer.o
+decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
+decnet-y += sysctl_net_decnet.o
+
+obj-$(CONFIG_NETFILTER) += netfilter/
+
diff --git a/net/decnet/README b/net/decnet/README
new file mode 100644
index 00000000000..60e7ec88c81
--- /dev/null
+++ b/net/decnet/README
@@ -0,0 +1,8 @@
+                       Linux DECnet Project
+                      ======================
+
+The documentation for this kernel subsystem is available in the
+Documentation/networking subdirectory of this distribution and also
+on line at http://www.chygwyn.com/DECnet/
+
+Steve Whitehouse <SteveW@ACM.org>
diff --git a/net/decnet/TODO b/net/decnet/TODO
new file mode 100644
index 00000000000..ebb5ac69d12
--- /dev/null
+++ b/net/decnet/TODO
@@ -0,0 +1,41 @@
+Steve's quick list of things that need finishing off:
+[they are in no particular order and range from the trivial to the long winded]
+
+ o Proper timeouts on each neighbour (in routing mode) rather than
+   just the 60 second On-Ethernet cache value.
+
+ o Support for X.25 linklayer
+
+ o Support for DDCMP link layer
+
+ o The DDCMP device itself
+
+ o PPP support (rfc1762)
+
+ o Lots of testing with real applications
+
+ o Verify errors etc. against POSIX 1003.1g (draft)
+
+ o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g) 
+   [maybe this should be done at socket level... the control data in the
+    send/recvmsg() calls should simply be a vector of set/getsockopt()
+    calls]
+
+ o check MSG_CTRUNC is set where it should be.
+
+ o Find all the commonality between DECnet and IPv4 routing code and extract 
+   it into a small library of routines. [probably a project for 2.7.xx]
+
+ o Add perfect socket hashing - an idea suggested by Paul Koning. Currently
+   we have a half-way house scheme which seems to work reasonably well, but
+   the full scheme is still worth implementing, its not not top of my list
+   right now.
+
+ o Add session control message flow control
+
+ o Add NSP message flow control
+
+ o DECnet sendpages() function
+
+ o AIO for DECnet
+
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
new file mode 100644
index 00000000000..29bb3cd2196
--- /dev/null
+++ b/net/decnet/af_decnet.c
@@ -0,0 +1,2405 @@
+
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Socket Layer Interface
+ *
+ * Authors:     Eduardo Marcelo Serrat <emserrat@geocities.com>
+ *              Patrick Caulfield <patrick@pandh.demon.co.uk>
+ *
+ * Changes:
+ *        Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's
+ *                          version of the code. Original copyright preserved
+ *                          below.
+ *        Steve Whitehouse: Some bug fixes, cleaning up some code to make it
+ *                          compatible with my routing layer.
+ *        Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick
+ *                          Caulfield.
+ *        Steve Whitehouse: Further bug fixes, checking module code still works
+ *                          with new routing layer.
+ *        Steve Whitehouse: Additional set/get_sockopt() calls.
+ *        Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new
+ *                          code.
+ *        Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like
+ *                          way. Didn't manage it entirely, but its better.
+ *        Steve Whitehouse: ditto for sendmsg().
+ *        Steve Whitehouse: A selection of bug fixes to various things.
+ *        Steve Whitehouse: Added TIOCOUTQ ioctl.
+ *        Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username.
+ *        Steve Whitehouse: Fixes to connect() error returns.
+ *       Patrick Caulfield: Fixes to delayed acceptance logic.
+ *         David S. Miller: New socket locking
+ *        Steve Whitehouse: Socket list hashing/locking
+ *         Arnaldo C. Melo: use capable, not suser
+ *        Steve Whitehouse: Removed unused code. Fix to use sk->allocation
+ *                          when required.
+ *       Patrick Caulfield: /proc/net/decnet now has object name/number
+ *        Steve Whitehouse: Fixed local port allocation, hashed sk list
+ *          Matthew Wilcox: Fixes for dn_ioctl()
+ *        Steve Whitehouse: New connect/accept logic to allow timeouts and
+ *                          prepare for sendpage etc.
+ */
+
+
+/******************************************************************************
+    (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
+    
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+HISTORY:
+
+Version           Kernel     Date       Author/Comments
+-------           ------     ----       ---------------
+Version 0.0.1     2.0.30    01-dic-97	Eduardo Marcelo Serrat
+					(emserrat@geocities.com)
+
+                                        First Development of DECnet Socket La-
+					yer for Linux. Only supports outgoing
+					connections.
+
+Version 0.0.2	  2.1.105   20-jun-98   Patrick J. Caulfield
+					(patrick@pandh.demon.co.uk)
+
+					Port to new kernel development version.
+
+Version 0.0.3     2.1.106   25-jun-98   Eduardo Marcelo Serrat
+					(emserrat@geocities.com)
+					_
+                                        Added support for incoming connections
+                                        so we can start developing server apps
+                                        on Linux.
+					-
+					Module Support
+Version 0.0.4     2.1.109   21-jul-98   Eduardo Marcelo Serrat
+                                       (emserrat@geocities.com)
+                                       _
+                                        Added support for X11R6.4. Now we can 
+                                        use DECnet transport for X on Linux!!!
+                                       -
+Version 0.0.5    2.1.110   01-aug-98   Eduardo Marcelo Serrat
+                                       (emserrat@geocities.com)
+                                       Removed bugs on flow control
+                                       Removed bugs on incoming accessdata
+                                       order
+                                       -
+Version 0.0.6    2.1.110   07-aug-98   Eduardo Marcelo Serrat
+                                       dn_recvmsg fixes
+
+                                        Patrick J. Caulfield
+                                       dn_bind fixes
+*******************************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/route.h>
+#include <linux/netfilter.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/flow.h>
+#include <asm/system.h>
+#include <asm/ioctls.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/dn.h>
+#include <net/dn_nsp.h>
+#include <net/dn_dev.h>
+#include <net/dn_route.h>
+#include <net/dn_fib.h>
+#include <net/dn_neigh.h>
+
+struct dn_sock {
+	struct sock sk;
+	struct dn_scp scp;
+};
+
+static void dn_keepalive(struct sock *sk);
+
+#define DN_SK_HASH_SHIFT 8
+#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT)
+#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
+
+
+static struct proto_ops dn_proto_ops;
+static DEFINE_RWLOCK(dn_hash_lock);
+static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
+static struct hlist_head dn_wild_sk;
+
+static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
+static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
+
+static struct hlist_head *dn_find_list(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->addr.sdn_flags & SDF_WILD)
+		return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
+
+	return &dn_sk_hash[scp->addrloc & DN_SK_HASH_MASK];
+}
+
+/* 
+ * Valid ports are those greater than zero and not already in use.
+ */
+static int check_port(unsigned short port)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	if (port == 0)
+		return -1;
+
+	sk_for_each(sk, node, &dn_sk_hash[port & DN_SK_HASH_MASK]) {
+		struct dn_scp *scp = DN_SK(sk);
+		if (scp->addrloc == port)
+			return -1;
+	}
+	return 0;
+}
+
+static unsigned short port_alloc(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+static unsigned short port = 0x2000;
+	unsigned short i_port = port;
+
+	while(check_port(++port) != 0) {
+		if (port == i_port)
+			return 0;
+	}
+
+	scp->addrloc = port;
+
+	return 1;
+}
+
+/*
+ * Since this is only ever called from user
+ * level, we don't need a write_lock() version
+ * of this.
+ */
+static int dn_hash_sock(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct hlist_head *list;
+	int rv = -EUSERS;
+
+	BUG_ON(sk_hashed(sk));
+
+	write_lock_bh(&dn_hash_lock);
+	
+	if (!scp->addrloc && !port_alloc(sk))
+		goto out;
+
+	rv = -EADDRINUSE;
+	if ((list = dn_find_list(sk)) == NULL)
+		goto out;
+
+	sk_add_node(sk, list);
+	rv = 0;
+out:
+	write_unlock_bh(&dn_hash_lock);
+	return rv;
+}
+
+static void dn_unhash_sock(struct sock *sk)
+{
+	write_lock(&dn_hash_lock);
+	sk_del_node_init(sk);
+	write_unlock(&dn_hash_lock);
+}
+
+static void dn_unhash_sock_bh(struct sock *sk)
+{
+	write_lock_bh(&dn_hash_lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&dn_hash_lock);
+}
+
+static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
+{
+	int i;
+	unsigned hash = addr->sdn_objnum;
+
+	if (hash == 0) {
+		hash = addr->sdn_objnamel;
+		for(i = 0; i < dn_ntohs(addr->sdn_objnamel); i++) {
+			hash ^= addr->sdn_objname[i];
+			hash ^= (hash << 3);
+		}
+	}
+
+	return &dn_sk_hash[hash & DN_SK_HASH_MASK];
+}
+
+/*
+ * Called to transform a socket from bound (i.e. with a local address)
+ * into a listening socket (doesn't need a local port number) and rehashes
+ * based upon the object name/number.
+ */
+static void dn_rehash_sock(struct sock *sk)
+{
+	struct hlist_head *list;
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->addr.sdn_flags & SDF_WILD)
+		return;
+
+	write_lock_bh(&dn_hash_lock);
+	sk_del_node_init(sk);
+	DN_SK(sk)->addrloc = 0;
+	list = listen_hash(&DN_SK(sk)->addr);
+	sk_add_node(sk, list);
+	write_unlock_bh(&dn_hash_lock);
+}
+
+int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type)
+{
+	int len = 2;
+
+	*buf++ = type;
+
+	switch(type) {
+		case 0:
+			*buf++ = sdn->sdn_objnum;
+			break;
+		case 1:
+			*buf++ = 0;
+			*buf++ = dn_ntohs(sdn->sdn_objnamel);
+			memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel));
+			len = 3 + dn_ntohs(sdn->sdn_objnamel);
+			break;
+		case 2:
+			memset(buf, 0, 5);
+			buf += 5;
+			*buf++ = dn_ntohs(sdn->sdn_objnamel);
+			memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel));
+			len = 7 + dn_ntohs(sdn->sdn_objnamel);
+			break;
+	}
+
+	return len;
+}
+
+/*
+ * On reception of usernames, we handle types 1 and 0 for destination
+ * addresses only. Types 2 and 4 are used for source addresses, but the
+ * UIC, GIC are ignored and they are both treated the same way. Type 3
+ * is never used as I've no idea what its purpose might be or what its
+ * format is.
+ */
+int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt)
+{
+	unsigned char type;
+	int size = len;
+	int namel = 12;
+
+	sdn->sdn_objnum = 0;
+	sdn->sdn_objnamel = dn_htons(0);
+	memset(sdn->sdn_objname, 0, DN_MAXOBJL);
+
+	if (len < 2)
+		return -1;
+
+	len -= 2;
+	*fmt = *data++;
+	type = *data++;
+
+	switch(*fmt) {
+		case 0:
+			sdn->sdn_objnum = type;
+			return 2;
+		case 1:
+			namel = 16;
+			break;
+		case 2:
+			len  -= 4;
+			data += 4;
+			break;
+		case 4:
+			len  -= 8;
+			data += 8;
+			break;
+		default:
+			return -1;
+	}
+
+	len -= 1;
+
+	if (len < 0)
+		return -1;
+
+	sdn->sdn_objnamel = dn_htons(*data++);
+	len -= dn_ntohs(sdn->sdn_objnamel);
+
+	if ((len < 0) || (dn_ntohs(sdn->sdn_objnamel) > namel))
+		return -1;
+
+	memcpy(sdn->sdn_objname, data, dn_ntohs(sdn->sdn_objnamel));
+
+	return size - len;
+}
+
+struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
+{
+	struct hlist_head *list = listen_hash(addr);
+	struct hlist_node *node;
+	struct sock *sk;
+
+	read_lock(&dn_hash_lock);
+	sk_for_each(sk, node, list) {
+		struct dn_scp *scp = DN_SK(sk);
+		if (sk->sk_state != TCP_LISTEN)
+			continue;
+		if (scp->addr.sdn_objnum) {
+			if (scp->addr.sdn_objnum != addr->sdn_objnum)
+				continue;
+		} else {
+			if (addr->sdn_objnum)
+				continue;
+			if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
+				continue;
+			if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, dn_ntohs(addr->sdn_objnamel)) != 0)
+				continue;
+		}
+		sock_hold(sk);
+		read_unlock(&dn_hash_lock);
+		return sk;
+	}
+
+	sk = sk_head(&dn_wild_sk);
+	if (sk) {
+	       	if (sk->sk_state == TCP_LISTEN)
+			sock_hold(sk);
+		else
+			sk = NULL;
+	}
+
+	read_unlock(&dn_hash_lock);
+	return sk;
+}
+
+struct sock *dn_find_by_skb(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct sock *sk;
+	struct hlist_node *node;
+	struct dn_scp *scp;
+
+	read_lock(&dn_hash_lock);
+	sk_for_each(sk, node, &dn_sk_hash[cb->dst_port & DN_SK_HASH_MASK]) {
+		scp = DN_SK(sk);
+		if (cb->src != dn_saddr2dn(&scp->peer))
+			continue;
+		if (cb->dst_port != scp->addrloc)
+			continue;
+		if (scp->addrrem && (cb->src_port != scp->addrrem))
+			continue;
+		sock_hold(sk);
+		goto found;
+	}
+	sk = NULL;
+found:
+	read_unlock(&dn_hash_lock);
+	return sk;
+}
+
+
+
+static void dn_destruct(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	skb_queue_purge(&scp->data_xmit_queue);
+	skb_queue_purge(&scp->other_xmit_queue);
+	skb_queue_purge(&scp->other_receive_queue);
+
+	dst_release(xchg(&sk->sk_dst_cache, NULL));
+}
+
+static struct proto dn_proto = {
+	.name	  = "DECNET",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct dn_sock),
+};
+
+static struct sock *dn_alloc_sock(struct socket *sock, int gfp)
+{
+	struct dn_scp *scp;
+	struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1);
+
+	if  (!sk)
+		goto out;
+
+	if (sock)
+		sock->ops = &dn_proto_ops;
+	sock_init_data(sock, sk);
+
+	sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
+	sk->sk_destruct    = dn_destruct;
+	sk->sk_no_check    = 1;
+	sk->sk_family      = PF_DECnet;
+	sk->sk_protocol    = 0;
+	sk->sk_allocation  = gfp;
+
+	/* Initialization of DECnet Session Control Port		*/
+	scp = DN_SK(sk);
+	scp->state	= DN_O;		/* Open			*/
+	scp->numdat	= 1;		/* Next data seg to tx	*/
+	scp->numoth	= 1;		/* Next oth data to tx  */
+	scp->ackxmt_dat = 0;		/* Last data seg ack'ed */
+	scp->ackxmt_oth = 0;		/* Last oth data ack'ed */
+	scp->ackrcv_dat = 0;		/* Highest data ack recv*/
+	scp->ackrcv_oth = 0;		/* Last oth data ack rec*/
+        scp->flowrem_sw = DN_SEND;
+	scp->flowloc_sw = DN_SEND;
+	scp->flowrem_dat = 0;
+	scp->flowrem_oth = 1;
+	scp->flowloc_dat = 0;
+	scp->flowloc_oth = 1;
+	scp->services_rem = 0;
+	scp->services_loc = 1 | NSP_FC_NONE;
+	scp->info_rem = 0;
+	scp->info_loc = 0x03; /* NSP version 4.1 */
+	scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */
+	scp->nonagle = 0;
+	scp->multi_ireq = 1;
+	scp->accept_mode = ACC_IMMED;
+	scp->addr.sdn_family    = AF_DECnet;
+	scp->peer.sdn_family    = AF_DECnet;
+	scp->accessdata.acc_accl = 5;
+	memcpy(scp->accessdata.acc_acc, "LINUX", 5);
+
+	scp->max_window   = NSP_MAX_WINDOW;
+	scp->snd_window   = NSP_MIN_WINDOW;
+	scp->nsp_srtt     = NSP_INITIAL_SRTT;
+	scp->nsp_rttvar   = NSP_INITIAL_RTTVAR;
+	scp->nsp_rxtshift = 0;
+
+	skb_queue_head_init(&scp->data_xmit_queue);
+	skb_queue_head_init(&scp->other_xmit_queue);
+	skb_queue_head_init(&scp->other_receive_queue);
+
+	scp->persist = 0;
+	scp->persist_fxn = NULL;
+	scp->keepalive = 10 * HZ;
+	scp->keepalive_fxn = dn_keepalive;
+
+	init_timer(&scp->delack_timer);
+	scp->delack_pending = 0;
+	scp->delack_fxn = dn_nsp_delayed_ack;
+
+	dn_start_slow_timer(sk);
+out:
+	return sk;
+}
+
+/*
+ * Keepalive timer.
+ * FIXME: Should respond to SO_KEEPALIVE etc.
+ */
+static void dn_keepalive(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	/*
+	 * By checking the other_data transmit queue is empty
+	 * we are double checking that we are not sending too
+	 * many of these keepalive frames.
+	 */
+	if (skb_queue_len(&scp->other_xmit_queue) == 0)
+		dn_nsp_send_link(sk, DN_NOCHANGE, 0);
+}
+
+
+/*
+ * Timer for shutdown/destroyed sockets.
+ * When socket is dead & no packets have been sent for a
+ * certain amount of time, they are removed by this
+ * routine. Also takes care of sending out DI & DC
+ * frames at correct times.
+ */
+int dn_destroy_timer(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	scp->persist = dn_nsp_persist(sk);
+
+	switch(scp->state) {
+		case DN_DI:
+			dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
+			if (scp->nsp_rxtshift >= decnet_di_count)
+				scp->state = DN_CN;
+			return 0;
+
+		case DN_DR:
+			dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
+			if (scp->nsp_rxtshift >= decnet_dr_count)
+				scp->state = DN_DRC;
+			return 0;
+
+		case DN_DN:
+			if (scp->nsp_rxtshift < decnet_dn_count) {
+				/* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */
+				dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
+				return 0;
+			}
+	}
+
+	scp->persist = (HZ * decnet_time_wait);
+
+	if (sk->sk_socket)
+		return 0;
+
+	if ((jiffies - scp->stamp) >= (HZ * decnet_time_wait)) {
+		dn_unhash_sock(sk);
+		sock_put(sk);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void dn_destroy_sock(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	scp->nsp_rxtshift = 0; /* reset back off */
+
+	if (sk->sk_socket) {
+		if (sk->sk_socket->state != SS_UNCONNECTED)
+			sk->sk_socket->state = SS_DISCONNECTING;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+
+	switch(scp->state) {
+		case DN_DN:
+			dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
+					 sk->sk_allocation);
+			scp->persist_fxn = dn_destroy_timer;
+			scp->persist = dn_nsp_persist(sk);
+			break;
+		case DN_CR:
+			scp->state = DN_DR;
+			goto disc_reject;
+		case DN_RUN:
+			scp->state = DN_DI;
+		case DN_DI:
+		case DN_DR:
+disc_reject:
+			dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+		case DN_NC:
+		case DN_NR:
+		case DN_RJ:
+		case DN_DIC:
+		case DN_CN:
+		case DN_DRC:
+		case DN_CI:
+		case DN_CD:
+			scp->persist_fxn = dn_destroy_timer;
+			scp->persist = dn_nsp_persist(sk);
+			break;
+		default:
+			printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+		case DN_O:
+			dn_stop_slow_timer(sk);
+
+			dn_unhash_sock_bh(sk);
+			sock_put(sk);
+
+			break;
+	}
+}
+
+char *dn_addr2asc(dn_address addr, char *buf)
+{
+	unsigned short node, area;
+
+	node = addr & 0x03ff;
+	area = addr >> 10;
+	sprintf(buf, "%hd.%hd", area, node);
+
+	return buf;
+}
+
+
+
+static int dn_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+
+	switch(sock->type) {
+		case SOCK_SEQPACKET:
+			if (protocol != DNPROTO_NSP)
+				return -EPROTONOSUPPORT;
+			break;
+		case SOCK_STREAM:
+			break;
+		default:
+			return -ESOCKTNOSUPPORT;
+	}
+
+
+	if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) 
+		return -ENOBUFS;
+
+	sk->sk_protocol = protocol;
+
+	return 0;
+}
+
+
+static int
+dn_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		sock_orphan(sk);
+		sock_hold(sk);
+		lock_sock(sk);
+		dn_destroy_sock(sk);
+		release_sock(sk);
+		sock_put(sk);
+	}
+
+        return 0;
+}
+
+static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
+	struct net_device *dev;
+	int rv;
+
+	if (addr_len != sizeof(struct sockaddr_dn))
+		return -EINVAL;
+
+	if (saddr->sdn_family != AF_DECnet)
+		return -EINVAL;
+
+	if (dn_ntohs(saddr->sdn_nodeaddrl) && (dn_ntohs(saddr->sdn_nodeaddrl) != 2))
+		return -EINVAL;
+
+	if (dn_ntohs(saddr->sdn_objnamel) > DN_MAXOBJL)
+		return -EINVAL;
+
+	if (saddr->sdn_flags & ~SDF_WILD)
+		return -EINVAL;
+
+#if 1
+	if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
+	    (saddr->sdn_flags & SDF_WILD)))
+		return -EACCES;
+#else
+	/*
+	 * Maybe put the default actions in the default security ops for
+	 * dn_prot_sock ? Would be nice if the capable call would go there
+	 * too.
+	 */
+	if (security_dn_prot_sock(saddr) &&
+	    !capable(CAP_NET_BIND_SERVICE) || 
+	    saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD))
+		return -EACCES;
+#endif
+
+
+	if (!(saddr->sdn_flags & SDF_WILD)) {
+		if (dn_ntohs(saddr->sdn_nodeaddrl)) {
+			read_lock(&dev_base_lock);
+			for(dev = dev_base; dev; dev = dev->next) {
+				if (!dev->dn_ptr)
+					continue;
+				if (dn_dev_islocal(dev, dn_saddr2dn(saddr)))
+					break;
+			}
+			read_unlock(&dev_base_lock);
+			if (dev == NULL)
+				return -EADDRNOTAVAIL;
+		}
+	}
+
+	rv = -EINVAL;
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		memcpy(&scp->addr, saddr, addr_len);
+		sock_reset_flag(sk, SOCK_ZAPPED);
+
+		rv = dn_hash_sock(sk);
+		if (rv)
+			sock_set_flag(sk, SOCK_ZAPPED);
+	}
+	release_sock(sk);
+
+        return rv;
+}
+
+
+static int dn_auto_bind(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	int rv;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	scp->addr.sdn_flags  = 0;
+	scp->addr.sdn_objnum = 0;
+
+	/*
+	 * This stuff is to keep compatibility with Eduardo's
+	 * patch. I hope I can dispense with it shortly...
+	 */
+	if ((scp->accessdata.acc_accl != 0) &&
+		(scp->accessdata.acc_accl <= 12)) {
+	
+		scp->addr.sdn_objnamel = dn_htons(scp->accessdata.acc_accl);
+		memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, dn_ntohs(scp->addr.sdn_objnamel));
+
+		scp->accessdata.acc_accl = 0;
+		memset(scp->accessdata.acc_acc, 0, 40);
+	}
+	/* End of compatibility stuff */
+
+	scp->addr.sdn_add.a_len = dn_htons(2);
+	rv = dn_dev_bind_default((dn_address *)scp->addr.sdn_add.a_addr);
+	if (rv == 0) {
+		rv = dn_hash_sock(sk);
+		if (rv)
+			sock_set_flag(sk, SOCK_ZAPPED);
+	}
+
+	return rv;
+}
+
+static int dn_confirm_accept(struct sock *sk, long *timeo, int allocation)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	DEFINE_WAIT(wait);
+	int err;
+
+	if (scp->state != DN_CR)
+		return -EINVAL;
+
+	scp->state = DN_CC;
+	scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
+	dn_send_conn_conf(sk, allocation);
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	for(;;) {
+		release_sock(sk);
+		if (scp->state == DN_CC)
+			*timeo = schedule_timeout(*timeo);
+		lock_sock(sk);
+		err = 0;
+		if (scp->state == DN_RUN)
+			break;
+		err = sock_error(sk);
+		if (err)
+			break;
+		err = sock_intr_errno(*timeo);
+		if (signal_pending(current))
+			break;
+		err = -EAGAIN;
+		if (!*timeo)
+			break;
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	if (err == 0) {
+		sk->sk_socket->state = SS_CONNECTED;
+	} else if (scp->state != DN_CC) {
+		sk->sk_socket->state = SS_UNCONNECTED;
+	}
+	return err;
+}
+
+static int dn_wait_run(struct sock *sk, long *timeo)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	DEFINE_WAIT(wait);
+	int err = 0;
+
+	if (scp->state == DN_RUN)
+		goto out;
+
+	if (!*timeo)
+		return -EALREADY;
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	for(;;) {
+		release_sock(sk);
+		if (scp->state == DN_CI || scp->state == DN_CC)
+			*timeo = schedule_timeout(*timeo);
+		lock_sock(sk);
+		err = 0;
+		if (scp->state == DN_RUN)
+			break;
+		err = sock_error(sk);
+		if (err)
+			break;
+		err = sock_intr_errno(*timeo);
+		if (signal_pending(current))
+			break;
+		err = -ETIMEDOUT;
+		if (!*timeo)
+			break;
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+out:
+	if (err == 0) {
+		sk->sk_socket->state = SS_CONNECTED;
+	} else if (scp->state != DN_CI && scp->state != DN_CC) {
+		sk->sk_socket->state = SS_UNCONNECTED;
+	}
+	return err;
+}
+
+static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
+{
+	struct socket *sock = sk->sk_socket;
+	struct dn_scp *scp = DN_SK(sk);
+	int err = -EISCONN;
+	struct flowi fl;
+
+	if (sock->state == SS_CONNECTED)
+		goto out;
+
+	if (sock->state == SS_CONNECTING) {
+		err = 0;
+		if (scp->state == DN_RUN) {
+			sock->state = SS_CONNECTED;
+			goto out;
+		}
+		err = -ECONNREFUSED;
+		if (scp->state != DN_CI && scp->state != DN_CC) {
+			sock->state = SS_UNCONNECTED;
+			goto out;
+		}
+		return dn_wait_run(sk, timeo);
+	}
+
+	err = -EINVAL;
+	if (scp->state != DN_O)
+		goto out;
+
+	if (addr == NULL || addrlen != sizeof(struct sockaddr_dn))
+		goto out;
+	if (addr->sdn_family != AF_DECnet)
+		goto out;
+	if (addr->sdn_flags & SDF_WILD)
+		goto out;
+
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		err = dn_auto_bind(sk->sk_socket);
+		if (err)
+			goto out;
+	}
+
+	memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
+
+	err = -EHOSTUNREACH;
+	memset(&fl, 0, sizeof(fl));
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fld_dst = dn_saddr2dn(&scp->peer);
+	fl.fld_src = dn_saddr2dn(&scp->addr);
+	dn_sk_ports_copy(&fl, scp);
+	fl.proto = DNPROTO_NSP;
+	if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0)
+		goto out;
+	sk->sk_route_caps = sk->sk_dst_cache->dev->features;
+	sock->state = SS_CONNECTING;
+	scp->state = DN_CI;
+	scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS);
+
+	dn_nsp_send_conninit(sk, NSP_CI);
+	err = -EINPROGRESS;
+	if (*timeo) {
+		err = dn_wait_run(sk, timeo);
+	}
+out:
+	return err;
+}
+
+static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags)
+{
+	struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr;
+	struct sock *sk = sock->sk;
+	int err;
+	long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	lock_sock(sk);
+	err = __dn_connect(sk, addr, addrlen, &timeo, 0);
+	release_sock(sk);
+
+	return err;
+}
+
+static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	switch(scp->state) {
+		case DN_RUN:
+			return 0;
+		case DN_CR:
+			return dn_confirm_accept(sk, timeo, sk->sk_allocation);
+		case DN_CI:
+		case DN_CC:
+			return dn_wait_run(sk, timeo);
+		case DN_O:
+			return __dn_connect(sk, addr, addrlen, timeo, flags);
+	}
+
+	return -EINVAL;
+}
+
+
+static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
+{
+        unsigned char *ptr = skb->data;
+
+        acc->acc_userl = *ptr++;
+        memcpy(&acc->acc_user, ptr, acc->acc_userl);
+        ptr += acc->acc_userl;
+
+        acc->acc_passl = *ptr++;
+        memcpy(&acc->acc_pass, ptr, acc->acc_passl);
+        ptr += acc->acc_passl;
+
+        acc->acc_accl = *ptr++;
+        memcpy(&acc->acc_acc, ptr, acc->acc_accl);
+
+        skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3);
+
+}
+
+static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
+{
+        unsigned char *ptr = skb->data;
+        
+        opt->opt_optl   = *ptr++;
+        opt->opt_status = 0;
+        memcpy(opt->opt_data, ptr, opt->opt_optl);
+        skb_pull(skb, opt->opt_optl + 1);
+
+}
+
+static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
+{
+	DEFINE_WAIT(wait);
+	struct sk_buff *skb = NULL;
+	int err = 0;
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	for(;;) {
+		release_sock(sk);
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb == NULL) {
+			*timeo = schedule_timeout(*timeo);
+			skb = skb_dequeue(&sk->sk_receive_queue);
+		}
+		lock_sock(sk);
+		if (skb != NULL)
+			break;
+		err = -EINVAL;
+		if (sk->sk_state != TCP_LISTEN)
+			break;
+		err = sock_intr_errno(*timeo);
+		if (signal_pending(current))
+			break;
+		err = -EAGAIN;
+		if (!*timeo)
+			break;
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+
+	return skb == NULL ? ERR_PTR(err) : skb;
+}
+
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk = sock->sk, *newsk;
+	struct sk_buff *skb = NULL;
+	struct dn_skb_cb *cb;
+	unsigned char menuver;
+	int err = 0;
+	unsigned char type;
+	long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	lock_sock(sk);
+
+        if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	if (skb == NULL) {
+		skb = dn_wait_for_connect(sk, &timeo);
+		if (IS_ERR(skb)) {
+			release_sock(sk);
+			return PTR_ERR(skb);
+		}
+	}
+
+	cb = DN_SKB_CB(skb);
+	sk->sk_ack_backlog--;
+	newsk = dn_alloc_sock(newsock, sk->sk_allocation);
+	if (newsk == NULL) {
+		release_sock(sk);
+		kfree_skb(skb);
+		return -ENOBUFS;
+	}
+	release_sock(sk);
+
+	dst_release(xchg(&newsk->sk_dst_cache, skb->dst));
+	skb->dst = NULL;
+
+        DN_SK(newsk)->state        = DN_CR;
+	DN_SK(newsk)->addrrem      = cb->src_port;
+	DN_SK(newsk)->services_rem = cb->services;
+	DN_SK(newsk)->info_rem     = cb->info;
+	DN_SK(newsk)->segsize_rem  = cb->segsize;
+	DN_SK(newsk)->accept_mode  = DN_SK(sk)->accept_mode;
+	
+	if (DN_SK(newsk)->segsize_rem < 230)
+		DN_SK(newsk)->segsize_rem = 230;
+
+	if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
+		DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd;
+
+	newsk->sk_state  = TCP_LISTEN;
+	memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn));
+
+	/*
+	 * If we are listening on a wild socket, we don't want
+	 * the newly created socket on the wrong hash queue.
+	 */
+	DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD;
+
+	skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type));
+	skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type));
+	*(dn_address *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src;
+	*(dn_address *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst;
+
+	menuver = *skb->data;
+	skb_pull(skb, 1);
+
+	if (menuver & DN_MENUVER_ACC)
+		dn_access_copy(skb, &(DN_SK(newsk)->accessdata));
+
+	if (menuver & DN_MENUVER_USR)
+		dn_user_copy(skb, &(DN_SK(newsk)->conndata_in));
+
+	if (menuver & DN_MENUVER_PRX)
+		DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY;
+
+	if (menuver & DN_MENUVER_UIC)
+		DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY;
+
+	kfree_skb(skb);
+
+	memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out),
+		sizeof(struct optdata_dn));
+	memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out),
+		sizeof(struct optdata_dn));
+
+	lock_sock(newsk);
+	err = dn_hash_sock(newsk);
+	if (err == 0) {
+		sock_reset_flag(newsk, SOCK_ZAPPED);
+		dn_send_conn_ack(newsk);
+
+		/*
+	 	 * Here we use sk->sk_allocation since although the conn conf is
+	 	 * for the newsk, the context is the old socket.
+	 	 */
+		if (DN_SK(newsk)->accept_mode == ACC_IMMED)
+			err = dn_confirm_accept(newsk, &timeo,
+						sk->sk_allocation);
+	}
+	release_sock(newsk);
+        return err;
+}
+
+
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+{
+	struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+
+	*uaddr_len = sizeof(struct sockaddr_dn);
+
+	lock_sock(sk);
+
+	if (peer) {
+		if ((sock->state != SS_CONNECTED && 
+		     sock->state != SS_CONNECTING) && 
+		    scp->accept_mode == ACC_IMMED)
+			return -ENOTCONN;
+
+		memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn));
+	} else {
+		memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn));
+	}
+
+	release_sock(sk);
+
+        return 0;
+}
+
+
+static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table  *wait)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	int mask = datagram_poll(file, sock, wait);
+
+	if (skb_queue_len(&scp->other_receive_queue))
+		mask |= POLLRDBAND;
+
+	return mask;
+}
+
+static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	int err = -EOPNOTSUPP;
+	long amount = 0;
+	struct sk_buff *skb;
+	int val;
+
+	switch(cmd)
+	{
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+		return dn_dev_ioctl(cmd, (void __user *)arg);
+
+	case SIOCATMARK:
+		lock_sock(sk);
+		val = (skb_queue_len(&scp->other_receive_queue) != 0);
+		if (scp->state != DN_RUN)
+			val = -ENOTCONN;
+		release_sock(sk);
+		return val;
+
+	case TIOCOUTQ:
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		err = put_user(amount, (int __user *)arg);
+		break;
+
+	case TIOCINQ:
+		lock_sock(sk);
+		if ((skb = skb_peek(&scp->other_receive_queue)) != NULL) {
+			amount = skb->len;
+		} else {
+			struct sk_buff *skb = sk->sk_receive_queue.next;
+			for(;;) {
+				if (skb ==
+				    (struct sk_buff *)&sk->sk_receive_queue)
+					break;
+				amount += skb->len;
+				skb = skb->next;
+			}
+		}
+		release_sock(sk);
+		err = put_user(amount, (int __user *)arg);
+		break;
+
+	default:
+		err = dev_ioctl(cmd, (void __user *)arg);
+		break;
+	}
+
+	return err;
+}
+
+static int dn_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = -EINVAL;
+
+	lock_sock(sk);
+
+	if (sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+
+	if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN))
+		goto out;
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog     = 0;
+	sk->sk_state           = TCP_LISTEN;
+	err                 = 0;
+	dn_rehash_sock(sk);
+
+out:
+	release_sock(sk);
+
+        return err;
+}
+
+
+static int dn_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	int err = -ENOTCONN;
+
+	lock_sock(sk);
+
+	if (sock->state == SS_UNCONNECTED)
+		goto out;
+
+	err = 0;
+	if (sock->state == SS_DISCONNECTING)
+		goto out;
+
+	err = -EINVAL;
+	if (scp->state == DN_O)
+		goto out;
+
+	if (how != SHUTDOWN_MASK)
+		goto out;
+
+	sk->sk_shutdown = how;
+	dn_destroy_sock(sk);
+	err = 0;
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	lock_sock(sk);
+	err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
+	release_sock(sk);
+
+	return err;
+}
+
+static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, int optlen, int flags) 
+{
+	struct	sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	long timeo;
+	union {
+		struct optdata_dn opt;
+		struct accessdata_dn acc;
+		int mode;
+		unsigned long win;
+		int val;
+		unsigned char services;
+		unsigned char info;
+	} u;
+	int err;
+
+	if (optlen && !optval)
+		return -EINVAL;
+
+	if (optlen > sizeof(u))
+		return -EINVAL;
+
+	if (copy_from_user(&u, optval, optlen))
+		return -EFAULT;
+
+	switch(optname) {
+		case DSO_CONDATA:
+			if (sock->state == SS_CONNECTED) 
+				return -EISCONN;
+			if ((scp->state != DN_O) && (scp->state != DN_CR))
+				return -EINVAL;
+
+			if (optlen != sizeof(struct optdata_dn))
+				return -EINVAL;
+
+			if (u.opt.opt_optl > 16)
+				return -EINVAL;
+
+			memcpy(&scp->conndata_out, &u.opt, optlen);
+			break;
+
+		case DSO_DISDATA:
+	   	        if (sock->state != SS_CONNECTED && scp->accept_mode == ACC_IMMED)
+				return -ENOTCONN;
+
+			if (optlen != sizeof(struct optdata_dn))
+				return -EINVAL;
+
+			if (u.opt.opt_optl > 16)
+				return -EINVAL;
+
+			memcpy(&scp->discdata_out, &u.opt, optlen);
+			break;
+
+		case DSO_CONACCESS:
+			if (sock->state == SS_CONNECTED) 
+				return -EISCONN;
+			if (scp->state != DN_O)
+				return -EINVAL;
+
+			if (optlen != sizeof(struct accessdata_dn))
+				return -EINVAL;
+
+			if ((u.acc.acc_accl > DN_MAXACCL) ||
+					(u.acc.acc_passl > DN_MAXACCL) ||
+					(u.acc.acc_userl > DN_MAXACCL))
+				return -EINVAL;
+
+			memcpy(&scp->accessdata, &u.acc, optlen);
+			break;
+
+		case DSO_ACCEPTMODE:
+			if (sock->state == SS_CONNECTED)
+				return -EISCONN;
+			if (scp->state != DN_O)
+				return -EINVAL;
+
+			if (optlen != sizeof(int))
+				return -EINVAL;
+
+			if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER))
+				return -EINVAL;
+
+			scp->accept_mode = (unsigned char)u.mode;
+			break;
+
+		case DSO_CONACCEPT:
+
+			if (scp->state != DN_CR)
+				return -EINVAL;
+			timeo = sock_rcvtimeo(sk, 0);
+			err = dn_confirm_accept(sk, &timeo, sk->sk_allocation);
+			return err;
+
+		case DSO_CONREJECT:
+
+			if (scp->state != DN_CR)
+				return -EINVAL;
+
+			scp->state = DN_DR;
+			sk->sk_shutdown = SHUTDOWN_MASK;
+			dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
+			break;
+
+		default:
+#ifdef CONFIG_NETFILTER
+		return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+#endif
+		case DSO_LINKINFO:
+		case DSO_STREAM:
+		case DSO_SEQPACKET:
+			return -ENOPROTOOPT;
+
+		case DSO_MAXWINDOW:
+			if (optlen != sizeof(unsigned long))
+				return -EINVAL;
+			if (u.win > NSP_MAX_WINDOW)
+				u.win = NSP_MAX_WINDOW;
+			if (u.win == 0)
+				return -EINVAL;
+			scp->max_window = u.win;
+			if (scp->snd_window > u.win)
+				scp->snd_window = u.win;
+			break;
+
+		case DSO_NODELAY:
+			if (optlen != sizeof(int))
+				return -EINVAL;
+			if (scp->nonagle == 2)
+				return -EINVAL;
+			scp->nonagle = (u.val == 0) ? 0 : 1;
+			/* if (scp->nonagle == 1) { Push pending frames } */
+			break;
+
+		case DSO_CORK:
+			if (optlen != sizeof(int))
+				return -EINVAL;
+			if (scp->nonagle == 1)
+				return -EINVAL;
+			scp->nonagle = (u.val == 0) ? 0 : 2;
+			/* if (scp->nonagle == 0) { Push pending frames } */
+			break;
+
+		case DSO_SERVICES:
+			if (optlen != sizeof(unsigned char))
+				return -EINVAL;
+			if ((u.services & ~NSP_FC_MASK) != 0x01)
+				return -EINVAL;
+			if ((u.services & NSP_FC_MASK) == NSP_FC_MASK)
+				return -EINVAL;
+			scp->services_loc = u.services;
+			break;
+
+		case DSO_INFO:
+			if (optlen != sizeof(unsigned char))
+				return -EINVAL;
+			if (u.info & 0xfc)
+				return -EINVAL;
+			scp->info_loc = u.info;
+			break;
+	}
+
+	return 0;
+}
+
+static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	lock_sock(sk);
+	err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
+	release_sock(sk);
+
+	return err;
+}
+
+static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags)
+{
+	struct	sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	struct linkinfo_dn link;
+	unsigned int r_len;
+	void *r_data = NULL;
+	unsigned int val;
+
+	if(get_user(r_len , optlen))
+		return -EFAULT;
+		
+	switch(optname) {
+		case DSO_CONDATA:
+			if (r_len > sizeof(struct optdata_dn))
+				r_len = sizeof(struct optdata_dn);
+			r_data = &scp->conndata_in;
+			break;
+
+		case DSO_DISDATA:
+			if (r_len > sizeof(struct optdata_dn))
+				r_len = sizeof(struct optdata_dn);
+			r_data = &scp->discdata_in;
+			break;
+
+		case DSO_CONACCESS:
+			if (r_len > sizeof(struct accessdata_dn))
+				r_len = sizeof(struct accessdata_dn);
+			r_data = &scp->accessdata;
+			break;
+
+		case DSO_ACCEPTMODE:
+			if (r_len > sizeof(unsigned char))
+				r_len = sizeof(unsigned char);
+			r_data = &scp->accept_mode;
+			break;
+
+		case DSO_LINKINFO:
+			if (r_len > sizeof(struct linkinfo_dn))
+				r_len = sizeof(struct linkinfo_dn);
+
+			switch(sock->state) {
+				case SS_CONNECTING:
+					link.idn_linkstate = LL_CONNECTING;
+					break;
+				case SS_DISCONNECTING:
+					link.idn_linkstate = LL_DISCONNECTING;
+					break;
+				case SS_CONNECTED:
+					link.idn_linkstate = LL_RUNNING;
+					break;
+				default:
+					link.idn_linkstate = LL_INACTIVE;
+			}
+
+			link.idn_segsize = scp->segsize_rem;
+			r_data = &link;
+			break;
+
+		default:
+#ifdef CONFIG_NETFILTER
+		{
+			int val, len;
+			
+			if(get_user(len, optlen))
+				return -EFAULT;
+			
+			val = nf_getsockopt(sk, PF_DECnet, optname, 
+							optval, &len);
+			if (val >= 0)
+				val = put_user(len, optlen);
+			return val;
+		}
+#endif
+		case DSO_STREAM:
+		case DSO_SEQPACKET:
+		case DSO_CONACCEPT:
+		case DSO_CONREJECT:
+        		return -ENOPROTOOPT;
+
+		case DSO_MAXWINDOW:
+			if (r_len > sizeof(unsigned long))
+				r_len = sizeof(unsigned long);
+			r_data = &scp->max_window;
+			break;
+
+		case DSO_NODELAY:
+			if (r_len > sizeof(int))
+				r_len = sizeof(int);
+			val = (scp->nonagle == 1);
+			r_data = &val;
+			break;
+
+		case DSO_CORK:
+			if (r_len > sizeof(int))
+				r_len = sizeof(int);
+			val = (scp->nonagle == 2);
+			r_data = &val;
+			break;
+
+		case DSO_SERVICES:
+			if (r_len > sizeof(unsigned char))
+				r_len = sizeof(unsigned char);
+			r_data = &scp->services_rem;
+			break;
+
+		case DSO_INFO:
+			if (r_len > sizeof(unsigned char))
+				r_len = sizeof(unsigned char);
+			r_data = &scp->info_rem;
+			break;
+	}
+
+	if (r_data) {
+		if (copy_to_user(optval, r_data, r_len))
+			return -EFAULT;
+		if (put_user(r_len, optlen))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target)
+{
+	struct sk_buff *skb = q->next;
+	int len = 0;
+
+	if (flags & MSG_OOB)
+		return skb_queue_len(q) ? 1 : 0;
+
+	while(skb != (struct sk_buff *)q) {
+		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+		len += skb->len;
+
+		if (cb->nsp_flags & 0x40) {
+			/* SOCK_SEQPACKET reads to EOM */
+			if (sk->sk_type == SOCK_SEQPACKET)
+				return 1;
+			/* so does SOCK_STREAM unless WAITALL is specified */
+			if (!(flags & MSG_WAITALL))
+				return 1;
+		}
+
+		/* minimum data length for read exceeded */
+		if (len >= target)
+			return 1;
+
+		skb = skb->next;
+	}
+
+	return 0;
+}
+
+
+static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
+	struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff_head *queue = &sk->sk_receive_queue;
+	size_t target = size > 1 ? 1 : 0;
+	size_t copied = 0;
+	int rv = 0;
+	struct sk_buff *skb, *nskb;
+	struct dn_skb_cb *cb = NULL;
+	unsigned char eor = 0;
+	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	lock_sock(sk);
+
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		rv = -EADDRNOTAVAIL;
+		goto out;
+	}
+
+	rv = dn_check_state(sk, NULL, 0, &timeo, flags);
+	if (rv)
+		goto out;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		if (!(flags & MSG_NOSIGNAL))
+			send_sig(SIGPIPE, current, 0);
+		rv = -EPIPE;
+		goto out;
+	}
+
+	if (flags & ~(MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
+		rv = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (flags & MSG_OOB)
+		queue = &scp->other_receive_queue;
+
+	if (flags & MSG_WAITALL)
+		target = size;
+
+
+	/*
+	 * See if there is data ready to read, sleep if there isn't
+	 */
+	for(;;) {
+		if (sk->sk_err)
+			goto out;
+
+		if (skb_queue_len(&scp->other_receive_queue)) {
+			if (!(flags & MSG_OOB)) {
+				msg->msg_flags |= MSG_OOB;
+				if (!scp->other_report) {
+					scp->other_report = 1;
+					goto out;
+				}
+			}
+		}
+		
+		if (scp->state != DN_RUN)
+			goto out;
+
+		if (signal_pending(current)) {
+			rv = sock_intr_errno(timeo);
+			goto out;
+		}
+
+		if (dn_data_ready(sk, queue, flags, target))
+			break;
+
+		if (flags & MSG_DONTWAIT) {
+			rv = -EWOULDBLOCK;
+			goto out;
+		}
+
+		set_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
+		SOCK_SLEEP_PRE(sk)
+
+		if (!dn_data_ready(sk, queue, flags, target))
+			schedule();
+
+		SOCK_SLEEP_POST(sk)
+		clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
+	}
+
+	for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) {
+		unsigned int chunk = skb->len;
+		cb = DN_SKB_CB(skb);
+
+		if ((chunk + copied) > size)
+			chunk = size - copied;
+
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			rv = -EFAULT;
+			break;
+		}
+		copied += chunk;
+
+		if (!(flags & MSG_PEEK))
+			skb_pull(skb, chunk);
+
+		eor = cb->nsp_flags & 0x40;
+		nskb = skb->next;
+
+		if (skb->len == 0) {
+			skb_unlink(skb);
+			kfree_skb(skb);
+			/* 
+			 * N.B. Don't refer to skb or cb after this point
+			 * in loop.
+			 */
+			if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) {
+				scp->flowloc_sw = DN_SEND;
+				dn_nsp_send_link(sk, DN_SEND, 0);
+			}
+		}
+
+		if (eor) { 
+			if (sk->sk_type == SOCK_SEQPACKET)
+				break;
+			if (!(flags & MSG_WAITALL))
+				break;
+		}
+
+		if (flags & MSG_OOB)
+			break;
+
+		if (copied >= target)
+			break;
+	}
+
+	rv = copied;
+
+
+	if (eor && (sk->sk_type == SOCK_SEQPACKET))
+		msg->msg_flags |= MSG_EOR;
+
+out:
+	if (rv == 0)
+		rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk);
+
+	if ((rv >= 0) && msg->msg_name) {
+		memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn));
+		msg->msg_namelen = sizeof(struct sockaddr_dn);
+	}
+
+	release_sock(sk);
+
+	return rv;
+}
+
+
+static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags)
+{
+	unsigned char fctype = scp->services_rem & NSP_FC_MASK;
+	if (skb_queue_len(queue) >= scp->snd_window)
+		return 1;
+	if (fctype != NSP_FC_NONE) {
+		if (flags & MSG_OOB) {
+			if (scp->flowrem_oth == 0)
+				return 1;
+		} else {
+			if (scp->flowrem_dat == 0)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * The DECnet spec requires the the "routing layer" accepts packets which
+ * are at least 230 bytes in size. This excludes any headers which the NSP
+ * layer might add, so we always assume that we'll be using the maximal
+ * length header on data packets. The variation in length is due to the
+ * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't
+ * make much practical difference.
+ */
+unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu)
+{
+	unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER;
+	if (dev) {
+		struct dn_dev *dn_db = dev->dn_ptr;
+		mtu -= LL_RESERVED_SPACE(dev);
+		if (dn_db->use_long)
+			mtu -= 21;
+		else
+			mtu -= 6;
+		mtu -= DN_MAX_NSP_DATA_HEADER;
+	} else {
+		/*
+		 * 21 = long header, 16 = guess at MAC header length
+		 */
+		mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16);
+	}
+	if (mtu > mss)
+		mss = mtu;
+	return mss;
+}
+
+static inline unsigned int dn_current_mss(struct sock *sk, int flags)
+{
+	struct dst_entry *dst = __sk_dst_get(sk);
+	struct dn_scp *scp = DN_SK(sk);
+	int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem);
+
+	/* Other data messages are limited to 16 bytes per packet */
+	if (flags & MSG_OOB)
+		return 16;
+
+	/* This works out the maximum size of segment we can send out */
+	if (dst) {
+		u32 mtu = dst_mtu(dst);
+		mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now);
+	}
+
+	return mss_now;
+}
+
+static int dn_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE)
+		err = sock_error(sk) ? : -EPIPE;
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+
+static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
+	   struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	size_t mss;
+	struct sk_buff_head *queue = &scp->data_xmit_queue;
+	int flags = msg->msg_flags;
+	int err = 0;
+	size_t sent = 0;
+	int addr_len = msg->msg_namelen;
+	struct sockaddr_dn *addr = (struct sockaddr_dn *)msg->msg_name;
+	struct sk_buff *skb = NULL;
+	struct dn_skb_cb *cb;
+	size_t len;
+	unsigned char fctype;
+	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
+		return -EOPNOTSUPP;
+
+	if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
+		return -EINVAL;
+
+	/*
+	 * The only difference between stream sockets and sequenced packet
+	 * sockets is that the stream sockets always behave as if MSG_EOR
+	 * has been set.
+	 */
+	if (sock->type == SOCK_STREAM) {
+		if (flags & MSG_EOR)
+			return -EINVAL;
+		flags |= MSG_EOR;
+	}
+
+	lock_sock(sk);
+
+	err = dn_check_state(sk, addr, addr_len, &timeo, flags);
+	if (err)
+		goto out_err;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		err = -EPIPE;
+		goto out_err;
+	}
+
+	if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
+		dst_negative_advice(&sk->sk_dst_cache);
+
+	mss = scp->segsize_rem;
+	fctype = scp->services_rem & NSP_FC_MASK;
+
+	mss = dn_current_mss(sk, flags);
+
+	if (flags & MSG_OOB) {
+		queue = &scp->other_xmit_queue;
+		if (size > mss) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+	}
+
+	scp->persist_fxn = dn_nsp_xmit_timeout;
+
+	while(sent < size) {
+		err = sock_error(sk);
+		if (err)
+			goto out;
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			goto out;
+		}
+
+		/*
+		 * Calculate size that we wish to send.
+		 */
+		len = size - sent;
+
+		if (len > mss)
+			len = mss;
+
+		/*
+		 * Wait for queue size to go down below the window
+		 * size.
+		 */
+		if (dn_queue_too_long(scp, queue, flags)) {
+			if (flags & MSG_DONTWAIT) {
+				err = -EWOULDBLOCK;
+				goto out;
+			}
+
+			SOCK_SLEEP_PRE(sk)
+
+			if (dn_queue_too_long(scp, queue, flags))
+				schedule();
+
+			SOCK_SLEEP_POST(sk)
+
+			continue;
+		}
+
+		/*
+		 * Get a suitably sized skb.
+		 */
+		skb = dn_alloc_send_skb(sk, &len, flags & MSG_DONTWAIT, timeo, &err);
+
+		if (err)
+			break;
+
+		if (!skb)
+			continue;
+
+		cb = DN_SKB_CB(skb);
+
+		skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
+
+		if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
+			err = -EFAULT;
+			goto out;
+		}
+
+		if (flags & MSG_OOB) {
+			cb->nsp_flags = 0x30;
+			if (fctype != NSP_FC_NONE)
+				scp->flowrem_oth--;
+		} else {
+			cb->nsp_flags = 0x00;
+			if (scp->seg_total == 0)
+				cb->nsp_flags |= 0x20;
+
+			scp->seg_total += len;
+		
+			if (((sent + len) == size) && (flags & MSG_EOR)) {
+				cb->nsp_flags |= 0x40;
+				scp->seg_total = 0;
+				if (fctype == NSP_FC_SCMC)
+					scp->flowrem_dat--;
+			}
+			if (fctype == NSP_FC_SRC)
+				scp->flowrem_dat--;
+		}
+
+		sent += len;
+		dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB);
+		skb = NULL;
+
+		scp->persist = dn_nsp_persist(sk);
+
+	}
+out:
+
+	if (skb)
+		kfree_skb(skb);
+
+	release_sock(sk);
+
+	return sent ? sent : err;
+
+out_err:
+	err = dn_error(sk, flags, err);
+	release_sock(sk);
+	return err;
+}
+
+static int dn_device_event(struct notifier_block *this, unsigned long event,
+			void *ptr)
+{
+	struct net_device *dev = (struct net_device *)ptr;
+
+	switch(event) {
+		case NETDEV_UP:
+			dn_dev_up(dev);
+			break;
+		case NETDEV_DOWN:
+			dn_dev_down(dev);
+			break;
+		default:
+			break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dn_dev_notifier = {
+	.notifier_call = dn_device_event,
+};
+
+extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *);
+
+static struct packet_type dn_dix_packet_type = {
+	.type =		__constant_htons(ETH_P_DNA_RT),
+	.dev =		NULL,		/* All devices */
+	.func =		dn_route_rcv,
+};
+
+#ifdef CONFIG_PROC_FS
+struct dn_iter_state {
+	int bucket;
+};
+
+static struct sock *dn_socket_get_first(struct seq_file *seq)
+{
+	struct dn_iter_state *state = seq->private;
+	struct sock *n = NULL;
+
+	for(state->bucket = 0;
+	    state->bucket < DN_SK_HASH_SIZE;
+	    ++state->bucket) {
+		n = sk_head(&dn_sk_hash[state->bucket]);
+		if (n)
+			break;
+	}
+
+	return n;
+}
+
+static struct sock *dn_socket_get_next(struct seq_file *seq,
+				       struct sock *n)
+{
+	struct dn_iter_state *state = seq->private;
+
+	n = sk_next(n);
+try_again:
+	if (n)
+		goto out;
+	if (++state->bucket >= DN_SK_HASH_SIZE)
+		goto out;
+	n = sk_head(&dn_sk_hash[state->bucket]);
+	goto try_again;
+out:
+	return n;
+}
+
+static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	struct sock *sk = dn_socket_get_first(seq);
+
+	if (sk) {
+		while(*pos && (sk = dn_socket_get_next(seq, sk)))
+			--*pos;
+	}
+	return *pos ? NULL : sk;
+}
+
+static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos)
+{
+	void *rc;
+	read_lock_bh(&dn_hash_lock);
+	rc = socket_get_idx(seq, &pos);
+	if (!rc) {
+		read_unlock_bh(&dn_hash_lock);
+	}
+	return rc;
+}
+
+static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	void *rc;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = dn_socket_get_idx(seq, 0);
+		goto out;
+	}
+
+	rc = dn_socket_get_next(seq, v);
+	if (rc)
+		goto out;
+	read_unlock_bh(&dn_hash_lock);
+out:
+	++*pos;
+	return rc;
+}
+
+static void dn_socket_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v && v != SEQ_START_TOKEN)
+		read_unlock_bh(&dn_hash_lock);
+}
+
+#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126)
+
+static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
+{
+	int i;
+    
+	switch (dn_ntohs(dn->sdn_objnamel)) {
+		case 0:
+			sprintf(buf, "%d", dn->sdn_objnum);
+			break;
+		default:
+			for (i = 0; i < dn_ntohs(dn->sdn_objnamel); i++) {
+				buf[i] = dn->sdn_objname[i];
+				if (IS_NOT_PRINTABLE(buf[i]))
+					buf[i] = '.';
+			}
+			buf[i] = 0;
+    	}
+}
+
+static char *dn_state2asc(unsigned char state)
+{
+	switch(state) {
+		case DN_O:
+			return "OPEN";
+		case DN_CR:
+			return "  CR";
+		case DN_DR:
+			return "  DR";
+		case DN_DRC:
+			return " DRC";
+		case DN_CC:
+			return "  CC";
+		case DN_CI:
+			return "  CI";
+		case DN_NR:
+			return "  NR";
+		case DN_NC:
+			return "  NC";
+		case DN_CD:
+			return "  CD";
+		case DN_RJ:
+			return "  RJ";
+		case DN_RUN:
+			return " RUN";
+		case DN_DI:
+			return "  DI";
+		case DN_DIC:
+			return " DIC";
+		case DN_DN:
+			return "  DN";
+		case DN_CL:
+			return "  CL";
+		case DN_CN:
+			return "  CN";
+	}
+
+	return "????";
+}
+
+static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	char buf1[DN_ASCBUF_LEN];
+	char buf2[DN_ASCBUF_LEN];
+	char local_object[DN_MAXOBJL+3];
+	char remote_object[DN_MAXOBJL+3];
+
+	dn_printable_object(&scp->addr, local_object);
+	dn_printable_object(&scp->peer, remote_object);
+
+	seq_printf(seq,
+		   "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
+		   "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
+		   dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->addr)), buf1),
+		   scp->addrloc,
+		   scp->numdat,
+		   scp->numoth,
+		   scp->ackxmt_dat,
+		   scp->ackxmt_oth,
+		   scp->flowloc_sw,
+		   local_object,
+		   dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->peer)), buf2),
+		   scp->addrrem,
+		   scp->numdat_rcv,
+		   scp->numoth_rcv,
+		   scp->ackrcv_dat,
+		   scp->ackrcv_oth,
+		   scp->flowrem_sw,
+		   remote_object,
+		   dn_state2asc(scp->state),
+		   ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER"));
+}
+
+static int dn_socket_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Local                                              Remote\n");
+	} else {
+		dn_socket_format_entry(seq, v);
+	}
+	return 0;
+}
+
+static struct seq_operations dn_socket_seq_ops = {
+	.start	= dn_socket_seq_start,
+	.next	= dn_socket_seq_next,
+	.stop	= dn_socket_seq_stop,
+	.show	= dn_socket_seq_show,
+};
+
+static int dn_socket_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct dn_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &dn_socket_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq		= file->private_data;
+	seq->private	= s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations dn_socket_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dn_socket_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+#endif
+
+static struct net_proto_family	dn_family_ops = {
+	.family =	AF_DECnet,
+	.create =	dn_create,
+	.owner	=	THIS_MODULE,
+};
+
+static struct proto_ops dn_proto_ops = {
+	.family =	AF_DECnet,
+	.owner =	THIS_MODULE,
+	.release =	dn_release,
+	.bind =		dn_bind,
+	.connect =	dn_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	dn_accept,
+	.getname =	dn_getname,
+	.poll =		dn_poll,
+	.ioctl =	dn_ioctl,
+	.listen =	dn_listen,
+	.shutdown =	dn_shutdown,
+	.setsockopt =	dn_setsockopt,
+	.getsockopt =	dn_getsockopt,
+	.sendmsg =	dn_sendmsg,
+	.recvmsg =	dn_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+void dn_register_sysctl(void);
+void dn_unregister_sysctl(void);
+
+MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
+MODULE_AUTHOR("Linux DECnet Project Team");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_DECnet);
+
+static char banner[] __initdata = KERN_INFO "NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
+
+static int __init decnet_init(void)
+{
+	int rc;
+
+        printk(banner);
+
+	rc = proto_register(&dn_proto, 1);
+	if (rc != 0)
+		goto out;
+
+	dn_neigh_init();
+	dn_dev_init();
+	dn_route_init();
+	dn_fib_init();
+
+	sock_register(&dn_family_ops);
+	dev_add_pack(&dn_dix_packet_type);
+	register_netdevice_notifier(&dn_dev_notifier);
+
+	proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops);
+	dn_register_sysctl();
+out:
+	return rc;
+
+}
+module_init(decnet_init);
+
+/*
+ * Prevent DECnet module unloading until its fixed properly.
+ * Requires an audit of the code to check for memory leaks and
+ * initialisation problems etc.
+ */
+#if 0
+static void __exit decnet_exit(void)
+{
+	sock_unregister(AF_DECnet);
+	dev_remove_pack(&dn_dix_packet_type);
+
+	dn_unregister_sysctl();
+
+	unregister_netdevice_notifier(&dn_dev_notifier);
+
+	dn_route_cleanup();
+	dn_dev_cleanup();
+	dn_neigh_cleanup();
+	dn_fib_cleanup();
+
+	proc_net_remove("decnet");
+
+	proto_unregister(&dn_proto);
+}
+module_exit(decnet_exit);
+#endif
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
new file mode 100644
index 00000000000..c2a0346f423
--- /dev/null
+++ b/net/decnet/dn_dev.c
@@ -0,0 +1,1481 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Device Layer
+ *
+ * Authors:     Steve Whitehouse <SteveW@ACM.org>
+ *              Eduardo Marcelo Serrat <emserrat@geocities.com>
+ *
+ * Changes:
+ *          Steve Whitehouse : Devices now see incoming frames so they
+ *                             can mark on who it came from.
+ *          Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour
+ *                             can now have a device specific setup func.
+ *          Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/
+ *          Steve Whitehouse : Fixed bug which sometimes killed timer
+ *          Steve Whitehouse : Multiple ifaddr support
+ *          Steve Whitehouse : SIOCGIFCONF is now a compile time option
+ *          Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding
+ *          Steve Whitehouse : Removed timer1 - it's a user space issue now
+ *         Patrick Caulfield : Fixed router hello message format
+ *          Steve Whitehouse : Got rid of constant sizes for blksize for
+ *                             devices. All mtu based now.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/sysctl.h>
+#include <linux/notifier.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_dev.h>
+#include <net/dn_route.h>
+#include <net/dn_neigh.h>
+#include <net/dn_fib.h>
+
+#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn))
+
+static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
+static char dn_rt_all_rt_mcast[ETH_ALEN]  = {0xAB,0x00,0x00,0x03,0x00,0x00};
+static char dn_hiord[ETH_ALEN]            = {0xAA,0x00,0x04,0x00,0x00,0x00};
+static unsigned char dn_eco_version[3]    = {0x02,0x00,0x00};
+
+extern struct neigh_table dn_neigh_table;
+
+/*
+ * decnet_address is kept in network order.
+ */
+dn_address decnet_address = 0;
+
+static DEFINE_RWLOCK(dndev_lock);
+static struct net_device *decnet_default_device;
+static struct notifier_block *dnaddr_chain;
+
+static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
+static void dn_dev_delete(struct net_device *dev);
+static void rtmsg_ifa(int event, struct dn_ifaddr *ifa);
+
+static int dn_eth_up(struct net_device *);
+static void dn_eth_down(struct net_device *);
+static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa);
+static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa);
+
+static struct dn_dev_parms dn_dev_list[] =  {
+{
+	.type =		ARPHRD_ETHER, /* Ethernet */
+	.mode =		DN_DEV_BCAST,
+	.state =	DN_DEV_S_RU,
+	.t2 =		1,
+	.t3 =		10,
+	.name =		"ethernet",
+	.ctl_name =	NET_DECNET_CONF_ETHER,
+	.up =		dn_eth_up,
+	.down = 	dn_eth_down,
+	.timer3 =	dn_send_brd_hello,
+},
+{
+	.type =		ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */
+	.mode =		DN_DEV_BCAST,
+	.state =	DN_DEV_S_RU,
+	.t2 =		1,
+	.t3 =		10,
+	.name =		"ipgre",
+	.ctl_name =	NET_DECNET_CONF_GRE,
+	.timer3 =	dn_send_brd_hello,
+},
+#if 0
+{
+	.type =		ARPHRD_X25, /* Bog standard X.25 */
+	.mode =		DN_DEV_UCAST,
+	.state =	DN_DEV_S_DS,
+	.t2 =		1,
+	.t3 =		120,
+	.name =		"x25",
+	.ctl_name =	NET_DECNET_CONF_X25,
+	.timer3 =	dn_send_ptp_hello,
+},
+#endif
+#if 0
+{
+	.type =		ARPHRD_PPP, /* DECnet over PPP */
+	.mode =		DN_DEV_BCAST,
+	.state =	DN_DEV_S_RU,
+	.t2 =		1,
+	.t3 =		10,
+	.name =		"ppp",
+	.ctl_name =	NET_DECNET_CONF_PPP,
+	.timer3 =	dn_send_brd_hello,
+},
+#endif
+{
+	.type =		ARPHRD_DDCMP, /* DECnet over DDCMP */
+	.mode =		DN_DEV_UCAST,
+	.state =	DN_DEV_S_DS,
+	.t2 =		1,
+	.t3 =		120,
+	.name =		"ddcmp",
+	.ctl_name =	NET_DECNET_CONF_DDCMP,
+	.timer3 =	dn_send_ptp_hello,
+},
+{
+	.type =		ARPHRD_LOOPBACK, /* Loopback interface - always last */
+	.mode =		DN_DEV_BCAST,
+	.state =	DN_DEV_S_RU,
+	.t2 =		1,
+	.t3 =		10,
+	.name =		"loopback",
+	.ctl_name =	NET_DECNET_CONF_LOOPBACK,
+	.timer3 =	dn_send_brd_hello,
+}
+};
+
+#define DN_DEV_LIST_SIZE (sizeof(dn_dev_list)/sizeof(struct dn_dev_parms))
+
+#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x))
+
+#ifdef CONFIG_SYSCTL
+
+static int min_t2[] = { 1 };
+static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */
+static int min_t3[] = { 1 };
+static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */
+
+static int min_priority[1];
+static int max_priority[] = { 127 }; /* From DECnet spec */
+
+static int dn_forwarding_proc(ctl_table *, int, struct file *,
+			void __user *, size_t *, loff_t *);
+static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+			void __user *oldval, size_t __user *oldlenp,
+			void __user *newval, size_t newlen,
+			void **context);
+
+static struct dn_dev_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	ctl_table dn_dev_vars[5];
+	ctl_table dn_dev_dev[2];
+	ctl_table dn_dev_conf_dir[2];
+	ctl_table dn_dev_proto_dir[2];
+	ctl_table dn_dev_root_dir[2];
+} dn_dev_sysctl = {
+	NULL,
+	{
+	{
+		.ctl_name = NET_DECNET_CONF_DEV_FORWARDING,
+		.procname = "forwarding",
+		.data = (void *)DN_DEV_PARMS_OFFSET(forwarding),
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = dn_forwarding_proc,
+		.strategy = dn_forwarding_sysctl,
+	},
+	{
+		.ctl_name = NET_DECNET_CONF_DEV_PRIORITY,
+		.procname = "priority",
+		.data = (void *)DN_DEV_PARMS_OFFSET(priority),
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.strategy = sysctl_intvec,
+		.extra1 = &min_priority,
+		.extra2 = &max_priority
+	},
+	{
+		.ctl_name = NET_DECNET_CONF_DEV_T2,
+		.procname = "t2",
+		.data = (void *)DN_DEV_PARMS_OFFSET(t2),
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.strategy = sysctl_intvec,
+		.extra1 = &min_t2,
+		.extra2 = &max_t2
+	},
+	{
+		.ctl_name = NET_DECNET_CONF_DEV_T3,
+		.procname = "t3",
+		.data = (void *)DN_DEV_PARMS_OFFSET(t3),
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.strategy = sysctl_intvec,
+		.extra1 = &min_t3,
+		.extra2 = &max_t3
+	},
+	{0}
+	},
+	{{
+		.ctl_name = 0, 
+		.procname = "", 
+		.mode = 0555, 
+		.child = dn_dev_sysctl.dn_dev_vars
+	}, {0}},
+	{{
+		.ctl_name = NET_DECNET_CONF,
+		.procname = "conf", 
+		.mode = 0555, 
+		.child = dn_dev_sysctl.dn_dev_dev
+	}, {0}},
+	{{
+		.ctl_name = NET_DECNET, 
+		.procname = "decnet", 
+		.mode = 0555, 
+		.child = dn_dev_sysctl.dn_dev_conf_dir
+	}, {0}},
+	{{
+		.ctl_name = CTL_NET, 
+		.procname = "net", 
+		.mode = 0555, 
+		.child = dn_dev_sysctl.dn_dev_proto_dir
+	}, {0}}
+};
+
+static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
+{
+	struct dn_dev_sysctl_table *t;
+	int i;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (t == NULL)
+		return;
+
+	memcpy(t, &dn_dev_sysctl, sizeof(*t));
+
+	for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
+		long offset = (long)t->dn_dev_vars[i].data;
+		t->dn_dev_vars[i].data = ((char *)parms) + offset;
+		t->dn_dev_vars[i].de = NULL;
+	}
+
+	if (dev) {
+		t->dn_dev_dev[0].procname = dev->name;
+		t->dn_dev_dev[0].ctl_name = dev->ifindex;
+	} else {
+		t->dn_dev_dev[0].procname = parms->name;
+		t->dn_dev_dev[0].ctl_name = parms->ctl_name;
+	}
+
+	t->dn_dev_dev[0].child = t->dn_dev_vars;
+	t->dn_dev_dev[0].de = NULL;
+	t->dn_dev_conf_dir[0].child = t->dn_dev_dev;
+	t->dn_dev_conf_dir[0].de = NULL;
+	t->dn_dev_proto_dir[0].child = t->dn_dev_conf_dir;
+	t->dn_dev_proto_dir[0].de = NULL;
+	t->dn_dev_root_dir[0].child = t->dn_dev_proto_dir;
+	t->dn_dev_root_dir[0].de = NULL;
+	t->dn_dev_vars[0].extra1 = (void *)dev;
+
+	t->sysctl_header = register_sysctl_table(t->dn_dev_root_dir, 0);
+	if (t->sysctl_header == NULL)
+		kfree(t);
+	else
+		parms->sysctl = t;
+}
+
+static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
+{
+	if (parms->sysctl) {
+		struct dn_dev_sysctl_table *t = parms->sysctl;
+		parms->sysctl = NULL;
+		unregister_sysctl_table(t->sysctl_header);
+		kfree(t);
+	}
+}
+
+static int dn_forwarding_proc(ctl_table *table, int write, 
+				struct file *filep,
+				void __user *buffer,
+				size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_DECNET_ROUTER
+	struct net_device *dev = table->extra1;
+	struct dn_dev *dn_db;
+	int err;
+	int tmp, old;
+
+	if (table->extra1 == NULL)
+		return -EINVAL;
+
+	dn_db = dev->dn_ptr;
+	old = dn_db->parms.forwarding;
+
+	err = proc_dointvec(table, write, filep, buffer, lenp, ppos);
+
+	if ((err >= 0) && write) {
+		if (dn_db->parms.forwarding < 0)
+			dn_db->parms.forwarding = 0;
+		if (dn_db->parms.forwarding > 2)
+			dn_db->parms.forwarding = 2;
+		/*
+		 * What an ugly hack this is... its works, just. It
+		 * would be nice if sysctl/proc were just that little
+		 * bit more flexible so I don't have to write a special
+		 * routine, or suffer hacks like this - SJW
+		 */
+		tmp = dn_db->parms.forwarding;
+		dn_db->parms.forwarding = old;
+		if (dn_db->parms.down)
+			dn_db->parms.down(dev);
+		dn_db->parms.forwarding = tmp;
+		if (dn_db->parms.up)
+			dn_db->parms.up(dev);
+	}
+
+	return err;
+#else
+	return -EINVAL;
+#endif
+}
+
+static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+			void __user *oldval, size_t __user *oldlenp,
+			void __user *newval, size_t newlen,
+			void **context)
+{
+#ifdef CONFIG_DECNET_ROUTER
+	struct net_device *dev = table->extra1;
+	struct dn_dev *dn_db;
+	int value;
+
+	if (table->extra1 == NULL)
+		return -EINVAL;
+
+	dn_db = dev->dn_ptr;
+
+	if (newval && newlen) {
+		if (newlen != sizeof(int))
+			return -EINVAL;
+
+		if (get_user(value, (int __user *)newval))
+			return -EFAULT;
+		if (value < 0)
+			return -EINVAL;
+		if (value > 2)
+			return -EINVAL;
+
+		if (dn_db->parms.down)
+			dn_db->parms.down(dev);
+		dn_db->parms.forwarding = value;
+		if (dn_db->parms.up)
+			dn_db->parms.up(dev);
+	}
+
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+#else /* CONFIG_SYSCTL */
+static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
+{
+}
+static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
+{
+}
+
+#endif /* CONFIG_SYSCTL */
+
+static inline __u16 mtu2blksize(struct net_device *dev)
+{
+	u32 blksize = dev->mtu;
+	if (blksize > 0xffff)
+		blksize = 0xffff;
+
+	if (dev->type == ARPHRD_ETHER ||
+	    dev->type == ARPHRD_PPP ||
+	    dev->type == ARPHRD_IPGRE ||
+	    dev->type == ARPHRD_LOOPBACK)
+		blksize -= 2;
+
+	return (__u16)blksize;
+}
+
+static struct dn_ifaddr *dn_dev_alloc_ifa(void)
+{
+	struct dn_ifaddr *ifa;
+
+	ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+
+	if (ifa) {
+		memset(ifa, 0, sizeof(*ifa));
+	}
+
+	return ifa;
+}
+
+static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa)
+{
+	kfree(ifa);
+}
+
+static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy)
+{
+	struct dn_ifaddr *ifa1 = *ifap;
+	unsigned char mac_addr[6];
+	struct net_device *dev = dn_db->dev;
+
+	ASSERT_RTNL();
+
+	*ifap = ifa1->ifa_next;
+
+	if (dn_db->dev->type == ARPHRD_ETHER) {
+		if (ifa1->ifa_local != dn_htons(dn_eth2dn(dev->dev_addr))) {
+			dn_dn2eth(mac_addr, ifa1->ifa_local);
+			dev_mc_delete(dev, mac_addr, ETH_ALEN, 0);
+		}
+	}
+
+	rtmsg_ifa(RTM_DELADDR, ifa1);
+	notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
+	if (destroy) {
+		dn_dev_free_ifa(ifa1);
+
+		if (dn_db->ifa_list == NULL)
+			dn_dev_delete(dn_db->dev);
+	}
+}
+
+static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
+{
+	struct net_device *dev = dn_db->dev;
+	struct dn_ifaddr *ifa1;
+	unsigned char mac_addr[6];
+
+	ASSERT_RTNL();
+
+	/* Check for duplicates */	
+	for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+		if (ifa1->ifa_local == ifa->ifa_local)
+			return -EEXIST;
+	}
+
+	if (dev->type == ARPHRD_ETHER) {
+		if (ifa->ifa_local != dn_htons(dn_eth2dn(dev->dev_addr))) {
+			dn_dn2eth(mac_addr, ifa->ifa_local);
+			dev_mc_add(dev, mac_addr, ETH_ALEN, 0);
+			dev_mc_upload(dev);
+		}
+	}
+
+	ifa->ifa_next = dn_db->ifa_list;
+	dn_db->ifa_list = ifa;
+
+	rtmsg_ifa(RTM_NEWADDR, ifa);
+	notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
+
+	return 0;
+}
+
+static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+	int rv;
+
+	if (dn_db == NULL) {
+		int err;
+		dn_db = dn_dev_create(dev, &err);
+		if (dn_db == NULL)
+			return err;
+	}
+
+	ifa->ifa_dev = dn_db;
+
+	if (dev->flags & IFF_LOOPBACK)
+		ifa->ifa_scope = RT_SCOPE_HOST;
+
+	rv = dn_dev_insert_ifa(dn_db, ifa);
+	if (rv)
+		dn_dev_free_ifa(ifa);
+	return rv;
+}
+
+
+int dn_dev_ioctl(unsigned int cmd, void __user *arg)
+{
+	char buffer[DN_IFREQ_SIZE];
+	struct ifreq *ifr = (struct ifreq *)buffer;
+	struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
+	struct dn_dev *dn_db;
+	struct net_device *dev;
+	struct dn_ifaddr *ifa = NULL, **ifap = NULL;
+	int ret = 0;
+
+	if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
+		return -EFAULT;
+	ifr->ifr_name[IFNAMSIZ-1] = 0;
+
+#ifdef CONFIG_KMOD
+	dev_load(ifr->ifr_name);
+#endif
+
+	switch(cmd) {
+		case SIOCGIFADDR:
+			break;
+		case SIOCSIFADDR:
+			if (!capable(CAP_NET_ADMIN))
+				return -EACCES;
+			if (sdn->sdn_family != AF_DECnet)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	rtnl_lock();
+
+	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) {
+		ret = -ENODEV;
+		goto done;
+	}
+
+	if ((dn_db = dev->dn_ptr) != NULL) {
+		for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next)
+			if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
+				break;
+	}
+
+	if (ifa == NULL && cmd != SIOCSIFADDR) {
+		ret = -EADDRNOTAVAIL;
+		goto done;
+	}
+
+	switch(cmd) {
+		case SIOCGIFADDR:
+			*((dn_address *)sdn->sdn_nodeaddr) = ifa->ifa_local;
+			goto rarok;
+
+		case SIOCSIFADDR:
+			if (!ifa) {
+				if ((ifa = dn_dev_alloc_ifa()) == NULL) {
+					ret = -ENOBUFS;
+					break;
+				}
+				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+			} else {
+				if (ifa->ifa_local == dn_saddr2dn(sdn))
+					break;
+				dn_dev_del_ifa(dn_db, ifap, 0);
+			}
+
+			ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn);
+
+			ret = dn_dev_set_ifa(dev, ifa);
+	}
+done:
+	rtnl_unlock();
+
+	return ret;
+rarok:
+	if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
+		ret = -EFAULT;
+	goto done;
+}
+
+struct net_device *dn_dev_get_default(void)
+{
+	struct net_device *dev;
+	read_lock(&dndev_lock);
+	dev = decnet_default_device;
+	if (dev) {
+		if (dev->dn_ptr)
+			dev_hold(dev);
+		else
+			dev = NULL;
+	}
+	read_unlock(&dndev_lock);
+	return dev;
+}
+
+int dn_dev_set_default(struct net_device *dev, int force)
+{
+	struct net_device *old = NULL;
+	int rv = -EBUSY;
+	if (!dev->dn_ptr)
+		return -ENODEV;
+	write_lock(&dndev_lock);
+	if (force || decnet_default_device == NULL) {
+		old = decnet_default_device;
+		decnet_default_device = dev;
+		rv = 0;
+	}
+	write_unlock(&dndev_lock);
+	if (old)
+		dev_put(dev);
+	return rv;
+}
+
+static void dn_dev_check_default(struct net_device *dev)
+{
+	write_lock(&dndev_lock);
+	if (dev == decnet_default_device) {
+		decnet_default_device = NULL;
+	} else {
+		dev = NULL;
+	}
+	write_unlock(&dndev_lock);
+	if (dev)
+		dev_put(dev);
+}
+
+static struct dn_dev *dn_dev_by_index(int ifindex)
+{
+	struct net_device *dev;
+	struct dn_dev *dn_dev = NULL;
+	dev = dev_get_by_index(ifindex);
+	if (dev) {
+		dn_dev = dev->dn_ptr;
+		dev_put(dev);
+	}
+
+	return dn_dev;
+}
+
+static int dn_dev_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct dn_dev *dn_db;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct dn_ifaddr *ifa, **ifap;
+
+	if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
+		return -EADDRNOTAVAIL;
+
+	for(ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) {
+		void *tmp = rta[IFA_LOCAL-1];
+		if ((tmp && memcmp(RTA_DATA(tmp), &ifa->ifa_local, 2)) ||
+		    (rta[IFA_LABEL-1] && rtattr_strcmp(rta[IFA_LABEL-1], ifa->ifa_label)))
+			continue;
+
+		dn_dev_del_ifa(dn_db, ifap, 1);
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct net_device *dev;
+	struct dn_dev *dn_db;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct dn_ifaddr *ifa;
+	int rv;
+
+	if (rta[IFA_LOCAL-1] == NULL)
+		return -EINVAL;
+
+	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
+		return -ENODEV;
+
+	if ((dn_db = dev->dn_ptr) == NULL) {
+		int err;
+		dn_db = dn_dev_create(dev, &err);
+		if (!dn_db)
+			return err;
+	}
+	
+	if ((ifa = dn_dev_alloc_ifa()) == NULL)
+		return -ENOBUFS;
+
+	if (!rta[IFA_ADDRESS - 1])
+		rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
+	memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 2);
+	memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 2);
+	ifa->ifa_flags = ifm->ifa_flags;
+	ifa->ifa_scope = ifm->ifa_scope;
+	ifa->ifa_dev = dn_db;
+	if (rta[IFA_LABEL-1])
+		rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL-1], IFNAMSIZ);
+	else
+		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+
+	rv = dn_dev_insert_ifa(dn_db, ifa);
+	if (rv)
+		dn_dev_free_ifa(ifa);
+	return rv;
+}
+
+static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
+				u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	ifm = NLMSG_DATA(nlh);
+
+	ifm->ifa_family = AF_DECnet;
+	ifm->ifa_prefixlen = 16;
+	ifm->ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
+	ifm->ifa_scope = ifa->ifa_scope;
+	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+	if (ifa->ifa_address)
+		RTA_PUT(skb, IFA_ADDRESS, 2, &ifa->ifa_address);
+	if (ifa->ifa_local)
+		RTA_PUT(skb, IFA_LOCAL, 2, &ifa->ifa_local);
+	if (ifa->ifa_label[0])
+		RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+        skb_trim(skb, b - skb->data);
+        return -1;
+}
+
+static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb) {
+		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
+		return;
+	}
+	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL);
+}
+
+static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, dn_idx;
+	int s_idx, s_dn_idx;
+	struct net_device *dev;
+	struct dn_dev *dn_db;
+	struct dn_ifaddr *ifa;
+
+	s_idx = cb->args[0];
+	s_dn_idx = dn_idx = cb->args[1];
+	read_lock(&dev_base_lock);
+	for(dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (idx > s_idx)
+			s_dn_idx = 0;
+		if ((dn_db = dev->dn_ptr) == NULL)
+			continue;
+
+		for(ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) {
+			if (dn_idx < s_dn_idx)
+				continue;
+
+			if (dn_dev_fill_ifaddr(skb, ifa,
+					       NETLINK_CB(cb->skb).pid,
+					       cb->nlh->nlmsg_seq,
+					       RTM_NEWADDR) <= 0)
+				goto done;
+		}
+	}
+done:
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+	cb->args[1] = dn_idx;
+
+	return skb->len;
+}
+
+static int dn_dev_get_first(struct net_device *dev, dn_address *addr)
+{
+	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_ifaddr *ifa;
+	int rv = -ENODEV;
+	if (dn_db == NULL)
+		goto out;
+	ifa = dn_db->ifa_list;
+	if (ifa != NULL) {
+		*addr = ifa->ifa_local;
+		rv = 0;
+	}
+out:
+	return rv;
+}
+
+/* 
+ * Find a default address to bind to.
+ *
+ * This is one of those areas where the initial VMS concepts don't really
+ * map onto the Linux concepts, and since we introduced multiple addresses
+ * per interface we have to cope with slightly odd ways of finding out what
+ * "our address" really is. Mostly it's not a problem; for this we just guess
+ * a sensible default. Eventually the routing code will take care of all the
+ * nasties for us I hope.
+ */
+int dn_dev_bind_default(dn_address *addr)
+{
+	struct net_device *dev;
+	int rv;
+	dev = dn_dev_get_default();
+last_chance:
+	if (dev) {
+		read_lock(&dev_base_lock);
+		rv = dn_dev_get_first(dev, addr);
+		read_unlock(&dev_base_lock);
+		dev_put(dev);
+		if (rv == 0 || dev == &loopback_dev)
+			return rv;
+	}
+	dev = &loopback_dev;
+	dev_hold(dev);
+	goto last_chance;
+}
+
+static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+{
+        struct endnode_hello_message *msg;
+        struct sk_buff *skb = NULL;
+        unsigned short int *pktlen;
+	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+
+        if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
+		return;
+
+        skb->dev = dev;
+
+        msg = (struct endnode_hello_message *)skb_put(skb,sizeof(*msg));
+
+        msg->msgflg  = 0x0D;
+        memcpy(msg->tiver, dn_eco_version, 3);
+	dn_dn2eth(msg->id, ifa->ifa_local);
+        msg->iinfo   = DN_RT_INFO_ENDN;
+        msg->blksize = dn_htons(mtu2blksize(dev));
+        msg->area    = 0x00;
+        memset(msg->seed, 0, 8);
+        memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
+
+	if (dn_db->router) {
+		struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+		dn_dn2eth(msg->neighbor, dn->addr);
+	}
+
+        msg->timer   = dn_htons((unsigned short)dn_db->parms.t3);
+        msg->mpd     = 0x00;
+        msg->datalen = 0x02;
+        memset(msg->data, 0xAA, 2);
+        
+        pktlen = (unsigned short *)skb_push(skb,2);
+        *pktlen = dn_htons(skb->len - 2);
+
+	skb->nh.raw = skb->data;
+
+	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
+}
+
+
+#define DRDELAY (5 * HZ)
+
+static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
+{
+	/* First check time since device went up */
+	if ((jiffies - dn_db->uptime) < DRDELAY)
+		return 0;
+
+	/* If there is no router, then yes... */
+	if (!dn_db->router)
+		return 1;
+
+	/* otherwise only if we have a higher priority or.. */
+	if (dn->priority < dn_db->parms.priority)
+		return 1;
+
+	/* if we have equal priority and a higher node number */
+	if (dn->priority != dn_db->parms.priority)
+		return 0;
+
+	if (dn_ntohs(dn->addr) < dn_ntohs(ifa->ifa_local))
+		return 1;
+
+	return 0;
+}
+
+static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+{
+	int n;
+	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+	struct sk_buff *skb;
+	size_t size;
+	unsigned char *ptr;
+	unsigned char *i1, *i2;
+	unsigned short *pktlen;
+	char *src;
+
+	if (mtu2blksize(dev) < (26 + 7))
+		return;
+
+	n = mtu2blksize(dev) - 26;
+	n /= 7;
+
+	if (n > 32)
+		n = 32;
+
+	size = 2 + 26 + 7 * n;
+
+	if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb->dev = dev;
+	ptr = skb_put(skb, size);
+
+	*ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH;
+	*ptr++ = 2; /* ECO */
+	*ptr++ = 0;
+	*ptr++ = 0;
+	dn_dn2eth(ptr, ifa->ifa_local);
+	src = ptr;
+	ptr += ETH_ALEN;
+	*ptr++ = dn_db->parms.forwarding == 1 ? 
+			DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
+	*((unsigned short *)ptr) = dn_htons(mtu2blksize(dev));
+	ptr += 2;
+	*ptr++ = dn_db->parms.priority; /* Priority */ 
+	*ptr++ = 0; /* Area: Reserved */
+	*((unsigned short *)ptr) = dn_htons((unsigned short)dn_db->parms.t3);
+	ptr += 2;
+	*ptr++ = 0; /* MPD: Reserved */
+	i1 = ptr++;
+	memset(ptr, 0, 7); /* Name: Reserved */
+	ptr += 7;
+	i2 = ptr++;
+
+	n = dn_neigh_elist(dev, ptr, n);
+
+	*i2 = 7 * n;
+	*i1 = 8 + *i2;
+
+	skb_trim(skb, (27 + *i2));
+
+	pktlen = (unsigned short *)skb_push(skb, 2);
+	*pktlen = dn_htons(skb->len - 2);
+
+	skb->nh.raw = skb->data;
+
+	if (dn_am_i_a_router(dn, dn_db, ifa)) {
+		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+		if (skb2) {
+			dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src);
+		}
+	}
+
+	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
+}
+
+static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+{
+	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+
+	if (dn_db->parms.forwarding == 0)
+		dn_send_endnode_hello(dev, ifa);
+	else
+		dn_send_router_hello(dev, ifa);
+}
+
+static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+{
+	int tdlen = 16;
+	int size = dev->hard_header_len + 2 + 4 + tdlen;
+	struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC);
+	int i;
+	unsigned char *ptr;
+	char src[ETH_ALEN];
+
+	if (skb == NULL)
+		return ;
+
+	skb->dev = dev;
+	skb_push(skb, dev->hard_header_len);
+	ptr = skb_put(skb, 2 + 4 + tdlen);
+
+	*ptr++ = DN_RT_PKT_HELO;
+	*((dn_address *)ptr) = ifa->ifa_local;
+	ptr += 2;
+	*ptr++ = tdlen;
+
+	for(i = 0; i < tdlen; i++)
+		*ptr++ = 0252;
+
+	dn_dn2eth(src, ifa->ifa_local);
+	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
+}
+
+static int dn_eth_up(struct net_device *dev)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+
+	if (dn_db->parms.forwarding == 0)
+		dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+	else
+		dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+
+	dev_mc_upload(dev);
+
+	dn_db->use_long = 1;
+
+	return 0;
+}
+
+static void dn_eth_down(struct net_device *dev)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+
+	if (dn_db->parms.forwarding == 0)
+		dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+	else
+		dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+}
+
+static void dn_dev_set_timer(struct net_device *dev);
+
+static void dn_dev_timer_func(unsigned long arg)
+{
+	struct net_device *dev = (struct net_device *)arg;
+	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_ifaddr *ifa;
+
+	if (dn_db->t3 <= dn_db->parms.t2) {
+		if (dn_db->parms.timer3) {
+			for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+				if (!(ifa->ifa_flags & IFA_F_SECONDARY))
+					dn_db->parms.timer3(dev, ifa);
+			}
+		}
+		dn_db->t3 = dn_db->parms.t3;
+	} else {
+		dn_db->t3 -= dn_db->parms.t2;
+	}
+
+	dn_dev_set_timer(dev);
+}
+
+static void dn_dev_set_timer(struct net_device *dev)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+
+	if (dn_db->parms.t2 > dn_db->parms.t3)
+		dn_db->parms.t2 = dn_db->parms.t3;
+
+	dn_db->timer.data = (unsigned long)dev;
+	dn_db->timer.function = dn_dev_timer_func;
+	dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
+
+	add_timer(&dn_db->timer);
+}
+
+struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
+{
+	int i;
+	struct dn_dev_parms *p = dn_dev_list;
+	struct dn_dev *dn_db;
+
+	for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) {
+		if (p->type == dev->type)
+			break;
+	}
+
+	*err = -ENODEV;
+	if (i == DN_DEV_LIST_SIZE)
+		return NULL;
+
+	*err = -ENOBUFS;
+	if ((dn_db = kmalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
+		return NULL;
+
+	memset(dn_db, 0, sizeof(struct dn_dev));
+	memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
+	smp_wmb();
+	dev->dn_ptr = dn_db;
+	dn_db->dev = dev;
+	init_timer(&dn_db->timer);
+
+	dn_db->uptime = jiffies;
+	if (dn_db->parms.up) {
+		if (dn_db->parms.up(dev) < 0) {
+			dev->dn_ptr = NULL;
+			kfree(dn_db);
+			return NULL;
+		}
+	}
+
+	dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
+
+	dn_dev_sysctl_register(dev, &dn_db->parms);
+
+	dn_dev_set_timer(dev);
+
+	*err = 0;
+	return dn_db;
+}
+
+
+/*
+ * This processes a device up event. We only start up
+ * the loopback device & ethernet devices with correct
+ * MAC addreses automatically. Others must be started
+ * specifically.
+ *
+ * FIXME: How should we configure the loopback address ? If we could dispense
+ * with using decnet_address here and for autobind, it will be one less thing
+ * for users to worry about setting up.
+ */
+
+void dn_dev_up(struct net_device *dev)
+{
+	struct dn_ifaddr *ifa;
+	dn_address addr = decnet_address;
+	int maybe_default = 0;
+	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+
+	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+		return;
+
+	/*
+	 * Need to ensure that loopback device has a dn_db attached to it
+	 * to allow creation of neighbours against it, even though it might
+	 * not have a local address of its own. Might as well do the same for
+	 * all autoconfigured interfaces.
+	 */
+	if (dn_db == NULL) {
+		int err;
+		dn_db = dn_dev_create(dev, &err);
+		if (dn_db == NULL)
+			return;
+	}
+
+	if (dev->type == ARPHRD_ETHER) {
+		if (memcmp(dev->dev_addr, dn_hiord, 4) != 0)
+			return;
+		addr = dn_htons(dn_eth2dn(dev->dev_addr));
+		maybe_default = 1;
+	}
+
+	if (addr == 0)
+		return;
+
+	if ((ifa = dn_dev_alloc_ifa()) == NULL)
+		return;
+
+	ifa->ifa_local = ifa->ifa_address = addr;
+	ifa->ifa_flags = 0;
+	ifa->ifa_scope = RT_SCOPE_UNIVERSE;
+	strcpy(ifa->ifa_label, dev->name);
+
+	dn_dev_set_ifa(dev, ifa);
+
+	/*
+	 * Automagically set the default device to the first automatically
+	 * configured ethernet card in the system.
+	 */
+	if (maybe_default) {
+		dev_hold(dev);
+		if (dn_dev_set_default(dev, 0))
+			dev_put(dev);
+	}
+}
+
+static void dn_dev_delete(struct net_device *dev)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+
+	if (dn_db == NULL)
+		return;
+
+	del_timer_sync(&dn_db->timer);
+	dn_dev_sysctl_unregister(&dn_db->parms);
+	dn_dev_check_default(dev);
+	neigh_ifdown(&dn_neigh_table, dev);
+
+	if (dn_db->parms.down)
+		dn_db->parms.down(dev);
+
+	dev->dn_ptr = NULL;
+
+	neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
+	neigh_ifdown(&dn_neigh_table, dev);
+
+	if (dn_db->router)
+		neigh_release(dn_db->router);
+	if (dn_db->peer)
+		neigh_release(dn_db->peer);
+
+	kfree(dn_db);
+}
+
+void dn_dev_down(struct net_device *dev)
+{
+	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_ifaddr *ifa;
+
+	if (dn_db == NULL)
+		return;
+
+	while((ifa = dn_db->ifa_list) != NULL) {
+		dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
+		dn_dev_free_ifa(ifa);
+	}
+
+	dn_dev_delete(dev);
+}
+
+void dn_dev_init_pkt(struct sk_buff *skb)
+{
+	return;
+}
+
+void dn_dev_veri_pkt(struct sk_buff *skb)
+{
+	return;
+}
+
+void dn_dev_hello(struct sk_buff *skb)
+{
+	return;
+}
+
+void dn_dev_devices_off(void)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	for(dev = dev_base; dev; dev = dev->next)
+		dn_dev_down(dev);
+	rtnl_unlock();
+
+}
+
+void dn_dev_devices_on(void)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	for(dev = dev_base; dev; dev = dev->next) {
+		if (dev->flags & IFF_UP)
+			dn_dev_up(dev);
+	}
+	rtnl_unlock();
+}
+
+int register_dnaddr_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_register(&dnaddr_chain, nb);
+}
+
+int unregister_dnaddr_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&dnaddr_chain, nb);
+}
+
+#ifdef CONFIG_PROC_FS
+static inline struct net_device *dn_dev_get_next(struct seq_file *seq, struct net_device *dev)
+{
+	do {
+		dev = dev->next;
+	} while(dev && !dev->dn_ptr);
+
+	return dev;
+}
+
+static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct net_device *dev;
+
+	dev = dev_base;
+	if (dev && !dev->dn_ptr)
+		dev = dn_dev_get_next(seq, dev);
+	if (pos) {
+		while(dev && (dev = dn_dev_get_next(seq, dev)))
+			--pos;
+	}
+	return dev;
+}
+
+static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos) {
+		struct net_device *dev;
+		read_lock(&dev_base_lock);
+		dev = dn_dev_get_idx(seq, *pos - 1);
+		if (dev == NULL)
+			read_unlock(&dev_base_lock);
+		return dev;
+	}
+	return SEQ_START_TOKEN;
+}
+
+static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net_device *dev = v;
+	loff_t one = 1;
+
+	if (v == SEQ_START_TOKEN) {
+		dev = dn_dev_seq_start(seq, &one);
+	} else {
+		dev = dn_dev_get_next(seq, dev);
+		if (dev == NULL)
+			read_unlock(&dev_base_lock);
+	}
+	++*pos;
+	return dev;
+}
+
+static void dn_dev_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v && v != SEQ_START_TOKEN)
+		read_unlock(&dev_base_lock);
+}
+
+static char *dn_type2asc(char type)
+{
+	switch(type) {
+		case DN_DEV_BCAST:
+			return "B";
+		case DN_DEV_UCAST:
+			return "U";
+		case DN_DEV_MPOINT:
+			return "M";
+	}
+
+	return "?";
+}
+
+static int dn_dev_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+        	seq_puts(seq, "Name     Flags T1   Timer1 T3   Timer3 BlkSize Pri State DevType    Router Peer\n");
+	else {
+		struct net_device *dev = v;
+		char peer_buf[DN_ASCBUF_LEN];
+		char router_buf[DN_ASCBUF_LEN];
+		struct dn_dev *dn_db = dev->dn_ptr;
+
+                seq_printf(seq, "%-8s %1s     %04u %04u   %04lu %04lu"
+				"   %04hu    %03d %02x    %-10s %-7s %-7s\n",
+                             	dev->name ? dev->name : "???",
+                             	dn_type2asc(dn_db->parms.mode),
+                             	0, 0,
+				dn_db->t3, dn_db->parms.t3,
+				mtu2blksize(dev),
+				dn_db->parms.priority,
+				dn_db->parms.state, dn_db->parms.name,
+				dn_db->router ? dn_addr2asc(dn_ntohs(*(dn_address *)dn_db->router->primary_key), router_buf) : "",
+				dn_db->peer ? dn_addr2asc(dn_ntohs(*(dn_address *)dn_db->peer->primary_key), peer_buf) : "");
+	}
+	return 0;
+}
+
+static struct seq_operations dn_dev_seq_ops = {
+	.start	= dn_dev_seq_start,
+	.next	= dn_dev_seq_next,
+	.stop	= dn_dev_seq_stop,
+	.show	= dn_dev_seq_show,
+};
+
+static int dn_dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dn_dev_seq_ops);
+}
+
+static struct file_operations dn_dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = dn_dev_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+static struct rtnetlink_link dnet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = 
+{
+	 [4] = { .doit   = dn_dev_rtm_newaddr,	},
+	 [5] = { .doit   = dn_dev_rtm_deladdr,	},
+	 [6] = { .dumpit = dn_dev_dump_ifaddr,	},
+
+#ifdef CONFIG_DECNET_ROUTER
+	 [8] = { .doit   = dn_fib_rtm_newroute,	},
+	 [9] = { .doit   = dn_fib_rtm_delroute,	},
+	[10] = { .doit   = dn_cache_getroute, .dumpit = dn_fib_dump, },
+	[16] = { .doit   = dn_fib_rtm_newrule, },
+	[17] = { .doit   = dn_fib_rtm_delrule, },
+	[18] = { .dumpit = dn_fib_dump_rules,  },
+#else
+	[10] = { .doit   = dn_cache_getroute, .dumpit = dn_cache_dump, },
+#endif
+
+};
+
+static int __initdata addr[2];
+module_param_array(addr, int, NULL, 0444);
+MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
+
+void __init dn_dev_init(void)
+{
+        if (addr[0] > 63 || addr[0] < 0) {
+                printk(KERN_ERR "DECnet: Area must be between 0 and 63");
+                return;
+        }
+
+        if (addr[1] > 1023 || addr[1] < 0) {
+                printk(KERN_ERR "DECnet: Node must be between 0 and 1023");
+                return;
+        }
+
+        decnet_address = dn_htons((addr[0] << 10) | addr[1]);
+
+	dn_dev_devices_on();
+
+	rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+
+	proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
+
+#ifdef CONFIG_SYSCTL
+	{
+		int i;
+		for(i = 0; i < DN_DEV_LIST_SIZE; i++)
+			dn_dev_sysctl_register(NULL, &dn_dev_list[i]);
+	}
+#endif /* CONFIG_SYSCTL */
+}
+
+void __exit dn_dev_cleanup(void)
+{
+	rtnetlink_links[PF_DECnet] = NULL;
+
+#ifdef CONFIG_SYSCTL
+	{
+		int i;
+		for(i = 0; i < DN_DEV_LIST_SIZE; i++)
+			dn_dev_sysctl_unregister(&dn_dev_list[i]);
+	}
+#endif /* CONFIG_SYSCTL */
+
+	proc_net_remove("decnet_dev");
+
+	dn_dev_devices_off();
+}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
new file mode 100644
index 00000000000..9934b25720e
--- /dev/null
+++ b/net/decnet/dn_fib.c
@@ -0,0 +1,802 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Routing Forwarding Information Base (Glue/Info List)
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *
+ *
+ * Changes:
+ *              Alexey Kuznetsov : SMP locking changes
+ *              Steve Whitehouse : Rewrote it... Well to be more correct, I
+ *                                 copied most of it from the ipv4 fib code.
+ *              Steve Whitehouse : Updated it in style and fixed a few bugs
+ *                                 which were fixed in the ipv4 code since
+ *                                 this code was copied from it.
+ *
+ */
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_route.h>
+#include <net/dn_fib.h>
+#include <net/dn_neigh.h>
+#include <net/dn_dev.h>
+
+#define RT_MIN_TABLE 1
+
+#define for_fib_info() { struct dn_fib_info *fi;\
+	for(fi = dn_fib_info_list; fi; fi = fi->fib_next)
+#define endfor_fib_info() }
+
+#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
+	for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\
+	for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#define endfor_nexthops(fi) }
+
+extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+static DEFINE_SPINLOCK(dn_fib_multipath_lock);
+static struct dn_fib_info *dn_fib_info_list;
+static DEFINE_RWLOCK(dn_fib_info_lock);
+
+static struct
+{
+	int error;
+	u8 scope;
+} dn_fib_props[RTA_MAX+1] = {
+	[RTN_UNSPEC] =      { .error = 0,       .scope = RT_SCOPE_NOWHERE },
+	[RTN_UNICAST] =     { .error = 0,       .scope = RT_SCOPE_UNIVERSE },
+	[RTN_LOCAL] =       { .error = 0,       .scope = RT_SCOPE_HOST },
+	[RTN_BROADCAST] =   { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+	[RTN_ANYCAST] =     { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+	[RTN_MULTICAST] =   { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+	[RTN_BLACKHOLE] =   { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE },
+	[RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },
+	[RTN_PROHIBIT] =    { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE },
+	[RTN_THROW] =       { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE },
+	[RTN_NAT] =         { .error = 0,       .scope = RT_SCOPE_NOWHERE },
+	[RTN_XRESOLVE] =    { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+};
+
+void dn_fib_free_info(struct dn_fib_info *fi)
+{
+	if (fi->fib_dead == 0) {
+		printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n");
+		return;
+	}
+
+	change_nexthops(fi) {
+		if (nh->nh_dev)
+			dev_put(nh->nh_dev);
+		nh->nh_dev = NULL;
+	} endfor_nexthops(fi);
+	kfree(fi);
+}
+
+void dn_fib_release_info(struct dn_fib_info *fi)
+{
+	write_lock(&dn_fib_info_lock);
+	if (fi && --fi->fib_treeref == 0) {
+		if (fi->fib_next)
+			fi->fib_next->fib_prev = fi->fib_prev;
+		if (fi->fib_prev)
+			fi->fib_prev->fib_next = fi->fib_next;
+		if (fi == dn_fib_info_list)
+			dn_fib_info_list = fi->fib_next;
+		fi->fib_dead = 1;
+		dn_fib_info_put(fi);
+	}
+	write_unlock(&dn_fib_info_lock);
+}
+
+static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
+{
+	const struct dn_fib_nh *onh = ofi->fib_nh;
+
+	for_nexthops(fi) {
+		if (nh->nh_oif != onh->nh_oif ||
+			nh->nh_gw != onh->nh_gw ||
+			nh->nh_scope != onh->nh_scope ||
+			nh->nh_weight != onh->nh_weight ||
+			((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+				return -1;
+		onh++;
+	} endfor_nexthops(fi);
+	return 0;
+}
+
+static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi)
+{
+	for_fib_info() {
+		if (fi->fib_nhs != nfi->fib_nhs)
+			continue;
+		if (nfi->fib_protocol == fi->fib_protocol &&
+			nfi->fib_prefsrc == fi->fib_prefsrc &&
+			nfi->fib_priority == fi->fib_priority &&
+			memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
+			((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+			(nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0))
+				return fi;
+	} endfor_fib_info();
+	return NULL;
+}
+
+u16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type)
+{
+	while(RTA_OK(attr,attrlen)) {
+		if (attr->rta_type == type)
+			return *(u16*)RTA_DATA(attr);
+		attr = RTA_NEXT(attr, attrlen);
+	}
+
+	return 0;
+}
+
+static int dn_fib_count_nhs(struct rtattr *rta)
+{
+	int nhs = 0;
+	struct rtnexthop *nhp = RTA_DATA(rta);
+	int nhlen = RTA_PAYLOAD(rta);
+
+	while(nhlen >= (int)sizeof(struct rtnexthop)) {
+		if ((nhlen -= nhp->rtnh_len) < 0)
+			return 0;
+		nhs++;
+		nhp = RTNH_NEXT(nhp);
+	}
+
+	return nhs;
+}
+
+static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+{
+	struct rtnexthop *nhp = RTA_DATA(rta);
+	int nhlen = RTA_PAYLOAD(rta);
+
+	change_nexthops(fi) {
+		int attrlen = nhlen - sizeof(struct rtnexthop);
+		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+			return -EINVAL;
+
+		nh->nh_flags  = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
+		nh->nh_oif    = nhp->rtnh_ifindex;
+		nh->nh_weight = nhp->rtnh_hops + 1;
+
+		if (attrlen) {
+			nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+		}
+		nhp = RTNH_NEXT(nhp);
+	} endfor_nexthops(fi);
+
+	return 0;
+}
+
+
+static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh)
+{
+	int err;
+
+	if (nh->nh_gw) {
+		struct flowi fl;
+		struct dn_fib_res res;
+
+		memset(&fl, 0, sizeof(fl));
+
+		if (nh->nh_flags&RTNH_F_ONLINK) {
+			struct net_device *dev;
+
+			if (r->rtm_scope >= RT_SCOPE_LINK)
+				return -EINVAL;
+			if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
+				return -EINVAL;
+			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
+				return -ENODEV;
+			if (!(dev->flags&IFF_UP))
+				return -ENETDOWN;
+			nh->nh_dev = dev;
+			dev_hold(dev);
+			nh->nh_scope = RT_SCOPE_LINK;
+			return 0;
+		}
+
+		memset(&fl, 0, sizeof(fl));
+		fl.fld_dst = nh->nh_gw;
+		fl.oif = nh->nh_oif;
+		fl.fld_scope = r->rtm_scope + 1;
+
+		if (fl.fld_scope < RT_SCOPE_LINK)
+			fl.fld_scope = RT_SCOPE_LINK;
+
+		if ((err = dn_fib_lookup(&fl, &res)) != 0)
+			return err;
+
+		err = -EINVAL;
+		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+			goto out;
+		nh->nh_scope = res.scope;
+		nh->nh_oif = DN_FIB_RES_OIF(res);
+		nh->nh_dev = DN_FIB_RES_DEV(res);
+		if (nh->nh_dev == NULL)
+			goto out;
+		dev_hold(nh->nh_dev);
+		err = -ENETDOWN;
+		if (!(nh->nh_dev->flags & IFF_UP))
+			goto out;
+		err = 0;
+out:
+		dn_fib_res_put(&res);
+		return err;
+	} else {
+		struct net_device *dev;
+
+		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+			return -EINVAL;
+
+		dev = __dev_get_by_index(nh->nh_oif);
+		if (dev == NULL || dev->dn_ptr == NULL)
+			return -ENODEV;
+		if (!(dev->flags&IFF_UP))
+			return -ENETDOWN;
+		nh->nh_dev = dev;
+		dev_hold(nh->nh_dev);
+		nh->nh_scope = RT_SCOPE_HOST;
+	}
+
+	return 0;
+}
+
+
+struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp)
+{
+	int err;
+	struct dn_fib_info *fi = NULL;
+	struct dn_fib_info *ofi;
+	int nhs = 1;
+
+	if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
+		goto err_inval;
+
+	if (rta->rta_mp) {
+		nhs = dn_fib_count_nhs(rta->rta_mp);
+		if (nhs == 0)
+			goto err_inval;
+	}
+
+	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
+	err = -ENOBUFS;
+	if (fi == NULL)
+		goto failure;
+	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct dn_fib_nh));
+
+	fi->fib_protocol = r->rtm_protocol;
+	fi->fib_nhs = nhs;
+	fi->fib_flags = r->rtm_flags;
+	if (rta->rta_priority)
+		fi->fib_priority = *rta->rta_priority;
+	if (rta->rta_mx) {
+		int attrlen = RTA_PAYLOAD(rta->rta_mx);
+		struct rtattr *attr = RTA_DATA(rta->rta_mx);
+
+		while(RTA_OK(attr, attrlen)) {
+			unsigned flavour = attr->rta_type;
+			if (flavour) {
+				if (flavour > RTAX_MAX)
+					goto err_inval;
+				fi->fib_metrics[flavour-1] = *(unsigned*)RTA_DATA(attr);
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+	if (rta->rta_prefsrc)
+		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2);
+
+	if (rta->rta_mp) {
+		if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+			goto failure;
+		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+			goto err_inval;
+		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2))
+			goto err_inval;
+	} else {
+		struct dn_fib_nh *nh = fi->fib_nh;
+		if (rta->rta_oif)
+			nh->nh_oif = *rta->rta_oif;
+		if (rta->rta_gw)
+			memcpy(&nh->nh_gw, rta->rta_gw, 2);
+		nh->nh_flags = r->rtm_flags;
+		nh->nh_weight = 1;
+	}
+
+	if (r->rtm_type == RTN_NAT) {
+		if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
+			goto err_inval;
+		memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2);
+		goto link_it;
+	}
+
+	if (dn_fib_props[r->rtm_type].error) {
+		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+			goto err_inval;
+		goto link_it;
+	}
+
+	if (r->rtm_scope > RT_SCOPE_HOST)
+		goto err_inval;
+
+	if (r->rtm_scope == RT_SCOPE_HOST) {
+		struct dn_fib_nh *nh = fi->fib_nh;
+
+		/* Local address is added */
+		if (nhs != 1 || nh->nh_gw)
+			goto err_inval;
+		nh->nh_scope = RT_SCOPE_NOWHERE;
+		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
+		err = -ENODEV;
+		if (nh->nh_dev == NULL)
+			goto failure;
+	} else {
+		change_nexthops(fi) {
+			if ((err = dn_fib_check_nh(r, fi, nh)) != 0)
+				goto failure;
+		} endfor_nexthops(fi)
+	}
+
+	if (fi->fib_prefsrc) {
+		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
+		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 2))
+			if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
+				goto err_inval;
+	}
+
+link_it:
+	if ((ofi = dn_fib_find_info(fi)) != NULL) {
+		fi->fib_dead = 1;
+		dn_fib_free_info(fi);
+		ofi->fib_treeref++;
+		return ofi;
+	}
+
+	fi->fib_treeref++;
+	atomic_inc(&fi->fib_clntref);
+	write_lock(&dn_fib_info_lock);
+	fi->fib_next = dn_fib_info_list;
+	fi->fib_prev = NULL;
+	if (dn_fib_info_list)
+		dn_fib_info_list->fib_prev = fi;
+	dn_fib_info_list = fi;
+	write_unlock(&dn_fib_info_lock);
+	return fi;
+
+err_inval:
+	err = -EINVAL;
+
+failure:
+	*errp = err;
+	if (fi) {
+		fi->fib_dead = 1;
+		dn_fib_free_info(fi);
+	}
+
+	return NULL;
+}
+
+int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res)
+{
+	int err = dn_fib_props[type].error;
+
+	if (err == 0) {
+		if (fi->fib_flags & RTNH_F_DEAD)
+			return 1;
+
+		res->fi = fi;
+
+		switch(type) {
+			case RTN_NAT:
+				DN_FIB_RES_RESET(*res);
+				atomic_inc(&fi->fib_clntref);
+				return 0;
+			case RTN_UNICAST:
+			case RTN_LOCAL:
+				for_nexthops(fi) {
+					if (nh->nh_flags & RTNH_F_DEAD)
+						continue;
+					if (!fl->oif || fl->oif == nh->nh_oif)
+						break;
+				}
+				if (nhsel < fi->fib_nhs) {
+					res->nh_sel = nhsel;
+					atomic_inc(&fi->fib_clntref);
+					return 0;
+				}
+				endfor_nexthops(fi);
+				res->fi = NULL;
+				return 1;
+			default:
+				if (net_ratelimit())
+					 printk("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", type);
+				res->fi = NULL;
+				return -EINVAL;
+		}
+	}
+	return err;
+}
+
+void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res)
+{
+	struct dn_fib_info *fi = res->fi;
+	int w;
+
+	spin_lock_bh(&dn_fib_multipath_lock);
+	if (fi->fib_power <= 0) {
+		int power = 0;
+		change_nexthops(fi) {
+			if (!(nh->nh_flags&RTNH_F_DEAD)) {
+				power += nh->nh_weight;
+				nh->nh_power = nh->nh_weight;
+			}
+		} endfor_nexthops(fi);
+		fi->fib_power = power;
+		if (power < 0) {
+			spin_unlock_bh(&dn_fib_multipath_lock);
+			res->nh_sel = 0;
+			return;
+		}
+	}
+
+	w = jiffies % fi->fib_power;
+
+	change_nexthops(fi) {
+		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+			if ((w -= nh->nh_power) <= 0) {
+				nh->nh_power--;
+				fi->fib_power--;
+				res->nh_sel = nhsel;
+				spin_unlock_bh(&dn_fib_multipath_lock);
+				return;
+			}
+		}
+	} endfor_nexthops(fi);
+	res->nh_sel = 0;
+	spin_unlock_bh(&dn_fib_multipath_lock);
+}
+
+
+static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
+{
+	int i;
+
+	for(i = 1; i <= RTA_MAX; i++) {
+		struct rtattr *attr = rta[i-1];
+		if (attr) {
+			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
+				return -EINVAL;
+			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+				rta[i-1] = (struct rtattr *)RTA_DATA(attr);
+		}
+	}
+
+	return 0;
+}
+
+int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct dn_fib_table *tb;
+	struct rtattr **rta = arg;
+	struct rtmsg *r = NLMSG_DATA(nlh);
+
+	if (dn_fib_check_attr(r, rta))
+		return -EINVAL;
+
+	tb = dn_fib_get_table(r->rtm_table, 0);
+	if (tb)
+		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
+
+	return -ESRCH;
+}
+
+int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct dn_fib_table *tb;
+	struct rtattr **rta = arg;
+	struct rtmsg *r = NLMSG_DATA(nlh);
+
+	if (dn_fib_check_attr(r, rta))
+		return -EINVAL;
+
+	tb = dn_fib_get_table(r->rtm_table, 1);
+	if (tb) 
+		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
+
+	return -ENOBUFS;
+}
+
+
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct dn_fib_table *tb;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+			return dn_cache_dump(skb, cb);
+
+	s_t = cb->args[0];
+	if (s_t == 0)
+		s_t = cb->args[0] = RT_MIN_TABLE;
+
+	for(t = s_t; t <= RT_TABLE_MAX; t++) {
+		if (t < s_t)
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+		tb = dn_fib_get_table(t, 0);
+		if (tb == NULL)
+			continue;
+		if (tb->dump(tb, skb, cb) < 0)
+			break;
+	}
+
+	cb->args[0] = t;
+
+	return skb->len;
+}
+
+static void fib_magic(int cmd, int type, __u16 dst, int dst_len, struct dn_ifaddr *ifa)
+{
+	struct dn_fib_table *tb;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtmsg rtm;
+	} req;
+	struct dn_kern_rta rta;
+
+	memset(&req.rtm, 0, sizeof(req.rtm));
+	memset(&rta, 0, sizeof(rta));
+
+	if (type == RTN_UNICAST)
+		tb = dn_fib_get_table(RT_MIN_TABLE, 1);
+	else
+		tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
+
+	if (tb == NULL)
+		return;
+
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = cmd;
+	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
+	req.nlh.nlmsg_pid = 0;
+	req.nlh.nlmsg_seq = 0;
+
+	req.rtm.rtm_dst_len = dst_len;
+	req.rtm.rtm_table = tb->n;
+	req.rtm.rtm_protocol = RTPROT_KERNEL;
+	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
+	req.rtm.rtm_type = type;
+
+	rta.rta_dst = &dst;
+	rta.rta_prefsrc = &ifa->ifa_local;
+	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+
+	if (cmd == RTM_NEWROUTE)
+		tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+	else
+		tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+}
+
+static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
+{
+
+	fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
+
+#if 0
+	if (!(dev->flags&IFF_UP))
+		return;
+	/* In the future, we will want to add default routes here */
+
+#endif
+}
+
+static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
+{
+	int found_it = 0;
+	struct net_device *dev;
+	struct dn_dev *dn_db;
+	struct dn_ifaddr *ifa2;
+
+	ASSERT_RTNL();
+
+	/* Scan device list */
+	read_lock(&dev_base_lock);
+	for(dev = dev_base; dev; dev = dev->next) {
+		dn_db = dev->dn_ptr;
+		if (dn_db == NULL)
+			continue;
+		for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) {
+			if (ifa2->ifa_local == ifa->ifa_local) {
+				found_it = 1;
+				break;
+			}
+		}
+	}
+	read_unlock(&dev_base_lock);
+
+	if (found_it == 0) {
+		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
+
+		if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
+			if (dn_fib_sync_down(ifa->ifa_local, NULL, 0))
+				dn_fib_flush();
+		}
+	}
+}
+
+static void dn_fib_disable_addr(struct net_device *dev, int force)
+{
+	if (dn_fib_sync_down(0, dev, force))
+		dn_fib_flush();
+	dn_rt_cache_flush(0);
+	neigh_ifdown(&dn_neigh_table, dev);
+}
+
+static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr;
+
+	switch(event) {
+		case NETDEV_UP:
+			dn_fib_add_ifaddr(ifa);
+			dn_fib_sync_up(ifa->ifa_dev->dev);
+			dn_rt_cache_flush(-1);
+			break;
+		case NETDEV_DOWN:
+			dn_fib_del_ifaddr(ifa);
+			if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+				dn_fib_disable_addr(ifa->ifa_dev->dev, 1);
+			} else {
+				dn_rt_cache_flush(-1);
+			}
+			break;
+	}
+	return NOTIFY_DONE;
+}
+
+int dn_fib_sync_down(dn_address local, struct net_device *dev, int force)
+{
+        int ret = 0;
+        int scope = RT_SCOPE_NOWHERE;
+
+        if (force)
+                scope = -1;
+
+        for_fib_info() {
+                /* 
+                 * This makes no sense for DECnet.... we will almost
+                 * certainly have more than one local address the same
+                 * over all our interfaces. It needs thinking about
+                 * some more.
+                 */
+                if (local && fi->fib_prefsrc == local) {
+                        fi->fib_flags |= RTNH_F_DEAD;
+                        ret++;
+                } else if (dev && fi->fib_nhs) {
+                        int dead = 0;
+
+                        change_nexthops(fi) {
+                                if (nh->nh_flags&RTNH_F_DEAD)
+                                        dead++;
+                                else if (nh->nh_dev == dev &&
+                                                nh->nh_scope != scope) {
+					spin_lock_bh(&dn_fib_multipath_lock);
+                                        nh->nh_flags |= RTNH_F_DEAD;
+                                        fi->fib_power -= nh->nh_power;
+                                        nh->nh_power = 0;
+					spin_unlock_bh(&dn_fib_multipath_lock);
+                                        dead++;
+                                }
+                        } endfor_nexthops(fi)
+                        if (dead == fi->fib_nhs) {
+                                fi->fib_flags |= RTNH_F_DEAD;
+                                ret++;
+                        }
+                }
+        } endfor_fib_info();
+        return ret;
+}
+
+
+int dn_fib_sync_up(struct net_device *dev)
+{
+        int ret = 0;
+
+        if (!(dev->flags&IFF_UP))
+                return 0;
+
+        for_fib_info() {
+                int alive = 0;
+
+                change_nexthops(fi) {
+                        if (!(nh->nh_flags&RTNH_F_DEAD)) {
+                                alive++;
+                                continue;
+                        }
+                        if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
+                                continue;
+                        if (nh->nh_dev != dev || dev->dn_ptr == NULL)
+                                continue;
+                        alive++;
+			spin_lock_bh(&dn_fib_multipath_lock);
+                        nh->nh_power = 0;
+                        nh->nh_flags &= ~RTNH_F_DEAD;
+			spin_unlock_bh(&dn_fib_multipath_lock);
+                } endfor_nexthops(fi);
+
+                if (alive > 0) {
+                        fi->fib_flags &= ~RTNH_F_DEAD;
+                        ret++;
+                }
+        } endfor_fib_info();
+        return ret;
+}
+
+void dn_fib_flush(void)
+{
+        int flushed = 0;
+        struct dn_fib_table *tb;
+        int id;
+
+        for(id = RT_TABLE_MAX; id > 0; id--) {
+                if ((tb = dn_fib_get_table(id, 0)) == NULL)
+                        continue;
+                flushed += tb->flush(tb);
+        }
+
+        if (flushed)
+                dn_rt_cache_flush(-1);
+}
+
+static struct notifier_block dn_fib_dnaddr_notifier = {
+	.notifier_call = dn_fib_dnaddr_event,
+};
+
+void __exit dn_fib_cleanup(void)
+{
+	dn_fib_table_cleanup();
+	dn_fib_rules_cleanup();
+
+	unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+}
+
+
+void __init dn_fib_init(void)
+{
+
+	dn_fib_table_init();
+	dn_fib_rules_init();
+
+	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+}
+
+
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
new file mode 100644
index 00000000000..f6dfe96f45b
--- /dev/null
+++ b/net/decnet/dn_neigh.c
@@ -0,0 +1,627 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Neighbour Functions (Adjacency Database and 
+ *                                                        On-Ethernet Cache)
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *
+ *
+ * Changes:
+ *     Steve Whitehouse     : Fixed router listing routine
+ *     Steve Whitehouse     : Added error_report functions
+ *     Steve Whitehouse     : Added default router detection
+ *     Steve Whitehouse     : Hop counts in outgoing messages
+ *     Steve Whitehouse     : Fixed src/dst in outgoing messages so
+ *                            forwarding now stands a good chance of
+ *                            working.
+ *     Steve Whitehouse     : Fixed neighbour states (for now anyway).
+ *     Steve Whitehouse     : Made error_report functions dummies. This
+ *                            is not the right place to return skbs.
+ *     Steve Whitehouse     : Convert to seq_file
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/string.h>
+#include <linux/netfilter_decnet.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
+#include <linux/jhash.h>
+#include <asm/atomic.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_dev.h>
+#include <net/dn_neigh.h>
+#include <net/dn_route.h>
+
+static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev);
+static int dn_neigh_construct(struct neighbour *);
+static void dn_long_error_report(struct neighbour *, struct sk_buff *);
+static void dn_short_error_report(struct neighbour *, struct sk_buff *);
+static int dn_long_output(struct sk_buff *);
+static int dn_short_output(struct sk_buff *);
+static int dn_phase3_output(struct sk_buff *);
+
+
+/*
+ * For talking to broadcast devices: Ethernet & PPP
+ */
+static struct neigh_ops dn_long_ops = {
+	.family =		AF_DECnet,
+	.error_report =		dn_long_error_report,
+	.output =		dn_long_output,
+	.connected_output =	dn_long_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+/*
+ * For talking to pointopoint and multidrop devices: DDCMP and X.25
+ */
+static struct neigh_ops dn_short_ops = {
+	.family =		AF_DECnet,
+	.error_report =		dn_short_error_report,
+	.output =		dn_short_output,
+	.connected_output =	dn_short_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+/*
+ * For talking to DECnet phase III nodes
+ */
+static struct neigh_ops dn_phase3_ops = {
+	.family =		AF_DECnet,
+	.error_report =		dn_short_error_report, /* Can use short version here */
+	.output =		dn_phase3_output,
+	.connected_output =	dn_phase3_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit
+};
+
+struct neigh_table dn_neigh_table = {
+	.family =			PF_DECnet,
+	.entry_size =			sizeof(struct dn_neigh),
+	.key_len =			sizeof(dn_address),
+	.hash =				dn_neigh_hash,
+	.constructor =			dn_neigh_construct,
+	.id =				"dn_neigh_cache",
+	.parms ={
+		.tbl =			&dn_neigh_table,
+		.entries =		0,
+		.base_reachable_time =	30 * HZ,
+		.retrans_time =	1 * HZ,
+		.gc_staletime =	60 * HZ,
+		.reachable_time =		30 * HZ,
+		.delay_probe_time =	5 * HZ,
+		.queue_len =		3,
+		.ucast_probes =	0,
+		.app_probes =		0,
+		.mcast_probes =	0,
+		.anycast_delay =	0,
+		.proxy_delay =		0,
+		.proxy_qlen =		0,
+		.locktime =		1 * HZ,
+	},
+	.gc_interval =			30 * HZ,
+	.gc_thresh1 =			128,
+	.gc_thresh2 =			512,
+	.gc_thresh3 =			1024,
+};
+
+static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev)
+{
+	return jhash_2words(*(dn_address *)pkey, 0, dn_neigh_table.hash_rnd);
+}
+
+static int dn_neigh_construct(struct neighbour *neigh)
+{
+	struct net_device *dev = neigh->dev;
+	struct dn_neigh *dn = (struct dn_neigh *)neigh;
+	struct dn_dev *dn_db;
+	struct neigh_parms *parms;
+
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
+	if (dn_db == NULL) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	parms = dn_db->neigh_parms;
+	if (!parms) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	__neigh_parms_put(neigh->parms);
+	neigh->parms = neigh_parms_clone(parms);
+	rcu_read_unlock();
+
+	if (dn_db->use_long)
+		neigh->ops = &dn_long_ops;
+	else
+		neigh->ops = &dn_short_ops;
+
+	if (dn->flags & DN_NDFLAG_P3)
+		neigh->ops = &dn_phase3_ops;
+
+	neigh->nud_state = NUD_NOARP;
+	neigh->output = neigh->ops->connected_output;
+
+	if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT))
+		memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+	else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK))
+		dn_dn2eth(neigh->ha, dn->addr);
+	else {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "Trying to create neigh for hw %d\n",  dev->type);
+		return -EINVAL;
+	}
+
+	/*
+	 * Make an estimate of the remote block size by assuming that its
+	 * two less then the device mtu, which it true for ethernet (and
+	 * other things which support long format headers) since there is
+	 * an extra length field (of 16 bits) which isn't part of the
+	 * ethernet headers and which the DECnet specs won't admit is part
+	 * of the DECnet routing headers either.
+	 *
+	 * If we over estimate here its no big deal, the NSP negotiations
+	 * will prevent us from sending packets which are too large for the
+	 * remote node to handle. In any case this figure is normally updated
+	 * by a hello message in most cases.
+	 */
+	dn->blksize = dev->mtu - 2;
+
+	return 0;
+}
+
+static void dn_long_error_report(struct neighbour *neigh, struct sk_buff *skb)
+{
+	printk(KERN_DEBUG "dn_long_error_report: called\n");
+	kfree_skb(skb);
+}
+
+
+static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb)
+{
+	printk(KERN_DEBUG "dn_short_error_report: called\n");
+	kfree_skb(skb);
+}
+
+static int dn_neigh_output_packet(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct dn_route *rt = (struct dn_route *)dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+	char mac_addr[ETH_ALEN];
+
+	dn_dn2eth(mac_addr, rt->rt_local_src);
+	if (!dev->hard_header || dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, mac_addr, skb->len) >= 0)
+		return neigh->ops->queue_xmit(skb);
+
+	if (net_ratelimit())
+		printk(KERN_DEBUG "dn_neigh_output_packet: oops, can't send packet\n");
+
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static int dn_long_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+	int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
+	unsigned char *data;
+	struct dn_long_packet *lp;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+
+	if (skb_headroom(skb) < headroom) {
+		struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+		if (skb2 == NULL) {
+			if (net_ratelimit())
+				printk(KERN_CRIT "dn_long_output: no memory\n");
+			kfree_skb(skb);
+			return -ENOBUFS;
+		}
+		kfree_skb(skb);
+		skb = skb2;
+		if (net_ratelimit())
+			printk(KERN_INFO "dn_long_output: Increasing headroom\n");
+	}
+
+	data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
+	lp = (struct dn_long_packet *)(data+3);
+
+	*((unsigned short *)data) = dn_htons(skb->len - 2);
+	*(data + 2) = 1 | DN_RT_F_PF; /* Padding */
+
+	lp->msgflg   = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
+	lp->d_area   = lp->d_subarea = 0;
+	dn_dn2eth(lp->d_id, dn_ntohs(cb->dst));
+	lp->s_area   = lp->s_subarea = 0;
+	dn_dn2eth(lp->s_id, dn_ntohs(cb->src));
+	lp->nl2      = 0;
+	lp->visit_ct = cb->hops & 0x3f;
+	lp->s_class  = 0;
+	lp->pt       = 0;
+
+	skb->nh.raw = skb->data;
+
+	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+}
+
+static int dn_short_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+	int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
+	struct dn_short_packet *sp;
+	unsigned char *data;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+
+        if (skb_headroom(skb) < headroom) {
+                struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+                if (skb2 == NULL) {
+			if (net_ratelimit())
+                        	printk(KERN_CRIT "dn_short_output: no memory\n");
+                        kfree_skb(skb);
+                        return -ENOBUFS;
+                }
+                kfree_skb(skb);
+                skb = skb2;
+		if (net_ratelimit())
+                	printk(KERN_INFO "dn_short_output: Increasing headroom\n");
+        }
+
+	data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
+	*((unsigned short *)data) = dn_htons(skb->len - 2);
+	sp = (struct dn_short_packet *)(data+2);
+
+	sp->msgflg     = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
+	sp->dstnode    = cb->dst;
+	sp->srcnode    = cb->src;
+	sp->forward    = cb->hops & 0x3f;
+
+	skb->nh.raw = skb->data;
+
+	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+}
+
+/*
+ * Phase 3 output is the same is short output, execpt that
+ * it clears the area bits before transmission.
+ */
+static int dn_phase3_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+	int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
+	struct dn_short_packet *sp;
+	unsigned char *data;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+	if (skb_headroom(skb) < headroom) {
+		struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+		if (skb2 == NULL) {
+			if (net_ratelimit())
+				printk(KERN_CRIT "dn_phase3_output: no memory\n");
+			kfree_skb(skb);
+			return -ENOBUFS;
+		}
+		kfree_skb(skb);
+		skb = skb2;
+		if (net_ratelimit())
+			printk(KERN_INFO "dn_phase3_output: Increasing headroom\n");
+	}
+
+	data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
+	*((unsigned short *)data) = dn_htons(skb->len - 2);
+	sp = (struct dn_short_packet *)(data + 2);
+
+	sp->msgflg   = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
+	sp->dstnode  = cb->dst & dn_htons(0x03ff);
+	sp->srcnode  = cb->src & dn_htons(0x03ff);
+	sp->forward  = cb->hops & 0x3f;
+
+	skb->nh.raw = skb->data;
+
+	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+}
+
+/*
+ * Unfortunately, the neighbour code uses the device in its hash
+ * function, so we don't get any advantage from it. This function
+ * basically does a neigh_lookup(), but without comparing the device
+ * field. This is required for the On-Ethernet cache
+ */
+
+/*
+ * Pointopoint link receives a hello message
+ */
+void dn_neigh_pointopoint_hello(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+}
+
+/*
+ * Ethernet router hello message received
+ */
+int dn_neigh_router_hello(struct sk_buff *skb)
+{
+	struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
+
+	struct neighbour *neigh;
+	struct dn_neigh *dn;
+	struct dn_dev *dn_db;
+	dn_address src;
+
+	src = dn_htons(dn_eth2dn(msg->id));
+
+	neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
+
+	dn = (struct dn_neigh *)neigh;
+
+	if (neigh) {
+		write_lock(&neigh->lock);
+
+		neigh->used = jiffies;
+		dn_db = (struct dn_dev *)neigh->dev->dn_ptr;
+
+		if (!(neigh->nud_state & NUD_PERMANENT)) {
+			neigh->updated = jiffies;
+
+			if (neigh->dev->type == ARPHRD_ETHER)
+				memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
+
+			dn->blksize  = dn_ntohs(msg->blksize);
+			dn->priority = msg->priority;
+
+			dn->flags &= ~DN_NDFLAG_P3;
+
+			switch(msg->iinfo & DN_RT_INFO_TYPE) {
+				case DN_RT_INFO_L1RT:
+					dn->flags &=~DN_NDFLAG_R2;
+					dn->flags |= DN_NDFLAG_R1;
+					break;
+				case DN_RT_INFO_L2RT:
+					dn->flags |= DN_NDFLAG_R2;
+			}
+		}
+
+		if (!dn_db->router) {
+			dn_db->router = neigh_clone(neigh);
+		} else {
+			if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+				neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
+		}
+		write_unlock(&neigh->lock);
+		neigh_release(neigh);
+	}
+
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Endnode hello message received
+ */
+int dn_neigh_endnode_hello(struct sk_buff *skb)
+{
+	struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
+	struct neighbour *neigh;
+	struct dn_neigh *dn;
+	dn_address src;
+
+	src = dn_htons(dn_eth2dn(msg->id));
+
+	neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
+
+	dn = (struct dn_neigh *)neigh;
+
+	if (neigh) {
+		write_lock(&neigh->lock);
+
+		neigh->used = jiffies;
+
+		if (!(neigh->nud_state & NUD_PERMANENT)) {
+			neigh->updated = jiffies;
+
+			if (neigh->dev->type == ARPHRD_ETHER)
+				memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
+			dn->flags   &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
+			dn->blksize  = dn_ntohs(msg->blksize);
+			dn->priority = 0;
+		}
+
+		write_unlock(&neigh->lock);
+		neigh_release(neigh);
+	}
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static char *dn_find_slot(char *base, int max, int priority)
+{
+	int i;
+	unsigned char *min = NULL;
+
+	base += 6; /* skip first id */
+
+	for(i = 0; i < max; i++) {
+		if (!min || (*base < *min))
+			min = base;
+		base += 7; /* find next priority */
+	}
+
+	if (!min)
+		return NULL;
+
+	return (*min < priority) ? (min - 6) : NULL;
+}
+
+struct elist_cb_state {
+	struct net_device *dev;
+	unsigned char *ptr;
+	unsigned char *rs;
+	int t, n;
+};
+
+static void neigh_elist_cb(struct neighbour *neigh, void *_info)
+{
+	struct elist_cb_state *s = _info;
+	struct dn_dev *dn_db;
+	struct dn_neigh *dn;
+
+	if (neigh->dev != s->dev)
+		return;
+
+	dn = (struct dn_neigh *) neigh;
+	if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
+		return;
+
+	dn_db = (struct dn_dev *) s->dev->dn_ptr;
+	if (dn_db->parms.forwarding == 1 && (dn->flags & DN_NDFLAG_R2))
+		return;
+
+	if (s->t == s->n)
+		s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
+	else
+		s->t++;
+	if (s->rs == NULL)
+		return;
+
+	dn_dn2eth(s->rs, dn->addr);
+	s->rs += 6;
+	*(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0;
+	*(s->rs) |= dn->priority;
+	s->rs++;
+}
+
+int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
+{
+	struct elist_cb_state state;
+
+	state.dev = dev;
+	state.t = 0;
+	state.n = n;
+	state.ptr = ptr;
+	state.rs = ptr;
+
+	neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state);
+
+	return state.t;
+}
+
+
+#ifdef CONFIG_PROC_FS
+
+static inline void dn_neigh_format_entry(struct seq_file *seq,
+					 struct neighbour *n)
+{
+	struct dn_neigh *dn = (struct dn_neigh *) n;
+	char buf[DN_ASCBUF_LEN];
+
+	read_lock(&n->lock);
+	seq_printf(seq, "%-7s %s%s%s   %02x    %02d  %07ld %-8s\n",
+		   dn_addr2asc(dn_ntohs(dn->addr), buf),
+		   (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
+		   (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
+		   (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
+		   dn->n.nud_state,
+		   atomic_read(&dn->n.refcnt),
+		   dn->blksize,
+		   (dn->n.dev) ? dn->n.dev->name : "?");
+	read_unlock(&n->lock);
+}
+
+static int dn_neigh_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Addr    Flags State Use Blksize Dev\n");
+	} else {
+		dn_neigh_format_entry(seq, v);
+	}
+
+	return 0;
+}
+
+static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return neigh_seq_start(seq, pos, &dn_neigh_table,
+			       NEIGH_SEQ_NEIGH_ONLY);
+}
+
+static struct seq_operations dn_neigh_seq_ops = {
+	.start = dn_neigh_seq_start,
+	.next  = neigh_seq_next,
+	.stop  = neigh_seq_stop,
+	.show  = dn_neigh_seq_show,
+};
+
+static int dn_neigh_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	memset(s, 0, sizeof(*s));
+	rc = seq_open(file, &dn_neigh_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq          = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations dn_neigh_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dn_neigh_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif
+
+void __init dn_neigh_init(void)
+{
+	neigh_table_init(&dn_neigh_table);
+	proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops);
+}
+
+void __exit dn_neigh_cleanup(void)
+{
+	proc_net_remove("decnet_neigh");
+	neigh_table_clear(&dn_neigh_table);
+}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
new file mode 100644
index 00000000000..202dbde9850
--- /dev/null
+++ b/net/decnet/dn_nsp_in.c
@@ -0,0 +1,934 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Network Services Protocol (Input)
+ *
+ * Author:      Eduardo Marcelo Serrat <emserrat@geocities.com>
+ *
+ * Changes:
+ *
+ *    Steve Whitehouse:  Split into dn_nsp_in.c and dn_nsp_out.c from
+ *                       original dn_nsp.c.
+ *    Steve Whitehouse:  Updated to work with my new routing architecture.
+ *    Steve Whitehouse:  Add changes from Eduardo Serrat's patches.
+ *    Steve Whitehouse:  Put all ack handling code in a common routine.
+ *    Steve Whitehouse:  Put other common bits into dn_nsp_rx()
+ *    Steve Whitehouse:  More checks on skb->len to catch bogus packets
+ *                       Fixed various race conditions and possible nasties.
+ *    Steve Whitehouse:  Now handles returned conninit frames.
+ *     David S. Miller:  New socket locking
+ *    Steve Whitehouse:  Fixed lockup when socket filtering was enabled.
+ *         Paul Koning:  Fix to push CC sockets into RUN when acks are
+ *                       received.
+ *    Steve Whitehouse:
+ *   Patrick Caulfield:  Checking conninits for correctness & sending of error
+ *                       responses.
+ *    Steve Whitehouse:  Added backlog congestion level return codes.
+ *   Patrick Caulfield:
+ *    Steve Whitehouse:  Added flow control support (outbound)
+ *    Steve Whitehouse:  Prepare for nonlinear skbs
+ */
+
+/******************************************************************************
+    (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
+    
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*******************************************************************************/
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/route.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/termios.h>      
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/netfilter_decnet.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/dn.h>
+#include <net/dn_nsp.h>
+#include <net/dn_dev.h>
+#include <net/dn_route.h>
+
+extern int decnet_log_martians;
+
+static void dn_log_martian(struct sk_buff *skb, const char *msg)
+{
+	if (decnet_log_martians && net_ratelimit()) {
+		char *devname = skb->dev ? skb->dev->name : "???";
+		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+		printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, cb->src, cb->dst, cb->src_port, cb->dst_port);
+	}
+}
+
+/*
+ * For this function we've flipped the cross-subchannel bit
+ * if the message is an otherdata or linkservice message. Thus
+ * we can use it to work out what to update.
+ */
+static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned short type = ((ack >> 12) & 0x0003);
+	int wakeup = 0;
+
+	switch(type) {
+		case 0: /* ACK - Data */
+			if (dn_after(ack, scp->ackrcv_dat)) {
+				scp->ackrcv_dat = ack & 0x0fff;
+				wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->data_xmit_queue, ack);
+			}
+			break;
+		case 1: /* NAK - Data */
+			break;
+		case 2: /* ACK - OtherData */
+			if (dn_after(ack, scp->ackrcv_oth)) {
+				scp->ackrcv_oth = ack & 0x0fff;
+				wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->other_xmit_queue, ack);
+			}
+			break;
+		case 3: /* NAK - OtherData */
+			break;
+	}
+
+	if (wakeup && !sock_flag(sk, SOCK_DEAD))
+		sk->sk_state_change(sk);
+}
+
+/*
+ * This function is a universal ack processor.
+ */
+static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
+{
+	unsigned short *ptr = (unsigned short *)skb->data;
+	int len = 0;
+	unsigned short ack;
+
+	if (skb->len < 2)
+		return len;
+
+	if ((ack = dn_ntohs(*ptr)) & 0x8000) {
+		skb_pull(skb, 2);
+		ptr++;
+		len += 2;
+		if ((ack & 0x4000) == 0) {
+			if (oth) 
+				ack ^= 0x2000;
+			dn_ack(sk, skb, ack);
+		}
+	}
+
+	if (skb->len < 2)
+		return len;
+
+	if ((ack = dn_ntohs(*ptr)) & 0x8000) {
+		skb_pull(skb, 2);
+		len += 2;
+		if ((ack & 0x4000) == 0) {
+			if (oth) 
+				ack ^= 0x2000;
+			dn_ack(sk, skb, ack);
+		}
+	}
+
+	return len;
+}
+
+
+/**
+ * dn_check_idf - Check an image data field format is correct.
+ * @pptr: Pointer to pointer to image data
+ * @len: Pointer to length of image data
+ * @max: The maximum allowed length of the data in the image data field
+ * @follow_on: Check that this many bytes exist beyond the end of the image data
+ *
+ * Returns: 0 if ok, -1 on error
+ */
+static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on)
+{
+	unsigned char *ptr = *pptr;
+	unsigned char flen = *ptr++;
+
+	(*len)--;
+	if (flen > max)
+		return -1;
+	if ((flen + follow_on) > *len)
+		return -1;
+
+	*len -= flen;
+	*pptr = ptr + flen;
+	return 0;
+}
+
+/*
+ * Table of reason codes to pass back to node which sent us a badly
+ * formed message, plus text messages for the log. A zero entry in
+ * the reason field means "don't reply" otherwise a disc init is sent with
+ * the specified reason code.
+ */
+static struct {
+	unsigned short reason;
+	const char *text;
+} ci_err_table[] = {
+ { 0,             "CI: Truncated message" },
+ { NSP_REASON_ID, "CI: Destination username error" },
+ { NSP_REASON_ID, "CI: Destination username type" },
+ { NSP_REASON_US, "CI: Source username error" },
+ { 0,             "CI: Truncated at menuver" },
+ { 0,             "CI: Truncated before access or user data" },
+ { NSP_REASON_IO, "CI: Access data format error" },
+ { NSP_REASON_IO, "CI: User data format error" }
+};
+
+/*
+ * This function uses a slightly different lookup method
+ * to find its sockets, since it searches on object name/number
+ * rather than port numbers. Various tests are done to ensure that
+ * the incoming data is in the correct format before it is queued to
+ * a socket.
+ */
+static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data;
+	struct sockaddr_dn dstaddr;
+	struct sockaddr_dn srcaddr;
+	unsigned char type = 0;
+	int dstlen;
+	int srclen;
+	unsigned char *ptr;
+	int len;
+	int err = 0;
+	unsigned char menuver;
+
+	memset(&dstaddr, 0, sizeof(struct sockaddr_dn));
+	memset(&srcaddr, 0, sizeof(struct sockaddr_dn));
+
+	/*
+	 * 1. Decode & remove message header
+	 */
+	cb->src_port = msg->srcaddr;
+	cb->dst_port = msg->dstaddr;
+	cb->services = msg->services;
+	cb->info     = msg->info;
+	cb->segsize  = dn_ntohs(msg->segsize);
+
+	if (!pskb_may_pull(skb, sizeof(*msg)))
+		goto err_out;
+
+	skb_pull(skb, sizeof(*msg));
+
+	len = skb->len;
+	ptr = skb->data;
+
+	/*
+	 * 2. Check destination end username format
+	 */
+	dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type);
+	err++;
+	if (dstlen < 0)
+		goto err_out;
+
+	err++;
+	if (type > 1)
+		goto err_out;
+
+	len -= dstlen;
+	ptr += dstlen;
+
+	/*
+	 * 3. Check source end username format
+	 */
+	srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type);
+	err++;
+	if (srclen < 0)
+		goto err_out;
+
+	len -= srclen;
+	ptr += srclen;
+	err++;
+	if (len < 1)
+		goto err_out;
+
+	menuver = *ptr;
+	ptr++;
+	len--;
+
+	/*
+	 * 4. Check that optional data actually exists if menuver says it does
+	 */
+	err++;
+	if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1))
+		goto err_out;
+
+	/*
+	 * 5. Check optional access data format
+	 */
+	err++;
+	if (menuver & DN_MENUVER_ACC) {
+		if (dn_check_idf(&ptr, &len, 39, 1))
+			goto err_out;
+		if (dn_check_idf(&ptr, &len, 39, 1))
+			goto err_out;
+		if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0))
+			goto err_out;
+	}
+
+	/*
+	 * 6. Check optional user data format
+	 */
+	err++;
+	if (menuver & DN_MENUVER_USR) {
+		if (dn_check_idf(&ptr, &len, 16, 0))
+			goto err_out;
+	}
+
+	/*
+	 * 7. Look up socket based on destination end username
+	 */
+	return dn_sklist_find_listener(&dstaddr);
+err_out:
+	dn_log_martian(skb, ci_err_table[err].text);
+	*reason = ci_err_table[err].reason;
+	return NULL;
+}
+
+
+static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
+{
+	if (sk_acceptq_is_full(sk)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	sk->sk_ack_backlog++;
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_state_change(sk);
+}
+
+static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned char *ptr;
+
+	if (skb->len < 4)
+		goto out;
+
+	ptr = skb->data;
+	cb->services = *ptr++;
+	cb->info = *ptr++;
+	cb->segsize = dn_ntohs(*(__u16 *)ptr);
+
+	if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
+		scp->persist = 0;
+                scp->addrrem = cb->src_port;
+                sk->sk_state = TCP_ESTABLISHED;
+                scp->state = DN_RUN;
+		scp->services_rem = cb->services;
+		scp->info_rem = cb->info;
+		scp->segsize_rem = cb->segsize;
+
+		if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
+			scp->max_window = decnet_no_fc_max_cwnd;
+
+		if (skb->len > 0) {
+			unsigned char dlen = *skb->data;
+			if ((dlen <= 16) && (dlen <= skb->len)) {
+				scp->conndata_in.opt_optl = dlen;
+				memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+			}
+		}
+                dn_nsp_send_link(sk, DN_NOCHANGE, 0);
+                if (!sock_flag(sk, SOCK_DEAD))
+                	sk->sk_state_change(sk);
+        }
+
+out:
+        kfree_skb(skb);
+}
+
+static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->state == DN_CI) {
+		scp->state = DN_CD;
+		scp->persist = 0;
+	}
+
+	kfree_skb(skb);
+}
+
+static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned short reason;
+
+	if (skb->len < 2)
+		goto out;
+
+	reason = dn_ntohs(*(__u16 *)skb->data);
+	skb_pull(skb, 2);
+
+	scp->discdata_in.opt_status = reason;
+	scp->discdata_in.opt_optl   = 0;
+	memset(scp->discdata_in.opt_data, 0, 16);
+
+	if (skb->len > 0) {
+		unsigned char dlen = *skb->data;
+		if ((dlen <= 16) && (dlen <= skb->len)) {
+			scp->discdata_in.opt_optl = dlen;
+			memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+		}
+	}
+
+	scp->addrrem = cb->src_port;
+	sk->sk_state = TCP_CLOSE;
+
+	switch(scp->state) {
+		case DN_CI:
+		case DN_CD:
+			scp->state = DN_RJ;
+			sk->sk_err = ECONNREFUSED;
+			break;
+		case DN_RUN:
+			sk->sk_shutdown |= SHUTDOWN_MASK;
+			scp->state = DN_DN;
+			break;
+		case DN_DI:
+			scp->state = DN_DIC;
+			break;
+	}
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		if (sk->sk_socket->state != SS_UNCONNECTED)
+			sk->sk_socket->state = SS_DISCONNECTING;
+		sk->sk_state_change(sk);
+	}
+
+	/* 
+	 * It appears that its possible for remote machines to send disc
+	 * init messages with no port identifier if we are in the CI and
+	 * possibly also the CD state. Obviously we shouldn't reply with
+	 * a message if we don't know what the end point is.
+	 */
+	if (scp->addrrem) {
+		dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
+	}
+	scp->persist_fxn = dn_destroy_timer;
+	scp->persist = dn_nsp_persist(sk);
+
+out:
+	kfree_skb(skb);
+}
+
+/*
+ * disc_conf messages are also called no_resources or no_link
+ * messages depending upon the "reason" field.
+ */
+static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned short reason;
+
+	if (skb->len != 2)
+		goto out;
+
+	reason = dn_ntohs(*(__u16 *)skb->data);
+
+	sk->sk_state = TCP_CLOSE;
+
+	switch(scp->state) {
+		case DN_CI:
+			scp->state = DN_NR;
+			break;
+		case DN_DR:
+			if (reason == NSP_REASON_DC)
+				scp->state = DN_DRC;
+			if (reason == NSP_REASON_NL)
+				scp->state = DN_CN;
+			break;
+		case DN_DI:
+			scp->state = DN_DIC;
+			break;
+		case DN_RUN:
+			sk->sk_shutdown |= SHUTDOWN_MASK;
+		case DN_CC:
+			scp->state = DN_CN;
+	}
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		if (sk->sk_socket->state != SS_UNCONNECTED)
+			sk->sk_socket->state = SS_DISCONNECTING;
+		sk->sk_state_change(sk);
+	}
+
+	scp->persist_fxn = dn_destroy_timer;
+	scp->persist = dn_nsp_persist(sk);
+
+out:
+	kfree_skb(skb);
+}
+
+static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned short segnum;
+	unsigned char lsflags;
+	signed char fcval;
+	int wake_up = 0;
+	char *ptr = skb->data;
+	unsigned char fctype = scp->services_rem & NSP_FC_MASK;
+
+	if (skb->len != 4)
+		goto out;
+
+	segnum = dn_ntohs(*(__u16 *)ptr);
+	ptr += 2;
+	lsflags = *(unsigned char *)ptr++;
+	fcval = *ptr;
+
+	/*
+	 * Here we ignore erronous packets which should really
+	 * should cause a connection abort. It is not critical 
+	 * for now though.
+	 */
+	if (lsflags & 0xf8)
+		goto out;
+
+	if (seq_next(scp->numoth_rcv, segnum)) {
+		seq_add(&scp->numoth_rcv, 1);
+		switch(lsflags & 0x04) { /* FCVAL INT */
+		case 0x00: /* Normal Request */
+			switch(lsflags & 0x03) { /* FCVAL MOD */
+       	         	case 0x00: /* Request count */
+				if (fcval < 0) {
+					unsigned char p_fcval = -fcval;
+					if ((scp->flowrem_dat > p_fcval) &&
+					    (fctype == NSP_FC_SCMC)) {
+						scp->flowrem_dat -= p_fcval;
+					}
+				} else if (fcval > 0) {
+					scp->flowrem_dat += fcval;
+					wake_up = 1;
+				}
+               	       	 	break;
+			case 0x01: /* Stop outgoing data */
+				scp->flowrem_sw = DN_DONTSEND;
+				break;
+			case 0x02: /* Ok to start again */
+				scp->flowrem_sw = DN_SEND;
+				dn_nsp_output(sk);
+				wake_up = 1;
+			}
+			break;
+		case 0x04: /* Interrupt Request */
+			if (fcval > 0) {
+				scp->flowrem_oth += fcval;
+				wake_up = 1;
+			}
+			break;
+                }
+		if (wake_up && !sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+        }
+
+	dn_nsp_send_oth_ack(sk);
+
+out:
+	kfree_skb(skb);
+}
+
+/*
+ * Copy of sock_queue_rcv_skb (from sock.h) without
+ * bh_lock_sock() (its already held when this is called) which
+ * also allows data and other data to be queued to a socket.
+ */
+static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
+{
+	int err;
+	
+        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+           number of warnings when compiling with -W --ANK
+         */
+        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf) {
+        	err = -ENOMEM;
+        	goto out;
+        }
+
+	err = sk_filter(sk, skb, 0);
+	if (err)
+		goto out;
+
+        skb_set_owner_r(skb, sk);
+        skb_queue_tail(queue, skb);
+
+	/* This code only runs from BH or BH protected context.
+	 * Therefore the plain read_lock is ok here. -DaveM
+	 */
+	read_lock(&sk->sk_callback_lock);
+        if (!sock_flag(sk, SOCK_DEAD)) {
+		struct socket *sock = sk->sk_socket;
+		wake_up_interruptible(sk->sk_sleep);
+		if (sock && sock->fasync_list &&
+		    !test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+			__kill_fasync(sock->fasync_list, sig, 
+				    (sig == SIGURG) ? POLL_PRI : POLL_IN);
+	}
+	read_unlock(&sk->sk_callback_lock);
+out:
+        return err;
+}
+
+static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned short segnum;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	int queued = 0;
+
+	if (skb->len < 2)
+		goto out;
+
+	cb->segnum = segnum = dn_ntohs(*(__u16 *)skb->data);
+	skb_pull(skb, 2);
+
+	if (seq_next(scp->numoth_rcv, segnum)) {
+
+		if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) {
+			seq_add(&scp->numoth_rcv, 1);
+			scp->other_report = 0;
+			queued = 1;
+		}
+	}
+
+	dn_nsp_send_oth_ack(sk);
+out:
+	if (!queued)
+		kfree_skb(skb);
+}
+
+static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
+{
+	int queued = 0;
+	unsigned short segnum;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (skb->len < 2)
+		goto out;
+
+	cb->segnum = segnum = dn_ntohs(*(__u16 *)skb->data);
+	skb_pull(skb, 2);
+
+	if (seq_next(scp->numdat_rcv, segnum)) {
+                if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) {
+			seq_add(&scp->numdat_rcv, 1);
+                	queued = 1;
+                }
+
+		if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) {
+			scp->flowloc_sw = DN_DONTSEND;
+			dn_nsp_send_link(sk, DN_DONTSEND, 0);
+		}
+        }
+
+	dn_nsp_send_data_ack(sk);
+out:
+	if (!queued)
+		kfree_skb(skb);
+}
+
+/*
+ * If one of our conninit messages is returned, this function
+ * deals with it. It puts the socket into the NO_COMMUNICATION
+ * state.
+ */
+static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->state == DN_CI) {
+		scp->state = DN_NC;
+		sk->sk_state = TCP_CLOSE;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+	}
+
+	kfree_skb(skb);
+}
+
+static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	int ret = NET_RX_DROP;
+
+	/* Must not reply to returned packets */
+	if (cb->rt_flags & DN_RT_F_RTS)
+		goto out;
+
+	if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) {
+		switch(cb->nsp_flags & 0x70) {
+			case 0x10:
+			case 0x60: /* (Retransmitted) Connect Init */
+				dn_nsp_return_disc(skb, NSP_DISCINIT, reason);
+				ret = NET_RX_SUCCESS;
+				break;
+			case 0x20: /* Connect Confirm */
+				dn_nsp_return_disc(skb, NSP_DISCCONF, reason);
+				ret = NET_RX_SUCCESS;
+				break;
+		}
+	}
+
+out:
+	kfree_skb(skb);
+	return ret;
+}
+
+static int dn_nsp_rx_packet(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct sock *sk = NULL;
+	unsigned char *ptr = (unsigned char *)skb->data;
+	unsigned short reason = NSP_REASON_NL;
+
+	if (!pskb_may_pull(skb, 2))
+		goto free_out;
+
+	skb->h.raw    = skb->data;
+	cb->nsp_flags = *ptr++;
+
+	if (decnet_debug_level & 2)
+		printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags);
+
+	if (cb->nsp_flags & 0x83) 
+		goto free_out;
+
+	/*
+	 * Filter out conninits and useless packet types
+	 */
+	if ((cb->nsp_flags & 0x0c) == 0x08) {
+		switch(cb->nsp_flags & 0x70) {
+			case 0x00: /* NOP */
+			case 0x70: /* Reserved */
+			case 0x50: /* Reserved, Phase II node init */
+				goto free_out;
+			case 0x10:
+			case 0x60:
+				if (unlikely(cb->rt_flags & DN_RT_F_RTS))
+					goto free_out;
+				sk = dn_find_listener(skb, &reason);
+				goto got_it;
+		}
+	}
+
+	if (!pskb_may_pull(skb, 3))
+		goto free_out;
+
+	/*
+	 * Grab the destination address.
+	 */
+	cb->dst_port = *(unsigned short *)ptr;
+	cb->src_port = 0;
+	ptr += 2;
+
+	/*
+	 * If not a connack, grab the source address too.
+	 */
+	if (pskb_may_pull(skb, 5)) {
+		cb->src_port = *(unsigned short *)ptr;
+		ptr += 2;
+		skb_pull(skb, 5);
+	}
+
+	/*
+	 * Returned packets...
+	 * Swap src & dst and look up in the normal way.
+	 */
+	if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
+		unsigned short tmp = cb->dst_port;
+		cb->dst_port = cb->src_port;
+		cb->src_port = tmp;
+		tmp = cb->dst;
+		cb->dst = cb->src;
+		cb->src = tmp;
+	}
+
+	/*
+	 * Find the socket to which this skb is destined.
+	 */
+	sk = dn_find_by_skb(skb);
+got_it:
+	if (sk != NULL) {
+		struct dn_scp *scp = DN_SK(sk);
+		int ret;
+
+		/* Reset backoff */
+		scp->nsp_rxtshift = 0;
+
+		/*
+		 * We linearize everything except data segments here.
+		 */
+		if (cb->nsp_flags & ~0x60) {
+			if (unlikely(skb_is_nonlinear(skb)) &&
+			    skb_linearize(skb, GFP_ATOMIC) != 0)
+				goto free_out;
+		}
+
+		bh_lock_sock(sk);
+		ret = NET_RX_SUCCESS;
+		if (decnet_debug_level & 8)
+			printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n",
+				(int)cb->rt_flags, (int)cb->nsp_flags, 
+				(int)cb->src_port, (int)cb->dst_port, 
+				!!sock_owned_by_user(sk));
+		if (!sock_owned_by_user(sk))
+			ret = dn_nsp_backlog_rcv(sk, skb);
+		else
+			sk_add_backlog(sk, skb);
+		bh_unlock_sock(sk);
+		sock_put(sk);
+
+		return ret;
+	}
+
+	return dn_nsp_no_socket(skb, reason);
+
+free_out:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+int dn_nsp_rx(struct sk_buff *skb)
+{
+	return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->dev, NULL, dn_nsp_rx_packet);
+}
+
+/*
+ * This is the main receive routine for sockets. It is called
+ * from the above when the socket is not busy, and also from
+ * sock_release() when there is a backlog queued up.
+ */
+int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+	if (cb->rt_flags & DN_RT_F_RTS) {
+		if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68)
+			dn_returned_conn_init(sk, skb);
+		else
+			kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+	/*
+	 * Control packet.
+	 */
+	if ((cb->nsp_flags & 0x0c) == 0x08) {
+		switch(cb->nsp_flags & 0x70) {
+			case 0x10:
+			case 0x60:
+				dn_nsp_conn_init(sk, skb);
+				break;
+			case 0x20:
+				dn_nsp_conn_conf(sk, skb);
+				break;
+			case 0x30:
+				dn_nsp_disc_init(sk, skb);
+				break;
+			case 0x40:      
+				dn_nsp_disc_conf(sk, skb);
+				break;
+		}
+
+	} else if (cb->nsp_flags == 0x24) {
+		/*
+		 * Special for connacks, 'cos they don't have
+		 * ack data or ack otherdata info.
+		 */
+		dn_nsp_conn_ack(sk, skb);
+	} else {
+		int other = 1;
+
+		/* both data and ack frames can kick a CC socket into RUN */
+		if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) {
+			scp->state = DN_RUN;
+			sk->sk_state = TCP_ESTABLISHED;
+			sk->sk_state_change(sk);
+		}
+
+		if ((cb->nsp_flags & 0x1c) == 0)
+			other = 0;
+		if (cb->nsp_flags == 0x04)
+			other = 0;
+
+		/*
+		 * Read out ack data here, this applies equally
+		 * to data, other data, link serivce and both
+		 * ack data and ack otherdata.
+		 */
+		dn_process_ack(sk, skb, other);
+
+		/*
+		 * If we've some sort of data here then call a
+		 * suitable routine for dealing with it, otherwise
+		 * the packet is an ack and can be discarded.
+		 */
+		if ((cb->nsp_flags & 0x0c) == 0) {
+
+			if (scp->state != DN_RUN)
+				goto free_out;
+
+			switch(cb->nsp_flags) {
+				case 0x10: /* LS */
+					dn_nsp_linkservice(sk, skb);
+					break;
+				case 0x30: /* OD */
+					dn_nsp_otherdata(sk, skb);
+					break;
+				default:
+					dn_nsp_data(sk, skb);
+			}
+
+		} else { /* Ack, chuck it out here */
+free_out:
+			kfree_skb(skb);
+		}
+	}
+
+	return NET_RX_SUCCESS;
+}
+
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
new file mode 100644
index 00000000000..42abbf3f524
--- /dev/null
+++ b/net/decnet/dn_nsp_out.c
@@ -0,0 +1,782 @@
+
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Network Services Protocol (Output)
+ *
+ * Author:      Eduardo Marcelo Serrat <emserrat@geocities.com>
+ *
+ * Changes:
+ *
+ *    Steve Whitehouse:  Split into dn_nsp_in.c and dn_nsp_out.c from
+ *                       original dn_nsp.c.
+ *    Steve Whitehouse:  Updated to work with my new routing architecture.
+ *    Steve Whitehouse:  Added changes from Eduardo Serrat's patches.
+ *    Steve Whitehouse:  Now conninits have the "return" bit set.
+ *    Steve Whitehouse:  Fixes to check alloc'd skbs are non NULL!
+ *                       Moved output state machine into one function
+ *    Steve Whitehouse:  New output state machine
+ *         Paul Koning:  Connect Confirm message fix.
+ *      Eduardo Serrat:  Fix to stop dn_nsp_do_disc() sending malformed packets.
+ *    Steve Whitehouse:  dn_nsp_output() and friends needed a spring clean
+ *    Steve Whitehouse:  Moved dn_nsp_send() in here from route.h
+ */
+
+/******************************************************************************
+    (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
+    
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*******************************************************************************/
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/route.h>
+#include <net/sock.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/termios.h>      
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/if_packet.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_nsp.h>
+#include <net/dn_dev.h>
+#include <net/dn_route.h>
+
+
+static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+static void dn_nsp_send(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	struct dn_scp *scp = DN_SK(sk);
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	skb->h.raw = skb->data;
+	scp->stamp = jiffies;
+
+	dst = sk_dst_check(sk, 0);
+	if (dst) {
+try_again:
+		skb->dst = dst;
+		dst_output(skb);
+		return;
+	}
+
+	memset(&fl, 0, sizeof(fl));
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fld_src = dn_saddr2dn(&scp->addr);
+	fl.fld_dst = dn_saddr2dn(&scp->peer);
+	dn_sk_ports_copy(&fl, scp);
+	fl.proto = DNPROTO_NSP;
+	if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) {
+		dst = sk_dst_get(sk);
+		sk->sk_route_caps = dst->dev->features;
+		goto try_again;
+	}
+
+	sk->sk_err = EHOSTUNREACH;
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_state_change(sk);
+}
+
+
+/*
+ * If sk == NULL, then we assume that we are supposed to be making
+ * a routing layer skb. If sk != NULL, then we are supposed to be
+ * creating an skb for the NSP layer.
+ *
+ * The eventual aim is for each socket to have a cached header size
+ * for its outgoing packets, and to set hdr from this when sk != NULL.
+ */
+struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri)
+{
+	struct sk_buff *skb;
+	int hdr = 64;
+
+	if ((skb = alloc_skb(size + hdr, pri)) == NULL)
+		return NULL;
+
+	skb->protocol = __constant_htons(ETH_P_DNA_RT);
+	skb->pkt_type = PACKET_OUTGOING;
+
+	if (sk)
+		skb_set_owner_w(skb, sk);
+
+	skb_reserve(skb, hdr);
+
+	return skb;
+}
+
+/*
+ * Wrapper for the above, for allocs of data skbs. We try and get the
+ * whole size thats been asked for (plus 11 bytes of header). If this
+ * fails, then we try for any size over 16 bytes for SOCK_STREAMS.
+ */
+struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err)
+{
+	int space;
+	int len;
+	struct sk_buff *skb = NULL;
+
+	*err = 0;
+
+	while(skb == NULL) {
+		if (signal_pending(current)) {
+			*err = sock_intr_errno(timeo);
+			break;
+		}
+
+		if (sk->sk_shutdown & SEND_SHUTDOWN) {
+			*err = EINVAL;
+			break;
+		}
+
+		if (sk->sk_err)
+			break;
+
+		len = *size + 11;
+		space = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+
+		if (space < len) {
+			if ((sk->sk_socket->type == SOCK_STREAM) &&
+			    (space >= (16 + 11)))
+				len = space;
+		}
+
+		if (space < len) {
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+			if (noblock) {
+				*err = EWOULDBLOCK;
+				break;
+			}
+
+			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			SOCK_SLEEP_PRE(sk)
+
+			if ((sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc)) <
+			    len)
+				schedule();
+
+			SOCK_SLEEP_POST(sk)
+			continue;
+		}
+
+		if ((skb = dn_alloc_skb(sk, len, sk->sk_allocation)) == NULL)
+			continue;
+
+		*size = len - 11;
+	}
+
+	return skb;
+}
+
+/*
+ * Calculate persist timer based upon the smoothed round
+ * trip time and the variance. Backoff according to the
+ * nsp_backoff[] array.
+ */
+unsigned long dn_nsp_persist(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
+
+	t *= nsp_backoff[scp->nsp_rxtshift];
+
+	if (t < HZ) t = HZ;
+	if (t > (600*HZ)) t = (600*HZ);
+
+	if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT)
+		scp->nsp_rxtshift++;
+
+	/* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */
+
+	return t;
+}
+
+/*
+ * This is called each time we get an estimate for the rtt
+ * on the link.
+ */
+static void dn_nsp_rtt(struct sock *sk, long rtt)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	long srtt = (long)scp->nsp_srtt;
+	long rttvar = (long)scp->nsp_rttvar;
+	long delta;
+
+	/*
+	 * If the jiffies clock flips over in the middle of timestamp
+	 * gathering this value might turn out negative, so we make sure
+	 * that is it always positive here.
+	 */
+	if (rtt < 0) 
+		rtt = -rtt;
+	/*
+	 * Add new rtt to smoothed average
+	 */
+	delta = ((rtt << 3) - srtt);
+	srtt += (delta >> 3);
+	if (srtt >= 1) 
+		scp->nsp_srtt = (unsigned long)srtt;
+	else
+		scp->nsp_srtt = 1;
+
+	/*
+	 * Add new rtt varience to smoothed varience
+	 */
+	delta >>= 1;
+	rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
+	if (rttvar >= 1) 
+		scp->nsp_rttvar = (unsigned long)rttvar;
+	else
+		scp->nsp_rttvar = 1;
+
+	/* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */
+}
+
+/**
+ * dn_nsp_clone_and_send - Send a data packet by cloning it
+ * @skb: The packet to clone and transmit
+ * @gfp: memory allocation flag
+ *
+ * Clone a queued data or other data packet and transmit it.
+ *
+ * Returns: The number of times the packet has been sent previously
+ */
+static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, int gfp)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct sk_buff *skb2;
+	int ret = 0;
+
+	if ((skb2 = skb_clone(skb, gfp)) != NULL) {
+		ret = cb->xmit_count;
+		cb->xmit_count++;
+		cb->stamp = jiffies;
+		skb2->sk = skb->sk;
+		dn_nsp_send(skb2);
+	}
+
+	return ret;
+}
+
+/**
+ * dn_nsp_output - Try and send something from socket queues
+ * @sk: The socket whose queues are to be investigated
+ * @gfp: The memory allocation flags
+ *
+ * Try and send the packet on the end of the data and other data queues.
+ * Other data gets priority over data, and if we retransmit a packet we
+ * reduce the window by dividing it in two.
+ *
+ */
+void dn_nsp_output(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff *skb;
+	unsigned reduce_win = 0;
+
+	/*
+	 * First we check for otherdata/linkservice messages
+	 */
+	if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL)
+		reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
+
+	/*
+	 * If we may not send any data, we don't.
+	 * If we are still trying to get some other data down the
+	 * channel, we don't try and send any data.
+	 */
+	if (reduce_win || (scp->flowrem_sw != DN_SEND))
+		goto recalc_window;
+
+	if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL)
+		reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
+
+	/*
+	 * If we've sent any frame more than once, we cut the
+	 * send window size in half. There is always a minimum
+	 * window size of one available.
+	 */
+recalc_window:
+	if (reduce_win) {
+		scp->snd_window >>= 1;
+		if (scp->snd_window < NSP_MIN_WINDOW)
+			scp->snd_window = NSP_MIN_WINDOW;
+	}
+}
+
+int dn_nsp_xmit_timeout(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	dn_nsp_output(sk);
+
+	if (skb_queue_len(&scp->data_xmit_queue) || skb_queue_len(&scp->other_xmit_queue))
+		scp->persist = dn_nsp_persist(sk);
+
+	return 0;
+}
+
+static inline unsigned char *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len)
+{
+	unsigned char *ptr = skb_push(skb, len);
+
+	BUG_ON(len < 5);
+
+	*ptr++ = msgflag;
+	*((unsigned short *)ptr) = scp->addrrem;
+	ptr += 2;
+	*((unsigned short *)ptr) = scp->addrloc;
+	ptr += 2;
+	return ptr;
+}
+
+static unsigned short *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	unsigned short acknum = scp->numdat_rcv & 0x0FFF;
+	unsigned short ackcrs = scp->numoth_rcv & 0x0FFF;
+	unsigned short *ptr;
+
+	BUG_ON(hlen < 9);
+
+	scp->ackxmt_dat = acknum;
+	scp->ackxmt_oth = ackcrs;
+	acknum |= 0x8000;
+	ackcrs |= 0x8000;
+
+	/* If this is an "other data/ack" message, swap acknum and ackcrs */
+	if (other) {
+		unsigned short tmp = acknum;
+		acknum = ackcrs;
+		ackcrs = tmp;
+	}
+
+	/* Set "cross subchannel" bit in ackcrs */
+	ackcrs |= 0x2000;
+
+	ptr = (unsigned short *)dn_mk_common_header(scp, skb, msgflag, hlen);
+
+	*ptr++ = dn_htons(acknum);
+	*ptr++ = dn_htons(ackcrs);
+
+	return ptr;
+}
+
+static unsigned short *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned short *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth);
+
+	if (unlikely(oth)) {
+		cb->segnum = scp->numoth;
+		seq_add(&scp->numoth, 1);
+	} else {
+		cb->segnum = scp->numdat;
+		seq_add(&scp->numdat, 1);
+	}
+	*(ptr++) = dn_htons(cb->segnum);
+
+	return ptr;
+}
+
+void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, int gfp, int oth)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
+
+	cb->xmit_count = 0;
+	dn_nsp_mk_data_header(sk, skb, oth);
+
+	/*
+	 * Slow start: If we have been idle for more than
+	 * one RTT, then reset window to min size.
+	 */
+	if ((jiffies - scp->stamp) > t)
+		scp->snd_window = NSP_MIN_WINDOW;
+
+	if (oth)
+		skb_queue_tail(&scp->other_xmit_queue, skb);
+	else
+		skb_queue_tail(&scp->data_xmit_queue, skb);
+
+	if (scp->flowrem_sw != DN_SEND)
+		return;
+
+	dn_nsp_clone_and_send(skb, gfp);
+}
+
+
+int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff *skb2, *list, *ack = NULL;
+	int wakeup = 0;
+	int try_retrans = 0;
+	unsigned long reftime = cb->stamp;
+	unsigned long pkttime;
+	unsigned short xmit_count;
+	unsigned short segnum;
+
+	skb2 = q->next;
+	list = (struct sk_buff *)q;
+	while(list != skb2) {
+		struct dn_skb_cb *cb2 = DN_SKB_CB(skb2);
+
+		if (dn_before_or_equal(cb2->segnum, acknum))
+			ack = skb2;
+
+		/* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */
+
+		skb2 = skb2->next;
+
+		if (ack == NULL)
+			continue;
+
+		/* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */
+
+		/* Does _last_ packet acked have xmit_count > 1 */
+		try_retrans = 0;
+		/* Remember to wake up the sending process */
+		wakeup = 1;
+		/* Keep various statistics */
+		pkttime = cb2->stamp;
+		xmit_count = cb2->xmit_count;
+		segnum = cb2->segnum;
+		/* Remove and drop ack'ed packet */
+		skb_unlink(ack);
+		kfree_skb(ack);
+		ack = NULL;
+
+		/*
+		 * We don't expect to see acknowledgements for packets we
+		 * haven't sent yet.
+		 */
+		WARN_ON(xmit_count == 0);
+
+		/*
+		 * If the packet has only been sent once, we can use it
+		 * to calculate the RTT and also open the window a little
+		 * further.
+		 */
+		if (xmit_count == 1) {
+			if (dn_equal(segnum, acknum)) 
+				dn_nsp_rtt(sk, (long)(pkttime - reftime));
+
+			if (scp->snd_window < scp->max_window)
+				scp->snd_window++;
+		}
+
+		/*
+		 * Packet has been sent more than once. If this is the last
+		 * packet to be acknowledged then we want to send the next
+		 * packet in the send queue again (assumes the remote host does
+		 * go-back-N error control).
+		 */
+		if (xmit_count > 1)
+			try_retrans = 1;
+	}
+
+	if (try_retrans)
+		dn_nsp_output(sk);
+
+	return wakeup;
+}
+
+void dn_nsp_send_data_ack(struct sock *sk)
+{
+	struct sk_buff *skb = NULL;
+
+	if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, 9);
+	dn_mk_ack_header(sk, skb, 0x04, 9, 0);
+	dn_nsp_send(skb);
+}
+
+void dn_nsp_send_oth_ack(struct sock *sk)
+{
+	struct sk_buff *skb = NULL;
+
+	if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, 9);
+	dn_mk_ack_header(sk, skb, 0x14, 9, 1);
+	dn_nsp_send(skb);
+}
+
+
+void dn_send_conn_ack (struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff *skb = NULL;
+        struct nsp_conn_ack_msg *msg;
+
+	if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
+		return;
+
+        msg = (struct nsp_conn_ack_msg *)skb_put(skb, 3);
+        msg->msgflg = 0x24;                   
+	msg->dstaddr = scp->addrrem;
+
+	dn_nsp_send(skb);	
+}
+
+void dn_nsp_delayed_ack(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->ackxmt_oth != scp->numoth_rcv)
+		dn_nsp_send_oth_ack(sk);
+
+	if (scp->ackxmt_dat != scp->numdat_rcv)
+		dn_nsp_send_data_ack(sk);
+}
+
+static int dn_nsp_retrans_conn_conf(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->state == DN_CC)
+		dn_send_conn_conf(sk, GFP_ATOMIC);
+
+	return 0;
+}
+
+void dn_send_conn_conf(struct sock *sk, int gfp)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff *skb = NULL;
+        struct nsp_conn_init_msg *msg;
+	unsigned char len = scp->conndata_out.opt_optl;
+
+	if ((skb = dn_alloc_skb(sk, 50 + scp->conndata_out.opt_optl, gfp)) == NULL)
+		return;
+
+        msg = (struct nsp_conn_init_msg *)skb_put(skb, sizeof(*msg));
+        msg->msgflg = 0x28;                   
+	msg->dstaddr = scp->addrrem;
+        msg->srcaddr = scp->addrloc;
+        msg->services = scp->services_loc;
+        msg->info = scp->info_loc;
+        msg->segsize = dn_htons(scp->segsize_loc);
+
+	*skb_put(skb,1) = len;
+
+	if (len > 0) 
+		memcpy(skb_put(skb, len), scp->conndata_out.opt_data, len);
+	
+
+	dn_nsp_send(skb);
+
+	scp->persist = dn_nsp_persist(sk);
+	scp->persist_fxn = dn_nsp_retrans_conn_conf;
+}
+
+
+static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, 
+			unsigned short reason, int gfp, struct dst_entry *dst,
+			int ddl, unsigned char *dd, __u16 rem, __u16 loc)
+{
+	struct sk_buff *skb = NULL;
+	int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0);
+	unsigned char *msg;
+
+	if ((dst == NULL) || (rem == 0)) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", (unsigned)rem, dst);
+		return;
+	}
+
+	if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL)
+		return;
+
+	msg = skb_put(skb, size);
+	*msg++ = msgflg;
+	*(__u16 *)msg = rem;
+	msg += 2;
+	*(__u16 *)msg = loc;
+	msg += 2;
+	*(__u16 *)msg = dn_htons(reason);
+	msg += 2;
+	if (msgflg == NSP_DISCINIT)
+		*msg++ = ddl;
+
+	if (ddl) {
+		memcpy(msg, dd, ddl);
+	}
+
+	/*
+	 * This doesn't go via the dn_nsp_send() function since we need
+	 * to be able to send disc packets out which have no socket
+	 * associations.
+	 */
+	skb->dst = dst_clone(dst);
+	dst_output(skb);
+}
+
+
+void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, 
+			unsigned short reason, int gfp)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	int ddl = 0;
+
+	if (msgflg == NSP_DISCINIT)
+		ddl = scp->discdata_out.opt_optl;
+
+	if (reason == 0)
+		reason = scp->discdata_out.opt_status;
+
+	dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, 
+		scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
+}
+
+
+void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, 
+			unsigned short reason)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	int ddl = 0;
+	int gfp = GFP_ATOMIC;
+
+	dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, 
+			NULL, cb->src_port, cb->dst_port);
+}
+
+
+void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct sk_buff *skb;
+	unsigned char *ptr;
+	int gfp = GFP_ATOMIC;
+
+	if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL)
+		return;
+
+	skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
+	ptr = skb_put(skb, 2);
+	DN_SKB_CB(skb)->nsp_flags = 0x10;
+	*ptr++ = lsflags;
+	*ptr = fcval;
+
+	dn_nsp_queue_xmit(sk, skb, gfp, 1);
+
+	scp->persist = dn_nsp_persist(sk);
+	scp->persist_fxn = dn_nsp_xmit_timeout;
+}
+
+static int dn_nsp_retrans_conninit(struct sock *sk)
+{
+	struct dn_scp *scp = DN_SK(sk);
+
+	if (scp->state == DN_CI)
+		dn_nsp_send_conninit(sk, NSP_RCI);
+
+	return 0;
+}
+
+void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
+{
+	struct dn_scp *scp = DN_SK(sk);
+	struct nsp_conn_init_msg *msg;
+	unsigned char aux;
+	unsigned char menuver;
+	struct dn_skb_cb *cb;
+	unsigned char type = 1;
+	int allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC;
+	struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation);
+
+	if (!skb)
+		return;
+
+	cb  = DN_SKB_CB(skb);
+	msg = (struct nsp_conn_init_msg *)skb_put(skb,sizeof(*msg));
+
+	msg->msgflg	= msgflg;
+	msg->dstaddr	= 0x0000;		/* Remote Node will assign it*/
+
+	msg->srcaddr	= scp->addrloc;
+	msg->services	= scp->services_loc;	/* Requested flow control    */
+	msg->info	= scp->info_loc;	/* Version Number            */	
+	msg->segsize	= dn_htons(scp->segsize_loc);	/* Max segment size  */	
+
+	if (scp->peer.sdn_objnum)
+		type = 0;
+
+	skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
+	skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+
+	menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
+	if (scp->peer.sdn_flags & SDF_PROXY)
+		menuver |= DN_MENUVER_PRX;
+	if (scp->peer.sdn_flags & SDF_UICPROXY)
+		menuver |= DN_MENUVER_UIC;
+
+	*skb_put(skb, 1) = menuver;	/* Menu Version		*/
+	
+	aux = scp->accessdata.acc_userl;
+	*skb_put(skb, 1) = aux;
+	if (aux > 0)
+	memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
+
+	aux = scp->accessdata.acc_passl;
+	*skb_put(skb, 1) = aux;
+	if (aux > 0)
+	memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
+
+	aux = scp->accessdata.acc_accl;
+	*skb_put(skb, 1) = aux;
+	if (aux > 0)
+	memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
+
+	aux = scp->conndata_out.opt_optl;
+	*skb_put(skb, 1) = aux;
+	if (aux > 0)
+	memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux);
+
+	scp->persist = dn_nsp_persist(sk);
+	scp->persist_fxn = dn_nsp_retrans_conninit;
+
+	cb->rt_flags = DN_RT_F_RQR;
+
+	dn_nsp_send(skb);	
+}
+
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
new file mode 100644
index 00000000000..1e7b5c3ea21
--- /dev/null
+++ b/net/decnet/dn_route.c
@@ -0,0 +1,1840 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Routing Functions (Endnode and Router)
+ *
+ * Authors:     Steve Whitehouse <SteveW@ACM.org>
+ *              Eduardo Marcelo Serrat <emserrat@geocities.com>
+ *
+ * Changes:
+ *              Steve Whitehouse : Fixes to allow "intra-ethernet" and
+ *                                 "return-to-sender" bits on outgoing
+ *                                 packets.
+ *		Steve Whitehouse : Timeouts for cached routes.
+ *              Steve Whitehouse : Use dst cache for input routes too.
+ *              Steve Whitehouse : Fixed error values in dn_send_skb.
+ *              Steve Whitehouse : Rework routing functions to better fit
+ *                                 DECnet routing design
+ *              Alexey Kuznetsov : New SMP locking
+ *              Steve Whitehouse : More SMP locking changes & dn_cache_dump()
+ *              Steve Whitehouse : Prerouting NF hook, now really is prerouting.
+ *				   Fixed possible skb leak in rtnetlink funcs.
+ *              Steve Whitehouse : Dave Miller's dynamic hash table sizing and
+ *                                 Alexey Kuznetsov's finer grained locking
+ *                                 from ipv4/route.c.
+ *              Steve Whitehouse : Routing is now starting to look like a
+ *                                 sensible set of code now, mainly due to
+ *                                 my copying the IPv4 routing code. The
+ *                                 hooks here are modified and will continue
+ *                                 to evolve for a while.
+ *              Steve Whitehouse : Real SMP at last :-) Also new netfilter
+ *                                 stuff. Look out raw sockets your days
+ *                                 are numbered!
+ *              Steve Whitehouse : Added return-to-sender functions. Added
+ *                                 backlog congestion level return codes.
+ *		Steve Whitehouse : Fixed bug where routes were set up with
+ *                                 no ref count on net devices.
+ *              Steve Whitehouse : RCU for the route cache
+ *              Steve Whitehouse : Preparations for the flow cache
+ *              Steve Whitehouse : Prepare for nonlinear skbs
+ */
+
+/******************************************************************************
+    (c) 1995-1998 E.M. Serrat		emserrat@geocities.com
+    
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*******************************************************************************/
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/inet.h>
+#include <linux/route.h>
+#include <linux/in_route.h>
+#include <net/sock.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/rtnetlink.h>
+#include <linux/string.h>
+#include <linux/netfilter_decnet.h>
+#include <linux/rcupdate.h>
+#include <linux/times.h>
+#include <asm/errno.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_dev.h>
+#include <net/dn_nsp.h>
+#include <net/dn_route.h>
+#include <net/dn_neigh.h>
+#include <net/dn_fib.h>
+
+struct dn_rt_hash_bucket
+{
+	struct dn_route *chain;
+	spinlock_t lock;
+} __attribute__((__aligned__(8)));
+
+extern struct neigh_table dn_neigh_table;
+
+
+static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00};
+
+static const int dn_rt_min_delay = 2 * HZ;
+static const int dn_rt_max_delay = 10 * HZ;
+static const int dn_rt_mtu_expires = 10 * 60 * HZ;
+
+static unsigned long dn_rt_deadline;
+
+static int dn_dst_gc(void);
+static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
+static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+static void dn_dst_link_failure(struct sk_buff *);
+static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
+static int dn_route_input(struct sk_buff *);
+static void dn_run_flush(unsigned long dummy);
+
+static struct dn_rt_hash_bucket *dn_rt_hash_table;
+static unsigned dn_rt_hash_mask;
+
+static struct timer_list dn_route_timer;
+static struct timer_list dn_rt_flush_timer =
+		TIMER_INITIALIZER(dn_run_flush, 0, 0);
+int decnet_dst_gc_interval = 2;
+
+static struct dst_ops dn_dst_ops = {
+	.family =		PF_DECnet,
+	.protocol =		__constant_htons(ETH_P_DNA_RT),
+	.gc_thresh =		128,
+	.gc =			dn_dst_gc,
+	.check =		dn_dst_check,
+	.negative_advice =	dn_dst_negative_advice,
+	.link_failure =		dn_dst_link_failure,
+	.update_pmtu =		dn_dst_update_pmtu,
+	.entry_size =		sizeof(struct dn_route),
+	.entries =		ATOMIC_INIT(0),
+};
+
+static __inline__ unsigned dn_hash(unsigned short src, unsigned short dst)
+{
+	unsigned short tmp = src ^ dst;
+	tmp ^= (tmp >> 3);
+	tmp ^= (tmp >> 5);
+	tmp ^= (tmp >> 10);
+	return dn_rt_hash_mask & (unsigned)tmp;
+}
+
+static inline void dnrt_free(struct dn_route *rt)
+{
+	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+}
+
+static inline void dnrt_drop(struct dn_route *rt)
+{
+	if (rt)
+		dst_release(&rt->u.dst);
+	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+}
+
+static void dn_dst_check_expire(unsigned long dummy)
+{
+	int i;
+	struct dn_route *rt, **rtp;
+	unsigned long now = jiffies;
+	unsigned long expire = 120 * HZ;
+
+	for(i = 0; i <= dn_rt_hash_mask; i++) {
+		rtp = &dn_rt_hash_table[i].chain;
+
+		spin_lock(&dn_rt_hash_table[i].lock);
+		while((rt=*rtp) != NULL) {
+			if (atomic_read(&rt->u.dst.__refcnt) ||
+					(now - rt->u.dst.lastuse) < expire) {
+				rtp = &rt->u.rt_next;
+				continue;
+			}
+			*rtp = rt->u.rt_next;
+			rt->u.rt_next = NULL;
+			dnrt_free(rt);
+		}
+		spin_unlock(&dn_rt_hash_table[i].lock);
+
+		if ((jiffies - now) > 0)
+			break;
+	}
+
+	mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
+}
+
+static int dn_dst_gc(void)
+{
+	struct dn_route *rt, **rtp;
+	int i;
+	unsigned long now = jiffies;
+	unsigned long expire = 10 * HZ;
+
+	for(i = 0; i <= dn_rt_hash_mask; i++) {
+
+		spin_lock_bh(&dn_rt_hash_table[i].lock);
+		rtp = &dn_rt_hash_table[i].chain;
+
+		while((rt=*rtp) != NULL) {
+			if (atomic_read(&rt->u.dst.__refcnt) ||
+					(now - rt->u.dst.lastuse) < expire) {
+				rtp = &rt->u.rt_next;
+				continue;
+			}
+			*rtp = rt->u.rt_next;
+			rt->u.rt_next = NULL;
+			dnrt_drop(rt);
+			break;
+		}
+		spin_unlock_bh(&dn_rt_hash_table[i].lock);
+	}
+
+	return 0;
+}
+
+/*
+ * The decnet standards don't impose a particular minimum mtu, what they
+ * do insist on is that the routing layer accepts a datagram of at least
+ * 230 bytes long. Here we have to subtract the routing header length from
+ * 230 to get the minimum acceptable mtu. If there is no neighbour, then we
+ * assume the worst and use a long header size.
+ *
+ * We update both the mtu and the advertised mss (i.e. the segment size we
+ * advertise to the other end).
+ */
+static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	u32 min_mtu = 230;
+	struct dn_dev *dn = dst->neighbour ?
+			    (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL;
+
+	if (dn && dn->use_long == 0)
+		min_mtu -= 6;
+	else
+		min_mtu -= 21;
+
+	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) {
+		if (!(dst_metric_locked(dst, RTAX_MTU))) {
+			dst->metrics[RTAX_MTU-1] = mtu;
+			dst_set_expires(dst, dn_rt_mtu_expires);
+		}
+		if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
+			u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
+			if (dst->metrics[RTAX_ADVMSS-1] > mss)
+				dst->metrics[RTAX_ADVMSS-1] = mss;
+		}
+	}
+}
+
+/* 
+ * When a route has been marked obsolete. (e.g. routing cache flush)
+ */
+static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
+{
+	return NULL;
+}
+
+static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
+{
+	dst_release(dst);
+	return NULL;
+}
+
+static void dn_dst_link_failure(struct sk_buff *skb)
+{
+	return;
+}
+
+static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+{
+	return memcmp(&fl1->nl_u.dn_u, &fl2->nl_u.dn_u, sizeof(fl1->nl_u.dn_u)) == 0 &&
+		fl1->oif == fl2->oif &&
+		fl1->iif == fl2->iif;
+}
+
+static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
+{
+	struct dn_route *rth, **rthp;
+	unsigned long now = jiffies;
+
+	rthp = &dn_rt_hash_table[hash].chain;
+
+	spin_lock_bh(&dn_rt_hash_table[hash].lock);
+	while((rth = *rthp) != NULL) {
+		if (compare_keys(&rth->fl, &rt->fl)) {
+			/* Put it first */
+			*rthp = rth->u.rt_next;
+			rcu_assign_pointer(rth->u.rt_next,
+					   dn_rt_hash_table[hash].chain);
+			rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
+
+			rth->u.dst.__use++;
+			dst_hold(&rth->u.dst);
+			rth->u.dst.lastuse = now;
+			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
+
+			dnrt_drop(rt);
+			*rp = rth;
+			return 0;
+		}
+		rthp = &rth->u.rt_next;
+	}
+
+	rcu_assign_pointer(rt->u.rt_next, dn_rt_hash_table[hash].chain);
+	rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
+	
+	dst_hold(&rt->u.dst);
+	rt->u.dst.__use++;
+	rt->u.dst.lastuse = now;
+	spin_unlock_bh(&dn_rt_hash_table[hash].lock);
+	*rp = rt;
+	return 0;
+}
+
+void dn_run_flush(unsigned long dummy)
+{
+	int i;
+	struct dn_route *rt, *next;
+
+	for(i = 0; i < dn_rt_hash_mask; i++) {
+		spin_lock_bh(&dn_rt_hash_table[i].lock);
+
+		if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL)
+			goto nothing_to_declare;
+
+		for(; rt; rt=next) {
+			next = rt->u.rt_next;
+			rt->u.rt_next = NULL;
+			dst_free((struct dst_entry *)rt);
+		}
+
+nothing_to_declare:
+		spin_unlock_bh(&dn_rt_hash_table[i].lock);
+	}
+}
+
+static DEFINE_SPINLOCK(dn_rt_flush_lock);
+
+void dn_rt_cache_flush(int delay)
+{
+	unsigned long now = jiffies;
+	int user_mode = !in_interrupt();
+
+	if (delay < 0)
+		delay = dn_rt_min_delay;
+
+	spin_lock_bh(&dn_rt_flush_lock);
+
+	if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
+		long tmo = (long)(dn_rt_deadline - now);
+
+		if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
+			tmo = 0;
+
+		if (delay > tmo)
+			delay = tmo;
+	}
+
+	if (delay <= 0) {
+		spin_unlock_bh(&dn_rt_flush_lock);
+		dn_run_flush(0);
+		return;
+	}
+
+	if (dn_rt_deadline == 0)
+		dn_rt_deadline = now + dn_rt_max_delay;
+
+	dn_rt_flush_timer.expires = now + delay;
+	add_timer(&dn_rt_flush_timer);
+	spin_unlock_bh(&dn_rt_flush_lock);
+}
+
+/**
+ * dn_return_short - Return a short packet to its sender
+ * @skb: The packet to return
+ *
+ */
+static int dn_return_short(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb;
+	unsigned char *ptr;
+	dn_address *src;
+	dn_address *dst;
+	dn_address tmp;
+
+	/* Add back headers */
+	skb_push(skb, skb->data - skb->nh.raw);
+
+	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+		return NET_RX_DROP;
+
+	cb = DN_SKB_CB(skb);
+	/* Skip packet length and point to flags */
+	ptr = skb->data + 2;
+	*ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
+
+	dst = (dn_address *)ptr;
+	ptr += 2;
+	src = (dn_address *)ptr;
+	ptr += 2;
+	*ptr = 0; /* Zero hop count */
+
+	/* Swap source and destination */
+	tmp  = *src;
+	*src = *dst;
+	*dst = tmp;
+
+	skb->pkt_type = PACKET_OUTGOING;
+	dn_rt_finish_output(skb, NULL, NULL);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * dn_return_long - Return a long packet to its sender
+ * @skb: The long format packet to return
+ *
+ */
+static int dn_return_long(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb;
+	unsigned char *ptr;
+	unsigned char *src_addr, *dst_addr;
+	unsigned char tmp[ETH_ALEN];
+
+	/* Add back all headers */
+	skb_push(skb, skb->data - skb->nh.raw);
+
+	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+		return NET_RX_DROP;
+
+	cb = DN_SKB_CB(skb);
+	/* Ignore packet length and point to flags */
+	ptr = skb->data + 2;
+
+	/* Skip padding */
+	if (*ptr & DN_RT_F_PF) {
+		char padlen = (*ptr & ~DN_RT_F_PF);
+		ptr += padlen;
+	}
+
+	*ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
+	ptr += 2;
+	dst_addr = ptr;
+	ptr += 8;
+	src_addr = ptr;
+	ptr += 6;
+	*ptr = 0; /* Zero hop count */
+
+	/* Swap source and destination */
+	memcpy(tmp, src_addr, ETH_ALEN);
+	memcpy(src_addr, dst_addr, ETH_ALEN);
+	memcpy(dst_addr, tmp, ETH_ALEN);
+
+	skb->pkt_type = PACKET_OUTGOING;
+	dn_rt_finish_output(skb, dst_addr, src_addr);
+	return NET_RX_SUCCESS;
+}
+
+/**
+ * dn_route_rx_packet - Try and find a route for an incoming packet
+ * @skb: The packet to find a route for
+ *
+ * Returns: result of input function if route is found, error code otherwise
+ */
+static int dn_route_rx_packet(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	int err;
+
+	if ((err = dn_route_input(skb)) == 0)
+		return dst_input(skb);
+
+	if (decnet_debug_level & 4) {
+		char *devname = skb->dev ? skb->dev->name : "???";
+		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+		printk(KERN_DEBUG
+			"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
+			(int)cb->rt_flags, devname, skb->len, cb->src, cb->dst, 
+			err, skb->pkt_type);
+	}
+
+	if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
+		switch(cb->rt_flags & DN_RT_PKT_MSK) {
+			case DN_RT_PKT_SHORT:
+				return dn_return_short(skb);
+			case DN_RT_PKT_LONG:
+				return dn_return_long(skb);
+		}
+	}
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static int dn_route_rx_long(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned char *ptr = skb->data;
+
+	if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
+		goto drop_it;
+
+	skb_pull(skb, 20);
+	skb->h.raw = skb->data;
+
+        /* Destination info */
+        ptr += 2;
+	cb->dst = dn_htons(dn_eth2dn(ptr));
+        if (memcmp(ptr, dn_hiord_addr, 4) != 0)
+                goto drop_it;
+        ptr += 6;
+
+
+        /* Source info */
+        ptr += 2;
+	cb->src = dn_htons(dn_eth2dn(ptr));
+        if (memcmp(ptr, dn_hiord_addr, 4) != 0)
+                goto drop_it;
+        ptr += 6;
+        /* Other junk */
+        ptr++;
+        cb->hops = *ptr++; /* Visit Count */
+
+	return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet);
+
+drop_it:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+
+
+static int dn_route_rx_short(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned char *ptr = skb->data;
+
+	if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
+		goto drop_it;
+
+	skb_pull(skb, 5);
+	skb->h.raw = skb->data;
+
+	cb->dst = *(dn_address *)ptr;
+        ptr += 2;
+        cb->src = *(dn_address *)ptr;
+        ptr += 2;
+        cb->hops = *ptr & 0x3f;
+
+	return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet);
+
+drop_it:
+        kfree_skb(skb);
+        return NET_RX_DROP;
+}
+
+static int dn_route_discard(struct sk_buff *skb)
+{
+	/*
+	 * I know we drop the packet here, but thats considered success in
+	 * this case
+	 */
+	kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+static int dn_route_ptp_hello(struct sk_buff *skb)
+{
+	dn_dev_hello(skb);
+	dn_neigh_pointopoint_hello(skb);
+	return NET_RX_SUCCESS;
+}
+
+int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct dn_skb_cb *cb;
+	unsigned char flags = 0;
+	__u16 len = dn_ntohs(*(__u16 *)skb->data);
+	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+	unsigned char padlen = 0;
+
+	if (dn == NULL)
+		goto dump_it;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		goto out;
+
+	if (!pskb_may_pull(skb, 3))
+		goto dump_it;
+
+	skb_pull(skb, 2);
+
+	if (len > skb->len)
+		goto dump_it;
+
+	skb_trim(skb, len);
+
+	flags = *skb->data;
+
+	cb = DN_SKB_CB(skb);
+	cb->stamp = jiffies;
+	cb->iif = dev->ifindex;
+
+	/*
+	 * If we have padding, remove it.
+	 */
+	if (flags & DN_RT_F_PF) {
+		padlen = flags & ~DN_RT_F_PF;
+		if (!pskb_may_pull(skb, padlen + 1))
+			goto dump_it;
+		skb_pull(skb, padlen);
+		flags = *skb->data;
+	}
+
+	skb->nh.raw = skb->data;
+
+	/*
+	 * Weed out future version DECnet
+	 */
+	if (flags & DN_RT_F_VER)
+		goto dump_it;
+
+	cb->rt_flags = flags;
+
+	if (decnet_debug_level & 1)
+		printk(KERN_DEBUG 
+			"dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
+			(int)flags, (dev) ? dev->name : "???", len, skb->len, 
+			padlen);
+
+        if (flags & DN_RT_PKT_CNTL) {
+		if (unlikely(skb_is_nonlinear(skb)) &&
+		    skb_linearize(skb, GFP_ATOMIC) != 0)
+			goto dump_it;
+
+                switch(flags & DN_RT_CNTL_MSK) {
+        	        case DN_RT_PKT_INIT:
+				dn_dev_init_pkt(skb);
+				break;
+                	case DN_RT_PKT_VERI:
+				dn_dev_veri_pkt(skb);
+				break;
+		}
+
+		if (dn->parms.state != DN_DEV_S_RU)
+			goto dump_it;
+
+		switch(flags & DN_RT_CNTL_MSK) {
+                	case DN_RT_PKT_HELO:
+				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_route_ptp_hello);
+
+                	case DN_RT_PKT_L1RT:
+                	case DN_RT_PKT_L2RT:
+                                return NF_HOOK(PF_DECnet, NF_DN_ROUTE, skb, skb->dev, NULL, dn_route_discard);
+                	case DN_RT_PKT_ERTH:
+				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_router_hello);
+
+                	case DN_RT_PKT_EEDH:
+				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_endnode_hello);
+                }
+        } else {
+		if (dn->parms.state != DN_DEV_S_RU)
+			goto dump_it;
+
+		skb_pull(skb, 1); /* Pull flags */
+
+                switch(flags & DN_RT_PKT_MSK) {
+                	case DN_RT_PKT_LONG:
+                        	return dn_route_rx_long(skb);
+                	case DN_RT_PKT_SHORT:
+                        	return dn_route_rx_short(skb);
+		}
+        }
+
+dump_it:
+	kfree_skb(skb);
+out:
+	return NET_RX_DROP;
+}
+
+static int dn_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct dn_route *rt = (struct dn_route *)dst;
+	struct net_device *dev = dst->dev;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct neighbour *neigh;
+
+	int err = -EINVAL;
+
+	if ((neigh = dst->neighbour) == NULL)
+		goto error;
+
+	skb->dev = dev;
+
+	cb->src = rt->rt_saddr;
+	cb->dst = rt->rt_daddr;
+
+	/*
+	 * Always set the Intra-Ethernet bit on all outgoing packets
+	 * originated on this node. Only valid flag from upper layers
+	 * is return-to-sender-requested. Set hop count to 0 too.
+	 */
+	cb->rt_flags &= ~DN_RT_F_RQR;
+	cb->rt_flags |= DN_RT_F_IE;
+	cb->hops = 0;
+
+	return NF_HOOK(PF_DECnet, NF_DN_LOCAL_OUT, skb, NULL, dev, neigh->output);
+
+error:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "dn_output: This should not happen\n");
+
+	kfree_skb(skb);
+
+	return err;
+}
+
+static int dn_forward(struct sk_buff *skb)
+{
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dst_entry *dst = skb->dst;
+	struct dn_dev *dn_db = dst->dev->dn_ptr;
+	struct dn_route *rt;
+	struct neighbour *neigh = dst->neighbour;
+	int header_len;
+#ifdef CONFIG_NETFILTER
+	struct net_device *dev = skb->dev;
+#endif
+
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
+	/* Ensure that we have enough space for headers */
+	rt = (struct dn_route *)skb->dst;
+	header_len = dn_db->use_long ? 21 : 6;
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len))
+		goto drop;
+
+	/*
+	 * Hop count exceeded.
+	 */
+	if (++cb->hops > 30)
+		goto drop;
+
+	skb->dev = rt->u.dst.dev;
+
+	/*
+	 * If packet goes out same interface it came in on, then set
+	 * the Intra-Ethernet bit. This has no effect for short
+	 * packets, so we don't need to test for them here.
+	 */
+	cb->rt_flags &= ~DN_RT_F_IE;
+	if (rt->rt_flags & RTCF_DOREDIRECT)
+		cb->rt_flags |= DN_RT_F_IE;
+
+	return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output);
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/*
+ * Drop packet. This is used for endnodes and for
+ * when we should not be forwarding packets from
+ * this dest.
+ */
+static int dn_blackhole(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/*
+ * Used to catch bugs. This should never normally get
+ * called.
+ */
+static int dn_rt_bug(struct sk_buff *skb)
+{
+	if (net_ratelimit()) {
+		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+		printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n",
+				cb->src, cb->dst);
+	}
+
+	kfree_skb(skb);
+
+	return NET_RX_BAD;
+}
+
+static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
+{
+	struct dn_fib_info *fi = res->fi;
+	struct net_device *dev = rt->u.dst.dev;
+	struct neighbour *n;
+	unsigned mss;
+
+	if (fi) {
+		if (DN_FIB_RES_GW(*res) &&
+		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+			rt->rt_gateway = DN_FIB_RES_GW(*res);
+		memcpy(rt->u.dst.metrics, fi->fib_metrics,
+		       sizeof(rt->u.dst.metrics));
+	}
+	rt->rt_type = res->type;
+
+	if (dev != NULL && rt->u.dst.neighbour == NULL) {
+		n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+		rt->u.dst.neighbour = n;
+	}
+
+	if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || 
+            rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu)
+		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
+	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 ||
+	    rt->u.dst.metrics[RTAX_ADVMSS-1] > mss)
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+	return 0;
+}
+
+static inline int dn_match_addr(__u16 addr1, __u16 addr2)
+{
+	__u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2);
+	int match = 16;
+	while(tmp) {
+		tmp >>= 1;
+		match--;
+	}
+	return match;
+}
+
+static __u16 dnet_select_source(const struct net_device *dev, __u16 daddr, int scope)
+{
+	__u16 saddr = 0;
+	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_ifaddr *ifa;
+	int best_match = 0;
+	int ret;
+
+	read_lock(&dev_base_lock);
+	for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+		if (ifa->ifa_scope > scope)
+			continue;
+		if (!daddr) {
+			saddr = ifa->ifa_local;
+			break;
+		}
+		ret = dn_match_addr(daddr, ifa->ifa_local);
+		if (ret > best_match)
+			saddr = ifa->ifa_local;
+		if (best_match == 0)
+			saddr = ifa->ifa_local;
+	}
+	read_unlock(&dev_base_lock);
+
+	return saddr;
+}
+
+static inline __u16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
+{
+	return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
+}
+
+static inline __u16 dn_fib_rules_map_destination(__u16 daddr, struct dn_fib_res *res)
+{
+	__u16 mask = dnet_make_mask(res->prefixlen);
+	return (daddr&~mask)|res->fi->fib_nh->nh_gw;
+}
+
+static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
+{
+	struct flowi fl = { .nl_u = { .dn_u = 
+				      { .daddr = oldflp->fld_dst,
+					.saddr = oldflp->fld_src,
+					.scope = RT_SCOPE_UNIVERSE,
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+					.fwmark = oldflp->fld_fwmark
+#endif
+				     } },
+			    .iif = loopback_dev.ifindex,
+			    .oif = oldflp->oif };
+	struct dn_route *rt = NULL;
+	struct net_device *dev_out = NULL;
+	struct neighbour *neigh = NULL;
+	unsigned hash;
+	unsigned flags = 0;
+	struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
+	int err;
+	int free_res = 0;
+	__u16 gateway = 0;
+
+	if (decnet_debug_level & 16)
+		printk(KERN_DEBUG
+		       "dn_route_output_slow: dst=%04x src=%04x mark=%d"
+		       " iif=%d oif=%d\n", oldflp->fld_dst, oldflp->fld_src,
+                       oldflp->fld_fwmark, loopback_dev.ifindex, oldflp->oif);
+
+	/* If we have an output interface, verify its a DECnet device */
+	if (oldflp->oif) {
+		dev_out = dev_get_by_index(oldflp->oif);
+		err = -ENODEV;
+		if (dev_out && dev_out->dn_ptr == NULL) {
+			dev_put(dev_out);
+			dev_out = NULL;
+		}
+		if (dev_out == NULL)
+			goto out;
+	}
+
+	/* If we have a source address, verify that its a local address */
+	if (oldflp->fld_src) {
+		err = -EADDRNOTAVAIL;
+
+		if (dev_out) {
+			if (dn_dev_islocal(dev_out, oldflp->fld_src))
+				goto source_ok;
+			dev_put(dev_out);
+			goto out;
+		}
+		read_lock(&dev_base_lock);
+		for(dev_out = dev_base; dev_out; dev_out = dev_out->next) {
+			if (!dev_out->dn_ptr)
+				continue;
+			if (dn_dev_islocal(dev_out, oldflp->fld_src))
+				break;
+		}
+		read_unlock(&dev_base_lock);
+		if (dev_out == NULL)
+			goto out;
+		dev_hold(dev_out);
+source_ok:
+		;
+	}
+
+	/* No destination? Assume its local */
+	if (!fl.fld_dst) {
+		fl.fld_dst = fl.fld_src;
+
+		err = -EADDRNOTAVAIL;
+		if (dev_out)
+			dev_put(dev_out);
+		dev_out = &loopback_dev;
+		dev_hold(dev_out);
+		if (!fl.fld_dst) {
+			fl.fld_dst =
+			fl.fld_src = dnet_select_source(dev_out, 0,
+						       RT_SCOPE_HOST);
+			if (!fl.fld_dst)
+				goto out;
+		}
+		fl.oif = loopback_dev.ifindex;
+		res.type = RTN_LOCAL;
+		goto make_route;
+	}
+
+	if (decnet_debug_level & 16)
+		printk(KERN_DEBUG
+		       "dn_route_output_slow: initial checks complete."
+		       " dst=%o4x src=%04x oif=%d try_hard=%d\n", fl.fld_dst,
+		       fl.fld_src, fl.oif, try_hard);
+
+	/*
+	 * N.B. If the kernel is compiled without router support then
+	 * dn_fib_lookup() will evaluate to non-zero so this if () block
+	 * will always be executed.
+	 */
+	err = -ESRCH;
+	if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) {
+		struct dn_dev *dn_db;
+		if (err != -ESRCH)
+			goto out;
+		/*
+	 	 * Here the fallback is basically the standard algorithm for 
+		 * routing in endnodes which is described in the DECnet routing
+		 * docs
+		 *
+		 * If we are not trying hard, look in neighbour cache.
+		 * The result is tested to ensure that if a specific output
+		 * device/source address was requested, then we honour that 
+		 * here
+		 */
+		if (!try_hard) {
+			neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
+			if (neigh) {
+				if ((oldflp->oif && 
+				    (neigh->dev->ifindex != oldflp->oif)) ||
+				    (oldflp->fld_src &&
+				    (!dn_dev_islocal(neigh->dev,
+						      oldflp->fld_src)))) {
+					neigh_release(neigh);
+					neigh = NULL;
+				} else {
+					if (dev_out)
+						dev_put(dev_out);
+					if (dn_dev_islocal(neigh->dev, fl.fld_dst)) {
+						dev_out = &loopback_dev;
+						res.type = RTN_LOCAL;
+					} else {
+						dev_out = neigh->dev;
+					}
+					dev_hold(dev_out);
+					goto select_source;
+				}
+			}
+		}
+
+		/* Not there? Perhaps its a local address */
+		if (dev_out == NULL)
+			dev_out = dn_dev_get_default();
+		err = -ENODEV;
+		if (dev_out == NULL)
+			goto out;
+		dn_db = dev_out->dn_ptr;
+		/* Possible improvement - check all devices for local addr */
+		if (dn_dev_islocal(dev_out, fl.fld_dst)) {
+			dev_put(dev_out);
+			dev_out = &loopback_dev;
+			dev_hold(dev_out);
+			res.type = RTN_LOCAL;
+			goto select_source;
+		}
+		/* Not local either.... try sending it to the default router */
+		neigh = neigh_clone(dn_db->router);
+		BUG_ON(neigh && neigh->dev != dev_out);
+
+		/* Ok then, we assume its directly connected and move on */
+select_source:
+		if (neigh)
+			gateway = ((struct dn_neigh *)neigh)->addr;
+		if (gateway == 0)
+			gateway = fl.fld_dst;
+		if (fl.fld_src == 0) {
+			fl.fld_src = dnet_select_source(dev_out, gateway,
+							 res.type == RTN_LOCAL ?
+							 RT_SCOPE_HOST : 
+							 RT_SCOPE_LINK);
+			if (fl.fld_src == 0 && res.type != RTN_LOCAL)
+				goto e_addr;
+		}
+		fl.oif = dev_out->ifindex;
+		goto make_route;
+	}
+	free_res = 1;
+
+	if (res.type == RTN_NAT)
+		goto e_inval;
+
+	if (res.type == RTN_LOCAL) {
+		if (!fl.fld_src)
+			fl.fld_src = fl.fld_dst;
+		if (dev_out)
+			dev_put(dev_out);
+		dev_out = &loopback_dev;
+		dev_hold(dev_out);
+		fl.oif = dev_out->ifindex;
+		if (res.fi)
+			dn_fib_info_put(res.fi);
+		res.fi = NULL;
+		goto make_route;
+	}
+
+	if (res.fi->fib_nhs > 1 && fl.oif == 0)
+		dn_fib_select_multipath(&fl, &res);
+
+	/* 
+	 * We could add some logic to deal with default routes here and
+	 * get rid of some of the special casing above.
+	 */
+
+	if (!fl.fld_src)
+		fl.fld_src = DN_FIB_RES_PREFSRC(res);
+	
+	if (dev_out)
+		dev_put(dev_out);
+	dev_out = DN_FIB_RES_DEV(res);
+	dev_hold(dev_out);
+	fl.oif = dev_out->ifindex;
+	gateway = DN_FIB_RES_GW(res);
+
+make_route:
+	if (dev_out->flags & IFF_LOOPBACK)
+		flags |= RTCF_LOCAL;
+
+	rt = dst_alloc(&dn_dst_ops);
+	if (rt == NULL)
+		goto e_nobufs;
+
+	atomic_set(&rt->u.dst.__refcnt, 1);
+	rt->u.dst.flags   = DST_HOST;
+
+	rt->fl.fld_src    = oldflp->fld_src;
+	rt->fl.fld_dst    = oldflp->fld_dst;
+	rt->fl.oif        = oldflp->oif;
+	rt->fl.iif        = 0;
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	rt->fl.fld_fwmark = oldflp->fld_fwmark;
+#endif
+
+	rt->rt_saddr      = fl.fld_src;
+	rt->rt_daddr      = fl.fld_dst;
+	rt->rt_gateway    = gateway ? gateway : fl.fld_dst;
+	rt->rt_local_src  = fl.fld_src;
+
+	rt->rt_dst_map    = fl.fld_dst;
+	rt->rt_src_map    = fl.fld_src;
+
+	rt->u.dst.dev = dev_out;
+	dev_hold(dev_out);
+	rt->u.dst.neighbour = neigh;
+	neigh = NULL;
+
+	rt->u.dst.lastuse = jiffies;
+	rt->u.dst.output  = dn_output;
+	rt->u.dst.input   = dn_rt_bug;
+	rt->rt_flags      = flags;
+	if (flags & RTCF_LOCAL)
+		rt->u.dst.input = dn_nsp_rx;
+
+	err = dn_rt_set_next_hop(rt, &res);
+	if (err)
+		goto e_neighbour;
+
+	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
+	dn_insert_route(rt, hash, (struct dn_route **)pprt);
+
+done:
+	if (neigh)
+		neigh_release(neigh);
+	if (free_res)
+		dn_fib_res_put(&res);
+	if (dev_out)
+		dev_put(dev_out);
+out:
+	return err;
+
+e_addr:
+        err = -EADDRNOTAVAIL;
+        goto done;
+e_inval:
+	err = -EINVAL;
+	goto done;
+e_nobufs:
+	err = -ENOBUFS;
+	goto done;
+e_neighbour:
+	dst_free(&rt->u.dst);
+	goto e_nobufs;
+}
+
+
+/*
+ * N.B. The flags may be moved into the flowi at some future stage.
+ */
+static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags)
+{
+	unsigned hash = dn_hash(flp->fld_src, flp->fld_dst);
+	struct dn_route *rt = NULL;
+
+	if (!(flags & MSG_TRYHARD)) {
+		rcu_read_lock_bh();
+		for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt;
+			rt = rcu_dereference(rt->u.rt_next)) {
+			if ((flp->fld_dst == rt->fl.fld_dst) &&
+			    (flp->fld_src == rt->fl.fld_src) &&
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+			    (flp->fld_fwmark == rt->fl.fld_fwmark) &&
+#endif
+			    (rt->fl.iif == 0) &&
+			    (rt->fl.oif == flp->oif)) {
+				rt->u.dst.lastuse = jiffies;
+				dst_hold(&rt->u.dst);
+				rt->u.dst.__use++;
+				rcu_read_unlock_bh();
+				*pprt = &rt->u.dst;
+				return 0;
+			}
+		}
+		rcu_read_unlock_bh();
+	}
+
+	return dn_route_output_slow(pprt, flp, flags);
+}
+
+static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags)
+{
+	int err;
+
+	err = __dn_route_output_key(pprt, flp, flags);
+	if (err == 0 && flp->proto) {
+		err = xfrm_lookup(pprt, flp, NULL, 0);
+	}
+	return err;
+}
+
+int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags)
+{
+	int err;
+
+	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
+	if (err == 0 && fl->proto) {
+		err = xfrm_lookup(pprt, fl, sk, !(flags & MSG_DONTWAIT));
+	}
+	return err;
+}
+
+static int dn_route_input_slow(struct sk_buff *skb)
+{
+	struct dn_route *rt = NULL;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct net_device *in_dev = skb->dev;
+	struct net_device *out_dev = NULL;
+	struct dn_dev *dn_db;
+	struct neighbour *neigh = NULL;
+	unsigned hash;
+	int flags = 0;
+	__u16 gateway = 0;
+	__u16 local_src = 0;
+	struct flowi fl = { .nl_u = { .dn_u = 
+				     { .daddr = cb->dst,
+				       .saddr = cb->src,
+				       .scope = RT_SCOPE_UNIVERSE,
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+				       .fwmark = skb->nfmark
+#endif
+				    } },
+			    .iif = skb->dev->ifindex };
+	struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
+	int err = -EINVAL;
+	int free_res = 0;
+
+	dev_hold(in_dev);
+
+	if ((dn_db = in_dev->dn_ptr) == NULL)
+		goto out;
+
+	/* Zero source addresses are not allowed */
+	if (fl.fld_src == 0)
+		goto out;
+
+	/*
+	 * In this case we've just received a packet from a source
+	 * outside ourselves pretending to come from us. We don't
+	 * allow it any further to prevent routing loops, spoofing and
+	 * other nasties. Loopback packets already have the dst attached
+	 * so this only affects packets which have originated elsewhere.
+	 */
+	err  = -ENOTUNIQ;
+	if (dn_dev_islocal(in_dev, cb->src))
+		goto out;
+
+	err = dn_fib_lookup(&fl, &res);
+	if (err) {
+		if (err != -ESRCH)
+			goto out;
+		/*
+		 * Is the destination us ?
+		 */
+		if (!dn_dev_islocal(in_dev, cb->dst))
+			goto e_inval;
+
+		res.type = RTN_LOCAL;
+		flags |= RTCF_DIRECTSRC;
+	} else {
+		__u16 src_map = fl.fld_src;
+		free_res = 1;
+
+		out_dev = DN_FIB_RES_DEV(res);
+		if (out_dev == NULL) {
+			if (net_ratelimit())
+				printk(KERN_CRIT "Bug in dn_route_input_slow() "
+						 "No output device\n");
+			goto e_inval;
+		}
+		dev_hold(out_dev);
+
+		if (res.r)
+			src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+
+		gateway = DN_FIB_RES_GW(res);
+		if (res.type == RTN_NAT) {
+			fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res);
+			dn_fib_res_put(&res);
+			free_res = 0;
+			if (dn_fib_lookup(&fl, &res))
+				goto e_inval;
+			free_res = 1;
+			if (res.type != RTN_UNICAST)
+				goto e_inval;
+			flags |= RTCF_DNAT;
+			gateway = fl.fld_dst;
+		}
+		fl.fld_src = src_map;
+	}
+
+	switch(res.type) {
+	case RTN_UNICAST:
+		/*
+		 * Forwarding check here, we only check for forwarding
+		 * being turned off, if you want to only forward intra
+		 * area, its up to you to set the routing tables up
+		 * correctly.
+		 */
+		if (dn_db->parms.forwarding == 0)
+			goto e_inval;
+
+		if (res.fi->fib_nhs > 1 && fl.oif == 0)
+			dn_fib_select_multipath(&fl, &res);
+
+		/* 
+		 * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
+		 * flag as a hint to set the intra-ethernet bit when
+		 * forwarding. If we've got NAT in operation, we don't do
+		 * this optimisation.
+		 */
+		if (out_dev == in_dev && !(flags & RTCF_NAT))
+			flags |= RTCF_DOREDIRECT;
+
+		local_src = DN_FIB_RES_PREFSRC(res);
+
+	case RTN_BLACKHOLE:
+	case RTN_UNREACHABLE:
+		break;
+	case RTN_LOCAL:
+		flags |= RTCF_LOCAL;
+		fl.fld_src = cb->dst;
+		fl.fld_dst = cb->src;
+
+		/* Routing tables gave us a gateway */
+		if (gateway)
+			goto make_route;
+
+		/* Packet was intra-ethernet, so we know its on-link */
+		if (cb->rt_flags | DN_RT_F_IE) {
+			gateway = cb->src;
+			flags |= RTCF_DIRECTSRC;
+			goto make_route;
+		}
+
+		/* Use the default router if there is one */
+		neigh = neigh_clone(dn_db->router);
+		if (neigh) {
+			gateway = ((struct dn_neigh *)neigh)->addr;
+			goto make_route;
+		}
+
+		/* Close eyes and pray */
+		gateway = cb->src;
+		flags |= RTCF_DIRECTSRC;
+		goto make_route;
+	default:
+		goto e_inval;
+	}
+
+make_route:
+	rt = dst_alloc(&dn_dst_ops);
+	if (rt == NULL)
+		goto e_nobufs;
+
+	rt->rt_saddr      = fl.fld_src;
+	rt->rt_daddr      = fl.fld_dst;
+	rt->rt_gateway    = fl.fld_dst;
+	if (gateway)
+		rt->rt_gateway = gateway;
+	rt->rt_local_src  = local_src ? local_src : rt->rt_saddr;
+
+	rt->rt_dst_map    = fl.fld_dst;
+	rt->rt_src_map    = fl.fld_src;
+
+	rt->fl.fld_src    = cb->src;
+	rt->fl.fld_dst    = cb->dst;
+	rt->fl.oif        = 0;
+	rt->fl.iif        = in_dev->ifindex;
+	rt->fl.fld_fwmark = fl.fld_fwmark;
+
+	rt->u.dst.flags = DST_HOST;
+	rt->u.dst.neighbour = neigh;
+	rt->u.dst.dev = out_dev;
+	rt->u.dst.lastuse = jiffies;
+	rt->u.dst.output = dn_rt_bug;
+	switch(res.type) {
+		case RTN_UNICAST:
+			rt->u.dst.input = dn_forward;
+			break;
+		case RTN_LOCAL:
+			rt->u.dst.output = dn_output;
+			rt->u.dst.input = dn_nsp_rx;
+			rt->u.dst.dev = in_dev;
+			flags |= RTCF_LOCAL;
+			break;
+		default:
+		case RTN_UNREACHABLE:
+		case RTN_BLACKHOLE:
+			rt->u.dst.input = dn_blackhole;
+	}
+	rt->rt_flags = flags;
+	if (rt->u.dst.dev)
+		dev_hold(rt->u.dst.dev);
+
+	err = dn_rt_set_next_hop(rt, &res);
+	if (err)
+		goto e_neighbour;
+
+	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
+	dn_insert_route(rt, hash, (struct dn_route **)&skb->dst);
+
+done:
+	if (neigh)
+		neigh_release(neigh);
+	if (free_res)
+		dn_fib_res_put(&res);
+	dev_put(in_dev);
+	if (out_dev)
+		dev_put(out_dev);
+out:
+	return err;
+
+e_inval:
+	err = -EINVAL;
+	goto done;
+
+e_nobufs:
+	err = -ENOBUFS;
+	goto done;
+
+e_neighbour:
+	dst_free(&rt->u.dst);
+	goto done;
+}
+
+int dn_route_input(struct sk_buff *skb)
+{
+	struct dn_route *rt;
+	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	unsigned hash = dn_hash(cb->src, cb->dst);
+
+	if (skb->dst)
+		return 0;
+
+	rcu_read_lock();
+	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
+	    rt = rcu_dereference(rt->u.rt_next)) {
+		if ((rt->fl.fld_src == cb->src) &&
+	 	    (rt->fl.fld_dst == cb->dst) &&
+		    (rt->fl.oif == 0) &&
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+		    (rt->fl.fld_fwmark == skb->nfmark) &&
+#endif
+		    (rt->fl.iif == cb->iif)) {
+			rt->u.dst.lastuse = jiffies;
+			dst_hold(&rt->u.dst);
+			rt->u.dst.__use++;
+			rcu_read_unlock();
+			skb->dst = (struct dst_entry *)rt;
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+
+	return dn_route_input_slow(skb);
+}
+
+static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
+{
+	struct dn_route *rt = (struct dn_route *)skb->dst;
+	struct rtmsg *r;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct rta_cacheinfo ci;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	r = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+	r->rtm_family = AF_DECnet;
+	r->rtm_dst_len = 16;
+	r->rtm_src_len = 0;
+	r->rtm_tos = 0;
+	r->rtm_table = RT_TABLE_MAIN;
+	r->rtm_type = rt->rt_type;
+	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
+	r->rtm_scope = RT_SCOPE_UNIVERSE;
+	r->rtm_protocol = RTPROT_UNSPEC;
+	if (rt->rt_flags & RTCF_NOTIFY)
+		r->rtm_flags |= RTM_F_NOTIFY;
+	RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr);
+	if (rt->fl.fld_src) {
+		r->rtm_src_len = 16;
+		RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
+	}
+	if (rt->u.dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+	/*
+	 * Note to self - change this if input routes reverse direction when
+	 * they deal only with inputs and not with replies like they do
+	 * currently.
+	 */
+	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
+	if (rt->rt_daddr != rt->rt_gateway)
+		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
+	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+		goto rtattr_failure;
+	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
+	ci.rta_used     = rt->u.dst.__use;
+	ci.rta_clntref  = atomic_read(&rt->u.dst.__refcnt);
+	if (rt->u.dst.expires)
+		ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies);
+	else
+		ci.rta_expires = 0;
+	ci.rta_error    = rt->u.dst.error;
+	ci.rta_id       = ci.rta_ts = ci.rta_tsage = 0;
+	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+	if (rt->fl.iif)
+		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+        skb_trim(skb, b - skb->data);
+        return -1;
+}
+
+/*
+ * This is called by both endnodes and routers now.
+ */
+int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct dn_route *rt = NULL;
+	struct dn_skb_cb *cb;
+	int err;
+	struct sk_buff *skb;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = DNPROTO_NSP;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOBUFS;
+	skb->mac.raw = skb->data;
+	cb = DN_SKB_CB(skb);
+
+	if (rta[RTA_SRC-1])
+		memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2);
+	if (rta[RTA_DST-1])
+		memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2);
+	if (rta[RTA_IIF-1])
+		memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+
+	if (fl.iif) {
+		struct net_device *dev;
+		if ((dev = dev_get_by_index(fl.iif)) == NULL) {
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+		if (!dev->dn_ptr) {
+			dev_put(dev);
+			kfree_skb(skb);
+			return -ENODEV;
+		}
+		skb->protocol = __constant_htons(ETH_P_DNA_RT);
+		skb->dev = dev;
+		cb->src = fl.fld_src;
+		cb->dst = fl.fld_dst;
+		local_bh_disable();
+		err = dn_route_input(skb);
+		local_bh_enable();
+		memset(cb, 0, sizeof(struct dn_skb_cb));
+		rt = (struct dn_route *)skb->dst;
+		if (!err && -rt->u.dst.error)
+			err = rt->u.dst.error;
+	} else {
+		int oif = 0;
+		if (rta[RTA_OIF - 1])
+			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
+		fl.oif = oif;
+		err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0);
+	}
+
+	if (skb->dev)
+		dev_put(skb->dev);
+	skb->dev = NULL;
+	if (err)
+		goto out_free;
+	skb->dst = &rt->u.dst;
+	if (rtm->rtm_flags & RTM_F_NOTIFY)
+		rt->rt_flags |= RTCF_NOTIFY;
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0);
+
+	if (err == 0)
+		goto out_free;
+	if (err < 0) {
+		err = -EMSGSIZE;
+		goto out_free;
+	}
+
+	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+
+	return err;
+
+out_free:
+	kfree_skb(skb);
+	return err;
+}
+
+/*
+ * For routers, this is called from dn_fib_dump, but for endnodes its
+ * called directly from the rtnetlink dispatch table.
+ */
+int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct dn_route *rt;
+	int h, s_h;
+	int idx, s_idx;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
+		return -EINVAL;
+	if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
+		return 0;
+
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+	for(h = 0; h <= dn_rt_hash_mask; h++) {
+		if (h < s_h)
+			continue;
+		if (h > s_h)
+			s_idx = 0;
+		rcu_read_lock_bh();
+		for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0;
+			rt;
+			rt = rcu_dereference(rt->u.rt_next), idx++) {
+			if (idx < s_idx)
+				continue;
+			skb->dst = dst_clone(&rt->u.dst);
+			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+					cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
+				dst_release(xchg(&skb->dst, NULL));
+				rcu_read_unlock_bh();
+				goto done;
+			}
+			dst_release(xchg(&skb->dst, NULL));
+		}
+		rcu_read_unlock_bh();
+	}
+
+done:
+	cb->args[0] = h;
+	cb->args[1] = idx;
+	return skb->len;
+}
+
+#ifdef CONFIG_PROC_FS
+struct dn_rt_cache_iter_state {
+	int bucket;
+};
+
+static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
+{
+	struct dn_route *rt = NULL;
+	struct dn_rt_cache_iter_state *s = seq->private;
+
+	for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
+		rcu_read_lock_bh();
+		rt = dn_rt_hash_table[s->bucket].chain;
+		if (rt)
+			break;
+		rcu_read_unlock_bh();
+	}
+	return rt;
+}
+
+static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
+{
+	struct dn_rt_cache_iter_state *s = rcu_dereference(seq->private);
+
+	rt = rt->u.rt_next;
+	while(!rt) {
+		rcu_read_unlock_bh();
+		if (--s->bucket < 0)
+			break;
+		rcu_read_lock_bh();
+		rt = dn_rt_hash_table[s->bucket].chain;
+	}
+	return rt;
+}
+
+static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct dn_route *rt = dn_rt_cache_get_first(seq);
+
+	if (rt) {
+		while(*pos && (rt = dn_rt_cache_get_next(seq, rt)))
+			--*pos;
+	}
+	return *pos ? NULL : rt;
+}
+
+static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct dn_route *rt = dn_rt_cache_get_next(seq, v);
+	++*pos;
+	return rt;
+}
+
+static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v)
+		rcu_read_unlock_bh();
+}
+
+static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
+{
+	struct dn_route *rt = v;
+	char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
+
+	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
+			rt->u.dst.dev ? rt->u.dst.dev->name : "*",
+			dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1),
+			dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2),
+			atomic_read(&rt->u.dst.__refcnt),
+			rt->u.dst.__use,
+			(int) dst_metric(&rt->u.dst, RTAX_RTT));
+	return 0;
+} 
+
+static struct seq_operations dn_rt_cache_seq_ops = {
+	.start	= dn_rt_cache_seq_start,
+	.next	= dn_rt_cache_seq_next,
+	.stop	= dn_rt_cache_seq_stop,
+	.show	= dn_rt_cache_seq_show,
+};
+
+static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct dn_rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	rc = seq_open(file, &dn_rt_cache_seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq		= file->private_data;
+	seq->private	= s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations dn_rt_cache_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = dn_rt_cache_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release_private,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+void __init dn_route_init(void)
+{
+	int i, goal, order;
+
+	dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache",
+						   sizeof(struct dn_route),
+						   0, SLAB_HWCACHE_ALIGN,
+						   NULL, NULL);
+
+	if (!dn_dst_ops.kmem_cachep)
+		panic("DECnet: Failed to allocate dn_dst_cache\n");
+
+	init_timer(&dn_route_timer);
+	dn_route_timer.function = dn_dst_check_expire;
+	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
+	add_timer(&dn_route_timer);
+
+	goal = num_physpages >> (26 - PAGE_SHIFT);
+
+	for(order = 0; (1UL << order) < goal; order++)
+		/* NOTHING */;
+
+        /*
+         * Only want 1024 entries max, since the table is very, very unlikely
+         * to be larger than that.
+         */
+        while(order && ((((1UL << order) * PAGE_SIZE) / 
+                                sizeof(struct dn_rt_hash_bucket)) >= 2048))
+                order--;
+
+        do {
+                dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
+                        sizeof(struct dn_rt_hash_bucket);
+                while(dn_rt_hash_mask & (dn_rt_hash_mask - 1))
+                        dn_rt_hash_mask--;
+                dn_rt_hash_table = (struct dn_rt_hash_bucket *)
+                        __get_free_pages(GFP_ATOMIC, order);
+        } while (dn_rt_hash_table == NULL && --order > 0);
+
+	if (!dn_rt_hash_table)
+                panic("Failed to allocate DECnet route cache hash table\n");
+
+	printk(KERN_INFO 
+		"DECnet: Routing cache hash table of %u buckets, %ldKbytes\n", 
+		dn_rt_hash_mask, 
+		(long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
+
+	dn_rt_hash_mask--;
+        for(i = 0; i <= dn_rt_hash_mask; i++) {
+                spin_lock_init(&dn_rt_hash_table[i].lock);
+                dn_rt_hash_table[i].chain = NULL;
+        }
+
+        dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
+
+	proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+}
+
+void __exit dn_route_cleanup(void)
+{
+	del_timer(&dn_route_timer);
+	dn_run_flush(0);
+
+	proc_net_remove("decnet_cache");
+}
+
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
new file mode 100644
index 00000000000..597587d170d
--- /dev/null
+++ b/net/decnet/dn_rules.c
@@ -0,0 +1,416 @@
+
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Routing Forwarding Information Base (Rules)
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *              Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c
+ *
+ *
+ * Changes:
+ *
+ */
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/in_route.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_fib.h>
+#include <net/dn_neigh.h>
+#include <net/dn_dev.h>
+
+struct dn_fib_rule
+{
+	struct dn_fib_rule	*r_next;
+	atomic_t		r_clntref;
+	u32			r_preference;
+	unsigned char		r_table;
+	unsigned char		r_action;
+	unsigned char		r_dst_len;
+	unsigned char		r_src_len;
+	dn_address		r_src;
+	dn_address		r_srcmask;
+	dn_address		r_dst;
+	dn_address		r_dstmask;
+	dn_address		r_srcmap;
+	u8			r_flags;
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	u32			r_fwmark;
+#endif
+	int			r_ifindex;
+	char			r_ifname[IFNAMSIZ];
+	int			r_dead;
+};
+
+static struct dn_fib_rule default_rule = {
+	.r_clntref =		ATOMIC_INIT(2),
+	.r_preference =		0x7fff,
+	.r_table =		RT_TABLE_MAIN,
+	.r_action =		RTN_UNICAST
+};
+
+static struct dn_fib_rule *dn_fib_rules = &default_rule;
+static DEFINE_RWLOCK(dn_fib_rules_lock);
+
+
+int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct dn_fib_rule *r, **rp;
+	int err = -ESRCH;
+
+	for(rp=&dn_fib_rules; (r=*rp) != NULL; rp = &r->r_next) {
+		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
+			rtm->rtm_src_len == r->r_src_len &&
+			rtm->rtm_dst_len == r->r_dst_len &&
+			(!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+			(!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
+#endif
+			(!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
+			(!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
+			(!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
+			(!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
+
+			err = -EPERM;
+			if (r == &default_rule)
+				break;
+
+			write_lock_bh(&dn_fib_rules_lock);
+			*rp = r->r_next;
+			r->r_dead = 1;
+			write_unlock_bh(&dn_fib_rules_lock);
+			dn_fib_rule_put(r);
+			err = 0;
+			break;
+		}
+	}
+
+	return err;
+}
+
+void dn_fib_rule_put(struct dn_fib_rule *r)
+{
+	if (atomic_dec_and_test(&r->r_clntref)) {
+		if (r->r_dead)
+			kfree(r);
+		else
+			printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
+	}
+}
+
+
+int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct dn_fib_rule *r, *new_r, **rp;
+	unsigned char table_id;
+
+	if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
+		return -EINVAL;
+
+	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
+		return -EINVAL;
+
+	if (rtm->rtm_type == RTN_NAT)
+		return -EINVAL;
+
+	table_id = rtm->rtm_table;
+	if (table_id == RT_TABLE_UNSPEC) {
+		struct dn_fib_table *tb;
+		if (rtm->rtm_type == RTN_UNICAST) {
+			if ((tb = dn_fib_empty_table()) == NULL)
+				return -ENOBUFS;
+			table_id = tb->n;
+		}
+	}
+
+	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+	if (!new_r)
+		return -ENOMEM;
+	memset(new_r, 0, sizeof(*new_r));
+	if (rta[RTA_SRC-1])
+		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
+	if (rta[RTA_DST-1])
+		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
+	if (rta[RTA_GATEWAY-1])
+		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
+	new_r->r_src_len = rtm->rtm_src_len;
+	new_r->r_dst_len = rtm->rtm_dst_len;
+	new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
+	new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (rta[RTA_PROTOINFO-1])
+		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+#endif
+	new_r->r_action = rtm->rtm_type;
+	new_r->r_flags = rtm->rtm_flags;
+	if (rta[RTA_PRIORITY-1])
+		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+	new_r->r_table = table_id;
+	if (rta[RTA_IIF-1]) {
+		struct net_device *dev;
+		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
+		new_r->r_ifindex = -1;
+		dev = dev_get_by_name(new_r->r_ifname);
+		if (dev) {
+			new_r->r_ifindex = dev->ifindex;
+			dev_put(dev);
+		}
+	}
+
+	rp = &dn_fib_rules;
+	if (!new_r->r_preference) {
+		r = dn_fib_rules;
+		if (r && (r = r->r_next) != NULL) {
+			rp = &dn_fib_rules->r_next;
+			if (r->r_preference)
+				new_r->r_preference = r->r_preference - 1;
+		}
+	}
+
+	while((r=*rp) != NULL) {
+		if (r->r_preference > new_r->r_preference)
+			break;
+		rp = &r->r_next;
+	}
+
+	new_r->r_next = r;
+	atomic_inc(&new_r->r_clntref);
+	write_lock_bh(&dn_fib_rules_lock);
+	*rp = new_r;
+	write_unlock_bh(&dn_fib_rules_lock);
+	return 0;
+}
+
+
+int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+{
+	struct dn_fib_rule *r, *policy;
+	struct dn_fib_table *tb;
+	dn_address saddr = flp->fld_src;
+	dn_address daddr = flp->fld_dst;
+	int err;
+
+	read_lock(&dn_fib_rules_lock);
+	for(r = dn_fib_rules; r; r = r->r_next) {
+		if (((saddr^r->r_src) & r->r_srcmask) ||
+		    ((daddr^r->r_dst) & r->r_dstmask) ||
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+		    (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
+#endif
+		    (r->r_ifindex && r->r_ifindex != flp->iif))
+			continue;
+
+		switch(r->r_action) {
+			case RTN_UNICAST:
+			case RTN_NAT:
+				policy = r;
+				break;
+			case RTN_UNREACHABLE:
+				read_unlock(&dn_fib_rules_lock);
+				return -ENETUNREACH;
+			default:
+			case RTN_BLACKHOLE:
+				read_unlock(&dn_fib_rules_lock);
+				return -EINVAL;
+			case RTN_PROHIBIT:
+				read_unlock(&dn_fib_rules_lock);
+				return -EACCES;
+		}
+
+		if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
+			continue;
+		err = tb->lookup(tb, flp, res);
+		if (err == 0) {
+			res->r = policy;
+			if (policy)
+				atomic_inc(&policy->r_clntref);
+			read_unlock(&dn_fib_rules_lock);
+			return 0;
+		}
+		if (err < 0 && err != -EAGAIN) {
+			read_unlock(&dn_fib_rules_lock);
+			return err;
+		}
+	}
+
+	read_unlock(&dn_fib_rules_lock);
+	return -ESRCH;
+}
+
+unsigned dnet_addr_type(__u16 addr)
+{
+	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
+	struct dn_fib_res res;
+	unsigned ret = RTN_UNICAST;
+	struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+
+	res.r = NULL;
+
+	if (tb) {
+		if (!tb->lookup(tb, &fl, &res)) {
+			ret = res.type;
+			dn_fib_res_put(&res);
+		}
+	}
+	return ret;
+}
+
+__u16 dn_fib_rules_policy(__u16 saddr, struct dn_fib_res *res, unsigned *flags)
+{
+	struct dn_fib_rule *r = res->r;
+
+	if (r->r_action == RTN_NAT) {
+		int addrtype = dnet_addr_type(r->r_srcmap);
+
+		if (addrtype == RTN_NAT) {
+			saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
+			*flags |= RTCF_SNAT;
+		} else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
+			saddr = r->r_srcmap;
+			*flags |= RTCF_MASQ;
+		}
+	}
+	return saddr;
+}
+
+static void dn_fib_rules_detach(struct net_device *dev)
+{
+	struct dn_fib_rule *r;
+
+	for(r = dn_fib_rules; r; r = r->r_next) {
+		if (r->r_ifindex == dev->ifindex) {
+			write_lock_bh(&dn_fib_rules_lock);
+			r->r_ifindex = -1;
+			write_unlock_bh(&dn_fib_rules_lock);
+		}
+	}
+}
+
+static void dn_fib_rules_attach(struct net_device *dev)
+{
+	struct dn_fib_rule *r;
+
+	for(r = dn_fib_rules; r; r = r->r_next) {
+		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) {
+			write_lock_bh(&dn_fib_rules_lock);
+			r->r_ifindex = dev->ifindex;
+			write_unlock_bh(&dn_fib_rules_lock);
+		}
+	}
+}
+
+static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	switch(event) {
+		case NETDEV_UNREGISTER:
+			dn_fib_rules_detach(dev);
+			dn_fib_sync_down(0, dev, 1);
+		case NETDEV_REGISTER:
+			dn_fib_rules_attach(dev);
+			dn_fib_sync_up(dev);
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+static struct notifier_block dn_fib_rules_notifier = {
+	.notifier_call =	dn_fib_rules_event,
+};
+
+static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+
+	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	rtm = NLMSG_DATA(nlh);
+	rtm->rtm_family = AF_DECnet;
+	rtm->rtm_dst_len = r->r_dst_len;
+	rtm->rtm_src_len = r->r_src_len;
+	rtm->rtm_tos = 0;
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (r->r_fwmark)
+		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+#endif
+	rtm->rtm_table = r->r_table;
+	rtm->rtm_protocol = 0;
+	rtm->rtm_scope = 0;
+	rtm->rtm_type = r->r_action;
+	rtm->rtm_flags = r->r_flags;
+
+	if (r->r_dst_len)
+		RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
+	if (r->r_src_len)
+		RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
+	if (r->r_ifname[0])
+		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
+	if (r->r_preference)
+		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
+	if (r->r_srcmap)
+		RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->args[0];
+	struct dn_fib_rule *r;
+
+	read_lock(&dn_fib_rules_lock);
+	for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (dn_fib_fill_rule(skb, r, cb) < 0)
+			break;
+	}
+	read_unlock(&dn_fib_rules_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+void __init dn_fib_rules_init(void)
+{
+	register_netdevice_notifier(&dn_fib_rules_notifier);
+}
+
+void __exit dn_fib_rules_cleanup(void)
+{
+	unregister_netdevice_notifier(&dn_fib_rules_notifier);
+}
+
+
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
new file mode 100644
index 00000000000..dad5603912b
--- /dev/null
+++ b/net/decnet/dn_table.c
@@ -0,0 +1,825 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Routing Forwarding Information Base (Routing Tables)
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *              Mostly copied from the IPv4 routing code
+ *
+ *
+ * Changes:
+ *
+ */
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <linux/route.h> /* RTF_xxx */
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_route.h>
+#include <net/dn_fib.h>
+#include <net/dn_neigh.h>
+#include <net/dn_dev.h>
+
+struct dn_zone
+{
+	struct dn_zone		*dz_next;
+	struct dn_fib_node 	**dz_hash;
+	int			dz_nent;
+	int			dz_divisor;
+	u32			dz_hashmask;
+#define DZ_HASHMASK(dz)	((dz)->dz_hashmask)
+	int			dz_order;
+	u16			dz_mask;
+#define DZ_MASK(dz)	((dz)->dz_mask)
+};
+
+struct dn_hash
+{
+	struct dn_zone	*dh_zones[17];
+	struct dn_zone	*dh_zone_list;
+};
+
+#define dz_key_0(key)		((key).datum = 0)
+#define dz_prefix(key,dz)	((key).datum)
+
+#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
+        for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#define endfor_nexthops(fi) }
+
+#define DN_MAX_DIVISOR 1024
+#define DN_S_ZOMBIE 1
+#define DN_S_ACCESSED 2
+
+#define DN_FIB_SCAN(f, fp) \
+for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
+
+#define DN_FIB_SCAN_KEY(f, fp, key) \
+for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
+
+#define RT_TABLE_MIN 1
+
+static DEFINE_RWLOCK(dn_fib_tables_lock);
+struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
+
+static kmem_cache_t *dn_hash_kmem;
+static int dn_fib_hash_zombies;
+
+static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
+{
+	u16 h = ntohs(key.datum)>>(16 - dz->dz_order);
+	h ^= (h >> 10);
+	h ^= (h >> 6);
+	h &= DZ_HASHMASK(dz);
+	return *(dn_fib_idx_t *)&h;
+}
+
+static inline dn_fib_key_t dz_key(u16 dst, struct dn_zone *dz)
+{
+	dn_fib_key_t k;
+	k.datum = dst & DZ_MASK(dz);
+	return k;
+}
+
+static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz)
+{
+	return &dz->dz_hash[dn_hash(key, dz).datum];
+}
+
+static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz)
+{
+	return dz->dz_hash[dn_hash(key, dz).datum];
+}
+
+static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b)
+{
+	return a.datum == b.datum;
+}
+
+static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b)
+{
+	return a.datum <= b.datum;
+}
+
+static inline void dn_rebuild_zone(struct dn_zone *dz,
+				   struct dn_fib_node **old_ht,
+				   int old_divisor)
+{
+	int i;
+	struct dn_fib_node *f, **fp, *next;
+
+	for(i = 0; i < old_divisor; i++) {
+		for(f = old_ht[i]; f; f = f->fn_next) {
+			next = f->fn_next;
+			for(fp = dn_chain_p(f->fn_key, dz);
+				*fp && dn_key_leq((*fp)->fn_key, f->fn_key);
+				fp = &(*fp)->fn_next)
+				/* NOTHING */;
+			f->fn_next = *fp;
+			*fp = f;
+		}
+	}
+}
+
+static void dn_rehash_zone(struct dn_zone *dz)
+{
+	struct dn_fib_node **ht, **old_ht;
+	int old_divisor, new_divisor;
+	u32 new_hashmask;
+
+	old_divisor = dz->dz_divisor;
+
+	switch(old_divisor) {
+		case 16:
+			new_divisor = 256;
+			new_hashmask = 0xFF;
+			break;
+		default:
+			printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor);
+		case 256:
+			new_divisor = 1024;
+			new_hashmask = 0x3FF;
+			break;
+	}
+
+	ht = kmalloc(new_divisor*sizeof(struct dn_fib_node*), GFP_KERNEL);
+
+	if (ht == NULL)
+		return;
+
+	memset(ht, 0, new_divisor*sizeof(struct dn_fib_node *));
+	write_lock_bh(&dn_fib_tables_lock);
+	old_ht = dz->dz_hash;
+	dz->dz_hash = ht;
+	dz->dz_hashmask = new_hashmask;
+	dz->dz_divisor = new_divisor;
+	dn_rebuild_zone(dz, old_ht, old_divisor);
+	write_unlock_bh(&dn_fib_tables_lock);
+	kfree(old_ht);
+}
+
+static void dn_free_node(struct dn_fib_node *f)
+{
+	dn_fib_release_info(DN_FIB_INFO(f));
+	kmem_cache_free(dn_hash_kmem, f);
+}
+
+
+static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
+{
+	int i;
+	struct dn_zone *dz = kmalloc(sizeof(struct dn_zone), GFP_KERNEL);
+	if (!dz)
+		return NULL;
+
+	memset(dz, 0, sizeof(struct dn_zone));
+	if (z) {
+		dz->dz_divisor = 16;
+		dz->dz_hashmask = 0x0F;
+	} else {
+		dz->dz_divisor = 1;
+		dz->dz_hashmask = 0;
+	}
+
+	dz->dz_hash = kmalloc(dz->dz_divisor*sizeof(struct dn_fib_node *), GFP_KERNEL);
+
+	if (!dz->dz_hash) {
+		kfree(dz);
+		return NULL;
+	}
+
+	memset(dz->dz_hash, 0, dz->dz_divisor*sizeof(struct dn_fib_node*));
+	dz->dz_order = z;
+	dz->dz_mask = dnet_make_mask(z);
+
+	for(i = z + 1; i <= 16; i++)
+		if (table->dh_zones[i])
+			break;
+
+	write_lock_bh(&dn_fib_tables_lock);
+	if (i>16) {
+		dz->dz_next = table->dh_zone_list;
+		table->dh_zone_list = dz;
+	} else {
+		dz->dz_next = table->dh_zones[i]->dz_next;
+		table->dh_zones[i]->dz_next = dz;
+	}
+	table->dh_zones[z] = dz;
+	write_unlock_bh(&dn_fib_tables_lock);
+	return dz;
+}
+
+
+static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi)
+{
+	struct rtnexthop *nhp;
+	int nhlen;
+
+	if (rta->rta_priority && *rta->rta_priority != fi->fib_priority)
+		return 1;
+
+	if (rta->rta_oif || rta->rta_gw) {
+		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
+		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0))
+			return 0;
+		return 1;
+	}
+
+	if (rta->rta_mp == NULL)
+		return 0;
+
+	nhp = RTA_DATA(rta->rta_mp);
+	nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+	for_nexthops(fi) {
+		int attrlen = nhlen - sizeof(struct rtnexthop);
+		dn_address gw;
+
+		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+			return -EINVAL;
+		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+			return 1;
+		if (attrlen) {
+			gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+			if (gw && gw != nh->nh_gw)
+				return 1;
+		}
+		nhp = RTNH_NEXT(nhp);
+	} endfor_nexthops(fi);
+
+	return 0;
+}
+
+static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+                        u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+                        struct dn_fib_info *fi)
+{
+        struct rtmsg *rtm;
+        struct nlmsghdr *nlh;
+        unsigned char *b = skb->tail;
+
+        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+        rtm = NLMSG_DATA(nlh);
+        rtm->rtm_family = AF_DECnet;
+        rtm->rtm_dst_len = dst_len;
+        rtm->rtm_src_len = 0;
+        rtm->rtm_tos = 0;
+        rtm->rtm_table = tb_id;
+        rtm->rtm_flags = fi->fib_flags;
+        rtm->rtm_scope = scope;
+	rtm->rtm_type  = type;
+        if (rtm->rtm_dst_len)
+                RTA_PUT(skb, RTA_DST, 2, dst);
+        rtm->rtm_protocol = fi->fib_protocol;
+        if (fi->fib_priority)
+                RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+		goto rtattr_failure;
+        if (fi->fib_nhs == 1) {
+                if (fi->fib_nh->nh_gw)
+                        RTA_PUT(skb, RTA_GATEWAY, 2, &fi->fib_nh->nh_gw);
+                if (fi->fib_nh->nh_oif)
+                        RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+        }
+        if (fi->fib_nhs > 1) {
+                struct rtnexthop *nhp;
+                struct rtattr *mp_head;
+                if (skb_tailroom(skb) <= RTA_SPACE(0))
+                        goto rtattr_failure;
+                mp_head = (struct rtattr *)skb_put(skb, RTA_SPACE(0));
+
+                for_nexthops(fi) {
+                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+                                goto rtattr_failure;
+                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+                        nhp->rtnh_flags = nh->nh_flags & 0xFF;
+                        nhp->rtnh_hops = nh->nh_weight - 1;
+                        nhp->rtnh_ifindex = nh->nh_oif;
+                        if (nh->nh_gw)
+                                RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
+                        nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+                } endfor_nexthops(fi);
+                mp_head->rta_type = RTA_MULTIPATH;
+                mp_head->rta_len = skb->tail - (u8*)mp_head;
+        }
+
+        nlh->nlmsg_len = skb->tail - b;
+        return skb->len;
+
+
+nlmsg_failure:
+rtattr_failure:
+        skb_trim(skb, b - skb->data);
+        return -1;
+}
+
+
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+                        struct nlmsghdr *nlh, struct netlink_skb_parms *req)
+{
+        struct sk_buff *skb;
+        u32 pid = req ? req->pid : 0;
+        int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+
+        skb = alloc_skb(size, GFP_KERNEL);
+        if (!skb)
+                return;
+
+        if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 
+                                f->fn_type, f->fn_scope, &f->fn_key, z, 
+                                DN_FIB_INFO(f)) < 0) {
+                kfree_skb(skb);
+                return;
+        }
+        NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE;
+        if (nlh->nlmsg_flags & NLM_F_ECHO)
+                atomic_inc(&skb->users);
+        netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL);
+        if (nlh->nlmsg_flags & NLM_F_ECHO)
+                netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+}
+
+static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, 
+				struct netlink_callback *cb,
+				struct dn_fib_table *tb,
+				struct dn_zone *dz,
+				struct dn_fib_node *f)
+{
+	int i, s_i;
+
+	s_i = cb->args[3];
+	for(i = 0; f; i++, f = f->fn_next) {
+		if (i < s_i)
+			continue;
+		if (f->fn_state & DN_S_ZOMBIE)
+			continue;
+		if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 
+				cb->nlh->nlmsg_seq,
+				RTM_NEWROUTE,
+				tb->n, 
+				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
+				f->fn_scope, &f->fn_key, dz->dz_order, 
+				f->fn_info) < 0) {
+			cb->args[3] = i;
+			return -1;
+		}
+	}
+	cb->args[3] = i;
+	return skb->len;
+}
+
+static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, 
+				struct netlink_callback *cb,
+				struct dn_fib_table *tb,
+				struct dn_zone *dz)
+{
+	int h, s_h;
+
+	s_h = cb->args[2];
+	for(h = 0; h < dz->dz_divisor; h++) {
+		if (h < s_h)
+			continue;
+		if (h > s_h)
+			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+		if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
+			continue;
+		if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
+			cb->args[2] = h;
+			return -1;
+		}
+	}
+	cb->args[2] = h;
+	return skb->len;
+}
+
+static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, 
+                                struct netlink_callback *cb)
+{
+        int m, s_m;
+	struct dn_zone *dz;
+	struct dn_hash *table = (struct dn_hash *)tb->data;
+
+	s_m = cb->args[1];
+	read_lock(&dn_fib_tables_lock);
+	for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
+		if (m < s_m)
+			continue;
+		if (m > s_m)
+			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+
+		if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
+			cb->args[1] = m;
+			read_unlock(&dn_fib_tables_lock);
+			return -1;
+		}
+	}
+	read_unlock(&dn_fib_tables_lock);
+	cb->args[1] = m;
+
+        return skb->len;
+}
+
+static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+	struct dn_hash *table = (struct dn_hash *)tb->data;
+	struct dn_fib_node *new_f, *f, **fp, **del_fp;
+	struct dn_zone *dz;
+	struct dn_fib_info *fi;
+        int z = r->rtm_dst_len;
+	int type = r->rtm_type;
+	dn_fib_key_t key;
+        int err;
+
+        if (z > 16)
+                return -EINVAL;
+
+	dz = table->dh_zones[z];
+	if (!dz && !(dz = dn_new_zone(table, z)))
+		return -ENOBUFS;
+
+	dz_key_0(key);
+	if (rta->rta_dst) {
+		dn_address dst;
+		memcpy(&dst, rta->rta_dst, 2);
+		if (dst & ~DZ_MASK(dz))
+			return -EINVAL;
+		key = dz_key(dst, dz);
+	}
+
+        if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL)
+                return err;
+
+	if (dz->dz_nent > (dz->dz_divisor << 2) &&
+			dz->dz_divisor > DN_MAX_DIVISOR &&
+			(z==16 || (1<<z) > dz->dz_divisor))
+		dn_rehash_zone(dz);
+
+	fp = dn_chain_p(key, dz);
+
+	DN_FIB_SCAN(f, fp) {
+		if (dn_key_leq(key, f->fn_key))
+			break;
+	}
+
+	del_fp = NULL;
+
+	if (f && (f->fn_state & DN_S_ZOMBIE) &&
+			dn_key_eq(f->fn_key, key)) {
+		del_fp = fp;
+		fp = &f->fn_next;
+		f = *fp;
+		goto create;
+	}
+
+	DN_FIB_SCAN_KEY(f, fp, key) {
+		if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority)
+			break;
+	}
+
+	if (f && dn_key_eq(f->fn_key, key) &&
+			fi->fib_priority == DN_FIB_INFO(f)->fib_priority) {
+		struct dn_fib_node **ins_fp;
+
+		err = -EEXIST;
+		if (n->nlmsg_flags & NLM_F_EXCL)
+			goto out;
+
+		if (n->nlmsg_flags & NLM_F_REPLACE) {
+			del_fp = fp;
+			fp = &f->fn_next;
+			f = *fp;
+			goto replace;
+		}
+
+		ins_fp = fp;
+		err = -EEXIST;
+
+		DN_FIB_SCAN_KEY(f, fp, key) {
+			if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority)
+				break;
+			if (f->fn_type == type && f->fn_scope == r->rtm_scope
+					&& DN_FIB_INFO(f) == fi)
+				goto out;
+		}
+
+		if (!(n->nlmsg_flags & NLM_F_APPEND)) {
+			fp = ins_fp;
+			f = *fp;
+		}
+	}
+
+create:
+	err = -ENOENT;
+	if (!(n->nlmsg_flags & NLM_F_CREATE))
+		goto out;
+
+replace:
+	err = -ENOBUFS;
+	new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL);
+	if (new_f == NULL)
+		goto out;
+
+	memset(new_f, 0, sizeof(struct dn_fib_node));
+
+	new_f->fn_key = key;
+	new_f->fn_type = type;
+	new_f->fn_scope = r->rtm_scope;
+	DN_FIB_INFO(new_f) = fi;
+
+	new_f->fn_next = f;
+	write_lock_bh(&dn_fib_tables_lock);
+	*fp = new_f;
+	write_unlock_bh(&dn_fib_tables_lock);
+	dz->dz_nent++;
+
+	if (del_fp) {
+		f = *del_fp;
+		write_lock_bh(&dn_fib_tables_lock);
+		*del_fp = f->fn_next;
+		write_unlock_bh(&dn_fib_tables_lock);
+
+		if (!(f->fn_state & DN_S_ZOMBIE))
+			dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
+		if (f->fn_state & DN_S_ACCESSED)
+			dn_rt_cache_flush(-1);
+		dn_free_node(f);
+		dz->dz_nent--;
+	} else {
+		dn_rt_cache_flush(-1);
+	}
+
+        dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req);
+
+        return 0;
+out:
+	dn_fib_release_info(fi);
+	return err;
+}
+
+
+static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+	struct dn_hash *table = (struct dn_hash*)tb->data;
+	struct dn_fib_node **fp, **del_fp, *f;
+        int z = r->rtm_dst_len;
+	struct dn_zone *dz;
+	dn_fib_key_t key;
+	int matched;
+
+
+        if (z > 16)
+                return -EINVAL;
+
+	if ((dz = table->dh_zones[z]) == NULL)
+		return -ESRCH;
+
+	dz_key_0(key);
+	if (rta->rta_dst) {
+		dn_address dst;
+		memcpy(&dst, rta->rta_dst, 2);
+		if (dst & ~DZ_MASK(dz))
+			return -EINVAL;
+		key = dz_key(dst, dz);
+	}
+
+	fp = dn_chain_p(key, dz);
+
+	DN_FIB_SCAN(f, fp) {
+		if (dn_key_eq(f->fn_key, key))
+			break;
+		if (dn_key_leq(key, f->fn_key))
+			return -ESRCH;
+	}
+
+	matched = 0;
+	del_fp = NULL;
+	DN_FIB_SCAN_KEY(f, fp, key) {
+		struct dn_fib_info *fi = DN_FIB_INFO(f);
+
+		if (f->fn_state & DN_S_ZOMBIE)
+			return -ESRCH;
+
+		matched++;
+
+		if (del_fp == NULL &&
+				(!r->rtm_type || f->fn_type == r->rtm_type) &&
+				(r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
+				(!r->rtm_protocol || 
+					fi->fib_protocol == r->rtm_protocol) &&
+				dn_fib_nh_match(r, n, rta, fi) == 0)
+			del_fp = fp;
+	}
+
+	if (del_fp) {
+		f = *del_fp;
+        	dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
+
+		if (matched != 1) {
+			write_lock_bh(&dn_fib_tables_lock);
+			*del_fp = f->fn_next;
+			write_unlock_bh(&dn_fib_tables_lock);
+
+			if (f->fn_state & DN_S_ACCESSED)
+				dn_rt_cache_flush(-1);
+			dn_free_node(f);
+			dz->dz_nent--;
+		} else {
+			f->fn_state |= DN_S_ZOMBIE;
+			if (f->fn_state & DN_S_ACCESSED) {
+				f->fn_state &= ~DN_S_ACCESSED;
+				dn_rt_cache_flush(-1);
+			}
+			if (++dn_fib_hash_zombies > 128)
+				dn_fib_flush();
+		}
+
+		return 0;
+	}
+
+        return -ESRCH;
+}
+
+static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table)
+{
+	int found = 0;
+	struct dn_fib_node *f;
+
+	while((f = *fp) != NULL) {
+		struct dn_fib_info *fi = DN_FIB_INFO(f);
+
+		if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) {
+			write_lock_bh(&dn_fib_tables_lock);
+			*fp = f->fn_next;
+			write_unlock_bh(&dn_fib_tables_lock);
+
+			dn_free_node(f);
+			found++;
+			continue;
+		}
+		fp = &f->fn_next;
+	}
+
+	return found;
+}
+
+static int dn_fib_table_flush(struct dn_fib_table *tb)
+{
+	struct dn_hash *table = (struct dn_hash *)tb->data;
+	struct dn_zone *dz;
+	int found = 0;
+
+	dn_fib_hash_zombies = 0;
+	for(dz = table->dh_zone_list; dz; dz = dz->dz_next) {
+		int i;
+		int tmp = 0;
+		for(i = dz->dz_divisor-1; i >= 0; i--)
+			tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table);
+		dz->dz_nent -= tmp;
+		found += tmp;
+	}
+
+	return found;
+}
+
+static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res)
+{
+        int err;
+	struct dn_zone *dz;
+	struct dn_hash *t = (struct dn_hash *)tb->data;
+
+	read_lock(&dn_fib_tables_lock);
+	for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
+		struct dn_fib_node *f;
+		dn_fib_key_t k = dz_key(flp->fld_dst, dz);
+
+		for(f = dz_chain(k, dz); f; f = f->fn_next) {
+			if (!dn_key_eq(k, f->fn_key)) {
+				if (dn_key_leq(k, f->fn_key))
+					break;
+				else
+					continue;
+			}
+
+			f->fn_state |= DN_S_ACCESSED;
+
+			if (f->fn_state&DN_S_ZOMBIE)
+				continue;
+
+			if (f->fn_scope < flp->fld_scope)
+				continue;
+
+			err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
+
+			if (err == 0) {
+				res->type = f->fn_type;
+				res->scope = f->fn_scope; 
+				res->prefixlen = dz->dz_order;
+				goto out;
+			}
+			if (err < 0)
+				goto out;
+		}
+	}
+	err = 1;
+out:
+	read_unlock(&dn_fib_tables_lock);
+        return err;
+}
+
+
+struct dn_fib_table *dn_fib_get_table(int n, int create)
+{
+        struct dn_fib_table *t;
+
+        if (n < RT_TABLE_MIN)
+                return NULL;
+
+        if (n > RT_TABLE_MAX)
+                return NULL;
+
+        if (dn_fib_tables[n]) 
+                return dn_fib_tables[n];
+
+        if (!create)
+                return NULL;
+
+        if (in_interrupt() && net_ratelimit()) {
+                printk(KERN_DEBUG "DECnet: BUG! Attempt to create routing table from interrupt\n"); 
+                return NULL;
+        }
+        if ((t = kmalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), GFP_KERNEL)) == NULL)
+                return NULL;
+
+        memset(t, 0, sizeof(struct dn_fib_table));
+
+        t->n = n;
+        t->insert = dn_fib_table_insert;
+        t->delete = dn_fib_table_delete;
+        t->lookup = dn_fib_table_lookup;
+        t->flush  = dn_fib_table_flush;
+        t->dump = dn_fib_table_dump;
+	memset(t->data, 0, sizeof(struct dn_hash));
+        dn_fib_tables[n] = t;
+
+        return t;
+}
+
+static void dn_fib_del_tree(int n)
+{
+        struct dn_fib_table *t;
+
+        write_lock(&dn_fib_tables_lock);
+        t = dn_fib_tables[n];
+        dn_fib_tables[n] = NULL;
+        write_unlock(&dn_fib_tables_lock);
+
+        if (t) {
+                kfree(t);
+        }
+}
+
+struct dn_fib_table *dn_fib_empty_table(void)
+{
+        int id;
+
+        for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
+                if (dn_fib_tables[id] == NULL)
+                        return dn_fib_get_table(id, 1);
+        return NULL;
+}
+
+void __init dn_fib_table_init(void)
+{
+	dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
+					sizeof(struct dn_fib_info),
+					0, SLAB_HWCACHE_ALIGN,
+					NULL, NULL);
+}
+
+void __exit dn_fib_table_cleanup(void)
+{
+	int i;
+
+	for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
+		dn_fib_del_tree(i);
+
+	return;
+}
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
new file mode 100644
index 00000000000..09825711d58
--- /dev/null
+++ b/net/decnet/dn_timer.c
@@ -0,0 +1,109 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Socket Timer Functions
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *
+ *
+ * Changes:
+ *       Steve Whitehouse      : Made keepalive timer part of the same
+ *                               timer idea.
+ *       Steve Whitehouse      : Added checks for sk->sock_readers
+ *       David S. Miller       : New socket locking
+ *       Steve Whitehouse      : Timer grabs socket ref.
+ */
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <net/sock.h>
+#include <asm/atomic.h>
+#include <net/flow.h>
+#include <net/dn.h>
+
+/*
+ * Slow timer is for everything else (n * 500mS)
+ */
+
+#define SLOW_INTERVAL (HZ/2)
+
+static void dn_slow_timer(unsigned long arg);
+
+void dn_start_slow_timer(struct sock *sk)
+{
+	sk->sk_timer.expires	= jiffies + SLOW_INTERVAL;
+	sk->sk_timer.function	= dn_slow_timer;
+	sk->sk_timer.data	= (unsigned long)sk;
+
+	add_timer(&sk->sk_timer);
+}
+
+void dn_stop_slow_timer(struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+}
+
+static void dn_slow_timer(unsigned long arg)
+{
+	struct sock *sk = (struct sock *)arg;
+	struct dn_scp *scp = DN_SK(sk);
+
+	sock_hold(sk);
+	bh_lock_sock(sk);
+
+	if (sock_owned_by_user(sk)) {
+		sk->sk_timer.expires = jiffies + HZ / 10;
+		add_timer(&sk->sk_timer);
+		goto out;
+	}
+
+	/*
+	 * The persist timer is the standard slow timer used for retransmits
+	 * in both connection establishment and disconnection as well as
+	 * in the RUN state. The different states are catered for by changing
+	 * the function pointer in the socket. Setting the timer to a value
+	 * of zero turns it off. We allow the persist_fxn to turn the
+	 * timer off in a permant way by returning non-zero, so that
+	 * timer based routines may remove sockets. This is why we have a
+	 * sock_hold()/sock_put() around the timer to prevent the socket
+	 * going away in the middle.
+	 */
+	if (scp->persist && scp->persist_fxn) {
+		if (scp->persist <= SLOW_INTERVAL) {
+			scp->persist = 0;
+
+			if (scp->persist_fxn(sk))
+				goto out;
+		} else {
+			scp->persist -= SLOW_INTERVAL;
+		}
+	}
+
+	/*
+	 * Check for keepalive timeout. After the other timer 'cos if
+	 * the previous timer caused a retransmit, we don't need to
+	 * do this. scp->stamp is the last time that we sent a packet.
+	 * The keepalive function sends a link service packet to the
+	 * other end. If it remains unacknowledged, the standard
+	 * socket timers will eventually shut the socket down. Each
+	 * time we do this, scp->stamp will be updated, thus
+	 * we won't try and send another until scp->keepalive has passed
+	 * since the last successful transmission.
+	 */
+	if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
+		if ((jiffies - scp->stamp) >= scp->keepalive)
+			scp->keepalive_fxn(sk);
+	}
+
+	sk->sk_timer.expires = jiffies + SLOW_INTERVAL;
+
+	add_timer(&sk->sk_timer);
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
new file mode 100644
index 00000000000..ecdb3f9f14c
--- /dev/null
+++ b/net/decnet/netfilter/Kconfig
@@ -0,0 +1,15 @@
+#
+# DECnet netfilter configuration
+#
+
+menu "DECnet: Netfilter Configuration"
+	depends on DECNET && NETFILTER && EXPERIMENTAL
+
+config DECNET_NF_GRABULATOR
+	tristate "Routing message grabulator (for userland routing daemon)"
+	help
+	  Enable this module if you want to use the userland DECnet routing
+	  daemon. You will also need to enable routing support for DECnet
+	  unless you just want to monitor routing messages from other nodes.
+
+endmenu
diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
new file mode 100644
index 00000000000..255c1ae9dae
--- /dev/null
+++ b/net/decnet/netfilter/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for DECnet netfilter modules
+#
+
+obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
+
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
new file mode 100644
index 00000000000..f86a6259fd1
--- /dev/null
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -0,0 +1,167 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet Routing Message Grabulator
+ *
+ *              (C) 2000 ChyGwyn Limited  -  http://www.chygwyn.com/
+ *              This code may be copied under the GPL v.2 or at your option
+ *              any later version.
+ *
+ * Author:      Steven Whitehouse <steve@chygwyn.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+
+#include <net/sock.h>
+#include <net/flow.h>
+#include <net/dn.h>
+#include <net/dn_route.h>
+
+#include <linux/netfilter_decnet.h>
+
+static struct sock *dnrmg = NULL;
+
+
+static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
+{
+	struct sk_buff *skb = NULL;
+	size_t size;
+	unsigned char *old_tail;
+	struct nlmsghdr *nlh;
+	unsigned char *ptr;
+	struct nf_dn_rtmsg *rtm;
+
+	size = NLMSG_SPACE(rt_skb->len);
+	size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+	old_tail = skb->tail;
+	nlh = NLMSG_PUT(skb, 0, 0, 0, size - sizeof(*nlh));
+	rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
+	rtm->nfdn_ifindex = rt_skb->dev->ifindex;
+	ptr = NFDN_RTMSG(rtm);
+	memcpy(ptr, rt_skb->data, rt_skb->len);
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+	if (skb)
+		kfree_skb(skb);
+	*errp = -ENOMEM;
+	if (net_ratelimit())
+		printk(KERN_ERR "dn_rtmsg: error creating netlink message\n");
+	return NULL;
+}
+
+static void dnrmg_send_peer(struct sk_buff *skb)
+{
+	struct sk_buff *skb2;
+	int status = 0;
+	int group = 0;
+	unsigned char flags = *skb->data;
+
+	switch(flags & DN_RT_CNTL_MSK) {
+		case DN_RT_PKT_L1RT:
+			group = DNRMG_L1_GROUP;
+			break;
+		case DN_RT_PKT_L2RT:
+			group = DNRMG_L2_GROUP;
+			break;
+		default:
+			return;
+	}
+
+	skb2 = dnrmg_build_message(skb, &status);
+	if (skb2 == NULL)
+		return;
+	NETLINK_CB(skb2).dst_groups = group;
+	netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
+}
+
+
+static unsigned int dnrmg_hook(unsigned int hook,
+			struct sk_buff **pskb,
+			const struct net_device *in,
+			const struct net_device *out,
+			int (*okfn)(struct sk_buff *))
+{
+	dnrmg_send_peer(*pskb);
+	return NF_ACCEPT;
+}
+
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+
+	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+		return;
+
+	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
+		RCV_SKB_FAIL(-EPERM);
+
+	/* Eventually we might send routing messages too */
+
+	RCV_SKB_FAIL(-EINVAL);
+}
+
+static void dnrmg_receive_user_sk(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	while((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		dnrmg_receive_user_skb(skb);
+		kfree_skb(skb);
+	}
+}
+
+static struct nf_hook_ops dnrmg_ops = {
+	.hook		= dnrmg_hook,
+	.pf		= PF_DECnet,
+	.hooknum	= NF_DN_ROUTE,
+	.priority	= NF_DN_PRI_DNRTMSG,
+};
+
+static int __init init(void)
+{
+	int rv = 0;
+
+	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk);
+	if (dnrmg == NULL) {
+		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
+		return -ENOMEM;
+	}
+
+	rv = nf_register_hook(&dnrmg_ops);
+	if (rv) {
+		sock_release(dnrmg->sk_socket);
+	}
+
+	return rv;
+}
+
+static void __exit fini(void)
+{
+	nf_unregister_hook(&dnrmg_ops);
+	sock_release(dnrmg->sk_socket);
+}
+
+
+MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
+MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
+MODULE_LICENSE("GPL");
+
+module_init(init);
+module_exit(fini);
+
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
new file mode 100644
index 00000000000..02bca49cb50
--- /dev/null
+++ b/net/decnet/sysctl_net_decnet.c
@@ -0,0 +1,480 @@
+/*
+ * DECnet       An implementation of the DECnet protocol suite for the LINUX
+ *              operating system.  DECnet is implemented using the  BSD Socket
+ *              interface as the means of communication with the user level.
+ *
+ *              DECnet sysctl support functions
+ *
+ * Author:      Steve Whitehouse <SteveW@ACM.org>
+ *
+ *
+ * Changes:
+ * Steve Whitehouse - C99 changes and default device handling
+ *
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/fs.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/flow.h>
+
+#include <asm/uaccess.h>
+
+#include <net/dn.h>
+#include <net/dn_dev.h>
+#include <net/dn_route.h>
+
+
+int decnet_debug_level;
+int decnet_time_wait = 30;
+int decnet_dn_count = 1;
+int decnet_di_count = 3;
+int decnet_dr_count = 3;
+int decnet_log_martians = 1;
+int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
+
+#ifdef CONFIG_SYSCTL
+extern int decnet_dst_gc_interval;
+static int min_decnet_time_wait[] = { 5 };
+static int max_decnet_time_wait[] = { 600 };
+static int min_state_count[] = { 1 };
+static int max_state_count[] = { NSP_MAXRXTSHIFT };
+static int min_decnet_dst_gc_interval[] = { 1 };
+static int max_decnet_dst_gc_interval[] = { 60 };
+static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW };
+static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW };
+static char node_name[7] = "???";
+
+static struct ctl_table_header *dn_table_header = NULL;
+
+/*
+ * ctype.h :-)
+ */
+#define ISNUM(x) (((x) >= '0') && ((x) <= '9'))
+#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z'))
+#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z'))
+#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x))
+#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x))
+
+static void strip_it(char *str)
+{
+	for(;;) {
+		switch(*str) {
+			case ' ':
+			case '\n':
+			case '\r':
+			case ':':
+				*str = 0;
+			case 0:
+				return;
+		}
+		str++;
+	}
+}
+
+/*
+ * Simple routine to parse an ascii DECnet address
+ * into a network order address.
+ */
+static int parse_addr(dn_address *addr, char *str)
+{
+	dn_address area, node;
+
+	while(*str && !ISNUM(*str)) str++;
+
+	if (*str == 0)
+		return -1;
+
+	area = (*str++ - '0');
+	if (ISNUM(*str)) {
+		area *= 10;
+		area += (*str++ - '0');
+	}
+
+	if (*str++ != '.')
+		return -1;
+
+	if (!ISNUM(*str))
+		return -1;
+
+	node = *str++ - '0';
+	if (ISNUM(*str)) {
+		node *= 10;
+		node += (*str++ - '0');
+	}
+	if (ISNUM(*str)) {
+		node *= 10;
+		node += (*str++ - '0');
+	}
+	if (ISNUM(*str)) {
+		node *= 10;
+		node += (*str++ - '0');
+	}
+
+	if ((node > 1023) || (area > 63))
+		return -1;
+
+	if (INVALID_END_CHAR(*str))
+		return -1;
+
+	*addr = dn_htons((area << 10) | node);
+
+	return 0;
+}
+
+
+static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
+				void __user *oldval, size_t __user *oldlenp,
+				void __user *newval, size_t newlen,
+				void **context)
+{
+	size_t len;
+	dn_address addr;
+
+	if (oldval && oldlenp) {
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+		if (len) {
+			if (len != sizeof(unsigned short))
+				return -EINVAL;
+			if (put_user(decnet_address, (unsigned short __user *)oldval))
+				return -EFAULT;
+		}
+	}
+	if (newval && newlen) {
+		if (newlen != sizeof(unsigned short))
+			return -EINVAL;
+		if (get_user(addr, (unsigned short __user *)newval))
+			return -EFAULT;
+
+		dn_dev_devices_off();
+
+		decnet_address = addr;
+
+		dn_dev_devices_on();
+	}
+	return 0;
+}
+
+static int dn_node_address_handler(ctl_table *table, int write, 
+				struct file *filp,
+				void __user *buffer,
+				size_t *lenp, loff_t *ppos)
+{
+	char addr[DN_ASCBUF_LEN];
+	size_t len;
+	dn_address dnaddr;
+
+	if (!*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		int len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
+
+		if (copy_from_user(addr, buffer, len))
+			return -EFAULT;
+
+		addr[len] = 0;
+		strip_it(addr);
+
+		if (parse_addr(&dnaddr, addr))
+			return -EINVAL;
+
+		dn_dev_devices_off();
+
+		decnet_address = dnaddr;
+
+		dn_dev_devices_on();
+
+		*ppos += len;
+
+		return 0;
+	}
+
+	dn_addr2asc(dn_ntohs(decnet_address), addr);
+	len = strlen(addr);
+	addr[len++] = '\n';
+
+	if (len > *lenp) len = *lenp;
+
+	if (copy_to_user(buffer, addr, len))
+		return -EFAULT;
+
+	*lenp = len;
+	*ppos += len;
+
+	return 0;
+}
+
+
+static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
+				void __user *oldval, size_t __user *oldlenp,
+				void __user *newval, size_t newlen,
+				void **context)
+{
+	size_t len;
+	struct net_device *dev;
+	char devname[17];
+	size_t namel;
+	int rv = 0;
+
+	devname[0] = 0;
+
+	if (oldval && oldlenp) {
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+		if (len) {
+			dev = dn_dev_get_default();
+			if (dev) {
+				strcpy(devname, dev->name);
+				dev_put(dev);
+			}
+
+			namel = strlen(devname) + 1;
+			if (len > namel) len = namel;	
+
+			if (copy_to_user(oldval, devname, len))
+				return -EFAULT;
+
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	if (newval && newlen) {
+		if (newlen > 16)
+			return -E2BIG;
+
+		if (copy_from_user(devname, newval, newlen))
+			return -EFAULT;
+
+		devname[newlen] = 0;
+
+		dev = dev_get_by_name(devname);
+		if (dev == NULL)
+			return -ENODEV;
+
+		rv = -ENODEV;
+		if (dev->dn_ptr != NULL) {
+			rv = dn_dev_set_default(dev, 1);
+			if (rv)
+				dev_put(dev);
+		}
+	}
+
+	return rv;
+}
+
+
+static int dn_def_dev_handler(ctl_table *table, int write, 
+				struct file * filp,
+				void __user *buffer,
+				size_t *lenp, loff_t *ppos)
+{
+	size_t len;
+	struct net_device *dev;
+	char devname[17];
+
+	if (!*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		if (*lenp > 16)
+			return -E2BIG;
+
+		if (copy_from_user(devname, buffer, *lenp))
+			return -EFAULT;
+
+		devname[*lenp] = 0;
+		strip_it(devname);
+
+		dev = dev_get_by_name(devname);
+		if (dev == NULL)
+			return -ENODEV;
+
+		if (dev->dn_ptr == NULL) {
+			dev_put(dev);
+			return -ENODEV;
+		}
+
+		if (dn_dev_set_default(dev, 1)) {
+			dev_put(dev);
+			return -ENODEV;
+		}
+		*ppos += *lenp;
+
+		return 0;
+	}
+
+	dev = dn_dev_get_default();
+	if (dev == NULL) {
+		*lenp = 0;
+		return 0;
+	}
+
+	strcpy(devname, dev->name);
+	dev_put(dev);
+	len = strlen(devname);
+	devname[len++] = '\n';
+
+	if (len > *lenp) len = *lenp;
+
+	if (copy_to_user(buffer, devname, len))
+		return -EFAULT;
+
+	*lenp = len;
+	*ppos += len;
+
+	return 0;
+}
+
+static ctl_table dn_table[] = {
+	{
+		.ctl_name = NET_DECNET_NODE_ADDRESS, 
+		.procname = "node_address", 
+		.maxlen = 7, 
+		.mode = 0644, 
+		.proc_handler = dn_node_address_handler,
+		.strategy = dn_node_address_strategy,
+	},
+	{
+		.ctl_name = NET_DECNET_NODE_NAME,
+		.procname = "node_name",
+		.data = node_name, 
+		.maxlen = 7,
+		.mode = 0644,
+		.proc_handler = &proc_dostring,
+		.strategy = &sysctl_string,
+	},
+	{
+		.ctl_name = NET_DECNET_DEFAULT_DEVICE,
+		.procname = "default_device", 
+		.maxlen = 16, 
+		.mode = 0644,
+		.proc_handler = dn_def_dev_handler,
+		.strategy = dn_def_dev_strategy,
+	},
+	{
+		.ctl_name = NET_DECNET_TIME_WAIT,
+		.procname = "time_wait",
+		.data = &decnet_time_wait,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_decnet_time_wait,
+		.extra2 = &max_decnet_time_wait
+	},
+	{
+		.ctl_name = NET_DECNET_DN_COUNT,
+		.procname = "dn_count",
+		.data = &decnet_dn_count,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_state_count,
+		.extra2 = &max_state_count
+	},
+	{
+		.ctl_name = NET_DECNET_DI_COUNT,
+		.procname = "di_count",
+		.data = &decnet_di_count,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_state_count,
+		.extra2 = &max_state_count
+	},
+	{
+		.ctl_name = NET_DECNET_DR_COUNT,
+		.procname = "dr_count",
+		.data = &decnet_dr_count,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_state_count,
+		.extra2 = &max_state_count
+	},
+	{
+		.ctl_name = NET_DECNET_DST_GC_INTERVAL,
+		.procname = "dst_gc_interval",
+		.data = &decnet_dst_gc_interval,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_decnet_dst_gc_interval,
+		.extra2 = &max_decnet_dst_gc_interval
+	},
+	{
+		.ctl_name = NET_DECNET_NO_FC_MAX_CWND,
+		.procname = "no_fc_max_cwnd",
+		.data = &decnet_no_fc_max_cwnd,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &min_decnet_no_fc_max_cwnd,
+		.extra2 = &max_decnet_no_fc_max_cwnd
+	},
+	{
+		.ctl_name = NET_DECNET_DEBUG_LEVEL,
+		.procname = "debug",
+		.data = &decnet_debug_level,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{0}
+};
+
+static ctl_table dn_dir_table[] = {
+	{
+		.ctl_name = NET_DECNET, 
+		.procname = "decnet", 
+		.mode = 0555, 
+		.child = dn_table},
+	{0}
+};
+
+static ctl_table dn_root_table[] = {
+	{
+		.ctl_name = CTL_NET, 
+		.procname = "net", 
+		.mode = 0555, 
+		.child = dn_dir_table
+	},
+	{0}
+};
+
+void dn_register_sysctl(void)
+{
+	dn_table_header = register_sysctl_table(dn_root_table, 1);
+}
+
+void dn_unregister_sysctl(void)
+{
+	unregister_sysctl_table(dn_table_header);
+}
+
+#else  /* CONFIG_SYSCTL */
+void dn_unregister_sysctl(void)
+{
+}
+void dn_register_sysctl(void)
+{
+}
+
+#endif
diff --git a/net/econet/Makefile b/net/econet/Makefile
new file mode 100644
index 00000000000..39f0a77abdb
--- /dev/null
+++ b/net/econet/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for Econet support code.
+#
+
+obj-$(CONFIG_ECONET) += econet.o
+
+econet-objs := af_econet.o
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
new file mode 100644
index 00000000000..de691e119e1
--- /dev/null
+++ b/net/econet/af_econet.c
@@ -0,0 +1,1129 @@
+/*
+ *	An implementation of the Acorn Econet and AUN protocols.
+ *	Philip Blundell <philb@gnu.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/route.h>
+#include <linux/inet.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/wireless.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/inet_common.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/if_ec.h>
+#include <net/udp.h>
+#include <net/ip.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/bitops.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static struct proto_ops econet_ops;
+static struct hlist_head econet_sklist;
+static DEFINE_RWLOCK(econet_lock);
+
+/* Since there are only 256 possible network numbers (or fewer, depends
+   how you count) it makes sense to use a simple lookup table. */
+static struct net_device *net2dev_map[256];
+
+#define EC_PORT_IP	0xd2
+
+#ifdef CONFIG_ECONET_AUNUDP
+static spinlock_t aun_queue_lock;
+static struct socket *udpsock;
+#define AUN_PORT	0x8000
+
+
+struct aunhdr
+{
+	unsigned char code;		/* AUN magic protocol byte */
+	unsigned char port;
+	unsigned char cb;
+	unsigned char pad;
+	unsigned long handle;
+};
+
+static unsigned long aun_seq;
+
+/* Queue of packets waiting to be transmitted. */
+static struct sk_buff_head aun_queue;
+static struct timer_list ab_cleanup_timer;
+
+#endif		/* CONFIG_ECONET_AUNUDP */
+
+/* Per-packet information */
+struct ec_cb
+{
+	struct sockaddr_ec sec;
+	unsigned long cookie;		/* Supplied by user. */
+#ifdef CONFIG_ECONET_AUNUDP
+	int done;
+	unsigned long seq;		/* Sequencing */
+	unsigned long timeout;		/* Timeout */
+	unsigned long start;		/* jiffies */
+#endif
+#ifdef CONFIG_ECONET_NATIVE
+	void (*sent)(struct sk_buff *, int result);
+#endif
+};
+
+static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
+{
+	write_lock_bh(&econet_lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&econet_lock);
+}
+
+static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
+{
+	write_lock_bh(&econet_lock);
+	sk_add_node(sk, list);
+	write_unlock_bh(&econet_lock);
+}
+
+/*
+ *	Pull a packet from our receive queue and hand it to the user.
+ *	If necessary we block.
+ */
+
+static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	size_t copied;
+	int err;
+
+	msg->msg_namelen = sizeof(struct sockaddr_ec);
+
+	/*
+	 *	Call the generic datagram receiver. This handles all sorts
+	 *	of horrible races and re-entrancy so we can forget about it
+	 *	in the protocol layers.
+	 *
+	 *	Now it will return ENETDOWN, if device have just gone down,
+	 *	but then it will block.
+	 */
+
+	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
+
+	/*
+	 *	An error occurred so return it. Because skb_recv_datagram() 
+	 *	handles the blocking we don't see and worry about blocking
+	 *	retries.
+	 */
+
+	if(skb==NULL)
+		goto out;
+
+	/*
+	 *	You lose any data beyond the buffer you gave. If it worries a
+	 *	user program they can ask the device for its MTU anyway.
+	 */
+
+	copied = skb->len;
+	if (copied > len)
+	{
+		copied=len;
+		msg->msg_flags|=MSG_TRUNC;
+	}
+
+	/* We can't use skb_copy_datagram here */
+	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
+	if (err)
+		goto out_free;
+	sk->sk_stamp = skb->stamp;
+
+	if (msg->msg_name)
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+
+	/*
+	 *	Free or return the buffer as appropriate. Again this
+	 *	hides all the races and re-entrancy issues from us.
+	 */
+	err = copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+}
+
+/*
+ *	Bind an Econet socket.
+ */
+
+static int econet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
+	struct sock *sk=sock->sk;
+	struct econet_sock *eo = ec_sk(sk);
+	
+	/*
+	 *	Check legality
+	 */
+	 
+	if (addr_len < sizeof(struct sockaddr_ec) ||
+	    sec->sec_family != AF_ECONET)
+		return -EINVAL;
+	
+	eo->cb	    = sec->cb;
+	eo->port    = sec->port;
+	eo->station = sec->addr.station;
+	eo->net	    = sec->addr.net;
+
+	return 0;
+}
+
+#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
+/*
+ *	Queue a transmit result for the user to be told about.
+ */
+
+static void tx_result(struct sock *sk, unsigned long cookie, int result)
+{
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+	struct ec_cb *eb;
+	struct sockaddr_ec *sec;
+
+	if (skb == NULL)
+	{
+		printk(KERN_DEBUG "ec: memory squeeze, transmit result dropped.\n");
+		return;
+	}
+
+	eb = (struct ec_cb *)&skb->cb;
+	sec = (struct sockaddr_ec *)&eb->sec;
+	memset(sec, 0, sizeof(struct sockaddr_ec));
+	sec->cookie = cookie;
+	sec->type = ECTYPE_TRANSMIT_STATUS | result;
+	sec->sec_family = AF_ECONET;
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+}
+#endif
+
+#ifdef CONFIG_ECONET_NATIVE
+/*
+ *	Called by the Econet hardware driver when a packet transmit
+ *	has completed.  Tell the user.
+ */
+
+static void ec_tx_done(struct sk_buff *skb, int result)
+{
+	struct ec_cb *eb = (struct ec_cb *)&skb->cb;
+	tx_result(skb->sk, eb->cookie, result);
+}
+#endif
+
+/*
+ *	Send a packet.  We have to work out which device it's going out on
+ *	and hence whether to use real Econet or the UDP emulation.
+ */
+
+static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
+	struct net_device *dev;
+	struct ec_addr addr;
+	int err;
+	unsigned char port, cb;
+#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
+	struct sk_buff *skb;
+	struct ec_cb *eb;
+#endif
+#ifdef CONFIG_ECONET_AUNUDP
+	struct msghdr udpmsg;
+	struct iovec iov[msg->msg_iovlen+1];
+	struct aunhdr ah;
+	struct sockaddr_in udpdest;
+	__kernel_size_t size;
+	int i;
+	mm_segment_t oldfs;
+#endif
+		
+	/*
+	 *	Check the flags. 
+	 */
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) 
+		return -EINVAL;
+
+	/*
+	 *	Get and verify the address. 
+	 */
+	 
+	if (saddr == NULL) {
+		struct econet_sock *eo = ec_sk(sk);
+
+		addr.station = eo->station;
+		addr.net     = eo->net;
+		port	     = eo->port;
+		cb	     = eo->cb;
+	} else {
+		if (msg->msg_namelen < sizeof(struct sockaddr_ec)) 
+			return -EINVAL;
+		addr.station = saddr->addr.station;
+		addr.net = saddr->addr.net;
+		port = saddr->port;
+		cb = saddr->cb;
+	}
+
+	/* Look for a device with the right network number. */
+	dev = net2dev_map[addr.net];
+
+	/* If not directly reachable, use some default */
+	if (dev == NULL)
+	{
+		dev = net2dev_map[0];
+		/* No interfaces at all? */
+		if (dev == NULL)
+			return -ENETDOWN;
+	}
+
+	if (len + 15 > dev->mtu)
+		return -EMSGSIZE;
+
+	if (dev->type == ARPHRD_ECONET)
+	{
+		/* Real hardware Econet.  We're not worthy etc. */
+#ifdef CONFIG_ECONET_NATIVE
+		unsigned short proto = 0;
+
+		dev_hold(dev);
+		
+		skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev), 
+					  msg->msg_flags & MSG_DONTWAIT, &err);
+		if (skb==NULL)
+			goto out_unlock;
+		
+		skb_reserve(skb, LL_RESERVED_SPACE(dev));
+		skb->nh.raw = skb->data;
+		
+		eb = (struct ec_cb *)&skb->cb;
+		
+		/* BUG: saddr may be NULL */
+		eb->cookie = saddr->cookie;
+		eb->sec = *saddr;
+		eb->sent = ec_tx_done;
+
+		if (dev->hard_header) {
+			int res;
+			struct ec_framehdr *fh;
+			err = -EINVAL;
+			res = dev->hard_header(skb, dev, ntohs(proto), 
+					       &addr, NULL, len);
+			/* Poke in our control byte and
+			   port number.  Hack, hack.  */
+			fh = (struct ec_framehdr *)(skb->data);
+			fh->cb = cb;
+			fh->port = port;
+			if (sock->type != SOCK_DGRAM) {
+				skb->tail = skb->data;
+				skb->len = 0;
+			} else if (res < 0)
+				goto out_free;
+		}
+		
+		/* Copy the data. Returns -EFAULT on error */
+		err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+		skb->protocol = proto;
+		skb->dev = dev;
+		skb->priority = sk->sk_priority;
+		if (err)
+			goto out_free;
+		
+		err = -ENETDOWN;
+		if (!(dev->flags & IFF_UP))
+			goto out_free;
+		
+		/*
+		 *	Now send it
+		 */
+		
+		dev_queue_xmit(skb);
+		dev_put(dev);
+		return(len);
+
+	out_free:
+		kfree_skb(skb);
+	out_unlock:
+		if (dev)
+			dev_put(dev);
+#else
+		err = -EPROTOTYPE;
+#endif
+		return err;
+	}
+
+#ifdef CONFIG_ECONET_AUNUDP
+	/* AUN virtual Econet. */
+
+	if (udpsock == NULL)
+		return -ENETDOWN;		/* No socket - can't send */
+	
+	/* Make up a UDP datagram and hand it off to some higher intellect. */
+
+	memset(&udpdest, 0, sizeof(udpdest));
+	udpdest.sin_family = AF_INET;
+	udpdest.sin_port = htons(AUN_PORT);
+
+	/* At the moment we use the stupid Acorn scheme of Econet address
+	   y.x maps to IP a.b.c.x.  This should be replaced with something
+	   more flexible and more aware of subnet masks.  */
+	{
+		struct in_device *idev;
+		unsigned long network = 0;
+
+		rcu_read_lock();
+		idev = __in_dev_get(dev);
+		if (idev) {
+			if (idev->ifa_list)
+				network = ntohl(idev->ifa_list->ifa_address) & 
+					0xffffff00;		/* !!! */
+		}
+		rcu_read_unlock();
+		udpdest.sin_addr.s_addr = htonl(network | addr.station);
+	}
+
+	ah.port = port;
+	ah.cb = cb & 0x7f;
+	ah.code = 2;		/* magic */
+	ah.pad = 0;
+
+	/* tack our header on the front of the iovec */
+	size = sizeof(struct aunhdr);
+	/*
+	 * XXX: that is b0rken.  We can't mix userland and kernel pointers
+	 * in iovec, since on a lot of platforms copy_from_user() will
+	 * *not* work with the kernel and userland ones at the same time,
+	 * regardless of what we do with set_fs().  And we are talking about
+	 * econet-over-ethernet here, so "it's only ARM anyway" doesn't
+	 * apply.  Any suggestions on fixing that code?		-- AV
+	 */
+	iov[0].iov_base = (void *)&ah;
+	iov[0].iov_len = size;
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		void __user *base = msg->msg_iov[i].iov_base;
+		size_t len = msg->msg_iov[i].iov_len;
+		/* Check it now since we switch to KERNEL_DS later. */
+		if (!access_ok(VERIFY_READ, base, len))
+			return -EFAULT;
+		iov[i+1].iov_base = base;
+		iov[i+1].iov_len = len;
+		size += len;
+	}
+
+	/* Get a skbuff (no data, just holds our cb information) */
+	if ((skb = sock_alloc_send_skb(sk, 0, 
+			     msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
+		return err;
+
+	eb = (struct ec_cb *)&skb->cb;
+
+	eb->cookie = saddr->cookie;
+	eb->timeout = (5*HZ);
+	eb->start = jiffies;
+	ah.handle = aun_seq;
+	eb->seq = (aun_seq++);
+	eb->sec = *saddr;
+
+	skb_queue_tail(&aun_queue, skb);
+
+	udpmsg.msg_name = (void *)&udpdest;
+	udpmsg.msg_namelen = sizeof(udpdest);
+	udpmsg.msg_iov = &iov[0];
+	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+	udpmsg.msg_control = NULL;
+	udpmsg.msg_controllen = 0;
+	udpmsg.msg_flags=0;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
+	err = sock_sendmsg(udpsock, &udpmsg, size);
+	set_fs(oldfs);
+#else
+	err = -EPROTOTYPE;
+#endif
+	return err;
+}
+
+/*
+ *	Look up the address of a socket.
+ */
+
+static int econet_getname(struct socket *sock, struct sockaddr *uaddr,
+			  int *uaddr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct econet_sock *eo = ec_sk(sk);
+	struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	sec->sec_family	  = AF_ECONET;
+	sec->port	  = eo->port;
+	sec->addr.station = eo->station;
+	sec->addr.net	  = eo->net;
+
+	*uaddr_len = sizeof(*sec);
+	return 0;
+}
+
+static void econet_destroy_timer(unsigned long data)
+{
+	struct sock *sk=(struct sock *)data;
+
+	if (!atomic_read(&sk->sk_wmem_alloc) &&
+	    !atomic_read(&sk->sk_rmem_alloc)) {
+		sk_free(sk);
+		return;
+	}
+
+	sk->sk_timer.expires = jiffies + 10 * HZ;
+	add_timer(&sk->sk_timer);
+	printk(KERN_DEBUG "econet socket destroy delayed\n");
+}
+
+/*
+ *	Close an econet socket.
+ */
+
+static int econet_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	econet_remove_socket(&econet_sklist, sk);
+
+	/*
+	 *	Now the socket is dead. No more input will appear.
+	 */
+
+	sk->sk_state_change(sk);	/* It is useless. Just for sanity. */
+
+	sock->sk = NULL;
+	sk->sk_socket = NULL;
+	sock_set_flag(sk, SOCK_DEAD);
+
+	/* Purge queues */
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	if (atomic_read(&sk->sk_rmem_alloc) ||
+	    atomic_read(&sk->sk_wmem_alloc)) {
+		sk->sk_timer.data     = (unsigned long)sk;
+		sk->sk_timer.expires  = jiffies + HZ;
+		sk->sk_timer.function = econet_destroy_timer;
+		add_timer(&sk->sk_timer);
+		return 0;
+	}
+
+	sk_free(sk);
+	return 0;
+}
+
+static struct proto econet_proto = {
+	.name	  = "ECONET",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct econet_sock),
+};
+
+/*
+ *	Create an Econet socket
+ */
+
+static int econet_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct econet_sock *eo;
+	int err;
+
+	/* Econet only provides datagram services. */
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+
+	err = -ENOBUFS;
+	sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1);
+	if (sk == NULL)
+		goto out;
+
+	sk->sk_reuse = 1;
+	sock->ops = &econet_ops;
+	sock_init_data(sock, sk);
+
+	eo = ec_sk(sk);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	sk->sk_family = PF_ECONET;
+	eo->num = protocol;
+
+	econet_insert_socket(&econet_sklist, sk);
+	return(0);
+out:
+	return err;
+}
+
+/*
+ *	Handle Econet specific ioctls
+ */
+
+static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	struct ec_device *edev;
+	struct net_device *dev;
+	struct sockaddr_ec *sec;
+
+	/*
+	 *	Fetch the caller's info block into kernel space
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) 
+		return -ENODEV;
+
+	sec = (struct sockaddr_ec *)&ifr.ifr_addr;
+
+	switch (cmd)
+	{
+	case SIOCSIFADDR:
+		edev = dev->ec_ptr;
+		if (edev == NULL)
+		{
+			/* Magic up a new one. */
+			edev = kmalloc(sizeof(struct ec_device), GFP_KERNEL);
+			if (edev == NULL) {
+				printk("af_ec: memory squeeze.\n");
+				dev_put(dev);
+				return -ENOMEM;
+			}
+			memset(edev, 0, sizeof(struct ec_device));
+			dev->ec_ptr = edev;
+		}
+		else
+			net2dev_map[edev->net] = NULL;
+		edev->station = sec->addr.station;
+		edev->net = sec->addr.net;
+		net2dev_map[sec->addr.net] = dev;
+		if (!net2dev_map[0])
+			net2dev_map[0] = dev;
+		dev_put(dev);
+		return 0;
+
+	case SIOCGIFADDR:
+		edev = dev->ec_ptr;
+		if (edev == NULL)
+		{
+			dev_put(dev);
+			return -ENODEV;
+		}
+		memset(sec, 0, sizeof(struct sockaddr_ec));
+		sec->addr.station = edev->station;
+		sec->addr.net = edev->net;
+		sec->sec_family = AF_ECONET;
+		dev_put(dev);
+		if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+			return -EFAULT;
+		return 0;
+	}
+
+	dev_put(dev);
+	return -EINVAL;
+}
+
+/*
+ *	Handle generic ioctls
+ */
+
+static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+
+	switch(cmd) {
+		case SIOCGSTAMP:
+			return sock_get_timestamp(sk, argp);
+
+		case SIOCSIFADDR:
+		case SIOCGIFADDR:
+			return ec_dev_ioctl(sock, cmd, argp);
+			break;
+
+		default:
+			return dev_ioctl(cmd, argp);
+	}
+	/*NOTREACHED*/
+	return 0;
+}
+
+static struct net_proto_family econet_family_ops = {
+	.family =	PF_ECONET,
+	.create =	econet_create,
+	.owner	=	THIS_MODULE,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
+	.family =	PF_ECONET,
+	.owner =	THIS_MODULE,
+	.release =	econet_release,
+	.bind =		econet_bind,
+	.connect =	sock_no_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	econet_getname, 
+	.poll =		datagram_poll,
+	.ioctl =	econet_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	sock_no_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	econet_sendmsg,
+	.recvmsg =	econet_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+#include <linux/smp_lock.h>
+SOCKOPS_WRAP(econet, PF_ECONET);
+
+#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
+/*
+ *	Find the listening socket, if any, for the given data.
+ */
+
+static struct sock *ec_listening_socket(unsigned char port, unsigned char
+				 station, unsigned char net)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &econet_sklist) {
+		struct econet_sock *opt = ec_sk(sk);
+		if ((opt->port == port || opt->port == 0) && 
+		    (opt->station == station || opt->station == 0) &&
+		    (opt->net == net || opt->net == 0))
+			goto found;
+	}
+	sk = NULL;
+found:
+	return sk;
+}
+
+/*
+ *	Queue a received packet for a socket.
+ */
+
+static int ec_queue_packet(struct sock *sk, struct sk_buff *skb,
+			   unsigned char stn, unsigned char net,
+			   unsigned char cb, unsigned char port)
+{
+	struct ec_cb *eb = (struct ec_cb *)&skb->cb;
+	struct sockaddr_ec *sec = (struct sockaddr_ec *)&eb->sec;
+
+	memset(sec, 0, sizeof(struct sockaddr_ec));
+	sec->sec_family = AF_ECONET;
+	sec->type = ECTYPE_PACKET_RECEIVED;
+	sec->port = port;
+	sec->cb = cb;
+	sec->addr.net = net;
+	sec->addr.station = stn;
+
+	return sock_queue_rcv_skb(sk, skb);
+}
+#endif
+
+#ifdef CONFIG_ECONET_AUNUDP
+/*
+ *	Send an AUN protocol response. 
+ */
+
+static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
+{
+	struct sockaddr_in sin = {
+		.sin_family = AF_INET,
+		.sin_port = htons(AUN_PORT),
+		.sin_addr = {.s_addr = addr}
+	};
+	struct aunhdr ah = {.code = code, .cb = cb, .handle = seq};
+	struct kvec iov = {.iov_base = (void *)&ah, .iov_len = sizeof(ah)};
+	struct msghdr udpmsg;
+	
+	udpmsg.msg_name = (void *)&sin;
+	udpmsg.msg_namelen = sizeof(sin);
+	udpmsg.msg_control = NULL;
+	udpmsg.msg_controllen = 0;
+	udpmsg.msg_flags=0;
+
+	kernel_sendmsg(udpsock, &udpmsg, &iov, 1, sizeof(ah));
+}
+
+
+/*
+ *	Handle incoming AUN packets.  Work out if anybody wants them,
+ *	and send positive or negative acknowledgements as appropriate.
+ */
+
+static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
+{
+	struct iphdr *ip = skb->nh.iph;
+	unsigned char stn = ntohl(ip->saddr) & 0xff;
+	struct sock *sk;
+	struct sk_buff *newskb;
+	struct ec_device *edev = skb->dev->ec_ptr;
+
+	if (! edev)
+		goto bad;
+
+	if ((sk = ec_listening_socket(ah->port, stn, edev->net)) == NULL)
+		goto bad;		/* Nobody wants it */
+
+	newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15, 
+			   GFP_ATOMIC);
+	if (newskb == NULL)
+	{
+		printk(KERN_DEBUG "AUN: memory squeeze, dropping packet.\n");
+		/* Send nack and hope sender tries again */
+		goto bad;
+	}
+
+	memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah+1), 
+	       len - sizeof(struct aunhdr));
+
+	if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port))
+	{
+		/* Socket is bankrupt. */
+		kfree_skb(newskb);
+		goto bad;
+	}
+
+	aun_send_response(ip->saddr, ah->handle, 3, 0);
+	return;
+
+bad:
+	aun_send_response(ip->saddr, ah->handle, 4, 0);
+}
+
+/*
+ *	Handle incoming AUN transmit acknowledgements.  If the sequence
+ *      number matches something in our backlog then kill it and tell
+ *	the user.  If the remote took too long to reply then we may have
+ *	dropped the packet already.
+ */
+
+static void aun_tx_ack(unsigned long seq, int result)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+	struct ec_cb *eb;
+
+	spin_lock_irqsave(&aun_queue_lock, flags);
+	skb = skb_peek(&aun_queue);
+	while (skb && skb != (struct sk_buff *)&aun_queue)
+	{
+		struct sk_buff *newskb = skb->next;
+		eb = (struct ec_cb *)&skb->cb;
+		if (eb->seq == seq)
+			goto foundit;
+
+		skb = newskb;
+	}
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+	printk(KERN_DEBUG "AUN: unknown sequence %ld\n", seq);
+	return;
+
+foundit:
+	tx_result(skb->sk, eb->cookie, result);
+	skb_unlink(skb);
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+	kfree_skb(skb);
+}
+
+/*
+ *	Deal with received AUN frames - sort out what type of thing it is
+ *	and hand it to the right function.
+ */
+
+static void aun_data_available(struct sock *sk, int slen)
+{
+	int err;
+	struct sk_buff *skb;
+	unsigned char *data;
+	struct aunhdr *ah;
+	struct iphdr *ip;
+	size_t len;
+
+	while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) {
+		if (err == -EAGAIN) {
+			printk(KERN_ERR "AUN: no data available?!");
+			return;
+		}
+		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
+	}
+
+	data = skb->h.raw + sizeof(struct udphdr);
+	ah = (struct aunhdr *)data;
+	len = skb->len - sizeof(struct udphdr);
+	ip = skb->nh.iph;
+
+	switch (ah->code)
+	{
+	case 2:
+		aun_incoming(skb, ah, len);
+		break;
+	case 3:
+		aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_OK);
+		break;
+	case 4:
+		aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING);
+		break;
+#if 0
+		/* This isn't quite right yet. */
+	case 5:
+		aun_send_response(ip->saddr, ah->handle, 6, ah->cb);
+		break;
+#endif
+	default:
+		printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]);
+	}
+
+	skb_free_datagram(sk, skb);
+}
+
+/*
+ *	Called by the timer to manage the AUN transmit queue.  If a packet
+ *	was sent to a dead or nonexistent host then we will never get an
+ *	acknowledgement back.  After a few seconds we need to spot this and
+ *	drop the packet.
+ */
+
+static void ab_cleanup(unsigned long h)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&aun_queue_lock, flags);
+	skb = skb_peek(&aun_queue);
+	while (skb && skb != (struct sk_buff *)&aun_queue)
+	{
+		struct sk_buff *newskb = skb->next;
+		struct ec_cb *eb = (struct ec_cb *)&skb->cb;
+		if ((jiffies - eb->start) > eb->timeout)
+		{
+			tx_result(skb->sk, eb->cookie, 
+				  ECTYPE_TRANSMIT_NOT_PRESENT);
+			skb_unlink(skb);
+			kfree_skb(skb);
+		}
+		skb = newskb;
+	}
+	spin_unlock_irqrestore(&aun_queue_lock, flags);
+
+	mod_timer(&ab_cleanup_timer, jiffies + (HZ*2));
+}
+
+static int __init aun_udp_initialise(void)
+{
+	int error;
+	struct sockaddr_in sin;
+
+	skb_queue_head_init(&aun_queue);
+	spin_lock_init(&aun_queue_lock);
+	init_timer(&ab_cleanup_timer);
+	ab_cleanup_timer.expires = jiffies + (HZ*2);
+	ab_cleanup_timer.function = ab_cleanup;
+	add_timer(&ab_cleanup_timer);
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_port = htons(AUN_PORT);
+
+	/* We can count ourselves lucky Acorn machines are too dim to
+	   speak IPv6. :-) */
+	if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock)) < 0)
+	{
+		printk("AUN: socket error %d\n", -error);
+		return error;
+	}
+	
+	udpsock->sk->sk_reuse = 1;
+	udpsock->sk->sk_allocation = GFP_ATOMIC; /* we're going to call it
+						    from interrupts */
+	
+	error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin,
+				sizeof(sin));
+	if (error < 0)
+	{
+		printk("AUN: bind error %d\n", -error);
+		goto release;
+	}
+
+	udpsock->sk->sk_data_ready = aun_data_available;
+
+	return 0;
+
+release:
+	sock_release(udpsock);
+	udpsock = NULL;
+	return error;
+}
+#endif
+
+#ifdef CONFIG_ECONET_NATIVE
+
+/*
+ *	Receive an Econet frame from a device.
+ */
+
+static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct ec_framehdr *hdr;
+	struct sock *sk;
+	struct ec_device *edev = dev->ec_ptr;
+
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto drop;
+
+	if (!edev)
+		goto drop;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		return NET_RX_DROP;
+
+	if (!pskb_may_pull(skb, sizeof(struct ec_framehdr)))
+		goto drop;
+
+	hdr = (struct ec_framehdr *) skb->data;
+
+	/* First check for encapsulated IP */
+	if (hdr->port == EC_PORT_IP) {
+		skb->protocol = htons(ETH_P_IP);
+		skb_pull(skb, sizeof(struct ec_framehdr));
+		netif_rx(skb);
+		return 0;
+	}
+
+	sk = ec_listening_socket(hdr->port, hdr->src_stn, hdr->src_net);
+	if (!sk)
+		goto drop;
+
+	if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
+			    hdr->port))
+		goto drop;
+
+	return 0;
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static struct packet_type econet_packet_type = {
+	.type =		__constant_htons(ETH_P_ECONET),
+	.func =		econet_rcv,
+};
+
+static void econet_hw_initialise(void)
+{
+	dev_add_pack(&econet_packet_type);
+}
+
+#endif
+
+static int econet_notifier(struct notifier_block *this, unsigned long msg, void *data)
+{
+	struct net_device *dev = (struct net_device *)data;
+	struct ec_device *edev;
+
+	switch (msg) {
+	case NETDEV_UNREGISTER:
+		/* A device has gone down - kill any data we hold for it. */
+		edev = dev->ec_ptr;
+		if (edev)
+		{
+			if (net2dev_map[0] == dev)
+				net2dev_map[0] = NULL;
+			net2dev_map[edev->net] = NULL;
+			kfree(edev);
+			dev->ec_ptr = NULL;
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block econet_netdev_notifier = {
+	.notifier_call =econet_notifier,
+};
+
+static void __exit econet_proto_exit(void)
+{
+#ifdef CONFIG_ECONET_AUNUDP
+	del_timer(&ab_cleanup_timer);
+	if (udpsock)
+		sock_release(udpsock);
+#endif
+	unregister_netdevice_notifier(&econet_netdev_notifier);
+	sock_unregister(econet_family_ops.family);
+	proto_unregister(&econet_proto);
+}
+
+static int __init econet_proto_init(void)
+{
+	int err = proto_register(&econet_proto, 0);
+
+	if (err != 0)
+		goto out;
+	sock_register(&econet_family_ops);
+#ifdef CONFIG_ECONET_AUNUDP
+	spin_lock_init(&aun_queue_lock);
+	aun_udp_initialise();
+#endif
+#ifdef CONFIG_ECONET_NATIVE
+	econet_hw_initialise();
+#endif
+	register_netdevice_notifier(&econet_netdev_notifier);
+out:
+	return err;
+}
+
+module_init(econet_proto_init);
+module_exit(econet_proto_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_ECONET);
diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile
new file mode 100644
index 00000000000..69b74a9a0fc
--- /dev/null
+++ b/net/ethernet/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux Ethernet layer.
+#
+
+obj-y					+= eth.o
+obj-$(CONFIG_SYSCTL)			+= sysctl_net_ether.o
+obj-$(subst m,y,$(CONFIG_IPX))		+= pe2.o
+obj-$(subst m,y,$(CONFIG_ATALK))	+= pe2.o
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
new file mode 100644
index 00000000000..16c4234cbe1
--- /dev/null
+++ b/net/ethernet/eth.c
@@ -0,0 +1,308 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Ethernet-type device handling.
+ *
+ * Version:	@(#)eth.c	1.0.7	05/25/93
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Florian  La Roche, <rzsfl@rz.uni-sb.de>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * 
+ * Fixes:
+ *		Mr Linux	: Arp problems
+ *		Alan Cox	: Generic queue tidyup (very tiny here)
+ *		Alan Cox	: eth_header ntohs should be htons
+ *		Alan Cox	: eth_rebuild_header missing an htons and
+ *				  minor other things.
+ *		Tegge		: Arp bug fixes. 
+ *		Florian		: Removed many unnecessary functions, code cleanup
+ *				  and changes for new arp and skbuff.
+ *		Alan Cox	: Redid header building to reflect new format.
+ *		Alan Cox	: ARP only when compiled with CONFIG_INET
+ *		Greg Page	: 802.2 and SNAP stuff.
+ *		Alan Cox	: MAC layer pointers/new format.
+ *		Paul Gortmaker	: eth_copy_and_sum shouldn't csum padding.
+ *		Alan Cox	: Protect against forwarding explosions with
+ *				  older network drivers and IFF_ALLMULTI.
+ *	Christer Weinigel	: Better rebuild header message.
+ *             Andrew Morton    : 26Feb01: kill ether_setup() - use netdev_boot_setup().
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <net/dst.h>
+#include <net/arp.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/checksum.h>
+
+extern int __init netdev_boot_setup(char *str);
+
+__setup("ether=", netdev_boot_setup);
+
+/*
+ *	 Create the Ethernet MAC header for an arbitrary protocol layer 
+ *
+ *	saddr=NULL	means use device source address
+ *	daddr=NULL	means leave destination address (eg unresolved arp)
+ */
+
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	   void *daddr, void *saddr, unsigned len)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+
+	/* 
+	 *	Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+	 *	in here instead. It is up to the 802.2 layer to carry protocol information.
+	 */
+	
+	if(type!=ETH_P_802_3) 
+		eth->h_proto = htons(type);
+	else
+		eth->h_proto = htons(len);
+
+	/*
+	 *	Set the source hardware address. 
+	 */
+	 
+	if(saddr)
+		memcpy(eth->h_source,saddr,dev->addr_len);
+	else
+		memcpy(eth->h_source,dev->dev_addr,dev->addr_len);
+
+	/*
+	 *	Anyway, the loopback-device should never use this function... 
+	 */
+
+	if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) 
+	{
+		memset(eth->h_dest, 0, dev->addr_len);
+		return ETH_HLEN;
+	}
+	
+	if(daddr)
+	{
+		memcpy(eth->h_dest,daddr,dev->addr_len);
+		return ETH_HLEN;
+	}
+	
+	return -ETH_HLEN;
+}
+
+
+/*
+ *	Rebuild the Ethernet MAC header. This is called after an ARP
+ *	(or in future other address resolution) has completed on this
+ *	sk_buff. We now let ARP fill in the other fields.
+ *
+ *	This routine CANNOT use cached dst->neigh!
+ *	Really, it is used only when dst->neigh is wrong.
+ */
+
+int eth_rebuild_header(struct sk_buff *skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+	struct net_device *dev = skb->dev;
+
+	switch (eth->h_proto)
+	{
+#ifdef CONFIG_INET
+	case __constant_htons(ETH_P_IP):
+ 		return arp_find(eth->h_dest, skb);
+#endif	
+	default:
+		printk(KERN_DEBUG
+		       "%s: unable to resolve type %X addresses.\n", 
+		       dev->name, (int)eth->h_proto);
+		
+		memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ *	Determine the packet's protocol ID. The rule here is that we 
+ *	assume 802.3 if the type field is short enough to be a length.
+ *	This is normal practice and works for any 'now in use' protocol.
+ */
+ 
+unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ethhdr *eth;
+	unsigned char *rawp;
+	
+	skb->mac.raw=skb->data;
+	skb_pull(skb,ETH_HLEN);
+	eth = eth_hdr(skb);
+	skb->input_dev = dev;
+	
+	if(*eth->h_dest&1)
+	{
+		if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
+			skb->pkt_type=PACKET_BROADCAST;
+		else
+			skb->pkt_type=PACKET_MULTICAST;
+	}
+	
+	/*
+	 *	This ALLMULTI check should be redundant by 1.4
+	 *	so don't forget to remove it.
+	 *
+	 *	Seems, you forgot to remove it. All silly devices
+	 *	seems to set IFF_PROMISC.
+	 */
+	 
+	else if(1 /*dev->flags&IFF_PROMISC*/)
+	{
+		if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
+			skb->pkt_type=PACKET_OTHERHOST;
+	}
+	
+	if (ntohs(eth->h_proto) >= 1536)
+		return eth->h_proto;
+		
+	rawp = skb->data;
+	
+	/*
+	 *	This is a magic hack to spot IPX packets. Older Novell breaks
+	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
+	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+	 *	won't work for fault tolerant netware but does for the rest.
+	 */
+	if (*(unsigned short *)rawp == 0xFFFF)
+		return htons(ETH_P_802_3);
+		
+	/*
+	 *	Real 802.2 LLC
+	 */
+	return htons(ETH_P_802_2);
+}
+
+static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
+{
+	struct ethhdr *eth = eth_hdr(skb);
+	memcpy(haddr, eth->h_source, ETH_ALEN);
+	return ETH_ALEN;
+}
+
+int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+{
+	unsigned short type = hh->hh_type;
+	struct ethhdr *eth;
+	struct net_device *dev = neigh->dev;
+
+	eth = (struct ethhdr*)
+		(((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
+
+	if (type == __constant_htons(ETH_P_802_3))
+		return -1;
+
+	eth->h_proto = type;
+	memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+	memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+	hh->hh_len = ETH_HLEN;
+	return 0;
+}
+
+/*
+ * Called by Address Resolution module to notify changes in address.
+ */
+
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+{
+	memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
+	       haddr, dev->addr_len);
+}
+
+EXPORT_SYMBOL(eth_type_trans);
+
+static int eth_mac_addr(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr=p;
+	if (netif_running(dev))
+		return -EBUSY;
+	memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+	return 0;
+}
+
+static int eth_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if ((new_mtu < 68) || (new_mtu > 1500))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+/*
+ * Fill in the fields of the device structure with ethernet-generic values.
+ */
+void ether_setup(struct net_device *dev)
+{
+	dev->change_mtu		= eth_change_mtu;
+	dev->hard_header	= eth_header;
+	dev->rebuild_header 	= eth_rebuild_header;
+	dev->set_mac_address 	= eth_mac_addr;
+	dev->hard_header_cache	= eth_header_cache;
+	dev->header_cache_update= eth_header_cache_update;
+	dev->hard_header_parse	= eth_header_parse;
+
+	dev->type		= ARPHRD_ETHER;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->mtu		= 1500; /* eth_mtu */
+	dev->addr_len		= ETH_ALEN;
+	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
+	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
+	
+	memset(dev->broadcast,0xFF, ETH_ALEN);
+
+}
+EXPORT_SYMBOL(ether_setup);
+
+/**
+ * alloc_etherdev - Allocates and sets up an ethernet device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *	for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic
+ * values. Basically does everything except registering the device.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_etherdev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+}
+EXPORT_SYMBOL(alloc_etherdev);
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
new file mode 100644
index 00000000000..98a494be603
--- /dev/null
+++ b/net/ethernet/pe2.c
@@ -0,0 +1,40 @@
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/datalink.h>
+
+static int pEII_request(struct datalink_proto *dl,
+			struct sk_buff *skb, unsigned char *dest_node)
+{
+	struct net_device *dev = skb->dev;
+
+	skb->protocol = htons(ETH_P_IPX);
+	if (dev->hard_header)
+		dev->hard_header(skb, dev, ETH_P_IPX,
+				 dest_node, NULL, skb->len);
+	return dev_queue_xmit(skb);
+}
+
+struct datalink_proto *make_EII_client(void)
+{
+	struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
+
+	if (proto) {
+		proto->header_length = 0;
+		proto->request = pEII_request;
+	}
+
+	return proto;
+}
+
+void destroy_EII_client(struct datalink_proto *dl)
+{
+	if (dl)
+		kfree(dl);
+}
+
+EXPORT_SYMBOL(destroy_EII_client);
+EXPORT_SYMBOL(make_EII_client);
diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c
new file mode 100644
index 00000000000..b81a6d53234
--- /dev/null
+++ b/net/ethernet/sysctl_net_ether.c
@@ -0,0 +1,13 @@
+/* -*- linux-c -*-
+ * sysctl_net_ether.c: sysctl interface to net Ethernet subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/ether directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+
+ctl_table ether_table[] = {
+	{0}
+};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
new file mode 100644
index 00000000000..6d3e8b1bd1f
--- /dev/null
+++ b/net/ipv4/Kconfig
@@ -0,0 +1,411 @@
+#
+# IP configuration
+#
+config IP_MULTICAST
+	bool "IP: multicasting"
+	depends on INET
+	help
+	  This is code for addressing several networked computers at once,
+	  enlarging your kernel by about 2 KB. You need multicasting if you
+	  intend to participate in the MBONE, a high bandwidth network on top
+	  of the Internet which carries audio and video broadcasts. More
+	  information about the MBONE is on the WWW at
+	  <http://www-itg.lbl.gov/mbone/>. Information about the multicast
+	  capabilities of the various network cards is contained in
+	  <file:Documentation/networking/multicast.txt>. For most people, it's
+	  safe to say N.
+
+config IP_ADVANCED_ROUTER
+	bool "IP: advanced router"
+	depends on INET
+	---help---
+	  If you intend to run your Linux box mostly as a router, i.e. as a
+	  computer that forwards and redistributes network packets, say Y; you
+	  will then be presented with several options that allow more precise
+	  control about the routing process.
+
+	  The answer to this question won't directly affect the kernel:
+	  answering N will just cause the configurator to skip all the
+	  questions about advanced routing.
+
+	  Note that your box can only act as a router if you enable IP
+	  forwarding in your kernel; you can do that by saying Y to "/proc
+	  file system support" and "Sysctl support" below and executing the
+	  line
+
+	  echo "1" > /proc/sys/net/ipv4/ip_forward
+
+	  at boot time after the /proc file system has been mounted.
+
+	  If you turn on IP forwarding, you will also get the rp_filter, which
+	  automatically rejects incoming packets if the routing table entry
+	  for their source address doesn't match the network interface they're
+	  arriving on. This has security advantages because it prevents the
+	  so-called IP spoofing, however it can pose problems if you use
+	  asymmetric routing (packets from you to a host take a different path
+	  than packets from that host to you) or if you operate a non-routing
+	  host which has several IP addresses on different interfaces. To turn
+	  rp_filter off use:
+
+	  echo 0 > /proc/sys/net/ipv4/conf/<device>/rp_filter
+	  or
+	  echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+
+	  If unsure, say N here.
+
+config IP_MULTIPLE_TABLES
+	bool "IP: policy routing"
+	depends on IP_ADVANCED_ROUTER
+	---help---
+	  Normally, a router decides what to do with a received packet based
+	  solely on the packet's final destination address. If you say Y here,
+	  the Linux router will also be able to take the packet's source
+	  address into account. Furthermore, the TOS (Type-Of-Service) field
+	  of the packet can be used for routing decisions as well.
+
+	  If you are interested in this, please see the preliminary
+	  documentation at <http://www.compendium.com.ar/policy-routing.txt>
+	  and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
+	  You will need supporting software from
+	  <ftp://ftp.tux.org/pub/net/ip-routing/>.
+
+	  If unsure, say N.
+
+config IP_ROUTE_FWMARK
+	bool "IP: use netfilter MARK value as routing key"
+	depends on IP_MULTIPLE_TABLES && NETFILTER
+	help
+	  If you say Y here, you will be able to specify different routes for
+	  packets with different mark values (see iptables(8), MARK target).
+
+config IP_ROUTE_MULTIPATH
+	bool "IP: equal cost multipath"
+	depends on IP_ADVANCED_ROUTER
+	help
+	  Normally, the routing tables specify a single action to be taken in
+	  a deterministic manner for a given packet. If you say Y here
+	  however, it becomes possible to attach several actions to a packet
+	  pattern, in effect specifying several alternative paths to travel
+	  for those packets. The router considers all these paths to be of
+	  equal "cost" and chooses one of them in a non-deterministic fashion
+	  if a matching packet arrives.
+
+config IP_ROUTE_MULTIPATH_CACHED
+	bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
+	depends on: IP_ROUTE_MULTIPATH
+	help
+	  Normally, equal cost multipath routing is not supported by the
+	  routing cache. If you say Y here, alternative routes are cached
+	  and on cache lookup a route is chosen in a configurable fashion.
+
+	  If unsure, say N.
+
+config IP_ROUTE_MULTIPATH_RR
+	tristate "MULTIPATH: round robin algorithm"
+	depends on IP_ROUTE_MULTIPATH_CACHED
+	help
+	  Mulitpath routes are chosen according to Round Robin
+
+config IP_ROUTE_MULTIPATH_RANDOM
+	tristate "MULTIPATH: random algorithm"
+	depends on IP_ROUTE_MULTIPATH_CACHED
+	help
+	  Multipath routes are chosen in a random fashion. Actually,
+	  there is no weight for a route. The advantage of this policy
+	  is that it is implemented stateless and therefore introduces only
+	  a very small delay.
+
+config IP_ROUTE_MULTIPATH_WRANDOM
+	tristate "MULTIPATH: weighted random algorithm"
+	depends on IP_ROUTE_MULTIPATH_CACHED
+	help
+	  Multipath routes are chosen in a weighted random fashion. 
+	  The per route weights are the weights visible via ip route 2. As the
+	  corresponding state management introduces some overhead routing delay
+	  is increased.
+
+config IP_ROUTE_MULTIPATH_DRR
+	tristate "MULTIPATH: interface round robin algorithm"
+	depends on IP_ROUTE_MULTIPATH_CACHED
+	help
+	  Connections are distributed in a round robin fashion over the
+	  available interfaces. This policy makes sense if the connections 
+	  should be primarily distributed on interfaces and not on routes. 
+
+config IP_ROUTE_VERBOSE
+	bool "IP: verbose route monitoring"
+	depends on IP_ADVANCED_ROUTER
+	help
+	  If you say Y here, which is recommended, then the kernel will print
+	  verbose messages regarding the routing, for example warnings about
+	  received packets which look strange and could be evidence of an
+	  attack or a misconfigured system somewhere. The information is
+	  handled by the klogd daemon which is responsible for kernel messages
+	  ("man klogd").
+
+config IP_PNP
+	bool "IP: kernel level autoconfiguration"
+	depends on INET
+	help
+	  This enables automatic configuration of IP addresses of devices and
+	  of the routing table during kernel boot, based on either information
+	  supplied on the kernel command line or by BOOTP or RARP protocols.
+	  You need to say Y only for diskless machines requiring network
+	  access to boot (in which case you want to say Y to "Root file system
+	  on NFS" as well), because all other machines configure the network
+	  in their startup scripts.
+
+config IP_PNP_DHCP
+	bool "IP: DHCP support"
+	depends on IP_PNP
+	---help---
+	  If you want your Linux box to mount its whole root file system (the
+	  one containing the directory /) from some other computer over the
+	  net via NFS and you want the IP address of your computer to be
+	  discovered automatically at boot time using the DHCP protocol (a
+	  special protocol designed for doing this job), say Y here. In case
+	  the boot ROM of your network card was designed for booting Linux and
+	  does DHCP itself, providing all necessary information on the kernel
+	  command line, you can say N here.
+
+	  If unsure, say Y. Note that if you want to use DHCP, a DHCP server
+	  must be operating on your network.  Read
+	  <file:Documentation/nfsroot.txt> for details.
+
+config IP_PNP_BOOTP
+	bool "IP: BOOTP support"
+	depends on IP_PNP
+	---help---
+	  If you want your Linux box to mount its whole root file system (the
+	  one containing the directory /) from some other computer over the
+	  net via NFS and you want the IP address of your computer to be
+	  discovered automatically at boot time using the BOOTP protocol (a
+	  special protocol designed for doing this job), say Y here. In case
+	  the boot ROM of your network card was designed for booting Linux and
+	  does BOOTP itself, providing all necessary information on the kernel
+	  command line, you can say N here. If unsure, say Y. Note that if you
+	  want to use BOOTP, a BOOTP server must be operating on your network.
+	  Read <file:Documentation/nfsroot.txt> for details.
+
+config IP_PNP_RARP
+	bool "IP: RARP support"
+	depends on IP_PNP
+	help
+	  If you want your Linux box to mount its whole root file system (the
+	  one containing the directory /) from some other computer over the
+	  net via NFS and you want the IP address of your computer to be
+	  discovered automatically at boot time using the RARP protocol (an
+	  older protocol which is being obsoleted by BOOTP and DHCP), say Y
+	  here. Note that if you want to use RARP, a RARP server must be
+	  operating on your network. Read <file:Documentation/nfsroot.txt> for
+	  details.
+
+# not yet ready..
+#   bool '    IP: ARP support' CONFIG_IP_PNP_ARP		
+config NET_IPIP
+	tristate "IP: tunneling"
+	depends on INET
+	select INET_TUNNEL
+	---help---
+	  Tunneling means encapsulating data of one protocol type within
+	  another protocol and sending it over a channel that understands the
+	  encapsulating protocol. This particular tunneling driver implements
+	  encapsulation of IP within IP, which sounds kind of pointless, but
+	  can be useful if you want to make your (or some other) machine
+	  appear on a different network than it physically is, or to use
+	  mobile-IP facilities (allowing laptops to seamlessly move between
+	  networks without changing their IP addresses).
+
+	  Saying Y to this option will produce two modules ( = code which can
+	  be inserted in and removed from the running kernel whenever you
+	  want). Most people won't need this and can say N.
+
+config NET_IPGRE
+	tristate "IP: GRE tunnels over IP"
+	depends on INET
+	select XFRM
+	help
+	  Tunneling means encapsulating data of one protocol type within
+	  another protocol and sending it over a channel that understands the
+	  encapsulating protocol. This particular tunneling driver implements
+	  GRE (Generic Routing Encapsulation) and at this time allows
+	  encapsulating of IPv4 or IPv6 over existing IPv4 infrastructure.
+	  This driver is useful if the other endpoint is a Cisco router: Cisco
+	  likes GRE much better than the other Linux tunneling driver ("IP
+	  tunneling" above). In addition, GRE allows multicast redistribution
+	  through the tunnel.
+
+config NET_IPGRE_BROADCAST
+	bool "IP: broadcast GRE over IP"
+	depends on IP_MULTICAST && NET_IPGRE
+	help
+	  One application of GRE/IP is to construct a broadcast WAN (Wide Area
+	  Network), which looks like a normal Ethernet LAN (Local Area
+	  Network), but can be distributed all over the Internet. If you want
+	  to do that, say Y here and to "IP multicast routing" below.
+
+config IP_MROUTE
+	bool "IP: multicast routing"
+	depends on IP_MULTICAST
+	help
+	  This is used if you want your machine to act as a router for IP
+	  packets that have several destination addresses. It is needed on the
+	  MBONE, a high bandwidth network on top of the Internet which carries
+	  audio and video broadcasts. In order to do that, you would most
+	  likely run the program mrouted. Information about the multicast
+	  capabilities of the various network cards is contained in
+	  <file:Documentation/networking/multicast.txt>. If you haven't heard
+	  about it, you don't need it.
+
+config IP_PIMSM_V1
+	bool "IP: PIM-SM version 1 support"
+	depends on IP_MROUTE
+	help
+	  Kernel side support for Sparse Mode PIM (Protocol Independent
+	  Multicast) version 1. This multicast routing protocol is used widely
+	  because Cisco supports it. You need special software to use it
+	  (pimd-v1). Please see <http://netweb.usc.edu/pim/> for more
+	  information about PIM.
+
+	  Say Y if you want to use PIM-SM v1. Note that you can say N here if
+	  you just want to use Dense Mode PIM.
+
+config IP_PIMSM_V2
+	bool "IP: PIM-SM version 2 support"
+	depends on IP_MROUTE
+	help
+	  Kernel side support for Sparse Mode PIM version 2. In order to use
+	  this, you need an experimental routing daemon supporting it (pimd or
+	  gated-5). This routing protocol is not used widely, so say N unless
+	  you want to play with it.
+
+config ARPD
+	bool "IP: ARP daemon support (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+	---help---
+	  Normally, the kernel maintains an internal cache which maps IP
+	  addresses to hardware addresses on the local network, so that
+	  Ethernet/Token Ring/ etc. frames are sent to the proper address on
+	  the physical networking layer. For small networks having a few
+	  hundred directly connected hosts or less, keeping this address
+	  resolution (ARP) cache inside the kernel works well. However,
+	  maintaining an internal ARP cache does not work well for very large
+	  switched networks, and will use a lot of kernel memory if TCP/IP
+	  connections are made to many machines on the network.
+
+	  If you say Y here, the kernel's internal ARP cache will never grow
+	  to more than 256 entries (the oldest entries are expired in a LIFO
+	  manner) and communication will be attempted with the user space ARP
+	  daemon arpd. Arpd then answers the address resolution request either
+	  from its own cache or by asking the net.
+
+	  This code is experimental and also obsolete. If you want to use it,
+	  you need to find a version of the daemon arpd on the net somewhere,
+	  and you should also say Y to "Kernel/User network link driver",
+	  below. If unsure, say N.
+
+config SYN_COOKIES
+	bool "IP: TCP syncookie support (disabled per default)"
+	depends on INET
+	---help---
+	  Normal TCP/IP networking is open to an attack known as "SYN
+	  flooding". This denial-of-service attack prevents legitimate remote
+	  users from being able to connect to your computer during an ongoing
+	  attack and requires very little work from the attacker, who can
+	  operate from anywhere on the Internet.
+
+	  SYN cookies provide protection against this type of attack. If you
+	  say Y here, the TCP/IP stack will use a cryptographic challenge
+	  protocol known as "SYN cookies" to enable legitimate users to
+	  continue to connect, even when your machine is under attack. There
+	  is no need for the legitimate users to change their TCP/IP software;
+	  SYN cookies work transparently to them. For technical information
+	  about SYN cookies, check out <http://cr.yp.to/syncookies.html>.
+
+	  If you are SYN flooded, the source address reported by the kernel is
+	  likely to have been forged by the attacker; it is only reported as
+	  an aid in tracing the packets to their actual source and should not
+	  be taken as absolute truth.
+
+	  SYN cookies may prevent correct error reporting on clients when the
+	  server is really overloaded. If this happens frequently better turn
+	  them off.
+
+	  If you say Y here, note that SYN cookies aren't enabled by default;
+	  you can enable them by saying Y to "/proc file system support" and
+	  "Sysctl support" below and executing the command
+
+	  echo 1 >/proc/sys/net/ipv4/tcp_syncookies
+
+	  at boot time after the /proc file system has been mounted.
+
+	  If unsure, say N.
+
+config INET_AH
+	tristate "IP: AH transformation"
+	depends on INET
+	select XFRM
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	---help---
+	  Support for IPsec AH.
+
+	  If unsure, say Y.
+
+config INET_ESP
+	tristate "IP: ESP transformation"
+	depends on INET
+	select XFRM
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_DES
+	---help---
+	  Support for IPsec ESP.
+
+	  If unsure, say Y.
+
+config INET_IPCOMP
+	tristate "IP: IPComp transformation"
+	depends on INET
+	select XFRM
+	select INET_TUNNEL
+	select CRYPTO
+	select CRYPTO_DEFLATE
+	---help---
+	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
+	  typically needed for IPsec.
+	  
+	  If unsure, say Y.
+
+config INET_TUNNEL
+	tristate "IP: tunnel transformation"
+	depends on INET
+	select XFRM
+	---help---
+	  Support for generic IP tunnel transformation, which is required by
+	  the IP tunneling module as well as tunnel mode IPComp.
+	  
+	  If unsure, say Y.
+
+config IP_TCPDIAG
+	tristate "IP: TCP socket monitoring interface"
+	depends on INET
+	default y
+	---help---
+	  Support for TCP socket monitoring interface used by native Linux
+	  tools such as ss. ss is included in iproute2, currently downloadable
+	  at <http://developer.osdl.org/dev/iproute2>. If you want IPv6 support
+	  and have selected IPv6 as a module, you need to build this as a
+	  module too.
+	  
+	  If unsure, say Y.
+
+config IP_TCPDIAG_IPV6
+	def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6)
+
+source "net/ipv4/ipvs/Kconfig"
+
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
new file mode 100644
index 00000000000..8b379627ebb
--- /dev/null
+++ b/net/ipv4/Makefile
@@ -0,0 +1,33 @@
+#
+# Makefile for the Linux TCP/IP (INET) layer.
+#
+
+obj-y     := utils.o route.o inetpeer.o protocol.o \
+	     ip_input.o ip_fragment.o ip_forward.o ip_options.o \
+	     ip_output.o ip_sockglue.o \
+	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
+	     datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
+
+obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
+obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_NET_IPIP) += ipip.o
+obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_SYN_COOKIES) += syncookies.o
+obj-$(CONFIG_INET_AH) += ah4.o
+obj-$(CONFIG_INET_ESP) += esp4.o
+obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
+obj-$(CONFIG_INET_TUNNEL) += xfrm4_tunnel.o 
+obj-$(CONFIG_IP_PNP) += ipconfig.o
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
+obj-$(CONFIG_NETFILTER)	+= netfilter/
+obj-$(CONFIG_IP_VS) += ipvs/
+obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o 
+obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
+
+obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
+		      xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
new file mode 100644
index 00000000000..c34dab67e46
--- /dev/null
+++ b/net/ipv4/af_inet.c
@@ -0,0 +1,1188 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		PF_INET protocol family socket handler.
+ *
+ * Version:	$Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Alan Cox, <A.Cox@swansea.ac.uk>
+ *
+ * Changes (see also sock.c)
+ *
+ *		piggy,
+ *		Karl Knutson	:	Socket protocol table
+ *		A.N.Kuznetsov	:	Socket death error in accept().
+ *		John Richardson :	Fix non blocking error in connect()
+ *					so sockets that fail to connect
+ *					don't return -EINPROGRESS.
+ *		Alan Cox	:	Asynchronous I/O support
+ *		Alan Cox	:	Keep correct socket pointer on sock
+ *					structures
+ *					when accept() ed
+ *		Alan Cox	:	Semantics of SO_LINGER aren't state
+ *					moved to close when you look carefully.
+ *					With this fixed and the accept bug fixed
+ *					some RPC stuff seems happier.
+ *		Niibe Yutaka	:	4.4BSD style write async I/O
+ *		Alan Cox,
+ *		Tony Gale 	:	Fixed reuse semantics.
+ *		Alan Cox	:	bind() shouldn't abort existing but dead
+ *					sockets. Stops FTP netin:.. I hope.
+ *		Alan Cox	:	bind() works correctly for RAW sockets.
+ *					Note that FreeBSD at least was broken
+ *					in this respect so be careful with
+ *					compatibility tests...
+ *		Alan Cox	:	routing cache support
+ *		Alan Cox	:	memzero the socket structure for
+ *					compactness.
+ *		Matt Day	:	nonblock connect error handler
+ *		Alan Cox	:	Allow large numbers of pending sockets
+ *					(eg for big web sites), but only if
+ *					specifically application requested.
+ *		Alan Cox	:	New buffering throughout IP. Used
+ *					dumbly.
+ *		Alan Cox	:	New buffering now used smartly.
+ *		Alan Cox	:	BSD rather than common sense
+ *					interpretation of listen.
+ *		Germano Caronni	:	Assorted small races.
+ *		Alan Cox	:	sendmsg/recvmsg basic support.
+ *		Alan Cox	:	Only sendmsg/recvmsg now supported.
+ *		Alan Cox	:	Locked down bind (see security list).
+ *		Alan Cox	:	Loosened bind a little.
+ *		Mike McLagan	:	ADD/DEL DLCI Ioctls
+ *	Willy Konynenberg	:	Transparent proxying support.
+ *		David S. Miller	:	New socket lookup architecture.
+ *					Some other random speedups.
+ *		Cyrus Durgin	:	Cleaned up file for kmod hacks.
+ *		Andi Kleen	:	Fix inet_stream_connect TCP race.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/igmp.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <net/icmp.h>
+#include <net/ipip.h>
+#include <net/inet_common.h>
+#include <net/xfrm.h>
+#ifdef CONFIG_IP_MROUTE
+#include <linux/mroute.h>
+#endif
+
+DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
+
+#ifdef INET_REFCNT_DEBUG
+atomic_t inet_sock_nr;
+#endif
+
+extern void ip_mc_drop_socket(struct sock *sk);
+
+/* The inetsw table contains everything that inet_create needs to
+ * build a new socket.
+ */
+static struct list_head inetsw[SOCK_MAX];
+static DEFINE_SPINLOCK(inetsw_lock);
+
+/* New destruction routine */
+
+void inet_sock_destruct(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	__skb_queue_purge(&sk->sk_receive_queue);
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
+		printk("Attempt to release TCP socket in state %d %p\n",
+		       sk->sk_state, sk);
+		return;
+	}
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Attempt to release alive inet socket %p\n", sk);
+		return;
+	}
+
+	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(!sk->sk_wmem_queued);
+	BUG_TRAP(!sk->sk_forward_alloc);
+
+	if (inet->opt)
+		kfree(inet->opt);
+	dst_release(sk->sk_dst_cache);
+#ifdef INET_REFCNT_DEBUG
+	atomic_dec(&inet_sock_nr);
+	printk(KERN_DEBUG "INET socket %p released, %d are still alive\n",
+	       sk, atomic_read(&inet_sock_nr));
+#endif
+}
+
+/*
+ *	The routines beyond this point handle the behaviour of an AF_INET
+ *	socket object. Mostly it punts to the subprotocols of IP to do
+ *	the work.
+ */
+
+/*
+ *	Automatically bind an unbound socket.
+ */
+
+static int inet_autobind(struct sock *sk)
+{
+	struct inet_sock *inet;
+	/* We may need to bind the socket. */
+	lock_sock(sk);
+	inet = inet_sk(sk);
+	if (!inet->num) {
+		if (sk->sk_prot->get_port(sk, 0)) {
+			release_sock(sk);
+			return -EAGAIN;
+		}
+		inet->sport = htons(inet->num);
+	}
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ *	Move a socket into listening state.
+ */
+int inet_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	unsigned char old_state;
+	int err;
+
+	lock_sock(sk);
+
+	err = -EINVAL;
+	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
+		goto out;
+
+	old_state = sk->sk_state;
+	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+		goto out;
+
+	/* Really, if the socket is already in listen state
+	 * we can only allow the backlog to be adjusted.
+	 */
+	if (old_state != TCP_LISTEN) {
+		err = tcp_listen_start(sk);
+		if (err)
+			goto out;
+	}
+	sk->sk_max_ack_backlog = backlog;
+	err = 0;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+/*
+ *	Create an inet socket.
+ */
+
+static int inet_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct list_head *p;
+	struct inet_protosw *answer;
+	struct inet_sock *inet;
+	struct proto *answer_prot;
+	unsigned char answer_flags;
+	char answer_no_check;
+	int err;
+
+	sock->state = SS_UNCONNECTED;
+
+	/* Look for the requested type/protocol pair. */
+	answer = NULL;
+	rcu_read_lock();
+	list_for_each_rcu(p, &inetsw[sock->type]) {
+		answer = list_entry(p, struct inet_protosw, list);
+
+		/* Check the non-wild match. */
+		if (protocol == answer->protocol) {
+			if (protocol != IPPROTO_IP)
+				break;
+		} else {
+			/* Check for the two wild cases. */
+			if (IPPROTO_IP == protocol) {
+				protocol = answer->protocol;
+				break;
+			}
+			if (IPPROTO_IP == answer->protocol)
+				break;
+		}
+		answer = NULL;
+	}
+
+	err = -ESOCKTNOSUPPORT;
+	if (!answer)
+		goto out_rcu_unlock;
+	err = -EPERM;
+	if (answer->capability > 0 && !capable(answer->capability))
+		goto out_rcu_unlock;
+	err = -EPROTONOSUPPORT;
+	if (!protocol)
+		goto out_rcu_unlock;
+
+	sock->ops = answer->ops;
+	answer_prot = answer->prot;
+	answer_no_check = answer->no_check;
+	answer_flags = answer->flags;
+	rcu_read_unlock();
+
+	BUG_TRAP(answer_prot->slab != NULL);
+
+	err = -ENOBUFS;
+	sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1);
+	if (sk == NULL)
+		goto out;
+
+	err = 0;
+	sk->sk_no_check = answer_no_check;
+	if (INET_PROTOSW_REUSE & answer_flags)
+		sk->sk_reuse = 1;
+
+	inet = inet_sk(sk);
+
+	if (SOCK_RAW == sock->type) {
+		inet->num = protocol;
+		if (IPPROTO_RAW == protocol)
+			inet->hdrincl = 1;
+	}
+
+	if (ipv4_config.no_pmtu_disc)
+		inet->pmtudisc = IP_PMTUDISC_DONT;
+	else
+		inet->pmtudisc = IP_PMTUDISC_WANT;
+
+	inet->id = 0;
+
+	sock_init_data(sock, sk);
+
+	sk->sk_destruct	   = inet_sock_destruct;
+	sk->sk_family	   = PF_INET;
+	sk->sk_protocol	   = protocol;
+	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+
+	inet->uc_ttl	= -1;
+	inet->mc_loop	= 1;
+	inet->mc_ttl	= 1;
+	inet->mc_index	= 0;
+	inet->mc_list	= NULL;
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet_sock_nr);
+#endif
+
+	if (inet->num) {
+		/* It assumes that any protocol which allows
+		 * the user to assign a number at socket
+		 * creation time automatically
+		 * shares.
+		 */
+		inet->sport = htons(inet->num);
+		/* Add to protocol hash chains. */
+		sk->sk_prot->hash(sk);
+	}
+
+	if (sk->sk_prot->init) {
+		err = sk->sk_prot->init(sk);
+		if (err)
+			sk_common_release(sk);
+	}
+out:
+	return err;
+out_rcu_unlock:
+	rcu_read_unlock();
+	goto out;
+}
+
+
+/*
+ *	The peer socket should always be NULL (or else). When we call this
+ *	function we are destroying the object and from then on nobody
+ *	should refer to it.
+ */
+int inet_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		long timeout;
+
+		/* Applications forget to leave groups before exiting */
+		ip_mc_drop_socket(sk);
+
+		/* If linger is set, we don't return until the close
+		 * is complete.  Otherwise we return immediately. The
+		 * actually closing is done the same either way.
+		 *
+		 * If the close is due to the process exiting, we never
+		 * linger..
+		 */
+		timeout = 0;
+		if (sock_flag(sk, SOCK_LINGER) &&
+		    !(current->flags & PF_EXITING))
+			timeout = sk->sk_lingertime;
+		sock->sk = NULL;
+		sk->sk_prot->close(sk, timeout);
+	}
+	return 0;
+}
+
+/* It is off by default, see below. */
+int sysctl_ip_nonlocal_bind;
+
+int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	unsigned short snum;
+	int chk_addr_ret;
+	int err;
+
+	/* If the socket has its own bind function then use it. (RAW) */
+	if (sk->sk_prot->bind) {
+		err = sk->sk_prot->bind(sk, uaddr, addr_len);
+		goto out;
+	}
+	err = -EINVAL;
+	if (addr_len < sizeof(struct sockaddr_in))
+		goto out;
+
+	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+
+	/* Not specified by any standard per-se, however it breaks too
+	 * many applications when removed.  It is unfortunate since
+	 * allowing applications to make a non-local bind solves
+	 * several problems with systems using dynamic addressing.
+	 * (ie. your servers still start up even if your ISDN link
+	 *  is temporarily down)
+	 */
+	err = -EADDRNOTAVAIL;
+	if (!sysctl_ip_nonlocal_bind &&
+	    !inet->freebind &&
+	    addr->sin_addr.s_addr != INADDR_ANY &&
+	    chk_addr_ret != RTN_LOCAL &&
+	    chk_addr_ret != RTN_MULTICAST &&
+	    chk_addr_ret != RTN_BROADCAST)
+		goto out;
+
+	snum = ntohs(addr->sin_port);
+	err = -EACCES;
+	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		goto out;
+
+	/*      We keep a pair of addresses. rcv_saddr is the one
+	 *      used by hash lookups, and saddr is used for transmit.
+	 *
+	 *      In the BSD API these are the same except where it
+	 *      would be illegal to use them (multicast/broadcast) in
+	 *      which case the sending device address is used.
+	 */
+	lock_sock(sk);
+
+	/* Check these errors (active socket, double bind). */
+	err = -EINVAL;
+	if (sk->sk_state != TCP_CLOSE || inet->num)
+		goto out_release_sock;
+
+	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
+		inet->saddr = 0;  /* Use device */
+
+	/* Make sure we are allowed to bind here. */
+	if (sk->sk_prot->get_port(sk, snum)) {
+		inet->saddr = inet->rcv_saddr = 0;
+		err = -EADDRINUSE;
+		goto out_release_sock;
+	}
+
+	if (inet->rcv_saddr)
+		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+	if (snum)
+		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+	inet->sport = htons(inet->num);
+	inet->daddr = 0;
+	inet->dport = 0;
+	sk_dst_reset(sk);
+	err = 0;
+out_release_sock:
+	release_sock(sk);
+out:
+	return err;
+}
+
+int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+		       int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+
+	if (uaddr->sa_family == AF_UNSPEC)
+		return sk->sk_prot->disconnect(sk, flags);
+
+	if (!inet_sk(sk)->num && inet_autobind(sk))
+		return -EAGAIN;
+	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
+}
+
+static long inet_wait_for_connect(struct sock *sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+	/* Basic assumption: if someone sets sk->sk_err, he _must_
+	 * change state of the socket from TCP_SYN_*.
+	 * Connect() does not allow to get error notifications
+	 * without closing the socket.
+	 */
+	while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		if (signal_pending(current) || !timeo)
+			break;
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	return timeo;
+}
+
+/*
+ *	Connect to a remote host. There is regrettably still a little
+ *	TCP 'magic' in here.
+ */
+int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+			int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err;
+	long timeo;
+
+	lock_sock(sk);
+
+	if (uaddr->sa_family == AF_UNSPEC) {
+		err = sk->sk_prot->disconnect(sk, flags);
+		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+		goto out;
+	}
+
+	switch (sock->state) {
+	default:
+		err = -EINVAL;
+		goto out;
+	case SS_CONNECTED:
+		err = -EISCONN;
+		goto out;
+	case SS_CONNECTING:
+		err = -EALREADY;
+		/* Fall out of switch with err, set for this state */
+		break;
+	case SS_UNCONNECTED:
+		err = -EISCONN;
+		if (sk->sk_state != TCP_CLOSE)
+			goto out;
+
+		err = sk->sk_prot->connect(sk, uaddr, addr_len);
+		if (err < 0)
+			goto out;
+
+  		sock->state = SS_CONNECTING;
+
+		/* Just entered SS_CONNECTING state; the only
+		 * difference is that return value in non-blocking
+		 * case is EINPROGRESS, rather than EALREADY.
+		 */
+		err = -EINPROGRESS;
+		break;
+	}
+
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		/* Error code is set above */
+		if (!timeo || !inet_wait_for_connect(sk, timeo))
+			goto out;
+
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			goto out;
+	}
+
+	/* Connection was closed by RST, timeout, ICMP error
+	 * or another process disconnected us.
+	 */
+	if (sk->sk_state == TCP_CLOSE)
+		goto sock_error;
+
+	/* sk->sk_err may be not zero now, if RECVERR was ordered by user
+	 * and error was received after socket entered established state.
+	 * Hence, it is handled normally after connect() return successfully.
+	 */
+
+	sock->state = SS_CONNECTED;
+	err = 0;
+out:
+	release_sock(sk);
+	return err;
+
+sock_error:
+	err = sock_error(sk) ? : -ECONNABORTED;
+	sock->state = SS_UNCONNECTED;
+	if (sk->sk_prot->disconnect(sk, flags))
+		sock->state = SS_DISCONNECTING;
+	goto out;
+}
+
+/*
+ *	Accept a pending connection. The TCP layer now gives BSD semantics.
+ */
+
+int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk1 = sock->sk;
+	int err = -EINVAL;
+	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+
+	if (!sk2)
+		goto do_err;
+
+	lock_sock(sk2);
+
+	BUG_TRAP((1 << sk2->sk_state) &
+		 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));
+
+	sock_graft(sk2, newsock);
+
+	newsock->state = SS_CONNECTED;
+	err = 0;
+	release_sock(sk2);
+do_err:
+	return err;
+}
+
+
+/*
+ *	This does both peername and sockname.
+ */
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
+			int *uaddr_len, int peer)
+{
+	struct sock *sk		= sock->sk;
+	struct inet_sock *inet	= inet_sk(sk);
+	struct sockaddr_in *sin	= (struct sockaddr_in *)uaddr;
+
+	sin->sin_family = AF_INET;
+	if (peer) {
+		if (!inet->dport ||
+		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+		     peer == 1))
+			return -ENOTCONN;
+		sin->sin_port = inet->dport;
+		sin->sin_addr.s_addr = inet->daddr;
+	} else {
+		__u32 addr = inet->rcv_saddr;
+		if (!addr)
+			addr = inet->saddr;
+		sin->sin_port = inet->sport;
+		sin->sin_addr.s_addr = addr;
+	}
+	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	*uaddr_len = sizeof(*sin);
+	return 0;
+}
+
+int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		 size_t size)
+{
+	struct sock *sk = sock->sk;
+
+	/* We may need to bind the socket. */
+	if (!inet_sk(sk)->num && inet_autobind(sk))
+		return -EAGAIN;
+
+	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
+}
+
+
+static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+
+	/* We may need to bind the socket. */
+	if (!inet_sk(sk)->num && inet_autobind(sk))
+		return -EAGAIN;
+
+	if (sk->sk_prot->sendpage)
+		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
+	return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+
+int inet_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	/* This should really check to make sure
+	 * the socket is a TCP socket. (WHY AC...)
+	 */
+	how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
+		       1->2 bit 2 snds.
+		       2->3 */
+	if ((how & ~SHUTDOWN_MASK) || !how)	/* MAXINT->0 */
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sock->state == SS_CONNECTING) {
+		if ((1 << sk->sk_state) &
+		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
+			sock->state = SS_DISCONNECTING;
+		else
+			sock->state = SS_CONNECTED;
+	}
+
+	switch (sk->sk_state) {
+	case TCP_CLOSE:
+		err = -ENOTCONN;
+		/* Hack to wake up other listeners, who can poll for
+		   POLLHUP, even on eg. unconnected UDP sockets -- RR */
+	default:
+		sk->sk_shutdown |= how;
+		if (sk->sk_prot->shutdown)
+			sk->sk_prot->shutdown(sk, how);
+		break;
+
+	/* Remaining two branches are temporary solution for missing
+	 * close() in multithreaded environment. It is _not_ a good idea,
+	 * but we have no choice until close() is repaired at VFS level.
+	 */
+	case TCP_LISTEN:
+		if (!(how & RCV_SHUTDOWN))
+			break;
+		/* Fall through */
+	case TCP_SYN_SENT:
+		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
+		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+		break;
+	}
+
+	/* Wake up anyone sleeping in poll. */
+	sk->sk_state_change(sk);
+	release_sock(sk);
+	return err;
+}
+
+/*
+ *	ioctl() calls you can issue on an INET socket. Most of these are
+ *	device configuration and stuff and very rarely used. Some ioctls
+ *	pass on to the socket itself.
+ *
+ *	NOTE: I like the idea of a module for the config stuff. ie ifconfig
+ *	loads the devconfigure module does its configuring and unloads it.
+ *	There's a good 20K of config code hanging around the kernel.
+ */
+
+int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	switch (cmd) {
+		case SIOCGSTAMP:
+			err = sock_get_timestamp(sk, (struct timeval __user *)arg);
+			break;
+		case SIOCADDRT:
+		case SIOCDELRT:
+		case SIOCRTMSG:
+			err = ip_rt_ioctl(cmd, (void __user *)arg);
+			break;
+		case SIOCDARP:
+		case SIOCGARP:
+		case SIOCSARP:
+			err = arp_ioctl(cmd, (void __user *)arg);
+			break;
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCGIFNETMASK:
+		case SIOCSIFNETMASK:
+		case SIOCGIFDSTADDR:
+		case SIOCSIFDSTADDR:
+		case SIOCSIFPFLAGS:
+		case SIOCGIFPFLAGS:
+		case SIOCSIFFLAGS:
+			err = devinet_ioctl(cmd, (void __user *)arg);
+			break;
+		default:
+			if (!sk->sk_prot->ioctl ||
+			    (err = sk->sk_prot->ioctl(sk, cmd, arg)) ==
+			    					-ENOIOCTLCMD)
+				err = dev_ioctl(cmd, (void __user *)arg);
+			break;
+	}
+	return err;
+}
+
+struct proto_ops inet_stream_ops = {
+	.family =	PF_INET,
+	.owner =	THIS_MODULE,
+	.release =	inet_release,
+	.bind =		inet_bind,
+	.connect =	inet_stream_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	inet_accept,
+	.getname =	inet_getname,
+	.poll =		tcp_poll,
+	.ioctl =	inet_ioctl,
+	.listen =	inet_listen,
+	.shutdown =	inet_shutdown,
+	.setsockopt =	sock_common_setsockopt,
+	.getsockopt =	sock_common_getsockopt,
+	.sendmsg =	inet_sendmsg,
+	.recvmsg =	sock_common_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	tcp_sendpage
+};
+
+struct proto_ops inet_dgram_ops = {
+	.family =	PF_INET,
+	.owner =	THIS_MODULE,
+	.release =	inet_release,
+	.bind =		inet_bind,
+	.connect =	inet_dgram_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	inet_getname,
+	.poll =		udp_poll,
+	.ioctl =	inet_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	inet_shutdown,
+	.setsockopt =	sock_common_setsockopt,
+	.getsockopt =	sock_common_getsockopt,
+	.sendmsg =	inet_sendmsg,
+	.recvmsg =	sock_common_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	inet_sendpage,
+};
+
+/*
+ * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
+ * udp_poll
+ */
+static struct proto_ops inet_sockraw_ops = {
+	.family =	PF_INET,
+	.owner =	THIS_MODULE,
+	.release =	inet_release,
+	.bind =		inet_bind,
+	.connect =	inet_dgram_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	inet_getname,
+	.poll =		datagram_poll,
+	.ioctl =	inet_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	inet_shutdown,
+	.setsockopt =	sock_common_setsockopt,
+	.getsockopt =	sock_common_getsockopt,
+	.sendmsg =	inet_sendmsg,
+	.recvmsg =	sock_common_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	inet_sendpage,
+};
+
+static struct net_proto_family inet_family_ops = {
+	.family = PF_INET,
+	.create = inet_create,
+	.owner	= THIS_MODULE,
+};
+
+
+extern void tcp_init(void);
+extern void tcp_v4_init(struct net_proto_family *);
+
+/* Upon startup we insert all the elements in inetsw_array[] into
+ * the linked list inetsw.
+ */
+static struct inet_protosw inetsw_array[] =
+{
+        {
+                .type =       SOCK_STREAM,
+                .protocol =   IPPROTO_TCP,
+                .prot =       &tcp_prot,
+                .ops =        &inet_stream_ops,
+                .capability = -1,
+                .no_check =   0,
+                .flags =      INET_PROTOSW_PERMANENT,
+        },
+
+        {
+                .type =       SOCK_DGRAM,
+                .protocol =   IPPROTO_UDP,
+                .prot =       &udp_prot,
+                .ops =        &inet_dgram_ops,
+                .capability = -1,
+                .no_check =   UDP_CSUM_DEFAULT,
+                .flags =      INET_PROTOSW_PERMANENT,
+       },
+        
+
+       {
+               .type =       SOCK_RAW,
+               .protocol =   IPPROTO_IP,	/* wild card */
+               .prot =       &raw_prot,
+               .ops =        &inet_sockraw_ops,
+               .capability = CAP_NET_RAW,
+               .no_check =   UDP_CSUM_DEFAULT,
+               .flags =      INET_PROTOSW_REUSE,
+       }
+};
+
+#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw))
+
+void inet_register_protosw(struct inet_protosw *p)
+{
+	struct list_head *lh;
+	struct inet_protosw *answer;
+	int protocol = p->protocol;
+	struct list_head *last_perm;
+
+	spin_lock_bh(&inetsw_lock);
+
+	if (p->type >= SOCK_MAX)
+		goto out_illegal;
+
+	/* If we are trying to override a permanent protocol, bail. */
+	answer = NULL;
+	last_perm = &inetsw[p->type];
+	list_for_each(lh, &inetsw[p->type]) {
+		answer = list_entry(lh, struct inet_protosw, list);
+
+		/* Check only the non-wild match. */
+		if (INET_PROTOSW_PERMANENT & answer->flags) {
+			if (protocol == answer->protocol)
+				break;
+			last_perm = lh;
+		}
+
+		answer = NULL;
+	}
+	if (answer)
+		goto out_permanent;
+
+	/* Add the new entry after the last permanent entry if any, so that
+	 * the new entry does not override a permanent entry when matched with
+	 * a wild-card protocol. But it is allowed to override any existing
+	 * non-permanent entry.  This means that when we remove this entry, the 
+	 * system automatically returns to the old behavior.
+	 */
+	list_add_rcu(&p->list, last_perm);
+out:
+	spin_unlock_bh(&inetsw_lock);
+
+	synchronize_net();
+
+	return;
+
+out_permanent:
+	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
+	       protocol);
+	goto out;
+
+out_illegal:
+	printk(KERN_ERR
+	       "Ignoring attempt to register invalid socket type %d.\n",
+	       p->type);
+	goto out;
+}
+
+void inet_unregister_protosw(struct inet_protosw *p)
+{
+	if (INET_PROTOSW_PERMANENT & p->flags) {
+		printk(KERN_ERR
+		       "Attempt to unregister permanent protocol %d.\n",
+		       p->protocol);
+	} else {
+		spin_lock_bh(&inetsw_lock);
+		list_del_rcu(&p->list);
+		spin_unlock_bh(&inetsw_lock);
+
+		synchronize_net();
+	}
+}
+
+#ifdef CONFIG_IP_MULTICAST
+static struct net_protocol igmp_protocol = {
+	.handler =	igmp_rcv,
+};
+#endif
+
+static struct net_protocol tcp_protocol = {
+	.handler =	tcp_v4_rcv,
+	.err_handler =	tcp_v4_err,
+	.no_policy =	1,
+};
+
+static struct net_protocol udp_protocol = {
+	.handler =	udp_rcv,
+	.err_handler =	udp_err,
+	.no_policy =	1,
+};
+
+static struct net_protocol icmp_protocol = {
+	.handler =	icmp_rcv,
+};
+
+static int __init init_ipv4_mibs(void)
+{
+	net_statistics[0] = alloc_percpu(struct linux_mib);
+	net_statistics[1] = alloc_percpu(struct linux_mib);
+	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
+	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
+	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	udp_statistics[0] = alloc_percpu(struct udp_mib);
+	udp_statistics[1] = alloc_percpu(struct udp_mib);
+	if (!
+	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
+	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
+	     && udp_statistics[0] && udp_statistics[1]))
+		return -ENOMEM;
+
+	(void) tcp_mib_init();
+
+	return 0;
+}
+
+static int ipv4_proc_init(void);
+extern void ipfrag_init(void);
+
+static int __init inet_init(void)
+{
+	struct sk_buff *dummy_skb;
+	struct inet_protosw *q;
+	struct list_head *r;
+	int rc = -EINVAL;
+
+	if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
+		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
+		goto out;
+	}
+
+	rc = proto_register(&tcp_prot, 1);
+	if (rc)
+		goto out;
+
+	rc = proto_register(&udp_prot, 1);
+	if (rc)
+		goto out_unregister_tcp_proto;
+
+	rc = proto_register(&raw_prot, 1);
+	if (rc)
+		goto out_unregister_udp_proto;
+
+	/*
+	 *	Tell SOCKET that we are alive... 
+	 */
+
+  	(void)sock_register(&inet_family_ops);
+
+	/*
+	 *	Add all the base protocols.
+	 */
+
+	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
+	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
+	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
+#ifdef CONFIG_IP_MULTICAST
+	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
+		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
+#endif
+
+	/* Register the socket-side information for inet_create. */
+	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
+		inet_register_protosw(q);
+
+	/*
+	 *	Set the ARP module up
+	 */
+
+	arp_init();
+
+  	/*
+  	 *	Set the IP module up
+  	 */
+
+	ip_init();
+
+	tcp_v4_init(&inet_family_ops);
+
+	/* Setup TCP slab cache for open requests. */
+	tcp_init();
+
+
+	/*
+	 *	Set the ICMP layer up
+	 */
+
+	icmp_init(&inet_family_ops);
+
+	/*
+	 *	Initialise the multicast router
+	 */
+#if defined(CONFIG_IP_MROUTE)
+	ip_mr_init();
+#endif
+	/*
+	 *	Initialise per-cpu ipv4 mibs
+	 */ 
+
+	if(init_ipv4_mibs())
+		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
+	
+	ipv4_proc_init();
+
+	ipfrag_init();
+
+	rc = 0;
+out:
+	return rc;
+out_unregister_tcp_proto:
+	proto_unregister(&tcp_prot);
+out_unregister_udp_proto:
+	proto_unregister(&udp_prot);
+	goto out;
+}
+
+module_init(inet_init);
+
+/* ------------------------------------------------------------------------ */
+
+#ifdef CONFIG_PROC_FS
+extern int  fib_proc_init(void);
+extern void fib_proc_exit(void);
+extern int  ip_misc_proc_init(void);
+extern int  raw_proc_init(void);
+extern void raw_proc_exit(void);
+extern int  tcp4_proc_init(void);
+extern void tcp4_proc_exit(void);
+extern int  udp4_proc_init(void);
+extern void udp4_proc_exit(void);
+
+static int __init ipv4_proc_init(void)
+{
+	int rc = 0;
+
+	if (raw_proc_init())
+		goto out_raw;
+	if (tcp4_proc_init())
+		goto out_tcp;
+	if (udp4_proc_init())
+		goto out_udp;
+	if (fib_proc_init())
+		goto out_fib;
+	if (ip_misc_proc_init())
+		goto out_misc;
+out:
+	return rc;
+out_misc:
+	fib_proc_exit();
+out_fib:
+	udp4_proc_exit();
+out_udp:
+	tcp4_proc_exit();
+out_tcp:
+	raw_proc_exit();
+out_raw:
+	rc = -ENOMEM;
+	goto out;
+}
+
+#else /* CONFIG_PROC_FS */
+static int __init ipv4_proc_init(void)
+{
+	return 0;
+}
+#endif /* CONFIG_PROC_FS */
+
+MODULE_ALIAS_NETPROTO(PF_INET);
+
+EXPORT_SYMBOL(inet_accept);
+EXPORT_SYMBOL(inet_bind);
+EXPORT_SYMBOL(inet_dgram_connect);
+EXPORT_SYMBOL(inet_dgram_ops);
+EXPORT_SYMBOL(inet_getname);
+EXPORT_SYMBOL(inet_ioctl);
+EXPORT_SYMBOL(inet_listen);
+EXPORT_SYMBOL(inet_register_protosw);
+EXPORT_SYMBOL(inet_release);
+EXPORT_SYMBOL(inet_sendmsg);
+EXPORT_SYMBOL(inet_shutdown);
+EXPORT_SYMBOL(inet_sock_destruct);
+EXPORT_SYMBOL(inet_stream_connect);
+EXPORT_SYMBOL(inet_stream_ops);
+EXPORT_SYMBOL(inet_unregister_protosw);
+EXPORT_SYMBOL(net_statistics);
+
+#ifdef INET_REFCNT_DEBUG
+EXPORT_SYMBOL(inet_sock_nr);
+#endif
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
new file mode 100644
index 00000000000..0e98f2235b6
--- /dev/null
+++ b/net/ipv4/ah4.c
@@ -0,0 +1,335 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <net/icmp.h>
+#include <asm/scatterlist.h>
+
+
+/* Clear mutable options and find final destination to substitute
+ * into IP header for icv calculation. Options are already checked
+ * for validity, so paranoia is not required. */
+
+static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
+{
+	unsigned char * optptr = (unsigned char*)(iph+1);
+	int  l = iph->ihl*4 - sizeof(struct iphdr);
+	int  optlen;
+
+	while (l > 0) {
+		switch (*optptr) {
+		case IPOPT_END:
+			return 0;
+		case IPOPT_NOOP:
+			l--;
+			optptr++;
+			continue;
+		}
+		optlen = optptr[1];
+		if (optlen<2 || optlen>l)
+			return -EINVAL;
+		switch (*optptr) {
+		case IPOPT_SEC:
+		case 0x85:	/* Some "Extended Security" crap. */
+		case 0x86:	/* Another "Commercial Security" crap. */
+		case IPOPT_RA:
+		case 0x80|21:	/* RFC1770 */
+			break;
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			if (optlen < 6)
+				return -EINVAL;
+			memcpy(daddr, optptr+optlen-4, 4);
+			/* Fall through */
+		default:
+			memset(optptr+2, 0, optlen-2);
+		}
+		l -= optlen;
+		optptr += optlen;
+	}
+	return 0;
+}
+
+static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct iphdr *iph, *top_iph;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];
+	} tmp_iph;
+
+	top_iph = skb->nh.iph;
+	iph = &tmp_iph.iph;
+
+	iph->tos = top_iph->tos;
+	iph->ttl = top_iph->ttl;
+	iph->frag_off = top_iph->frag_off;
+
+	if (top_iph->ihl != 5) {
+		iph->daddr = top_iph->daddr;
+		memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
+		err = ip_clear_mutable_options(top_iph, &top_iph->daddr);
+		if (err)
+			goto error;
+	}
+
+	ah = (struct ip_auth_hdr *)((char *)top_iph+top_iph->ihl*4);
+	ah->nexthdr = top_iph->protocol;
+
+	top_iph->tos = 0;
+	top_iph->tot_len = htons(skb->len);
+	top_iph->frag_off = 0;
+	top_iph->ttl = 0;
+	top_iph->protocol = IPPROTO_AH;
+	top_iph->check = 0;
+
+	ahp = x->data;
+	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + 
+				   ahp->icv_trunc_len) >> 2) - 2;
+
+	ah->reserved = 0;
+	ah->spi = x->id.spi;
+	ah->seq_no = htonl(++x->replay.oseq);
+	ahp->icv(ahp, skb, ah->auth_data);
+
+	top_iph->tos = iph->tos;
+	top_iph->ttl = iph->ttl;
+	top_iph->frag_off = iph->frag_off;
+	if (top_iph->ihl != 5) {
+		top_iph->daddr = iph->daddr;
+		memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
+	}
+
+	ip_send_check(top_iph);
+
+	err = 0;
+
+error:
+	return err;
+}
+
+static int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	int ah_hlen;
+	struct iphdr *iph;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	char work_buf[60];
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+		goto out;
+
+	ah = (struct ip_auth_hdr*)skb->data;
+	ahp = x->data;
+	ah_hlen = (ah->hdrlen + 2) << 2;
+	
+	if (ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_full_len) &&
+	    ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len)) 
+		goto out;
+
+	if (!pskb_may_pull(skb, ah_hlen))
+		goto out;
+
+	/* We are going to _remove_ AH header to keep sockets happy,
+	 * so... Later this can change. */
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	ah = (struct ip_auth_hdr*)skb->data;
+	iph = skb->nh.iph;
+
+	memcpy(work_buf, iph, iph->ihl*4);
+
+	iph->ttl = 0;
+	iph->tos = 0;
+	iph->frag_off = 0;
+	iph->check = 0;
+	if (iph->ihl != 5) {
+		u32 dummy;
+		if (ip_clear_mutable_options(iph, &dummy))
+			goto out;
+	}
+        {
+		u8 auth_data[MAX_AH_AUTH_LEN];
+		
+		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+		skb_push(skb, skb->data - skb->nh.raw);
+		ahp->icv(ahp, skb, ah->auth_data);
+		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+			x->stats.integrity_failed++;
+			goto out;
+		}
+	}
+	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
+	skb->nh.raw = skb_pull(skb, ah_hlen);
+	memcpy(skb->nh.raw, work_buf, iph->ihl*4);
+	skb->nh.iph->tot_len = htons(skb->len);
+	skb_pull(skb, skb->nh.iph->ihl*4);
+	skb->h.raw = skb->data;
+
+	return 0;
+
+out:
+	return -EINVAL;
+}
+
+static void ah4_err(struct sk_buff *skb, u32 info)
+{
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
+	       ntohl(ah->spi), ntohl(iph->daddr));
+	xfrm_state_put(x);
+}
+
+static int ah_init_state(struct xfrm_state *x, void *args)
+{
+	struct ah_data *ahp = NULL;
+	struct xfrm_algo_desc *aalg_desc;
+
+	if (!x->aalg)
+		goto error;
+
+	/* null auth can use a zero length key */
+	if (x->aalg->alg_key_len > 512)
+		goto error;
+
+	if (x->encap)
+		goto error;
+
+	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	if (ahp == NULL)
+		return -ENOMEM;
+
+	memset(ahp, 0, sizeof(*ahp));
+
+	ahp->key = x->aalg->alg_key;
+	ahp->key_len = (x->aalg->alg_key_len+7)/8;
+	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+	if (!ahp->tfm)
+		goto error;
+	ahp->icv = ah_hmac_digest;
+	
+	/*
+	 * Lookup the algorithm description maintained by xfrm_algo,
+	 * verify crypto transform properties, and store information
+	 * we need for AH processing.  This lookup cannot fail here
+	 * after a successful crypto_alloc_tfm().
+	 */
+	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+	BUG_ON(!aalg_desc);
+
+	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       aalg_desc->uinfo.auth.icv_fullbits/8);
+		goto error;
+	}
+	
+	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+	
+	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
+	
+	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
+	if (!ahp->work_icv)
+		goto error;
+	
+	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);
+	x->data = ahp;
+
+	return 0;
+
+error:
+	if (ahp) {
+		if (ahp->work_icv)
+			kfree(ahp->work_icv);
+		if (ahp->tfm)
+			crypto_free_tfm(ahp->tfm);
+		kfree(ahp);
+	}
+	return -EINVAL;
+}
+
+static void ah_destroy(struct xfrm_state *x)
+{
+	struct ah_data *ahp = x->data;
+
+	if (!ahp)
+		return;
+
+	if (ahp->work_icv) {
+		kfree(ahp->work_icv);
+		ahp->work_icv = NULL;
+	}
+	if (ahp->tfm) {
+		crypto_free_tfm(ahp->tfm);
+		ahp->tfm = NULL;
+	}
+	kfree(ahp);
+}
+
+
+static struct xfrm_type ah_type =
+{
+	.description	= "AH4",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_AH,
+	.init_state	= ah_init_state,
+	.destructor	= ah_destroy,
+	.input		= ah_input,
+	.output		= ah_output
+};
+
+static struct net_protocol ah4_protocol = {
+	.handler	=	xfrm4_rcv,
+	.err_handler	=	ah4_err,
+	.no_policy	=	1,
+};
+
+static int __init ah4_init(void)
+{
+	if (xfrm_register_type(&ah_type, AF_INET) < 0) {
+		printk(KERN_INFO "ip ah init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
+		printk(KERN_INFO "ip ah init: can't add protocol\n");
+		xfrm_unregister_type(&ah_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ah4_fini(void)
+{
+	if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
+		printk(KERN_INFO "ip ah close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
+		printk(KERN_INFO "ip ah close: can't remove xfrm type\n");
+}
+
+module_init(ah4_init);
+module_exit(ah4_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
new file mode 100644
index 00000000000..a642fd61285
--- /dev/null
+++ b/net/ipv4/arp.c
@@ -0,0 +1,1425 @@
+/* linux/net/inet/arp.c
+ *
+ * Version:	$Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
+ *
+ * Copyright (C) 1994 by Florian  La Roche
+ *
+ * This module implements the Address Resolution Protocol ARP (RFC 826),
+ * which is used to convert IP addresses (or in the future maybe other
+ * high-level addresses) into a low-level hardware address (like an Ethernet
+ * address).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ *		Alan Cox	:	Removed the Ethernet assumptions in 
+ *					Florian's code
+ *		Alan Cox	:	Fixed some small errors in the ARP 
+ *					logic
+ *		Alan Cox	:	Allow >4K in /proc
+ *		Alan Cox	:	Make ARP add its own protocol entry
+ *		Ross Martin     :       Rewrote arp_rcv() and arp_get_info()
+ *		Stephen Henson	:	Add AX25 support to arp_get_info()
+ *		Alan Cox	:	Drop data when a device is downed.
+ *		Alan Cox	:	Use init_timer().
+ *		Alan Cox	:	Double lock fixes.
+ *		Martin Seine	:	Move the arphdr structure
+ *					to if_arp.h for compatibility.
+ *					with BSD based programs.
+ *		Andrew Tridgell :       Added ARP netmask code and
+ *					re-arranged proxy handling.
+ *		Alan Cox	:	Changed to use notifiers.
+ *		Niibe Yutaka	:	Reply for this device or proxies only.
+ *		Alan Cox	:	Don't proxy across hardware types!
+ *		Jonathan Naylor :	Added support for NET/ROM.
+ *		Mike Shaver     :       RFC1122 checks.
+ *		Jonathan Naylor :	Only lookup the hardware address for
+ *					the correct hardware type.
+ *		Germano Caronni	:	Assorted subtle races.
+ *		Craig Schlenter :	Don't modify permanent entry 
+ *					during arp_rcv.
+ *		Russ Nelson	:	Tidied up a few bits.
+ *		Alexey Kuznetsov:	Major changes to caching and behaviour,
+ *					eg intelligent arp probing and 
+ *					generation
+ *					of host down events.
+ *		Alan Cox	:	Missing unlock in device events.
+ *		Eckes		:	ARP ioctl control errors.
+ *		Alexey Kuznetsov:	Arp free fix.
+ *		Manuel Rodriguez:	Gratuitous ARP.
+ *              Jonathan Layes  :       Added arpd support through kerneld 
+ *                                      message queue (960314)
+ *		Mike Shaver	:	/proc/sys/net/ipv4/arp_* support
+ *		Mike McLagan    :	Routing by source
+ *		Stuart Cheshire	:	Metricom and grat arp fixes
+ *					*** FOR 2.1 clean this up ***
+ *		Lawrence V. Stefani: (08/12/96) Added FDDI support.
+ *		Alan Cox 	:	Took the AP1000 nasty FDDI hack and
+ *					folded into the mainstream FDDI code.
+ *					Ack spit, Linus how did you allow that
+ *					one in...
+ *		Jes Sorensen	:	Make FDDI work again in 2.1.x and
+ *					clean up the APFDDI & gen. FDDI bits.
+ *		Alexey Kuznetsov:	new arp state machine;
+ *					now it is in net/core/neighbour.c.
+ *		Krzysztof Halasa:	Added Frame Relay ARP support.
+ *		Arnaldo C. Melo :	convert /proc/net/arp to seq_file
+ *		Shmulik Hen:		Split arp_send to arp_create and
+ *					arp_xmit so intermediate drivers like
+ *					bonding can change the skb before
+ *					sending (e.g. insert 8021q tag).
+ *		Harald Welte	:	convert to make use of jenkins hash
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/config.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/fddidevice.h>
+#include <linux/if_arp.h>
+#include <linux/trdevice.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/net.h>
+#include <linux/rcupdate.h>
+#include <linux/jhash.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#include <net/ax25.h>
+#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#include <net/netrom.h>
+#endif
+#endif
+#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
+#include <net/atmclip.h>
+struct neigh_table *clip_tbl_hook;
+#endif
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/netfilter_arp.h>
+
+/*
+ *	Interface to generic neighbour cache.
+ */
+static u32 arp_hash(const void *pkey, const struct net_device *dev);
+static int arp_constructor(struct neighbour *neigh);
+static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
+static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
+static void parp_redo(struct sk_buff *skb);
+
+static struct neigh_ops arp_generic_ops = {
+	.family =		AF_INET,
+	.solicit =		arp_solicit,
+	.error_report =		arp_error_report,
+	.output =		neigh_resolve_output,
+	.connected_output =	neigh_connected_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+static struct neigh_ops arp_hh_ops = {
+	.family =		AF_INET,
+	.solicit =		arp_solicit,
+	.error_report =		arp_error_report,
+	.output =		neigh_resolve_output,
+	.connected_output =	neigh_resolve_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+static struct neigh_ops arp_direct_ops = {
+	.family =		AF_INET,
+	.output =		dev_queue_xmit,
+	.connected_output =	dev_queue_xmit,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+struct neigh_ops arp_broken_ops = {
+	.family =		AF_INET,
+	.solicit =		arp_solicit,
+	.error_report =		arp_error_report,
+	.output =		neigh_compat_output,
+	.connected_output =	neigh_compat_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+struct neigh_table arp_tbl = {
+	.family =	AF_INET,
+	.entry_size =	sizeof(struct neighbour) + 4,
+	.key_len =	4,
+	.hash =		arp_hash,
+	.constructor =	arp_constructor,
+	.proxy_redo =	parp_redo,
+	.id =		"arp_cache",
+	.parms = {
+		.tbl =			&arp_tbl,
+		.base_reachable_time =	30 * HZ,
+		.retrans_time =	1 * HZ,
+		.gc_staletime =	60 * HZ,
+		.reachable_time =		30 * HZ,
+		.delay_probe_time =	5 * HZ,
+		.queue_len =		3,
+		.ucast_probes =	3,
+		.mcast_probes =	3,
+		.anycast_delay =	1 * HZ,
+		.proxy_delay =		(8 * HZ) / 10,
+		.proxy_qlen =		64,
+		.locktime =		1 * HZ,
+	},
+	.gc_interval =	30 * HZ,
+	.gc_thresh1 =	128,
+	.gc_thresh2 =	512,
+	.gc_thresh3 =	1024,
+};
+
+int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir)
+{
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+	case ARPHRD_FDDI:
+	case ARPHRD_IEEE802:
+		ip_eth_mc_map(addr, haddr);
+		return 0; 
+	case ARPHRD_IEEE802_TR:
+		ip_tr_mc_map(addr, haddr);
+		return 0;
+	case ARPHRD_INFINIBAND:
+		ip_ib_mc_map(addr, haddr);
+		return 0;
+	default:
+		if (dir) {
+			memcpy(haddr, dev->broadcast, dev->addr_len);
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+
+static u32 arp_hash(const void *pkey, const struct net_device *dev)
+{
+	return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd);
+}
+
+static int arp_constructor(struct neighbour *neigh)
+{
+	u32 addr = *(u32*)neigh->primary_key;
+	struct net_device *dev = neigh->dev;
+	struct in_device *in_dev;
+	struct neigh_parms *parms;
+
+	neigh->type = inet_addr_type(addr);
+
+	rcu_read_lock();
+	in_dev = rcu_dereference(__in_dev_get(dev));
+	if (in_dev == NULL) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	parms = in_dev->arp_parms;
+	__neigh_parms_put(neigh->parms);
+	neigh->parms = neigh_parms_clone(parms);
+	rcu_read_unlock();
+
+	if (dev->hard_header == NULL) {
+		neigh->nud_state = NUD_NOARP;
+		neigh->ops = &arp_direct_ops;
+		neigh->output = neigh->ops->queue_xmit;
+	} else {
+		/* Good devices (checked by reading texts, but only Ethernet is
+		   tested)
+
+		   ARPHRD_ETHER: (ethernet, apfddi)
+		   ARPHRD_FDDI: (fddi)
+		   ARPHRD_IEEE802: (tr)
+		   ARPHRD_METRICOM: (strip)
+		   ARPHRD_ARCNET:
+		   etc. etc. etc.
+
+		   ARPHRD_IPDDP will also work, if author repairs it.
+		   I did not it, because this driver does not work even
+		   in old paradigm.
+		 */
+
+#if 1
+		/* So... these "amateur" devices are hopeless.
+		   The only thing, that I can say now:
+		   It is very sad that we need to keep ugly obsolete
+		   code to make them happy.
+
+		   They should be moved to more reasonable state, now
+		   they use rebuild_header INSTEAD OF hard_start_xmit!!!
+		   Besides that, they are sort of out of date
+		   (a lot of redundant clones/copies, useless in 2.1),
+		   I wonder why people believe that they work.
+		 */
+		switch (dev->type) {
+		default:
+			break;
+		case ARPHRD_ROSE:	
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+		case ARPHRD_AX25:
+#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+		case ARPHRD_NETROM:
+#endif
+			neigh->ops = &arp_broken_ops;
+			neigh->output = neigh->ops->output;
+			return 0;
+#endif
+		;}
+#endif
+		if (neigh->type == RTN_MULTICAST) {
+			neigh->nud_state = NUD_NOARP;
+			arp_mc_map(addr, neigh->ha, dev, 1);
+		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+			neigh->nud_state = NUD_NOARP;
+			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+		} else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
+			neigh->nud_state = NUD_NOARP;
+			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+		}
+		if (dev->hard_header_cache)
+			neigh->ops = &arp_hh_ops;
+		else
+			neigh->ops = &arp_generic_ops;
+		if (neigh->nud_state&NUD_VALID)
+			neigh->output = neigh->ops->connected_output;
+		else
+			neigh->output = neigh->ops->output;
+	}
+	return 0;
+}
+
+static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
+{
+	dst_link_failure(skb);
+	kfree_skb(skb);
+}
+
+static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
+{
+	u32 saddr = 0;
+	u8  *dst_ha = NULL;
+	struct net_device *dev = neigh->dev;
+	u32 target = *(u32*)neigh->primary_key;
+	int probes = atomic_read(&neigh->probes);
+	struct in_device *in_dev = in_dev_get(dev);
+
+	if (!in_dev)
+		return;
+
+	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
+	default:
+	case 0:		/* By default announce any local IP */
+		if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
+			saddr = skb->nh.iph->saddr;
+		break;
+	case 1:		/* Restrict announcements of saddr in same subnet */
+		if (!skb)
+			break;
+		saddr = skb->nh.iph->saddr;
+		if (inet_addr_type(saddr) == RTN_LOCAL) {
+			/* saddr should be known to target */
+			if (inet_addr_onlink(in_dev, target, saddr))
+				break;
+		}
+		saddr = 0;
+		break;
+	case 2:		/* Avoid secondary IPs, get a primary/preferred one */
+		break;
+	}
+
+	if (in_dev)
+		in_dev_put(in_dev);
+	if (!saddr)
+		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
+
+	if ((probes -= neigh->parms->ucast_probes) < 0) {
+		if (!(neigh->nud_state&NUD_VALID))
+			printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
+		dst_ha = neigh->ha;
+		read_lock_bh(&neigh->lock);
+	} else if ((probes -= neigh->parms->app_probes) < 0) {
+#ifdef CONFIG_ARPD
+		neigh_app_ns(neigh);
+#endif
+		return;
+	}
+
+	arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
+		 dst_ha, dev->dev_addr, NULL);
+	if (dst_ha)
+		read_unlock_bh(&neigh->lock);
+}
+
+static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
+		      u32 sip, u32 tip)
+{
+	int scope;
+
+	switch (IN_DEV_ARP_IGNORE(in_dev)) {
+	case 0:	/* Reply, the tip is already validated */
+		return 0;
+	case 1:	/* Reply only if tip is configured on the incoming interface */
+		sip = 0;
+		scope = RT_SCOPE_HOST;
+		break;
+	case 2:	/*
+		 * Reply only if tip is configured on the incoming interface
+		 * and is in same subnet as sip
+		 */
+		scope = RT_SCOPE_HOST;
+		break;
+	case 3:	/* Do not reply for scope host addresses */
+		sip = 0;
+		scope = RT_SCOPE_LINK;
+		dev = NULL;
+		break;
+	case 4:	/* Reserved */
+	case 5:
+	case 6:
+	case 7:
+		return 0;
+	case 8:	/* Do not reply */
+		return 1;
+	default:
+		return 0;
+	}
+	return !inet_confirm_addr(dev, sip, tip, scope);
+}
+
+static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
+{
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
+						 .saddr = tip } } };
+	struct rtable *rt;
+	int flag = 0; 
+	/*unsigned long now; */
+
+	if (ip_route_output_key(&rt, &fl) < 0) 
+		return 1;
+	if (rt->u.dst.dev != dev) { 
+		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
+		flag = 1;
+	} 
+	ip_rt_put(rt); 
+	return flag; 
+} 
+
+/* OBSOLETE FUNCTIONS */
+
+/*
+ *	Find an arp mapping in the cache. If not found, post a request.
+ *
+ *	It is very UGLY routine: it DOES NOT use skb->dst->neighbour,
+ *	even if it exists. It is supposed that skb->dev was mangled
+ *	by a virtual device (eql, shaper). Nobody but broken devices
+ *	is allowed to use this function, it is scheduled to be removed. --ANK
+ */
+
+static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct net_device * dev)
+{
+	switch (addr_hint) {
+	case RTN_LOCAL:
+		printk(KERN_DEBUG "ARP: arp called for own IP address\n");
+		memcpy(haddr, dev->dev_addr, dev->addr_len);
+		return 1;
+	case RTN_MULTICAST:
+		arp_mc_map(paddr, haddr, dev, 1);
+		return 1;
+	case RTN_BROADCAST:
+		memcpy(haddr, dev->broadcast, dev->addr_len);
+		return 1;
+	}
+	return 0;
+}
+
+
+int arp_find(unsigned char *haddr, struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	u32 paddr;
+	struct neighbour *n;
+
+	if (!skb->dst) {
+		printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
+		kfree_skb(skb);
+		return 1;
+	}
+
+	paddr = ((struct rtable*)skb->dst)->rt_gateway;
+
+	if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
+		return 0;
+
+	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
+
+	if (n) {
+		n->used = jiffies;
+		if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
+			read_lock_bh(&n->lock);
+ 			memcpy(haddr, n->ha, dev->addr_len);
+			read_unlock_bh(&n->lock);
+			neigh_release(n);
+			return 0;
+		}
+		neigh_release(n);
+	} else
+		kfree_skb(skb);
+	return 1;
+}
+
+/* END OF OBSOLETE FUNCTIONS */
+
+int arp_bind_neighbour(struct dst_entry *dst)
+{
+	struct net_device *dev = dst->dev;
+	struct neighbour *n = dst->neighbour;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (n == NULL) {
+		u32 nexthop = ((struct rtable*)dst)->rt_gateway;
+		if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))
+			nexthop = 0;
+		n = __neigh_lookup_errno(
+#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
+		    dev->type == ARPHRD_ATM ? clip_tbl_hook :
+#endif
+		    &arp_tbl, &nexthop, dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+		dst->neighbour = n;
+	}
+	return 0;
+}
+
+/*
+ * Check if we can use proxy ARP for this path
+ */
+
+static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
+{
+	struct in_device *out_dev;
+	int imi, omi = -1;
+
+	if (!IN_DEV_PROXY_ARP(in_dev))
+		return 0;
+
+	if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0)
+		return 1;
+	if (imi == -1)
+		return 0;
+
+	/* place to check for proxy_arp for routes */
+
+	if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) {
+		omi = IN_DEV_MEDIUM_ID(out_dev);
+		in_dev_put(out_dev);
+	}
+	return (omi != imi && omi != -1);
+}
+
+/*
+ *	Interface to link layer: send routine and receive handler.
+ */
+
+/*
+ *	Create an arp packet. If (dest_hw == NULL), we create a broadcast
+ *	message.
+ */
+struct sk_buff *arp_create(int type, int ptype, u32 dest_ip,
+			   struct net_device *dev, u32 src_ip,
+			   unsigned char *dest_hw, unsigned char *src_hw,
+			   unsigned char *target_hw)
+{
+	struct sk_buff *skb;
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+
+	/*
+	 *	Allocate a buffer
+	 */
+	
+	skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
+				+ LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	if (skb == NULL)
+		return NULL;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb->nh.raw = skb->data;
+	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_ARP);
+	if (src_hw == NULL)
+		src_hw = dev->dev_addr;
+	if (dest_hw == NULL)
+		dest_hw = dev->broadcast;
+
+	/*
+	 *	Fill the device header for the ARP frame
+	 */
+	if (dev->hard_header &&
+	    dev->hard_header(skb,dev,ptype,dest_hw,src_hw,skb->len) < 0)
+		goto out;
+
+	/*
+	 * Fill out the arp protocol part.
+	 *
+	 * The arp hardware type should match the device type, except for FDDI,
+	 * which (according to RFC 1390) should always equal 1 (Ethernet).
+	 */
+	/*
+	 *	Exceptions everywhere. AX.25 uses the AX.25 PID value not the
+	 *	DIX code for the protocol. Make these device structure fields.
+	 */
+	switch (dev->type) {
+	default:
+		arp->ar_hrd = htons(dev->type);
+		arp->ar_pro = htons(ETH_P_IP);
+		break;
+
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+	case ARPHRD_AX25:
+		arp->ar_hrd = htons(ARPHRD_AX25);
+		arp->ar_pro = htons(AX25_P_IP);
+		break;
+
+#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+	case ARPHRD_NETROM:
+		arp->ar_hrd = htons(ARPHRD_NETROM);
+		arp->ar_pro = htons(AX25_P_IP);
+		break;
+#endif
+#endif
+
+#ifdef CONFIG_FDDI
+	case ARPHRD_FDDI:
+		arp->ar_hrd = htons(ARPHRD_ETHER);
+		arp->ar_pro = htons(ETH_P_IP);
+		break;
+#endif
+#ifdef CONFIG_TR
+	case ARPHRD_IEEE802_TR:
+		arp->ar_hrd = htons(ARPHRD_IEEE802);
+		arp->ar_pro = htons(ETH_P_IP);
+		break;
+#endif
+	}
+
+	arp->ar_hln = dev->addr_len;
+	arp->ar_pln = 4;
+	arp->ar_op = htons(type);
+
+	arp_ptr=(unsigned char *)(arp+1);
+
+	memcpy(arp_ptr, src_hw, dev->addr_len);
+	arp_ptr+=dev->addr_len;
+	memcpy(arp_ptr, &src_ip,4);
+	arp_ptr+=4;
+	if (target_hw != NULL)
+		memcpy(arp_ptr, target_hw, dev->addr_len);
+	else
+		memset(arp_ptr, 0, dev->addr_len);
+	arp_ptr+=dev->addr_len;
+	memcpy(arp_ptr, &dest_ip, 4);
+
+	return skb;
+
+out:
+	kfree_skb(skb);
+	return NULL;
+}
+
+/*
+ *	Send an arp packet.
+ */
+void arp_xmit(struct sk_buff *skb)
+{
+	/* Send it off, maybe filter it using firewalling first.  */
+	NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+}
+
+/*
+ *	Create and send an arp packet.
+ */
+void arp_send(int type, int ptype, u32 dest_ip, 
+	      struct net_device *dev, u32 src_ip, 
+	      unsigned char *dest_hw, unsigned char *src_hw,
+	      unsigned char *target_hw)
+{
+	struct sk_buff *skb;
+
+	/*
+	 *	No arp on this interface.
+	 */
+	
+	if (dev->flags&IFF_NOARP)
+		return;
+
+	skb = arp_create(type, ptype, dest_ip, dev, src_ip,
+			 dest_hw, src_hw, target_hw);
+	if (skb == NULL) {
+		return;
+	}
+
+	arp_xmit(skb);
+}
+
+static void parp_redo(struct sk_buff *skb)
+{
+	nf_reset(skb);
+	arp_rcv(skb, skb->dev, NULL);
+}
+
+/*
+ *	Process an arp request.
+ */
+
+static int arp_process(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct in_device *in_dev = in_dev_get(dev);
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+	struct rtable *rt;
+	unsigned char *sha, *tha;
+	u32 sip, tip;
+	u16 dev_type = dev->type;
+	int addr_type;
+	struct neighbour *n;
+
+	/* arp_rcv below verifies the ARP header and verifies the device
+	 * is ARP'able.
+	 */
+
+	if (in_dev == NULL)
+		goto out;
+
+	arp = skb->nh.arph;
+
+	switch (dev_type) {
+	default:	
+		if (arp->ar_pro != htons(ETH_P_IP) ||
+		    htons(dev_type) != arp->ar_hrd)
+			goto out;
+		break;
+#ifdef CONFIG_NET_ETHERNET
+	case ARPHRD_ETHER:
+#endif
+#ifdef CONFIG_TR
+	case ARPHRD_IEEE802_TR:
+#endif
+#ifdef CONFIG_FDDI
+	case ARPHRD_FDDI:
+#endif
+#ifdef CONFIG_NET_FC
+	case ARPHRD_IEEE802:
+#endif
+#if defined(CONFIG_NET_ETHERNET) || defined(CONFIG_TR) || \
+    defined(CONFIG_FDDI)	 || defined(CONFIG_NET_FC)
+		/*
+		 * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
+		 * devices, according to RFC 2625) devices will accept ARP
+		 * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
+		 * This is the case also of FDDI, where the RFC 1390 says that
+		 * FDDI devices should accept ARP hardware of (1) Ethernet,
+		 * however, to be more robust, we'll accept both 1 (Ethernet)
+		 * or 6 (IEEE 802.2)
+		 */
+		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+		    arp->ar_pro != htons(ETH_P_IP))
+			goto out;
+		break;
+#endif
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+	case ARPHRD_AX25:
+		if (arp->ar_pro != htons(AX25_P_IP) ||
+		    arp->ar_hrd != htons(ARPHRD_AX25))
+			goto out;
+		break;
+#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+	case ARPHRD_NETROM:
+		if (arp->ar_pro != htons(AX25_P_IP) ||
+		    arp->ar_hrd != htons(ARPHRD_NETROM))
+			goto out;
+		break;
+#endif
+#endif
+	}
+
+	/* Understand only these message types */
+
+	if (arp->ar_op != htons(ARPOP_REPLY) &&
+	    arp->ar_op != htons(ARPOP_REQUEST))
+		goto out;
+
+/*
+ *	Extract fields
+ */
+	arp_ptr= (unsigned char *)(arp+1);
+	sha	= arp_ptr;
+	arp_ptr += dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4;
+	tha	= arp_ptr;
+	arp_ptr += dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+/* 
+ *	Check for bad requests for 127.x.x.x and requests for multicast
+ *	addresses.  If this is one such, delete it.
+ */
+	if (LOOPBACK(tip) || MULTICAST(tip))
+		goto out;
+
+/*
+ *     Special case: We must set Frame Relay source Q.922 address
+ */
+	if (dev_type == ARPHRD_DLCI)
+		sha = dev->broadcast;
+
+/*
+ *  Process entry.  The idea here is we want to send a reply if it is a
+ *  request for us or if it is a request for someone else that we hold
+ *  a proxy for.  We want to add an entry to our cache if it is a reply
+ *  to us or if it is a request for our address.  
+ *  (The assumption for this last is that if someone is requesting our 
+ *  address, they are probably intending to talk to us, so it saves time 
+ *  if we cache their address.  Their address is also probably not in 
+ *  our cache, since ours is not in their cache.)
+ * 
+ *  Putting this another way, we only care about replies if they are to
+ *  us, in which case we add them to the cache.  For requests, we care
+ *  about those for us and those for our proxies.  We reply to both,
+ *  and in the case of requests for us we add the requester to the arp 
+ *  cache.
+ */
+
+	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
+	if (sip == 0) {
+		if (arp->ar_op == htons(ARPOP_REQUEST) &&
+		    inet_addr_type(tip) == RTN_LOCAL &&
+		    !arp_ignore(in_dev,dev,sip,tip))
+			arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr);
+		goto out;
+	}
+
+	if (arp->ar_op == htons(ARPOP_REQUEST) &&
+	    ip_route_input(skb, tip, sip, 0, dev) == 0) {
+
+		rt = (struct rtable*)skb->dst;
+		addr_type = rt->rt_type;
+
+		if (addr_type == RTN_LOCAL) {
+			n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+			if (n) {
+				int dont_send = 0;
+
+				if (!dont_send)
+					dont_send |= arp_ignore(in_dev,dev,sip,tip);
+				if (!dont_send && IN_DEV_ARPFILTER(in_dev))
+					dont_send |= arp_filter(sip,tip,dev); 
+				if (!dont_send)
+					arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+
+				neigh_release(n);
+			}
+			goto out;
+		} else if (IN_DEV_FORWARD(in_dev)) {
+			if ((rt->rt_flags&RTCF_DNAT) ||
+			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
+			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+				if (n)
+					neigh_release(n);
+
+				if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || 
+				    skb->pkt_type == PACKET_HOST ||
+				    in_dev->arp_parms->proxy_delay == 0) {
+					arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+				} else {
+					pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
+					in_dev_put(in_dev);
+					return 0;
+				}
+				goto out;
+			}
+		}
+	}
+
+	/* Update our ARP tables */
+
+	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
+
+#ifdef CONFIG_IP_ACCEPT_UNSOLICITED_ARP
+	/* Unsolicited ARP is not accepted by default.
+	   It is possible, that this option should be enabled for some
+	   devices (strip is candidate)
+	 */
+	if (n == NULL &&
+	    arp->ar_op == htons(ARPOP_REPLY) &&
+	    inet_addr_type(sip) == RTN_UNICAST)
+		n = __neigh_lookup(&arp_tbl, &sip, dev, -1);
+#endif
+
+	if (n) {
+		int state = NUD_REACHABLE;
+		int override;
+
+		/* If several different ARP replies follows back-to-back,
+		   use the FIRST one. It is possible, if several proxy
+		   agents are active. Taking the first reply prevents
+		   arp trashing and chooses the fastest router.
+		 */
+		override = time_after(jiffies, n->updated + n->parms->locktime);
+
+		/* Broadcast replies and request packets
+		   do not assert neighbour reachability.
+		 */
+		if (arp->ar_op != htons(ARPOP_REPLY) ||
+		    skb->pkt_type != PACKET_HOST)
+			state = NUD_STALE;
+		neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+		neigh_release(n);
+	}
+
+out:
+	if (in_dev)
+		in_dev_put(in_dev);
+	kfree_skb(skb);
+	return 0;
+}
+
+
+/*
+ *	Receive an arp request from the device layer.
+ */
+
+int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct arphdr *arp;
+
+	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * dev->addr_len) +
+				 (2 * sizeof(u32)))))
+		goto freeskb;
+
+	arp = skb->nh.arph;
+	if (arp->ar_hln != dev->addr_len ||
+	    dev->flags & IFF_NOARP ||
+	    skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->pkt_type == PACKET_LOOPBACK ||
+	    arp->ar_pln != 4)
+		goto freeskb;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		goto out_of_mem;
+
+	return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+
+freeskb:
+	kfree_skb(skb);
+out_of_mem:
+	return 0;
+}
+
+/*
+ *	User level interface (ioctl)
+ */
+
+/*
+ *	Set (create) an ARP cache entry.
+ */
+
+static int arp_req_set(struct arpreq *r, struct net_device * dev)
+{
+	u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	struct neighbour *neigh;
+	int err;
+
+	if (r->arp_flags&ATF_PUBL) {
+		u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
+		if (mask && mask != 0xFFFFFFFF)
+			return -EINVAL;
+		if (!dev && (r->arp_flags & ATF_COM)) {
+			dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data);
+			if (!dev)
+				return -ENODEV;
+		}
+		if (mask) {
+			if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
+				return -ENOBUFS;
+			return 0;
+		}
+		if (dev == NULL) {
+			ipv4_devconf.proxy_arp = 1;
+			return 0;
+		}
+		if (__in_dev_get(dev)) {
+			__in_dev_get(dev)->cnf.proxy_arp = 1;
+			return 0;
+		}
+		return -ENXIO;
+	}
+
+	if (r->arp_flags & ATF_PERM)
+		r->arp_flags |= ATF_COM;
+	if (dev == NULL) {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+							 .tos = RTO_ONLINK } } };
+		struct rtable * rt;
+		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+			return err;
+		dev = rt->u.dst.dev;
+		ip_rt_put(rt);
+		if (!dev)
+			return -EINVAL;
+	}
+	switch (dev->type) {
+#ifdef CONFIG_FDDI
+	case ARPHRD_FDDI:
+		/*
+		 * According to RFC 1390, FDDI devices should accept ARP
+		 * hardware types of 1 (Ethernet).  However, to be more
+		 * robust, we'll accept hardware types of either 1 (Ethernet)
+		 * or 6 (IEEE 802.2).
+		 */
+		if (r->arp_ha.sa_family != ARPHRD_FDDI &&
+		    r->arp_ha.sa_family != ARPHRD_ETHER &&
+		    r->arp_ha.sa_family != ARPHRD_IEEE802)
+			return -EINVAL;
+		break;
+#endif
+	default:
+		if (r->arp_ha.sa_family != dev->type)
+			return -EINVAL;
+		break;
+	}
+
+	neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
+	err = PTR_ERR(neigh);
+	if (!IS_ERR(neigh)) {
+		unsigned state = NUD_STALE;
+		if (r->arp_flags & ATF_PERM)
+			state = NUD_PERMANENT;
+		err = neigh_update(neigh, (r->arp_flags&ATF_COM) ?
+				   r->arp_ha.sa_data : NULL, state, 
+				   NEIGH_UPDATE_F_OVERRIDE|
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
+	}
+	return err;
+}
+
+static unsigned arp_state_to_flags(struct neighbour *neigh)
+{
+	unsigned flags = 0;
+	if (neigh->nud_state&NUD_PERMANENT)
+		flags = ATF_PERM|ATF_COM;
+	else if (neigh->nud_state&NUD_VALID)
+		flags = ATF_COM;
+	return flags;
+}
+
+/*
+ *	Get an ARP cache entry.
+ */
+
+static int arp_req_get(struct arpreq *r, struct net_device *dev)
+{
+	u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	struct neighbour *neigh;
+	int err = -ENXIO;
+
+	neigh = neigh_lookup(&arp_tbl, &ip, dev);
+	if (neigh) {
+		read_lock_bh(&neigh->lock);
+		memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+		r->arp_flags = arp_state_to_flags(neigh);
+		read_unlock_bh(&neigh->lock);
+		r->arp_ha.sa_family = dev->type;
+		strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+		neigh_release(neigh);
+		err = 0;
+	}
+	return err;
+}
+
+static int arp_req_delete(struct arpreq *r, struct net_device * dev)
+{
+	int err;
+	u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+	struct neighbour *neigh;
+
+	if (r->arp_flags & ATF_PUBL) {
+		u32 mask =
+		       ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+		if (mask == 0xFFFFFFFF)
+			return pneigh_delete(&arp_tbl, &ip, dev);
+		if (mask == 0) {
+			if (dev == NULL) {
+				ipv4_devconf.proxy_arp = 0;
+				return 0;
+			}
+			if (__in_dev_get(dev)) {
+				__in_dev_get(dev)->cnf.proxy_arp = 0;
+				return 0;
+			}
+			return -ENXIO;
+		}
+		return -EINVAL;
+	}
+
+	if (dev == NULL) {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+							 .tos = RTO_ONLINK } } };
+		struct rtable * rt;
+		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+			return err;
+		dev = rt->u.dst.dev;
+		ip_rt_put(rt);
+		if (!dev)
+			return -EINVAL;
+	}
+	err = -ENXIO;
+	neigh = neigh_lookup(&arp_tbl, &ip, dev);
+	if (neigh) {
+		if (neigh->nud_state&~NUD_NOARP)
+			err = neigh_update(neigh, NULL, NUD_FAILED, 
+					   NEIGH_UPDATE_F_OVERRIDE|
+					   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
+	}
+	return err;
+}
+
+/*
+ *	Handle an ARP layer I/O control request.
+ */
+
+int arp_ioctl(unsigned int cmd, void __user *arg)
+{
+	int err;
+	struct arpreq r;
+	struct net_device *dev = NULL;
+
+	switch (cmd) {
+		case SIOCDARP:
+		case SIOCSARP:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+		case SIOCGARP:
+			err = copy_from_user(&r, arg, sizeof(struct arpreq));
+			if (err)
+				return -EFAULT;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	if (r.arp_pa.sa_family != AF_INET)
+		return -EPFNOSUPPORT;
+
+	if (!(r.arp_flags & ATF_PUBL) &&
+	    (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB)))
+		return -EINVAL;
+	if (!(r.arp_flags & ATF_NETMASK))
+		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
+							   htonl(0xFFFFFFFFUL);
+	rtnl_lock();
+	if (r.arp_dev[0]) {
+		err = -ENODEV;
+		if ((dev = __dev_get_by_name(r.arp_dev)) == NULL)
+			goto out;
+
+		/* Mmmm... It is wrong... ARPHRD_NETROM==0 */
+		if (!r.arp_ha.sa_family)
+			r.arp_ha.sa_family = dev->type;
+		err = -EINVAL;
+		if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
+			goto out;
+	} else if (cmd == SIOCGARP) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	switch(cmd) {
+	case SIOCDARP:
+	        err = arp_req_delete(&r, dev);
+		break;
+	case SIOCSARP:
+		err = arp_req_set(&r, dev);
+		break;
+	case SIOCGARP:
+		err = arp_req_get(&r, dev);
+		if (!err && copy_to_user(arg, &r, sizeof(r)))
+			err = -EFAULT;
+		break;
+	}
+out:
+	rtnl_unlock();
+	return err;
+}
+
+static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	switch (event) {
+	case NETDEV_CHANGEADDR:
+		neigh_changeaddr(&arp_tbl, dev);
+		rt_cache_flush(0);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block arp_netdev_notifier = {
+	.notifier_call = arp_netdev_event,
+};
+
+/* Note, that it is not on notifier chain.
+   It is necessary, that this routine was called after route cache will be
+   flushed.
+ */
+void arp_ifdown(struct net_device *dev)
+{
+	neigh_ifdown(&arp_tbl, dev);
+}
+
+
+/*
+ *	Called once on startup.
+ */
+
+static struct packet_type arp_packet_type = {
+	.type =	__constant_htons(ETH_P_ARP),
+	.func =	arp_rcv,
+};
+
+static int arp_proc_init(void);
+
+void __init arp_init(void)
+{
+	neigh_table_init(&arp_tbl);
+
+	dev_add_pack(&arp_packet_type);
+	arp_proc_init();
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+#endif
+	register_netdevice_notifier(&arp_netdev_notifier);
+}
+
+#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+
+/* ------------------------------------------------------------------------ */
+/*
+ *	ax25 -> ASCII conversion
+ */
+static char *ax2asc2(ax25_address *a, char *buf)
+{
+	char c, *s;
+	int n;
+
+	for (n = 0, s = buf; n < 6; n++) {
+		c = (a->ax25_call[n] >> 1) & 0x7F;
+
+		if (c != ' ') *s++ = c;
+	}
+	
+	*s++ = '-';
+
+	if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) {
+		*s++ = '1';
+		n -= 10;
+	}
+	
+	*s++ = n + '0';
+	*s++ = '\0';
+
+	if (*buf == '\0' || *buf == '-')
+	   return "*";
+
+	return buf;
+
+}
+#endif /* CONFIG_AX25 */
+
+#define HBUFFERLEN 30
+
+static void arp_format_neigh_entry(struct seq_file *seq,
+				   struct neighbour *n)
+{
+	char hbuffer[HBUFFERLEN];
+	const char hexbuf[] = "0123456789ABCDEF";
+	int k, j;
+	char tbuf[16];
+	struct net_device *dev = n->dev;
+	int hatype = dev->type;
+
+	read_lock(&n->lock);
+	/* Convert hardware address to XX:XX:XX:XX ... form. */
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+	if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
+		ax2asc2((ax25_address *)n->ha, hbuffer);
+	else {
+#endif
+	for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
+		hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
+		hbuffer[k++] = hexbuf[n->ha[j] & 15];
+		hbuffer[k++] = ':';
+	}
+	hbuffer[--k] = 0;
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+	}
+#endif
+	sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+	seq_printf(seq, "%-16s 0x%-10x0x%-10x%s     *        %s\n",
+		   tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
+	read_unlock(&n->lock);
+}
+
+static void arp_format_pneigh_entry(struct seq_file *seq,
+				    struct pneigh_entry *n)
+{
+	struct net_device *dev = n->dev;
+	int hatype = dev ? dev->type : 0;
+	char tbuf[16];
+
+	sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key));
+	seq_printf(seq, "%-16s 0x%-10x0x%-10x%s     *        %s\n",
+		   tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
+		   dev ? dev->name : "*");
+}
+
+static int arp_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "IP address       HW type     Flags       "
+			      "HW address            Mask     Device\n");
+	} else {
+		struct neigh_seq_state *state = seq->private;
+
+		if (state->flags & NEIGH_SEQ_IS_PNEIGH)
+			arp_format_pneigh_entry(seq, v);
+		else
+			arp_format_neigh_entry(seq, v);
+	}
+
+	return 0;
+}
+
+static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	/* Don't want to confuse "arp -a" w/ magic entries,
+	 * so we tell the generic iterator to skip NUD_NOARP.
+	 */
+	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct seq_operations arp_seq_ops = {
+	.start  = arp_seq_start,
+	.next   = neigh_seq_next,
+	.stop   = neigh_seq_stop,
+	.show   = arp_seq_show,
+};
+
+static int arp_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	memset(s, 0, sizeof(*s));
+	rc = seq_open(file, &arp_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations arp_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = arp_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+
+static int __init arp_proc_init(void)
+{
+	if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int __init arp_proc_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+EXPORT_SYMBOL(arp_broken_ops);
+EXPORT_SYMBOL(arp_find);
+EXPORT_SYMBOL(arp_rcv);
+EXPORT_SYMBOL(arp_create);
+EXPORT_SYMBOL(arp_xmit);
+EXPORT_SYMBOL(arp_send);
+EXPORT_SYMBOL(arp_tbl);
+
+#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
+EXPORT_SYMBOL(clip_tbl_hook);
+#endif
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
new file mode 100644
index 00000000000..b1db561f254
--- /dev/null
+++ b/net/ipv4/datagram.c
@@ -0,0 +1,73 @@
+/*
+ *	common UDP/RAW code
+ *	Linux INET implementation
+ *
+ * Authors:
+ * 	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
+ *
+ * 	This program is free software; you can redistribute it and/or
+ * 	modify it under the terms of the GNU General Public License
+ * 	as published by the Free Software Foundation; either version
+ * 	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+	struct rtable *rt;
+	u32 saddr;
+	int oif;
+	int err;
+
+	
+	if (addr_len < sizeof(*usin)) 
+	  	return -EINVAL;
+
+	if (usin->sin_family != AF_INET) 
+	  	return -EAFNOSUPPORT;
+
+	sk_dst_reset(sk);
+
+	oif = sk->sk_bound_dev_if;
+	saddr = inet->saddr;
+	if (MULTICAST(usin->sin_addr.s_addr)) {
+		if (!oif)
+			oif = inet->mc_index;
+		if (!saddr)
+			saddr = inet->mc_addr;
+	}
+	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
+			       RT_CONN_FLAGS(sk), oif,
+			       sk->sk_protocol,
+			       inet->sport, usin->sin_port, sk);
+	if (err)
+		return err;
+	if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
+		ip_rt_put(rt);
+		return -EACCES;
+	}
+  	if (!inet->saddr)
+	  	inet->saddr = rt->rt_src;	/* Update source address */
+	if (!inet->rcv_saddr)
+		inet->rcv_saddr = rt->rt_src;
+	inet->daddr = rt->rt_dst;
+	inet->dport = usin->sin_port;
+	sk->sk_state = TCP_ESTABLISHED;
+	inet->id = jiffies;
+
+	sk_dst_set(sk, &rt->u.dst);
+	return(0);
+}
+
+EXPORT_SYMBOL(ip4_datagram_connect);
+
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
new file mode 100644
index 00000000000..eea7ef01077
--- /dev/null
+++ b/net/ipv4/devinet.c
@@ -0,0 +1,1508 @@
+/*
+ *	NET3	IP device support routines.
+ *
+ *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Derived from the IP parts of dev.c 1.0.19
+ * 		Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *				Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ *	Additional Authors:
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *	Changes:
+ *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
+ *					lists.
+ *		Cyrus Durgin:		updated for kmod
+ *		Matthias Andree:	in devinet_ioctl, compare label and
+ *					address (4.4BSD alias style support),
+ *					fall back to comparing just the label
+ *					if no match found.
+ */
+
+#include <linux/config.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/kmod.h>
+
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+
+struct ipv4_devconf ipv4_devconf = {
+	.accept_redirects = 1,
+	.send_redirects =  1,
+	.secure_redirects = 1,
+	.shared_media =	  1,
+};
+
+static struct ipv4_devconf ipv4_devconf_dflt = {
+	.accept_redirects =  1,
+	.send_redirects =    1,
+	.secure_redirects =  1,
+	.shared_media =	     1,
+	.accept_source_route = 1,
+};
+
+static void rtmsg_ifa(int event, struct in_ifaddr *);
+
+static struct notifier_block *inetaddr_chain;
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy);
+#ifdef CONFIG_SYSCTL
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p);
+static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+#endif
+
+/* Locks all the inet devices. */
+
+static struct in_ifaddr *inet_alloc_ifa(void)
+{
+	struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+
+	if (ifa) {
+		memset(ifa, 0, sizeof(*ifa));
+		INIT_RCU_HEAD(&ifa->rcu_head);
+	}
+
+	return ifa;
+}
+
+static void inet_rcu_free_ifa(struct rcu_head *head)
+{
+	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
+	if (ifa->ifa_dev)
+		in_dev_put(ifa->ifa_dev);
+	kfree(ifa);
+}
+
+static inline void inet_free_ifa(struct in_ifaddr *ifa)
+{
+	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
+}
+
+void in_dev_finish_destroy(struct in_device *idev)
+{
+	struct net_device *dev = idev->dev;
+
+	BUG_TRAP(!idev->ifa_list);
+	BUG_TRAP(!idev->mc_list);
+#ifdef NET_REFCNT_DEBUG
+	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
+	       idev, dev ? dev->name : "NIL");
+#endif
+	dev_put(dev);
+	if (!idev->dead)
+		printk("Freeing alive in_device %p\n", idev);
+	else {
+		kfree(idev);
+	}
+}
+
+struct in_device *inetdev_init(struct net_device *dev)
+{
+	struct in_device *in_dev;
+
+	ASSERT_RTNL();
+
+	in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
+	if (!in_dev)
+		goto out;
+	memset(in_dev, 0, sizeof(*in_dev));
+	INIT_RCU_HEAD(&in_dev->rcu_head);
+	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+	in_dev->cnf.sysctl = NULL;
+	in_dev->dev = dev;
+	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
+		goto out_kfree;
+	/* Reference in_dev->dev */
+	dev_hold(dev);
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
+			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+#endif
+
+	/* Account for reference dev->ip_ptr */
+	in_dev_hold(in_dev);
+	rcu_assign_pointer(dev->ip_ptr, in_dev);
+
+#ifdef CONFIG_SYSCTL
+	devinet_sysctl_register(in_dev, &in_dev->cnf);
+#endif
+	ip_mc_init_dev(in_dev);
+	if (dev->flags & IFF_UP)
+		ip_mc_up(in_dev);
+out:
+	return in_dev;
+out_kfree:
+	kfree(in_dev);
+	in_dev = NULL;
+	goto out;
+}
+
+static void in_dev_rcu_put(struct rcu_head *head)
+{
+	struct in_device *idev = container_of(head, struct in_device, rcu_head);
+	in_dev_put(idev);
+}
+
+static void inetdev_destroy(struct in_device *in_dev)
+{
+	struct in_ifaddr *ifa;
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	dev = in_dev->dev;
+	if (dev == &loopback_dev)
+		return;
+
+	in_dev->dead = 1;
+
+	ip_mc_destroy_dev(in_dev);
+
+	while ((ifa = in_dev->ifa_list) != NULL) {
+		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
+		inet_free_ifa(ifa);
+	}
+
+#ifdef CONFIG_SYSCTL
+	devinet_sysctl_unregister(&in_dev->cnf);
+#endif
+
+	dev->ip_ptr = NULL;
+
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(in_dev->arp_parms);
+#endif
+	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
+	arp_ifdown(dev);
+
+	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
+}
+
+int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
+{
+	rcu_read_lock();
+	for_primary_ifa(in_dev) {
+		if (inet_ifa_match(a, ifa)) {
+			if (!b || inet_ifa_match(b, ifa)) {
+				rcu_read_unlock();
+				return 1;
+			}
+		}
+	} endfor_ifa(in_dev);
+	rcu_read_unlock();
+	return 0;
+}
+
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy)
+{
+	struct in_ifaddr *ifa1 = *ifap;
+
+	ASSERT_RTNL();
+
+	/* 1. Deleting primary ifaddr forces deletion all secondaries */
+
+	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
+		struct in_ifaddr *ifa;
+		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
+
+		while ((ifa = *ifap1) != NULL) {
+			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
+			    ifa1->ifa_mask != ifa->ifa_mask ||
+			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
+				ifap1 = &ifa->ifa_next;
+				continue;
+			}
+
+			*ifap1 = ifa->ifa_next;
+
+			rtmsg_ifa(RTM_DELADDR, ifa);
+			notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
+			inet_free_ifa(ifa);
+		}
+	}
+
+	/* 2. Unlink it */
+
+	*ifap = ifa1->ifa_next;
+
+	/* 3. Announce address deletion */
+
+	/* Send message first, then call notifier.
+	   At first sight, FIB update triggered by notifier
+	   will refer to already deleted ifaddr, that could confuse
+	   netlink listeners. It is not true: look, gated sees
+	   that route deleted and if it still thinks that ifaddr
+	   is valid, it will try to restore deleted routes... Grr.
+	   So that, this order is correct.
+	 */
+	rtmsg_ifa(RTM_DELADDR, ifa1);
+	notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
+	if (destroy) {
+		inet_free_ifa(ifa1);
+
+		if (!in_dev->ifa_list)
+			inetdev_destroy(in_dev);
+	}
+}
+
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct in_ifaddr *ifa1, **ifap, **last_primary;
+
+	ASSERT_RTNL();
+
+	if (!ifa->ifa_local) {
+		inet_free_ifa(ifa);
+		return 0;
+	}
+
+	ifa->ifa_flags &= ~IFA_F_SECONDARY;
+	last_primary = &in_dev->ifa_list;
+
+	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
+	     ifap = &ifa1->ifa_next) {
+		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
+		    ifa->ifa_scope <= ifa1->ifa_scope)
+			last_primary = &ifa1->ifa_next;
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa)) {
+			if (ifa1->ifa_local == ifa->ifa_local) {
+				inet_free_ifa(ifa);
+				return -EEXIST;
+			}
+			if (ifa1->ifa_scope != ifa->ifa_scope) {
+				inet_free_ifa(ifa);
+				return -EINVAL;
+			}
+			ifa->ifa_flags |= IFA_F_SECONDARY;
+		}
+	}
+
+	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
+		net_srandom(ifa->ifa_local);
+		ifap = last_primary;
+	}
+
+	ifa->ifa_next = *ifap;
+	*ifap = ifa;
+
+	/* Send message first, then call notifier.
+	   Notifier will trigger FIB update, so that
+	   listeners of netlink will know about new ifaddr */
+	rtmsg_ifa(RTM_NEWADDR, ifa);
+	notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
+
+	return 0;
+}
+
+static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = __in_dev_get(dev);
+
+	ASSERT_RTNL();
+
+	if (!in_dev) {
+		in_dev = inetdev_init(dev);
+		if (!in_dev) {
+			inet_free_ifa(ifa);
+			return -ENOBUFS;
+		}
+	}
+	if (ifa->ifa_dev != in_dev) {
+		BUG_TRAP(!ifa->ifa_dev);
+		in_dev_hold(in_dev);
+		ifa->ifa_dev = in_dev;
+	}
+	if (LOOPBACK(ifa->ifa_local))
+		ifa->ifa_scope = RT_SCOPE_HOST;
+	return inet_insert_ifa(ifa);
+}
+
+struct in_device *inetdev_by_index(int ifindex)
+{
+	struct net_device *dev;
+	struct in_device *in_dev = NULL;
+	read_lock(&dev_base_lock);
+	dev = __dev_get_by_index(ifindex);
+	if (dev)
+		in_dev = in_dev_get(dev);
+	read_unlock(&dev_base_lock);
+	return in_dev;
+}
+
+/* Called only from RTNL semaphored context. No locks. */
+
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
+				    u32 mask)
+{
+	ASSERT_RTNL();
+
+	for_primary_ifa(in_dev) {
+		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
+			return ifa;
+	} endfor_ifa(in_dev);
+	return NULL;
+}
+
+static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct in_device *in_dev;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct in_ifaddr *ifa, **ifap;
+
+	ASSERT_RTNL();
+
+	if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
+		goto out;
+	__in_dev_put(in_dev);
+
+	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+	     ifap = &ifa->ifa_next) {
+		if ((rta[IFA_LOCAL - 1] &&
+		     memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
+			    &ifa->ifa_local, 4)) ||
+		    (rta[IFA_LABEL - 1] &&
+		     rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
+		    (rta[IFA_ADDRESS - 1] &&
+		     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+		      !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
+			      	      ifa))))
+			continue;
+		inet_del_ifa(in_dev, ifap, 1);
+		return 0;
+	}
+out:
+	return -EADDRNOTAVAIL;
+}
+
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct net_device *dev;
+	struct in_device *in_dev;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct in_ifaddr *ifa;
+	int rc = -EINVAL;
+
+	ASSERT_RTNL();
+
+	if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
+		goto out;
+
+	rc = -ENODEV;
+	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
+		goto out;
+
+	rc = -ENOBUFS;
+	if ((in_dev = __in_dev_get(dev)) == NULL) {
+		in_dev = inetdev_init(dev);
+		if (!in_dev)
+			goto out;
+	}
+
+	if ((ifa = inet_alloc_ifa()) == NULL)
+		goto out;
+
+	if (!rta[IFA_ADDRESS - 1])
+		rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
+	memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
+	memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
+	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
+	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
+	if (rta[IFA_BROADCAST - 1])
+		memcpy(&ifa->ifa_broadcast,
+		       RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
+	if (rta[IFA_ANYCAST - 1])
+		memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
+	ifa->ifa_flags = ifm->ifa_flags;
+	ifa->ifa_scope = ifm->ifa_scope;
+	in_dev_hold(in_dev);
+	ifa->ifa_dev   = in_dev;
+	if (rta[IFA_LABEL - 1])
+		rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
+	else
+		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+
+	rc = inet_insert_ifa(ifa);
+out:
+	return rc;
+}
+
+/*
+ *	Determine a default network mask, based on the IP address.
+ */
+
+static __inline__ int inet_abc_len(u32 addr)
+{
+	int rc = -1;	/* Something else, probably a multicast. */
+
+  	if (ZERONET(addr))
+  		rc = 0;
+	else {
+		addr = ntohl(addr);
+
+		if (IN_CLASSA(addr))
+			rc = 8;
+		else if (IN_CLASSB(addr))
+			rc = 16;
+		else if (IN_CLASSC(addr))
+			rc = 24;
+	}
+
+  	return rc;
+}
+
+
+int devinet_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	struct sockaddr_in sin_orig;
+	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+	struct in_device *in_dev;
+	struct in_ifaddr **ifap = NULL;
+	struct in_ifaddr *ifa = NULL;
+	struct net_device *dev;
+	char *colon;
+	int ret = -EFAULT;
+	int tryaddrmatch = 0;
+
+	/*
+	 *	Fetch the caller's info block into kernel space
+	 */
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		goto out;
+	ifr.ifr_name[IFNAMSIZ - 1] = 0;
+
+	/* save original address for comparison */
+	memcpy(&sin_orig, sin, sizeof(*sin));
+
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+#ifdef CONFIG_KMOD
+	dev_load(ifr.ifr_name);
+#endif
+
+	switch(cmd) {
+	case SIOCGIFADDR:	/* Get interface address */
+	case SIOCGIFBRDADDR:	/* Get the broadcast address */
+	case SIOCGIFDSTADDR:	/* Get the destination address */
+	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
+		/* Note that these ioctls will not sleep,
+		   so that we do not impose a lock.
+		   One day we will be forced to put shlock here (I mean SMP)
+		 */
+		tryaddrmatch = (sin_orig.sin_family == AF_INET);
+		memset(sin, 0, sizeof(*sin));
+		sin->sin_family = AF_INET;
+		break;
+
+	case SIOCSIFFLAGS:
+		ret = -EACCES;
+		if (!capable(CAP_NET_ADMIN))
+			goto out;
+		break;
+	case SIOCSIFADDR:	/* Set interface address (and family) */
+	case SIOCSIFBRDADDR:	/* Set the broadcast address */
+	case SIOCSIFDSTADDR:	/* Set the destination address */
+	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
+		ret = -EACCES;
+		if (!capable(CAP_NET_ADMIN))
+			goto out;
+		ret = -EINVAL;
+		if (sin->sin_family != AF_INET)
+			goto out;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	rtnl_lock();
+
+	ret = -ENODEV;
+	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
+		goto done;
+
+	if (colon)
+		*colon = ':';
+
+	if ((in_dev = __in_dev_get(dev)) != NULL) {
+		if (tryaddrmatch) {
+			/* Matthias Andree */
+			/* compare label and address (4.4BSD style) */
+			/* note: we only do this for a limited set of ioctls
+			   and only if the original address family was AF_INET.
+			   This is checked above. */
+			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+			     ifap = &ifa->ifa_next) {
+				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
+				    sin_orig.sin_addr.s_addr ==
+							ifa->ifa_address) {
+					break; /* found */
+				}
+			}
+		}
+		/* we didn't get a match, maybe the application is
+		   4.3BSD-style and passed in junk so we fall back to
+		   comparing just the label */
+		if (!ifa) {
+			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+			     ifap = &ifa->ifa_next)
+				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
+					break;
+		}
+	}
+
+	ret = -EADDRNOTAVAIL;
+	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
+		goto done;
+
+	switch(cmd) {
+	case SIOCGIFADDR:	/* Get interface address */
+		sin->sin_addr.s_addr = ifa->ifa_local;
+		goto rarok;
+
+	case SIOCGIFBRDADDR:	/* Get the broadcast address */
+		sin->sin_addr.s_addr = ifa->ifa_broadcast;
+		goto rarok;
+
+	case SIOCGIFDSTADDR:	/* Get the destination address */
+		sin->sin_addr.s_addr = ifa->ifa_address;
+		goto rarok;
+
+	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
+		sin->sin_addr.s_addr = ifa->ifa_mask;
+		goto rarok;
+
+	case SIOCSIFFLAGS:
+		if (colon) {
+			ret = -EADDRNOTAVAIL;
+			if (!ifa)
+				break;
+			ret = 0;
+			if (!(ifr.ifr_flags & IFF_UP))
+				inet_del_ifa(in_dev, ifap, 1);
+			break;
+		}
+		ret = dev_change_flags(dev, ifr.ifr_flags);
+		break;
+
+	case SIOCSIFADDR:	/* Set interface address (and family) */
+		ret = -EINVAL;
+		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
+			break;
+
+		if (!ifa) {
+			ret = -ENOBUFS;
+			if ((ifa = inet_alloc_ifa()) == NULL)
+				break;
+			if (colon)
+				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+			else
+				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+		} else {
+			ret = 0;
+			if (ifa->ifa_local == sin->sin_addr.s_addr)
+				break;
+			inet_del_ifa(in_dev, ifap, 0);
+			ifa->ifa_broadcast = 0;
+			ifa->ifa_anycast = 0;
+		}
+
+		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
+
+		if (!(dev->flags & IFF_POINTOPOINT)) {
+			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
+			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+			if ((dev->flags & IFF_BROADCAST) &&
+			    ifa->ifa_prefixlen < 31)
+				ifa->ifa_broadcast = ifa->ifa_address |
+						     ~ifa->ifa_mask;
+		} else {
+			ifa->ifa_prefixlen = 32;
+			ifa->ifa_mask = inet_make_mask(32);
+		}
+		ret = inet_set_ifa(dev, ifa);
+		break;
+
+	case SIOCSIFBRDADDR:	/* Set the broadcast address */
+		ret = 0;
+		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
+			inet_del_ifa(in_dev, ifap, 0);
+			ifa->ifa_broadcast = sin->sin_addr.s_addr;
+			inet_insert_ifa(ifa);
+		}
+		break;
+
+	case SIOCSIFDSTADDR:	/* Set the destination address */
+		ret = 0;
+		if (ifa->ifa_address == sin->sin_addr.s_addr)
+			break;
+		ret = -EINVAL;
+		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
+			break;
+		ret = 0;
+		inet_del_ifa(in_dev, ifap, 0);
+		ifa->ifa_address = sin->sin_addr.s_addr;
+		inet_insert_ifa(ifa);
+		break;
+
+	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
+
+		/*
+		 *	The mask we set must be legal.
+		 */
+		ret = -EINVAL;
+		if (bad_mask(sin->sin_addr.s_addr, 0))
+			break;
+		ret = 0;
+		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+			inet_del_ifa(in_dev, ifap, 0);
+			ifa->ifa_mask = sin->sin_addr.s_addr;
+			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
+
+			/* See if current broadcast address matches
+			 * with current netmask, then recalculate
+			 * the broadcast address. Otherwise it's a
+			 * funny address, so don't touch it since
+			 * the user seems to know what (s)he's doing...
+			 */
+			if ((dev->flags & IFF_BROADCAST) &&
+			    (ifa->ifa_prefixlen < 31) &&
+			    (ifa->ifa_broadcast ==
+			     (ifa->ifa_local|~ifa->ifa_mask))) {
+				ifa->ifa_broadcast = (ifa->ifa_local |
+						      ~sin->sin_addr.s_addr);
+			}
+			inet_insert_ifa(ifa);
+		}
+		break;
+	}
+done:
+	rtnl_unlock();
+out:
+	return ret;
+rarok:
+	rtnl_unlock();
+	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
+	goto out;
+}
+
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
+{
+	struct in_device *in_dev = __in_dev_get(dev);
+	struct in_ifaddr *ifa;
+	struct ifreq ifr;
+	int done = 0;
+
+	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
+		goto out;
+
+	for (; ifa; ifa = ifa->ifa_next) {
+		if (!buf) {
+			done += sizeof(ifr);
+			continue;
+		}
+		if (len < (int) sizeof(ifr))
+			break;
+		memset(&ifr, 0, sizeof(struct ifreq));
+		if (ifa->ifa_label)
+			strcpy(ifr.ifr_name, ifa->ifa_label);
+		else
+			strcpy(ifr.ifr_name, dev->name);
+
+		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
+		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
+								ifa->ifa_local;
+
+		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
+			done = -EFAULT;
+			break;
+		}
+		buf  += sizeof(struct ifreq);
+		len  -= sizeof(struct ifreq);
+		done += sizeof(struct ifreq);
+	}
+out:
+	return done;
+}
+
+u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
+{
+	u32 addr = 0;
+	struct in_device *in_dev;
+
+	rcu_read_lock();
+	in_dev = __in_dev_get(dev);
+	if (!in_dev)
+		goto no_in_dev;
+
+	for_primary_ifa(in_dev) {
+		if (ifa->ifa_scope > scope)
+			continue;
+		if (!dst || inet_ifa_match(dst, ifa)) {
+			addr = ifa->ifa_local;
+			break;
+		}
+		if (!addr)
+			addr = ifa->ifa_local;
+	} endfor_ifa(in_dev);
+no_in_dev:
+	rcu_read_unlock();
+
+	if (addr)
+		goto out;
+
+	/* Not loopback addresses on loopback should be preferred
+	   in this case. It is importnat that lo is the first interface
+	   in dev_base list.
+	 */
+	read_lock(&dev_base_lock);
+	rcu_read_lock();
+	for (dev = dev_base; dev; dev = dev->next) {
+		if ((in_dev = __in_dev_get(dev)) == NULL)
+			continue;
+
+		for_primary_ifa(in_dev) {
+			if (ifa->ifa_scope != RT_SCOPE_LINK &&
+			    ifa->ifa_scope <= scope) {
+				addr = ifa->ifa_local;
+				goto out_unlock_both;
+			}
+		} endfor_ifa(in_dev);
+	}
+out_unlock_both:
+	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
+out:
+	return addr;
+}
+
+static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
+			      u32 local, int scope)
+{
+	int same = 0;
+	u32 addr = 0;
+
+	for_ifa(in_dev) {
+		if (!addr &&
+		    (local == ifa->ifa_local || !local) &&
+		    ifa->ifa_scope <= scope) {
+			addr = ifa->ifa_local;
+			if (same)
+				break;
+		}
+		if (!same) {
+			same = (!local || inet_ifa_match(local, ifa)) &&
+				(!dst || inet_ifa_match(dst, ifa));
+			if (same && addr) {
+				if (local || !dst)
+					break;
+				/* Is the selected addr into dst subnet? */
+				if (inet_ifa_match(addr, ifa))
+					break;
+				/* No, then can we use new local src? */
+				if (ifa->ifa_scope <= scope) {
+					addr = ifa->ifa_local;
+					break;
+				}
+				/* search for large dst subnet for addr */
+				same = 0;
+			}
+		}
+	} endfor_ifa(in_dev);
+
+	return same? addr : 0;
+}
+
+/*
+ * Confirm that local IP address exists using wildcards:
+ * - dev: only on this interface, 0=any interface
+ * - dst: only in the same subnet as dst, 0=any dst
+ * - local: address, 0=autoselect the local address
+ * - scope: maximum allowed scope value for the local address
+ */
+u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope)
+{
+	u32 addr = 0;
+	struct in_device *in_dev;
+
+	if (dev) {
+		rcu_read_lock();
+		if ((in_dev = __in_dev_get(dev)))
+			addr = confirm_addr_indev(in_dev, dst, local, scope);
+		rcu_read_unlock();
+
+		return addr;
+	}
+
+	read_lock(&dev_base_lock);
+	rcu_read_lock();
+	for (dev = dev_base; dev; dev = dev->next) {
+		if ((in_dev = __in_dev_get(dev))) {
+			addr = confirm_addr_indev(in_dev, dst, local, scope);
+			if (addr)
+				break;
+		}
+	}
+	rcu_read_unlock();
+	read_unlock(&dev_base_lock);
+
+	return addr;
+}
+
+/*
+ *	Device notifier
+ */
+
+int register_inetaddr_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_register(&inetaddr_chain, nb);
+}
+
+int unregister_inetaddr_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&inetaddr_chain, nb);
+}
+
+/* Rename ifa_labels for a device name change. Make some effort to preserve existing
+ * alias numbering and to create unique labels if possible.
+*/
+static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
+{ 
+	struct in_ifaddr *ifa;
+	int named = 0;
+
+	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 
+		char old[IFNAMSIZ], *dot; 
+
+		memcpy(old, ifa->ifa_label, IFNAMSIZ);
+		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 
+		if (named++ == 0)
+			continue;
+		dot = strchr(ifa->ifa_label, ':');
+		if (dot == NULL) { 
+			sprintf(old, ":%d", named); 
+			dot = old;
+		}
+		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { 
+			strcat(ifa->ifa_label, dot); 
+		} else { 
+			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 
+		} 
+	}	
+} 
+
+/* Called only under RTNL semaphore */
+
+static int inetdev_event(struct notifier_block *this, unsigned long event,
+			 void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct in_device *in_dev = __in_dev_get(dev);
+
+	ASSERT_RTNL();
+
+	if (!in_dev) {
+		if (event == NETDEV_REGISTER && dev == &loopback_dev) {
+			in_dev = inetdev_init(dev);
+			if (!in_dev)
+				panic("devinet: Failed to create loopback\n");
+			in_dev->cnf.no_xfrm = 1;
+			in_dev->cnf.no_policy = 1;
+		}
+		goto out;
+	}
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		printk(KERN_DEBUG "inetdev_event: bug\n");
+		dev->ip_ptr = NULL;
+		break;
+	case NETDEV_UP:
+		if (dev->mtu < 68)
+			break;
+		if (dev == &loopback_dev) {
+			struct in_ifaddr *ifa;
+			if ((ifa = inet_alloc_ifa()) != NULL) {
+				ifa->ifa_local =
+				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
+				ifa->ifa_prefixlen = 8;
+				ifa->ifa_mask = inet_make_mask(8);
+				in_dev_hold(in_dev);
+				ifa->ifa_dev = in_dev;
+				ifa->ifa_scope = RT_SCOPE_HOST;
+				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+				inet_insert_ifa(ifa);
+			}
+		}
+		ip_mc_up(in_dev);
+		break;
+	case NETDEV_DOWN:
+		ip_mc_down(in_dev);
+		break;
+	case NETDEV_CHANGEMTU:
+		if (dev->mtu >= 68)
+			break;
+		/* MTU falled under 68, disable IP */
+	case NETDEV_UNREGISTER:
+		inetdev_destroy(in_dev);
+		break;
+	case NETDEV_CHANGENAME:
+		/* Do not notify about label change, this event is
+		 * not interesting to applications using netlink.
+		 */
+		inetdev_changename(dev, in_dev);
+
+#ifdef CONFIG_SYSCTL
+		devinet_sysctl_unregister(&in_dev->cnf);
+		neigh_sysctl_unregister(in_dev->arp_parms);
+		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
+				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+		devinet_sysctl_register(in_dev, &in_dev->cnf);
+#endif
+		break;
+	}
+out:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ip_netdev_notifier = {
+	.notifier_call =inetdev_event,
+};
+
+static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
+			    u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	ifm = NLMSG_DATA(nlh);
+	ifm->ifa_family = AF_INET;
+	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
+	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
+	ifm->ifa_scope = ifa->ifa_scope;
+	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+	if (ifa->ifa_address)
+		RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+	if (ifa->ifa_local)
+		RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+	if (ifa->ifa_broadcast)
+		RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+	if (ifa->ifa_anycast)
+		RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+	if (ifa->ifa_label[0])
+		RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, ip_idx;
+	struct net_device *dev;
+	struct in_device *in_dev;
+	struct in_ifaddr *ifa;
+	int s_ip_idx, s_idx = cb->args[0];
+
+	s_ip_idx = ip_idx = cb->args[1];
+	read_lock(&dev_base_lock);
+	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (idx > s_idx)
+			s_ip_idx = 0;
+		rcu_read_lock();
+		if ((in_dev = __in_dev_get(dev)) == NULL) {
+			rcu_read_unlock();
+			continue;
+		}
+
+		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
+		     ifa = ifa->ifa_next, ip_idx++) {
+			if (ip_idx < s_ip_idx)
+				continue;
+			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+					     cb->nlh->nlmsg_seq,
+					     RTM_NEWADDR) <= 0) {
+				rcu_read_unlock();
+				goto done;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+done:
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+	cb->args[1] = ip_idx;
+
+	return skb->len;
+}
+
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+{
+	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
+	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+
+	if (!skb)
+		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
+	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
+	} else {
+		NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR;
+		netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL);
+	}
+}
+
+static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
+	 [4] = { .doit	 = inet_rtm_newaddr,  },
+	 [5] = { .doit	 = inet_rtm_deladdr,  },
+	 [6] = { .dumpit = inet_dump_ifaddr,  },
+	 [8] = { .doit	 = inet_rtm_newroute, },
+	 [9] = { .doit	 = inet_rtm_delroute, },
+	[10] = { .doit	 = inet_rtm_getroute, .dumpit = inet_dump_fib, },
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	[16] = { .doit	 = inet_rtm_newrule, },
+	[17] = { .doit	 = inet_rtm_delrule, },
+	[18] = { .dumpit = inet_dump_rules,  },
+#endif
+};
+
+#ifdef CONFIG_SYSCTL
+
+void inet_forward_change(void)
+{
+	struct net_device *dev;
+	int on = ipv4_devconf.forwarding;
+
+	ipv4_devconf.accept_redirects = !on;
+	ipv4_devconf_dflt.forwarding = on;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		struct in_device *in_dev;
+		rcu_read_lock();
+		in_dev = __in_dev_get(dev);
+		if (in_dev)
+			in_dev->cnf.forwarding = on;
+		rcu_read_unlock();
+	}
+	read_unlock(&dev_base_lock);
+
+	rt_cache_flush(0);
+}
+
+static int devinet_sysctl_forward(ctl_table *ctl, int write,
+				  struct file* filp, void __user *buffer,
+				  size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && *valp != val) {
+		if (valp == &ipv4_devconf.forwarding)
+			inet_forward_change();
+		else if (valp != &ipv4_devconf_dflt.forwarding)
+			rt_cache_flush(0);
+	}
+
+	return ret;
+}
+
+int ipv4_doint_and_flush(ctl_table *ctl, int write,
+			 struct file* filp, void __user *buffer,
+			 size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && *valp != val)
+		rt_cache_flush(0);
+
+	return ret;
+}
+
+int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+				  void __user *oldval, size_t __user *oldlenp,
+				  void __user *newval, size_t newlen, 
+				  void **context)
+{
+	int *valp = table->data;
+	int new;
+
+	if (!newval || !newlen)
+		return 0;
+
+	if (newlen != sizeof(int))
+		return -EINVAL;
+
+	if (get_user(new, (int __user *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;
+
+	if (oldval && oldlenp) {
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	*valp = new;
+	rt_cache_flush(0);
+	return 1;
+}
+
+
+static struct devinet_sysctl_table {
+	struct ctl_table_header *sysctl_header;
+	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
+	ctl_table		devinet_dev[2];
+	ctl_table		devinet_conf_dir[2];
+	ctl_table		devinet_proto_dir[2];
+	ctl_table		devinet_root_dir[2];
+} devinet_sysctl = {
+	.devinet_vars = {
+		{
+			.ctl_name	= NET_IPV4_CONF_FORWARDING,
+			.procname	= "forwarding",
+			.data		= &ipv4_devconf.forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &devinet_sysctl_forward,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
+			.procname	= "mc_forwarding",
+			.data		= &ipv4_devconf.mc_forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0444,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
+			.procname	= "accept_redirects",
+			.data		= &ipv4_devconf.accept_redirects,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
+			.procname	= "secure_redirects",
+			.data		= &ipv4_devconf.secure_redirects,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
+			.procname	= "shared_media",
+			.data		= &ipv4_devconf.shared_media,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
+			.procname	= "rp_filter",
+			.data		= &ipv4_devconf.rp_filter,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
+			.procname	= "send_redirects",
+			.data		= &ipv4_devconf.send_redirects,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
+			.procname	= "accept_source_route",
+			.data		= &ipv4_devconf.accept_source_route,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
+			.procname	= "proxy_arp",
+			.data		= &ipv4_devconf.proxy_arp,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
+			.procname	= "medium_id",
+			.data		= &ipv4_devconf.medium_id,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
+			.procname	= "bootp_relay",
+			.data		= &ipv4_devconf.bootp_relay,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
+			.procname	= "log_martians",
+			.data		= &ipv4_devconf.log_martians,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_TAG,
+			.procname	= "tag",
+			.data		= &ipv4_devconf.tag,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
+			.procname	= "arp_filter",
+			.data		= &ipv4_devconf.arp_filter,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
+			.procname	= "arp_announce",
+			.data		= &ipv4_devconf.arp_announce,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
+			.procname	= "arp_ignore",
+			.data		= &ipv4_devconf.arp_ignore,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_NOXFRM,
+			.procname	= "disable_xfrm",
+			.data		= &ipv4_devconf.no_xfrm,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &ipv4_doint_and_flush,
+			.strategy	= &ipv4_doint_and_flush_strategy,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
+			.procname	= "disable_policy",
+			.data		= &ipv4_devconf.no_policy,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &ipv4_doint_and_flush,
+			.strategy	= &ipv4_doint_and_flush_strategy,
+		},
+		{
+			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
+			.procname	= "force_igmp_version",
+			.data		= &ipv4_devconf.force_igmp_version,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &ipv4_doint_and_flush,
+			.strategy	= &ipv4_doint_and_flush_strategy,
+		},
+	},
+	.devinet_dev = {
+		{
+			.ctl_name	= NET_PROTO_CONF_ALL,
+			.procname	= "all",
+			.mode		= 0555,
+			.child		= devinet_sysctl.devinet_vars,
+		},
+	},
+	.devinet_conf_dir = {
+	        {
+			.ctl_name	= NET_IPV4_CONF,
+			.procname	= "conf",
+			.mode		= 0555,
+			.child		= devinet_sysctl.devinet_dev,
+		},
+	},
+	.devinet_proto_dir = {
+		{
+			.ctl_name	= NET_IPV4,
+			.procname	= "ipv4",
+			.mode		= 0555,
+			.child 		= devinet_sysctl.devinet_conf_dir,
+		},
+	},
+	.devinet_root_dir = {
+		{
+			.ctl_name	= CTL_NET,
+			.procname 	= "net",
+			.mode		= 0555,
+			.child		= devinet_sysctl.devinet_proto_dir,
+		},
+	},
+};
+
+static void devinet_sysctl_register(struct in_device *in_dev,
+				    struct ipv4_devconf *p)
+{
+	int i;
+	struct net_device *dev = in_dev ? in_dev->dev : NULL;
+	struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	char *dev_name = NULL;
+
+	if (!t)
+		return;
+	memcpy(t, &devinet_sysctl, sizeof(*t));
+	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
+		t->devinet_vars[i].de = NULL;
+	}
+
+	if (dev) {
+		dev_name = dev->name; 
+		t->devinet_dev[0].ctl_name = dev->ifindex;
+	} else {
+		dev_name = "default";
+		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+	}
+
+	/* 
+	 * Make a copy of dev_name, because '.procname' is regarded as const 
+	 * by sysctl and we wouldn't want anyone to change it under our feet
+	 * (see SIOCSIFNAME).
+	 */	
+	dev_name = net_sysctl_strdup(dev_name);
+	if (!dev_name)
+	    goto free;
+
+	t->devinet_dev[0].procname    = dev_name;
+	t->devinet_dev[0].child	      = t->devinet_vars;
+	t->devinet_dev[0].de	      = NULL;
+	t->devinet_conf_dir[0].child  = t->devinet_dev;
+	t->devinet_conf_dir[0].de     = NULL;
+	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
+	t->devinet_proto_dir[0].de    = NULL;
+	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
+	t->devinet_root_dir[0].de     = NULL;
+
+	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
+	if (!t->sysctl_header)
+	    goto free_procname;
+
+	p->sysctl = t;
+	return;
+
+	/* error path */
+ free_procname:
+	kfree(dev_name);
+ free:
+	kfree(t);
+	return;
+}
+
+static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+{
+	if (p->sysctl) {
+		struct devinet_sysctl_table *t = p->sysctl;
+		p->sysctl = NULL;
+		unregister_sysctl_table(t->sysctl_header);
+		kfree(t->devinet_dev[0].procname);
+		kfree(t);
+	}
+}
+#endif
+
+void __init devinet_init(void)
+{
+	register_gifconf(PF_INET, inet_gifconf);
+	register_netdevice_notifier(&ip_netdev_notifier);
+	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
+#ifdef CONFIG_SYSCTL
+	devinet_sysctl.sysctl_header =
+		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
+	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+#endif
+}
+
+EXPORT_SYMBOL(devinet_ioctl);
+EXPORT_SYMBOL(in_dev_finish_destroy);
+EXPORT_SYMBOL(inet_select_addr);
+EXPORT_SYMBOL(inetdev_by_index);
+EXPORT_SYMBOL(register_inetaddr_notifier);
+EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
new file mode 100644
index 00000000000..053a883247b
--- /dev/null
+++ b/net/ipv4/esp4.c
@@ -0,0 +1,510 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+
+/* decapsulation data for use when post-processing */
+struct esp_decap_data {
+	xfrm_address_t	saddr;
+	__u16		sport;
+	__u8		proto;
+};
+
+static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct iphdr *top_iph;
+	struct ip_esp_hdr *esph;
+	struct crypto_tfm *tfm;
+	struct esp_data *esp;
+	struct sk_buff *trailer;
+	int blksize;
+	int clen;
+	int alen;
+	int nfrags;
+
+	/* Strip IP+ESP header. */
+	__skb_pull(skb, skb->h.raw - skb->data);
+	/* Now skb is pure payload to encrypt */
+
+	err = -ENOMEM;
+
+	/* Round to block size */
+	clen = skb->len;
+
+	esp = x->data;
+	alen = esp->auth.icv_trunc_len;
+	tfm = esp->conf.tfm;
+	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
+	clen = (clen + 2 + blksize-1)&~(blksize-1);
+	if (esp->conf.padlen)
+		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
+		goto error;
+
+	/* Fill padding... */
+	do {
+		int i;
+		for (i=0; i<clen-skb->len - 2; i++)
+			*(u8*)(trailer->tail + i) = i+1;
+	} while (0);
+	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	pskb_put(skb, trailer, clen - skb->len);
+
+	__skb_push(skb, skb->data - skb->nh.raw);
+	top_iph = skb->nh.iph;
+	esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+	top_iph->tot_len = htons(skb->len + alen);
+	*(u8*)(trailer->tail - 1) = top_iph->protocol;
+
+	/* this is non-NULL only with UDP Encapsulation */
+	if (x->encap) {
+		struct xfrm_encap_tmpl *encap = x->encap;
+		struct udphdr *uh;
+		u32 *udpdata32;
+
+		uh = (struct udphdr *)esph;
+		uh->source = encap->encap_sport;
+		uh->dest = encap->encap_dport;
+		uh->len = htons(skb->len + alen - top_iph->ihl*4);
+		uh->check = 0;
+
+		switch (encap->encap_type) {
+		default:
+		case UDP_ENCAP_ESPINUDP:
+			esph = (struct ip_esp_hdr *)(uh + 1);
+			break;
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			udpdata32 = (u32 *)(uh + 1);
+			udpdata32[0] = udpdata32[1] = 0;
+			esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+			break;
+		}
+
+		top_iph->protocol = IPPROTO_UDP;
+	} else
+		top_iph->protocol = IPPROTO_ESP;
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(++x->replay.oseq);
+
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+
+	do {
+		struct scatterlist *sg = &esp->sgbuf[0];
+
+		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto error;
+		}
+		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
+		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		if (unlikely(sg != &esp->sgbuf[0]))
+			kfree(sg);
+	} while (0);
+
+	if (esp->conf.ivlen) {
+		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	}
+
+	if (esp->auth.icv_full_len) {
+		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
+		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
+		pskb_put(skb, trailer, alen);
+	}
+
+	ip_send_check(top_iph);
+
+	err = 0;
+
+error:
+	return err;
+}
+
+/*
+ * Note: detecting truncated vs. non-truncated authentication data is very
+ * expensive, so we only support truncated data, which is the recommended
+ * and common case.
+ */
+static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct ip_esp_hdr *esph;
+	struct esp_data *esp = x->data;
+	struct sk_buff *trailer;
+	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+	int alen = esp->auth.icv_trunc_len;
+	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
+	int nfrags;
+	int encap_len = 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
+		goto out;
+
+	if (elen <= 0 || (elen & (blksize-1)))
+		goto out;
+
+	/* If integrity check is required, do this. */
+	if (esp->auth.icv_full_len) {
+		u8 sum[esp->auth.icv_full_len];
+		u8 sum1[alen];
+		
+		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+
+		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+			BUG();
+
+		if (unlikely(memcmp(sum, sum1, alen))) {
+			x->stats.integrity_failed++;
+			goto out;
+		}
+	}
+
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	esph = (struct ip_esp_hdr*)skb->data;
+	iph = skb->nh.iph;
+
+	/* Get ivec. This can be wrong, check against another impls. */
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+
+        {
+		u8 nexthdr[2];
+		struct scatterlist *sg = &esp->sgbuf[0];
+		u8 workbuf[60];
+		int padlen;
+
+		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto out;
+		}
+		skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
+		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+		if (unlikely(sg != &esp->sgbuf[0]))
+			kfree(sg);
+
+		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
+			BUG();
+
+		padlen = nexthdr[0];
+		if (padlen+2 >= elen)
+			goto out;
+
+		/* ... check padding bits here. Silly. :-) */ 
+
+		if (x->encap && decap && decap->decap_type) {
+			struct esp_decap_data *encap_data;
+			struct udphdr *uh = (struct udphdr *) (iph+1);
+
+			encap_data = (struct esp_decap_data *) (decap->decap_data);
+			encap_data->proto = 0;
+
+			switch (decap->decap_type) {
+			case UDP_ENCAP_ESPINUDP:
+			case UDP_ENCAP_ESPINUDP_NON_IKE:
+				encap_data->proto = AF_INET;
+				encap_data->saddr.a4 = iph->saddr;
+				encap_data->sport = uh->source;
+				encap_len = (void*)esph - (void*)uh;
+				break;
+
+			default:
+				goto out;
+			}
+		}
+
+		iph->protocol = nexthdr[1];
+		pskb_trim(skb, skb->len - alen - padlen - 2);
+		memcpy(workbuf, skb->nh.raw, iph->ihl*4);
+		skb->h.raw = skb_pull(skb, sizeof(struct ip_esp_hdr) + esp->conf.ivlen);
+		skb->nh.raw += encap_len + sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
+		memcpy(skb->nh.raw, workbuf, iph->ihl*4);
+		skb->nh.iph->tot_len = htons(skb->len);
+	}
+
+	return 0;
+
+out:
+	return -EINVAL;
+}
+
+static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+  
+	if (x->encap) {
+		struct xfrm_encap_tmpl *encap;
+		struct esp_decap_data *decap_data;
+
+		encap = x->encap;
+		decap_data = (struct esp_decap_data *)(decap->decap_data);
+
+		/* first, make sure that the decap type == the encap type */
+		if (encap->encap_type != decap->decap_type)
+			return -EINVAL;
+
+		switch (encap->encap_type) {
+		default:
+		case UDP_ENCAP_ESPINUDP:
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			/*
+			 * 1) if the NAT-T peer's IP or port changed then
+			 *    advertize the change to the keying daemon.
+			 *    This is an inbound SA, so just compare
+			 *    SRC ports.
+			 */
+			if (decap_data->proto == AF_INET &&
+			    (decap_data->saddr.a4 != x->props.saddr.a4 ||
+			     decap_data->sport != encap->encap_sport)) {
+				xfrm_address_t ipaddr;
+
+				ipaddr.a4 = decap_data->saddr.a4;
+				km_new_mapping(x, &ipaddr, decap_data->sport);
+					
+				/* XXX: perhaps add an extra
+				 * policy check here, to see
+				 * if we should allow or
+				 * reject a packet from a
+				 * different source
+				 * address/port.
+				 */
+			}
+		
+			/*
+			 * 2) ignore UDP/TCP checksums in case
+			 *    of NAT-T in Transport Mode, or
+			 *    perform other post-processing fixes
+			 *    as per * draft-ietf-ipsec-udp-encaps-06,
+			 *    section 3.1.2
+			 */
+			if (!x->props.mode)
+				skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+			break;
+		}
+	}
+	return 0;
+}
+
+static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+{
+	struct esp_data *esp = x->data;
+	u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+
+	if (x->props.mode) {
+		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+	} else {
+		/* The worst case. */
+		mtu += 2 + blksize;
+	}
+	if (esp->conf.padlen)
+		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+}
+
+static void esp4_err(struct sk_buff *skb, u32 info)
+{
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+	if (!x)
+		return;
+	NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
+			ntohl(esph->spi), ntohl(iph->daddr)));
+	xfrm_state_put(x);
+}
+
+static void esp_destroy(struct xfrm_state *x)
+{
+	struct esp_data *esp = x->data;
+
+	if (!esp)
+		return;
+
+	if (esp->conf.tfm) {
+		crypto_free_tfm(esp->conf.tfm);
+		esp->conf.tfm = NULL;
+	}
+	if (esp->conf.ivec) {
+		kfree(esp->conf.ivec);
+		esp->conf.ivec = NULL;
+	}
+	if (esp->auth.tfm) {
+		crypto_free_tfm(esp->auth.tfm);
+		esp->auth.tfm = NULL;
+	}
+	if (esp->auth.work_icv) {
+		kfree(esp->auth.work_icv);
+		esp->auth.work_icv = NULL;
+	}
+	kfree(esp);
+}
+
+static int esp_init_state(struct xfrm_state *x, void *args)
+{
+	struct esp_data *esp = NULL;
+
+	/* null auth and encryption can have zero length keys */
+	if (x->aalg) {
+		if (x->aalg->alg_key_len > 512)
+			goto error;
+	}
+	if (x->ealg == NULL)
+		goto error;
+
+	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+	if (esp == NULL)
+		return -ENOMEM;
+
+	memset(esp, 0, sizeof(*esp));
+
+	if (x->aalg) {
+		struct xfrm_algo_desc *aalg_desc;
+
+		esp->auth.key = x->aalg->alg_key;
+		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
+		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+		if (esp->auth.tfm == NULL)
+			goto error;
+		esp->auth.icv = esp_hmac_digest;
+
+		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+		BUG_ON(!aalg_desc);
+
+		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+			NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+			       x->aalg->alg_name,
+			       crypto_tfm_alg_digestsize(esp->auth.tfm),
+			       aalg_desc->uinfo.auth.icv_fullbits/8));
+			goto error;
+		}
+
+		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+
+		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
+		if (!esp->auth.work_icv)
+			goto error;
+	}
+	esp->conf.key = x->ealg->alg_key;
+	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
+	if (x->props.ealgo == SADB_EALG_NULL)
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+	else
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
+	if (esp->conf.tfm == NULL)
+		goto error;
+	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.padlen = 0;
+	if (esp->conf.ivlen) {
+		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
+		if (unlikely(esp->conf.ivec == NULL))
+			goto error;
+		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+	}
+	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+		goto error;
+	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);
+	if (x->encap) {
+		struct xfrm_encap_tmpl *encap = x->encap;
+
+		switch (encap->encap_type) {
+		default:
+			goto error;
+		case UDP_ENCAP_ESPINUDP:
+			x->props.header_len += sizeof(struct udphdr);
+			break;
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
+			break;
+		}
+	}
+	x->data = esp;
+	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+	return 0;
+
+error:
+	x->data = esp;
+	esp_destroy(x);
+	x->data = NULL;
+	return -EINVAL;
+}
+
+static struct xfrm_type esp_type =
+{
+	.description	= "ESP4",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_ESP,
+	.init_state	= esp_init_state,
+	.destructor	= esp_destroy,
+	.get_max_size	= esp4_get_max_size,
+	.input		= esp_input,
+	.post_input	= esp_post_input,
+	.output		= esp_output
+};
+
+static struct net_protocol esp4_protocol = {
+	.handler	=	xfrm4_rcv,
+	.err_handler	=	esp4_err,
+	.no_policy	=	1,
+};
+
+static int __init esp4_init(void)
+{
+	struct xfrm_decap_state decap;
+
+	if (sizeof(struct esp_decap_data)  <
+	    sizeof(decap.decap_data)) {
+		extern void decap_data_too_small(void);
+
+		decap_data_too_small();
+	}
+
+	if (xfrm_register_type(&esp_type, AF_INET) < 0) {
+		printk(KERN_INFO "ip esp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
+		printk(KERN_INFO "ip esp init: can't add protocol\n");
+		xfrm_unregister_type(&esp_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit esp4_fini(void)
+{
+	if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
+		printk(KERN_INFO "ip esp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
+		printk(KERN_INFO "ip esp close: can't remove xfrm type\n");
+}
+
+module_init(esp4_init);
+module_exit(esp4_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
new file mode 100644
index 00000000000..563e7d61270
--- /dev/null
+++ b/net/ipv4/fib_frontend.c
@@ -0,0 +1,611 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IPv4 Forwarding Information Base: FIB frontend.
+ *
+ * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+
+#define FFprint(a...) printk(KERN_DEBUG a)
+
+#ifndef CONFIG_IP_MULTIPLE_TABLES
+
+#define RT_TABLE_MIN RT_TABLE_MAIN
+
+struct fib_table *ip_fib_local_table;
+struct fib_table *ip_fib_main_table;
+
+#else
+
+#define RT_TABLE_MIN 1
+
+struct fib_table *fib_tables[RT_TABLE_MAX+1];
+
+struct fib_table *__fib_new_table(int id)
+{
+	struct fib_table *tb;
+
+	tb = fib_hash_init(id);
+	if (!tb)
+		return NULL;
+	fib_tables[id] = tb;
+	return tb;
+}
+
+
+#endif /* CONFIG_IP_MULTIPLE_TABLES */
+
+
+static void fib_flush(void)
+{
+	int flushed = 0;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	struct fib_table *tb;
+	int id;
+
+	for (id = RT_TABLE_MAX; id>0; id--) {
+		if ((tb = fib_get_table(id))==NULL)
+			continue;
+		flushed += tb->tb_flush(tb);
+	}
+#else /* CONFIG_IP_MULTIPLE_TABLES */
+	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
+	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
+#endif /* CONFIG_IP_MULTIPLE_TABLES */
+
+	if (flushed)
+		rt_cache_flush(-1);
+}
+
+/*
+ *	Find the first device with a given source address.
+ */
+
+struct net_device * ip_dev_find(u32 addr)
+{
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+	struct fib_result res;
+	struct net_device *dev = NULL;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
+
+	if (!ip_fib_local_table ||
+	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
+		return NULL;
+	if (res.type != RTN_LOCAL)
+		goto out;
+	dev = FIB_RES_DEV(res);
+
+	if (dev)
+		dev_hold(dev);
+out:
+	fib_res_put(&res);
+	return dev;
+}
+
+unsigned inet_addr_type(u32 addr)
+{
+	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+	struct fib_result	res;
+	unsigned ret = RTN_BROADCAST;
+
+	if (ZERONET(addr) || BADCLASS(addr))
+		return RTN_BROADCAST;
+	if (MULTICAST(addr))
+		return RTN_MULTICAST;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
+	
+	if (ip_fib_local_table) {
+		ret = RTN_UNICAST;
+		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
+						   &fl, &res)) {
+			ret = res.type;
+			fib_res_put(&res);
+		}
+	}
+	return ret;
+}
+
+/* Given (packet source, input interface) and optional (dst, oif, tos):
+   - (main) check, that source is valid i.e. not broadcast or our local
+     address.
+   - figure out what "logical" interface this packet arrived
+     and calculate "specific destination" address.
+   - check, that packet arrived from expected physical interface.
+ */
+
+int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
+			struct net_device *dev, u32 *spec_dst, u32 *itag)
+{
+	struct in_device *in_dev;
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = src,
+					.saddr = dst,
+					.tos = tos } },
+			    .iif = oif };
+	struct fib_result res;
+	int no_addr, rpf;
+	int ret;
+
+	no_addr = rpf = 0;
+	rcu_read_lock();
+	in_dev = __in_dev_get(dev);
+	if (in_dev) {
+		no_addr = in_dev->ifa_list == NULL;
+		rpf = IN_DEV_RPFILTER(in_dev);
+	}
+	rcu_read_unlock();
+
+	if (in_dev == NULL)
+		goto e_inval;
+
+	if (fib_lookup(&fl, &res))
+		goto last_resort;
+	if (res.type != RTN_UNICAST)
+		goto e_inval_res;
+	*spec_dst = FIB_RES_PREFSRC(res);
+	fib_combine_itag(itag, &res);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
+#else
+	if (FIB_RES_DEV(res) == dev)
+#endif
+	{
+		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+		fib_res_put(&res);
+		return ret;
+	}
+	fib_res_put(&res);
+	if (no_addr)
+		goto last_resort;
+	if (rpf)
+		goto e_inval;
+	fl.oif = dev->ifindex;
+
+	ret = 0;
+	if (fib_lookup(&fl, &res) == 0) {
+		if (res.type == RTN_UNICAST) {
+			*spec_dst = FIB_RES_PREFSRC(res);
+			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+		}
+		fib_res_put(&res);
+	}
+	return ret;
+
+last_resort:
+	if (rpf)
+		goto e_inval;
+	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	*itag = 0;
+	return 0;
+
+e_inval_res:
+	fib_res_put(&res);
+e_inval:
+	return -EINVAL;
+}
+
+#ifndef CONFIG_IP_NOSIOCRT
+
+/*
+ *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
+ */
+ 
+int ip_rt_ioctl(unsigned int cmd, void __user *arg)
+{
+	int err;
+	struct kern_rta rta;
+	struct rtentry  r;
+	struct {
+		struct nlmsghdr nlh;
+		struct rtmsg	rtm;
+	} req;
+
+	switch (cmd) {
+	case SIOCADDRT:		/* Add a route */
+	case SIOCDELRT:		/* Delete a route */
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+			return -EFAULT;
+		rtnl_lock();
+		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+		if (err == 0) {
+			if (cmd == SIOCDELRT) {
+				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
+				err = -ESRCH;
+				if (tb)
+					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+			} else {
+				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
+				err = -ENOBUFS;
+				if (tb)
+					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+			}
+			if (rta.rta_mx)
+				kfree(rta.rta_mx);
+		}
+		rtnl_unlock();
+		return err;
+	}
+	return -EINVAL;
+}
+
+#else
+
+int ip_rt_ioctl(unsigned int cmd, void *arg)
+{
+	return -EINVAL;
+}
+
+#endif
+
+static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
+{
+	int i;
+
+	for (i=1; i<=RTA_MAX; i++) {
+		struct rtattr *attr = rta[i-1];
+		if (attr) {
+			if (RTA_PAYLOAD(attr) < 4)
+				return -EINVAL;
+			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+				rta[i-1] = (struct rtattr*)RTA_DATA(attr);
+		}
+	}
+	return 0;
+}
+
+int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_table * tb;
+	struct rtattr **rta = arg;
+	struct rtmsg *r = NLMSG_DATA(nlh);
+
+	if (inet_check_attr(r, rta))
+		return -EINVAL;
+
+	tb = fib_get_table(r->rtm_table);
+	if (tb)
+		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
+	return -ESRCH;
+}
+
+int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_table * tb;
+	struct rtattr **rta = arg;
+	struct rtmsg *r = NLMSG_DATA(nlh);
+
+	if (inet_check_attr(r, rta))
+		return -EINVAL;
+
+	tb = fib_new_table(r->rtm_table);
+	if (tb)
+		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
+	return -ENOBUFS;
+}
+
+int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct fib_table *tb;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+		return ip_rt_dump(skb, cb);
+
+	s_t = cb->args[0];
+	if (s_t == 0)
+		s_t = cb->args[0] = RT_TABLE_MIN;
+
+	for (t=s_t; t<=RT_TABLE_MAX; t++) {
+		if (t < s_t) continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		if ((tb = fib_get_table(t))==NULL)
+			continue;
+		if (tb->tb_dump(tb, skb, cb) < 0) 
+			break;
+	}
+
+	cb->args[0] = t;
+
+	return skb->len;
+}
+
+/* Prepare and feed intra-kernel routing request.
+   Really, it should be netlink message, but :-( netlink
+   can be not configured, so that we feed it directly
+   to fib engine. It is legal, because all events occur
+   only when netlink is already locked.
+ */
+
+static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+{
+	struct fib_table * tb;
+	struct {
+		struct nlmsghdr	nlh;
+		struct rtmsg	rtm;
+	} req;
+	struct kern_rta rta;
+
+	memset(&req.rtm, 0, sizeof(req.rtm));
+	memset(&rta, 0, sizeof(rta));
+
+	if (type == RTN_UNICAST)
+		tb = fib_new_table(RT_TABLE_MAIN);
+	else
+		tb = fib_new_table(RT_TABLE_LOCAL);
+
+	if (tb == NULL)
+		return;
+
+	req.nlh.nlmsg_len = sizeof(req);
+	req.nlh.nlmsg_type = cmd;
+	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
+	req.nlh.nlmsg_pid = 0;
+	req.nlh.nlmsg_seq = 0;
+
+	req.rtm.rtm_dst_len = dst_len;
+	req.rtm.rtm_table = tb->tb_id;
+	req.rtm.rtm_protocol = RTPROT_KERNEL;
+	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
+	req.rtm.rtm_type = type;
+
+	rta.rta_dst = &dst;
+	rta.rta_prefsrc = &ifa->ifa_local;
+	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+
+	if (cmd == RTM_NEWROUTE)
+		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+	else
+		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+}
+
+static void fib_add_ifaddr(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct net_device *dev = in_dev->dev;
+	struct in_ifaddr *prim = ifa;
+	u32 mask = ifa->ifa_mask;
+	u32 addr = ifa->ifa_local;
+	u32 prefix = ifa->ifa_address&mask;
+
+	if (ifa->ifa_flags&IFA_F_SECONDARY) {
+		prim = inet_ifa_byprefix(in_dev, prefix, mask);
+		if (prim == NULL) {
+			printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
+			return;
+		}
+	}
+
+	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+
+	if (!(dev->flags&IFF_UP))
+		return;
+
+	/* Add broadcast address, if it is explicitly assigned. */
+	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
+		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+
+	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
+	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
+		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
+			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
+
+		/* Add network specific broadcasts, when it takes a sense */
+		if (ifa->ifa_prefixlen < 31) {
+			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
+		}
+	}
+}
+
+static void fib_del_ifaddr(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct net_device *dev = in_dev->dev;
+	struct in_ifaddr *ifa1;
+	struct in_ifaddr *prim = ifa;
+	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
+	u32 any = ifa->ifa_address&ifa->ifa_mask;
+#define LOCAL_OK	1
+#define BRD_OK		2
+#define BRD0_OK		4
+#define BRD1_OK		8
+	unsigned ok = 0;
+
+	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
+		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
+			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
+	else {
+		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
+		if (prim == NULL) {
+			printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
+			return;
+		}
+	}
+
+	/* Deletion is more complicated than add.
+	   We should take care of not to delete too much :-)
+
+	   Scan address list to be sure that addresses are really gone.
+	 */
+
+	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+		if (ifa->ifa_local == ifa1->ifa_local)
+			ok |= LOCAL_OK;
+		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
+			ok |= BRD_OK;
+		if (brd == ifa1->ifa_broadcast)
+			ok |= BRD1_OK;
+		if (any == ifa1->ifa_broadcast)
+			ok |= BRD0_OK;
+	}
+
+	if (!(ok&BRD_OK))
+		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+	if (!(ok&BRD1_OK))
+		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+	if (!(ok&BRD0_OK))
+		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+	if (!(ok&LOCAL_OK)) {
+		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+
+		/* Check, that this local address finally disappeared. */
+		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
+			/* And the last, but not the least thing.
+			   We must flush stray FIB entries.
+
+			   First of all, we scan fib_info list searching
+			   for stray nexthop entries, then ignite fib_flush.
+			*/
+			if (fib_sync_down(ifa->ifa_local, NULL, 0))
+				fib_flush();
+		}
+	}
+#undef LOCAL_OK
+#undef BRD_OK
+#undef BRD0_OK
+#undef BRD1_OK
+}
+
+static void fib_disable_ip(struct net_device *dev, int force)
+{
+	if (fib_sync_down(0, dev, force))
+		fib_flush();
+	rt_cache_flush(0);
+	arp_ifdown(dev);
+}
+
+static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+
+	switch (event) {
+	case NETDEV_UP:
+		fib_add_ifaddr(ifa);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+		fib_sync_up(ifa->ifa_dev->dev);
+#endif
+		rt_cache_flush(-1);
+		break;
+	case NETDEV_DOWN:
+		fib_del_ifaddr(ifa);
+		if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+			/* Last address was deleted from this interface.
+			   Disable IP.
+			 */
+			fib_disable_ip(ifa->ifa_dev->dev, 1);
+		} else {
+			rt_cache_flush(-1);
+		}
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct in_device *in_dev = __in_dev_get(dev);
+
+	if (event == NETDEV_UNREGISTER) {
+		fib_disable_ip(dev, 2);
+		return NOTIFY_DONE;
+	}
+
+	if (!in_dev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		for_ifa(in_dev) {
+			fib_add_ifaddr(ifa);
+		} endfor_ifa(in_dev);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+		fib_sync_up(dev);
+#endif
+		rt_cache_flush(-1);
+		break;
+	case NETDEV_DOWN:
+		fib_disable_ip(dev, 0);
+		break;
+	case NETDEV_CHANGEMTU:
+	case NETDEV_CHANGE:
+		rt_cache_flush(0);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_inetaddr_notifier = {
+	.notifier_call =fib_inetaddr_event,
+};
+
+static struct notifier_block fib_netdev_notifier = {
+	.notifier_call =fib_netdev_event,
+};
+
+void __init ip_fib_init(void)
+{
+#ifndef CONFIG_IP_MULTIPLE_TABLES
+	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+#else
+	fib_rules_init();
+#endif
+
+	register_netdevice_notifier(&fib_netdev_notifier);
+	register_inetaddr_notifier(&fib_inetaddr_notifier);
+}
+
+EXPORT_SYMBOL(inet_addr_type);
+EXPORT_SYMBOL(ip_dev_find);
+EXPORT_SYMBOL(ip_rt_ioctl);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
new file mode 100644
index 00000000000..6506dcc01b4
--- /dev/null
+++ b/net/ipv4/fib_hash.c
@@ -0,0 +1,1086 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IPv4 FIB: lookup engine and maintenance routines.
+ *
+ * Version:	$Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+
+#include "fib_lookup.h"
+
+static kmem_cache_t *fn_hash_kmem;
+static kmem_cache_t *fn_alias_kmem;
+
+struct fib_node {
+	struct hlist_node	fn_hash;
+	struct list_head	fn_alias;
+	u32			fn_key;
+};
+
+struct fn_zone {
+	struct fn_zone		*fz_next;	/* Next not empty zone	*/
+	struct hlist_head	*fz_hash;	/* Hash table pointer	*/
+	int			fz_nent;	/* Number of entries	*/
+
+	int			fz_divisor;	/* Hash divisor		*/
+	u32			fz_hashmask;	/* (fz_divisor - 1)	*/
+#define FZ_HASHMASK(fz)		((fz)->fz_hashmask)
+
+	int			fz_order;	/* Zone order		*/
+	u32			fz_mask;
+#define FZ_MASK(fz)		((fz)->fz_mask)
+};
+
+/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
+ * can be cheaper than memory lookup, so that FZ_* macros are used.
+ */
+
+struct fn_hash {
+	struct fn_zone	*fn_zones[33];
+	struct fn_zone	*fn_zone_list;
+};
+
+static inline u32 fn_hash(u32 key, struct fn_zone *fz)
+{
+	u32 h = ntohl(key)>>(32 - fz->fz_order);
+	h ^= (h>>20);
+	h ^= (h>>10);
+	h ^= (h>>5);
+	h &= FZ_HASHMASK(fz);
+	return h;
+}
+
+static inline u32 fz_key(u32 dst, struct fn_zone *fz)
+{
+	return dst & FZ_MASK(fz);
+}
+
+static DEFINE_RWLOCK(fib_hash_lock);
+static unsigned int fib_hash_genid;
+
+#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
+
+static struct hlist_head *fz_hash_alloc(int divisor)
+{
+	unsigned long size = divisor * sizeof(struct hlist_head);
+
+	if (size <= PAGE_SIZE) {
+		return kmalloc(size, GFP_KERNEL);
+	} else {
+		return (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(size));
+	}
+}
+
+/* The fib hash lock must be held when this is called. */
+static inline void fn_rebuild_zone(struct fn_zone *fz,
+				   struct hlist_head *old_ht,
+				   int old_divisor)
+{
+	int i;
+
+	for (i = 0; i < old_divisor; i++) {
+		struct hlist_node *node, *n;
+		struct fib_node *f;
+
+		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
+			struct hlist_head *new_head;
+
+			hlist_del(&f->fn_hash);
+
+			new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+			hlist_add_head(&f->fn_hash, new_head);
+		}
+	}
+}
+
+static void fz_hash_free(struct hlist_head *hash, int divisor)
+{
+	unsigned long size = divisor * sizeof(struct hlist_head);
+
+	if (size <= PAGE_SIZE)
+		kfree(hash);
+	else
+		free_pages((unsigned long)hash, get_order(size));
+}
+
+static void fn_rehash_zone(struct fn_zone *fz)
+{
+	struct hlist_head *ht, *old_ht;
+	int old_divisor, new_divisor;
+	u32 new_hashmask;
+		
+	old_divisor = fz->fz_divisor;
+
+	switch (old_divisor) {
+	case 16:
+		new_divisor = 256;
+		break;
+	case 256:
+		new_divisor = 1024;
+		break;
+	default:
+		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
+			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
+			return;
+		}
+		new_divisor = (old_divisor << 1);
+		break;
+	}
+
+	new_hashmask = (new_divisor - 1);
+
+#if RT_CACHE_DEBUG >= 2
+	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
+#endif
+
+	ht = fz_hash_alloc(new_divisor);
+
+	if (ht)	{
+		memset(ht, 0, new_divisor * sizeof(struct hlist_head));
+
+		write_lock_bh(&fib_hash_lock);
+		old_ht = fz->fz_hash;
+		fz->fz_hash = ht;
+		fz->fz_hashmask = new_hashmask;
+		fz->fz_divisor = new_divisor;
+		fn_rebuild_zone(fz, old_ht, old_divisor);
+		fib_hash_genid++;
+		write_unlock_bh(&fib_hash_lock);
+
+		fz_hash_free(old_ht, old_divisor);
+	}
+}
+
+static inline void fn_free_node(struct fib_node * f)
+{
+	kmem_cache_free(fn_hash_kmem, f);
+}
+
+static inline void fn_free_alias(struct fib_alias *fa)
+{
+	fib_release_info(fa->fa_info);
+	kmem_cache_free(fn_alias_kmem, fa);
+}
+
+static struct fn_zone *
+fn_new_zone(struct fn_hash *table, int z)
+{
+	int i;
+	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
+	if (!fz)
+		return NULL;
+
+	memset(fz, 0, sizeof(struct fn_zone));
+	if (z) {
+		fz->fz_divisor = 16;
+	} else {
+		fz->fz_divisor = 1;
+	}
+	fz->fz_hashmask = (fz->fz_divisor - 1);
+	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
+	if (!fz->fz_hash) {
+		kfree(fz);
+		return NULL;
+	}
+	memset(fz->fz_hash, 0, fz->fz_divisor * sizeof(struct hlist_head *));
+	fz->fz_order = z;
+	fz->fz_mask = inet_make_mask(z);
+
+	/* Find the first not empty zone with more specific mask */
+	for (i=z+1; i<=32; i++)
+		if (table->fn_zones[i])
+			break;
+	write_lock_bh(&fib_hash_lock);
+	if (i>32) {
+		/* No more specific masks, we are the first. */
+		fz->fz_next = table->fn_zone_list;
+		table->fn_zone_list = fz;
+	} else {
+		fz->fz_next = table->fn_zones[i]->fz_next;
+		table->fn_zones[i]->fz_next = fz;
+	}
+	table->fn_zones[z] = fz;
+	fib_hash_genid++;
+	write_unlock_bh(&fib_hash_lock);
+	return fz;
+}
+
+static int
+fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+{
+	int err;
+	struct fn_zone *fz;
+	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
+
+	read_lock(&fib_hash_lock);
+	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
+		struct hlist_head *head;
+		struct hlist_node *node;
+		struct fib_node *f;
+		u32 k = fz_key(flp->fl4_dst, fz);
+
+		head = &fz->fz_hash[fn_hash(k, fz)];
+		hlist_for_each_entry(f, node, head, fn_hash) {
+			if (f->fn_key != k)
+				continue;
+
+			err = fib_semantic_match(&f->fn_alias,
+						 flp, res,
+						 f->fn_key, fz->fz_mask,
+						 fz->fz_order);
+			if (err <= 0)
+				goto out;
+		}
+	}
+	err = 1;
+out:
+	read_unlock(&fib_hash_lock);
+	return err;
+}
+
+static int fn_hash_last_dflt=-1;
+
+static void
+fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+{
+	int order, last_idx;
+	struct hlist_node *node;
+	struct fib_node *f;
+	struct fib_info *fi = NULL;
+	struct fib_info *last_resort;
+	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
+	struct fn_zone *fz = t->fn_zones[0];
+
+	if (fz == NULL)
+		return;
+
+	last_idx = -1;
+	last_resort = NULL;
+	order = -1;
+
+	read_lock(&fib_hash_lock);
+	hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
+		struct fib_alias *fa;
+
+		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+			struct fib_info *next_fi = fa->fa_info;
+
+			if (fa->fa_scope != res->scope ||
+			    fa->fa_type != RTN_UNICAST)
+				continue;
+
+			if (next_fi->fib_priority > res->fi->fib_priority)
+				break;
+			if (!next_fi->fib_nh[0].nh_gw ||
+			    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
+				continue;
+			fa->fa_state |= FA_S_ACCESSED;
+
+			if (fi == NULL) {
+				if (next_fi != res->fi)
+					break;
+			} else if (!fib_detect_death(fi, order, &last_resort,
+						     &last_idx, &fn_hash_last_dflt)) {
+				if (res->fi)
+					fib_info_put(res->fi);
+				res->fi = fi;
+				atomic_inc(&fi->fib_clntref);
+				fn_hash_last_dflt = order;
+				goto out;
+			}
+			fi = next_fi;
+			order++;
+		}
+	}
+
+	if (order <= 0 || fi == NULL) {
+		fn_hash_last_dflt = -1;
+		goto out;
+	}
+
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
+		if (res->fi)
+			fib_info_put(res->fi);
+		res->fi = fi;
+		atomic_inc(&fi->fib_clntref);
+		fn_hash_last_dflt = order;
+		goto out;
+	}
+
+	if (last_idx >= 0) {
+		if (res->fi)
+			fib_info_put(res->fi);
+		res->fi = last_resort;
+		if (last_resort)
+			atomic_inc(&last_resort->fib_clntref);
+	}
+	fn_hash_last_dflt = last_idx;
+out:
+	read_unlock(&fib_hash_lock);
+}
+
+/* Insert node F to FZ. */
+static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
+{
+	struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+
+	hlist_add_head(&f->fn_hash, head);
+}
+
+/* Return the node in FZ matching KEY. */
+static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
+{
+	struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
+	struct hlist_node *node;
+	struct fib_node *f;
+
+	hlist_for_each_entry(f, node, head, fn_hash) {
+		if (f->fn_key == key)
+			return f;
+	}
+
+	return NULL;
+}
+
+static int
+fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
+	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+	struct fib_node *new_f, *f;
+	struct fib_alias *fa, *new_fa;
+	struct fn_zone *fz;
+	struct fib_info *fi;
+	int z = r->rtm_dst_len;
+	int type = r->rtm_type;
+	u8 tos = r->rtm_tos;
+	u32 key;
+	int err;
+
+	if (z > 32)
+		return -EINVAL;
+	fz = table->fn_zones[z];
+	if (!fz && !(fz = fn_new_zone(table, z)))
+		return -ENOBUFS;
+
+	key = 0;
+	if (rta->rta_dst) {
+		u32 dst;
+		memcpy(&dst, rta->rta_dst, 4);
+		if (dst & ~FZ_MASK(fz))
+			return -EINVAL;
+		key = fz_key(dst, fz);
+	}
+
+	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
+		return err;
+
+	if (fz->fz_nent > (fz->fz_divisor<<1) &&
+	    fz->fz_divisor < FZ_MAX_DIVISOR &&
+	    (z==32 || (1<<z) > fz->fz_divisor))
+		fn_rehash_zone(fz);
+
+	f = fib_find_node(fz, key);
+
+	if (!f)
+		fa = NULL;
+	else
+		fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
+
+	/* Now fa, if non-NULL, points to the first fib alias
+	 * with the same keys [prefix,tos,priority], if such key already
+	 * exists or to the node before which we will insert new one.
+	 *
+	 * If fa is NULL, we will need to allocate a new one and
+	 * insert to the head of f.
+	 *
+	 * If f is NULL, no fib node matched the destination key
+	 * and we need to allocate a new one of those as well.
+	 */
+
+	if (fa && fa->fa_tos == tos &&
+	    fa->fa_info->fib_priority == fi->fib_priority) {
+		struct fib_alias *fa_orig;
+
+		err = -EEXIST;
+		if (n->nlmsg_flags & NLM_F_EXCL)
+			goto out;
+
+		if (n->nlmsg_flags & NLM_F_REPLACE) {
+			struct fib_info *fi_drop;
+			u8 state;
+
+			write_lock_bh(&fib_hash_lock);
+			fi_drop = fa->fa_info;
+			fa->fa_info = fi;
+			fa->fa_type = type;
+			fa->fa_scope = r->rtm_scope;
+			state = fa->fa_state;
+			fa->fa_state &= ~FA_S_ACCESSED;
+			fib_hash_genid++;
+			write_unlock_bh(&fib_hash_lock);
+
+			fib_release_info(fi_drop);
+			if (state & FA_S_ACCESSED)
+				rt_cache_flush(-1);
+			return 0;
+		}
+
+		/* Error if we find a perfect match which
+		 * uses the same scope, type, and nexthop
+		 * information.
+		 */
+		fa_orig = fa;
+		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
+			if (fa->fa_tos != tos)
+				break;
+			if (fa->fa_info->fib_priority != fi->fib_priority)
+				break;
+			if (fa->fa_type == type &&
+			    fa->fa_scope == r->rtm_scope &&
+			    fa->fa_info == fi)
+				goto out;
+		}
+		if (!(n->nlmsg_flags & NLM_F_APPEND))
+			fa = fa_orig;
+	}
+
+	err = -ENOENT;
+	if (!(n->nlmsg_flags&NLM_F_CREATE))
+		goto out;
+
+	err = -ENOBUFS;
+	new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
+	if (new_fa == NULL)
+		goto out;
+
+	new_f = NULL;
+	if (!f) {
+		new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
+		if (new_f == NULL)
+			goto out_free_new_fa;
+
+		INIT_HLIST_NODE(&new_f->fn_hash);
+		INIT_LIST_HEAD(&new_f->fn_alias);
+		new_f->fn_key = key;
+		f = new_f;
+	}
+
+	new_fa->fa_info = fi;
+	new_fa->fa_tos = tos;
+	new_fa->fa_type = type;
+	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_state = 0;
+
+	/*
+	 * Insert new entry to the list.
+	 */
+
+	write_lock_bh(&fib_hash_lock);
+	if (new_f)
+		fib_insert_node(fz, new_f);
+	list_add_tail(&new_fa->fa_list,
+		 (fa ? &fa->fa_list : &f->fn_alias));
+	fib_hash_genid++;
+	write_unlock_bh(&fib_hash_lock);
+
+	if (new_f)
+		fz->fz_nent++;
+	rt_cache_flush(-1);
+
+	rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+	return 0;
+
+out_free_new_fa:
+	kmem_cache_free(fn_alias_kmem, new_fa);
+out:
+	fib_release_info(fi);
+	return err;
+}
+
+
+static int
+fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
+	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+	struct fib_node *f;
+	struct fib_alias *fa, *fa_to_delete;
+	int z = r->rtm_dst_len;
+	struct fn_zone *fz;
+	u32 key;
+	u8 tos = r->rtm_tos;
+
+	if (z > 32)
+		return -EINVAL;
+	if ((fz  = table->fn_zones[z]) == NULL)
+		return -ESRCH;
+
+	key = 0;
+	if (rta->rta_dst) {
+		u32 dst;
+		memcpy(&dst, rta->rta_dst, 4);
+		if (dst & ~FZ_MASK(fz))
+			return -EINVAL;
+		key = fz_key(dst, fz);
+	}
+
+	f = fib_find_node(fz, key);
+
+	if (!f)
+		fa = NULL;
+	else
+		fa = fib_find_alias(&f->fn_alias, tos, 0);
+	if (!fa)
+		return -ESRCH;
+
+	fa_to_delete = NULL;
+	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (fa->fa_tos != tos)
+			break;
+
+		if ((!r->rtm_type ||
+		     fa->fa_type == r->rtm_type) &&
+		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == r->rtm_scope) &&
+		    (!r->rtm_protocol ||
+		     fi->fib_protocol == r->rtm_protocol) &&
+		    fib_nh_match(r, n, rta, fi) == 0) {
+			fa_to_delete = fa;
+			break;
+		}
+	}
+
+	if (fa_to_delete) {
+		int kill_fn;
+
+		fa = fa_to_delete;
+		rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+
+		kill_fn = 0;
+		write_lock_bh(&fib_hash_lock);
+		list_del(&fa->fa_list);
+		if (list_empty(&f->fn_alias)) {
+			hlist_del(&f->fn_hash);
+			kill_fn = 1;
+		}
+		fib_hash_genid++;
+		write_unlock_bh(&fib_hash_lock);
+
+		if (fa->fa_state & FA_S_ACCESSED)
+			rt_cache_flush(-1);
+		fn_free_alias(fa);
+		if (kill_fn) {
+			fn_free_node(f);
+			fz->fz_nent--;
+		}
+
+		return 0;
+	}
+	return -ESRCH;
+}
+
+static int fn_flush_list(struct fn_zone *fz, int idx)
+{
+	struct hlist_head *head = &fz->fz_hash[idx];
+	struct hlist_node *node, *n;
+	struct fib_node *f;
+	int found = 0;
+
+	hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
+		struct fib_alias *fa, *fa_node;
+		int kill_f;
+
+		kill_f = 0;
+		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
+			struct fib_info *fi = fa->fa_info;
+
+			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
+				write_lock_bh(&fib_hash_lock);
+				list_del(&fa->fa_list);
+				if (list_empty(&f->fn_alias)) {
+					hlist_del(&f->fn_hash);
+					kill_f = 1;
+				}
+				fib_hash_genid++;
+				write_unlock_bh(&fib_hash_lock);
+
+				fn_free_alias(fa);
+				found++;
+			}
+		}
+		if (kill_f) {
+			fn_free_node(f);
+			fz->fz_nent--;
+		}
+	}
+	return found;
+}
+
+static int fn_hash_flush(struct fib_table *tb)
+{
+	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+	struct fn_zone *fz;
+	int found = 0;
+
+	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
+		int i;
+
+		for (i = fz->fz_divisor - 1; i >= 0; i--)
+			found += fn_flush_list(fz, i);
+	}
+	return found;
+}
+
+
+static inline int
+fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
+		     struct fib_table *tb,
+		     struct fn_zone *fz,
+		     struct hlist_head *head)
+{
+	struct hlist_node *node;
+	struct fib_node *f;
+	int i, s_i;
+
+	s_i = cb->args[3];
+	i = 0;
+	hlist_for_each_entry(f, node, head, fn_hash) {
+		struct fib_alias *fa;
+
+		list_for_each_entry(fa, &f->fn_alias, fa_list) {
+			if (i < s_i)
+				goto next;
+
+			if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq,
+					  RTM_NEWROUTE,
+					  tb->tb_id,
+					  fa->fa_type,
+					  fa->fa_scope,
+					  &f->fn_key,
+					  fz->fz_order,
+					  fa->fa_tos,
+					  fa->fa_info) < 0) {
+				cb->args[3] = i;
+				return -1;
+			}
+		next:
+			i++;
+		}
+	}
+	cb->args[3] = i;
+	return skb->len;
+}
+
+static inline int
+fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
+		   struct fib_table *tb,
+		   struct fn_zone *fz)
+{
+	int h, s_h;
+
+	s_h = cb->args[2];
+	for (h=0; h < fz->fz_divisor; h++) {
+		if (h < s_h) continue;
+		if (h > s_h)
+			memset(&cb->args[3], 0,
+			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+		if (fz->fz_hash == NULL ||
+		    hlist_empty(&fz->fz_hash[h]))
+			continue;
+		if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
+			cb->args[2] = h;
+			return -1;
+		}
+	}
+	cb->args[2] = h;
+	return skb->len;
+}
+
+static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int m, s_m;
+	struct fn_zone *fz;
+	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+
+	s_m = cb->args[1];
+	read_lock(&fib_hash_lock);
+	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
+		if (m < s_m) continue;
+		if (m > s_m)
+			memset(&cb->args[2], 0,
+			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
+			cb->args[1] = m;
+			read_unlock(&fib_hash_lock);
+			return -1;
+		}
+	}
+	read_unlock(&fib_hash_lock);
+	cb->args[1] = m;
+	return skb->len;
+}
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+struct fib_table * fib_hash_init(int id)
+#else
+struct fib_table * __init fib_hash_init(int id)
+#endif
+{
+	struct fib_table *tb;
+
+	if (fn_hash_kmem == NULL)
+		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
+						 sizeof(struct fib_node),
+						 0, SLAB_HWCACHE_ALIGN,
+						 NULL, NULL);
+
+	if (fn_alias_kmem == NULL)
+		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
+						  sizeof(struct fib_alias),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+
+	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
+		     GFP_KERNEL);
+	if (tb == NULL)
+		return NULL;
+
+	tb->tb_id = id;
+	tb->tb_lookup = fn_hash_lookup;
+	tb->tb_insert = fn_hash_insert;
+	tb->tb_delete = fn_hash_delete;
+	tb->tb_flush = fn_hash_flush;
+	tb->tb_select_default = fn_hash_select_default;
+	tb->tb_dump = fn_hash_dump;
+	memset(tb->tb_data, 0, sizeof(struct fn_hash));
+	return tb;
+}
+
+/* ------------------------------------------------------------------------ */
+#ifdef CONFIG_PROC_FS
+
+struct fib_iter_state {
+	struct fn_zone	*zone;
+	int		bucket;
+	struct hlist_head *hash_head;
+	struct fib_node *fn;
+	struct fib_alias *fa;
+	loff_t pos;
+	unsigned int genid;
+	int valid;
+};
+
+static struct fib_alias *fib_get_first(struct seq_file *seq)
+{
+	struct fib_iter_state *iter = seq->private;
+	struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;
+
+	iter->bucket    = 0;
+	iter->hash_head = NULL;
+	iter->fn        = NULL;
+	iter->fa        = NULL;
+	iter->pos	= 0;
+	iter->genid	= fib_hash_genid;
+	iter->valid	= 1;
+
+	for (iter->zone = table->fn_zone_list; iter->zone;
+	     iter->zone = iter->zone->fz_next) {
+		int maxslot;
+
+		if (!iter->zone->fz_nent)
+			continue;
+
+		iter->hash_head = iter->zone->fz_hash;
+		maxslot = iter->zone->fz_divisor;
+
+		for (iter->bucket = 0; iter->bucket < maxslot;
+		     ++iter->bucket, ++iter->hash_head) {
+			struct hlist_node *node;
+			struct fib_node *fn;
+
+			hlist_for_each_entry(fn,node,iter->hash_head,fn_hash) {
+				struct fib_alias *fa;
+
+				list_for_each_entry(fa,&fn->fn_alias,fa_list) {
+					iter->fn = fn;
+					iter->fa = fa;
+					goto out;
+				}
+			}
+		}
+	}
+out:
+	return iter->fa;
+}
+
+static struct fib_alias *fib_get_next(struct seq_file *seq)
+{
+	struct fib_iter_state *iter = seq->private;
+	struct fib_node *fn;
+	struct fib_alias *fa;
+
+	/* Advance FA, if any. */
+	fn = iter->fn;
+	fa = iter->fa;
+	if (fa) {
+		BUG_ON(!fn);
+		list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
+			iter->fa = fa;
+			goto out;
+		}
+	}
+
+	fa = iter->fa = NULL;
+
+	/* Advance FN. */
+	if (fn) {
+		struct hlist_node *node = &fn->fn_hash;
+		hlist_for_each_entry_continue(fn, node, fn_hash) {
+			iter->fn = fn;
+
+			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
+				iter->fa = fa;
+				goto out;
+			}
+		}
+	}
+
+	fn = iter->fn = NULL;
+
+	/* Advance hash chain. */
+	if (!iter->zone)
+		goto out;
+
+	for (;;) {
+		struct hlist_node *node;
+		int maxslot;
+
+		maxslot = iter->zone->fz_divisor;
+
+		while (++iter->bucket < maxslot) {
+			iter->hash_head++;
+
+			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
+				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
+					iter->fn = fn;
+					iter->fa = fa;
+					goto out;
+				}
+			}
+		}
+
+		iter->zone = iter->zone->fz_next;
+
+		if (!iter->zone)
+			goto out;
+		
+		iter->bucket = 0;
+		iter->hash_head = iter->zone->fz_hash;
+
+		hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
+			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
+				iter->fn = fn;
+				iter->fa = fa;
+				goto out;
+			}
+		}
+	}
+out:
+	iter->pos++;
+	return fa;
+}
+
+static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct fib_iter_state *iter = seq->private;
+	struct fib_alias *fa;
+	
+	if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
+		fa   = iter->fa;
+		pos -= iter->pos;
+	} else
+		fa = fib_get_first(seq);
+
+	if (fa)
+		while (pos && (fa = fib_get_next(seq)))
+			--pos;
+	return pos ? NULL : fa;
+}
+
+static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	void *v = NULL;
+
+	read_lock(&fib_hash_lock);
+	if (ip_fib_main_table)
+		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	return v;
+}
+
+static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
+}
+
+static void fib_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&fib_hash_lock);
+}
+
+static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
+{
+	static unsigned type2flags[RTN_MAX + 1] = {
+		[7] = RTF_REJECT, [8] = RTF_REJECT,
+	};
+	unsigned flags = type2flags[type];
+
+	if (fi && fi->fib_nh->nh_gw)
+		flags |= RTF_GATEWAY;
+	if (mask == 0xFFFFFFFF)
+		flags |= RTF_HOST;
+	flags |= RTF_UP;
+	return flags;
+}
+
+/* 
+ *	This outputs /proc/net/route.
+ *
+ *	It always works in backward compatibility mode.
+ *	The format of the file is not supposed to be changed.
+ */
+static int fib_seq_show(struct seq_file *seq, void *v)
+{
+	struct fib_iter_state *iter;
+	char bf[128];
+	u32 prefix, mask;
+	unsigned flags;
+	struct fib_node *f;
+	struct fib_alias *fa;
+	struct fib_info *fi;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
+			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
+			   "\tWindow\tIRTT");
+		goto out;
+	}
+
+	iter	= seq->private;
+	f	= iter->fn;
+	fa	= iter->fa;
+	fi	= fa->fa_info;
+	prefix	= f->fn_key;
+	mask	= FZ_MASK(iter->zone);
+	flags	= fib_flag_trans(fa->fa_type, mask, fi);
+	if (fi)
+		snprintf(bf, sizeof(bf),
+			 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+			 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
+			 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
+			 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
+			 fi->fib_window,
+			 fi->fib_rtt >> 3);
+	else
+		snprintf(bf, sizeof(bf),
+			 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+			 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0);
+	seq_printf(seq, "%-127s\n", bf);
+out:
+	return 0;
+}
+
+static struct seq_operations fib_seq_ops = {
+	.start  = fib_seq_start,
+	.next   = fib_seq_next,
+	.stop   = fib_seq_stop,
+	.show   = fib_seq_show,
+};
+
+static int fib_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &fib_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations fib_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = fib_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+
+int __init fib_proc_init(void)
+{
+	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+void __init fib_proc_exit(void)
+{
+	proc_net_remove("route");
+}
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
new file mode 100644
index 00000000000..ac4485f75e9
--- /dev/null
+++ b/net/ipv4/fib_lookup.h
@@ -0,0 +1,43 @@
+#ifndef _FIB_LOOKUP_H
+#define _FIB_LOOKUP_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <net/ip_fib.h>
+
+struct fib_alias {
+	struct list_head	fa_list;
+	struct fib_info		*fa_info;
+	u8			fa_tos;
+	u8			fa_type;
+	u8			fa_scope;
+	u8			fa_state;
+};
+
+#define FA_S_ACCESSED	0x01
+
+/* Exported by fib_semantics.c */
+extern int fib_semantic_match(struct list_head *head,
+			      const struct flowi *flp,
+			      struct fib_result *res, __u32 zone, __u32 mask,
+				int prefixlen);
+extern void fib_release_info(struct fib_info *);
+extern struct fib_info *fib_create_info(const struct rtmsg *r,
+					struct kern_rta *rta,
+					const struct nlmsghdr *,
+					int *err);
+extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
+			struct kern_rta *rta, struct fib_info *fi);
+extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+			 u8 tb_id, u8 type, u8 scope, void *dst,
+			 int dst_len, u8 tos, struct fib_info *fi);
+extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
+		      int z, int tb_id,
+		      struct nlmsghdr *n, struct netlink_skb_parms *req);
+extern struct fib_alias *fib_find_alias(struct list_head *fah,
+					u8 tos, u32 prio);
+extern int fib_detect_death(struct fib_info *fi, int order,
+			    struct fib_info **last_resort,
+			    int *last_idx, int *dflt);
+
+#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
new file mode 100644
index 00000000000..39d0aadb9a2
--- /dev/null
+++ b/net/ipv4/fib_rules.c
@@ -0,0 +1,437 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IPv4 Forwarding Information Base: policy rules.
+ *
+ * Version:	$Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ * 		Rani Assaf	:	local_rule cannot be deleted
+ *		Marc Boucher	:	routing by fwmark
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+
+#define FRprintk(a...)
+
+struct fib_rule
+{
+	struct fib_rule *r_next;
+	atomic_t	r_clntref;
+	u32		r_preference;
+	unsigned char	r_table;
+	unsigned char	r_action;
+	unsigned char	r_dst_len;
+	unsigned char	r_src_len;
+	u32		r_src;
+	u32		r_srcmask;
+	u32		r_dst;
+	u32		r_dstmask;
+	u32		r_srcmap;
+	u8		r_flags;
+	u8		r_tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	u32		r_fwmark;
+#endif
+	int		r_ifindex;
+#ifdef CONFIG_NET_CLS_ROUTE
+	__u32		r_tclassid;
+#endif
+	char		r_ifname[IFNAMSIZ];
+	int		r_dead;
+};
+
+static struct fib_rule default_rule = {
+	.r_clntref =	ATOMIC_INIT(2),
+	.r_preference =	0x7FFF,
+	.r_table =	RT_TABLE_DEFAULT,
+	.r_action =	RTN_UNICAST,
+};
+
+static struct fib_rule main_rule = {
+	.r_next =	&default_rule,
+	.r_clntref =	ATOMIC_INIT(2),
+	.r_preference =	0x7FFE,
+	.r_table =	RT_TABLE_MAIN,
+	.r_action =	RTN_UNICAST,
+};
+
+static struct fib_rule local_rule = {
+	.r_next =	&main_rule,
+	.r_clntref =	ATOMIC_INIT(2),
+	.r_table =	RT_TABLE_LOCAL,
+	.r_action =	RTN_UNICAST,
+};
+
+static struct fib_rule *fib_rules = &local_rule;
+static DEFINE_RWLOCK(fib_rules_lock);
+
+int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct fib_rule *r, **rp;
+	int err = -ESRCH;
+
+	for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
+		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
+		    rtm->rtm_src_len == r->r_src_len &&
+		    rtm->rtm_dst_len == r->r_dst_len &&
+		    (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
+		    rtm->rtm_tos == r->r_tos &&
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		    (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
+#endif
+		    (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
+		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
+		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
+		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
+			err = -EPERM;
+			if (r == &local_rule)
+				break;
+
+			write_lock_bh(&fib_rules_lock);
+			*rp = r->r_next;
+			r->r_dead = 1;
+			write_unlock_bh(&fib_rules_lock);
+			fib_rule_put(r);
+			err = 0;
+			break;
+		}
+	}
+	return err;
+}
+
+/* Allocate new unique table id */
+
+static struct fib_table *fib_empty_table(void)
+{
+	int id;
+
+	for (id = 1; id <= RT_TABLE_MAX; id++)
+		if (fib_tables[id] == NULL)
+			return __fib_new_table(id);
+	return NULL;
+}
+
+void fib_rule_put(struct fib_rule *r)
+{
+	if (atomic_dec_and_test(&r->r_clntref)) {
+		if (r->r_dead)
+			kfree(r);
+		else
+			printk("Freeing alive rule %p\n", r);
+	}
+}
+
+int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct fib_rule *r, *new_r, **rp;
+	unsigned char table_id;
+
+	if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
+	    (rtm->rtm_tos & ~IPTOS_TOS_MASK))
+		return -EINVAL;
+
+	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
+		return -EINVAL;
+
+	table_id = rtm->rtm_table;
+	if (table_id == RT_TABLE_UNSPEC) {
+		struct fib_table *table;
+		if (rtm->rtm_type == RTN_UNICAST) {
+			if ((table = fib_empty_table()) == NULL)
+				return -ENOBUFS;
+			table_id = table->tb_id;
+		}
+	}
+
+	new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+	if (!new_r)
+		return -ENOMEM;
+	memset(new_r, 0, sizeof(*new_r));
+	if (rta[RTA_SRC-1])
+		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
+	if (rta[RTA_DST-1])
+		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
+	if (rta[RTA_GATEWAY-1])
+		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
+	new_r->r_src_len = rtm->rtm_src_len;
+	new_r->r_dst_len = rtm->rtm_dst_len;
+	new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
+	new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
+	new_r->r_tos = rtm->rtm_tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (rta[RTA_PROTOINFO-1])
+		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+#endif
+	new_r->r_action = rtm->rtm_type;
+	new_r->r_flags = rtm->rtm_flags;
+	if (rta[RTA_PRIORITY-1])
+		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+	new_r->r_table = table_id;
+	if (rta[RTA_IIF-1]) {
+		struct net_device *dev;
+		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
+		new_r->r_ifindex = -1;
+		dev = __dev_get_by_name(new_r->r_ifname);
+		if (dev)
+			new_r->r_ifindex = dev->ifindex;
+	}
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (rta[RTA_FLOW-1])
+		memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
+#endif
+
+	rp = &fib_rules;
+	if (!new_r->r_preference) {
+		r = fib_rules;
+		if (r && (r = r->r_next) != NULL) {
+			rp = &fib_rules->r_next;
+			if (r->r_preference)
+				new_r->r_preference = r->r_preference - 1;
+		}
+	}
+
+	while ( (r = *rp) != NULL ) {
+		if (r->r_preference > new_r->r_preference)
+			break;
+		rp = &r->r_next;
+	}
+
+	new_r->r_next = r;
+	atomic_inc(&new_r->r_clntref);
+	write_lock_bh(&fib_rules_lock);
+	*rp = new_r;
+	write_unlock_bh(&fib_rules_lock);
+	return 0;
+}
+
+#ifdef CONFIG_NET_CLS_ROUTE
+u32 fib_rules_tclass(struct fib_result *res)
+{
+	if (res->r)
+		return res->r->r_tclassid;
+	return 0;
+}
+#endif
+
+
+static void fib_rules_detach(struct net_device *dev)
+{
+	struct fib_rule *r;
+
+	for (r=fib_rules; r; r=r->r_next) {
+		if (r->r_ifindex == dev->ifindex) {
+			write_lock_bh(&fib_rules_lock);
+			r->r_ifindex = -1;
+			write_unlock_bh(&fib_rules_lock);
+		}
+	}
+}
+
+static void fib_rules_attach(struct net_device *dev)
+{
+	struct fib_rule *r;
+
+	for (r=fib_rules; r; r=r->r_next) {
+		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) {
+			write_lock_bh(&fib_rules_lock);
+			r->r_ifindex = dev->ifindex;
+			write_unlock_bh(&fib_rules_lock);
+		}
+	}
+}
+
+int fib_lookup(const struct flowi *flp, struct fib_result *res)
+{
+	int err;
+	struct fib_rule *r, *policy;
+	struct fib_table *tb;
+
+	u32 daddr = flp->fl4_dst;
+	u32 saddr = flp->fl4_src;
+
+FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
+	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+	read_lock(&fib_rules_lock);
+	for (r = fib_rules; r; r=r->r_next) {
+		if (((saddr^r->r_src) & r->r_srcmask) ||
+		    ((daddr^r->r_dst) & r->r_dstmask) ||
+		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+#endif
+		    (r->r_ifindex && r->r_ifindex != flp->iif))
+			continue;
+
+FRprintk("tb %d r %d ", r->r_table, r->r_action);
+		switch (r->r_action) {
+		case RTN_UNICAST:
+			policy = r;
+			break;
+		case RTN_UNREACHABLE:
+			read_unlock(&fib_rules_lock);
+			return -ENETUNREACH;
+		default:
+		case RTN_BLACKHOLE:
+			read_unlock(&fib_rules_lock);
+			return -EINVAL;
+		case RTN_PROHIBIT:
+			read_unlock(&fib_rules_lock);
+			return -EACCES;
+		}
+
+		if ((tb = fib_get_table(r->r_table)) == NULL)
+			continue;
+		err = tb->tb_lookup(tb, flp, res);
+		if (err == 0) {
+			res->r = policy;
+			if (policy)
+				atomic_inc(&policy->r_clntref);
+			read_unlock(&fib_rules_lock);
+			return 0;
+		}
+		if (err < 0 && err != -EAGAIN) {
+			read_unlock(&fib_rules_lock);
+			return err;
+		}
+	}
+FRprintk("FAILURE\n");
+	read_unlock(&fib_rules_lock);
+	return -ENETUNREACH;
+}
+
+void fib_select_default(const struct flowi *flp, struct fib_result *res)
+{
+	if (res->r && res->r->r_action == RTN_UNICAST &&
+	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+		struct fib_table *tb;
+		if ((tb = fib_get_table(res->r->r_table)) != NULL)
+			tb->tb_select_default(tb, flp, res);
+	}
+}
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	if (event == NETDEV_UNREGISTER)
+		fib_rules_detach(dev);
+	else if (event == NETDEV_REGISTER)
+		fib_rules_attach(dev);
+	return NOTIFY_DONE;
+}
+
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call =fib_rules_event,
+};
+
+static __inline__ int inet_fill_rule(struct sk_buff *skb,
+				     struct fib_rule *r,
+				     struct netlink_callback *cb)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	rtm = NLMSG_DATA(nlh);
+	rtm->rtm_family = AF_INET;
+	rtm->rtm_dst_len = r->r_dst_len;
+	rtm->rtm_src_len = r->r_src_len;
+	rtm->rtm_tos = r->r_tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (r->r_fwmark)
+		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
+#endif
+	rtm->rtm_table = r->r_table;
+	rtm->rtm_protocol = 0;
+	rtm->rtm_scope = 0;
+	rtm->rtm_type = r->r_action;
+	rtm->rtm_flags = r->r_flags;
+
+	if (r->r_dst_len)
+		RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
+	if (r->r_src_len)
+		RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
+	if (r->r_ifname[0])
+		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
+	if (r->r_preference)
+		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
+	if (r->r_srcmap)
+		RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (r->r_tclassid)
+		RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+#endif
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->args[0];
+	struct fib_rule *r;
+
+	read_lock(&fib_rules_lock);
+	for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (inet_fill_rule(skb, r, cb) < 0)
+			break;
+	}
+	read_unlock(&fib_rules_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+void __init fib_rules_init(void)
+{
+	register_netdevice_notifier(&fib_rules_notifier);
+}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
new file mode 100644
index 00000000000..029362d6613
--- /dev/null
+++ b/net/ipv4/fib_semantics.c
@@ -0,0 +1,1332 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IPv4 Forwarding Information Base: semantics.
+ *
+ * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+#include <net/ip_mp_alg.h>
+
+#include "fib_lookup.h"
+
+#define FSprintk(a...)
+
+static DEFINE_RWLOCK(fib_info_lock);
+static struct hlist_head *fib_info_hash;
+static struct hlist_head *fib_info_laddrhash;
+static unsigned int fib_hash_size;
+static unsigned int fib_info_cnt;
+
+#define DEVINDEX_HASHBITS 8
+#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
+static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+static DEFINE_SPINLOCK(fib_multipath_lock);
+
+#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
+for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
+for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#else /* CONFIG_IP_ROUTE_MULTIPATH */
+
+/* Hope, that gcc will optimize it to get rid of dummy loop */
+
+#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
+for (nhsel=0; nhsel < 1; nhsel++)
+
+#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
+for (nhsel=0; nhsel < 1; nhsel++)
+
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
+#define endfor_nexthops(fi) }
+
+
+static struct 
+{
+	int	error;
+	u8	scope;
+} fib_props[RTA_MAX + 1] = {
+        {
+		.error	= 0,
+		.scope	= RT_SCOPE_NOWHERE,
+	},	/* RTN_UNSPEC */
+	{
+		.error	= 0,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_UNICAST */
+	{
+		.error	= 0,
+		.scope	= RT_SCOPE_HOST,
+	},	/* RTN_LOCAL */
+	{
+		.error	= 0,
+		.scope	= RT_SCOPE_LINK,
+	},	/* RTN_BROADCAST */
+	{
+		.error	= 0,
+		.scope	= RT_SCOPE_LINK,
+	},	/* RTN_ANYCAST */
+	{
+		.error	= 0,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_MULTICAST */
+	{
+		.error	= -EINVAL,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_BLACKHOLE */
+	{
+		.error	= -EHOSTUNREACH,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_UNREACHABLE */
+	{
+		.error	= -EACCES,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_PROHIBIT */
+	{
+		.error	= -EAGAIN,
+		.scope	= RT_SCOPE_UNIVERSE,
+	},	/* RTN_THROW */
+	{
+		.error	= -EINVAL,
+		.scope	= RT_SCOPE_NOWHERE,
+	},	/* RTN_NAT */
+	{
+		.error	= -EINVAL,
+		.scope	= RT_SCOPE_NOWHERE,
+	},	/* RTN_XRESOLVE */
+};
+
+
+/* Release a nexthop info record */
+
+void free_fib_info(struct fib_info *fi)
+{
+	if (fi->fib_dead == 0) {
+		printk("Freeing alive fib_info %p\n", fi);
+		return;
+	}
+	change_nexthops(fi) {
+		if (nh->nh_dev)
+			dev_put(nh->nh_dev);
+		nh->nh_dev = NULL;
+	} endfor_nexthops(fi);
+	fib_info_cnt--;
+	kfree(fi);
+}
+
+void fib_release_info(struct fib_info *fi)
+{
+	write_lock(&fib_info_lock);
+	if (fi && --fi->fib_treeref == 0) {
+		hlist_del(&fi->fib_hash);
+		if (fi->fib_prefsrc)
+			hlist_del(&fi->fib_lhash);
+		change_nexthops(fi) {
+			if (!nh->nh_dev)
+				continue;
+			hlist_del(&nh->nh_hash);
+		} endfor_nexthops(fi)
+		fi->fib_dead = 1;
+		fib_info_put(fi);
+	}
+	write_unlock(&fib_info_lock);
+}
+
+static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+{
+	const struct fib_nh *onh = ofi->fib_nh;
+
+	for_nexthops(fi) {
+		if (nh->nh_oif != onh->nh_oif ||
+		    nh->nh_gw  != onh->nh_gw ||
+		    nh->nh_scope != onh->nh_scope ||
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+		    nh->nh_weight != onh->nh_weight ||
+#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+		    nh->nh_tclassid != onh->nh_tclassid ||
+#endif
+		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+			return -1;
+		onh++;
+	} endfor_nexthops(fi);
+	return 0;
+}
+
+static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
+{
+	unsigned int mask = (fib_hash_size - 1);
+	unsigned int val = fi->fib_nhs;
+
+	val ^= fi->fib_protocol;
+	val ^= fi->fib_prefsrc;
+	val ^= fi->fib_priority;
+
+	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+}
+
+static struct fib_info *fib_find_info(const struct fib_info *nfi)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct fib_info *fi;
+	unsigned int hash;
+
+	hash = fib_info_hashfn(nfi);
+	head = &fib_info_hash[hash];
+
+	hlist_for_each_entry(fi, node, head, fib_hash) {
+		if (fi->fib_nhs != nfi->fib_nhs)
+			continue;
+		if (nfi->fib_protocol == fi->fib_protocol &&
+		    nfi->fib_prefsrc == fi->fib_prefsrc &&
+		    nfi->fib_priority == fi->fib_priority &&
+		    memcmp(nfi->fib_metrics, fi->fib_metrics,
+			   sizeof(fi->fib_metrics)) == 0 &&
+		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+			return fi;
+	}
+
+	return NULL;
+}
+
+static inline unsigned int fib_devindex_hashfn(unsigned int val)
+{
+	unsigned int mask = DEVINDEX_HASHSIZE - 1;
+
+	return (val ^
+		(val >> DEVINDEX_HASHBITS) ^
+		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+}
+
+/* Check, that the gateway is already configured.
+   Used only by redirect accept routine.
+ */
+
+int ip_fib_check_default(u32 gw, struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct fib_nh *nh;
+	unsigned int hash;
+
+	read_lock(&fib_info_lock);
+
+	hash = fib_devindex_hashfn(dev->ifindex);
+	head = &fib_info_devhash[hash];
+	hlist_for_each_entry(nh, node, head, nh_hash) {
+		if (nh->nh_dev == dev &&
+		    nh->nh_gw == gw &&
+		    !(nh->nh_flags&RTNH_F_DEAD)) {
+			read_unlock(&fib_info_lock);
+			return 0;
+		}
+	}
+
+	read_unlock(&fib_info_lock);
+
+	return -1;
+}
+
+void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
+	       int z, int tb_id,
+	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+	struct sk_buff *skb;
+	u32 pid = req ? req->pid : 0;
+	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return;
+
+	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
+			  fa->fa_type, fa->fa_scope, &key, z,
+			  fa->fa_tos,
+			  fa->fa_info) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
+	if (n->nlmsg_flags&NLM_F_ECHO)
+		atomic_inc(&skb->users);
+	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
+	if (n->nlmsg_flags&NLM_F_ECHO)
+		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+}
+
+/* Return the first fib alias matching TOS with
+ * priority less than or equal to PRIO.
+ */
+struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
+{
+	if (fah) {
+		struct fib_alias *fa;
+		list_for_each_entry(fa, fah, fa_list) {
+			if (fa->fa_tos > tos)
+				continue;
+			if (fa->fa_info->fib_priority >= prio ||
+			    fa->fa_tos < tos)
+				return fa;
+		}
+	}
+	return NULL;
+}
+
+int fib_detect_death(struct fib_info *fi, int order,
+		     struct fib_info **last_resort, int *last_idx, int *dflt)
+{
+	struct neighbour *n;
+	int state = NUD_NONE;
+
+	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
+	if (n) {
+		state = n->nud_state;
+		neigh_release(n);
+	}
+	if (state==NUD_REACHABLE)
+		return 0;
+	if ((state&NUD_VALID) && order != *dflt)
+		return 0;
+	if ((state&NUD_VALID) ||
+	    (*last_idx<0 && order > *dflt)) {
+		*last_resort = fi;
+		*last_idx = order;
+	}
+	return 1;
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
+{
+	while (RTA_OK(attr,attrlen)) {
+		if (attr->rta_type == type)
+			return *(u32*)RTA_DATA(attr);
+		attr = RTA_NEXT(attr, attrlen);
+	}
+	return 0;
+}
+
+static int
+fib_count_nexthops(struct rtattr *rta)
+{
+	int nhs = 0;
+	struct rtnexthop *nhp = RTA_DATA(rta);
+	int nhlen = RTA_PAYLOAD(rta);
+
+	while (nhlen >= (int)sizeof(struct rtnexthop)) {
+		if ((nhlen -= nhp->rtnh_len) < 0)
+			return 0;
+		nhs++;
+		nhp = RTNH_NEXT(nhp);
+	};
+	return nhs;
+}
+
+static int
+fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+{
+	struct rtnexthop *nhp = RTA_DATA(rta);
+	int nhlen = RTA_PAYLOAD(rta);
+
+	change_nexthops(fi) {
+		int attrlen = nhlen - sizeof(struct rtnexthop);
+		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+			return -EINVAL;
+		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
+		nh->nh_oif = nhp->rtnh_ifindex;
+		nh->nh_weight = nhp->rtnh_hops + 1;
+		if (attrlen) {
+			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+#ifdef CONFIG_NET_CLS_ROUTE
+			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+#endif
+		}
+		nhp = RTNH_NEXT(nhp);
+	} endfor_nexthops(fi);
+	return 0;
+}
+
+#endif
+
+int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
+		 struct fib_info *fi)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	struct rtnexthop *nhp;
+	int nhlen;
+#endif
+
+	if (rta->rta_priority &&
+	    *rta->rta_priority != fi->fib_priority)
+		return 1;
+
+	if (rta->rta_oif || rta->rta_gw) {
+		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
+		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+			return 0;
+		return 1;
+	}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (rta->rta_mp == NULL)
+		return 0;
+	nhp = RTA_DATA(rta->rta_mp);
+	nhlen = RTA_PAYLOAD(rta->rta_mp);
+	
+	for_nexthops(fi) {
+		int attrlen = nhlen - sizeof(struct rtnexthop);
+		u32 gw;
+
+		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+			return -EINVAL;
+		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+			return 1;
+		if (attrlen) {
+			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+			if (gw && gw != nh->nh_gw)
+				return 1;
+#ifdef CONFIG_NET_CLS_ROUTE
+			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+			if (gw && gw != nh->nh_tclassid)
+				return 1;
+#endif
+		}
+		nhp = RTNH_NEXT(nhp);
+	} endfor_nexthops(fi);
+#endif
+	return 0;
+}
+
+
+/*
+   Picture
+   -------
+
+   Semantics of nexthop is very messy by historical reasons.
+   We have to take into account, that:
+   a) gateway can be actually local interface address,
+      so that gatewayed route is direct.
+   b) gateway must be on-link address, possibly
+      described not by an ifaddr, but also by a direct route.
+   c) If both gateway and interface are specified, they should not
+      contradict.
+   d) If we use tunnel routes, gateway could be not on-link.
+
+   Attempt to reconcile all of these (alas, self-contradictory) conditions
+   results in pretty ugly and hairy code with obscure logic.
+
+   I chose to generalized it instead, so that the size
+   of code does not increase practically, but it becomes
+   much more general.
+   Every prefix is assigned a "scope" value: "host" is local address,
+   "link" is direct route,
+   [ ... "site" ... "interior" ... ]
+   and "universe" is true gateway route with global meaning.
+
+   Every prefix refers to a set of "nexthop"s (gw, oif),
+   where gw must have narrower scope. This recursion stops
+   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
+   which means that gw is forced to be on link.
+
+   Code is still hairy, but now it is apparently logically
+   consistent and very flexible. F.e. as by-product it allows
+   to co-exists in peace independent exterior and interior
+   routing processes.
+
+   Normally it looks as following.
+
+   {universe prefix}  -> (gw, oif) [scope link]
+                          |
+			  |-> {link prefix} -> (gw, oif) [scope local]
+			                        |
+						|-> {local prefix} (terminal node)
+ */
+
+static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+{
+	int err;
+
+	if (nh->nh_gw) {
+		struct fib_result res;
+
+#ifdef CONFIG_IP_ROUTE_PERVASIVE
+		if (nh->nh_flags&RTNH_F_PERVASIVE)
+			return 0;
+#endif
+		if (nh->nh_flags&RTNH_F_ONLINK) {
+			struct net_device *dev;
+
+			if (r->rtm_scope >= RT_SCOPE_LINK)
+				return -EINVAL;
+			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
+				return -EINVAL;
+			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
+				return -ENODEV;
+			if (!(dev->flags&IFF_UP))
+				return -ENETDOWN;
+			nh->nh_dev = dev;
+			dev_hold(dev);
+			nh->nh_scope = RT_SCOPE_LINK;
+			return 0;
+		}
+		{
+			struct flowi fl = { .nl_u = { .ip4_u =
+						      { .daddr = nh->nh_gw,
+							.scope = r->rtm_scope + 1 } },
+					    .oif = nh->nh_oif };
+
+			/* It is not necessary, but requires a bit of thinking */
+			if (fl.fl4_scope < RT_SCOPE_LINK)
+				fl.fl4_scope = RT_SCOPE_LINK;
+			if ((err = fib_lookup(&fl, &res)) != 0)
+				return err;
+		}
+		err = -EINVAL;
+		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+			goto out;
+		nh->nh_scope = res.scope;
+		nh->nh_oif = FIB_RES_OIF(res);
+		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
+			goto out;
+		dev_hold(nh->nh_dev);
+		err = -ENETDOWN;
+		if (!(nh->nh_dev->flags & IFF_UP))
+			goto out;
+		err = 0;
+out:
+		fib_res_put(&res);
+		return err;
+	} else {
+		struct in_device *in_dev;
+
+		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+			return -EINVAL;
+
+		in_dev = inetdev_by_index(nh->nh_oif);
+		if (in_dev == NULL)
+			return -ENODEV;
+		if (!(in_dev->dev->flags&IFF_UP)) {
+			in_dev_put(in_dev);
+			return -ENETDOWN;
+		}
+		nh->nh_dev = in_dev->dev;
+		dev_hold(nh->nh_dev);
+		nh->nh_scope = RT_SCOPE_HOST;
+		in_dev_put(in_dev);
+	}
+	return 0;
+}
+
+static inline unsigned int fib_laddr_hashfn(u32 val)
+{
+	unsigned int mask = (fib_hash_size - 1);
+
+	return (val ^ (val >> 7) ^ (val >> 14)) & mask;
+}
+
+static struct hlist_head *fib_hash_alloc(int bytes)
+{
+	if (bytes <= PAGE_SIZE)
+		return kmalloc(bytes, GFP_KERNEL);
+	else
+		return (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(bytes));
+}
+
+static void fib_hash_free(struct hlist_head *hash, int bytes)
+{
+	if (!hash)
+		return;
+
+	if (bytes <= PAGE_SIZE)
+		kfree(hash);
+	else
+		free_pages((unsigned long) hash, get_order(bytes));
+}
+
+static void fib_hash_move(struct hlist_head *new_info_hash,
+			  struct hlist_head *new_laddrhash,
+			  unsigned int new_size)
+{
+	unsigned int old_size = fib_hash_size;
+	unsigned int i;
+
+	write_lock(&fib_info_lock);
+	fib_hash_size = new_size;
+
+	for (i = 0; i < old_size; i++) {
+		struct hlist_head *head = &fib_info_hash[i];
+		struct hlist_node *node, *n;
+		struct fib_info *fi;
+
+		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
+			struct hlist_head *dest;
+			unsigned int new_hash;
+
+			hlist_del(&fi->fib_hash);
+
+			new_hash = fib_info_hashfn(fi);
+			dest = &new_info_hash[new_hash];
+			hlist_add_head(&fi->fib_hash, dest);
+		}
+	}
+	fib_info_hash = new_info_hash;
+
+	for (i = 0; i < old_size; i++) {
+		struct hlist_head *lhead = &fib_info_laddrhash[i];
+		struct hlist_node *node, *n;
+		struct fib_info *fi;
+
+		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
+			struct hlist_head *ldest;
+			unsigned int new_hash;
+
+			hlist_del(&fi->fib_lhash);
+
+			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
+			ldest = &new_laddrhash[new_hash];
+			hlist_add_head(&fi->fib_lhash, ldest);
+		}
+	}
+	fib_info_laddrhash = new_laddrhash;
+
+	write_unlock(&fib_info_lock);
+}
+
+struct fib_info *
+fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
+		const struct nlmsghdr *nlh, int *errp)
+{
+	int err;
+	struct fib_info *fi = NULL;
+	struct fib_info *ofi;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	int nhs = 1;
+#else
+	const int nhs = 1;
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	u32 mp_alg = IP_MP_ALG_NONE;
+#endif
+
+	/* Fast check to catch the most weird cases */
+	if (fib_props[r->rtm_type].scope > r->rtm_scope)
+		goto err_inval;
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (rta->rta_mp) {
+		nhs = fib_count_nexthops(rta->rta_mp);
+		if (nhs == 0)
+			goto err_inval;
+	}
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	if (rta->rta_mp_alg) {
+		mp_alg = *rta->rta_mp_alg;
+
+		if (mp_alg < IP_MP_ALG_NONE ||
+		    mp_alg > IP_MP_ALG_MAX)
+			goto err_inval;
+	}
+#endif
+
+	err = -ENOBUFS;
+	if (fib_info_cnt >= fib_hash_size) {
+		unsigned int new_size = fib_hash_size << 1;
+		struct hlist_head *new_info_hash;
+		struct hlist_head *new_laddrhash;
+		unsigned int bytes;
+
+		if (!new_size)
+			new_size = 1;
+		bytes = new_size * sizeof(struct hlist_head *);
+		new_info_hash = fib_hash_alloc(bytes);
+		new_laddrhash = fib_hash_alloc(bytes);
+		if (!new_info_hash || !new_laddrhash) {
+			fib_hash_free(new_info_hash, bytes);
+			fib_hash_free(new_laddrhash, bytes);
+		} else {
+			memset(new_info_hash, 0, bytes);
+			memset(new_laddrhash, 0, bytes);
+
+			fib_hash_move(new_info_hash, new_laddrhash, new_size);
+		}
+
+		if (!fib_hash_size)
+			goto failure;
+	}
+
+	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+	if (fi == NULL)
+		goto failure;
+	fib_info_cnt++;
+	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
+
+	fi->fib_protocol = r->rtm_protocol;
+
+	fi->fib_nhs = nhs;
+	change_nexthops(fi) {
+		nh->nh_parent = fi;
+	} endfor_nexthops(fi)
+
+	fi->fib_flags = r->rtm_flags;
+	if (rta->rta_priority)
+		fi->fib_priority = *rta->rta_priority;
+	if (rta->rta_mx) {
+		int attrlen = RTA_PAYLOAD(rta->rta_mx);
+		struct rtattr *attr = RTA_DATA(rta->rta_mx);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > RTAX_MAX)
+					goto err_inval;
+				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+	if (rta->rta_prefsrc)
+		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
+
+	if (rta->rta_mp) {
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+			goto failure;
+		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+			goto err_inval;
+		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+			goto err_inval;
+#ifdef CONFIG_NET_CLS_ROUTE
+		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+			goto err_inval;
+#endif
+#else
+		goto err_inval;
+#endif
+	} else {
+		struct fib_nh *nh = fi->fib_nh;
+		if (rta->rta_oif)
+			nh->nh_oif = *rta->rta_oif;
+		if (rta->rta_gw)
+			memcpy(&nh->nh_gw, rta->rta_gw, 4);
+#ifdef CONFIG_NET_CLS_ROUTE
+		if (rta->rta_flow)
+			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+#endif
+		nh->nh_flags = r->rtm_flags;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+		nh->nh_weight = 1;
+#endif
+	}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	fi->fib_mp_alg = mp_alg;
+#endif
+
+	if (fib_props[r->rtm_type].error) {
+		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+			goto err_inval;
+		goto link_it;
+	}
+
+	if (r->rtm_scope > RT_SCOPE_HOST)
+		goto err_inval;
+
+	if (r->rtm_scope == RT_SCOPE_HOST) {
+		struct fib_nh *nh = fi->fib_nh;
+
+		/* Local address is added. */
+		if (nhs != 1 || nh->nh_gw)
+			goto err_inval;
+		nh->nh_scope = RT_SCOPE_NOWHERE;
+		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
+		err = -ENODEV;
+		if (nh->nh_dev == NULL)
+			goto failure;
+	} else {
+		change_nexthops(fi) {
+			if ((err = fib_check_nh(r, fi, nh)) != 0)
+				goto failure;
+		} endfor_nexthops(fi)
+	}
+
+	if (fi->fib_prefsrc) {
+		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
+		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
+				goto err_inval;
+	}
+
+link_it:
+	if ((ofi = fib_find_info(fi)) != NULL) {
+		fi->fib_dead = 1;
+		free_fib_info(fi);
+		ofi->fib_treeref++;
+		return ofi;
+	}
+
+	fi->fib_treeref++;
+	atomic_inc(&fi->fib_clntref);
+	write_lock(&fib_info_lock);
+	hlist_add_head(&fi->fib_hash,
+		       &fib_info_hash[fib_info_hashfn(fi)]);
+	if (fi->fib_prefsrc) {
+		struct hlist_head *head;
+
+		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+		hlist_add_head(&fi->fib_lhash, head);
+	}
+	change_nexthops(fi) {
+		struct hlist_head *head;
+		unsigned int hash;
+
+		if (!nh->nh_dev)
+			continue;
+		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+		head = &fib_info_devhash[hash];
+		hlist_add_head(&nh->nh_hash, head);
+	} endfor_nexthops(fi)
+	write_unlock(&fib_info_lock);
+	return fi;
+
+err_inval:
+	err = -EINVAL;
+
+failure:
+        *errp = err;
+        if (fi) {
+		fi->fib_dead = 1;
+		free_fib_info(fi);
+	}
+	return NULL;
+}
+
+int fib_semantic_match(struct list_head *head, const struct flowi *flp,
+		       struct fib_result *res, __u32 zone, __u32 mask, 
+			int prefixlen)
+{
+	struct fib_alias *fa;
+	int nh_sel = 0;
+
+	list_for_each_entry(fa, head, fa_list) {
+		int err;
+
+		if (fa->fa_tos &&
+		    fa->fa_tos != flp->fl4_tos)
+			continue;
+
+		if (fa->fa_scope < flp->fl4_scope)
+			continue;
+
+		fa->fa_state |= FA_S_ACCESSED;
+
+		err = fib_props[fa->fa_type].error;
+		if (err == 0) {
+			struct fib_info *fi = fa->fa_info;
+
+			if (fi->fib_flags & RTNH_F_DEAD)
+				continue;
+
+			switch (fa->fa_type) {
+			case RTN_UNICAST:
+			case RTN_LOCAL:
+			case RTN_BROADCAST:
+			case RTN_ANYCAST:
+			case RTN_MULTICAST:
+				for_nexthops(fi) {
+					if (nh->nh_flags&RTNH_F_DEAD)
+						continue;
+					if (!flp->oif || flp->oif == nh->nh_oif)
+						break;
+				}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+				if (nhsel < fi->fib_nhs) {
+					nh_sel = nhsel;
+					goto out_fill_res;
+				}
+#else
+				if (nhsel < 1) {
+					goto out_fill_res;
+				}
+#endif
+				endfor_nexthops(fi);
+				continue;
+
+			default:
+				printk(KERN_DEBUG "impossible 102\n");
+				return -EINVAL;
+			};
+		}
+		return err;
+	}
+	return 1;
+
+out_fill_res:
+	res->prefixlen = prefixlen;
+	res->nh_sel = nh_sel;
+	res->type = fa->fa_type;
+	res->scope = fa->fa_scope;
+	res->fi = fa->fa_info;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	res->netmask = mask;
+	res->network = zone &
+		(0xFFFFFFFF >> (32 - prefixlen));
+#endif
+	atomic_inc(&res->fi->fib_clntref);
+	return 0;
+}
+
+/* Find appropriate source address to this destination */
+
+u32 __fib_res_prefsrc(struct fib_result *res)
+{
+	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
+}
+
+int
+fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
+	      struct fib_info *fi)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	rtm = NLMSG_DATA(nlh);
+	rtm->rtm_family = AF_INET;
+	rtm->rtm_dst_len = dst_len;
+	rtm->rtm_src_len = 0;
+	rtm->rtm_tos = tos;
+	rtm->rtm_table = tb_id;
+	rtm->rtm_type = type;
+	rtm->rtm_flags = fi->fib_flags;
+	rtm->rtm_scope = scope;
+	if (rtm->rtm_dst_len)
+		RTA_PUT(skb, RTA_DST, 4, dst);
+	rtm->rtm_protocol = fi->fib_protocol;
+	if (fi->fib_priority)
+		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (fi->fib_nh[0].nh_tclassid)
+		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+#endif
+	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+		goto rtattr_failure;
+	if (fi->fib_prefsrc)
+		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+	if (fi->fib_nhs == 1) {
+		if (fi->fib_nh->nh_gw)
+			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+		if (fi->fib_nh->nh_oif)
+			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+	}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (fi->fib_nhs > 1) {
+		struct rtnexthop *nhp;
+		struct rtattr *mp_head;
+		if (skb_tailroom(skb) <= RTA_SPACE(0))
+			goto rtattr_failure;
+		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+
+		for_nexthops(fi) {
+			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+				goto rtattr_failure;
+			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+			nhp->rtnh_flags = nh->nh_flags & 0xFF;
+			nhp->rtnh_hops = nh->nh_weight-1;
+			nhp->rtnh_ifindex = nh->nh_oif;
+			if (nh->nh_gw)
+				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+		} endfor_nexthops(fi);
+		mp_head->rta_type = RTA_MULTIPATH;
+		mp_head->rta_len = skb->tail - (u8*)mp_head;
+	}
+#endif
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+#ifndef CONFIG_IP_NOSIOCRT
+
+int
+fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
+		    struct kern_rta *rta, struct rtentry *r)
+{
+	int    plen;
+	u32    *ptr;
+
+	memset(rtm, 0, sizeof(*rtm));
+	memset(rta, 0, sizeof(*rta));
+
+	if (r->rt_dst.sa_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	/* Check mask for validity:
+	   a) it must be contiguous.
+	   b) destination must have all host bits clear.
+	   c) if application forgot to set correct family (AF_INET),
+	      reject request unless it is absolutely clear i.e.
+	      both family and mask are zero.
+	 */
+	plen = 32;
+	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
+	if (!(r->rt_flags&RTF_HOST)) {
+		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
+		if (r->rt_genmask.sa_family != AF_INET) {
+			if (mask || r->rt_genmask.sa_family)
+				return -EAFNOSUPPORT;
+		}
+		if (bad_mask(mask, *ptr))
+			return -EINVAL;
+		plen = inet_mask_len(mask);
+	}
+
+	nl->nlmsg_flags = NLM_F_REQUEST;
+	nl->nlmsg_pid = 0;
+	nl->nlmsg_seq = 0;
+	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
+	if (cmd == SIOCDELRT) {
+		nl->nlmsg_type = RTM_DELROUTE;
+		nl->nlmsg_flags = 0;
+	} else {
+		nl->nlmsg_type = RTM_NEWROUTE;
+		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
+		rtm->rtm_protocol = RTPROT_BOOT;
+	}
+
+	rtm->rtm_dst_len = plen;
+	rta->rta_dst = ptr;
+
+	if (r->rt_metric) {
+		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
+		rta->rta_priority = (u32*)&r->rt_pad3;
+	}
+	if (r->rt_flags&RTF_REJECT) {
+		rtm->rtm_scope = RT_SCOPE_HOST;
+		rtm->rtm_type = RTN_UNREACHABLE;
+		return 0;
+	}
+	rtm->rtm_scope = RT_SCOPE_NOWHERE;
+	rtm->rtm_type = RTN_UNICAST;
+
+	if (r->rt_dev) {
+		char *colon;
+		struct net_device *dev;
+		char   devname[IFNAMSIZ];
+
+		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
+			return -EFAULT;
+		devname[IFNAMSIZ-1] = 0;
+		colon = strchr(devname, ':');
+		if (colon)
+			*colon = 0;
+		dev = __dev_get_by_name(devname);
+		if (!dev)
+			return -ENODEV;
+		rta->rta_oif = &dev->ifindex;
+		if (colon) {
+			struct in_ifaddr *ifa;
+			struct in_device *in_dev = __in_dev_get(dev);
+			if (!in_dev)
+				return -ENODEV;
+			*colon = ':';
+			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+				if (strcmp(ifa->ifa_label, devname) == 0)
+					break;
+			if (ifa == NULL)
+				return -ENODEV;
+			rta->rta_prefsrc = &ifa->ifa_local;
+		}
+	}
+
+	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
+	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
+		rta->rta_gw = ptr;
+		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
+			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	}
+
+	if (cmd == SIOCDELRT)
+		return 0;
+
+	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
+		return -EINVAL;
+
+	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
+		rtm->rtm_scope = RT_SCOPE_LINK;
+
+	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
+		struct rtattr *rec;
+		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
+		if (mx == NULL)
+			return -ENOMEM;
+		rta->rta_mx = mx;
+		mx->rta_type = RTA_METRICS;
+		mx->rta_len  = RTA_LENGTH(0);
+		if (r->rt_flags&RTF_MTU) {
+			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
+			rec->rta_type = RTAX_ADVMSS;
+			rec->rta_len = RTA_LENGTH(4);
+			mx->rta_len += RTA_LENGTH(4);
+			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
+		}
+		if (r->rt_flags&RTF_WINDOW) {
+			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
+			rec->rta_type = RTAX_WINDOW;
+			rec->rta_len = RTA_LENGTH(4);
+			mx->rta_len += RTA_LENGTH(4);
+			*(u32*)RTA_DATA(rec) = r->rt_window;
+		}
+		if (r->rt_flags&RTF_IRTT) {
+			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
+			rec->rta_type = RTAX_RTT;
+			rec->rta_len = RTA_LENGTH(4);
+			mx->rta_len += RTA_LENGTH(4);
+			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
+		}
+	}
+	return 0;
+}
+
+#endif
+
+/*
+   Update FIB if:
+   - local address disappeared -> we must delete all the entries
+     referring to it.
+   - device went down -> we must shutdown all nexthops going via it.
+ */
+
+int fib_sync_down(u32 local, struct net_device *dev, int force)
+{
+	int ret = 0;
+	int scope = RT_SCOPE_NOWHERE;
+	
+	if (force)
+		scope = -1;
+
+	if (local && fib_info_laddrhash) {
+		unsigned int hash = fib_laddr_hashfn(local);
+		struct hlist_head *head = &fib_info_laddrhash[hash];
+		struct hlist_node *node;
+		struct fib_info *fi;
+
+		hlist_for_each_entry(fi, node, head, fib_lhash) {
+			if (fi->fib_prefsrc == local) {
+				fi->fib_flags |= RTNH_F_DEAD;
+				ret++;
+			}
+		}
+	}
+
+	if (dev) {
+		struct fib_info *prev_fi = NULL;
+		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+		struct hlist_head *head = &fib_info_devhash[hash];
+		struct hlist_node *node;
+		struct fib_nh *nh;
+
+		hlist_for_each_entry(nh, node, head, nh_hash) {
+			struct fib_info *fi = nh->nh_parent;
+			int dead;
+
+			BUG_ON(!fi->fib_nhs);
+			if (nh->nh_dev != dev || fi == prev_fi)
+				continue;
+			prev_fi = fi;
+			dead = 0;
+			change_nexthops(fi) {
+				if (nh->nh_flags&RTNH_F_DEAD)
+					dead++;
+				else if (nh->nh_dev == dev &&
+					 nh->nh_scope != scope) {
+					nh->nh_flags |= RTNH_F_DEAD;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+					spin_lock_bh(&fib_multipath_lock);
+					fi->fib_power -= nh->nh_power;
+					nh->nh_power = 0;
+					spin_unlock_bh(&fib_multipath_lock);
+#endif
+					dead++;
+				}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+				if (force > 1 && nh->nh_dev == dev) {
+					dead = fi->fib_nhs;
+					break;
+				}
+#endif
+			} endfor_nexthops(fi)
+			if (dead == fi->fib_nhs) {
+				fi->fib_flags |= RTNH_F_DEAD;
+				ret++;
+			}
+		}
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+/*
+   Dead device goes up. We wake up dead nexthops.
+   It takes sense only on multipath routes.
+ */
+
+int fib_sync_up(struct net_device *dev)
+{
+	struct fib_info *prev_fi;
+	unsigned int hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct fib_nh *nh;
+	int ret;
+
+	if (!(dev->flags&IFF_UP))
+		return 0;
+
+	prev_fi = NULL;
+	hash = fib_devindex_hashfn(dev->ifindex);
+	head = &fib_info_devhash[hash];
+	ret = 0;
+
+	hlist_for_each_entry(nh, node, head, nh_hash) {
+		struct fib_info *fi = nh->nh_parent;
+		int alive;
+
+		BUG_ON(!fi->fib_nhs);
+		if (nh->nh_dev != dev || fi == prev_fi)
+			continue;
+
+		prev_fi = fi;
+		alive = 0;
+		change_nexthops(fi) {
+			if (!(nh->nh_flags&RTNH_F_DEAD)) {
+				alive++;
+				continue;
+			}
+			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
+				continue;
+			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
+				continue;
+			alive++;
+			spin_lock_bh(&fib_multipath_lock);
+			nh->nh_power = 0;
+			nh->nh_flags &= ~RTNH_F_DEAD;
+			spin_unlock_bh(&fib_multipath_lock);
+		} endfor_nexthops(fi)
+
+		if (alive > 0) {
+			fi->fib_flags &= ~RTNH_F_DEAD;
+			ret++;
+		}
+	}
+
+	return ret;
+}
+
+/*
+   The algorithm is suboptimal, but it provides really
+   fair weighted route distribution.
+ */
+
+void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
+{
+	struct fib_info *fi = res->fi;
+	int w;
+
+	spin_lock_bh(&fib_multipath_lock);
+	if (fi->fib_power <= 0) {
+		int power = 0;
+		change_nexthops(fi) {
+			if (!(nh->nh_flags&RTNH_F_DEAD)) {
+				power += nh->nh_weight;
+				nh->nh_power = nh->nh_weight;
+			}
+		} endfor_nexthops(fi);
+		fi->fib_power = power;
+		if (power <= 0) {
+			spin_unlock_bh(&fib_multipath_lock);
+			/* Race condition: route has just become dead. */
+			res->nh_sel = 0;
+			return;
+		}
+	}
+
+
+	/* w should be random number [0..fi->fib_power-1],
+	   it is pretty bad approximation.
+	 */
+
+	w = jiffies % fi->fib_power;
+
+	change_nexthops(fi) {
+		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+			if ((w -= nh->nh_power) <= 0) {
+				nh->nh_power--;
+				fi->fib_power--;
+				res->nh_sel = nhsel;
+				spin_unlock_bh(&fib_multipath_lock);
+				return;
+			}
+		}
+	} endfor_nexthops(fi);
+
+	/* Race condition: route has just become dead. */
+	res->nh_sel = 0;
+	spin_unlock_bh(&fib_multipath_lock);
+}
+#endif
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
new file mode 100644
index 00000000000..85bf0d3e294
--- /dev/null
+++ b/net/ipv4/icmp.c
@@ -0,0 +1,1143 @@
+/*
+ *	NET3:	Implementation of the ICMP protocol layer.
+ *
+ *		Alan Cox, <alan@redhat.com>
+ *
+ *	Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Some of the function names and the icmp unreach table for this
+ *	module were derived from [icmp.c 1.0.11 06/02/93] by
+ *	Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
+ *	Other than that this module is a complete rewrite.
+ *
+ *	Fixes:
+ *	Clemens Fruhwirth	:	introduce global icmp rate limiting
+ *					with icmp type masking ability instead
+ *					of broken per type icmp timeouts.
+ *		Mike Shaver	:	RFC1122 checks.
+ *		Alan Cox	:	Multicast ping reply as self.
+ *		Alan Cox	:	Fix atomicity lockup in ip_build_xmit
+ *					call.
+ *		Alan Cox	:	Added 216,128 byte paths to the MTU
+ *					code.
+ *		Martin Mares	:	RFC1812 checks.
+ *		Martin Mares	:	Can be configured to follow redirects
+ *					if acting as a router _without_ a
+ *					routing protocol (RFC 1812).
+ *		Martin Mares	:	Echo requests may be configured to
+ *					be ignored (RFC 1812).
+ *		Martin Mares	:	Limitation of ICMP error message
+ *					transmit rate (RFC 1812).
+ *		Martin Mares	:	TOS and Precedence set correctly
+ *					(RFC 1812).
+ *		Martin Mares	:	Now copying as much data from the
+ *					original packet as we can without
+ *					exceeding 576 bytes (RFC 1812).
+ *	Willy Konynenberg	:	Transparent proxying support.
+ *		Keith Owens	:	RFC1191 correction for 4.2BSD based
+ *					path MTU bug.
+ *		Thomas Quinot	:	ICMP Dest Unreach codes up to 15 are
+ *					valid (RFC 1812).
+ *		Andi Kleen	:	Check all packet lengths properly
+ *					and moved all kfree_skb() up to
+ *					icmp_rcv.
+ *		Andi Kleen	:	Move the rate limit bookkeeping
+ *					into the dest entry and use a token
+ *					bucket filter (thanks to ANK). Make
+ *					the rates sysctl configurable.
+ *		Yu Tianli	:	Fixed two ugly bugs in icmp_send
+ *					- IP option length was accounted wrongly
+ *					- ICMP header length was not accounted
+ *					  at all.
+ *              Tristan Greaves :       Added sysctl option to ignore bogus
+ *              			broadcast responses from broken routers.
+ *
+ * To Fix:
+ *
+ *	- Should use skb_pull() instead of all the manual checking.
+ *	  This would also greatly simply some upper layer error handlers. --AK
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/protocol.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <net/checksum.h>
+
+/*
+ *	Build xmit assembly blocks
+ */
+
+struct icmp_bxm {
+	struct sk_buff *skb;
+	int offset;
+	int data_len;
+
+	struct {
+		struct icmphdr icmph;
+		__u32	       times[3];
+	} data;
+	int head_len;
+	struct ip_options replyopts;
+	unsigned char  optbuf[40];
+};
+
+/*
+ *	Statistics
+ */
+DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+
+/* An array of errno for error messages from dest unreach. */
+/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
+
+struct icmp_err icmp_err_convert[] = {
+	{
+		.errno = ENETUNREACH,	/* ICMP_NET_UNREACH */
+		.fatal = 0,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNREACH */
+		.fatal = 0,
+	},
+	{
+		.errno = ENOPROTOOPT	/* ICMP_PROT_UNREACH */,
+		.fatal = 1,
+	},
+	{
+		.errno = ECONNREFUSED,	/* ICMP_PORT_UNREACH */
+		.fatal = 1,
+	},
+	{
+		.errno = EMSGSIZE,	/* ICMP_FRAG_NEEDED */
+		.fatal = 0,
+	},
+	{
+		.errno = EOPNOTSUPP,	/* ICMP_SR_FAILED */
+		.fatal = 0,
+	},
+	{
+		.errno = ENETUNREACH,	/* ICMP_NET_UNKNOWN */
+		.fatal = 1,
+	},
+	{
+		.errno = EHOSTDOWN,	/* ICMP_HOST_UNKNOWN */
+		.fatal = 1,
+	},
+	{
+		.errno = ENONET,	/* ICMP_HOST_ISOLATED */
+		.fatal = 1,
+	},
+	{
+		.errno = ENETUNREACH,	/* ICMP_NET_ANO	*/
+		.fatal = 1,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_HOST_ANO */
+		.fatal = 1,
+	},
+	{
+		.errno = ENETUNREACH,	/* ICMP_NET_UNR_TOS */
+		.fatal = 0,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNR_TOS */
+		.fatal = 0,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_PKT_FILTERED */
+		.fatal = 1,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_PREC_VIOLATION */
+		.fatal = 1,
+	},
+	{
+		.errno = EHOSTUNREACH,	/* ICMP_PREC_CUTOFF */
+		.fatal = 1,
+	},
+};
+
+/* Control parameters for ECHO replies. */
+int sysctl_icmp_echo_ignore_all;
+int sysctl_icmp_echo_ignore_broadcasts;
+
+/* Control parameter - ignore bogus broadcast responses? */
+int sysctl_icmp_ignore_bogus_error_responses;
+
+/*
+ * 	Configurable global rate limit.
+ *
+ *	ratelimit defines tokens/packet consumed for dst->rate_token bucket
+ *	ratemask defines which icmp types are ratelimited by setting
+ * 	it's bit position.
+ *
+ *	default:
+ *	dest unreachable (3), source quench (4),
+ *	time exceeded (11), parameter problem (12)
+ */
+
+int sysctl_icmp_ratelimit = 1 * HZ;
+int sysctl_icmp_ratemask = 0x1818;
+
+/*
+ *	ICMP control array. This specifies what to do with each ICMP.
+ */
+
+struct icmp_control {
+	int output_entry;	/* Field for increment on output */
+	int input_entry;	/* Field for increment on input */
+	void (*handler)(struct sk_buff *skb);
+	short   error;		/* This ICMP is classed as an error message */
+};
+
+static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
+
+/*
+ *	The ICMP socket(s). This is the most convenient way to flow control
+ *	our ICMP output as well as maintain a clean interface throughout
+ *	all layers. All Socketless IP sends will soon be gone.
+ *
+ *	On SMP we have one ICMP socket per-cpu.
+ */
+static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
+#define icmp_socket	__get_cpu_var(__icmp_socket)
+
+static __inline__ int icmp_xmit_lock(void)
+{
+	local_bh_disable();
+
+	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
+		/* This can happen if the output path signals a
+		 * dst_link_failure() for an outgoing ICMP packet.
+		 */
+		local_bh_enable();
+		return 1;
+	}
+	return 0;
+}
+
+static void icmp_xmit_unlock(void)
+{
+	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+}
+
+/*
+ *	Send an ICMP frame.
+ */
+
+/*
+ *	Check transmit rate limitation for given message.
+ *	The rate information is held in the destination cache now.
+ *	This function is generic and could be used for other purposes
+ *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
+ *
+ *	Note that the same dst_entry fields are modified by functions in
+ *	route.c too, but these work for packet destinations while xrlim_allow
+ *	works for icmp destinations. This means the rate limiting information
+ *	for one "ip object" is shared - and these ICMPs are twice limited:
+ *	by source and by destination.
+ *
+ *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
+ *			  SHOULD allow setting of rate limits
+ *
+ * 	Shared between ICMPv4 and ICMPv6.
+ */
+#define XRLIM_BURST_FACTOR 6
+int xrlim_allow(struct dst_entry *dst, int timeout)
+{
+	unsigned long now;
+	int rc = 0;
+
+	now = jiffies;
+	dst->rate_tokens += now - dst->rate_last;
+	dst->rate_last = now;
+	if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout)
+		dst->rate_tokens = XRLIM_BURST_FACTOR * timeout;
+	if (dst->rate_tokens >= timeout) {
+		dst->rate_tokens -= timeout;
+		rc = 1;
+	}
+	return rc;
+}
+
+static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
+{
+	struct dst_entry *dst = &rt->u.dst;
+	int rc = 1;
+
+	if (type > NR_ICMP_TYPES)
+		goto out;
+
+	/* Don't limit PMTU discovery. */
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+		goto out;
+
+	/* No rate limit on loopback */
+	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
+ 		goto out;
+
+	/* Limit if icmp type is enabled in ratemask. */
+	if ((1 << type) & sysctl_icmp_ratemask)
+		rc = xrlim_allow(dst, sysctl_icmp_ratelimit);
+out:
+	return rc;
+}
+
+/*
+ *	Maintain the counters used in the SNMP statistics for outgoing ICMP
+ */
+static void icmp_out_count(int type)
+{
+	if (type <= NR_ICMP_TYPES) {
+		ICMP_INC_STATS(icmp_pointers[type].output_entry);
+		ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
+	}
+}
+
+/*
+ *	Checksum each fragment, and on the first include the headers and final
+ *	checksum.
+ */
+static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
+			  struct sk_buff *skb)
+{
+	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
+	unsigned int csum;
+
+	csum = skb_copy_and_csum_bits(icmp_param->skb,
+				      icmp_param->offset + offset,
+				      to, len, 0);
+
+	skb->csum = csum_block_add(skb->csum, csum, odd);
+	if (icmp_pointers[icmp_param->data.icmph.type].error)
+		nf_ct_attach(skb, icmp_param->skb);
+	return 0;
+}
+
+static void icmp_push_reply(struct icmp_bxm *icmp_param,
+			    struct ipcm_cookie *ipc, struct rtable *rt)
+{
+	struct sk_buff *skb;
+
+	ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+		       icmp_param->data_len+icmp_param->head_len,
+		       icmp_param->head_len,
+		       ipc, rt, MSG_DONTWAIT);
+
+	if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+		struct icmphdr *icmph = skb->h.icmph;
+		unsigned int csum = 0;
+		struct sk_buff *skb1;
+
+		skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
+			csum = csum_add(csum, skb1->csum);
+		}
+		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
+						 (char *)icmph,
+						 icmp_param->head_len, csum);
+		icmph->checksum = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+		ip_push_pending_frames(icmp_socket->sk);
+	}
+}
+
+/*
+ *	Driving logic for building and sending ICMP messages.
+ */
+
+static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
+{
+	struct sock *sk = icmp_socket->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipcm_cookie ipc;
+	struct rtable *rt = (struct rtable *)skb->dst;
+	u32 daddr;
+
+	if (ip_options_echo(&icmp_param->replyopts, skb))
+		goto out;
+
+	if (icmp_xmit_lock())
+		return;
+
+	icmp_param->data.icmph.checksum = 0;
+	icmp_out_count(icmp_param->data.icmph.type);
+
+	inet->tos = skb->nh.iph->tos;
+	daddr = ipc.addr = rt->rt_src;
+	ipc.opt = NULL;
+	if (icmp_param->replyopts.optlen) {
+		ipc.opt = &icmp_param->replyopts;
+		if (ipc.opt->srr)
+			daddr = icmp_param->replyopts.faddr;
+	}
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = rt->rt_spec_dst,
+						.tos = RT_TOS(skb->nh.iph->tos) } },
+				    .proto = IPPROTO_ICMP };
+		if (ip_route_output_key(&rt, &fl))
+			goto out_unlock;
+	}
+	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
+			       icmp_param->data.icmph.code))
+		icmp_push_reply(icmp_param, &ipc, rt);
+	ip_rt_put(rt);
+out_unlock:
+	icmp_xmit_unlock();
+out:;
+}
+
+
+/*
+ *	Send an ICMP message in response to a situation
+ *
+ *	RFC 1122: 3.2.2	MUST send at least the IP header and 8 bytes of header.
+ *		  MAY send more (we do).
+ *			MUST NOT change this header information.
+ *			MUST NOT reply to a multicast/broadcast IP address.
+ *			MUST NOT reply to a multicast/broadcast MAC address.
+ *			MUST reply to only the first fragment.
+ */
+
+void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
+{
+	struct iphdr *iph;
+	int room;
+	struct icmp_bxm icmp_param;
+	struct rtable *rt = (struct rtable *)skb_in->dst;
+	struct ipcm_cookie ipc;
+	u32 saddr;
+	u8  tos;
+
+	if (!rt)
+		goto out;
+
+	/*
+	 *	Find the original header. It is expected to be valid, of course.
+	 *	Check this, icmp_send is called from the most obscure devices
+	 *	sometimes.
+	 */
+	iph = skb_in->nh.iph;
+
+	if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
+		goto out;
+
+	/*
+	 *	No replies to physical multicast/broadcast
+	 */
+	if (skb_in->pkt_type != PACKET_HOST)
+		goto out;
+
+	/*
+	 *	Now check at the protocol level
+	 */
+	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+		goto out;
+
+	/*
+	 *	Only reply to fragment 0. We byte re-order the constant
+	 *	mask for efficiency.
+	 */
+	if (iph->frag_off & htons(IP_OFFSET))
+		goto out;
+
+	/*
+	 *	If we send an ICMP error to an ICMP error a mess would result..
+	 */
+	if (icmp_pointers[type].error) {
+		/*
+		 *	We are an error, check if we are replying to an
+		 *	ICMP error
+		 */
+		if (iph->protocol == IPPROTO_ICMP) {
+			u8 _inner_type, *itp;
+
+			itp = skb_header_pointer(skb_in,
+						 skb_in->nh.raw +
+						 (iph->ihl << 2) +
+						 offsetof(struct icmphdr,
+							  type) -
+						 skb_in->data,
+						 sizeof(_inner_type),
+						 &_inner_type);
+			if (itp == NULL)
+				goto out;
+
+			/*
+			 *	Assume any unknown ICMP type is an error. This
+			 *	isn't specified by the RFC, but think about it..
+			 */
+			if (*itp > NR_ICMP_TYPES ||
+			    icmp_pointers[*itp].error)
+				goto out;
+		}
+	}
+
+	if (icmp_xmit_lock())
+		return;
+
+	/*
+	 *	Construct source address and options.
+	 */
+
+	saddr = iph->daddr;
+	if (!(rt->rt_flags & RTCF_LOCAL))
+		saddr = 0;
+
+	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
+					   IPTOS_PREC_INTERNETCONTROL) :
+					  iph->tos;
+
+	if (ip_options_echo(&icmp_param.replyopts, skb_in))
+		goto ende;
+
+
+	/*
+	 *	Prepare data for ICMP header.
+	 */
+
+	icmp_param.data.icmph.type	 = type;
+	icmp_param.data.icmph.code	 = code;
+	icmp_param.data.icmph.un.gateway = info;
+	icmp_param.data.icmph.checksum	 = 0;
+	icmp_param.skb	  = skb_in;
+	icmp_param.offset = skb_in->nh.raw - skb_in->data;
+	icmp_out_count(icmp_param.data.icmph.type);
+	inet_sk(icmp_socket->sk)->tos = tos;
+	ipc.addr = iph->saddr;
+	ipc.opt = &icmp_param.replyopts;
+
+	{
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = icmp_param.replyopts.srr ?
+						icmp_param.replyopts.faddr :
+						iph->saddr,
+					.saddr = saddr,
+					.tos = RT_TOS(tos)
+				}
+			},
+			.proto = IPPROTO_ICMP,
+			.uli_u = {
+				.icmpt = {
+					.type = type,
+					.code = code
+				}
+			}
+		};
+		if (ip_route_output_key(&rt, &fl))
+			goto out_unlock;
+	}
+
+	if (!icmpv4_xrlim_allow(rt, type, code))
+		goto ende;
+
+	/* RFC says return as much as we can without exceeding 576 bytes. */
+
+	room = dst_mtu(&rt->u.dst);
+	if (room > 576)
+		room = 576;
+	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
+	room -= sizeof(struct icmphdr);
+
+	icmp_param.data_len = skb_in->len - icmp_param.offset;
+	if (icmp_param.data_len > room)
+		icmp_param.data_len = room;
+	icmp_param.head_len = sizeof(struct icmphdr);
+
+	icmp_push_reply(&icmp_param, &ipc, rt);
+ende:
+	ip_rt_put(rt);
+out_unlock:
+	icmp_xmit_unlock();
+out:;
+}
+
+
+/*
+ *	Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
+ */
+
+static void icmp_unreach(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct icmphdr *icmph;
+	int hash, protocol;
+	struct net_protocol *ipprot;
+	struct sock *raw_sk;
+	u32 info = 0;
+
+	/*
+	 *	Incomplete header ?
+	 * 	Only checks for the IP header, there should be an
+	 *	additional check for longer headers in upper levels.
+	 */
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out_err;
+
+	icmph = skb->h.icmph;
+	iph   = (struct iphdr *)skb->data;
+
+	if (iph->ihl < 5) /* Mangled header, drop. */
+		goto out_err;
+
+	if (icmph->type == ICMP_DEST_UNREACH) {
+		switch (icmph->code & 15) {
+		case ICMP_NET_UNREACH:
+		case ICMP_HOST_UNREACH:
+		case ICMP_PROT_UNREACH:
+		case ICMP_PORT_UNREACH:
+			break;
+		case ICMP_FRAG_NEEDED:
+			if (ipv4_config.no_pmtu_disc) {
+				LIMIT_NETDEBUG(
+					printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
+							 "fragmentation needed "
+							 "and DF set.\n",
+					       NIPQUAD(iph->daddr)));
+			} else {
+				info = ip_rt_frag_needed(iph,
+						     ntohs(icmph->un.frag.mtu));
+				if (!info)
+					goto out;
+			}
+			break;
+		case ICMP_SR_FAILED:
+			LIMIT_NETDEBUG(
+				printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
+						 "Route Failed.\n",
+				       NIPQUAD(iph->daddr)));
+			break;
+		default:
+			break;
+		}
+		if (icmph->code > NR_ICMP_UNREACH)
+			goto out;
+	} else if (icmph->type == ICMP_PARAMETERPROB)
+		info = ntohl(icmph->un.gateway) >> 24;
+
+	/*
+	 *	Throw it at our lower layers
+	 *
+	 *	RFC 1122: 3.2.2 MUST extract the protocol ID from the passed
+	 *		  header.
+	 *	RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the
+	 *		  transport layer.
+	 *	RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to
+	 *		  transport layer.
+	 */
+
+	/*
+	 *	Check the other end isnt violating RFC 1122. Some routers send
+	 *	bogus responses to broadcast frames. If you see this message
+	 *	first check your netmask matches at both ends, if it does then
+	 *	get the other vendor to fix their kit.
+	 */
+
+	if (!sysctl_icmp_ignore_bogus_error_responses &&
+	    inet_addr_type(iph->daddr) == RTN_BROADCAST) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
+					    "type %u, code %u "
+					    "error to a broadcast: %u.%u.%u.%u on %s\n",
+			       NIPQUAD(skb->nh.iph->saddr),
+			       icmph->type, icmph->code,
+			       NIPQUAD(iph->daddr),
+			       skb->dev->name);
+		goto out;
+	}
+
+	/* Checkin full IP header plus 8 bytes of protocol to
+	 * avoid additional coding at protocol handlers.
+	 */
+	if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
+		goto out;
+
+	iph = (struct iphdr *)skb->data;
+	protocol = iph->protocol;
+
+	/*
+	 *	Deliver ICMP message to raw sockets. Pretty useless feature?
+	 */
+
+	/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
+	hash = protocol & (MAX_INET_PROTOS - 1);
+	read_lock(&raw_v4_lock);
+	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
+		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
+						 iph->saddr,
+						 skb->dev->ifindex)) != NULL) {
+			raw_err(raw_sk, skb, info);
+			raw_sk = sk_next(raw_sk);
+			iph = (struct iphdr *)skb->data;
+		}
+	}
+	read_unlock(&raw_v4_lock);
+
+	rcu_read_lock();
+	ipprot = rcu_dereference(inet_protos[hash]);
+	if (ipprot && ipprot->err_handler)
+		ipprot->err_handler(skb, info);
+	rcu_read_unlock();
+
+out:
+	return;
+out_err:
+	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	goto out;
+}
+
+
+/*
+ *	Handle ICMP_REDIRECT.
+ */
+
+static void icmp_redirect(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	unsigned long ip;
+
+	if (skb->len < sizeof(struct iphdr))
+		goto out_err;
+
+	/*
+	 *	Get the copied header of the packet that caused the redirect
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+
+	iph = (struct iphdr *)skb->data;
+	ip = iph->daddr;
+
+	switch (skb->h.icmph->code & 7) {
+	case ICMP_REDIR_NET:
+	case ICMP_REDIR_NETTOS:
+		/*
+		 * As per RFC recommendations now handle it as a host redirect.
+		 */
+	case ICMP_REDIR_HOST:
+	case ICMP_REDIR_HOSTTOS:
+		ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway,
+			       iph->saddr, iph->tos, skb->dev);
+		break;
+  	}
+out:
+	return;
+out_err:
+	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	goto out;
+}
+
+/*
+ *	Handle ICMP_ECHO ("ping") requests.
+ *
+ *	RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
+ *		  requests.
+ *	RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be
+ *		  included in the reply.
+ *	RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
+ *		  echo requests, MUST have default=NOT.
+ *	See also WRT handling of options once they are done and working.
+ */
+
+static void icmp_echo(struct sk_buff *skb)
+{
+	if (!sysctl_icmp_echo_ignore_all) {
+		struct icmp_bxm icmp_param;
+
+		icmp_param.data.icmph	   = *skb->h.icmph;
+		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
+		icmp_param.skb		   = skb;
+		icmp_param.offset	   = 0;
+		icmp_param.data_len	   = skb->len;
+		icmp_param.head_len	   = sizeof(struct icmphdr);
+		icmp_reply(&icmp_param, skb);
+	}
+}
+
+/*
+ *	Handle ICMP Timestamp requests.
+ *	RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests.
+ *		  SHOULD be in the kernel for minimum random latency.
+ *		  MUST be accurate to a few minutes.
+ *		  MUST be updated at least at 15Hz.
+ */
+static void icmp_timestamp(struct sk_buff *skb)
+{
+	struct timeval tv;
+	struct icmp_bxm icmp_param;
+	/*
+	 *	Too short.
+	 */
+	if (skb->len < 4)
+		goto out_err;
+
+	/*
+	 *	Fill in the current time as ms since midnight UT:
+	 */
+	do_gettimeofday(&tv);
+	icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 +
+					 tv.tv_usec / 1000);
+	icmp_param.data.times[2] = icmp_param.data.times[1];
+	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
+		BUG();
+	icmp_param.data.icmph	   = *skb->h.icmph;
+	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
+	icmp_param.data.icmph.code = 0;
+	icmp_param.skb		   = skb;
+	icmp_param.offset	   = 0;
+	icmp_param.data_len	   = 0;
+	icmp_param.head_len	   = sizeof(struct icmphdr) + 12;
+	icmp_reply(&icmp_param, skb);
+out:
+	return;
+out_err:
+	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	goto out;
+}
+
+
+/*
+ *	Handle ICMP_ADDRESS_MASK requests.  (RFC950)
+ *
+ * RFC1122 (3.2.2.9).  A host MUST only send replies to
+ * ADDRESS_MASK requests if it's been configured as an address mask
+ * agent.  Receiving a request doesn't constitute implicit permission to
+ * act as one. Of course, implementing this correctly requires (SHOULD)
+ * a way to turn the functionality on and off.  Another one for sysctl(),
+ * I guess. -- MS
+ *
+ * RFC1812 (4.3.3.9).	A router MUST implement it.
+ *			A router SHOULD have switch turning it on/off.
+ *		      	This switch MUST be ON by default.
+ *
+ * Gratuitous replies, zero-source replies are not implemented,
+ * that complies with RFC. DO NOT implement them!!! All the idea
+ * of broadcast addrmask replies as specified in RFC950 is broken.
+ * The problem is that it is not uncommon to have several prefixes
+ * on one physical interface. Moreover, addrmask agent can even be
+ * not aware of existing another prefixes.
+ * If source is zero, addrmask agent cannot choose correct prefix.
+ * Gratuitous mask announcements suffer from the same problem.
+ * RFC1812 explains it, but still allows to use ADDRMASK,
+ * that is pretty silly. --ANK
+ *
+ * All these rules are so bizarre, that I removed kernel addrmask
+ * support at all. It is wrong, it is obsolete, nobody uses it in
+ * any case. --ANK
+ *
+ * Furthermore you can do it with a usermode address agent program
+ * anyway...
+ */
+
+static void icmp_address(struct sk_buff *skb)
+{
+#if 0
+	if (net_ratelimit())
+		printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
+#endif
+}
+
+/*
+ * RFC1812 (4.3.3.9).	A router SHOULD listen all replies, and complain
+ *			loudly if an inconsistency is found.
+ */
+
+static void icmp_address_reply(struct sk_buff *skb)
+{
+	struct rtable *rt = (struct rtable *)skb->dst;
+	struct net_device *dev = skb->dev;
+	struct in_device *in_dev;
+	struct in_ifaddr *ifa;
+
+	if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
+		goto out;
+
+	in_dev = in_dev_get(dev);
+	if (!in_dev)
+		goto out;
+	rcu_read_lock();
+	if (in_dev->ifa_list &&
+	    IN_DEV_LOG_MARTIANS(in_dev) &&
+	    IN_DEV_FORWARD(in_dev)) {
+		u32 _mask, *mp;
+
+		mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
+		if (mp == NULL)
+			BUG();
+		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			if (*mp == ifa->ifa_mask &&
+			    inet_ifa_match(rt->rt_src, ifa))
+				break;
+		}
+		if (!ifa && net_ratelimit()) {
+			printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from "
+					 "%s/%u.%u.%u.%u\n",
+			       NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
+		}
+	}
+	rcu_read_unlock();
+	in_dev_put(in_dev);
+out:;
+}
+
+static void icmp_discard(struct sk_buff *skb)
+{
+}
+
+/*
+ *	Deal with incoming ICMP packets.
+ */
+int icmp_rcv(struct sk_buff *skb)
+{
+	struct icmphdr *icmph;
+	struct rtable *rt = (struct rtable *)skb->dst;
+
+	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum))
+			break;
+		NETDEBUG(if (net_ratelimit())
+				printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
+	case CHECKSUM_NONE:
+		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+			goto error;
+	default:;
+	}
+
+	if (!pskb_pull(skb, sizeof(struct icmphdr)))
+		goto error;
+
+	icmph = skb->h.icmph;
+
+	/*
+	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+	 *		  discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES)
+		goto error;
+
+
+	/*
+	 *	Parse the ICMP message
+	 */
+
+ 	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		/*
+		 *	RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
+		 *	  silently ignored (we let user decide with a sysctl).
+		 *	RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
+		 *	  discarded if to broadcast/multicast.
+		 */
+		if (icmph->type == ICMP_ECHO &&
+		    sysctl_icmp_echo_ignore_broadcasts) {
+			goto error;
+		}
+		if (icmph->type != ICMP_ECHO &&
+		    icmph->type != ICMP_TIMESTAMP &&
+		    icmph->type != ICMP_ADDRESS &&
+		    icmph->type != ICMP_ADDRESSREPLY) {
+			goto error;
+  		}
+	}
+
+	ICMP_INC_STATS_BH(icmp_pointers[icmph->type].input_entry);
+	icmp_pointers[icmph->type].handler(skb);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+error:
+	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	goto drop;
+}
+
+/*
+ *	This table is the definition of how we handle ICMP.
+ */
+static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
+	[ICMP_ECHOREPLY] = {
+		.output_entry = ICMP_MIB_OUTECHOREPS,
+		.input_entry = ICMP_MIB_INECHOREPS,
+		.handler = icmp_discard,
+	},
+	[1] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[2] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[ICMP_DEST_UNREACH] = {
+		.output_entry = ICMP_MIB_OUTDESTUNREACHS,
+		.input_entry = ICMP_MIB_INDESTUNREACHS,
+		.handler = icmp_unreach,
+		.error = 1,
+	},
+	[ICMP_SOURCE_QUENCH] = {
+		.output_entry = ICMP_MIB_OUTSRCQUENCHS,
+		.input_entry = ICMP_MIB_INSRCQUENCHS,
+		.handler = icmp_unreach,
+		.error = 1,
+	},
+	[ICMP_REDIRECT] = {
+		.output_entry = ICMP_MIB_OUTREDIRECTS,
+		.input_entry = ICMP_MIB_INREDIRECTS,
+		.handler = icmp_redirect,
+		.error = 1,
+	},
+	[6] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[7] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[ICMP_ECHO] = {
+		.output_entry = ICMP_MIB_OUTECHOS,
+		.input_entry = ICMP_MIB_INECHOS,
+		.handler = icmp_echo,
+	},
+	[9] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[10] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_INERRORS,
+		.handler = icmp_discard,
+		.error = 1,
+	},
+	[ICMP_TIME_EXCEEDED] = {
+		.output_entry = ICMP_MIB_OUTTIMEEXCDS,
+		.input_entry = ICMP_MIB_INTIMEEXCDS,
+		.handler = icmp_unreach,
+		.error = 1,
+	},
+	[ICMP_PARAMETERPROB] = {
+		.output_entry = ICMP_MIB_OUTPARMPROBS,
+		.input_entry = ICMP_MIB_INPARMPROBS,
+		.handler = icmp_unreach,
+		.error = 1,
+	},
+	[ICMP_TIMESTAMP] = {
+		.output_entry = ICMP_MIB_OUTTIMESTAMPS,
+		.input_entry = ICMP_MIB_INTIMESTAMPS,
+		.handler = icmp_timestamp,
+	},
+	[ICMP_TIMESTAMPREPLY] = {
+		.output_entry = ICMP_MIB_OUTTIMESTAMPREPS,
+		.input_entry = ICMP_MIB_INTIMESTAMPREPS,
+		.handler = icmp_discard,
+	},
+	[ICMP_INFO_REQUEST] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_DUMMY,
+		.handler = icmp_discard,
+	},
+ 	[ICMP_INFO_REPLY] = {
+		.output_entry = ICMP_MIB_DUMMY,
+		.input_entry = ICMP_MIB_DUMMY,
+		.handler = icmp_discard,
+	},
+	[ICMP_ADDRESS] = {
+		.output_entry = ICMP_MIB_OUTADDRMASKS,
+		.input_entry = ICMP_MIB_INADDRMASKS,
+		.handler = icmp_address,
+	},
+	[ICMP_ADDRESSREPLY] = {
+		.output_entry = ICMP_MIB_OUTADDRMASKREPS,
+		.input_entry = ICMP_MIB_INADDRMASKREPS,
+		.handler = icmp_address_reply,
+	},
+};
+
+void __init icmp_init(struct net_proto_family *ops)
+{
+	struct inet_sock *inet;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		int err;
+
+		if (!cpu_possible(i))
+			continue;
+
+		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
+				       &per_cpu(__icmp_socket, i));
+
+		if (err < 0)
+			panic("Failed to create the ICMP control socket.\n");
+
+		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
+
+		/* Enough space for 2 64K ICMP packets, including
+		 * sk_buff struct overhead.
+		 */
+		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
+			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+
+		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
+		inet->uc_ttl = -1;
+		inet->pmtudisc = IP_PMTUDISC_DONT;
+
+		/* Unhash it so that IP input processing does not even
+		 * see it, we do not wish this socket to see incoming
+		 * packets.
+		 */
+		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
+	}
+}
+
+EXPORT_SYMBOL(icmp_err_convert);
+EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(icmp_statistics);
+EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
new file mode 100644
index 00000000000..1f3183168a9
--- /dev/null
+++ b/net/ipv4/igmp.c
@@ -0,0 +1,2473 @@
+/*
+ *	Linux NET3:	Internet Group Management Protocol  [IGMP]
+ *
+ *	This code implements the IGMP protocol as defined in RFC1112. There has
+ *	been a further revision of this protocol since which is now supported.
+ *
+ *	If you have trouble with this module be careful what gcc you have used,
+ *	the older version didn't come out right using gcc 2.5.8, the newer one
+ *	seems to fall out with gcc 2.6.2.
+ *
+ *	Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $
+ *
+ *	Authors:
+ *		Alan Cox <Alan.Cox@linux.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *
+ *		Alan Cox	:	Added lots of __inline__ to optimise
+ *					the memory usage of all the tiny little
+ *					functions.
+ *		Alan Cox	:	Dumped the header building experiment.
+ *		Alan Cox	:	Minor tweaks ready for multicast routing
+ *					and extended IGMP protocol.
+ *		Alan Cox	:	Removed a load of inline directives. Gcc 2.5.8
+ *					writes utterly bogus code otherwise (sigh)
+ *					fixed IGMP loopback to behave in the manner
+ *					desired by mrouted, fixed the fact it has been
+ *					broken since 1.3.6 and cleaned up a few minor
+ *					points.
+ *
+ *		Chih-Jen Chang	:	Tried to revise IGMP to Version 2
+ *		Tsu-Sheng Tsao		E-mail: chihjenc@scf.usc.edu and tsusheng@scf.usc.edu
+ *					The enhancements are mainly based on Steve Deering's 
+ * 					ipmulti-3.5 source code.
+ *		Chih-Jen Chang	:	Added the igmp_get_mrouter_info and
+ *		Tsu-Sheng Tsao		igmp_set_mrouter_info to keep track of
+ *					the mrouted version on that device.
+ *		Chih-Jen Chang	:	Added the max_resp_time parameter to
+ *		Tsu-Sheng Tsao		igmp_heard_query(). Using this parameter
+ *					to identify the multicast router version
+ *					and do what the IGMP version 2 specified.
+ *		Chih-Jen Chang	:	Added a timer to revert to IGMP V2 router
+ *		Tsu-Sheng Tsao		if the specified time expired.
+ *		Alan Cox	:	Stop IGMP from 0.0.0.0 being accepted.
+ *		Alan Cox	:	Use GFP_ATOMIC in the right places.
+ *		Christian Daudt :	igmp timer wasn't set for local group
+ *					memberships but was being deleted, 
+ *					which caused a "del_timer() called 
+ *					from %p with timer not initialized\n"
+ *					message (960131).
+ *		Christian Daudt :	removed del_timer from 
+ *					igmp_timer_expire function (960205).
+ *             Christian Daudt :       igmp_heard_report now only calls
+ *                                     igmp_timer_expire if tm->running is
+ *                                     true (960216).
+ *		Malcolm Beattie :	ttl comparison wrong in igmp_rcv made
+ *					igmp_heard_query never trigger. Expiry
+ *					miscalculation fixed in igmp_heard_query
+ *					and random() made to return unsigned to
+ *					prevent negative expiry times.
+ *		Alexey Kuznetsov:	Wrong group leaving behaviour, backport
+ *					fix from pending 2.1.x patches.
+ *		Alan Cox:		Forget to enable FDDI support earlier.
+ *		Alexey Kuznetsov:	Fixed leaving groups on device down.
+ *		Alexey Kuznetsov:	Accordance to igmp-v2-06 draft.
+ *		David L Stevens:	IGMPv3 support, with help from
+ *					Vinay Kulkarni
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/times.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_IP_MROUTE
+#include <linux/mroute.h>
+#endif
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif
+
+#define IP_MAX_MEMBERSHIPS	20
+#define IP_MAX_MSF		10
+
+#ifdef CONFIG_IP_MULTICAST
+/* Parameter names and values are taken from igmp-v2-06 draft */
+
+#define IGMP_V1_Router_Present_Timeout		(400*HZ)
+#define IGMP_V2_Router_Present_Timeout		(400*HZ)
+#define IGMP_Unsolicited_Report_Interval	(10*HZ)
+#define IGMP_Query_Response_Interval		(10*HZ)
+#define IGMP_Unsolicited_Report_Count		2
+
+
+#define IGMP_Initial_Report_Delay		(1)
+
+/* IGMP_Initial_Report_Delay is not from IGMP specs!
+ * IGMP specs require to report membership immediately after
+ * joining a group, but we delay the first report by a
+ * small interval. It seems more natural and still does not
+ * contradict to specs provided this delay is small enough.
+ */
+
+#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \
+		(in_dev)->cnf.force_igmp_version == 1 || \
+		((in_dev)->mr_v1_seen && \
+		time_before(jiffies, (in_dev)->mr_v1_seen)))
+#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \
+		(in_dev)->cnf.force_igmp_version == 2 || \
+		((in_dev)->mr_v2_seen && \
+		time_before(jiffies, (in_dev)->mr_v2_seen)))
+
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
+static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr);
+static void igmpv3_clear_delrec(struct in_device *in_dev);
+static int sf_setstate(struct ip_mc_list *pmc);
+static void sf_markstate(struct ip_mc_list *pmc);
+#endif
+static void ip_mc_clear_src(struct ip_mc_list *pmc);
+static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
+			 int sfcount, __u32 *psfsrc, int delta);
+
+static void ip_ma_put(struct ip_mc_list *im)
+{
+	if (atomic_dec_and_test(&im->refcnt)) {
+		in_dev_put(im->interface);
+		kfree(im);
+	}
+}
+
+#ifdef CONFIG_IP_MULTICAST
+
+/*
+ *	Timer management
+ */
+
+static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
+{
+	spin_lock_bh(&im->lock);
+	if (del_timer(&im->timer))
+		atomic_dec(&im->refcnt);
+	im->tm_running=0;
+	im->reporter = 0;
+	im->unsolicit_count = 0;
+	spin_unlock_bh(&im->lock);
+}
+
+/* It must be called with locked im->lock */
+static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
+{
+	int tv=net_random() % max_delay;
+
+	im->tm_running=1;
+	if (!mod_timer(&im->timer, jiffies+tv+2))
+		atomic_inc(&im->refcnt);
+}
+
+static void igmp_gq_start_timer(struct in_device *in_dev)
+{
+	int tv = net_random() % in_dev->mr_maxdelay;
+
+	in_dev->mr_gq_running = 1;
+	if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2))
+		in_dev_hold(in_dev);
+}
+
+static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
+{
+	int tv = net_random() % delay;
+
+	if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2))
+		in_dev_hold(in_dev);
+}
+
+static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
+{
+	spin_lock_bh(&im->lock);
+	im->unsolicit_count = 0;
+	if (del_timer(&im->timer)) {
+		if ((long)(im->timer.expires-jiffies) < max_delay) {
+			add_timer(&im->timer);
+			im->tm_running=1;
+			spin_unlock_bh(&im->lock);
+			return;
+		}
+		atomic_dec(&im->refcnt);
+	}
+	igmp_start_timer(im, max_delay);
+	spin_unlock_bh(&im->lock);
+}
+
+
+/*
+ *	Send an IGMP report.
+ */
+
+#define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4)
+
+
+static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
+	int gdeleted, int sdeleted)
+{
+	switch (type) {
+	case IGMPV3_MODE_IS_INCLUDE:
+	case IGMPV3_MODE_IS_EXCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		return !(pmc->gsquery && !psf->sf_gsresp);
+	case IGMPV3_CHANGE_TO_INCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		return psf->sf_count[MCAST_INCLUDE] != 0;
+	case IGMPV3_CHANGE_TO_EXCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		if (pmc->sfcount[MCAST_EXCLUDE] == 0 ||
+		    psf->sf_count[MCAST_INCLUDE])
+			return 0;
+		return pmc->sfcount[MCAST_EXCLUDE] ==
+			psf->sf_count[MCAST_EXCLUDE];
+	case IGMPV3_ALLOW_NEW_SOURCES:
+		if (gdeleted || !psf->sf_crcount)
+			return 0;
+		return (pmc->sfmode == MCAST_INCLUDE) ^ sdeleted;
+	case IGMPV3_BLOCK_OLD_SOURCES:
+		if (pmc->sfmode == MCAST_INCLUDE)
+			return gdeleted || (psf->sf_crcount && sdeleted);
+		return psf->sf_crcount && !gdeleted && !sdeleted;
+	}
+	return 0;
+}
+
+static int
+igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
+{
+	struct ip_sf_list *psf;
+	int scount = 0;
+
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
+			continue;
+		scount++;
+	}
+	return scount;
+}
+
+static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
+{
+	struct sk_buff *skb;
+	struct rtable *rt;
+	struct iphdr *pip;
+	struct igmpv3_report *pig;
+
+	skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	if (skb == NULL)
+		return NULL;
+
+	{
+		struct flowi fl = { .oif = dev->ifindex,
+				    .nl_u = { .ip4_u = {
+				    .daddr = IGMPV3_ALL_MCR } },
+				    .proto = IPPROTO_IGMP };
+		if (ip_route_output_key(&rt, &fl)) {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+	if (rt->rt_src == 0) {
+		kfree_skb(skb);
+		ip_rt_put(rt);
+		return NULL;
+	}
+
+	skb->dst = &rt->u.dst;
+	skb->dev = dev;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+	skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+
+	pip->version  = 4;
+	pip->ihl      = (sizeof(struct iphdr)+4)>>2;
+	pip->tos      = 0xc0;
+	pip->frag_off = htons(IP_DF);
+	pip->ttl      = 1;
+	pip->daddr    = rt->rt_dst;
+	pip->saddr    = rt->rt_src;
+	pip->protocol = IPPROTO_IGMP;
+	pip->tot_len  = 0;	/* filled in later */
+	ip_select_ident(pip, &rt->u.dst, NULL);
+	((u8*)&pip[1])[0] = IPOPT_RA;
+	((u8*)&pip[1])[1] = 4;
+	((u8*)&pip[1])[2] = 0;
+	((u8*)&pip[1])[3] = 0;
+
+	pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig));
+	skb->h.igmph = (struct igmphdr *)pig;
+	pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
+	pig->resv1 = 0;
+	pig->csum = 0;
+	pig->resv2 = 0;
+	pig->ngrec = 0;
+	return skb;
+}
+
+static int igmpv3_sendpack(struct sk_buff *skb)
+{
+	struct iphdr *pip = skb->nh.iph;
+	struct igmphdr *pig = skb->h.igmph;
+	int iplen, igmplen;
+
+	iplen = skb->tail - (unsigned char *)skb->nh.iph;
+	pip->tot_len = htons(iplen);
+	ip_send_check(pip);
+
+	igmplen = skb->tail - (unsigned char *)skb->h.igmph;
+	pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
+		       dst_output);
+}
+
+static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
+{
+	return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc,type,gdel,sdel);
+}
+
+static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
+	int type, struct igmpv3_grec **ppgr)
+{
+	struct net_device *dev = pmc->interface->dev;
+	struct igmpv3_report *pih;
+	struct igmpv3_grec *pgr;
+
+	if (!skb)
+		skb = igmpv3_newpack(dev, dev->mtu);
+	if (!skb)
+		return NULL;
+	pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
+	pgr->grec_type = type;
+	pgr->grec_auxwords = 0;
+	pgr->grec_nsrcs = 0;
+	pgr->grec_mca = pmc->multiaddr;
+	pih = (struct igmpv3_report *)skb->h.igmph;
+	pih->ngrec = htons(ntohs(pih->ngrec)+1);
+	*ppgr = pgr;
+	return skb;
+}
+
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+	skb_tailroom(skb)) : 0)
+
+static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
+	int type, int gdeleted, int sdeleted)
+{
+	struct net_device *dev = pmc->interface->dev;
+	struct igmpv3_report *pih;
+	struct igmpv3_grec *pgr = NULL;
+	struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+	int scount, first, isquery, truncate;
+
+	if (pmc->multiaddr == IGMP_ALL_HOSTS)
+		return skb;
+
+	isquery = type == IGMPV3_MODE_IS_INCLUDE ||
+		  type == IGMPV3_MODE_IS_EXCLUDE;
+	truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
+		    type == IGMPV3_CHANGE_TO_EXCLUDE;
+
+	psf_list = sdeleted ? &pmc->tomb : &pmc->sources;
+
+	if (!*psf_list) {
+		if (type == IGMPV3_ALLOW_NEW_SOURCES ||
+		    type == IGMPV3_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->crcount || isquery) {
+			/* make sure we have room for group header and at
+			 * least one source.
+			 */
+			if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)+
+			    sizeof(__u32)) {
+				igmpv3_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+		return skb;
+	}
+	pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
+
+	/* EX and TO_EX get a fresh packet, if needed */
+	if (truncate) {
+		if (pih && pih->ngrec &&
+		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+			if (skb)
+				igmpv3_sendpack(skb);
+			skb = igmpv3_newpack(dev, dev->mtu);
+		}
+	}
+	first = 1;
+	scount = 0;
+	psf_prev = NULL;
+	for (psf=*psf_list; psf; psf=psf_next) {
+		u32 *psrc;
+
+		psf_next = psf->sf_next;
+
+		if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+			psf_prev = psf;
+			continue;
+		}
+
+		/* clear marks on query responses */
+		if (isquery)
+			psf->sf_gsresp = 0;
+
+		if (AVAILABLE(skb) < sizeof(u32) +
+		    first*sizeof(struct igmpv3_grec)) {
+			if (truncate && !first)
+				break;	 /* truncate these */
+			if (pgr)
+				pgr->grec_nsrcs = htons(scount);
+			if (skb)
+				igmpv3_sendpack(skb);
+			skb = igmpv3_newpack(dev, dev->mtu);
+			first = 1;
+			scount = 0;
+		}
+		if (first) {
+			skb = add_grhead(skb, pmc, type, &pgr);
+			first = 0;
+		}
+		psrc = (u32 *)skb_put(skb, sizeof(u32));
+		*psrc = psf->sf_inaddr;
+		scount++;
+		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
+		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+			psf->sf_crcount--;
+			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
+				if (psf_prev)
+					psf_prev->sf_next = psf->sf_next;
+				else
+					*psf_list = psf->sf_next;
+				kfree(psf);
+				continue;
+			}
+		}
+		psf_prev = psf;
+	}
+	if (pgr)
+		pgr->grec_nsrcs = htons(scount);
+
+	if (isquery)
+		pmc->gsquery = 0;	/* clear query state on report */
+	return skb;
+}
+
+static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
+{
+	struct sk_buff *skb = NULL;
+	int type;
+
+	if (!pmc) {
+		read_lock(&in_dev->mc_list_lock);
+		for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+			if (pmc->multiaddr == IGMP_ALL_HOSTS)
+				continue;
+			spin_lock_bh(&pmc->lock);
+			if (pmc->sfcount[MCAST_EXCLUDE])
+				type = IGMPV3_MODE_IS_EXCLUDE;
+			else
+				type = IGMPV3_MODE_IS_INCLUDE;
+			skb = add_grec(skb, pmc, type, 0, 0);
+			spin_unlock_bh(&pmc->lock);
+		}
+		read_unlock(&in_dev->mc_list_lock);
+	} else {
+		spin_lock_bh(&pmc->lock);
+		if (pmc->sfcount[MCAST_EXCLUDE])
+			type = IGMPV3_MODE_IS_EXCLUDE;
+		else
+			type = IGMPV3_MODE_IS_INCLUDE;
+		skb = add_grec(skb, pmc, type, 0, 0);
+		spin_unlock_bh(&pmc->lock);
+	}
+	if (!skb)
+		return 0;
+	return igmpv3_sendpack(skb);
+}
+
+/*
+ * remove zero-count source records from a source filter list
+ */
+static void igmpv3_clear_zeros(struct ip_sf_list **ppsf)
+{
+	struct ip_sf_list *psf_prev, *psf_next, *psf;
+
+	psf_prev = NULL;
+	for (psf=*ppsf; psf; psf = psf_next) {
+		psf_next = psf->sf_next;
+		if (psf->sf_crcount == 0) {
+			if (psf_prev)
+				psf_prev->sf_next = psf->sf_next;
+			else
+				*ppsf = psf->sf_next;
+			kfree(psf);
+		} else
+			psf_prev = psf;
+	}
+}
+
+static void igmpv3_send_cr(struct in_device *in_dev)
+{
+	struct ip_mc_list *pmc, *pmc_prev, *pmc_next;
+	struct sk_buff *skb = NULL;
+	int type, dtype;
+
+	read_lock(&in_dev->mc_list_lock);
+	spin_lock_bh(&in_dev->mc_tomb_lock);
+
+	/* deleted MCA's */
+	pmc_prev = NULL;
+	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) {
+		pmc_next = pmc->next;
+		if (pmc->sfmode == MCAST_INCLUDE) {
+			type = IGMPV3_BLOCK_OLD_SOURCES;
+			dtype = IGMPV3_BLOCK_OLD_SOURCES;
+			skb = add_grec(skb, pmc, type, 1, 0);
+			skb = add_grec(skb, pmc, dtype, 1, 1);
+		}
+		if (pmc->crcount) {
+			pmc->crcount--;
+			if (pmc->sfmode == MCAST_EXCLUDE) {
+				type = IGMPV3_CHANGE_TO_INCLUDE;
+				skb = add_grec(skb, pmc, type, 1, 0);
+			}
+			if (pmc->crcount == 0) {
+				igmpv3_clear_zeros(&pmc->tomb);
+				igmpv3_clear_zeros(&pmc->sources);
+			}
+		}
+		if (pmc->crcount == 0 && !pmc->tomb && !pmc->sources) {
+			if (pmc_prev)
+				pmc_prev->next = pmc_next;
+			else
+				in_dev->mc_tomb = pmc_next;
+			in_dev_put(pmc->interface);
+			kfree(pmc);
+		} else
+			pmc_prev = pmc;
+	}
+	spin_unlock_bh(&in_dev->mc_tomb_lock);
+
+	/* change recs */
+	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		spin_lock_bh(&pmc->lock);
+		if (pmc->sfcount[MCAST_EXCLUDE]) {
+			type = IGMPV3_BLOCK_OLD_SOURCES;
+			dtype = IGMPV3_ALLOW_NEW_SOURCES;
+		} else {
+			type = IGMPV3_ALLOW_NEW_SOURCES;
+			dtype = IGMPV3_BLOCK_OLD_SOURCES;
+		}
+		skb = add_grec(skb, pmc, type, 0, 0);
+		skb = add_grec(skb, pmc, dtype, 0, 1);	/* deleted sources */
+
+		/* filter mode changes */
+		if (pmc->crcount) {
+			pmc->crcount--;
+			if (pmc->sfmode == MCAST_EXCLUDE)
+				type = IGMPV3_CHANGE_TO_EXCLUDE;
+			else
+				type = IGMPV3_CHANGE_TO_INCLUDE;
+			skb = add_grec(skb, pmc, type, 0, 0);
+		}
+		spin_unlock_bh(&pmc->lock);
+	}
+	read_unlock(&in_dev->mc_list_lock);
+
+	if (!skb)
+		return;
+	(void) igmpv3_sendpack(skb);
+}
+
+static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
+	int type)
+{
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct igmphdr *ih;
+	struct rtable *rt;
+	struct net_device *dev = in_dev->dev;
+	u32	group = pmc ? pmc->multiaddr : 0;
+	u32	dst;
+
+	if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
+		return igmpv3_send_report(in_dev, pmc);
+	else if (type == IGMP_HOST_LEAVE_MESSAGE)
+		dst = IGMP_ALL_ROUTER;
+	else
+		dst = group;
+
+	{
+		struct flowi fl = { .oif = dev->ifindex,
+				    .nl_u = { .ip4_u = { .daddr = dst } },
+				    .proto = IPPROTO_IGMP };
+		if (ip_route_output_key(&rt, &fl))
+			return -1;
+	}
+	if (rt->rt_src == 0) {
+		ip_rt_put(rt);
+		return -1;
+	}
+
+	skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	if (skb == NULL) {
+		ip_rt_put(rt);
+		return -1;
+	}
+
+	skb->dst = &rt->u.dst;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+
+	iph->version  = 4;
+	iph->ihl      = (sizeof(struct iphdr)+4)>>2;
+	iph->tos      = 0xc0;
+	iph->frag_off = htons(IP_DF);
+	iph->ttl      = 1;
+	iph->daddr    = dst;
+	iph->saddr    = rt->rt_src;
+	iph->protocol = IPPROTO_IGMP;
+	iph->tot_len  = htons(IGMP_SIZE);
+	ip_select_ident(iph, &rt->u.dst, NULL);
+	((u8*)&iph[1])[0] = IPOPT_RA;
+	((u8*)&iph[1])[1] = 4;
+	((u8*)&iph[1])[2] = 0;
+	((u8*)&iph[1])[3] = 0;
+	ip_send_check(iph);
+
+	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+	ih->type=type;
+	ih->code=0;
+	ih->csum=0;
+	ih->group=group;
+	ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		       dst_output);
+}
+
+static void igmp_gq_timer_expire(unsigned long data)
+{
+	struct in_device *in_dev = (struct in_device *)data;
+
+	in_dev->mr_gq_running = 0;
+	igmpv3_send_report(in_dev, NULL);
+	__in_dev_put(in_dev);
+}
+
+static void igmp_ifc_timer_expire(unsigned long data)
+{
+	struct in_device *in_dev = (struct in_device *)data;
+
+	igmpv3_send_cr(in_dev);
+	if (in_dev->mr_ifc_count) {
+		in_dev->mr_ifc_count--;
+		igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval);
+	}
+	__in_dev_put(in_dev);
+}
+
+static void igmp_ifc_event(struct in_device *in_dev)
+{
+	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+		return;
+	in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : 
+		IGMP_Unsolicited_Report_Count;
+	igmp_ifc_start_timer(in_dev, 1);
+}
+
+
+static void igmp_timer_expire(unsigned long data)
+{
+	struct ip_mc_list *im=(struct ip_mc_list *)data;
+	struct in_device *in_dev = im->interface;
+
+	spin_lock(&im->lock);
+	im->tm_running=0;
+
+	if (im->unsolicit_count) {
+		im->unsolicit_count--;
+		igmp_start_timer(im, IGMP_Unsolicited_Report_Interval);
+	}
+	im->reporter = 1;
+	spin_unlock(&im->lock);
+
+	if (IGMP_V1_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
+	else if (IGMP_V2_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
+	else
+		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
+
+	ip_ma_put(im);
+}
+
+static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+{
+	struct ip_sf_list *psf;
+	int i, scount;
+
+	scount = 0;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
+		for (i=0; i<nsrcs; i++)
+			if (srcs[i] == psf->sf_inaddr) {
+				psf->sf_gsresp = 1;
+				scount++;
+				break;
+			}
+	}
+}
+
+static void igmp_heard_report(struct in_device *in_dev, u32 group)
+{
+	struct ip_mc_list *im;
+
+	/* Timers are only set for non-local groups */
+
+	if (group == IGMP_ALL_HOSTS)
+		return;
+
+	read_lock(&in_dev->mc_list_lock);
+	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+		if (im->multiaddr == group) {
+			igmp_stop_timer(im);
+			break;
+		}
+	}
+	read_unlock(&in_dev->mc_list_lock);
+}
+
+static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+	int len)
+{
+	struct igmphdr 		*ih = skb->h.igmph;
+	struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
+	struct ip_mc_list	*im;
+	u32			group = ih->group;
+	int			max_delay;
+	int			mark = 0;
+
+
+	if (len == 8) {
+		if (ih->code == 0) {
+			/* Alas, old v1 router presents here. */
+	
+			max_delay = IGMP_Query_Response_Interval;
+			in_dev->mr_v1_seen = jiffies +
+				IGMP_V1_Router_Present_Timeout;
+			group = 0;
+		} else {
+			/* v2 router present */
+			max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
+			in_dev->mr_v2_seen = jiffies +
+				IGMP_V2_Router_Present_Timeout;
+		}
+		/* cancel the interface change timer */
+		in_dev->mr_ifc_count = 0;
+		if (del_timer(&in_dev->mr_ifc_timer))
+			__in_dev_put(in_dev);
+		/* clear deleted report items */
+		igmpv3_clear_delrec(in_dev);
+	} else if (len < 12) {
+		return;	/* ignore bogus packet; freed by caller */
+	} else { /* v3 */
+		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
+			return;
+		
+		ih3 = (struct igmpv3_query *) skb->h.raw;
+		if (ih3->nsrcs) {
+			if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) 
+					   + ntohs(ih3->nsrcs)*sizeof(__u32)))
+				return;
+			ih3 = (struct igmpv3_query *) skb->h.raw;
+		}
+
+		max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
+		if (!max_delay)
+			max_delay = 1;	/* can't mod w/ 0 */
+		in_dev->mr_maxdelay = max_delay;
+		if (ih3->qrv)
+			in_dev->mr_qrv = ih3->qrv;
+		if (!group) { /* general query */
+			if (ih3->nsrcs)
+				return;	/* no sources allowed */
+			igmp_gq_start_timer(in_dev);
+			return;
+		}
+		/* mark sources to include, if group & source-specific */
+		mark = ih3->nsrcs != 0;
+	}
+
+	/*
+	 * - Start the timers in all of our membership records
+	 *   that the query applies to for the interface on
+	 *   which the query arrived excl. those that belong
+	 *   to a "local" group (224.0.0.X)
+	 * - For timers already running check if they need to
+	 *   be reset.
+	 * - Use the igmp->igmp_code field as the maximum
+	 *   delay possible
+	 */
+	read_lock(&in_dev->mc_list_lock);
+	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+		if (group && group != im->multiaddr)
+			continue;
+		if (im->multiaddr == IGMP_ALL_HOSTS)
+			continue;
+		spin_lock_bh(&im->lock);
+		if (im->tm_running)
+			im->gsquery = im->gsquery && mark;
+		else
+			im->gsquery = mark;
+		if (im->gsquery)
+			igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
+		spin_unlock_bh(&im->lock);
+		igmp_mod_timer(im, max_delay);
+	}
+	read_unlock(&in_dev->mc_list_lock);
+}
+
+int igmp_rcv(struct sk_buff *skb)
+{
+	/* This basically follows the spec line by line -- see RFC1112 */
+	struct igmphdr *ih;
+	struct in_device *in_dev = in_dev_get(skb->dev);
+	int len = skb->len;
+
+	if (in_dev==NULL) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	if (!pskb_may_pull(skb, sizeof(struct igmphdr)) || 
+	    (u16)csum_fold(skb_checksum(skb, 0, len, 0))) {
+		in_dev_put(in_dev);
+		kfree_skb(skb);
+		return 0;
+	}
+
+	ih = skb->h.igmph;
+	switch (ih->type) {
+	case IGMP_HOST_MEMBERSHIP_QUERY:
+		igmp_heard_query(in_dev, skb, len);
+		break;
+	case IGMP_HOST_MEMBERSHIP_REPORT:
+	case IGMPV2_HOST_MEMBERSHIP_REPORT:
+	case IGMPV3_HOST_MEMBERSHIP_REPORT:
+		/* Is it our report looped back? */
+		if (((struct rtable*)skb->dst)->fl.iif == 0)
+			break;
+		igmp_heard_report(in_dev, ih->group);
+		break;
+	case IGMP_PIM:
+#ifdef CONFIG_IP_PIMSM_V1
+		in_dev_put(in_dev);
+		return pim_rcv_v1(skb);
+#endif
+	case IGMP_DVMRP:
+	case IGMP_TRACE:
+	case IGMP_HOST_LEAVE_MESSAGE:
+	case IGMP_MTRACE:
+	case IGMP_MTRACE_RESP:
+		break;
+	default:
+		NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type));
+	}
+	in_dev_put(in_dev);
+	kfree_skb(skb);
+	return 0;
+}
+
+#endif
+
+
+/*
+ *	Add a filter to a device
+ */
+
+static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
+{
+	char buf[MAX_ADDR_LEN];
+	struct net_device *dev = in_dev->dev;
+
+	/* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG.
+	   We will get multicast token leakage, when IFF_MULTICAST
+	   is changed. This check should be done in dev->set_multicast_list
+	   routine. Something sort of:
+	   if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; }
+	   --ANK
+	   */
+	if (arp_mc_map(addr, buf, dev, 0) == 0)
+		dev_mc_add(dev,buf,dev->addr_len,0);
+}
+
+/*
+ *	Remove a filter from a device
+ */
+
+static void ip_mc_filter_del(struct in_device *in_dev, u32 addr)
+{
+	char buf[MAX_ADDR_LEN];
+	struct net_device *dev = in_dev->dev;
+
+	if (arp_mc_map(addr, buf, dev, 0) == 0)
+		dev_mc_delete(dev,buf,dev->addr_len,0);
+}
+
+#ifdef CONFIG_IP_MULTICAST
+/*
+ * deleted ip_mc_list manipulation
+ */
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+{
+	struct ip_mc_list *pmc;
+
+	/* this is an "ip_mc_list" for convenience; only the fields below
+	 * are actually used. In particular, the refcnt and users are not
+	 * used for management of the delete list. Using the same structure
+	 * for deleted items allows change reports to use common code with
+	 * non-deleted or query-response MCA's.
+	 */
+	pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL);
+	if (!pmc)
+		return;
+	memset(pmc, 0, sizeof(*pmc));
+	spin_lock_bh(&im->lock);
+	pmc->interface = im->interface;
+	in_dev_hold(in_dev);
+	pmc->multiaddr = im->multiaddr;
+	pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
+		IGMP_Unsolicited_Report_Count;
+	pmc->sfmode = im->sfmode;
+	if (pmc->sfmode == MCAST_INCLUDE) {
+		struct ip_sf_list *psf;
+
+		pmc->tomb = im->tomb;
+		pmc->sources = im->sources;
+		im->tomb = im->sources = NULL;
+		for (psf=pmc->sources; psf; psf=psf->sf_next)
+			psf->sf_crcount = pmc->crcount;
+	}
+	spin_unlock_bh(&im->lock);
+
+	spin_lock_bh(&in_dev->mc_tomb_lock);
+	pmc->next = in_dev->mc_tomb;
+	in_dev->mc_tomb = pmc;
+	spin_unlock_bh(&in_dev->mc_tomb_lock);
+}
+
+static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
+{
+	struct ip_mc_list *pmc, *pmc_prev;
+	struct ip_sf_list *psf, *psf_next;
+
+	spin_lock_bh(&in_dev->mc_tomb_lock);
+	pmc_prev = NULL;
+	for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) {
+		if (pmc->multiaddr == multiaddr)
+			break;
+		pmc_prev = pmc;
+	}
+	if (pmc) {
+		if (pmc_prev)
+			pmc_prev->next = pmc->next;
+		else
+			in_dev->mc_tomb = pmc->next;
+	}
+	spin_unlock_bh(&in_dev->mc_tomb_lock);
+	if (pmc) {
+		for (psf=pmc->tomb; psf; psf=psf_next) {
+			psf_next = psf->sf_next;
+			kfree(psf);
+		}
+		in_dev_put(pmc->interface);
+		kfree(pmc);
+	}
+}
+
+static void igmpv3_clear_delrec(struct in_device *in_dev)
+{
+	struct ip_mc_list *pmc, *nextpmc;
+
+	spin_lock_bh(&in_dev->mc_tomb_lock);
+	pmc = in_dev->mc_tomb;
+	in_dev->mc_tomb = NULL;
+	spin_unlock_bh(&in_dev->mc_tomb_lock);
+
+	for (; pmc; pmc = nextpmc) {
+		nextpmc = pmc->next;
+		ip_mc_clear_src(pmc);
+		in_dev_put(pmc->interface);
+		kfree(pmc);
+	}
+	/* clear dead sources, too */
+	read_lock(&in_dev->mc_list_lock);
+	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		struct ip_sf_list *psf, *psf_next;
+
+		spin_lock_bh(&pmc->lock);
+		psf = pmc->tomb;
+		pmc->tomb = NULL;
+		spin_unlock_bh(&pmc->lock);
+		for (; psf; psf=psf_next) {
+			psf_next = psf->sf_next;
+			kfree(psf);
+		}
+	}
+	read_unlock(&in_dev->mc_list_lock);
+}
+#endif
+
+static void igmp_group_dropped(struct ip_mc_list *im)
+{
+	struct in_device *in_dev = im->interface;
+#ifdef CONFIG_IP_MULTICAST
+	int reporter;
+#endif
+
+	if (im->loaded) {
+		im->loaded = 0;
+		ip_mc_filter_del(in_dev, im->multiaddr);
+	}
+
+#ifdef CONFIG_IP_MULTICAST
+	if (im->multiaddr == IGMP_ALL_HOSTS)
+		return;
+
+	reporter = im->reporter;
+	igmp_stop_timer(im);
+
+	if (!in_dev->dead) {
+		if (IGMP_V1_SEEN(in_dev))
+			goto done;
+		if (IGMP_V2_SEEN(in_dev)) {
+			if (reporter)
+				igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE);
+			goto done;
+		}
+		/* IGMPv3 */
+		igmpv3_add_delrec(in_dev, im);
+
+		igmp_ifc_event(in_dev);
+	}
+done:
+#endif
+	ip_mc_clear_src(im);
+}
+
+static void igmp_group_added(struct ip_mc_list *im)
+{
+	struct in_device *in_dev = im->interface;
+
+	if (im->loaded == 0) {
+		im->loaded = 1;
+		ip_mc_filter_add(in_dev, im->multiaddr);
+	}
+
+#ifdef CONFIG_IP_MULTICAST
+	if (im->multiaddr == IGMP_ALL_HOSTS)
+		return;
+
+	if (in_dev->dead)
+		return;
+	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+		spin_lock_bh(&im->lock);
+		igmp_start_timer(im, IGMP_Initial_Report_Delay);
+		spin_unlock_bh(&im->lock);
+		return;
+	}
+	/* else, v3 */
+
+	im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
+		IGMP_Unsolicited_Report_Count;
+	igmp_ifc_event(in_dev);
+#endif
+}
+
+
+/*
+ *	Multicast list managers
+ */
+
+
+/*
+ *	A socket has joined a multicast group on device dev.
+ */
+
+void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
+{
+	struct ip_mc_list *im;
+
+	ASSERT_RTNL();
+
+	for (im=in_dev->mc_list; im; im=im->next) {
+		if (im->multiaddr == addr) {
+			im->users++;
+			ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+			goto out;
+		}
+	}
+
+	im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL);
+	if (!im)
+		goto out;
+
+	im->users=1;
+	im->interface=in_dev;
+	in_dev_hold(in_dev);
+	im->multiaddr=addr;
+	/* initial mode is (EX, empty) */
+	im->sfmode = MCAST_EXCLUDE;
+	im->sfcount[MCAST_INCLUDE] = 0;
+	im->sfcount[MCAST_EXCLUDE] = 1;
+	im->sources = NULL;
+	im->tomb = NULL;
+	im->crcount = 0;
+	atomic_set(&im->refcnt, 1);
+	spin_lock_init(&im->lock);
+#ifdef CONFIG_IP_MULTICAST
+	im->tm_running=0;
+	init_timer(&im->timer);
+	im->timer.data=(unsigned long)im;
+	im->timer.function=&igmp_timer_expire;
+	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
+	im->reporter = 0;
+	im->gsquery = 0;
+#endif
+	im->loaded = 0;
+	write_lock_bh(&in_dev->mc_list_lock);
+	im->next=in_dev->mc_list;
+	in_dev->mc_list=im;
+	write_unlock_bh(&in_dev->mc_list_lock);
+#ifdef CONFIG_IP_MULTICAST
+	igmpv3_del_delrec(in_dev, im->multiaddr);
+#endif
+	igmp_group_added(im);
+	if (!in_dev->dead)
+		ip_rt_multicast_event(in_dev);
+out:
+	return;
+}
+
+/*
+ *	A socket has left a multicast group on device dev
+ */
+
+void ip_mc_dec_group(struct in_device *in_dev, u32 addr)
+{
+	struct ip_mc_list *i, **ip;
+	
+	ASSERT_RTNL();
+	
+	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+		if (i->multiaddr==addr) {
+			if (--i->users == 0) {
+				write_lock_bh(&in_dev->mc_list_lock);
+				*ip = i->next;
+				write_unlock_bh(&in_dev->mc_list_lock);
+				igmp_group_dropped(i);
+
+				if (!in_dev->dead)
+					ip_rt_multicast_event(in_dev);
+
+				ip_ma_put(i);
+				return;
+			}
+			break;
+		}
+	}
+}
+
+/* Device going down */
+
+void ip_mc_down(struct in_device *in_dev)
+{
+	struct ip_mc_list *i;
+
+	ASSERT_RTNL();
+
+	for (i=in_dev->mc_list; i; i=i->next)
+		igmp_group_dropped(i);
+
+#ifdef CONFIG_IP_MULTICAST
+	in_dev->mr_ifc_count = 0;
+	if (del_timer(&in_dev->mr_ifc_timer))
+		__in_dev_put(in_dev);
+	in_dev->mr_gq_running = 0;
+	if (del_timer(&in_dev->mr_gq_timer))
+		__in_dev_put(in_dev);
+	igmpv3_clear_delrec(in_dev);
+#endif
+
+	ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
+}
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
+	ASSERT_RTNL();
+
+	in_dev->mc_tomb = NULL;
+#ifdef CONFIG_IP_MULTICAST
+	in_dev->mr_gq_running = 0;
+	init_timer(&in_dev->mr_gq_timer);
+	in_dev->mr_gq_timer.data=(unsigned long) in_dev;
+	in_dev->mr_gq_timer.function=&igmp_gq_timer_expire;
+	in_dev->mr_ifc_count = 0;
+	init_timer(&in_dev->mr_ifc_timer);
+	in_dev->mr_ifc_timer.data=(unsigned long) in_dev;
+	in_dev->mr_ifc_timer.function=&igmp_ifc_timer_expire;
+	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
+#endif
+
+	rwlock_init(&in_dev->mc_list_lock);
+	spin_lock_init(&in_dev->mc_tomb_lock);
+}
+
+/* Device going up */
+
+void ip_mc_up(struct in_device *in_dev)
+{
+	struct ip_mc_list *i;
+
+	ASSERT_RTNL();
+
+	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
+
+	for (i=in_dev->mc_list; i; i=i->next)
+		igmp_group_added(i);
+}
+
+/*
+ *	Device is about to be destroyed: clean up.
+ */
+
+void ip_mc_destroy_dev(struct in_device *in_dev)
+{
+	struct ip_mc_list *i;
+
+	ASSERT_RTNL();
+
+	/* Deactivate timers */
+	ip_mc_down(in_dev);
+
+	write_lock_bh(&in_dev->mc_list_lock);
+	while ((i = in_dev->mc_list) != NULL) {
+		in_dev->mc_list = i->next;
+		write_unlock_bh(&in_dev->mc_list_lock);
+
+		igmp_group_dropped(i);
+		ip_ma_put(i);
+
+		write_lock_bh(&in_dev->mc_list_lock);
+	}
+	write_unlock_bh(&in_dev->mc_list_lock);
+}
+
+static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+{
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = imr->imr_multiaddr.s_addr } } };
+	struct rtable *rt;
+	struct net_device *dev = NULL;
+	struct in_device *idev = NULL;
+
+	if (imr->imr_ifindex) {
+		idev = inetdev_by_index(imr->imr_ifindex);
+		if (idev)
+			__in_dev_put(idev);
+		return idev;
+	}
+	if (imr->imr_address.s_addr) {
+		dev = ip_dev_find(imr->imr_address.s_addr);
+		if (!dev)
+			return NULL;
+		__dev_put(dev);
+	}
+
+	if (!dev && !ip_route_output_key(&rt, &fl)) {
+		dev = rt->u.dst.dev;
+		ip_rt_put(rt);
+	}
+	if (dev) {
+		imr->imr_ifindex = dev->ifindex;
+		idev = __in_dev_get(dev);
+	}
+	return idev;
+}
+
+/*
+ *	Join a socket to a group
+ */
+int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf = IP_MAX_MSF;
+
+
+static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+	__u32 *psfsrc)
+{
+	struct ip_sf_list *psf, *psf_prev;
+	int rv = 0;
+
+	psf_prev = NULL;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (psf->sf_inaddr == *psfsrc)
+			break;
+		psf_prev = psf;
+	}
+	if (!psf || psf->sf_count[sfmode] == 0) {
+		/* source filter not found, or count wrong =>  bug */
+		return -ESRCH;
+	}
+	psf->sf_count[sfmode]--;
+	if (psf->sf_count[sfmode] == 0) {
+		ip_rt_multicast_event(pmc->interface);
+	}
+	if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
+#ifdef CONFIG_IP_MULTICAST
+		struct in_device *in_dev = pmc->interface;
+#endif
+
+		/* no more filters for this source */
+		if (psf_prev)
+			psf_prev->sf_next = psf->sf_next;
+		else
+			pmc->sources = psf->sf_next;
+#ifdef CONFIG_IP_MULTICAST
+		if (psf->sf_oldin &&
+		    !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+			psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : 
+				IGMP_Unsolicited_Report_Count;
+			psf->sf_next = pmc->tomb;
+			pmc->tomb = psf;
+			rv = 1;
+		} else
+#endif
+			kfree(psf);
+	}
+	return rv;
+}
+
+#ifndef CONFIG_IP_MULTICAST
+#define igmp_ifc_event(x)	do { } while (0)
+#endif
+
+static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
+			 int sfcount, __u32 *psfsrc, int delta)
+{
+	struct ip_mc_list *pmc;
+	int	changerec = 0;
+	int	i, err;
+
+	if (!in_dev)
+		return -ENODEV;
+	read_lock(&in_dev->mc_list_lock);
+	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		if (*pmca == pmc->multiaddr)
+			break;
+	}
+	if (!pmc) {
+		/* MCA not found?? bug */
+		read_unlock(&in_dev->mc_list_lock);
+		return -ESRCH;
+	}
+	spin_lock_bh(&pmc->lock);
+	read_unlock(&in_dev->mc_list_lock);
+#ifdef CONFIG_IP_MULTICAST
+	sf_markstate(pmc);
+#endif
+	if (!delta) {
+		err = -EINVAL;
+		if (!pmc->sfcount[sfmode])
+			goto out_unlock;
+		pmc->sfcount[sfmode]--;
+	}
+	err = 0;
+	for (i=0; i<sfcount; i++) {
+		int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+
+		changerec |= rv > 0;
+		if (!err && rv < 0)
+			err = rv;
+	}
+	if (pmc->sfmode == MCAST_EXCLUDE &&
+	    pmc->sfcount[MCAST_EXCLUDE] == 0 &&
+	    pmc->sfcount[MCAST_INCLUDE]) {
+#ifdef CONFIG_IP_MULTICAST
+		struct ip_sf_list *psf;
+#endif
+
+		/* filter mode change */
+		pmc->sfmode = MCAST_INCLUDE;
+#ifdef CONFIG_IP_MULTICAST
+		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : 
+			IGMP_Unsolicited_Report_Count;
+		in_dev->mr_ifc_count = pmc->crcount;
+		for (psf=pmc->sources; psf; psf = psf->sf_next)
+			psf->sf_crcount = 0;
+		igmp_ifc_event(pmc->interface);
+	} else if (sf_setstate(pmc) || changerec) {
+		igmp_ifc_event(pmc->interface);
+#endif
+	}
+out_unlock:
+	spin_unlock_bh(&pmc->lock);
+	return err;
+}
+
+/*
+ * Add multicast single-source filter to the interface list
+ */
+static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
+	__u32 *psfsrc, int delta)
+{
+	struct ip_sf_list *psf, *psf_prev;
+
+	psf_prev = NULL;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (psf->sf_inaddr == *psfsrc)
+			break;
+		psf_prev = psf;
+	}
+	if (!psf) {
+		psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		if (!psf)
+			return -ENOBUFS;
+		memset(psf, 0, sizeof(*psf));
+		psf->sf_inaddr = *psfsrc;
+		if (psf_prev) {
+			psf_prev->sf_next = psf;
+		} else
+			pmc->sources = psf;
+	}
+	psf->sf_count[sfmode]++;
+	if (psf->sf_count[sfmode] == 1) {
+		ip_rt_multicast_event(pmc->interface);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_IP_MULTICAST
+static void sf_markstate(struct ip_mc_list *pmc)
+{
+	struct ip_sf_list *psf;
+	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
+
+	for (psf=pmc->sources; psf; psf=psf->sf_next)
+		if (pmc->sfcount[MCAST_EXCLUDE]) {
+			psf->sf_oldin = mca_xcount ==
+				psf->sf_count[MCAST_EXCLUDE] &&
+				!psf->sf_count[MCAST_INCLUDE];
+		} else
+			psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
+}
+
+static int sf_setstate(struct ip_mc_list *pmc)
+{
+	struct ip_sf_list *psf;
+	int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
+	int qrv = pmc->interface->mr_qrv;
+	int new_in, rv;
+
+	rv = 0;
+	for (psf=pmc->sources; psf; psf=psf->sf_next) {
+		if (pmc->sfcount[MCAST_EXCLUDE]) {
+			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
+				!psf->sf_count[MCAST_INCLUDE];
+		} else
+			new_in = psf->sf_count[MCAST_INCLUDE] != 0;
+		if (new_in != psf->sf_oldin) {
+			psf->sf_crcount = qrv;
+			rv++;
+		}
+	}
+	return rv;
+}
+#endif
+
+/*
+ * Add multicast source filter list to the interface list
+ */
+static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
+			 int sfcount, __u32 *psfsrc, int delta)
+{
+	struct ip_mc_list *pmc;
+	int	isexclude;
+	int	i, err;
+
+	if (!in_dev)
+		return -ENODEV;
+	read_lock(&in_dev->mc_list_lock);
+	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		if (*pmca == pmc->multiaddr)
+			break;
+	}
+	if (!pmc) {
+		/* MCA not found?? bug */
+		read_unlock(&in_dev->mc_list_lock);
+		return -ESRCH;
+	}
+	spin_lock_bh(&pmc->lock);
+	read_unlock(&in_dev->mc_list_lock);
+
+#ifdef CONFIG_IP_MULTICAST
+	sf_markstate(pmc);
+#endif
+	isexclude = pmc->sfmode == MCAST_EXCLUDE;
+	if (!delta)
+		pmc->sfcount[sfmode]++;
+	err = 0;
+	for (i=0; i<sfcount; i++) {
+		err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+		if (err)
+			break;
+	}
+	if (err) {
+		int j;
+
+		pmc->sfcount[sfmode]--;
+		for (j=0; j<i; j++)
+			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
+#ifdef CONFIG_IP_MULTICAST
+		struct in_device *in_dev = pmc->interface;
+		struct ip_sf_list *psf;
+#endif
+
+		/* filter mode change */
+		if (pmc->sfcount[MCAST_EXCLUDE])
+			pmc->sfmode = MCAST_EXCLUDE;
+		else if (pmc->sfcount[MCAST_INCLUDE])
+			pmc->sfmode = MCAST_INCLUDE;
+#ifdef CONFIG_IP_MULTICAST
+		/* else no filters; keep old mode for reports */
+
+		pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : 
+			IGMP_Unsolicited_Report_Count;
+		in_dev->mr_ifc_count = pmc->crcount;
+		for (psf=pmc->sources; psf; psf = psf->sf_next)
+			psf->sf_crcount = 0;
+		igmp_ifc_event(in_dev);
+	} else if (sf_setstate(pmc)) {
+		igmp_ifc_event(in_dev);
+#endif
+	}
+	spin_unlock_bh(&pmc->lock);
+	return err;
+}
+
+static void ip_mc_clear_src(struct ip_mc_list *pmc)
+{
+	struct ip_sf_list *psf, *nextpsf;
+
+	for (psf=pmc->tomb; psf; psf=nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
+	pmc->tomb = NULL;
+	for (psf=pmc->sources; psf; psf=nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
+	pmc->sources = NULL;
+	pmc->sfmode = MCAST_EXCLUDE;
+	pmc->sfcount[MCAST_EXCLUDE] = 0;
+	pmc->sfcount[MCAST_EXCLUDE] = 1;
+}
+
+
+/*
+ * Join a multicast group
+ */
+int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
+{
+	int err;
+	u32 addr = imr->imr_multiaddr.s_addr;
+	struct ip_mc_socklist *iml, *i;
+	struct in_device *in_dev;
+	struct inet_sock *inet = inet_sk(sk);
+	int count = 0;
+
+	if (!MULTICAST(addr))
+		return -EINVAL;
+
+	rtnl_shlock();
+
+	in_dev = ip_mc_find_dev(imr);
+
+	if (!in_dev) {
+		iml = NULL;
+		err = -ENODEV;
+		goto done;
+	}
+
+	iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
+
+	err = -EADDRINUSE;
+	for (i = inet->mc_list; i; i = i->next) {
+		if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) {
+			/* New style additions are reference counted */
+			if (imr->imr_address.s_addr == 0) {
+				i->count++;
+				err = 0;
+			}
+			goto done;
+		}
+		count++;
+	}
+	err = -ENOBUFS;
+	if (iml == NULL || count >= sysctl_igmp_max_memberships)
+		goto done;
+	memcpy(&iml->multi, imr, sizeof(*imr));
+	iml->next = inet->mc_list;
+	iml->count = 1;
+	iml->sflist = NULL;
+	iml->sfmode = MCAST_EXCLUDE;
+	inet->mc_list = iml;
+	ip_mc_inc_group(in_dev, addr);
+	iml = NULL;
+	err = 0;
+
+done:
+	rtnl_shunlock();
+	if (iml)
+		sock_kfree_s(sk, iml, sizeof(*iml));
+	return err;
+}
+
+static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
+			   struct in_device *in_dev)
+{
+	int err;
+
+	if (iml->sflist == 0) {
+		/* any-source empty exclude case */
+		return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
+			iml->sfmode, 0, NULL, 0);
+	}
+	err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
+			iml->sfmode, iml->sflist->sl_count,
+			iml->sflist->sl_addr, 0);
+	sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max));
+	iml->sflist = NULL;
+	return err;
+}
+
+/*
+ *	Ask a socket to leave a group.
+ */
+
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *iml, **imlp;
+
+	rtnl_lock();
+	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
+		if (iml->multi.imr_multiaddr.s_addr==imr->imr_multiaddr.s_addr &&
+		    iml->multi.imr_address.s_addr==imr->imr_address.s_addr &&
+		    (!imr->imr_ifindex || iml->multi.imr_ifindex==imr->imr_ifindex)) {
+			struct in_device *in_dev;
+
+			in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+			if (in_dev)
+				(void) ip_mc_leave_src(sk, iml, in_dev);
+			if (--iml->count) {
+				rtnl_unlock();
+				if (in_dev)
+					in_dev_put(in_dev);
+				return 0;
+			}
+
+			*imlp = iml->next;
+
+			if (in_dev) {
+				ip_mc_dec_group(in_dev, imr->imr_multiaddr.s_addr);
+				in_dev_put(in_dev);
+			}
+			rtnl_unlock();
+			sock_kfree_s(sk, iml, sizeof(*iml));
+			return 0;
+		}
+	}
+	rtnl_unlock();
+	return -EADDRNOTAVAIL;
+}
+
+int ip_mc_source(int add, int omode, struct sock *sk, struct
+	ip_mreq_source *mreqs, int ifindex)
+{
+	int err;
+	struct ip_mreqn imr;
+	u32 addr = mreqs->imr_multiaddr;
+	struct ip_mc_socklist *pmc;
+	struct in_device *in_dev = NULL;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_sf_socklist *psl;
+	int i, j, rv;
+
+	if (!MULTICAST(addr))
+		return -EINVAL;
+
+	rtnl_shlock();
+
+	imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
+	imr.imr_address.s_addr = mreqs->imr_interface;
+	imr.imr_ifindex = ifindex;
+	in_dev = ip_mc_find_dev(&imr);
+
+	if (!in_dev) {
+		err = -ENODEV;
+		goto done;
+	}
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+		if (memcmp(&pmc->multi, mreqs, 2*sizeof(__u32)) == 0)
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	/* if a source filter was set, must be the same mode as before */
+	if (pmc->sflist) {
+		if (pmc->sfmode != omode)
+			goto done;
+	} else if (pmc->sfmode != omode) {
+		/* allow mode switches for empty-set filters */
+		ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 0, NULL, 0);
+		ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, pmc->sfmode, 0, 
+			NULL, 0);
+		pmc->sfmode = omode;
+	}
+
+	psl = pmc->sflist;
+	if (!add) {
+		if (!psl)
+			goto done;
+		rv = !0;
+		for (i=0; i<psl->sl_count; i++) {
+			rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
+				sizeof(__u32));
+			if (rv == 0)
+				break;
+		}
+		if (rv)		/* source not found */
+			goto done;
+
+		/* update the interface filter */
+		ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, 
+			&mreqs->imr_sourceaddr, 1);
+
+		for (j=i+1; j<psl->sl_count; j++)
+			psl->sl_addr[j-1] = psl->sl_addr[j];
+		psl->sl_count--;
+		err = 0;
+		goto done;
+	}
+	/* else, add a new source to the filter */
+
+	if (psl && psl->sl_count >= sysctl_igmp_max_msf) {
+		err = -ENOBUFS;
+		goto done;
+	}
+	if (!psl || psl->sl_count == psl->sl_max) {
+		struct ip_sf_socklist *newpsl;
+		int count = IP_SFBLOCK;
+
+		if (psl)
+			count += psl->sl_max;
+		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
+			IP_SFLSIZE(count), GFP_KERNEL);
+		if (!newpsl) {
+			err = -ENOBUFS;
+			goto done;
+		}
+		newpsl->sl_max = count;
+		newpsl->sl_count = count - IP_SFBLOCK;
+		if (psl) {
+			for (i=0; i<psl->sl_count; i++)
+				newpsl->sl_addr[i] = psl->sl_addr[i];
+			sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max));
+		}
+		pmc->sflist = psl = newpsl;
+	}
+	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
+	for (i=0; i<psl->sl_count; i++) {
+		rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
+			sizeof(__u32));
+		if (rv == 0)
+			break;
+	}
+	if (rv == 0)		/* address already there is an error */
+		goto done;
+	for (j=psl->sl_count-1; j>=i; j--)
+		psl->sl_addr[j+1] = psl->sl_addr[j];
+	psl->sl_addr[i] = mreqs->imr_sourceaddr;
+	psl->sl_count++;
+	err = 0;
+	/* update the interface list */
+	ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, 
+		&mreqs->imr_sourceaddr, 1);
+done:
+	rtnl_shunlock();
+	return err;
+}
+
+int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
+{
+	int err;
+	struct ip_mreqn	imr;
+	u32 addr = msf->imsf_multiaddr;
+	struct ip_mc_socklist *pmc;
+	struct in_device *in_dev;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_sf_socklist *newpsl, *psl;
+
+	if (!MULTICAST(addr))
+		return -EINVAL;
+	if (msf->imsf_fmode != MCAST_INCLUDE &&
+	    msf->imsf_fmode != MCAST_EXCLUDE)
+		return -EINVAL;
+
+	rtnl_shlock();
+
+	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
+	imr.imr_address.s_addr = msf->imsf_interface;
+	imr.imr_ifindex = ifindex;
+	in_dev = ip_mc_find_dev(&imr);
+
+	if (!in_dev) {
+		err = -ENODEV;
+		goto done;
+	}
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
+		    pmc->multi.imr_ifindex == imr.imr_ifindex)
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	if (msf->imsf_numsrc) {
+		newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
+				IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL);
+		if (!newpsl) {
+			err = -ENOBUFS;
+			goto done;
+		}
+		newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
+		memcpy(newpsl->sl_addr, msf->imsf_slist,
+			msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+		err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
+			msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
+		if (err) {
+			sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+			goto done;
+		}
+	} else
+		newpsl = NULL;
+	psl = pmc->sflist;
+	if (psl) {
+		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
+			psl->sl_count, psl->sl_addr, 0);
+		sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max));
+	} else
+		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
+			0, NULL, 0);
+	pmc->sflist = newpsl;
+	pmc->sfmode = msf->imsf_fmode;
+done:
+	rtnl_shunlock();
+	return err;
+}
+
+int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
+	struct ip_msfilter __user *optval, int __user *optlen)
+{
+	int err, len, count, copycount;
+	struct ip_mreqn	imr;
+	u32 addr = msf->imsf_multiaddr;
+	struct ip_mc_socklist *pmc;
+	struct in_device *in_dev;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_sf_socklist *psl;
+
+	if (!MULTICAST(addr))
+		return -EINVAL;
+
+	rtnl_shlock();
+
+	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
+	imr.imr_address.s_addr = msf->imsf_interface;
+	imr.imr_ifindex = 0;
+	in_dev = ip_mc_find_dev(&imr);
+
+	if (!in_dev) {
+		err = -ENODEV;
+		goto done;
+	}
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
+		    pmc->multi.imr_ifindex == imr.imr_ifindex)
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	msf->imsf_fmode = pmc->sfmode;
+	psl = pmc->sflist;
+	rtnl_shunlock();
+	if (!psl) {
+		len = 0;
+		count = 0;
+	} else {
+		count = psl->sl_count;
+	}
+	copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
+	len = copycount * sizeof(psl->sl_addr[0]);
+	msf->imsf_numsrc = count;
+	if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
+	    copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
+		return -EFAULT;
+	}
+	if (len &&
+	    copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+		return -EFAULT;
+	return 0;
+done:
+	rtnl_shunlock();
+	return err;
+}
+
+int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
+	struct group_filter __user *optval, int __user *optlen)
+{
+	int err, i, count, copycount;
+	struct sockaddr_in *psin;
+	u32 addr;
+	struct ip_mc_socklist *pmc;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_sf_socklist *psl;
+
+	psin = (struct sockaddr_in *)&gsf->gf_group;
+	if (psin->sin_family != AF_INET)
+		return -EINVAL;
+	addr = psin->sin_addr.s_addr;
+	if (!MULTICAST(addr))
+		return -EINVAL;
+
+	rtnl_shlock();
+
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+		if (pmc->multi.imr_multiaddr.s_addr == addr &&
+		    pmc->multi.imr_ifindex == gsf->gf_interface)
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	gsf->gf_fmode = pmc->sfmode;
+	psl = pmc->sflist;
+	rtnl_shunlock();
+	count = psl ? psl->sl_count : 0;
+	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+	gsf->gf_numsrc = count;
+	if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
+	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
+		return -EFAULT;
+	}
+	for (i=0; i<copycount; i++) {
+		struct sockaddr_in *psin;
+		struct sockaddr_storage ss;
+
+		psin = (struct sockaddr_in *)&ss;
+		memset(&ss, 0, sizeof(ss));
+		psin->sin_family = AF_INET;
+		psin->sin_addr.s_addr = psl->sl_addr[i];
+		if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+			return -EFAULT;
+	}
+	return 0;
+done:
+	rtnl_shunlock();
+	return err;
+}
+
+/*
+ * check if a multicast source filter allows delivery for a given <src,dst,intf>
+ */
+int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *pmc;
+	struct ip_sf_socklist *psl;
+	int i;
+
+	if (!MULTICAST(loc_addr))
+		return 1;
+
+	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+		if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
+		    pmc->multi.imr_ifindex == dif)
+			break;
+	}
+	if (!pmc)
+		return 1;
+	psl = pmc->sflist;
+	if (!psl)
+		return pmc->sfmode == MCAST_EXCLUDE;
+
+	for (i=0; i<psl->sl_count; i++) {
+		if (psl->sl_addr[i] == rmt_addr)
+			break;
+	}
+	if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
+		return 0;
+	if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
+		return 0;
+	return 1;
+}
+
+/*
+ *	A socket is closing.
+ */
+
+void ip_mc_drop_socket(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_mc_socklist *iml;
+
+	if (inet->mc_list == NULL)
+		return;
+
+	rtnl_lock();
+	while ((iml = inet->mc_list) != NULL) {
+		struct in_device *in_dev;
+		inet->mc_list = iml->next;
+
+		if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) {
+			(void) ip_mc_leave_src(sk, iml, in_dev);
+			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
+			in_dev_put(in_dev);
+		}
+		sock_kfree_s(sk, iml, sizeof(*iml));
+
+	}
+	rtnl_unlock();
+}
+
+int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto)
+{
+	struct ip_mc_list *im;
+	struct ip_sf_list *psf;
+	int rv = 0;
+
+	read_lock(&in_dev->mc_list_lock);
+	for (im=in_dev->mc_list; im; im=im->next) {
+		if (im->multiaddr == mc_addr)
+			break;
+	}
+	if (im && proto == IPPROTO_IGMP) {
+		rv = 1;
+	} else if (im) {
+		if (src_addr) {
+			for (psf=im->sources; psf; psf=psf->sf_next) {
+				if (psf->sf_inaddr == src_addr)
+					break;
+			}
+			if (psf)
+				rv = psf->sf_count[MCAST_INCLUDE] ||
+					psf->sf_count[MCAST_EXCLUDE] !=
+					im->sfcount[MCAST_EXCLUDE];
+			else
+				rv = im->sfcount[MCAST_EXCLUDE] != 0;
+		} else
+			rv = 1; /* unspecified source; tentatively allow */
+	}
+	read_unlock(&in_dev->mc_list_lock);
+	return rv;
+}
+
+#if defined(CONFIG_PROC_FS)
+struct igmp_mc_iter_state {
+	struct net_device *dev;
+	struct in_device *in_dev;
+};
+
+#define	igmp_mc_seq_private(seq)	((struct igmp_mc_iter_state *)(seq)->private)
+
+static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
+{
+	struct ip_mc_list *im = NULL;
+	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+
+	for (state->dev = dev_base, state->in_dev = NULL;
+	     state->dev; 
+	     state->dev = state->dev->next) {
+		struct in_device *in_dev;
+		in_dev = in_dev_get(state->dev);
+		if (!in_dev)
+			continue;
+		read_lock(&in_dev->mc_list_lock);
+		im = in_dev->mc_list;
+		if (im) {
+			state->in_dev = in_dev;
+			break;
+		}
+		read_unlock(&in_dev->mc_list_lock);
+		in_dev_put(in_dev);
+	}
+	return im;
+}
+
+static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
+{
+	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+	im = im->next;
+	while (!im) {
+		if (likely(state->in_dev != NULL)) {
+			read_unlock(&state->in_dev->mc_list_lock);
+			in_dev_put(state->in_dev);
+		}
+		state->dev = state->dev->next;
+		if (!state->dev) {
+			state->in_dev = NULL;
+			break;
+		}
+		state->in_dev = in_dev_get(state->dev);
+		if (!state->in_dev)
+			continue;
+		read_lock(&state->in_dev->mc_list_lock);
+		im = state->in_dev->mc_list;
+	}
+	return im;
+}
+
+static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ip_mc_list *im = igmp_mc_get_first(seq);
+	if (im)
+		while (pos && (im = igmp_mc_get_next(seq, im)) != NULL)
+			--pos;
+	return pos ? NULL : im;
+}
+
+static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_mc_list *im;
+	if (v == SEQ_START_TOKEN)
+		im = igmp_mc_get_first(seq);
+	else
+		im = igmp_mc_get_next(seq, v);
+	++*pos;
+	return im;
+}
+
+static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
+{
+	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+	if (likely(state->in_dev != NULL)) {
+		read_unlock(&state->in_dev->mc_list_lock);
+		in_dev_put(state->in_dev);
+		state->in_dev = NULL;
+	}
+	state->dev = NULL;
+	read_unlock(&dev_base_lock);
+}
+
+static int igmp_mc_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, 
+			 "Idx\tDevice    : Count Querier\tGroup    Users Timer\tReporter\n");
+	else {
+		struct ip_mc_list *im = (struct ip_mc_list *)v;
+		struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+		char   *querier;
+#ifdef CONFIG_IP_MULTICAST
+		querier = IGMP_V1_SEEN(state->in_dev) ? "V1" :
+			  IGMP_V2_SEEN(state->in_dev) ? "V2" :
+			  "V3";
+#else
+		querier = "NONE";
+#endif
+
+		if (state->in_dev->mc_list == im) {
+			seq_printf(seq, "%d\t%-10s: %5d %7s\n",
+				   state->dev->ifindex, state->dev->name, state->dev->mc_count, querier);
+		}
+
+		seq_printf(seq,
+			   "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n",
+			   im->multiaddr, im->users,
+			   im->tm_running, im->tm_running ?
+			   jiffies_to_clock_t(im->timer.expires-jiffies) : 0,
+			   im->reporter);
+	}
+	return 0;
+}
+
+static struct seq_operations igmp_mc_seq_ops = {
+	.start	=	igmp_mc_seq_start,
+	.next	=	igmp_mc_seq_next,
+	.stop	=	igmp_mc_seq_stop,
+	.show	=	igmp_mc_seq_show,
+};
+
+static int igmp_mc_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	rc = seq_open(file, &igmp_mc_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations igmp_mc_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	igmp_mc_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+
+struct igmp_mcf_iter_state {
+	struct net_device *dev;
+	struct in_device *idev;
+	struct ip_mc_list *im;
+};
+
+#define igmp_mcf_seq_private(seq)	((struct igmp_mcf_iter_state *)(seq)->private)
+
+static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
+{
+	struct ip_sf_list *psf = NULL;
+	struct ip_mc_list *im = NULL;
+	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+
+	for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
+	     state->dev; 
+	     state->dev = state->dev->next) {
+		struct in_device *idev;
+		idev = in_dev_get(state->dev);
+		if (unlikely(idev == NULL))
+			continue;
+		read_lock(&idev->mc_list_lock);
+		im = idev->mc_list;
+		if (likely(im != NULL)) {
+			spin_lock_bh(&im->lock);
+			psf = im->sources;
+			if (likely(psf != NULL)) {
+				state->im = im;
+				state->idev = idev;
+				break;
+			}
+			spin_unlock_bh(&im->lock);
+		}
+		read_unlock(&idev->mc_list_lock);
+		in_dev_put(idev);
+	}
+	return psf;
+}
+
+static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_list *psf)
+{
+	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+
+	psf = psf->sf_next;
+	while (!psf) {
+		spin_unlock_bh(&state->im->lock);
+		state->im = state->im->next;
+		while (!state->im) {
+			if (likely(state->idev != NULL)) {
+				read_unlock(&state->idev->mc_list_lock);
+				in_dev_put(state->idev);
+			}
+			state->dev = state->dev->next;
+			if (!state->dev) {
+				state->idev = NULL;
+				goto out;
+			}
+			state->idev = in_dev_get(state->dev);
+			if (!state->idev)
+				continue;
+			read_lock(&state->idev->mc_list_lock);
+			state->im = state->idev->mc_list;
+		}
+		if (!state->im)
+			break;
+		spin_lock_bh(&state->im->lock);
+		psf = state->im->sources;
+	}
+out:
+	return psf;
+}
+
+static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ip_sf_list *psf = igmp_mcf_get_first(seq);
+	if (psf)
+		while (pos && (psf = igmp_mcf_get_next(seq, psf)) != NULL)
+			--pos;
+	return pos ? NULL : psf;
+}
+
+static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_sf_list *psf;
+	if (v == SEQ_START_TOKEN)
+		psf = igmp_mcf_get_first(seq);
+	else
+		psf = igmp_mcf_get_next(seq, v);
+	++*pos;
+	return psf;
+}
+
+static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
+{
+	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+	if (likely(state->im != NULL)) {
+		spin_unlock_bh(&state->im->lock);
+		state->im = NULL;
+	}
+	if (likely(state->idev != NULL)) {
+		read_unlock(&state->idev->mc_list_lock);
+		in_dev_put(state->idev);
+		state->idev = NULL;
+	}
+	state->dev = NULL;
+	read_unlock(&dev_base_lock);
+}
+
+static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
+{
+	struct ip_sf_list *psf = (struct ip_sf_list *)v;
+	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, 
+			   "%3s %6s "
+			   "%10s %10s %6s %6s\n", "Idx",
+			   "Device", "MCA",
+			   "SRC", "INC", "EXC");
+	} else {
+		seq_printf(seq,
+			   "%3d %6.6s 0x%08x "
+			   "0x%08x %6lu %6lu\n", 
+			   state->dev->ifindex, state->dev->name, 
+			   ntohl(state->im->multiaddr),
+			   ntohl(psf->sf_inaddr),
+			   psf->sf_count[MCAST_INCLUDE],
+			   psf->sf_count[MCAST_EXCLUDE]);
+	}
+	return 0;
+}
+
+static struct seq_operations igmp_mcf_seq_ops = {
+	.start	=	igmp_mcf_seq_start,
+	.next	=	igmp_mcf_seq_next,
+	.stop	=	igmp_mcf_seq_stop,
+	.show	=	igmp_mcf_seq_show,
+};
+
+static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	rc = seq_open(file, &igmp_mcf_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations igmp_mcf_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	igmp_mcf_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+
+int __init igmp_mc_proc_init(void)
+{
+	proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
+	proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	return 0;
+}
+#endif
+
+EXPORT_SYMBOL(ip_mc_dec_group);
+EXPORT_SYMBOL(ip_mc_inc_group);
+EXPORT_SYMBOL(ip_mc_join_group);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
new file mode 100644
index 00000000000..95473953c40
--- /dev/null
+++ b/net/ipv4/inetpeer.c
@@ -0,0 +1,460 @@
+/*
+ *		INETPEER - A storage for permanent information about peers
+ *
+ *  This source is covered by the GNU GPL, the same as all kernel sources.
+ *
+ *  Version:	$Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $
+ *
+ *  Authors:	Andrey V. Savochkin <saw@msu.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/net.h>
+#include <net/inetpeer.h>
+
+/*
+ *  Theory of operations.
+ *  We keep one entry for each peer IP address.  The nodes contains long-living
+ *  information about the peer which doesn't depend on routes.
+ *  At this moment this information consists only of ID field for the next
+ *  outgoing IP packet.  This field is incremented with each packet as encoded
+ *  in inet_getid() function (include/net/inetpeer.h).
+ *  At the moment of writing this notes identifier of IP packets is generated
+ *  to be unpredictable using this code only for packets subjected
+ *  (actually or potentially) to defragmentation.  I.e. DF packets less than
+ *  PMTU in size uses a constant ID and do not use this code (see
+ *  ip_select_ident() in include/net/ip.h).
+ *
+ *  Route cache entries hold references to our nodes.
+ *  New cache entries get references via lookup by destination IP address in
+ *  the avl tree.  The reference is grabbed only when it's needed i.e. only
+ *  when we try to output IP packet which needs an unpredictable ID (see
+ *  __ip_select_ident() in net/ipv4/route.c).
+ *  Nodes are removed only when reference counter goes to 0.
+ *  When it's happened the node may be removed when a sufficient amount of
+ *  time has been passed since its last use.  The less-recently-used entry can
+ *  also be removed if the pool is overloaded i.e. if the total amount of
+ *  entries is greater-or-equal than the threshold.
+ *
+ *  Node pool is organised as an AVL tree.
+ *  Such an implementation has been chosen not just for fun.  It's a way to
+ *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
+ *  amount of long living nodes in a single hash slot would significantly delay
+ *  lookups performed with disabled BHs.
+ *
+ *  Serialisation issues.
+ *  1.  Nodes may appear in the tree only with the pool write lock held.
+ *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *      AND reference count being 0.
+ *  3.  Nodes appears and disappears from unused node list only under
+ *      "inet_peer_unused_lock".
+ *  4.  Global variable peer_total is modified under the pool lock.
+ *  5.  struct inet_peer fields modification:
+ *		avl_left, avl_right, avl_parent, avl_height: pool lock
+ *		unused_next, unused_prevp: unused node list lock
+ *		refcnt: atomically against modifications on other CPU;
+ *		   usually under some other lock to prevent node disappearing
+ *		dtime: unused node list lock
+ *		v4daddr: unchangeable
+ *		ip_id_count: idlock
+ */
+
+/* Exported for inet_getid inline function.  */
+DEFINE_SPINLOCK(inet_peer_idlock);
+
+static kmem_cache_t *peer_cachep;
+
+#define node_height(x) x->avl_height
+static struct inet_peer peer_fake_node = {
+	.avl_left	= &peer_fake_node,
+	.avl_right	= &peer_fake_node,
+	.avl_height	= 0
+};
+#define peer_avl_empty (&peer_fake_node)
+static struct inet_peer *peer_root = peer_avl_empty;
+static DEFINE_RWLOCK(peer_pool_lock);
+#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+
+static volatile int peer_total;
+/* Exported for sysctl_net_ipv4.  */
+int inet_peer_threshold = 65536 + 128;	/* start to throw entries more
+					 * aggressively at this stage */
+int inet_peer_minttl = 120 * HZ;	/* TTL under high load: 120 sec */
+int inet_peer_maxttl = 10 * 60 * HZ;	/* usual time to live: 10 min */
+
+static struct inet_peer *inet_peer_unused_head;
+/* Exported for inet_putpeer inline function.  */
+struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
+DEFINE_SPINLOCK(inet_peer_unused_lock);
+#define PEER_MAX_CLEANUP_WORK 30
+
+static void peer_check_expire(unsigned long dummy);
+static struct timer_list peer_periodic_timer =
+	TIMER_INITIALIZER(peer_check_expire, 0, 0);
+
+/* Exported for sysctl_net_ipv4.  */
+int inet_peer_gc_mintime = 10 * HZ,
+    inet_peer_gc_maxtime = 120 * HZ;
+
+/* Called from ip_output.c:ip_init  */
+void __init inet_initpeers(void)
+{
+	struct sysinfo si;
+
+	/* Use the straight interface to information about memory. */
+	si_meminfo(&si);
+	/* The values below were suggested by Alexey Kuznetsov
+	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
+	 * myself.  --SAW
+	 */
+	if (si.totalram <= (32768*1024)/PAGE_SIZE)
+		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
+	if (si.totalram <= (16384*1024)/PAGE_SIZE)
+		inet_peer_threshold >>= 1; /* about 512KB */
+	if (si.totalram <= (8192*1024)/PAGE_SIZE)
+		inet_peer_threshold >>= 2; /* about 128KB */
+
+	peer_cachep = kmem_cache_create("inet_peer_cache",
+			sizeof(struct inet_peer),
+			0, SLAB_HWCACHE_ALIGN,
+			NULL, NULL);
+
+	if (!peer_cachep)
+		panic("cannot create inet_peer_cache");
+
+	/* All the timers, started at system startup tend
+	   to synchronize. Perturb it a bit.
+	 */
+	peer_periodic_timer.expires = jiffies
+		+ net_random() % inet_peer_gc_maxtime
+		+ inet_peer_gc_maxtime;
+	add_timer(&peer_periodic_timer);
+}
+
+/* Called with or without local BH being disabled. */
+static void unlink_from_unused(struct inet_peer *p)
+{
+	spin_lock_bh(&inet_peer_unused_lock);
+	if (p->unused_prevp != NULL) {
+		/* On unused list. */
+		*p->unused_prevp = p->unused_next;
+		if (p->unused_next != NULL)
+			p->unused_next->unused_prevp = p->unused_prevp;
+		else
+			inet_peer_unused_tailp = p->unused_prevp;
+		p->unused_prevp = NULL; /* mark it as removed */
+	}
+	spin_unlock_bh(&inet_peer_unused_lock);
+}
+
+/* Called with local BH disabled and the pool lock held. */
+#define lookup(daddr) 						\
+({								\
+	struct inet_peer *u, **v;				\
+	stackptr = stack;					\
+	*stackptr++ = &peer_root;				\
+	for (u = peer_root; u != peer_avl_empty; ) {		\
+		if (daddr == u->v4daddr)			\
+			break;					\
+		if (daddr < u->v4daddr)				\
+			v = &u->avl_left;			\
+		else						\
+			v = &u->avl_right;			\
+		*stackptr++ = v;				\
+		u = *v;						\
+	}							\
+	u;							\
+})
+
+/* Called with local BH disabled and the pool write lock held. */
+#define lookup_rightempty(start)				\
+({								\
+	struct inet_peer *u, **v;				\
+	*stackptr++ = &start->avl_left;				\
+	v = &start->avl_left;					\
+	for (u = *v; u->avl_right != peer_avl_empty; ) {	\
+		v = &u->avl_right;				\
+		*stackptr++ = v;				\
+		u = *v;						\
+	}							\
+	u;							\
+})
+
+/* Called with local BH disabled and the pool write lock held.
+ * Variable names are the proof of operation correctness.
+ * Look into mm/map_avl.c for more detail description of the ideas.  */
+static void peer_avl_rebalance(struct inet_peer **stack[],
+		struct inet_peer ***stackend)
+{
+	struct inet_peer **nodep, *node, *l, *r;
+	int lh, rh;
+
+	while (stackend > stack) {
+		nodep = *--stackend;
+		node = *nodep;
+		l = node->avl_left;
+		r = node->avl_right;
+		lh = node_height(l);
+		rh = node_height(r);
+		if (lh > rh + 1) { /* l: RH+2 */
+			struct inet_peer *ll, *lr, *lrl, *lrr;
+			int lrh;
+			ll = l->avl_left;
+			lr = l->avl_right;
+			lrh = node_height(lr);
+			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
+				node->avl_left = lr;	/* lr: RH or RH+1 */
+				node->avl_right = r;	/* r: RH */
+				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
+				l->avl_left = ll;	/* ll: RH+1 */
+				l->avl_right = node;	/* node: RH+1 or RH+2 */
+				l->avl_height = node->avl_height + 1;
+				*nodep = l;
+			} else { /* ll: RH, lr: RH+1 */
+				lrl = lr->avl_left;	/* lrl: RH or RH-1 */
+				lrr = lr->avl_right;	/* lrr: RH or RH-1 */
+				node->avl_left = lrr;	/* lrr: RH or RH-1 */
+				node->avl_right = r;	/* r: RH */
+				node->avl_height = rh + 1; /* node: RH+1 */
+				l->avl_left = ll;	/* ll: RH */
+				l->avl_right = lrl;	/* lrl: RH or RH-1 */
+				l->avl_height = rh + 1;	/* l: RH+1 */
+				lr->avl_left = l;	/* l: RH+1 */
+				lr->avl_right = node;	/* node: RH+1 */
+				lr->avl_height = rh + 2;
+				*nodep = lr;
+			}
+		} else if (rh > lh + 1) { /* r: LH+2 */
+			struct inet_peer *rr, *rl, *rlr, *rll;
+			int rlh;
+			rr = r->avl_right;
+			rl = r->avl_left;
+			rlh = node_height(rl);
+			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
+				node->avl_right = rl;	/* rl: LH or LH+1 */
+				node->avl_left = l;	/* l: LH */
+				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
+				r->avl_right = rr;	/* rr: LH+1 */
+				r->avl_left = node;	/* node: LH+1 or LH+2 */
+				r->avl_height = node->avl_height + 1;
+				*nodep = r;
+			} else { /* rr: RH, rl: RH+1 */
+				rlr = rl->avl_right;	/* rlr: LH or LH-1 */
+				rll = rl->avl_left;	/* rll: LH or LH-1 */
+				node->avl_right = rll;	/* rll: LH or LH-1 */
+				node->avl_left = l;	/* l: LH */
+				node->avl_height = lh + 1; /* node: LH+1 */
+				r->avl_right = rr;	/* rr: LH */
+				r->avl_left = rlr;	/* rlr: LH or LH-1 */
+				r->avl_height = lh + 1;	/* r: LH+1 */
+				rl->avl_right = r;	/* r: LH+1 */
+				rl->avl_left = node;	/* node: LH+1 */
+				rl->avl_height = lh + 2;
+				*nodep = rl;
+			}
+		} else {
+			node->avl_height = (lh > rh ? lh : rh) + 1;
+		}
+	}
+}
+
+/* Called with local BH disabled and the pool write lock held. */
+#define link_to_pool(n)						\
+do {								\
+	n->avl_height = 1;					\
+	n->avl_left = peer_avl_empty;				\
+	n->avl_right = peer_avl_empty;				\
+	**--stackptr = n;					\
+	peer_avl_rebalance(stack, stackptr);			\
+} while(0)
+
+/* May be called with local BH enabled. */
+static void unlink_from_pool(struct inet_peer *p)
+{
+	int do_free;
+
+	do_free = 0;
+
+	write_lock_bh(&peer_pool_lock);
+	/* Check the reference counter.  It was artificially incremented by 1
+	 * in cleanup() function to prevent sudden disappearing.  If the
+	 * reference count is still 1 then the node is referenced only as `p'
+	 * here and from the pool.  So under the exclusive pool lock it's safe
+	 * to remove the node and free it later. */
+	if (atomic_read(&p->refcnt) == 1) {
+		struct inet_peer **stack[PEER_MAXDEPTH];
+		struct inet_peer ***stackptr, ***delp;
+		if (lookup(p->v4daddr) != p)
+			BUG();
+		delp = stackptr - 1; /* *delp[0] == p */
+		if (p->avl_left == peer_avl_empty) {
+			*delp[0] = p->avl_right;
+			--stackptr;
+		} else {
+			/* look for a node to insert instead of p */
+			struct inet_peer *t;
+			t = lookup_rightempty(p);
+			if (*stackptr[-1] != t)
+				BUG();
+			**--stackptr = t->avl_left;
+			/* t is removed, t->v4daddr > x->v4daddr for any
+			 * x in p->avl_left subtree.
+			 * Put t in the old place of p. */
+			*delp[0] = t;
+			t->avl_left = p->avl_left;
+			t->avl_right = p->avl_right;
+			t->avl_height = p->avl_height;
+			if (delp[1] != &p->avl_left)
+				BUG();
+			delp[1] = &t->avl_left; /* was &p->avl_left */
+		}
+		peer_avl_rebalance(stack, stackptr);
+		peer_total--;
+		do_free = 1;
+	}
+	write_unlock_bh(&peer_pool_lock);
+
+	if (do_free)
+		kmem_cache_free(peer_cachep, p);
+	else
+		/* The node is used again.  Decrease the reference counter
+		 * back.  The loop "cleanup -> unlink_from_unused
+		 *   -> unlink_from_pool -> putpeer -> link_to_unused
+		 *   -> cleanup (for the same node)"
+		 * doesn't really exist because the entry will have a
+		 * recent deletion time and will not be cleaned again soon. */
+		inet_putpeer(p);
+}
+
+/* May be called with local BH enabled. */
+static int cleanup_once(unsigned long ttl)
+{
+	struct inet_peer *p;
+
+	/* Remove the first entry from the list of unused nodes. */
+	spin_lock_bh(&inet_peer_unused_lock);
+	p = inet_peer_unused_head;
+	if (p != NULL) {
+		if (time_after(p->dtime + ttl, jiffies)) {
+			/* Do not prune fresh entries. */
+			spin_unlock_bh(&inet_peer_unused_lock);
+			return -1;
+		}
+		inet_peer_unused_head = p->unused_next;
+		if (p->unused_next != NULL)
+			p->unused_next->unused_prevp = p->unused_prevp;
+		else
+			inet_peer_unused_tailp = p->unused_prevp;
+		p->unused_prevp = NULL; /* mark as not on the list */
+		/* Grab an extra reference to prevent node disappearing
+		 * before unlink_from_pool() call. */
+		atomic_inc(&p->refcnt);
+	}
+	spin_unlock_bh(&inet_peer_unused_lock);
+
+	if (p == NULL)
+		/* It means that the total number of USED entries has
+		 * grown over inet_peer_threshold.  It shouldn't really
+		 * happen because of entry limits in route cache. */
+		return -1;
+
+	unlink_from_pool(p);
+	return 0;
+}
+
+/* Called with or without local BH being disabled. */
+struct inet_peer *inet_getpeer(__u32 daddr, int create)
+{
+	struct inet_peer *p, *n;
+	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+
+	/* Look up for the address quickly. */
+	read_lock_bh(&peer_pool_lock);
+	p = lookup(daddr);
+	if (p != peer_avl_empty)
+		atomic_inc(&p->refcnt);
+	read_unlock_bh(&peer_pool_lock);
+
+	if (p != peer_avl_empty) {
+		/* The existing node has been found. */
+		/* Remove the entry from unused list if it was there. */
+		unlink_from_unused(p);
+		return p;
+	}
+
+	if (!create)
+		return NULL;
+
+	/* Allocate the space outside the locked region. */
+	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+	if (n == NULL)
+		return NULL;
+	n->v4daddr = daddr;
+	atomic_set(&n->refcnt, 1);
+	n->ip_id_count = secure_ip_id(daddr);
+	n->tcp_ts_stamp = 0;
+
+	write_lock_bh(&peer_pool_lock);
+	/* Check if an entry has suddenly appeared. */
+	p = lookup(daddr);
+	if (p != peer_avl_empty)
+		goto out_free;
+
+	/* Link the node. */
+	link_to_pool(n);
+	n->unused_prevp = NULL; /* not on the list */
+	peer_total++;
+	write_unlock_bh(&peer_pool_lock);
+
+	if (peer_total >= inet_peer_threshold)
+		/* Remove one less-recently-used entry. */
+		cleanup_once(0);
+
+	return n;
+
+out_free:
+	/* The appropriate node is already in the pool. */
+	atomic_inc(&p->refcnt);
+	write_unlock_bh(&peer_pool_lock);
+	/* Remove the entry from unused list if it was there. */
+	unlink_from_unused(p);
+	/* Free preallocated the preallocated node. */
+	kmem_cache_free(peer_cachep, n);
+	return p;
+}
+
+/* Called with local BH disabled. */
+static void peer_check_expire(unsigned long dummy)
+{
+	int i;
+	int ttl;
+
+	if (peer_total >= inet_peer_threshold)
+		ttl = inet_peer_minttl;
+	else
+		ttl = inet_peer_maxttl
+				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
+					peer_total / inet_peer_threshold * HZ;
+	for (i = 0; i < PEER_MAX_CLEANUP_WORK && !cleanup_once(ttl); i++);
+
+	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
+	 * interval depending on the total number of entries (more entries,
+	 * less interval). */
+	peer_periodic_timer.expires = jiffies
+		+ inet_peer_gc_maxtime
+		- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+			peer_total / inet_peer_threshold * HZ;
+	add_timer(&peer_periodic_timer);
+}
+
+EXPORT_SYMBOL(inet_peer_idlock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
new file mode 100644
index 00000000000..77094aac6c2
--- /dev/null
+++ b/net/ipv4/ip_forward.c
@@ -0,0 +1,127 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The IP forwarding functionality.
+ *		
+ * Version:	$Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $
+ *
+ * Authors:	see ip.c
+ *
+ * Fixes:
+ *		Many		:	Split from ip.c , see ip_input.c for 
+ *					history.
+ *		Dave Gregorich	:	NULL ip_rt_put fix for multicast 
+ *					routing.
+ *		Jos Vos		:	Add call_out_firewall before sending,
+ *					use output device for accounting.
+ *		Jos Vos		:	Call forward firewall after routing
+ *					(always use output device).
+ *		Mike McLagan	:	Routing by source
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/checksum.h>
+#include <linux/route.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+static inline int ip_forward_finish(struct sk_buff *skb)
+{
+	struct ip_options * opt	= &(IPCB(skb)->opt);
+
+	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+
+	if (unlikely(opt->optlen))
+		ip_forward_options(skb);
+
+	return dst_output(skb);
+}
+
+int ip_forward(struct sk_buff *skb)
+{
+	struct iphdr *iph;	/* Our header */
+	struct rtable *rt;	/* Route we use */
+	struct ip_options * opt	= &(IPCB(skb)->opt);
+
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+		goto drop;
+
+	if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
+		return NET_RX_SUCCESS;
+
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
+	skb->ip_summed = CHECKSUM_NONE;
+	
+	/*
+	 *	According to the RFC, we must first decrease the TTL field. If
+	 *	that reaches zero, we must reply an ICMP control message telling
+	 *	that the packet's lifetime expired.
+	 */
+
+	iph = skb->nh.iph;
+
+	if (iph->ttl <= 1)
+                goto too_many_hops;
+
+	if (!xfrm4_route_forward(skb))
+		goto drop;
+
+	iph = skb->nh.iph;
+	rt = (struct rtable*)skb->dst;
+
+	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+		goto sr_failed;
+
+	/* We are about to mangle packet. Copy it! */
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+		goto drop;
+	iph = skb->nh.iph;
+
+	/* Decrease ttl after skb cow done */
+	ip_decrease_ttl(iph);
+
+	/*
+	 *	We now generate an ICMP HOST REDIRECT giving the route
+	 *	we calculated.
+	 */
+	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr)
+		ip_rt_send_redirect(skb);
+
+	skb->priority = rt_tos2priority(iph->tos);
+
+	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
+		       ip_forward_finish);
+
+sr_failed:
+        /*
+	 *	Strict routing permits no gatewaying
+	 */
+         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
+         goto drop;
+
+too_many_hops:
+        /* Tell the sender its packet died... */
+        icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
new file mode 100644
index 00000000000..7f68e27eb4e
--- /dev/null
+++ b/net/ipv4/ip_fragment.c
@@ -0,0 +1,691 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The IP fragmentation functionality.
+ *		
+ * Version:	$Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
+ *
+ * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
+ *		Alan Cox <Alan.Cox@linux.org>
+ *
+ * Fixes:
+ *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
+ *		David S. Miller :	Begin massive cleanup...
+ *		Andi Kleen	:	Add sysctls.
+ *		xxxx		:	Overlapfrag bug.
+ *		Ultima          :       ip_expire() kernel panic.
+ *		Bill Hawes	:	Frag accounting and evictor fixes.
+ *		John McDonald	:	0 length frag bug.
+ *		Alexey Kuznetsov:	SMP races, threading, cleanup.
+ *		Patrick McHardy :	LRU queue of frag heads for evictor.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/jiffies.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netdevice.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/checksum.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/inet.h>
+#include <linux/netfilter_ipv4.h>
+
+/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
+ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
+ * as well. Or notify me, at least. --ANK
+ */
+
+/* Fragment cache limits. We will commit 256K at one time. Should we
+ * cross that limit we will prune down to 192K. This should cope with
+ * even the most extreme cases without allowing an attacker to measurably
+ * harm machine performance.
+ */
+int sysctl_ipfrag_high_thresh = 256*1024;
+int sysctl_ipfrag_low_thresh = 192*1024;
+
+/* Important NOTE! Fragment queue must be destroyed before MSL expires.
+ * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
+ */
+int sysctl_ipfrag_time = IP_FRAG_TIME;
+
+struct ipfrag_skb_cb
+{
+	struct inet_skb_parm	h;
+	int			offset;
+};
+
+#define FRAG_CB(skb)	((struct ipfrag_skb_cb*)((skb)->cb))
+
+/* Describe an entry in the "incomplete datagrams" queue. */
+struct ipq {
+	struct ipq	*next;		/* linked list pointers			*/
+	struct list_head lru_list;	/* lru list member 			*/
+	u32		user;
+	u32		saddr;
+	u32		daddr;
+	u16		id;
+	u8		protocol;
+	u8		last_in;
+#define COMPLETE		4
+#define FIRST_IN		2
+#define LAST_IN			1
+
+	struct sk_buff	*fragments;	/* linked list of received fragments	*/
+	int		len;		/* total length of original datagram	*/
+	int		meat;
+	spinlock_t	lock;
+	atomic_t	refcnt;
+	struct timer_list timer;	/* when will this queue expire?		*/
+	struct ipq	**pprev;
+	int		iif;
+	struct timeval	stamp;
+};
+
+/* Hash table. */
+
+#define IPQ_HASHSZ	64
+
+/* Per-bucket lock is easy to add now. */
+static struct ipq *ipq_hash[IPQ_HASHSZ];
+static DEFINE_RWLOCK(ipfrag_lock);
+static u32 ipfrag_hash_rnd;
+static LIST_HEAD(ipq_lru_list);
+int ip_frag_nqueues = 0;
+
+static __inline__ void __ipq_unlink(struct ipq *qp)
+{
+	if(qp->next)
+		qp->next->pprev = qp->pprev;
+	*qp->pprev = qp->next;
+	list_del(&qp->lru_list);
+	ip_frag_nqueues--;
+}
+
+static __inline__ void ipq_unlink(struct ipq *ipq)
+{
+	write_lock(&ipfrag_lock);
+	__ipq_unlink(ipq);
+	write_unlock(&ipfrag_lock);
+}
+
+static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
+{
+	return jhash_3words((u32)id << 16 | prot, saddr, daddr,
+			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+}
+
+static struct timer_list ipfrag_secret_timer;
+int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+
+static void ipfrag_secret_rebuild(unsigned long dummy)
+{
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&ipfrag_lock);
+	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
+	for (i = 0; i < IPQ_HASHSZ; i++) {
+		struct ipq *q;
+
+		q = ipq_hash[i];
+		while (q) {
+			struct ipq *next = q->next;
+			unsigned int hval = ipqhashfn(q->id, q->saddr,
+						      q->daddr, q->protocol);
+
+			if (hval != i) {
+				/* Unlink. */
+				if (q->next)
+					q->next->pprev = q->pprev;
+				*q->pprev = q->next;
+
+				/* Relink to new hash chain. */
+				if ((q->next = ipq_hash[hval]) != NULL)
+					q->next->pprev = &q->next;
+				ipq_hash[hval] = q;
+				q->pprev = &ipq_hash[hval];
+			}
+
+			q = next;
+		}
+	}
+	write_unlock(&ipfrag_lock);
+
+	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+}
+
+atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
+
+/* Memory Tracking Functions. */
+static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
+{
+	if (work)
+		*work -= skb->truesize;
+	atomic_sub(skb->truesize, &ip_frag_mem);
+	kfree_skb(skb);
+}
+
+static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+{
+	if (work)
+		*work -= sizeof(struct ipq);
+	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+	kfree(qp);
+}
+
+static __inline__ struct ipq *frag_alloc_queue(void)
+{
+	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+
+	if(!qp)
+		return NULL;
+	atomic_add(sizeof(struct ipq), &ip_frag_mem);
+	return qp;
+}
+
+
+/* Destruction primitives. */
+
+/* Complete destruction of ipq. */
+static void ip_frag_destroy(struct ipq *qp, int *work)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(qp->last_in&COMPLETE);
+	BUG_TRAP(del_timer(&qp->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = qp->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(fp, work);
+		fp = xp;
+	}
+
+	/* Finally, release the queue descriptor itself. */
+	frag_free_queue(qp, work);
+}
+
+static __inline__ void ipq_put(struct ipq *ipq, int *work)
+{
+	if (atomic_dec_and_test(&ipq->refcnt))
+		ip_frag_destroy(ipq, work);
+}
+
+/* Kill ipq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static void ipq_kill(struct ipq *ipq)
+{
+	if (del_timer(&ipq->timer))
+		atomic_dec(&ipq->refcnt);
+
+	if (!(ipq->last_in & COMPLETE)) {
+		ipq_unlink(ipq);
+		atomic_dec(&ipq->refcnt);
+		ipq->last_in |= COMPLETE;
+	}
+}
+
+/* Memory limiting on fragments.  Evictor trashes the oldest 
+ * fragment queue until we are back under the threshold.
+ */
+static void ip_evictor(void)
+{
+	struct ipq *qp;
+	struct list_head *tmp;
+	int work;
+
+	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
+	if (work <= 0)
+		return;
+
+	while (work > 0) {
+		read_lock(&ipfrag_lock);
+		if (list_empty(&ipq_lru_list)) {
+			read_unlock(&ipfrag_lock);
+			return;
+		}
+		tmp = ipq_lru_list.next;
+		qp = list_entry(tmp, struct ipq, lru_list);
+		atomic_inc(&qp->refcnt);
+		read_unlock(&ipfrag_lock);
+
+		spin_lock(&qp->lock);
+		if (!(qp->last_in&COMPLETE))
+			ipq_kill(qp);
+		spin_unlock(&qp->lock);
+
+		ipq_put(qp, &work);
+		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	}
+}
+
+/*
+ * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
+ */
+static void ip_expire(unsigned long arg)
+{
+	struct ipq *qp = (struct ipq *) arg;
+
+	spin_lock(&qp->lock);
+
+	if (qp->last_in & COMPLETE)
+		goto out;
+
+	ipq_kill(qp);
+
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+
+	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
+		struct sk_buff *head = qp->fragments;
+		/* Send an ICMP "Fragment Reassembly Timeout" message. */
+		if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
+			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+			dev_put(head->dev);
+		}
+	}
+out:
+	spin_unlock(&qp->lock);
+	ipq_put(qp, NULL);
+}
+
+/* Creation primitives. */
+
+static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
+{
+	struct ipq *qp;
+
+	write_lock(&ipfrag_lock);
+#ifdef CONFIG_SMP
+	/* With SMP race we have to recheck hash table, because
+	 * such entry could be created on other cpu, while we
+	 * promoted read lock to write lock.
+	 */
+	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+		if(qp->id == qp_in->id		&&
+		   qp->saddr == qp_in->saddr	&&
+		   qp->daddr == qp_in->daddr	&&
+		   qp->protocol == qp_in->protocol &&
+		   qp->user == qp_in->user) {
+			atomic_inc(&qp->refcnt);
+			write_unlock(&ipfrag_lock);
+			qp_in->last_in |= COMPLETE;
+			ipq_put(qp_in, NULL);
+			return qp;
+		}
+	}
+#endif
+	qp = qp_in;
+
+	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
+		atomic_inc(&qp->refcnt);
+
+	atomic_inc(&qp->refcnt);
+	if((qp->next = ipq_hash[hash]) != NULL)
+		qp->next->pprev = &qp->next;
+	ipq_hash[hash] = qp;
+	qp->pprev = &ipq_hash[hash];
+	INIT_LIST_HEAD(&qp->lru_list);
+	list_add_tail(&qp->lru_list, &ipq_lru_list);
+	ip_frag_nqueues++;
+	write_unlock(&ipfrag_lock);
+	return qp;
+}
+
+/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
+static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
+{
+	struct ipq *qp;
+
+	if ((qp = frag_alloc_queue()) == NULL)
+		goto out_nomem;
+
+	qp->protocol = iph->protocol;
+	qp->last_in = 0;
+	qp->id = iph->id;
+	qp->saddr = iph->saddr;
+	qp->daddr = iph->daddr;
+	qp->user = user;
+	qp->len = 0;
+	qp->meat = 0;
+	qp->fragments = NULL;
+	qp->iif = 0;
+
+	/* Initialize a timer for this entry. */
+	init_timer(&qp->timer);
+	qp->timer.data = (unsigned long) qp;	/* pointer to queue	*/
+	qp->timer.function = ip_expire;		/* expire function	*/
+	spin_lock_init(&qp->lock);
+	atomic_set(&qp->refcnt, 1);
+
+	return ip_frag_intern(hash, qp);
+
+out_nomem:
+	NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n"));
+	return NULL;
+}
+
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
+{
+	__u16 id = iph->id;
+	__u32 saddr = iph->saddr;
+	__u32 daddr = iph->daddr;
+	__u8 protocol = iph->protocol;
+	unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
+	struct ipq *qp;
+
+	read_lock(&ipfrag_lock);
+	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+		if(qp->id == id		&&
+		   qp->saddr == saddr	&&
+		   qp->daddr == daddr	&&
+		   qp->protocol == protocol &&
+		   qp->user == user) {
+			atomic_inc(&qp->refcnt);
+			read_unlock(&ipfrag_lock);
+			return qp;
+		}
+	}
+	read_unlock(&ipfrag_lock);
+
+	return ip_frag_create(hash, iph, user);
+}
+
+/* Add new segment to existing queue. */
+static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+{
+	struct sk_buff *prev, *next;
+	int flags, offset;
+	int ihl, end;
+
+	if (qp->last_in & COMPLETE)
+		goto err;
+
+ 	offset = ntohs(skb->nh.iph->frag_off);
+	flags = offset & ~IP_OFFSET;
+	offset &= IP_OFFSET;
+	offset <<= 3;		/* offset is in 8-byte chunks */
+ 	ihl = skb->nh.iph->ihl * 4;
+
+	/* Determine the position of this fragment. */
+ 	end = offset + skb->len - ihl;
+
+	/* Is this the final fragment? */
+	if ((flags & IP_MF) == 0) {
+		/* If we already have some bits beyond end
+		 * or have different end, the segment is corrrupted.
+		 */
+		if (end < qp->len ||
+		    ((qp->last_in & LAST_IN) && end != qp->len))
+			goto err;
+		qp->last_in |= LAST_IN;
+		qp->len = end;
+	} else {
+		if (end&7) {
+			end &= ~7;
+			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+		if (end > qp->len) {
+			/* Some bits beyond end -> corruption. */
+			if (qp->last_in & LAST_IN)
+				goto err;
+			qp->len = end;
+		}
+	}
+	if (end == offset)
+		goto err;
+
+	if (pskb_pull(skb, ihl) == NULL)
+		goto err;
+	if (pskb_trim(skb, end-offset))
+		goto err;
+
+	/* Find out which fragments are in front and at the back of us
+	 * in the chain of fragments so far.  We must know where to put
+	 * this fragment, right?
+	 */
+	prev = NULL;
+	for(next = qp->fragments; next != NULL; next = next->next) {
+		if (FRAG_CB(next)->offset >= offset)
+			break;	/* bingo! */
+		prev = next;
+	}
+
+	/* We found where to put this one.  Check for overlap with
+	 * preceding fragment, and, if needed, align things so that
+	 * any overlaps are eliminated.
+	 */
+	if (prev) {
+		int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+
+		if (i > 0) {
+			offset += i;
+			if (end <= offset)
+				goto err;
+			if (!pskb_pull(skb, i))
+				goto err;
+			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+
+	while (next && FRAG_CB(next)->offset < end) {
+		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+
+		if (i < next->len) {
+			/* Eat head of the next overlapped fragment
+			 * and leave the loop. The next ones cannot overlap.
+			 */
+			if (!pskb_pull(next, i))
+				goto err;
+			FRAG_CB(next)->offset += i;
+			qp->meat -= i;
+			if (next->ip_summed != CHECKSUM_UNNECESSARY)
+				next->ip_summed = CHECKSUM_NONE;
+			break;
+		} else {
+			struct sk_buff *free_it = next;
+
+			/* Old fragmnet is completely overridden with
+			 * new one drop it.
+			 */
+			next = next->next;
+
+			if (prev)
+				prev->next = next;
+			else
+				qp->fragments = next;
+
+			qp->meat -= free_it->len;
+			frag_kfree_skb(free_it, NULL);
+		}
+	}
+
+	FRAG_CB(skb)->offset = offset;
+
+	/* Insert this fragment in the chain of fragments. */
+	skb->next = next;
+	if (prev)
+		prev->next = skb;
+	else
+		qp->fragments = skb;
+
+ 	if (skb->dev)
+ 		qp->iif = skb->dev->ifindex;
+	skb->dev = NULL;
+	qp->stamp = skb->stamp;
+	qp->meat += skb->len;
+	atomic_add(skb->truesize, &ip_frag_mem);
+	if (offset == 0)
+		qp->last_in |= FIRST_IN;
+
+	write_lock(&ipfrag_lock);
+	list_move_tail(&qp->lru_list, &ipq_lru_list);
+	write_unlock(&ipfrag_lock);
+
+	return;
+
+err:
+	kfree_skb(skb);
+}
+
+
+/* Build a new IP datagram from all its fragments. */
+
+static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+{
+	struct iphdr *iph;
+	struct sk_buff *fp, *head = qp->fragments;
+	int len;
+	int ihlen;
+
+	ipq_kill(qp);
+
+	BUG_TRAP(head != NULL);
+	BUG_TRAP(FRAG_CB(head)->offset == 0);
+
+	/* Allocate a new buffer for the datagram. */
+	ihlen = head->nh.iph->ihl*4;
+	len = ihlen + qp->len;
+
+	if(len > 65535)
+		goto out_oversize;
+
+	/* Head of list must not be cloned. */
+	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+		goto out_nomem;
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments. */
+	if (skb_shinfo(head)->frag_list) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
+			goto out_nomem;
+		clone->next = head->next;
+		head->next = clone;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_shinfo(head)->frag_list = NULL;
+		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
+			plen += skb_shinfo(head)->frags[i].size;
+		clone->len = clone->data_len = head->data_len - plen;
+		head->data_len -= clone->len;
+		head->len -= clone->len;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+		atomic_add(clone->truesize, &ip_frag_mem);
+	}
+
+	skb_shinfo(head)->frag_list = head->next;
+	skb_push(head, head->data - head->nh.raw);
+	atomic_sub(head->truesize, &ip_frag_mem);
+
+	for (fp=head->next; fp; fp = fp->next) {
+		head->data_len += fp->len;
+		head->len += fp->len;
+		if (head->ip_summed != fp->ip_summed)
+			head->ip_summed = CHECKSUM_NONE;
+		else if (head->ip_summed == CHECKSUM_HW)
+			head->csum = csum_add(head->csum, fp->csum);
+		head->truesize += fp->truesize;
+		atomic_sub(fp->truesize, &ip_frag_mem);
+	}
+
+	head->next = NULL;
+	head->dev = dev;
+	head->stamp = qp->stamp;
+
+	iph = head->nh.iph;
+	iph->frag_off = 0;
+	iph->tot_len = htons(len);
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+	qp->fragments = NULL;
+	return head;
+
+out_nomem:
+ 	NETDEBUG(if (net_ratelimit())
+	         printk(KERN_ERR 
+			"IP: queue_glue: no memory for gluing queue %p\n",
+			qp));
+	goto out_fail;
+out_oversize:
+	if (net_ratelimit())
+		printk(KERN_INFO
+			"Oversized IP packet from %d.%d.%d.%d.\n",
+			NIPQUAD(qp->saddr));
+out_fail:
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	return NULL;
+}
+
+/* Process an incoming IP datagram fragment. */
+struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct ipq *qp;
+	struct net_device *dev;
+	
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+
+	/* Start by cleaning up the memory. */
+	if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+		ip_evictor();
+
+	dev = skb->dev;
+
+	/* Lookup (or create) queue header */
+	if ((qp = ip_find(iph, user)) != NULL) {
+		struct sk_buff *ret = NULL;
+
+		spin_lock(&qp->lock);
+
+		ip_frag_queue(qp, skb);
+
+		if (qp->last_in == (FIRST_IN|LAST_IN) &&
+		    qp->meat == qp->len)
+			ret = ip_frag_reasm(qp, dev);
+
+		spin_unlock(&qp->lock);
+		ipq_put(qp, NULL);
+		return ret;
+	}
+
+	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	kfree_skb(skb);
+	return NULL;
+}
+
+void ipfrag_init(void)
+{
+	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				 (jiffies ^ (jiffies >> 6)));
+
+	init_timer(&ipfrag_secret_timer);
+	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
+	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
+	add_timer(&ipfrag_secret_timer);
+}
+
+EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
new file mode 100644
index 00000000000..88483552222
--- /dev/null
+++ b/net/ipv4/ip_gre.c
@@ -0,0 +1,1290 @@
+/*
+ *	Linux NET3:	GRE over IP protocol decoder. 
+ *
+ *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ipip.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+
+#ifdef CONFIG_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#endif
+
+/*
+   Problems & solutions
+   --------------------
+
+   1. The most important issue is detecting local dead loops.
+   They would cause complete host lockup in transmit, which
+   would be "resolved" by stack overflow or, if queueing is enabled,
+   with infinite looping in net_bh.
+
+   We cannot track such dead loops during route installation,
+   it is infeasible task. The most general solutions would be
+   to keep skb->encapsulation counter (sort of local ttl),
+   and silently drop packet when it expires. It is the best
+   solution, but it supposes maintaing new variable in ALL
+   skb, even if no tunneling is used.
+
+   Current solution: t->recursion lock breaks dead loops. It looks 
+   like dev->tbusy flag, but I preferred new variable, because
+   the semantics is different. One day, when hard_start_xmit
+   will be multithreaded we will have to use skb->encapsulation.
+
+
+
+   2. Networking dead loops would not kill routers, but would really
+   kill network. IP hop limit plays role of "t->recursion" in this case,
+   if we copy it from packet being encapsulated to upper header.
+   It is very good solution, but it introduces two problems:
+
+   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
+     do not work over tunnels.
+   - traceroute does not work. I planned to relay ICMP from tunnel,
+     so that this problem would be solved and traceroute output
+     would even more informative. This idea appeared to be wrong:
+     only Linux complies to rfc1812 now (yes, guys, Linux is the only
+     true router now :-)), all routers (at least, in neighbourhood of mine)
+     return only 8 bytes of payload. It is the end.
+
+   Hence, if we want that OSPF worked or traceroute said something reasonable,
+   we should search for another solution.
+
+   One of them is to parse packet trying to detect inner encapsulation
+   made by our node. It is difficult or even impossible, especially,
+   taking into account fragmentation. TO be short, tt is not solution at all.
+
+   Current solution: The solution was UNEXPECTEDLY SIMPLE.
+   We force DF flag on tunnels with preconfigured hop limit,
+   that is ALL. :-) Well, it does not remove the problem completely,
+   but exponential growth of network traffic is changed to linear
+   (branches, that exceed pmtu are pruned) and tunnel mtu
+   fastly degrades to value <68, where looping stops.
+   Yes, it is not good if there exists a router in the loop,
+   which does not force DF, even when encapsulating packets have DF set.
+   But it is not our problem! Nobody could accuse us, we made
+   all that we could make. Even if it is your gated who injected
+   fatal route to network, even if it were you who configured
+   fatal static route: you are innocent. :-)
+
+
+
+   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
+   practically identical code. It would be good to glue them
+   together, but it is not very evident, how to make them modular.
+   sit is integral part of IPv6, ipip and gre are naturally modular.
+   We could extract common parts (hash table, ioctl etc)
+   to a separate module (ip_tunnel.c).
+
+   Alexey Kuznetsov.
+ */
+
+static int ipgre_tunnel_init(struct net_device *dev);
+static void ipgre_tunnel_setup(struct net_device *dev);
+
+/* Fallback tunnel: no source, no destination, no key, no options */
+
+static int ipgre_fb_tunnel_init(struct net_device *dev);
+
+static struct net_device *ipgre_fb_tunnel_dev;
+
+/* Tunnel hash table */
+
+/*
+   4 hash tables:
+
+   3: (remote,local)
+   2: (remote,*)
+   1: (*,local)
+   0: (*,*)
+
+   We require exact key match i.e. if a key is present in packet
+   it will match only tunnel with the same key; if it is not present,
+   it will match only keyless tunnel.
+
+   All keysless packets, if not matched configured keyless tunnels
+   will match fallback tunnel.
+ */
+
+#define HASH_SIZE  16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
+
+static struct ip_tunnel *tunnels[4][HASH_SIZE];
+
+#define tunnels_r_l	(tunnels[3])
+#define tunnels_r	(tunnels[2])
+#define tunnels_l	(tunnels[1])
+#define tunnels_wc	(tunnels[0])
+
+static DEFINE_RWLOCK(ipgre_lock);
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(key);
+	struct ip_tunnel *t;
+
+	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
+			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+				return t;
+		}
+	}
+	for (t = tunnels_r[h0^h1]; t; t = t->next) {
+		if (remote == t->parms.iph.daddr) {
+			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+				return t;
+		}
+	}
+	for (t = tunnels_l[h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr ||
+		     (local == t->parms.iph.daddr && MULTICAST(local))) {
+			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+				return t;
+		}
+	}
+	for (t = tunnels_wc[h1]; t; t = t->next) {
+		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+			return t;
+	}
+
+	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
+		return ipgre_fb_tunnel_dev->priv;
+	return NULL;
+}
+
+static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+{
+	u32 remote = t->parms.iph.daddr;
+	u32 local = t->parms.iph.saddr;
+	u32 key = t->parms.i_key;
+	unsigned h = HASH(key);
+	int prio = 0;
+
+	if (local)
+		prio |= 1;
+	if (remote && !MULTICAST(remote)) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+
+	return &tunnels[prio][h];
+}
+
+static void ipgre_tunnel_link(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp = ipgre_bucket(t);
+
+	t->next = *tp;
+	write_lock_bh(&ipgre_lock);
+	*tp = t;
+	write_unlock_bh(&ipgre_lock);
+}
+
+static void ipgre_tunnel_unlink(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp;
+
+	for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
+		if (t == *tp) {
+			write_lock_bh(&ipgre_lock);
+			*tp = t->next;
+			write_unlock_bh(&ipgre_lock);
+			break;
+		}
+	}
+}
+
+static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+	u32 remote = parms->iph.daddr;
+	u32 local = parms->iph.saddr;
+	u32 key = parms->i_key;
+	struct ip_tunnel *t, **tp, *nt;
+	struct net_device *dev;
+	unsigned h = HASH(key);
+	int prio = 0;
+	char name[IFNAMSIZ];
+
+	if (local)
+		prio |= 1;
+	if (remote && !MULTICAST(remote)) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
+			if (key == t->parms.i_key)
+				return t;
+		}
+	}
+	if (!create)
+		return NULL;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else {
+		int i;
+		for (i=1; i<100; i++) {
+			sprintf(name, "gre%d", i);
+			if (__dev_get_by_name(name) == NULL)
+				break;
+		}
+		if (i==100)
+			goto failed;
+	}
+
+	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
+	if (!dev)
+	  return NULL;
+
+	dev->init = ipgre_tunnel_init;
+	nt = dev->priv;
+	nt->parms = *parms;
+
+	if (register_netdevice(dev) < 0) {
+		free_netdev(dev);
+		goto failed;
+	}
+
+	nt = dev->priv;
+	nt->parms = *parms;
+
+	dev_hold(dev);
+	ipgre_tunnel_link(nt);
+	/* Do not decrement MOD_USE_COUNT here. */
+	return nt;
+
+failed:
+	return NULL;
+}
+
+static void ipgre_tunnel_uninit(struct net_device *dev)
+{
+	ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
+	dev_put(dev);
+}
+
+
+static void ipgre_err(struct sk_buff *skb, u32 info)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+   8 bytes of packet payload. It means, that precise relaying of
+   ICMP in the real Internet is absolutely infeasible.
+
+   Moreover, Cisco "wise men" put GRE key to the third word
+   in GRE header. It makes impossible maintaining even soft state for keyed
+   GRE tunnels with enabled checksum. Tell them "thank you".
+
+   Well, I wonder, rfc1812 was written by Cisco employee,
+   what the hell these idiots break standrads established
+   by themself???
+ */
+
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	u16	     *p = (u16*)(skb->data+(iph->ihl<<2));
+	int grehlen = (iph->ihl<<2) + 4;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct ip_tunnel *t;
+	u16 flags;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		if (flags&GRE_KEY) {
+			grehlen += 4;
+			if (flags&GRE_CSUM)
+				grehlen += 4;
+		}
+	}
+
+	/* If only 8 bytes returned, keyed message will be dropped here */
+	if (skb_headlen(skb) < grehlen)
+		return;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* Soft state for pmtu is maintained by IP core. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	read_lock(&ipgre_lock);
+	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
+	if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
+		goto out;
+
+	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+		goto out;
+
+	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+out:
+	read_unlock(&ipgre_lock);
+	return;
+#else
+	struct iphdr *iph = (struct iphdr*)dp;
+	struct iphdr *eiph;
+	u16	     *p = (u16*)(dp+(iph->ihl<<2));
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	int rel_type = 0;
+	int rel_code = 0;
+	int rel_info = 0;
+	u16 flags;
+	int grehlen = (iph->ihl<<2) + 4;
+	struct sk_buff *skb2;
+	struct flowi fl;
+	struct rtable *rt;
+
+	if (p[1] != htons(ETH_P_IP))
+		return;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		if (flags&GRE_CSUM)
+			grehlen += 4;
+		if (flags&GRE_KEY)
+			grehlen += 4;
+		if (flags&GRE_SEQ)
+			grehlen += 4;
+	}
+	if (len < grehlen + sizeof(struct iphdr))
+		return;
+	eiph = (struct iphdr*)(dp + grehlen);
+
+	switch (type) {
+	default:
+		return;
+	case ICMP_PARAMETERPROB:
+		if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+			return;
+
+		/* So... This guy found something strange INSIDE encapsulated
+		   packet. Well, he is fool, but what can we do ?
+		 */
+		rel_type = ICMP_PARAMETERPROB;
+		rel_info = skb->h.icmph->un.gateway - grehlen;
+		break;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* And it is the only really necessary thing :-) */
+			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+			if (rel_info < grehlen+68)
+				return;
+			rel_info -= grehlen;
+			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+			if (rel_info > ntohs(eiph->tot_len))
+				return;
+			break;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe, it is just ether pollution. --ANK
+			 */
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	/* Prepare fake skb to feed it to icmp_send */
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		return;
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	skb_pull(skb2, skb->data - (u8*)eiph);
+	skb2->nh.raw = skb2->data;
+
+	/* Try to guess incoming interface */
+	memset(&fl, 0, sizeof(fl));
+	fl.fl4_dst = eiph->saddr;
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_GRE;
+	if (ip_route_output_key(&rt, &fl)) {
+		kfree_skb(skb2);
+		return;
+	}
+	skb2->dev = rt->u.dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags&RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		fl.fl4_dst = eiph->daddr;
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
+		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
+			ip_rt_put(rt);
+			kfree_skb(skb2);
+			return;
+		}
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+		    skb2->dst->dev->type != ARPHRD_IPGRE) {
+			kfree_skb(skb2);
+			return;
+		}
+	}
+
+	/* change mtu on this route */
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+		if (rel_info > dst_mtu(skb2->dst)) {
+			kfree_skb(skb2);
+			return;
+		}
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+		rel_info = htonl(rel_info);
+	} else if (type == ICMP_TIME_EXCEEDED) {
+		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		if (t->parms.iph.ttl) {
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+		}
+	}
+
+	icmp_send(skb2, rel_type, rel_code, rel_info);
+	kfree_skb(skb2);
+#endif
+}
+
+static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+{
+	if (INET_ECN_is_ce(iph->tos)) {
+		if (skb->protocol == htons(ETH_P_IP)) {
+			IP_ECN_set_ce(skb->nh.iph);
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+			IP6_ECN_set_ce(skb->nh.ipv6h);
+		}
+	}
+}
+
+static inline u8
+ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
+{
+	u8 inner = 0;
+	if (skb->protocol == htons(ETH_P_IP))
+		inner = old_iph->tos;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
+	return INET_ECN_encapsulate(tos, inner);
+}
+
+static int ipgre_rcv(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	u8     *h;
+	u16    flags;
+	u16    csum = 0;
+	u32    key = 0;
+	u32    seqno = 0;
+	struct ip_tunnel *tunnel;
+	int    offset = 4;
+
+	if (!pskb_may_pull(skb, 16))
+		goto drop_nolock;
+
+	iph = skb->nh.iph;
+	h = skb->data;
+	flags = *(u16*)h;
+
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+		/* - Version must be 0.
+		   - We do not support routing headers.
+		 */
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			goto drop_nolock;
+
+		if (flags&GRE_CSUM) {
+			if (skb->ip_summed == CHECKSUM_HW) {
+				csum = (u16)csum_fold(skb->csum);
+				if (csum)
+					skb->ip_summed = CHECKSUM_NONE;
+			}
+			if (skb->ip_summed == CHECKSUM_NONE) {
+				skb->csum = skb_checksum(skb, 0, skb->len, 0);
+				skb->ip_summed = CHECKSUM_HW;
+				csum = (u16)csum_fold(skb->csum);
+			}
+			offset += 4;
+		}
+		if (flags&GRE_KEY) {
+			key = *(u32*)(h + offset);
+			offset += 4;
+		}
+		if (flags&GRE_SEQ) {
+			seqno = ntohl(*(u32*)(h + offset));
+			offset += 4;
+		}
+	}
+
+	read_lock(&ipgre_lock);
+	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+		secpath_reset(skb);
+
+		skb->protocol = *(u16*)(h + 2);
+		/* WCCP version 1 and 2 protocol decoding.
+		 * - Change protocol to IP
+		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+		 */
+		if (flags == 0 &&
+		    skb->protocol == __constant_htons(ETH_P_WCCP)) {
+			skb->protocol = __constant_htons(ETH_P_IP);
+			if ((*(h + offset) & 0xF0) != 0x40) 
+				offset += 4;
+		}
+
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = __pskb_pull(skb, offset);
+		skb_postpull_rcsum(skb, skb->mac.raw, offset);
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->pkt_type = PACKET_HOST;
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+		if (MULTICAST(iph->daddr)) {
+			/* Looped back packet, drop it! */
+			if (((struct rtable*)skb->dst)->fl.iif == 0)
+				goto drop;
+			tunnel->stat.multicast++;
+			skb->pkt_type = PACKET_BROADCAST;
+		}
+#endif
+
+		if (((flags&GRE_CSUM) && csum) ||
+		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+			tunnel->stat.rx_crc_errors++;
+			tunnel->stat.rx_errors++;
+			goto drop;
+		}
+		if (tunnel->parms.i_flags&GRE_SEQ) {
+			if (!(flags&GRE_SEQ) ||
+			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
+				tunnel->stat.rx_fifo_errors++;
+				tunnel->stat.rx_errors++;
+				goto drop;
+			}
+			tunnel->i_seqno = seqno + 1;
+		}
+		tunnel->stat.rx_packets++;
+		tunnel->stat.rx_bytes += skb->len;
+		skb->dev = tunnel->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		nf_reset(skb);
+		ipgre_ecn_decapsulate(iph, skb);
+		netif_rx(skb);
+		read_unlock(&ipgre_lock);
+		return(0);
+	}
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+
+drop:
+	read_unlock(&ipgre_lock);
+drop_nolock:
+	kfree_skb(skb);
+	return(0);
+}
+
+static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct net_device_stats *stats = &tunnel->stat;
+	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *tiph;
+	u8     tos;
+	u16    df;
+	struct rtable *rt;     			/* Route to the other host */
+	struct net_device *tdev;			/* Device to other host */
+	struct iphdr  *iph;			/* Our new IP header */
+	int    max_headroom;			/* The extra header space needed */
+	int    gre_hlen;
+	u32    dst;
+	int    mtu;
+
+	if (tunnel->recursion++) {
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	if (dev->hard_header) {
+		gre_hlen = 0;
+		tiph = (struct iphdr*)skb->data;
+	} else {
+		gre_hlen = tunnel->hlen;
+		tiph = &tunnel->parms.iph;
+	}
+
+	if ((dst = tiph->daddr) == 0) {
+		/* NBMA tunnel */
+
+		if (skb->dst == NULL) {
+			tunnel->stat.tx_fifo_errors++;
+			goto tx_error;
+		}
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			rt = (struct rtable*)skb->dst;
+			if ((dst = rt->rt_gateway) == 0)
+				goto tx_error_icmp;
+		}
+#ifdef CONFIG_IPV6
+		else if (skb->protocol == htons(ETH_P_IPV6)) {
+			struct in6_addr *addr6;
+			int addr_type;
+			struct neighbour *neigh = skb->dst->neighbour;
+
+			if (neigh == NULL)
+				goto tx_error;
+
+			addr6 = (struct in6_addr*)&neigh->primary_key;
+			addr_type = ipv6_addr_type(addr6);
+
+			if (addr_type == IPV6_ADDR_ANY) {
+				addr6 = &skb->nh.ipv6h->daddr;
+				addr_type = ipv6_addr_type(addr6);
+			}
+
+			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+				goto tx_error_icmp;
+
+			dst = addr6->s6_addr32[3];
+		}
+#endif
+		else
+			goto tx_error;
+	}
+
+	tos = tiph->tos;
+	if (tos&1) {
+		if (skb->protocol == htons(ETH_P_IP))
+			tos = old_iph->tos;
+		tos &= ~1;
+	}
+
+	{
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_GRE };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error;
+		}
+	}
+	tdev = rt->u.dst.dev;
+
+	if (tdev == dev) {
+		ip_rt_put(rt);
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	df = tiph->frag_off;
+	if (df)
+		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
+	else
+		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		df |= (old_iph->frag_off&htons(IP_DF));
+
+		if ((old_iph->frag_off&htons(IP_DF)) &&
+		    mtu < ntohs(old_iph->tot_len)) {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+			ip_rt_put(rt);
+			goto tx_error;
+		}
+	}
+#ifdef CONFIG_IPV6
+	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+
+		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
+			if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+			    rt6->rt6i_dst.plen == 128) {
+				rt6->rt6i_flags |= RTF_MODIFIED;
+				skb->dst->metrics[RTAX_MTU-1] = mtu;
+			}
+		}
+
+		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			ip_rt_put(rt);
+			goto tx_error;
+		}
+	}
+#endif
+
+	if (tunnel->err_count > 0) {
+		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+			tunnel->err_count--;
+
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
+
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+  			stats->tx_dropped++;
+			dev_kfree_skb(skb);
+			tunnel->recursion--;
+			return 0;
+		}
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		dev_kfree_skb(skb);
+		skb = new_skb;
+		old_iph = skb->nh.iph;
+	}
+
+	skb->h.raw = skb->nh.raw;
+	skb->nh.raw = skb_push(skb, gre_hlen);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+
+	iph 			=	skb->nh.iph;
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr) >> 2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_GRE;
+	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+
+	if ((iph->ttl = tiph->ttl) == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			iph->ttl = old_iph->ttl;
+#ifdef CONFIG_IPV6
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
+#endif
+		else
+			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+	}
+
+	((u16*)(iph+1))[0] = tunnel->parms.o_flags;
+	((u16*)(iph+1))[1] = skb->protocol;
+
+	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+		u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
+
+		if (tunnel->parms.o_flags&GRE_SEQ) {
+			++tunnel->o_seqno;
+			*ptr = htonl(tunnel->o_seqno);
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_KEY) {
+			*ptr = tunnel->parms.o_key;
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_CSUM) {
+			*ptr = 0;
+			*(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+		}
+	}
+
+	nf_reset(skb);
+
+	IPTUNNEL_XMIT();
+	tunnel->recursion--;
+	return 0;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+
+tx_error:
+	stats->tx_errors++;
+	dev_kfree_skb(skb);
+	tunnel->recursion--;
+	return 0;
+}
+
+static int
+ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip_tunnel_parm p;
+	struct ip_tunnel *t;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == ipgre_fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			t = ipgre_tunnel_locate(&p, 0);
+		}
+		if (t == NULL)
+			t = (struct ip_tunnel*)dev->priv;
+		memcpy(&p, &t->parms, sizeof(p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
+		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+			goto done;
+		if (p.iph.ttl)
+			p.iph.frag_off |= htons(IP_DF);
+
+		if (!(p.i_flags&GRE_KEY))
+			p.i_key = 0;
+		if (!(p.o_flags&GRE_KEY))
+			p.o_key = 0;
+
+		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				unsigned nflags=0;
+
+				t = (struct ip_tunnel*)dev->priv;
+
+				if (MULTICAST(p.iph.daddr))
+					nflags = IFF_BROADCAST;
+				else if (p.iph.daddr)
+					nflags = IFF_POINTOPOINT;
+
+				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
+					err = -EINVAL;
+					break;
+				}
+				ipgre_tunnel_unlink(t);
+				t->parms.iph.saddr = p.iph.saddr;
+				t->parms.iph.daddr = p.iph.daddr;
+				t->parms.i_key = p.i_key;
+				t->parms.o_key = p.o_key;
+				memcpy(dev->dev_addr, &p.iph.saddr, 4);
+				memcpy(dev->broadcast, &p.iph.daddr, 4);
+				ipgre_tunnel_link(t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+			if (cmd == SIOCCHGTUNNEL) {
+				t->parms.iph.ttl = p.iph.ttl;
+				t->parms.iph.tos = p.iph.tos;
+				t->parms.iph.frag_off = p.iph.frag_off;
+			}
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ipgre_fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
+				goto done;
+			err = -EPERM;
+			if (t == ipgre_fb_tunnel_dev->priv)
+				goto done;
+			dev = t->dev;
+		}
+		err = unregister_netdevice(dev);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
+{
+	return &(((struct ip_tunnel*)dev->priv)->stat);
+}
+
+static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+/* Nice toy. Unfortunately, useless in real life :-)
+   It allows to construct virtual multiprotocol broadcast "LAN"
+   over the Internet, provided multicast routing is tuned.
+
+
+   I have no idea was this bicycle invented before me,
+   so that I had to set ARPHRD_IPGRE to a random value.
+   I have an impression, that Cisco could make something similar,
+   but this feature is apparently missing in IOS<=11.2(8).
+   
+   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
+   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
+
+   ping -t 255 224.66.66.66
+
+   If nobody answers, mbone does not work.
+
+   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
+   ip addr add 10.66.66.<somewhat>/24 dev Universe
+   ifconfig Universe up
+   ifconfig Universe add fe80::<Your_real_addr>/10
+   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
+   ftp 10.66.66.66
+   ...
+   ftp fec0:6666:6666::193.233.7.65
+   ...
+
+ */
+
+static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+			void *daddr, void *saddr, unsigned len)
+{
+	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
+	u16 *p = (u16*)(iph+1);
+
+	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+	p[0]		= t->parms.o_flags;
+	p[1]		= htons(type);
+
+	/*
+	 *	Set the source hardware address. 
+	 */
+	 
+	if (saddr)
+		memcpy(&iph->saddr, saddr, 4);
+
+	if (daddr) {
+		memcpy(&iph->daddr, daddr, 4);
+		return t->hlen;
+	}
+	if (iph->daddr && !MULTICAST(iph->daddr))
+		return t->hlen;
+	
+	return -t->hlen;
+}
+
+static int ipgre_open(struct net_device *dev)
+{
+	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+
+	if (MULTICAST(t->parms.iph.daddr)) {
+		struct flowi fl = { .oif = t->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = t->parms.iph.daddr,
+						.saddr = t->parms.iph.saddr,
+						.tos = RT_TOS(t->parms.iph.tos) } },
+				    .proto = IPPROTO_GRE };
+		struct rtable *rt;
+		if (ip_route_output_key(&rt, &fl))
+			return -EADDRNOTAVAIL;
+		dev = rt->u.dst.dev;
+		ip_rt_put(rt);
+		if (__in_dev_get(dev) == NULL)
+			return -EADDRNOTAVAIL;
+		t->mlink = dev->ifindex;
+		ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr);
+	}
+	return 0;
+}
+
+static int ipgre_close(struct net_device *dev)
+{
+	struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
+		struct in_device *in_dev = inetdev_by_index(t->mlink);
+		if (in_dev) {
+			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
+			in_dev_put(in_dev);
+		}
+	}
+	return 0;
+}
+
+#endif
+
+static void ipgre_tunnel_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->uninit		= ipgre_tunnel_uninit;
+	dev->destructor 	= free_netdev;
+	dev->hard_start_xmit	= ipgre_tunnel_xmit;
+	dev->get_stats		= ipgre_tunnel_get_stats;
+	dev->do_ioctl		= ipgre_tunnel_ioctl;
+	dev->change_mtu		= ipgre_tunnel_change_mtu;
+
+	dev->type		= ARPHRD_IPGRE;
+	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+	dev->mtu		= 1500 - sizeof(struct iphdr) - 4;
+	dev->flags		= IFF_NOARP;
+	dev->iflink		= 0;
+	dev->addr_len		= 4;
+}
+
+static int ipgre_tunnel_init(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+	int hlen = LL_MAX_HEADER;
+	int mtu = 1500;
+	int addend = sizeof(struct iphdr) + 4;
+
+	tunnel = (struct ip_tunnel*)dev->priv;
+	iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+	/* Guess output device to choose reasonable mtu and hard_header_len */
+
+	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_GRE };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+
+		dev->flags |= IFF_POINTOPOINT;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+		if (MULTICAST(iph->daddr)) {
+			if (!iph->saddr)
+				return -EINVAL;
+			dev->flags = IFF_BROADCAST;
+			dev->hard_header = ipgre_header;
+			dev->open = ipgre_open;
+			dev->stop = ipgre_close;
+		}
+#endif
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(tunnel->parms.link);
+
+	if (tdev) {
+		hlen = tdev->hard_header_len;
+		mtu = tdev->mtu;
+	}
+	dev->iflink = tunnel->parms.link;
+
+	/* Precalculate GRE options length */
+	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+		if (tunnel->parms.o_flags&GRE_CSUM)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_KEY)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_SEQ)
+			addend += 4;
+	}
+	dev->hard_header_len = hlen + addend;
+	dev->mtu = mtu - addend;
+	tunnel->hlen = addend;
+	return 0;
+}
+
+int __init ipgre_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct iphdr *iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	iph->version		= 4;
+	iph->protocol		= IPPROTO_GRE;
+	iph->ihl		= 5;
+	tunnel->hlen		= sizeof(struct iphdr) + 4;
+
+	dev_hold(dev);
+	tunnels_wc[0]		= tunnel;
+	return 0;
+}
+
+
+static struct net_protocol ipgre_protocol = {
+	.handler	=	ipgre_rcv,
+	.err_handler	=	ipgre_err,
+};
+
+
+/*
+ *	And now the modules code and kernel interface.
+ */
+
+static int __init ipgre_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+
+	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
+		printk(KERN_INFO "ipgre init: can't add protocol\n");
+		return -EAGAIN;
+	}
+
+	ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
+					   ipgre_tunnel_setup);
+	if (!ipgre_fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
+
+	if ((err = register_netdev(ipgre_fb_tunnel_dev)))
+		goto err2;
+out:
+	return err;
+err2:
+	free_netdev(ipgre_fb_tunnel_dev);
+err1:
+	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+	goto out;
+}
+
+static void ipgre_fini(void)
+{
+	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
+		printk(KERN_INFO "ipgre close: can't remove protocol\n");
+
+	unregister_netdev(ipgre_fb_tunnel_dev);
+}
+
+module_init(ipgre_init);
+module_exit(ipgre_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
new file mode 100644
index 00000000000..a0d0833034b
--- /dev/null
+++ b/net/ipv4/ip_input.c
@@ -0,0 +1,431 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The Internet Protocol (IP) module.
+ *
+ * Version:	$Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Donald Becker, <becker@super.org>
+ *		Alan Cox, <Alan.Cox@linux.org>
+ *		Richard Underwood
+ *		Stefan Becker, <stefanb@yello.ping.de>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		
+ *
+ * Fixes:
+ *		Alan Cox	:	Commented a couple of minor bits of surplus code
+ *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
+ *					(just stops a compiler warning).
+ *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
+ *					are junked rather than corrupting things.
+ *		Alan Cox	:	Frames to bad broadcast subnets are dumped
+ *					We used to process them non broadcast and
+ *					boy could that cause havoc.
+ *		Alan Cox	:	ip_forward sets the free flag on the
+ *					new frame it queues. Still crap because
+ *					it copies the frame but at least it
+ *					doesn't eat memory too.
+ *		Alan Cox	:	Generic queue code and memory fixes.
+ *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
+ *		Gerhard Koerting:	Forward fragmented frames correctly.
+ *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
+ *		Gerhard Koerting:	IP interface addressing fix.
+ *		Linus Torvalds	:	More robustness checks
+ *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
+ *		Alan Cox	:	Save IP header pointer for later
+ *		Alan Cox	:	ip option setting
+ *		Alan Cox	:	Use ip_tos/ip_ttl settings
+ *		Alan Cox	:	Fragmentation bogosity removed
+ *					(Thanks to Mark.Bush@prg.ox.ac.uk)
+ *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
+ *		Alan Cox	:	Silly ip bug when an overlength
+ *					fragment turns up. Now frees the
+ *					queue.
+ *		Linus Torvalds/ :	Memory leakage on fragmentation
+ *		Alan Cox	:	handling.
+ *		Gerhard Koerting:	Forwarding uses IP priority hints
+ *		Teemu Rantanen	:	Fragment problems.
+ *		Alan Cox	:	General cleanup, comments and reformat
+ *		Alan Cox	:	SNMP statistics
+ *		Alan Cox	:	BSD address rule semantics. Also see
+ *					UDP as there is a nasty checksum issue
+ *					if you do things the wrong way.
+ *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
+ *		Alan Cox	: 	IP options adjust sk->priority.
+ *		Pedro Roque	:	Fix mtu/length error in ip_forward.
+ *		Alan Cox	:	Avoid ip_chk_addr when possible.
+ *	Richard Underwood	:	IP multicasting.
+ *		Alan Cox	:	Cleaned up multicast handlers.
+ *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
+ *		Gunther Mayer	:	Fix the SNMP reporting typo
+ *		Alan Cox	:	Always in group 224.0.0.1
+ *	Pauline Middelink	:	Fast ip_checksum update when forwarding
+ *					Masquerading support.
+ *		Alan Cox	:	Multicast loopback error for 224.0.0.1
+ *		Alan Cox	:	IP_MULTICAST_LOOP option.
+ *		Alan Cox	:	Use notifiers.
+ *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
+ *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
+ *		Stefan Becker   :       Send out ICMP HOST REDIRECT
+ *	Arnt Gulbrandsen	:	ip_build_xmit
+ *		Alan Cox	:	Per socket routing cache
+ *		Alan Cox	:	Fixed routing cache, added header cache.
+ *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
+ *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
+ *		Alan Cox	:	Incoming IP option handling.
+ *		Alan Cox	:	Set saddr on raw output frames as per BSD.
+ *		Alan Cox	:	Stopped broadcast source route explosions.
+ *		Alan Cox	:	Can disable source routing
+ *		Takeshi Sone    :	Masquerading didn't work.
+ *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
+ *		Alan Cox	:	Memory leaks, tramples, misc debugging.
+ *		Alan Cox	:	Fixed multicast (by popular demand 8))
+ *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
+ *		Alan Cox	:	Fixed SNMP statistics [I think]
+ *	Gerhard Koerting	:	IP fragmentation forwarding fix
+ *		Alan Cox	:	Device lock against page fault.
+ *		Alan Cox	:	IP_HDRINCL facility.
+ *	Werner Almesberger	:	Zero fragment bug
+ *		Alan Cox	:	RAW IP frame length bug
+ *		Alan Cox	:	Outgoing firewall on build_xmit
+ *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
+ *		Alan Cox	:	Multicast routing hooks
+ *		Jos Vos		:	Do accounting *before* call_in_firewall
+ *	Willy Konynenberg	:	Transparent proxying support
+ *
+ *  
+ *
+ * To Fix:
+ *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
+ *		and could be made very efficient with the addition of some virtual memory hacks to permit
+ *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
+ *		Output fragmentation wants updating along with the buffer management to use a single 
+ *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
+ *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
+ *		fragmentation anyway.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/system.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/icmp.h>
+#include <net/raw.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/xfrm.h>
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+
+/*
+ *	SNMP management statistics
+ */
+
+DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+
+/*
+ *	Process Router Attention IP option
+ */ 
+int ip_call_ra_chain(struct sk_buff *skb)
+{
+	struct ip_ra_chain *ra;
+	u8 protocol = skb->nh.iph->protocol;
+	struct sock *last = NULL;
+
+	read_lock(&ip_ra_lock);
+	for (ra = ip_ra_chain; ra; ra = ra->next) {
+		struct sock *sk = ra->sk;
+
+		/* If socket is bound to an interface, only report
+		 * the packet if it came  from that interface.
+		 */
+		if (sk && inet_sk(sk)->num == protocol &&
+		    (!sk->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
+			if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
+				if (skb == NULL) {
+					read_unlock(&ip_ra_lock);
+					return 1;
+				}
+			}
+			if (last) {
+				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+				if (skb2)
+					raw_rcv(last, skb2);
+			}
+			last = sk;
+		}
+	}
+
+	if (last) {
+		raw_rcv(last, skb);
+		read_unlock(&ip_ra_lock);
+		return 1;
+	}
+	read_unlock(&ip_ra_lock);
+	return 0;
+}
+
+static inline int ip_local_deliver_finish(struct sk_buff *skb)
+{
+	int ihl = skb->nh.iph->ihl*4;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	nf_debug_ip_local_deliver(skb);
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+	__skb_pull(skb, ihl);
+
+	/* Free reference early: we don't need it any more, and it may
+           hold ip_conntrack module loaded indefinitely. */
+	nf_reset(skb);
+
+        /* Point into the IP datagram, just past the header. */
+        skb->h.raw = skb->data;
+
+	rcu_read_lock();
+	{
+		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
+		int protocol = skb->nh.iph->protocol;
+		int hash;
+		struct sock *raw_sk;
+		struct net_protocol *ipprot;
+
+	resubmit:
+		hash = protocol & (MAX_INET_PROTOS - 1);
+		raw_sk = sk_head(&raw_v4_htable[hash]);
+
+		/* If there maybe a raw socket we must check - if not we
+		 * don't care less
+		 */
+		if (raw_sk)
+			raw_v4_input(skb, skb->nh.iph, hash);
+
+		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
+			int ret;
+
+			if (!ipprot->no_policy &&
+			    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				kfree_skb(skb);
+				goto out;
+			}
+			ret = ipprot->handler(skb);
+			if (ret < 0) {
+				protocol = -ret;
+				goto resubmit;
+			}
+			IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+		} else {
+			if (!raw_sk) {
+				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+					IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+					icmp_send(skb, ICMP_DEST_UNREACH,
+						  ICMP_PROT_UNREACH, 0);
+				}
+			} else
+				IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+			kfree_skb(skb);
+		}
+	}
+ out:
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/*
+ * 	Deliver IP Packets to the higher protocol layers.
+ */ 
+int ip_local_deliver(struct sk_buff *skb)
+{
+	/*
+	 *	Reassemble IP fragments.
+	 */
+
+	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
+		if (!skb)
+			return 0;
+	}
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
+		       ip_local_deliver_finish);
+}
+
+static inline int ip_rcv_finish(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct iphdr *iph = skb->nh.iph;
+
+	/*
+	 *	Initialise the virtual path cache for the packet. It describes
+	 *	how the packet travels inside Linux networking.
+	 */ 
+	if (skb->dst == NULL) {
+		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))
+			goto drop; 
+	}
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (skb->dst->tclassid) {
+		struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
+		u32 idx = skb->dst->tclassid;
+		st[idx&0xFF].o_packets++;
+		st[idx&0xFF].o_bytes+=skb->len;
+		st[(idx>>16)&0xFF].i_packets++;
+		st[(idx>>16)&0xFF].i_bytes+=skb->len;
+	}
+#endif
+
+	if (iph->ihl > 5) {
+		struct ip_options *opt;
+
+		/* It looks as overkill, because not all
+		   IP options require packet mangling.
+		   But it is the easiest for now, especially taking
+		   into account that combination of IP options
+		   and running sniffer is extremely rare condition.
+		                                      --ANK (980813)
+		*/
+
+		if (skb_cow(skb, skb_headroom(skb))) {
+			IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
+		iph = skb->nh.iph;
+
+		if (ip_options_compile(NULL, skb))
+			goto inhdr_error;
+
+		opt = &(IPCB(skb)->opt);
+		if (opt->srr) {
+			struct in_device *in_dev = in_dev_get(dev);
+			if (in_dev) {
+				if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
+					if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
+						printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n",
+						       NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+					in_dev_put(in_dev);
+					goto drop;
+				}
+				in_dev_put(in_dev);
+			}
+			if (ip_options_rcv_srr(skb))
+				goto drop;
+		}
+	}
+
+	return dst_input(skb);
+
+inhdr_error:
+	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+drop:
+        kfree_skb(skb);
+        return NET_RX_DROP;
+}
+
+/*
+ * 	Main IP Receive routine.
+ */ 
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct iphdr *iph;
+
+	/* When the interface is in promisc. mode, drop all the crap
+	 * that it receives, do not try to analyse it.
+	 */
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto drop;
+
+	IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		goto out;
+	}
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto inhdr_error;
+
+	iph = skb->nh.iph;
+
+	/*
+	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
+	 *
+	 *	Is the datagram acceptable?
+	 *
+	 *	1.	Length at least the size of an ip header
+	 *	2.	Version of 4
+	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
+	 *	4.	Doesn't have a bogus length
+	 */
+
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error; 
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto inhdr_error;
+
+	iph = skb->nh.iph;
+
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		goto inhdr_error; 
+
+	{
+		__u32 len = ntohs(iph->tot_len); 
+		if (skb->len < len || len < (iph->ihl<<2))
+			goto inhdr_error;
+
+		/* Our transport medium may have padded the buffer out. Now we know it
+		 * is IP we can trim to the true length of the frame.
+		 * Note this now means skb->len holds ntohs(iph->tot_len).
+		 */
+		if (pskb_trim_rcsum(skb, len)) {
+			IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
+	}
+
+	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
+		       ip_rcv_finish);
+
+inhdr_error:
+	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+drop:
+        kfree_skb(skb);
+out:
+        return NET_RX_DROP;
+}
+
+EXPORT_SYMBOL(ip_rcv);
+EXPORT_SYMBOL(ip_statistics);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
new file mode 100644
index 00000000000..6d89f3f3e70
--- /dev/null
+++ b/net/ipv4/ip_options.c
@@ -0,0 +1,625 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The options processing module for ip.c
+ *
+ * Version:	$Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
+ *
+ * Authors:	A.N.Kuznetsov
+ *		
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+
+/* 
+ * Write options to IP header, record destination address to
+ * source route option, address of outgoing interface
+ * (we should already know it, so that this  function is allowed be
+ * called only after routing decision) and timestamp,
+ * if we originate this datagram.
+ *
+ * daddr is real destination address, next hop is recorded in IP header.
+ * saddr is address of outgoing interface.
+ */
+
+void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
+			    u32 daddr, struct rtable *rt, int is_frag) 
+{
+	unsigned char * iph = skb->nh.raw;
+
+	memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
+	memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
+	opt = &(IPCB(skb)->opt);
+	opt->is_data = 0;
+
+	if (opt->srr)
+		memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
+
+	if (!is_frag) {
+		if (opt->rr_needaddr)
+			ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt);
+		if (opt->ts_needaddr)
+			ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
+		if (opt->ts_needtime) {
+			struct timeval tv;
+			__u32 midtime;
+			do_gettimeofday(&tv);
+			midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
+			memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
+		}
+		return;
+	}
+	if (opt->rr) {
+		memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
+		opt->rr = 0;
+		opt->rr_needaddr = 0;
+	}
+	if (opt->ts) {
+		memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
+		opt->ts = 0;
+		opt->ts_needaddr = opt->ts_needtime = 0;
+	}
+}
+
+/* 
+ * Provided (sopt, skb) points to received options,
+ * build in dopt compiled option set appropriate for answering.
+ * i.e. invert SRR option, copy anothers,
+ * and grab room in RR/TS options.
+ *
+ * NOTE: dopt cannot point to skb.
+ */
+
+int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) 
+{
+	struct ip_options *sopt;
+	unsigned char *sptr, *dptr;
+	int soffset, doffset;
+	int	optlen;
+	u32	daddr;
+
+	memset(dopt, 0, sizeof(struct ip_options));
+
+	dopt->is_data = 1;
+
+	sopt = &(IPCB(skb)->opt);
+
+	if (sopt->optlen == 0) {
+		dopt->optlen = 0;
+		return 0;
+	}
+
+	sptr = skb->nh.raw;
+	dptr = dopt->__data;
+
+	if (skb->dst)
+		daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
+	else
+		daddr = skb->nh.iph->daddr;
+
+	if (sopt->rr) {
+		optlen  = sptr[sopt->rr+1];
+		soffset = sptr[sopt->rr+2];
+		dopt->rr = dopt->optlen + sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->rr, optlen);
+		if (sopt->rr_needaddr && soffset <= optlen) {
+			if (soffset + 3 > optlen)
+				return -EINVAL;
+			dptr[2] = soffset + 4;
+			dopt->rr_needaddr = 1;
+		}
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
+	if (sopt->ts) {
+		optlen = sptr[sopt->ts+1];
+		soffset = sptr[sopt->ts+2];
+		dopt->ts = dopt->optlen + sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->ts, optlen);
+		if (soffset <= optlen) {
+			if (sopt->ts_needaddr) {
+				if (soffset + 3 > optlen)
+					return -EINVAL;
+				dopt->ts_needaddr = 1;
+				soffset += 4;
+			}
+			if (sopt->ts_needtime) {
+				if (soffset + 3 > optlen)
+					return -EINVAL;
+				if ((dptr[3]&0xF) != IPOPT_TS_PRESPEC) {
+					dopt->ts_needtime = 1;
+					soffset += 4;
+				} else {
+					dopt->ts_needtime = 0;
+
+					if (soffset + 8 <= optlen) {
+						__u32 addr;
+
+						memcpy(&addr, sptr+soffset-1, 4);
+						if (inet_addr_type(addr) != RTN_LOCAL) {
+							dopt->ts_needtime = 1;
+							soffset += 8;
+						}
+					}
+				}
+			}
+			dptr[2] = soffset;
+		}
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
+	if (sopt->srr) {
+		unsigned char * start = sptr+sopt->srr;
+		u32 faddr;
+
+		optlen  = start[1];
+		soffset = start[2];
+		doffset = 0;
+		if (soffset > optlen)
+			soffset = optlen + 1;
+		soffset -= 4;
+		if (soffset > 3) {
+			memcpy(&faddr, &start[soffset-1], 4);
+			for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
+				memcpy(&dptr[doffset-1], &start[soffset-1], 4);
+			/*
+			 * RFC1812 requires to fix illegal source routes.
+			 */
+			if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
+				doffset -= 4;
+		}
+		if (doffset > 3) {
+			memcpy(&start[doffset-1], &daddr, 4);
+			dopt->faddr = faddr;
+			dptr[0] = start[0];
+			dptr[1] = doffset+3;
+			dptr[2] = 4;
+			dptr += doffset+3;
+			dopt->srr = dopt->optlen + sizeof(struct iphdr);
+			dopt->optlen += doffset+3;
+			dopt->is_strictroute = sopt->is_strictroute;
+		}
+	}
+	while (dopt->optlen & 3) {
+		*dptr++ = IPOPT_END;
+		dopt->optlen++;
+	}
+	return 0;
+}
+
+/*
+ *	Options "fragmenting", just fill options not
+ *	allowed in fragments with NOOPs.
+ *	Simple and stupid 8), but the most efficient way.
+ */
+
+void ip_options_fragment(struct sk_buff * skb) 
+{
+	unsigned char * optptr = skb->nh.raw;
+	struct ip_options * opt = &(IPCB(skb)->opt);
+	int  l = opt->optlen;
+	int  optlen;
+
+	while (l > 0) {
+		switch (*optptr) {
+		case IPOPT_END:
+			return;
+		case IPOPT_NOOP:
+			l--;
+			optptr++;
+			continue;
+		}
+		optlen = optptr[1];
+		if (optlen<2 || optlen>l)
+		  return;
+		if (!IPOPT_COPIED(*optptr))
+			memset(optptr, IPOPT_NOOP, optlen);
+		l -= optlen;
+		optptr += optlen;
+	}
+	opt->ts = 0;
+	opt->rr = 0;
+	opt->rr_needaddr = 0;
+	opt->ts_needaddr = 0;
+	opt->ts_needtime = 0;
+	return;
+}
+
+/*
+ * Verify options and fill pointers in struct options.
+ * Caller should clear *opt, and set opt->data.
+ * If opt == NULL, then skb->data should point to IP header.
+ */
+
+int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
+{
+	int l;
+	unsigned char * iph;
+	unsigned char * optptr;
+	int optlen;
+	unsigned char * pp_ptr = NULL;
+	struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;
+
+	if (!opt) {
+		opt = &(IPCB(skb)->opt);
+		memset(opt, 0, sizeof(struct ip_options));
+		iph = skb->nh.raw;
+		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
+		optptr = iph + sizeof(struct iphdr);
+		opt->is_data = 0;
+	} else {
+		optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
+		iph = optptr - sizeof(struct iphdr);
+	}
+
+	for (l = opt->optlen; l > 0; ) {
+		switch (*optptr) {
+		      case IPOPT_END:
+			for (optptr++, l--; l>0; optptr++, l--) {
+				if (*optptr != IPOPT_END) {
+					*optptr = IPOPT_END;
+					opt->is_changed = 1;
+				}
+			}
+			goto eol;
+		      case IPOPT_NOOP:
+			l--;
+			optptr++;
+			continue;
+		}
+		optlen = optptr[1];
+		if (optlen<2 || optlen>l) {
+			pp_ptr = optptr;
+			goto error;
+		}
+		switch (*optptr) {
+		      case IPOPT_SSRR:
+		      case IPOPT_LSRR:
+			if (optlen < 3) {
+				pp_ptr = optptr + 1;
+				goto error;
+			}
+			if (optptr[2] < 4) {
+				pp_ptr = optptr + 2;
+				goto error;
+			}
+			/* NB: cf RFC-1812 5.2.4.1 */
+			if (opt->srr) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			if (!skb) {
+				if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
+					pp_ptr = optptr + 1;
+					goto error;
+				}
+				memcpy(&opt->faddr, &optptr[3], 4);
+				if (optlen > 7)
+					memmove(&optptr[3], &optptr[7], optlen-7);
+			}
+			opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
+			opt->srr = optptr - iph;
+			break;
+		      case IPOPT_RR:
+			if (opt->rr) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			if (optlen < 3) {
+				pp_ptr = optptr + 1;
+				goto error;
+			}
+			if (optptr[2] < 4) {
+				pp_ptr = optptr + 2;
+				goto error;
+			}
+			if (optptr[2] <= optlen) {
+				if (optptr[2]+3 > optlen) {
+					pp_ptr = optptr + 2;
+					goto error;
+				}
+				if (skb) {
+					memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
+					opt->is_changed = 1;
+				}
+				optptr[2] += 4;
+				opt->rr_needaddr = 1;
+			}
+			opt->rr = optptr - iph;
+			break;
+		      case IPOPT_TIMESTAMP:
+			if (opt->ts) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			if (optlen < 4) {
+				pp_ptr = optptr + 1;
+				goto error;
+			}
+			if (optptr[2] < 5) {
+				pp_ptr = optptr + 2;
+				goto error;
+			}
+			if (optptr[2] <= optlen) {
+				__u32 * timeptr = NULL;
+				if (optptr[2]+3 > optptr[1]) {
+					pp_ptr = optptr + 2;
+					goto error;
+				}
+				switch (optptr[3]&0xF) {
+				      case IPOPT_TS_TSONLY:
+					opt->ts = optptr - iph;
+					if (skb) 
+						timeptr = (__u32*)&optptr[optptr[2]-1];
+					opt->ts_needtime = 1;
+					optptr[2] += 4;
+					break;
+				      case IPOPT_TS_TSANDADDR:
+					if (optptr[2]+7 > optptr[1]) {
+						pp_ptr = optptr + 2;
+						goto error;
+					}
+					opt->ts = optptr - iph;
+					if (skb) {
+						memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
+						timeptr = (__u32*)&optptr[optptr[2]+3];
+					}
+					opt->ts_needaddr = 1;
+					opt->ts_needtime = 1;
+					optptr[2] += 8;
+					break;
+				      case IPOPT_TS_PRESPEC:
+					if (optptr[2]+7 > optptr[1]) {
+						pp_ptr = optptr + 2;
+						goto error;
+					}
+					opt->ts = optptr - iph;
+					{
+						u32 addr;
+						memcpy(&addr, &optptr[optptr[2]-1], 4);
+						if (inet_addr_type(addr) == RTN_UNICAST)
+							break;
+						if (skb)
+							timeptr = (__u32*)&optptr[optptr[2]+3];
+					}
+					opt->ts_needtime = 1;
+					optptr[2] += 8;
+					break;
+				      default:
+					if (!skb && !capable(CAP_NET_RAW)) {
+						pp_ptr = optptr + 3;
+						goto error;
+					}
+					break;
+				}
+				if (timeptr) {
+					struct timeval tv;
+					__u32  midtime;
+					do_gettimeofday(&tv);
+					midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
+					memcpy(timeptr, &midtime, sizeof(__u32));
+					opt->is_changed = 1;
+				}
+			} else {
+				unsigned overflow = optptr[3]>>4;
+				if (overflow == 15) {
+					pp_ptr = optptr + 3;
+					goto error;
+				}
+				opt->ts = optptr - iph;
+				if (skb) {
+					optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
+					opt->is_changed = 1;
+				}
+			}
+			break;
+		      case IPOPT_RA:
+			if (optlen < 4) {
+				pp_ptr = optptr + 1;
+				goto error;
+			}
+			if (optptr[2] == 0 && optptr[3] == 0)
+				opt->router_alert = optptr - iph;
+			break;
+		      case IPOPT_SEC:
+		      case IPOPT_SID:
+		      default:
+			if (!skb && !capable(CAP_NET_RAW)) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			break;
+		}
+		l -= optlen;
+		optptr += optlen;
+	}
+
+eol:
+	if (!pp_ptr)
+		return 0;
+
+error:
+	if (skb) {
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
+	}
+	return -EINVAL;
+}
+
+
+/*
+ *	Undo all the changes done by ip_options_compile().
+ */
+
+void ip_options_undo(struct ip_options * opt)
+{
+	if (opt->srr) {
+		unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
+		memmove(optptr+7, optptr+3, optptr[1]-7);
+		memcpy(optptr+3, &opt->faddr, 4);
+	}
+	if (opt->rr_needaddr) {
+		unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
+		optptr[2] -= 4;
+		memset(&optptr[optptr[2]-1], 0, 4);
+	}
+	if (opt->ts) {
+		unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
+		if (opt->ts_needtime) {
+			optptr[2] -= 4;
+			memset(&optptr[optptr[2]-1], 0, 4);
+			if ((optptr[3]&0xF) == IPOPT_TS_PRESPEC)
+				optptr[2] -= 4;
+		}
+		if (opt->ts_needaddr) {
+			optptr[2] -= 4;
+			memset(&optptr[optptr[2]-1], 0, 4);
+		}
+	}
+}
+
+int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user)
+{
+	struct ip_options *opt;
+
+	opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL);
+	if (!opt)
+		return -ENOMEM;
+	memset(opt, 0, sizeof(struct ip_options));
+	if (optlen) {
+		if (user) {
+			if (copy_from_user(opt->__data, data, optlen)) {
+				kfree(opt);
+				return -EFAULT;
+			}
+		} else
+			memcpy(opt->__data, data, optlen);
+	}
+	while (optlen & 3)
+		opt->__data[optlen++] = IPOPT_END;
+	opt->optlen = optlen;
+	opt->is_data = 1;
+	opt->is_setbyuser = 1;
+	if (optlen && ip_options_compile(opt, NULL)) {
+		kfree(opt);
+		return -EINVAL;
+	}
+	if (*optp)
+		kfree(*optp);
+	*optp = opt;
+	return 0;
+}
+
+void ip_forward_options(struct sk_buff *skb)
+{
+	struct   ip_options * opt	= &(IPCB(skb)->opt);
+	unsigned char * optptr;
+	struct rtable *rt = (struct rtable*)skb->dst;
+	unsigned char *raw = skb->nh.raw;
+
+	if (opt->rr_needaddr) {
+		optptr = (unsigned char *)raw + opt->rr;
+		ip_rt_get_source(&optptr[optptr[2]-5], rt);
+		opt->is_changed = 1;
+	}
+	if (opt->srr_is_hit) {
+		int srrptr, srrspace;
+
+		optptr = raw + opt->srr;
+
+		for ( srrptr=optptr[2], srrspace = optptr[1];
+		     srrptr <= srrspace;
+		     srrptr += 4
+		     ) {
+			if (srrptr + 3 > srrspace)
+				break;
+			if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0)
+				break;
+		}
+		if (srrptr + 3 <= srrspace) {
+			opt->is_changed = 1;
+			ip_rt_get_source(&optptr[srrptr-1], rt);
+			skb->nh.iph->daddr = rt->rt_dst;
+			optptr[2] = srrptr+4;
+		} else if (net_ratelimit())
+			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
+		if (opt->ts_needaddr) {
+			optptr = raw + opt->ts;
+			ip_rt_get_source(&optptr[optptr[2]-9], rt);
+			opt->is_changed = 1;
+		}
+	}
+	if (opt->is_changed) {
+		opt->is_changed = 0;
+		ip_send_check(skb->nh.iph);
+	}
+}
+
+int ip_options_rcv_srr(struct sk_buff *skb)
+{
+	struct ip_options *opt = &(IPCB(skb)->opt);
+	int srrspace, srrptr;
+	u32 nexthop;
+	struct iphdr *iph = skb->nh.iph;
+	unsigned char * optptr = skb->nh.raw + opt->srr;
+	struct rtable *rt = (struct rtable*)skb->dst;
+	struct rtable *rt2;
+	int err;
+
+	if (!opt->srr)
+		return 0;
+
+	if (skb->pkt_type != PACKET_HOST)
+		return -EINVAL;
+	if (rt->rt_type == RTN_UNICAST) {
+		if (!opt->is_strictroute)
+			return 0;
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl(16<<24));
+		return -EINVAL;
+	}
+	if (rt->rt_type != RTN_LOCAL)
+		return -EINVAL;
+
+	for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
+		if (srrptr + 3 > srrspace) {
+			icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24));
+			return -EINVAL;
+		}
+		memcpy(&nexthop, &optptr[srrptr-1], 4);
+
+		rt = (struct rtable*)skb->dst;
+		skb->dst = NULL;
+		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+		rt2 = (struct rtable*)skb->dst;
+		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
+			ip_rt_put(rt2);
+			skb->dst = &rt->u.dst;
+			return -EINVAL;
+		}
+		ip_rt_put(rt);
+		if (rt2->rt_type != RTN_LOCAL)
+			break;
+		/* Superfast 8) loopback forward */
+		memcpy(&iph->daddr, &optptr[srrptr-1], 4);
+		opt->is_changed = 1;
+	}
+	if (srrptr <= srrspace) {
+		opt->srr_is_hit = 1;
+		opt->is_changed = 1;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(ip_options_compile);
+EXPORT_SYMBOL(ip_options_undo);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
new file mode 100644
index 00000000000..30ab7b6ab76
--- /dev/null
+++ b/net/ipv4/ip_output.c
@@ -0,0 +1,1359 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The Internet Protocol (IP) output module.
+ *
+ * Version:	$Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Donald Becker, <becker@super.org>
+ *		Alan Cox, <Alan.Cox@linux.org>
+ *		Richard Underwood
+ *		Stefan Becker, <stefanb@yello.ping.de>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Hirokazu Takahashi, <taka@valinux.co.jp>
+ *
+ *	See ip_input.c for original log
+ *
+ *	Fixes:
+ *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
+ *		Mike Kilburn	:	htons() missing in ip_build_xmit.
+ *		Bradford Johnson:	Fix faulty handling of some frames when 
+ *					no route is found.
+ *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
+ *					(in case if packet not accepted by
+ *					output firewall rules)
+ *		Mike McLagan	:	Routing by source
+ *		Alexey Kuznetsov:	use new route cache
+ *		Andi Kleen:		Fix broken PMTU recovery and remove
+ *					some redundant tests.
+ *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
+ *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
+ *		Andi Kleen	:	Split fast and slow ip_build_xmit path 
+ *					for decreased register pressure on x86 
+ *					and more readibility. 
+ *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
+ *					silently drop skb instead of failing with -EPERM.
+ *		Detlev Wengorz	:	Copy protocol for fragments.
+ *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
+ *					datagrams.
+ *		Hirokazu Takahashi:	sendfile() on UDP works now.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/icmp.h>
+#include <net/raw.h>
+#include <net/checksum.h>
+#include <net/inetpeer.h>
+#include <net/checksum.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+
+/*
+ *      Shall we try to damage output packets if routing dev changes?
+ */
+
+int sysctl_ip_dynaddr;
+int sysctl_ip_default_ttl = IPDEFTTL;
+
+/* Generate a checksum for an outgoing IP datagram. */
+__inline__ void ip_send_check(struct iphdr *iph)
+{
+	iph->check = 0;
+	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+}
+
+/* dev_loopback_xmit for use with netfilter. */
+static int ip_dev_loopback_xmit(struct sk_buff *newskb)
+{
+	newskb->mac.raw = newskb->data;
+	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	newskb->pkt_type = PACKET_LOOPBACK;
+	newskb->ip_summed = CHECKSUM_UNNECESSARY;
+	BUG_TRAP(newskb->dst);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	nf_debug_ip_loopback_xmit(newskb);
+#endif
+	netif_rx(newskb);
+	return 0;
+}
+
+static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
+{
+	int ttl = inet->uc_ttl;
+
+	if (ttl < 0)
+		ttl = dst_metric(dst, RTAX_HOPLIMIT);
+	return ttl;
+}
+
+/* 
+ *		Add an ip header to a skbuff and send it out.
+ *
+ */
+int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
+			  u32 saddr, u32 daddr, struct ip_options *opt)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct rtable *rt = (struct rtable *)skb->dst;
+	struct iphdr *iph;
+
+	/* Build the IP header. */
+	if (opt)
+		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
+	else
+		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+
+	iph->version  = 4;
+	iph->ihl      = 5;
+	iph->tos      = inet->tos;
+	if (ip_dont_fragment(sk, &rt->u.dst))
+		iph->frag_off = htons(IP_DF);
+	else
+		iph->frag_off = 0;
+	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->daddr    = rt->rt_dst;
+	iph->saddr    = rt->rt_src;
+	iph->protocol = sk->sk_protocol;
+	iph->tot_len  = htons(skb->len);
+	ip_select_ident(iph, &rt->u.dst, sk);
+	skb->nh.iph   = iph;
+
+	if (opt && opt->optlen) {
+		iph->ihl += opt->optlen>>2;
+		ip_options_build(skb, opt, daddr, rt, 0);
+	}
+	ip_send_check(iph);
+
+	skb->priority = sk->sk_priority;
+
+	/* Send it out. */
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		       dst_output);
+}
+
+static inline int ip_finish_output2(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct hh_cache *hh = dst->hh;
+	struct net_device *dev = dst->dev;
+	int hh_len = LL_RESERVED_SPACE(dev);
+
+	/* Be paranoid, rather than too clever. */
+	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
+		if (skb2 == NULL) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		kfree_skb(skb);
+		skb = skb2;
+	}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	nf_debug_ip_finish_output2(skb);
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+	if (hh) {
+		int hh_alen;
+
+		read_lock_bh(&hh->hh_lock);
+		hh_alen = HH_DATA_ALIGN(hh->hh_len);
+  		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+		read_unlock_bh(&hh->hh_lock);
+	        skb_push(skb, hh->hh_len);
+		return hh->hh_output(skb);
+	} else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	if (net_ratelimit())
+		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+int ip_finish_output(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dst->dev;
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		       ip_finish_output2);
+}
+
+int ip_mc_output(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	struct rtable *rt = (struct rtable*)skb->dst;
+	struct net_device *dev = rt->u.dst.dev;
+
+	/*
+	 *	If the indicated interface is up and running, send the packet.
+	 */
+	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	/*
+	 *	Multicasts are looped back for other local users
+	 */
+
+	if (rt->rt_flags&RTCF_MULTICAST) {
+		if ((!sk || inet_sk(sk)->mc_loop)
+#ifdef CONFIG_IP_MROUTE
+		/* Small optimization: do not loopback not local frames,
+		   which returned after forwarding; they will be  dropped
+		   by ip_mr_input in any case.
+		   Note, that local frames are looped back to be delivered
+		   to local recipients.
+
+		   This check is duplicated in ip_mr_input at the moment.
+		 */
+		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
+#endif
+		) {
+			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+			if (newskb)
+				NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+					newskb->dev, 
+					ip_dev_loopback_xmit);
+		}
+
+		/* Multicasts with ttl 0 must not go beyond the host */
+
+		if (skb->nh.iph->ttl == 0) {
+			kfree_skb(skb);
+			return 0;
+		}
+	}
+
+	if (rt->rt_flags&RTCF_BROADCAST) {
+		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+		if (newskb)
+			NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
+				newskb->dev, ip_dev_loopback_xmit);
+	}
+
+	if (skb->len > dst_mtu(&rt->u.dst))
+		return ip_fragment(skb, ip_finish_output);
+	else
+		return ip_finish_output(skb);
+}
+
+int ip_output(struct sk_buff *skb)
+{
+	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+
+	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+		return ip_fragment(skb, ip_finish_output);
+	else
+		return ip_finish_output(skb);
+}
+
+int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options *opt = inet->opt;
+	struct rtable *rt;
+	struct iphdr *iph;
+
+	/* Skip all of this if the packet is already routed,
+	 * f.e. by something like SCTP.
+	 */
+	rt = (struct rtable *) skb->dst;
+	if (rt != NULL)
+		goto packet_routed;
+
+	/* Make sure we can route this packet. */
+	rt = (struct rtable *)__sk_dst_check(sk, 0);
+	if (rt == NULL) {
+		u32 daddr;
+
+		/* Use correct destination address if we have options. */
+		daddr = inet->daddr;
+		if(opt && opt->srr)
+			daddr = opt->faddr;
+
+		{
+			struct flowi fl = { .oif = sk->sk_bound_dev_if,
+					    .nl_u = { .ip4_u =
+						      { .daddr = daddr,
+							.saddr = inet->saddr,
+							.tos = RT_CONN_FLAGS(sk) } },
+					    .proto = sk->sk_protocol,
+					    .uli_u = { .ports =
+						       { .sport = inet->sport,
+							 .dport = inet->dport } } };
+
+			/* If this fails, retransmit mechanism of transport layer will
+			 * keep trying until route appears or the connection times
+			 * itself out.
+			 */
+			if (ip_route_output_flow(&rt, &fl, sk, 0))
+				goto no_route;
+		}
+		__sk_dst_set(sk, &rt->u.dst);
+		tcp_v4_setup_caps(sk, &rt->u.dst);
+	}
+	skb->dst = dst_clone(&rt->u.dst);
+
+packet_routed:
+	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+		goto no_route;
+
+	/* OK, we know where to send it, allocate and build IP header. */
+	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	*((__u16 *)iph)	= htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+	iph->tot_len = htons(skb->len);
+	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
+		iph->frag_off = htons(IP_DF);
+	else
+		iph->frag_off = 0;
+	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->protocol = sk->sk_protocol;
+	iph->saddr    = rt->rt_src;
+	iph->daddr    = rt->rt_dst;
+	skb->nh.iph   = iph;
+	/* Transport layer set skb->h.foo itself. */
+
+	if (opt && opt->optlen) {
+		iph->ihl += opt->optlen >> 2;
+		ip_options_build(skb, opt, inet->daddr, rt, 0);
+	}
+
+	ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
+
+	/* Add an IP checksum. */
+	ip_send_check(iph);
+
+	skb->priority = sk->sk_priority;
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		       dst_output);
+
+no_route:
+	IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EHOSTUNREACH;
+}
+
+
+static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+	to->pkt_type = from->pkt_type;
+	to->priority = from->priority;
+	to->protocol = from->protocol;
+	to->security = from->security;
+	dst_release(to->dst);
+	to->dst = dst_clone(from->dst);
+	to->dev = from->dev;
+
+	/* Copy the flags to each fragment. */
+	IPCB(to)->flags = IPCB(from)->flags;
+
+#ifdef CONFIG_NET_SCHED
+	to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+	to->nfmark = from->nfmark;
+	to->nfcache = from->nfcache;
+	/* Connection association is same as pre-frag packet */
+	nf_conntrack_put(to->nfct);
+	to->nfct = from->nfct;
+	nf_conntrack_get(to->nfct);
+	to->nfctinfo = from->nfctinfo;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(to->nf_bridge);
+	to->nf_bridge = from->nf_bridge;
+	nf_bridge_get(to->nf_bridge);
+#endif
+#ifdef CONFIG_NETFILTER_DEBUG
+	to->nf_debug = from->nf_debug;
+#endif
+#endif
+}
+
+/*
+ *	This IP datagram is too large to be sent in one piece.  Break it up into
+ *	smaller pieces (each of size equal to IP header plus
+ *	a block of the data of the original IP data part) that will yet fit in a
+ *	single device frame, and queue such a frame for sending.
+ */
+
+int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+{
+	struct iphdr *iph;
+	int raw = 0;
+	int ptr;
+	struct net_device *dev;
+	struct sk_buff *skb2;
+	unsigned int mtu, hlen, left, len, ll_rs;
+	int offset;
+	int not_last_frag;
+	struct rtable *rt = (struct rtable*)skb->dst;
+	int err = 0;
+
+	dev = rt->u.dst.dev;
+
+	/*
+	 *	Point into the IP datagram header.
+	 */
+
+	iph = skb->nh.iph;
+
+	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+			  htonl(dst_mtu(&rt->u.dst)));
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	/*
+	 *	Setup starting values.
+	 */
+
+	hlen = iph->ihl * 4;
+	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+
+	/* When frag_list is given, use it. First, check its validity:
+	 * some transformers could create wrong frag_list or break existing
+	 * one, it is not prohibited. In this case fall back to copying.
+	 *
+	 * LATER: this step can be merged to real generation of fragments,
+	 * we can switch to copy when see the first bad fragment.
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *frag;
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/* Correct geometry. */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen)
+			    goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		/* Everything is OK. Generate! */
+
+		err = 0;
+		offset = 0;
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		iph->tot_len = htons(first_len);
+		iph->frag_off = htons(IP_MF);
+		ip_send_check(iph);
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->ip_summed = CHECKSUM_NONE;
+				frag->h.raw = frag->data;
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, iph, hlen);
+				iph = frag->nh.iph;
+				iph->tot_len = htons(frag->len);
+				ip_copy_metadata(frag, skb);
+				if (offset == 0)
+					ip_options_fragment(frag);
+				offset += skb->len - hlen;
+				iph->frag_off = htons(offset>>3);
+				if (frag->next != NULL)
+					iph->frag_off |= htons(IP_MF);
+				/* Ready, complete checksum */
+				ip_send_check(iph);
+			}
+
+			err = output(skb);
+
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		if (err == 0) {
+			IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+			return 0;
+		}
+
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		return err;
+	}
+
+slow_path:
+	left = skb->len - hlen;		/* Space per frame */
+	ptr = raw + hlen;		/* Where to start from */
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
+	 * we need to make room for the encapsulating header */
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
+	mtu -= nf_bridge_pad(skb);
+#else
+	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
+#endif
+	/*
+	 *	Fragment the datagram.
+	 */
+
+	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+	not_last_frag = iph->frag_off & htons(IP_MF);
+
+	/*
+	 *	Keep copying data until we run out.
+	 */
+
+	while(left > 0)	{
+		len = left;
+		/* IF: it doesn't fit, use 'mtu' - the data space left */
+		if (len > mtu)
+			len = mtu;
+		/* IF: we are not sending upto and including the packet end
+		   then align the next start on an eight byte boundary */
+		if (len < left)	{
+			len &= ~7;
+		}
+		/*
+		 *	Allocate buffer.
+		 */
+
+		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
+			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		/*
+		 *	Set up data on packet
+		 */
+
+		ip_copy_metadata(skb2, skb);
+		skb_reserve(skb2, ll_rs);
+		skb_put(skb2, len + hlen);
+		skb2->nh.raw = skb2->data;
+		skb2->h.raw = skb2->data + hlen;
+
+		/*
+		 *	Charge the memory for the fragment to any owner
+		 *	it might possess
+		 */
+
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+
+		/*
+		 *	Copy the packet header into the new buffer.
+		 */
+
+		memcpy(skb2->nh.raw, skb->data, hlen);
+
+		/*
+		 *	Copy a block of the IP datagram.
+		 */
+		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+			BUG();
+		left -= len;
+
+		/*
+		 *	Fill in the new header fields.
+		 */
+		iph = skb2->nh.iph;
+		iph->frag_off = htons((offset >> 3));
+
+		/* ANK: dirty, but effective trick. Upgrade options only if
+		 * the segment to be fragmented was THE FIRST (otherwise,
+		 * options are already fixed) and make it ONCE
+		 * on the initial skb, so that all the following fragments
+		 * will inherit fixed options.
+		 */
+		if (offset == 0)
+			ip_options_fragment(skb);
+
+		/*
+		 *	Added AC : If we are fragmenting a fragment that's not the
+		 *		   last fragment then keep MF on each bit
+		 */
+		if (left > 0 || not_last_frag)
+			iph->frag_off |= htons(IP_MF);
+		ptr += len;
+		offset += len;
+
+		/*
+		 *	Put this fragment into the sending queue.
+		 */
+
+		IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+
+		iph->tot_len = htons(len + hlen);
+
+		ip_send_check(iph);
+
+		err = output(skb2);
+		if (err)
+			goto fail;
+	}
+	kfree_skb(skb);
+	IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+	return err;
+
+fail:
+	kfree_skb(skb); 
+	IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	return err;
+}
+
+int
+ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct iovec *iov = from;
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
+			return -EFAULT;
+	} else {
+		unsigned int csum = 0;
+		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
+			return -EFAULT;
+		skb->csum = csum_block_add(skb->csum, csum, odd);
+	}
+	return 0;
+}
+
+static inline unsigned int
+csum_page(struct page *page, int offset, int copy)
+{
+	char *kaddr;
+	unsigned int csum;
+	kaddr = kmap(page);
+	csum = csum_partial(kaddr + offset, copy, 0);
+	kunmap(page);
+	return csum;
+}
+
+/*
+ *	ip_append_data() and ip_append_page() can make one large IP datagram
+ *	from many pieces of data. Each pieces will be holded on the socket
+ *	until ip_push_pending_frames() is called. Each piece can be a page
+ *	or non-page data.
+ *	
+ *	Not only UDP, other transport protocols - e.g. raw sockets - can use
+ *	this interface potentially.
+ *
+ *	LATER: length must be adjusted by pad at tail, when it is required.
+ */
+int ip_append_data(struct sock *sk,
+		   int getfrag(void *from, char *to, int offset, int len,
+			       int odd, struct sk_buff *skb),
+		   void *from, int length, int transhdrlen,
+		   struct ipcm_cookie *ipc, struct rtable *rt,
+		   unsigned int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sk_buff *skb;
+
+	struct ip_options *opt = NULL;
+	int hh_len;
+	int exthdrlen;
+	int mtu;
+	int copy;
+	int err;
+	int offset = 0;
+	unsigned int maxfraglen, fragheaderlen;
+	int csummode = CHECKSUM_NONE;
+
+	if (flags&MSG_PROBE)
+		return 0;
+
+	if (skb_queue_empty(&sk->sk_write_queue)) {
+		/*
+		 * setup for corking.
+		 */
+		opt = ipc->opt;
+		if (opt) {
+			if (inet->cork.opt == NULL) {
+				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
+				if (unlikely(inet->cork.opt == NULL))
+					return -ENOBUFS;
+			}
+			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
+			inet->cork.flags |= IPCORK_OPT;
+			inet->cork.addr = ipc->addr;
+		}
+		dst_hold(&rt->u.dst);
+		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+		inet->cork.rt = rt;
+		inet->cork.length = 0;
+		sk->sk_sndmsg_page = NULL;
+		sk->sk_sndmsg_off = 0;
+		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+			length += exthdrlen;
+			transhdrlen += exthdrlen;
+		}
+	} else {
+		rt = inet->cork.rt;
+		if (inet->cork.flags & IPCORK_OPT)
+			opt = inet->cork.opt;
+
+		transhdrlen = 0;
+		exthdrlen = 0;
+		mtu = inet->cork.fragsize;
+	}
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+
+	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
+		return -EMSGSIZE;
+	}
+
+	/*
+	 * transhdrlen > 0 means that this is the first fragment and we wish
+	 * it won't be fragmented in the future.
+	 */
+	if (transhdrlen &&
+	    length + fragheaderlen <= mtu &&
+	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
+	    !exthdrlen)
+		csummode = CHECKSUM_HW;
+
+	inet->cork.length += length;
+
+	/* So, what's going on in the loop below?
+	 *
+	 * We use calculated fragment length to generate chained skb,
+	 * each of segments is IP fragment ready for sending to network after
+	 * adding appropriate IP header.
+	 */
+
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+		goto alloc_new_skb;
+
+	while (length > 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = mtu - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+		if (copy <= 0) {
+			char *data;
+			unsigned int datalen;
+			unsigned int fraglen;
+			unsigned int fraggap;
+			unsigned int alloclen;
+			struct sk_buff *skb_prev;
+alloc_new_skb:
+			skb_prev = skb;
+			if (skb_prev)
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
+
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > mtu - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
+			fraglen = datalen + fragheaderlen;
+
+			if ((flags & MSG_MORE) && 
+			    !(rt->u.dst.dev->features&NETIF_F_SG))
+				alloclen = mtu;
+			else
+				alloclen = datalen + fragheaderlen;
+
+			/* The last fragment gets additional space at tail.
+			 * Note, with MSG_MORE we overallocate on fragments,
+			 * because we have no idea what fragment will be
+			 * the last.
+			 */
+			if (datalen == length)
+				alloclen += rt->u.dst.trailer_len;
+
+			if (transhdrlen) {
+				skb = sock_alloc_send_skb(sk, 
+						alloclen + hh_len + 15,
+						(flags & MSG_DONTWAIT), &err);
+			} else {
+				skb = NULL;
+				if (atomic_read(&sk->sk_wmem_alloc) <=
+				    2 * sk->sk_sndbuf)
+					skb = sock_wmalloc(sk, 
+							   alloclen + hh_len + 15, 1,
+							   sk->sk_allocation);
+				if (unlikely(skb == NULL))
+					err = -ENOBUFS;
+			}
+			if (skb == NULL)
+				goto error;
+
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = csummode;
+			skb->csum = 0;
+			skb_reserve(skb, hh_len);
+
+			/*
+			 *	Find where to start putting bytes.
+			 */
+			data = skb_put(skb, fraglen);
+			skb->nh.raw = data + exthdrlen;
+			data += fragheaderlen;
+			skb->h.raw = data + exthdrlen;
+
+			if (fraggap) {
+				skb->csum = skb_copy_and_csum_bits(
+					skb_prev, maxfraglen,
+					data + transhdrlen, fraggap, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
+				data += fraggap;
+				skb_trim(skb_prev, maxfraglen);
+			}
+
+			copy = datalen - transhdrlen - fraggap;
+			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+				err = -EFAULT;
+				kfree_skb(skb);
+				goto error;
+			}
+
+			offset += copy;
+			length -= datalen - fraggap;
+			transhdrlen = 0;
+			exthdrlen = 0;
+			csummode = CHECKSUM_NONE;
+
+			/*
+			 * Put the packet on the pending queue.
+			 */
+			__skb_queue_tail(&sk->sk_write_queue, skb);
+			continue;
+		}
+
+		if (copy > length)
+			copy = length;
+
+		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+			unsigned int off;
+
+			off = skb->len;
+			if (getfrag(from, skb_put(skb, copy), 
+					offset, copy, off, skb) < 0) {
+				__skb_trim(skb, off);
+				err = -EFAULT;
+				goto error;
+			}
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			struct page *page = sk->sk_sndmsg_page;
+			int off = sk->sk_sndmsg_off;
+			unsigned int left;
+
+			if (page && (left = PAGE_SIZE - off) > 0) {
+				if (copy >= left)
+					copy = left;
+				if (page != frag->page) {
+					if (i == MAX_SKB_FRAGS) {
+						err = -EMSGSIZE;
+						goto error;
+					}
+					get_page(page);
+	 				skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+					frag = &skb_shinfo(skb)->frags[i];
+				}
+			} else if (i < MAX_SKB_FRAGS) {
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				page = alloc_pages(sk->sk_allocation, 0);
+				if (page == NULL)  {
+					err = -ENOMEM;
+					goto error;
+				}
+				sk->sk_sndmsg_page = page;
+				sk->sk_sndmsg_off = 0;
+
+				skb_fill_page_desc(skb, i, page, 0, 0);
+				frag = &skb_shinfo(skb)->frags[i];
+				skb->truesize += PAGE_SIZE;
+				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+			} else {
+				err = -EMSGSIZE;
+				goto error;
+			}
+			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+				err = -EFAULT;
+				goto error;
+			}
+			sk->sk_sndmsg_off += copy;
+			frag->size += copy;
+			skb->len += copy;
+			skb->data_len += copy;
+		}
+		offset += copy;
+		length -= copy;
+	}
+
+	return 0;
+
+error:
+	inet->cork.length -= length;
+	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	return err; 
+}
+
+ssize_t	ip_append_page(struct sock *sk, struct page *page,
+		       int offset, size_t size, int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sk_buff *skb;
+	struct rtable *rt;
+	struct ip_options *opt = NULL;
+	int hh_len;
+	int mtu;
+	int len;
+	int err;
+	unsigned int maxfraglen, fragheaderlen, fraggap;
+
+	if (inet->hdrincl)
+		return -EPERM;
+
+	if (flags&MSG_PROBE)
+		return 0;
+
+	if (skb_queue_empty(&sk->sk_write_queue))
+		return -EINVAL;
+
+	rt = inet->cork.rt;
+	if (inet->cork.flags & IPCORK_OPT)
+		opt = inet->cork.opt;
+
+	if (!(rt->u.dst.dev->features&NETIF_F_SG))
+		return -EOPNOTSUPP;
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	mtu = inet->cork.fragsize;
+
+	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+
+	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
+		return -EMSGSIZE;
+	}
+
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+		return -EINVAL;
+
+	inet->cork.length += size;
+
+	while (size > 0) {
+		int i;
+
+		/* Check if the remaining data fits into current packet. */
+		len = mtu - skb->len;
+		if (len < size)
+			len = maxfraglen - skb->len;
+		if (len <= 0) {
+			struct sk_buff *skb_prev;
+			char *data;
+			struct iphdr *iph;
+			int alloclen;
+
+			skb_prev = skb;
+			if (skb_prev)
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
+
+			alloclen = fragheaderlen + hh_len + fraggap + 15;
+			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
+			if (unlikely(!skb)) {
+				err = -ENOBUFS;
+				goto error;
+			}
+
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->csum = 0;
+			skb_reserve(skb, hh_len);
+
+			/*
+			 *	Find where to start putting bytes.
+			 */
+			data = skb_put(skb, fragheaderlen + fraggap);
+			skb->nh.iph = iph = (struct iphdr *)data;
+			data += fragheaderlen;
+			skb->h.raw = data;
+
+			if (fraggap) {
+				skb->csum = skb_copy_and_csum_bits(
+					skb_prev, maxfraglen,
+					data, fraggap, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
+				skb_trim(skb_prev, maxfraglen);
+			}
+
+			/*
+			 * Put the packet on the pending queue.
+			 */
+			__skb_queue_tail(&sk->sk_write_queue, skb);
+			continue;
+		}
+
+		i = skb_shinfo(skb)->nr_frags;
+		if (len > size)
+			len = size;
+		if (skb_can_coalesce(skb, i, page, offset)) {
+			skb_shinfo(skb)->frags[i-1].size += len;
+		} else if (i < MAX_SKB_FRAGS) {
+			get_page(page);
+			skb_fill_page_desc(skb, i, page, offset, len);
+		} else {
+			err = -EMSGSIZE;
+			goto error;
+		}
+
+		if (skb->ip_summed == CHECKSUM_NONE) {
+			unsigned int csum;
+			csum = csum_page(page, offset, len);
+			skb->csum = csum_block_add(skb->csum, csum, skb->len);
+		}
+
+		skb->len += len;
+		skb->data_len += len;
+		offset += len;
+		size -= len;
+	}
+	return 0;
+
+error:
+	inet->cork.length -= size;
+	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	return err;
+}
+
+/*
+ *	Combined all pending IP fragments on the socket as one IP datagram
+ *	and push them out.
+ */
+int ip_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *tmp_skb;
+	struct sk_buff **tail_skb;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options *opt = NULL;
+	struct rtable *rt = inet->cork.rt;
+	struct iphdr *iph;
+	int df = 0;
+	__u8 ttl;
+	int err = 0;
+
+	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
+		goto out;
+	tail_skb = &(skb_shinfo(skb)->frag_list);
+
+	/* move skb->data to ip header from ext header */
+	if (skb->data < skb->nh.raw)
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		*tail_skb = tmp_skb;
+		tail_skb = &(tmp_skb->next);
+		skb->len += tmp_skb->len;
+		skb->data_len += tmp_skb->len;
+		skb->truesize += tmp_skb->truesize;
+		__sock_put(tmp_skb->sk);
+		tmp_skb->destructor = NULL;
+		tmp_skb->sk = NULL;
+	}
+
+	/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
+	 * to fragment the frame generated here. No matter, what transforms
+	 * how transforms change size of the packet, it will come out.
+	 */
+	if (inet->pmtudisc != IP_PMTUDISC_DO)
+		skb->local_df = 1;
+
+	/* DF bit is set when we want to see DF on outgoing frames.
+	 * If local_df is set too, we still allow to fragment this frame
+	 * locally. */
+	if (inet->pmtudisc == IP_PMTUDISC_DO ||
+	    (skb->len <= dst_mtu(&rt->u.dst) &&
+	     ip_dont_fragment(sk, &rt->u.dst)))
+		df = htons(IP_DF);
+
+	if (inet->cork.flags & IPCORK_OPT)
+		opt = inet->cork.opt;
+
+	if (rt->rt_type == RTN_MULTICAST)
+		ttl = inet->mc_ttl;
+	else
+		ttl = ip_select_ttl(inet, &rt->u.dst);
+
+	iph = (struct iphdr *)skb->data;
+	iph->version = 4;
+	iph->ihl = 5;
+	if (opt) {
+		iph->ihl += opt->optlen>>2;
+		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
+	}
+	iph->tos = inet->tos;
+	iph->tot_len = htons(skb->len);
+	iph->frag_off = df;
+	if (!df) {
+		__ip_select_ident(iph, &rt->u.dst, 0);
+	} else {
+		iph->id = htons(inet->id++);
+	}
+	iph->ttl = ttl;
+	iph->protocol = sk->sk_protocol;
+	iph->saddr = rt->rt_src;
+	iph->daddr = rt->rt_dst;
+	ip_send_check(iph);
+
+	skb->priority = sk->sk_priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	/* Netfilter gets whole the not fragmented skb. */
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
+		      skb->dst->dev, dst_output);
+	if (err) {
+		if (err > 0)
+			err = inet->recverr ? net_xmit_errno(err) : 0;
+		if (err)
+			goto error;
+	}
+
+out:
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (inet->cork.opt) {
+		kfree(inet->cork.opt);
+		inet->cork.opt = NULL;
+	}
+	if (inet->cork.rt) {
+		ip_rt_put(inet->cork.rt);
+		inet->cork.rt = NULL;
+	}
+	return err;
+
+error:
+	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	goto out;
+}
+
+/*
+ *	Throw away all pending data on the socket.
+ */
+void ip_flush_pending_frames(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+		kfree_skb(skb);
+
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (inet->cork.opt) {
+		kfree(inet->cork.opt);
+		inet->cork.opt = NULL;
+	}
+	if (inet->cork.rt) {
+		ip_rt_put(inet->cork.rt);
+		inet->cork.rt = NULL;
+	}
+}
+
+
+/*
+ *	Fetch data from kernel space and fill in checksum if needed.
+ */
+static int ip_reply_glue_bits(void *dptr, char *to, int offset, 
+			      int len, int odd, struct sk_buff *skb)
+{
+	unsigned int csum;
+
+	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+	skb->csum = csum_block_add(skb->csum, csum, odd);
+	return 0;  
+}
+
+/* 
+ *	Generic function to send a packet as reply to another packet.
+ *	Used to send TCP resets so far. ICMP should use this function too.
+ *
+ *	Should run single threaded per socket because it uses the sock 
+ *     	structure to pass arguments.
+ *
+ *	LATER: switch from ip_build_xmit to ip_append_*
+ */
+void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+		   unsigned int len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct {
+		struct ip_options	opt;
+		char			data[40];
+	} replyopts;
+	struct ipcm_cookie ipc;
+	u32 daddr;
+	struct rtable *rt = (struct rtable*)skb->dst;
+
+	if (ip_options_echo(&replyopts.opt, skb))
+		return;
+
+	daddr = ipc.addr = rt->rt_src;
+	ipc.opt = NULL;
+
+	if (replyopts.opt.optlen) {
+		ipc.opt = &replyopts.opt;
+
+		if (ipc.opt->srr)
+			daddr = replyopts.opt.faddr;
+	}
+
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = rt->rt_spec_dst,
+						.tos = RT_TOS(skb->nh.iph->tos) } },
+				    /* Not quite clean, but right. */
+				    .uli_u = { .ports =
+					       { .sport = skb->h.th->dest,
+					         .dport = skb->h.th->source } },
+				    .proto = sk->sk_protocol };
+		if (ip_route_output_key(&rt, &fl))
+			return;
+	}
+
+	/* And let IP do all the hard work.
+
+	   This chunk is not reenterable, hence spinlock.
+	   Note that it uses the fact, that this function is called
+	   with locally disabled BH and that sk cannot be already spinlocked.
+	 */
+	bh_lock_sock(sk);
+	inet->tos = skb->nh.iph->tos;
+	sk->sk_priority = skb->priority;
+	sk->sk_protocol = skb->nh.iph->protocol;
+	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
+		       &ipc, rt, MSG_DONTWAIT);
+	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
+		if (arg->csumoffset >= 0)
+			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+		skb->ip_summed = CHECKSUM_NONE;
+		ip_push_pending_frames(sk);
+	}
+
+	bh_unlock_sock(sk);
+
+	ip_rt_put(rt);
+}
+
+/*
+ *	IP protocol layer initialiser
+ */
+
+static struct packet_type ip_packet_type = {
+	.type = __constant_htons(ETH_P_IP),
+	.func = ip_rcv,
+};
+
+/*
+ *	IP registers the packet type and then calls the subprotocol initialisers
+ */
+
+void __init ip_init(void)
+{
+	dev_add_pack(&ip_packet_type);
+
+	ip_rt_init();
+	inet_initpeers();
+
+#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
+	igmp_mc_proc_init();
+#endif
+}
+
+EXPORT_SYMBOL(ip_finish_output);
+EXPORT_SYMBOL(ip_fragment);
+EXPORT_SYMBOL(ip_generic_getfrag);
+EXPORT_SYMBOL(ip_queue_xmit);
+EXPORT_SYMBOL(ip_send_check);
+
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(sysctl_ip_default_ttl);
+#endif
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
new file mode 100644
index 00000000000..47012b93cad
--- /dev/null
+++ b/net/ipv4/ip_sockglue.c
@@ -0,0 +1,1093 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The IP to API glue.
+ *		
+ * Version:	$Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	see ip.c
+ *
+ * Fixes:
+ *		Many		:	Split from ip.c , see ip.c for history.
+ *		Martin Mares	:	TOS setting fixed.
+ *		Alan Cox	:	Fixed a couple of oopses in Martin's 
+ *					TOS tweaks.
+ *		Mike McLagan	:	Routing by source
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/netfilter.h>
+#include <linux/route.h>
+#include <linux/mroute.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <net/transp_v6.h>
+#endif
+
+#include <linux/errqueue.h>
+#include <asm/uaccess.h>
+
+#define IP_CMSG_PKTINFO		1
+#define IP_CMSG_TTL		2
+#define IP_CMSG_TOS		4
+#define IP_CMSG_RECVOPTS	8
+#define IP_CMSG_RETOPTS		16
+
+/*
+ *	SOL_IP control messages.
+ */
+
+static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct in_pktinfo info;
+	struct rtable *rt = (struct rtable *)skb->dst;
+
+	info.ipi_addr.s_addr = skb->nh.iph->daddr;
+	if (rt) {
+		info.ipi_ifindex = rt->rt_iif;
+		info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
+	} else {
+		info.ipi_ifindex = 0;
+		info.ipi_spec_dst.s_addr = 0;
+	}
+
+	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
+}
+
+static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
+{
+	int ttl = skb->nh.iph->ttl;
+	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
+}
+
+static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
+{
+	put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
+}
+
+static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
+{
+	if (IPCB(skb)->opt.optlen == 0)
+		return;
+
+	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
+}
+
+
+static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+{
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options * opt = (struct ip_options*)optbuf;
+
+	if (IPCB(skb)->opt.optlen == 0)
+		return;
+
+	if (ip_options_echo(opt, skb)) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return;
+	}
+	ip_options_undo(opt);
+
+	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
+}
+
+
+void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct inet_sock *inet = inet_sk(skb->sk);
+	unsigned flags = inet->cmsg_flags;
+
+	/* Ordered by supposed usage frequency */
+	if (flags & 1)
+		ip_cmsg_recv_pktinfo(msg, skb);
+	if ((flags>>=1) == 0)
+		return;
+
+	if (flags & 1)
+		ip_cmsg_recv_ttl(msg, skb);
+	if ((flags>>=1) == 0)
+		return;
+
+	if (flags & 1)
+		ip_cmsg_recv_tos(msg, skb);
+	if ((flags>>=1) == 0)
+		return;
+
+	if (flags & 1)
+		ip_cmsg_recv_opts(msg, skb);
+	if ((flags>>=1) == 0)
+		return;
+
+	if (flags & 1)
+		ip_cmsg_recv_retopts(msg, skb);
+}
+
+int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
+{
+	int err;
+	struct cmsghdr *cmsg;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+		if (cmsg->cmsg_level != SOL_IP)
+			continue;
+		switch (cmsg->cmsg_type) {
+		case IP_RETOPTS:
+			err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+			err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
+			if (err)
+				return err;
+			break;
+		case IP_PKTINFO:
+		{
+			struct in_pktinfo *info;
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
+				return -EINVAL;
+			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
+			ipc->oif = info->ipi_ifindex;
+			ipc->addr = info->ipi_spec_dst.s_addr;
+			break;
+		}
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+
+/* Special input handler for packets caught by router alert option.
+   They are selected only by protocol field, and then processed likely
+   local ones; but only if someone wants them! Otherwise, router
+   not running rsvpd will kill RSVP.
+
+   It is user level problem, what it will make with them.
+   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
+   but receiver should be enough clever f.e. to forward mtrace requests,
+   sent to multicast group to reach destination designated router.
+ */
+struct ip_ra_chain *ip_ra_chain;
+DEFINE_RWLOCK(ip_ra_lock);
+
+int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *))
+{
+	struct ip_ra_chain *ra, *new_ra, **rap;
+
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW)
+		return -EINVAL;
+
+	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+
+	write_lock_bh(&ip_ra_lock);
+	for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+		if (ra->sk == sk) {
+			if (on) {
+				write_unlock_bh(&ip_ra_lock);
+				if (new_ra)
+					kfree(new_ra);
+				return -EADDRINUSE;
+			}
+			*rap = ra->next;
+			write_unlock_bh(&ip_ra_lock);
+
+			if (ra->destructor)
+				ra->destructor(sk);
+			sock_put(sk);
+			kfree(ra);
+			return 0;
+		}
+	}
+	if (new_ra == NULL) {
+		write_unlock_bh(&ip_ra_lock);
+		return -ENOBUFS;
+	}
+	new_ra->sk = sk;
+	new_ra->destructor = destructor;
+
+	new_ra->next = ra;
+	*rap = new_ra;
+	sock_hold(sk);
+	write_unlock_bh(&ip_ra_lock);
+
+	return 0;
+}
+
+void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 
+		   u16 port, u32 info, u8 *payload)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sock_exterr_skb *serr;
+
+	if (!inet->recverr)
+		return;
+
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	serr = SKB_EXT_ERR(skb);  
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
+	serr->ee.ee_type = skb->h.icmph->type; 
+	serr->ee.ee_code = skb->h.icmph->code;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_info = info;
+	serr->ee.ee_data = 0;
+	serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+	serr->port = port;
+
+	skb->h.raw = payload;
+	if (!skb_pull(skb, payload - skb->data) ||
+	    sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sock_exterr_skb *serr;
+	struct iphdr *iph;
+	struct sk_buff *skb;
+
+	if (!inet->recverr)
+		return;
+
+	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
+	skb->nh.iph = iph;
+	iph->daddr = daddr;
+
+	serr = SKB_EXT_ERR(skb);  
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+	serr->ee.ee_type = 0; 
+	serr->ee.ee_code = 0;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_info = info;
+	serr->ee.ee_data = 0;
+	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->port = port;
+
+	skb->h.raw = skb->tail;
+	__skb_pull(skb, skb->tail - skb->data);
+
+	if (sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+/* 
+ *	Handle MSG_ERRQUEUE
+ */
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	struct sockaddr_in *sin;
+	struct {
+		struct sock_extended_err ee;
+		struct sockaddr_in	 offender;
+	} errhdr;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+
+	sin = (struct sockaddr_in *)msg->msg_name;
+	if (sin) {
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = *(u32*)(skb->nh.raw + serr->addr_offset);
+		sin->sin_port = serr->port;
+		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+
+	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
+	sin = &errhdr.offender;
+	sin->sin_family = AF_UNSPEC;
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_port = 0;
+		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+		if (inet->cmsg_flags)
+			ip_cmsg_recv(msg, skb);
+	}
+
+	put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
+
+	/* Now we could try to dump offended packet options */
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_irq(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+
+out_free_skb:	
+	kfree_skb(skb);
+out:
+	return err;
+}
+
+
+/*
+ *	Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
+ *	an IP socket.
+ */
+
+int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int val=0,err;
+
+	if (level != SOL_IP)
+		return -ENOPROTOOPT;
+
+	if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | 
+			    (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | 
+			    (1<<IP_RETOPTS) | (1<<IP_TOS) | 
+			    (1<<IP_TTL) | (1<<IP_HDRINCL) | 
+			    (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 
+			    (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND))) || 
+				optname == IP_MULTICAST_TTL || 
+				optname == IP_MULTICAST_LOOP) { 
+		if (optlen >= sizeof(int)) {
+			if (get_user(val, (int __user *) optval))
+				return -EFAULT;
+		} else if (optlen >= sizeof(char)) {
+			unsigned char ucval;
+
+			if (get_user(ucval, (unsigned char __user *) optval))
+				return -EFAULT;
+			val = (int) ucval;
+		}
+	}
+
+	/* If optlen==0, it is equivalent to val == 0 */
+
+#ifdef CONFIG_IP_MROUTE
+	if (optname >= MRT_BASE && optname <= (MRT_BASE + 10))
+		return ip_mroute_setsockopt(sk,optname,optval,optlen);
+#endif
+
+	err = 0;
+	lock_sock(sk);
+
+	switch (optname) {
+		case IP_OPTIONS:
+		{
+			struct ip_options * opt = NULL;
+			if (optlen > 40 || optlen < 0)
+				goto e_inval;
+			err = ip_options_get(&opt, optval, optlen, 1);
+			if (err)
+				break;
+			if (sk->sk_type == SOCK_STREAM) {
+				struct tcp_sock *tp = tcp_sk(sk);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+				if (sk->sk_family == PF_INET ||
+				    (!((1 << sk->sk_state) &
+				       (TCPF_LISTEN | TCPF_CLOSE)) &&
+				     inet->daddr != LOOPBACK4_IPV6)) {
+#endif
+					if (inet->opt)
+						tp->ext_header_len -= inet->opt->optlen;
+					if (opt)
+						tp->ext_header_len += opt->optlen;
+					tcp_sync_mss(sk, tp->pmtu_cookie);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+				}
+#endif
+			}
+			opt = xchg(&inet->opt, opt);
+			if (opt)
+				kfree(opt);
+			break;
+		}
+		case IP_PKTINFO:
+			if (val)
+				inet->cmsg_flags |= IP_CMSG_PKTINFO;
+			else
+				inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
+			break;
+		case IP_RECVTTL:
+			if (val)
+				inet->cmsg_flags |=  IP_CMSG_TTL;
+			else
+				inet->cmsg_flags &= ~IP_CMSG_TTL;
+			break;
+		case IP_RECVTOS:
+			if (val)
+				inet->cmsg_flags |=  IP_CMSG_TOS;
+			else
+				inet->cmsg_flags &= ~IP_CMSG_TOS;
+			break;
+		case IP_RECVOPTS:
+			if (val)
+				inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
+			else
+				inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
+			break;
+		case IP_RETOPTS:
+			if (val)
+				inet->cmsg_flags |= IP_CMSG_RETOPTS;
+			else
+				inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+			break;
+		case IP_TOS:	/* This sets both TOS and Precedence */
+			if (sk->sk_type == SOCK_STREAM) {
+				val &= ~3;
+				val |= inet->tos & 3;
+			}
+			if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && 
+			    !capable(CAP_NET_ADMIN)) {
+				err = -EPERM;
+				break;
+			}
+			if (inet->tos != val) {
+				inet->tos = val;
+				sk->sk_priority = rt_tos2priority(val);
+				sk_dst_reset(sk); 
+			}
+			break;
+		case IP_TTL:
+			if (optlen<1)
+				goto e_inval;
+			if (val != -1 && (val < 1 || val>255))
+				goto e_inval;
+			inet->uc_ttl = val;
+			break;
+		case IP_HDRINCL:
+			if (sk->sk_type != SOCK_RAW) {
+				err = -ENOPROTOOPT;
+				break;
+			}
+			inet->hdrincl = val ? 1 : 0;
+			break;
+		case IP_MTU_DISCOVER:
+			if (val<0 || val>2)
+				goto e_inval;
+			inet->pmtudisc = val;
+			break;
+		case IP_RECVERR:
+			inet->recverr = !!val;
+			if (!val)
+				skb_queue_purge(&sk->sk_error_queue);
+			break;
+		case IP_MULTICAST_TTL:
+			if (sk->sk_type == SOCK_STREAM)
+				goto e_inval;
+			if (optlen<1)
+				goto e_inval;
+			if (val==-1)
+				val = 1;
+			if (val < 0 || val > 255)
+				goto e_inval;
+			inet->mc_ttl = val;
+	                break;
+		case IP_MULTICAST_LOOP: 
+			if (optlen<1)
+				goto e_inval;
+			inet->mc_loop = !!val;
+	                break;
+		case IP_MULTICAST_IF: 
+		{
+			struct ip_mreqn mreq;
+			struct net_device *dev = NULL;
+
+			if (sk->sk_type == SOCK_STREAM)
+				goto e_inval;
+			/*
+			 *	Check the arguments are allowable
+			 */
+
+			err = -EFAULT;
+			if (optlen >= sizeof(struct ip_mreqn)) {
+				if (copy_from_user(&mreq,optval,sizeof(mreq)))
+					break;
+			} else {
+				memset(&mreq, 0, sizeof(mreq));
+				if (optlen >= sizeof(struct in_addr) &&
+				    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+					break;
+			}
+
+			if (!mreq.imr_ifindex) {
+				if (mreq.imr_address.s_addr == INADDR_ANY) {
+					inet->mc_index = 0;
+					inet->mc_addr  = 0;
+					err = 0;
+					break;
+				}
+				dev = ip_dev_find(mreq.imr_address.s_addr);
+				if (dev) {
+					mreq.imr_ifindex = dev->ifindex;
+					dev_put(dev);
+				}
+			} else
+				dev = __dev_get_by_index(mreq.imr_ifindex);
+
+
+			err = -EADDRNOTAVAIL;
+			if (!dev)
+				break;
+
+			err = -EINVAL;
+			if (sk->sk_bound_dev_if &&
+			    mreq.imr_ifindex != sk->sk_bound_dev_if)
+				break;
+
+			inet->mc_index = mreq.imr_ifindex;
+			inet->mc_addr  = mreq.imr_address.s_addr;
+			err = 0;
+			break;
+		}
+
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP: 
+		{
+			struct ip_mreqn mreq;
+
+			if (optlen < sizeof(struct ip_mreq))
+				goto e_inval;
+			err = -EFAULT;
+			if (optlen >= sizeof(struct ip_mreqn)) {
+				if(copy_from_user(&mreq,optval,sizeof(mreq)))
+					break;
+			} else {
+				memset(&mreq, 0, sizeof(mreq));
+				if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+					break; 
+			}
+
+			if (optname == IP_ADD_MEMBERSHIP)
+				err = ip_mc_join_group(sk, &mreq);
+			else
+				err = ip_mc_leave_group(sk, &mreq);
+			break;
+		}
+		case IP_MSFILTER:
+		{
+			extern int sysctl_optmem_max;
+			extern int sysctl_igmp_max_msf;
+			struct ip_msfilter *msf;
+
+			if (optlen < IP_MSFILTER_SIZE(0))
+				goto e_inval;
+			if (optlen > sysctl_optmem_max) {
+				err = -ENOBUFS;
+				break;
+			}
+			msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL);
+			if (msf == 0) {
+				err = -ENOBUFS;
+				break;
+			}
+			err = -EFAULT;
+			if (copy_from_user(msf, optval, optlen)) {
+				kfree(msf);
+				break;
+			}
+			/* numsrc >= (1G-4) overflow in 32 bits */
+			if (msf->imsf_numsrc >= 0x3ffffffcU ||
+			    msf->imsf_numsrc > sysctl_igmp_max_msf) {
+				kfree(msf);
+				err = -ENOBUFS;
+				break;
+			}
+			if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
+				kfree(msf);
+				err = -EINVAL;
+				break;
+			}
+			err = ip_mc_msfilter(sk, msf, 0);
+			kfree(msf);
+			break;
+		}
+		case IP_BLOCK_SOURCE:
+		case IP_UNBLOCK_SOURCE:
+		case IP_ADD_SOURCE_MEMBERSHIP:
+		case IP_DROP_SOURCE_MEMBERSHIP:
+		{
+			struct ip_mreq_source mreqs;
+			int omode, add;
+
+			if (optlen != sizeof(struct ip_mreq_source))
+				goto e_inval;
+			if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
+				err = -EFAULT;
+				break;
+			}
+			if (optname == IP_BLOCK_SOURCE) {
+				omode = MCAST_EXCLUDE;
+				add = 1;
+			} else if (optname == IP_UNBLOCK_SOURCE) {
+				omode = MCAST_EXCLUDE;
+				add = 0;
+			} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
+				struct ip_mreqn mreq;
+
+				mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
+				mreq.imr_address.s_addr = mreqs.imr_interface;
+				mreq.imr_ifindex = 0;
+				err = ip_mc_join_group(sk, &mreq);
+				if (err)
+					break;
+				omode = MCAST_INCLUDE;
+				add = 1;
+			} else /*IP_DROP_SOURCE_MEMBERSHIP */ {
+				omode = MCAST_INCLUDE;
+				add = 0;
+			}
+			err = ip_mc_source(add, omode, sk, &mreqs, 0);
+			break;
+		}
+		case MCAST_JOIN_GROUP:
+		case MCAST_LEAVE_GROUP: 
+		{
+			struct group_req greq;
+			struct sockaddr_in *psin;
+			struct ip_mreqn mreq;
+
+			if (optlen < sizeof(struct group_req))
+				goto e_inval;
+			err = -EFAULT;
+			if(copy_from_user(&greq, optval, sizeof(greq)))
+				break;
+			psin = (struct sockaddr_in *)&greq.gr_group;
+			if (psin->sin_family != AF_INET)
+				goto e_inval;
+			memset(&mreq, 0, sizeof(mreq));
+			mreq.imr_multiaddr = psin->sin_addr;
+			mreq.imr_ifindex = greq.gr_interface;
+
+			if (optname == MCAST_JOIN_GROUP)
+				err = ip_mc_join_group(sk, &mreq);
+			else
+				err = ip_mc_leave_group(sk, &mreq);
+			break;
+		}
+		case MCAST_JOIN_SOURCE_GROUP:
+		case MCAST_LEAVE_SOURCE_GROUP:
+		case MCAST_BLOCK_SOURCE:
+		case MCAST_UNBLOCK_SOURCE:
+		{
+			struct group_source_req greqs;
+			struct ip_mreq_source mreqs;
+			struct sockaddr_in *psin;
+			int omode, add;
+
+			if (optlen != sizeof(struct group_source_req))
+				goto e_inval;
+			if (copy_from_user(&greqs, optval, sizeof(greqs))) {
+				err = -EFAULT;
+				break;
+			}
+			if (greqs.gsr_group.ss_family != AF_INET ||
+			    greqs.gsr_source.ss_family != AF_INET) {
+				err = -EADDRNOTAVAIL;
+				break;
+			}
+			psin = (struct sockaddr_in *)&greqs.gsr_group;
+			mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+			psin = (struct sockaddr_in *)&greqs.gsr_source;
+			mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+			mreqs.imr_interface = 0; /* use index for mc_source */
+
+			if (optname == MCAST_BLOCK_SOURCE) {
+				omode = MCAST_EXCLUDE;
+				add = 1;
+			} else if (optname == MCAST_UNBLOCK_SOURCE) {
+				omode = MCAST_EXCLUDE;
+				add = 0;
+			} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+				struct ip_mreqn mreq;
+
+				psin = (struct sockaddr_in *)&greqs.gsr_group;
+				mreq.imr_multiaddr = psin->sin_addr;
+				mreq.imr_address.s_addr = 0;
+				mreq.imr_ifindex = greqs.gsr_interface;
+				err = ip_mc_join_group(sk, &mreq);
+				if (err)
+					break;
+				greqs.gsr_interface = mreq.imr_ifindex;
+				omode = MCAST_INCLUDE;
+				add = 1;
+			} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+				omode = MCAST_INCLUDE;
+				add = 0;
+			}
+			err = ip_mc_source(add, omode, sk, &mreqs,
+				greqs.gsr_interface);
+			break;
+		}
+		case MCAST_MSFILTER:
+		{
+			extern int sysctl_optmem_max;
+			extern int sysctl_igmp_max_msf;
+			struct sockaddr_in *psin;
+			struct ip_msfilter *msf = NULL;
+			struct group_filter *gsf = NULL;
+			int msize, i, ifindex;
+
+			if (optlen < GROUP_FILTER_SIZE(0))
+				goto e_inval;
+			if (optlen > sysctl_optmem_max) {
+				err = -ENOBUFS;
+				break;
+			}
+			gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+			if (gsf == 0) {
+				err = -ENOBUFS;
+				break;
+			}
+			err = -EFAULT;
+			if (copy_from_user(gsf, optval, optlen)) {
+				goto mc_msf_out;
+			}
+			/* numsrc >= (4G-140)/128 overflow in 32 bits */
+			if (gsf->gf_numsrc >= 0x1ffffff ||
+			    gsf->gf_numsrc > sysctl_igmp_max_msf) {
+				err = -ENOBUFS;
+				goto mc_msf_out;
+			}
+			if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+				err = -EINVAL;
+				goto mc_msf_out;
+			}
+			msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
+			msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL);
+			if (msf == 0) {
+				err = -ENOBUFS;
+				goto mc_msf_out;
+			}
+			ifindex = gsf->gf_interface;
+			psin = (struct sockaddr_in *)&gsf->gf_group;
+			if (psin->sin_family != AF_INET) {
+				err = -EADDRNOTAVAIL;
+				goto mc_msf_out;
+			}
+			msf->imsf_multiaddr = psin->sin_addr.s_addr;
+			msf->imsf_interface = 0;
+			msf->imsf_fmode = gsf->gf_fmode;
+			msf->imsf_numsrc = gsf->gf_numsrc;
+			err = -EADDRNOTAVAIL;
+			for (i=0; i<gsf->gf_numsrc; ++i) {
+				psin = (struct sockaddr_in *)&gsf->gf_slist[i];
+
+				if (psin->sin_family != AF_INET)
+					goto mc_msf_out;
+				msf->imsf_slist[i] = psin->sin_addr.s_addr;
+			}
+			kfree(gsf);
+			gsf = NULL;
+
+			err = ip_mc_msfilter(sk, msf, ifindex);
+mc_msf_out:
+			if (msf)
+				kfree(msf);
+			if (gsf)
+				kfree(gsf);
+			break;
+		}
+		case IP_ROUTER_ALERT:	
+			err = ip_ra_control(sk, val ? 1 : 0, NULL);
+			break;
+
+		case IP_FREEBIND:
+			if (optlen<1)
+				goto e_inval;
+			inet->freebind = !!val; 
+	                break;			
+ 
+		case IP_IPSEC_POLICY:
+		case IP_XFRM_POLICY:
+			err = xfrm_user_policy(sk, optname, optval, optlen);
+			break;
+
+		default:
+#ifdef CONFIG_NETFILTER
+			err = nf_setsockopt(sk, PF_INET, optname, optval, 
+					    optlen);
+#else
+			err = -ENOPROTOOPT;
+#endif
+			break;
+	}
+	release_sock(sk);
+	return err;
+
+e_inval:
+	release_sock(sk);
+	return -EINVAL;
+}
+
+/*
+ *	Get the options. Note for future reference. The GET of IP options gets the
+ *	_received_ ones. The set sets the _sent_ ones.
+ */
+
+int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int val;
+	int len;
+	
+	if(level!=SOL_IP)
+		return -EOPNOTSUPP;
+
+#ifdef CONFIG_IP_MROUTE
+	if(optname>=MRT_BASE && optname <=MRT_BASE+10)
+	{
+		return ip_mroute_getsockopt(sk,optname,optval,optlen);
+	}
+#endif
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+	if(len < 0)
+		return -EINVAL;
+		
+	lock_sock(sk);
+
+	switch(optname)	{
+		case IP_OPTIONS:
+			{
+				unsigned char optbuf[sizeof(struct ip_options)+40];
+				struct ip_options * opt = (struct ip_options*)optbuf;
+				opt->optlen = 0;
+				if (inet->opt)
+					memcpy(optbuf, inet->opt,
+					       sizeof(struct ip_options)+
+					       inet->opt->optlen);
+				release_sock(sk);
+
+				if (opt->optlen == 0) 
+					return put_user(0, optlen);
+
+				ip_options_undo(opt);
+
+				len = min_t(unsigned int, len, opt->optlen);
+				if(put_user(len, optlen))
+					return -EFAULT;
+				if(copy_to_user(optval, opt->__data, len))
+					return -EFAULT;
+				return 0;
+			}
+		case IP_PKTINFO:
+			val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
+			break;
+		case IP_RECVTTL:
+			val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
+			break;
+		case IP_RECVTOS:
+			val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
+			break;
+		case IP_RECVOPTS:
+			val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+			break;
+		case IP_RETOPTS:
+			val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
+			break;
+		case IP_TOS:
+			val = inet->tos;
+			break;
+		case IP_TTL:
+			val = (inet->uc_ttl == -1 ?
+			       sysctl_ip_default_ttl :
+			       inet->uc_ttl);
+			break;
+		case IP_HDRINCL:
+			val = inet->hdrincl;
+			break;
+		case IP_MTU_DISCOVER:
+			val = inet->pmtudisc;
+			break;
+		case IP_MTU:
+		{
+			struct dst_entry *dst;
+			val = 0;
+			dst = sk_dst_get(sk);
+			if (dst) {
+				val = dst_mtu(dst);
+				dst_release(dst);
+			}
+			if (!val) {
+				release_sock(sk);
+				return -ENOTCONN;
+			}
+			break;
+		}
+		case IP_RECVERR:
+			val = inet->recverr;
+			break;
+		case IP_MULTICAST_TTL:
+			val = inet->mc_ttl;
+			break;
+		case IP_MULTICAST_LOOP:
+			val = inet->mc_loop;
+			break;
+		case IP_MULTICAST_IF:
+		{
+			struct in_addr addr;
+			len = min_t(unsigned int, len, sizeof(struct in_addr));
+			addr.s_addr = inet->mc_addr;
+			release_sock(sk);
+
+  			if(put_user(len, optlen))
+  				return -EFAULT;
+			if(copy_to_user(optval, &addr, len))
+				return -EFAULT;
+			return 0;
+		}
+		case IP_MSFILTER:
+		{
+			struct ip_msfilter msf;
+			int err;
+
+			if (len < IP_MSFILTER_SIZE(0)) {
+				release_sock(sk);
+				return -EINVAL;
+			}
+			if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			err = ip_mc_msfget(sk, &msf,
+				(struct ip_msfilter __user *)optval, optlen);
+			release_sock(sk);
+			return err;
+		}
+		case MCAST_MSFILTER:
+		{
+			struct group_filter gsf;
+			int err;
+
+			if (len < GROUP_FILTER_SIZE(0)) {
+				release_sock(sk);
+				return -EINVAL;
+			}
+			if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			err = ip_mc_gsfget(sk, &gsf,
+				(struct group_filter __user *)optval, optlen);
+			release_sock(sk);
+			return err;
+		}
+		case IP_PKTOPTIONS:		
+		{
+			struct msghdr msg;
+
+			release_sock(sk);
+
+			if (sk->sk_type != SOCK_STREAM)
+				return -ENOPROTOOPT;
+
+			msg.msg_control = optval;
+			msg.msg_controllen = len;
+			msg.msg_flags = 0;
+
+			if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+				struct in_pktinfo info;
+
+				info.ipi_addr.s_addr = inet->rcv_saddr;
+				info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+				info.ipi_ifindex = inet->mc_index;
+				put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
+			}
+			if (inet->cmsg_flags & IP_CMSG_TTL) {
+				int hlim = inet->mc_ttl;
+				put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+			}
+			len -= msg.msg_controllen;
+			return put_user(len, optlen);
+		}
+		case IP_FREEBIND: 
+			val = inet->freebind; 
+			break; 
+		default:
+#ifdef CONFIG_NETFILTER
+			val = nf_getsockopt(sk, PF_INET, optname, optval, 
+					    &len);
+			release_sock(sk);
+			if (val >= 0)
+				val = put_user(len, optlen);
+			return val;
+#else
+			release_sock(sk);
+			return -ENOPROTOOPT;
+#endif
+	}
+	release_sock(sk);
+	
+	if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
+		unsigned char ucval = (unsigned char)val;
+		len = 1;
+		if(put_user(len, optlen))
+			return -EFAULT;
+		if(copy_to_user(optval,&ucval,1))
+			return -EFAULT;
+	} else {
+		len = min_t(unsigned int, sizeof(int), len);
+		if(put_user(len, optlen))
+			return -EFAULT;
+		if(copy_to_user(optval,&val,len))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(ip_cmsg_recv);
+
+#ifdef CONFIG_IP_SCTP_MODULE
+EXPORT_SYMBOL(ip_getsockopt);
+EXPORT_SYMBOL(ip_setsockopt);
+#endif
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
new file mode 100644
index 00000000000..1a23c5263b9
--- /dev/null
+++ b/net/ipv4/ipcomp.c
@@ -0,0 +1,524 @@
+/*
+ * IP Payload Compression Protocol (IPComp) - RFC3173.
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ * Todo:
+ *   - Tunable compression parameters.
+ *   - Compression stats.
+ *   - Adaptive compression.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/scatterlist.h>
+#include <asm/semaphore.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/icmp.h>
+#include <net/ipcomp.h>
+
+struct ipcomp_tfms {
+	struct list_head list;
+	struct crypto_tfm **tfms;
+	int users;
+};
+
+static DECLARE_MUTEX(ipcomp_resource_sem);
+static void **ipcomp_scratches;
+static int ipcomp_scratch_users;
+static LIST_HEAD(ipcomp_tfms_list);
+
+static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err, plen, dlen;
+	struct iphdr *iph;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+	
+	plen = skb->len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data;
+
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	if (err)
+		goto out;
+
+	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err)
+		goto out;
+		
+	skb_put(skb, dlen - plen);
+	memcpy(skb->data, scratch, dlen);
+	iph = skb->nh.iph;
+	iph->tot_len = htons(dlen + iph->ihl * 4);
+out:	
+	put_cpu();
+	return err;
+}
+
+static int ipcomp_input(struct xfrm_state *x,
+                        struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	u8 nexthdr;
+	int err = 0;
+	struct iphdr *iph;
+	union {
+		struct iphdr	iph;
+		char 		buf[60];
+	} tmp_iph;
+
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+	    	err = -ENOMEM;
+	    	goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */	
+	iph = skb->nh.iph;
+	memcpy(&tmp_iph, iph, iph->ihl * 4);
+	nexthdr = *(u8 *)skb->data;
+	skb_pull(skb, sizeof(struct ip_comp_hdr));
+	skb->nh.raw += sizeof(struct ip_comp_hdr);
+	memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4);
+	iph = skb->nh.iph;
+	iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr));
+	iph->protocol = nexthdr;
+	skb->h.raw = skb->data;
+	err = ipcomp_decompress(x, skb);
+
+out:	
+	return err;
+}
+
+static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err, plen, dlen, ihlen;
+	struct iphdr *iph = skb->nh.iph;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+	
+	ihlen = iph->ihl * 4;
+	plen = skb->len - ihlen;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data + ihlen;
+
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	if (err)
+		goto out;
+
+	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+	
+	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+	put_cpu();
+
+	pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr));
+	return 0;
+	
+out:	
+	put_cpu();
+	return err;
+}
+
+static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct iphdr *iph;
+	struct ip_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+	int hdr_len = 0;
+
+	iph = skb->nh.iph;
+	iph->tot_len = htons(skb->len);
+	hdr_len = iph->ihl * 4;
+	if ((skb->len - hdr_len) < ipcd->threshold) {
+		/* Don't bother compressing */
+		goto out_ok;
+	}
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+		goto out_ok;
+	}
+	
+	err = ipcomp_compress(x, skb);
+	iph = skb->nh.iph;
+
+	if (err) {
+		goto out_ok;
+	}
+
+	/* Install ipcomp header, convert into ipcomp datagram. */
+	iph->tot_len = htons(skb->len);
+	ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
+	ipch->nexthdr = iph->protocol;
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	iph->protocol = IPPROTO_COMP;
+	ip_send_check(iph);
+	return 0;
+
+out_ok:
+	if (x->props.mode)
+		ip_send_check(iph);
+	return 0;
+}
+
+static void ipcomp4_err(struct sk_buff *skb, u32 info)
+{
+	u32 spi;
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+	struct xfrm_state *x;
+
+	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
+	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+		return;
+
+	spi = ntohl(ntohs(ipch->cpi));
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
+	                      spi, IPPROTO_COMP, AF_INET);
+	if (!x)
+		return;
+	NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
+	       spi, NIPQUAD(iph->daddr)));
+	xfrm_state_put(x);
+}
+
+/* We always hold one tunnel user reference to indicate a tunnel */ 
+static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
+{
+	struct xfrm_state *t;
+	
+	t = xfrm_state_alloc();
+	if (t == NULL)
+		goto out;
+
+	t->id.proto = IPPROTO_IPIP;
+	t->id.spi = x->props.saddr.a4;
+	t->id.daddr.a4 = x->id.daddr.a4;
+	memcpy(&t->sel, &x->sel, sizeof(t->sel));
+	t->props.family = AF_INET;
+	t->props.mode = 1;
+	t->props.saddr.a4 = x->props.saddr.a4;
+	t->props.flags = x->props.flags;
+	
+	t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
+	if (t->type == NULL)
+		goto error;
+		
+	if (t->type->init_state(t, NULL))
+		goto error;
+
+	t->km.state = XFRM_STATE_VALID;
+	atomic_set(&t->tunnel_users, 1);
+out:
+	return t;
+
+error:
+	t->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(t);
+	t = NULL;
+	goto out;
+}
+
+/*
+ * Must be protected by xfrm_cfg_sem.  State and tunnel user references are
+ * always incremented on success.
+ */
+static int ipcomp_tunnel_attach(struct xfrm_state *x)
+{
+	int err = 0;
+	struct xfrm_state *t;
+
+	t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
+	                      x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
+	if (!t) {
+		t = ipcomp_tunnel_create(x);
+		if (!t) {
+			err = -EINVAL;
+			goto out;
+		}
+		xfrm_state_insert(t);
+		xfrm_state_hold(t);
+	}
+	x->tunnel = t;
+	atomic_inc(&t->tunnel_users);
+out:
+	return err;
+}
+
+static void ipcomp_free_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (--ipcomp_scratch_users)
+		return;
+
+	scratches = ipcomp_scratches;
+	if (!scratches)
+		return;
+
+	for_each_cpu(i) {
+		void *scratch = *per_cpu_ptr(scratches, i);
+		if (scratch)
+			vfree(scratch);
+	}
+
+	free_percpu(scratches);
+}
+
+static void **ipcomp_alloc_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (ipcomp_scratch_users++)
+		return ipcomp_scratches;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	ipcomp_scratches = scratches;
+
+	for_each_cpu(i) {
+		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+		if (!scratch)
+			return NULL;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+}
+
+static void ipcomp_free_tfms(struct crypto_tfm **tfms)
+{
+	struct ipcomp_tfms *pos;
+	int cpu;
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		if (pos->tfms == tfms)
+			break;
+	}
+
+	BUG_TRAP(pos);
+
+	if (--pos->users)
+		return;
+
+	list_del(&pos->list);
+	kfree(pos);
+
+	if (!tfms)
+		return;
+
+	for_each_cpu(cpu) {
+		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
+		if (tfm)
+			crypto_free_tfm(tfm);
+	}
+	free_percpu(tfms);
+}
+
+static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
+{
+	struct ipcomp_tfms *pos;
+	struct crypto_tfm **tfms;
+	int cpu;
+
+	/* This can be any valid CPU ID so we don't need locking. */
+	cpu = smp_processor_id();
+
+	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+		struct crypto_tfm *tfm;
+
+		tfms = pos->tfms;
+		tfm = *per_cpu_ptr(tfms, cpu);
+
+		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+			pos->users++;
+			return tfms;
+		}
+	}
+
+	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+	if (!pos)
+		return NULL;
+
+	pos->users = 1;
+	INIT_LIST_HEAD(&pos->list);
+	list_add(&pos->list, &ipcomp_tfms_list);
+
+	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	if (!tfms)
+		goto error;
+
+	for_each_cpu(cpu) {
+		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+		if (!tfm)
+			goto error;
+		*per_cpu_ptr(tfms, cpu) = tfm;
+	}
+
+	return tfms;
+
+error:
+	ipcomp_free_tfms(tfms);
+	return NULL;
+}
+
+static void ipcomp_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfms)
+		ipcomp_free_tfms(ipcd->tfms);
+	ipcomp_free_scratches();
+}
+
+static void ipcomp_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (!ipcd)
+		return;
+	xfrm_state_delete_tunnel(x);
+	down(&ipcomp_resource_sem);
+	ipcomp_free_data(ipcd);
+	up(&ipcomp_resource_sem);
+	kfree(ipcd);
+}
+
+static int ipcomp_init_state(struct xfrm_state *x, void *args)
+{
+	int err;
+	struct ipcomp_data *ipcd;
+	struct xfrm_algo_desc *calg_desc;
+
+	err = -EINVAL;
+	if (!x->calg)
+		goto out;
+
+	if (x->encap)
+		goto out;
+
+	err = -ENOMEM;
+	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto out;
+
+	memset(ipcd, 0, sizeof(*ipcd));
+	x->props.header_len = 0;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct iphdr);
+
+	down(&ipcomp_resource_sem);
+	if (!ipcomp_alloc_scratches())
+		goto error;
+
+	ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
+	if (!ipcd->tfms)
+		goto error;
+	up(&ipcomp_resource_sem);
+
+	if (x->props.mode) {
+		err = ipcomp_tunnel_attach(x);
+		if (err)
+			goto error_tunnel;
+	}
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	err = 0;
+out:
+	return err;
+
+error_tunnel:
+	down(&ipcomp_resource_sem);
+error:
+	ipcomp_free_data(ipcd);
+	up(&ipcomp_resource_sem);
+	kfree(ipcd);
+	goto out;
+}
+
+static struct xfrm_type ipcomp_type = {
+	.description	= "IPCOMP4",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_COMP,
+	.init_state	= ipcomp_init_state,
+	.destructor	= ipcomp_destroy,
+	.input		= ipcomp_input,
+	.output		= ipcomp_output
+};
+
+static struct net_protocol ipcomp4_protocol = {
+	.handler	=	xfrm4_rcv,
+	.err_handler	=	ipcomp4_err,
+	.no_policy	=	1,
+};
+
+static int __init ipcomp4_init(void)
+{
+	if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
+		printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
+		printk(KERN_INFO "ipcomp init: can't add protocol\n");
+		xfrm_unregister_type(&ipcomp_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ipcomp4_fini(void)
+{
+	if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
+		printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
+		printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp4_init);
+module_exit(ipcomp4_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
new file mode 100644
index 00000000000..f2509034ce7
--- /dev/null
+++ b/net/ipv4/ipconfig.c
@@ -0,0 +1,1507 @@
+/*
+ *  $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $
+ *
+ *  Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
+ *  user-supplied information to configure own IP address and routes.
+ *
+ *  Copyright (C) 1996-1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *  Derived from network configuration code in fs/nfs/nfsroot.c,
+ *  originally Copyright (C) 1995, 1996 Gero Kuhlmann and me.
+ *
+ *  BOOTP rewritten to construct and analyse packets itself instead
+ *  of misusing the IP layer. num_bugs_causing_wrong_arp_replies--;
+ *					     -- MJ, December 1998
+ *  
+ *  Fixed ip_auto_config_setup calling at startup in the new "Linker Magic"
+ *  initialization scheme.
+ *	- Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 08/11/1999
+ *
+ *  DHCP support added.  To users this looks like a whole separate
+ *  protocol, but we know it's just a bag on the side of BOOTP.
+ *		-- Chip Salzenberg <chip@valinux.com>, May 2000
+ *
+ *  Ported DHCP support from 2.2.16 to 2.4.0-test4
+ *              -- Eric Biederman <ebiederman@lnxi.com>, 30 Aug 2000
+ *
+ *  Merged changes from 2.2.19 into 2.4.3
+ *              -- Eric Biederman <ebiederman@lnxi.com>, 22 April Aug 2001
+ *
+ *  Multiple Nameservers in /proc/net/pnp
+ *              --  Josef Siemes <jsiemes@web.de>, Aug 2002
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <linux/in.h>
+#include <linux/if.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/socket.h>
+#include <linux/route.h>
+#include <linux/udp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/major.h>
+#include <linux/root_dev.h>
+#include <linux/delay.h>
+#include <net/arp.h>
+#include <net/ip.h>
+#include <net/ipconfig.h>
+
+#include <asm/uaccess.h>
+#include <net/checksum.h>
+#include <asm/processor.h>
+
+/* Define this to allow debugging output */
+#undef IPCONFIG_DEBUG
+
+#ifdef IPCONFIG_DEBUG
+#define DBG(x) printk x
+#else
+#define DBG(x) do { } while(0)
+#endif
+
+#if defined(CONFIG_IP_PNP_DHCP)
+#define IPCONFIG_DHCP
+#endif
+#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_DHCP)
+#define IPCONFIG_BOOTP
+#endif
+#if defined(CONFIG_IP_PNP_RARP)
+#define IPCONFIG_RARP
+#endif
+#if defined(IPCONFIG_BOOTP) || defined(IPCONFIG_RARP)
+#define IPCONFIG_DYNAMIC
+#endif
+
+/* Define the friendly delay before and after opening net devices */
+#define CONF_PRE_OPEN		500	/* Before opening: 1/2 second */
+#define CONF_POST_OPEN		1	/* After opening: 1 second */
+
+/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
+#define CONF_OPEN_RETRIES 	2	/* (Re)open devices twice */
+#define CONF_SEND_RETRIES 	6	/* Send six requests per open */
+#define CONF_INTER_TIMEOUT	(HZ/2)	/* Inter-device timeout: 1/2 second */
+#define CONF_BASE_TIMEOUT	(HZ*2)	/* Initial timeout: 2 seconds */
+#define CONF_TIMEOUT_RANDOM	(HZ)	/* Maximum amount of randomization */
+#define CONF_TIMEOUT_MULT	*7/4	/* Rate of timeout growth */
+#define CONF_TIMEOUT_MAX	(HZ*30)	/* Maximum allowed timeout */
+#define CONF_NAMESERVERS_MAX   3       /* Maximum number of nameservers  
+                                           - '3' from resolv.h */
+
+
+/*
+ * Public IP configuration
+ */
+
+/* This is used by platforms which might be able to set the ipconfig
+ * variables using firmware environment vars.  If this is set, it will
+ * ignore such firmware variables.
+ */
+int ic_set_manually __initdata = 0;		/* IPconfig parameters set manually */
+
+static int ic_enable __initdata = 0;		/* IP config enabled? */
+
+/* Protocol choice */
+int ic_proto_enabled __initdata = 0
+#ifdef IPCONFIG_BOOTP
+			| IC_BOOTP
+#endif
+#ifdef CONFIG_IP_PNP_DHCP
+			| IC_USE_DHCP
+#endif
+#ifdef IPCONFIG_RARP
+			| IC_RARP
+#endif
+			;
+
+static int ic_host_name_set __initdata = 0;	/* Host name set by us? */
+
+u32 ic_myaddr = INADDR_NONE;		/* My IP address */
+static u32 ic_netmask = INADDR_NONE;	/* Netmask for local subnet */
+u32 ic_gateway = INADDR_NONE;	/* Gateway IP address */
+
+u32 ic_servaddr = INADDR_NONE;	/* Boot server IP address */
+
+u32 root_server_addr = INADDR_NONE;	/* Address of NFS server */
+u8 root_server_path[256] = { 0, };	/* Path to mount as root */
+
+/* Persistent data: */
+
+static int ic_proto_used;			/* Protocol used, if any */
+static u32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
+static u8 ic_domain[64];		/* DNS (not NIS) domain name */
+
+/*
+ * Private state.
+ */
+
+/* Name of user-selected boot device */
+static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
+
+/* Protocols supported by available interfaces */
+static int ic_proto_have_if __initdata = 0;
+
+#ifdef IPCONFIG_DYNAMIC
+static DEFINE_SPINLOCK(ic_recv_lock);
+static volatile int ic_got_reply __initdata = 0;    /* Proto(s) that replied */
+#endif
+#ifdef IPCONFIG_DHCP
+static int ic_dhcp_msgtype __initdata = 0;	/* DHCP msg type received */
+#endif
+
+
+/*
+ *	Network devices
+ */
+
+struct ic_device {
+	struct ic_device *next;
+	struct net_device *dev;
+	unsigned short flags;
+	short able;
+	u32 xid;
+};
+
+static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
+static struct net_device *ic_dev __initdata = NULL;	/* Selected device */
+
+static int __init ic_open_devs(void)
+{
+	struct ic_device *d, **last;
+	struct net_device *dev;
+	unsigned short oflags;
+
+	last = &ic_first_dev;
+	rtnl_shlock();
+
+	/* bring loopback device up first */
+	if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
+		printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
+
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (dev == &loopback_dev)
+			continue;
+		if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+		    (!(dev->flags & IFF_LOOPBACK) &&
+		     (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
+		     strncmp(dev->name, "dummy", 5))) {
+			int able = 0;
+			if (dev->mtu >= 364)
+				able |= IC_BOOTP;
+			else
+				printk(KERN_WARNING "DHCP/BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu);
+			if (!(dev->flags & IFF_NOARP))
+				able |= IC_RARP;
+			able &= ic_proto_enabled;
+			if (ic_proto_enabled && !able)
+				continue;
+			oflags = dev->flags;
+			if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
+				printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
+				continue;
+			}
+			if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) {
+				rtnl_shunlock();
+				return -1;
+			}
+			d->dev = dev;
+			*last = d;
+			last = &d->next;
+			d->flags = oflags;
+			d->able = able;
+			if (able & IC_BOOTP)
+				get_random_bytes(&d->xid, sizeof(u32));
+			else
+				d->xid = 0;
+			ic_proto_have_if |= able;
+			DBG(("IP-Config: %s UP (able=%d, xid=%08x)\n",
+				dev->name, able, d->xid));
+		}
+	}
+	rtnl_shunlock();
+
+	*last = NULL;
+
+	if (!ic_first_dev) {
+		if (user_dev_name[0])
+			printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
+		else
+			printk(KERN_ERR "IP-Config: No network devices available.\n");
+		return -1;
+	}
+	return 0;
+}
+
+static void __init ic_close_devs(void)
+{
+	struct ic_device *d, *next;
+	struct net_device *dev;
+
+	rtnl_shlock();
+	next = ic_first_dev;
+	while ((d = next)) {
+		next = d->next;
+		dev = d->dev;
+		if (dev != ic_dev) {
+			DBG(("IP-Config: Downing %s\n", dev->name));
+			dev_change_flags(dev, d->flags);
+		}
+		kfree(d);
+	}
+	rtnl_shunlock();
+}
+
+/*
+ *	Interface to various network functions.
+ */
+
+static inline void
+set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port)
+{
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = addr;
+	sin->sin_port = port;
+}
+
+static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
+{
+	int res;
+
+	mm_segment_t oldfs = get_fs();
+	set_fs(get_ds());
+	res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
+	set_fs(oldfs);
+	return res;
+}
+
+static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
+{
+	int res;
+
+	mm_segment_t oldfs = get_fs();
+	set_fs(get_ds());
+	res = ip_rt_ioctl(cmd, (void __user *) arg);
+	set_fs(oldfs);
+	return res;
+}
+
+/*
+ *	Set up interface addresses and routes.
+ */
+
+static int __init ic_setup_if(void)
+{
+	struct ifreq ir;
+	struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr;
+	int err;
+
+	memset(&ir, 0, sizeof(ir));
+	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+	set_sockaddr(sin, ic_myaddr, 0);
+	if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) {
+		printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err);
+		return -1;
+	}
+	set_sockaddr(sin, ic_netmask, 0);
+	if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+		printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err);
+		return -1;
+	}
+	set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
+	if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+		printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err);
+		return -1;
+	}
+	return 0;
+}
+
+static int __init ic_setup_routes(void)
+{
+	/* No need to setup device routes, only the default route... */
+
+	if (ic_gateway != INADDR_NONE) {
+		struct rtentry rm;
+		int err;
+
+		memset(&rm, 0, sizeof(rm));
+		if ((ic_gateway ^ ic_myaddr) & ic_netmask) {
+			printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n");
+			return -1;
+		}
+		set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0);
+		set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
+		set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
+		rm.rt_flags = RTF_UP | RTF_GATEWAY;
+		if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+			printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *	Fill in default values for all missing parameters.
+ */
+
+static int __init ic_defaults(void)
+{
+	/*
+	 *	At this point we have no userspace running so need not
+	 *	claim locks on system_utsname
+	 */
+	 
+	if (!ic_host_name_set)
+		sprintf(system_utsname.nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+
+	if (root_server_addr == INADDR_NONE)
+		root_server_addr = ic_servaddr;
+
+	if (ic_netmask == INADDR_NONE) {
+		if (IN_CLASSA(ntohl(ic_myaddr)))
+			ic_netmask = htonl(IN_CLASSA_NET);
+		else if (IN_CLASSB(ntohl(ic_myaddr)))
+			ic_netmask = htonl(IN_CLASSB_NET);
+		else if (IN_CLASSC(ntohl(ic_myaddr)))
+			ic_netmask = htonl(IN_CLASSC_NET);
+		else {
+			printk(KERN_ERR "IP-Config: Unable to guess netmask for address %u.%u.%u.%u\n",
+				NIPQUAD(ic_myaddr));
+			return -1;
+		}
+		printk("IP-Config: Guessing netmask %u.%u.%u.%u\n", NIPQUAD(ic_netmask));
+	}
+
+	return 0;
+}
+
+/*
+ *	RARP support.
+ */
+
+#ifdef IPCONFIG_RARP
+
+static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt);
+
+static struct packet_type rarp_packet_type __initdata = {
+	.type =	__constant_htons(ETH_P_RARP),
+	.func =	ic_rarp_recv,
+};
+
+static inline void ic_rarp_init(void)
+{
+	dev_add_pack(&rarp_packet_type);
+}
+
+static inline void ic_rarp_cleanup(void)
+{
+	dev_remove_pack(&rarp_packet_type);
+}
+
+/*
+ *  Process received RARP packet.
+ */
+static int __init
+ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct arphdr *rarp;
+	unsigned char *rarp_ptr;
+	unsigned long sip, tip;
+	unsigned char *sha, *tha;		/* s for "source", t for "target" */
+	struct ic_device *d;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		return NET_RX_DROP;
+
+	if (!pskb_may_pull(skb, sizeof(struct arphdr)))
+		goto drop;
+
+	/* Basic sanity checks can be done without the lock.  */
+	rarp = (struct arphdr *)skb->h.raw;
+
+	/* If this test doesn't pass, it's not IP, or we should
+	 * ignore it anyway.
+	 */
+	if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd))
+		goto drop;
+
+	/* If it's not a RARP reply, delete it. */
+	if (rarp->ar_op != htons(ARPOP_RREPLY))
+		goto drop;
+
+	/* If it's not Ethernet, delete it. */
+	if (rarp->ar_pro != htons(ETH_P_IP))
+		goto drop;
+
+	if (!pskb_may_pull(skb,
+			   sizeof(struct arphdr) +
+			   (2 * dev->addr_len) +
+			   (2 * 4)))
+		goto drop;
+
+	/* OK, it is all there and looks valid, process... */
+	rarp = (struct arphdr *)skb->h.raw;
+	rarp_ptr = (unsigned char *) (rarp + 1);
+
+	/* One reply at a time, please. */
+	spin_lock(&ic_recv_lock);
+
+	/* If we already have a reply, just drop the packet */
+	if (ic_got_reply)
+		goto drop_unlock;
+
+	/* Find the ic_device that the packet arrived on */
+	d = ic_first_dev;
+	while (d && d->dev != dev)
+		d = d->next;
+	if (!d)
+		goto drop_unlock;	/* should never happen */
+
+	/* Extract variable-width fields */
+	sha = rarp_ptr;
+	rarp_ptr += dev->addr_len;
+	memcpy(&sip, rarp_ptr, 4);
+	rarp_ptr += 4;
+	tha = rarp_ptr;
+	rarp_ptr += dev->addr_len;
+	memcpy(&tip, rarp_ptr, 4);
+
+	/* Discard packets which are not meant for us. */
+	if (memcmp(tha, dev->dev_addr, dev->addr_len))
+		goto drop_unlock;
+
+	/* Discard packets which are not from specified server. */
+	if (ic_servaddr != INADDR_NONE && ic_servaddr != sip)
+		goto drop_unlock;
+
+	/* We have a winner! */
+	ic_dev = dev;
+	if (ic_myaddr == INADDR_NONE)
+		ic_myaddr = tip;
+	ic_servaddr = sip;
+	ic_got_reply = IC_RARP;
+
+drop_unlock:
+	/* Show's over.  Nothing to see here.  */
+	spin_unlock(&ic_recv_lock);
+
+drop:
+	/* Throw the packet out. */
+	kfree_skb(skb);
+	return 0;
+}
+
+
+/*
+ *  Send RARP request packet over a single interface.
+ */
+static void __init ic_rarp_send_if(struct ic_device *d)
+{
+	struct net_device *dev = d->dev;
+	arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL,
+		 dev->dev_addr, dev->dev_addr);
+}
+#endif
+
+/*
+ *	DHCP/BOOTP support.
+ */
+
+#ifdef IPCONFIG_BOOTP
+
+struct bootp_pkt {		/* BOOTP packet format */
+	struct iphdr iph;	/* IP header */
+	struct udphdr udph;	/* UDP header */
+	u8 op;			/* 1=request, 2=reply */
+	u8 htype;		/* HW address type */
+	u8 hlen;		/* HW address length */
+	u8 hops;		/* Used only by gateways */
+	u32 xid;		/* Transaction ID */
+	u16 secs;		/* Seconds since we started */
+	u16 flags;		/* Just what it says */
+	u32 client_ip;		/* Client's IP address if known */
+	u32 your_ip;		/* Assigned IP address */
+	u32 server_ip;		/* (Next, e.g. NFS) Server's IP address */
+	u32 relay_ip;		/* IP address of BOOTP relay */
+	u8 hw_addr[16];		/* Client's HW address */
+	u8 serv_name[64];	/* Server host name */
+	u8 boot_file[128];	/* Name of boot file */
+	u8 exten[312];		/* DHCP options / BOOTP vendor extensions */
+};
+
+/* packet ops */
+#define BOOTP_REQUEST	1
+#define BOOTP_REPLY	2
+
+/* DHCP message types */
+#define DHCPDISCOVER	1
+#define DHCPOFFER	2
+#define DHCPREQUEST	3
+#define DHCPDECLINE	4
+#define DHCPACK		5
+#define DHCPNAK		6
+#define DHCPRELEASE	7
+#define DHCPINFORM	8
+
+static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt);
+
+static struct packet_type bootp_packet_type __initdata = {
+	.type =	__constant_htons(ETH_P_IP),
+	.func =	ic_bootp_recv,
+};
+
+
+/*
+ *  Initialize DHCP/BOOTP extension fields in the request.
+ */
+
+static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 };
+
+#ifdef IPCONFIG_DHCP
+
+static void __init
+ic_dhcp_init_options(u8 *options)
+{
+	u8 mt = ((ic_servaddr == INADDR_NONE)
+		 ? DHCPDISCOVER : DHCPREQUEST);
+	u8 *e = options;
+
+#ifdef IPCONFIG_DEBUG
+	printk("DHCP: Sending message type %d\n", mt);
+#endif
+
+	memcpy(e, ic_bootp_cookie, 4);	/* RFC1048 Magic Cookie */
+	e += 4;
+
+	*e++ = 53;		/* DHCP message type */
+	*e++ = 1;
+	*e++ = mt;
+
+	if (mt == DHCPREQUEST) {
+		*e++ = 54;	/* Server ID (IP address) */
+		*e++ = 4;
+		memcpy(e, &ic_servaddr, 4);
+		e += 4;
+
+		*e++ = 50;	/* Requested IP address */
+		*e++ = 4;
+		memcpy(e, &ic_myaddr, 4);
+		e += 4;
+	}
+
+	/* always? */
+	{
+		static const u8 ic_req_params[] = {
+			1,	/* Subnet mask */
+			3,	/* Default gateway */
+			6,	/* DNS server */
+			12,	/* Host name */
+			15,	/* Domain name */
+			17,	/* Boot path */
+			40,	/* NIS domain name */
+		};
+
+		*e++ = 55;	/* Parameter request list */
+		*e++ = sizeof(ic_req_params);
+		memcpy(e, ic_req_params, sizeof(ic_req_params));
+		e += sizeof(ic_req_params);
+	}
+
+	*e++ = 255;	/* End of the list */
+}
+
+#endif /* IPCONFIG_DHCP */
+
+static void __init ic_bootp_init_ext(u8 *e)
+{
+	memcpy(e, ic_bootp_cookie, 4);	/* RFC1048 Magic Cookie */
+	e += 4;
+	*e++ = 1;		/* Subnet mask request */
+	*e++ = 4;
+	e += 4;
+	*e++ = 3;		/* Default gateway request */
+	*e++ = 4;
+	e += 4;
+	*e++ = 5;		/* Name server request */
+	*e++ = 8;
+	e += 8;
+	*e++ = 12;		/* Host name request */
+	*e++ = 32;
+	e += 32;
+	*e++ = 40;		/* NIS Domain name request */
+	*e++ = 32;
+	e += 32;
+	*e++ = 17;		/* Boot path */
+	*e++ = 40;
+	e += 40;
+
+	*e++ = 57;		/* set extension buffer size for reply */ 
+	*e++ = 2;
+	*e++ = 1;		/* 128+236+8+20+14, see dhcpd sources */ 
+	*e++ = 150;
+
+	*e++ = 255;		/* End of the list */
+}
+
+
+/*
+ *  Initialize the DHCP/BOOTP mechanism.
+ */
+static inline void ic_bootp_init(void)
+{
+	int i;
+
+	for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+		ic_nameservers[i] = INADDR_NONE;
+
+	dev_add_pack(&bootp_packet_type);
+}
+
+
+/*
+ *  DHCP/BOOTP cleanup.
+ */
+static inline void ic_bootp_cleanup(void)
+{
+	dev_remove_pack(&bootp_packet_type);
+}
+
+
+/*
+ *  Send DHCP/BOOTP request to single interface.
+ */
+static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_diff)
+{
+	struct net_device *dev = d->dev;
+	struct sk_buff *skb;
+	struct bootp_pkt *b;
+	int hh_len = LL_RESERVED_SPACE(dev);
+	struct iphdr *h;
+
+	/* Allocate packet */
+	skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+	if (!skb)
+		return;
+	skb_reserve(skb, hh_len);
+	b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
+	memset(b, 0, sizeof(struct bootp_pkt));
+
+	/* Construct IP header */
+	skb->nh.iph = h = &b->iph;
+	h->version = 4;
+	h->ihl = 5;
+	h->tot_len = htons(sizeof(struct bootp_pkt));
+	h->frag_off = htons(IP_DF);
+	h->ttl = 64;
+	h->protocol = IPPROTO_UDP;
+	h->daddr = INADDR_BROADCAST;
+	h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+	/* Construct UDP header */
+	b->udph.source = htons(68);
+	b->udph.dest = htons(67);
+	b->udph.len = htons(sizeof(struct bootp_pkt) - sizeof(struct iphdr));
+	/* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+	/* Construct DHCP/BOOTP header */
+	b->op = BOOTP_REQUEST;
+	if (dev->type < 256) /* check for false types */
+		b->htype = dev->type;
+	else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */
+		b->htype = ARPHRD_IEEE802;
+	else if (dev->type == ARPHRD_FDDI)
+		b->htype = ARPHRD_ETHER;
+	else {
+		printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name);
+		b->htype = dev->type; /* can cause undefined behavior */
+	}
+	b->hlen = dev->addr_len;
+	b->your_ip = INADDR_NONE;
+	b->server_ip = INADDR_NONE;
+	memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
+	b->secs = htons(jiffies_diff / HZ);
+	b->xid = d->xid;
+
+	/* add DHCP options or BOOTP extensions */
+#ifdef IPCONFIG_DHCP
+	if (ic_proto_enabled & IC_USE_DHCP)
+		ic_dhcp_init_options(b->exten);
+	else
+#endif
+		ic_bootp_init_ext(b->exten);
+
+	/* Chain packet down the line... */
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_IP);
+	if ((dev->hard_header &&
+	     dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) ||
+	    dev_queue_xmit(skb) < 0)
+		printk("E");
+}
+
+
+/*
+ *  Copy BOOTP-supplied string if not already set.
+ */
+static int __init ic_bootp_string(char *dest, char *src, int len, int max)
+{
+	if (!len)
+		return 0;
+	if (len > max-1)
+		len = max-1;
+	memcpy(dest, src, len);
+	dest[len] = '\0';
+	return 1;
+}
+
+
+/*
+ *  Process BOOTP extensions.
+ */
+static void __init ic_do_bootp_ext(u8 *ext)
+{
+       u8 servers;
+       int i;
+
+#ifdef IPCONFIG_DEBUG
+	u8 *c;
+
+	printk("DHCP/BOOTP: Got extension %d:",*ext);
+	for(c=ext+2; c<ext+2+ext[1]; c++)
+		printk(" %02x", *c);
+	printk("\n");
+#endif
+
+	switch (*ext++) {
+		case 1:		/* Subnet mask */
+			if (ic_netmask == INADDR_NONE)
+				memcpy(&ic_netmask, ext+1, 4);
+			break;
+		case 3:		/* Default gateway */
+			if (ic_gateway == INADDR_NONE)
+				memcpy(&ic_gateway, ext+1, 4);
+			break;
+		case 6:		/* DNS server */
+			servers= *ext/4;
+			if (servers > CONF_NAMESERVERS_MAX)
+				servers = CONF_NAMESERVERS_MAX;
+			for (i = 0; i < servers; i++) {
+				if (ic_nameservers[i] == INADDR_NONE)
+					memcpy(&ic_nameservers[i], ext+1+4*i, 4);
+			}
+			break;
+		case 12:	/* Host name */
+			ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+			ic_host_name_set = 1;
+			break;
+		case 15:	/* Domain name (DNS) */
+			ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+			break;
+		case 17:	/* Root path */
+			if (!root_server_path[0])
+				ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
+			break;
+		case 40:	/* NIS Domain name (_not_ DNS) */
+			ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+			break;
+	}
+}
+
+
+/*
+ *  Receive BOOTP reply.
+ */
+static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct bootp_pkt *b;
+	struct iphdr *h;
+	struct ic_device *d;
+	int len, ext_len;
+
+	/* Perform verifications before taking the lock.  */
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto drop;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		return NET_RX_DROP;
+
+	if (!pskb_may_pull(skb,
+			   sizeof(struct iphdr) +
+			   sizeof(struct udphdr)))
+		goto drop;
+
+	b = (struct bootp_pkt *) skb->nh.iph;
+	h = &b->iph;
+
+	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
+		goto drop;
+
+	/* Fragments are not supported */
+	if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
+		if (net_ratelimit())
+			printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented "
+			       "reply.\n");
+		goto drop;
+	}
+
+	if (skb->len < ntohs(h->tot_len))
+		goto drop;
+
+	if (ip_fast_csum((char *) h, h->ihl))
+		goto drop;
+
+	if (b->udph.source != htons(67) || b->udph.dest != htons(68))
+		goto drop;
+
+	if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+		goto drop;
+
+	len = ntohs(b->udph.len) - sizeof(struct udphdr);
+	ext_len = len - (sizeof(*b) -
+			 sizeof(struct iphdr) -
+			 sizeof(struct udphdr) -
+			 sizeof(b->exten));
+	if (ext_len < 0)
+		goto drop;
+
+	/* Ok the front looks good, make sure we can get at the rest.  */
+	if (!pskb_may_pull(skb, skb->len))
+		goto drop;
+
+	b = (struct bootp_pkt *) skb->nh.iph;
+	h = &b->iph;
+
+	/* One reply at a time, please. */
+	spin_lock(&ic_recv_lock);
+
+	/* If we already have a reply, just drop the packet */
+	if (ic_got_reply)
+		goto drop_unlock;
+
+	/* Find the ic_device that the packet arrived on */
+	d = ic_first_dev;
+	while (d && d->dev != dev)
+		d = d->next;
+	if (!d)
+		goto drop_unlock;  /* should never happen */
+
+	/* Is it a reply to our BOOTP request? */
+	if (b->op != BOOTP_REPLY ||
+	    b->xid != d->xid) {
+		if (net_ratelimit())
+			printk(KERN_ERR "DHCP/BOOTP: Reply not for us, "
+			       "op[%x] xid[%x]\n",
+			       b->op, b->xid);
+		goto drop_unlock;
+	}
+
+	/* Parse extensions */
+	if (ext_len >= 4 &&
+	    !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */
+                u8 *end = (u8 *) b + ntohs(b->iph.tot_len);
+		u8 *ext;
+
+#ifdef IPCONFIG_DHCP
+		if (ic_proto_enabled & IC_USE_DHCP) {
+			u32 server_id = INADDR_NONE;
+			int mt = 0;
+
+			ext = &b->exten[4];
+			while (ext < end && *ext != 0xff) {
+				u8 *opt = ext++;
+				if (*opt == 0)	/* Padding */
+					continue;
+				ext += *ext + 1;
+				if (ext >= end)
+					break;
+				switch (*opt) {
+				case 53:	/* Message type */
+					if (opt[1])
+						mt = opt[2];
+					break;
+				case 54:	/* Server ID (IP address) */
+					if (opt[1] >= 4)
+						memcpy(&server_id, opt + 2, 4);
+					break;
+				};
+			}
+
+#ifdef IPCONFIG_DEBUG
+			printk("DHCP: Got message type %d\n", mt);
+#endif
+
+			switch (mt) {
+			case DHCPOFFER:
+				/* While in the process of accepting one offer,
+				 * ignore all others.
+				 */
+				if (ic_myaddr != INADDR_NONE)
+					goto drop_unlock;
+
+				/* Let's accept that offer. */
+				ic_myaddr = b->your_ip;
+				ic_servaddr = server_id;
+#ifdef IPCONFIG_DEBUG
+				printk("DHCP: Offered address %u.%u.%u.%u",
+				       NIPQUAD(ic_myaddr));
+				printk(" by server %u.%u.%u.%u\n",
+				       NIPQUAD(ic_servaddr));
+#endif
+				/* The DHCP indicated server address takes
+				 * precedence over the bootp header one if
+				 * they are different.
+				 */
+				if ((server_id != INADDR_NONE) &&
+				    (b->server_ip != server_id))
+					b->server_ip = ic_servaddr;
+				break;
+
+			case DHCPACK:
+				if (memcmp(dev->dev_addr, b->hw_addr, dev->addr_len) != 0)
+					goto drop_unlock;
+
+				/* Yeah! */
+				break;
+
+			default:
+				/* Urque.  Forget it*/
+				ic_myaddr = INADDR_NONE;
+				ic_servaddr = INADDR_NONE;
+				goto drop_unlock;
+			};
+
+			ic_dhcp_msgtype = mt;
+
+		}
+#endif /* IPCONFIG_DHCP */
+
+		ext = &b->exten[4];
+		while (ext < end && *ext != 0xff) {
+			u8 *opt = ext++;
+			if (*opt == 0)	/* Padding */
+				continue;
+			ext += *ext + 1;
+			if (ext < end)
+				ic_do_bootp_ext(opt);
+		}
+	}
+
+	/* We have a winner! */
+	ic_dev = dev;
+	ic_myaddr = b->your_ip;
+	ic_servaddr = b->server_ip;
+	if (ic_gateway == INADDR_NONE && b->relay_ip)
+		ic_gateway = b->relay_ip;
+	if (ic_nameservers[0] == INADDR_NONE)
+		ic_nameservers[0] = ic_servaddr;
+	ic_got_reply = IC_BOOTP;
+
+drop_unlock:
+	/* Show's over.  Nothing to see here.  */
+	spin_unlock(&ic_recv_lock);
+
+drop:
+	/* Throw the packet out. */
+	kfree_skb(skb);
+
+	return 0;
+}	
+
+
+#endif
+
+
+/*
+ *	Dynamic IP configuration -- DHCP, BOOTP, RARP.
+ */
+
+#ifdef IPCONFIG_DYNAMIC
+
+static int __init ic_dynamic(void)
+{
+	int retries;
+	struct ic_device *d;
+	unsigned long start_jiffies, timeout, jiff;
+	int do_bootp = ic_proto_have_if & IC_BOOTP;
+	int do_rarp = ic_proto_have_if & IC_RARP;
+
+	/*
+	 * If none of DHCP/BOOTP/RARP was selected, return with an error.
+	 * This routine gets only called when some pieces of information
+	 * are missing, and without DHCP/BOOTP/RARP we are unable to get it.
+	 */
+	if (!ic_proto_enabled) {
+		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+		return -1;
+	}
+
+#ifdef IPCONFIG_BOOTP
+	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP)
+		printk(KERN_ERR "DHCP/BOOTP: No suitable device found.\n");
+#endif
+#ifdef IPCONFIG_RARP
+	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP)
+		printk(KERN_ERR "RARP: No suitable device found.\n");
+#endif
+
+	if (!ic_proto_have_if)
+		/* Error message already printed */
+		return -1;
+
+	/*
+	 * Setup protocols
+	 */
+#ifdef IPCONFIG_BOOTP
+	if (do_bootp)
+		ic_bootp_init();
+#endif
+#ifdef IPCONFIG_RARP
+	if (do_rarp)
+		ic_rarp_init();
+#endif
+
+	/*
+	 * Send requests and wait, until we get an answer. This loop
+	 * seems to be a terrible waste of CPU time, but actually there is
+	 * only one process running at all, so we don't need to use any
+	 * scheduler functions.
+	 * [Actually we could now, but the nothing else running note still 
+	 *  applies.. - AC]
+	 */
+	printk(KERN_NOTICE "Sending %s%s%s requests .",
+	       do_bootp
+		? ((ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP") : "",
+	       (do_bootp && do_rarp) ? " and " : "",
+	       do_rarp ? "RARP" : "");
+
+	start_jiffies = jiffies;
+	d = ic_first_dev;
+	retries = CONF_SEND_RETRIES;
+	get_random_bytes(&timeout, sizeof(timeout));
+	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
+	for(;;) {
+#ifdef IPCONFIG_BOOTP
+		if (do_bootp && (d->able & IC_BOOTP))
+			ic_bootp_send_if(d, jiffies - start_jiffies);
+#endif
+#ifdef IPCONFIG_RARP
+		if (do_rarp && (d->able & IC_RARP))
+			ic_rarp_send_if(d);
+#endif
+
+		jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
+		while (time_before(jiffies, jiff) && !ic_got_reply) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(1);
+		}
+#ifdef IPCONFIG_DHCP
+		/* DHCP isn't done until we get a DHCPACK. */
+		if ((ic_got_reply & IC_BOOTP)
+		    && (ic_proto_enabled & IC_USE_DHCP)
+		    && ic_dhcp_msgtype != DHCPACK)
+		{
+			ic_got_reply = 0;
+			printk(",");
+			continue;
+		}
+#endif /* IPCONFIG_DHCP */
+
+		if (ic_got_reply) {
+			printk(" OK\n");
+			break;
+		}
+
+		if ((d = d->next))
+			continue;
+
+		if (! --retries) {
+			printk(" timed out!\n");
+			break;
+		}
+
+		d = ic_first_dev;
+
+		timeout = timeout CONF_TIMEOUT_MULT;
+		if (timeout > CONF_TIMEOUT_MAX)
+			timeout = CONF_TIMEOUT_MAX;
+
+		printk(".");
+	}
+
+#ifdef IPCONFIG_BOOTP
+	if (do_bootp)
+		ic_bootp_cleanup();
+#endif
+#ifdef IPCONFIG_RARP
+	if (do_rarp)
+		ic_rarp_cleanup();
+#endif
+
+	if (!ic_got_reply)
+		return -1;
+
+	printk("IP-Config: Got %s answer from %u.%u.%u.%u, ",
+		((ic_got_reply & IC_RARP) ? "RARP" 
+		 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
+		NIPQUAD(ic_servaddr));
+	printk("my address is %u.%u.%u.%u\n", NIPQUAD(ic_myaddr));
+
+	return 0;
+}
+
+#endif /* IPCONFIG_DYNAMIC */
+
+#ifdef CONFIG_PROC_FS
+
+static int pnp_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	if (ic_proto_used & IC_PROTO)
+		seq_printf(seq, "#PROTO: %s\n",
+			   (ic_proto_used & IC_RARP) ? "RARP"
+			   : (ic_proto_used & IC_USE_DHCP) ? "DHCP" : "BOOTP");
+	else
+		seq_puts(seq, "#MANUAL\n");
+
+	if (ic_domain[0])
+		seq_printf(seq,
+			   "domain %s\n", ic_domain);
+	for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
+		if (ic_nameservers[i] != INADDR_NONE)
+			seq_printf(seq,
+				   "nameserver %u.%u.%u.%u\n",
+				   NIPQUAD(ic_nameservers[i]));
+	}
+	if (ic_servaddr != INADDR_NONE)
+		seq_printf(seq,
+			   "bootserver %u.%u.%u.%u\n",
+			   NIPQUAD(ic_servaddr));
+	return 0;
+}
+
+static int pnp_seq_open(struct inode *indoe, struct file *file)
+{
+	return single_open(file, pnp_seq_show, NULL);
+}
+
+static struct file_operations pnp_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pnp_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif /* CONFIG_PROC_FS */
+
+/*
+ *  Extract IP address from the parameter string if needed. Note that we
+ *  need to have root_server_addr set _before_ IPConfig gets called as it
+ *  can override it.
+ */
+u32 __init root_nfs_parse_addr(char *name)
+{
+	u32 addr;
+	int octets = 0;
+	char *cp, *cq;
+
+	cp = cq = name;
+	while (octets < 4) {
+		while (*cp >= '0' && *cp <= '9')
+			cp++;
+		if (cp == cq || cp - cq > 3)
+			break;
+		if (*cp == '.' || octets == 3)
+			octets++;
+		if (octets < 4)
+			cp++;
+		cq = cp;
+	}
+	if (octets == 4 && (*cp == ':' || *cp == '\0')) {
+		if (*cp == ':')
+			*cp++ = '\0';
+		addr = in_aton(name);
+		memmove(name, cp, strlen(cp) + 1);
+	} else
+		addr = INADDR_NONE;
+
+	return addr;
+}
+
+/*
+ *	IP Autoconfig dispatcher.
+ */
+
+static int __init ip_auto_config(void)
+{
+	u32 addr;
+
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops);
+#endif /* CONFIG_PROC_FS */
+
+	if (!ic_enable)
+		return 0;
+
+	DBG(("IP-Config: Entered.\n"));
+#ifdef IPCONFIG_DYNAMIC
+ try_try_again:
+#endif
+	/* Give hardware a chance to settle */
+	msleep(CONF_PRE_OPEN);
+
+	/* Setup all network devices */
+	if (ic_open_devs() < 0)
+		return -1;
+
+	/* Give drivers a chance to settle */
+	ssleep(CONF_POST_OPEN);
+
+	/*
+	 * If the config information is insufficient (e.g., our IP address or
+	 * IP address of the boot server is missing or we have multiple network
+	 * interfaces and no default was set), use BOOTP or RARP to get the
+	 * missing values.
+	 */
+	if (ic_myaddr == INADDR_NONE ||
+#ifdef CONFIG_ROOT_NFS
+	    (MAJOR(ROOT_DEV) == UNNAMED_MAJOR
+	     && root_server_addr == INADDR_NONE
+	     && ic_servaddr == INADDR_NONE) ||
+#endif
+	    ic_first_dev->next) {
+#ifdef IPCONFIG_DYNAMIC
+	
+		int retries = CONF_OPEN_RETRIES;
+
+		if (ic_dynamic() < 0) {
+			ic_close_devs();
+
+			/*
+			 * I don't know why, but sometimes the
+			 * eepro100 driver (at least) gets upset and
+			 * doesn't work the first time it's opened.
+			 * But then if you close it and reopen it, it
+			 * works just fine.  So we need to try that at
+			 * least once before giving up.
+			 *
+			 * Also, if the root will be NFS-mounted, we
+			 * have nowhere to go if DHCP fails.  So we
+			 * just have to keep trying forever.
+			 *
+			 * 				-- Chip
+			 */
+#ifdef CONFIG_ROOT_NFS
+			if (ROOT_DEV ==  Root_NFS) {
+				printk(KERN_ERR 
+					"IP-Config: Retrying forever (NFS root)...\n");
+				goto try_try_again;
+			}
+#endif
+
+			if (--retries) {
+				printk(KERN_ERR 
+				       "IP-Config: Reopening network devices...\n");
+				goto try_try_again;
+			}
+
+			/* Oh, well.  At least we tried. */
+			printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n");
+			return -1;
+		}
+#else /* !DYNAMIC */
+		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+		ic_close_devs();
+		return -1;
+#endif /* IPCONFIG_DYNAMIC */
+	} else {
+		/* Device selected manually or only one device -> use it */
+		ic_dev = ic_first_dev->dev;
+	}
+
+	addr = root_nfs_parse_addr(root_server_path);
+	if (root_server_addr == INADDR_NONE)
+		root_server_addr = addr;
+
+	/*
+	 * Use defaults whereever applicable.
+	 */
+	if (ic_defaults() < 0)
+		return -1;
+
+	/*
+	 * Close all network devices except the device we've
+	 * autoconfigured and set up routes.
+	 */
+	ic_close_devs();
+	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+		return -1;
+
+	/*
+	 * Record which protocol was actually used.
+	 */
+#ifdef IPCONFIG_DYNAMIC
+	ic_proto_used = ic_got_reply | (ic_proto_enabled & IC_USE_DHCP);
+#endif
+
+#ifndef IPCONFIG_SILENT
+	/*
+	 * Clue in the operator.
+	 */
+	printk("IP-Config: Complete:");
+	printk("\n      device=%s", ic_dev->name);
+	printk(", addr=%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+	printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
+	printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
+	printk(",\n     host=%s, domain=%s, nis-domain=%s",
+	       system_utsname.nodename, ic_domain, system_utsname.domainname);
+	printk(",\n     bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
+	printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
+	printk(", rootpath=%s", root_server_path);
+	printk("\n");
+#endif /* !SILENT */
+
+	return 0;
+}
+
+late_initcall(ip_auto_config);
+
+
+/*
+ *  Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
+ *  command line parameter. It consists of option fields separated by colons in
+ *  the following order:
+ *
+ *  <client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<PROTO>
+ *
+ *  Any of the fields can be empty which means to use a default value:
+ *	<client-ip>	- address given by BOOTP or RARP
+ *	<server-ip>	- address of host returning BOOTP or RARP packet
+ *	<gw-ip>		- none, or the address returned by BOOTP
+ *	<netmask>	- automatically determined from <client-ip>, or the
+ *			  one returned by BOOTP
+ *	<host name>	- <client-ip> in ASCII notation, or the name returned
+ *			  by BOOTP
+ *	<device>	- use all available devices
+ *	<PROTO>:
+ *	   off|none	    - don't do autoconfig at all (DEFAULT)
+ *	   on|any           - use any configured protocol
+ *	   dhcp|bootp|rarp  - use only the specified protocol
+ *	   both             - use both BOOTP and RARP (not DHCP)
+ */
+static int __init ic_proto_name(char *name)
+{
+	if (!strcmp(name, "on") || !strcmp(name, "any")) {
+		return 1;
+	}
+#ifdef CONFIG_IP_PNP_DHCP
+	else if (!strcmp(name, "dhcp")) {
+		ic_proto_enabled &= ~IC_RARP;
+		return 1;
+	}
+#endif
+#ifdef CONFIG_IP_PNP_BOOTP
+	else if (!strcmp(name, "bootp")) {
+		ic_proto_enabled &= ~(IC_RARP | IC_USE_DHCP);
+		return 1;
+	}
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+	else if (!strcmp(name, "rarp")) {
+		ic_proto_enabled &= ~(IC_BOOTP | IC_USE_DHCP);
+		return 1;
+	}
+#endif
+#ifdef IPCONFIG_DYNAMIC
+	else if (!strcmp(name, "both")) {
+		ic_proto_enabled &= ~IC_USE_DHCP; /* backward compat :-( */
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+static int __init ip_auto_config_setup(char *addrs)
+{
+	char *cp, *ip, *dp;
+	int num = 0;
+
+	ic_set_manually = 1;
+
+	ic_enable = (*addrs && 
+		(strcmp(addrs, "off") != 0) && 
+		(strcmp(addrs, "none") != 0));
+	if (!ic_enable)
+		return 1;
+
+	if (ic_proto_name(addrs))
+		return 1;
+
+	/* Parse the whole string */
+	ip = addrs;
+	while (ip && *ip) {
+		if ((cp = strchr(ip, ':')))
+			*cp++ = '\0';
+		if (strlen(ip) > 0) {
+			DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip));
+			switch (num) {
+			case 0:
+				if ((ic_myaddr = in_aton(ip)) == INADDR_ANY)
+					ic_myaddr = INADDR_NONE;
+				break;
+			case 1:
+				if ((ic_servaddr = in_aton(ip)) == INADDR_ANY)
+					ic_servaddr = INADDR_NONE;
+				break;
+			case 2:
+				if ((ic_gateway = in_aton(ip)) == INADDR_ANY)
+					ic_gateway = INADDR_NONE;
+				break;
+			case 3:
+				if ((ic_netmask = in_aton(ip)) == INADDR_ANY)
+					ic_netmask = INADDR_NONE;
+				break;
+			case 4:
+				if ((dp = strchr(ip, '.'))) {
+					*dp++ = '\0';
+					strlcpy(system_utsname.domainname, dp,
+						sizeof(system_utsname.domainname));
+				}
+				strlcpy(system_utsname.nodename, ip,
+					sizeof(system_utsname.nodename));
+				ic_host_name_set = 1;
+				break;
+			case 5:
+				strlcpy(user_dev_name, ip, sizeof(user_dev_name));
+				break;
+			case 6:
+				ic_proto_name(ip);
+				break;
+			}
+		}
+		ip = cp;
+		num++;
+	}
+
+	return 1;
+}
+
+static int __init nfsaddrs_config_setup(char *addrs)
+{
+	return ip_auto_config_setup(addrs);
+}
+
+__setup("ip=", ip_auto_config_setup);
+__setup("nfsaddrs=", nfsaddrs_config_setup);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
new file mode 100644
index 00000000000..68a78731f72
--- /dev/null
+++ b/net/ipv4/ipip.c
@@ -0,0 +1,905 @@
+/*
+ *	Linux NET3:	IP/IP protocol decoder. 
+ *
+ *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
+ *
+ *	Authors:
+ *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
+ *
+ *	Fixes:
+ *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
+ *					a module taking up 2 pages).
+ *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
+ *					to keep ip_forward happy.
+ *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
+ *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
+ *              David Woodhouse :       Perform some basic ICMP handling.
+ *                                      IPIP Routing without decapsulation.
+ *              Carlos Picoto   :       GRE over IP support
+ *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
+ *					I do not want to merge them together.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* tunnel.c: an IP tunnel driver
+
+	The purpose of this driver is to provide an IP tunnel through
+	which you can tunnel network traffic transparently across subnets.
+
+	This was written by looking at Nick Holloway's dummy driver
+	Thanks for the great code!
+
+		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
+		
+	Minor tweaks:
+		Cleaned up the code a little and added some pre-1.3.0 tweaks.
+		dev->hard_header/hard_header_len changed to use no headers.
+		Comments/bracketing tweaked.
+		Made the tunnels use dev->name not tunnel: when error reporting.
+		Added tx_dropped stat
+		
+		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
+
+	Reworked:
+		Changed to tunnel to destination gateway in addition to the
+			tunnel's pointopoint address
+		Almost completely rewritten
+		Note:  There is currently no firewall or ICMP handling done.
+
+		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
+		
+*/
+
+/* Things I wish I had known when writing the tunnel driver:
+
+	When the tunnel_xmit() function is called, the skb contains the
+	packet to be sent (plus a great deal of extra info), and dev
+	contains the tunnel device that _we_ are.
+
+	When we are passed a packet, we are expected to fill in the
+	source address with our source IP address.
+
+	What is the proper way to allocate, copy and free a buffer?
+	After you allocate it, it is a "0 length" chunk of memory
+	starting at zero.  If you want to add headers to the buffer
+	later, you'll have to call "skb_reserve(skb, amount)" with
+	the amount of memory you want reserved.  Then, you call
+	"skb_put(skb, amount)" with the amount of space you want in
+	the buffer.  skb_put() returns a pointer to the top (#0) of
+	that buffer.  skb->len is set to the amount of space you have
+	"allocated" with skb_put().  You can then write up to skb->len
+	bytes to that buffer.  If you need more, you can call skb_put()
+	again with the additional amount of space you need.  You can
+	find out how much more space you can allocate by calling 
+	"skb_tailroom(skb)".
+	Now, to add header space, call "skb_push(skb, header_len)".
+	This creates space at the beginning of the buffer and returns
+	a pointer to this new space.  If later you need to strip a
+	header from a buffer, call "skb_pull(skb, header_len)".
+	skb_headroom() will return how much space is left at the top
+	of the buffer (before the main data).  Remember, this headroom
+	space must be reserved before the skb_put() function is called.
+	*/
+
+/*
+   This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
+
+   For comments look at net/ipv4/ip_gre.c --ANK
+ */
+
+ 
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ipip.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+
+#define HASH_SIZE  16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
+
+static int ipip_fb_tunnel_init(struct net_device *dev);
+static int ipip_tunnel_init(struct net_device *dev);
+static void ipip_tunnel_setup(struct net_device *dev);
+
+static struct net_device *ipip_fb_tunnel_dev;
+
+static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_r[HASH_SIZE];
+static struct ip_tunnel *tunnels_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_wc[1];
+static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+
+static DEFINE_RWLOCK(ipip_lock);
+
+static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(local);
+	struct ip_tunnel *t;
+
+	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	for (t = tunnels_r[h0]; t; t = t->next) {
+		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	for (t = tunnels_l[h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+		return t;
+	return NULL;
+}
+
+static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+	u32 remote = t->parms.iph.daddr;
+	u32 local = t->parms.iph.saddr;
+	unsigned h = 0;
+	int prio = 0;
+
+	if (remote) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	if (local) {
+		prio |= 1;
+		h ^= HASH(local);
+	}
+	return &tunnels[prio][h];
+}
+
+
+static void ipip_tunnel_unlink(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp;
+
+	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
+		if (t == *tp) {
+			write_lock_bh(&ipip_lock);
+			*tp = t->next;
+			write_unlock_bh(&ipip_lock);
+			break;
+		}
+	}
+}
+
+static void ipip_tunnel_link(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp = ipip_bucket(t);
+
+	t->next = *tp;
+	write_lock_bh(&ipip_lock);
+	*tp = t;
+	write_unlock_bh(&ipip_lock);
+}
+
+static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+	u32 remote = parms->iph.daddr;
+	u32 local = parms->iph.saddr;
+	struct ip_tunnel *t, **tp, *nt;
+	struct net_device *dev;
+	unsigned h = 0;
+	int prio = 0;
+	char name[IFNAMSIZ];
+
+	if (remote) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	if (local) {
+		prio |= 1;
+		h ^= HASH(local);
+	}
+	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+			return t;
+	}
+	if (!create)
+		return NULL;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else {
+		int i;
+		for (i=1; i<100; i++) {
+			sprintf(name, "tunl%d", i);
+			if (__dev_get_by_name(name) == NULL)
+				break;
+		}
+		if (i==100)
+			goto failed;
+	}
+
+	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
+	if (dev == NULL)
+		return NULL;
+
+	nt = dev->priv;
+	SET_MODULE_OWNER(dev);
+	dev->init = ipip_tunnel_init;
+	nt->parms = *parms;
+
+	if (register_netdevice(dev) < 0) {
+		free_netdev(dev);
+		goto failed;
+	}
+
+	dev_hold(dev);
+	ipip_tunnel_link(nt);
+	/* Do not decrement MOD_USE_COUNT here. */
+	return nt;
+
+failed:
+	return NULL;
+}
+
+static void ipip_tunnel_uninit(struct net_device *dev)
+{
+	if (dev == ipip_fb_tunnel_dev) {
+		write_lock_bh(&ipip_lock);
+		tunnels_wc[0] = NULL;
+		write_unlock_bh(&ipip_lock);
+	} else
+		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
+	dev_put(dev);
+}
+
+static void ipip_err(struct sk_buff *skb, void *__unused)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+   8 bytes of packet payload. It means, that precise relaying of
+   ICMP in the real Internet is absolutely infeasible.
+ */
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct ip_tunnel *t;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* Soft state for pmtu is maintained by IP core. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	read_lock(&ipip_lock);
+	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+	if (t == NULL || t->parms.iph.daddr == 0)
+		goto out;
+	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+		goto out;
+
+	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+out:
+	read_unlock(&ipip_lock);
+	return;
+#else
+	struct iphdr *iph = (struct iphdr*)dp;
+	int hlen = iph->ihl<<2;
+	struct iphdr *eiph;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	int rel_type = 0;
+	int rel_code = 0;
+	int rel_info = 0;
+	struct sk_buff *skb2;
+	struct flowi fl;
+	struct rtable *rt;
+
+	if (len < hlen + sizeof(struct iphdr))
+		return;
+	eiph = (struct iphdr*)(dp + hlen);
+
+	switch (type) {
+	default:
+		return;
+	case ICMP_PARAMETERPROB:
+		if (skb->h.icmph->un.gateway < hlen)
+			return;
+
+		/* So... This guy found something strange INSIDE encapsulated
+		   packet. Well, he is fool, but what can we do ?
+		 */
+		rel_type = ICMP_PARAMETERPROB;
+		rel_info = skb->h.icmph->un.gateway - hlen;
+		break;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* And it is the only really necessary thing :-) */
+			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+			if (rel_info < hlen+68)
+				return;
+			rel_info -= hlen;
+			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+			if (rel_info > ntohs(eiph->tot_len))
+				return;
+			break;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe, it is just ether pollution. --ANK
+			 */
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	/* Prepare fake skb to feed it to icmp_send */
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		return;
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	skb_pull(skb2, skb->data - (u8*)eiph);
+	skb2->nh.raw = skb2->data;
+
+	/* Try to guess incoming interface */
+	memset(&fl, 0, sizeof(fl));
+	fl.fl4_daddr = eiph->saddr;
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_IPIP;
+	if (ip_route_output_key(&rt, &key)) {
+		kfree_skb(skb2);
+		return;
+	}
+	skb2->dev = rt->u.dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags&RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		fl.fl4_daddr = eiph->daddr;
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
+		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+			ip_rt_put(rt);
+			kfree_skb(skb2);
+			return;
+		}
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
+			kfree_skb(skb2);
+			return;
+		}
+	}
+
+	/* change mtu on this route */
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+		if (rel_info > dst_mtu(skb2->dst)) {
+			kfree_skb(skb2);
+			return;
+		}
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+		rel_info = htonl(rel_info);
+	} else if (type == ICMP_TIME_EXCEEDED) {
+		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+		if (t->parms.iph.ttl) {
+			rel_type = ICMP_DEST_UNREACH;
+			rel_code = ICMP_HOST_UNREACH;
+		}
+	}
+
+	icmp_send(skb2, rel_type, rel_code, rel_info);
+	kfree_skb(skb2);
+	return;
+#endif
+}
+
+static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+{
+	struct iphdr *inner_iph = skb->nh.iph;
+
+	if (INET_ECN_is_ce(outer_iph->tos))
+		IP_ECN_set_ce(inner_iph);
+}
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct ip_tunnel *tunnel;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+
+	iph = skb->nh.iph;
+
+	read_lock(&ipip_lock);
+	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			read_unlock(&ipip_lock);
+			kfree_skb(skb);
+			return 0;
+		}
+
+		secpath_reset(skb);
+
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = skb->data;
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->protocol = htons(ETH_P_IP);
+		skb->pkt_type = PACKET_HOST;
+
+		tunnel->stat.rx_packets++;
+		tunnel->stat.rx_bytes += skb->len;
+		skb->dev = tunnel->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		nf_reset(skb);
+		ipip_ecn_decapsulate(iph, skb);
+		netif_rx(skb);
+		read_unlock(&ipip_lock);
+		return 0;
+	}
+	read_unlock(&ipip_lock);
+
+out:
+	return -1;
+}
+
+/*
+ *	This function assumes it is being called from dev_queue_xmit()
+ *	and that skb is filled properly by that function.
+ */
+
+static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct net_device_stats *stats = &tunnel->stat;
+	struct iphdr  *tiph = &tunnel->parms.iph;
+	u8     tos = tunnel->parms.iph.tos;
+	u16    df = tiph->frag_off;
+	struct rtable *rt;     			/* Route to the other host */
+	struct net_device *tdev;			/* Device to other host */
+	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *iph;			/* Our new IP header */
+	int    max_headroom;			/* The extra header space needed */
+	u32    dst = tiph->daddr;
+	int    mtu;
+
+	if (tunnel->recursion++) {
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	if (skb->protocol != htons(ETH_P_IP))
+		goto tx_error;
+
+	if (tos&1)
+		tos = old_iph->tos;
+
+	if (!dst) {
+		/* NBMA tunnel */
+		if ((rt = (struct rtable*)skb->dst) == NULL) {
+			tunnel->stat.tx_fifo_errors++;
+			goto tx_error;
+		}
+		if ((dst = rt->rt_gateway) == 0)
+			goto tx_error_icmp;
+	}
+
+	{
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
+	}
+	tdev = rt->u.dst.dev;
+
+	if (tdev == dev) {
+		ip_rt_put(rt);
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	if (tiph->frag_off)
+		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	else
+		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+
+	if (mtu < 68) {
+		tunnel->stat.collisions++;
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	df |= (old_iph->frag_off&htons(IP_DF));
+
+	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+
+	if (tunnel->err_count > 0) {
+		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+			tunnel->err_count--;
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
+
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+  			stats->tx_dropped++;
+			dev_kfree_skb(skb);
+			tunnel->recursion--;
+			return 0;
+		}
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		dev_kfree_skb(skb);
+		skb = new_skb;
+		old_iph = skb->nh.iph;
+	}
+
+	skb->h.raw = skb->nh.raw;
+	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+
+	iph 			=	skb->nh.iph;
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+
+	if ((iph->ttl = tiph->ttl) == 0)
+		iph->ttl	=	old_iph->ttl;
+
+	nf_reset(skb);
+
+	IPTUNNEL_XMIT();
+	tunnel->recursion--;
+	return 0;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	stats->tx_errors++;
+	dev_kfree_skb(skb);
+	tunnel->recursion--;
+	return 0;
+}
+
+static int
+ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip_tunnel_parm p;
+	struct ip_tunnel *t;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == ipip_fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			t = ipip_tunnel_locate(&p, 0);
+		}
+		if (t == NULL)
+			t = (struct ip_tunnel*)dev->priv;
+		memcpy(&p, &t->parms, sizeof(p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+			goto done;
+		if (p.iph.ttl)
+			p.iph.frag_off |= htons(IP_DF);
+
+		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
+				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
+					err = -EINVAL;
+					break;
+				}
+				t = (struct ip_tunnel*)dev->priv;
+				ipip_tunnel_unlink(t);
+				t->parms.iph.saddr = p.iph.saddr;
+				t->parms.iph.daddr = p.iph.daddr;
+				memcpy(dev->dev_addr, &p.iph.saddr, 4);
+				memcpy(dev->broadcast, &p.iph.daddr, 4);
+				ipip_tunnel_link(t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+			if (cmd == SIOCCHGTUNNEL) {
+				t->parms.iph.ttl = p.iph.ttl;
+				t->parms.iph.tos = p.iph.tos;
+				t->parms.iph.frag_off = p.iph.frag_off;
+			}
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ipip_fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
+				goto done;
+			err = -EPERM;
+			if (t->dev == ipip_fb_tunnel_dev)
+				goto done;
+			dev = t->dev;
+		}
+		err = unregister_netdevice(dev);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
+{
+	return &(((struct ip_tunnel*)dev->priv)->stat);
+}
+
+static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static void ipip_tunnel_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->uninit		= ipip_tunnel_uninit;
+	dev->hard_start_xmit	= ipip_tunnel_xmit;
+	dev->get_stats		= ipip_tunnel_get_stats;
+	dev->do_ioctl		= ipip_tunnel_ioctl;
+	dev->change_mtu		= ipip_tunnel_change_mtu;
+	dev->destructor		= free_netdev;
+
+	dev->type		= ARPHRD_TUNNEL;
+	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
+	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->flags		= IFF_NOARP;
+	dev->iflink		= 0;
+	dev->addr_len		= 4;
+}
+
+static int ipip_tunnel_init(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+
+	tunnel = (struct ip_tunnel*)dev->priv;
+	iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu - sizeof(struct iphdr);
+	}
+	dev->iflink = tunnel->parms.link;
+
+	return 0;
+}
+
+static int __init ipip_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = dev->priv;
+	struct iphdr *iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	iph->version		= 4;
+	iph->protocol		= IPPROTO_IPIP;
+	iph->ihl		= 5;
+
+	dev_hold(dev);
+	tunnels_wc[0]		= tunnel;
+	return 0;
+}
+
+static struct xfrm_tunnel ipip_handler = {
+	.handler	=	ipip_rcv,
+	.err_handler	=	ipip_err,
+};
+
+static char banner[] __initdata =
+	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
+
+static int __init ipip_init(void)
+{
+	int err;
+
+	printk(banner);
+
+	if (xfrm4_tunnel_register(&ipip_handler) < 0) {
+		printk(KERN_INFO "ipip init: can't register tunnel\n");
+		return -EAGAIN;
+	}
+
+	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
+					   "tunl0",
+					   ipip_tunnel_setup);
+	if (!ipip_fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
+
+	if ((err = register_netdev(ipip_fb_tunnel_dev)))
+		goto err2;
+ out:
+	return err;
+ err2:
+	free_netdev(ipip_fb_tunnel_dev);
+ err1:
+	xfrm4_tunnel_deregister(&ipip_handler);
+	goto out;
+}
+
+static void __exit ipip_fini(void)
+{
+	if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
+		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
+
+	unregister_netdev(ipip_fb_tunnel_dev);
+}
+
+module_init(ipip_init);
+module_exit(ipip_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
new file mode 100644
index 00000000000..e21c049ec62
--- /dev/null
+++ b/net/ipv4/ipmr.c
@@ -0,0 +1,1900 @@
+/*
+ *	IP multicast routing support for mrouted 3.6/3.8
+ *
+ *		(c) 1995 Alan Cox, <alan@redhat.com>
+ *	  Linux Consultancy and Custom Driver Development
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
+ *
+ *	Fixes:
+ *	Michael Chastain	:	Incorrect size of copying.
+ *	Alan Cox		:	Added the cache manager code
+ *	Alan Cox		:	Fixed the clone/copy bug and device race.
+ *	Mike McLagan		:	Routing by source
+ *	Malcolm Beattie		:	Buffer handling fixes.
+ *	Alexey Kuznetsov	:	Double buffer free and other fixes.
+ *	SVR Anand		:	Fixed several multicast bugs and problems.
+ *	Alexey Kuznetsov	:	Status, optimisations and more.
+ *	Brad Parker		:	Better behaviour on mrouted upcall
+ *					overflow.
+ *      Carlos Picoto           :       PIMv1 Support
+ *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
+ *					Relax this requrement to work with older peers.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ipip.h>
+#include <net/checksum.h>
+
+#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
+#define CONFIG_IP_PIMSM	1
+#endif
+
+static struct sock *mroute_socket;
+
+
+/* Big lock, protecting vif table, mrt cache and mroute socket state.
+   Note that the changes are semaphored via rtnl_lock.
+ */
+
+static DEFINE_RWLOCK(mrt_lock);
+
+/*
+ *	Multicast router control variables
+ */
+
+static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
+static int maxvif;
+
+#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
+
+static int mroute_do_assert;				/* Set in PIM assert	*/
+static int mroute_do_pim;
+
+static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
+
+static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
+static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
+
+/* Special spinlock for queue of unresolved entries */
+static DEFINE_SPINLOCK(mfc_unres_lock);
+
+/* We return to original Alan's scheme. Hash table of resolved
+   entries is changed only in process context and protected
+   with weak lock mrt_lock. Queue of unresolved entries is protected
+   with strong spinlock mfc_unres_lock.
+
+   In this case data path is free of exclusive locks at all.
+ */
+
+static kmem_cache_t *mrt_cachep;
+
+static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
+static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
+static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
+
+#ifdef CONFIG_IP_PIMSM_V2
+static struct net_protocol pim_protocol;
+#endif
+
+static struct timer_list ipmr_expire_timer;
+
+/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
+
+static
+struct net_device *ipmr_new_tunnel(struct vifctl *v)
+{
+	struct net_device  *dev;
+
+	dev = __dev_get_by_name("tunl0");
+
+	if (dev) {
+		int err;
+		struct ifreq ifr;
+		mm_segment_t	oldfs;
+		struct ip_tunnel_parm p;
+		struct in_device  *in_dev;
+
+		memset(&p, 0, sizeof(p));
+		p.iph.daddr = v->vifc_rmt_addr.s_addr;
+		p.iph.saddr = v->vifc_lcl_addr.s_addr;
+		p.iph.version = 4;
+		p.iph.ihl = 5;
+		p.iph.protocol = IPPROTO_IPIP;
+		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+		ifr.ifr_ifru.ifru_data = (void*)&p;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+		set_fs(oldfs);
+
+		dev = NULL;
+
+		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
+			dev->flags |= IFF_MULTICAST;
+
+			in_dev = __in_dev_get(dev);
+			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
+				goto failure;
+			in_dev->cnf.rp_filter = 0;
+
+			if (dev_open(dev))
+				goto failure;
+		}
+	}
+	return dev;
+
+failure:
+	/* allow the register to be completed before unregistering. */
+	rtnl_unlock();
+	rtnl_lock();
+
+	unregister_netdevice(dev);
+	return NULL;
+}
+
+#ifdef CONFIG_IP_PIMSM
+
+static int reg_vif_num = -1;
+
+static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	read_lock(&mrt_lock);
+	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
+	((struct net_device_stats*)dev->priv)->tx_packets++;
+	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
+	read_unlock(&mrt_lock);
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
+{
+	return (struct net_device_stats*)dev->priv;
+}
+
+static void reg_vif_setup(struct net_device *dev)
+{
+	dev->type		= ARPHRD_PIMREG;
+	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
+	dev->flags		= IFF_NOARP;
+	dev->hard_start_xmit	= reg_vif_xmit;
+	dev->get_stats		= reg_vif_get_stats;
+	dev->destructor		= free_netdev;
+}
+
+static struct net_device *ipmr_reg_vif(void)
+{
+	struct net_device *dev;
+	struct in_device *in_dev;
+
+	dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
+			   reg_vif_setup);
+
+	if (dev == NULL)
+		return NULL;
+
+	if (register_netdevice(dev)) {
+		free_netdev(dev);
+		return NULL;
+	}
+	dev->iflink = 0;
+
+	if ((in_dev = inetdev_init(dev)) == NULL)
+		goto failure;
+
+	in_dev->cnf.rp_filter = 0;
+
+	if (dev_open(dev))
+		goto failure;
+
+	return dev;
+
+failure:
+	/* allow the register to be completed before unregistering. */
+	rtnl_unlock();
+	rtnl_lock();
+
+	unregister_netdevice(dev);
+	return NULL;
+}
+#endif
+
+/*
+ *	Delete a VIF entry
+ */
+ 
+static int vif_delete(int vifi)
+{
+	struct vif_device *v;
+	struct net_device *dev;
+	struct in_device *in_dev;
+
+	if (vifi < 0 || vifi >= maxvif)
+		return -EADDRNOTAVAIL;
+
+	v = &vif_table[vifi];
+
+	write_lock_bh(&mrt_lock);
+	dev = v->dev;
+	v->dev = NULL;
+
+	if (!dev) {
+		write_unlock_bh(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	}
+
+#ifdef CONFIG_IP_PIMSM
+	if (vifi == reg_vif_num)
+		reg_vif_num = -1;
+#endif
+
+	if (vifi+1 == maxvif) {
+		int tmp;
+		for (tmp=vifi-1; tmp>=0; tmp--) {
+			if (VIF_EXISTS(tmp))
+				break;
+		}
+		maxvif = tmp+1;
+	}
+
+	write_unlock_bh(&mrt_lock);
+
+	dev_set_allmulti(dev, -1);
+
+	if ((in_dev = __in_dev_get(dev)) != NULL) {
+		in_dev->cnf.mc_forwarding--;
+		ip_rt_multicast_event(in_dev);
+	}
+
+	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+		unregister_netdevice(dev);
+
+	dev_put(dev);
+	return 0;
+}
+
+/* Destroy an unresolved cache entry, killing queued skbs
+   and reporting error to netlink readers.
+ */
+
+static void ipmr_destroy_unres(struct mfc_cache *c)
+{
+	struct sk_buff *skb;
+
+	atomic_dec(&cache_resolve_queue_len);
+
+	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+		if (skb->nh.iph->version == 0) {
+			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+			nlh->nlmsg_type = NLMSG_ERROR;
+			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+			skb_trim(skb, nlh->nlmsg_len);
+			((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+		} else
+			kfree_skb(skb);
+	}
+
+	kmem_cache_free(mrt_cachep, c);
+}
+
+
+/* Single timer process for all the unresolved queue. */
+
+static void ipmr_expire_process(unsigned long dummy)
+{
+	unsigned long now;
+	unsigned long expires;
+	struct mfc_cache *c, **cp;
+
+	if (!spin_trylock(&mfc_unres_lock)) {
+		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
+		return;
+	}
+
+	if (atomic_read(&cache_resolve_queue_len) == 0)
+		goto out;
+
+	now = jiffies;
+	expires = 10*HZ;
+	cp = &mfc_unres_queue;
+
+	while ((c=*cp) != NULL) {
+		if (time_after(c->mfc_un.unres.expires, now)) {
+			unsigned long interval = c->mfc_un.unres.expires - now;
+			if (interval < expires)
+				expires = interval;
+			cp = &c->next;
+			continue;
+		}
+
+		*cp = c->next;
+
+		ipmr_destroy_unres(c);
+	}
+
+	if (atomic_read(&cache_resolve_queue_len))
+		mod_timer(&ipmr_expire_timer, jiffies + expires);
+
+out:
+	spin_unlock(&mfc_unres_lock);
+}
+
+/* Fill oifs list. It is called under write locked mrt_lock. */
+
+static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
+{
+	int vifi;
+
+	cache->mfc_un.res.minvif = MAXVIFS;
+	cache->mfc_un.res.maxvif = 0;
+	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
+
+	for (vifi=0; vifi<maxvif; vifi++) {
+		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
+			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
+			if (cache->mfc_un.res.minvif > vifi)
+				cache->mfc_un.res.minvif = vifi;
+			if (cache->mfc_un.res.maxvif <= vifi)
+				cache->mfc_un.res.maxvif = vifi + 1;
+		}
+	}
+}
+
+static int vif_add(struct vifctl *vifc, int mrtsock)
+{
+	int vifi = vifc->vifc_vifi;
+	struct vif_device *v = &vif_table[vifi];
+	struct net_device *dev;
+	struct in_device *in_dev;
+
+	/* Is vif busy ? */
+	if (VIF_EXISTS(vifi))
+		return -EADDRINUSE;
+
+	switch (vifc->vifc_flags) {
+#ifdef CONFIG_IP_PIMSM
+	case VIFF_REGISTER:
+		/*
+		 * Special Purpose VIF in PIM
+		 * All the packets will be sent to the daemon
+		 */
+		if (reg_vif_num >= 0)
+			return -EADDRINUSE;
+		dev = ipmr_reg_vif();
+		if (!dev)
+			return -ENOBUFS;
+		break;
+#endif
+	case VIFF_TUNNEL:	
+		dev = ipmr_new_tunnel(vifc);
+		if (!dev)
+			return -ENOBUFS;
+		break;
+	case 0:
+		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
+		if (!dev)
+			return -EADDRNOTAVAIL;
+		__dev_put(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ((in_dev = __in_dev_get(dev)) == NULL)
+		return -EADDRNOTAVAIL;
+	in_dev->cnf.mc_forwarding++;
+	dev_set_allmulti(dev, +1);
+	ip_rt_multicast_event(in_dev);
+
+	/*
+	 *	Fill in the VIF structures
+	 */
+	v->rate_limit=vifc->vifc_rate_limit;
+	v->local=vifc->vifc_lcl_addr.s_addr;
+	v->remote=vifc->vifc_rmt_addr.s_addr;
+	v->flags=vifc->vifc_flags;
+	if (!mrtsock)
+		v->flags |= VIFF_STATIC;
+	v->threshold=vifc->vifc_threshold;
+	v->bytes_in = 0;
+	v->bytes_out = 0;
+	v->pkt_in = 0;
+	v->pkt_out = 0;
+	v->link = dev->ifindex;
+	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+		v->link = dev->iflink;
+
+	/* And finish update writing critical data */
+	write_lock_bh(&mrt_lock);
+	dev_hold(dev);
+	v->dev=dev;
+#ifdef CONFIG_IP_PIMSM
+	if (v->flags&VIFF_REGISTER)
+		reg_vif_num = vifi;
+#endif
+	if (vifi+1 > maxvif)
+		maxvif = vifi+1;
+	write_unlock_bh(&mrt_lock);
+	return 0;
+}
+
+static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
+{
+	int line=MFC_HASH(mcastgrp,origin);
+	struct mfc_cache *c;
+
+	for (c=mfc_cache_array[line]; c; c = c->next) {
+		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
+			break;
+	}
+	return c;
+}
+
+/*
+ *	Allocate a multicast cache entry
+ */
+static struct mfc_cache *ipmr_cache_alloc(void)
+{
+	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
+	if(c==NULL)
+		return NULL;
+	memset(c, 0, sizeof(*c));
+	c->mfc_un.res.minvif = MAXVIFS;
+	return c;
+}
+
+static struct mfc_cache *ipmr_cache_alloc_unres(void)
+{
+	struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
+	if(c==NULL)
+		return NULL;
+	memset(c, 0, sizeof(*c));
+	skb_queue_head_init(&c->mfc_un.unres.unresolved);
+	c->mfc_un.unres.expires = jiffies + 10*HZ;
+	return c;
+}
+
+/*
+ *	A cache entry has gone into a resolved state from queued
+ */
+ 
+static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
+{
+	struct sk_buff *skb;
+
+	/*
+	 *	Play the pending entries through our router
+	 */
+
+	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+		if (skb->nh.iph->version == 0) {
+			int err;
+			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+
+			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+				nlh->nlmsg_len = skb->tail - (u8*)nlh;
+			} else {
+				nlh->nlmsg_type = NLMSG_ERROR;
+				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+				skb_trim(skb, nlh->nlmsg_len);
+				((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+			}
+			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+		} else
+			ip_mr_forward(skb, c, 0);
+	}
+}
+
+/*
+ *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
+ *	expects the following bizarre scheme.
+ *
+ *	Called under mrt_lock.
+ */
+ 
+static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
+{
+	struct sk_buff *skb;
+	int ihl = pkt->nh.iph->ihl<<2;
+	struct igmphdr *igmp;
+	struct igmpmsg *msg;
+	int ret;
+
+#ifdef CONFIG_IP_PIMSM
+	if (assert == IGMPMSG_WHOLEPKT)
+		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
+	else
+#endif
+		skb = alloc_skb(128, GFP_ATOMIC);
+
+	if(!skb)
+		return -ENOBUFS;
+
+#ifdef CONFIG_IP_PIMSM
+	if (assert == IGMPMSG_WHOLEPKT) {
+		/* Ugly, but we have no choice with this interface.
+		   Duplicate old header, fix ihl, length etc.
+		   And all this only to mangle msg->im_msgtype and
+		   to set msg->im_mbz to "mbz" :-)
+		 */
+		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
+		skb->nh.raw = skb->h.raw = (u8*)msg;
+		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+		msg->im_msgtype = IGMPMSG_WHOLEPKT;
+		msg->im_mbz = 0;
+ 		msg->im_vif = reg_vif_num;
+		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
+		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+	} else 
+#endif
+	{	
+		
+	/*
+	 *	Copy the IP header
+	 */
+
+	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
+	memcpy(skb->data,pkt->data,ihl);
+	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
+	msg = (struct igmpmsg*)skb->nh.iph;
+	msg->im_vif = vifi;
+	skb->dst = dst_clone(pkt->dst);
+
+	/*
+	 *	Add our header
+	 */
+
+	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
+	igmp->type	=
+	msg->im_msgtype = assert;
+	igmp->code 	=	0;
+	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
+	skb->h.raw = skb->nh.raw;
+        }
+
+	if (mroute_socket == NULL) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Deliver to mrouted
+	 */
+	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
+		kfree_skb(skb);
+	}
+
+	return ret;
+}
+
+/*
+ *	Queue a packet for resolution. It gets locked cache entry!
+ */
+ 
+static int
+ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
+{
+	int err;
+	struct mfc_cache *c;
+
+	spin_lock_bh(&mfc_unres_lock);
+	for (c=mfc_unres_queue; c; c=c->next) {
+		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
+		    c->mfc_origin == skb->nh.iph->saddr)
+			break;
+	}
+
+	if (c == NULL) {
+		/*
+		 *	Create a new entry if allowable
+		 */
+
+		if (atomic_read(&cache_resolve_queue_len)>=10 ||
+		    (c=ipmr_cache_alloc_unres())==NULL) {
+			spin_unlock_bh(&mfc_unres_lock);
+
+			kfree_skb(skb);
+			return -ENOBUFS;
+		}
+
+		/*
+		 *	Fill in the new cache entry
+		 */
+		c->mfc_parent=-1;
+		c->mfc_origin=skb->nh.iph->saddr;
+		c->mfc_mcastgrp=skb->nh.iph->daddr;
+
+		/*
+		 *	Reflect first query at mrouted.
+		 */
+		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
+			/* If the report failed throw the cache entry 
+			   out - Brad Parker
+			 */
+			spin_unlock_bh(&mfc_unres_lock);
+
+			kmem_cache_free(mrt_cachep, c);
+			kfree_skb(skb);
+			return err;
+		}
+
+		atomic_inc(&cache_resolve_queue_len);
+		c->next = mfc_unres_queue;
+		mfc_unres_queue = c;
+
+		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
+	}
+
+	/*
+	 *	See if we can append the packet
+	 */
+	if (c->mfc_un.unres.unresolved.qlen>3) {
+		kfree_skb(skb);
+		err = -ENOBUFS;
+	} else {
+		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
+		err = 0;
+	}
+
+	spin_unlock_bh(&mfc_unres_lock);
+	return err;
+}
+
+/*
+ *	MFC cache manipulation by user space mroute daemon
+ */
+
+static int ipmr_mfc_delete(struct mfcctl *mfc)
+{
+	int line;
+	struct mfc_cache *c, **cp;
+
+	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
+
+	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
+		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
+			write_lock_bh(&mrt_lock);
+			*cp = c->next;
+			write_unlock_bh(&mrt_lock);
+
+			kmem_cache_free(mrt_cachep, c);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
+{
+	int line;
+	struct mfc_cache *uc, *c, **cp;
+
+	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
+
+	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
+		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
+			break;
+	}
+
+	if (c != NULL) {
+		write_lock_bh(&mrt_lock);
+		c->mfc_parent = mfc->mfcc_parent;
+		ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+		if (!mrtsock)
+			c->mfc_flags |= MFC_STATIC;
+		write_unlock_bh(&mrt_lock);
+		return 0;
+	}
+
+	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+		return -EINVAL;
+
+	c=ipmr_cache_alloc();
+	if (c==NULL)
+		return -ENOMEM;
+
+	c->mfc_origin=mfc->mfcc_origin.s_addr;
+	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
+	c->mfc_parent=mfc->mfcc_parent;
+	ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+	if (!mrtsock)
+		c->mfc_flags |= MFC_STATIC;
+
+	write_lock_bh(&mrt_lock);
+	c->next = mfc_cache_array[line];
+	mfc_cache_array[line] = c;
+	write_unlock_bh(&mrt_lock);
+
+	/*
+	 *	Check to see if we resolved a queued list. If so we
+	 *	need to send on the frames and tidy up.
+	 */
+	spin_lock_bh(&mfc_unres_lock);
+	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
+	     cp = &uc->next) {
+		if (uc->mfc_origin == c->mfc_origin &&
+		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
+			*cp = uc->next;
+			if (atomic_dec_and_test(&cache_resolve_queue_len))
+				del_timer(&ipmr_expire_timer);
+			break;
+		}
+	}
+	spin_unlock_bh(&mfc_unres_lock);
+
+	if (uc) {
+		ipmr_cache_resolve(uc, c);
+		kmem_cache_free(mrt_cachep, uc);
+	}
+	return 0;
+}
+
+/*
+ *	Close the multicast socket, and clear the vif tables etc
+ */
+ 
+static void mroute_clean_tables(struct sock *sk)
+{
+	int i;
+		
+	/*
+	 *	Shut down all active vif entries
+	 */
+	for(i=0; i<maxvif; i++) {
+		if (!(vif_table[i].flags&VIFF_STATIC))
+			vif_delete(i);
+	}
+
+	/*
+	 *	Wipe the cache
+	 */
+	for (i=0;i<MFC_LINES;i++) {
+		struct mfc_cache *c, **cp;
+
+		cp = &mfc_cache_array[i];
+		while ((c = *cp) != NULL) {
+			if (c->mfc_flags&MFC_STATIC) {
+				cp = &c->next;
+				continue;
+			}
+			write_lock_bh(&mrt_lock);
+			*cp = c->next;
+			write_unlock_bh(&mrt_lock);
+
+			kmem_cache_free(mrt_cachep, c);
+		}
+	}
+
+	if (atomic_read(&cache_resolve_queue_len) != 0) {
+		struct mfc_cache *c;
+
+		spin_lock_bh(&mfc_unres_lock);
+		while (mfc_unres_queue != NULL) {
+			c = mfc_unres_queue;
+			mfc_unres_queue = c->next;
+			spin_unlock_bh(&mfc_unres_lock);
+
+			ipmr_destroy_unres(c);
+
+			spin_lock_bh(&mfc_unres_lock);
+		}
+		spin_unlock_bh(&mfc_unres_lock);
+	}
+}
+
+static void mrtsock_destruct(struct sock *sk)
+{
+	rtnl_lock();
+	if (sk == mroute_socket) {
+		ipv4_devconf.mc_forwarding--;
+
+		write_lock_bh(&mrt_lock);
+		mroute_socket=NULL;
+		write_unlock_bh(&mrt_lock);
+
+		mroute_clean_tables(sk);
+	}
+	rtnl_unlock();
+}
+
+/*
+ *	Socket options and virtual interface manipulation. The whole
+ *	virtual interface system is a complete heap, but unfortunately
+ *	that's how BSD mrouted happens to think. Maybe one day with a proper
+ *	MOSPF/PIM router set up we can clean this up.
+ */
+ 
+int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
+{
+	int ret;
+	struct vifctl vif;
+	struct mfcctl mfc;
+	
+	if(optname!=MRT_INIT)
+	{
+		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
+			return -EACCES;
+	}
+
+	switch(optname)
+	{
+		case MRT_INIT:
+			if (sk->sk_type != SOCK_RAW ||
+			    inet_sk(sk)->num != IPPROTO_IGMP)
+				return -EOPNOTSUPP;
+			if(optlen!=sizeof(int))
+				return -ENOPROTOOPT;
+
+			rtnl_lock();
+			if (mroute_socket) {
+				rtnl_unlock();
+				return -EADDRINUSE;
+			}
+
+			ret = ip_ra_control(sk, 1, mrtsock_destruct);
+			if (ret == 0) {
+				write_lock_bh(&mrt_lock);
+				mroute_socket=sk;
+				write_unlock_bh(&mrt_lock);
+
+				ipv4_devconf.mc_forwarding++;
+			}
+			rtnl_unlock();
+			return ret;
+		case MRT_DONE:
+			if (sk!=mroute_socket)
+				return -EACCES;
+			return ip_ra_control(sk, 0, NULL);
+		case MRT_ADD_VIF:
+		case MRT_DEL_VIF:
+			if(optlen!=sizeof(vif))
+				return -EINVAL;
+			if (copy_from_user(&vif,optval,sizeof(vif)))
+				return -EFAULT; 
+			if(vif.vifc_vifi >= MAXVIFS)
+				return -ENFILE;
+			rtnl_lock();
+			if (optname==MRT_ADD_VIF) {
+				ret = vif_add(&vif, sk==mroute_socket);
+			} else {
+				ret = vif_delete(vif.vifc_vifi);
+			}
+			rtnl_unlock();
+			return ret;
+
+		/*
+		 *	Manipulate the forwarding caches. These live
+		 *	in a sort of kernel/user symbiosis.
+		 */
+		case MRT_ADD_MFC:
+		case MRT_DEL_MFC:
+			if(optlen!=sizeof(mfc))
+				return -EINVAL;
+			if (copy_from_user(&mfc,optval, sizeof(mfc)))
+				return -EFAULT;
+			rtnl_lock();
+			if (optname==MRT_DEL_MFC)
+				ret = ipmr_mfc_delete(&mfc);
+			else
+				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+			rtnl_unlock();
+			return ret;
+		/*
+		 *	Control PIM assert.
+		 */
+		case MRT_ASSERT:
+		{
+			int v;
+			if(get_user(v,(int __user *)optval))
+				return -EFAULT;
+			mroute_do_assert=(v)?1:0;
+			return 0;
+		}
+#ifdef CONFIG_IP_PIMSM
+		case MRT_PIM:
+		{
+			int v, ret;
+			if(get_user(v,(int __user *)optval))
+				return -EFAULT;
+			v = (v)?1:0;
+			rtnl_lock();
+			ret = 0;
+			if (v != mroute_do_pim) {
+				mroute_do_pim = v;
+				mroute_do_assert = v;
+#ifdef CONFIG_IP_PIMSM_V2
+				if (mroute_do_pim)
+					ret = inet_add_protocol(&pim_protocol,
+								IPPROTO_PIM);
+				else
+					ret = inet_del_protocol(&pim_protocol,
+								IPPROTO_PIM);
+				if (ret < 0)
+					ret = -EAGAIN;
+#endif
+			}
+			rtnl_unlock();
+			return ret;
+		}
+#endif
+		/*
+		 *	Spurious command, or MRT_VERSION which you cannot
+		 *	set.
+		 */
+		default:
+			return -ENOPROTOOPT;
+	}
+}
+
+/*
+ *	Getsock opt support for the multicast routing system.
+ */
+ 
+int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
+{
+	int olr;
+	int val;
+
+	if(optname!=MRT_VERSION && 
+#ifdef CONFIG_IP_PIMSM
+	   optname!=MRT_PIM &&
+#endif
+	   optname!=MRT_ASSERT)
+		return -ENOPROTOOPT;
+
+	if (get_user(olr, optlen))
+		return -EFAULT;
+
+	olr = min_t(unsigned int, olr, sizeof(int));
+	if (olr < 0)
+		return -EINVAL;
+		
+	if(put_user(olr,optlen))
+		return -EFAULT;
+	if(optname==MRT_VERSION)
+		val=0x0305;
+#ifdef CONFIG_IP_PIMSM
+	else if(optname==MRT_PIM)
+		val=mroute_do_pim;
+#endif
+	else
+		val=mroute_do_assert;
+	if(copy_to_user(optval,&val,olr))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *	The IP multicast ioctl support routines.
+ */
+ 
+int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
+{
+	struct sioc_sg_req sr;
+	struct sioc_vif_req vr;
+	struct vif_device *vif;
+	struct mfc_cache *c;
+	
+	switch(cmd)
+	{
+		case SIOCGETVIFCNT:
+			if (copy_from_user(&vr,arg,sizeof(vr)))
+				return -EFAULT; 
+			if(vr.vifi>=maxvif)
+				return -EINVAL;
+			read_lock(&mrt_lock);
+			vif=&vif_table[vr.vifi];
+			if(VIF_EXISTS(vr.vifi))	{
+				vr.icount=vif->pkt_in;
+				vr.ocount=vif->pkt_out;
+				vr.ibytes=vif->bytes_in;
+				vr.obytes=vif->bytes_out;
+				read_unlock(&mrt_lock);
+
+				if (copy_to_user(arg,&vr,sizeof(vr)))
+					return -EFAULT;
+				return 0;
+			}
+			read_unlock(&mrt_lock);
+			return -EADDRNOTAVAIL;
+		case SIOCGETSGCNT:
+			if (copy_from_user(&sr,arg,sizeof(sr)))
+				return -EFAULT;
+
+			read_lock(&mrt_lock);
+			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+			if (c) {
+				sr.pktcnt = c->mfc_un.res.pkt;
+				sr.bytecnt = c->mfc_un.res.bytes;
+				sr.wrong_if = c->mfc_un.res.wrong_if;
+				read_unlock(&mrt_lock);
+
+				if (copy_to_user(arg,&sr,sizeof(sr)))
+					return -EFAULT;
+				return 0;
+			}
+			read_unlock(&mrt_lock);
+			return -EADDRNOTAVAIL;
+		default:
+			return -ENOIOCTLCMD;
+	}
+}
+
+
+static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct vif_device *v;
+	int ct;
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+	v=&vif_table[0];
+	for(ct=0;ct<maxvif;ct++,v++) {
+		if (v->dev==ptr)
+			vif_delete(ct);
+	}
+	return NOTIFY_DONE;
+}
+
+
+static struct notifier_block ip_mr_notifier={
+	.notifier_call = ipmr_device_event,
+};
+
+/*
+ * 	Encapsulate a packet by attaching a valid IPIP header to it.
+ *	This avoids tunnel drivers and other mess and gives us the speed so
+ *	important for multicast video.
+ */
+ 
+static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+
+	iph->version	= 	4;
+	iph->tos	=	skb->nh.iph->tos;
+	iph->ttl	=	skb->nh.iph->ttl;
+	iph->frag_off	=	0;
+	iph->daddr	=	daddr;
+	iph->saddr	=	saddr;
+	iph->protocol	=	IPPROTO_IPIP;
+	iph->ihl	=	5;
+	iph->tot_len	=	htons(skb->len);
+	ip_select_ident(iph, skb->dst, NULL);
+	ip_send_check(iph);
+
+	skb->h.ipiph = skb->nh.iph;
+	skb->nh.iph = iph;
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	nf_reset(skb);
+}
+
+static inline int ipmr_forward_finish(struct sk_buff *skb)
+{
+	struct ip_options * opt	= &(IPCB(skb)->opt);
+
+	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+
+	if (unlikely(opt->optlen))
+		ip_forward_options(skb);
+
+	return dst_output(skb);
+}
+
+/*
+ *	Processing handlers for ipmr_forward
+ */
+
+static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct vif_device *vif = &vif_table[vifi];
+	struct net_device *dev;
+	struct rtable *rt;
+	int    encap = 0;
+
+	if (vif->dev == NULL)
+		goto out_free;
+
+#ifdef CONFIG_IP_PIMSM
+	if (vif->flags & VIFF_REGISTER) {
+		vif->pkt_out++;
+		vif->bytes_out+=skb->len;
+		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
+		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
+		kfree_skb(skb);
+		return;
+	}
+#endif
+
+	if (vif->flags&VIFF_TUNNEL) {
+		struct flowi fl = { .oif = vif->link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = vif->remote,
+						.saddr = vif->local,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl))
+			goto out_free;
+		encap = sizeof(struct iphdr);
+	} else {
+		struct flowi fl = { .oif = vif->link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_IPIP };
+		if (ip_route_output_key(&rt, &fl))
+			goto out_free;
+	}
+
+	dev = rt->u.dst.dev;
+
+	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+		/* Do not fragment multicasts. Alas, IPv4 does not
+		   allow to send ICMP, so that packets will disappear
+		   to blackhole.
+		 */
+
+		IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+		ip_rt_put(rt);
+		goto out_free;
+	}
+
+	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
+
+	if (skb_cow(skb, encap)) {
+ 		ip_rt_put(rt);
+		goto out_free;
+	}
+
+	vif->pkt_out++;
+	vif->bytes_out+=skb->len;
+
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+	iph = skb->nh.iph;
+	ip_decrease_ttl(iph);
+
+	/* FIXME: forward and output firewalls used to be called here.
+	 * What do we do with netfilter? -- RR */
+	if (vif->flags & VIFF_TUNNEL) {
+		ip_encap(skb, vif->local, vif->remote);
+		/* FIXME: extra output firewall step used to be here. --RR */
+		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
+		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
+	}
+
+	IPCB(skb)->flags |= IPSKB_FORWARDED;
+
+	/*
+	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+	 * not only before forwarding, but after forwarding on all output
+	 * interfaces. It is clear, if mrouter runs a multicasting
+	 * program, it should receive packets not depending to what interface
+	 * program is joined.
+	 * If we will not make it, the program will have to join on all
+	 * interfaces. On the other hand, multihoming host (or router, but
+	 * not mrouter) cannot join to more than one interface - it will
+	 * result in receiving multiple packets.
+	 */
+	NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 
+		ipmr_forward_finish);
+	return;
+
+out_free:
+	kfree_skb(skb);
+	return;
+}
+
+static int ipmr_find_vif(struct net_device *dev)
+{
+	int ct;
+	for (ct=maxvif-1; ct>=0; ct--) {
+		if (vif_table[ct].dev == dev)
+			break;
+	}
+	return ct;
+}
+
+/* "local" means that we should preserve one skb (for local delivery) */
+
+static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
+{
+	int psend = -1;
+	int vif, ct;
+
+	vif = cache->mfc_parent;
+	cache->mfc_un.res.pkt++;
+	cache->mfc_un.res.bytes += skb->len;
+
+	/*
+	 * Wrong interface: drop packet and (maybe) send PIM assert.
+	 */
+	if (vif_table[vif].dev != skb->dev) {
+		int true_vifi;
+
+		if (((struct rtable*)skb->dst)->fl.iif == 0) {
+			/* It is our own packet, looped back.
+			   Very complicated situation...
+
+			   The best workaround until routing daemons will be
+			   fixed is not to redistribute packet, if it was
+			   send through wrong interface. It means, that
+			   multicast applications WILL NOT work for
+			   (S,G), which have default multicast route pointing
+			   to wrong oif. In any case, it is not a good
+			   idea to use multicasting applications on router.
+			 */
+			goto dont_forward;
+		}
+
+		cache->mfc_un.res.wrong_if++;
+		true_vifi = ipmr_find_vif(skb->dev);
+
+		if (true_vifi >= 0 && mroute_do_assert &&
+		    /* pimsm uses asserts, when switching from RPT to SPT,
+		       so that we cannot check that packet arrived on an oif.
+		       It is bad, but otherwise we would need to move pretty
+		       large chunk of pimd to kernel. Ough... --ANK
+		     */
+		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
+		    time_after(jiffies, 
+			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
+			cache->mfc_un.res.last_assert = jiffies;
+			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
+		}
+		goto dont_forward;
+	}
+
+	vif_table[vif].pkt_in++;
+	vif_table[vif].bytes_in+=skb->len;
+
+	/*
+	 *	Forward the frame
+	 */
+	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
+		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
+			if (psend != -1) {
+				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+				if (skb2)
+					ipmr_queue_xmit(skb2, cache, psend);
+			}
+			psend=ct;
+		}
+	}
+	if (psend != -1) {
+		if (local) {
+			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+			if (skb2)
+				ipmr_queue_xmit(skb2, cache, psend);
+		} else {
+			ipmr_queue_xmit(skb, cache, psend);
+			return 0;
+		}
+	}
+
+dont_forward:
+	if (!local)
+		kfree_skb(skb);
+	return 0;
+}
+
+
+/*
+ *	Multicast packets for forwarding arrive here
+ */
+
+int ip_mr_input(struct sk_buff *skb)
+{
+	struct mfc_cache *cache;
+	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
+
+	/* Packet is looped back after forward, it should not be
+	   forwarded second time, but still can be delivered locally.
+	 */
+	if (IPCB(skb)->flags&IPSKB_FORWARDED)
+		goto dont_forward;
+
+	if (!local) {
+		    if (IPCB(skb)->opt.router_alert) {
+			    if (ip_call_ra_chain(skb))
+				    return 0;
+		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
+			    /* IGMPv1 (and broken IGMPv2 implementations sort of
+			       Cisco IOS <= 11.2(8)) do not put router alert
+			       option to IGMP packets destined to routable
+			       groups. It is very bad, because it means
+			       that we can forward NO IGMP messages.
+			     */
+			    read_lock(&mrt_lock);
+			    if (mroute_socket) {
+				    raw_rcv(mroute_socket, skb);
+				    read_unlock(&mrt_lock);
+				    return 0;
+			    }
+			    read_unlock(&mrt_lock);
+		    }
+	}
+
+	read_lock(&mrt_lock);
+	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+
+	/*
+	 *	No usable cache entry
+	 */
+	if (cache==NULL) {
+		int vif;
+
+		if (local) {
+			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+			ip_local_deliver(skb);
+			if (skb2 == NULL) {
+				read_unlock(&mrt_lock);
+				return -ENOBUFS;
+			}
+			skb = skb2;
+		}
+
+		vif = ipmr_find_vif(skb->dev);
+		if (vif >= 0) {
+			int err = ipmr_cache_unresolved(vif, skb);
+			read_unlock(&mrt_lock);
+
+			return err;
+		}
+		read_unlock(&mrt_lock);
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	ip_mr_forward(skb, cache, local);
+
+	read_unlock(&mrt_lock);
+
+	if (local)
+		return ip_local_deliver(skb);
+
+	return 0;
+
+dont_forward:
+	if (local)
+		return ip_local_deliver(skb);
+	kfree_skb(skb);
+	return 0;
+}
+
+#ifdef CONFIG_IP_PIMSM_V1
+/*
+ * Handle IGMP messages of PIMv1
+ */
+
+int pim_rcv_v1(struct sk_buff * skb)
+{
+	struct igmphdr *pim;
+	struct iphdr   *encap;
+	struct net_device  *reg_dev = NULL;
+
+	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 
+		goto drop;
+
+	pim = (struct igmphdr*)skb->h.raw;
+
+        if (!mroute_do_pim ||
+	    skb->len < sizeof(*pim) + sizeof(*encap) ||
+	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 
+		goto drop;
+
+	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+	/*
+	   Check that:
+	   a. packet is really destinted to a multicast group
+	   b. packet is not a NULL-REGISTER
+	   c. packet is not truncated
+	 */
+	if (!MULTICAST(encap->daddr) ||
+	    encap->tot_len == 0 ||
+	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
+		goto drop;
+
+	read_lock(&mrt_lock);
+	if (reg_vif_num >= 0)
+		reg_dev = vif_table[reg_vif_num].dev;
+	if (reg_dev)
+		dev_hold(reg_dev);
+	read_unlock(&mrt_lock);
+
+	if (reg_dev == NULL) 
+		goto drop;
+
+	skb->mac.raw = skb->nh.raw;
+	skb_pull(skb, (u8*)encap - skb->data);
+	skb->nh.iph = (struct iphdr *)skb->data;
+	skb->dev = reg_dev;
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	skb->protocol = htons(ETH_P_IP);
+	skb->ip_summed = 0;
+	skb->pkt_type = PACKET_HOST;
+	dst_release(skb->dst);
+	skb->dst = NULL;
+	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	nf_reset(skb);
+	netif_rx(skb);
+	dev_put(reg_dev);
+	return 0;
+ drop:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_IP_PIMSM_V2
+static int pim_rcv(struct sk_buff * skb)
+{
+	struct pimreghdr *pim;
+	struct iphdr   *encap;
+	struct net_device  *reg_dev = NULL;
+
+	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 
+		goto drop;
+
+	pim = (struct pimreghdr*)skb->h.raw;
+        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
+	    (pim->flags&PIM_NULL_REGISTER) ||
+	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 
+	     (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 
+		goto drop;
+
+	/* check if the inner packet is destined to mcast group */
+	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+	if (!MULTICAST(encap->daddr) ||
+	    encap->tot_len == 0 ||
+	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
+		goto drop;
+
+	read_lock(&mrt_lock);
+	if (reg_vif_num >= 0)
+		reg_dev = vif_table[reg_vif_num].dev;
+	if (reg_dev)
+		dev_hold(reg_dev);
+	read_unlock(&mrt_lock);
+
+	if (reg_dev == NULL) 
+		goto drop;
+
+	skb->mac.raw = skb->nh.raw;
+	skb_pull(skb, (u8*)encap - skb->data);
+	skb->nh.iph = (struct iphdr *)skb->data;
+	skb->dev = reg_dev;
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+	skb->protocol = htons(ETH_P_IP);
+	skb->ip_summed = 0;
+	skb->pkt_type = PACKET_HOST;
+	dst_release(skb->dst);
+	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+	skb->dst = NULL;
+	nf_reset(skb);
+	netif_rx(skb);
+	dev_put(reg_dev);
+	return 0;
+ drop:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
+static int
+ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
+{
+	int ct;
+	struct rtnexthop *nhp;
+	struct net_device *dev = vif_table[c->mfc_parent].dev;
+	u8 *b = skb->tail;
+	struct rtattr *mp_head;
+
+	if (dev)
+		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
+
+	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
+
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (c->mfc_un.res.ttls[ct] < 255) {
+			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+				goto rtattr_failure;
+			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+			nhp->rtnh_flags = 0;
+			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
+			nhp->rtnh_len = sizeof(*nhp);
+		}
+	}
+	mp_head->rta_type = RTA_MULTIPATH;
+	mp_head->rta_len = skb->tail - (u8*)mp_head;
+	rtm->rtm_type = RTN_MULTICAST;
+	return 1;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -EMSGSIZE;
+}
+
+int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+{
+	int err;
+	struct mfc_cache *cache;
+	struct rtable *rt = (struct rtable*)skb->dst;
+
+	read_lock(&mrt_lock);
+	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
+
+	if (cache==NULL) {
+		struct net_device *dev;
+		int vif;
+
+		if (nowait) {
+			read_unlock(&mrt_lock);
+			return -EAGAIN;
+		}
+
+		dev = skb->dev;
+		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
+			read_unlock(&mrt_lock);
+			return -ENODEV;
+		}
+		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
+		skb->nh.iph->saddr = rt->rt_src;
+		skb->nh.iph->daddr = rt->rt_dst;
+		skb->nh.iph->version = 0;
+		err = ipmr_cache_unresolved(vif, skb);
+		read_unlock(&mrt_lock);
+		return err;
+	}
+
+	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+		cache->mfc_flags |= MFC_NOTIFY;
+	err = ipmr_fill_mroute(skb, cache, rtm);
+	read_unlock(&mrt_lock);
+	return err;
+}
+
+#ifdef CONFIG_PROC_FS	
+/*
+ *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
+ */
+struct ipmr_vif_iter {
+	int ct;
+};
+
+static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
+					   loff_t pos)
+{
+	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
+		if(!VIF_EXISTS(iter->ct))
+			continue;
+		if (pos-- == 0) 
+			return &vif_table[iter->ct];
+	}
+	return NULL;
+}
+
+static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&mrt_lock);
+	return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 
+		: SEQ_START_TOKEN;
+}
+
+static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ipmr_vif_iter *iter = seq->private;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ipmr_vif_seq_idx(iter, 0);
+	
+	while (++iter->ct < maxvif) {
+		if(!VIF_EXISTS(iter->ct))
+			continue;
+		return &vif_table[iter->ct];
+	}
+	return NULL;
+}
+
+static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&mrt_lock);
+}
+
+static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, 
+			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
+	} else {
+		const struct vif_device *vif = v;
+		const char *name =  vif->dev ? vif->dev->name : "none";
+
+		seq_printf(seq,
+			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
+			   vif - vif_table,
+			   name, vif->bytes_in, vif->pkt_in, 
+			   vif->bytes_out, vif->pkt_out,
+			   vif->flags, vif->local, vif->remote);
+	}
+	return 0;
+}
+
+static struct seq_operations ipmr_vif_seq_ops = {
+	.start = ipmr_vif_seq_start,
+	.next  = ipmr_vif_seq_next,
+	.stop  = ipmr_vif_seq_stop,
+	.show  = ipmr_vif_seq_show,
+};
+
+static int ipmr_vif_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &ipmr_vif_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	s->ct = 0;
+	seq = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+
+}
+
+static struct file_operations ipmr_vif_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ipmr_vif_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+struct ipmr_mfc_iter {
+	struct mfc_cache **cache;
+	int ct;
+};
+
+
+static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
+{
+	struct mfc_cache *mfc;
+
+	it->cache = mfc_cache_array;
+	read_lock(&mrt_lock);
+	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 
+		for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 
+			if (pos-- == 0) 
+				return mfc;
+	read_unlock(&mrt_lock);
+
+	it->cache = &mfc_unres_queue;
+	spin_lock_bh(&mfc_unres_lock);
+	for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 
+		if (pos-- == 0)
+			return mfc;
+	spin_unlock_bh(&mfc_unres_lock);
+
+	it->cache = NULL;
+	return NULL;
+}
+
+
+static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ipmr_mfc_iter *it = seq->private;
+	it->cache = NULL;
+	it->ct = 0;
+	return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 
+		: SEQ_START_TOKEN;
+}
+
+static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct mfc_cache *mfc = v;
+	struct ipmr_mfc_iter *it = seq->private;
+
+	++*pos;
+
+	if (v == SEQ_START_TOKEN)
+		return ipmr_mfc_seq_idx(seq->private, 0);
+
+	if (mfc->next)
+		return mfc->next;
+	
+	if (it->cache == &mfc_unres_queue) 
+		goto end_of_list;
+
+	BUG_ON(it->cache != mfc_cache_array);
+
+	while (++it->ct < MFC_LINES) {
+		mfc = mfc_cache_array[it->ct];
+		if (mfc)
+			return mfc;
+	}
+
+	/* exhausted cache_array, show unresolved */
+	read_unlock(&mrt_lock);
+	it->cache = &mfc_unres_queue;
+	it->ct = 0;
+		
+	spin_lock_bh(&mfc_unres_lock);
+	mfc = mfc_unres_queue;
+	if (mfc) 
+		return mfc;
+
+ end_of_list:
+	spin_unlock_bh(&mfc_unres_lock);
+	it->cache = NULL;
+
+	return NULL;
+}
+
+static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+	struct ipmr_mfc_iter *it = seq->private;
+
+	if (it->cache == &mfc_unres_queue)
+		spin_unlock_bh(&mfc_unres_lock);
+	else if (it->cache == mfc_cache_array)
+		read_unlock(&mrt_lock);
+}
+
+static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
+{
+	int n;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, 
+		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
+	} else {
+		const struct mfc_cache *mfc = v;
+		const struct ipmr_mfc_iter *it = seq->private;
+		
+		seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
+			   (unsigned long) mfc->mfc_mcastgrp,
+			   (unsigned long) mfc->mfc_origin,
+			   mfc->mfc_parent,
+			   mfc->mfc_un.res.pkt,
+			   mfc->mfc_un.res.bytes,
+			   mfc->mfc_un.res.wrong_if);
+
+		if (it->cache != &mfc_unres_queue) {
+			for(n = mfc->mfc_un.res.minvif; 
+			    n < mfc->mfc_un.res.maxvif; n++ ) {
+				if(VIF_EXISTS(n) 
+				   && mfc->mfc_un.res.ttls[n] < 255)
+				seq_printf(seq, 
+					   " %2d:%-3d", 
+					   n, mfc->mfc_un.res.ttls[n]);
+			}
+		}
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+static struct seq_operations ipmr_mfc_seq_ops = {
+	.start = ipmr_mfc_seq_start,
+	.next  = ipmr_mfc_seq_next,
+	.stop  = ipmr_mfc_seq_stop,
+	.show  = ipmr_mfc_seq_show,
+};
+
+static int ipmr_mfc_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &ipmr_mfc_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+
+}
+
+static struct file_operations ipmr_mfc_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ipmr_mfc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+#endif	
+
+#ifdef CONFIG_IP_PIMSM_V2
+static struct net_protocol pim_protocol = {
+	.handler	=	pim_rcv,
+};
+#endif
+
+
+/*
+ *	Setup for IP multicast routing
+ */
+ 
+void __init ip_mr_init(void)
+{
+	mrt_cachep = kmem_cache_create("ip_mrt_cache",
+				       sizeof(struct mfc_cache),
+				       0, SLAB_HWCACHE_ALIGN,
+				       NULL, NULL);
+	if (!mrt_cachep)
+		panic("cannot allocate ip_mrt_cache");
+
+	init_timer(&ipmr_expire_timer);
+	ipmr_expire_timer.function=ipmr_expire_process;
+	register_netdevice_notifier(&ip_mr_notifier);
+#ifdef CONFIG_PROC_FS	
+	proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
+	proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
+#endif	
+}
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
new file mode 100644
index 00000000000..63a82b4b64b
--- /dev/null
+++ b/net/ipv4/ipvs/Kconfig
@@ -0,0 +1,244 @@
+#
+# IP Virtual Server configuration
+#
+menu	"IP: Virtual Server Configuration"
+	depends on INET && NETFILTER
+
+config	IP_VS
+	tristate "IP virtual server support (EXPERIMENTAL)"
+	depends on INET && NETFILTER
+	---help---
+	  IP Virtual Server support will let you build a high-performance
+	  virtual server based on cluster of two or more real servers. This
+	  option must be enabled for at least one of the clustered computers
+	  that will take care of intercepting incoming connections to a
+	  single IP address and scheduling them to real servers.
+
+	  Three request dispatching techniques are implemented, they are
+	  virtual server via NAT, virtual server via tunneling and virtual
+	  server via direct routing. The several scheduling algorithms can
+	  be used to choose which server the connection is directed to,
+	  thus load balancing can be achieved among the servers.  For more
+	  information and its administration program, please visit the
+	  following URL: <http://www.linuxvirtualserver.org/>.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_DEBUG
+	bool "IP virtual server debugging"
+	depends on IP_VS
+	---help---
+	  Say Y here if you want to get additional messages useful in
+	  debugging the IP virtual server code. You can change the debug
+	  level in /proc/sys/net/ipv4/vs/debug_level
+
+config	IP_VS_TAB_BITS
+	int "IPVS connection table size (the Nth power of 2)"
+	depends on IP_VS 
+	default "12" 
+	---help---
+	  The IPVS connection hash table uses the chaining scheme to handle
+	  hash collisions. Using a big IPVS connection hash table will greatly
+	  reduce conflicts when there are hundreds of thousands of connections
+	  in the hash table.
+
+	  Note the table size must be power of 2. The table size will be the
+	  value of 2 to the your input number power. The number to choose is
+	  from 8 to 20, the default number is 12, which means the table size
+	  is 4096. Don't input the number too small, otherwise you will lose
+	  performance on it. You can adapt the table size yourself, according
+	  to your virtual server application. It is good to set the table size
+	  not far less than the number of connections per second multiplying
+	  average lasting time of connection in the table.  For example, your
+	  virtual server gets 200 connections per second, the connection lasts
+	  for 200 seconds in average in the connection table, the table size
+	  should be not far less than 200x200, it is good to set the table
+	  size 32768 (2**15).
+
+	  Another note that each connection occupies 128 bytes effectively and
+	  each hash entry uses 8 bytes, so you can estimate how much memory is
+	  needed for your box.
+
+comment "IPVS transport protocol load balancing support"
+        depends on IP_VS
+
+config	IP_VS_PROTO_TCP
+	bool "TCP load balancing support"
+	depends on IP_VS
+	---help---
+	  This option enables support for load balancing TCP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_UDP
+	bool "UDP load balancing support"
+	depends on IP_VS
+	---help---
+	  This option enables support for load balancing UDP transport
+	  protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_ESP
+	bool "ESP load balancing support"
+	depends on IP_VS
+	---help---
+	  This option enables support for load balancing ESP (Encapsultion
+	  Security Payload) transport protocol. Say Y if unsure.
+
+config	IP_VS_PROTO_AH
+	bool "AH load balancing support"
+	depends on IP_VS
+	---help---
+	  This option enables support for load balancing AH (Authentication
+	  Header) transport protocol. Say Y if unsure.
+
+comment "IPVS scheduler"
+        depends on IP_VS
+
+config	IP_VS_RR
+	tristate "round-robin scheduling"
+	depends on IP_VS
+	---help---
+	  The robin-robin scheduling algorithm simply directs network
+	  connections to different real servers in a round-robin manner.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+ 
+config	IP_VS_WRR
+        tristate "weighted round-robin scheduling" 
+	depends on IP_VS
+	---help---
+	  The weighted robin-robin scheduling algorithm directs network
+	  connections to different real servers based on server weights
+	  in a round-robin manner. Servers with higher weights receive
+	  new connections first than those with less weights, and servers
+	  with higher weights get more connections than those with less
+	  weights and servers with equal weights get equal connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LC
+        tristate "least-connection scheduling"
+        depends on IP_VS
+	---help---
+	  The least-connection scheduling algorithm directs network
+	  connections to the server with the least number of active 
+	  connections.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_WLC
+        tristate "weighted least-connection scheduling"
+        depends on IP_VS
+	---help---
+	  The weighted least-connection scheduling algorithm directs network
+	  connections to the server with the least active connections
+	  normalized by the server weight.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_LBLC
+	tristate "locality-based least-connection scheduling"
+        depends on IP_VS
+	---help---
+	  The locality-based least-connection scheduling algorithm is for
+	  destination IP load balancing. It is usually used in cache cluster.
+	  This algorithm usually directs packet destined for an IP address to
+	  its server if the server is alive and under load. If the server is
+	  overloaded (its active connection numbers is larger than its weight)
+	  and there is a server in its half load, then allocate the weighted
+	  least-connection server to this IP address.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config  IP_VS_LBLCR
+	tristate "locality-based least-connection with replication scheduling"
+        depends on IP_VS
+	---help---
+	  The locality-based least-connection with replication scheduling
+	  algorithm is also for destination IP load balancing. It is 
+	  usually used in cache cluster. It differs from the LBLC scheduling
+	  as follows: the load balancer maintains mappings from a target
+	  to a set of server nodes that can serve the target. Requests for
+	  a target are assigned to the least-connection node in the target's
+	  server set. If all the node in the server set are over loaded,
+	  it picks up a least-connection node in the cluster and adds it
+	  in the sever set for the target. If the server set has not been
+	  modified for the specified time, the most loaded node is removed
+	  from the server set, in order to avoid high degree of replication.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_DH
+	tristate "destination hashing scheduling"
+        depends on IP_VS
+	---help---
+	  The destination hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their destination IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SH
+	tristate "source hashing scheduling"
+        depends on IP_VS
+	---help---
+	  The source hashing scheduling algorithm assigns network
+	  connections to the servers through looking up a statically assigned
+	  hash table by their source IP addresses.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_SED
+	tristate "shortest expected delay scheduling"
+        depends on IP_VS
+	---help---
+	  The shortest expected delay scheduling algorithm assigns network
+	  connections to the server with the shortest expected delay. The 
+	  expected delay that the job will experience is (Ci + 1) / Ui if 
+	  sent to the ith server, in which Ci is the number of connections
+	  on the the ith server and Ui is the fixed service rate (weight)
+	  of the ith server.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+config	IP_VS_NQ
+	tristate "never queue scheduling"
+        depends on IP_VS
+	---help---
+	  The never queue scheduling algorithm adopts a two-speed model.
+	  When there is an idle server available, the job will be sent to
+	  the idle server, instead of waiting for a fast one. When there
+	  is no idle server available, the job will be sent to the server
+	  that minimize its expected delay (The Shortest Expected Delay
+	  scheduling algorithm).
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+comment 'IPVS application helper'
+	depends on IP_VS
+
+config	IP_VS_FTP
+  	tristate "FTP protocol helper"
+        depends on IP_VS && IP_VS_PROTO_TCP
+	---help---
+	  FTP is a protocol that transfers IP address and/or port number in
+	  the payload. In the virtual server via Network Address Translation,
+	  the IP address and port number of real servers cannot be sent to
+	  clients in ftp connections directly, so FTP protocol helper is
+	  required for tracking the connection and mangling it back to that of
+	  virtual service.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
+endmenu
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
new file mode 100644
index 00000000000..a788461a40c
--- /dev/null
+++ b/net/ipv4/ipvs/Makefile
@@ -0,0 +1,34 @@
+#
+# Makefile for the IPVS modules on top of IPv4.
+#
+
+# IPVS transport protocol load balancing support
+ip_vs_proto-objs-y :=
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
+
+ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
+		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
+		ip_vs_est.o ip_vs_proto.o ip_vs_proto_icmp.o		   \
+		$(ip_vs_proto-objs-y)
+
+
+# IPVS core
+obj-$(CONFIG_IP_VS) += ip_vs.o
+
+# IPVS schedulers
+obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
+obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
+obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
+obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
+obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
+obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
+obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+
+# IPVS application helpers
+obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
new file mode 100644
index 00000000000..d9212addd19
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -0,0 +1,658 @@
+/*
+ * ip_vs_app.c: Application module support for IPVS
+ *
+ * Version:     $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses).
+ *
+ *		IP_MASQ_APP application masquerading module
+ *
+ * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/ip_vs.h>
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+EXPORT_SYMBOL(register_ip_vs_app_inc);
+
+/* ipvs application list head */
+static LIST_HEAD(ip_vs_app_list);
+static DECLARE_MUTEX(__ip_vs_app_mutex);
+
+
+/*
+ *	Get an ip_vs_app object
+ */
+static inline int ip_vs_app_get(struct ip_vs_app *app)
+{
+	/* test and get the module atomically */
+	if (app->module)
+		return try_module_get(app->module);
+	else
+		return 1;
+}
+
+
+static inline void ip_vs_app_put(struct ip_vs_app *app)
+{
+	if (app->module)
+		module_put(app->module);
+}
+
+
+/*
+ *	Allocate/initialize app incarnation and register it in proto apps.
+ */
+static int
+ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	struct ip_vs_protocol *pp;
+	struct ip_vs_app *inc;
+	int ret;
+
+	if (!(pp = ip_vs_proto_get(proto)))
+		return -EPROTONOSUPPORT;
+
+	if (!pp->unregister_app)
+		return -EOPNOTSUPP;
+
+	inc = kmalloc(sizeof(struct ip_vs_app), GFP_KERNEL);
+	if (!inc)
+		return -ENOMEM;
+	memcpy(inc, app, sizeof(*inc));
+	INIT_LIST_HEAD(&inc->p_list);
+	INIT_LIST_HEAD(&inc->incs_list);
+	inc->app = app;
+	inc->port = htons(port);
+	atomic_set(&inc->usecnt, 0);
+
+	if (app->timeouts) {
+		inc->timeout_table =
+			ip_vs_create_timeout_table(app->timeouts,
+						   app->timeouts_size);
+		if (!inc->timeout_table) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = pp->register_app(inc);
+	if (ret)
+		goto out;
+
+	list_add(&inc->a_list, &app->incs_list);
+	IP_VS_DBG(9, "%s application %s:%u registered\n",
+		  pp->name, inc->name, inc->port);
+
+	return 0;
+
+  out:
+	if (inc->timeout_table)
+		kfree(inc->timeout_table);
+	kfree(inc);
+	return ret;
+}
+
+
+/*
+ *	Release app incarnation
+ */
+static void
+ip_vs_app_inc_release(struct ip_vs_app *inc)
+{
+	struct ip_vs_protocol *pp;
+
+	if (!(pp = ip_vs_proto_get(inc->protocol)))
+		return;
+
+	if (pp->unregister_app)
+		pp->unregister_app(inc);
+
+	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
+		  pp->name, inc->name, inc->port);
+
+	list_del(&inc->a_list);
+
+	if (inc->timeout_table != NULL)
+		kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+
+/*
+ *	Get reference to app inc (only called from softirq)
+ *
+ */
+int ip_vs_app_inc_get(struct ip_vs_app *inc)
+{
+	int result;
+
+	atomic_inc(&inc->usecnt);
+	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
+		atomic_dec(&inc->usecnt);
+	return result;
+}
+
+
+/*
+ *	Put the app inc (only called from timer or net softirq)
+ */
+void ip_vs_app_inc_put(struct ip_vs_app *inc)
+{
+	ip_vs_app_put(inc->app);
+	atomic_dec(&inc->usecnt);
+}
+
+
+/*
+ *	Register an application incarnation in protocol applications
+ */
+int
+register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+{
+	int result;
+
+	down(&__ip_vs_app_mutex);
+
+	result = ip_vs_app_inc_new(app, proto, port);
+
+	up(&__ip_vs_app_mutex);
+
+	return result;
+}
+
+
+/*
+ *	ip_vs_app registration routine
+ */
+int register_ip_vs_app(struct ip_vs_app *app)
+{
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	down(&__ip_vs_app_mutex);
+
+	list_add(&app->a_list, &ip_vs_app_list);
+
+	up(&__ip_vs_app_mutex);
+
+	return 0;
+}
+
+
+/*
+ *	ip_vs_app unregistration routine
+ *	We are sure there are no app incarnations attached to services
+ */
+void unregister_ip_vs_app(struct ip_vs_app *app)
+{
+	struct ip_vs_app *inc, *nxt;
+
+	down(&__ip_vs_app_mutex);
+
+	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
+		ip_vs_app_inc_release(inc);
+	}
+
+	list_del(&app->a_list);
+
+	up(&__ip_vs_app_mutex);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+
+#if 0000
+/*
+ *	Get reference to app by name (called from user context)
+ */
+struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
+{
+	struct ip_vs_app *app, *a = NULL;
+
+	down(&__ip_vs_app_mutex);
+
+	list_for_each_entry(ent, &ip_vs_app_list, a_list) {
+		if (strcmp(app->name, appname))
+			continue;
+
+		/* softirq may call ip_vs_app_get too, so the caller
+		   must disable softirq on the current CPU */
+		if (ip_vs_app_get(app))
+			a = app;
+		break;
+	}
+
+	up(&__ip_vs_app_mutex);
+
+	return a;
+}
+#endif
+
+
+/*
+ *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
+ */
+int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+{
+	return pp->app_conn_bind(cp);
+}
+
+
+/*
+ *	Unbind cp from application incarnation (called by cp destructor)
+ */
+void ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *inc = cp->app;
+
+	if (!inc)
+		return;
+
+	if (inc->unbind_conn)
+		inc->unbind_conn(inc, cp);
+	if (inc->done_conn)
+		inc->done_conn(inc, cp);
+	ip_vs_app_inc_put(inc);
+	cp->app = NULL;
+}
+
+
+/*
+ *	Fixes th->seq based on ip_vs_seq info.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 seq = ntohl(th->seq);
+
+	/*
+	 *	Adjust seq with delta-offset for all packets after
+	 *	the most recent resized pkt seq and with previous_delta offset
+	 *	for all packets	before most recent resized pkt seq.
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		if(after(seq, vseq->init_seq)) {
+			th->seq = htonl(seq + vseq->delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+				  vseq->delta);
+		} else {
+			th->seq = htonl(seq + vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+				  "(%d) to seq\n", vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Fixes th->ack_seq based on ip_vs_seq info.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 ack_seq = ntohl(th->ack_seq);
+
+	/*
+	 * Adjust ack_seq with delta-offset for
+	 * the packets AFTER most recent resized pkt has caused a shift
+	 * for packets before most recent resized pkt, use previous_delta
+	 */
+	if (vseq->delta || vseq->previous_delta) {
+		/* since ack_seq is the number of octet that is expected
+		   to receive next, so compare it with init_seq+delta */
+		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
+			th->ack_seq = htonl(ack_seq - vseq->delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+				  "(%d) from ack_seq\n", vseq->delta);
+
+		} else {
+			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
+			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+				  "previous_delta (%d) from ack_seq\n",
+				  vseq->previous_delta);
+		}
+	}
+}
+
+
+/*
+ *	Updates ip_vs_seq if pkt has been resized
+ *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* spinlock is to keep updating cp->flags atomic */
+	spin_lock(&cp->lock);
+	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
+		vseq->previous_delta = vseq->delta;
+		vseq->delta += diff;
+		vseq->init_seq = seq;
+		cp->flags |= flag;
+	}
+	spin_unlock(&cp->lock);
+}
+
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+				  struct ip_vs_app *app)
+{
+	int diff;
+	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_seq(&cp->out_seq, th);
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_ack_seq(&cp->in_seq, th);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	if (!app->pkt_out(app, cp, pskb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Output pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL
+ *	returns false if it can't handle packet (oom)
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_out(cp, pskb, app);
+
+	/*
+	 *	Call private output hook function
+	 */
+	if (app->pkt_out == NULL)
+		return 1;
+
+	return app->pkt_out(app, cp, pskb, NULL);
+}
+
+
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+				 struct ip_vs_app *app)
+{
+	int diff;
+	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	struct tcphdr *th;
+	__u32 seq;
+
+	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+		return 0;
+
+	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+
+	/*
+	 *	Remember seq number in case this pkt gets resized
+	 */
+	seq = ntohl(th->seq);
+
+	/*
+	 *	Fix seq stuff if flagged as so.
+	 */
+	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+		vs_fix_seq(&cp->in_seq, th);
+	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+		vs_fix_ack_seq(&cp->out_seq, th);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	if (!app->pkt_in(app, cp, pskb, &diff))
+		return 0;
+
+	/*
+	 *	Update ip_vs seq stuff if len has changed.
+	 */
+	if (diff != 0)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, seq, diff);
+
+	return 1;
+}
+
+/*
+ *	Input pkt hook. Will call bound ip_vs_app specific function
+ *	called by ipvs packet handler, assumes previously checked cp!=NULL.
+ *	returns false if can't handle packet (oom).
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+{
+	struct ip_vs_app *app;
+
+	/*
+	 *	check if application module is bound to
+	 *	this ip_vs_conn.
+	 */
+	if ((app = cp->app) == NULL)
+		return 1;
+
+	/* TCP is complicated */
+	if (cp->protocol == IPPROTO_TCP)
+		return app_tcp_pkt_in(cp, pskb, app);
+
+	/*
+	 *	Call private input hook function
+	 */
+	if (app->pkt_in == NULL)
+		return 1;
+
+	return app->pkt_in(app, cp, pskb, NULL);
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	/proc/net/ip_vs_app entry function
+ */
+
+static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+{
+	struct ip_vs_app *app, *inc;
+
+	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			if (pos-- == 0)
+				return inc;
+		}
+	}
+	return NULL;
+
+}
+
+static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	down(&__ip_vs_app_mutex);
+
+	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_app *inc, *app;
+	struct list_head *e;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_app_idx(0);
+
+	inc = v;
+	app = inc->app;
+
+	if ((e = inc->a_list.next) != &app->incs_list)
+		return list_entry(e, struct ip_vs_app, a_list);
+
+	/* go on to next application */
+	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+		app = list_entry(e, struct ip_vs_app, a_list);
+		list_for_each_entry(inc, &app->incs_list, a_list) {
+			return inc;
+		}
+	}
+	return NULL;
+}
+
+static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
+{
+	up(&__ip_vs_app_mutex);
+}
+
+static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "prot port    usecnt name\n");
+	else {
+		const struct ip_vs_app *inc = v;
+
+		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
+			   ip_vs_proto_name(inc->protocol),
+			   ntohs(inc->port),
+			   atomic_read(&inc->usecnt),
+			   inc->name);
+	}
+	return 0;
+}
+
+static struct seq_operations ip_vs_app_seq_ops = {
+	.start = ip_vs_app_seq_start,
+	.next  = ip_vs_app_seq_next,
+	.stop  = ip_vs_app_seq_stop,
+	.show  = ip_vs_app_seq_show,
+};
+
+static int ip_vs_app_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_app_seq_ops);
+}
+
+static struct file_operations ip_vs_app_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = ip_vs_app_open,
+	.read	 = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+#endif
+
+
+/*
+ *	Replace a segment of data with a new segment
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	struct iphdr *iph;
+	int diff;
+	int o_offset;
+	int o_left;
+
+	EnterFunction(9);
+
+	diff = n_len - o_len;
+	o_offset = o_buf - (char *)skb->data;
+	/* The length of left data after o_buf+o_len in the skb data */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff <= 0) {
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+		skb_trim(skb, skb->len + diff);
+	} else if (diff <= skb_tailroom(skb)) {
+		skb_put(skb, diff);
+		memmove(o_buf + n_len, o_buf + o_len, o_left);
+		memcpy(o_buf, n_buf, n_len);
+	} else {
+		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+		memmove(skb->data + o_offset + n_len,
+			skb->data + o_offset + o_len, o_left);
+		memcpy(skb->data + o_offset, n_buf, n_len);
+	}
+
+	/* must update the iph total length here */
+	iph = skb->nh.iph;
+	iph->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+int ip_vs_app_init(void)
+{
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
+	return 0;
+}
+
+
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove("ip_vs_app");
+}
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
new file mode 100644
index 00000000000..fd6feb5499f
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -0,0 +1,920 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others. Many code here is taken from IP MASQ code of kernel 2.2.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>		/* for proc_net_* */
+#include <linux/seq_file.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *  Connection hash table: for input and output packets lookups of IPVS
+ */
+static struct list_head *ip_vs_conn_tab;
+
+/*  SLAB cache for IPVS connections */
+static kmem_cache_t *ip_vs_conn_cachep;
+
+/*  counter for current IPVS connections */
+static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
+
+/*  counter for no client port connections */
+static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
+
+/* random value for IPVS connection hash */
+static unsigned int ip_vs_conn_rnd;
+
+/*
+ *  Fine locking granularity for big connection hash table
+ */
+#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
+#define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
+
+struct ip_vs_aligned_lock
+{
+	rwlock_t	l;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/* lock array for conn table */
+static struct ip_vs_aligned_lock
+__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
+
+static inline void ct_read_lock(unsigned key)
+{
+	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock(unsigned key)
+{
+	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock(unsigned key)
+{
+	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock(unsigned key)
+{
+	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_lock_bh(unsigned key)
+{
+	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_read_unlock_bh(unsigned key)
+{
+	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_lock_bh(unsigned key)
+{
+	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+static inline void ct_write_unlock_bh(unsigned key)
+{
+	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+}
+
+
+/*
+ *	Returns hash value for IPVS connection entry
+ */
+static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port)
+{
+	return jhash_3words(addr, port, proto, ip_vs_conn_rnd)
+		& IP_VS_CONN_TAB_MASK;
+}
+
+
+/*
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* Hash by protocol, client address and port */
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
+		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		cp->flags |= IP_VS_CONN_F_HASHED;
+		atomic_inc(&cp->refcnt);
+		ret = 1;
+	} else {
+		IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		ret = 0;
+	}
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *	UNhashes ip_vs_conn from ip_vs_conn_tab.
+ *	returns bool success.
+ */
+static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
+{
+	unsigned hash;
+	int ret;
+
+	/* unhash it and decrease its reference counter */
+	hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+
+	ct_write_lock(hash);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		list_del(&cp->c_list);
+		cp->flags &= ~IP_VS_CONN_F_HASHED;
+		atomic_dec(&cp->refcnt);
+		ret = 1;
+	} else
+		ret = 0;
+
+	ct_write_unlock(hash);
+
+	return ret;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from OUTside-to-INside.
+ *	s_addr, s_port: pkt source address (foreign host)
+ *	d_addr, d_port: pkt dest address (load balancer)
+ */
+static inline struct ip_vs_conn *__ip_vs_conn_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (s_addr==cp->caddr && s_port==cp->cport &&
+		    d_port==cp->vport && d_addr==cp->vaddr &&
+		    protocol==cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ct_read_unlock(hash);
+			return cp;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	return NULL;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	struct ip_vs_conn *cp;
+
+	cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
+		cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+
+	IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+		  ip_vs_proto_name(protocol),
+		  NIPQUAD(s_addr), ntohs(s_port),
+		  NIPQUAD(d_addr), ntohs(d_port),
+		  cp?"hit":"not hit");
+
+	return cp;
+}
+
+
+/*
+ *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
+ *  Called for pkts coming from inside-to-OUTside.
+ *	s_addr, s_port: pkt source address (inside host)
+ *	d_addr, d_port: pkt dest address (foreign host)
+ */
+struct ip_vs_conn *ip_vs_conn_out_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp, *ret=NULL;
+
+	/*
+	 *	Check for "full" addressed entries
+	 */
+	hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (d_addr == cp->caddr && d_port == cp->cport &&
+		    s_port == cp->dport && s_addr == cp->daddr &&
+		    protocol == cp->protocol) {
+			/* HIT */
+			atomic_inc(&cp->refcnt);
+			ret = cp;
+			break;
+		}
+	}
+
+	ct_read_unlock(hash);
+
+	IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+		  ip_vs_proto_name(protocol),
+		  NIPQUAD(s_addr), ntohs(s_port),
+		  NIPQUAD(d_addr), ntohs(d_port),
+		  ret?"hit":"not hit");
+
+	return ret;
+}
+
+
+/*
+ *      Put back the conn and restart its timer with its timeout
+ */
+void ip_vs_conn_put(struct ip_vs_conn *cp)
+{
+	/* reset it expire in its timeout */
+	mod_timer(&cp->timer, jiffies+cp->timeout);
+
+	__ip_vs_conn_put(cp);
+}
+
+
+/*
+ *	Fill a no_client_port connection with a client port number
+ */
+void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport)
+{
+	if (ip_vs_conn_unhash(cp)) {
+		spin_lock(&cp->lock);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
+			cp->cport = cport;
+		}
+		spin_unlock(&cp->lock);
+
+		/* hash on new dport */
+		ip_vs_conn_hash(cp);
+	}
+}
+
+
+/*
+ *	Bind a connection entry with the corresponding packet_xmit.
+ *	Called by ip_vs_conn_new.
+ */
+static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
+{
+	switch (IP_VS_FWD_METHOD(cp)) {
+	case IP_VS_CONN_F_MASQ:
+		cp->packet_xmit = ip_vs_nat_xmit;
+		break;
+
+	case IP_VS_CONN_F_TUNNEL:
+		cp->packet_xmit = ip_vs_tunnel_xmit;
+		break;
+
+	case IP_VS_CONN_F_DROUTE:
+		cp->packet_xmit = ip_vs_dr_xmit;
+		break;
+
+	case IP_VS_CONN_F_LOCALNODE:
+		cp->packet_xmit = ip_vs_null_xmit;
+		break;
+
+	case IP_VS_CONN_F_BYPASS:
+		cp->packet_xmit = ip_vs_bypass_xmit;
+		break;
+	}
+}
+
+
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+	return atomic_read(&dest->activeconns)
+		+ atomic_read(&dest->inactconns);
+}
+
+/*
+ *	Bind a connection entry with a virtual service destination
+ *	Called just after a new connection entry is created.
+ */
+static inline void
+ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+	/* if dest is NULL, then return directly */
+	if (!dest)
+		return;
+
+	/* Increase the refcnt counter of the dest */
+	atomic_inc(&dest->refcnt);
+
+	/* Bind with the destination and its corresponding transmitter */
+	cp->flags |= atomic_read(&dest->conn_flags);
+	cp->dest = dest;
+
+	IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+		  ip_vs_proto_name(cp->protocol),
+		  NIPQUAD(cp->caddr), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  ip_vs_fwd_tag(cp), cp->state,
+		  cp->flags, atomic_read(&cp->refcnt),
+		  atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		/* It is a normal connection, so increase the inactive
+		   connection counter because it is in TCP SYNRECV
+		   state (inactive) or other protocol inacive state */
+		atomic_inc(&dest->inactconns);
+	} else {
+		/* It is a persistent connection/template, so increase
+		   the peristent connection counter */
+		atomic_inc(&dest->persistconns);
+	}
+
+	if (dest->u_threshold != 0 &&
+	    ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+		dest->flags |= IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *	Unbind a connection entry with its VS destination
+ *	Called by the ip_vs_conn_expire function.
+ */
+static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
+{
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (!dest)
+		return;
+
+	IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+		  "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n",
+		  ip_vs_proto_name(cp->protocol),
+		  NIPQUAD(cp->caddr), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  ip_vs_fwd_tag(cp), cp->state,
+		  cp->flags, atomic_read(&cp->refcnt),
+		  atomic_read(&dest->refcnt));
+
+	/* Update the connection counters */
+	if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		/* It is a normal connection, so decrease the inactconns
+		   or activeconns counter */
+		if (cp->flags & IP_VS_CONN_F_INACTIVE) {
+			atomic_dec(&dest->inactconns);
+		} else {
+			atomic_dec(&dest->activeconns);
+		}
+	} else {
+		/* It is a persistent connection/template, so decrease
+		   the peristent connection counter */
+		atomic_dec(&dest->persistconns);
+	}
+
+	if (dest->l_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) < dest->l_threshold)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else if (dest->u_threshold != 0) {
+		if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	} else {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	}
+
+	/*
+	 * Simply decrease the refcnt of the dest, because the
+	 * dest will be either in service's destination list
+	 * or in the trash.
+	 */
+	atomic_dec(&dest->refcnt);
+}
+
+
+/*
+ *	Checking if the destination of a connection template is available.
+ *	If available, return 1, otherwise invalidate this connection
+ *	template and return 0.
+ */
+int ip_vs_check_template(struct ip_vs_conn *ct)
+{
+	struct ip_vs_dest *dest = ct->dest;
+
+	/*
+	 * Checking the dest server status.
+	 */
+	if ((dest == NULL) ||
+	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 
+	    (sysctl_ip_vs_expire_quiescent_template && 
+	     (atomic_read(&dest->weight) == 0))) {
+		IP_VS_DBG(9, "check_template: dest not available for "
+			  "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+			  "-> d:%u.%u.%u.%u:%d\n",
+			  ip_vs_proto_name(ct->protocol),
+			  NIPQUAD(ct->caddr), ntohs(ct->cport),
+			  NIPQUAD(ct->vaddr), ntohs(ct->vport),
+			  NIPQUAD(ct->daddr), ntohs(ct->dport));
+
+		/*
+		 * Invalidate the connection template
+		 */
+		if (ct->cport) {
+			if (ip_vs_conn_unhash(ct)) {
+				ct->dport = 65535;
+				ct->vport = 65535;
+				ct->cport = 0;
+				ip_vs_conn_hash(ct);
+			}
+		}
+
+		/*
+		 * Simply decrease the refcnt of the template,
+		 * don't restart its timer.
+		 */
+		atomic_dec(&ct->refcnt);
+		return 0;
+	}
+	return 1;
+}
+
+static void ip_vs_conn_expire(unsigned long data)
+{
+	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+
+	cp->timeout = 60*HZ;
+
+	/*
+	 *	hey, I'm using it
+	 */
+	atomic_inc(&cp->refcnt);
+
+	/*
+	 *	do I control anybody?
+	 */
+	if (atomic_read(&cp->n_control))
+		goto expire_later;
+
+	/*
+	 *	unhash it if it is hashed in the conn table
+	 */
+	if (!ip_vs_conn_unhash(cp))
+		goto expire_later;
+
+	/*
+	 *	refcnt==1 implies I'm the only one referrer
+	 */
+	if (likely(atomic_read(&cp->refcnt) == 1)) {
+		/* delete the timer if it is activated by other users */
+		if (timer_pending(&cp->timer))
+			del_timer(&cp->timer);
+
+		/* does anybody control me? */
+		if (cp->control)
+			ip_vs_control_del(cp);
+
+		if (unlikely(cp->app != NULL))
+			ip_vs_unbind_app(cp);
+		ip_vs_unbind_dest(cp);
+		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
+			atomic_dec(&ip_vs_conn_no_cport_cnt);
+		atomic_dec(&ip_vs_conn_count);
+
+		kmem_cache_free(ip_vs_conn_cachep, cp);
+		return;
+	}
+
+	/* hash it back to the table */
+	ip_vs_conn_hash(cp);
+
+  expire_later:
+	IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n",
+		  atomic_read(&cp->refcnt)-1,
+		  atomic_read(&cp->n_control));
+
+	ip_vs_conn_put(cp);
+}
+
+
+void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
+{
+	if (del_timer(&cp->timer))
+		mod_timer(&cp->timer, jiffies);
+	__ip_vs_conn_put(cp);
+}
+
+
+/*
+ *	Create a new connection entry and hash it into the ip_vs_conn_tab
+ */
+struct ip_vs_conn *
+ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
+	       __u32 daddr, __u16 dport, unsigned flags,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	if (cp == NULL) {
+		IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
+		return NULL;
+	}
+
+	memset(cp, 0, sizeof(*cp));
+	INIT_LIST_HEAD(&cp->c_list);
+	init_timer(&cp->timer);
+	cp->timer.data     = (unsigned long)cp;
+	cp->timer.function = ip_vs_conn_expire;
+	cp->protocol	   = proto;
+	cp->caddr	   = caddr;
+	cp->cport	   = cport;
+	cp->vaddr	   = vaddr;
+	cp->vport	   = vport;
+	cp->daddr          = daddr;
+	cp->dport          = dport;
+	cp->flags	   = flags;
+	spin_lock_init(&cp->lock);
+
+	/*
+	 * Set the entry is referenced by the current thread before hashing
+	 * it in the table, so that other thread run ip_vs_random_dropentry
+	 * but cannot drop this entry.
+	 */
+	atomic_set(&cp->refcnt, 1);
+
+	atomic_set(&cp->n_control, 0);
+	atomic_set(&cp->in_pkts, 0);
+
+	atomic_inc(&ip_vs_conn_count);
+	if (flags & IP_VS_CONN_F_NO_CPORT)
+		atomic_inc(&ip_vs_conn_no_cport_cnt);
+
+	/* Bind the connection with a destination server */
+	ip_vs_bind_dest(cp, dest);
+
+	/* Set its state and timeout */
+	cp->state = 0;
+	cp->timeout = 3*HZ;
+
+	/* Bind its packet transmitter */
+	ip_vs_bind_xmit(cp);
+
+	if (unlikely(pp && atomic_read(&pp->appcnt)))
+		ip_vs_bind_app(cp, pp);
+
+	/* Hash it in the ip_vs_conn_tab finally */
+	ip_vs_conn_hash(cp);
+
+	return cp;
+}
+
+
+/*
+ *	/proc/net/ip_vs_conn entries
+ */
+#ifdef CONFIG_PROC_FS
+
+static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+	
+	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			if (pos-- == 0) {
+				seq->private = &ip_vs_conn_tab[idx];
+				return cp;
+			}
+		}
+		ct_read_unlock_bh(idx);
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	seq->private = NULL;
+	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
+}
+
+static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip_vs_conn *cp = v;
+	struct list_head *e, *l = seq->private;
+	int idx;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) 
+		return ip_vs_conn_array(seq, 0);
+
+	/* more on same hash chain? */
+	if ((e = cp->c_list.next) != l)
+		return list_entry(e, struct ip_vs_conn, c_list);
+
+	idx = l - ip_vs_conn_tab;
+	ct_read_unlock_bh(idx);
+
+	while (++idx < IP_VS_CONN_TAB_SIZE) {
+		ct_read_lock_bh(idx);
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			seq->private = &ip_vs_conn_tab[idx];
+			return cp;
+		}	
+		ct_read_unlock_bh(idx);
+	}
+	seq->private = NULL;
+	return NULL;
+}
+
+static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+{
+	struct list_head *l = seq->private;
+
+	if (l)
+		ct_read_unlock_bh(l - ip_vs_conn_tab);
+}
+
+static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
+{
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+   "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires\n");
+	else {
+		const struct ip_vs_conn *cp = v;
+
+		seq_printf(seq,
+			"%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
+				ip_vs_proto_name(cp->protocol),
+				ntohl(cp->caddr), ntohs(cp->cport),
+				ntohl(cp->vaddr), ntohs(cp->vport),
+				ntohl(cp->daddr), ntohs(cp->dport),
+				ip_vs_state_name(cp->protocol, cp->state),
+				(cp->timer.expires-jiffies)/HZ);
+	}
+	return 0;
+}
+
+static struct seq_operations ip_vs_conn_seq_ops = {
+	.start = ip_vs_conn_seq_start,
+	.next  = ip_vs_conn_seq_next,
+	.stop  = ip_vs_conn_seq_stop,
+	.show  = ip_vs_conn_seq_show,
+};
+
+static int ip_vs_conn_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ip_vs_conn_seq_ops);
+}
+
+static struct file_operations ip_vs_conn_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_conn_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+#endif
+
+
+/*
+ *      Randomly drop connection entries before running out of memory
+ */
+static inline int todrop_entry(struct ip_vs_conn *cp)
+{
+	/*
+	 * The drop rate array needs tuning for real environments.
+	 * Called from timer bh only => no locking
+	 */
+	static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static char todrop_counter[9] = {0};
+	int i;
+
+	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
+	   This will leave enough time for normal connection to get
+	   through. */
+	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
+		return 0;
+
+	/* Don't drop the entry if its number of incoming packets is not
+	   located in [0, 8] */
+	i = atomic_read(&cp->in_pkts);
+	if (i > 8 || i < 0) return 0;
+
+	if (!todrop_rate[i]) return 0;
+	if (--todrop_counter[i] > 0) return 0;
+
+	todrop_counter[i] = todrop_rate[i];
+	return 1;
+}
+
+
+void ip_vs_random_dropentry(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+	struct ip_vs_conn *ct;
+
+	/*
+	 * Randomly scan 1/32 of the whole table every second
+	 */
+	for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
+		unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
+
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock(hash);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+			if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
+				/* connection template */
+				continue;
+
+			if (cp->protocol == IPPROTO_TCP) {
+				switch(cp->state) {
+				case IP_VS_TCP_S_SYN_RECV:
+				case IP_VS_TCP_S_SYNACK:
+					break;
+
+				case IP_VS_TCP_S_ESTABLISHED:
+					if (todrop_entry(cp))
+						break;
+					continue;
+
+				default:
+					continue;
+				}
+			} else {
+				if (!todrop_entry(cp))
+					continue;
+			}
+
+			/*
+			 * Drop the entry, and drop its ct if not referenced
+			 */
+			atomic_inc(&cp->refcnt);
+			ct_write_unlock(hash);
+
+			if ((ct = cp->control))
+				atomic_inc(&ct->refcnt);
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (ct) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(ct);
+			}
+			ct_write_lock(hash);
+		}
+		ct_write_unlock(hash);
+	}
+}
+
+
+/*
+ *      Flush all the connection entries in the ip_vs_conn_tab
+ */
+static void ip_vs_conn_flush(void)
+{
+	int idx;
+	struct ip_vs_conn *cp;
+	struct ip_vs_conn *ct;
+
+  flush_again:
+	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+		/*
+		 *  Lock is actually needed in this loop.
+		 */
+		ct_write_lock_bh(idx);
+
+		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+			atomic_inc(&cp->refcnt);
+			ct_write_unlock(idx);
+
+			if ((ct = cp->control))
+				atomic_inc(&ct->refcnt);
+			IP_VS_DBG(4, "del connection\n");
+			ip_vs_conn_expire_now(cp);
+			if (ct) {
+				IP_VS_DBG(4, "del conn template\n");
+				ip_vs_conn_expire_now(ct);
+			}
+			ct_write_lock(idx);
+		}
+		ct_write_unlock_bh(idx);
+	}
+
+	/* the counter may be not NULL, because maybe some conn entries
+	   are run by slow timer handler or unhashed but still referred */
+	if (atomic_read(&ip_vs_conn_count) != 0) {
+		schedule();
+		goto flush_again;
+	}
+}
+
+
+int ip_vs_conn_init(void)
+{
+	int idx;
+
+	/*
+	 * Allocate the connection hash table and initialize its list heads
+	 */
+	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+	if (!ip_vs_conn_tab)
+		return -ENOMEM;
+
+	/* Allocate ip_vs_conn slab cache */
+	ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+					      sizeof(struct ip_vs_conn), 0,
+					      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ip_vs_conn_cachep) {
+		vfree(ip_vs_conn_tab);
+		return -ENOMEM;
+	}
+
+	IP_VS_INFO("Connection hash table configured "
+		   "(size=%d, memory=%ldKbytes)\n",
+		   IP_VS_CONN_TAB_SIZE,
+		   (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
+		  sizeof(struct ip_vs_conn));
+
+	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+	}
+
+	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
+		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+	}
+
+	proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
+
+	/* calculate the random value for connection hash */
+	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+
+	return 0;
+}
+
+
+void ip_vs_conn_cleanup(void)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush();
+
+	/* Release the empty cache */
+	kmem_cache_destroy(ip_vs_conn_cachep);
+	proc_net_remove("ip_vs_conn");
+	vfree(ip_vs_conn_tab);
+}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
new file mode 100644
index 00000000000..5fb257dd07c
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -0,0 +1,1191 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese,
+ * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms
+ * and others.
+ *
+ * Changes:
+ *	Paul `Rusty' Russell		properly handle non-linear skbs
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+EXPORT_SYMBOL(register_ip_vs_scheduler);
+EXPORT_SYMBOL(unregister_ip_vs_scheduler);
+EXPORT_SYMBOL(ip_vs_skb_replace);
+EXPORT_SYMBOL(ip_vs_proto_name);
+EXPORT_SYMBOL(ip_vs_conn_new);
+EXPORT_SYMBOL(ip_vs_conn_in_get);
+EXPORT_SYMBOL(ip_vs_conn_out_get);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+EXPORT_SYMBOL(ip_vs_tcp_conn_listen);
+#endif
+EXPORT_SYMBOL(ip_vs_conn_put);
+#ifdef CONFIG_IP_VS_DEBUG
+EXPORT_SYMBOL(ip_vs_get_debug_level);
+#endif
+EXPORT_SYMBOL(ip_vs_make_skb_writable);
+
+
+/* ID used in ICMP lookups */
+#define icmp_id(icmph)          (((icmph)->un).echo.id)
+
+const char *ip_vs_proto_name(unsigned proto)
+{
+	static char buf[20];
+
+	switch (proto) {
+	case IPPROTO_IP:
+		return "IP";
+	case IPPROTO_UDP:
+		return "UDP";
+	case IPPROTO_TCP:
+		return "TCP";
+	case IPPROTO_ICMP:
+		return "ICMP";
+	default:
+		sprintf(buf, "IP_%d", proto);
+		return buf;
+	}
+}
+
+void ip_vs_init_hash_table(struct list_head *table, int rows)
+{
+	while (--rows >= 0)
+		INIT_LIST_HEAD(&table[rows]);
+}
+
+static inline void
+ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.inpkts++;
+		dest->stats.inbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.inpkts++;
+		dest->svc->stats.inbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.inpkts++;
+		ip_vs_stats.inbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest = cp->dest;
+	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		spin_lock(&dest->stats.lock);
+		dest->stats.outpkts++;
+		dest->stats.outbytes += skb->len;
+		spin_unlock(&dest->stats.lock);
+
+		spin_lock(&dest->svc->stats.lock);
+		dest->svc->stats.outpkts++;
+		dest->svc->stats.outbytes += skb->len;
+		spin_unlock(&dest->svc->stats.lock);
+
+		spin_lock(&ip_vs_stats.lock);
+		ip_vs_stats.outpkts++;
+		ip_vs_stats.outbytes += skb->len;
+		spin_unlock(&ip_vs_stats.lock);
+	}
+}
+
+
+static inline void
+ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+{
+	spin_lock(&cp->dest->stats.lock);
+	cp->dest->stats.conns++;
+	spin_unlock(&cp->dest->stats.lock);
+
+	spin_lock(&svc->stats.lock);
+	svc->stats.conns++;
+	spin_unlock(&svc->stats.lock);
+
+	spin_lock(&ip_vs_stats.lock);
+	ip_vs_stats.conns++;
+	spin_unlock(&ip_vs_stats.lock);
+}
+
+
+static inline int
+ip_vs_set_state(struct ip_vs_conn *cp, int direction,
+		const struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	if (unlikely(!pp->state_transition))
+		return 0;
+	return pp->state_transition(cp, direction, skb, pp);
+}
+
+
+int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
+{
+	struct sk_buff *skb = *pskb;
+
+	/* skb is already used, better copy skb and its payload */
+	if (unlikely(skb_shared(skb) || skb->sk))
+		goto copy_skb;
+
+	/* skb data is already used, copy it */
+	if (unlikely(skb_cloned(skb)))
+		goto copy_data;
+
+	return pskb_may_pull(skb, writable_len);
+
+  copy_data:
+	if (unlikely(writable_len > skb->len))
+		return 0;
+	return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+
+  copy_skb:
+	if (unlikely(writable_len > skb->len))
+		return 0;
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (!skb)
+		return 0;
+	BUG_ON(skb_is_nonlinear(skb));
+
+	/* Rest of kernel will get very unhappy if we pass it a
+	   suddenly-orphaned skbuff */
+	if ((*pskb)->sk)
+		skb_set_owner_w(skb, (*pskb)->sk);
+	kfree_skb(*pskb);
+	*pskb = skb;
+	return 1;
+}
+
+/*
+ *  IPVS persistent scheduling function
+ *  It creates a connection entry according to its template if exists,
+ *  or selects a server and creates a connection entry plus a template.
+ *  Locking: we are svc user (svc->refcnt), so we hold all dests too
+ *  Protocols supported: TCP, UDP
+ */
+static struct ip_vs_conn *
+ip_vs_sched_persist(struct ip_vs_service *svc,
+		    const struct sk_buff *skb,
+		    __u16 ports[2])
+{
+	struct ip_vs_conn *cp = NULL;
+	struct iphdr *iph = skb->nh.iph;
+	struct ip_vs_dest *dest;
+	struct ip_vs_conn *ct;
+	__u16  dport;	 /* destination port to forward */
+	__u32  snet;	 /* source network of the client, after masking */
+
+	/* Mask saddr with the netmask to adjust template granularity */
+	snet = iph->saddr & svc->netmask;
+
+	IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
+		  "mnet %u.%u.%u.%u\n",
+		  NIPQUAD(iph->saddr), ntohs(ports[0]),
+		  NIPQUAD(iph->daddr), ntohs(ports[1]),
+		  NIPQUAD(snet));
+
+	/*
+	 * As far as we know, FTP is a very complicated network protocol, and
+	 * it uses control connection and data connections. For active FTP,
+	 * FTP server initialize data connection to the client, its source port
+	 * is often 20. For passive FTP, FTP server tells the clients the port
+	 * that it passively listens to,  and the client issues the data
+	 * connection. In the tunneling or direct routing mode, the load
+	 * balancer is on the client-to-server half of connection, the port
+	 * number is unknown to the load balancer. So, a conn template like
+	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
+	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
+	 * is created for other persistent services.
+	 */
+	if (ports[1] == svc->port) {
+		/* Check if a template already exists */
+		if (svc->port != FTPPORT)
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, ports[1]);
+		else
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * No template found or the dest of the connection
+			 * template is not available.
+			 */
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template like <protocol,caddr,0,
+			 * vaddr,vport,daddr,dport> for non-ftp service,
+			 * and <protocol,caddr,0,vaddr,0,daddr,0>
+			 * for ftp service.
+			 */
+			if (svc->port != FTPPORT)
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr,
+						    ports[1],
+						    dest->addr, dest->port,
+						    0,
+						    dest);
+			else
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr, 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = dest->port;
+	} else {
+		/*
+		 * Note: persistent fwmark-based services and persistent
+		 * port zero service are handled here.
+		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
+		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
+		 */
+		if (svc->fwmark)
+			ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+					       htonl(svc->fwmark), 0);
+		else
+			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+					       iph->daddr, 0);
+
+		if (!ct || !ip_vs_check_template(ct)) {
+			/*
+			 * If it is not persistent port zero, return NULL,
+			 * otherwise create a connection template.
+			 */
+			if (svc->port)
+				return NULL;
+
+			dest = svc->scheduler->schedule(svc, skb);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "p-schedule: no dest found.\n");
+				return NULL;
+			}
+
+			/*
+			 * Create a template according to the service
+			 */
+			if (svc->fwmark)
+				ct = ip_vs_conn_new(IPPROTO_IP,
+						    snet, 0,
+						    htonl(svc->fwmark), 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			else
+				ct = ip_vs_conn_new(iph->protocol,
+						    snet, 0,
+						    iph->daddr, 0,
+						    dest->addr, 0,
+						    0,
+						    dest);
+			if (ct == NULL)
+				return NULL;
+
+			ct->timeout = svc->timeout;
+		} else {
+			/* set destination with the found template */
+			dest = ct->dest;
+		}
+		dport = ports[1];
+	}
+
+	/*
+	 *    Create a new connection according to the template
+	 */
+	cp = ip_vs_conn_new(iph->protocol,
+			    iph->saddr, ports[0],
+			    iph->daddr, ports[1],
+			    dest->addr, dport,
+			    0,
+			    dest);
+	if (cp == NULL) {
+		ip_vs_conn_put(ct);
+		return NULL;
+	}
+
+	/*
+	 *    Add its control
+	 */
+	ip_vs_control_add(cp, ct);
+	ip_vs_conn_put(ct);
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  IPVS main scheduling function
+ *  It selects a server according to the virtual service, and
+ *  creates a connection entry.
+ *  Protocols supported: TCP, UDP
+ */
+struct ip_vs_conn *
+ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_conn *cp = NULL;
+	struct iphdr *iph = skb->nh.iph;
+	struct ip_vs_dest *dest;
+	__u16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, iph->ihl*4,
+				  sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	/*
+	 *    Persistent service
+	 */
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr);
+
+	/*
+	 *    Non-persistent service
+	 */
+	if (!svc->fwmark && pptr[1] != svc->port) {
+		if (!svc->port)
+			IP_VS_ERR("Schedule: port zero only supported "
+				  "in persistent services, "
+				  "check your ipvs configuration\n");
+		return NULL;
+	}
+
+	dest = svc->scheduler->schedule(svc, skb);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "Schedule: no dest found.\n");
+		return NULL;
+	}
+
+	/*
+	 *    Create a connection entry.
+	 */
+	cp = ip_vs_conn_new(iph->protocol,
+			    iph->saddr, pptr[0],
+			    iph->daddr, pptr[1],
+			    dest->addr, dest->port?dest->port:pptr[1],
+			    0,
+			    dest);
+	if (cp == NULL)
+		return NULL;
+
+	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
+		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+		  ip_vs_fwd_tag(cp),
+		  NIPQUAD(cp->caddr), ntohs(cp->cport),
+		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+		  NIPQUAD(cp->daddr), ntohs(cp->dport),
+		  cp->flags, atomic_read(&cp->refcnt));
+
+	ip_vs_conn_stats(cp, svc);
+	return cp;
+}
+
+
+/*
+ *  Pass or drop the packet.
+ *  Called by ip_vs_in, when the virtual service is available but
+ *  no destination is available for a new connection.
+ */
+int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
+		struct ip_vs_protocol *pp)
+{
+	__u16 _ports[2], *pptr;
+	struct iphdr *iph = skb->nh.iph;
+
+	pptr = skb_header_pointer(skb, iph->ihl*4,
+				  sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		ip_vs_service_put(svc);
+		return NF_DROP;
+	}
+
+	/* if it is fwmark-based service, the cache_bypass sysctl is up
+	   and the destination is RTN_UNICAST (and not local), then create
+	   a cache_bypass connection entry */
+	if (sysctl_ip_vs_cache_bypass && svc->fwmark
+	    && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
+		int ret, cs;
+		struct ip_vs_conn *cp;
+
+		ip_vs_service_put(svc);
+
+		/* create a new connection entry */
+		IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
+		cp = ip_vs_conn_new(iph->protocol,
+				    iph->saddr, pptr[0],
+				    iph->daddr, pptr[1],
+				    0, 0,
+				    IP_VS_CONN_F_BYPASS,
+				    NULL);
+		if (cp == NULL)
+			return NF_DROP;
+
+		/* statistics */
+		ip_vs_in_stats(cp, skb);
+
+		/* set state */
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+
+		/* transmit the first SYN packet */
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+
+		atomic_inc(&cp->in_pkts);
+		ip_vs_conn_put(cp);
+		return ret;
+	}
+
+	/*
+	 * When the virtual ftp service is presented, packets destined
+	 * for other services on the VIP may get here (except services
+	 * listed in the ipvs table), pass the packets, because it is
+	 * not ipvs job to decide to drop the packets.
+	 */
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
+		ip_vs_service_put(svc);
+		return NF_ACCEPT;
+	}
+
+	ip_vs_service_put(svc);
+
+	/*
+	 * Notify the client that the destination is unreachable, and
+	 * release the socket buffer.
+	 * Since it is in IP layer, the TCP socket is not actually
+	 * created, the TCP RST packet cannot be sent, instead that
+	 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
+	 */
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	return NF_DROP;
+}
+
+
+/*
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
+ *      chain, and is used for VS/NAT.
+ *      It detects packets for VS/NAT connections and sends the packets
+ *      immediately. This can avoid that iptable_nat mangles the packets
+ *      for VS/NAT.
+ */
+static unsigned int ip_vs_post_routing(unsigned int hooknum,
+				       struct sk_buff **pskb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+		return NF_ACCEPT;
+
+	/* The packet was sent from IPVS, exit this chain */
+	(*okfn)(*pskb);
+
+	return NF_STOLEN;
+}
+
+u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+{
+	return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+}
+
+static inline struct sk_buff *
+ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	skb = ip_defrag(skb, user);
+	if (skb)
+		ip_send_check(skb->nh.iph);
+	return skb;
+}
+
+/*
+ * Packet has been made sufficiently writable in caller
+ * - inout: 1=in->out, 0=out->in
+ */
+void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		    struct ip_vs_conn *cp, int inout)
+{
+	struct iphdr *iph	 = skb->nh.iph;
+	unsigned int icmp_offset = iph->ihl*4;
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr;
+		ip_send_check(iph);
+		ciph->daddr = cp->vaddr;
+		ip_send_check(ciph);
+	} else {
+		iph->daddr = cp->daddr;
+		ip_send_check(iph);
+		ciph->saddr = cp->daddr;
+		ip_send_check(ciph);
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+		__u16 *ports = (void *)ciph + ciph->ihl*4;
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMP checksum */
+	icmph->checksum = 0;
+	icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered outgoing ICMP");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			"Forwarding altered incoming ICMP");
+}
+
+/*
+ *	Handle ICMP messages in the inside-to-outside direction (outgoing).
+ *	Find any that might be relevant, check against existing connections,
+ *	forward to the right destination host if relevant.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ *	(Only used in VS/NAT)
+ */
+static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+{
+	struct sk_buff *skb = *pskb;
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl, verdict;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
+		if (!skb)
+			return NF_STOLEN;
+		*pskb = skb;
+	}
+
+	iph = skb->nh.iph;
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
+
+	offset += cih->ihl * 4;
+
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_out_get(skb, pp, cih, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the"
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	    ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	if (!ip_vs_make_skb_writable(pskb, offset))
+		goto out;
+	skb = *pskb;
+
+	ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+	verdict = NF_ACCEPT;
+
+  out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+static inline int is_tcp_reset(const struct sk_buff *skb)
+{
+	struct tcphdr _tcph, *th;
+
+	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+	return th->rst;
+}
+
+/*
+ *	It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
+ *	Check if outgoing packet belongs to the established ip_vs_conn,
+ *      rewrite addresses of the packet and send it on its way...
+ */
+static unsigned int
+ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+	  const struct net_device *in, const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff  *skb = *pskb;
+	struct iphdr	*iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int ihl;
+
+	EnterFunction(11);
+
+	if (skb->nfcache & NFC_IPVS_PROPERTY)
+		return NF_ACCEPT;
+
+	iph = skb->nh.iph;
+	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+		int related, verdict = ip_vs_out_icmp(pskb, &related);
+
+		if (related)
+			return verdict;
+		skb = *pskb;
+		iph = skb->nh.iph;
+	}
+
+	pp = ip_vs_proto_get(iph->protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	/* reassemble IP fragments */
+	if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+		     !pp->dont_defrag)) {
+		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
+		if (!skb)
+			return NF_STOLEN;
+		iph = skb->nh.iph;
+		*pskb = skb;
+	}
+
+	ihl = iph->ihl << 2;
+
+	/*
+	 * Check if the packet belongs to an existing entry
+	 */
+	cp = pp->conn_out_get(skb, pp, iph, ihl, 0);
+
+	if (unlikely(!cp)) {
+		if (sysctl_ip_vs_nat_icmp_send &&
+		    (pp->protocol == IPPROTO_TCP ||
+		     pp->protocol == IPPROTO_UDP)) {
+			__u16 _ports[2], *pptr;
+
+			pptr = skb_header_pointer(skb, ihl,
+						  sizeof(_ports), _ports);
+			if (pptr == NULL)
+				return NF_ACCEPT;	/* Not for me */
+			if (ip_vs_lookup_real_service(iph->protocol,
+						      iph->saddr, pptr[0])) {
+				/*
+				 * Notify the real server: there is no
+				 * existing entry if it is not RST
+				 * packet or not TCP packet.
+				 */
+				if (iph->protocol != IPPROTO_TCP
+				    || !is_tcp_reset(skb)) {
+					icmp_send(skb,ICMP_DEST_UNREACH,
+						  ICMP_PORT_UNREACH, 0);
+					return NF_DROP;
+				}
+			}
+		}
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+	if (!ip_vs_make_skb_writable(pskb, ihl))
+		goto drop;
+
+	/* mangle the packet */
+	if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+		goto drop;
+	skb = *pskb;
+	skb->nh.iph->saddr = cp->vaddr;
+	ip_send_check(skb->nh.iph);
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_conn_put(cp);
+
+	skb->nfcache |= NFC_IPVS_PROPERTY;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+
+  drop:
+	ip_vs_conn_put(cp);
+	kfree_skb(*pskb);
+	return NF_STOLEN;
+}
+
+
+/*
+ *	Handle ICMP messages in the outside-to-inside direction (incoming).
+ *	Find any that might be relevant, check against existing connections,
+ *	forward to the right destination host if relevant.
+ *	Currently handles error types - unreachable, quench, ttl exceeded.
+ */
+static int 
+ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+{
+	struct sk_buff *skb = *pskb;
+	struct iphdr *iph;
+	struct icmphdr	_icmph, *ic;
+	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, ihl, verdict;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+		skb = ip_vs_gather_frags(skb,
+		                         hooknum == NF_IP_LOCAL_IN ?
+					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
+		if (!skb)
+			return NF_STOLEN;
+		*pskb = skb;
+	}
+
+	iph = skb->nh.iph;
+	offset = ihl = iph->ihl * 4;
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
+		  ic->type, ntohs(icmp_id(ic)),
+		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->type != ICMP_DEST_UNREACH) &&
+	    (ic->type != ICMP_SOURCE_QUENCH) &&
+	    (ic->type != ICMP_TIME_EXCEEDED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->protocol);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+		     pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
+
+	offset += cih->ihl * 4;
+
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(skb, pp, cih, offset, 1);
+	if (!cp)
+		return NF_ACCEPT;
+
+	verdict = NF_DROP;
+
+	/* Ensure the checksum is correct */
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	    ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+  out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
+/*
+ *	Check if it's for virtual services, look it up,
+ *	and send it on its way...
+ */
+static unsigned int
+ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+	 const struct net_device *in, const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff	*skb = *pskb;
+	struct iphdr	*iph;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn *cp;
+	int ret, restart;
+	int ihl;
+
+	/*
+	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+	 *	... don't know why 1st test DOES NOT include 2nd (?)
+	 */
+	if (unlikely(skb->pkt_type != PACKET_HOST
+		     || skb->dev == &loopback_dev || skb->sk)) {
+		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+			  skb->pkt_type,
+			  skb->nh.iph->protocol,
+			  NIPQUAD(skb->nh.iph->daddr));
+		return NF_ACCEPT;
+	}
+
+	iph = skb->nh.iph;
+	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+
+		if (related)
+			return verdict;
+		skb = *pskb;
+		iph = skb->nh.iph;
+	}
+
+	/* Protocol supported? */
+	pp = ip_vs_proto_get(iph->protocol);
+	if (unlikely(!pp))
+		return NF_ACCEPT;
+
+	ihl = iph->ihl << 2;
+
+	/*
+	 * Check if the packet belongs to an existing connection entry
+	 */
+	cp = pp->conn_in_get(skb, pp, iph, ihl, 0);
+
+	if (unlikely(!cp)) {
+		int v;
+
+		if (!pp->conn_schedule(skb, pp, &v, &cp))
+			return v;
+	}
+
+	if (unlikely(!cp)) {
+		/* sorry, all this trouble for a no-hit :) */
+		IP_VS_DBG_PKT(12, pp, skb, 0,
+			      "packet continues traversal as normal");
+		return NF_ACCEPT;
+	}
+
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
+
+	/* Check the server status */
+	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+		/* the destination server is not available */
+
+		if (sysctl_ip_vs_expire_nodest_conn) {
+			/* try to expire the connection immediately */
+			ip_vs_conn_expire_now(cp);
+		} else {
+			/* don't restart its timer, and silently
+			   drop the packet. */
+			__ip_vs_conn_put(cp);
+		}
+		return NF_DROP;
+	}
+
+	ip_vs_in_stats(cp, skb);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	if (cp->packet_xmit)
+		ret = cp->packet_xmit(skb, cp, pp);
+		/* do not touch skb anymore */
+	else {
+		IP_VS_DBG_RL("warning: packet_xmit is null");
+		ret = NF_ACCEPT;
+	}
+
+	/* increase its packet counter and check if it is needed
+	   to be synchronized */
+	atomic_inc(&cp->in_pkts);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	    (cp->protocol != IPPROTO_TCP ||
+	     cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+	    (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+	     == sysctl_ip_vs_sync_threshold[0]))
+		ip_vs_sync_conn(cp);
+
+	ip_vs_conn_put(cp);
+	return ret;
+}
+
+
+/*
+ *	It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
+ *      related packets destined for 0.0.0.0/0.
+ *      When fwmark-based virtual service is used, such as transparent
+ *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
+ *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
+ *      and send them to ip_vs_in_icmp.
+ */
+static unsigned int
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+		   const struct net_device *in, const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	int r;
+
+	if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+		return NF_ACCEPT;
+
+	return ip_vs_in_icmp(pskb, &r, hooknum);
+}
+
+
+/* After packet filtering, forward packet through VS/DR, VS/TUN,
+   or VS/NAT(change destination), so that filtering rules can be
+   applied to IPVS. */
+static struct nf_hook_ops ip_vs_in_ops = {
+	.hook		= ip_vs_in,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum        = NF_IP_LOCAL_IN,
+	.priority       = 100,
+};
+
+/* After packet filtering, change source only for VS/NAT */
+static struct nf_hook_ops ip_vs_out_ops = {
+	.hook		= ip_vs_out,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum        = NF_IP_FORWARD,
+	.priority       = 100,
+};
+
+/* After packet filtering (but before ip_vs_out_icmp), catch icmp
+   destined for 0.0.0.0/0, which is for incoming IPVS connections */
+static struct nf_hook_ops ip_vs_forward_icmp_ops = {
+	.hook		= ip_vs_forward_icmp,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum        = NF_IP_FORWARD,
+	.priority       = 99,
+};
+
+/* Before the netfilter connection tracking, exit from POST_ROUTING */
+static struct nf_hook_ops ip_vs_post_routing_ops = {
+	.hook		= ip_vs_post_routing,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum        = NF_IP_POST_ROUTING,
+	.priority       = NF_IP_PRI_NAT_SRC-1,
+};
+
+
+/*
+ *	Initialize IP Virtual Server
+ */
+static int __init ip_vs_init(void)
+{
+	int ret;
+
+	ret = ip_vs_control_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup control.\n");
+		goto cleanup_nothing;
+	}
+
+	ip_vs_protocol_init();
+
+	ret = ip_vs_app_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup application helper.\n");
+		goto cleanup_protocol;
+	}
+
+	ret = ip_vs_conn_init();
+	if (ret < 0) {
+		IP_VS_ERR("can't setup connection table.\n");
+		goto cleanup_app;
+	}
+
+	ret = nf_register_hook(&ip_vs_in_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register in hook.\n");
+		goto cleanup_conn;
+	}
+
+	ret = nf_register_hook(&ip_vs_out_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register out hook.\n");
+		goto cleanup_inops;
+	}
+	ret = nf_register_hook(&ip_vs_post_routing_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register post_routing hook.\n");
+		goto cleanup_outops;
+	}
+	ret = nf_register_hook(&ip_vs_forward_icmp_ops);
+	if (ret < 0) {
+		IP_VS_ERR("can't register forward_icmp hook.\n");
+		goto cleanup_postroutingops;
+	}
+
+	IP_VS_INFO("ipvs loaded.\n");
+	return ret;
+
+  cleanup_postroutingops:
+	nf_unregister_hook(&ip_vs_post_routing_ops);
+  cleanup_outops:
+	nf_unregister_hook(&ip_vs_out_ops);
+  cleanup_inops:
+	nf_unregister_hook(&ip_vs_in_ops);
+  cleanup_conn:
+	ip_vs_conn_cleanup();
+  cleanup_app:
+	ip_vs_app_cleanup();
+  cleanup_protocol:
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+  cleanup_nothing:
+	return ret;
+}
+
+static void __exit ip_vs_cleanup(void)
+{
+	nf_unregister_hook(&ip_vs_forward_icmp_ops);
+	nf_unregister_hook(&ip_vs_post_routing_ops);
+	nf_unregister_hook(&ip_vs_out_ops);
+	nf_unregister_hook(&ip_vs_in_ops);
+	ip_vs_conn_cleanup();
+	ip_vs_app_cleanup();
+	ip_vs_protocol_cleanup();
+	ip_vs_control_cleanup();
+	IP_VS_INFO("ipvs unloaded.\n");
+}
+
+module_init(ip_vs_init);
+module_exit(ip_vs_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
new file mode 100644
index 00000000000..218d9701036
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -0,0 +1,2391 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <asm/uaccess.h>
+
+#include <net/ip_vs.h>
+
+/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+static DECLARE_MUTEX(__ip_vs_mutex);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_svc_lock);
+
+/* lock for table with the real services */
+static DEFINE_RWLOCK(__ip_vs_rs_lock);
+
+/* lock for state and timeout tables */
+static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
+
+/* lock for drop entry handling */
+static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
+
+/* lock for drop packet handling */
+static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
+
+/* 1/rate drop and drop-entry variables */
+int ip_vs_drop_rate = 0;
+int ip_vs_drop_counter = 0;
+static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+
+/* number of virtual services */
+static int ip_vs_num_services = 0;
+
+/* sysctl variables */
+static int sysctl_ip_vs_drop_entry = 0;
+static int sysctl_ip_vs_drop_packet = 0;
+static int sysctl_ip_vs_secure_tcp = 0;
+static int sysctl_ip_vs_amemthresh = 1024;
+static int sysctl_ip_vs_am_droprate = 10;
+int sysctl_ip_vs_cache_bypass = 0;
+int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_expire_quiescent_template = 0;
+int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+int sysctl_ip_vs_nat_icmp_send = 0;
+
+
+#ifdef CONFIG_IP_VS_DEBUG
+static int sysctl_ip_vs_debug_level = 0;
+
+int ip_vs_get_debug_level(void)
+{
+	return sysctl_ip_vs_debug_level;
+}
+#endif
+
+/*
+ *	update_defense_level is called from keventd and from sysctl.
+ */
+static void update_defense_level(void)
+{
+	struct sysinfo i;
+	static int old_secure_tcp = 0;
+	int availmem;
+	int nomem;
+	int to_change = -1;
+
+	/* we only count free and buffered memory (in pages) */
+	si_meminfo(&i);
+	availmem = i.freeram + i.bufferram;
+	/* however in linux 2.5 the i.bufferram is total page cache size,
+	   we need adjust it */
+	/* si_swapinfo(&i); */
+	/* availmem = availmem - (i.totalswap - i.freeswap); */
+
+	nomem = (availmem < sysctl_ip_vs_amemthresh);
+
+	/* drop_entry */
+	spin_lock(&__ip_vs_dropentry_lock);
+	switch (sysctl_ip_vs_drop_entry) {
+	case 0:
+		atomic_set(&ip_vs_dropentry, 0);
+		break;
+	case 1:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+			sysctl_ip_vs_drop_entry = 2;
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+		}
+		break;
+	case 2:
+		if (nomem) {
+			atomic_set(&ip_vs_dropentry, 1);
+		} else {
+			atomic_set(&ip_vs_dropentry, 0);
+			sysctl_ip_vs_drop_entry = 1;
+		};
+		break;
+	case 3:
+		atomic_set(&ip_vs_dropentry, 1);
+		break;
+	}
+	spin_unlock(&__ip_vs_dropentry_lock);
+
+	/* drop_packet */
+	spin_lock(&__ip_vs_droppacket_lock);
+	switch (sysctl_ip_vs_drop_packet) {
+	case 0:
+		ip_vs_drop_rate = 0;
+		break;
+	case 1:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+			sysctl_ip_vs_drop_packet = 2;
+		} else {
+			ip_vs_drop_rate = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			ip_vs_drop_rate = ip_vs_drop_counter
+				= sysctl_ip_vs_amemthresh /
+				(sysctl_ip_vs_amemthresh-availmem);
+		} else {
+			ip_vs_drop_rate = 0;
+			sysctl_ip_vs_drop_packet = 1;
+		}
+		break;
+	case 3:
+		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		break;
+	}
+	spin_unlock(&__ip_vs_droppacket_lock);
+
+	/* secure_tcp */
+	write_lock(&__ip_vs_securetcp_lock);
+	switch (sysctl_ip_vs_secure_tcp) {
+	case 0:
+		if (old_secure_tcp >= 2)
+			to_change = 0;
+		break;
+	case 1:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+			sysctl_ip_vs_secure_tcp = 2;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+		}
+		break;
+	case 2:
+		if (nomem) {
+			if (old_secure_tcp < 2)
+				to_change = 1;
+		} else {
+			if (old_secure_tcp >= 2)
+				to_change = 0;
+			sysctl_ip_vs_secure_tcp = 1;
+		}
+		break;
+	case 3:
+		if (old_secure_tcp < 2)
+			to_change = 1;
+		break;
+	}
+	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	if (to_change >= 0)
+		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+	write_unlock(&__ip_vs_securetcp_lock);
+}
+
+
+/*
+ *	Timer for checking the defense
+ */
+#define DEFENSE_TIMER_PERIOD	1*HZ
+static void defense_work_handler(void *data);
+static DECLARE_WORK(defense_work, defense_work_handler, NULL);
+
+static void defense_work_handler(void *data)
+{
+	update_defense_level();
+	if (atomic_read(&ip_vs_dropentry))
+		ip_vs_random_dropentry();
+
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+}
+
+int
+ip_vs_use_count_inc(void)
+{
+	return try_module_get(THIS_MODULE);
+}
+
+void
+ip_vs_use_count_dec(void)
+{
+	module_put(THIS_MODULE);
+}
+
+
+/*
+ *	Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+
+/* the service table hashed by <protocol, addr, port> */
+static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+/* the service table hashed by fwmark */
+static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+
+/*
+ *	Hash table: for real service lookups
+ */
+#define IP_VS_RTAB_BITS 4
+#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
+
+/*
+ *	Trash for destinations
+ */
+static LIST_HEAD(ip_vs_dest_trash);
+
+/*
+ *	FTP & NULL virtual service counters
+ */
+static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
+static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
+
+
+/*
+ *	Returns hash value for virtual service
+ */
+static __inline__ unsigned
+ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
+{
+	register unsigned porth = ntohs(port);
+
+	return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+		& IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Returns hash value of fwmark for virtual service lookup
+ */
+static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+{
+	return fwmark & IP_VS_SVC_TAB_MASK;
+}
+
+/*
+ *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *	or in the ip_vs_svc_fwm_table by fwmark.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_hash(struct ip_vs_service *svc)
+{
+	unsigned hash;
+
+	if (svc->flags & IP_VS_SVC_F_HASHED) {
+		IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/*
+		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+		 */
+		hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
+		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+	} else {
+		/*
+		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 */
+		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+	}
+
+	svc->flags |= IP_VS_SVC_F_HASHED;
+	/* increase its refcnt because it is referenced by the svc table */
+	atomic_inc(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Should be called with locked tables.
+ */
+static int ip_vs_svc_unhash(struct ip_vs_service *svc)
+{
+	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
+		IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	if (svc->fwmark == 0) {
+		/* Remove it from the ip_vs_svc_table table */
+		list_del(&svc->s_list);
+	} else {
+		/* Remove it from the ip_vs_svc_fwm_table table */
+		list_del(&svc->f_list);
+	}
+
+	svc->flags &= ~IP_VS_SVC_F_HASHED;
+	atomic_dec(&svc->refcnt);
+	return 1;
+}
+
+
+/*
+ *	Get service by {proto,addr,port} in the service table.
+ */
+static __inline__ struct ip_vs_service *
+__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for "full" addressed entries */
+	hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
+
+	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+		if ((svc->addr == vaddr)
+		    && (svc->port == vport)
+		    && (svc->protocol == protocol)) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Get service by {fwmark} in the service table.
+ */
+static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
+{
+	unsigned hash;
+	struct ip_vs_service *svc;
+
+	/* Check for fwmark addressed entries */
+	hash = ip_vs_svc_fwm_hashkey(fwmark);
+
+	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+		if (svc->fwmark == fwmark) {
+			/* HIT */
+			atomic_inc(&svc->usecnt);
+			return svc;
+		}
+	}
+
+	return NULL;
+}
+
+struct ip_vs_service *
+ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
+{
+	struct ip_vs_service *svc;
+
+	read_lock(&__ip_vs_svc_lock);
+
+	/*
+	 *	Check the table hashed by fwmark first
+	 */
+	if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
+		goto out;
+
+	/*
+	 *	Check the table hashed by <protocol,addr,port>
+	 *	for "full" addressed entries
+	 */
+	svc = __ip_vs_service_get(protocol, vaddr, vport);
+
+	if (svc == NULL
+	    && protocol == IPPROTO_TCP
+	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+		/*
+		 * Check if ftp service entry exists, the packet
+		 * might belong to FTP data connections.
+		 */
+		svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
+	}
+
+	if (svc == NULL
+	    && atomic_read(&ip_vs_nullsvc_counter)) {
+		/*
+		 * Check if the catch-all port (port zero) exists
+		 */
+		svc = __ip_vs_service_get(protocol, vaddr, 0);
+	}
+
+  out:
+	read_unlock(&__ip_vs_svc_lock);
+
+	IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+		  fwmark, ip_vs_proto_name(protocol),
+		  NIPQUAD(vaddr), ntohs(vport),
+		  svc?"hit":"not hit");
+
+	return svc;
+}
+
+
+static inline void
+__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	atomic_inc(&svc->refcnt);
+	dest->svc = svc;
+}
+
+static inline void
+__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+{
+	struct ip_vs_service *svc = dest->svc;
+
+	dest->svc = NULL;
+	if (atomic_dec_and_test(&svc->refcnt))
+		kfree(svc);
+}
+
+
+/*
+ *	Returns hash value for real service
+ */
+static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
+{
+	register unsigned porth = ntohs(port);
+
+	return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
+		& IP_VS_RTAB_MASK;
+}
+
+/*
+ *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+{
+	unsigned hash;
+
+	if (!list_empty(&dest->d_list)) {
+		return 0;
+	}
+
+	/*
+	 *	Hash by proto,addr,port,
+	 *	which are the parameters of the real service.
+	 */
+	hash = ip_vs_rs_hashkey(dest->addr, dest->port);
+	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+
+	return 1;
+}
+
+/*
+ *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	should be called with locked tables.
+ */
+static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+{
+	/*
+	 * Remove it from the ip_vs_rtable table.
+	 */
+	if (!list_empty(&dest->d_list)) {
+		list_del(&dest->d_list);
+		INIT_LIST_HEAD(&dest->d_list);
+	}
+
+	return 1;
+}
+
+/*
+ *	Lookup real service by <proto,addr,port> in the real service table.
+ */
+struct ip_vs_dest *
+ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
+{
+	unsigned hash;
+	struct ip_vs_dest *dest;
+
+	/*
+	 *	Check for "full" addressed entries
+	 *	Return the first found entry
+	 */
+	hash = ip_vs_rs_hashkey(daddr, dport);
+
+	read_lock(&__ip_vs_rs_lock);
+	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+		if ((dest->addr == daddr)
+		    && (dest->port == dport)
+		    && ((dest->protocol == protocol) ||
+			dest->vfwmark)) {
+			/* HIT */
+			read_unlock(&__ip_vs_rs_lock);
+			return dest;
+		}
+	}
+	read_unlock(&__ip_vs_rs_lock);
+
+	return NULL;
+}
+
+/*
+ *	Lookup destination by {addr,port} in the given service
+ */
+static struct ip_vs_dest *
+ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+{
+	struct ip_vs_dest *dest;
+
+	/*
+	 * Find the destination for the given service
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->addr == daddr) && (dest->port == dport)) {
+			/* HIT */
+			return dest;
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *  Lookup dest by {svc,addr,port} in the destination trash.
+ *  The destination trash is used to hold the destinations that are removed
+ *  from the service table but are still referenced by some conn entries.
+ *  The reason to add the destination trash is when the dest is temporary
+ *  down (either by administrator or by monitor program), the dest can be
+ *  picked back from the trash, the remaining connections to the dest can
+ *  continue, and the counting information of the dest is also useful for
+ *  scheduling.
+ */
+static struct ip_vs_dest *
+ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	/*
+	 * Find the destination in trash
+	 */
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
+			  "refcnt=%d\n",
+			  dest->vfwmark,
+			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  atomic_read(&dest->refcnt));
+		if (dest->addr == daddr &&
+		    dest->port == dport &&
+		    dest->vfwmark == svc->fwmark &&
+		    dest->protocol == svc->protocol &&
+		    (svc->fwmark ||
+		     (dest->vaddr == svc->addr &&
+		      dest->vport == svc->port))) {
+			/* HIT */
+			return dest;
+		}
+
+		/*
+		 * Try to purge the destination from trash if not referenced
+		 */
+		if (atomic_read(&dest->refcnt) == 1) {
+			IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
+				  "from trash\n",
+				  dest->vfwmark,
+				  NIPQUAD(dest->addr), ntohs(dest->port));
+			list_del(&dest->n_list);
+			ip_vs_dst_reset(dest);
+			__ip_vs_unbind_svc(dest);
+			kfree(dest);
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ *  Clean up all the destinations in the trash
+ *  Called by the ip_vs_control_cleanup()
+ *
+ *  When the ip_vs_control_clearup is activated by ipvs module exit,
+ *  the service tables must have been flushed and all the connections
+ *  are expired, and the refcnt of each destination in the trash must
+ *  be 1, so we simply release them here.
+ */
+static void ip_vs_trash_cleanup(void)
+{
+	struct ip_vs_dest *dest, *nxt;
+
+	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+		list_del(&dest->n_list);
+		ip_vs_dst_reset(dest);
+		__ip_vs_unbind_svc(dest);
+		kfree(dest);
+	}
+}
+
+
+static void
+ip_vs_zero_stats(struct ip_vs_stats *stats)
+{
+	spin_lock_bh(&stats->lock);
+	memset(stats, 0, (char *)&stats->lock - (char *)stats);
+	spin_unlock_bh(&stats->lock);
+	ip_vs_zero_estimator(stats);
+}
+
+/*
+ *	Update a destination in the given service
+ */
+static void
+__ip_vs_update_dest(struct ip_vs_service *svc,
+		    struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
+{
+	int conn_flags;
+
+	/* set the weight and the flags */
+	atomic_set(&dest->weight, udest->weight);
+	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+	/* check if local node and update the flags */
+	if (inet_addr_type(udest->addr) == RTN_LOCAL) {
+		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+			| IP_VS_CONN_F_LOCALNODE;
+	}
+
+	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
+	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
+		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
+	} else {
+		/*
+		 *    Put the real service in ip_vs_rtable if not present.
+		 *    For now only for NAT!
+		 */
+		write_lock_bh(&__ip_vs_rs_lock);
+		ip_vs_rs_hash(dest);
+		write_unlock_bh(&__ip_vs_rs_lock);
+	}
+	atomic_set(&dest->conn_flags, conn_flags);
+
+	/* bind the service */
+	if (!dest->svc) {
+		__ip_vs_bind_svc(dest, svc);
+	} else {
+		if (dest->svc != svc) {
+			__ip_vs_unbind_svc(dest);
+			ip_vs_zero_stats(&dest->stats);
+			__ip_vs_bind_svc(dest, svc);
+		}
+	}
+
+	/* set the dest status flags */
+	dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
+		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+	dest->u_threshold = udest->u_threshold;
+	dest->l_threshold = udest->l_threshold;
+}
+
+
+/*
+ *	Create a destination for the given service
+ */
+static int
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
+	       struct ip_vs_dest **dest_p)
+{
+	struct ip_vs_dest *dest;
+	unsigned atype;
+
+	EnterFunction(2);
+
+	atype = inet_addr_type(udest->addr);
+	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+		return -EINVAL;
+
+	dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+	if (dest == NULL) {
+		IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+		return -ENOMEM;
+	}
+	memset(dest, 0, sizeof(struct ip_vs_dest));
+
+	dest->protocol = svc->protocol;
+	dest->vaddr = svc->addr;
+	dest->vport = svc->port;
+	dest->vfwmark = svc->fwmark;
+	dest->addr = udest->addr;
+	dest->port = udest->port;
+
+	atomic_set(&dest->activeconns, 0);
+	atomic_set(&dest->inactconns, 0);
+	atomic_set(&dest->persistconns, 0);
+	atomic_set(&dest->refcnt, 0);
+
+	INIT_LIST_HEAD(&dest->d_list);
+	spin_lock_init(&dest->dst_lock);
+	spin_lock_init(&dest->stats.lock);
+	__ip_vs_update_dest(svc, dest, udest);
+	ip_vs_new_estimator(&dest->stats);
+
+	*dest_p = dest;
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+/*
+ *	Add a destination into an existing service
+ */
+static int
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+{
+	struct ip_vs_dest *dest;
+	__u32 daddr = udest->addr;
+	__u16 dport = udest->port;
+	int ret;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	/*
+	 * Check if the dest already exists in the list
+	 */
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest != NULL) {
+		IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+		return -EEXIST;
+	}
+
+	/*
+	 * Check if the dest already exists in the trash and
+	 * is from the same service
+	 */
+	dest = ip_vs_trash_get_dest(svc, daddr, dport);
+	if (dest != NULL) {
+		IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
+			  "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+			  NIPQUAD(daddr), ntohs(dport),
+			  atomic_read(&dest->refcnt),
+			  dest->vfwmark,
+			  NIPQUAD(dest->vaddr),
+			  ntohs(dest->vport));
+		__ip_vs_update_dest(svc, dest, udest);
+
+		/*
+		 * Get the destination from the trash
+		 */
+		list_del(&dest->n_list);
+
+		ip_vs_new_estimator(&dest->stats);
+
+		write_lock_bh(&__ip_vs_svc_lock);
+
+		/*
+		 * Wait until all other svc users go away.
+		 */
+		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+		list_add(&dest->n_list, &svc->destinations);
+		svc->num_dests++;
+
+		/* call the update_service function of its scheduler */
+		svc->scheduler->update_service(svc);
+
+		write_unlock_bh(&__ip_vs_svc_lock);
+		return 0;
+	}
+
+	/*
+	 * Allocate and initialize the dest structure
+	 */
+	ret = ip_vs_new_dest(svc, udest, &dest);
+	if (ret) {
+		return ret;
+	}
+
+	/*
+	 * Add the dest entry into the list
+	 */
+	atomic_inc(&dest->refcnt);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	list_add(&dest->n_list, &svc->destinations);
+	svc->num_dests++;
+
+	/* call the update_service function of its scheduler */
+	svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Edit a destination in the given service
+ */
+static int
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+{
+	struct ip_vs_dest *dest;
+	__u32 daddr = udest->addr;
+	__u16 dport = udest->port;
+
+	EnterFunction(2);
+
+	if (udest->weight < 0) {
+		IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+		return -ERANGE;
+	}
+
+	if (udest->l_threshold > udest->u_threshold) {
+		IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
+			  "upper threshold\n");
+		return -ERANGE;
+	}
+
+	/*
+	 *  Lookup the destination list
+	 */
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+		return -ENOENT;
+	}
+
+	__ip_vs_update_dest(svc, dest, udest);
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/* Wait until all other svc users go away */
+	while (atomic_read(&svc->usecnt) > 1) {};
+
+	/* call the update_service, because server weight may be changed */
+	svc->scheduler->update_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Delete a destination (must be already unlinked from the service)
+ */
+static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+{
+	ip_vs_kill_estimator(&dest->stats);
+
+	/*
+	 *  Remove it from the d-linked list with the real services.
+	 */
+	write_lock_bh(&__ip_vs_rs_lock);
+	ip_vs_rs_unhash(dest);
+	write_unlock_bh(&__ip_vs_rs_lock);
+
+	/*
+	 *  Decrease the refcnt of the dest, and free the dest
+	 *  if nobody refers to it (refcnt=0). Otherwise, throw
+	 *  the destination into the trash.
+	 */
+	if (atomic_dec_and_test(&dest->refcnt)) {
+		ip_vs_dst_reset(dest);
+		/* simply decrease svc->refcnt here, let the caller check
+		   and release the service if nobody refers to it.
+		   Only user context can release destination and service,
+		   and only one user context can update virtual service at a
+		   time, so the operation here is OK */
+		atomic_dec(&dest->svc->refcnt);
+		kfree(dest);
+	} else {
+		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+			  NIPQUAD(dest->addr), ntohs(dest->port),
+			  atomic_read(&dest->refcnt));
+		list_add(&dest->n_list, &ip_vs_dest_trash);
+		atomic_inc(&dest->refcnt);
+	}
+}
+
+
+/*
+ *	Unlink a destination from the given service
+ */
+static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
+				struct ip_vs_dest *dest,
+				int svcupd)
+{
+	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
+
+	/*
+	 *  Remove it from the d-linked destination list.
+	 */
+	list_del(&dest->n_list);
+	svc->num_dests--;
+	if (svcupd) {
+		/*
+		 *  Call the update_service function of its scheduler
+		 */
+		svc->scheduler->update_service(svc);
+	}
+}
+
+
+/*
+ *	Delete a destination server in the given service
+ */
+static int
+ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
+{
+	struct ip_vs_dest *dest;
+	__u32 daddr = udest->addr;
+	__u16 dport = udest->port;
+
+	EnterFunction(2);
+
+	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (dest == NULL) {
+		IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+		return -ENOENT;
+	}
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 *	Unlink dest from the service
+	 */
+	__ip_vs_unlink_dest(svc, dest, 1);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 *	Delete the destination
+	 */
+	__ip_vs_del_dest(dest);
+
+	LeaveFunction(2);
+
+	return 0;
+}
+
+
+/*
+ *	Add a service into the service hash table
+ */
+static int
+ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
+{
+	int ret = 0;
+	struct ip_vs_scheduler *sched = NULL;
+	struct ip_vs_service *svc = NULL;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	/* Lookup the scheduler by 'u->sched_name' */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		ret = -ENOENT;
+		goto out_mod_dec;
+	}
+
+	svc = (struct ip_vs_service *)
+		kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+	if (svc == NULL) {
+		IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+	memset(svc, 0, sizeof(struct ip_vs_service));
+
+	/* I'm the first user of the service */
+	atomic_set(&svc->usecnt, 1);
+	atomic_set(&svc->refcnt, 0);
+
+	svc->protocol = u->protocol;
+	svc->addr = u->addr;
+	svc->port = u->port;
+	svc->fwmark = u->fwmark;
+	svc->flags = u->flags;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	INIT_LIST_HEAD(&svc->destinations);
+	rwlock_init(&svc->sched_lock);
+	spin_lock_init(&svc->stats.lock);
+
+	/* Bind the scheduler */
+	ret = ip_vs_bind_scheduler(svc, sched);
+	if (ret)
+		goto out_err;
+	sched = NULL;
+
+	/* Update the virtual service counters */
+	if (svc->port == FTPPORT)
+		atomic_inc(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_inc(&ip_vs_nullsvc_counter);
+
+	ip_vs_new_estimator(&svc->stats);
+	ip_vs_num_services++;
+
+	/* Hash the service into the service table */
+	write_lock_bh(&__ip_vs_svc_lock);
+	ip_vs_svc_hash(svc);
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	*svc_p = svc;
+	return 0;
+
+  out_err:
+	if (svc != NULL) {
+		if (svc->scheduler)
+			ip_vs_unbind_scheduler(svc);
+		if (svc->inc) {
+			local_bh_disable();
+			ip_vs_app_inc_put(svc->inc);
+			local_bh_enable();
+		}
+		kfree(svc);
+	}
+	ip_vs_scheduler_put(sched);
+
+  out_mod_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+/*
+ *	Edit a service and bind it with a new scheduler
+ */
+static int
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
+{
+	struct ip_vs_scheduler *sched, *old_sched;
+	int ret = 0;
+
+	/*
+	 * Lookup the scheduler, by 'u->sched_name'
+	 */
+	sched = ip_vs_scheduler_get(u->sched_name);
+	if (sched == NULL) {
+		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
+			   u->sched_name);
+		return -ENOENT;
+	}
+	old_sched = sched;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	/*
+	 * Wait until all other svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	/*
+	 * Set the flags and timeout value
+	 */
+	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
+	svc->timeout = u->timeout * HZ;
+	svc->netmask = u->netmask;
+
+	old_sched = svc->scheduler;
+	if (sched != old_sched) {
+		/*
+		 * Unbind the old scheduler
+		 */
+		if ((ret = ip_vs_unbind_scheduler(svc))) {
+			old_sched = sched;
+			goto out;
+		}
+
+		/*
+		 * Bind the new scheduler
+		 */
+		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
+			/*
+			 * If ip_vs_bind_scheduler fails, restore the old
+			 * scheduler.
+			 * The main reason of failure is out of memory.
+			 *
+			 * The question is if the old scheduler can be
+			 * restored all the time. TODO: if it cannot be
+			 * restored some time, we must delete the service,
+			 * otherwise the system may crash.
+			 */
+			ip_vs_bind_scheduler(svc, old_sched);
+			old_sched = sched;
+			goto out;
+		}
+	}
+
+  out:
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	return ret;
+}
+
+
+/*
+ *	Delete a service from the service list
+ *	- The service must be unlinked, unlocked and not referenced!
+ *	- We are called under _bh lock
+ */
+static void __ip_vs_del_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest, *nxt;
+	struct ip_vs_scheduler *old_sched;
+
+	ip_vs_num_services--;
+	ip_vs_kill_estimator(&svc->stats);
+
+	/* Unbind scheduler */
+	old_sched = svc->scheduler;
+	ip_vs_unbind_scheduler(svc);
+	if (old_sched)
+		ip_vs_scheduler_put(old_sched);
+
+	/* Unbind app inc */
+	if (svc->inc) {
+		ip_vs_app_inc_put(svc->inc);
+		svc->inc = NULL;
+	}
+
+	/*
+	 *    Unlink the whole destination list
+	 */
+	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
+		__ip_vs_unlink_dest(svc, dest, 0);
+		__ip_vs_del_dest(dest);
+	}
+
+	/*
+	 *    Update the virtual service counters
+	 */
+	if (svc->port == FTPPORT)
+		atomic_dec(&ip_vs_ftpsvc_counter);
+	else if (svc->port == 0)
+		atomic_dec(&ip_vs_nullsvc_counter);
+
+	/*
+	 *    Free the service if nobody refers to it
+	 */
+	if (atomic_read(&svc->refcnt) == 0)
+		kfree(svc);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+}
+
+/*
+ *	Delete a service from the service list
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+	if (svc == NULL)
+		return -EEXIST;
+
+	/*
+	 * Unhash it from the service table
+	 */
+	write_lock_bh(&__ip_vs_svc_lock);
+
+	ip_vs_svc_unhash(svc);
+
+	/*
+	 * Wait until all the svc users go away.
+	 */
+	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+
+	__ip_vs_del_service(svc);
+
+	write_unlock_bh(&__ip_vs_svc_lock);
+
+	return 0;
+}
+
+
+/*
+ *	Flush all the virtual services
+ */
+static int ip_vs_flush(void)
+{
+	int idx;
+	struct ip_vs_service *svc, *nxt;
+
+	/*
+	 * Flush the service table hashed by <protocol,addr,port>
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	/*
+	 * Flush the service table hashed by fwmark
+	 */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry_safe(svc, nxt,
+					 &ip_vs_svc_fwm_table[idx], f_list) {
+			write_lock_bh(&__ip_vs_svc_lock);
+			ip_vs_svc_unhash(svc);
+			/*
+			 * Wait until all the svc users go away.
+			 */
+			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+			__ip_vs_del_service(svc);
+			write_unlock_bh(&__ip_vs_svc_lock);
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *	Zero counters in a service or all services
+ */
+static int ip_vs_zero_service(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+
+	write_lock_bh(&__ip_vs_svc_lock);
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		ip_vs_zero_stats(&dest->stats);
+	}
+	ip_vs_zero_stats(&svc->stats);
+	write_unlock_bh(&__ip_vs_svc_lock);
+	return 0;
+}
+
+static int ip_vs_zero_all(void)
+{
+	int idx;
+	struct ip_vs_service *svc;
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			ip_vs_zero_service(svc);
+		}
+	}
+
+	ip_vs_zero_stats(&ip_vs_stats);
+	return 0;
+}
+
+
+static int
+proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 3)) {
+			/* Restore the correct value */
+			*valp = val;
+		} else {
+			local_bh_disable();
+			update_defense_level();
+			local_bh_enable();
+		}
+	}
+	return rc;
+}
+
+
+static int
+proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+		       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val[2];
+	int rc;
+
+	/* backup the value first */
+	memcpy(val, valp, sizeof(val));
+
+	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
+		/* Restore the correct value */
+		memcpy(valp, val, sizeof(val));
+	}
+	return rc;
+}
+
+
+/*
+ *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ */
+
+static struct ctl_table vs_vars[] = {
+	{
+		.ctl_name	= NET_IPV4_VS_AMEMTHRESH,
+		.procname	= "amemthresh",
+		.data		= &sysctl_ip_vs_amemthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.ctl_name	= NET_IPV4_VS_DEBUG_LEVEL,
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+	{
+		.ctl_name	= NET_IPV4_VS_AMDROPRATE,
+		.procname	= "am_droprate",
+		.data		= &sysctl_ip_vs_am_droprate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_DROP_ENTRY,
+		.procname	= "drop_entry",
+		.data		= &sysctl_ip_vs_drop_entry,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_DROP_PACKET,
+		.procname	= "drop_packet",
+		.data		= &sysctl_ip_vs_drop_packet,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_SECURE_TCP,
+		.procname	= "secure_tcp",
+		.data		= &sysctl_ip_vs_secure_tcp,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_defense_mode,
+	},
+#if 0
+	{
+		.ctl_name	= NET_IPV4_VS_TO_ES,
+		.procname	= "timeout_established",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_SS,
+		.procname	= "timeout_synsent",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_SR,
+		.procname	= "timeout_synrecv",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_FW,
+		.procname	= "timeout_finwait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_TW,
+		.procname	= "timeout_timewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_CL,
+		.procname	= "timeout_close",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_CW,
+		.procname	= "timeout_closewait",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_LA,
+		.procname	= "timeout_lastack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_LI,
+		.procname	= "timeout_listen",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_SA,
+		.procname	= "timeout_synack",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_UDP,
+		.procname	= "timeout_udp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_TO_ICMP,
+		.procname	= "timeout_icmp",
+		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+#endif
+	{
+		.ctl_name	= NET_IPV4_VS_CACHE_BYPASS,
+		.procname	= "cache_bypass",
+		.data		= &sysctl_ip_vs_cache_bypass,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_EXPIRE_NODEST_CONN,
+		.procname	= "expire_nodest_conn",
+		.data		= &sysctl_ip_vs_expire_nodest_conn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
+		.procname	= "expire_quiescent_template",
+		.data		= &sysctl_ip_vs_expire_quiescent_template,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_SYNC_THRESHOLD,
+		.procname	= "sync_threshold",
+		.data		= &sysctl_ip_vs_sync_threshold,
+		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_threshold,
+	},
+	{
+		.ctl_name	= NET_IPV4_VS_NAT_ICMP_SEND,
+		.procname	= "nat_icmp_send",
+		.data		= &sysctl_ip_vs_nat_icmp_send,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table vs_table[] = {
+	{
+		.ctl_name	= NET_IPV4_VS,
+		.procname	= "vs",
+		.mode		= 0555,
+		.child		= vs_vars
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipv4_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= vs_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table vs_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ipv4_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+#ifdef CONFIG_PROC_FS
+
+struct ip_vs_iter {
+	struct list_head *table;
+	int bucket;
+};
+
+/*
+ *	Write the contents of the VS rule table to a PROCfs file.
+ *	(It is kept just for backward compatibility)
+ */
+static inline const char *ip_vs_fwd_name(unsigned flags)
+{
+	switch (flags & IP_VS_CONN_F_FWD_MASK) {
+	case IP_VS_CONN_F_LOCALNODE:
+		return "Local";
+	case IP_VS_CONN_F_TUNNEL:
+		return "Tunnel";
+	case IP_VS_CONN_F_DROUTE:
+		return "Route";
+	default:
+		return "Masq";
+	}
+}
+
+
+/* Get the Nth entry in the two lists */
+static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
+{
+	struct ip_vs_iter *iter = seq->private;
+	int idx;
+	struct ip_vs_service *svc;
+
+	/* look in hash by protocol */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (pos-- == 0){
+				iter->table = ip_vs_svc_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	/* keep looking in fwmark */
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (pos-- == 0) {
+				iter->table = ip_vs_svc_fwm_table;
+				iter->bucket = idx;
+				return svc;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
+{
+
+	read_lock_bh(&__ip_vs_svc_lock);
+	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+
+static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *e;
+	struct ip_vs_iter *iter;
+	struct ip_vs_service *svc;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ip_vs_info_array(seq,0);
+
+	svc = v;
+	iter = seq->private;
+
+	if (iter->table == ip_vs_svc_table) {
+		/* next service in table hashed by protocol */
+		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+			return list_entry(e, struct ip_vs_service, s_list);
+
+
+		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+					    s_list) {
+				return svc;
+			}
+		}
+
+		iter->table = ip_vs_svc_fwm_table;
+		iter->bucket = -1;
+		goto scan_fwmark;
+	}
+
+	/* next service in hashed by fwmark */
+	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+		return list_entry(e, struct ip_vs_service, f_list);
+
+ scan_fwmark:
+	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+				    f_list)
+			return svc;
+	}
+
+	return NULL;
+}
+
+static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&__ip_vs_svc_lock);
+}
+
+
+static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq,
+			"IP Virtual Server version %d.%d.%d (size=%d)\n",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		seq_puts(seq,
+			 "Prot LocalAddress:Port Scheduler Flags\n");
+		seq_puts(seq,
+			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
+	} else {
+		const struct ip_vs_service *svc = v;
+		const struct ip_vs_iter *iter = seq->private;
+		const struct ip_vs_dest *dest;
+
+		if (iter->table == ip_vs_svc_table)
+			seq_printf(seq, "%s  %08X:%04X %s ",
+				   ip_vs_proto_name(svc->protocol),
+				   ntohl(svc->addr),
+				   ntohs(svc->port),
+				   svc->scheduler->name);
+		else
+			seq_printf(seq, "FWM  %08X %s ",
+				   svc->fwmark, svc->scheduler->name);
+
+		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+			seq_printf(seq, "persistent %d %08X\n",
+				svc->timeout,
+				ntohl(svc->netmask));
+		else
+			seq_putc(seq, '\n');
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+			seq_printf(seq,
+				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
+				   ntohl(dest->addr), ntohs(dest->port),
+				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+				   atomic_read(&dest->weight),
+				   atomic_read(&dest->activeconns),
+				   atomic_read(&dest->inactconns));
+		}
+	}
+	return 0;
+}
+
+static struct seq_operations ip_vs_info_seq_ops = {
+	.start = ip_vs_info_seq_start,
+	.next  = ip_vs_info_seq_next,
+	.stop  = ip_vs_info_seq_stop,
+	.show  = ip_vs_info_seq_show,
+};
+
+static int ip_vs_info_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &ip_vs_info_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations ip_vs_info_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ip_vs_info_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+#endif
+
+struct ip_vs_stats ip_vs_stats;
+
+#ifdef CONFIG_PROC_FS
+static int ip_vs_stats_show(struct seq_file *seq, void *v)
+{
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
+	seq_printf(seq,
+		   "   Conns  Packets  Packets            Bytes            Bytes\n");
+
+	spin_lock_bh(&ip_vs_stats.lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
+		   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
+		   (unsigned long long) ip_vs_stats.inbytes,
+		   (unsigned long long) ip_vs_stats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
+			ip_vs_stats.cps,
+			ip_vs_stats.inpps,
+			ip_vs_stats.outpps,
+			ip_vs_stats.inbps,
+			ip_vs_stats.outbps);
+	spin_unlock_bh(&ip_vs_stats.lock);
+
+	return 0;
+}
+
+static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ip_vs_stats_show, NULL);
+}
+
+static struct file_operations ip_vs_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = ip_vs_stats_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif
+
+/*
+ *	Set timeout values for tcp tcpfin udp in the timeout_table.
+ */
+static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+{
+	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
+		  u->tcp_timeout,
+		  u->tcp_fin_timeout,
+		  u->udp_timeout);
+
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	if (u->tcp_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+			= u->tcp_timeout * HZ;
+	}
+
+	if (u->tcp_fin_timeout) {
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+			= u->tcp_fin_timeout * HZ;
+	}
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	if (u->udp_timeout) {
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+			= u->udp_timeout * HZ;
+	}
+#endif
+	return 0;
+}
+
+
+#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
+#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
+				 sizeof(struct ip_vs_dest_user))
+#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
+#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
+#define MAX_ARG_LEN		SVCDEST_ARG_LEN
+
+static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
+	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
+	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
+};
+
+static int
+do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+	unsigned char arg[MAX_ARG_LEN];
+	struct ip_vs_service_user *usvc;
+	struct ip_vs_service *svc;
+	struct ip_vs_dest_user *udest;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (len != set_arglen[SET_CMDID(cmd)]) {
+		IP_VS_ERR("set_ctl: len %u != %u\n",
+			  len, set_arglen[SET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, len) != 0)
+		return -EFAULT;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	if (down_interruptible(&__ip_vs_mutex)) {
+		ret = -ERESTARTSYS;
+		goto out_dec;
+	}
+
+	if (cmd == IP_VS_SO_SET_FLUSH) {
+		/* Flush the virtual service */
+		ret = ip_vs_flush();
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
+		/* Set timeout values for (tcp tcpfin udp) */
+		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		goto out_unlock;
+	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
+		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
+		ret = stop_sync_thread(dm->state);
+		goto out_unlock;
+	}
+
+	usvc = (struct ip_vs_service_user *)arg;
+	udest = (struct ip_vs_dest_user *)(usvc + 1);
+
+	if (cmd == IP_VS_SO_SET_ZERO) {
+		/* if no service address is set, zero counters in all */
+		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
+			ret = ip_vs_zero_all();
+			goto out_unlock;
+		}
+	}
+
+	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
+	if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
+		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
+			  usvc->protocol, NIPQUAD(usvc->addr),
+			  ntohs(usvc->port), usvc->sched_name);
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc->fwmark == 0)
+		svc = __ip_vs_service_get(usvc->protocol,
+					  usvc->addr, usvc->port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+
+	if (cmd != IP_VS_SO_SET_ADD
+	    && (svc == NULL || svc->protocol != usvc->protocol)) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+
+	switch (cmd) {
+	case IP_VS_SO_SET_ADD:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(usvc, &svc);
+		break;
+	case IP_VS_SO_SET_EDIT:
+		ret = ip_vs_edit_service(svc, usvc);
+		break;
+	case IP_VS_SO_SET_DEL:
+		ret = ip_vs_del_service(svc);
+		if (!ret)
+			goto out_unlock;
+		break;
+	case IP_VS_SO_SET_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	case IP_VS_SO_SET_ADDDEST:
+		ret = ip_vs_add_dest(svc, udest);
+		break;
+	case IP_VS_SO_SET_EDITDEST:
+		ret = ip_vs_edit_dest(svc, udest);
+		break;
+	case IP_VS_SO_SET_DELDEST:
+		ret = ip_vs_del_dest(svc, udest);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (svc)
+		ip_vs_service_put(svc);
+
+  out_unlock:
+	up(&__ip_vs_mutex);
+  out_dec:
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	return ret;
+}
+
+
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+	spin_lock_bh(&src->lock);
+	memcpy(dst, src, (char*)&src->lock - (char*)src);
+	spin_unlock_bh(&src->lock);
+}
+
+static void
+ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
+{
+	dst->protocol = src->protocol;
+	dst->addr = src->addr;
+	dst->port = src->port;
+	dst->fwmark = src->fwmark;
+	strcpy(dst->sched_name, src->scheduler->name);
+	dst->flags = src->flags;
+	dst->timeout = src->timeout / HZ;
+	dst->netmask = src->netmask;
+	dst->num_dests = src->num_dests;
+	ip_vs_copy_stats(&dst->stats, &src->stats);
+}
+
+static inline int
+__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+			    struct ip_vs_get_services __user *uptr)
+{
+	int idx, count=0;
+	struct ip_vs_service *svc;
+	struct ip_vs_service_entry entry;
+	int ret = 0;
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (count >= get->num_services)
+				goto out;
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (count >= get->num_services)
+				goto out;
+			ip_vs_copy_service(&entry, svc);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			count++;
+		}
+	}
+  out:
+	return ret;
+}
+
+static inline int
+__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+			 struct ip_vs_get_dests __user *uptr)
+{
+	struct ip_vs_service *svc;
+	int ret = 0;
+
+	if (get->fwmark)
+		svc = __ip_vs_svc_fwm_get(get->fwmark);
+	else
+		svc = __ip_vs_service_get(get->protocol,
+					  get->addr, get->port);
+	if (svc) {
+		int count = 0;
+		struct ip_vs_dest *dest;
+		struct ip_vs_dest_entry entry;
+
+		list_for_each_entry(dest, &svc->destinations, n_list) {
+			if (count >= get->num_dests)
+				break;
+
+			entry.addr = dest->addr;
+			entry.port = dest->port;
+			entry.conn_flags = atomic_read(&dest->conn_flags);
+			entry.weight = atomic_read(&dest->weight);
+			entry.u_threshold = dest->u_threshold;
+			entry.l_threshold = dest->l_threshold;
+			entry.activeconns = atomic_read(&dest->activeconns);
+			entry.inactconns = atomic_read(&dest->inactconns);
+			entry.persistconns = atomic_read(&dest->persistconns);
+			ip_vs_copy_stats(&entry.stats, &dest->stats);
+			if (copy_to_user(&uptr->entrytable[count],
+					 &entry, sizeof(entry))) {
+				ret = -EFAULT;
+				break;
+			}
+			count++;
+		}
+		ip_vs_service_put(svc);
+	} else
+		ret = -ESRCH;
+	return ret;
+}
+
+static inline void
+__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	u->tcp_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout =
+		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	u->udp_timeout =
+		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+#endif
+}
+
+
+#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
+#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
+#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
+#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
+#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
+#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
+#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
+
+static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
+	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
+	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
+};
+
+static int
+do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	unsigned char arg[128];
+	int ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (*len < get_arglen[GET_CMDID(cmd)]) {
+		IP_VS_ERR("get_ctl: len %u < %u\n",
+			  *len, get_arglen[GET_CMDID(cmd)]);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
+		return -EFAULT;
+
+	if (down_interruptible(&__ip_vs_mutex))
+		return -ERESTARTSYS;
+
+	switch (cmd) {
+	case IP_VS_SO_GET_VERSION:
+	{
+		char buf[64];
+
+		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
+			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+		*len = strlen(buf)+1;
+	}
+	break;
+
+	case IP_VS_SO_GET_INFO:
+	{
+		struct ip_vs_getinfo info;
+		info.version = IP_VS_VERSION_CODE;
+		info.size = IP_VS_CONN_TAB_SIZE;
+		info.num_services = ip_vs_num_services;
+		if (copy_to_user(user, &info, sizeof(info)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICES:
+	{
+		struct ip_vs_get_services *get;
+		int size;
+
+		get = (struct ip_vs_get_services *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_service_entry) * get->num_services;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_service_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_SERVICE:
+	{
+		struct ip_vs_service_entry *entry;
+		struct ip_vs_service *svc;
+
+		entry = (struct ip_vs_service_entry *)arg;
+		if (entry->fwmark)
+			svc = __ip_vs_svc_fwm_get(entry->fwmark);
+		else
+			svc = __ip_vs_service_get(entry->protocol,
+						  entry->addr, entry->port);
+		if (svc) {
+			ip_vs_copy_service(entry, svc);
+			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
+				ret = -EFAULT;
+			ip_vs_service_put(svc);
+		} else
+			ret = -ESRCH;
+	}
+	break;
+
+	case IP_VS_SO_GET_DESTS:
+	{
+		struct ip_vs_get_dests *get;
+		int size;
+
+		get = (struct ip_vs_get_dests *)arg;
+		size = sizeof(*get) +
+			sizeof(struct ip_vs_dest_entry) * get->num_dests;
+		if (*len != size) {
+			IP_VS_ERR("length: %u != %u\n", *len, size);
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = __ip_vs_get_dest_entries(get, user);
+	}
+	break;
+
+	case IP_VS_SO_GET_TIMEOUT:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+		if (copy_to_user(user, &t, sizeof(t)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	case IP_VS_SO_GET_DAEMON:
+	{
+		struct ip_vs_daemon_user d[2];
+
+		memset(&d, 0, sizeof(d));
+		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+			d[0].state = IP_VS_STATE_MASTER;
+			strcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn);
+			d[0].syncid = ip_vs_master_syncid;
+		}
+		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+			d[1].state = IP_VS_STATE_BACKUP;
+			strcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn);
+			d[1].syncid = ip_vs_backup_syncid;
+		}
+		if (copy_to_user(user, &d, sizeof(d)) != 0)
+			ret = -EFAULT;
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+  out:
+	up(&__ip_vs_mutex);
+	return ret;
+}
+
+
+static struct nf_sockopt_ops ip_vs_sockopts = {
+	.pf		= PF_INET,
+	.set_optmin	= IP_VS_BASE_CTL,
+	.set_optmax	= IP_VS_SO_SET_MAX+1,
+	.set		= do_ip_vs_set_ctl,
+	.get_optmin	= IP_VS_BASE_CTL,
+	.get_optmax	= IP_VS_SO_GET_MAX+1,
+	.get		= do_ip_vs_get_ctl,
+};
+
+
+int ip_vs_control_init(void)
+{
+	int ret;
+	int idx;
+
+	EnterFunction(2);
+
+	ret = nf_register_sockopt(&ip_vs_sockopts);
+	if (ret) {
+		IP_VS_ERR("cannot register sockopt.\n");
+		return ret;
+	}
+
+	proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
+
+	sysctl_header = register_sysctl_table(vs_root_table, 0);
+
+	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
+		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+	}
+	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
+		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+	}
+
+	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
+	spin_lock_init(&ip_vs_stats.lock);
+	ip_vs_new_estimator(&ip_vs_stats);
+
+	/* Hook the defense timer */
+	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+
+	LeaveFunction(2);
+	return 0;
+}
+
+
+void ip_vs_control_cleanup(void)
+{
+	EnterFunction(2);
+	ip_vs_trash_cleanup();
+	cancel_rearming_delayed_work(&defense_work);
+	ip_vs_kill_estimator(&ip_vs_stats);
+	unregister_sysctl_table(sysctl_header);
+	proc_net_remove("ip_vs_stats");
+	proc_net_remove("ip_vs");
+	nf_unregister_sockopt(&ip_vs_sockopts);
+	LeaveFunction(2);
+}
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
new file mode 100644
index 00000000000..f3bc320dce9
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -0,0 +1,258 @@
+/*
+ * IPVS:        Destination Hashing scheduling module
+ *
+ * Version:     $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              Inspired by the consistent hashing scheduler patch from
+ *              Thomas Proell <proellt@gmx.de>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The dh algorithm is to select server by the hash key of destination IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[dest_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) OR (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet destination IP address to the current server
+ * array. If the dh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS DH bucket
+ */
+struct ip_vs_dh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS DH entry hash table
+ */
+#ifndef CONFIG_IP_VS_DH_TAB_BITS
+#define CONFIG_IP_VS_DH_TAB_BITS        8
+#endif
+#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
+#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
+#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS DH entry
+ */
+static inline unsigned ip_vs_dh_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr)
+{
+	return (tbl[ip_vs_dh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_dh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl;
+
+	/* allocate the DH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_dh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_dh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_dh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Destination hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_dh_bucket *tbl;
+	struct iphdr *iph = skb->nh.iph;
+
+	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
+	dest = ip_vs_dh_get(tbl, iph->daddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->daddr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS DH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_dh_scheduler =
+{
+	.name =			"dh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_dh_init_svc,
+	.done_service =		ip_vs_dh_done_svc,
+	.update_service =	ip_vs_dh_update_svc,
+	.schedule =		ip_vs_dh_schedule,
+};
+
+
+static int __init ip_vs_dh_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+static void __exit ip_vs_dh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+}
+
+
+module_init(ip_vs_dh_init);
+module_exit(ip_vs_dh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
new file mode 100644
index 00000000000..67b3e2fc1fa
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -0,0 +1,200 @@
+/*
+ * ip_vs_est.c: simple rate estimator for IPVS
+ *
+ * Version:     $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include <net/ip_vs.h>
+
+/*
+  This code is to estimate rate in a shorter interval (such as 8
+  seconds) for virtual services and real servers. For measure rate in a
+  long interval, it is easy to implement a user level daemon which
+  periodically reads those statistical counters and measure rate.
+
+  Currently, the measurement is activated by slow timer handler. Hope
+  this measurement will not introduce too much load.
+
+  We measure rate during the last 8 seconds every 2 seconds:
+
+    avgrate = avgrate*(1-W) + rate*W
+
+    where W = 2^(-2)
+
+  NOTES.
+
+  * The stored value for average bps is scaled by 2^5, so that maximal
+    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+
+  * A lot code is taken from net/sched/estimator.c
+ */
+
+
+struct ip_vs_estimator
+{
+	struct ip_vs_estimator	*next;
+	struct ip_vs_stats	*stats;
+
+	u32			last_conns;
+	u32			last_inpkts;
+	u32			last_outpkts;
+	u64			last_inbytes;
+	u64			last_outbytes;
+
+	u32			cps;
+	u32			inpps;
+	u32			outpps;
+	u32			inbps;
+	u32			outbps;
+};
+
+
+static struct ip_vs_estimator *est_list = NULL;
+static DEFINE_RWLOCK(est_lock);
+static struct timer_list est_timer;
+
+static void estimation_timer(unsigned long arg)
+{
+	struct ip_vs_estimator *e;
+	struct ip_vs_stats *s;
+	u32 n_conns;
+	u32 n_inpkts, n_outpkts;
+	u64 n_inbytes, n_outbytes;
+	u32 rate;
+
+	read_lock(&est_lock);
+	for (e = est_list; e; e = e->next) {
+		s = e->stats;
+
+		spin_lock(&s->lock);
+		n_conns = s->conns;
+		n_inpkts = s->inpkts;
+		n_outpkts = s->outpkts;
+		n_inbytes = s->inbytes;
+		n_outbytes = s->outbytes;
+
+		/* scaled by 2^10, but divided 2 seconds */
+		rate = (n_conns - e->last_conns)<<9;
+		e->last_conns = n_conns;
+		e->cps += ((long)rate - (long)e->cps)>>2;
+		s->cps = (e->cps+0x1FF)>>10;
+
+		rate = (n_inpkts - e->last_inpkts)<<9;
+		e->last_inpkts = n_inpkts;
+		e->inpps += ((long)rate - (long)e->inpps)>>2;
+		s->inpps = (e->inpps+0x1FF)>>10;
+
+		rate = (n_outpkts - e->last_outpkts)<<9;
+		e->last_outpkts = n_outpkts;
+		e->outpps += ((long)rate - (long)e->outpps)>>2;
+		s->outpps = (e->outpps+0x1FF)>>10;
+
+		rate = (n_inbytes - e->last_inbytes)<<4;
+		e->last_inbytes = n_inbytes;
+		e->inbps += ((long)rate - (long)e->inbps)>>2;
+		s->inbps = (e->inbps+0xF)>>5;
+
+		rate = (n_outbytes - e->last_outbytes)<<4;
+		e->last_outbytes = n_outbytes;
+		e->outbps += ((long)rate - (long)e->outbps)>>2;
+		s->outbps = (e->outbps+0xF)>>5;
+		spin_unlock(&s->lock);
+	}
+	read_unlock(&est_lock);
+	mod_timer(&est_timer, jiffies + 2*HZ);
+}
+
+int ip_vs_new_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOMEM;
+
+	memset(est, 0, sizeof(*est));
+	est->stats = stats;
+	est->last_conns = stats->conns;
+	est->cps = stats->cps<<10;
+
+	est->last_inpkts = stats->inpkts;
+	est->inpps = stats->inpps<<10;
+
+	est->last_outpkts = stats->outpkts;
+	est->outpps = stats->outpps<<10;
+
+	est->last_inbytes = stats->inbytes;
+	est->inbps = stats->inbps<<5;
+
+	est->last_outbytes = stats->outbytes;
+	est->outbps = stats->outbps<<5;
+
+	write_lock_bh(&est_lock);
+	est->next = est_list;
+	if (est->next == NULL) {
+		init_timer(&est_timer);
+		est_timer.expires = jiffies + 2*HZ;
+		est_timer.function = estimation_timer;
+		add_timer(&est_timer);
+	}
+	est_list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *est, **pest;
+	int killed = 0;
+
+	write_lock_bh(&est_lock);
+	pest = &est_list;
+	while ((est=*pest) != NULL) {
+		if (est->stats != stats) {
+			pest = &est->next;
+			continue;
+		}
+		*pest = est->next;
+		kfree(est);
+		killed++;
+	}
+	if (killed && est_list == NULL)
+		del_timer_sync(&est_timer);
+	write_unlock_bh(&est_lock);
+}
+
+void ip_vs_zero_estimator(struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *e;
+
+	write_lock_bh(&est_lock);
+	for (e = est_list; e; e = e->next) {
+		if (e->stats != stats)
+			continue;
+
+		/* set counters zero */
+		e->last_conns = 0;
+		e->last_inpkts = 0;
+		e->last_outpkts = 0;
+		e->last_inbytes = 0;
+		e->last_outbytes = 0;
+		e->cps = 0;
+		e->inpps = 0;
+		e->outpps = 0;
+		e->inbps = 0;
+		e->outbps = 0;
+	}
+	write_unlock_bh(&est_lock);
+}
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
new file mode 100644
index 00000000000..a19a33ceb81
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -0,0 +1,400 @@
+/*
+ * ip_vs_ftp.c: IPVS ftp application module
+ *
+ * Version:	$Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * Changes:
+ *
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference
+ * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
+ *
+ *		IP_MASQ_FTP ftp masquerading module
+ *
+ * Version:	@(#)ip_masq_ftp.c 0.04   02/05/96
+ *
+ * Author:	Wouter Gadeyne
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+
+#include <net/ip_vs.h>
+
+
+#define SERVER_STRING "227 Entering Passive Mode ("
+#define CLIENT_STRING "PORT "
+
+
+/*
+ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
+ * First port is set to the default port.
+ */
+static int ports[IP_VS_APP_MAX_PORTS] = {21, 0};
+module_param_array(ports, int, NULL, 0);
+
+/*
+ *	Debug level
+ */
+#ifdef CONFIG_IP_VS_DEBUG
+static int debug=0;
+module_param(debug, int, 0);
+#endif
+
+
+/*	Dummy variable */
+static int ip_vs_ftp_pasv;
+
+
+static int
+ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
+{
+	return 0;
+}
+
+
+/*
+ * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
+ * with the "pattern" and terminated with the "term" character.
+ * <addr,port> is in network order.
+ */
+static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
+				  const char *pattern, size_t plen, char term,
+				  __u32 *addr, __u16 *port,
+				  char **start, char **end)
+{
+	unsigned char p[6];
+	int i = 0;
+
+	if (data_limit - data < plen) {
+		/* check if there is partial match */
+		if (strnicmp(data, pattern, data_limit - data) == 0)
+			return -1;
+		else
+			return 0;
+	}
+
+	if (strnicmp(data, pattern, plen) != 0) {
+		return 0;
+	}
+	*start = data + plen;
+
+	for (data = *start; *data != term; data++) {
+		if (data == data_limit)
+			return -1;
+	}
+	*end = data;
+
+	memset(p, 0, sizeof(p));
+	for (data = *start; data != *end; data++) {
+		if (*data >= '0' && *data <= '9') {
+			p[i] = p[i]*10 + *data - '0';
+		} else if (*data == ',' && i < 5) {
+			i++;
+		} else {
+			/* unexpected character */
+			return -1;
+		}
+	}
+
+	if (i != 5)
+		return -1;
+
+	*addr = (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0];
+	*port = (p[5]<<8) | p[4];
+	return 1;
+}
+
+
+/*
+ * Look at outgoing ftp packets to catch the response to a PASV command
+ * from the server (inside-to-outside).
+ * When we see one, we build a connection entry with the client address,
+ * client port 0 (unknown at the moment), the server address and the
+ * server port.  Mark the current connection entry as a control channel
+ * of the new entry. All this work is just to make the data connection
+ * can be scheduled to the right server later.
+ *
+ * The outgoing packet should be something like
+ *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
+ * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ */
+static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			 struct sk_buff **pskb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_limit;
+	char *start, *end;
+	__u32 from;
+	__u16 port;
+	struct ip_vs_conn *n_cp;
+	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
+	unsigned buf_len;
+	int ret;
+
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+		return 0;
+
+	if (cp->app_data == &ip_vs_ftp_pasv) {
+		iph = (*pskb)->nh.iph;
+		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+		data = (char *)th + (th->doff << 2);
+		data_limit = (*pskb)->tail;
+
+		if (ip_vs_ftp_get_addrport(data, data_limit,
+					   SERVER_STRING,
+					   sizeof(SERVER_STRING)-1, ')',
+					   &from, &port,
+					   &start, &end) != 1)
+			return 1;
+
+		IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
+			  "%u.%u.%u.%u:%d detected\n",
+			  NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
+
+		/*
+		 * Now update or create an connection entry for it
+		 */
+		n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
+					  cp->caddr, 0);
+		if (!n_cp) {
+			n_cp = ip_vs_conn_new(IPPROTO_TCP,
+					      cp->caddr, 0,
+					      cp->vaddr, port,
+					      from, port,
+					      IP_VS_CONN_F_NO_CPORT,
+					      cp->dest);
+			if (!n_cp)
+				return 0;
+
+			/* add its controller */
+			ip_vs_control_add(n_cp, cp);
+		}
+
+		/*
+		 * Replace the old passive address with the new one
+		 */
+		from = n_cp->vaddr;
+		port = n_cp->vport;
+		sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
+			port&255, (port>>8)&255);
+		buf_len = strlen(buf);
+
+		/*
+		 * Calculate required delta-offset to keep TCP happy
+		 */
+		*diff = buf_len - (end-start);
+
+		if (*diff == 0) {
+			/* simply replace it with new passive address */
+			memcpy(start, buf, buf_len);
+			ret = 1;
+		} else {
+			ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+					  end-start, buf, buf_len);
+		}
+
+		cp->app_data = NULL;
+		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_conn_put(n_cp);
+		return ret;
+	}
+	return 1;
+}
+
+
+/*
+ * Look at incoming ftp packets to catch the PASV/PORT command
+ * (outside-to-inside).
+ *
+ * The incoming packet having the PORT command should be something like
+ *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
+ * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
+ * In this case, we create a connection entry using the client address and
+ * port, so that the active ftp data connection from the server can reach
+ * the client.
+ */
+static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
+			struct sk_buff **pskb, int *diff)
+{
+	struct iphdr *iph;
+	struct tcphdr *th;
+	char *data, *data_start, *data_limit;
+	char *start, *end;
+	__u32 to;
+	__u16 port;
+	struct ip_vs_conn *n_cp;
+
+	/* no diff required for incoming packets */
+	*diff = 0;
+
+	/* Only useful for established sessions */
+	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
+		return 1;
+
+	/* Linear packets are much easier to deal with. */
+	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+		return 0;
+
+	/*
+	 * Detecting whether it is passive
+	 */
+	iph = (*pskb)->nh.iph;
+	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
+
+	/* Since there may be OPTIONS in the TCP packet and the HLEN is
+	   the length of the header in 32-bit multiples, it is accurate
+	   to calculate data address by th+HLEN*4 */
+	data = data_start = (char *)th + (th->doff << 2);
+	data_limit = (*pskb)->tail;
+
+	while (data <= data_limit - 6) {
+		if (strnicmp(data, "PASV\r\n", 6) == 0) {
+			/* Passive mode on */
+			IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n",
+				  data - data_start,
+				  data_limit - data_start);
+			cp->app_data = &ip_vs_ftp_pasv;
+			return 1;
+		}
+		data++;
+	}
+
+	/*
+	 * To support virtual FTP server, the scenerio is as follows:
+	 *       FTP client ----> Load Balancer ----> FTP server
+	 * First detect the port number in the application data,
+	 * then create a new connection entry for the coming data
+	 * connection.
+	 */
+	if (ip_vs_ftp_get_addrport(data_start, data_limit,
+				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
+				   '\r', &to, &port,
+				   &start, &end) != 1)
+		return 1;
+
+	IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
+		  NIPQUAD(to), ntohs(port));
+
+	/* Passive mode off */
+	cp->app_data = NULL;
+
+	/*
+	 * Now update or create a connection entry for it
+	 */
+	IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
+		  ip_vs_proto_name(iph->protocol),
+		  NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
+
+	n_cp = ip_vs_conn_in_get(iph->protocol,
+				 to, port,
+				 cp->vaddr, htons(ntohs(cp->vport)-1));
+	if (!n_cp) {
+		n_cp = ip_vs_conn_new(IPPROTO_TCP,
+				      to, port,
+				      cp->vaddr, htons(ntohs(cp->vport)-1),
+				      cp->daddr, htons(ntohs(cp->dport)-1),
+				      0,
+				      cp->dest);
+		if (!n_cp)
+			return 0;
+
+		/* add its controller */
+		ip_vs_control_add(n_cp, cp);
+	}
+
+	/*
+	 *	Move tunnel to listen state
+	 */
+	ip_vs_tcp_conn_listen(n_cp);
+	ip_vs_conn_put(n_cp);
+
+	return 1;
+}
+
+
+static struct ip_vs_app ip_vs_ftp = {
+	.name =		"ftp",
+	.type =		IP_VS_APP_TYPE_FTP,
+	.protocol =	IPPROTO_TCP,
+	.module =	THIS_MODULE,
+	.incs_list =	LIST_HEAD_INIT(ip_vs_ftp.incs_list),
+	.init_conn =	ip_vs_ftp_init_conn,
+	.done_conn =	ip_vs_ftp_done_conn,
+	.bind_conn =	NULL,
+	.unbind_conn =	NULL,
+	.pkt_out =	ip_vs_ftp_out,
+	.pkt_in =	ip_vs_ftp_in,
+};
+
+
+/*
+ *	ip_vs_ftp initialization
+ */
+static int __init ip_vs_ftp_init(void)
+{
+	int i, ret;
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	ret = register_ip_vs_app(app);
+	if (ret)
+		return ret;
+
+	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
+		if (!ports[i])
+			continue;
+		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		if (ret)
+			break;
+		IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n",
+			  app->name, i, ports[i]);
+	}
+
+	if (ret)
+		unregister_ip_vs_app(app);
+
+	return ret;
+}
+
+
+/*
+ *	ip_vs_ftp finish.
+ */
+static void __exit ip_vs_ftp_exit(void)
+{
+	unregister_ip_vs_app(&ip_vs_ftp);
+}
+
+
+module_init(ip_vs_ftp_init);
+module_exit(ip_vs_ftp_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
new file mode 100644
index 00000000000..c035838b780
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -0,0 +1,624 @@
+/*
+ * IPVS:        Locality-Based Least-Connection scheduling module
+ *
+ * Version:     $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Martin Hamilton         :    fixed the terrible locking bugs
+ *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
+ *     Wensong Zhang           :    fixed the uninitilized tbl->lock bug
+ *     Wensong Zhang           :    added doing full expiration check to
+ *                                   collect stale entries of 24+ hours when
+ *                                   no partial expire check in a half hour
+ *     Julian Anastasov        :    replaced del_timer call with del_timer_sync
+ *                                   to avoid the possible race between timer
+ *                                   handler and del_timer thread in SMP
+ *
+ */
+
+/*
+ * The lblc algorithm is as follows (pseudo code):
+ *
+ *       if cachenode[dest_ip] is null then
+ *               n, cachenode[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- cachenode[dest_ip];
+ *               if (n is dead) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                 n, cachenode[dest_ip] <- {weighted least-conn node};
+ *
+ *       return n;
+ *
+ * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing
+ * me to write this module.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblc entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblc entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
+#define CONFIG_IP_VS_LBLC_TAB_BITS      10
+#endif
+#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
+#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
+#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS lblc entry represents an association between destination
+ *      IP address and its destination server
+ */
+struct ip_vs_lblc_entry {
+	struct list_head        list;
+	__u32                   addr;           /* destination IP address */
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblc hash table
+ */
+struct ip_vs_lblc_table {
+	rwlock_t	        lock;           /* lock for this table */
+	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLC sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.ctl_name	= NET_IPV4_VS_LBLC_EXPIRE,
+		.procname	= "lblc_expiration",
+		.data		= &sysctl_ip_vs_lblc_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644, 
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table vs_table[] = {
+	{
+		.ctl_name	= NET_IPV4_VS,
+		.procname	= "vs",
+		.mode		= 0555, 
+		.child		= vs_vars_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipv4_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4", 
+		.mode		= 0555,
+		.child		= vs_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table lblc_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net", 
+		.mode		= 0555, 
+		.child		= ipv4_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+/*
+ *      new/free a ip_vs_lblc_entry, which is a mapping of a destionation
+ *      IP address to a server.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
+	if (en == NULL) {
+		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&en->list);
+	en->addr = daddr;
+
+	atomic_inc(&dest->refcnt);
+	en->dest = dest;
+
+	return en;
+}
+
+
+static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+{
+	list_del(&en->list);
+	/*
+	 * We don't kfree dest because it is refered either by its service
+	 * or the trash dest list.
+	 */
+	atomic_dec(&en->dest->refcnt);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLC entry
+ */
+static inline unsigned ip_vs_lblc_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblc_table.
+ *	returns bool success.
+ */
+static int
+ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
+{
+	unsigned hash;
+
+	if (!list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 *	Hash by destination IP address
+	 */
+	hash = ip_vs_lblc_hashkey(en->addr);
+
+	write_lock(&tbl->lock);
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+
+
+#if 0000
+/*
+ *	Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
+ *	returns bool success.
+ */
+static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
+			     struct ip_vs_lblc_entry *en)
+{
+	if (list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 * Remove it from the table
+	 */
+	write_lock(&tbl->lock);
+	list_del(&en->list);
+	INIT_LIST_HEAD(&en->list);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+#endif
+
+
+/*
+ *  Get ip_vs_lblc_entry associated with supplied parameters.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr)
+{
+	unsigned hash;
+	struct ip_vs_lblc_entry *en;
+
+	hash = ip_vs_lblc_hashkey(addr);
+
+	read_lock(&tbl->lock);
+
+	list_for_each_entry(en, &tbl->bucket[hash], list) {
+		if (en->addr == addr) {
+			/* HIT */
+			read_unlock(&tbl->lock);
+			return en;
+		}
+	}
+
+	read_unlock(&tbl->lock);
+
+	return NULL;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+{
+	int i;
+	struct ip_vs_lblc_entry *en, *nxt;
+
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+}
+
+
+static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
+{
+	unsigned long now = jiffies;
+	int i, j;
+	struct ip_vs_lblc_entry *en, *nxt;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, 
+					en->lastuse + sysctl_ip_vs_lblc_expiration))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblc table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblc_check_expire(unsigned long data)
+{
+	struct ip_vs_lblc_table *tbl;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblc_entry *en, *nxt;
+
+	tbl = (struct ip_vs_lblc_table *)data;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblc_full_check(tbl);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
+
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblc_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&tbl->lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblc_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblc_table for this service
+	 */
+	tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_lblc_table));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	rwlock_init(&tbl->lock);
+	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	init_timer(&tbl->periodic_timer);
+	tbl->periodic_timer.data = (unsigned long)tbl;
+	tbl->periodic_timer.function = ip_vs_lblc_check_expire;
+	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
+	add_timer(&tbl->periodic_timer);
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblc_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_lblc_table));
+
+	return 0;
+}
+
+
+static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_lblc_table *tbl;
+	struct ip_vs_lblc_entry *en;
+	struct iphdr *iph = skb->nh.iph;
+
+	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_lblc_table *)svc->sched_data;
+	en = ip_vs_lblc_get(tbl, iph->daddr);
+	if (en == NULL) {
+		dest = __ip_vs_wlc_schedule(svc, iph);
+		if (dest == NULL) {
+			IP_VS_DBG(1, "no destination available\n");
+			return NULL;
+		}
+		en = ip_vs_lblc_new(iph->daddr, dest);
+		if (en == NULL) {
+			return NULL;
+		}
+		ip_vs_lblc_hash(tbl, en);
+	} else {
+		dest = en->dest;
+		if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
+		    || atomic_read(&dest->weight) <= 0
+		    || is_overloaded(dest, svc)) {
+			dest = __ip_vs_wlc_schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "no destination available\n");
+				return NULL;
+			}
+			atomic_dec(&en->dest->refcnt);
+			atomic_inc(&dest->refcnt);
+			en->dest = dest;
+		}
+	}
+	en->lastuse = jiffies;
+
+	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(en->addr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLC Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblc_scheduler =
+{
+	.name =			"lblc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_lblc_init_svc,
+	.done_service =		ip_vs_lblc_done_svc,
+	.update_service =	ip_vs_lblc_update_svc,
+	.schedule =		ip_vs_lblc_schedule,
+};
+
+
+static int __init ip_vs_lblc_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
+	sysctl_header = register_sysctl_table(lblc_root_table, 0);
+	return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+
+
+static void __exit ip_vs_lblc_cleanup(void)
+{
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+}
+
+
+module_init(ip_vs_lblc_init);
+module_exit(ip_vs_lblc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
new file mode 100644
index 00000000000..22b5dd55d27
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -0,0 +1,888 @@
+/*
+ * IPVS:        Locality-Based Least-Connection with Replication scheduler
+ *
+ * Version:     $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Julian Anastasov        :    Added the missing (dest->weight>0)
+ *                                  condition in the ip_vs_dest_set_max.
+ *
+ */
+
+/*
+ * The lblc/r algorithm is as follows (pseudo code):
+ *
+ *       if serverSet[dest_ip] is null then
+ *               n, serverSet[dest_ip] <- {weighted least-conn node};
+ *       else
+ *               n <- {least-conn (alive) node in serverSet[dest_ip]};
+ *               if (n is null) OR
+ *                  (n.conns>n.weight AND
+ *                   there is a node m with m.conns<m.weight/2) then
+ *                   n <- {weighted least-conn node};
+ *                   add n to serverSet[dest_ip];
+ *               if |serverSet[dest_ip]| > 1 AND
+ *                   now - serverSet[dest_ip].lastMod > T then
+ *                   m <- {most conn node in serverSet[dest_ip]};
+ *                   remove m from serverSet[dest_ip];
+ *       if serverSet[dest_ip] changed then
+ *               serverSet[dest_ip].lastMod <- now;
+ *
+ *       return n;
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+/* for sysctl */
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+/* for proc_net_create/proc_net_remove */
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *    It is for garbage collection of stale IPVS lblcr entries,
+ *    when the table is full.
+ */
+#define CHECK_EXPIRE_INTERVAL   (60*HZ)
+#define ENTRY_TIMEOUT           (6*60*HZ)
+
+/*
+ *    It is for full expiration check.
+ *    When there is no partial expiration check (garbage collection)
+ *    in a half hour, do a full expiration check to collect stale
+ *    entries that haven't been touched for a day.
+ */
+#define COUNT_FOR_FULL_EXPIRATION   30
+static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
+
+
+/*
+ *     for IPVS lblcr entry hash table
+ */
+#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
+#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
+#endif
+#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
+#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
+#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
+
+
+/*
+ *      IPVS destination set structure and operations
+ */
+struct ip_vs_dest_list {
+	struct ip_vs_dest_list  *next;          /* list link */
+	struct ip_vs_dest       *dest;          /* destination server */
+};
+
+struct ip_vs_dest_set {
+	atomic_t                size;           /* set size */
+	unsigned long           lastmod;        /* last modified time */
+	struct ip_vs_dest_list  *list;          /* destination list */
+	rwlock_t	        lock;           /* lock for this list */
+};
+
+
+static struct ip_vs_dest_list *
+ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e;
+
+	for (e=set->list; e!=NULL; e=e->next) {
+		if (e->dest == dest)
+			/* already existed */
+			return NULL;
+	}
+
+	e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
+	if (e == NULL) {
+		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
+		return NULL;
+	}
+
+	atomic_inc(&dest->refcnt);
+	e->dest = dest;
+
+	/* link it to the list */
+	write_lock(&set->lock);
+	e->next = set->list;
+	set->list = e;
+	atomic_inc(&set->size);
+	write_unlock(&set->lock);
+
+	set->lastmod = jiffies;
+	return e;
+}
+
+static void
+ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		if (e->dest == dest) {
+			/* HIT */
+			*ep = e->next;
+			atomic_dec(&set->size);
+			set->lastmod = jiffies;
+			atomic_dec(&e->dest->refcnt);
+			kfree(e);
+			break;
+		}
+		ep = &e->next;
+	}
+	write_unlock(&set->lock);
+}
+
+static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
+{
+	struct ip_vs_dest_list *e, **ep;
+
+	write_lock(&set->lock);
+	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+		*ep = e->next;
+		/*
+		 * We don't kfree dest because it is refered either
+		 * by its service or by the trash dest list.
+		 */
+		atomic_dec(&e->dest->refcnt);
+		kfree(e);
+	}
+	write_unlock(&set->lock);
+}
+
+/* get weighted least-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	read_lock(&set->lock);
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		least = e->dest;
+		if (least->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if ((atomic_read(&least->weight) > 0)
+		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	read_unlock(&set->lock);
+	return NULL;
+
+	/* find the destination with the weighted least load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if ((loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))
+		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+	read_unlock(&set->lock);
+
+	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+	return least;
+}
+
+
+/* get weighted most-connection node in the destination set */
+static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
+{
+	register struct ip_vs_dest_list *e;
+	struct ip_vs_dest *dest, *most;
+	int moh, doh;
+
+	if (set == NULL)
+		return NULL;
+
+	read_lock(&set->lock);
+	/* select the first destination server, whose weight > 0 */
+	for (e=set->list; e!=NULL; e=e->next) {
+		most = e->dest;
+		if (atomic_read(&most->weight) > 0) {
+			moh = atomic_read(&most->activeconns) * 50
+				+ atomic_read(&most->inactconns);
+			goto nextstage;
+		}
+	}
+	read_unlock(&set->lock);
+	return NULL;
+
+	/* find the destination with the weighted most load */
+  nextstage:
+	for (e=e->next; e!=NULL; e=e->next) {
+		dest = e->dest;
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
+		if ((moh * atomic_read(&dest->weight) <
+		     doh * atomic_read(&most->weight))
+		    && (atomic_read(&dest->weight) > 0)) {
+			most = dest;
+			moh = doh;
+		}
+	}
+	read_unlock(&set->lock);
+
+	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(most->addr), ntohs(most->port),
+		  atomic_read(&most->activeconns),
+		  atomic_read(&most->refcnt),
+		  atomic_read(&most->weight), moh);
+	return most;
+}
+
+
+/*
+ *      IPVS lblcr entry represents an association between destination
+ *      IP address and its destination server set
+ */
+struct ip_vs_lblcr_entry {
+	struct list_head        list;
+	__u32                   addr;           /* destination IP address */
+	struct ip_vs_dest_set   set;            /* destination server set */
+	unsigned long           lastuse;        /* last used time */
+};
+
+
+/*
+ *      IPVS lblcr hash table
+ */
+struct ip_vs_lblcr_table {
+	rwlock_t	        lock;           /* lock for this table */
+	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
+	atomic_t                entries;        /* number of entries */
+	int                     max_size;       /* maximum size of entries */
+	struct timer_list       periodic_timer; /* collect stale entries */
+	int                     rover;          /* rover for expire check */
+	int                     counter;        /* counter for no expire */
+};
+
+
+/*
+ *      IPVS LBLCR sysctl table
+ */
+
+static ctl_table vs_vars_table[] = {
+	{
+		.ctl_name	= NET_IPV4_VS_LBLCR_EXPIRE,
+		.procname	= "lblcr_expiration",
+		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.maxlen		= sizeof(int),
+		.mode		= 0644, 
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table vs_table[] = {
+	{
+		.ctl_name	= NET_IPV4_VS,
+		.procname	= "vs",
+		.mode		= 0555,
+		.child		= vs_vars_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipv4_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4", 
+		.mode		= 0555,
+		.child		= vs_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table lblcr_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net", 
+		.mode		= 0555, 
+		.child		= ipv4_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sysctl_header;
+
+/*
+ *      new/free a ip_vs_lblcr_entry, which is a mapping of a destination
+ *      IP address to a server.
+ */
+static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr)
+{
+	struct ip_vs_lblcr_entry *en;
+
+	en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
+	if (en == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&en->list);
+	en->addr = daddr;
+
+	/* initilize its dest set */
+	atomic_set(&(en->set.size), 0);
+	en->set.list = NULL;
+	rwlock_init(&en->set.lock);
+
+	return en;
+}
+
+
+static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
+{
+	list_del(&en->list);
+	ip_vs_dest_set_eraseall(&en->set);
+	kfree(en);
+}
+
+
+/*
+ *	Returns hash value for IPVS LBLCR entry
+ */
+static inline unsigned ip_vs_lblcr_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
+}
+
+
+/*
+ *	Hash an entry in the ip_vs_lblcr_table.
+ *	returns bool success.
+ */
+static int
+ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
+{
+	unsigned hash;
+
+	if (!list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 *	Hash by destination IP address
+	 */
+	hash = ip_vs_lblcr_hashkey(en->addr);
+
+	write_lock(&tbl->lock);
+	list_add(&en->list, &tbl->bucket[hash]);
+	atomic_inc(&tbl->entries);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+
+
+#if 0000
+/*
+ *	Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
+ *	returns bool success.
+ */
+static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
+			     struct ip_vs_lblcr_entry *en)
+{
+	if (list_empty(&en->list)) {
+		IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
+			  "called from %p\n", __builtin_return_address(0));
+		return 0;
+	}
+
+	/*
+	 * Remove it from the table
+	 */
+	write_lock(&tbl->lock);
+	list_del(&en->list);
+	INIT_LIST_HEAD(&en->list);
+	write_unlock(&tbl->lock);
+
+	return 1;
+}
+#endif
+
+
+/*
+ *  Get ip_vs_lblcr_entry associated with supplied parameters.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr)
+{
+	unsigned hash;
+	struct ip_vs_lblcr_entry *en;
+
+	hash = ip_vs_lblcr_hashkey(addr);
+
+	read_lock(&tbl->lock);
+
+	list_for_each_entry(en, &tbl->bucket[hash], list) {
+		if (en->addr == addr) {
+			/* HIT */
+			read_unlock(&tbl->lock);
+			return en;
+		}
+	}
+
+	read_unlock(&tbl->lock);
+
+	return NULL;
+}
+
+
+/*
+ *      Flush all the entries of the specified table.
+ */
+static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+{
+	int i;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+}
+
+
+static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
+{
+	unsigned long now = jiffies;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
+				       now))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+		}
+		write_unlock(&tbl->lock);
+	}
+	tbl->rover = j;
+}
+
+
+/*
+ *      Periodical timer handler for IPVS lblcr table
+ *      It is used to collect stale entries when the number of entries
+ *      exceeds the maximum size of the table.
+ *
+ *      Fixme: we probably need more complicated algorithm to collect
+ *             entries that have not been used for a long time even
+ *             if the number of entries doesn't exceed the maximum size
+ *             of the table.
+ *      The full expiration check is for this purpose now.
+ */
+static void ip_vs_lblcr_check_expire(unsigned long data)
+{
+	struct ip_vs_lblcr_table *tbl;
+	unsigned long now = jiffies;
+	int goal;
+	int i, j;
+	struct ip_vs_lblcr_entry *en, *nxt;
+
+	tbl = (struct ip_vs_lblcr_table *)data;
+
+	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
+		/* do full expiration check */
+		ip_vs_lblcr_full_check(tbl);
+		tbl->counter = 1;
+		goto out;
+	}
+
+	if (atomic_read(&tbl->entries) <= tbl->max_size) {
+		tbl->counter++;
+		goto out;
+	}
+
+	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
+	if (goal > tbl->max_size/2)
+		goal = tbl->max_size/2;
+
+	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
+
+		write_lock(&tbl->lock);
+		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
+				continue;
+
+			ip_vs_lblcr_free(en);
+			atomic_dec(&tbl->entries);
+			goal--;
+		}
+		write_unlock(&tbl->lock);
+		if (goal <= 0)
+			break;
+	}
+	tbl->rover = j;
+
+  out:
+	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+}
+
+
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+static struct ip_vs_lblcr_table *lblcr_table_list;
+
+/*
+ *	/proc/net/ip_vs_lblcr to display the mappings of
+ *                  destination IP address <==> its serverSet
+ */
+static int
+ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos=0, begin;
+	int len=0, size;
+	struct ip_vs_lblcr_table *tbl;
+	unsigned long now = jiffies;
+	int i;
+	struct ip_vs_lblcr_entry *en;
+
+	tbl = lblcr_table_list;
+
+	size = sprintf(buffer, "LastTime Dest IP address  Server set\n");
+	pos += size;
+	len += size;
+
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		read_lock_bh(&tbl->lock);
+		list_for_each_entry(en, &tbl->bucket[i], list) {
+			char tbuf[16];
+			struct ip_vs_dest_list *d;
+
+			sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
+			size = sprintf(buffer+len, "%8lu %-16s ",
+				       now-en->lastuse, tbuf);
+
+			read_lock(&en->set.lock);
+			for (d=en->set.list; d!=NULL; d=d->next) {
+				size += sprintf(buffer+len+size,
+						"%u.%u.%u.%u ",
+						NIPQUAD(d->dest->addr));
+			}
+			read_unlock(&en->set.lock);
+			size += sprintf(buffer+len+size, "\n");
+			len += size;
+			pos += size;
+			if (pos <= offset)
+				len=0;
+			if (pos >= offset+length) {
+				read_unlock_bh(&tbl->lock);
+				goto done;
+			}
+		}
+		read_unlock_bh(&tbl->lock);
+	}
+
+  done:
+	begin = len - (pos - offset);
+	*start = buffer + begin;
+	len -= begin;
+	if(len>length)
+		len = length;
+	return len;
+}
+#endif
+
+
+static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_lblcr_table *tbl;
+
+	/*
+	 *    Allocate the ip_vs_lblcr_table for this service
+	 */
+	tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_lblcr_table));
+
+	/*
+	 *    Initialize the hash buckets
+	 */
+	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+		INIT_LIST_HEAD(&tbl->bucket[i]);
+	}
+	rwlock_init(&tbl->lock);
+	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
+	tbl->rover = 0;
+	tbl->counter = 1;
+
+	/*
+	 *    Hook periodic timer for garbage collection
+	 */
+	init_timer(&tbl->periodic_timer);
+	tbl->periodic_timer.data = (unsigned long)tbl;
+	tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
+	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
+	add_timer(&tbl->periodic_timer);
+
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	lblcr_table_list = tbl;
+#endif
+	return 0;
+}
+
+
+static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+
+	/* remove periodic timer */
+	del_timer_sync(&tbl->periodic_timer);
+
+	/* got to clean up table entries here */
+	ip_vs_lblcr_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_lblcr_table));
+
+	return 0;
+}
+
+
+static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline struct ip_vs_dest *
+__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+{
+	struct ip_vs_dest *dest, *least;
+	int loh, doh;
+
+	/*
+	 * We think the overhead of processing active connections is fifty
+	 * times higher than that of inactive connections in average. (This
+	 * fifty times might not be accurate, we will change it later.) We
+	 * use the following formula to estimate the overhead:
+	 *                dest->activeconns*50 + dest->inactconns
+	 * and the load:
+	 *                (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *                h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connection.
+	 */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		if (atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = atomic_read(&least->activeconns) * 50
+				+ atomic_read(&least->inactconns);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		doh = atomic_read(&dest->activeconns) * 50
+			+ atomic_read(&dest->inactconns);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+/*
+ *   If this destination server is overloaded and there is a less loaded
+ *   server, then return true.
+ */
+static inline int
+is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
+{
+	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
+		struct ip_vs_dest *d;
+
+		list_for_each_entry(d, &svc->destinations, n_list) {
+			if (atomic_read(&d->activeconns)*2
+			    < atomic_read(&d->weight)) {
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ *    Locality-Based (weighted) Least-Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_lblcr_table *tbl;
+	struct ip_vs_lblcr_entry *en;
+	struct iphdr *iph = skb->nh.iph;
+
+	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
+	en = ip_vs_lblcr_get(tbl, iph->daddr);
+	if (en == NULL) {
+		dest = __ip_vs_wlc_schedule(svc, iph);
+		if (dest == NULL) {
+			IP_VS_DBG(1, "no destination available\n");
+			return NULL;
+		}
+		en = ip_vs_lblcr_new(iph->daddr);
+		if (en == NULL) {
+			return NULL;
+		}
+		ip_vs_dest_set_insert(&en->set, dest);
+		ip_vs_lblcr_hash(tbl, en);
+	} else {
+		dest = ip_vs_dest_set_min(&en->set);
+		if (!dest || is_overloaded(dest, svc)) {
+			dest = __ip_vs_wlc_schedule(svc, iph);
+			if (dest == NULL) {
+				IP_VS_DBG(1, "no destination available\n");
+				return NULL;
+			}
+			ip_vs_dest_set_insert(&en->set, dest);
+		}
+		if (atomic_read(&en->set.size) > 1 &&
+		    jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
+			struct ip_vs_dest *m;
+			m = ip_vs_dest_set_max(&en->set);
+			if (m)
+				ip_vs_dest_set_erase(&en->set, m);
+		}
+	}
+	en->lastuse = jiffies;
+
+	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(en->addr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS LBLCR Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
+{
+	.name =			"lblcr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_lblcr_init_svc,
+	.done_service =		ip_vs_lblcr_done_svc,
+	.update_service =	ip_vs_lblcr_update_svc,
+	.schedule =		ip_vs_lblcr_schedule,
+};
+
+
+static int __init ip_vs_lblcr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
+	sysctl_header = register_sysctl_table(lblcr_root_table, 0);
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
+#endif
+	return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+static void __exit ip_vs_lblcr_cleanup(void)
+{
+#ifdef CONFIG_IP_VS_LBLCR_DEBUG
+	proc_net_remove("ip_vs_lblcr");
+#endif
+	unregister_sysctl_table(sysctl_header);
+	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+}
+
+
+module_init(ip_vs_lblcr_init);
+module_exit(ip_vs_lblcr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
new file mode 100644
index 00000000000..d88fef90a64
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -0,0 +1,123 @@
+/*
+ * IPVS:        Least-Connection Scheduling module
+ *
+ * Version:     $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     added the ip_vs_lc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline unsigned int
+ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n");
+
+	/*
+	 * Simply select the server with the least number of
+	 *        (activeconns<<5) + inactconns
+	 * Except whose weight is equal to zero.
+	 * If the weight is equal to zero, it means that the server is
+	 * quiesced, the existing connections to the server still get
+	 * served, but no new connection is assigned to the server.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+		    atomic_read(&dest->weight) == 0)
+			continue;
+		doh = ip_vs_lc_dest_overhead(dest);
+		if (!least || doh < loh) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (least)
+	IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->inactconns));
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_lc_scheduler = {
+	.name =			"lc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_lc_init_svc,
+	.done_service =		ip_vs_lc_done_svc,
+	.update_service =	ip_vs_lc_update_svc,
+	.schedule =		ip_vs_lc_schedule,
+};
+
+
+static int __init ip_vs_lc_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
+}
+
+static void __exit ip_vs_lc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+}
+
+module_init(ip_vs_lc_init);
+module_exit(ip_vs_lc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
new file mode 100644
index 00000000000..bc2a9e5f2a7
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -0,0 +1,161 @@
+/*
+ * IPVS:        Never Queue scheduling module
+ *
+ * Version:     $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The NQ algorithm adopts a two-speed model. When there is an idle server
+ * available, the job will be sent to the idle server, instead of waiting
+ * for a fast one. When there is no idle server available, the job will be
+ * sent to the server that minimize its expected delay (The Shortest
+ * Expected Delay scheduling algorithm).
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
+ *
+ * The difference between NQ and SED is that NQ can improve overall
+ * system utilization.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_nq_init_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_nq_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_nq_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline unsigned int
+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least = NULL;
+	unsigned int loh = 0, doh;
+
+	IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
+		    !atomic_read(&dest->weight))
+			continue;
+
+		doh = ip_vs_nq_dest_overhead(dest);
+
+		/* return the server directly if it is idle */
+		if (atomic_read(&dest->activeconns) == 0) {
+			least = dest;
+			loh = doh;
+			goto out;
+		}
+
+		if (!least ||
+		    (loh * atomic_read(&dest->weight) >
+		     doh * atomic_read(&least->weight))) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	if (!least)
+		return NULL;
+
+  out:
+	IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_nq_scheduler =
+{
+	.name =			"nq",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_nq_init_svc,
+	.done_service =		ip_vs_nq_done_svc,
+	.update_service =	ip_vs_nq_update_svc,
+	.schedule =		ip_vs_nq_schedule,
+};
+
+
+static int __init ip_vs_nq_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+static void __exit ip_vs_nq_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+}
+
+module_init(ip_vs_nq_init);
+module_exit(ip_vs_nq_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
new file mode 100644
index 00000000000..253c46252bd
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -0,0 +1,244 @@
+/*
+ * ip_vs_proto.c: transport protocol load balancing support for IPVS
+ *
+ * Version:     $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * IPVS protocols can only be registered/unregistered when the ipvs
+ * module is loaded/unloaded, so no lock is needed in accessing the
+ * ipvs protocol table.
+ */
+
+#define IP_VS_PROTO_TAB_SIZE		32	/* must be power of 2 */
+#define IP_VS_PROTO_HASH(proto)		((proto) & (IP_VS_PROTO_TAB_SIZE-1))
+
+static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
+
+
+/*
+ *	register an ipvs protocol
+ */
+static int register_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp->next = ip_vs_proto_table[hash];
+	ip_vs_proto_table[hash] = pp;
+
+	if (pp->init != NULL)
+		pp->init(pp);
+
+	return 0;
+}
+
+
+/*
+ *	unregister an ipvs protocol
+ */
+static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
+{
+	struct ip_vs_protocol **pp_p;
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+
+	pp_p = &ip_vs_proto_table[hash];
+	for (; *pp_p; pp_p = &(*pp_p)->next) {
+		if (*pp_p == pp) {
+			*pp_p = pp->next;
+			if (pp->exit != NULL)
+				pp->exit(pp);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
+
+
+/*
+ *	get ip_vs_protocol object by its proto.
+ */
+struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
+{
+	struct ip_vs_protocol *pp;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+
+	for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) {
+		if (pp->protocol == proto)
+			return pp;
+	}
+
+	return NULL;
+}
+
+
+/*
+ *	Propagate event for state change to all protocols
+ */
+void ip_vs_protocol_timeout_change(int flags)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
+			if (pp->timeout_change)
+				pp->timeout_change(pp, flags);
+		}
+	}
+}
+
+
+int *
+ip_vs_create_timeout_table(int *table, int size)
+{
+	int *t;
+
+	t = kmalloc(size, GFP_ATOMIC);
+	if (t == NULL)
+		return NULL;
+	memcpy(t, table, size);
+	return t;
+}
+
+
+/*
+ *	Set timeout value for state specified by name
+ */
+int
+ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to)
+{
+	int i;
+
+	if (!table || !name || !to)
+		return -EINVAL;
+
+	for (i = 0; i < num; i++) {
+		if (strcmp(names[i], name))
+			continue;
+		table[i] = to * HZ;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+
+const char * ip_vs_state_name(__u16 proto, int state)
+{
+	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+
+	if (pp == NULL || pp->state_name == NULL)
+		return "ERR!";
+	return pp->state_name(state);
+}
+
+
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+			  const struct sk_buff *skb,
+			  int offset,
+			  const char *msg)
+{
+	char buf[128];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->frag_off & __constant_htons(IP_OFFSET))
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+	else {
+		__u16 _ports[2], *pptr
+;
+		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				NIPQUAD(ih->daddr));
+		else
+			sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
+				pp->name,
+				NIPQUAD(ih->saddr),
+				ntohs(pptr[0]),
+				NIPQUAD(ih->daddr),
+				ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+
+int ip_vs_protocol_init(void)
+{
+	char protocols[64];
+#define REGISTER_PROTOCOL(p)			\
+	do {					\
+		register_ip_vs_protocol(p);	\
+		strcat(protocols, ", ");	\
+		strcat(protocols, (p)->name);	\
+	} while (0)
+
+	protocols[0] = '\0';
+	protocols[2] = '\0';
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ICMP
+	REGISTER_PROTOCOL(&ip_vs_protocol_icmp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
+#endif
+	IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]);
+
+	return 0;
+}
+
+
+void ip_vs_protocol_cleanup(void)
+{
+	struct ip_vs_protocol *pp;
+	int i;
+
+	/* unregister all the ipvs protocols */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pp = ip_vs_proto_table[i]) != NULL)
+			unregister_ip_vs_protocol(pp);
+	}
+}
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
new file mode 100644
index 00000000000..453e94a0bbd
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -0,0 +1,177 @@
+/*
+ * ip_vs_proto_ah.c:	AH IPSec load balancing support for IPVS
+ *
+ * Version:     $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
+ *		Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+	__u8		icookie[8];
+	__u8		rcookie[8];
+	__u8		np;
+	__u8		version;
+	__u8		xchgtype;
+	__u8		flags;
+	__u32		msgid;
+	__u32		length;
+};
+
+*/
+
+#define PORT_ISAKMP	500
+
+
+static struct ip_vs_conn *
+ah_conn_in_get(const struct sk_buff *skb,
+	       struct ip_vs_protocol *pp,
+	       const struct iphdr *iph,
+	       unsigned int proto_off,
+	       int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->saddr,
+				       __constant_htons(PORT_ISAKMP),
+				       iph->daddr,
+				       __constant_htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->daddr,
+				       __constant_htons(PORT_ISAKMP),
+				       iph->saddr,
+				       __constant_htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		/*
+		 * We are not sure if the packet is from our
+		 * service, so our conn_schedule hook should return NF_ACCEPT
+		 */
+		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->saddr,
+					__constant_htons(PORT_ISAKMP),
+					iph->daddr,
+					__constant_htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->daddr,
+					__constant_htons(PORT_ISAKMP),
+					iph->saddr,
+					__constant_htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static int
+ah_conn_schedule(struct sk_buff *skb,
+		 struct ip_vs_protocol *pp,
+		 int *verdict, struct ip_vs_conn **cpp)
+{
+	/*
+	 * AH is only related traffic. Pass the packet to IP stack.
+	 */
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+
+static void
+ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		int offset, const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+
+static void ah_init(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+static void ah_exit(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_ah = {
+	.name =			"AH",
+	.protocol =		IPPROTO_AH,
+	.dont_defrag =		1,
+	.init =			ah_init,
+	.exit =			ah_exit,
+	.conn_schedule =	ah_conn_schedule,
+	.conn_in_get =		ah_conn_in_get,
+	.conn_out_get =		ah_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		ah_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+	.set_state_timeout =	NULL,
+};
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
new file mode 100644
index 00000000000..478e5c7c7e8
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -0,0 +1,175 @@
+/*
+ * ip_vs_proto_esp.c:	ESP IPSec load balancing support for IPVS
+ *
+ * Version:     $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, February 2002
+ *		Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/* TODO:
+
+struct isakmp_hdr {
+	__u8		icookie[8];
+	__u8		rcookie[8];
+	__u8		np;
+	__u8		version;
+	__u8		xchgtype;
+	__u8		flags;
+	__u32		msgid;
+	__u32		length;
+};
+
+*/
+
+#define PORT_ISAKMP	500
+
+
+static struct ip_vs_conn *
+esp_conn_in_get(const struct sk_buff *skb,
+		struct ip_vs_protocol *pp,
+		const struct iphdr *iph,
+		unsigned int proto_off,
+		int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->saddr,
+				       __constant_htons(PORT_ISAKMP),
+				       iph->daddr,
+				       __constant_htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_in_get(IPPROTO_UDP,
+				       iph->daddr,
+				       __constant_htons(PORT_ISAKMP),
+				       iph->saddr,
+				       __constant_htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		/*
+		 * We are not sure if the packet is from our
+		 * service, so our conn_schedule hook should return NF_ACCEPT
+		 */
+		IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->saddr,
+					__constant_htons(PORT_ISAKMP),
+					iph->daddr,
+					__constant_htons(PORT_ISAKMP));
+	} else {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+					iph->daddr,
+					__constant_htons(PORT_ISAKMP),
+					iph->saddr,
+					__constant_htons(PORT_ISAKMP));
+	}
+
+	if (!cp) {
+		IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
+			  "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
+			  inverse ? "ICMP+" : "",
+			  pp->name,
+			  NIPQUAD(iph->saddr),
+			  NIPQUAD(iph->daddr));
+	}
+
+	return cp;
+}
+
+
+static int
+esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	/*
+	 * ESP is only related traffic. Pass the packet to IP stack.
+	 */
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+
+static void
+esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		 int offset, const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+
+static void esp_init(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+static void esp_exit(struct ip_vs_protocol *pp)
+{
+	/* nothing to do now */
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_esp = {
+	.name =			"ESP",
+	.protocol =		IPPROTO_ESP,
+	.dont_defrag =		1,
+	.init =			esp_init,
+	.exit =			esp_exit,
+	.conn_schedule =	esp_conn_schedule,
+	.conn_in_get =		esp_conn_in_get,
+	.conn_out_get =		esp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		NULL,
+	.state_transition =	NULL,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		esp_debug_packet,
+	.timeout_change =	NULL,		/* ISAKMP */
+};
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c
new file mode 100644
index 00000000000..191e94aa1c1
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_icmp.c
@@ -0,0 +1,182 @@
+/*
+ * ip_vs_proto_icmp.c:	ICMP load balancing support for IP Virtual Server
+ *
+ * Authors:	Julian Anastasov <ja@ssi.bg>, March 2002
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		version 2 as published by the Free Software Foundation;
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+static int icmp_timeouts[1] =		{ 1*60*HZ };
+
+static char * icmp_state_name_table[1] = { "ICMP" };
+
+static struct ip_vs_conn *
+icmp_conn_in_get(const struct sk_buff *skb,
+		 struct ip_vs_protocol *pp,
+		 const struct iphdr *iph,
+		 unsigned int proto_off,
+		 int inverse)
+{
+#if 0
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(iph->protocol,
+			iph->saddr, 0,
+			iph->daddr, 0);
+	} else {
+		cp = ip_vs_conn_in_get(iph->protocol,
+			iph->daddr, 0,
+			iph->saddr, 0);
+	}
+
+	return cp;
+
+#else
+	return NULL;
+#endif
+}
+
+static struct ip_vs_conn *
+icmp_conn_out_get(const struct sk_buff *skb,
+		  struct ip_vs_protocol *pp,
+		  const struct iphdr *iph,
+		  unsigned int proto_off,
+		  int inverse)
+{
+#if 0
+	struct ip_vs_conn *cp;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(iph->protocol,
+			iph->saddr, 0,
+			iph->daddr, 0);
+	} else {
+		cp = ip_vs_conn_out_get(IPPROTO_UDP,
+			iph->daddr, 0,
+			iph->saddr, 0);
+	}
+
+	return cp;
+#else
+	return NULL;
+#endif
+}
+
+static int
+icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   int *verdict, struct ip_vs_conn **cpp)
+{
+	*verdict = NF_ACCEPT;
+	return 0;
+}
+
+static int
+icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) {
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
+			if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for");
+				return 0;
+			}
+		}
+	}
+	return 1;
+}
+
+static void
+icmp_debug_packet(struct ip_vs_protocol *pp,
+		  const struct sk_buff *skb,
+		  int offset,
+		  const char *msg)
+{
+	char buf[256];
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		sprintf(buf, "%s TRUNCATED", pp->name);
+	else if (ih->frag_off & __constant_htons(IP_OFFSET))
+		sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
+			pp->name, NIPQUAD(ih->saddr),
+			NIPQUAD(ih->daddr));
+	else {
+		struct icmphdr _icmph, *ic;
+
+		ic = skb_header_pointer(skb, offset + ih->ihl*4,
+					sizeof(_icmph), &_icmph);
+		if (ic == NULL)
+			sprintf(buf, "%s TRUNCATED to %u bytes\n",
+				pp->name, skb->len - offset);
+		else
+			sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
+				pp->name, NIPQUAD(ih->saddr),
+				NIPQUAD(ih->daddr),
+				ic->type, ic->code);
+	}
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+
+static int
+icmp_state_transition(struct ip_vs_conn *cp, int direction,
+		      const struct sk_buff *skb,
+		      struct ip_vs_protocol *pp)
+{
+	cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL];
+	return 1;
+}
+
+static int
+icmp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	int num;
+	char **names;
+
+	num = IP_VS_ICMP_S_LAST;
+	names = icmp_state_name_table;
+	return ip_vs_set_state_timeout(pp->timeout_table, num, names, sname, to);
+}
+
+
+static void icmp_init(struct ip_vs_protocol *pp)
+{
+	pp->timeout_table = icmp_timeouts;
+}
+
+static void icmp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+struct ip_vs_protocol ip_vs_protocol_icmp = {
+	.name =			"ICMP",
+	.protocol =		IPPROTO_ICMP,
+	.dont_defrag =		0,
+	.init =			icmp_init,
+	.exit =			icmp_exit,
+	.conn_schedule =	icmp_conn_schedule,
+	.conn_in_get =		icmp_conn_in_get,
+	.conn_out_get =		icmp_conn_out_get,
+	.snat_handler =		NULL,
+	.dnat_handler =		NULL,
+	.csum_check =		icmp_csum_check,
+	.state_transition =	icmp_state_transition,
+	.register_app =		NULL,
+	.unregister_app =	NULL,
+	.app_conn_bind =	NULL,
+	.debug_packet =		icmp_debug_packet,
+	.timeout_change =	NULL,
+	.set_state_timeout =	icmp_set_state_timeout,
+};
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
new file mode 100644
index 00000000000..e65de675da7
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -0,0 +1,640 @@
+/*
+ * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
+ *
+ * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/ip.h>
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	__u16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_in_get(iph->protocol,
+					 iph->saddr, pptr[0],
+					 iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_in_get(iph->protocol,
+					 iph->daddr, pptr[1],
+					 iph->saddr, pptr[0]);
+	}
+}
+
+static struct ip_vs_conn *
+tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	__u16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		return ip_vs_conn_out_get(iph->protocol,
+					  iph->saddr, pptr[0],
+					  iph->daddr, pptr[1]);
+	} else {
+		return ip_vs_conn_out_get(iph->protocol,
+					  iph->daddr, pptr[1],
+					  iph->saddr, pptr[0]);
+	}
+}
+
+
+static int
+tcp_conn_schedule(struct sk_buff *skb,
+		  struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct tcphdr _tcph, *th;
+
+	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	if (th->syn &&
+	    (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
+				     skb->nh.iph->daddr, th->dest))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
+		     u16 oldport, u16 newport)
+{
+	tcph->check =
+		ip_vs_check_diff(~oldip, newip,
+				 ip_vs_check_diff(oldport ^ 0xFFFF,
+						  newport, tcph->check));
+}
+
+
+static int
+tcp_snat_handler(struct sk_buff **pskb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+
+	/* csum_check requires unshared skb */
+	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, pskb))
+			return 0;
+	}
+
+	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph->source = cp->vport;
+
+	/* Adjust TCP checksums */
+	if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
+				     cp->dport, cp->vport);
+		if ((*pskb)->ip_summed == CHECKSUM_HW)
+			(*pskb)->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
+					     (*pskb)->len - tcphoff, 0);
+		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
+						(*pskb)->len - tcphoff,
+						cp->protocol,
+						(*pskb)->csum);
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, tcph->check,
+			  (char*)&(tcph->check) - (char*)tcph);
+	}
+	return 1;
+}
+
+
+static int
+tcp_dnat_handler(struct sk_buff **pskb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct tcphdr *tcph;
+	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+
+	/* csum_check requires unshared skb */
+	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn and iph ack_seq stuff
+		 */
+		if (!ip_vs_app_pkt_in(cp, pskb))
+			return 0;
+	}
+
+	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph->dest = cp->dport;
+
+	/*
+	 *	Adjust TCP checksums
+	 */
+	if (!cp->app) {
+		/* Only port and addr are changed, do fast csum update */
+		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
+				     cp->vport, cp->dport);
+		if ((*pskb)->ip_summed == CHECKSUM_HW)
+			(*pskb)->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		tcph->check = 0;
+		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
+					     (*pskb)->len - tcphoff, 0);
+		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
+						(*pskb)->len - tcphoff,
+						cp->protocol,
+						(*pskb)->csum);
+		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	unsigned int tcphoff = skb->nh.iph->ihl*4;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_NONE:
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+	case CHECKSUM_HW:
+		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				      skb->len - tcphoff,
+				      skb->nh.iph->protocol, skb->csum)) {
+			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+					 "Failed checksum for");
+			return 0;
+		}
+		break;
+	default:
+		/* CHECKSUM_UNNECESSARY */
+		break;
+	}
+
+	return 1;
+}
+
+
+#define TCP_DIR_INPUT		0
+#define TCP_DIR_OUTPUT		4
+#define TCP_DIR_INPUT_ONLY	8
+
+static int tcp_state_off[IP_VS_DIR_LAST] = {
+	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
+	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
+	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
+};
+
+/*
+ *	Timeout table[state]
+ */
+static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	2*HZ,
+	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
+	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
+	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
+	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
+	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
+	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
+	[IP_VS_TCP_S_LAST]		=	2*HZ,
+};
+
+
+#if 0
+
+/* FIXME: This is going to die */
+
+static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	2*HZ,
+	[IP_VS_TCP_S_ESTABLISHED]	=	8*60*HZ,
+	[IP_VS_TCP_S_SYN_SENT]		=	60*HZ,
+	[IP_VS_TCP_S_SYN_RECV]		=	10*HZ,
+	[IP_VS_TCP_S_FIN_WAIT]		=	60*HZ,
+	[IP_VS_TCP_S_TIME_WAIT]		=	60*HZ,
+	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
+	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
+	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
+	[IP_VS_TCP_S_SYNACK]		=	100*HZ,
+	[IP_VS_TCP_S_LAST]		=	2*HZ,
+};
+
+#endif
+
+static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
+	[IP_VS_TCP_S_NONE]		=	"NONE",
+	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
+	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
+	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
+	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
+	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
+	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
+	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
+	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
+	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
+	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
+	[IP_VS_TCP_S_LAST]		=	"BUG!",
+};
+
+#define sNO IP_VS_TCP_S_NONE
+#define sES IP_VS_TCP_S_ESTABLISHED
+#define sSS IP_VS_TCP_S_SYN_SENT
+#define sSR IP_VS_TCP_S_SYN_RECV
+#define sFW IP_VS_TCP_S_FIN_WAIT
+#define sTW IP_VS_TCP_S_TIME_WAIT
+#define sCL IP_VS_TCP_S_CLOSE
+#define sCW IP_VS_TCP_S_CLOSE_WAIT
+#define sLA IP_VS_TCP_S_LAST_ACK
+#define sLI IP_VS_TCP_S_LISTEN
+#define sSA IP_VS_TCP_S_SYNACK
+
+struct tcp_states_t {
+	int next_state[IP_VS_TCP_S_LAST];
+};
+
+static const char * tcp_state_name(int state)
+{
+	if (state >= IP_VS_TCP_S_LAST)
+		return "ERR!";
+	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
+}
+
+static struct tcp_states_t tcp_states [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t tcp_states_dos [] = {
+/*	INPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
+/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
+/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+
+/*	OUTPUT */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
+/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
+/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
+/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
+
+/*	INPUT-ONLY */
+/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
+/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
+/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
+/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
+/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
+};
+
+static struct tcp_states_t *tcp_state_table = tcp_states;
+
+
+static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+	int on = (flags & 1);		/* secure_tcp */
+
+	/*
+	** FIXME: change secure_tcp to independent sysctl var
+	** or make it per-service or per-app because it is valid
+	** for most if not for all of the applications. Something
+	** like "capabilities" (flags) for each object.
+	*/
+	tcp_state_table = (on? tcp_states_dos : tcp_states);
+}
+
+static int
+tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
+				       tcp_state_name_table, sname, to);
+}
+
+static inline int tcp_state_idx(struct tcphdr *th)
+{
+	if (th->rst)
+		return 3;
+	if (th->syn)
+		return 0;
+	if (th->fin)
+		return 1;
+	if (th->ack)
+		return 2;
+	return -1;
+}
+
+static inline void
+set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+	      int direction, struct tcphdr *th)
+{
+	int state_idx;
+	int new_state = IP_VS_TCP_S_CLOSE;
+	int state_off = tcp_state_off[direction];
+
+	/*
+	 *    Update state offset to INPUT_ONLY if necessary
+	 *    or delete NO_OUTPUT flag if output packet detected
+	 */
+	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
+		if (state_off == TCP_DIR_OUTPUT)
+			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
+		else
+			state_off = TCP_DIR_INPUT_ONLY;
+	}
+
+	if ((state_idx = tcp_state_idx(th)) < 0) {
+		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
+		goto tcp_state_out;
+	}
+
+	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+
+  tcp_state_out:
+	if (new_state != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
+			  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
+			  pp->name,
+			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
+			  th->syn? 'S' : '.',
+			  th->fin? 'F' : '.',
+			  th->ack? 'A' : '.',
+			  th->rst? 'R' : '.',
+			  NIPQUAD(cp->daddr), ntohs(cp->dport),
+			  NIPQUAD(cp->caddr), ntohs(cp->cport),
+			  tcp_state_name(cp->state),
+			  tcp_state_name(new_state),
+			  atomic_read(&cp->refcnt));
+		if (dest) {
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	cp->timeout = pp->timeout_table[cp->state = new_state];
+}
+
+
+/*
+ *	Handle state transitions
+ */
+static int
+tcp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	struct tcphdr _tcph, *th;
+
+	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return 0;
+
+	spin_lock(&cp->lock);
+	set_tcp_state(pp, cp, direction, th);
+	spin_unlock(&cp->lock);
+
+	return 1;
+}
+
+
+/*
+ *	Hash table for TCP application incarnations
+ */
+#define	TCP_APP_TAB_BITS	4
+#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+
+static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(tcp_app_lock);
+
+static inline __u16 tcp_app_hashkey(__u16 port)
+{
+	return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
+}
+
+
+static int tcp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash, port = inc->port;
+	int ret = 0;
+
+	hash = tcp_app_hashkey(port);
+
+	spin_lock_bh(&tcp_app_lock);
+	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &tcp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+
+  out:
+	spin_unlock_bh(&tcp_app_lock);
+	return ret;
+}
+
+
+static void
+tcp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&tcp_app_lock);
+	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&tcp_app_lock);
+}
+
+
+static int
+tcp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = tcp_app_hashkey(cp->vport);
+
+	spin_lock(&tcp_app_lock);
+	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&tcp_app_lock);
+
+			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
+				  "%u.%u.%u.%u:%u to app %s on port %u\n",
+				  __FUNCTION__,
+				  NIPQUAD(cp->caddr), ntohs(cp->cport),
+				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+				  inc->name, ntohs(inc->port));
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&tcp_app_lock);
+
+  out:
+	return result;
+}
+
+
+/*
+ *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
+ */
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+{
+	spin_lock(&cp->lock);
+	cp->state = IP_VS_TCP_S_LISTEN;
+	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	spin_unlock(&cp->lock);
+}
+
+
+static void tcp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(tcp_apps);
+	pp->timeout_table = tcp_timeouts;
+}
+
+
+static void tcp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_tcp = {
+	.name =			"TCP",
+	.protocol =		IPPROTO_TCP,
+	.dont_defrag =		0,
+	.appcnt =		ATOMIC_INIT(0),
+	.init =			tcp_init,
+	.exit =			tcp_exit,
+	.register_app =		tcp_register_app,
+	.unregister_app =	tcp_unregister_app,
+	.conn_schedule =	tcp_conn_schedule,
+	.conn_in_get =		tcp_conn_in_get,
+	.conn_out_get =		tcp_conn_out_get,
+	.snat_handler =		tcp_snat_handler,
+	.dnat_handler =		tcp_dnat_handler,
+	.csum_check =		tcp_csum_check,
+	.state_name =		tcp_state_name,
+	.state_transition =	tcp_state_transition,
+	.app_conn_bind =	tcp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	tcp_timeout_change,
+	.set_state_timeout =	tcp_set_state_timeout,
+};
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
new file mode 100644
index 00000000000..8ae5f2e0aef
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -0,0 +1,427 @@
+/*
+ * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
+ *
+ * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	struct ip_vs_conn *cp;
+	__u16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_in_get(iph->protocol,
+				       iph->saddr, pptr[0],
+				       iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_in_get(iph->protocol,
+				       iph->daddr, pptr[1],
+				       iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static struct ip_vs_conn *
+udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+		 const struct iphdr *iph, unsigned int proto_off, int inverse)
+{
+	struct ip_vs_conn *cp;
+	__u16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				  sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) {
+		cp = ip_vs_conn_out_get(iph->protocol,
+					iph->saddr, pptr[0],
+					iph->daddr, pptr[1]);
+	} else {
+		cp = ip_vs_conn_out_get(iph->protocol,
+					iph->daddr, pptr[1],
+					iph->saddr, pptr[0]);
+	}
+
+	return cp;
+}
+
+
+static int
+udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		  int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	struct udphdr _udph, *uh;
+
+	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		*verdict = NF_DROP;
+		return 0;
+	}
+
+	if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
+				     skb->nh.iph->daddr, uh->dest))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+	return 1;
+}
+
+
+static inline void
+udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
+		     u16 oldport, u16 newport)
+{
+	uhdr->check =
+		ip_vs_check_diff(~oldip, newip,
+				 ip_vs_check_diff(oldport ^ 0xFFFF,
+						  newport, uhdr->check));
+	if (!uhdr->check)
+		uhdr->check = 0xFFFF;
+}
+
+static int
+udp_snat_handler(struct sk_buff **pskb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+
+	/* csum_check requires unshared skb */
+	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+			return 0;
+
+		/*
+		 *	Call application helper if needed
+		 */
+		if (!ip_vs_app_pkt_out(cp, pskb))
+			return 0;
+	}
+
+	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph->source = cp->vport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
+				     cp->dport, cp->vport);
+		if ((*pskb)->ip_summed == CHECKSUM_HW)
+			(*pskb)->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		(*pskb)->csum = skb_checksum(*pskb, udphoff,
+					     (*pskb)->len - udphoff, 0);
+		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
+						(*pskb)->len - udphoff,
+						cp->protocol,
+						(*pskb)->csum);
+		if (udph->check == 0)
+			udph->check = 0xFFFF;
+		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
+			  pp->name, udph->check,
+			  (char*)&(udph->check) - (char*)udph);
+	}
+	return 1;
+}
+
+
+static int
+udp_dnat_handler(struct sk_buff **pskb,
+		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	struct udphdr *udph;
+	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+
+	/* csum_check requires unshared skb */
+	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+			return 0;
+
+		/*
+		 *	Attempt ip_vs_app call.
+		 *	It will fix ip_vs_conn
+		 */
+		if (!ip_vs_app_pkt_in(cp, pskb))
+			return 0;
+	}
+
+	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph->dest = cp->dport;
+
+	/*
+	 *	Adjust UDP checksums
+	 */
+	if (!cp->app && (udph->check != 0)) {
+		/* Only port and addr are changed, do fast csum update */
+		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
+				     cp->vport, cp->dport);
+		if ((*pskb)->ip_summed == CHECKSUM_HW)
+			(*pskb)->ip_summed = CHECKSUM_NONE;
+	} else {
+		/* full checksum calculation */
+		udph->check = 0;
+		(*pskb)->csum = skb_checksum(*pskb, udphoff,
+					     (*pskb)->len - udphoff, 0);
+		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
+						(*pskb)->len - udphoff,
+						cp->protocol,
+						(*pskb)->csum);
+		if (udph->check == 0)
+			udph->check = 0xFFFF;
+		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return 1;
+}
+
+
+static int
+udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	struct udphdr _udph, *uh;
+	unsigned int udphoff = skb->nh.iph->ihl*4;
+
+	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
+	if (uh == NULL)
+		return 0;
+
+	if (uh->check != 0) {
+		switch (skb->ip_summed) {
+		case CHECKSUM_NONE:
+			skb->csum = skb_checksum(skb, udphoff,
+						 skb->len - udphoff, 0);
+		case CHECKSUM_HW:
+			if (csum_tcpudp_magic(skb->nh.iph->saddr,
+					      skb->nh.iph->daddr,
+					      skb->len - udphoff,
+					      skb->nh.iph->protocol,
+					      skb->csum)) {
+				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+						 "Failed checksum for");
+				return 0;
+			}
+			break;
+		default:
+			/* CHECKSUM_UNNECESSARY */
+			break;
+		}
+	}
+	return 1;
+}
+
+
+/*
+ *	Note: the caller guarantees that only one of register_app,
+ *	unregister_app or app_conn_bind is called each time.
+ */
+
+#define	UDP_APP_TAB_BITS	4
+#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+
+static struct list_head udp_apps[UDP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(udp_app_lock);
+
+static inline __u16 udp_app_hashkey(__u16 port)
+{
+	return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
+}
+
+
+static int udp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash, port = inc->port;
+	int ret = 0;
+
+	hash = udp_app_hashkey(port);
+
+
+	spin_lock_bh(&udp_app_lock);
+	list_for_each_entry(i, &udp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &udp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_udp.appcnt);
+
+  out:
+	spin_unlock_bh(&udp_app_lock);
+	return ret;
+}
+
+
+static void
+udp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&udp_app_lock);
+	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&udp_app_lock);
+}
+
+
+static int udp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+
+	/* Lookup application incarnations and bind the right one */
+	hash = udp_app_hashkey(cp->vport);
+
+	spin_lock(&udp_app_lock);
+	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&udp_app_lock);
+
+			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
+				  "%u.%u.%u.%u:%u to app %s on port %u\n",
+				  __FUNCTION__,
+				  NIPQUAD(cp->caddr), ntohs(cp->cport),
+				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
+				  inc->name, ntohs(inc->port));
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&udp_app_lock);
+
+  out:
+	return result;
+}
+
+
+static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
+	[IP_VS_UDP_S_LAST]		=	2*HZ,
+};
+
+static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
+	[IP_VS_UDP_S_NORMAL]		=	"UDP",
+	[IP_VS_UDP_S_LAST]		=	"BUG!",
+};
+
+
+static int
+udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
+				       udp_state_name_table, sname, to);
+}
+
+static const char * udp_state_name(int state)
+{
+	if (state >= IP_VS_UDP_S_LAST)
+		return "ERR!";
+	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
+}
+
+static int
+udp_state_transition(struct ip_vs_conn *cp, int direction,
+		     const struct sk_buff *skb,
+		     struct ip_vs_protocol *pp)
+{
+	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	return 1;
+}
+
+static void udp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(udp_apps);
+	pp->timeout_table = udp_timeouts;
+}
+
+static void udp_exit(struct ip_vs_protocol *pp)
+{
+}
+
+
+struct ip_vs_protocol ip_vs_protocol_udp = {
+	.name =			"UDP",
+	.protocol =		IPPROTO_UDP,
+	.dont_defrag =		0,
+	.init =			udp_init,
+	.exit =			udp_exit,
+	.conn_schedule =	udp_conn_schedule,
+	.conn_in_get =		udp_conn_in_get,
+	.conn_out_get =		udp_conn_out_get,
+	.snat_handler =		udp_snat_handler,
+	.dnat_handler =		udp_dnat_handler,
+	.csum_check =		udp_csum_check,
+	.state_transition =	udp_state_transition,
+	.state_name =		udp_state_name,
+	.register_app =		udp_register_app,
+	.unregister_app =	udp_unregister_app,
+	.app_conn_bind =	udp_app_conn_bind,
+	.debug_packet =		ip_vs_tcpudp_debug_packet,
+	.timeout_change =	NULL,
+	.set_state_timeout =	udp_set_state_timeout,
+};
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
new file mode 100644
index 00000000000..b23bab231ca
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -0,0 +1,118 @@
+/*
+ * IPVS:        Round-Robin Scheduling module
+ *
+ * Version:     $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Fixes/Changes:
+ *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
+ *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_rr_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+/*
+ * Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct list_head *p, *q;
+	struct ip_vs_dest *dest;
+
+	IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n");
+
+	write_lock(&svc->sched_lock);
+	p = (struct list_head *)svc->sched_data;
+	p = p->next;
+	q = p;
+	do {
+		/* skip list head */
+		if (q == &svc->destinations) {
+			q = q->next;
+			continue;
+		}
+		
+		dest = list_entry(q, struct ip_vs_dest, n_list);
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0)
+			/* HIT */
+			goto out;
+		q = q->next;
+	} while (q != p);
+	write_unlock(&svc->sched_lock);
+	return NULL;
+
+  out:
+	svc->sched_data = q;
+	write_unlock(&svc->sched_lock);
+	IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d\n",
+		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  atomic_read(&dest->activeconns),
+		  atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_rr_scheduler = {
+	.name =			"rr",			/* name */
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_rr_init_svc,
+	.done_service =		ip_vs_rr_done_svc,
+	.update_service =	ip_vs_rr_update_svc,
+	.schedule =		ip_vs_rr_schedule,
+};
+
+static int __init ip_vs_rr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+static void __exit ip_vs_rr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+}
+
+module_init(ip_vs_rr_init);
+module_exit(ip_vs_rr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
new file mode 100644
index 00000000000..0f7c56a225b
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -0,0 +1,251 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the Netfilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/string.h>
+#include <linux/kmod.h>
+
+#include <net/ip_vs.h>
+
+/*
+ *  IPVS scheduler list
+ */
+static LIST_HEAD(ip_vs_schedulers);
+
+/* lock for service table */
+static DEFINE_RWLOCK(__ip_vs_sched_lock);
+
+
+/*
+ *  Bind a service with a scheduler
+ */
+int ip_vs_bind_scheduler(struct ip_vs_service *svc,
+			 struct ip_vs_scheduler *scheduler)
+{
+	int ret;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+	if (scheduler == NULL) {
+		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
+		return -EINVAL;
+	}
+
+	svc->scheduler = scheduler;
+
+	if (scheduler->init_service) {
+		ret = scheduler->init_service(svc);
+		if (ret) {
+			IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *  Unbind a service with its scheduler
+ */
+int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (svc == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
+		return -EINVAL;
+	}
+
+	sched = svc->scheduler;
+	if (sched == NULL) {
+		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
+		return -EINVAL;
+	}
+
+	if (sched->done_service) {
+		if (sched->done_service(svc) != 0) {
+			IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
+			return -EINVAL;
+		}
+	}
+
+	svc->scheduler = NULL;
+	return 0;
+}
+
+
+/*
+ *  Get scheduler in the scheduler list by name
+ */
+static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n",
+		  sched_name);
+
+	read_lock_bh(&__ip_vs_sched_lock);
+
+	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+		/*
+		 * Test and get the modules atomically
+		 */
+		if (sched->module && !try_module_get(sched->module)) {
+			/*
+			 * This scheduler is just deleted
+			 */
+			continue;
+		}
+		if (strcmp(sched_name, sched->name)==0) {
+			/* HIT */
+			read_unlock_bh(&__ip_vs_sched_lock);
+			return sched;
+		}
+		if (sched->module)
+			module_put(sched->module);
+	}
+
+	read_unlock_bh(&__ip_vs_sched_lock);
+	return NULL;
+}
+
+
+/*
+ *  Lookup scheduler and try to load it if it doesn't exist
+ */
+struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
+{
+	struct ip_vs_scheduler *sched;
+
+	/*
+	 *  Search for the scheduler by sched_name
+	 */
+	sched = ip_vs_sched_getbyname(sched_name);
+
+	/*
+	 *  If scheduler not found, load the module and search again
+	 */
+	if (sched == NULL) {
+		request_module("ip_vs_%s", sched_name);
+		sched = ip_vs_sched_getbyname(sched_name);
+	}
+
+	return sched;
+}
+
+void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
+{
+	if (scheduler->module)
+		module_put(scheduler->module);
+}
+
+
+/*
+ *  Register a scheduler in the scheduler list
+ */
+int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	struct ip_vs_scheduler *sched;
+
+	if (!scheduler) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	if (!scheduler->name) {
+		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
+		return -EINVAL;
+	}
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	/*
+	 *  Make sure that the scheduler with this name doesn't exist
+	 *  in the scheduler list.
+	 */
+	sched = ip_vs_sched_getbyname(scheduler->name);
+	if (sched) {
+		ip_vs_scheduler_put(sched);
+		ip_vs_use_count_dec();
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already existed in the system\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+
+	if (scheduler->n_list.next != &scheduler->n_list) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		ip_vs_use_count_dec();
+		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+			  "already linked\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Add it into the d-linked scheduler list
+	 */
+	list_add(&scheduler->n_list, &ip_vs_schedulers);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);
+
+	return 0;
+}
+
+
+/*
+ *  Unregister a scheduler from the scheduler list
+ */
+int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
+{
+	if (!scheduler) {
+		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_sched_lock);
+	if (scheduler->n_list.next == &scheduler->n_list) {
+		write_unlock_bh(&__ip_vs_sched_lock);
+		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
+			  "is not in the list. failed\n", scheduler->name);
+		return -EINVAL;
+	}
+
+	/*
+	 *	Remove it from the d-linked scheduler list
+	 */
+	list_del(&scheduler->n_list);
+	write_unlock_bh(&__ip_vs_sched_lock);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);
+
+	return 0;
+}
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
new file mode 100644
index 00000000000..ff366f7390d
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -0,0 +1,163 @@
+/*
+ * IPVS:        Shortest Expected Delay scheduling module
+ *
+ * Version:     $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_sed_init_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_sed_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_sed_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We only use the active connection number in the cost
+	 * calculation here.
+	 */
+	return atomic_read(&dest->activeconns) + 1;
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *	(server expected overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_sed_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_sed_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_sed_scheduler =
+{
+	.name =			"sed",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_sed_init_svc,
+	.done_service =		ip_vs_sed_done_svc,
+	.update_service =	ip_vs_sed_update_svc,
+	.schedule =		ip_vs_sed_schedule,
+};
+
+
+static int __init ip_vs_sed_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+static void __exit ip_vs_sed_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
new file mode 100644
index 00000000000..6f7c50e44a3
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -0,0 +1,255 @@
+/*
+ * IPVS:        Source Hashing scheduling module
+ *
+ * Version:     $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@gnuchina.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+/*
+ * The sh algorithm is to select server by the hash key of source IP
+ * address. The pseudo code is as follows:
+ *
+ *       n <- servernode[src_ip];
+ *       if (n is dead) OR
+ *          (n is overloaded) or (n.weight <= 0) then
+ *                 return NULL;
+ *
+ *       return n;
+ *
+ * Notes that servernode is a 256-bucket hash table that maps the hash
+ * index derived from packet source IP address to the current server
+ * array. If the sh scheduler is used in cache cluster, it is good to
+ * combine it with cache_bypass feature. When the statically assigned
+ * server is dead or overloaded, the load balancer can bypass the cache
+ * server and send requests to the original server directly.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      IPVS SH bucket
+ */
+struct ip_vs_sh_bucket {
+	struct ip_vs_dest       *dest;          /* real server (cache) */
+};
+
+/*
+ *     for IPVS SH entry hash table
+ */
+#ifndef CONFIG_IP_VS_SH_TAB_BITS
+#define CONFIG_IP_VS_SH_TAB_BITS        8
+#endif
+#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
+#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
+#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
+
+
+/*
+ *	Returns hash value for IPVS SH entry
+ */
+static inline unsigned ip_vs_sh_hashkey(__u32 addr)
+{
+	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
+}
+
+
+/*
+ *      Get ip_vs_dest associated with supplied parameters.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr)
+{
+	return (tbl[ip_vs_sh_hashkey(addr)]).dest;
+}
+
+
+/*
+ *      Assign all the hash buckets of the specified table with the service.
+ */
+static int
+ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+	struct list_head *p;
+	struct ip_vs_dest *dest;
+
+	b = tbl;
+	p = &svc->destinations;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (list_empty(p)) {
+			b->dest = NULL;
+		} else {
+			if (p == &svc->destinations)
+				p = p->next;
+
+			dest = list_entry(p, struct ip_vs_dest, n_list);
+			atomic_inc(&dest->refcnt);
+			b->dest = dest;
+
+			p = p->next;
+		}
+		b++;
+	}
+	return 0;
+}
+
+
+/*
+ *      Flush all the hash buckets of the specified table.
+ */
+static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+{
+	int i;
+	struct ip_vs_sh_bucket *b;
+
+	b = tbl;
+	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
+		if (b->dest) {
+			atomic_dec(&b->dest->refcnt);
+			b->dest = NULL;
+		}
+		b++;
+	}
+}
+
+
+static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl;
+
+	/* allocate the SH table for this service */
+	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
+		      GFP_ATOMIC);
+	if (tbl == NULL) {
+		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	svc->sched_data = tbl;
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
+		  "current service\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* release the table itself */
+	kfree(svc->sched_data);
+	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
+		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
+
+	return 0;
+}
+
+
+static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_sh_bucket *tbl = svc->sched_data;
+
+	/* got to clean up hash buckets here */
+	ip_vs_sh_flush(tbl);
+
+	/* assign the hash buckets with the updated service */
+	ip_vs_sh_assign(tbl, svc);
+
+	return 0;
+}
+
+
+/*
+ *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
+ *      consider that the server is overloaded here.
+ */
+static inline int is_overloaded(struct ip_vs_dest *dest)
+{
+	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+
+/*
+ *      Source Hashing scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_sh_bucket *tbl;
+	struct iphdr *iph = skb->nh.iph;
+
+	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+
+	tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
+	dest = ip_vs_sh_get(tbl, iph->saddr);
+	if (!dest
+	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
+	    || atomic_read(&dest->weight) <= 0
+	    || is_overloaded(dest)) {
+		return NULL;
+	}
+
+	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
+		  "--> server %u.%u.%u.%u:%d\n",
+		  NIPQUAD(iph->saddr),
+		  NIPQUAD(dest->addr),
+		  ntohs(dest->port));
+
+	return dest;
+}
+
+
+/*
+ *      IPVS SH Scheduler structure
+ */
+static struct ip_vs_scheduler ip_vs_sh_scheduler =
+{
+	.name =			"sh",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_sh_init_svc,
+	.done_service =		ip_vs_sh_done_svc,
+	.update_service =	ip_vs_sh_update_svc,
+	.schedule =		ip_vs_sh_schedule,
+};
+
+
+static int __init ip_vs_sh_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+static void __exit ip_vs_sh_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+}
+
+
+module_init(ip_vs_sh_init);
+module_exit(ip_vs_sh_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
new file mode 100644
index 00000000000..25c479550a3
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -0,0 +1,892 @@
+/*
+ * IPVS         An implementation of the IP virtual server support for the
+ *              LINUX operating system.  IPVS is now implemented as a module
+ *              over the NetFilter framework. IPVS can be used to build a
+ *              high-performance and highly available server based on a
+ *              cluster of servers.
+ *
+ * Version:     $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ * ip_vs_sync:  sync connection info from master load balancer to backups
+ *              through multicast
+ *
+ * Changes:
+ *	Alexandre Cassen	:	Added master & backup support at a time.
+ *	Alexandre Cassen	:	Added SyncID support for incoming sync
+ *					messages filtering.
+ *	Justin Ossevoort	:	Fix endian problem on sync message size.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/igmp.h>                 /* for ip_mc_join_group */
+
+#include <net/ip.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>                /* for get_fs and set_fs */
+
+#include <net/ip_vs.h>
+
+#define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
+#define IP_VS_SYNC_PORT  8848          /* multicast port */
+
+
+/*
+ *	IPVS sync connection entry
+ */
+struct ip_vs_sync_conn {
+	__u8			reserved;
+
+	/* Protocol, addresses and port numbers */
+	__u8			protocol;       /* Which protocol (TCP/UDP) */
+	__u16			cport;
+	__u16                   vport;
+	__u16                   dport;
+	__u32                   caddr;          /* client address */
+	__u32                   vaddr;          /* virtual address */
+	__u32                   daddr;          /* destination address */
+
+	/* Flags and state transition */
+	__u16                   flags;          /* status flags */
+	__u16                   state;          /* state info */
+
+	/* The sequence options start here */
+};
+
+struct ip_vs_sync_conn_options {
+	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
+	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
+};
+
+#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+#define FULL_CONN_SIZE  \
+(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+
+
+/*
+  The master mulitcasts messages to the backup load balancers in the
+  following format.
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (1)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                            .                                  |
+      |                            .                                  |
+      |                            .                                  |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                                                               |
+      |                    IPVS Sync Connection (n)                   |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define SYNC_MESG_HEADER_LEN	4
+
+struct ip_vs_sync_mesg {
+	__u8                    nr_conns;
+	__u8                    syncid;
+	__u16                   size;
+
+	/* ip_vs_sync_conn entries start here */
+};
+
+/* the maximum length of sync (sending/receiving) message */
+static int sync_send_mesg_maxlen;
+static int sync_recv_mesg_maxlen;
+
+struct ip_vs_sync_buff {
+	struct list_head        list;
+	unsigned long           firstuse;
+
+	/* pointers for the message data */
+	struct ip_vs_sync_mesg  *mesg;
+	unsigned char           *head;
+	unsigned char           *end;
+};
+
+
+/* the sync_buff list head and the lock */
+static LIST_HEAD(ip_vs_sync_queue);
+static DEFINE_SPINLOCK(ip_vs_sync_lock);
+
+/* current sync_buff for accepting new conn entries */
+static struct ip_vs_sync_buff   *curr_sb = NULL;
+static DEFINE_SPINLOCK(curr_sb_lock);
+
+/* ipvs sync daemon state */
+volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
+volatile int ip_vs_master_syncid = 0;
+volatile int ip_vs_backup_syncid = 0;
+
+/* multicast interface name */
+char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+/* multicast addr */
+static struct sockaddr_in mcast_addr;
+
+
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+	spin_lock(&ip_vs_sync_lock);
+	list_add_tail(&sb->list, &ip_vs_sync_queue);
+	spin_unlock(&ip_vs_sync_lock);
+}
+
+static inline struct ip_vs_sync_buff * sb_dequeue(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&ip_vs_sync_lock);
+	if (list_empty(&ip_vs_sync_queue)) {
+		sb = NULL;
+	} else {
+		sb = list_entry(ip_vs_sync_queue.next,
+				struct ip_vs_sync_buff,
+				list);
+		list_del(&sb->list);
+	}
+	spin_unlock_bh(&ip_vs_sync_lock);
+
+	return sb;
+}
+
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+{
+	struct ip_vs_sync_buff *sb;
+
+	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+		return NULL;
+
+	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+		kfree(sb);
+		return NULL;
+	}
+	sb->mesg->nr_conns = 0;
+	sb->mesg->syncid = ip_vs_master_syncid;
+	sb->mesg->size = 4;
+	sb->head = (unsigned char *)sb->mesg + 4;
+	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
+{
+	kfree(sb->mesg);
+	kfree(sb);
+}
+
+/*
+ *	Get the current sync buffer if it has been created for more
+ *	than the specified time or the specified time is zero.
+ */
+static inline struct ip_vs_sync_buff *
+get_curr_sync_buff(unsigned long time)
+{
+	struct ip_vs_sync_buff *sb;
+
+	spin_lock_bh(&curr_sb_lock);
+	if (curr_sb && (time == 0 ||
+			time_before(jiffies - curr_sb->firstuse, time))) {
+		sb = curr_sb;
+		curr_sb = NULL;
+	} else
+		sb = NULL;
+	spin_unlock_bh(&curr_sb_lock);
+	return sb;
+}
+
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ */
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
+{
+	struct ip_vs_sync_mesg *m;
+	struct ip_vs_sync_conn *s;
+	int len;
+
+	spin_lock(&curr_sb_lock);
+	if (!curr_sb) {
+		if (!(curr_sb=ip_vs_sync_buff_create())) {
+			spin_unlock(&curr_sb_lock);
+			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+		SIMPLE_CONN_SIZE;
+	m = curr_sb->mesg;
+	s = (struct ip_vs_sync_conn *)curr_sb->head;
+
+	/* copy members */
+	s->protocol = cp->protocol;
+	s->cport = cp->cport;
+	s->vport = cp->vport;
+	s->dport = cp->dport;
+	s->caddr = cp->caddr;
+	s->vaddr = cp->vaddr;
+	s->daddr = cp->daddr;
+	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->state = htons(cp->state);
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		struct ip_vs_sync_conn_options *opt =
+			(struct ip_vs_sync_conn_options *)&s[1];
+		memcpy(opt, &cp->in_seq, sizeof(*opt));
+	}
+
+	m->nr_conns++;
+	m->size += len;
+	curr_sb->head += len;
+
+	/* check if there is a space for next one */
+	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
+		sb_queue_tail(curr_sb);
+		curr_sb = NULL;
+	}
+	spin_unlock(&curr_sb_lock);
+
+	/* synchronize its controller if it has */
+	if (cp->control)
+		ip_vs_sync_conn(cp->control);
+}
+
+
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ */
+static void ip_vs_process_message(const char *buffer, const size_t buflen)
+{
+	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+	struct ip_vs_sync_conn *s;
+	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_conn *cp;
+	char *p;
+	int i;
+
+	/* Convert size back to host byte order */
+	m->size = ntohs(m->size);
+
+	if (buflen != m->size) {
+		IP_VS_ERR("bogus message\n");
+		return;
+	}
+
+	/* SyncID sanity check */
+	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
+			  m->syncid);
+		return;
+	}
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+	for (i=0; i<m->nr_conns; i++) {
+		s = (struct ip_vs_sync_conn *)p;
+		cp = ip_vs_conn_in_get(s->protocol,
+				       s->caddr, s->cport,
+				       s->vaddr, s->vport);
+		if (!cp) {
+			cp = ip_vs_conn_new(s->protocol,
+					    s->caddr, s->cport,
+					    s->vaddr, s->vport,
+					    s->daddr, s->dport,
+					    ntohs(s->flags), NULL);
+			if (!cp) {
+				IP_VS_ERR("ip_vs_conn_new failed\n");
+				return;
+			}
+			cp->state = ntohs(s->state);
+		} else if (!cp->dest) {
+			/* it is an entry created by the synchronization */
+			cp->state = ntohs(s->state);
+			cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
+		}	/* Note that we don't touch its state and flags
+			   if it is a normal entry. */
+
+		if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
+			opt = (struct ip_vs_sync_conn_options *)&s[1];
+			memcpy(&cp->in_seq, opt, sizeof(*opt));
+			p += FULL_CONN_SIZE;
+		} else
+			p += SIMPLE_CONN_SIZE;
+
+		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+		cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
+		ip_vs_conn_put(cp);
+
+		if (p > buffer+buflen) {
+			IP_VS_ERR("bogus message\n");
+			return;
+		}
+	}
+}
+
+
+/*
+ *      Setup loopback of outgoing multicasts on a sending socket
+ */
+static void set_mcast_loop(struct sock *sk, u_char loop)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
+	lock_sock(sk);
+	inet->mc_loop = loop ? 1 : 0;
+	release_sock(sk);
+}
+
+/*
+ *      Specify TTL for outgoing multicasts on a sending socket
+ */
+static void set_mcast_ttl(struct sock *sk, u_char ttl)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
+	lock_sock(sk);
+	inet->mc_ttl = ttl;
+	release_sock(sk);
+}
+
+/*
+ *      Specifiy default interface for outgoing multicasts
+ */
+static int set_mcast_if(struct sock *sk, char *ifname)
+{
+	struct net_device *dev;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if ((dev = __dev_get_by_name(ifname)) == NULL)
+		return -ENODEV;
+
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	lock_sock(sk);
+	inet->mc_index = dev->ifindex;
+	/*  inet->mc_addr  = 0; */
+	release_sock(sk);
+
+	return 0;
+}
+
+
+/*
+ *	Set the maximum length of sync message according to the
+ *	specified interface's MTU.
+ */
+static int set_sync_mesg_maxlen(int sync_state)
+{
+	struct net_device *dev;
+	int num;
+
+	if (sync_state == IP_VS_STATE_MASTER) {
+		if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		num = (dev->mtu - sizeof(struct iphdr) -
+		       sizeof(struct udphdr) -
+		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
+		sync_send_mesg_maxlen =
+			SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num;
+		IP_VS_DBG(7, "setting the maximum length of sync sending "
+			  "message %d.\n", sync_send_mesg_maxlen);
+	} else if (sync_state == IP_VS_STATE_BACKUP) {
+		if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
+			return -ENODEV;
+
+		sync_recv_mesg_maxlen = dev->mtu -
+			sizeof(struct iphdr) - sizeof(struct udphdr);
+		IP_VS_DBG(7, "setting the maximum length of sync receiving "
+			  "message %d.\n", sync_recv_mesg_maxlen);
+	}
+
+	return 0;
+}
+
+
+/*
+ *      Join a multicast group.
+ *      the group is specified by a class D multicast address 224.0.0.0/8
+ *      in the in_addr structure passed in as a parameter.
+ */
+static int
+join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+{
+	struct ip_mreqn mreq;
+	struct net_device *dev;
+	int ret;
+
+	memset(&mreq, 0, sizeof(mreq));
+	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+
+	if ((dev = __dev_get_by_name(ifname)) == NULL)
+		return -ENODEV;
+	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+		return -EINVAL;
+
+	mreq.imr_ifindex = dev->ifindex;
+
+	lock_sock(sk);
+	ret = ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+
+	return ret;
+}
+
+
+static int bind_mcastif_addr(struct socket *sock, char *ifname)
+{
+	struct net_device *dev;
+	u32 addr;
+	struct sockaddr_in sin;
+
+	if ((dev = __dev_get_by_name(ifname)) == NULL)
+		return -ENODEV;
+
+	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	if (!addr)
+		IP_VS_ERR("You probably need to specify IP address on "
+			  "multicast interface.\n");
+
+	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
+		  ifname, NIPQUAD(addr));
+
+	/* Now bind the socket with the address of multicast interface */
+	sin.sin_family	     = AF_INET;
+	sin.sin_addr.s_addr  = addr;
+	sin.sin_port         = 0;
+
+	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+}
+
+/*
+ *      Set up sending multicast socket over UDP
+ */
+static struct socket * make_send_sock(void)
+{
+	struct socket *sock;
+
+	/* First create a socket */
+	if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return NULL;
+	}
+
+	if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
+		IP_VS_ERR("Error setting outbound mcast interface\n");
+		goto error;
+	}
+
+	set_mcast_loop(sock->sk, 0);
+	set_mcast_ttl(sock->sk, 1);
+
+	if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
+		IP_VS_ERR("Error binding address of the mcast interface\n");
+		goto error;
+	}
+
+	if (sock->ops->connect(sock,
+			       (struct sockaddr*)&mcast_addr,
+			       sizeof(struct sockaddr), 0) < 0) {
+		IP_VS_ERR("Error connecting to the multicast addr\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return NULL;
+}
+
+
+/*
+ *      Set up receiving multicast socket over UDP
+ */
+static struct socket * make_receive_sock(void)
+{
+	struct socket *sock;
+
+	/* First create a socket */
+	if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+		IP_VS_ERR("Error during creation of socket; terminating\n");
+		return NULL;
+	}
+
+	/* it is equivalent to the REUSEADDR option in user-space */
+	sock->sk->sk_reuse = 1;
+
+	if (sock->ops->bind(sock,
+			    (struct sockaddr*)&mcast_addr,
+			    sizeof(struct sockaddr)) < 0) {
+		IP_VS_ERR("Error binding to the multicast addr\n");
+		goto error;
+	}
+
+	/* join the multicast group */
+	if (join_mcast_group(sock->sk,
+			     (struct in_addr*)&mcast_addr.sin_addr,
+			     ip_vs_backup_mcast_ifn) < 0) {
+		IP_VS_ERR("Error joining to the multicast group\n");
+		goto error;
+	}
+
+	return sock;
+
+  error:
+	sock_release(sock);
+	return NULL;
+}
+
+
+static int
+ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
+{
+	struct msghdr	msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
+	struct kvec	iov;
+	int		len;
+
+	EnterFunction(7);
+	iov.iov_base     = (void *)buffer;
+	iov.iov_len      = length;
+
+	len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
+
+	LeaveFunction(7);
+	return len;
+}
+
+static void
+ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
+{
+	int msize;
+
+	msize = msg->size;
+
+	/* Put size in network byte order */
+	msg->size = htons(msg->size);
+
+	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
+		IP_VS_ERR("ip_vs_send_async error\n");
+}
+
+static int
+ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
+{
+	struct msghdr		msg = {NULL,};
+	struct kvec		iov;
+	int			len;
+
+	EnterFunction(7);
+
+	/* Receive a packet */
+	iov.iov_base     = buffer;
+	iov.iov_len      = (size_t)buflen;
+
+	len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
+
+	if (len < 0)
+		return -1;
+
+	LeaveFunction(7);
+	return len;
+}
+
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
+static pid_t sync_master_pid = 0;
+static pid_t sync_backup_pid = 0;
+
+static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
+static int stop_master_sync = 0;
+static int stop_backup_sync = 0;
+
+static void sync_master_loop(void)
+{
+	struct socket *sock;
+	struct ip_vs_sync_buff *sb;
+
+	/* create the sending multicast socket */
+	sock = make_send_sock();
+	if (!sock)
+		return;
+
+	IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+
+	for (;;) {
+		while ((sb=sb_dequeue())) {
+			ip_vs_send_sync_msg(sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		/* check if entries stay in curr_sb for 2 seconds */
+		if ((sb = get_curr_sync_buff(2*HZ))) {
+			ip_vs_send_sync_msg(sock, sb->mesg);
+			ip_vs_sync_buff_release(sb);
+		}
+
+		if (stop_master_sync)
+			break;
+
+		ssleep(1);
+	}
+
+	/* clean up the sync_buff queue */
+	while ((sb=sb_dequeue())) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* clean up the current sync_buff */
+	if ((sb = get_curr_sync_buff(0))) {
+		ip_vs_sync_buff_release(sb);
+	}
+
+	/* release the sending multicast socket */
+	sock_release(sock);
+}
+
+
+static void sync_backup_loop(void)
+{
+	struct socket *sock;
+	char *buf;
+	int len;
+
+	if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
+		IP_VS_ERR("sync_backup_loop: kmalloc error\n");
+		return;
+	}
+
+	/* create the receiving multicast socket */
+	sock = make_receive_sock();
+	if (!sock)
+		goto out;
+
+	IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
+		   "syncid = %d\n",
+		   ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+
+	for (;;) {
+		/* do you have data now? */
+		while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
+			if ((len =
+			     ip_vs_receive(sock, buf,
+					   sync_recv_mesg_maxlen)) <= 0) {
+				IP_VS_ERR("receiving message error\n");
+				break;
+			}
+			/* disable bottom half, because it accessed the data
+			   shared by softirq while getting/creating conns */
+			local_bh_disable();
+			ip_vs_process_message(buf, len);
+			local_bh_enable();
+		}
+
+		if (stop_backup_sync)
+			break;
+
+		ssleep(1);
+	}
+
+	/* release the sending multicast socket */
+	sock_release(sock);
+
+  out:
+	kfree(buf);
+}
+
+
+static void set_sync_pid(int sync_state, pid_t sync_pid)
+{
+	if (sync_state == IP_VS_STATE_MASTER)
+		sync_master_pid = sync_pid;
+	else if (sync_state == IP_VS_STATE_BACKUP)
+		sync_backup_pid = sync_pid;
+}
+
+static void set_stop_sync(int sync_state, int set)
+{
+	if (sync_state == IP_VS_STATE_MASTER)
+		stop_master_sync = set;
+	else if (sync_state == IP_VS_STATE_BACKUP)
+		stop_backup_sync = set;
+	else {
+		stop_master_sync = set;
+		stop_backup_sync = set;
+	}
+}
+
+static int sync_thread(void *startup)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	mm_segment_t oldmm;
+	int state;
+	const char *name;
+
+	/* increase the module use count */
+	ip_vs_use_count_inc();
+
+	if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
+		state = IP_VS_STATE_MASTER;
+		name = "ipvs_syncmaster";
+	} else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
+		state = IP_VS_STATE_BACKUP;
+		name = "ipvs_syncbackup";
+	} else {
+		IP_VS_BUG();
+		ip_vs_use_count_dec();
+		return -EINVAL;
+	}
+
+	daemonize(name);
+
+	oldmm = get_fs();
+	set_fs(KERNEL_DS);
+
+	/* Block all signals */
+	spin_lock_irq(&current->sighand->siglock);
+	siginitsetinv(&current->blocked, 0);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	/* set the maximum length of sync message */
+	set_sync_mesg_maxlen(state);
+
+	/* set up multicast address */
+	mcast_addr.sin_family = AF_INET;
+	mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
+	mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
+
+	add_wait_queue(&sync_wait, &wait);
+
+	set_sync_pid(state, current->pid);
+	complete((struct completion *)startup);
+
+	/* processing master/backup loop here */
+	if (state == IP_VS_STATE_MASTER)
+		sync_master_loop();
+	else if (state == IP_VS_STATE_BACKUP)
+		sync_backup_loop();
+	else IP_VS_BUG();
+
+	remove_wait_queue(&sync_wait, &wait);
+
+	/* thread exits */
+	set_sync_pid(state, 0);
+	IP_VS_INFO("sync thread stopped!\n");
+
+	set_fs(oldmm);
+
+	/* decrease the module use count */
+	ip_vs_use_count_dec();
+
+	set_stop_sync(state, 0);
+	wake_up(&stop_sync_wait);
+
+	return 0;
+}
+
+
+static int fork_sync_thread(void *startup)
+{
+	pid_t pid;
+
+	/* fork the sync thread here, then the parent process of the
+	   sync thread is the init process after this thread exits. */
+  repeat:
+	if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
+		IP_VS_ERR("could not create sync_thread due to %d... "
+			  "retrying.\n", pid);
+		ssleep(1);
+		goto repeat;
+	}
+
+	return 0;
+}
+
+
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+{
+	DECLARE_COMPLETION(startup);
+	pid_t pid;
+
+	if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
+	    (state == IP_VS_STATE_BACKUP && sync_backup_pid))
+		return -EEXIST;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
+		  sizeof(struct ip_vs_sync_conn));
+
+	ip_vs_sync_state |= state;
+	if (state == IP_VS_STATE_MASTER) {
+		strcpy(ip_vs_master_mcast_ifn, mcast_ifn);
+		ip_vs_master_syncid = syncid;
+	} else {
+		strcpy(ip_vs_backup_mcast_ifn, mcast_ifn);
+		ip_vs_backup_syncid = syncid;
+	}
+
+  repeat:
+	if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
+		IP_VS_ERR("could not create fork_sync_thread due to %d... "
+			  "retrying.\n", pid);
+		ssleep(1);
+		goto repeat;
+	}
+
+	wait_for_completion(&startup);
+
+	return 0;
+}
+
+
+int stop_sync_thread(int state)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
+	    (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
+		return -ESRCH;
+
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_INFO("stopping sync thread %d ...\n",
+		   (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid);
+
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	add_wait_queue(&stop_sync_wait, &wait);
+	set_stop_sync(state, 1);
+	ip_vs_sync_state -= state;
+	wake_up(&sync_wait);
+	schedule();
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&stop_sync_wait, &wait);
+
+	/* Note: no need to reap the sync thread, because its parent
+	   process is the init process */
+
+	if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
+	    (state == IP_VS_STATE_BACKUP && stop_backup_sync))
+		IP_VS_BUG();
+
+	return 0;
+}
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
new file mode 100644
index 00000000000..8a9d913261d
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -0,0 +1,151 @@
+/*
+ * IPVS:        Weighted Least-Connection Scheduling module
+ *
+ * Version:     $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Peter Kese <peter.kese@ijs.si>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wlc_schedule to return dest
+ *     Wensong Zhang            :     changed to use the inactconns in scheduling
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wlc_update_svc
+ *     Wensong Zhang            :     added any dest with weight=0 is quiesced
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_wlc_init_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_wlc_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static int
+ip_vs_wlc_update_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+
+static inline unsigned int
+ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
+{
+	/*
+	 * We think the overhead of processing active connections is 256
+	 * times higher than that of inactive connections in average. (This
+	 * 256 times might not be accurate, we will change it later) We
+	 * use the following formula to estimate the overhead now:
+	 *		  dest->activeconns*256 + dest->inactconns
+	 */
+	return (atomic_read(&dest->activeconns) << 8) +
+		atomic_read(&dest->inactconns);
+}
+
+
+/*
+ *	Weighted Least Connection scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest, *least;
+	unsigned int loh, doh;
+
+	IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
+
+	/*
+	 * We calculate the load of each dest server as follows:
+	 *		  (dest overhead) / dest->weight
+	 *
+	 * Remember -- no floats in kernel mode!!!
+	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
+	 *		  h1/w1 > h2/w2
+	 * if every weight is larger than zero.
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 */
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+		    atomic_read(&dest->weight) > 0) {
+			least = dest;
+			loh = ip_vs_wlc_dest_overhead(least);
+			goto nextstage;
+		}
+	}
+	return NULL;
+
+	/*
+	 *    Find the destination with the least load.
+	 */
+  nextstage:
+	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+		doh = ip_vs_wlc_dest_overhead(dest);
+		if (loh * atomic_read(&dest->weight) >
+		    doh * atomic_read(&least->weight)) {
+			least = dest;
+			loh = doh;
+		}
+	}
+
+	IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d overhead %d\n",
+		  NIPQUAD(least->addr), ntohs(least->port),
+		  atomic_read(&least->activeconns),
+		  atomic_read(&least->refcnt),
+		  atomic_read(&least->weight), loh);
+
+	return least;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlc_scheduler =
+{
+	.name =			"wlc",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_wlc_init_svc,
+	.done_service =		ip_vs_wlc_done_svc,
+	.update_service =	ip_vs_wlc_update_svc,
+	.schedule =		ip_vs_wlc_schedule,
+};
+
+
+static int __init ip_vs_wlc_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+static void __exit ip_vs_wlc_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+}
+
+module_init(ip_vs_wlc_init);
+module_exit(ip_vs_wlc_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
new file mode 100644
index 00000000000..749fa044eca
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -0,0 +1,235 @@
+/*
+ * IPVS:        Weighted Round-Robin Scheduling module
+ *
+ * Version:     $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Wensong Zhang            :     changed the ip_vs_wrr_schedule to return dest
+ *     Wensong Zhang            :     changed some comestics things for debugging
+ *     Wensong Zhang            :     changed for the d-linked destination list
+ *     Wensong Zhang            :     added the ip_vs_wrr_update_svc
+ *     Julian Anastasov         :     fixed the bug of returning destination
+ *                                    with weight 0 when all weights are zero
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/*
+ * current destination pointer for weighted round-robin scheduling
+ */
+struct ip_vs_wrr_mark {
+	struct list_head *cl;	/* current list head */
+	int cw;			/* current weight */
+	int mw;			/* maximum weight */
+	int di;			/* decreasing interval */
+};
+
+
+/*
+ *    Get the gcd of server weights
+ */
+static int gcd(int a, int b)
+{
+	int c;
+
+	while ((c = a % b)) {
+		a = b;
+		b = c;
+	}
+	return b;
+}
+
+static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight;
+	int g = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		weight = atomic_read(&dest->weight);
+		if (weight > 0) {
+			if (g > 0)
+				g = gcd(weight, g);
+			else
+				g = weight;
+		}
+	}
+	return g ? g : 1;
+}
+
+
+/*
+ *    Get the maximum weight of the service destinations.
+ */
+static int ip_vs_wrr_max_weight(struct ip_vs_service *svc)
+{
+	struct ip_vs_dest *dest;
+	int weight = 0;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (atomic_read(&dest->weight) > weight)
+			weight = atomic_read(&dest->weight);
+	}
+
+	return weight;
+}
+
+
+static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark;
+
+	/*
+	 *    Allocate the mark variable for WRR scheduling
+	 */
+	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+	if (mark == NULL) {
+		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
+		return -ENOMEM;
+	}
+	mark->cl = &svc->destinations;
+	mark->cw = 0;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	svc->sched_data = mark;
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+{
+	/*
+	 *    Release the mark variable
+	 */
+	kfree(svc->sched_data);
+
+	return 0;
+}
+
+
+static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+{
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
+	mark->cl = &svc->destinations;
+	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->di = ip_vs_wrr_gcd_weight(svc);
+	if (mark->cw > mark->mw)
+		mark->cw = 0;
+	return 0;
+}
+
+
+/*
+ *    Weighted Round-Robin Scheduling
+ */
+static struct ip_vs_dest *
+ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct ip_vs_dest *dest;
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+	struct list_head *p;
+
+	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
+
+	/*
+	 * This loop will always terminate, because mark->cw in (0, max_weight]
+	 * and at least one server has its weight equal to max_weight.
+	 */
+	write_lock(&svc->sched_lock);
+	p = mark->cl;
+	while (1) {
+		if (mark->cl == &svc->destinations) {
+			/* it is at the head of the destination list */
+
+			if (mark->cl == mark->cl->next) {
+				/* no dest entry */
+				dest = NULL;
+				goto out;
+			}
+
+			mark->cl = svc->destinations.next;
+			mark->cw -= mark->di;
+			if (mark->cw <= 0) {
+				mark->cw = mark->mw;
+				/*
+				 * Still zero, which means no available servers.
+				 */
+				if (mark->cw == 0) {
+					mark->cl = &svc->destinations;
+					IP_VS_INFO("ip_vs_wrr_schedule(): "
+						   "no available servers\n");
+					dest = NULL;
+					goto out;
+				}
+			}
+		} else
+			mark->cl = mark->cl->next;
+
+		if (mark->cl != &svc->destinations) {
+			/* not at the head of the list */
+			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+			    atomic_read(&dest->weight) >= mark->cw) {
+				/* got it */
+				break;
+			}
+		}
+
+		if (mark->cl == p && mark->cw == mark->di) {
+			/* back to the start, and no dest is found.
+			   It is only possible when all dests are OVERLOADED */
+			dest = NULL;
+			goto out;
+		}
+	}
+
+	IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
+		  "activeconns %d refcnt %d weight %d\n",
+		  NIPQUAD(dest->addr), ntohs(dest->port),
+		  atomic_read(&dest->activeconns),
+		  atomic_read(&dest->refcnt),
+		  atomic_read(&dest->weight));
+
+  out:
+	write_unlock(&svc->sched_lock);
+	return dest;
+}
+
+
+static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
+	.name =			"wrr",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_wrr_init_svc,
+	.done_service =		ip_vs_wrr_done_svc,
+	.update_service =	ip_vs_wrr_update_svc,
+	.schedule =		ip_vs_wrr_schedule,
+};
+
+static int __init ip_vs_wrr_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
+}
+
+static void __exit ip_vs_wrr_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+}
+
+module_init(ip_vs_wrr_init);
+module_exit(ip_vs_wrr_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
new file mode 100644
index 00000000000..faa6176bbeb
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -0,0 +1,563 @@
+/*
+ * ip_vs_xmit.c: various packet transmitters for IPVS
+ *
+ * Version:     $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *              Julian Anastasov <ja@ssi.bg>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>                  /* for tcphdr */
+#include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <net/udp.h>
+#include <net/icmp.h>                   /* for icmp_send */
+#include <net/route.h>                  /* for ip_route_output */
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ *      Destination cache to speed up outgoing route lookup
+ */
+static inline void
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = dst;
+	dest->dst_rtos = rtos;
+	dst_release(old_dst);
+}
+
+static inline struct dst_entry *
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+{
+	struct dst_entry *dst = dest->dst_cache;
+
+	if (!dst)
+		return NULL;
+	if ((dst->obsolete || rtos != dest->dst_rtos) &&
+	    dst->ops->check(dst, cookie) == NULL) {
+		dest->dst_cache = NULL;
+		dst_release(dst);
+		return NULL;
+	}
+	dst_hold(dst);
+	return dst;
+}
+
+static inline struct rtable *
+__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct ip_vs_dest *dest = cp->dest;
+
+	if (dest) {
+		spin_lock(&dest->dst_lock);
+		if (!(rt = (struct rtable *)
+		      __ip_vs_dst_check(dest, rtos, 0))) {
+			struct flowi fl = {
+				.oif = 0,
+				.nl_u = {
+					.ip4_u = {
+						.daddr = dest->addr,
+						.saddr = 0,
+						.tos = rtos, } },
+			};
+
+			if (ip_route_output_key(&rt, &fl)) {
+				spin_unlock(&dest->dst_lock);
+				IP_VS_DBG_RL("ip_route_output error, "
+					     "dest: %u.%u.%u.%u\n",
+					     NIPQUAD(dest->addr));
+				return NULL;
+			}
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
+				  NIPQUAD(dest->addr),
+				  atomic_read(&rt->u.dst.__refcnt), rtos);
+		}
+		spin_unlock(&dest->dst_lock);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip4_u = {
+					.daddr = cp->daddr,
+					.saddr = 0,
+					.tos = rtos, } },
+		};
+
+		if (ip_route_output_key(&rt, &fl)) {
+			IP_VS_DBG_RL("ip_route_output error, dest: "
+				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+
+
+/*
+ *	Release dest->dst_cache before a dest is removed
+ */
+void
+ip_vs_dst_reset(struct ip_vs_dest *dest)
+{
+	struct dst_entry *old_dst;
+
+	old_dst = dest->dst_cache;
+	dest->dst_cache = NULL;
+	dst_release(old_dst);
+}
+
+#define IP_VS_XMIT(skb, rt)				\
+do {							\
+	nf_reset_debug(skb);				\
+	(skb)->nfcache |= NFC_IPVS_PROPERTY;		\
+	(skb)->ip_summed = CHECKSUM_NONE;		\
+	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
+		(rt)->u.dst.dev, dst_output);		\
+} while (0)
+
+
+/*
+ *      NULL transmitter (do nothing except return NF_ACCEPT)
+ */
+int
+ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp)
+{
+	/* we do not touch skb and do not need pskb ptr */
+	return NF_ACCEPT;
+}
+
+
+/*
+ *      Bypass transmitter
+ *      Let packets bypass the destination when the destination is not
+ *      available, it may be only used in transparent cache cluster.
+ */
+int
+ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = skb->nh.iph;
+	u8     tos = iph->tos;
+	int    mtu;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip4_u = {
+				.daddr = iph->daddr,
+				.saddr = 0,
+				.tos = RT_TOS(tos), } },
+	};
+
+	EnterFunction(10);
+
+	if (ip_route_output_key(&rt, &fl)) {
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+		goto tx_error_icmp;
+	}
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(skb->nh.iph);
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+ tx_error_icmp:
+	dst_link_failure(skb);
+ tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+
+/*
+ *      NAT transmitter (only for outside-to-inside nat forwarding)
+ *      Not used for related ICMP
+ */
+int
+ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	       struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;		/* Route to the other host */
+	int mtu;
+	struct iphdr *iph = skb->nh.iph;
+
+	EnterFunction(10);
+
+	/* check if it is a connection of no-client-port */
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+		__u16 _pt, *p;
+		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+		if (p == NULL)
+			goto tx_error;
+		ip_vs_conn_fill_cport(cp, *p);
+		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* mangle the packet */
+	if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+		goto tx_error;
+	skb->nh.iph->daddr = cp->daddr;
+	ip_send_check(skb->nh.iph);
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+	/* FIXME: when application helper enlarges the packet and the length
+	   is larger than the MTU of outgoing device, there will be still
+	   MTU problem. */
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	LeaveFunction(10);
+	kfree_skb(skb);
+	return NF_STOLEN;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
+
+
+/*
+ *   IP Tunneling transmitter
+ *
+ *   This function encapsulates the packet in a new IP packet, its
+ *   destination will be set to cp->daddr. Most code of this function
+ *   is taken from ipip.c.
+ *
+ *   It is used in VS/TUN cluster. The load balancer selects a real
+ *   server from a cluster based on a scheduling algorithm,
+ *   encapsulates the request packet and forwards it to the selected
+ *   server. For example, all real servers are configured with
+ *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
+ *   the encapsulated packet, it will decapsulate the packet, processe
+ *   the request and return the response packets directly to the client
+ *   without passing the load balancer. This can greatly increase the
+ *   scalability of virtual server.
+ *
+ *   Used for ANY protocol
+ */
+int
+ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		  struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct net_device *tdev;		/* Device to other host */
+	struct iphdr  *old_iph = skb->nh.iph;
+	u8     tos = old_iph->tos;
+	u16    df = old_iph->frag_off;
+	struct iphdr  *iph;			/* Our new IP header */
+	int    max_headroom;			/* The extra header space needed */
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != __constant_htons(ETH_P_IP)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
+			     "ETH_P_IP: %d, skb protocol: %d\n",
+			     __constant_htons(ETH_P_IP), skb->protocol);
+		goto tx_error;
+	}
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	if (mtu < 68) {
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+	if ((old_iph->frag_off&__constant_htons(IP_DF))
+	    && mtu < ntohs(old_iph->tot_len)) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		old_iph = skb->nh.iph;
+	}
+
+	skb->h.raw = (void *) old_iph;
+
+	/* fix old IP header checksum */
+	ip_send_check(old_iph);
+
+	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	skb->nh.iph;
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	iph->frag_off		=	df;
+	iph->protocol		=	IPPROTO_IPIP;
+	iph->tos		=	tos;
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+	iph->ttl		=	old_iph->ttl;
+	iph->tot_len		=	htons(skb->len);
+	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_send_check(iph);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(skb, rt);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+
+/*
+ *      Direct Routing transmitter
+ *      Used for ANY protocol
+ */
+int
+ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+	      struct ip_vs_protocol *pp)
+{
+	struct rtable *rt;			/* Route to the other host */
+	struct iphdr  *iph = skb->nh.iph;
+	int    mtu;
+
+	EnterFunction(10);
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+		ip_rt_put(rt);
+		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Call ip_send_check because we are not sure it is called
+	 * after ip_defrag. Is copy-on-write needed?
+	 */
+	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
+		ip_rt_put(rt);
+		return NF_STOLEN;
+	}
+	ip_send_check(skb->nh.iph);
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(skb, rt);
+
+	LeaveFunction(10);
+	return NF_STOLEN;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+
+
+/*
+ *	ICMP packet transmitter
+ *	called by the ip_vs_in_icmp
+ */
+int
+ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+		struct ip_vs_protocol *pp, int offset)
+{
+	struct rtable	*rt;	/* Route to the other host */
+	int mtu;
+	int rc;
+
+	EnterFunction(10);
+
+	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+	   forwarded directly here, because there is no need to
+	   translate address/port back */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+		if (cp->packet_xmit)
+			rc = cp->packet_xmit(skb, cp, pp);
+		else
+			rc = NF_ACCEPT;
+		/* do not touch skb anymore */
+		atomic_inc(&cp->in_pkts);
+		__ip_vs_conn_put(cp);
+		goto out;
+	}
+
+	/*
+	 * mangle and send the packet here (only for VS/NAT)
+	 */
+
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+		goto tx_error_icmp;
+
+	/* MTU checking */
+	mtu = dst_mtu(&rt->u.dst);
+	if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+		ip_rt_put(rt);
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+		goto tx_error;
+	}
+
+	/* copy-on-write the packet before mangling it */
+	if (!ip_vs_make_skb_writable(&skb, offset))
+		goto tx_error_put;
+
+	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+		goto tx_error_put;
+
+	/* drop the old route when skb is not shared */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	ip_vs_nat_icmp(skb, pp, cp, 0);
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	IP_VS_XMIT(skb, rt);
+
+	rc = NF_STOLEN;
+	goto out;
+
+  tx_error_icmp:
+	dst_link_failure(skb);
+  tx_error:
+	dev_kfree_skb(skb);
+	rc = NF_STOLEN;
+  out:
+	LeaveFunction(10);
+	return rc;
+  tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
+}
diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c
new file mode 100644
index 00000000000..4e9ca7c7640
--- /dev/null
+++ b/net/ipv4/multipath.c
@@ -0,0 +1,55 @@
+/* multipath.c: IPV4 multipath algorithm support.
+ *
+ * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+ * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ip_mp_alg.h>
+
+static DEFINE_SPINLOCK(alg_table_lock);
+struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
+
+int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+{
+	struct ip_mp_alg_ops **slot;
+	int err;
+
+	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
+	    !ops->mp_alg_select_route)
+		return -EINVAL;
+
+	spin_lock(&alg_table_lock);
+	slot = &ip_mp_alg_table[n];
+	if (*slot != NULL) {
+		err = -EBUSY;
+	} else {
+		*slot = ops;
+		err = 0;
+	}
+	spin_unlock(&alg_table_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(multipath_alg_register);
+
+void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+{
+	struct ip_mp_alg_ops **slot;
+
+	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
+		return;
+
+	spin_lock(&alg_table_lock);
+	slot = &ip_mp_alg_table[n];
+	if (*slot == ops)
+		*slot = NULL;
+	spin_unlock(&alg_table_lock);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(multipath_alg_unregister);
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
new file mode 100644
index 00000000000..9349686131f
--- /dev/null
+++ b/net/ipv4/multipath_drr.c
@@ -0,0 +1,265 @@
+/*
+ *              Device round robin policy for multipath.
+ *
+ *
+ * Version:	$Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
+ *
+ * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ipip.h>
+#include <net/checksum.h>
+#include <net/ip_mp_alg.h>
+
+struct multipath_device {
+	int		ifi; /* interface index of device */
+	atomic_t	usecount;
+	int 		allocated;
+};
+
+#define MULTIPATH_MAX_DEVICECANDIDATES 10
+
+static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
+static DEFINE_SPINLOCK(state_lock);
+static struct rtable *last_selection = NULL;
+
+static int inline __multipath_findslot(void)
+{
+	int i;
+
+	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+		if (state[i].allocated == 0)
+			return i;
+	}
+	return -1;
+}
+
+static int inline __multipath_finddev(int ifindex)
+{
+	int i;
+
+	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+		if (state[i].allocated != 0 &&
+		    state[i].ifi == ifindex)
+			return i;
+	}
+	return -1;
+}
+
+static int drr_dev_event(struct notifier_block *this,
+			 unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	int devidx;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+	case NETDEV_DOWN:
+		spin_lock_bh(&state_lock);
+
+		devidx = __multipath_finddev(dev->ifindex);
+		if (devidx != -1) {
+			state[devidx].allocated = 0;
+			state[devidx].ifi = 0;
+			atomic_set(&state[devidx].usecount, 0);
+		}
+
+		spin_unlock_bh(&state_lock);
+		break;
+	};
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block drr_dev_notifier = {
+	.notifier_call	= drr_dev_event,
+};
+
+static void drr_remove(struct rtable *rt)
+{
+	if (last_selection == rt)
+		last_selection = NULL;
+}
+
+static void drr_safe_inc(atomic_t *usecount)
+{
+	int n;
+
+	atomic_inc(usecount);
+
+	n = atomic_read(usecount);
+	if (n <= 0) {
+		int i;
+
+		spin_lock_bh(&state_lock);
+
+		for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
+			atomic_set(&state[i].usecount, 0);
+
+		spin_unlock_bh(&state_lock);
+	}
+}
+
+static void drr_select_route(const struct flowi *flp,
+			     struct rtable *first, struct rtable **rp)
+{
+	struct rtable *nh, *result, *cur_min;
+	int min_usecount = -1; 
+	int devidx = -1;
+	int cur_min_devidx = -1;
+
+       	/* if necessary and possible utilize the old alternative */
+	if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
+	    last_selection != NULL) {
+		result = last_selection;
+		*rp = result;
+		return;
+	}
+
+	/* 1. make sure all alt. nexthops have the same GC related data */
+	/* 2. determine the new candidate to be returned */
+	result = NULL;
+	cur_min = NULL;
+	for (nh = rcu_dereference(first); nh;
+	     nh = rcu_dereference(nh->u.rt_next)) {
+		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+		    multipath_comparekeys(&nh->fl, flp)) {
+			int nh_ifidx = nh->u.dst.dev->ifindex;
+
+			nh->u.dst.lastuse = jiffies;
+			nh->u.dst.__use++;
+			if (result != NULL)
+				continue;
+
+			/* search for the output interface */
+
+			/* this is not SMP safe, only add/remove are
+			 * SMP safe as wrong usecount updates have no big
+			 * impact
+			 */
+			devidx = __multipath_finddev(nh_ifidx);
+			if (devidx == -1) {
+				/* add the interface to the array 
+				 * SMP safe
+				 */
+				spin_lock_bh(&state_lock);
+
+				/* due to SMP: search again */
+				devidx = __multipath_finddev(nh_ifidx);
+				if (devidx == -1) {
+					/* add entry for device */
+					devidx = __multipath_findslot();
+					if (devidx == -1) {
+						/* unlikely but possible */
+						continue;
+					}
+
+					state[devidx].allocated = 1;
+					state[devidx].ifi = nh_ifidx;
+					atomic_set(&state[devidx].usecount, 0);
+					min_usecount = 0;
+				}
+
+				spin_unlock_bh(&state_lock);
+			}
+
+			if (min_usecount == 0) {
+				/* if the device has not been used it is
+				 * the primary target
+				 */
+				drr_safe_inc(&state[devidx].usecount);
+				result = nh;
+			} else {
+				int count =
+					atomic_read(&state[devidx].usecount);
+
+				if (min_usecount == -1 ||
+				    count < min_usecount) {
+					cur_min = nh;
+					cur_min_devidx = devidx;
+					min_usecount = count;
+				}
+			}
+		}
+	}
+
+	if (!result) {
+		if (cur_min) {
+			drr_safe_inc(&state[cur_min_devidx].usecount);
+			result = cur_min;
+		} else {
+			result = first;
+		}
+	}
+
+	*rp = result;
+	last_selection = result;
+}
+
+static struct ip_mp_alg_ops drr_ops = {
+	.mp_alg_select_route	=	drr_select_route,
+	.mp_alg_remove		=	drr_remove,
+};
+
+static int __init drr_init(void)
+{
+	int err = register_netdevice_notifier(&drr_dev_notifier);
+
+	if (err)
+		return err;
+
+	err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	unregister_netdevice_notifier(&drr_dev_notifier);
+	return err;
+}
+
+static void __exit drr_exit(void)
+{
+	unregister_netdevice_notifier(&drr_dev_notifier);
+	multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
+}
+
+module_init(drr_init);
+module_exit(drr_exit);
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
new file mode 100644
index 00000000000..805a16e47de
--- /dev/null
+++ b/net/ipv4/multipath_random.c
@@ -0,0 +1,128 @@
+/*
+ *              Random policy for multipath.
+ *
+ *
+ * Version:	$Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
+ *
+ * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ipip.h>
+#include <net/checksum.h>
+#include <net/ip_mp_alg.h>
+
+#define MULTIPATH_MAX_CANDIDATES 40
+
+/* interface to random number generation */
+static unsigned int RANDOM_SEED = 93186752;
+
+static inline unsigned int random(unsigned int ubound)
+{
+	static unsigned int a = 1588635695,
+		q = 2,
+		r = 1117695901;
+
+	RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
+
+	return RANDOM_SEED % ubound;
+}
+
+
+static void random_select_route(const struct flowi *flp,
+				struct rtable *first,
+				struct rtable **rp)
+{
+	struct rtable *rt;
+	struct rtable *decision;
+	unsigned char candidate_count = 0;
+
+	/* count all candidate */
+	for (rt = rcu_dereference(first); rt;
+	     rt = rcu_dereference(rt->u.rt_next)) {
+		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+		    multipath_comparekeys(&rt->fl, flp))
+			++candidate_count;
+	}
+
+	/* choose a random candidate */
+	decision = first;
+	if (candidate_count > 1) {
+		unsigned char i = 0;
+		unsigned char candidate_no = (unsigned char)
+			random(candidate_count);
+
+		/* find chosen candidate and adjust GC data for all candidates
+		 * to ensure they stay in cache
+		 */
+		for (rt = first; rt; rt = rt->u.rt_next) {
+			if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+			    multipath_comparekeys(&rt->fl, flp)) {
+				rt->u.dst.lastuse = jiffies;
+
+				if (i == candidate_no)
+					decision = rt;
+
+				if (i >= candidate_count)
+					break;
+
+				i++;
+			}
+		}
+	}
+
+	decision->u.dst.__use++;
+	*rp = decision;
+}
+
+static struct ip_mp_alg_ops random_ops = {
+	.mp_alg_select_route	=	random_select_route,
+};
+
+static int __init random_init(void)
+{
+	return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
+}
+
+static void __exit random_exit(void)
+{
+	multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
+}
+
+module_init(random_init);
+module_exit(random_exit);
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
new file mode 100644
index 00000000000..554a8256816
--- /dev/null
+++ b/net/ipv4/multipath_rr.c
@@ -0,0 +1,115 @@
+/*
+ *              Round robin policy for multipath.
+ *
+ *
+ * Version:	$Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
+ *
+ * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ipip.h>
+#include <net/checksum.h>
+#include <net/ip_mp_alg.h>
+
+#define MULTIPATH_MAX_CANDIDATES 40
+
+static struct rtable* last_used = NULL;
+
+static void rr_remove(struct rtable *rt)
+{
+	if (last_used == rt)
+		last_used = NULL;
+}
+
+static void rr_select_route(const struct flowi *flp,
+			    struct rtable *first, struct rtable **rp)
+{
+	struct rtable *nh, *result, *min_use_cand = NULL;
+	int min_use = -1;
+
+	/* if necessary and possible utilize the old alternative */
+	if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
+	    last_used != NULL) {
+		result = last_used;
+		goto out;
+	}
+
+	/* 1. make sure all alt. nexthops have the same GC related data
+	 * 2. determine the new candidate to be returned
+	 */
+	result = NULL;
+	for (nh = rcu_dereference(first); nh;
+ 	     nh = rcu_dereference(nh->u.rt_next)) {
+		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+		    multipath_comparekeys(&nh->fl, flp)) {
+			nh->u.dst.lastuse = jiffies;
+
+			if (min_use == -1 || nh->u.dst.__use < min_use) {
+				min_use = nh->u.dst.__use;
+				min_use_cand = nh;
+			}
+		}
+	}
+	result = min_use_cand;
+	if (!result)
+		result = first;
+
+out:
+	last_used = result;
+	result->u.dst.__use++;
+	*rp = result;
+}
+
+static struct ip_mp_alg_ops rr_ops = {
+	.mp_alg_select_route	=	rr_select_route,
+	.mp_alg_remove		=	rr_remove,
+};
+
+static int __init rr_init(void)
+{
+	return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
+}
+
+static void __exit rr_exit(void)
+{
+	multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
+}
+
+module_init(rr_init);
+module_exit(rr_exit);
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
new file mode 100644
index 00000000000..10b23e1bece
--- /dev/null
+++ b/net/ipv4/multipath_wrandom.c
@@ -0,0 +1,344 @@
+/*
+ *              Weighted random policy for multipath.
+ *
+ *
+ * Version:	$Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
+ *
+ * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/ipip.h>
+#include <net/checksum.h>
+#include <net/ip_fib.h>
+#include <net/ip_mp_alg.h>
+
+#define MULTIPATH_STATE_SIZE 15
+
+struct multipath_candidate {
+	struct multipath_candidate	*next;
+	int				power;
+	struct rtable			*rt;
+};
+
+struct multipath_dest {
+	struct list_head	list;
+
+	const struct fib_nh	*nh_info;
+	__u32			netmask;
+	__u32			network;
+	unsigned char		prefixlen;
+
+	struct rcu_head		rcu;
+};
+
+struct multipath_bucket {
+	struct list_head	head;
+	spinlock_t		lock;
+};
+
+struct multipath_route {
+	struct list_head	list;
+
+	int			oif;
+	__u32			gw;
+	struct list_head	dests;
+
+	struct rcu_head		rcu;
+};
+
+/* state: primarily weight per route information */
+static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
+
+/* interface to random number generation */
+static unsigned int RANDOM_SEED = 93186752;
+
+static inline unsigned int random(unsigned int ubound)
+{
+	static unsigned int a = 1588635695,
+		q = 2,
+		r = 1117695901;
+	RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
+	return RANDOM_SEED % ubound;
+}
+
+static unsigned char __multipath_lookup_weight(const struct flowi *fl,
+					       const struct rtable *rt)
+{
+	const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
+	struct multipath_route *r;
+	struct multipath_route *target_route = NULL;
+	struct multipath_dest *d;
+	int weight = 1;
+
+	/* lookup the weight information for a certain route */
+	rcu_read_lock();
+
+	/* find state entry for gateway or add one if necessary */
+	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+		if (r->gw == rt->rt_gateway &&
+		    r->oif == rt->idev->dev->ifindex) {
+			target_route = r;
+			break;
+		}
+	}
+
+	if (!target_route) {
+		/* this should not happen... but we are prepared */
+		printk( KERN_CRIT"%s: missing state for gateway: %u and " \
+			"device %d\n", __FUNCTION__, rt->rt_gateway,
+			rt->idev->dev->ifindex);
+		goto out;
+	}
+
+	/* find state entry for destination */
+	list_for_each_entry_rcu(d, &target_route->dests, list) {
+		__u32 targetnetwork = fl->fl4_dst & 
+			(0xFFFFFFFF >> (32 - d->prefixlen));
+
+		if ((targetnetwork & d->netmask) == d->network) {
+			weight = d->nh_info->nh_weight;
+			goto out;
+		}
+	}
+
+out:
+	rcu_read_unlock();
+	return weight;
+}
+
+static void wrandom_init_state(void) 
+{
+	int i;
+
+	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+		INIT_LIST_HEAD(&state[i].head);
+		spin_lock_init(&state[i].lock);
+	}
+}
+
+static void wrandom_select_route(const struct flowi *flp,
+				 struct rtable *first,
+				 struct rtable **rp)
+{
+	struct rtable *rt;
+	struct rtable *decision;
+	struct multipath_candidate *first_mpc = NULL;
+	struct multipath_candidate *mpc, *last_mpc = NULL;
+	int power = 0;
+	int last_power;
+	int selector;
+	const size_t size_mpc = sizeof(struct multipath_candidate);
+
+	/* collect all candidates and identify their weights */
+	for (rt = rcu_dereference(first); rt;
+	     rt = rcu_dereference(rt->u.rt_next)) {
+		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+		    multipath_comparekeys(&rt->fl, flp)) {
+			struct multipath_candidate* mpc =
+				(struct multipath_candidate*)
+				kmalloc(size_mpc, GFP_KERNEL);
+
+			if (!mpc)
+				return;
+
+			power += __multipath_lookup_weight(flp, rt) * 10000;
+
+			mpc->power = power;
+			mpc->rt = rt;
+			mpc->next = NULL;
+
+			if (!first_mpc)
+				first_mpc = mpc;
+			else
+				last_mpc->next = mpc;
+
+			last_mpc = mpc;
+		}
+	}
+
+	/* choose a weighted random candidate */
+	decision = first;
+	selector = random(power);
+	last_power = 0;
+
+	/* select candidate, adjust GC data and cleanup local state */
+	decision = first;
+	last_mpc = NULL;
+	for (mpc = first_mpc; mpc; mpc = mpc->next) {
+		mpc->rt->u.dst.lastuse = jiffies;
+		if (last_power <= selector && selector < mpc->power)
+			decision = mpc->rt;
+
+		last_power = mpc->power;
+		if (last_mpc)
+			kfree(last_mpc);
+
+		last_mpc = mpc;
+	}
+
+	if (last_mpc) {
+		/* concurrent __multipath_flush may lead to !last_mpc */
+		kfree(last_mpc);
+	}
+
+	decision->u.dst.__use++;
+	*rp = decision;
+}
+
+static void wrandom_set_nhinfo(__u32 network,
+			       __u32 netmask,
+			       unsigned char prefixlen,
+			       const struct fib_nh *nh)
+{
+	const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
+	struct multipath_route *r, *target_route = NULL;
+	struct multipath_dest *d, *target_dest = NULL;
+
+	/* store the weight information for a certain route */
+	spin_lock(&state[state_idx].lock);
+
+	/* find state entry for gateway or add one if necessary */
+	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+		if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
+			target_route = r;
+			break;
+		}
+	}
+
+	if (!target_route) {
+		const size_t size_rt = sizeof(struct multipath_route);
+		target_route = (struct multipath_route *)
+			kmalloc(size_rt, GFP_KERNEL);
+
+		target_route->gw = nh->nh_gw;
+		target_route->oif = nh->nh_oif;
+		memset(&target_route->rcu, 0, sizeof(struct rcu_head));
+		INIT_LIST_HEAD(&target_route->dests);
+
+		list_add_rcu(&target_route->list, &state[state_idx].head);
+	}
+
+	/* find state entry for destination or add one if necessary */
+	list_for_each_entry_rcu(d, &target_route->dests, list) {
+		if (d->nh_info == nh) {
+			target_dest = d;
+			break;
+		}
+	}
+
+	if (!target_dest) {
+		const size_t size_dst = sizeof(struct multipath_dest);
+		target_dest = (struct multipath_dest*)
+			kmalloc(size_dst, GFP_KERNEL);
+
+		target_dest->nh_info = nh;
+		target_dest->network = network;
+		target_dest->netmask = netmask;
+		target_dest->prefixlen = prefixlen;
+		memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
+
+		list_add_rcu(&target_dest->list, &target_route->dests);
+	}
+	/* else: we already stored this info for another destination =>
+	 * we are finished
+	 */
+
+	spin_unlock(&state[state_idx].lock);
+}
+
+static void __multipath_free(struct rcu_head *head)
+{
+	struct multipath_route *rt = container_of(head, struct multipath_route,
+						  rcu);
+	kfree(rt);
+}
+
+static void __multipath_free_dst(struct rcu_head *head)
+{
+  	struct multipath_dest *dst = container_of(head,
+						  struct multipath_dest,
+						  rcu);
+	kfree(dst);
+}
+
+static void wrandom_flush(void)
+{
+	int i;
+
+	/* defere delete to all entries */
+	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+		struct multipath_route *r;
+
+		spin_lock(&state[i].lock);
+		list_for_each_entry_rcu(r, &state[i].head, list) {
+			struct multipath_dest *d;
+			list_for_each_entry_rcu(d, &r->dests, list) {
+				list_del_rcu(&d->list);
+				call_rcu(&d->rcu,
+					 __multipath_free_dst);
+			}
+			list_del_rcu(&r->list);
+			call_rcu(&r->rcu,
+				 __multipath_free);
+		}
+
+		spin_unlock(&state[i].lock);
+	}
+}
+
+static struct ip_mp_alg_ops wrandom_ops = {
+	.mp_alg_select_route	=	wrandom_select_route,
+	.mp_alg_flush		=	wrandom_flush,
+	.mp_alg_set_nhinfo	=	wrandom_set_nhinfo,
+};
+
+static int __init wrandom_init(void)
+{
+	wrandom_init_state();
+
+	return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
+}
+
+static void __exit wrandom_exit(void)
+{
+	multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
+}
+
+module_init(wrandom_init);
+module_exit(wrandom_exit);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
new file mode 100644
index 00000000000..46d4cb1c06f
--- /dev/null
+++ b/net/ipv4/netfilter/Kconfig
@@ -0,0 +1,696 @@
+#
+# IP netfilter configuration
+#
+
+menu "IP: Netfilter Configuration"
+	depends on INET && NETFILTER
+
+# connection tracking, helpers and protocols
+config IP_NF_CONNTRACK
+	tristate "Connection tracking (required for masq/NAT)"
+	---help---
+	  Connection tracking keeps a record of what packets have passed
+	  through your machine, in order to figure out how they are related
+	  into connections.
+
+	  This is required to do Masquerading or other kinds of Network
+	  Address Translation (except for Fast NAT).  It can also be used to
+	  enhance packet filtering (see `Connection state match support'
+	  below).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_CT_ACCT
+	bool "Connection tracking flow accounting"
+	depends on IP_NF_CONNTRACK
+	help
+	  If this option is enabled, the connection tracking code will
+	  keep per-flow packet and byte counters.
+
+	  Those counters can be used for flow-based accounting or the
+	  `connbytes' match.
+
+	  If unsure, say `N'.
+
+config IP_NF_CONNTRACK_MARK
+	bool  'Connection mark tracking support'
+	help
+	  This option enables support for connection marks, used by the
+	  `CONNMARK' target and `connmark' match. Similar to the mark value
+	  of packets, but this mark value is kept in the conntrack session
+	  instead of the individual packets.
+	
+config IP_NF_CT_PROTO_SCTP
+	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
+	depends on IP_NF_CONNTRACK && EXPERIMENTAL
+	help
+	  With this option enabled, the connection tracking code will
+	  be able to do state tracking on SCTP connections.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_FTP
+	tristate "FTP protocol support"
+	depends on IP_NF_CONNTRACK
+	help
+	  Tracking FTP connections is problematic: special helpers are
+	  required for tracking them, and doing masquerading and other forms
+	  of Network Address Translation on them.
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config IP_NF_IRC
+	tristate "IRC protocol support"
+	depends on IP_NF_CONNTRACK
+	---help---
+	  There is a commonly-used extension to IRC called
+	  Direct Client-to-Client Protocol (DCC).  This enables users to send
+	  files to each other, and also chat to each other without the need
+	  of a server.  DCC Sending is used anywhere you send files over IRC,
+	  and DCC Chat is most commonly used by Eggdrop bots.  If you are
+	  using NAT, this extension will enable you to send files and initiate
+	  chats.  Note that you do NOT need this extension to get files or
+	  have others initiate chats, or everything else in IRC.
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config IP_NF_TFTP
+	tristate "TFTP protocol support"
+	depends on IP_NF_CONNTRACK
+	help
+	  TFTP connection tracking helper, this is required depending
+	  on how restrictive your ruleset is.
+	  If you are using a tftp client behind -j SNAT or -j MASQUERADING
+	  you will need this.
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config IP_NF_AMANDA
+	tristate "Amanda backup protocol support"
+	depends on IP_NF_CONNTRACK
+	help
+	  If you are running the Amanda backup package <http://www.amanda.org/>
+	  on this machine or machines that will be MASQUERADED through this
+	  machine, then you may want to enable this feature.  This allows the
+	  connection tracking and natting code to allow the sub-channels that
+	  Amanda requires for communication of the backup data, messages and
+	  index.
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config IP_NF_QUEUE
+	tristate "Userspace queueing via NETLINK"
+	help
+	  Netfilter has the ability to queue packets to user space: the
+	  netlink device can be used to access them using this driver.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_IPTABLES
+	tristate "IP tables support (required for filtering/masq/NAT)"
+	help
+	  iptables is a general, extensible packet identification framework.
+	  The packet filtering and full NAT (masquerading, port forwarding,
+	  etc) subsystems now use this: say `Y' or `M' here if you want to use
+	  either of those.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+# The matches.
+config IP_NF_MATCH_LIMIT
+	tristate "limit match support"
+	depends on IP_NF_IPTABLES
+	help
+	  limit matching allows you to control the rate at which a rule can be
+	  matched: mainly useful in combination with the LOG target ("LOG
+	  target support", below) and to avoid some Denial of Service attacks.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_IPRANGE
+	tristate "IP range match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option makes possible to match IP addresses against IP address
+	  ranges.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_MAC
+	tristate "MAC address match support"
+	depends on IP_NF_IPTABLES
+	help
+	  MAC matching allows you to match packets based on the source
+	  Ethernet address of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_PKTTYPE
+	tristate "Packet type match support"
+	depends on IP_NF_IPTABLES
+	help
+         Packet type matching allows you to match a packet by
+         its "class", eg. BROADCAST, MULTICAST, ...
+
+	  Typical usage:
+	  iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_MARK
+	tristate "netfilter MARK match support"
+	depends on IP_NF_IPTABLES
+	help
+	  Netfilter mark matching allows you to match packets based on the
+	  `nfmark' value in the packet.  This can be set by the MARK target
+	  (see below).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_MULTIPORT
+	tristate "Multiple port match support"
+	depends on IP_NF_IPTABLES
+	help
+	  Multiport matching allows you to match TCP or UDP packets based on
+	  a series of source or destination ports: normally a rule can only
+	  match a single range of ports.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_TOS
+	tristate "TOS match support"
+	depends on IP_NF_IPTABLES
+	help
+	  TOS matching allows you to match packets based on the Type Of
+	  Service fields of the IP packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_RECENT
+	tristate "recent match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This match is used for creating one or many lists of recently
+	  used addresses and then matching against that/those list(s).
+
+	  Short options are available by using 'iptables -m recent -h'
+	  Official Website: <http://snowman.net/projects/ipt_recent/>
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_ECN
+	tristate "ECN match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `ECN' match, which allows you to match against
+	  the IPv4 and TCP header ECN fields.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_DSCP
+	tristate "DSCP match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `DSCP' match, which allows you to match against
+	  the IPv4 header DSCP field (DSCP codepoint).
+
+	  The DSCP codepoint can have any value between 0x0 and 0x4f.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_AH_ESP
+	tristate "AH/ESP match support"
+	depends on IP_NF_IPTABLES
+	help
+	  These two match extensions (`ah' and `esp') allow you to match a
+	  range of SPIs inside AH or ESP headers of IPSec packets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_LENGTH
+	tristate "LENGTH match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option allows you to match the length of a packet against a
+	  specific value or range of values.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_TTL
+	tristate "TTL match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
+	  to match packets by their TTL value.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_TCPMSS
+	tristate "tcpmss match support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `tcpmss' match, which allows you to examine the
+	  MSS value of TCP SYN packets, which control the maximum packet size
+	  for that connection.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_HELPER
+	tristate "Helper match support"
+	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	help
+	  Helper matching allows you to match packets in dynamic connections
+	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
+
+	  To compile it as a module, choose M here.  If unsure, say Y.
+
+config IP_NF_MATCH_STATE
+	tristate "Connection state match support"
+	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	help
+	  Connection state matching allows you to match packets based on their
+	  relationship to a tracked connection (ie. previous packets).  This
+	  is a powerful tool for packet classification.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_CONNTRACK
+	tristate "Connection tracking match support"
+	depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+	help
+	  This is a general conntrack match module, a superset of the state match.
+
+	  It allows matching on additional conntrack information, which is
+	  useful in complex configurations, such as NAT gateways with multiple
+	  internet links or tunnels.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_OWNER
+	tristate "Owner match support"
+	depends on IP_NF_IPTABLES
+	help
+	  Packet owner matching allows you to match locally-generated packets
+	  based on who created them: the user, group, process or session.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_PHYSDEV
+	tristate "Physdev match support"
+	depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
+	help
+	  Physdev packet matching matches against the physical bridge ports
+	  the IP packet arrived on or will leave by.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_MATCH_ADDRTYPE
+	tristate  'address type match support'
+	depends on IP_NF_IPTABLES
+	help
+	  This option allows you to match what routing thinks of an address,
+	  eg. UNICAST, LOCAL, BROADCAST, ...
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_MATCH_REALM
+	tristate  'realm match support'
+	depends on IP_NF_IPTABLES
+	select NET_CLS_ROUTE
+	help
+	  This option adds a `realm' match, which allows you to use the realm
+	  key from the routing subsystem inside iptables.
+	
+	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
+	  in tc world.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_MATCH_SCTP
+	tristate  'SCTP protocol match support'
+	depends on IP_NF_IPTABLES
+	help
+	  With this option enabled, you will be able to use the iptables
+	  `sctp' match in order to match on SCTP source/destination ports
+	  and SCTP chunk types.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_MATCH_COMMENT
+	tristate  'comment match support'
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `comment' dummy-match, which allows you to put
+	  comments in your iptables ruleset.
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_MATCH_CONNMARK
+	tristate  'Connection mark match support'
+	depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
+	help
+	  This option adds a `connmark' match, which allows you to match the
+	  connection mark value previously set for the session by `CONNMARK'. 
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  The module will be called
+	  ipt_connmark.o.  If unsure, say `N'.
+
+config IP_NF_MATCH_HASHLIMIT
+	tristate  'hashlimit match support'
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a new iptables `hashlimit' match.  
+
+	  As opposed to `limit', this match dynamically crates a hash table
+	  of limit buckets, based on your selection of source/destination
+	  ip addresses and/or ports.
+
+	  It enables you to express policies like `10kpps for any given
+	  destination IP' or `500pps from any given source IP'  with a single
+	  IPtables rule.
+
+# `filter', generic and specific targets
+config IP_NF_FILTER
+	tristate "Packet filtering"
+	depends on IP_NF_IPTABLES
+	help
+	  Packet filtering defines a table `filter', which has a series of
+	  rules for simple packet filtering at local input, forwarding and
+	  local output.  See the man page for iptables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_REJECT
+	tristate "REJECT target support"
+	depends on IP_NF_FILTER
+	help
+	  The REJECT target allows a filtering rule to specify that an ICMP
+	  error should be issued in response to an incoming packet, rather
+	  than silently being dropped.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_LOG
+	tristate "LOG target support"
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `LOG' target, which allows you to create rules in
+	  any iptables table which records the packet header to the syslog.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_ULOG
+	tristate "ULOG target support"
+	depends on IP_NF_IPTABLES
+	---help---
+	  This option adds a `ULOG' target, which allows you to create rules in
+	  any iptables table. The packet is passed to a userspace logging
+	  daemon using netlink multicast sockets; unlike the LOG target
+	  which can only be viewed through syslog.
+
+	  The apropriate userspace logging daemon (ulogd) may be obtained from
+	  <http://www.gnumonks.org/projects/ulogd/>
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_TCPMSS
+	tristate "TCPMSS target support"
+	depends on IP_NF_IPTABLES
+	---help---
+	  This option adds a `TCPMSS' target, which allows you to alter the
+	  MSS value of TCP SYN packets, to control the maximum size for that
+	  connection (usually limiting it to your outgoing interface's MTU
+	  minus 40).
+
+	  This is used to overcome criminally braindead ISPs or servers which
+	  block ICMP Fragmentation Needed packets.  The symptoms of this
+	  problem are that everything works fine from your Linux
+	  firewall/router, but machines behind it can never exchange large
+	  packets:
+	  	1) Web browsers connect, then hang with no data received.
+	  	2) Small mail works fine, but large emails hang.
+	  	3) ssh works fine, but scp hangs after initial handshaking.
+
+	  Workaround: activate this option and add a rule to your firewall
+	  configuration like:
+
+	  iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
+	  		 -j TCPMSS --clamp-mss-to-pmtu
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+# NAT + specific targets
+config IP_NF_NAT
+	tristate "Full NAT"
+	depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
+	help
+	  The Full NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation.  It is controlled by
+	  the `nat' table in iptables: see the man page for iptables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_NAT_NEEDED
+	bool
+	depends on IP_NF_NAT != n
+	default y
+
+config IP_NF_TARGET_MASQUERADE
+	tristate "MASQUERADE target support"
+	depends on IP_NF_NAT
+	help
+	  Masquerading is a special case of NAT: all outgoing connections are
+	  changed to seem to come from a particular interface's address, and
+	  if the interface goes down, those connections are lost.  This is
+	  only useful for dialup accounts with dynamic IP address (ie. your IP
+	  address will be different on next dialup).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_REDIRECT
+	tristate "REDIRECT target support"
+	depends on IP_NF_NAT
+	help
+	  REDIRECT is a special case of NAT: all incoming connections are
+	  mapped onto the incoming interface's address, causing the packets to
+	  come to the local machine instead of passing through.  This is
+	  useful for transparent proxies.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_NETMAP
+	tristate "NETMAP target support"
+	depends on IP_NF_NAT
+	help
+	  NETMAP is an implementation of static 1:1 NAT mapping of network
+	  addresses. It maps the network address part, while keeping the host
+	  address part intact. It is similar to Fast NAT, except that
+	  Netfilter's connection tracking doesn't work well with Fast NAT.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_SAME
+	tristate "SAME target support"
+	depends on IP_NF_NAT
+	help
+	  This option adds a `SAME' target, which works like the standard SNAT
+	  target, but attempts to give clients the same IP for all connections.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_NAT_SNMP_BASIC
+	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && IP_NF_NAT
+	---help---
+
+	  This module implements an Application Layer Gateway (ALG) for
+	  SNMP payloads.  In conjunction with NAT, it allows a network
+	  management system to access multiple private networks with
+	  conflicting addresses.  It works by modifying IP addresses
+	  inside SNMP payloads to match IP-layer NAT mapping.
+
+	  This is the "basic" form of SNMP-ALG, as described in RFC 2962
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_NAT_IRC
+	tristate
+	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+	default IP_NF_NAT if IP_NF_IRC=y
+	default m if IP_NF_IRC=m
+
+# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), 
+# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.  Argh.
+config IP_NF_NAT_FTP
+	tristate
+	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+	default IP_NF_NAT if IP_NF_FTP=y
+	default m if IP_NF_FTP=m
+
+config IP_NF_NAT_TFTP
+	tristate
+	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+	default IP_NF_NAT if IP_NF_TFTP=y
+	default m if IP_NF_TFTP=m
+
+config IP_NF_NAT_AMANDA
+	tristate
+	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
+	default IP_NF_NAT if IP_NF_AMANDA=y
+	default m if IP_NF_AMANDA=m
+
+# mangle + specific targets
+config IP_NF_MANGLE
+	tristate "Packet mangling"
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `mangle' table to iptables: see the man page for
+	  iptables(8).  This table is used for various packet alterations
+	  which can effect how the packet is routed.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_TOS
+	tristate "TOS target support"
+	depends on IP_NF_MANGLE
+	help
+	  This option adds a `TOS' target, which allows you to create rules in
+	  the `mangle' table which alter the Type Of Service field of an IP
+	  packet prior to routing.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_ECN
+	tristate "ECN target support"
+	depends on IP_NF_MANGLE
+	---help---
+	  This option adds a `ECN' target, which can be used in the iptables mangle
+	  table.  
+
+	  You can use this target to remove the ECN bits from the IPv4 header of
+	  an IP packet.  This is particularly useful, if you need to work around
+	  existing ECN blackholes on the internet, but don't want to disable
+	  ECN support in general.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_DSCP
+	tristate "DSCP target support"
+	depends on IP_NF_MANGLE
+	help
+	  This option adds a `DSCP' match, which allows you to match against
+	  the IPv4 header DSCP field (DSCP codepoint).
+
+	  The DSCP codepoint can have any value between 0x0 and 0x4f.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_MARK
+	tristate "MARK target support"
+	depends on IP_NF_MANGLE
+	help
+	  This option adds a `MARK' target, which allows you to create rules
+	  in the `mangle' table which alter the netfilter mark (nfmark) field
+	  associated with the packet prior to routing. This can change
+	  the routing method (see `Use netfilter MARK value as routing
+	  key') and can also be used by other subsystems to change their
+	  behavior.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_CLASSIFY
+	tristate "CLASSIFY target support"
+	depends on IP_NF_MANGLE
+	help
+	  This option adds a `CLASSIFY' target, which enables the user to set
+	  the priority of a packet. Some qdiscs can use this value for
+	  classification, among these are:
+
+  	  atm, cbq, dsmark, pfifo_fast, htb, prio
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_TARGET_CONNMARK
+	tristate  'CONNMARK target support'
+	depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
+	help
+	  This option adds a `CONNMARK' target, which allows one to manipulate
+	  the connection mark value.  Similar to the MARK target, but
+	  affects the connection mark value rather than the packet mark value.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  The module will be called
+	  ipt_CONNMARK.o.  If unsure, say `N'.
+
+config IP_NF_TARGET_CLUSTERIP
+	tristate "CLUSTERIP target support (EXPERIMENTAL)"
+	depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
+	help
+	  The CLUSTERIP target allows you to build load-balancing clusters of
+	  network servers without having a dedicated load-balancing
+	  router/server/switch.
+	
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+# raw + specific targets
+config IP_NF_RAW
+	tristate  'raw table support (required for NOTRACK/TRACE)'
+	depends on IP_NF_IPTABLES
+	help
+	  This option adds a `raw' table to iptables. This table is the very
+	  first in the netfilter framework and hooks in at the PREROUTING
+	  and OUTPUT chains.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+config IP_NF_TARGET_NOTRACK
+	tristate  'NOTRACK target support'
+	depends on IP_NF_RAW
+	depends on IP_NF_CONNTRACK
+	help
+	  The NOTRACK target allows a select rule to specify
+	  which packets *not* to enter the conntrack/NAT
+	  subsystem with all the consequences (no ICMP error tracking,
+	  no protocol helpers for the selected packets).
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+
+# ARP tables
+config IP_NF_ARPTABLES
+	tristate "ARP tables support"
+	help
+	  arptables is a general, extensible packet identification framework.
+	  The ARP packet filtering and mangling (manipulation)subsystems
+	  use this: say Y or M here if you want to use either of those.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_ARPFILTER
+	tristate "ARP packet filtering"
+	depends on IP_NF_ARPTABLES
+	help
+	  ARP packet filtering defines a table `filter', which has a series of
+	  rules for simple ARP packet filtering at local input and
+	  local output.  On a bridge, you can also specify filtering rules
+	  for forwarded ARP packets. See the man page for arptables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_NF_ARP_MANGLE
+	tristate "ARP payload mangling"
+	depends on IP_NF_ARPTABLES
+	help
+	  Allows altering the ARP packet payload: source and destination
+	  hardware and network addresses.
+
+endmenu
+
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
new file mode 100644
index 00000000000..45796d5924d
--- /dev/null
+++ b/net/ipv4/netfilter/Makefile
@@ -0,0 +1,89 @@
+#
+# Makefile for the netfilter modules on top of IPv4.
+#
+
+# objects for the standalone - connection tracking / NAT
+ip_conntrack-objs	:= ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
+iptable_nat-objs	:= ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+
+# connection tracking
+obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+
+# SCTP protocol connection tracking
+obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
+
+# connection tracking helpers
+obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
+obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
+obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
+obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
+
+# NAT helpers 
+obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
+obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
+obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
+obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
+
+# generic IP tables 
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the three instances of ip_tables
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
+obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
+obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
+obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
+obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
+obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
+obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
+obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
+obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
+obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
+obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
+obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
+obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
+obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
+obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
+obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
+obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
+obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
+obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
+obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
+obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
+obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
+obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
+obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
+obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
+obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
+obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
+obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
+obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
+obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+
+# generic ARP tables
+obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
+obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+
+# just filtering instance of ARP tables for now
+obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
new file mode 100644
index 00000000000..df79f5ed6a0
--- /dev/null
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -0,0 +1,1333 @@
+/*
+ * Packet matching code for ARP packets.
+ *
+ * Based heavily, if not almost entirely, upon ip_tables.c framework.
+ *
+ * Some ARP specific bits are:
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables core");
+
+/*#define DEBUG_ARP_TABLES*/
+/*#define DEBUG_ARP_TABLES_USER*/
+
+#ifdef DEBUG_ARP_TABLES
+#define dprintf(format, args...)  printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_ARP_TABLES_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define ARP_NF_ASSERT(x)					\
+do {								\
+	if (!(x))						\
+		printk("ARP_NF_ASSERT: %s:%s:%u\n",		\
+		       __FUNCTION__, __FILE__, __LINE__);	\
+} while(0)
+#else
+#define ARP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(arpt_mutex);
+
+#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+struct arpt_table_info {
+	unsigned int size;
+	unsigned int number;
+	unsigned int initial_entries;
+	unsigned int hook_entry[NF_ARP_NUMHOOKS];
+	unsigned int underflow[NF_ARP_NUMHOOKS];
+	char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
+};
+
+static LIST_HEAD(arpt_target);
+static LIST_HEAD(arpt_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
+				      char *hdr_addr, int len)
+{
+	int i, ret;
+
+	if (len > ARPT_DEV_ADDR_LEN_MAX)
+		len = ARPT_DEV_ADDR_LEN_MAX;
+
+	ret = 0;
+	for (i = 0; i < len; i++)
+		ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
+
+	return (ret != 0);
+}
+
+/* Returns whether packet matches rule or not. */
+static inline int arp_packet_match(const struct arphdr *arphdr,
+				   struct net_device *dev,
+				   const char *indev,
+				   const char *outdev,
+				   const struct arpt_arp *arpinfo)
+{
+	char *arpptr = (char *)(arphdr + 1);
+	char *src_devaddr, *tgt_devaddr;
+	u32 src_ipaddr, tgt_ipaddr;
+	int i, ret;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
+
+	if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
+		  ARPT_INV_ARPOP)) {
+		dprintf("ARP operation field mismatch.\n");
+		dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
+			arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+		return 0;
+	}
+
+	if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
+		  ARPT_INV_ARPHRD)) {
+		dprintf("ARP hardware address format mismatch.\n");
+		dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
+			arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+		return 0;
+	}
+
+	if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
+		  ARPT_INV_ARPPRO)) {
+		dprintf("ARP protocol address format mismatch.\n");
+		dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
+			arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+		return 0;
+	}
+
+	if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
+		  ARPT_INV_ARPHLN)) {
+		dprintf("ARP hardware address length mismatch.\n");
+		dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
+			arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+		return 0;
+	}
+
+	src_devaddr = arpptr;
+	arpptr += dev->addr_len;
+	memcpy(&src_ipaddr, arpptr, sizeof(u32));
+	arpptr += sizeof(u32);
+	tgt_devaddr = arpptr;
+	arpptr += dev->addr_len;
+	memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
+
+	if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
+		  ARPT_INV_SRCDEVADDR) ||
+	    FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
+		  ARPT_INV_TGTDEVADDR)) {
+		dprintf("Source or target device address mismatch.\n");
+
+		return 0;
+	}
+
+	if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
+		  ARPT_INV_SRCIP) ||
+	    FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
+		  ARPT_INV_TGTIP)) {
+		dprintf("Source or target IP address mismatch.\n");
+
+		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+			NIPQUAD(src_ipaddr),
+			NIPQUAD(arpinfo->smsk.s_addr),
+			NIPQUAD(arpinfo->src.s_addr),
+			arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
+		dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+			NIPQUAD(tgt_ipaddr),
+			NIPQUAD(arpinfo->tmsk.s_addr),
+			NIPQUAD(arpinfo->tgt.s_addr),
+			arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+		return 0;
+	}
+
+	/* Look for ifname matches.  */
+	for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+		ret |= (indev[i] ^ arpinfo->iniface[i])
+			& arpinfo->iniface_mask[i];
+	}
+
+	if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
+		dprintf("VIA in mismatch (%s vs %s).%s\n",
+			indev, arpinfo->iniface,
+			arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+		return 0;
+	}
+
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		unsigned long odev;
+		memcpy(&odev, outdev + i*sizeof(unsigned long),
+		       sizeof(unsigned long));
+		ret |= (odev
+			^ ((const unsigned long *)arpinfo->outiface)[i])
+			& ((const unsigned long *)arpinfo->outiface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
+		dprintf("VIA out mismatch (%s vs %s).%s\n",
+			outdev, arpinfo->outiface,
+			arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+		return 0;
+	}
+
+	return 1;
+}
+
+static inline int arp_checkentry(const struct arpt_arp *arp)
+{
+	if (arp->flags & ~ARPT_F_MASK) {
+		duprintf("Unknown flag bits set: %08X\n",
+			 arp->flags & ~ARPT_F_MASK);
+		return 0;
+	}
+	if (arp->invflags & ~ARPT_INV_MASK) {
+		duprintf("Unknown invflag bits set: %08X\n",
+			 arp->invflags & ~ARPT_INV_MASK);
+		return 0;
+	}
+
+	return 1;
+}
+
+static unsigned int arpt_error(struct sk_buff **pskb,
+			       unsigned int hooknum,
+			       const struct net_device *in,
+			       const struct net_device *out,
+			       const void *targinfo,
+			       void *userinfo)
+{
+	if (net_ratelimit())
+		printk("arp_tables: error: '%s'\n", (char *)targinfo);
+
+	return NF_DROP;
+}
+
+static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
+{
+	return (struct arpt_entry *)(base + offset);
+}
+
+unsigned int arpt_do_table(struct sk_buff **pskb,
+			   unsigned int hook,
+			   const struct net_device *in,
+			   const struct net_device *out,
+			   struct arpt_table *table,
+			   void *userdata)
+{
+	static const char nulldevname[IFNAMSIZ];
+	unsigned int verdict = NF_DROP;
+	struct arphdr *arp;
+	int hotdrop = 0;
+	struct arpt_entry *e, *back;
+	const char *indev, *outdev;
+	void *table_base;
+
+	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
+	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
+				     (2 * (*pskb)->dev->addr_len) +
+				     (2 * sizeof(u32)))))
+		return NF_DROP;
+
+	indev = in ? in->name : nulldevname;
+	outdev = out ? out->name : nulldevname;
+
+	read_lock_bh(&table->lock);
+	table_base = (void *)table->private->entries
+		+ TABLE_OFFSET(table->private,
+			       smp_processor_id());
+	e = get_entry(table_base, table->private->hook_entry[hook]);
+	back = get_entry(table_base, table->private->underflow[hook]);
+
+	arp = (*pskb)->nh.arph;
+	do {
+		if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+			struct arpt_entry_target *t;
+			int hdr_len;
+
+			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+				(2 * (*pskb)->dev->addr_len);
+			ADD_COUNTER(e->counters, hdr_len, 1);
+
+			t = arpt_get_target(e);
+
+			/* Standard target? */
+			if (!t->u.kernel.target->target) {
+				int v;
+
+				v = ((struct arpt_standard_target *)t)->verdict;
+				if (v < 0) {
+					/* Pop from stack? */
+					if (v != ARPT_RETURN) {
+						verdict = (unsigned)(-v) - 1;
+						break;
+					}
+					e = back;
+					back = get_entry(table_base,
+							 back->comefrom);
+					continue;
+				}
+				if (table_base + v
+				    != (void *)e + e->next_offset) {
+					/* Save old back ptr in next entry */
+					struct arpt_entry *next
+						= (void *)e + e->next_offset;
+					next->comefrom =
+						(void *)back - table_base;
+
+					/* set back pointer to next entry */
+					back = next;
+				}
+
+				e = get_entry(table_base, v);
+			} else {
+				/* Targets which reenter must return
+				 * abs. verdicts
+				 */
+				verdict = t->u.kernel.target->target(pskb,
+								     hook,
+								     in, out,
+								     t->data,
+								     userdata);
+
+				/* Target might have changed stuff. */
+				arp = (*pskb)->nh.arph;
+
+				if (verdict == ARPT_CONTINUE)
+					e = (void *)e + e->next_offset;
+				else
+					/* Verdict */
+					break;
+			}
+		} else {
+			e = (void *)e + e->next_offset;
+		}
+	} while (!hotdrop);
+	read_unlock_bh(&table->lock);
+
+	if (hotdrop)
+		return NF_DROP;
+	else
+		return verdict;
+}
+
+static inline void *find_inlist_lock_noload(struct list_head *head,
+					    const char *name,
+					    int *error,
+					    struct semaphore *mutex)
+{
+	void *ret;
+
+	*error = down_interruptible(mutex);
+	if (*error != 0)
+		return NULL;
+
+	ret = list_named_find(head, name);
+	if (!ret) {
+		*error = -ENOENT;
+		up(mutex);
+	}
+	return ret;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+static void *
+find_inlist_lock(struct list_head *head,
+		 const char *name,
+		 const char *prefix,
+		 int *error,
+		 struct semaphore *mutex)
+{
+	void *ret;
+
+	ret = find_inlist_lock_noload(head, name, error, mutex);
+	if (!ret) {
+		duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
+		request_module("%s%s", prefix, name);
+		ret = find_inlist_lock_noload(head, name, error, mutex);
+	}
+
+	return ret;
+}
+#endif
+
+static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
+}
+
+static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
+}
+
+/* All zeroes == unconditional rule. */
+static inline int unconditional(const struct arpt_arp *arp)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
+		if (((__u32 *)arp)[i])
+			return 0;
+
+	return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ * there are loops.  Puts hook bitmask in comefrom.
+ */
+static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks)
+{
+	unsigned int hook;
+
+	/* No recursion; use packet counter to save back ptrs (reset
+	 * to 0 as we leave), and comefrom to save source hook bitmask.
+	 */
+	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
+		unsigned int pos = newinfo->hook_entry[hook];
+		struct arpt_entry *e
+			= (struct arpt_entry *)(newinfo->entries + pos);
+
+		if (!(valid_hooks & (1 << hook)))
+			continue;
+
+		/* Set initial back pointer. */
+		e->counters.pcnt = pos;
+
+		for (;;) {
+			struct arpt_standard_target *t
+				= (void *)arpt_get_target(e);
+
+			if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
+				printk("arptables: loop hook %u pos %u %08X.\n",
+				       hook, pos, e->comefrom);
+				return 0;
+			}
+			e->comefrom
+				|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
+
+			/* Unconditional return/END. */
+			if (e->target_offset == sizeof(struct arpt_entry)
+			    && (strcmp(t->target.u.user.name,
+				       ARPT_STANDARD_TARGET) == 0)
+			    && t->verdict < 0
+			    && unconditional(&e->arp)) {
+				unsigned int oldpos, size;
+
+				/* Return: backtrack through the last
+				 * big jump.
+				 */
+				do {
+					e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
+					oldpos = pos;
+					pos = e->counters.pcnt;
+					e->counters.pcnt = 0;
+
+					/* We're at the start. */
+					if (pos == oldpos)
+						goto next;
+
+					e = (struct arpt_entry *)
+						(newinfo->entries + pos);
+				} while (oldpos == pos + e->next_offset);
+
+				/* Move along one */
+				size = e->next_offset;
+				e = (struct arpt_entry *)
+					(newinfo->entries + pos + size);
+				e->counters.pcnt = pos;
+				pos += size;
+			} else {
+				int newpos = t->verdict;
+
+				if (strcmp(t->target.u.user.name,
+					   ARPT_STANDARD_TARGET) == 0
+				    && newpos >= 0) {
+					/* This a jump; chase it. */
+					duprintf("Jump rule %u -> %u\n",
+						 pos, newpos);
+				} else {
+					/* ... this is a fallthru */
+					newpos = pos + e->next_offset;
+				}
+				e = (struct arpt_entry *)
+					(newinfo->entries + newpos);
+				e->counters.pcnt = pos;
+				pos = newpos;
+			}
+		}
+		next:
+		duprintf("Finished chain %u\n", hook);
+	}
+	return 1;
+}
+
+static inline int standard_check(const struct arpt_entry_target *t,
+				 unsigned int max_offset)
+{
+	struct arpt_standard_target *targ = (void *)t;
+
+	/* Check standard info. */
+	if (t->u.target_size
+	    != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
+		duprintf("arpt_standard_check: target size %u != %Zu\n",
+			 t->u.target_size,
+			 ARPT_ALIGN(sizeof(struct arpt_standard_target)));
+		return 0;
+	}
+
+	if (targ->verdict >= 0
+	    && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
+		duprintf("arpt_standard_check: bad verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+
+	if (targ->verdict < -NF_MAX_VERDICT - 1) {
+		duprintf("arpt_standard_check: bad negative verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+	return 1;
+}
+
+static struct arpt_target arpt_standard_target;
+
+static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+			      unsigned int *i)
+{
+	struct arpt_entry_target *t;
+	struct arpt_target *target;
+	int ret;
+
+	if (!arp_checkentry(&e->arp)) {
+		duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	t = arpt_get_target(e);
+	target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex);
+	if (!target) {
+		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		goto out;
+	}
+	if (!try_module_get((target->me))) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+	t->u.kernel.target = target;
+	up(&arpt_mutex);
+
+	if (t->u.kernel.target == &arpt_standard_target) {
+		if (!standard_check(t, size)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	} else if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		module_put(t->u.kernel.target->me);
+		duprintf("arp_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	(*i)++;
+	return 0;
+
+out_unlock:
+	up(&arpt_mutex);
+out:
+	return ret;
+}
+
+static inline int check_entry_size_and_hooks(struct arpt_entry *e,
+					     struct arpt_table_info *newinfo,
+					     unsigned char *base,
+					     unsigned char *limit,
+					     const unsigned int *hook_entries,
+					     const unsigned int *underflows,
+					     unsigned int *i)
+{
+	unsigned int h;
+
+	if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	if (e->next_offset
+	    < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+           < 0 (not ARPT_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct arpt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
+{
+	struct arpt_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	t = arpt_get_target(e);
+	if (t->u.kernel.target->destroy)
+		t->u.kernel.target->destroy(t->data,
+					    t->u.target_size - sizeof(*t));
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ * newinfo).
+ */
+static int translate_table(const char *name,
+			   unsigned int valid_hooks,
+			   struct arpt_table_info *newinfo,
+			   unsigned int size,
+			   unsigned int number,
+			   const unsigned int *hook_entries,
+			   const unsigned int *underflows)
+{
+	unsigned int i;
+	int ret;
+
+	newinfo->size = size;
+	newinfo->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = 0xFFFFFFFF;
+		newinfo->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_table: size %u\n", newinfo->size);
+	i = 0;
+
+	/* Walk through entries, checking offsets. */
+	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				 check_entry_size_and_hooks,
+				 newinfo,
+				 newinfo->entries,
+				 newinfo->entries + size,
+				 hook_entries, underflows, &i);
+	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
+	if (ret != 0)
+		return ret;
+
+	if (i != number) {
+		duprintf("translate_table: %u not %u entries\n",
+			 i, number);
+		return -EINVAL;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			return -EINVAL;
+		}
+		if (newinfo->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			return -EINVAL;
+		}
+	}
+
+	if (!mark_source_chains(newinfo, valid_hooks)) {
+		duprintf("Looping hook\n");
+		return -ELOOP;
+	}
+
+	/* Finally, each sanity check must pass */
+	i = 0;
+	ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				 check_entry, name, size, &i);
+
+	if (ret != 0) {
+		ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				   cleanup_entry, &i);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for (i = 1; i < num_possible_cpus(); i++) {
+		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+		       newinfo->entries,
+		       SMP_ALIGN(newinfo->size));
+	}
+
+	return ret;
+}
+
+static struct arpt_table_info *replace_table(struct arpt_table *table,
+					     unsigned int num_counters,
+					     struct arpt_table_info *newinfo,
+					     int *error)
+{
+	struct arpt_table_info *oldinfo;
+
+	/* Do the substitution. */
+	write_lock_bh(&table->lock);
+	/* Check inside lock: is the old number correct? */
+	if (num_counters != table->private->number) {
+		duprintf("num_counters != table->private->number (%u/%u)\n",
+			 num_counters, table->private->number);
+		write_unlock_bh(&table->lock);
+		*error = -EAGAIN;
+		return NULL;
+	}
+	oldinfo = table->private;
+	table->private = newinfo;
+	newinfo->initial_entries = oldinfo->initial_entries;
+	write_unlock_bh(&table->lock);
+
+	return oldinfo;
+}
+
+/* Gets counters. */
+static inline int add_entry_to_counter(const struct arpt_entry *e,
+				       struct arpt_counters total[],
+				       unsigned int *i)
+{
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static void get_counters(const struct arpt_table_info *t,
+			 struct arpt_counters counters[])
+{
+	unsigned int cpu;
+	unsigned int i;
+
+	for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+		i = 0;
+		ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+				   t->size,
+				   add_entry_to_counter,
+				   counters,
+				   &i);
+	}
+}
+
+static int copy_entries_to_user(unsigned int total_size,
+				struct arpt_table *table,
+				void __user *userptr)
+{
+	unsigned int off, num, countersize;
+	struct arpt_entry *e;
+	struct arpt_counters *counters;
+	int ret = 0;
+
+	/* We need atomic snapshot of counters: rest doesn't change
+	 * (other than comefrom, which userspace doesn't care
+	 * about).
+	 */
+	countersize = sizeof(struct arpt_counters) * table->private->number;
+	counters = vmalloc(countersize);
+
+	if (counters == NULL)
+		return -ENOMEM;
+
+	/* First, sum counters... */
+	memset(counters, 0, countersize);
+	write_lock_bh(&table->lock);
+	get_counters(table->private, counters);
+	write_unlock_bh(&table->lock);
+
+	/* ... then copy entire thing from CPU 0... */
+	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+		ret = -EFAULT;
+		goto free_counters;
+	}
+
+	/* FIXME: use iterator macros --RR */
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+		struct arpt_entry_target *t;
+
+		e = (struct arpt_entry *)(table->private->entries + off);
+		if (copy_to_user(userptr + off
+				 + offsetof(struct arpt_entry, counters),
+				 &counters[num],
+				 sizeof(counters[num])) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+
+		t = arpt_get_target(e);
+		if (copy_to_user(userptr + off + e->target_offset
+				 + offsetof(struct arpt_entry_target,
+					    u.user.name),
+				 t->u.kernel.target->name,
+				 strlen(t->u.kernel.target->name)+1) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+	}
+
+ free_counters:
+	vfree(counters);
+	return ret;
+}
+
+static int get_entries(const struct arpt_get_entries *entries,
+		       struct arpt_get_entries __user *uptr)
+{
+	int ret;
+	struct arpt_table *t;
+
+	t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex);
+	if (t) {
+		duprintf("t->private->number = %u\n",
+			 t->private->number);
+		if (entries->size == t->private->size)
+			ret = copy_entries_to_user(t->private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 t->private->size,
+				 entries->size);
+			ret = -EINVAL;
+		}
+		up(&arpt_mutex);
+	} else
+		duprintf("get_entries: Can't find %s!\n",
+			 entries->name);
+
+	return ret;
+}
+
+static int do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct arpt_replace tmp;
+	struct arpt_table *t;
+	struct arpt_table_info *newinfo, *oldinfo;
+	struct arpt_counters *counters;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return -ENOMEM;
+
+	newinfo = vmalloc(sizeof(struct arpt_table_info)
+			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto free_newinfo;
+	}
+	memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo_counters;
+
+	duprintf("arp_tables: Translated table\n");
+
+	t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
+	if (!t)
+		goto free_newinfo_counters_untrans;
+
+	/* You lied! */
+	if (tmp.valid_hooks != t->valid_hooks) {
+		duprintf("Valid hook crap: %08X vs %08X\n",
+			 tmp.valid_hooks, t->valid_hooks);
+		ret = -EINVAL;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	/* Get a reference in advance, we're not allowed fail later */
+	if (!try_module_get(t->me)) {
+		ret = -EBUSY;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) || 
+	    (newinfo->number <= oldinfo->initial_entries)) 
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+	vfree(oldinfo);
+	if (copy_to_user(tmp.counters, counters,
+			 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
+		ret = -EFAULT;
+	vfree(counters);
+	up(&arpt_mutex);
+	return ret;
+
+ put_module:
+	module_put(t->me);
+ free_newinfo_counters_untrans_unlock:
+	up(&arpt_mutex);
+ free_newinfo_counters_untrans:
+	ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
+ free_newinfo_counters:
+	vfree(counters);
+ free_newinfo:
+	vfree(newinfo);
+	return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK.
+ */
+static inline int add_counter_to_entry(struct arpt_entry *e,
+				       const struct arpt_counters addme[],
+				       unsigned int *i)
+{
+
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static int do_add_counters(void __user *user, unsigned int len)
+{
+	unsigned int i;
+	struct arpt_counters_info tmp, *paddc;
+	struct arpt_table *t;
+	int ret;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters))
+		return -EINVAL;
+
+	paddc = vmalloc(len);
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user, len) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
+	if (!t)
+		goto free;
+
+	write_lock_bh(&t->lock);
+	if (t->private->number != paddc->num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->private->entries,
+			   t->private->size,
+			   add_counter_to_entry,
+			   paddc->counters,
+			   &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	up(&arpt_mutex);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case ARPT_SO_SET_REPLACE:
+		ret = do_replace(user, len);
+		break;
+
+	case ARPT_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len);
+		break;
+
+	default:
+		duprintf("do_arpt_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case ARPT_SO_GET_INFO: {
+		char name[ARPT_TABLE_MAXNAMELEN];
+		struct arpt_table *t;
+
+		if (*len != sizeof(struct arpt_getinfo)) {
+			duprintf("length %u != %Zu\n", *len,
+				 sizeof(struct arpt_getinfo));
+			ret = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(name, user, sizeof(name)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+		t = arpt_find_table_lock(name, &ret, &arpt_mutex);
+		if (t) {
+			struct arpt_getinfo info;
+
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, t->private->hook_entry,
+			       sizeof(info.hook_entry));
+			memcpy(info.underflow, t->private->underflow,
+			       sizeof(info.underflow));
+			info.num_entries = t->private->number;
+			info.size = t->private->size;
+			strcpy(info.name, name);
+
+			if (copy_to_user(user, &info, *len) != 0)
+				ret = -EFAULT;
+			else
+				ret = 0;
+
+			up(&arpt_mutex);
+		}
+	}
+	break;
+
+	case ARPT_SO_GET_ENTRIES: {
+		struct arpt_get_entries get;
+
+		if (*len < sizeof(get)) {
+			duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+			ret = -EINVAL;
+		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+			ret = -EFAULT;
+		} else if (*len != sizeof(struct arpt_get_entries) + get.size) {
+			duprintf("get_entries: %u != %Zu\n", *len,
+				 sizeof(struct arpt_get_entries) + get.size);
+			ret = -EINVAL;
+		} else
+			ret = get_entries(&get, user);
+		break;
+	}
+
+	default:
+		duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Registration hooks for targets. */
+int arpt_register_target(struct arpt_target *target)
+{
+	int ret;
+
+	ret = down_interruptible(&arpt_mutex);
+	if (ret != 0)
+		return ret;
+
+	if (!list_named_insert(&arpt_target, target)) {
+		duprintf("arpt_register_target: `%s' already in list!\n",
+			 target->name);
+		ret = -EINVAL;
+	}
+	up(&arpt_mutex);
+	return ret;
+}
+
+void arpt_unregister_target(struct arpt_target *target)
+{
+	down(&arpt_mutex);
+	LIST_DELETE(&arpt_target, target);
+	up(&arpt_mutex);
+}
+
+int arpt_register_table(struct arpt_table *table,
+			const struct arpt_replace *repl)
+{
+	int ret;
+	struct arpt_table_info *newinfo;
+	static struct arpt_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+
+	newinfo = vmalloc(sizeof(struct arpt_table_info)
+			  + SMP_ALIGN(repl->size) * num_possible_cpus());
+	if (!newinfo) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	ret = translate_table(table->name, table->valid_hooks,
+			      newinfo, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+	duprintf("arpt_register_table: translate table gives %d\n", ret);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	ret = down_interruptible(&arpt_mutex);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	/* Don't autoload: we'd eat our tail... */
+	if (list_named_find(&arpt_tables, table->name)) {
+		ret = -EEXIST;
+		goto free_unlock;
+	}
+
+	/* Simplifies replace_table code. */
+	table->private = &bootstrap;
+	if (!replace_table(table, 0, newinfo, &ret))
+		goto free_unlock;
+
+	duprintf("table->private->number = %u\n",
+		 table->private->number);
+	
+	/* save number of initial entries */
+	table->private->initial_entries = table->private->number;
+
+	rwlock_init(&table->lock);
+	list_prepend(&arpt_tables, table);
+
+ unlock:
+	up(&arpt_mutex);
+	return ret;
+
+ free_unlock:
+	vfree(newinfo);
+	goto unlock;
+}
+
+void arpt_unregister_table(struct arpt_table *table)
+{
+	down(&arpt_mutex);
+	LIST_DELETE(&arpt_tables, table);
+	up(&arpt_mutex);
+
+	/* Decrease module usage counts and free resources */
+	ARPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+			   cleanup_entry, NULL);
+	vfree(table->private);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct arpt_target arpt_standard_target = {
+	.name		= ARPT_STANDARD_TARGET,
+};
+
+static struct arpt_target arpt_error_target = {
+	.name		= ARPT_ERROR_TARGET,
+	.target		= arpt_error,
+};
+
+static struct nf_sockopt_ops arpt_sockopts = {
+	.pf		= PF_INET,
+	.set_optmin	= ARPT_BASE_CTL,
+	.set_optmax	= ARPT_SO_SET_MAX+1,
+	.set		= do_arpt_set_ctl,
+	.get_optmin	= ARPT_BASE_CTL,
+	.get_optmax	= ARPT_SO_GET_MAX+1,
+	.get		= do_arpt_get_ctl,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct arpt_table *t,
+			     off_t start_offset, char *buffer, int length,
+			     off_t *pos, unsigned int *count)
+{
+	if ((*count)++ >= start_offset) {
+		unsigned int namelen;
+
+		namelen = sprintf(buffer + *pos, "%s\n", t->name);
+		if (*pos + namelen > length) {
+			/* Stop iterating */
+			return 1;
+		}
+		*pos += namelen;
+	}
+	return 0;
+}
+
+static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&arpt_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&arpt_mutex);
+
+	/* `start' hack - see fs/proc/generic.c line ~105 */
+	*start=(char *)((unsigned long)count-offset);
+	return pos;
+}
+#endif /*CONFIG_PROC_FS*/
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Noone else will be downing sem now, so we won't sleep */
+	down(&arpt_mutex);
+	list_append(&arpt_target, &arpt_standard_target);
+	list_append(&arpt_target, &arpt_error_target);
+	up(&arpt_mutex);
+
+	/* Register setsockopt */
+	ret = nf_register_sockopt(&arpt_sockopts);
+	if (ret < 0) {
+		duprintf("Unable to register sockopts.\n");
+		return ret;
+	}
+
+#ifdef CONFIG_PROC_FS
+	{
+		struct proc_dir_entry *proc;
+
+		proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
+		if (!proc) {
+			nf_unregister_sockopt(&arpt_sockopts);
+			return -ENOMEM;
+		}
+		proc->owner = THIS_MODULE;
+	}
+#endif
+
+	printk("arp_tables: (C) 2002 David S. Miller\n");
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	nf_unregister_sockopt(&arpt_sockopts);
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("arp_tables_names");
+#endif
+}
+
+EXPORT_SYMBOL(arpt_register_table);
+EXPORT_SYMBOL(arpt_unregister_table);
+EXPORT_SYMBOL(arpt_do_table);
+EXPORT_SYMBOL(arpt_register_target);
+EXPORT_SYMBOL(arpt_unregister_target);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
new file mode 100644
index 00000000000..3e592ec8648
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -0,0 +1,104 @@
+/* module that allows mangling of the arp payload */
+#include <linux/module.h>
+#include <linux/netfilter_arp/arpt_mangle.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("arptables arp payload mangle target");
+
+static unsigned int
+target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
+   const struct net_device *out, const void *targinfo, void *userinfo)
+{
+	const struct arpt_mangle *mangle = targinfo;
+	struct arphdr *arp;
+	unsigned char *arpptr;
+	int pln, hln;
+
+	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
+		struct sk_buff *nskb;
+
+		nskb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!nskb)
+			return NF_DROP;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+
+	arp = (*pskb)->nh.arph;
+	arpptr = (*pskb)->nh.raw + sizeof(*arp);
+	pln = arp->ar_pln;
+	hln = arp->ar_hln;
+	/* We assume that pln and hln were checked in the match */
+	if (mangle->flags & ARPT_MANGLE_SDEV) {
+		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+		   (arpptr + hln > (**pskb).tail))
+			return NF_DROP;
+		memcpy(arpptr, mangle->src_devaddr, hln);
+	}
+	arpptr += hln;
+	if (mangle->flags & ARPT_MANGLE_SIP) {
+		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+		   (arpptr + pln > (**pskb).tail))
+			return NF_DROP;
+		memcpy(arpptr, &mangle->u_s.src_ip, pln);
+	}
+	arpptr += pln;
+	if (mangle->flags & ARPT_MANGLE_TDEV) {
+		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+		   (arpptr + hln > (**pskb).tail))
+			return NF_DROP;
+		memcpy(arpptr, mangle->tgt_devaddr, hln);
+	}
+	arpptr += hln;
+	if (mangle->flags & ARPT_MANGLE_TIP) {
+		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+		   (arpptr + pln > (**pskb).tail))
+			return NF_DROP;
+		memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
+	}
+	return mangle->target;
+}
+
+static int
+checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo,
+   unsigned int targinfosize, unsigned int hook_mask)
+{
+	const struct arpt_mangle *mangle = targinfo;
+
+	if (mangle->flags & ~ARPT_MANGLE_MASK ||
+	    !(mangle->flags & ARPT_MANGLE_MASK))
+		return 0;
+
+	if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
+	   mangle->target != ARPT_CONTINUE)
+		return 0;
+	return 1;
+}
+
+static struct arpt_target arpt_mangle_reg
+= {
+        .name		= "mangle",
+        .target		= target,
+        .checkentry	= checkentry,
+        .me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	if (arpt_register_target(&arpt_mangle_reg))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	arpt_unregister_target(&arpt_mangle_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
new file mode 100644
index 00000000000..0d759f5a4ef
--- /dev/null
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -0,0 +1,214 @@
+/*
+ * Filtering ARP tables module.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_arp/arp_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
+			   (1 << NF_ARP_FORWARD))
+
+/* Standard entry. */
+struct arpt_standard
+{
+	struct arpt_entry entry;
+	struct arpt_standard_target target;
+};
+
+struct arpt_error_target
+{
+	struct arpt_entry_target target;
+	char errorname[ARPT_FUNCTION_MAXNAMELEN];
+};
+
+struct arpt_error
+{
+	struct arpt_entry entry;
+	struct arpt_error_target target;
+};
+
+static struct
+{
+	struct arpt_replace repl;
+	struct arpt_standard entries[3];
+	struct arpt_error term;
+} initial_table __initdata
+= { { "filter", FILTER_VALID_HOOKS, 4,
+      sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
+      { [NF_ARP_IN] = 0,
+	[NF_ARP_OUT] = sizeof(struct arpt_standard),
+	[NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
+      { [NF_ARP_IN] = 0,
+	[NF_ARP_OUT] = sizeof(struct arpt_standard),
+	[NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
+      0, NULL, { } },
+    {
+	    /* ARP_IN */
+	    {
+		    {
+			    {
+				    { 0 }, { 0 }, { 0 }, { 0 },
+				    0, 0,
+				    { { 0, }, { 0, } },
+				    { { 0, }, { 0, } },
+				    0, 0,
+				    0, 0,
+				    0, 0,
+				    "", "", { 0 }, { 0 },
+				    0, 0
+			    },
+			    sizeof(struct arpt_entry),
+			    sizeof(struct arpt_standard),
+			    0,
+			    { 0, 0 }, { } },
+		    { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+		      -NF_ACCEPT - 1 }
+	    },
+	    /* ARP_OUT */
+	    {
+		    {
+			    {
+				    { 0 }, { 0 }, { 0 }, { 0 },
+				    0, 0,
+				    { { 0, }, { 0, } },
+				    { { 0, }, { 0, } },
+				    0, 0,
+				    0, 0,
+				    0, 0,
+				    "", "", { 0 }, { 0 },
+				    0, 0
+			    },
+			    sizeof(struct arpt_entry),
+			    sizeof(struct arpt_standard),
+			    0,
+			    { 0, 0 }, { } },
+		    { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+		      -NF_ACCEPT - 1 }
+	    },
+	    /* ARP_FORWARD */
+	    {
+		    {
+			    {
+				    { 0 }, { 0 }, { 0 }, { 0 },
+				    0, 0,
+				    { { 0, }, { 0, } },
+				    { { 0, }, { 0, } },
+				    0, 0,
+				    0, 0,
+				    0, 0,
+				    "", "", { 0 }, { 0 },
+				    0, 0
+			    },
+			    sizeof(struct arpt_entry),
+			    sizeof(struct arpt_standard),
+			    0,
+			    { 0, 0 }, { } },
+		    { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
+		      -NF_ACCEPT - 1 }
+	    }
+    },
+    /* ERROR */
+    {
+	    {
+		    {
+			    { 0 }, { 0 }, { 0 }, { 0 },
+			    0, 0,
+			    { { 0, }, { 0, } },
+			    { { 0, }, { 0, } },
+			    0, 0,
+			    0, 0,
+			    0, 0,
+			    "", "", { 0 }, { 0 },
+			    0, 0
+		    },
+		    sizeof(struct arpt_entry),
+		    sizeof(struct arpt_error),
+		    0,
+		    { 0, 0 }, { } },
+	    { { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } },
+		{ } },
+	      "ERROR"
+	    }
+    }
+};
+
+static struct arpt_table packet_filter = {
+	.name		= "filter",
+	.valid_hooks	= FILTER_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.private	= NULL,
+	.me		= THIS_MODULE,
+};
+
+/* The work comes in here from netfilter.c */
+static unsigned int arpt_hook(unsigned int hook,
+			      struct sk_buff **pskb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static struct nf_hook_ops arpt_ops[] = {
+	{
+		.hook		= arpt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NF_ARP,
+		.hooknum	= NF_ARP_IN,
+	},
+	{
+		.hook		= arpt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NF_ARP,
+		.hooknum	= NF_ARP_OUT,
+	},
+	{
+		.hook		= arpt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NF_ARP,
+		.hooknum	= NF_ARP_FORWARD,
+	},
+};
+
+static int __init init(void)
+{
+	int ret, i;
+
+	/* Register table */
+	ret = arpt_register_table(&packet_filter, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
+		if ((ret = nf_register_hook(&arpt_ops[i])) < 0)
+			goto cleanup_hooks;
+	return ret;
+
+cleanup_hooks:
+	while (--i >= 0)
+		nf_unregister_hook(&arpt_ops[i]);
+
+	arpt_unregister_table(&packet_filter);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
+		nf_unregister_hook(&arpt_ops[i]);
+
+	arpt_unregister_table(&packet_filter);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
new file mode 100644
index 00000000000..3dbddd06260
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -0,0 +1,167 @@
+/* Amanda extension for IP connection tracking, Version 0.2
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on HW's ip_conntrack_irc.c as well as other modules
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Module load syntax:
+ * 	insmod ip_conntrack_amanda.o [master_timeout=n]
+ *	
+ *	Where master_timeout is the timeout (in seconds) of the master
+ *	connection (port 10080).  This defaults to 5 minutes but if
+ *	your clients take longer than 5 minutes to do their work
+ *	before getting back to the Amanda server, you can increase
+ *	this value.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/moduleparam.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
+
+static unsigned int master_timeout = 300;
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda connection tracking module");
+MODULE_LICENSE("GPL");
+module_param(master_timeout, int, 0600);
+MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
+
+static char *conns[] = { "DATA ", "MESG ", "INDEX " };
+
+/* This is slow, but it's simple. --RR */
+static char amanda_buffer[65536];
+static DECLARE_LOCK(amanda_buffer_lock);
+
+unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
+				   enum ip_conntrack_info ctinfo,
+				   unsigned int matchoff,
+				   unsigned int matchlen,
+				   struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
+
+static int help(struct sk_buff **pskb,
+                struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+{
+	struct ip_conntrack_expect *exp;
+	char *data, *data_limit, *tmp;
+	unsigned int dataoff, i;
+	u_int16_t port, len;
+	int ret = NF_ACCEPT;
+
+	/* Only look at packets from the Amanda server */
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+		return NF_ACCEPT;
+
+	/* increase the UDP timeout of the master connection as replies from
+	 * Amanda clients to the server can be quite delayed */
+	ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+
+	/* No data? */
+	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	if (dataoff >= (*pskb)->len) {
+		if (net_ratelimit())
+			printk("amanda_help: skblen = %u\n", (*pskb)->len);
+		return NF_ACCEPT;
+	}
+
+	LOCK_BH(&amanda_buffer_lock);
+	skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
+	data = amanda_buffer;
+	data_limit = amanda_buffer + (*pskb)->len - dataoff;
+	*data_limit = '\0';
+
+	/* Search for the CONNECT string */
+	data = strstr(data, "CONNECT ");
+	if (!data)
+		goto out;
+	data += strlen("CONNECT ");
+
+	/* Only search first line. */	
+	if ((tmp = strchr(data, '\n')))
+		*tmp = '\0';
+
+	for (i = 0; i < ARRAY_SIZE(conns); i++) {
+		char *match = strstr(data, conns[i]);
+		if (!match)
+			continue;
+		tmp = data = match + strlen(conns[i]);
+		port = simple_strtoul(data, &data, 10);
+		len = data - tmp;
+		if (port == 0 || len > 5)
+			break;
+
+		exp = ip_conntrack_expect_alloc();
+		if (exp == NULL) {
+			ret = NF_DROP;
+			goto out;
+		}
+
+		exp->expectfn = NULL;
+		exp->master = ct;
+
+		exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+		exp->tuple.src.u.tcp.port = 0;
+		exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+		exp->tuple.dst.protonum = IPPROTO_TCP;
+		exp->tuple.dst.u.tcp.port = htons(port);
+
+		exp->mask.src.ip = 0xFFFFFFFF;
+		exp->mask.src.u.tcp.port = 0;
+		exp->mask.dst.ip = 0xFFFFFFFF;
+		exp->mask.dst.protonum = 0xFF;
+		exp->mask.dst.u.tcp.port = 0xFFFF;
+
+		if (ip_nat_amanda_hook)
+			ret = ip_nat_amanda_hook(pskb, ctinfo,
+						 tmp - amanda_buffer,
+						 len, exp);
+		else if (ip_conntrack_expect_related(exp) != 0) {
+			ip_conntrack_expect_free(exp);
+			ret = NF_DROP;
+		}
+	}
+
+out:
+	UNLOCK_BH(&amanda_buffer_lock);
+	return ret;
+}
+
+static struct ip_conntrack_helper amanda_helper = {
+	.max_expected = ARRAY_SIZE(conns),
+	.timeout = 180,
+	.me = THIS_MODULE,
+	.help = help,
+	.name = "amanda",
+
+	.tuple = { .src = { .u = { __constant_htons(10080) } },
+		   .dst = { .protonum = IPPROTO_UDP },
+	},
+	.mask = { .src = { .u = { 0xFFFF } },
+		 .dst = { .protonum = 0xFF },
+	},
+};
+
+static void __exit fini(void)
+{
+	ip_conntrack_helper_unregister(&amanda_helper);
+}
+
+static int __init init(void)
+{
+	return ip_conntrack_helper_register(&amanda_helper);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
new file mode 100644
index 00000000000..28d9425d5c3
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -0,0 +1,1247 @@
+/* Connection state tracking for netfilter.  This is separated from,
+   but required by, the NAT layer; it can also be used by an iptables
+   extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell  
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * 	- new API and handling of conntrack/nat helpers
+ * 	- now capable of multiple expectations for one master
+ * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
+ * 	- add usage/reference counts to ip_conntrack_expect
+ *	- export ip_conntrack[_expect]_{find_get,put} functions
+ * */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <linux/stddef.h>
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/err.h>
+#include <linux/percpu.h>
+#include <linux/moduleparam.h>
+
+/* This rwlock protects the main hash table, protocol/helper/expected
+   registrations, conntrack timers*/
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#define IP_CONNTRACK_VERSION	"2.1"
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_conntrack_lock);
+
+/* ip_conntrack_standalone needs this */
+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+
+void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
+LIST_HEAD(ip_conntrack_expect_list);
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+static LIST_HEAD(helpers);
+unsigned int ip_conntrack_htable_size = 0;
+int ip_conntrack_max;
+struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
+static kmem_cache_t *ip_conntrack_expect_cachep;
+struct ip_conntrack ip_conntrack_untracked;
+unsigned int ip_ct_log_invalid;
+static LIST_HEAD(unconfirmed);
+static int ip_conntrack_vmalloc;
+
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
+void 
+ip_conntrack_put(struct ip_conntrack *ct)
+{
+	IP_NF_ASSERT(ct);
+	nf_conntrack_put(&ct->ct_general);
+}
+
+static int ip_conntrack_hash_rnd_initted;
+static unsigned int ip_conntrack_hash_rnd;
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+#if 0
+	dump_tuple(tuple);
+#endif
+	return (jhash_3words(tuple->src.ip,
+	                     (tuple->dst.ip ^ tuple->dst.protonum),
+	                     (tuple->src.u.all | (tuple->dst.u.all << 16)),
+	                     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+}
+
+int
+ip_ct_get_tuple(const struct iphdr *iph,
+		const struct sk_buff *skb,
+		unsigned int dataoff,
+		struct ip_conntrack_tuple *tuple,
+		const struct ip_conntrack_protocol *protocol)
+{
+	/* Never happen */
+	if (iph->frag_off & htons(IP_OFFSET)) {
+		printk("ip_conntrack_core: Frag of proto %u.\n",
+		       iph->protocol);
+		return 0;
+	}
+
+	tuple->src.ip = iph->saddr;
+	tuple->dst.ip = iph->daddr;
+	tuple->dst.protonum = iph->protocol;
+	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+	return protocol->pkt_to_tuple(skb, dataoff, tuple);
+}
+
+int
+ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
+		   const struct ip_conntrack_tuple *orig,
+		   const struct ip_conntrack_protocol *protocol)
+{
+	inverse->src.ip = orig->dst.ip;
+	inverse->dst.ip = orig->src.ip;
+	inverse->dst.protonum = orig->dst.protonum;
+	inverse->dst.dir = !orig->dst.dir;
+
+	return protocol->invert_tuple(inverse, orig);
+}
+
+
+/* ip_conntrack_expect helper functions */
+static void destroy_expect(struct ip_conntrack_expect *exp)
+{
+	ip_conntrack_put(exp->master);
+	IP_NF_ASSERT(!timer_pending(&exp->timeout));
+	kmem_cache_free(ip_conntrack_expect_cachep, exp);
+	CONNTRACK_STAT_INC(expect_delete);
+}
+
+static void unlink_expect(struct ip_conntrack_expect *exp)
+{
+	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+	list_del(&exp->list);
+	/* Logically in destroy_expect, but we hold the lock here. */
+	exp->master->expecting--;
+}
+
+static void expectation_timed_out(unsigned long ul_expect)
+{
+	struct ip_conntrack_expect *exp = (void *)ul_expect;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	unlink_expect(exp);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+	destroy_expect(exp);
+}
+
+/* If an expectation for this connection is found, it gets delete from
+ * global list then returned. */
+static struct ip_conntrack_expect *
+find_expectation(const struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_expect *i;
+
+	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+		/* If master is not in hash table yet (ie. packet hasn't left
+		   this machine yet), how can other end know about expected?
+		   Hence these are not the droids you are looking for (if
+		   master ct never got confirmed, we'd hold a reference to it
+		   and weird things would happen to future packets). */
+		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
+		    && is_confirmed(i->master)
+		    && del_timer(&i->timeout)) {
+			unlink_expect(i);
+			return i;
+		}
+	}
+	return NULL;
+}
+
+/* delete all expectations for this conntrack */
+static void remove_expectations(struct ip_conntrack *ct)
+{
+	struct ip_conntrack_expect *i, *tmp;
+
+	/* Optimization: most connection never expect any others. */
+	if (ct->expecting == 0)
+		return;
+
+	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+		if (i->master == ct && del_timer(&i->timeout)) {
+			unlink_expect(i);
+			destroy_expect(i);
+		}
+	}
+}
+
+static void
+clean_from_lists(struct ip_conntrack *ct)
+{
+	unsigned int ho, hr;
+	
+	DEBUGP("clean_from_lists(%p)\n", ct);
+	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+
+	/* Destroy all pending expectations */
+	remove_expectations(ct);
+}
+
+static void
+destroy_conntrack(struct nf_conntrack *nfct)
+{
+	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
+	struct ip_conntrack_protocol *proto;
+
+	DEBUGP("destroy_conntrack(%p)\n", ct);
+	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
+	IP_NF_ASSERT(!timer_pending(&ct->timeout));
+
+	/* To make sure we don't get any weird locking issues here:
+	 * destroy_conntrack() MUST NOT be called with a write lock
+	 * to ip_conntrack_lock!!! -HW */
+	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	if (proto && proto->destroy)
+		proto->destroy(ct);
+
+	if (ip_conntrack_destroyed)
+		ip_conntrack_destroyed(ct);
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	/* Expectations will have been removed in clean_from_lists,
+	 * except TFTP can create an expectation on the first packet,
+	 * before connection is in the list, so we need to clean here,
+	 * too. */
+	remove_expectations(ct);
+
+	/* We overload first tuple to link into unconfirmed list. */
+	if (!is_confirmed(ct)) {
+		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
+		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	}
+
+	CONNTRACK_STAT_INC(delete);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	if (ct->master)
+		ip_conntrack_put(ct->master);
+
+	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+	kmem_cache_free(ip_conntrack_cachep, ct);
+	atomic_dec(&ip_conntrack_count);
+}
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+	struct ip_conntrack *ct = (void *)ul_conntrack;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	/* Inside lock so preempt is disabled on module removal path.
+	 * Otherwise we can get spurious warnings. */
+	CONNTRACK_STAT_INC(delete_list);
+	clean_from_lists(ct);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+	ip_conntrack_put(ct);
+}
+
+static inline int
+conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
+		    const struct ip_conntrack_tuple *tuple,
+		    const struct ip_conntrack *ignored_conntrack)
+{
+	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+	return tuplehash_to_ctrack(i) != ignored_conntrack
+		&& ip_ct_tuple_equal(tuple, &i->tuple);
+}
+
+static struct ip_conntrack_tuple_hash *
+__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
+		    const struct ip_conntrack *ignored_conntrack)
+{
+	struct ip_conntrack_tuple_hash *h;
+	unsigned int hash = hash_conntrack(tuple);
+
+	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
+		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+			CONNTRACK_STAT_INC(found);
+			return h;
+		}
+		CONNTRACK_STAT_INC(searched);
+	}
+
+	return NULL;
+}
+
+/* Find a connection corresponding to a tuple. */
+struct ip_conntrack_tuple_hash *
+ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
+		      const struct ip_conntrack *ignored_conntrack)
+{
+	struct ip_conntrack_tuple_hash *h;
+
+	READ_LOCK(&ip_conntrack_lock);
+	h = __ip_conntrack_find(tuple, ignored_conntrack);
+	if (h)
+		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	return h;
+}
+
+/* Confirm a connection given skb; places it in hash table */
+int
+__ip_conntrack_confirm(struct sk_buff **pskb)
+{
+	unsigned int hash, repl_hash;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	/* ipt_REJECT uses ip_conntrack_attach to attach related
+	   ICMP/TCP RST packets in other direction.  Actual packet
+	   which created connection will be IP_CT_NEW or for an
+	   expected connection, IP_CT_RELATED. */
+	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+		return NF_ACCEPT;
+
+	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	/* We're not in hash table, and we refuse to set up related
+	   connections for unconfirmed conns.  But packet copies and
+	   REJECT will give spurious warnings here. */
+	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
+
+	/* No external references means noone else could have
+           confirmed us. */
+	IP_NF_ASSERT(!is_confirmed(ct));
+	DEBUGP("Confirming conntrack %p\n", ct);
+
+	WRITE_LOCK(&ip_conntrack_lock);
+
+	/* See if there's one in the list already, including reverse:
+           NAT could have grabbed it without realizing, since we're
+           not in the hash.  If there is, we lost race. */
+	if (!LIST_FIND(&ip_conntrack_hash[hash],
+		       conntrack_tuple_cmp,
+		       struct ip_conntrack_tuple_hash *,
+		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+			  conntrack_tuple_cmp,
+			  struct ip_conntrack_tuple_hash *,
+			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+		/* Remove from unconfirmed list */
+		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+		list_prepend(&ip_conntrack_hash[hash],
+			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+		list_prepend(&ip_conntrack_hash[repl_hash],
+			     &ct->tuplehash[IP_CT_DIR_REPLY]);
+		/* Timer relative to confirmation time, not original
+		   setting time, otherwise we'd get timer wrap in
+		   weird delay cases. */
+		ct->timeout.expires += jiffies;
+		add_timer(&ct->timeout);
+		atomic_inc(&ct->ct_general.use);
+		set_bit(IPS_CONFIRMED_BIT, &ct->status);
+		CONNTRACK_STAT_INC(insert);
+		WRITE_UNLOCK(&ip_conntrack_lock);
+		return NF_ACCEPT;
+	}
+
+	CONNTRACK_STAT_INC(insert_failed);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	return NF_DROP;
+}
+
+/* Returns true if a connection correspondings to the tuple (required
+   for NAT). */
+int
+ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
+			 const struct ip_conntrack *ignored_conntrack)
+{
+	struct ip_conntrack_tuple_hash *h;
+
+	READ_LOCK(&ip_conntrack_lock);
+	h = __ip_conntrack_find(tuple, ignored_conntrack);
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	return h != NULL;
+}
+
+/* There's a small race here where we may free a just-assured
+   connection.  Too bad: we're in trouble anyway. */
+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+{
+	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
+}
+
+static int early_drop(struct list_head *chain)
+{
+	/* Traverse backwards: gives us oldest, which is roughly LRU */
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct = NULL;
+	int dropped = 0;
+
+	READ_LOCK(&ip_conntrack_lock);
+	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
+	if (h) {
+		ct = tuplehash_to_ctrack(h);
+		atomic_inc(&ct->ct_general.use);
+	}
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	if (!ct)
+		return dropped;
+
+	if (del_timer(&ct->timeout)) {
+		death_by_timeout((unsigned long)ct);
+		dropped = 1;
+		CONNTRACK_STAT_INC(early_drop);
+	}
+	ip_conntrack_put(ct);
+	return dropped;
+}
+
+static inline int helper_cmp(const struct ip_conntrack_helper *i,
+			     const struct ip_conntrack_tuple *rtuple)
+{
+	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
+}
+
+static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+{
+	return LIST_FIND(&helpers, helper_cmp,
+			 struct ip_conntrack_helper *,
+			 tuple);
+}
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+   failed due to stress.  Otherwise it really is unclassifiable. */
+static struct ip_conntrack_tuple_hash *
+init_conntrack(const struct ip_conntrack_tuple *tuple,
+	       struct ip_conntrack_protocol *protocol,
+	       struct sk_buff *skb)
+{
+	struct ip_conntrack *conntrack;
+	struct ip_conntrack_tuple repl_tuple;
+	size_t hash;
+	struct ip_conntrack_expect *exp;
+
+	if (!ip_conntrack_hash_rnd_initted) {
+		get_random_bytes(&ip_conntrack_hash_rnd, 4);
+		ip_conntrack_hash_rnd_initted = 1;
+	}
+
+	hash = hash_conntrack(tuple);
+
+	if (ip_conntrack_max
+	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+		/* Try dropping from this hash chain. */
+		if (!early_drop(&ip_conntrack_hash[hash])) {
+			if (net_ratelimit())
+				printk(KERN_WARNING
+				       "ip_conntrack: table full, dropping"
+				       " packet.\n");
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
+		DEBUGP("Can't invert tuple.\n");
+		return NULL;
+	}
+
+	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
+	if (!conntrack) {
+		DEBUGP("Can't allocate conntrack.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(conntrack, 0, sizeof(*conntrack));
+	atomic_set(&conntrack->ct_general.use, 1);
+	conntrack->ct_general.destroy = destroy_conntrack;
+	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
+	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
+	if (!protocol->new(conntrack, skb)) {
+		kmem_cache_free(ip_conntrack_cachep, conntrack);
+		return NULL;
+	}
+	/* Don't set timer yet: wait for confirmation */
+	init_timer(&conntrack->timeout);
+	conntrack->timeout.data = (unsigned long)conntrack;
+	conntrack->timeout.function = death_by_timeout;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	exp = find_expectation(tuple);
+
+	if (exp) {
+		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
+			conntrack, exp);
+		/* Welcome, Mr. Bond.  We've been expecting you... */
+		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
+		conntrack->master = exp->master;
+#if CONFIG_IP_NF_CONNTRACK_MARK
+		conntrack->mark = exp->master->mark;
+#endif
+		nf_conntrack_get(&conntrack->master->ct_general);
+		CONNTRACK_STAT_INC(expect_new);
+	} else {
+		conntrack->helper = ip_ct_find_helper(&repl_tuple);
+
+		CONNTRACK_STAT_INC(new);
+	}
+
+	/* Overload tuple linked list to put us in unconfirmed list. */
+	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+
+	atomic_inc(&ip_conntrack_count);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	if (exp) {
+		if (exp->expectfn)
+			exp->expectfn(conntrack, exp);
+		destroy_expect(exp);
+	}
+
+	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+}
+
+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
+static inline struct ip_conntrack *
+resolve_normal_ct(struct sk_buff *skb,
+		  struct ip_conntrack_protocol *proto,
+		  int *set_reply,
+		  unsigned int hooknum,
+		  enum ip_conntrack_info *ctinfo)
+{
+	struct ip_conntrack_tuple tuple;
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
+
+	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+
+	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, 
+				&tuple,proto))
+		return NULL;
+
+	/* look for tuple match */
+	h = ip_conntrack_find_get(&tuple, NULL);
+	if (!h) {
+		h = init_conntrack(&tuple, proto, skb);
+		if (!h)
+			return NULL;
+		if (IS_ERR(h))
+			return (void *)h;
+	}
+	ct = tuplehash_to_ctrack(h);
+
+	/* It exists; we have (non-exclusive) reference. */
+	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
+		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
+		/* Please set reply bit if this packet OK */
+		*set_reply = 1;
+	} else {
+		/* Once we've had two way comms, always ESTABLISHED. */
+		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+			DEBUGP("ip_conntrack_in: normal packet for %p\n",
+			       ct);
+		        *ctinfo = IP_CT_ESTABLISHED;
+		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
+			DEBUGP("ip_conntrack_in: related packet for %p\n",
+			       ct);
+			*ctinfo = IP_CT_RELATED;
+		} else {
+			DEBUGP("ip_conntrack_in: new packet for %p\n",
+			       ct);
+			*ctinfo = IP_CT_NEW;
+		}
+		*set_reply = 0;
+	}
+	skb->nfct = &ct->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return ct;
+}
+
+/* Netfilter hook itself. */
+unsigned int ip_conntrack_in(unsigned int hooknum,
+			     struct sk_buff **pskb,
+			     const struct net_device *in,
+			     const struct net_device *out,
+			     int (*okfn)(struct sk_buff *))
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack_protocol *proto;
+	int set_reply;
+	int ret;
+
+	/* Previously seen (loopback or untracked)?  Ignore. */
+	if ((*pskb)->nfct) {
+		CONNTRACK_STAT_INC(ignore);
+		return NF_ACCEPT;
+	}
+
+	/* Never happen */
+	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+		if (net_ratelimit()) {
+		printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
+		       (*pskb)->nh.iph->protocol, hooknum);
+		}
+		return NF_DROP;
+	}
+
+	/* FIXME: Do this right please. --RR */
+	(*pskb)->nfcache |= NFC_UNKNOWN;
+
+/* Doesn't cover locally-generated broadcast, so not worth it. */
+#if 0
+	/* Ignore broadcast: no `connection'. */
+	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
+		printk("Broadcast packet!\n");
+		return NF_ACCEPT;
+	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) 
+		   == htonl(0x000000FF)) {
+		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
+		       NIPQUAD((*pskb)->nh.iph->saddr),
+		       NIPQUAD((*pskb)->nh.iph->daddr),
+		       (*pskb)->sk, (*pskb)->pkt_type);
+	}
+#endif
+
+	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
+
+	/* It may be an special packet, error, unclean...
+	 * inverse of the return code tells to the netfilter
+	 * core what to do with the packet. */
+	if (proto->error != NULL 
+	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
+		CONNTRACK_STAT_INC(error);
+		CONNTRACK_STAT_INC(invalid);
+		return -ret;
+	}
+
+	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
+		/* Not valid part of a connection */
+		CONNTRACK_STAT_INC(invalid);
+		return NF_ACCEPT;
+	}
+
+	if (IS_ERR(ct)) {
+		/* Too stressed to deal. */
+		CONNTRACK_STAT_INC(drop);
+		return NF_DROP;
+	}
+
+	IP_NF_ASSERT((*pskb)->nfct);
+
+	ret = proto->packet(ct, *pskb, ctinfo);
+	if (ret < 0) {
+		/* Invalid: inverse of the return code tells
+		 * the netfilter core what to do*/
+		nf_conntrack_put((*pskb)->nfct);
+		(*pskb)->nfct = NULL;
+		CONNTRACK_STAT_INC(invalid);
+		return -ret;
+	}
+
+	if (set_reply)
+		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+
+	return ret;
+}
+
+int invert_tuplepr(struct ip_conntrack_tuple *inverse,
+		   const struct ip_conntrack_tuple *orig)
+{
+	return ip_ct_invert_tuple(inverse, orig, 
+				  ip_ct_find_proto(orig->dst.protonum));
+}
+
+/* Would two expected things clash? */
+static inline int expect_clash(const struct ip_conntrack_expect *a,
+			       const struct ip_conntrack_expect *b)
+{
+	/* Part covered by intersection of masks must be unequal,
+           otherwise they clash */
+	struct ip_conntrack_tuple intersect_mask
+		= { { a->mask.src.ip & b->mask.src.ip,
+		      { a->mask.src.u.all & b->mask.src.u.all } },
+		    { a->mask.dst.ip & b->mask.dst.ip,
+		      { a->mask.dst.u.all & b->mask.dst.u.all },
+		      a->mask.dst.protonum & b->mask.dst.protonum } };
+
+	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+}
+
+static inline int expect_matches(const struct ip_conntrack_expect *a,
+				 const struct ip_conntrack_expect *b)
+{
+	return a->master == b->master
+		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
+		&& ip_ct_tuple_equal(&a->mask, &b->mask);
+}
+
+/* Generally a bad idea to call this: could have matched already. */
+void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
+{
+	struct ip_conntrack_expect *i;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	/* choose the the oldest expectation to evict */
+	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+			unlink_expect(i);
+			WRITE_UNLOCK(&ip_conntrack_lock);
+			destroy_expect(i);
+			return;
+		}
+	}
+	WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
+struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
+{
+	struct ip_conntrack_expect *new;
+
+	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
+	if (!new) {
+		DEBUGP("expect_related: OOM allocating expect\n");
+		return NULL;
+	}
+	new->master = NULL;
+	return new;
+}
+
+void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
+{
+	kmem_cache_free(ip_conntrack_expect_cachep, expect);
+}
+
+static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
+{
+	atomic_inc(&exp->master->ct_general.use);
+	exp->master->expecting++;
+	list_add(&exp->list, &ip_conntrack_expect_list);
+
+	if (exp->master->helper->timeout) {
+		init_timer(&exp->timeout);
+		exp->timeout.data = (unsigned long)exp;
+		exp->timeout.function = expectation_timed_out;
+		exp->timeout.expires
+			= jiffies + exp->master->helper->timeout * HZ;
+		add_timer(&exp->timeout);
+	} else
+		exp->timeout.function = NULL;
+
+	CONNTRACK_STAT_INC(expect_create);
+}
+
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct ip_conntrack *master)
+{
+	struct ip_conntrack_expect *i;
+
+	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+		if (i->master == master) {
+			if (del_timer(&i->timeout)) {
+				unlink_expect(i);
+				destroy_expect(i);
+			}
+			break;
+		}
+	}
+}
+
+static inline int refresh_timer(struct ip_conntrack_expect *i)
+{
+	if (!del_timer(&i->timeout))
+		return 0;
+
+	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
+	add_timer(&i->timeout);
+	return 1;
+}
+
+int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
+{
+	struct ip_conntrack_expect *i;
+	int ret;
+
+	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
+	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
+	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+		if (expect_matches(i, expect)) {
+			/* Refresh timer: if it's dying, ignore.. */
+			if (refresh_timer(i)) {
+				ret = 0;
+				/* We don't need the one they've given us. */
+				ip_conntrack_expect_free(expect);
+				goto out;
+			}
+		} else if (expect_clash(i, expect)) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	/* Will be over limit? */
+	if (expect->master->helper->max_expected && 
+	    expect->master->expecting >= expect->master->helper->max_expected)
+		evict_oldest_expect(expect->master);
+
+	ip_conntrack_expect_insert(expect);
+	ret = 0;
+out:
+	WRITE_UNLOCK(&ip_conntrack_lock);
+ 	return ret;
+}
+
+/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
+   implicitly racy: see __ip_conntrack_confirm */
+void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
+			      const struct ip_conntrack_tuple *newreply)
+{
+	WRITE_LOCK(&ip_conntrack_lock);
+	/* Should be unconfirmed, so not in hash table yet */
+	IP_NF_ASSERT(!is_confirmed(conntrack));
+
+	DEBUGP("Altering reply tuple of %p to ", conntrack);
+	DUMP_TUPLE(newreply);
+
+	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+	if (!conntrack->master && conntrack->expecting == 0)
+		conntrack->helper = ip_ct_find_helper(newreply);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
+int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
+{
+	BUG_ON(me->timeout == 0);
+	WRITE_LOCK(&ip_conntrack_lock);
+	list_prepend(&helpers, me);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	return 0;
+}
+
+static inline int unhelp(struct ip_conntrack_tuple_hash *i,
+			 const struct ip_conntrack_helper *me)
+{
+	if (tuplehash_to_ctrack(i)->helper == me)
+		tuplehash_to_ctrack(i)->helper = NULL;
+	return 0;
+}
+
+void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
+{
+	unsigned int i;
+	struct ip_conntrack_expect *exp, *tmp;
+
+	/* Need write lock here, to delete helper. */
+	WRITE_LOCK(&ip_conntrack_lock);
+	LIST_DELETE(&helpers, me);
+
+	/* Get rid of expectations */
+	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+		if (exp->master->helper == me && del_timer(&exp->timeout)) {
+			unlink_expect(exp);
+			destroy_expect(exp);
+		}
+	}
+	/* Get rid of expecteds, set helpers to NULL. */
+	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
+	for (i = 0; i < ip_conntrack_htable_size; i++)
+		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
+			    struct ip_conntrack_tuple_hash *, me);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	/* Someone could be still looking at the helper in a bh. */
+	synchronize_net();
+}
+
+static inline void ct_add_counters(struct ip_conntrack *ct,
+				   enum ip_conntrack_info ctinfo,
+				   const struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_NF_CT_ACCT
+	if (skb) {
+		ct->counters[CTINFO2DIR(ctinfo)].packets++;
+		ct->counters[CTINFO2DIR(ctinfo)].bytes += 
+					ntohs(skb->nh.iph->tot_len);
+	}
+#endif
+}
+
+/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
+void ip_ct_refresh_acct(struct ip_conntrack *ct, 
+		        enum ip_conntrack_info ctinfo,
+			const struct sk_buff *skb,
+			unsigned long extra_jiffies)
+{
+	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+
+	/* If not in hash table, timer will not be active yet */
+	if (!is_confirmed(ct)) {
+		ct->timeout.expires = extra_jiffies;
+		ct_add_counters(ct, ctinfo, skb);
+	} else {
+		WRITE_LOCK(&ip_conntrack_lock);
+		/* Need del_timer for race avoidance (may already be dying). */
+		if (del_timer(&ct->timeout)) {
+			ct->timeout.expires = jiffies + extra_jiffies;
+			add_timer(&ct->timeout);
+		}
+		ct_add_counters(ct, ctinfo, skb);
+		WRITE_UNLOCK(&ip_conntrack_lock);
+	}
+}
+
+/* Returns new sk_buff, or NULL */
+struct sk_buff *
+ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+	struct sock *sk = skb->sk;
+#ifdef CONFIG_NETFILTER_DEBUG
+	unsigned int olddebug = skb->nf_debug;
+#endif
+
+	if (sk) {
+		sock_hold(sk);
+		skb_orphan(skb);
+	}
+
+	local_bh_disable(); 
+	skb = ip_defrag(skb, user);
+	local_bh_enable();
+
+	if (!skb) {
+		if (sk)
+			sock_put(sk);
+		return skb;
+	}
+
+	if (sk) {
+		skb_set_owner_w(skb, sk);
+		sock_put(sk);
+	}
+
+	ip_send_check(skb->nh.iph);
+	skb->nfcache |= NFC_ALTERED;
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Packet path as if nothing had happened. */
+	skb->nf_debug = olddebug;
+#endif
+	return skb;
+}
+
+/* Used by ipt_REJECT. */
+static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* This ICMP is in reverse direction to the packet which caused it */
+	ct = ip_conntrack_get(skb, &ctinfo);
+	
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
+	else
+		ctinfo = IP_CT_RELATED;
+
+	/* Attach to new skbuff, and increment count */
+	nskb->nfct = &ct->ct_general;
+	nskb->nfctinfo = ctinfo;
+	nf_conntrack_get(nskb->nfct);
+}
+
+static inline int
+do_iter(const struct ip_conntrack_tuple_hash *i,
+	int (*iter)(struct ip_conntrack *i, void *data),
+	void *data)
+{
+	return iter(tuplehash_to_ctrack(i), data);
+}
+
+/* Bring out ya dead! */
+static struct ip_conntrack_tuple_hash *
+get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
+		void *data, unsigned int *bucket)
+{
+	struct ip_conntrack_tuple_hash *h = NULL;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
+		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
+				struct ip_conntrack_tuple_hash *, iter, data);
+		if (h)
+			break;
+	}
+	if (!h)
+		h = LIST_FIND_W(&unconfirmed, do_iter,
+				struct ip_conntrack_tuple_hash *, iter, data);
+	if (h)
+		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	return h;
+}
+
+void
+ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
+{
+	struct ip_conntrack_tuple_hash *h;
+	unsigned int bucket = 0;
+
+	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
+		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+		/* Time to push up daises... */
+		if (del_timer(&ct->timeout))
+			death_by_timeout((unsigned long)ct);
+		/* ... else the timer will get him soon. */
+
+		ip_conntrack_put(ct);
+	}
+}
+
+/* Fast function for those who don't want to parse /proc (and I don't
+   blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+   mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack_tuple tuple;
+	
+	IP_CT_TUPLE_U_BLANK(&tuple);
+	tuple.src.ip = inet->rcv_saddr;
+	tuple.src.u.tcp.port = inet->sport;
+	tuple.dst.ip = inet->daddr;
+	tuple.dst.u.tcp.port = inet->dport;
+	tuple.dst.protonum = IPPROTO_TCP;
+
+	/* We only do TCP at the moment: is there a better way? */
+	if (strcmp(sk->sk_prot->name, "TCP")) {
+		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+		return -ENOPROTOOPT;
+	}
+
+	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
+		       *len, sizeof(struct sockaddr_in));
+		return -EINVAL;
+	}
+
+	h = ip_conntrack_find_get(&tuple, NULL);
+	if (h) {
+		struct sockaddr_in sin;
+		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u.tcp.port;
+		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.ip;
+
+		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+		ip_conntrack_put(ct);
+		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+			return -EFAULT;
+		else
+			return 0;
+	}
+	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+	return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+	.pf		= PF_INET,
+	.get_optmin	= SO_ORIGINAL_DST,
+	.get_optmax	= SO_ORIGINAL_DST+1,
+	.get		= &getorigdst,
+};
+
+static int kill_all(struct ip_conntrack *i, void *data)
+{
+	return 1;
+}
+
+static void free_conntrack_hash(void)
+{
+	if (ip_conntrack_vmalloc)
+		vfree(ip_conntrack_hash);
+	else
+		free_pages((unsigned long)ip_conntrack_hash, 
+			   get_order(sizeof(struct list_head)
+				     * ip_conntrack_htable_size));
+}
+
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
+{
+	ip_ct_attach = NULL;
+	/* This makes sure all current packets have passed through
+           netfilter framework.  Roll on, two-stage module
+           delete... */
+	synchronize_net();
+ 
+ i_see_dead_people:
+	ip_ct_iterate_cleanup(kill_all, NULL);
+	if (atomic_read(&ip_conntrack_count) != 0) {
+		schedule();
+		goto i_see_dead_people;
+	}
+
+	kmem_cache_destroy(ip_conntrack_cachep);
+	kmem_cache_destroy(ip_conntrack_expect_cachep);
+	free_conntrack_hash();
+	nf_unregister_sockopt(&so_getorigdst);
+}
+
+static int hashsize;
+module_param(hashsize, int, 0400);
+
+int __init ip_conntrack_init(void)
+{
+	unsigned int i;
+	int ret;
+
+	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
+	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
+ 	if (hashsize) {
+ 		ip_conntrack_htable_size = hashsize;
+ 	} else {
+		ip_conntrack_htable_size
+			= (((num_physpages << PAGE_SHIFT) / 16384)
+			   / sizeof(struct list_head));
+		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+			ip_conntrack_htable_size = 8192;
+		if (ip_conntrack_htable_size < 16)
+			ip_conntrack_htable_size = 16;
+	}
+	ip_conntrack_max = 8 * ip_conntrack_htable_size;
+
+	printk("ip_conntrack version %s (%u buckets, %d max)"
+	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
+	       ip_conntrack_htable_size, ip_conntrack_max,
+	       sizeof(struct ip_conntrack));
+
+	ret = nf_register_sockopt(&so_getorigdst);
+	if (ret != 0) {
+		printk(KERN_ERR "Unable to register netfilter socket option\n");
+		return ret;
+	}
+
+	/* AK: the hash table is twice as big than needed because it
+	   uses list_head.  it would be much nicer to caches to use a
+	   single pointer list head here. */
+	ip_conntrack_vmalloc = 0; 
+	ip_conntrack_hash 
+		=(void*)__get_free_pages(GFP_KERNEL, 
+					 get_order(sizeof(struct list_head)
+						   *ip_conntrack_htable_size));
+	if (!ip_conntrack_hash) { 
+		ip_conntrack_vmalloc = 1;
+		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
+		ip_conntrack_hash = vmalloc(sizeof(struct list_head)
+					    * ip_conntrack_htable_size);
+	}
+	if (!ip_conntrack_hash) {
+		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
+		goto err_unreg_sockopt;
+	}
+
+	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+	                                        sizeof(struct ip_conntrack), 0,
+	                                        0, NULL, NULL);
+	if (!ip_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+		goto err_free_hash;
+	}
+
+	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
+					sizeof(struct ip_conntrack_expect),
+					0, 0, NULL, NULL);
+	if (!ip_conntrack_expect_cachep) {
+		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
+		goto err_free_conntrack_slab;
+	}
+
+	/* Don't NEED lock here, but good form anyway. */
+	WRITE_LOCK(&ip_conntrack_lock);
+	for (i = 0; i < MAX_IP_CT_PROTO; i++)
+		ip_ct_protos[i] = &ip_conntrack_generic_protocol;
+	/* Sew in builtin protocols. */
+	ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
+	ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
+	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
+	WRITE_UNLOCK(&ip_conntrack_lock);
+
+	for (i = 0; i < ip_conntrack_htable_size; i++)
+		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
+
+	/* For use by ipt_REJECT */
+	ip_ct_attach = ip_conntrack_attach;
+
+	/* Set up fake conntrack:
+	    - to never be deleted, not in any hashes */
+	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+	/*  - and look it like as a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+
+	return ret;
+
+err_free_conntrack_slab:
+	kmem_cache_destroy(ip_conntrack_cachep);
+err_free_hash:
+	free_conntrack_hash();
+err_unreg_sockopt:
+	nf_unregister_sockopt(&so_getorigdst);
+
+	return -ENOMEM;
+}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
new file mode 100644
index 00000000000..12b88cbb11d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -0,0 +1,501 @@
+/* FTP extension for IP connection tracking. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell  
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/ctype.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+#include <linux/moduleparam.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp connection tracking helper");
+
+/* This is slow, but it's simple. --RR */
+static char ftp_buffer[65536];
+
+static DECLARE_LOCK(ip_ftp_lock);
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, int, &ports_c, 0400);
+
+static int loose;
+module_param(loose, int, 0600);
+
+unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+				enum ip_conntrack_info ctinfo,
+				enum ip_ct_ftp_type type,
+				unsigned int matchoff,
+				unsigned int matchlen,
+				struct ip_conntrack_expect *exp,
+				u32 *seq);
+EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int try_rfc959(const char *, size_t, u_int32_t [], char);
+static int try_eprt(const char *, size_t, u_int32_t [], char);
+static int try_epsv_response(const char *, size_t, u_int32_t [], char);
+
+static struct ftp_search {
+	enum ip_conntrack_dir dir;
+	const char *pattern;
+	size_t plen;
+	char skip;
+	char term;
+	enum ip_ct_ftp_type ftptype;
+	int (*getnum)(const char *, size_t, u_int32_t[], char);
+} search[] = {
+	{
+		IP_CT_DIR_ORIGINAL,
+		"PORT",	sizeof("PORT") - 1, ' ', '\r',
+		IP_CT_FTP_PORT,
+		try_rfc959,
+	},
+	{
+		IP_CT_DIR_REPLY,
+		"227 ",	sizeof("227 ") - 1, '(', ')',
+		IP_CT_FTP_PASV,
+		try_rfc959,
+	},
+	{
+		IP_CT_DIR_ORIGINAL,
+		"EPRT", sizeof("EPRT") - 1, ' ', '\r',
+		IP_CT_FTP_EPRT,
+		try_eprt,
+	},
+	{
+		IP_CT_DIR_REPLY,
+		"229 ", sizeof("229 ") - 1, '(', ')',
+		IP_CT_FTP_EPSV,
+		try_epsv_response,
+	},
+};
+
+static int try_number(const char *data, size_t dlen, u_int32_t array[],
+		      int array_size, char sep, char term)
+{
+	u_int32_t i, len;
+
+	memset(array, 0, sizeof(array[0])*array_size);
+
+	/* Keep data pointing at next char. */
+	for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
+		if (*data >= '0' && *data <= '9') {
+			array[i] = array[i]*10 + *data - '0';
+		}
+		else if (*data == sep)
+			i++;
+		else {
+			/* Unexpected character; true if it's the
+			   terminator and we're finished. */
+			if (*data == term && i == array_size - 1)
+				return len;
+
+			DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
+			       len, i, *data);
+			return 0;
+		}
+	}
+	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
+
+	return 0;
+}
+
+/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
+static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
+		       char term)
+{
+	return try_number(data, dlen, array, 6, ',', term);
+}
+
+/* Grab port: number up to delimiter */
+static int get_port(const char *data, int start, size_t dlen, char delim,
+		    u_int32_t array[2])
+{
+	u_int16_t port = 0;
+	int i;
+
+	for (i = start; i < dlen; i++) {
+		/* Finished? */
+		if (data[i] == delim) {
+			if (port == 0)
+				break;
+			array[0] = port >> 8;
+			array[1] = port;
+			return i + 1;
+		}
+		else if (data[i] >= '0' && data[i] <= '9')
+			port = port*10 + data[i] - '0';
+		else /* Some other crap */
+			break;
+	}
+	return 0;
+}
+
+/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
+static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
+		    char term)
+{
+	char delim;
+	int length;
+
+	/* First character is delimiter, then "1" for IPv4, then
+           delimiter again. */
+	if (dlen <= 3) return 0;
+	delim = data[0];
+	if (isdigit(delim) || delim < 33 || delim > 126
+	    || data[1] != '1' || data[2] != delim)
+		return 0;
+
+	DEBUGP("EPRT: Got |1|!\n");
+	/* Now we have IP address. */
+	length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
+	if (length == 0)
+		return 0;
+
+	DEBUGP("EPRT: Got IP address!\n");
+	/* Start offset includes initial "|1|", and trailing delimiter */
+	return get_port(data, 3 + length + 1, dlen, delim, array+4);
+}
+
+/* Returns 0, or length of numbers: |||6446| */
+static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
+			     char term)
+{
+	char delim;
+
+	/* Three delimiters. */
+	if (dlen <= 3) return 0;
+	delim = data[0];
+	if (isdigit(delim) || delim < 33 || delim > 126
+	    || data[1] != delim || data[2] != delim)
+		return 0;
+
+	return get_port(data, 3, dlen, delim, array+4);
+}
+
+/* Return 1 for match, 0 for accept, -1 for partial. */
+static int find_pattern(const char *data, size_t dlen,
+			const char *pattern, size_t plen,
+			char skip, char term,
+			unsigned int *numoff,
+			unsigned int *numlen,
+			u_int32_t array[6],
+			int (*getnum)(const char *, size_t, u_int32_t[], char))
+{
+	size_t i;
+
+	DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
+	if (dlen == 0)
+		return 0;
+
+	if (dlen <= plen) {
+		/* Short packet: try for partial? */
+		if (strnicmp(data, pattern, dlen) == 0)
+			return -1;
+		else return 0;
+	}
+
+	if (strnicmp(data, pattern, plen) != 0) {
+#if 0
+		size_t i;
+
+		DEBUGP("ftp: string mismatch\n");
+		for (i = 0; i < plen; i++) {
+			DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
+				i, data[i], data[i],
+				pattern[i], pattern[i]);
+		}
+#endif
+		return 0;
+	}
+
+	DEBUGP("Pattern matches!\n");
+	/* Now we've found the constant string, try to skip
+	   to the 'skip' character */
+	for (i = plen; data[i] != skip; i++)
+		if (i == dlen - 1) return -1;
+
+	/* Skip over the last character */
+	i++;
+
+	DEBUGP("Skipped up to `%c'!\n", skip);
+
+	*numoff = i;
+	*numlen = getnum(data + i, dlen - i, array, term);
+	if (!*numlen)
+		return -1;
+
+	DEBUGP("Match succeeded!\n");
+	return 1;
+}
+
+/* Look up to see if we're just after a \n. */
+static int find_nl_seq(u16 seq, const struct ip_ct_ftp_master *info, int dir)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
+		if (info->seq_aft_nl[dir][i] == seq)
+			return 1;
+	return 0;
+}
+
+/* We don't update if it's older than what we have. */
+static void update_nl_seq(u16 nl_seq, struct ip_ct_ftp_master *info, int dir)
+{
+	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
+
+	/* Look for oldest: if we find exact match, we're done. */
+	for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
+		if (info->seq_aft_nl[dir][i] == nl_seq)
+			return;
+
+		if (oldest == info->seq_aft_nl_num[dir]
+		    || before(info->seq_aft_nl[dir][i], oldest))
+			oldest = i;
+	}
+
+	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+	else if (oldest != NUM_SEQ_TO_REMEMBER)
+		info->seq_aft_nl[dir][oldest] = nl_seq;
+}
+
+static int help(struct sk_buff **pskb,
+		struct ip_conntrack *ct,
+		enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff, datalen;
+	struct tcphdr _tcph, *th;
+	char *fb_ptr;
+	int ret;
+	u32 seq, array[6] = { 0 };
+	int dir = CTINFO2DIR(ctinfo);
+	unsigned int matchlen, matchoff;
+	struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
+	struct ip_conntrack_expect *exp;
+	unsigned int i;
+	int found = 0, ends_in_nl;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED
+	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+		DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
+		return NF_ACCEPT;
+	}
+
+	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+
+	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+	/* No data? */
+	if (dataoff >= (*pskb)->len) {
+		DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
+		return NF_ACCEPT;
+	}
+	datalen = (*pskb)->len - dataoff;
+
+	LOCK_BH(&ip_ftp_lock);
+	fb_ptr = skb_header_pointer(*pskb, dataoff,
+				    (*pskb)->len - dataoff, ftp_buffer);
+	BUG_ON(fb_ptr == NULL);
+
+	ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+	seq = ntohl(th->seq) + datalen;
+
+	/* Look up to see if we're just after a \n. */
+	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+		/* Now if this ends in \n, update ftp info. */
+		DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
+		       ct_ftp_info->seq_aft_nl[0][dir] 
+		       old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
+		ret = NF_ACCEPT;
+		goto out_update_nl;
+	}
+
+	/* Initialize IP array to expected address (it's not mentioned
+           in EPSV responses) */
+	array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
+	array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
+	array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
+	array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
+
+	for (i = 0; i < ARRAY_SIZE(search); i++) {
+		if (search[i].dir != dir) continue;
+
+		found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
+				     search[i].pattern,
+				     search[i].plen,
+				     search[i].skip,
+				     search[i].term,
+				     &matchoff, &matchlen,
+				     array,
+				     search[i].getnum);
+		if (found) break;
+	}
+	if (found == -1) {
+		/* We don't usually drop packets.  After all, this is
+		   connection tracking, not packet filtering.
+		   However, it is necessary for accurate tracking in
+		   this case. */
+		if (net_ratelimit())
+			printk("conntrack_ftp: partial %s %u+%u\n",
+			       search[i].pattern,
+			       ntohl(th->seq), datalen);
+		ret = NF_DROP;
+		goto out;
+	} else if (found == 0) { /* No match */
+		ret = NF_ACCEPT;
+		goto out_update_nl;
+	}
+
+	DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
+	       fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
+			 
+	/* Allocate expectation which will be inserted */
+	exp = ip_conntrack_expect_alloc();
+	if (exp == NULL) {
+		ret = NF_DROP;
+		goto out;
+	}
+
+	/* We refer to the reverse direction ("!dir") tuples here,
+	 * because we're expecting something in the other direction.
+	 * Doesn't matter unless NAT is happening.  */
+	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
+
+	if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
+	    != ct->tuplehash[dir].tuple.src.ip) {
+		/* Enrico Scholz's passive FTP to partially RNAT'd ftp
+		   server: it really wants us to connect to a
+		   different IP address.  Simply don't record it for
+		   NAT. */
+		DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
+		       array[0], array[1], array[2], array[3],
+		       NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
+
+		/* Thanks to Cristiano Lincoln Mattos
+		   <lincoln@cesar.org.br> for reporting this potential
+		   problem (DMZ machines opening holes to internal
+		   networks, or the packet filter itself). */
+		if (!loose) {
+			ret = NF_ACCEPT;
+			ip_conntrack_expect_free(exp);
+			goto out_update_nl;
+		}
+		exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
+					 | (array[2] << 8) | array[3]);
+	}
+
+	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
+	exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
+	exp->tuple.src.u.tcp.port = 0; /* Don't care. */
+	exp->tuple.dst.protonum = IPPROTO_TCP;
+	exp->mask = ((struct ip_conntrack_tuple)
+		{ { 0xFFFFFFFF, { 0 } },
+		  { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+
+	exp->expectfn = NULL;
+	exp->master = ct;
+
+	/* Now, NAT might want to mangle the packet, and register the
+	 * (possibly changed) expectation itself. */
+	if (ip_nat_ftp_hook)
+		ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+				      matchoff, matchlen, exp, &seq);
+	else {
+		/* Can't expect this?  Best to drop packet now. */
+		if (ip_conntrack_expect_related(exp) != 0) {
+			ip_conntrack_expect_free(exp);
+			ret = NF_DROP;
+		} else
+			ret = NF_ACCEPT;
+	}
+
+out_update_nl:
+	/* Now if this ends in \n, update ftp info.  Seq may have been
+	 * adjusted by NAT code. */
+	if (ends_in_nl)
+		update_nl_seq(seq, ct_ftp_info,dir);
+ out:
+	UNLOCK_BH(&ip_ftp_lock);
+	return ret;
+}
+
+static struct ip_conntrack_helper ftp[MAX_PORTS];
+static char ftp_names[MAX_PORTS][10];
+
+/* Not __exit: called from init() */
+static void fini(void)
+{
+	int i;
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
+				ports[i]);
+		ip_conntrack_helper_unregister(&ftp[i]);
+	}
+}
+
+static int __init init(void)
+{
+	int i, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = FTP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
+		ftp[i].tuple.dst.protonum = IPPROTO_TCP;
+		ftp[i].mask.src.u.tcp.port = 0xFFFF;
+		ftp[i].mask.dst.protonum = 0xFF;
+		ftp[i].max_expected = 1;
+		ftp[i].timeout = 5 * 60; /* 5 minutes */
+		ftp[i].me = THIS_MODULE;
+		ftp[i].help = help;
+
+		tmpname = &ftp_names[i][0];
+		if (ports[i] == FTP_PORT)
+			sprintf(tmpname, "ftp");
+		else
+			sprintf(tmpname, "ftp-%d", ports[i]);
+		ftp[i].name = tmpname;
+
+		DEBUGP("ip_ct_ftp: registering helper for port %d\n", 
+				ports[i]);
+		ret = ip_conntrack_helper_register(&ftp[i]);
+
+		if (ret) {
+			fini();
+			return ret;
+		}
+	}
+	return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
new file mode 100644
index 00000000000..33cc7348b6e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -0,0 +1,313 @@
+/* IRC extension for IP connection tracking, Version 1.21
+ * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * based on RR's ip_conntrack_ftp.c	
+ *
+ * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ **
+ *	Module load syntax:
+ * 	insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
+ *			    max_dcc_channels=n dcc_timeout=secs
+ *	
+ * 	please give the ports of all IRC servers You wish to connect to.
+ *	If You don't specify ports, the default will be port 6667.
+ *	With max_dcc_channels you can define the maximum number of not
+ *	yet answered DCC channels per IRC session (default 8).
+ *	With dcc_timeout you can specify how long the system waits for 
+ *	an expected DCC channel (default 300 seconds).
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+#include <linux/moduleparam.h>
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+static int max_dcc_channels = 8;
+static unsigned int dcc_timeout = 300;
+/* This is slow, but it's simple. --RR */
+static char irc_buffer[65536];
+static DECLARE_LOCK(irc_buffer_lock);
+
+unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
+				enum ip_conntrack_info ctinfo,
+				unsigned int matchoff,
+				unsigned int matchlen,
+				struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
+MODULE_LICENSE("GPL");
+module_param_array(ports, int, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of IRC servers");
+module_param(max_dcc_channels, int, 0400);
+MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
+module_param(dcc_timeout, int, 0400);
+MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
+
+static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
+#define MINMATCHLEN	5
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
+                                       __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
+		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
+/* tries to get the ip_addr and port out of a dcc command
+   return value: -1 on failure, 0 on success 
+	data		pointer to first byte of DCC command data
+	data_end	pointer to last byte of dcc command data
+	ip		returns parsed ip of dcc command
+	port		returns parsed port of dcc command
+	ad_beg_p	returns pointer to first byte of addr data
+	ad_end_p	returns pointer to last byte of addr data */
+{
+
+	/* at least 12: "AAAAAAAA P\1\n" */
+	while (*data++ != ' ')
+		if (data > data_end - 12)
+			return -1;
+
+	*ad_beg_p = data;
+	*ip = simple_strtoul(data, &data, 10);
+
+	/* skip blanks between ip and port */
+	while (*data == ' ') {
+		if (data >= data_end) 
+			return -1;
+		data++;
+	}
+
+	*port = simple_strtoul(data, &data, 10);
+	*ad_end_p = data;
+
+	return 0;
+}
+
+static int help(struct sk_buff **pskb,
+		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff;
+	struct tcphdr _tcph, *th;
+	char *data, *data_limit, *ib_ptr;
+	int dir = CTINFO2DIR(ctinfo);
+	struct ip_conntrack_expect *exp;
+	u32 seq;
+	u_int32_t dcc_ip;
+	u_int16_t dcc_port;
+	int i, ret = NF_ACCEPT;
+	char *addr_beg_p, *addr_end_p;
+
+	DEBUGP("entered\n");
+
+	/* If packet is coming from IRC server */
+	if (dir == IP_CT_DIR_REPLY)
+		return NF_ACCEPT;
+
+	/* Until there's been traffic both ways, don't look in packets. */
+	if (ctinfo != IP_CT_ESTABLISHED
+	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
+		DEBUGP("Conntrackinfo = %u\n", ctinfo);
+		return NF_ACCEPT;
+	}
+
+	/* Not a full tcp header? */
+	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+
+	/* No data? */
+	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+	if (dataoff >= (*pskb)->len)
+		return NF_ACCEPT;
+
+	LOCK_BH(&irc_buffer_lock);
+	ib_ptr = skb_header_pointer(*pskb, dataoff,
+				    (*pskb)->len - dataoff, irc_buffer);
+	BUG_ON(ib_ptr == NULL);
+
+	data = ib_ptr;
+	data_limit = ib_ptr + (*pskb)->len - dataoff;
+
+	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
+	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
+	while (data < (data_limit - (19 + MINMATCHLEN))) {
+		if (memcmp(data, "\1DCC ", 5)) {
+			data++;
+			continue;
+		}
+
+		data += 5;
+		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+
+		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
+			NIPQUAD(iph->saddr), ntohs(th->source),
+			NIPQUAD(iph->daddr), ntohs(th->dest));
+
+		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
+			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
+				/* no match */
+				continue;
+			}
+
+			DEBUGP("DCC %s detected\n", dccprotos[i]);
+			data += strlen(dccprotos[i]);
+			/* we have at least 
+			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+			 * data left (== 14/13 bytes) */
+			if (parse_dcc((char *)data, data_limit, &dcc_ip,
+				       &dcc_port, &addr_beg_p, &addr_end_p)) {
+				/* unable to parse */
+				DEBUGP("unable to parse dcc command\n");
+				continue;
+			}
+			DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
+				HIPQUAD(dcc_ip), dcc_port);
+
+			/* dcc_ip can be the internal OR external (NAT'ed) IP
+			 * Tiago Sousa <mirage@kaotik.org> */
+			if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
+			    && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+						"Forged DCC command from "
+						"%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
+				NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
+						HIPQUAD(dcc_ip), dcc_port);
+
+				continue;
+			}
+
+			exp = ip_conntrack_expect_alloc();
+			if (exp == NULL) {
+				ret = NF_DROP;
+				goto out;
+			}
+
+			/* save position of address in dcc string,
+			 * necessary for NAT */
+			DEBUGP("tcph->seq = %u\n", th->seq);
+			seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
+
+			/* We refer to the reverse direction ("!dir")
+			 * tuples here, because we're expecting
+			 * something in the other * direction.
+			 * Doesn't matter unless NAT is happening.  */
+			exp->tuple = ((struct ip_conntrack_tuple)
+				{ { 0, { 0 } },
+				  { ct->tuplehash[!dir].tuple.dst.ip,
+				    { .tcp = { htons(dcc_port) } },
+				    IPPROTO_TCP }});
+			exp->mask = ((struct ip_conntrack_tuple)
+				{ { 0, { 0 } },
+				  { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+			exp->expectfn = NULL;
+			exp->master = ct;
+			if (ip_nat_irc_hook)
+				ret = ip_nat_irc_hook(pskb, ctinfo, 
+						      addr_beg_p - ib_ptr,
+						      addr_end_p - addr_beg_p,
+						      exp);
+			else if (ip_conntrack_expect_related(exp) != 0) {
+				ip_conntrack_expect_free(exp);
+				ret = NF_DROP;
+			}
+			goto out;
+		} /* for .. NUM_DCCPROTO */
+	} /* while data < ... */
+
+ out:
+	UNLOCK_BH(&irc_buffer_lock);
+	return ret;
+}
+
+static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
+static char irc_names[MAX_PORTS][10];
+
+static void fini(void);
+
+static int __init init(void)
+{
+	int i, ret;
+	struct ip_conntrack_helper *hlpr;
+	char *tmpname;
+
+	if (max_dcc_channels < 1) {
+		printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
+		return -EBUSY;
+	}
+	if (dcc_timeout < 0) {
+		printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
+		return -EBUSY;
+	}
+	
+	/* If no port given, default to standard irc port */
+	if (ports_c == 0)
+		ports[ports_c++] = IRC_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		hlpr = &irc_helpers[i];
+		hlpr->tuple.src.u.tcp.port = htons(ports[i]);
+		hlpr->tuple.dst.protonum = IPPROTO_TCP;
+		hlpr->mask.src.u.tcp.port = 0xFFFF;
+		hlpr->mask.dst.protonum = 0xFF;
+		hlpr->max_expected = max_dcc_channels;
+		hlpr->timeout = dcc_timeout;
+		hlpr->me = THIS_MODULE;
+		hlpr->help = help;
+
+		tmpname = &irc_names[i][0];
+		if (ports[i] == IRC_PORT)
+			sprintf(tmpname, "irc");
+		else
+			sprintf(tmpname, "irc-%d", i);
+		hlpr->name = tmpname;
+
+		DEBUGP("port #%d: %d\n", i, ports[i]);
+
+		ret = ip_conntrack_helper_register(hlpr);
+
+		if (ret) {
+			printk("ip_conntrack_irc: ERROR registering port %d\n",
+				ports[i]);
+			fini();
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+/* This function is intentionally _NOT_ defined as __exit, because 
+ * it is needed by the init function */
+static void fini(void)
+{
+	int i;
+	for (i = 0; i < ports_c; i++) {
+		DEBUGP("unregistering port %d\n",
+		       ports[i]);
+		ip_conntrack_helper_unregister(&irc_helpers[i]);
+	}
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
new file mode 100644
index 00000000000..88c3712bd25
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -0,0 +1,75 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_generic_timeout = 600*HZ;
+
+static int generic_pkt_to_tuple(const struct sk_buff *skb,
+				unsigned int dataoff,
+				struct ip_conntrack_tuple *tuple)
+{
+	tuple->src.u.all = 0;
+	tuple->dst.u.all = 0;
+
+	return 1;
+}
+
+static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
+				const struct ip_conntrack_tuple *orig)
+{
+	tuple->src.u.all = 0;
+	tuple->dst.u.all = 0;
+
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int generic_print_tuple(struct seq_file *s,
+			       const struct ip_conntrack_tuple *tuple)
+{
+	return 0;
+}
+
+/* Print out the private part of the conntrack. */
+static int generic_print_conntrack(struct seq_file *s,
+				   const struct ip_conntrack *state)
+{
+	return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int packet(struct ip_conntrack *conntrack,
+		  const struct sk_buff *skb,
+		  enum ip_conntrack_info ctinfo)
+{
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+{
+	return 1;
+}
+
+struct ip_conntrack_protocol ip_conntrack_generic_protocol =
+{
+	.proto			= 0,
+	.name			= "unknown",
+	.pkt_to_tuple		= generic_pkt_to_tuple,
+	.invert_tuple		= generic_invert_tuple,
+	.print_tuple		= generic_print_tuple,
+	.print_conntrack	= generic_print_conntrack,
+	.packet			= packet,
+	.new			= new,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
new file mode 100644
index 00000000000..602c74db325
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -0,0 +1,279 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_icmp_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int icmp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct ip_conntrack_tuple *tuple)
+{
+	struct icmphdr _hdr, *hp;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return 0;
+
+	tuple->dst.u.icmp.type = hp->type;
+	tuple->src.u.icmp.id = hp->un.echo.id;
+	tuple->dst.u.icmp.code = hp->code;
+
+	return 1;
+}
+
+static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
+			     const struct ip_conntrack_tuple *orig)
+{
+	/* Add 1; spaces filled with 0. */
+	static u_int8_t invmap[]
+		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
+
+	if (orig->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[orig->dst.u.icmp.type])
+		return 0;
+
+	tuple->src.u.icmp.id = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmp_print_tuple(struct seq_file *s,
+			    const struct ip_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "type=%u code=%u id=%u ",
+			  tuple->dst.u.icmp.type,
+			  tuple->dst.u.icmp.code,
+			  ntohs(tuple->src.u.icmp.id));
+}
+
+/* Print out the private part of the conntrack. */
+static int icmp_print_conntrack(struct seq_file *s,
+				const struct ip_conntrack *conntrack)
+{
+	return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct ip_conntrack *ct,
+		       const struct sk_buff *skb,
+		       enum ip_conntrack_info ctinfo)
+{
+	/* Try to delete connection immediately after all replies:
+           won't actually vanish as we still have skb, and del_timer
+           means this will only run once even if count hits zero twice
+           (theoretically possible with SMP) */
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+		if (atomic_dec_and_test(&ct->proto.icmp.count)
+		    && del_timer(&ct->timeout))
+			ct->timeout.function((unsigned long)ct);
+	} else {
+		atomic_inc(&ct->proto.icmp.count);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int icmp_new(struct ip_conntrack *conntrack,
+		    const struct sk_buff *skb)
+{
+	static u_int8_t valid_new[]
+		= { [ICMP_ECHO] = 1,
+		    [ICMP_TIMESTAMP] = 1,
+		    [ICMP_INFO_REQUEST] = 1,
+		    [ICMP_ADDRESS] = 1 };
+
+	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
+	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+		/* Can't create a new ICMP `conn' with this. */
+		DEBUGP("icmp: can't create new conn with type %u\n",
+		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+		return 0;
+	}
+	atomic_set(&conntrack->proto.icmp.count, 0);
+	return 1;
+}
+
+static int
+icmp_error_message(struct sk_buff *skb,
+		   enum ip_conntrack_info *ctinfo,
+		   unsigned int hooknum)
+{
+	struct ip_conntrack_tuple innertuple, origtuple;
+	struct {
+		struct icmphdr icmp;
+		struct iphdr ip;
+	} _in, *inside;
+	struct ip_conntrack_protocol *innerproto;
+	struct ip_conntrack_tuple_hash *h;
+	int dataoff;
+
+	IP_NF_ASSERT(skb->nfct == NULL);
+
+	/* Not enough header? */
+	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	if (inside == NULL)
+		return NF_ACCEPT;
+
+	/* Ignore ICMP's containing fragments (shouldn't happen) */
+	if (inside->ip.frag_off & htons(IP_OFFSET)) {
+		DEBUGP("icmp_error_track: fragment of proto %u\n",
+		       inside->ip.protocol);
+		return NF_ACCEPT;
+	}
+
+	innerproto = ip_ct_find_proto(inside->ip.protocol);
+	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
+	/* Are they talking about one of our connections? */
+	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
+		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
+		return NF_ACCEPT;
+	}
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+		DEBUGP("icmp_error_track: Can't invert tuple\n");
+		return NF_ACCEPT;
+	}
+
+	*ctinfo = IP_CT_RELATED;
+
+	h = ip_conntrack_find_get(&innertuple, NULL);
+	if (!h) {
+		/* Locally generated ICMPs will match inverted if they
+		   haven't been SNAT'ed yet */
+		/* FIXME: NAT code has to handle half-done double NAT --RR */
+		if (hooknum == NF_IP_LOCAL_OUT)
+			h = ip_conntrack_find_get(&origtuple, NULL);
+
+		if (!h) {
+			DEBUGP("icmp_error_track: no match\n");
+			return NF_ACCEPT;
+		}
+		/* Reverse direction from that found */
+		if (DIRECTION(h) != IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	} else {
+		if (DIRECTION(h) == IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection */
+	skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return -NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+	   unsigned int hooknum)
+{
+	struct icmphdr _ih, *icmph;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	if (icmph == NULL) {
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				      "ip_ct_icmp: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	/* See ip_conntrack_proto_tcp.c */
+	if (hooknum != NF_IP_PRE_ROUTING)
+		goto checksum_skipped;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum)) 
+			break;
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				      "ip_ct_icmp: bad HW ICMP checksum ");
+		return -NF_ACCEPT;
+	case CHECKSUM_NONE:
+		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+			if (LOG_INVALID(IPPROTO_ICMP))
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+					      "ip_ct_icmp: bad ICMP checksum ");
+			return -NF_ACCEPT;
+		}
+	default:
+		break;
+	}
+
+checksum_skipped:
+	/*
+	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+	 *		  discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES) {
+		if (LOG_INVALID(IPPROTO_ICMP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				      "ip_ct_icmp: invalid ICMP type ");
+		return -NF_ACCEPT;
+	}
+
+	/* Need to track icmp error message? */
+	if (icmph->type != ICMP_DEST_UNREACH
+	    && icmph->type != ICMP_SOURCE_QUENCH
+	    && icmph->type != ICMP_TIME_EXCEEDED
+	    && icmph->type != ICMP_PARAMETERPROB
+	    && icmph->type != ICMP_REDIRECT)
+		return NF_ACCEPT;
+
+	return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
+{
+	.proto 			= IPPROTO_ICMP,
+	.name 			= "icmp",
+	.pkt_to_tuple		= icmp_pkt_to_tuple,
+	.invert_tuple		= icmp_invert_tuple,
+	.print_tuple		= icmp_print_tuple,
+	.print_conntrack	= icmp_print_conntrack,
+	.packet			= icmp_packet,
+	.new			= icmp_new,
+	.error			= icmp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
new file mode 100644
index 00000000000..ff8c34a860f
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -0,0 +1,649 @@
+/*
+ * Connection tracking protocol helper module for SCTP.
+ * 
+ * SCTP is defined in RFC 2960. References to various sections in this code 
+ * are to this RFC.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Added support for proc manipulation of timeouts.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#if 0
+#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.sctp */
+static DECLARE_RWLOCK(sctp_lock);
+
+/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+   closely.  They're more complex. --RR 
+
+   And so for me for SCTP :D -Kiran */
+
+static const char *sctp_conntrack_names[] = {
+	"NONE",
+	"CLOSED",
+	"COOKIE_WAIT",
+	"COOKIE_ECHOED",
+	"ESTABLISHED",
+	"SHUTDOWN_SENT",
+	"SHUTDOWN_RECD",
+	"SHUTDOWN_ACK_SENT",
+};
+
+#define SECS  * HZ
+#define MINS  * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS  * 24 HOURS
+
+static unsigned long ip_ct_sctp_timeout_closed            =  10 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
+static unsigned long ip_ct_sctp_timeout_established       =   5 DAYS;
+static unsigned long ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+
+static unsigned long * sctp_timeouts[]
+= { NULL,                                  /* SCTP_CONNTRACK_NONE  */
+    &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
+    &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
+    &ip_ct_sctp_timeout_cookie_echoed,     /* SCTP_CONNTRACK_COOKIE_ECHOED */
+    &ip_ct_sctp_timeout_established,       /* SCTP_CONNTRACK_ESTABLISHED */
+    &ip_ct_sctp_timeout_shutdown_sent,     /* SCTP_CONNTRACK_SHUTDOWN_SENT */
+    &ip_ct_sctp_timeout_shutdown_recd,     /* SCTP_CONNTRACK_SHUTDOWN_RECD */
+    &ip_ct_sctp_timeout_shutdown_ack_sent  /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
+ };
+
+#define sNO SCTP_CONNTRACK_NONE
+#define	sCL SCTP_CONNTRACK_CLOSED
+#define	sCW SCTP_CONNTRACK_COOKIE_WAIT
+#define	sCE SCTP_CONNTRACK_COOKIE_ECHOED
+#define	sES SCTP_CONNTRACK_ESTABLISHED
+#define	sSS SCTP_CONNTRACK_SHUTDOWN_SENT
+#define	sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+#define	sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define	sIV SCTP_CONNTRACK_MAX
+
+/* 
+	These are the descriptions of the states:
+
+NOTE: These state names are tantalizingly similar to the states of an 
+SCTP endpoint. But the interpretation of the states is a little different,
+considering that these are the states of the connection and not of an end 
+point. Please note the subtleties. -Kiran
+
+NONE              - Nothing so far.
+COOKIE WAIT       - We have seen an INIT chunk in the original direction, or also 
+                    an INIT_ACK chunk in the reply direction.
+COOKIE ECHOED     - We have seen a COOKIE_ECHO chunk in the original direction.
+ESTABLISHED       - We have seen a COOKIE_ACK in the reply direction.
+SHUTDOWN_SENT     - We have seen a SHUTDOWN chunk in the original direction.
+SHUTDOWN_RECD     - We have seen a SHUTDOWN chunk in the reply directoin.
+SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+                    to that of the SHUTDOWN chunk.
+CLOSED            - We have seen a SHUTDOWN_COMPLETE chunk in the direction of 
+                    the SHUTDOWN chunk. Connection is closed.
+*/
+
+/* TODO
+ - I have assumed that the first INIT is in the original direction. 
+ This messes things when an INIT comes in the reply direction in CLOSED
+ state.
+ - Check the error type in the reply dir before transitioning from 
+cookie echoed to closed.
+ - Sec 5.2.4 of RFC 2960
+ - Multi Homing support.
+*/
+
+/* SCTP conntrack state transitions */
+static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+	{
+/*	ORIGINAL	*/
+/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init         */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
+/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
+/* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
+/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+	},
+	{
+/*	REPLY	*/
+/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
+/* init         */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
+/* init_ack     */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
+/* abort        */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
+/* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
+/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
+/* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+	}
+};
+
+static int sctp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct ip_conntrack_tuple *tuple)
+{
+	sctp_sctphdr_t _hdr, *hp;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	/* Actually only need first 8 bytes. */
+	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+	if (hp == NULL)
+		return 0;
+
+	tuple->src.u.sctp.port = hp->source;
+	tuple->dst.u.sctp.port = hp->dest;
+	return 1;
+}
+
+static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
+			     const struct ip_conntrack_tuple *orig)
+{
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
+	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int sctp_print_tuple(struct seq_file *s,
+			    const struct ip_conntrack_tuple *tuple)
+{
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	return seq_printf(s, "sport=%hu dport=%hu ",
+			  ntohs(tuple->src.u.sctp.port),
+			  ntohs(tuple->dst.u.sctp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int sctp_print_conntrack(struct seq_file *s,
+				const struct ip_conntrack *conntrack)
+{
+	enum sctp_conntrack state;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	READ_LOCK(&sctp_lock);
+	state = conntrack->proto.sctp.state;
+	READ_UNLOCK(&sctp_lock);
+
+	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
+}
+
+#define for_each_sctp_chunk(skb, sch, _sch, offset, count)		\
+for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0;	\
+	offset < skb->len &&						\
+	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
+	offset += (htons(sch->length) + 3) & ~3, count++)
+
+/* Some validity checks to make sure the chunks are fine */
+static int do_basic_checks(struct ip_conntrack *conntrack,
+			   const struct sk_buff *skb,
+			   char *map)
+{
+	u_int32_t offset, count;
+	sctp_chunkhdr_t _sch, *sch;
+	int flag;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	flag = 0;
+
+	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+		DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);
+
+		if (sch->type == SCTP_CID_INIT 
+			|| sch->type == SCTP_CID_INIT_ACK
+			|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+			flag = 1;
+		}
+
+		/* Cookie Ack/Echo chunks not the first OR 
+		   Init / Init Ack / Shutdown compl chunks not the only chunks */
+		if ((sch->type == SCTP_CID_COOKIE_ACK 
+			|| sch->type == SCTP_CID_COOKIE_ECHO
+			|| flag)
+		     && count !=0 ) {
+			DEBUGP("Basic checks failed\n");
+			return 1;
+		}
+
+		if (map) {
+			set_bit(sch->type, (void *)map);
+		}
+	}
+
+	DEBUGP("Basic checks passed\n");
+	return 0;
+}
+
+static int new_state(enum ip_conntrack_dir dir,
+		     enum sctp_conntrack cur_state,
+		     int chunk_type)
+{
+	int i;
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	DEBUGP("Chunk type: %d\n", chunk_type);
+
+	switch (chunk_type) {
+		case SCTP_CID_INIT: 
+			DEBUGP("SCTP_CID_INIT\n");
+			i = 0; break;
+		case SCTP_CID_INIT_ACK: 
+			DEBUGP("SCTP_CID_INIT_ACK\n");
+			i = 1; break;
+		case SCTP_CID_ABORT: 
+			DEBUGP("SCTP_CID_ABORT\n");
+			i = 2; break;
+		case SCTP_CID_SHUTDOWN: 
+			DEBUGP("SCTP_CID_SHUTDOWN\n");
+			i = 3; break;
+		case SCTP_CID_SHUTDOWN_ACK: 
+			DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
+			i = 4; break;
+		case SCTP_CID_ERROR: 
+			DEBUGP("SCTP_CID_ERROR\n");
+			i = 5; break;
+		case SCTP_CID_COOKIE_ECHO: 
+			DEBUGP("SCTP_CID_COOKIE_ECHO\n");
+			i = 6; break;
+		case SCTP_CID_COOKIE_ACK: 
+			DEBUGP("SCTP_CID_COOKIE_ACK\n");
+			i = 7; break;
+		case SCTP_CID_SHUTDOWN_COMPLETE: 
+			DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
+			i = 8; break;
+		default:
+			/* Other chunks like DATA, SACK, HEARTBEAT and
+			its ACK do not cause a change in state */
+			DEBUGP("Unknown chunk type, Will stay in %s\n", 
+						sctp_conntrack_names[cur_state]);
+			return cur_state;
+	}
+
+	DEBUGP("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n", 
+			dir, sctp_conntrack_names[cur_state], chunk_type,
+			sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
+
+	return sctp_conntracks[dir][i][cur_state];
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int sctp_packet(struct ip_conntrack *conntrack,
+		       const struct sk_buff *skb,
+		       enum ip_conntrack_info ctinfo)
+{
+	enum sctp_conntrack newconntrack, oldsctpstate;
+	struct iphdr *iph = skb->nh.iph;
+	sctp_sctphdr_t _sctph, *sh;
+	sctp_chunkhdr_t _sch, *sch;
+	u_int32_t offset, count;
+	char map[256 / sizeof (char)] = {0};
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
+	if (sh == NULL)
+		return -1;
+
+	if (do_basic_checks(conntrack, skb, map) != 0)
+		return -1;
+
+	/* Check the verification tag (Sec 8.5) */
+	if (!test_bit(SCTP_CID_INIT, (void *)map)
+		&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
+		&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
+		&& !test_bit(SCTP_CID_ABORT, (void *)map)
+		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
+		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+		DEBUGP("Verification tag check failed\n");
+		return -1;
+	}
+
+	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
+	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+		WRITE_LOCK(&sctp_lock);
+
+		/* Special cases of Verification tag check (Sec 8.5.1) */
+		if (sch->type == SCTP_CID_INIT) {
+			/* Sec 8.5.1 (A) */
+			if (sh->vtag != 0) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch->type == SCTP_CID_ABORT) {
+			/* Sec 8.5.1 (B) */
+			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+				&& !(sh->vtag == conntrack->proto.sctp.vtag
+							[1 - CTINFO2DIR(ctinfo)])) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+			/* Sec 8.5.1 (C) */
+			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
+				&& !(sh->vtag == conntrack->proto.sctp.vtag
+							[1 - CTINFO2DIR(ctinfo)] 
+					&& (sch->flags & 1))) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+			/* Sec 8.5.1 (D) */
+			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
+				WRITE_UNLOCK(&sctp_lock);
+				return -1;
+			}
+		}
+
+		oldsctpstate = conntrack->proto.sctp.state;
+		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
+
+		/* Invalid */
+		if (newconntrack == SCTP_CONNTRACK_MAX) {
+			DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
+			       CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
+			WRITE_UNLOCK(&sctp_lock);
+			return -1;
+		}
+
+		/* If it is an INIT or an INIT ACK note down the vtag */
+		if (sch->type == SCTP_CID_INIT 
+			|| sch->type == SCTP_CID_INIT_ACK) {
+			sctp_inithdr_t _inithdr, *ih;
+
+			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+			                        sizeof(_inithdr), &_inithdr);
+			if (ih == NULL) {
+					WRITE_UNLOCK(&sctp_lock);
+					return -1;
+			}
+			DEBUGP("Setting vtag %x for dir %d\n", 
+					ih->init_tag, !CTINFO2DIR(ctinfo));
+			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+		}
+
+		conntrack->proto.sctp.state = newconntrack;
+		WRITE_UNLOCK(&sctp_lock);
+	}
+
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+
+	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
+		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
+		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+		DEBUGP("Setting assured bit\n");
+		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int sctp_new(struct ip_conntrack *conntrack, 
+		    const struct sk_buff *skb)
+{
+	enum sctp_conntrack newconntrack;
+	struct iphdr *iph = skb->nh.iph;
+	sctp_sctphdr_t _sctph, *sh;
+	sctp_chunkhdr_t _sch, *sch;
+	u_int32_t offset, count;
+	char map[256 / sizeof (char)] = {0};
+
+	DEBUGP(__FUNCTION__);
+	DEBUGP("\n");
+
+	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
+	if (sh == NULL)
+		return 0;
+
+	if (do_basic_checks(conntrack, skb, map) != 0)
+		return 0;
+
+	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+	if ((test_bit (SCTP_CID_ABORT, (void *)map))
+		|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
+		|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+		return 0;
+	}
+
+	newconntrack = SCTP_CONNTRACK_MAX;
+	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
+		/* Don't need lock here: this conntrack not in circulation yet */
+		newconntrack = new_state (IP_CT_DIR_ORIGINAL, 
+						SCTP_CONNTRACK_NONE, sch->type);
+
+		/* Invalid: delete conntrack */
+		if (newconntrack == SCTP_CONNTRACK_MAX) {
+			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
+			return 0;
+		}
+
+		/* Copy the vtag into the state info */
+		if (sch->type == SCTP_CID_INIT) {
+			if (sh->vtag == 0) {
+				sctp_inithdr_t _inithdr, *ih;
+
+				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+				                        sizeof(_inithdr), &_inithdr);
+				if (ih == NULL)
+					return 0;
+
+				DEBUGP("Setting vtag %x for new conn\n", 
+					ih->init_tag);
+
+				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = 
+								ih->init_tag;
+			} else {
+				/* Sec 8.5.1 (A) */
+				return 0;
+			}
+		}
+		/* If it is a shutdown ack OOTB packet, we expect a return
+		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+		else {
+			DEBUGP("Setting vtag %x for new conn OOTB\n", 
+				sh->vtag);
+			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+		}
+
+		conntrack->proto.sctp.state = newconntrack;
+	}
+
+	return 1;
+}
+
+static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { 
+	.proto 		 = IPPROTO_SCTP, 
+	.name 		 = "sctp",
+	.pkt_to_tuple 	 = sctp_pkt_to_tuple, 
+	.invert_tuple 	 = sctp_invert_tuple, 
+	.print_tuple 	 = sctp_print_tuple, 
+	.print_conntrack = sctp_print_conntrack,
+	.packet 	 = sctp_packet, 
+	.new 		 = sctp_new, 
+	.destroy 	 = NULL, 
+	.me 		 = THIS_MODULE 
+};
+
+#ifdef CONFIG_SYSCTL
+static ctl_table ip_ct_sysctl_table[] = {
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
+		.procname	= "ip_conntrack_sctp_timeout_closed",
+		.data		= &ip_ct_sctp_timeout_closed,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
+		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
+		.data		= &ip_ct_sctp_timeout_cookie_wait,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
+		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
+		.data		= &ip_ct_sctp_timeout_cookie_echoed,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
+		.procname	= "ip_conntrack_sctp_timeout_established",
+		.data		= &ip_ct_sctp_timeout_established,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
+		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
+		.data		= &ip_ct_sctp_timeout_shutdown_sent,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
+		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
+		.data		= &ip_ct_sctp_timeout_shutdown_recd,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
+		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
+		.data		= &ip_ct_sctp_timeout_shutdown_ack_sent,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_netfilter_table[] = {
+	{
+		.ctl_name	= NET_IPV4_NETFILTER,
+		.procname	= "netfilter",
+		.mode		= 0555,
+		.child		= ip_ct_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_ipv4_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= ip_ct_netfilter_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_net_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555, 
+		.child		= ip_ct_ipv4_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header *ip_ct_sysctl_header;
+#endif
+
+static int __init init(void)
+{
+	int ret;
+
+	ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
+	if (ret) {
+		printk("ip_conntrack_proto_sctp: protocol register failed\n");
+		goto out;
+	}
+
+#ifdef CONFIG_SYSCTL
+	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+	if (ip_ct_sysctl_header == NULL) {
+		ret = -ENOMEM;
+		printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
+		goto cleanup;
+	}
+#endif
+
+	return ret;
+
+#ifdef CONFIG_SYSCTL
+ cleanup:
+	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#endif
+ out:
+	DEBUGP("SCTP conntrack module loading %s\n", 
+					ret ? "failed": "succeeded");
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
+#ifdef CONFIG_SYSCTL
+ 	unregister_sysctl_table(ip_ct_sysctl_header);
+#endif
+	DEBUGP("SCTP conntrack module unloaded\n");
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
new file mode 100644
index 00000000000..e800b16fc92
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -0,0 +1,1098 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
+ *	- Real stateful connection tracking
+ *	- Modified state transitions table
+ *	- Window scaling support added
+ *	- SACK support added
+ *
+ * Willy Tarreau:
+ *	- State table bugfixes
+ *	- More robust state changes
+ *	- Tuning timer parameters
+ *
+ * version 2.2
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/spinlock.h>
+
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#if 0
+#define DEBUGP printk
+#define DEBUGP_VARS
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Protects conntrack->proto.tcp */
+static DECLARE_RWLOCK(tcp_lock);
+
+/* "Be conservative in what you do, 
+    be liberal in what you accept from others." 
+    If it's non-zero, we mark only out of window RST segments as INVALID. */
+int ip_ct_tcp_be_liberal = 0;
+
+/* When connection is picked up from the middle, how many packets are required
+   to pass in each direction when we assume we are in sync - if any side uses
+   window scaling, we lost the game. 
+   If it is set to zero, we disable picking up already established 
+   connections. */
+int ip_ct_tcp_loose = 3;
+
+/* Max number of the retransmitted packets without receiving an (acceptable) 
+   ACK from the destination. If this number is reached, a shorter timer 
+   will be started. */
+int ip_ct_tcp_max_retrans = 3;
+
+  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+     closely.  They're more complex. --RR */
+
+static const char *tcp_conntrack_names[] = {
+	"NONE",
+	"SYN_SENT",
+	"SYN_RECV",
+	"ESTABLISHED",
+	"FIN_WAIT",
+	"CLOSE_WAIT",
+	"LAST_ACK",
+	"TIME_WAIT",
+	"CLOSE",
+	"LISTEN"
+};
+  
+#define SECS * HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+#define DAYS * 24 HOURS
+
+unsigned long ip_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned long ip_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned long ip_ct_tcp_timeout_established =   5 DAYS;
+unsigned long ip_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned long ip_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned long ip_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned long ip_ct_tcp_timeout_close =        10 SECS;
+
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+   Linux uses 15 packets as limit, which corresponds 
+   to ~13-30min depending on RTO. */
+unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
+ 
+static unsigned long * tcp_timeouts[]
+= { NULL,                              /*      TCP_CONNTRACK_NONE */
+    &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
+    &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
+    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
+    &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
+    &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT,       */
+    &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
+    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
+    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
+    NULL,                              /*      TCP_CONNTRACK_LISTEN */
+ };
+ 
+#define sNO TCP_CONNTRACK_NONE
+#define sSS TCP_CONNTRACK_SYN_SENT
+#define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
+#define sFW TCP_CONNTRACK_FIN_WAIT
+#define sCW TCP_CONNTRACK_CLOSE_WAIT
+#define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
+#define sLI TCP_CONNTRACK_LISTEN
+#define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
+
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+enum tcp_bit_set {
+	TCP_SYN_SET,
+	TCP_SYNACK_SET,
+	TCP_FIN_SET,
+	TCP_ACK_SET,
+	TCP_RST_SET,
+	TCP_NONE_SET,
+};
+  
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments 
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meaning of the states are:
+ *
+ * NONE:	initial state
+ * SYN_SENT:	SYN-only packet seen 
+ * SYN_RECV:	SYN-ACK packet seen
+ * ESTABLISHED:	ACK packet seen
+ * FIN_WAIT:	FIN packet seen
+ * CLOSE_WAIT:	ACK seen (after FIN) 
+ * LAST_ACK:	FIN seen (after FIN)
+ * TIME_WAIT:	last ACK seen
+ * CLOSE:	closed connection
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ *	if they may be either invalid or valid 
+ *	and the receiver may send back a connection 
+ *	closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ *	if they are invalid
+ *	or we do not support the request (simultaneous open)
+ */
+static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+	{
+/* ORIGINAL */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ *	sNO -> sSS	Initialize a new connection
+ *	sSS -> sSS	Retransmitted SYN
+ *	sSR -> sIG	Late retransmitted SYN?
+ *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
+ *			are errors. Receiver will reply with RST 
+ *			and close the connection.
+ *			Or we are not in sync and hold a dead connection.
+ *	sFW -> sIG
+ *	sCW -> sIG
+ *	sLA -> sIG
+ *	sTW -> sSS	Reopened connection (RFC 1122).
+ *	sCL -> sSS
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ *	- either it tries to set up a simultaneous open, which is 
+ *	  not supported;
+ *	- or the firewall has just been inserted between the two hosts
+ *	  during the session set-up. The SYN will be retransmitted 
+ *	  by the true client (or it'll time out).
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *	sNO -> sIV	Too late and no reason to do anything...
+ *	sSS -> sIV	Client migth not send FIN in this state:
+ *			we enforce waiting for a SYN/ACK reply first.
+ *	sSR -> sFW	Close started.
+ *	sES -> sFW	
+ *	sFW -> sLA	FIN seen in both directions, waiting for
+ *			the last ACK. 
+ *			Migth be a retransmitted FIN as well...
+ *	sCW -> sLA
+ *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
+ *	sTW -> sTW
+ *	sCL -> sCL
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *	sNO -> sES	Assumed.
+ *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
+ *	sSR -> sES	Established state is reached.
+ *	sES -> sES	:-)
+ *	sFW -> sCW	Normal close request answered by ACK.
+ *	sCW -> sCW
+ *	sLA -> sTW	Last ACK detected.
+ *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
+ *	sCL -> sCL
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+	},
+	{
+/* REPLY */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ *	sNO -> sIV	Never reached.
+ *	sSS -> sIV	Simultaneous open, not supported
+ *	sSR -> sIV	Simultaneous open, not supported.
+ *	sES -> sIV	Server may not initiate a connection.
+ *	sFW -> sIV
+ *	sCW -> sIV
+ *	sLA -> sIV
+ *	sTW -> sIV	Reopened connection, but server may not do it.
+ *	sCL -> sIV
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ *	sSS -> sSR	Standard open.
+ *	sSR -> sSR	Retransmitted SYN/ACK.
+ *	sES -> sIG	Late retransmitted SYN/ACK?
+ *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
+ *	sCW -> sIG
+ *	sLA -> sIG
+ *	sTW -> sIG
+ *	sCL -> sIG
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *	sSS -> sIV	Server might not send FIN in this state.
+ *	sSR -> sFW	Close started.
+ *	sES -> sFW
+ *	sFW -> sLA	FIN seen in both directions.
+ *	sCW -> sLA
+ *	sLA -> sLA	Retransmitted FIN.
+ *	sTW -> sTW
+ *	sCL -> sCL
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *	sSS -> sIV	Might be a half-open connection.
+ *	sSR -> sSR	Might answer late resent SYN.
+ *	sES -> sES	:-)
+ *	sFW -> sCW	Normal close request answered by ACK.
+ *	sCW -> sCW
+ *	sLA -> sTW	Last ACK detected.
+ *	sTW -> sTW	Retransmitted last ACK.
+ *	sCL -> sCL
+ */
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+  	}
+};
+
+static int tcp_pkt_to_tuple(const struct sk_buff *skb,
+			    unsigned int dataoff,
+			    struct ip_conntrack_tuple *tuple)
+{
+	struct tcphdr _hdr, *hp;
+
+	/* Actually only need first 8 bytes. */
+	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+	if (hp == NULL)
+		return 0;
+
+	tuple->src.u.tcp.port = hp->source;
+	tuple->dst.u.tcp.port = hp->dest;
+
+	return 1;
+}
+
+static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
+			    const struct ip_conntrack_tuple *orig)
+{
+	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
+	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int tcp_print_tuple(struct seq_file *s,
+			   const struct ip_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "sport=%hu dport=%hu ",
+			  ntohs(tuple->src.u.tcp.port),
+			  ntohs(tuple->dst.u.tcp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int tcp_print_conntrack(struct seq_file *s,
+			       const struct ip_conntrack *conntrack)
+{
+	enum tcp_conntrack state;
+
+	READ_LOCK(&tcp_lock);
+	state = conntrack->proto.tcp.state;
+	READ_UNLOCK(&tcp_lock);
+
+	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
+}
+
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+	if (tcph->rst) return TCP_RST_SET;
+	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+	else if (tcph->fin) return TCP_FIN_SET;
+	else if (tcph->ack) return TCP_ACK_SET;
+	else return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+   in IP Filter' by Guido van Rooij.
+   
+   http://www.nluug.nl/events/sane2000/papers.html
+   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+   
+   The boundaries and the conditions are changed according to RFC793:
+   the packet must intersect the window (i.e. segments may be
+   after the right or before the left edge) and thus receivers may ACK
+   segments after the right edge of the window.
+
+   	td_maxend = max(sack + max(win,1)) seen in reply packets
+	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+	td_maxwin += seq + len - sender.td_maxend
+			if seq + len > sender.td_maxend
+	td_end    = max(seq + len) seen in sent packets
+   
+   I.   Upper bound for valid data:	seq <= sender.td_maxend
+   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
+   III.	Upper bound for valid ack:      sack <= receiver.td_end
+   IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
+   	
+   where sack is the highest right edge of sack block found in the packet.
+   	
+   The upper bound limit for a valid ack is not ignored - 
+   we doesn't have to deal with fragments. 
+*/
+
+static inline __u32 segment_seq_plus_len(__u32 seq,
+					 size_t len,
+					 struct iphdr *iph,
+					 struct tcphdr *tcph)
+{
+	return (seq + len - (iph->ihl + tcph->doff)*4
+		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
+}
+  
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST			66000
+#define MAXACKWINDOW(sender)						\
+	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
+					      : MAXACKWINCONST)
+  
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ */
+static void tcp_options(const struct sk_buff *skb,
+			struct iphdr *iph,
+			struct tcphdr *tcph, 
+			struct ip_ct_tcp_state *state)
+{
+	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+	
+	if (!length)
+		return;
+
+	ptr = skb_header_pointer(skb,
+				 (iph->ihl * 4) + sizeof(struct tcphdr),
+				 length, buff);
+	BUG_ON(ptr == NULL);
+
+	state->td_scale = 
+	state->flags = 0;
+	
+	while (length > 0) {
+		int opcode=*ptr++;
+		int opsize;
+		
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			opsize=*ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				break;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK_PERM 
+			    && opsize == TCPOLEN_SACK_PERM)
+				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+			else if (opcode == TCPOPT_WINDOW
+				 && opsize == TCPOLEN_WINDOW) {
+				state->td_scale = *(u_int8_t *)ptr;
+				
+				if (state->td_scale > 14) {
+					/* See RFC1323 */
+					state->td_scale = 14;
+				}
+				state->flags |=
+					IP_CT_TCP_FLAG_WINDOW_SCALE;
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+static void tcp_sack(const struct sk_buff *skb,
+		     struct iphdr *iph,
+		     struct tcphdr *tcph,
+		     __u32 *sack)
+{
+	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	unsigned char *ptr;
+	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+	__u32 tmp;
+
+	if (!length)
+		return;
+
+	ptr = skb_header_pointer(skb,
+				 (iph->ihl * 4) + sizeof(struct tcphdr),
+				 length, buff);
+	BUG_ON(ptr == NULL);
+
+	/* Fast path for timestamp-only option */
+	if (length == TCPOLEN_TSTAMP_ALIGNED*4
+	    && *(__u32 *)ptr ==
+	        __constant_ntohl((TCPOPT_NOP << 24) 
+	        		 | (TCPOPT_NOP << 16)
+	        		 | (TCPOPT_TIMESTAMP << 8)
+	        		 | TCPOLEN_TIMESTAMP))
+		return;
+		
+	while (length > 0) {
+		int opcode=*ptr++;
+		int opsize, i;
+		
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			opsize=*ptr++;
+			if (opsize < 2) /* "silly options" */
+				return;
+			if (opsize > length)
+				break;	/* don't parse partial options */
+
+			if (opcode == TCPOPT_SACK 
+			    && opsize >= (TCPOLEN_SACK_BASE 
+			    		  + TCPOLEN_SACK_PERBLOCK)
+			    && !((opsize - TCPOLEN_SACK_BASE) 
+			    	 % TCPOLEN_SACK_PERBLOCK)) {
+			    	for (i = 0;
+			    	     i < (opsize - TCPOLEN_SACK_BASE);
+			    	     i += TCPOLEN_SACK_PERBLOCK) {
+					tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+					
+					if (after(tmp, *sack))
+						*sack = tmp;
+				}
+				return;
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+}
+
+static int tcp_in_window(struct ip_ct_tcp *state, 
+                         enum ip_conntrack_dir dir,
+                         unsigned int index,
+                         const struct sk_buff *skb,
+                         struct iphdr *iph,
+                         struct tcphdr *tcph)
+{
+	struct ip_ct_tcp_state *sender = &state->seen[dir];
+	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+	__u32 seq, ack, sack, end, win, swin;
+	int res;
+	
+	/*
+	 * Get the required data from the packet.
+	 */
+	seq = ntohl(tcph->seq);
+	ack = sack = ntohl(tcph->ack_seq);
+	win = ntohs(tcph->window);
+	end = segment_seq_plus_len(seq, skb->len, iph, tcph);
+	
+	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+		tcp_sack(skb, iph, tcph, &sack);
+		
+	DEBUGP("tcp_in_window: START\n");
+	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
+		NIPQUAD(iph->saddr), ntohs(tcph->source), 
+		NIPQUAD(iph->daddr), ntohs(tcph->dest),
+		seq, ack, sack, win, end);
+	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale, 
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 
+		receiver->td_scale);
+		
+	if (sender->td_end == 0) {
+		/*
+		 * Initialize sender data.
+		 */
+		if (tcph->syn && tcph->ack) {
+			/*
+			 * Outgoing SYN-ACK in reply to a SYN.
+			 */
+			sender->td_end = 
+			sender->td_maxend = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+
+			tcp_options(skb, iph, tcph, sender);
+			/* 
+			 * RFC 1323:
+			 * Both sides must send the Window Scale option
+			 * to enable window scaling in either direction.
+			 */
+			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
+			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
+				sender->td_scale = 
+				receiver->td_scale = 0;
+		} else {
+			/*
+			 * We are in the middle of a connection,
+			 * its history is lost for us.
+			 * Let's try to use the data from the packet.
+		 	 */
+			sender->td_end = end;
+			sender->td_maxwin = (win == 0 ? 1 : win);
+			sender->td_maxend = end + sender->td_maxwin;
+		}
+	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
+		     && dir == IP_CT_DIR_ORIGINAL)
+		    || (state->state == TCP_CONNTRACK_SYN_RECV
+		        && dir == IP_CT_DIR_REPLY))
+		    && after(end, sender->td_end)) {
+		/*
+		 * RFC 793: "if a TCP is reinitialized ... then it need
+		 * not wait at all; it must only be sure to use sequence 
+		 * numbers larger than those recently used."
+		 */
+		sender->td_end =
+		sender->td_maxend = end;
+		sender->td_maxwin = (win == 0 ? 1 : win);
+
+		tcp_options(skb, iph, tcph, sender);
+	}
+	
+	if (!(tcph->ack)) {
+		/*
+		 * If there is no ACK, just pretend it was set and OK.
+		 */
+		ack = sack = receiver->td_end;
+	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == 
+		    (TCP_FLAG_ACK|TCP_FLAG_RST)) 
+		   && (ack == 0)) {
+		/*
+		 * Broken TCP stacks, that set ACK in RST packets as well
+		 * with zero ack value.
+		 */
+		ack = sack = receiver->td_end;
+	}
+
+	if (seq == end
+	    && (!tcph->rst 
+	        || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+		/*
+		 * Packets contains no data: we assume it is valid
+		 * and check the ack value only.
+		 * However RST segments are always validated by their
+		 * SEQ number, except when seq == 0 (reset sent answering
+		 * SYN.
+		 */
+		seq = end = sender->td_end;
+		
+	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
+		NIPQUAD(iph->saddr), ntohs(tcph->source),
+		NIPQUAD(iph->daddr), ntohs(tcph->dest),
+		seq, ack, sack, win, end);
+	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale, 
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+	
+	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+		before(seq, sender->td_maxend + 1),
+	    	after(end, sender->td_end - receiver->td_maxwin - 1),
+	    	before(sack, receiver->td_end + 1),
+	    	after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+	
+	if (sender->loose || receiver->loose ||
+	    (before(seq, sender->td_maxend + 1) &&
+	     after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	     before(sack, receiver->td_end + 1) &&
+	     after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+	    	/*
+		 * Take into account window scaling (RFC 1323).
+		 */
+		if (!tcph->syn)
+			win <<= sender->td_scale;
+		
+		/*
+		 * Update sender data.
+		 */
+		swin = win + (sack - ack);
+		if (sender->td_maxwin < swin)
+			sender->td_maxwin = swin;
+		if (after(end, sender->td_end))
+			sender->td_end = end;
+		/*
+		 * Update receiver data.
+		 */
+		if (after(end, sender->td_maxend))
+			receiver->td_maxwin += end - sender->td_maxend;
+		if (after(sack + win, receiver->td_maxend - 1)) {
+			receiver->td_maxend = sack + win;
+			if (win == 0)
+				receiver->td_maxend++;
+		}
+
+		/* 
+		 * Check retransmissions.
+		 */
+		if (index == TCP_ACK_SET) {
+			if (state->last_dir == dir
+			    && state->last_seq == seq
+			    && state->last_ack == ack
+			    && state->last_end == end)
+				state->retrans++;
+			else {
+				state->last_dir = dir;
+				state->last_seq = seq;
+				state->last_ack = ack;
+				state->last_end = end;
+				state->retrans = 0;
+			}
+		}
+		/*
+		 * Close the window of disabled window tracking :-)
+		 */
+		if (sender->loose)
+			sender->loose--;
+		
+		res = 1;
+	} else {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			"ip_ct_tcp: %s ",
+			before(seq, sender->td_maxend + 1) ?
+			after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			before(sack, receiver->td_end + 1) ?
+			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+			: "ACK is under the lower bound (possible overly delayed ACK)"
+			: "ACK is over the upper bound (ACKed data not seen yet)"
+			: "SEQ is under the lower bound (already ACKed data retransmitted)"
+			: "SEQ is over the upper bound (over the window of the receiver)");
+
+		res = ip_ct_tcp_be_liberal;
+  	}
+  
+	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+	       "receiver end=%u maxend=%u maxwin=%u\n",
+		res, sender->td_end, sender->td_maxend, sender->td_maxwin, 
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+	return res;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/* Update sender->td_end after NAT successfully mangled the packet */
+void ip_conntrack_tcp_update(struct sk_buff *skb,
+			     struct ip_conntrack *conntrack, 
+			     enum ip_conntrack_dir dir)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
+	__u32 end;
+#ifdef DEBUGP_VARS
+	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
+	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
+#endif
+
+	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
+	
+	WRITE_LOCK(&tcp_lock);
+	/*
+	 * We have to worry for the ack in the reply packet only...
+	 */
+	if (after(end, conntrack->proto.tcp.seen[dir].td_end))
+		conntrack->proto.tcp.seen[dir].td_end = end;
+	conntrack->proto.tcp.last_end = end;
+	WRITE_UNLOCK(&tcp_lock);
+	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale, 
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+}
+ 
+#endif
+
+#define	TH_FIN	0x01
+#define	TH_SYN	0x02
+#define	TH_RST	0x04
+#define	TH_PUSH	0x08
+#define	TH_ACK	0x10
+#define	TH_URG	0x20
+#define	TH_ECE	0x40
+#define	TH_CWR	0x80
+
+/* table of valid flag combinations - ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+	[TH_SYN]			= 1,
+	[TH_SYN|TH_ACK]			= 1,
+	[TH_RST]			= 1,
+	[TH_RST|TH_ACK]			= 1,
+	[TH_RST|TH_ACK|TH_PUSH]		= 1,
+	[TH_FIN|TH_ACK]			= 1,
+	[TH_ACK]			= 1,
+	[TH_ACK|TH_PUSH]		= 1,
+	[TH_ACK|TH_URG]			= 1,
+	[TH_ACK|TH_URG|TH_PUSH]		= 1,
+	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
+	[TH_FIN|TH_ACK|TH_URG]		= 1,
+	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
+};
+
+/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
+static int tcp_error(struct sk_buff *skb,
+		     enum ip_conntrack_info *ctinfo,
+		     unsigned int hooknum)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr _tcph, *th;
+	unsigned int tcplen = skb->len - iph->ihl * 4;
+	u_int8_t tcpflags;
+
+	/* Smaller that minimal TCP header? */
+	th = skb_header_pointer(skb, iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				"ip_ct_tcp: short packet ");
+		return -NF_ACCEPT;
+  	}
+  
+	/* Not whole TCP header or malformed packet */
+	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				"ip_ct_tcp: truncated/malformed packet ");
+		return -NF_ACCEPT;
+	}
+  
+	/* Checksum invalid? Ignore.
+	 * We skip checking packets on the outgoing path
+	 * because the semantic of CHECKSUM_HW is different there 
+	 * and moreover root might send raw packets.
+	 */
+	/* FIXME: Source route IP option packets --RR */
+	if (hooknum == NF_IP_PRE_ROUTING
+	    && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
+			         skb->ip_summed == CHECKSUM_HW ? skb->csum
+			      	 : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_tcp: bad TCP checksum ");
+		return -NF_ACCEPT;
+	}
+
+	/* Check TCP flags. */
+	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+	if (!tcp_valid_flags[tcpflags]) {
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_tcp: invalid TCP flag combination ");
+		return -NF_ACCEPT;
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int tcp_packet(struct ip_conntrack *conntrack,
+		      const struct sk_buff *skb,
+		      enum ip_conntrack_info ctinfo)
+{
+	enum tcp_conntrack new_state, old_state;
+	enum ip_conntrack_dir dir;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th, _tcph;
+	unsigned long timeout;
+	unsigned int index;
+	
+	th = skb_header_pointer(skb, iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	BUG_ON(th == NULL);
+	
+	WRITE_LOCK(&tcp_lock);
+	old_state = conntrack->proto.tcp.state;
+	dir = CTINFO2DIR(ctinfo);
+	index = get_conntrack_index(th);
+	new_state = tcp_conntracks[dir][index][old_state];
+
+	switch (new_state) {
+	case TCP_CONNTRACK_IGNORE:
+		/* Either SYN in ORIGINAL
+		 * or SYN/ACK in REPLY. */
+		if (index == TCP_SYNACK_SET
+		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && conntrack->proto.tcp.last_dir != dir
+		    && ntohl(th->ack_seq) ==
+		    	     conntrack->proto.tcp.last_end) {
+			/* This SYN/ACK acknowledges a SYN that we earlier 
+			 * ignored as invalid. This means that the client and
+			 * the server are both in sync, while the firewall is
+			 * not. We kill this session and block the SYN/ACK so
+			 * that the client cannot but retransmit its SYN and 
+			 * thus initiate a clean new session.
+			 */
+		    	WRITE_UNLOCK(&tcp_lock);
+			if (LOG_INVALID(IPPROTO_TCP))
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+					  "ip_ct_tcp: killing out of sync session ");
+		    	if (del_timer(&conntrack->timeout))
+		    		conntrack->timeout.function((unsigned long)
+		    					    conntrack);
+		    	return -NF_DROP;
+		}
+		conntrack->proto.tcp.last_index = index;
+		conntrack->proto.tcp.last_dir = dir;
+		conntrack->proto.tcp.last_seq = ntohl(th->seq);
+		conntrack->proto.tcp.last_end = 
+		    segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
+		
+		WRITE_UNLOCK(&tcp_lock);
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_tcp: invalid packet ignored ");
+		return NF_ACCEPT;
+	case TCP_CONNTRACK_MAX:
+		/* Invalid packet */
+		DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+		       dir, get_conntrack_index(th),
+		       old_state);
+		WRITE_UNLOCK(&tcp_lock);
+		if (LOG_INVALID(IPPROTO_TCP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_tcp: invalid state ");
+		return -NF_ACCEPT;
+	case TCP_CONNTRACK_SYN_SENT:
+		if (old_state < TCP_CONNTRACK_TIME_WAIT)
+			break;
+		if ((conntrack->proto.tcp.seen[dir].flags &
+		         IP_CT_TCP_FLAG_CLOSE_INIT)
+		    || after(ntohl(th->seq),
+		    	     conntrack->proto.tcp.seen[dir].td_end)) {	
+		    	/* Attempt to reopen a closed connection.
+		    	* Delete this connection and look up again. */
+		    	WRITE_UNLOCK(&tcp_lock);
+		    	if (del_timer(&conntrack->timeout))
+		    		conntrack->timeout.function((unsigned long)
+		    					    conntrack);
+		    	return -NF_REPEAT;
+		} else {
+			WRITE_UNLOCK(&tcp_lock);
+			if (LOG_INVALID(IPPROTO_TCP))
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				              "ip_ct_tcp: invalid SYN");
+			return -NF_ACCEPT;
+		}
+	case TCP_CONNTRACK_CLOSE:
+		if (index == TCP_RST_SET
+		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+			/* RST sent to invalid SYN we had let trough
+			 * SYN was in window then, tear down connection.
+			 * We skip window checking, because packet might ACK
+			 * segments we ignored in the SYN. */
+			goto in_window;
+		}
+		/* Just fall trough */
+	default:
+		/* Keep compilers happy. */
+		break;
+	}
+
+	if (!tcp_in_window(&conntrack->proto.tcp, dir, index, 
+			   skb, iph, th)) {
+		WRITE_UNLOCK(&tcp_lock);
+		return -NF_ACCEPT;
+	}
+    in_window:
+	/* From now on we have got in-window packets */	
+	conntrack->proto.tcp.last_index = index;
+
+	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+		NIPQUAD(iph->saddr), ntohs(th->source),
+		NIPQUAD(iph->daddr), ntohs(th->dest),
+		(th->syn ? 1 : 0), (th->ack ? 1 : 0),
+		(th->fin ? 1 : 0), (th->rst ? 1 : 0),
+		old_state, new_state);
+
+	conntrack->proto.tcp.state = new_state;
+	if (old_state != new_state 
+	    && (new_state == TCP_CONNTRACK_FIN_WAIT
+	    	|| new_state == TCP_CONNTRACK_CLOSE))
+		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
+	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
+		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
+		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+	WRITE_UNLOCK(&tcp_lock);
+
+	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+		/* If only reply is a RST, we can consider ourselves not to
+		   have an established connection: this is a fairly common
+		   problem case, so we can delete the conntrack
+		   immediately.  --RR */
+		if (th->rst) {
+			if (del_timer(&conntrack->timeout))
+				conntrack->timeout.function((unsigned long)
+							    conntrack);
+			return NF_ACCEPT;
+		}
+	} else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+		   && (old_state == TCP_CONNTRACK_SYN_RECV
+		       || old_state == TCP_CONNTRACK_ESTABLISHED)
+		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
+		/* Set ASSURED if we see see valid ack in ESTABLISHED 
+		   after SYN_RECV or a valid answer for a picked up 
+		   connection. */
+			set_bit(IPS_ASSURED_BIT, &conntrack->status);
+	}
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+
+	return NF_ACCEPT;
+}
+ 
+/* Called when a new connection for this protocol found. */
+static int tcp_new(struct ip_conntrack *conntrack,
+		   const struct sk_buff *skb)
+{
+	enum tcp_conntrack new_state;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th, _tcph;
+#ifdef DEBUGP_VARS
+	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
+	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
+#endif
+
+	th = skb_header_pointer(skb, iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	BUG_ON(th == NULL);
+	
+	/* Don't need lock here: this conntrack not in circulation yet */
+	new_state
+		= tcp_conntracks[0][get_conntrack_index(th)]
+		[TCP_CONNTRACK_NONE];
+
+	/* Invalid: delete conntrack */
+	if (new_state >= TCP_CONNTRACK_MAX) {
+		DEBUGP("ip_ct_tcp: invalid new deleting.\n");
+		return 0;
+	}
+
+	if (new_state == TCP_CONNTRACK_SYN_SENT) {
+		/* SYN packet */
+		conntrack->proto.tcp.seen[0].td_end =
+			segment_seq_plus_len(ntohl(th->seq), skb->len,
+					     iph, th);
+		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+			conntrack->proto.tcp.seen[0].td_maxwin = 1;
+		conntrack->proto.tcp.seen[0].td_maxend =
+			conntrack->proto.tcp.seen[0].td_end;
+
+		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
+		conntrack->proto.tcp.seen[1].flags = 0;
+		conntrack->proto.tcp.seen[0].loose = 
+		conntrack->proto.tcp.seen[1].loose = 0;
+	} else if (ip_ct_tcp_loose == 0) {
+		/* Don't try to pick up connections. */
+		return 0;
+	} else {
+		/*
+		 * We are in the middle of a connection,
+		 * its history is lost for us.
+		 * Let's try to use the data from the packet.
+		 */
+		conntrack->proto.tcp.seen[0].td_end =
+			segment_seq_plus_len(ntohl(th->seq), skb->len,
+					     iph, th);
+		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+			conntrack->proto.tcp.seen[0].td_maxwin = 1;
+		conntrack->proto.tcp.seen[0].td_maxend =
+			conntrack->proto.tcp.seen[0].td_end + 
+			conntrack->proto.tcp.seen[0].td_maxwin;
+		conntrack->proto.tcp.seen[0].td_scale = 0;
+
+		/* We assume SACK. Should we assume window scaling too? */
+		conntrack->proto.tcp.seen[0].flags =
+		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
+		conntrack->proto.tcp.seen[0].loose = 
+		conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
+	}
+    
+	conntrack->proto.tcp.seen[1].td_end = 0;
+	conntrack->proto.tcp.seen[1].td_maxend = 0;
+	conntrack->proto.tcp.seen[1].td_maxwin = 1;
+	conntrack->proto.tcp.seen[1].td_scale = 0;      
+
+	/* tcp_packet will set them */
+	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
+	conntrack->proto.tcp.last_index = TCP_NONE_SET;
+	 
+	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+		sender->td_end, sender->td_maxend, sender->td_maxwin,
+		sender->td_scale, 
+		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+		receiver->td_scale);
+	return 1;
+}
+  
+struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
+{
+	.proto 			= IPPROTO_TCP,
+	.name 			= "tcp",
+	.pkt_to_tuple 		= tcp_pkt_to_tuple,
+	.invert_tuple 		= tcp_invert_tuple,
+	.print_tuple 		= tcp_print_tuple,
+	.print_conntrack 	= tcp_print_conntrack,
+	.packet 		= tcp_packet,
+	.new 			= tcp_new,
+	.error			= tcp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
new file mode 100644
index 00000000000..5bc28a22462
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -0,0 +1,146 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/seq_file.h>
+#include <net/checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+
+unsigned long ip_ct_udp_timeout = 30*HZ;
+unsigned long ip_ct_udp_timeout_stream = 180*HZ;
+
+static int udp_pkt_to_tuple(const struct sk_buff *skb,
+			     unsigned int dataoff,
+			     struct ip_conntrack_tuple *tuple)
+{
+	struct udphdr _hdr, *hp;
+
+	/* Actually only need first 8 bytes. */
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return 0;
+
+	tuple->src.u.udp.port = hp->source;
+	tuple->dst.u.udp.port = hp->dest;
+
+	return 1;
+}
+
+static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
+			    const struct ip_conntrack_tuple *orig)
+{
+	tuple->src.u.udp.port = orig->dst.u.udp.port;
+	tuple->dst.u.udp.port = orig->src.u.udp.port;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int udp_print_tuple(struct seq_file *s,
+			   const struct ip_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "sport=%hu dport=%hu ",
+			  ntohs(tuple->src.u.udp.port),
+			  ntohs(tuple->dst.u.udp.port));
+}
+
+/* Print out the private part of the conntrack. */
+static int udp_print_conntrack(struct seq_file *s,
+			       const struct ip_conntrack *conntrack)
+{
+	return 0;
+}
+
+/* Returns verdict for packet, and may modify conntracktype */
+static int udp_packet(struct ip_conntrack *conntrack,
+		      const struct sk_buff *skb,
+		      enum ip_conntrack_info ctinfo)
+{
+	/* If we've seen traffic both ways, this is some kind of UDP
+	   stream.  Extend timeout. */
+	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
+				   ip_ct_udp_timeout_stream);
+		/* Also, more likely to be important, and not a probe */
+		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+	} else
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+{
+	return 1;
+}
+
+static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
+		     unsigned int hooknum)
+{
+	struct iphdr *iph = skb->nh.iph;
+	unsigned int udplen = skb->len - iph->ihl * 4;
+	struct udphdr _hdr, *hdr;
+
+	/* Header is too small? */
+	hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
+	if (hdr == NULL) {
+		if (LOG_INVALID(IPPROTO_UDP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_udp: short packet ");
+		return -NF_ACCEPT;
+	}
+	
+	/* Truncated/malformed packets */
+	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+		if (LOG_INVALID(IPPROTO_UDP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_udp: truncated/malformed packet ");
+		return -NF_ACCEPT;
+	}
+	
+	/* Packet with no checksum */
+	if (!hdr->check)
+		return NF_ACCEPT;
+
+	/* Checksum invalid? Ignore.
+	 * We skip checking packets on the outgoing path
+	 * because the semantic of CHECKSUM_HW is different there 
+	 * and moreover root might send raw packets.
+	 * FIXME: Source route IP option packets --RR */
+	if (hooknum == NF_IP_PRE_ROUTING
+	    && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
+			         skb->ip_summed == CHECKSUM_HW ? skb->csum
+			      	 : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
+		if (LOG_INVALID(IPPROTO_UDP))
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+				  "ip_ct_udp: bad UDP checksum ");
+		return -NF_ACCEPT;
+	}
+	
+	return NF_ACCEPT;
+}
+
+struct ip_conntrack_protocol ip_conntrack_protocol_udp =
+{
+	.proto 			= IPPROTO_UDP,
+	.name			= "udp",
+	.pkt_to_tuple		= udp_pkt_to_tuple,
+	.invert_tuple		= udp_invert_tuple,
+	.print_tuple		= udp_print_tuple,
+	.print_conntrack	= udp_print_conntrack,
+	.packet			= udp_packet,
+	.new			= udp_new,
+	.error			= udp_error,
+};
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
new file mode 100644
index 00000000000..80a7bde2a57
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -0,0 +1,961 @@
+/* This file contains all the functions required for the standalone
+   ip_conntrack module.
+
+   These are not required by the compatibility layer.
+*/
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <net/checksum.h>
+#include <net/ip.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_LICENSE("GPL");
+
+extern atomic_t ip_conntrack_count;
+DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+
+static int kill_proto(struct ip_conntrack *i, void *data)
+{
+	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == 
+			*((u_int8_t *) data));
+}
+
+#ifdef CONFIG_PROC_FS
+static int
+print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
+	    struct ip_conntrack_protocol *proto)
+{
+	seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+		   NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
+	return proto->print_tuple(s, tuple);
+}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+static unsigned int
+seq_print_counters(struct seq_file *s,
+		   const struct ip_conntrack_counter *counter)
+{
+	return seq_printf(s, "packets=%llu bytes=%llu ",
+			  (unsigned long long)counter->packets,
+			  (unsigned long long)counter->bytes);
+}
+#else
+#define seq_print_counters(x, y)	0
+#endif
+
+struct ct_iter_state {
+	unsigned int bucket;
+};
+
+static struct list_head *ct_get_first(struct seq_file *seq)
+{
+	struct ct_iter_state *st = seq->private;
+
+	for (st->bucket = 0;
+	     st->bucket < ip_conntrack_htable_size;
+	     st->bucket++) {
+		if (!list_empty(&ip_conntrack_hash[st->bucket]))
+			return ip_conntrack_hash[st->bucket].next;
+	}
+	return NULL;
+}
+
+static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
+{
+	struct ct_iter_state *st = seq->private;
+
+	head = head->next;
+	while (head == &ip_conntrack_hash[st->bucket]) {
+		if (++st->bucket >= ip_conntrack_htable_size)
+			return NULL;
+		head = ip_conntrack_hash[st->bucket].next;
+	}
+	return head;
+}
+
+static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct list_head *head = ct_get_first(seq);
+
+	if (head)
+		while (pos && (head = ct_get_next(seq, head)))
+			pos--;
+	return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	READ_LOCK(&ip_conntrack_lock);
+	return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return ct_get_next(s, v);
+}
+  
+static void ct_seq_stop(struct seq_file *s, void *v)
+{
+	READ_UNLOCK(&ip_conntrack_lock);
+}
+ 
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+	const struct ip_conntrack_tuple_hash *hash = v;
+	const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
+	struct ip_conntrack_protocol *proto;
+
+	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+	IP_NF_ASSERT(conntrack);
+
+	/* we only want to print DIR_ORIGINAL */
+	if (DIRECTION(hash))
+		return 0;
+
+	proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+			       .tuple.dst.protonum);
+	IP_NF_ASSERT(proto);
+
+	if (seq_printf(s, "%-8s %u %ld ",
+		      proto->name,
+		      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
+		      timer_pending(&conntrack->timeout)
+		      ? (long)(conntrack->timeout.expires - jiffies)/HZ
+		      : 0) != 0)
+		return -ENOSPC;
+
+	if (proto->print_conntrack(s, conntrack))
+		return -ENOSPC;
+  
+	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+			proto))
+		return -ENOSPC;
+
+ 	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
+		return -ENOSPC;
+
+	if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
+		if (seq_printf(s, "[UNREPLIED] "))
+			return -ENOSPC;
+
+	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
+			proto))
+		return -ENOSPC;
+
+ 	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
+		return -ENOSPC;
+
+	if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
+		if (seq_printf(s, "[ASSURED] "))
+			return -ENOSPC;
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (seq_printf(s, "mark=%lu ", conntrack->mark))
+		return -ENOSPC;
+#endif
+
+	if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
+		return -ENOSPC;
+
+	return 0;
+}
+
+static struct seq_operations ct_seq_ops = {
+	.start = ct_seq_start,
+	.next  = ct_seq_next,
+	.stop  = ct_seq_stop,
+	.show  = ct_seq_show
+};
+  
+static int ct_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	struct ct_iter_state *st;
+	int ret;
+
+	st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+	ret = seq_open(file, &ct_seq_ops);
+	if (ret)
+		goto out_free;
+	seq          = file->private_data;
+	seq->private = st;
+	memset(st, 0, sizeof(struct ct_iter_state));
+	return ret;
+out_free:
+	kfree(st);
+	return ret;
+}
+
+static struct file_operations ct_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = ct_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+  
+/* expects */
+static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct list_head *e = &ip_conntrack_expect_list;
+	loff_t i;
+
+	/* strange seq_file api calls stop even if we fail,
+	 * thus we need to grab lock since stop unlocks */
+	READ_LOCK(&ip_conntrack_lock);
+
+	if (list_empty(e))
+		return NULL;
+
+	for (i = 0; i <= *pos; i++) {
+		e = e->next;
+		if (e == &ip_conntrack_expect_list)
+			return NULL;
+	}
+	return e;
+}
+
+static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ 	struct list_head *e = v;
+
+	e = e->next;
+
+	if (e == &ip_conntrack_expect_list)
+		return NULL;
+
+	return e;
+}
+
+static void exp_seq_stop(struct seq_file *s, void *v)
+{
+	READ_UNLOCK(&ip_conntrack_lock);
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+	struct ip_conntrack_expect *expect = v;
+
+	if (expect->timeout.function)
+		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
+			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
+	else
+		seq_printf(s, "- ");
+
+	seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
+
+	print_tuple(s, &expect->tuple,
+		    ip_ct_find_proto(expect->tuple.dst.protonum));
+	return seq_putc(s, '\n');
+}
+
+static struct seq_operations exp_seq_ops = {
+	.start = exp_seq_start,
+	.next = exp_seq_next,
+	.stop = exp_seq_stop,
+	.show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &exp_seq_ops);
+}
+  
+static struct file_operations exp_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = exp_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return &per_cpu(ip_conntrack_stat, cpu);
+	}
+
+	return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	int cpu;
+
+	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return &per_cpu(ip_conntrack_stat, cpu);
+	}
+
+	return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
+	struct ip_conntrack_stat *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
+			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+		   nr_conntracks,
+		   st->searched,
+		   st->found,
+		   st->new,
+		   st->invalid,
+		   st->ignore,
+		   st->delete,
+		   st->delete_list,
+		   st->insert,
+		   st->insert_failed,
+		   st->drop,
+		   st->early_drop,
+		   st->error,
+
+		   st->expect_new,
+		   st->expect_create,
+		   st->expect_delete
+		);
+	return 0;
+}
+
+static struct seq_operations ct_cpu_seq_ops = {
+	.start  = ct_cpu_seq_start,
+	.next   = ct_cpu_seq_next,
+	.stop   = ct_cpu_seq_stop,
+	.show   = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ct_cpu_seq_ops);
+}
+
+static struct file_operations ct_cpu_seq_fops = {
+	.owner   = THIS_MODULE,
+	.open    = ct_cpu_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+#endif
+
+static unsigned int ip_confirm(unsigned int hooknum,
+			       struct sk_buff **pskb,
+			       const struct net_device *in,
+			       const struct net_device *out,
+			       int (*okfn)(struct sk_buff *))
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+	if (ct && ct->helper) {
+		unsigned int ret;
+		ret = ct->helper->help(pskb, ct, ctinfo);
+		if (ret != NF_ACCEPT)
+			return ret;
+	}
+
+	/* We've seen it coming out the other side: confirm it */
+	return ip_conntrack_confirm(pskb);
+}
+
+static unsigned int ip_conntrack_defrag(unsigned int hooknum,
+				        struct sk_buff **pskb,
+				        const struct net_device *in,
+				        const struct net_device *out,
+				        int (*okfn)(struct sk_buff *))
+{
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+	/* Previously seen (loopback)?  Ignore.  Do this before
+           fragment check. */
+	if ((*pskb)->nfct)
+		return NF_ACCEPT;
+#endif
+
+	/* Gather fragments. */
+	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+		*pskb = ip_ct_gather_frags(*pskb,
+		                           hooknum == NF_IP_PRE_ROUTING ? 
+					   IP_DEFRAG_CONNTRACK_IN :
+					   IP_DEFRAG_CONNTRACK_OUT);
+		if (!*pskb)
+			return NF_STOLEN;
+	}
+	return NF_ACCEPT;
+}
+
+static unsigned int ip_refrag(unsigned int hooknum,
+			      struct sk_buff **pskb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+
+	/* We've seen it coming out the other side: confirm */
+	if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+		return NF_DROP;
+
+	/* Local packets are never produced too large for their
+	   interface.  We degfragment them at LOCAL_OUT, however,
+	   so we have to refragment them here. */
+	if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
+	    !skb_shinfo(*pskb)->tso_size) {
+		/* No hook can be after us, so this should be OK. */
+		ip_fragment(*pskb, okfn);
+		return NF_STOLEN;
+	}
+	return NF_ACCEPT;
+}
+
+static unsigned int ip_conntrack_local(unsigned int hooknum,
+				       struct sk_buff **pskb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ipt_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+	return ip_conntrack_in(hooknum, pskb, in, out, okfn);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+   make it the first hook. */
+static struct nf_hook_ops ip_conntrack_defrag_ops = {
+	.hook		= ip_conntrack_defrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_PRE_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ip_conntrack_in_ops = {
+	.hook		= ip_conntrack_in,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_PRE_ROUTING,
+	.priority	= NF_IP_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = {
+	.hook		= ip_conntrack_defrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_OUT,
+	.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ip_conntrack_local_out_ops = {
+	.hook		= ip_conntrack_local,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_OUT,
+	.priority	= NF_IP_PRI_CONNTRACK,
+};
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ip_conntrack_out_ops = {
+	.hook		= ip_refrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_POST_ROUTING,
+	.priority	= NF_IP_PRI_LAST,
+};
+
+static struct nf_hook_ops ip_conntrack_local_in_ops = {
+	.hook		= ip_confirm,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_IN,
+	.priority	= NF_IP_PRI_LAST-1,
+};
+
+/* Sysctl support */
+
+#ifdef CONFIG_SYSCTL
+
+/* From ip_conntrack_core.c */
+extern int ip_conntrack_max;
+extern unsigned int ip_conntrack_htable_size;
+
+/* From ip_conntrack_proto_tcp.c */
+extern unsigned long ip_ct_tcp_timeout_syn_sent;
+extern unsigned long ip_ct_tcp_timeout_syn_recv;
+extern unsigned long ip_ct_tcp_timeout_established;
+extern unsigned long ip_ct_tcp_timeout_fin_wait;
+extern unsigned long ip_ct_tcp_timeout_close_wait;
+extern unsigned long ip_ct_tcp_timeout_last_ack;
+extern unsigned long ip_ct_tcp_timeout_time_wait;
+extern unsigned long ip_ct_tcp_timeout_close;
+extern unsigned long ip_ct_tcp_timeout_max_retrans;
+extern int ip_ct_tcp_loose;
+extern int ip_ct_tcp_be_liberal;
+extern int ip_ct_tcp_max_retrans;
+
+/* From ip_conntrack_proto_udp.c */
+extern unsigned long ip_ct_udp_timeout;
+extern unsigned long ip_ct_udp_timeout_stream;
+
+/* From ip_conntrack_proto_icmp.c */
+extern unsigned long ip_ct_icmp_timeout;
+
+/* From ip_conntrack_proto_icmp.c */
+extern unsigned long ip_ct_generic_timeout;
+
+/* Log invalid packets of a given protocol */
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+static struct ctl_table_header *ip_ct_sysctl_header;
+
+static ctl_table ip_ct_sysctl_table[] = {
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
+		.procname	= "ip_conntrack_max",
+		.data		= &ip_conntrack_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
+		.procname	= "ip_conntrack_count",
+		.data		= &ip_conntrack_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_BUCKETS,
+		.procname	= "ip_conntrack_buckets",
+		.data		= &ip_conntrack_htable_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
+		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
+		.data		= &ip_ct_tcp_timeout_syn_sent,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
+		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
+		.data		= &ip_ct_tcp_timeout_syn_recv,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
+		.procname	= "ip_conntrack_tcp_timeout_established",
+		.data		= &ip_ct_tcp_timeout_established,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
+		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
+		.data		= &ip_ct_tcp_timeout_fin_wait,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
+		.procname	= "ip_conntrack_tcp_timeout_close_wait",
+		.data		= &ip_ct_tcp_timeout_close_wait,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
+		.procname	= "ip_conntrack_tcp_timeout_last_ack",
+		.data		= &ip_ct_tcp_timeout_last_ack,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
+		.procname	= "ip_conntrack_tcp_timeout_time_wait",
+		.data		= &ip_ct_tcp_timeout_time_wait,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
+		.procname	= "ip_conntrack_tcp_timeout_close",
+		.data		= &ip_ct_tcp_timeout_close,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
+		.procname	= "ip_conntrack_udp_timeout",
+		.data		= &ip_ct_udp_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
+		.procname	= "ip_conntrack_udp_timeout_stream",
+		.data		= &ip_ct_udp_timeout_stream,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
+		.procname	= "ip_conntrack_icmp_timeout",
+		.data		= &ip_ct_icmp_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
+		.procname	= "ip_conntrack_generic_timeout",
+		.data		= &ip_ct_generic_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_LOG_INVALID,
+		.procname	= "ip_conntrack_log_invalid",
+		.data		= &ip_ct_log_invalid,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &log_invalid_proto_min,
+		.extra2		= &log_invalid_proto_max,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
+		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
+		.data		= &ip_ct_tcp_timeout_max_retrans,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
+		.procname	= "ip_conntrack_tcp_loose",
+		.data		= &ip_ct_tcp_loose,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
+		.procname	= "ip_conntrack_tcp_be_liberal",
+		.data		= &ip_ct_tcp_be_liberal,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
+		.procname	= "ip_conntrack_tcp_max_retrans",
+		.data		= &ip_ct_tcp_max_retrans,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+#define NET_IP_CONNTRACK_MAX 2089
+
+static ctl_table ip_ct_netfilter_table[] = {
+	{
+		.ctl_name	= NET_IPV4_NETFILTER,
+		.procname	= "netfilter",
+		.mode		= 0555,
+		.child		= ip_ct_sysctl_table,
+	},
+	{
+		.ctl_name	= NET_IP_CONNTRACK_MAX,
+		.procname	= "ip_conntrack_max",
+		.data		= &ip_conntrack_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_ipv4_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= ip_ct_netfilter_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ip_ct_net_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555, 
+		.child		= ip_ct_ipv4_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+EXPORT_SYMBOL(ip_ct_log_invalid);
+#endif /* CONFIG_SYSCTL */
+
+static int init_or_cleanup(int init)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+#endif
+	int ret = 0;
+
+	if (!init) goto cleanup;
+
+	ret = ip_conntrack_init();
+	if (ret < 0)
+		goto cleanup_nothing;
+
+#ifdef CONFIG_PROC_FS
+	ret = -ENOMEM;
+	proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
+	if (!proc) goto cleanup_init;
+
+	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+					&exp_file_ops);
+	if (!proc_exp) goto cleanup_proc;
+
+	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
+	if (!proc_stat)
+		goto cleanup_proc_exp;
+
+	proc_stat->proc_fops = &ct_cpu_seq_fops;
+	proc_stat->owner = THIS_MODULE;
+#endif
+
+	ret = nf_register_hook(&ip_conntrack_defrag_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register pre-routing defrag hook.\n");
+		goto cleanup_proc_stat;
+	}
+	ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register local_out defrag hook.\n");
+		goto cleanup_defragops;
+	}
+	ret = nf_register_hook(&ip_conntrack_in_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register pre-routing hook.\n");
+		goto cleanup_defraglocalops;
+	}
+	ret = nf_register_hook(&ip_conntrack_local_out_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register local out hook.\n");
+		goto cleanup_inops;
+	}
+	ret = nf_register_hook(&ip_conntrack_out_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register post-routing hook.\n");
+		goto cleanup_inandlocalops;
+	}
+	ret = nf_register_hook(&ip_conntrack_local_in_ops);
+	if (ret < 0) {
+		printk("ip_conntrack: can't register local in hook.\n");
+		goto cleanup_inoutandlocalops;
+	}
+#ifdef CONFIG_SYSCTL
+	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
+	if (ip_ct_sysctl_header == NULL) {
+		printk("ip_conntrack: can't register to sysctl.\n");
+		ret = -ENOMEM;
+		goto cleanup_localinops;
+	}
+#endif
+
+	return ret;
+
+ cleanup:
+#ifdef CONFIG_SYSCTL
+ 	unregister_sysctl_table(ip_ct_sysctl_header);
+ cleanup_localinops:
+#endif
+	nf_unregister_hook(&ip_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+	nf_unregister_hook(&ip_conntrack_out_ops);
+ cleanup_inandlocalops:
+	nf_unregister_hook(&ip_conntrack_local_out_ops);
+ cleanup_inops:
+	nf_unregister_hook(&ip_conntrack_in_ops);
+ cleanup_defraglocalops:
+	nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+	nf_unregister_hook(&ip_conntrack_defrag_ops);
+ cleanup_proc_stat:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ip_conntrack", proc_net_stat);
+ cleanup_proc_exp:
+	proc_net_remove("ip_conntrack_expect");
+ cleanup_proc:
+	proc_net_remove("ip_conntrack");
+ cleanup_init:
+#endif /* CONFIG_PROC_FS */
+	ip_conntrack_cleanup();
+ cleanup_nothing:
+	return ret;
+}
+
+/* FIXME: Allow NULL functions and sub in pointers to generic for
+   them. --RR */
+int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
+{
+	int ret = 0;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
+		ret = -EBUSY;
+		goto out;
+	}
+	ip_ct_protos[proto->proto] = proto;
+ out:
+	WRITE_UNLOCK(&ip_conntrack_lock);
+	return ret;
+}
+
+void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
+{
+	WRITE_LOCK(&ip_conntrack_lock);
+	ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
+	WRITE_UNLOCK(&ip_conntrack_lock);
+	
+	/* Somebody could be still looking at the proto in bh. */
+	synchronize_net();
+
+	/* Remove all contrack entries for this protocol */
+	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+/* Some modules need us, but don't depend directly on any symbol.
+   They should call this. */
+void need_ip_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(ip_conntrack_protocol_register);
+EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
+EXPORT_SYMBOL(ip_ct_get_tuple);
+EXPORT_SYMBOL(invert_tuplepr);
+EXPORT_SYMBOL(ip_conntrack_alter_reply);
+EXPORT_SYMBOL(ip_conntrack_destroyed);
+EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(ip_conntrack_helper_register);
+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(ip_ct_iterate_cleanup);
+EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(ip_ct_protos);
+EXPORT_SYMBOL(ip_ct_find_proto);
+EXPORT_SYMBOL(ip_conntrack_expect_alloc);
+EXPORT_SYMBOL(ip_conntrack_expect_free);
+EXPORT_SYMBOL(ip_conntrack_expect_related);
+EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+EXPORT_SYMBOL(ip_ct_gather_frags);
+EXPORT_SYMBOL(ip_conntrack_htable_size);
+EXPORT_SYMBOL(ip_conntrack_lock);
+EXPORT_SYMBOL(ip_conntrack_hash);
+EXPORT_SYMBOL(ip_conntrack_untracked);
+EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_put);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+EXPORT_SYMBOL(ip_conntrack_tcp_update);
+#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
new file mode 100644
index 00000000000..992fac3e36e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -0,0 +1,159 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Version: 0.0.7
+ *
+ * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
+ * 	- port to newnat API
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
+#include <linux/moduleparam.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("tftp connection tracking helper");
+MODULE_LICENSE("GPL");
+
+#define MAX_PORTS 8
+static int ports[MAX_PORTS];
+static int ports_c;
+module_param_array(ports, int, &ports_c, 0400);
+MODULE_PARM_DESC(ports, "port numbers of tftp servers");
+
+#if 0
+#define DEBUGP(format, args...) printk("%s:%s:" format, \
+                                       __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
+				 enum ip_conntrack_info ctinfo,
+				 struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
+
+static int tftp_help(struct sk_buff **pskb,
+		     struct ip_conntrack *ct,
+		     enum ip_conntrack_info ctinfo)
+{
+	struct tftphdr _tftph, *tfh;
+	struct ip_conntrack_expect *exp;
+	unsigned int ret = NF_ACCEPT;
+
+	tfh = skb_header_pointer(*pskb,
+				 (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
+				 sizeof(_tftph), &_tftph);
+	if (tfh == NULL)
+		return NF_ACCEPT;
+
+	switch (ntohs(tfh->opcode)) {
+	/* RRQ and WRQ works the same way */
+	case TFTP_OPCODE_READ:
+	case TFTP_OPCODE_WRITE:
+		DEBUGP("");
+		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+		exp = ip_conntrack_expect_alloc();
+		if (exp == NULL)
+			return NF_DROP;
+
+		exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		exp->mask.src.ip = 0xffffffff;
+		exp->mask.dst.ip = 0xffffffff;
+		exp->mask.dst.u.udp.port = 0xffff;
+		exp->mask.dst.protonum = 0xff;
+		exp->expectfn = NULL;
+		exp->master = ct;
+
+		DEBUGP("expect: ");
+		DUMP_TUPLE(&exp->tuple);
+		DUMP_TUPLE(&exp->mask);
+		if (ip_nat_tftp_hook)
+			ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
+		else if (ip_conntrack_expect_related(exp) != 0) {
+			ip_conntrack_expect_free(exp);
+			ret = NF_DROP;
+		}
+		break;
+	case TFTP_OPCODE_DATA:
+	case TFTP_OPCODE_ACK:
+		DEBUGP("Data/ACK opcode\n");
+		break;
+	case TFTP_OPCODE_ERROR:
+		DEBUGP("Error opcode\n");
+		break;
+	default:
+		DEBUGP("Unknown opcode\n");
+	}
+	return NF_ACCEPT;
+}
+
+static struct ip_conntrack_helper tftp[MAX_PORTS];
+static char tftp_names[MAX_PORTS][10];
+
+static void fini(void)
+{
+	int i;
+
+	for (i = 0 ; i < ports_c; i++) {
+		DEBUGP("unregistering helper for port %d\n",
+			ports[i]);
+		ip_conntrack_helper_unregister(&tftp[i]);
+	} 
+}
+
+static int __init init(void)
+{
+	int i, ret;
+	char *tmpname;
+
+	if (ports_c == 0)
+		ports[ports_c++] = TFTP_PORT;
+
+	for (i = 0; i < ports_c; i++) {
+		/* Create helper structure */
+		memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
+
+		tftp[i].tuple.dst.protonum = IPPROTO_UDP;
+		tftp[i].tuple.src.u.udp.port = htons(ports[i]);
+		tftp[i].mask.dst.protonum = 0xFF;
+		tftp[i].mask.src.u.udp.port = 0xFFFF;
+		tftp[i].max_expected = 1;
+		tftp[i].timeout = 5 * 60; /* 5 minutes */
+		tftp[i].me = THIS_MODULE;
+		tftp[i].help = tftp_help;
+
+		tmpname = &tftp_names[i][0];
+		if (ports[i] == TFTP_PORT)
+			sprintf(tmpname, "tftp");
+		else
+			sprintf(tmpname, "tftp-%d", i);
+		tftp[i].name = tmpname;
+
+		DEBUGP("port #%d: %d\n", i, ports[i]);
+
+		ret=ip_conntrack_helper_register(&tftp[i]);
+		if (ret) {
+			printk("ERROR registering helper for port %d\n",
+				ports[i]);
+			fini();
+			return(ret);
+		}
+	}
+	return(0);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
new file mode 100644
index 00000000000..da1f412583e
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_amanda.c
@@ -0,0 +1,88 @@
+/* Amanda extension for TCP NAT alteration.
+ * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
+ * based on a copy of HW's ip_nat_irc.c as well as other modules
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Module load syntax:
+ * 	insmod ip_nat_amanda.o
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
+
+
+MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
+MODULE_DESCRIPTION("Amanda NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int matchoff,
+			 unsigned int matchlen,
+			 struct ip_conntrack_expect *exp)
+{
+	char buffer[sizeof("65535")];
+	u_int16_t port;
+	unsigned int ret;
+
+	/* Connection comes from client. */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_ORIGINAL;
+
+	/* When you see the packet, we need to NAT it the same as the
+	 * this one (ie. same IP: it will be TCP and master is UDP). */
+	exp->expectfn = ip_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it. */
+	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (ip_conntrack_expect_related(exp) == 0)
+			break;
+	}
+
+	if (port == 0) {
+		ip_conntrack_expect_free(exp);
+		return NF_DROP;
+	}
+
+	sprintf(buffer, "%u", port);
+	ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+				       matchoff, matchlen,
+				       buffer, strlen(buffer));
+	if (ret != NF_ACCEPT)
+		ip_conntrack_unexpect_related(exp);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ip_nat_amanda_hook = NULL;
+	/* Make sure noone calls it, meanwhile. */
+	synchronize_net();
+}
+
+static int __init init(void)
+{
+	BUG_ON(ip_nat_amanda_hook);
+	ip_nat_amanda_hook = help;
+	return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
new file mode 100644
index 00000000000..162ceacfc29
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -0,0 +1,556 @@
+/* NAT for netfilter; shared with compatibility layer. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>  /* For tcp_prot in getorigdst */
+#include <linux/icmp.h>
+#include <linux/udp.h>
+#include <linux/jhash.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_nat_lock);
+
+/* Calculated at init based on memory size */
+static unsigned int ip_nat_htable_size;
+
+static struct list_head *bysource;
+struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+
+
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct ip_conntrack_tuple *tuple)
+{
+	/* Original src, to ensure we map it consistently if poss. */
+	return jhash_3words(tuple->src.ip, tuple->src.u.all,
+			    tuple->dst.protonum, 0) % ip_nat_htable_size;
+}
+
+/* Noone using conntrack by the time this called. */
+static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
+{
+	if (!(conn->status & IPS_NAT_DONE_MASK))
+		return;
+
+	WRITE_LOCK(&ip_nat_lock);
+	list_del(&conn->nat.info.bysource);
+	WRITE_UNLOCK(&ip_nat_lock);
+}
+
+/* We do checksum mangling, so if they were wrong before they're still
+ * wrong.  Also works for incomplete packets (eg. ICMP dest
+ * unreachables.) */
+u_int16_t
+ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+	u_int32_t diffs[] = { oldvalinv, newval };
+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+				      oldcheck^0xFFFF));
+}
+
+/* Is this tuple already taken? (not by us) */
+int
+ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
+		  const struct ip_conntrack *ignored_conntrack)
+{
+	/* Conntrack tracking doesn't keep track of outgoing tuples; only
+	   incoming ones.  NAT means they don't have a fixed mapping,
+	   so we invert the tuple and look for the incoming reply.
+
+	   We could keep a separate hash if this proves too slow. */
+	struct ip_conntrack_tuple reply;
+
+	invert_tuplepr(&reply, tuple);
+	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+}
+
+/* If we source map this tuple so reply looks like reply_tuple, will
+ * that meet the constraints of range. */
+static int
+in_range(const struct ip_conntrack_tuple *tuple,
+	 const struct ip_nat_range *range)
+{
+	struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum);
+
+	/* If we are supposed to map IPs, then we must be in the
+	   range specified, otherwise let this drag us onto a new src IP. */
+	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+		if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
+		    || ntohl(tuple->src.ip) > ntohl(range->max_ip))
+			return 0;
+	}
+
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+	    || proto->in_range(tuple, IP_NAT_MANIP_SRC,
+			       &range->min, &range->max))
+		return 1;
+
+	return 0;
+}
+
+static inline int
+same_src(const struct ip_conntrack *ct,
+	 const struct ip_conntrack_tuple *tuple)
+{
+	return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
+		== tuple->dst.protonum
+		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
+		== tuple->src.ip
+		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
+		== tuple->src.u.all);
+}
+
+/* Only called for SRC manip */
+static int
+find_appropriate_src(const struct ip_conntrack_tuple *tuple,
+		     struct ip_conntrack_tuple *result,
+		     const struct ip_nat_range *range)
+{
+	unsigned int h = hash_by_src(tuple);
+	struct ip_conntrack *ct;
+
+	READ_LOCK(&ip_nat_lock);
+	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
+		if (same_src(ct, tuple)) {
+			/* Copy source part from reply tuple. */
+			invert_tuplepr(result,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+			result->dst = tuple->dst;
+
+			if (in_range(result, range)) {
+				READ_UNLOCK(&ip_nat_lock);
+				return 1;
+			}
+		}
+	}
+	READ_UNLOCK(&ip_nat_lock);
+	return 0;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
+   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+   1-65535, we don't do pro-rata allocation based on ports; we choose
+   the ip with the lowest src-ip/dst-ip/proto usage.
+*/
+static void
+find_best_ips_proto(struct ip_conntrack_tuple *tuple,
+		    const struct ip_nat_range *range,
+		    const struct ip_conntrack *conntrack,
+		    enum ip_nat_manip_type maniptype)
+{
+	u_int32_t *var_ipp;
+	/* Host order */
+	u_int32_t minip, maxip, j;
+
+	/* No IP mapping?  Do nothing. */
+	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+		return;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		var_ipp = &tuple->src.ip;
+	else
+		var_ipp = &tuple->dst.ip;
+
+	/* Fast path: only one choice. */
+	if (range->min_ip == range->max_ip) {
+		*var_ipp = range->min_ip;
+		return;
+	}
+
+	/* Hashing source and destination IPs gives a fairly even
+	 * spread in practice (if there are a small number of IPs
+	 * involved, there usually aren't that many connections
+	 * anyway).  The consistency means that servers see the same
+	 * client coming from the same IP (some Internet Banking sites
+	 * like this), even across reboots. */
+	minip = ntohl(range->min_ip);
+	maxip = ntohl(range->max_ip);
+	j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
+	*var_ipp = htonl(minip + j % (maxip - minip + 1));
+}
+
+/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
+ * we change the source to map into the range.  For NF_IP_PRE_ROUTING
+ * and NF_IP_LOCAL_OUT, we change the destination to map into the
+ * range.  It might not be possible to get a unique tuple, but we try.
+ * At worst (or if we race), we will end up with a final duplicate in
+ * __ip_conntrack_confirm and drop the packet. */
+static void
+get_unique_tuple(struct ip_conntrack_tuple *tuple,
+		 const struct ip_conntrack_tuple *orig_tuple,
+		 const struct ip_nat_range *range,
+		 struct ip_conntrack *conntrack,
+		 enum ip_nat_manip_type maniptype)
+{
+	struct ip_nat_protocol *proto
+		= ip_nat_find_proto(orig_tuple->dst.protonum);
+
+	/* 1) If this srcip/proto/src-proto-part is currently mapped,
+	   and that same mapping gives a unique tuple within the given
+	   range, use that.
+
+	   This is only required for source (ie. NAT/masq) mappings.
+	   So far, we don't do local source mappings, so multiple
+	   manips not an issue.  */
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		if (find_appropriate_src(orig_tuple, tuple, range)) {
+			DEBUGP("get_unique_tuple: Found current src map\n");
+			if (!ip_nat_used_tuple(tuple, conntrack))
+				return;
+		}
+	}
+
+	/* 2) Select the least-used IP/proto combination in the given
+	   range. */
+	*tuple = *orig_tuple;
+	find_best_ips_proto(tuple, range, conntrack, maniptype);
+
+	/* 3) The per-protocol part of the manip is made to map into
+	   the range to make a unique tuple. */
+
+	/* Only bother mapping if it's not already in range and unique */
+	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
+	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
+	    && !ip_nat_used_tuple(tuple, conntrack))
+		return;
+
+	/* Last change: get protocol to try to obtain unique tuple. */
+	proto->unique_tuple(tuple, range, maniptype, conntrack);
+}
+
+unsigned int
+ip_nat_setup_info(struct ip_conntrack *conntrack,
+		  const struct ip_nat_range *range,
+		  unsigned int hooknum)
+{
+	struct ip_conntrack_tuple curr_tuple, new_tuple;
+	struct ip_nat_info *info = &conntrack->nat.info;
+	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
+	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+		     || hooknum == NF_IP_POST_ROUTING
+		     || hooknum == NF_IP_LOCAL_IN
+		     || hooknum == NF_IP_LOCAL_OUT);
+	BUG_ON(ip_nat_initialized(conntrack, maniptype));
+
+	/* What we've got will look like inverse of reply. Normally
+	   this is what is in the conntrack, except for prior
+	   manipulations (future optimization: if num_manips == 0,
+	   orig_tp =
+	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+	invert_tuplepr(&curr_tuple,
+		       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
+
+	if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+		struct ip_conntrack_tuple reply;
+
+		/* Alter conntrack table so will recognize replies. */
+		invert_tuplepr(&reply, &new_tuple);
+		ip_conntrack_alter_reply(conntrack, &reply);
+
+		/* Non-atomic: we own this at the moment. */
+		if (maniptype == IP_NAT_MANIP_SRC)
+			conntrack->status |= IPS_SRC_NAT;
+		else
+			conntrack->status |= IPS_DST_NAT;
+	}
+
+	/* Place in source hash if this is the first time. */
+	if (have_to_hash) {
+		unsigned int srchash
+			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				      .tuple);
+		WRITE_LOCK(&ip_nat_lock);
+		list_add(&info->bysource, &bysource[srchash]);
+		WRITE_UNLOCK(&ip_nat_lock);
+	}
+
+	/* It's done. */
+	if (maniptype == IP_NAT_MANIP_DST)
+		set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
+	else
+		set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
+
+	return NF_ACCEPT;
+}
+
+/* Returns true if succeeded. */
+static int
+manip_pkt(u_int16_t proto,
+	  struct sk_buff **pskb,
+	  unsigned int iphdroff,
+	  const struct ip_conntrack_tuple *target,
+	  enum ip_nat_manip_type maniptype)
+{
+	struct iphdr *iph;
+
+	(*pskb)->nfcache |= NFC_ALTERED;
+	if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
+		return 0;
+
+	iph = (void *)(*pskb)->data + iphdroff;
+
+	/* Manipulate protcol part. */
+	if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
+	                                         target, maniptype))
+		return 0;
+
+	iph = (void *)(*pskb)->data + iphdroff;
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
+						iph->check);
+		iph->saddr = target->src.ip;
+	} else {
+		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
+						iph->check);
+		iph->daddr = target->dst.ip;
+	}
+	return 1;
+}
+
+/* Do packet manipulations according to ip_nat_setup_info. */
+unsigned int nat_packet(struct ip_conntrack *ct,
+			enum ip_conntrack_info ctinfo,
+			unsigned int hooknum,
+			struct sk_buff **pskb)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	unsigned long statusbit;
+	enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
+
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)
+	    && (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)) {
+		DEBUGP("ip_nat_core: adjusting sequence number\n");
+		/* future: put this in a l4-proto specific function,
+		 * and call this function here. */
+		if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
+			return NF_DROP;
+	}
+
+	if (mtype == IP_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	/* Non-atomic: these bits don't change. */
+	if (ct->status & statusbit) {
+		struct ip_conntrack_tuple target;
+
+		/* We are aiming to look like inverse of other direction. */
+		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+			return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+/* Dir is direction ICMP is coming from (opposite to packet it contains) */
+int icmp_reply_translation(struct sk_buff **pskb,
+			   struct ip_conntrack *ct,
+			   enum ip_nat_manip_type manip,
+			   enum ip_conntrack_dir dir)
+{
+	struct {
+		struct icmphdr icmp;
+		struct iphdr ip;
+	} *inside;
+	struct ip_conntrack_tuple inner, target;
+	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+
+	if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
+		return 0;
+
+	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+
+	/* We're actually going to mangle it beyond trivial checksum
+	   adjustment, so make sure the current checksum is correct. */
+	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
+		hdrlen = (*pskb)->nh.iph->ihl * 4;
+		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
+						(*pskb)->len - hdrlen, 0)))
+			return 0;
+	}
+
+	/* Must be RELATED */
+	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
+		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+
+	/* Redirects on non-null nats must be dropped, else they'll
+           start talking to each other without our translation, and be
+           confused... --RR */
+	if (inside->icmp.type == ICMP_REDIRECT) {
+		/* If NAT isn't finished, assume it and drop. */
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
+	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
+
+	if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
+	                     sizeof(struct icmphdr) + inside->ip.ihl*4,
+	                     &inner, ip_ct_find_proto(inside->ip.protocol)))
+		return 0;
+
+	/* Change inner back to look like incoming packet.  We do the
+	   opposite manip on this hook to normal, because it might not
+	   pass all hooks (locally-generated ICMP).  Consider incoming
+	   packet: PREROUTING (DST manip), routing produces ICMP, goes
+	   through POSTROUTING (which must correct the DST manip). */
+	if (!manip_pkt(inside->ip.protocol, pskb,
+		       (*pskb)->nh.iph->ihl*4
+		       + sizeof(inside->icmp),
+		       &ct->tuplehash[!dir].tuple,
+		       !manip))
+		return 0;
+
+	/* Reloading "inside" here since manip_pkt inner. */
+	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	inside->icmp.checksum = 0;
+	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+						       (*pskb)->len - hdrlen,
+						       0));
+
+	/* Change outer to look the reply to an incoming packet
+	 * (proto 0 means don't invert per-proto part). */
+
+	/* Obviously, we need to NAT destination IP, but source IP
+	   should be NAT'ed only if it is from a NAT'd host.
+
+	   Explanation: some people use NAT for anonymizing.  Also,
+	   CERT recommends dropping all packets from private IP
+	   addresses (although ICMP errors from internal links with
+	   such addresses are not too uncommon, as Alan Cox points
+	   out) */
+	if (manip != IP_NAT_MANIP_SRC
+	    || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+		if (!manip_pkt(0, pskb, 0, &target, manip))
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Protocol registration. */
+int ip_nat_protocol_register(struct ip_nat_protocol *proto)
+{
+	int ret = 0;
+
+	WRITE_LOCK(&ip_nat_lock);
+	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+		ret = -EBUSY;
+		goto out;
+	}
+	ip_nat_protos[proto->protonum] = proto;
+ out:
+	WRITE_UNLOCK(&ip_nat_lock);
+	return ret;
+}
+
+/* Noone stores the protocol anywhere; simply delete it. */
+void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
+{
+	WRITE_LOCK(&ip_nat_lock);
+	ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+	WRITE_UNLOCK(&ip_nat_lock);
+
+	/* Someone could be still looking at the proto in a bh. */
+	synchronize_net();
+}
+
+int __init ip_nat_init(void)
+{
+	size_t i;
+
+	/* Leave them the same for the moment. */
+	ip_nat_htable_size = ip_conntrack_htable_size;
+
+	/* One vmalloc for both hash tables */
+	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
+	if (!bysource)
+		return -ENOMEM;
+
+	/* Sew in builtin protocols. */
+	WRITE_LOCK(&ip_nat_lock);
+	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
+		ip_nat_protos[i] = &ip_nat_unknown_protocol;
+	ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
+	ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
+	ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
+	WRITE_UNLOCK(&ip_nat_lock);
+
+	for (i = 0; i < ip_nat_htable_size; i++) {
+		INIT_LIST_HEAD(&bysource[i]);
+	}
+
+	/* FIXME: Man, this is a hack.  <SIGH> */
+	IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
+	ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
+
+	/* Initialize fake conntrack so that NAT will skip it */
+	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	return 0;
+}
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int clean_nat(struct ip_conntrack *i, void *data)
+{
+	memset(&i->nat, 0, sizeof(i->nat));
+	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
+	return 0;
+}
+
+/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
+void ip_nat_cleanup(void)
+{
+	ip_ct_iterate_cleanup(&clean_nat, NULL);
+	ip_conntrack_destroyed = NULL;
+	vfree(bysource);
+}
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
new file mode 100644
index 00000000000..c6000e794ad
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -0,0 +1,183 @@
+/* FTP extension for TCP NAT alteration. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/moduleparam.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("ftp NAT helper");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* FIXME: Time out? --RR */
+
+static int
+mangle_rfc959_packet(struct sk_buff **pskb,
+		     u_int32_t newip,
+		     u_int16_t port,
+		     unsigned int matchoff,
+		     unsigned int matchlen,
+		     struct ip_conntrack *ct,
+		     enum ip_conntrack_info ctinfo,
+		     u32 *seq)
+{
+	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
+
+	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
+		NIPQUAD(newip), port>>8, port&0xFF);
+
+	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+	*seq += strlen(buffer) - matchlen;
+	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, 
+					matchlen, buffer, strlen(buffer));
+}
+
+/* |1|132.235.1.2|6275| */
+static int
+mangle_eprt_packet(struct sk_buff **pskb,
+		   u_int32_t newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct ip_conntrack *ct,
+		   enum ip_conntrack_info ctinfo,
+		   u32 *seq)
+{
+	char buffer[sizeof("|1|255.255.255.255|65535|")];
+
+	sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
+
+	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+	*seq += strlen(buffer) - matchlen;
+	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, 
+					matchlen, buffer, strlen(buffer));
+}
+
+/* |1|132.235.1.2|6275| */
+static int
+mangle_epsv_packet(struct sk_buff **pskb,
+		   u_int32_t newip,
+		   u_int16_t port,
+		   unsigned int matchoff,
+		   unsigned int matchlen,
+		   struct ip_conntrack *ct,
+		   enum ip_conntrack_info ctinfo,
+		   u32 *seq)
+{
+	char buffer[sizeof("|||65535|")];
+
+	sprintf(buffer, "|||%u|", port);
+
+	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+
+	*seq += strlen(buffer) - matchlen;
+	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, 
+					matchlen, buffer, strlen(buffer));
+}
+
+static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
+		     unsigned int,
+		     unsigned int,
+		     struct ip_conntrack *,
+		     enum ip_conntrack_info,
+		     u32 *seq)
+= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
+    [IP_CT_FTP_PASV] = mangle_rfc959_packet,
+    [IP_CT_FTP_EPRT] = mangle_eprt_packet,
+    [IP_CT_FTP_EPSV] = mangle_epsv_packet
+};
+
+/* So, this packet has hit the connection tracking matching code.
+   Mangle it, and change the expectation to match the new version. */
+static unsigned int ip_nat_ftp(struct sk_buff **pskb,
+			       enum ip_conntrack_info ctinfo,
+			       enum ip_ct_ftp_type type,
+			       unsigned int matchoff,
+			       unsigned int matchlen,
+			       struct ip_conntrack_expect *exp,
+			       u32 *seq)
+{
+	u_int32_t newip;
+	u_int16_t port;
+	int dir = CTINFO2DIR(ctinfo);
+	struct ip_conntrack *ct = exp->master;
+
+	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+
+	/* Connection will come from wherever this packet goes, hence !dir */
+	newip = ct->tuplehash[!dir].tuple.dst.ip;
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = !dir;
+
+	/* When you see the packet, we need to NAT it the same as the
+	 * this one. */
+	exp->expectfn = ip_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it. */
+	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (ip_conntrack_expect_related(exp) == 0)
+			break;
+	}
+
+	if (port == 0) {
+		ip_conntrack_expect_free(exp);
+		return NF_DROP;
+	}
+
+	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
+			  seq)) {
+		ip_conntrack_unexpect_related(exp);
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+static void __exit fini(void)
+{
+	ip_nat_ftp_hook = NULL;
+	/* Make sure noone calls it, meanwhile. */
+	synchronize_net();
+}
+
+static int __init init(void)
+{
+	BUG_ON(ip_nat_ftp_hook);
+	ip_nat_ftp_hook = ip_nat_ftp;
+	return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+	return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
new file mode 100644
index 00000000000..1637b96d8c0
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -0,0 +1,430 @@
+/* ip_nat_helper.c - generic support functions for NAT helpers 
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 	14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
+ *		- add support for SACK adjustment 
+ *	14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
+ *		- merge SACK support into newnat API
+ *	16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
+ *		- make ip_nat_resize_packet more generic (TCP and UDP)
+ *		- add ip_nat_mangle_udp_packet
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#define DUMP_OFFSET(x)	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
+#else
+#define DEBUGP(format, args...)
+#define DUMP_OFFSET(x)
+#endif
+
+static DECLARE_LOCK(ip_nat_seqofs_lock);
+
+/* Setup TCP sequence correction given this change at this sequence */
+static inline void 
+adjust_tcp_sequence(u32 seq,
+		    int sizediff,
+		    struct ip_conntrack *ct, 
+		    enum ip_conntrack_info ctinfo)
+{
+	int dir;
+	struct ip_nat_seq *this_way, *other_way;
+
+	DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
+		(*skb)->len, new_size);
+
+	dir = CTINFO2DIR(ctinfo);
+
+	this_way = &ct->nat.info.seq[dir];
+	other_way = &ct->nat.info.seq[!dir];
+
+	DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
+	DUMP_OFFSET(this_way);
+
+	LOCK_BH(&ip_nat_seqofs_lock);
+
+	/* SYN adjust. If it's uninitialized, or this is after last
+	 * correction, record it: we don't handle more than one
+	 * adjustment in the window, but do deal with common case of a
+	 * retransmit */
+	if (this_way->offset_before == this_way->offset_after
+	    || before(this_way->correction_pos, seq)) {
+		    this_way->correction_pos = seq;
+		    this_way->offset_before = this_way->offset_after;
+		    this_way->offset_after += sizediff;
+	}
+	UNLOCK_BH(&ip_nat_seqofs_lock);
+
+	DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
+	DUMP_OFFSET(this_way);
+}
+
+/* Frobs data inside this packet, which is linear. */
+static void mangle_contents(struct sk_buff *skb,
+			    unsigned int dataoff,
+			    unsigned int match_offset,
+			    unsigned int match_len,
+			    const char *rep_buffer,
+			    unsigned int rep_len)
+{
+	unsigned char *data;
+
+	BUG_ON(skb_is_nonlinear(skb));
+	data = (unsigned char *)skb->nh.iph + dataoff;
+
+	/* move post-replacement */
+	memmove(data + match_offset + rep_len,
+		data + match_offset + match_len,
+		skb->tail - (data + match_offset + match_len));
+
+	/* insert data from buffer */
+	memcpy(data + match_offset, rep_buffer, rep_len);
+
+	/* update skb info */
+	if (rep_len > match_len) {
+		DEBUGP("ip_nat_mangle_packet: Extending packet by "
+			"%u from %u bytes\n", rep_len - match_len,
+		       skb->len);
+		skb_put(skb, rep_len - match_len);
+	} else {
+		DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
+			"%u from %u bytes\n", match_len - rep_len,
+		       skb->len);
+		__skb_trim(skb, skb->len + rep_len - match_len);
+	}
+
+	/* fix IP hdr checksum information */
+	skb->nh.iph->tot_len = htons(skb->len);
+	ip_send_check(skb->nh.iph);
+}
+
+/* Unusual, but possible case. */
+static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+{
+	struct sk_buff *nskb;
+
+	if ((*pskb)->len + extra > 65535)
+		return 0;
+
+	nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
+	if (!nskb)
+		return 0;
+
+	/* Transfer socket to new skb. */
+	if ((*pskb)->sk)
+		skb_set_owner_w(nskb, (*pskb)->sk);
+#ifdef CONFIG_NETFILTER_DEBUG
+	nskb->nf_debug = (*pskb)->nf_debug;
+#endif
+	kfree_skb(*pskb);
+	*pskb = nskb;
+	return 1;
+}
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * */
+int 
+ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
+			 struct ip_conntrack *ct,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int match_offset,
+			 unsigned int match_len,
+			 const char *rep_buffer,
+			 unsigned int rep_len)
+{
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	int datalen;
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->len))
+		return 0;
+
+	if (rep_len > match_len
+	    && rep_len - match_len > skb_tailroom(*pskb)
+	    && !enlarge_skb(pskb, rep_len - match_len))
+		return 0;
+
+	SKB_LINEAR_ASSERT(*pskb);
+
+	iph = (*pskb)->nh.iph;
+	tcph = (void *)iph + iph->ihl*4;
+
+	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+			match_offset, match_len, rep_buffer, rep_len);
+
+	datalen = (*pskb)->len - iph->ihl*4;
+	tcph->check = 0;
+	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
+				   csum_partial((char *)tcph, datalen, 0));
+
+	if (rep_len != match_len) {
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+		adjust_tcp_sequence(ntohl(tcph->seq),
+				    (int)rep_len - (int)match_len,
+				    ct, ctinfo);
+		/* Tell TCP window tracking about seq change */
+		ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
+	}
+	return 1;
+}
+			
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
+ *       should be fairly easy to do.
+ */
+int 
+ip_nat_mangle_udp_packet(struct sk_buff **pskb,
+			 struct ip_conntrack *ct,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int match_offset,
+			 unsigned int match_len,
+			 const char *rep_buffer,
+			 unsigned int rep_len)
+{
+	struct iphdr *iph;
+	struct udphdr *udph;
+
+	/* UDP helpers might accidentally mangle the wrong packet */
+	iph = (*pskb)->nh.iph;
+	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + 
+	                       match_offset + match_len)
+		return 0;
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->len))
+		return 0;
+
+	if (rep_len > match_len
+	    && rep_len - match_len > skb_tailroom(*pskb)
+	    && !enlarge_skb(pskb, rep_len - match_len))
+		return 0;
+
+	iph = (*pskb)->nh.iph;
+	udph = (void *)iph + iph->ihl*4;
+	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+			match_offset, match_len, rep_buffer, rep_len);
+
+	/* update the length of the UDP packet */
+	udph->len = htons((*pskb)->len - iph->ihl*4);
+
+	/* fix udp checksum if udp checksum was previously calculated */
+	if (udph->check) {
+		int datalen = (*pskb)->len - iph->ihl * 4;
+		udph->check = 0;
+		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+		                                datalen, IPPROTO_UDP,
+		                                csum_partial((char *)udph,
+		                                             datalen, 0));
+	}
+
+	return 1;
+}
+
+/* Adjust one found SACK option including checksum correction */
+static void
+sack_adjust(struct sk_buff *skb,
+	    struct tcphdr *tcph, 
+	    unsigned int sackoff,
+	    unsigned int sackend,
+	    struct ip_nat_seq *natseq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block *sack;
+		u_int32_t new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_start_seq = ntohl(sack->start_seq) 
+					- natseq->offset_after;
+		else
+			new_start_seq = ntohl(sack->start_seq) 
+					- natseq->offset_before;
+		new_start_seq = htonl(new_start_seq);
+
+		if (after(ntohl(sack->end_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_end_seq = ntohl(sack->end_seq)
+				      - natseq->offset_after;
+		else
+			new_end_seq = ntohl(sack->end_seq)
+				      - natseq->offset_before;
+		new_end_seq = htonl(new_end_seq);
+
+		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			ntohl(sack->start_seq), new_start_seq,
+			ntohl(sack->end_seq), new_end_seq);
+
+		tcph->check = 
+			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
+					   ip_nat_cheat_check(~sack->end_seq, 
+						   	      new_end_seq,
+							      tcph->check));
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/* TCP SACK sequence number adjustment */
+static inline unsigned int
+ip_nat_sack_adjust(struct sk_buff **pskb,
+		   struct tcphdr *tcph,
+		   struct ip_conntrack *ct,
+		   enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+
+	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
+	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+
+	if (!skb_ip_make_writable(pskb, optend))
+		return 0;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = (*pskb)->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend
+			    || optoff + op[1] > optend
+			    || op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK
+			    && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
+			    && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				sack_adjust(*pskb, tcph, optoff+2,
+					    optoff+op[1],
+					    &ct->nat.info.seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
+int
+ip_nat_seq_adjust(struct sk_buff **pskb, 
+		  struct ip_conntrack *ct, 
+		  enum ip_conntrack_info ctinfo)
+{
+	struct tcphdr *tcph;
+	int dir, newseq, newack;
+	struct ip_nat_seq *this_way, *other_way;	
+
+	dir = CTINFO2DIR(ctinfo);
+
+	this_way = &ct->nat.info.seq[dir];
+	other_way = &ct->nat.info.seq[!dir];
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+		return 0;
+
+	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	if (after(ntohl(tcph->seq), this_way->correction_pos))
+		newseq = ntohl(tcph->seq) + this_way->offset_after;
+	else
+		newseq = ntohl(tcph->seq) + this_way->offset_before;
+	newseq = htonl(newseq);
+
+	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+		  other_way->correction_pos))
+		newack = ntohl(tcph->ack_seq) - other_way->offset_after;
+	else
+		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
+	newack = htonl(newack);
+
+	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
+					 ip_nat_cheat_check(~tcph->ack_seq, 
+					 		    newack, 
+							    tcph->check));
+
+	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+		ntohl(newack));
+
+	tcph->seq = newseq;
+	tcph->ack_seq = newack;
+
+	if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+		return 0;
+
+	ip_conntrack_tcp_update(*pskb, ct, dir);
+
+	return 1;
+}
+
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+void ip_nat_follow_master(struct ip_conntrack *ct,
+			  struct ip_conntrack_expect *exp)
+{
+	struct ip_nat_range range;
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+	/* hook doesn't matter, but it has to do source manip */
+	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.min = range.max = exp->saved_proto;
+	range.min_ip = range.max_ip
+		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
+	/* hook doesn't matter, but it has to do destination manip */
+	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
new file mode 100644
index 00000000000..9c1ca3381d5
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -0,0 +1,125 @@
+/* IRC extension for TCP NAT alteration.
+ * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * based on a copy of RR's ip_nat_ftp.c
+ *
+ * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/kernel.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/moduleparam.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IRC (DCC) NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int matchoff,
+			 unsigned int matchlen,
+			 struct ip_conntrack_expect *exp)
+{
+	u_int16_t port;
+	unsigned int ret;
+
+	/* "4294967296 65635 " */
+	char buffer[18];
+
+	DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
+	       expect->seq, exp_irc_info->len,
+	       ntohl(tcph->seq));
+
+	/* Reply comes from server. */
+	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_REPLY;
+
+	/* When you see the packet, we need to NAT it the same as the
+	 * this one. */
+	exp->expectfn = ip_nat_follow_master;
+
+	/* Try to get same port: if not, try to change it. */
+	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		exp->tuple.dst.u.tcp.port = htons(port);
+		if (ip_conntrack_expect_related(exp) == 0)
+			break;
+	}
+
+	if (port == 0) {
+		ip_conntrack_expect_free(exp);
+		return NF_DROP;
+	}
+
+	/*      strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
+	 *      strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
+	 *      strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
+	 *      strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
+	 *      strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
+	 *              AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
+	 *                      255.255.255.255==4294967296, 10 digits)
+	 *              P:         bound port (min 1 d, max 5d (65635))
+	 *              F:         filename   (min 1 d )
+	 *              S:         size       (min 1 d )
+	 *              0x01, \n:  terminators
+	 */
+
+	/* AAA = "us", ie. where server normally talks to. */
+	sprintf(buffer, "%u %u",
+		ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
+		port);
+	DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
+	       buffer, NIPQUAD(exp->tuple.src.ip), port);
+
+	ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, 
+				       matchoff, matchlen, buffer, 
+				       strlen(buffer));
+	if (ret != NF_ACCEPT)
+		ip_conntrack_unexpect_related(exp);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ip_nat_irc_hook = NULL;
+	/* Make sure noone calls it, meanwhile. */
+	synchronize_net();
+}
+
+static int __init init(void)
+{
+	BUG_ON(ip_nat_irc_hook);
+	ip_nat_irc_hook = help;
+	return 0;
+}
+
+/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
+static int warn_set(const char *val, struct kernel_param *kp)
+{
+	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+	return 0;
+}
+module_param_call(ports, warn_set, NULL, NULL, 0);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
new file mode 100644
index 00000000000..a558cf0eee8
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -0,0 +1,115 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int
+icmp_in_range(const struct ip_conntrack_tuple *tuple,
+	      enum ip_nat_manip_type maniptype,
+	      const union ip_conntrack_manip_proto *min,
+	      const union ip_conntrack_manip_proto *max)
+{
+	return (tuple->src.u.icmp.id >= min->icmp.id
+		&& tuple->src.u.icmp.id <= max->icmp.id);
+}
+
+static int
+icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
+		  const struct ip_nat_range *range,
+		  enum ip_nat_manip_type maniptype,
+		  const struct ip_conntrack *conntrack)
+{
+	static u_int16_t id;
+	unsigned int range_size
+		= (unsigned int)range->max.icmp.id - range->min.icmp.id + 1;
+	unsigned int i;
+
+	/* If no range specified... */
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+		range_size = 0xFFFF;
+
+	for (i = 0; i < range_size; i++, id++) {
+		tuple->src.u.icmp.id = range->min.icmp.id + (id % range_size);
+		if (!ip_nat_used_tuple(tuple, conntrack))
+			return 1;
+	}
+	return 0;
+}
+
+static int
+icmp_manip_pkt(struct sk_buff **pskb,
+	       unsigned int iphdroff,
+	       const struct ip_conntrack_tuple *tuple,
+	       enum ip_nat_manip_type maniptype)
+{
+	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct icmphdr *hdr;
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+
+	if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+		return 0;
+
+	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
+
+	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
+					    tuple->src.u.icmp.id,
+					    hdr->checksum);
+	hdr->un.echo.id = tuple->src.u.icmp.id;
+	return 1;
+}
+
+static unsigned int
+icmp_print(char *buffer,
+	   const struct ip_conntrack_tuple *match,
+	   const struct ip_conntrack_tuple *mask)
+{
+	unsigned int len = 0;
+
+	if (mask->src.u.icmp.id)
+		len += sprintf(buffer + len, "id=%u ",
+			       ntohs(match->src.u.icmp.id));
+
+	if (mask->dst.u.icmp.type)
+		len += sprintf(buffer + len, "type=%u ",
+			       ntohs(match->dst.u.icmp.type));
+
+	if (mask->dst.u.icmp.code)
+		len += sprintf(buffer + len, "code=%u ",
+			       ntohs(match->dst.u.icmp.code));
+
+	return len;
+}
+
+static unsigned int
+icmp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
+		return sprintf(buffer, "id %u-%u ",
+			       ntohs(range->min.icmp.id),
+			       ntohs(range->max.icmp.id));
+	else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_icmp
+= { "ICMP", IPPROTO_ICMP,
+    icmp_manip_pkt,
+    icmp_in_range,
+    icmp_unique_tuple,
+    icmp_print,
+    icmp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
new file mode 100644
index 00000000000..a91cfceff27
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -0,0 +1,178 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+
+static int
+tcp_in_range(const struct ip_conntrack_tuple *tuple,
+	     enum ip_nat_manip_type maniptype,
+	     const union ip_conntrack_manip_proto *min,
+	     const union ip_conntrack_manip_proto *max)
+{
+	u_int16_t port;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		port = tuple->src.u.tcp.port;
+	else
+		port = tuple->dst.u.tcp.port;
+
+	return ntohs(port) >= ntohs(min->tcp.port)
+		&& ntohs(port) <= ntohs(max->tcp.port);
+}
+
+static int
+tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
+		 const struct ip_nat_range *range,
+		 enum ip_nat_manip_type maniptype,
+		 const struct ip_conntrack *conntrack)
+{
+	static u_int16_t port, *portptr;
+	unsigned int range_size, min, i;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		portptr = &tuple->src.u.tcp.port;
+	else
+		portptr = &tuple->dst.u.tcp.port;
+
+	/* If no range specified... */
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+		/* If it's dst rewrite, can't change port */
+		if (maniptype == IP_NAT_MANIP_DST)
+			return 0;
+
+		/* Map privileged onto privileged. */
+		if (ntohs(*portptr) < 1024) {
+			/* Loose convention: >> 512 is credential passing */
+			if (ntohs(*portptr)<512) {
+				min = 1;
+				range_size = 511 - min + 1;
+			} else {
+				min = 600;
+				range_size = 1023 - min + 1;
+			}
+		} else {
+			min = 1024;
+			range_size = 65535 - 1024 + 1;
+		}
+	} else {
+		min = ntohs(range->min.tcp.port);
+		range_size = ntohs(range->max.tcp.port) - min + 1;
+	}
+
+	for (i = 0; i < range_size; i++, port++) {
+		*portptr = htons(min + port % range_size);
+		if (!ip_nat_used_tuple(tuple, conntrack)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+tcp_manip_pkt(struct sk_buff **pskb,
+	      unsigned int iphdroff,
+	      const struct ip_conntrack_tuple *tuple,
+	      enum ip_nat_manip_type maniptype)
+{
+	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct tcphdr *hdr;
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+	u32 oldip, newip;
+	u16 *portptr, newport, oldport;
+	int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+	/* this could be a inner header returned in icmp packet; in such
+	   cases we cannot update the checksum field since it is outside of
+	   the 8 bytes of transport layer headers we are guaranteed */
+	if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+		hdrsize = sizeof(struct tcphdr);
+
+	if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
+		return 0;
+
+	iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		/* Get rid of src ip and src pt */
+		oldip = iph->saddr;
+		newip = tuple->src.ip;
+		newport = tuple->src.u.tcp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst ip and dst pt */
+		oldip = iph->daddr;
+		newip = tuple->dst.ip;
+		newport = tuple->dst.u.tcp.port;
+		portptr = &hdr->dest;
+	}
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize < sizeof(*hdr))
+		return 1;
+
+	hdr->check = ip_nat_cheat_check(~oldip, newip,
+					ip_nat_cheat_check(oldport ^ 0xFFFF,
+							   newport,
+							   hdr->check));
+	return 1;
+}
+
+static unsigned int
+tcp_print(char *buffer,
+	  const struct ip_conntrack_tuple *match,
+	  const struct ip_conntrack_tuple *mask)
+{
+	unsigned int len = 0;
+
+	if (mask->src.u.tcp.port)
+		len += sprintf(buffer + len, "srcpt=%u ",
+			       ntohs(match->src.u.tcp.port));
+
+
+	if (mask->dst.u.tcp.port)
+		len += sprintf(buffer + len, "dstpt=%u ",
+			       ntohs(match->dst.u.tcp.port));
+
+	return len;
+}
+
+static unsigned int
+tcp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
+		if (range->min.tcp.port == range->max.tcp.port)
+			return sprintf(buffer, "port %u ",
+				       ntohs(range->min.tcp.port));
+		else
+			return sprintf(buffer, "ports %u-%u ",
+				       ntohs(range->min.tcp.port),
+				       ntohs(range->max.tcp.port));
+	}
+	else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_tcp
+= { "TCP", IPPROTO_TCP,
+    tcp_manip_pkt,
+    tcp_in_range,
+    tcp_unique_tuple,
+    tcp_print,
+    tcp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
new file mode 100644
index 00000000000..c669e3b5f5d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -0,0 +1,165 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int
+udp_in_range(const struct ip_conntrack_tuple *tuple,
+	     enum ip_nat_manip_type maniptype,
+	     const union ip_conntrack_manip_proto *min,
+	     const union ip_conntrack_manip_proto *max)
+{
+	u_int16_t port;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		port = tuple->src.u.udp.port;
+	else
+		port = tuple->dst.u.udp.port;
+
+	return ntohs(port) >= ntohs(min->udp.port)
+		&& ntohs(port) <= ntohs(max->udp.port);
+}
+
+static int
+udp_unique_tuple(struct ip_conntrack_tuple *tuple,
+		 const struct ip_nat_range *range,
+		 enum ip_nat_manip_type maniptype,
+		 const struct ip_conntrack *conntrack)
+{
+	static u_int16_t port, *portptr;
+	unsigned int range_size, min, i;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		portptr = &tuple->src.u.udp.port;
+	else
+		portptr = &tuple->dst.u.udp.port;
+
+	/* If no range specified... */
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+		/* If it's dst rewrite, can't change port */
+		if (maniptype == IP_NAT_MANIP_DST)
+			return 0;
+
+		if (ntohs(*portptr) < 1024) {
+			/* Loose convention: >> 512 is credential passing */
+			if (ntohs(*portptr)<512) {
+				min = 1;
+				range_size = 511 - min + 1;
+			} else {
+				min = 600;
+				range_size = 1023 - min + 1;
+			}
+		} else {
+			min = 1024;
+			range_size = 65535 - 1024 + 1;
+		}
+	} else {
+		min = ntohs(range->min.udp.port);
+		range_size = ntohs(range->max.udp.port) - min + 1;
+	}
+
+	for (i = 0; i < range_size; i++, port++) {
+		*portptr = htons(min + port % range_size);
+		if (!ip_nat_used_tuple(tuple, conntrack))
+			return 1;
+	}
+	return 0;
+}
+
+static int
+udp_manip_pkt(struct sk_buff **pskb,
+	      unsigned int iphdroff,
+	      const struct ip_conntrack_tuple *tuple,
+	      enum ip_nat_manip_type maniptype)
+{
+	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct udphdr *hdr;
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+	u32 oldip, newip;
+	u16 *portptr, newport;
+
+	if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
+		return 0;
+
+	iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	hdr = (struct udphdr *)((*pskb)->data + hdroff);
+
+	if (maniptype == IP_NAT_MANIP_SRC) {
+		/* Get rid of src ip and src pt */
+		oldip = iph->saddr;
+		newip = tuple->src.ip;
+		newport = tuple->src.u.udp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst ip and dst pt */
+		oldip = iph->daddr;
+		newip = tuple->dst.ip;
+		newport = tuple->dst.u.udp.port;
+		portptr = &hdr->dest;
+	}
+	if (hdr->check) /* 0 is a special case meaning no checksum */
+		hdr->check = ip_nat_cheat_check(~oldip, newip,
+					ip_nat_cheat_check(*portptr ^ 0xFFFF,
+							   newport,
+							   hdr->check));
+	*portptr = newport;
+	return 1;
+}
+
+static unsigned int
+udp_print(char *buffer,
+	  const struct ip_conntrack_tuple *match,
+	  const struct ip_conntrack_tuple *mask)
+{
+	unsigned int len = 0;
+
+	if (mask->src.u.udp.port)
+		len += sprintf(buffer + len, "srcpt=%u ",
+			       ntohs(match->src.u.udp.port));
+
+
+	if (mask->dst.u.udp.port)
+		len += sprintf(buffer + len, "dstpt=%u ",
+			       ntohs(match->dst.u.udp.port));
+
+	return len;
+}
+
+static unsigned int
+udp_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
+		if (range->min.udp.port == range->max.udp.port)
+			return sprintf(buffer, "port %u ",
+				       ntohs(range->min.udp.port));
+		else
+			return sprintf(buffer, "ports %u-%u ",
+				       ntohs(range->min.udp.port),
+				       ntohs(range->max.udp.port));
+	}
+	else return 0;
+}
+
+struct ip_nat_protocol ip_nat_protocol_udp
+= { "UDP", IPPROTO_UDP,
+    udp_manip_pkt,
+    udp_in_range,
+    udp_unique_tuple,
+    udp_print,
+    udp_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
new file mode 100644
index 00000000000..f5525bd58d1
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -0,0 +1,70 @@
+/* The "unknown" protocol.  This is what is used for protocols we
+ * don't understand.  It's returned by ip_ct_find_proto().
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/if.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
+			    enum ip_nat_manip_type manip_type,
+			    const union ip_conntrack_manip_proto *min,
+			    const union ip_conntrack_manip_proto *max)
+{
+	return 1;
+}
+
+static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
+				const struct ip_nat_range *range,
+				enum ip_nat_manip_type maniptype,
+				const struct ip_conntrack *conntrack)
+{
+	/* Sorry: we can't help you; if it's not unique, we can't frob
+	   anything. */
+	return 0;
+}
+
+static int
+unknown_manip_pkt(struct sk_buff **pskb,
+		  unsigned int iphdroff,
+		  const struct ip_conntrack_tuple *tuple,
+		  enum ip_nat_manip_type maniptype)
+{
+	return 1;
+}
+
+static unsigned int
+unknown_print(char *buffer,
+	      const struct ip_conntrack_tuple *match,
+	      const struct ip_conntrack_tuple *mask)
+{
+	return 0;
+}
+
+static unsigned int
+unknown_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	return 0;
+}
+
+struct ip_nat_protocol ip_nat_unknown_protocol = {
+	"unknown", 0,
+	unknown_manip_pkt,
+	unknown_in_range,
+	unknown_unique_tuple,
+	unknown_print,
+	unknown_print_range
+};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
new file mode 100644
index 00000000000..581f097f5a2
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -0,0 +1,319 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Everything about the rules for NAT. */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/bitops.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[3];
+	struct ipt_error term;
+} nat_initial_table __initdata
+= { { "nat", NAT_VALID_HOOKS, 4,
+      sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+      { [NF_IP_PRE_ROUTING] = 0,
+	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+      { [NF_IP_PRE_ROUTING] = 0,
+	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
+	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+      0, NULL, { } },
+    {
+	    /* PRE_ROUTING */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* POST_ROUTING */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_OUT */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } }
+    },
+    /* ERROR */
+    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+	0,
+	sizeof(struct ipt_entry),
+	sizeof(struct ipt_error),
+	0, { 0, 0 }, { } },
+      { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
+	  { } },
+	"ERROR"
+      }
+    }
+};
+
+static struct ipt_table nat_table = {
+	.name		= "nat",
+	.valid_hooks	= NAT_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.me		= THIS_MODULE,
+};
+
+/* Source NAT */
+static unsigned int ipt_snat_target(struct sk_buff **pskb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    unsigned int hooknum,
+				    const void *targinfo,
+				    void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+
+	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	/* Connection must be valid and new. */
+	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+	                    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+	IP_NF_ASSERT(out);
+
+	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
+}
+
+/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
+static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+{
+	static int warned = 0;
+	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+	struct rtable *rt;
+
+	if (ip_route_output_key(&rt, &fl) != 0)
+		return;
+
+	if (rt->rt_src != srcip && !warned) {
+		printk("NAT: no longer support implicit source local NAT\n");
+		printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+		       NIPQUAD(srcip), NIPQUAD(dstip));
+		warned = 1;
+	}
+	ip_rt_put(rt);
+}
+
+static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    unsigned int hooknum,
+				    const void *targinfo,
+				    void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+
+	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+		     || hooknum == NF_IP_LOCAL_OUT);
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	/* Connection must be valid and new. */
+	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	if (hooknum == NF_IP_LOCAL_OUT
+	    && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+				     mr->range[0].min_ip);
+
+	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
+}
+
+static int ipt_snat_checkentry(const char *tablename,
+			       const struct ipt_entry *e,
+			       void *targinfo,
+			       unsigned int targinfosize,
+			       unsigned int hook_mask)
+{
+	struct ip_nat_multi_range_compat *mr = targinfo;
+
+	/* Must be a valid range */
+	if (mr->rangesize != 1) {
+		printk("SNAT: multiple ranges no longer supported\n");
+		return 0;
+	}
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
+		DEBUGP("SNAT: Target size %u wrong for %u ranges\n",
+		       targinfosize, mr->rangesize);
+		return 0;
+	}
+
+	/* Only allow these for NAT. */
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP("SNAT: wrong table %s\n", tablename);
+		return 0;
+	}
+
+	if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
+		DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask);
+		return 0;
+	}
+	return 1;
+}
+
+static int ipt_dnat_checkentry(const char *tablename,
+			       const struct ipt_entry *e,
+			       void *targinfo,
+			       unsigned int targinfosize,
+			       unsigned int hook_mask)
+{
+	struct ip_nat_multi_range_compat *mr = targinfo;
+
+	/* Must be a valid range */
+	if (mr->rangesize != 1) {
+		printk("DNAT: multiple ranges no longer supported\n");
+		return 0;
+	}
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
+		DEBUGP("DNAT: Target size %u wrong for %u ranges\n",
+		       targinfosize, mr->rangesize);
+		return 0;
+	}
+
+	/* Only allow these for NAT. */
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP("DNAT: wrong table %s\n", tablename);
+		return 0;
+	}
+
+	if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
+		DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask);
+		return 0;
+	}
+	
+	return 1;
+}
+
+inline unsigned int
+alloc_null_binding(struct ip_conntrack *conntrack,
+		   struct ip_nat_info *info,
+		   unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	   Use reply in case it's already been mangled (eg local packet).
+	*/
+	u_int32_t ip
+		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
+		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+	struct ip_nat_range range
+		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
+
+	DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
+	       NIPQUAD(ip));
+	return ip_nat_setup_info(conntrack, &range, hooknum);
+}
+
+int ip_nat_rule_find(struct sk_buff **pskb,
+		     unsigned int hooknum,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     struct ip_conntrack *ct,
+		     struct ip_nat_info *info)
+{
+	int ret;
+
+	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+
+	if (ret == NF_ACCEPT) {
+		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			/* NUL mapping */
+			ret = alloc_null_binding(ct, info, hooknum);
+	}
+	return ret;
+}
+
+static struct ipt_target ipt_snat_reg = {
+	.name		= "SNAT",
+	.target		= ipt_snat_target,
+	.checkentry	= ipt_snat_checkentry,
+};
+
+static struct ipt_target ipt_dnat_reg = {
+	.name		= "DNAT",
+	.target		= ipt_dnat_target,
+	.checkentry	= ipt_dnat_checkentry,
+};
+
+int __init ip_nat_rule_init(void)
+{
+	int ret;
+
+	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
+	if (ret != 0)
+		return ret;
+	ret = ipt_register_target(&ipt_snat_reg);
+	if (ret != 0)
+		goto unregister_table;
+
+	ret = ipt_register_target(&ipt_dnat_reg);
+	if (ret != 0)
+		goto unregister_snat;
+
+	return ret;
+
+ unregister_snat:
+	ipt_unregister_target(&ipt_snat_reg);
+ unregister_table:
+	ipt_unregister_table(&nat_table);
+
+	return ret;
+}
+
+void ip_nat_rule_cleanup(void)
+{
+	ipt_unregister_target(&ipt_dnat_reg);
+	ipt_unregister_target(&ipt_snat_reg);
+	ipt_unregister_table(&nat_table);
+}
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
new file mode 100644
index 00000000000..2a48b6e635a
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -0,0 +1,1347 @@
+/*
+ * ip_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network 
+ * discovery and monitoring applications where target networks use 
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network 
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified.  The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory 
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Updates:
+ * 2000-08-06: Convert to new helper API (Harald Welte).
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <asm/uaccess.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+#define NOCT1(n) (u_int8_t )((n) & 0xff)
+
+static int debug;
+static DEFINE_SPINLOCK(snmp_lock);
+
+/* 
+ * Application layer address mapping mimics the NAT mapping, but 
+ * only for the first octet in this case (a more flexible system
+ * can be implemented if needed).
+ */
+struct oct1_map
+{
+	u_int8_t from;
+	u_int8_t to;
+};
+
+                                  
+/*****************************************************************************
+ *
+ * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* Class */
+#define ASN1_UNI	0	/* Universal */
+#define ASN1_APL	1	/* Application */
+#define ASN1_CTX	2	/* Context */
+#define ASN1_PRV	3	/* Private */
+
+/* Tag */
+#define ASN1_EOC	0	/* End Of Contents */
+#define ASN1_BOL	1	/* Boolean */
+#define ASN1_INT	2	/* Integer */
+#define ASN1_BTS	3	/* Bit String */
+#define ASN1_OTS	4	/* Octet String */
+#define ASN1_NUL	5	/* Null */
+#define ASN1_OJI	6	/* Object Identifier  */
+#define ASN1_OJD	7	/* Object Description */
+#define ASN1_EXT	8	/* External */
+#define ASN1_SEQ	16	/* Sequence */
+#define ASN1_SET	17	/* Set */
+#define ASN1_NUMSTR	18	/* Numerical String */
+#define ASN1_PRNSTR	19	/* Printable String */
+#define ASN1_TEXSTR	20	/* Teletext String */
+#define ASN1_VIDSTR	21	/* Video String */
+#define ASN1_IA5STR	22	/* IA5 String */
+#define ASN1_UNITIM	23	/* Universal Time */
+#define ASN1_GENTIM	24	/* General Time */
+#define ASN1_GRASTR	25	/* Graphical String */
+#define ASN1_VISSTR	26	/* Visible String */
+#define ASN1_GENSTR	27	/* General String */
+
+/* Primitive / Constructed methods*/
+#define ASN1_PRI	0	/* Primitive */
+#define ASN1_CON	1	/* Constructed */
+
+/*
+ * Error codes.
+ */
+#define ASN1_ERR_NOERROR		0
+#define ASN1_ERR_DEC_EMPTY		2
+#define ASN1_ERR_DEC_EOC_MISMATCH	3
+#define ASN1_ERR_DEC_LENGTH_MISMATCH	4
+#define ASN1_ERR_DEC_BADVALUE		5
+
+/* 
+ * ASN.1 context.
+ */
+struct asn1_ctx
+{
+	int error;			/* Error condition */
+	unsigned char *pointer;		/* Octet just to be decoded */
+	unsigned char *begin;		/* First octet */
+	unsigned char *end;		/* Octet after last octet */
+};
+
+/*
+ * Octet string (not null terminated)
+ */
+struct asn1_octstr
+{
+	unsigned char *data;
+	unsigned int len;
+};
+	
+static void asn1_open(struct asn1_ctx *ctx,
+                      unsigned char *buf,
+                      unsigned int len)
+{
+	ctx->begin = buf;
+	ctx->end = buf + len;
+	ctx->pointer = buf;
+	ctx->error = ASN1_ERR_NOERROR;
+}
+
+static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
+{
+	if (ctx->pointer >= ctx->end) {
+		ctx->error = ASN1_ERR_DEC_EMPTY;
+		return 0;
+	}
+	*ch = *(ctx->pointer)++;
+	return 1;
+}
+
+static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
+{
+	unsigned char ch;
+	
+	*tag = 0;
+	
+	do
+	{
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+		*tag <<= 7;
+		*tag |= ch & 0x7F;
+	} while ((ch & 0x80) == 0x80);
+	return 1;
+}
+
+static unsigned char asn1_id_decode(struct asn1_ctx *ctx, 
+                                    unsigned int *cls,
+                                    unsigned int *con,
+                                    unsigned int *tag)
+{
+	unsigned char ch;
+	
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+		
+	*cls = (ch & 0xC0) >> 6;
+	*con = (ch & 0x20) >> 5;
+	*tag = (ch & 0x1F);
+	
+	if (*tag == 0x1F) {
+		if (!asn1_tag_decode(ctx, tag))
+			return 0;
+	}
+	return 1;
+}
+
+static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
+                                        unsigned int *def,
+                                        unsigned int *len)
+{
+	unsigned char ch, cnt;
+	
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+		
+	if (ch == 0x80)
+		*def = 0;
+	else {
+		*def = 1;
+		
+		if (ch < 0x80)
+			*len = ch;
+		else {
+			cnt = (unsigned char) (ch & 0x7F);
+			*len = 0;
+			
+			while (cnt > 0) {
+				if (!asn1_octet_decode(ctx, &ch))
+					return 0;
+				*len <<= 8;
+				*len |= ch;
+				cnt--;
+			}
+		}
+	}
+	return 1;
+}
+
+static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
+                                        unsigned char **eoc,
+                                        unsigned int *cls,
+                                        unsigned int *con,
+                                        unsigned int *tag)
+{
+	unsigned int def, len;
+	
+	if (!asn1_id_decode(ctx, cls, con, tag))
+		return 0;
+		
+	if (!asn1_length_decode(ctx, &def, &len))
+		return 0;
+		
+	if (def)
+		*eoc = ctx->pointer + len;
+	else
+		*eoc = NULL;
+	return 1;
+}
+
+static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+	unsigned char ch;
+	
+	if (eoc == 0) {
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+			
+		if (ch != 0x00) {
+			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+			return 0;
+		}
+		
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+			
+		if (ch != 0x00) {
+			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+			return 0;
+		}
+		return 1;
+	} else {
+		if (ctx->pointer != eoc) {
+			ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+			return 0;
+		}
+		return 1;
+	}
+}
+
+static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+	ctx->pointer = eoc;
+	return 1;
+}
+
+static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
+                                      unsigned char *eoc,
+                                      long *integer)
+{
+	unsigned char ch;
+	unsigned int  len;
+	
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+		
+	*integer = (signed char) ch;
+	len = 1;
+	
+	while (ctx->pointer < eoc) {
+		if (++len > sizeof (long)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+			
+		*integer <<= 8;
+		*integer |= ch;
+	}
+	return 1;
+}
+
+static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
+                                      unsigned char *eoc,
+                                      unsigned int *integer)
+{
+	unsigned char ch;
+	unsigned int  len;
+	
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+		
+	*integer = ch;
+	if (ch == 0) len = 0;
+	else len = 1;
+	
+	while (ctx->pointer < eoc) {
+		if (++len > sizeof (unsigned int)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+			
+		*integer <<= 8;
+		*integer |= ch;
+	}
+	return 1;
+}
+
+static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
+                                       unsigned char *eoc,
+                                       unsigned long *integer)
+{
+	unsigned char ch;
+	unsigned int  len;
+	
+	if (!asn1_octet_decode(ctx, &ch))
+		return 0;
+		
+	*integer = ch;
+	if (ch == 0) len = 0;
+	else len = 1;
+	
+	while (ctx->pointer < eoc) {
+		if (++len > sizeof (unsigned long)) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			return 0;
+		}
+		
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+			
+		*integer <<= 8;
+		*integer |= ch;
+	}
+	return 1;
+}
+
+static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
+                                        unsigned char *eoc,
+                                        unsigned char **octets,
+                                        unsigned int *len)
+{
+	unsigned char *ptr;
+	
+	*len = 0;
+	
+	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
+	if (*octets == NULL) {
+		if (net_ratelimit())
+			printk("OOM in bsalg (%d)\n", __LINE__);
+		return 0;
+	}
+	
+	ptr = *octets;
+	while (ctx->pointer < eoc) {
+		if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+			kfree(*octets);
+			*octets = NULL;
+			return 0;
+		}
+		(*len)++;
+	}
+	return 1;
+}
+
+static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
+                                       unsigned long *subid)
+{
+	unsigned char ch;
+	
+	*subid = 0;
+	
+	do {
+		if (!asn1_octet_decode(ctx, &ch))
+			return 0;
+		
+		*subid <<= 7;
+		*subid |= ch & 0x7F;
+	} while ((ch & 0x80) == 0x80);
+	return 1;
+}
+
+static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
+                                     unsigned char *eoc,
+                                     unsigned long **oid,
+                                     unsigned int *len)
+{
+	unsigned long subid;
+	unsigned int  size;
+	unsigned long *optr;
+	
+	size = eoc - ctx->pointer + 1;
+	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+	if (*oid == NULL) {
+		if (net_ratelimit())
+			printk("OOM in bsalg (%d)\n", __LINE__);
+		return 0;
+	}
+	
+	optr = *oid;
+	
+	if (!asn1_subid_decode(ctx, &subid)) {
+		kfree(*oid);
+		*oid = NULL;
+		return 0;
+	}
+	
+	if (subid < 40) {
+		optr [0] = 0;
+		optr [1] = subid;
+	} else if (subid < 80) {
+		optr [0] = 1;
+		optr [1] = subid - 40;
+	} else {
+		optr [0] = 2;
+		optr [1] = subid - 80;
+	}
+	
+	*len = 2;
+	optr += 2;
+	
+	while (ctx->pointer < eoc) {
+		if (++(*len) > size) {
+			ctx->error = ASN1_ERR_DEC_BADVALUE;
+			kfree(*oid);
+			*oid = NULL;
+			return 0;
+		}
+		
+		if (!asn1_subid_decode(ctx, optr++)) {
+			kfree(*oid);
+			*oid = NULL;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*****************************************************************************
+ *
+ * SNMP decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* SNMP Versions */
+#define SNMP_V1				0
+#define SNMP_V2C			1
+#define SNMP_V2				2
+#define SNMP_V3				3
+
+/* Default Sizes */
+#define SNMP_SIZE_COMM			256
+#define SNMP_SIZE_OBJECTID		128
+#define SNMP_SIZE_BUFCHR		256
+#define SNMP_SIZE_BUFINT		128
+#define SNMP_SIZE_SMALLOBJECTID		16
+
+/* Requests */
+#define SNMP_PDU_GET			0
+#define SNMP_PDU_NEXT			1
+#define SNMP_PDU_RESPONSE		2
+#define SNMP_PDU_SET			3
+#define SNMP_PDU_TRAP1			4
+#define SNMP_PDU_BULK			5
+#define SNMP_PDU_INFORM			6
+#define SNMP_PDU_TRAP2			7
+
+/* Errors */
+#define SNMP_NOERROR			0
+#define SNMP_TOOBIG			1
+#define SNMP_NOSUCHNAME			2
+#define SNMP_BADVALUE			3
+#define SNMP_READONLY			4
+#define SNMP_GENERROR			5
+#define SNMP_NOACCESS			6
+#define SNMP_WRONGTYPE			7
+#define SNMP_WRONGLENGTH		8
+#define SNMP_WRONGENCODING		9
+#define SNMP_WRONGVALUE			10
+#define SNMP_NOCREATION			11
+#define SNMP_INCONSISTENTVALUE		12
+#define SNMP_RESOURCEUNAVAILABLE	13
+#define SNMP_COMMITFAILED		14
+#define SNMP_UNDOFAILED			15
+#define SNMP_AUTHORIZATIONERROR		16
+#define SNMP_NOTWRITABLE		17
+#define SNMP_INCONSISTENTNAME		18
+
+/* General SNMP V1 Traps */
+#define SNMP_TRAP_COLDSTART		0
+#define SNMP_TRAP_WARMSTART		1
+#define SNMP_TRAP_LINKDOWN		2
+#define SNMP_TRAP_LINKUP		3
+#define SNMP_TRAP_AUTFAILURE		4
+#define SNMP_TRAP_EQPNEIGHBORLOSS	5
+#define SNMP_TRAP_ENTSPECIFIC		6
+
+/* SNMPv1 Types */
+#define SNMP_NULL                0
+#define SNMP_INTEGER             1    /* l  */
+#define SNMP_OCTETSTR            2    /* c  */
+#define SNMP_DISPLAYSTR          2    /* c  */
+#define SNMP_OBJECTID            3    /* ul */
+#define SNMP_IPADDR              4    /* uc */
+#define SNMP_COUNTER             5    /* ul */
+#define SNMP_GAUGE               6    /* ul */
+#define SNMP_TIMETICKS           7    /* ul */
+#define SNMP_OPAQUE              8    /* c  */
+
+/* Additional SNMPv2 Types */
+#define SNMP_UINTEGER            5    /* ul */
+#define SNMP_BITSTR              9    /* uc */
+#define SNMP_NSAP               10    /* uc */
+#define SNMP_COUNTER64          11    /* ul */
+#define SNMP_NOSUCHOBJECT       12
+#define SNMP_NOSUCHINSTANCE     13
+#define SNMP_ENDOFMIBVIEW       14
+
+union snmp_syntax
+{
+	unsigned char uc[0];	/* 8 bit unsigned */
+	char c[0];		/* 8 bit signed */
+	unsigned long ul[0];	/* 32 bit unsigned */
+	long l[0];		/* 32 bit signed */
+};
+
+struct snmp_object
+{
+	unsigned long *id;
+	unsigned int id_len;
+	unsigned short type;
+	unsigned int syntax_len;
+	union snmp_syntax syntax;
+};
+
+struct snmp_request
+{
+	unsigned long id;
+	unsigned int error_status;
+	unsigned int error_index;
+};
+
+struct snmp_v1_trap
+{
+	unsigned long *id;
+	unsigned int id_len;
+	unsigned long ip_address;	/* pointer  */
+	unsigned int general;
+	unsigned int specific;
+	unsigned long time;
+};
+
+/* SNMP types */
+#define SNMP_IPA    0
+#define SNMP_CNT    1
+#define SNMP_GGE    2
+#define SNMP_TIT    3
+#define SNMP_OPQ    4
+#define SNMP_C64    6
+
+/* SNMP errors */
+#define SERR_NSO    0
+#define SERR_NSI    1
+#define SERR_EOM    2
+
+static inline void mangle_address(unsigned char *begin,
+                                  unsigned char *addr,
+                                  const struct oct1_map *map,
+                                  u_int16_t *check);
+struct snmp_cnv
+{
+	unsigned int class;
+	unsigned int tag;
+	int syntax;
+};
+
+static struct snmp_cnv snmp_conv [] =
+{
+	{ASN1_UNI, ASN1_NUL, SNMP_NULL},
+	{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
+	{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
+	{ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
+	{ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
+	{ASN1_APL, SNMP_IPA, SNMP_IPADDR},
+	{ASN1_APL, SNMP_CNT, SNMP_COUNTER},	/* Counter32 */
+	{ASN1_APL, SNMP_GGE, SNMP_GAUGE},	/* Gauge32 == Unsigned32  */
+	{ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
+	{ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
+	
+	/* SNMPv2 data types and errors */
+	{ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
+	{ASN1_APL, SNMP_C64, SNMP_COUNTER64},
+	{ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
+	{ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
+	{ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
+	{0,       0,       -1}
+};
+
+static unsigned char snmp_tag_cls2syntax(unsigned int tag,
+                                         unsigned int cls,
+                                         unsigned short *syntax)
+{
+	struct snmp_cnv *cnv;
+	
+	cnv = snmp_conv;
+	
+	while (cnv->syntax != -1) {
+		if (cnv->tag == tag && cnv->class == cls) {
+			*syntax = cnv->syntax;
+			return 1;
+		}
+		cnv++;
+	}
+	return 0;
+}
+
+static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
+                                        struct snmp_object **obj)
+{
+	unsigned int cls, con, tag, len, idlen;
+	unsigned short type;
+	unsigned char *eoc, *end, *p;
+	unsigned long *lp, *id;
+	unsigned long ul;
+	long  l;
+	
+	*obj = NULL;
+	id = NULL;
+	
+	if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+	
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+	
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+		return 0;
+	
+	if (!asn1_oid_decode(ctx, end, &id, &idlen))
+		return 0;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
+		kfree(id);
+		return 0;
+	}
+	
+	if (con != ASN1_PRI) {
+		kfree(id);
+		return 0;
+	}
+	
+	if (!snmp_tag_cls2syntax(tag, cls, &type)) {
+		kfree(id);
+		return 0;
+	}
+	
+	switch (type) {
+		case SNMP_INTEGER:
+			len = sizeof(long);
+			if (!asn1_long_decode(ctx, end, &l)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len,
+			               GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			(*obj)->syntax.l[0] = l;
+			break;
+		case SNMP_OCTETSTR:
+		case SNMP_OPAQUE:
+			if (!asn1_octets_decode(ctx, end, &p, &len)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len,
+			               GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.c, p, len);
+			kfree(p);
+			break;
+		case SNMP_NULL:
+		case SNMP_NOSUCHOBJECT:
+		case SNMP_NOSUCHINSTANCE:
+		case SNMP_ENDOFMIBVIEW:
+			len = 0;
+			*obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			if (!asn1_null_decode(ctx, end)) {
+				kfree(id);
+				kfree(*obj);
+				*obj = NULL;
+				return 0;
+			}
+			break;
+		case SNMP_OBJECTID:
+			if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
+				kfree(id);
+				return 0;
+			}
+			len *= sizeof(unsigned long);
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.ul, lp, len);
+			kfree(lp);
+			break;
+		case SNMP_IPADDR:
+			if (!asn1_octets_decode(ctx, end, &p, &len)) {
+				kfree(id);
+				return 0;
+			}
+			if (len != 4) {
+				kfree(p);
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(p);
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			memcpy((*obj)->syntax.uc, p, len);
+			kfree(p);
+			break;
+		case SNMP_COUNTER:
+		case SNMP_GAUGE:
+		case SNMP_TIMETICKS:
+			len = sizeof(unsigned long);
+			if (!asn1_ulong_decode(ctx, end, &ul)) {
+				kfree(id);
+				return 0;
+			}
+			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+			if (*obj == NULL) {
+				kfree(id);
+				if (net_ratelimit())
+					printk("OOM in bsalg (%d)\n", __LINE__);
+				return 0;
+			}
+			(*obj)->syntax.ul[0] = ul;
+			break;
+		default:
+			kfree(id);
+			return 0;
+	}
+	
+	(*obj)->syntax_len = len;
+	(*obj)->type = type;
+	(*obj)->id = id;
+	(*obj)->id_len = idlen;
+	
+	if (!asn1_eoc_decode(ctx, eoc)) {
+		kfree(id);
+		kfree(*obj);
+		*obj = NULL;
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
+                                         struct snmp_request *request)
+{
+	unsigned int cls, con, tag;
+	unsigned char *end;
+	
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+		
+	if (!asn1_ulong_decode(ctx, end, &request->id))
+		return 0;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+		
+	if (!asn1_uint_decode(ctx, end, &request->error_status))
+		return 0;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+		
+	if (!asn1_uint_decode(ctx, end, &request->error_index))
+		return 0;
+	
+	return 1;
+}
+
+/* 
+ * Fast checksum update for possibly oddly-aligned UDP byte, from the
+ * code example in the draft.
+ */
+static void fast_csum(unsigned char *csum,
+                      const unsigned char *optr,
+                      const unsigned char *nptr,
+                      int odd)
+{
+	long x, old, new;
+	
+	x = csum[0] * 256 + csum[1];
+	
+	x =~ x & 0xFFFF;
+	
+	if (odd) old = optr[0] * 256;
+	else old = optr[0];
+	
+	x -= old & 0xFFFF;
+	if (x <= 0) {
+		x--;
+		x &= 0xFFFF;
+	}
+	
+	if (odd) new = nptr[0] * 256;
+	else new = nptr[0];
+	
+	x += new & 0xFFFF;
+	if (x & 0x10000) {
+		x++;
+		x &= 0xFFFF;
+	}
+	
+	x =~ x & 0xFFFF;
+	csum[0] = x / 256;
+	csum[1] = x & 0xFF;
+}
+
+/* 
+ * Mangle IP address.
+ * 	- begin points to the start of the snmp messgae
+ *      - addr points to the start of the address
+ */
+static inline void mangle_address(unsigned char *begin,
+                                  unsigned char *addr,
+                                  const struct oct1_map *map,
+                                  u_int16_t *check)
+{
+	if (map->from == NOCT1(*addr)) {
+		u_int32_t old;
+		
+		if (debug)
+			memcpy(&old, (unsigned char *)addr, sizeof(old));
+			
+		*addr = map->to;
+		
+		/* Update UDP checksum if being used */
+		if (*check) {
+			unsigned char odd = !((addr - begin) % 2);
+			
+			fast_csum((unsigned char *)check,
+			          &map->from, &map->to, odd);
+			          
+		}
+		
+		if (debug)
+			printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
+			       "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
+	}
+}
+
+static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
+                                      struct snmp_v1_trap *trap,
+                                      const struct oct1_map *map,
+                                      u_int16_t *check)
+{
+	unsigned int cls, con, tag, len;
+	unsigned char *end;
+
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+		return 0;
+	
+	if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
+		return 0;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		goto err_id_free;
+
+	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
+	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
+		goto err_id_free;
+	
+	if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
+		goto err_id_free;
+	
+	/* IPv4 only */
+	if (len != 4)
+		goto err_addr_free;
+	
+	mangle_address(ctx->begin, ctx->pointer - 4, map, check);
+	
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		goto err_addr_free;
+		
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		goto err_addr_free;
+		
+	if (!asn1_uint_decode(ctx, end, &trap->general))
+		goto err_addr_free;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		goto err_addr_free;
+	
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		goto err_addr_free;
+		
+	if (!asn1_uint_decode(ctx, end, &trap->specific))
+		goto err_addr_free;
+		
+	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+		goto err_addr_free;
+		
+	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
+	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
+		goto err_addr_free;
+		
+	if (!asn1_ulong_decode(ctx, end, &trap->time))
+		goto err_addr_free;
+		
+	return 1;
+
+err_id_free:
+	kfree(trap->id);
+
+err_addr_free:
+	kfree((unsigned long *)trap->ip_address);
+	
+	return 0;
+}
+
+/*****************************************************************************
+ *
+ * Misc. routines
+ *
+ *****************************************************************************/
+
+static void hex_dump(unsigned char *buf, size_t len)
+{
+	size_t i;
+	
+	for (i = 0; i < len; i++) {
+		if (i && !(i % 16))
+			printk("\n");
+		printk("%02x ", *(buf + i));
+	}
+	printk("\n");
+}
+
+/*
+ * Parse and mangle SNMP message according to mapping.
+ * (And this is the fucking 'basic' method).
+ */
+static int snmp_parse_mangle(unsigned char *msg,
+                             u_int16_t len,
+                             const struct oct1_map *map,
+                             u_int16_t *check)
+{
+	unsigned char *eoc, *end;
+	unsigned int cls, con, tag, vers, pdutype;
+	struct asn1_ctx ctx;
+	struct asn1_octstr comm;
+	struct snmp_object **obj;
+	
+	if (debug > 1)
+		hex_dump(msg, len);
+
+	asn1_open(&ctx, msg, len);
+	
+	/* 
+	 * Start of SNMP message.
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+	
+	/* 
+	 * Version 1 or 2 handled.
+	 */
+	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+		return 0;
+	if (!asn1_uint_decode (&ctx, end, &vers))
+		return 0;
+	if (debug > 1)
+		printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+	if (vers > 1)
+		return 1;
+	
+	/*
+	 * Community.
+	 */
+	if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
+		return 0;
+	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
+		return 0;
+	if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
+		return 0;
+	if (debug > 1) {
+		unsigned int i;
+		
+		printk(KERN_DEBUG "bsalg: community: ");
+		for (i = 0; i < comm.len; i++)
+			printk("%c", comm.data[i]);
+		printk("\n");
+	}
+	kfree(comm.data);
+	
+	/*
+	 * PDU type
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
+		return 0;
+	if (cls != ASN1_CTX || con != ASN1_CON)
+		return 0;
+	if (debug > 1) {
+		unsigned char *pdus[] = {
+			[SNMP_PDU_GET] = "get",
+			[SNMP_PDU_NEXT] = "get-next",
+			[SNMP_PDU_RESPONSE] = "response",
+			[SNMP_PDU_SET] = "set",
+			[SNMP_PDU_TRAP1] = "trapv1",
+			[SNMP_PDU_BULK] = "bulk",
+			[SNMP_PDU_INFORM] = "inform",
+			[SNMP_PDU_TRAP2] = "trapv2"
+		};
+		
+		if (pdutype > SNMP_PDU_TRAP2)
+			printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+		else
+			printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+	}
+	if (pdutype != SNMP_PDU_RESPONSE &&
+	    pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
+		return 1;
+	
+	/*
+	 * Request header or v1 trap
+	 */
+	if (pdutype == SNMP_PDU_TRAP1) {
+		struct snmp_v1_trap trap;
+		unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
+		
+		/* Discard trap allocations regardless */
+		kfree(trap.id);
+		kfree((unsigned long *)trap.ip_address);
+		
+		if (!ret)
+			return ret;
+		
+	} else {
+		struct snmp_request req;
+		
+		if (!snmp_request_decode(&ctx, &req))
+			return 0;
+			
+		if (debug > 1)
+			printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+			"error_index=%u\n", req.id, req.error_status,
+			req.error_index);
+	}
+	
+	/*
+	 * Loop through objects, look for IP addresses to mangle.
+	 */
+	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+		return 0;
+		
+	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+		return 0;
+	
+	obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+	if (obj == NULL) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
+		return 0;	
+	}
+
+	while (!asn1_eoc_decode(&ctx, eoc)) {
+		unsigned int i;
+		
+		if (!snmp_object_decode(&ctx, obj)) {
+			if (*obj) {
+				if ((*obj)->id)
+					kfree((*obj)->id);
+				kfree(*obj);
+			}	
+			kfree(obj);
+			return 0;
+		}
+
+		if (debug > 1) {
+			printk(KERN_DEBUG "bsalg: object: ");
+			for (i = 0; i < (*obj)->id_len; i++) {
+				if (i > 0)
+					printk(".");
+				printk("%lu", (*obj)->id[i]);
+			}
+			printk(": type=%u\n", (*obj)->type);
+			
+		}
+
+		if ((*obj)->type == SNMP_IPADDR)
+			mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
+		
+		kfree((*obj)->id);
+		kfree(*obj);
+	}
+	kfree(obj);
+	
+	if (!asn1_eoc_decode(&ctx, eoc))
+		return 0;
+		
+	return 1;
+}
+
+/*****************************************************************************
+ *
+ * NAT routines.
+ *
+ *****************************************************************************/
+
+/* 
+ * SNMP translation routine.
+ */
+static int snmp_translate(struct ip_conntrack *ct,
+                          enum ip_conntrack_info ctinfo,
+                          struct sk_buff **pskb)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+	u_int16_t udplen = ntohs(udph->len);
+	u_int16_t paylen = udplen - sizeof(struct udphdr);
+	int dir = CTINFO2DIR(ctinfo);
+	struct oct1_map map;
+
+	/*
+	 * Determine mappping for application layer addresses based
+	 * on NAT manipulations for the packet.
+	 */
+	if (dir == IP_CT_DIR_ORIGINAL) {
+		/* SNAT traps */
+		map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
+		map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
+	} else {
+		/* DNAT replies */
+		map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
+		map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
+	}
+	
+	if (map.from == map.to)
+		return NF_ACCEPT;
+	
+	if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
+	                       paylen, &map, &udph->check)) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "bsalg: parser failed\n");
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted */
+static int help(struct sk_buff **pskb,
+		struct ip_conntrack *ct,
+		enum ip_conntrack_info ctinfo)
+{
+	int dir = CTINFO2DIR(ctinfo);
+	unsigned int ret;
+	struct iphdr *iph = (*pskb)->nh.iph;
+	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+
+	/* SNMP replies and originating SNMP traps get mangled */
+	if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+		return NF_ACCEPT;
+	if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+		return NF_ACCEPT;
+
+	/* No NAT? */
+	if (!(ct->status & IPS_NAT_MASK))
+		return NF_ACCEPT;
+
+	/* 
+	 * Make sure the packet length is ok.  So far, we were only guaranteed
+	 * to have a valid length IP header plus 8 bytes, which means we have
+	 * enough room for a UDP header.  Just verify the UDP length field so we
+	 * can mess around with the payload.
+	 */
+	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+		 if (net_ratelimit())
+			 printk(KERN_WARNING "SNMP: dropping malformed packet "
+				"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
+				NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+		 return NF_DROP;
+	}
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->len))
+		return NF_DROP;
+
+	spin_lock_bh(&snmp_lock);
+	ret = snmp_translate(ct, ctinfo, pskb);
+	spin_unlock_bh(&snmp_lock);
+	return ret;
+}
+
+static struct ip_conntrack_helper snmp_helper = {
+	.max_expected = 0,
+	.timeout = 180,
+	.me = THIS_MODULE,
+	.help = help,
+	.name = "snmp",
+
+	.tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } },
+		   .dst = { .protonum = IPPROTO_UDP },
+	},
+	.mask = { .src = { .u = { 0xFFFF } },
+		 .dst = { .protonum = 0xFF },
+	},
+};
+
+static struct ip_conntrack_helper snmp_trap_helper = {
+	.max_expected = 0,
+	.timeout = 180,
+	.me = THIS_MODULE,
+	.help = help,
+	.name = "snmp_trap",
+
+	.tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } },
+		   .dst = { .protonum = IPPROTO_UDP },
+	},
+	.mask = { .src = { .u = { 0xFFFF } },
+		 .dst = { .protonum = 0xFF },
+	},
+};
+
+/*****************************************************************************
+ *
+ * Module stuff.
+ *
+ *****************************************************************************/
+ 
+static int __init init(void)
+{
+	int ret = 0;
+
+	ret = ip_conntrack_helper_register(&snmp_helper);
+	if (ret < 0)
+		return ret;
+	ret = ip_conntrack_helper_register(&snmp_trap_helper);
+	if (ret < 0) {
+		ip_conntrack_helper_unregister(&snmp_helper);
+		return ret;
+	}
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ip_conntrack_helper_unregister(&snmp_helper);
+	ip_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(init);
+module_exit(fini);
+
+module_param(debug, bool, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
new file mode 100644
index 00000000000..dec4a74212c
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -0,0 +1,349 @@
+/* This file contains all the functions required for the standalone
+   ip_nat module.
+
+   These are not required by the compatibility layer.
+*/
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * 	- new API and handling of conntrack/nat helpers
+ * 	- now capable of multiple expectations for one master
+ * */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/spinlock.h>
+
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_core.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING"  \
+			   : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
+			      : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT"  \
+			         : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN"  \
+				    : "*ERROR*")))
+
+static unsigned int
+ip_nat_fn(unsigned int hooknum,
+	  struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  int (*okfn)(struct sk_buff *))
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	struct ip_nat_info *info;
+	/* maniptype == SRC for postrouting. */
+	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+	/* We never see fragments: conntrack defrags on pre-routing
+	   and local-out, and ip_nat_out protects post-routing. */
+	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
+		       & htons(IP_MF|IP_OFFSET)));
+
+	(*pskb)->nfcache |= NFC_UNKNOWN;
+
+	/* If we had a hardware checksum before, it's now invalid */
+	if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if (skb_checksum_help(*pskb, (out == NULL)))
+			return NF_DROP;
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+	/* Can't track?  It's not due to stress, or conntrack would
+	   have dropped it.  Hence it's the user's responsibilty to
+	   packet filter it out, or implement conntrack/NAT for that
+	   protocol. 8) --RR */
+	if (!ct) {
+		/* Exception: ICMP redirect to new connection (not in
+                   hash table yet).  We must not let this through, in
+                   case we're doing NAT to the same network. */
+		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+			struct icmphdr _hdr, *hp;
+
+			hp = skb_header_pointer(*pskb,
+						(*pskb)->nh.iph->ihl*4,
+						sizeof(_hdr), &_hdr);
+			if (hp != NULL &&
+			    hp->type == ICMP_REDIRECT)
+				return NF_DROP;
+		}
+		return NF_ACCEPT;
+	}
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED+IP_CT_IS_REPLY:
+		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+			if (!icmp_reply_translation(pskb, ct, maniptype,
+						    CTINFO2DIR(ctinfo)))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		info = &ct->nat.info;
+
+		/* Seen it before?  This can happen for loopback, retrans,
+		   or local packets.. */
+		if (!ip_nat_initialized(ct, maniptype)) {
+			unsigned int ret;
+
+			/* LOCAL_IN hook doesn't have a chain!  */
+			if (hooknum == NF_IP_LOCAL_IN)
+				ret = alloc_null_binding(ct, info, hooknum);
+			else
+				ret = ip_nat_rule_find(pskb, hooknum,
+						       in, out, ct,
+						       info);
+
+			if (ret != NF_ACCEPT) {
+				return ret;
+			}
+		} else
+			DEBUGP("Already setup manip %s for ct %p\n",
+			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+			       ct);
+		break;
+
+	default:
+		/* ESTABLISHED */
+		IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
+			     || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+		info = &ct->nat.info;
+	}
+
+	IP_NF_ASSERT(info);
+	return nat_packet(ct, ctinfo, hooknum, pskb);
+}
+
+static unsigned int
+ip_nat_in(unsigned int hooknum,
+          struct sk_buff **pskb,
+          const struct net_device *in,
+          const struct net_device *out,
+          int (*okfn)(struct sk_buff *))
+{
+	u_int32_t saddr, daddr;
+	unsigned int ret;
+
+	saddr = (*pskb)->nh.iph->saddr;
+	daddr = (*pskb)->nh.iph->daddr;
+
+	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN
+	    && ((*pskb)->nh.iph->saddr != saddr
+	        || (*pskb)->nh.iph->daddr != daddr)) {
+		dst_release((*pskb)->dst);
+		(*pskb)->dst = NULL;
+	}
+	return ret;
+}
+
+static unsigned int
+ip_nat_out(unsigned int hooknum,
+	   struct sk_buff **pskb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	/* We can hit fragment here; forwarded packets get
+	   defragmented by connection tracking coming in, then
+	   fragmented (grr) by the forward code.
+
+	   In future: If we have nfct != NULL, AND we have NAT
+	   initialized, AND there is no helper, then we can do full
+	   NAPT on the head, and IP-address-only NAT on the rest.
+
+	   I'm starting to have nightmares about fragments.  */
+
+	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+		*pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT);
+
+		if (!*pskb)
+			return NF_STOLEN;
+	}
+
+	return ip_nat_fn(hooknum, pskb, in, out, okfn);
+}
+
+static unsigned int
+ip_nat_local_fn(unsigned int hooknum,
+		struct sk_buff **pskb,
+		const struct net_device *in,
+		const struct net_device *out,
+		int (*okfn)(struct sk_buff *))
+{
+	u_int32_t saddr, daddr;
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	saddr = (*pskb)->nh.iph->saddr;
+	daddr = (*pskb)->nh.iph->daddr;
+
+	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN
+	    && ((*pskb)->nh.iph->saddr != saddr
+		|| (*pskb)->nh.iph->daddr != daddr))
+		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+	return ret;
+}
+
+/* We must be after connection tracking and before packet filtering. */
+
+/* Before packet filtering, change destination */
+static struct nf_hook_ops ip_nat_in_ops = {
+	.hook		= ip_nat_in,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_PRE_ROUTING,
+	.priority	= NF_IP_PRI_NAT_DST,
+};
+
+/* After packet filtering, change source */
+static struct nf_hook_ops ip_nat_out_ops = {
+	.hook		= ip_nat_out,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_POST_ROUTING,
+	.priority	= NF_IP_PRI_NAT_SRC,
+};
+
+/* Before packet filtering, change destination */
+static struct nf_hook_ops ip_nat_local_out_ops = {
+	.hook		= ip_nat_local_fn,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_OUT,
+	.priority	= NF_IP_PRI_NAT_DST,
+};
+
+/* After packet filtering, change source for reply packets of LOCAL_OUT DNAT */
+static struct nf_hook_ops ip_nat_local_in_ops = {
+	.hook		= ip_nat_fn,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET,
+	.hooknum	= NF_IP_LOCAL_IN,
+	.priority	= NF_IP_PRI_NAT_SRC,
+};
+
+static int init_or_cleanup(int init)
+{
+	int ret = 0;
+
+	need_ip_conntrack();
+
+	if (!init) goto cleanup;
+
+	ret = ip_nat_rule_init();
+	if (ret < 0) {
+		printk("ip_nat_init: can't setup rules.\n");
+		goto cleanup_nothing;
+	}
+	ret = ip_nat_init();
+	if (ret < 0) {
+		printk("ip_nat_init: can't setup rules.\n");
+		goto cleanup_rule_init;
+	}
+	ret = nf_register_hook(&ip_nat_in_ops);
+	if (ret < 0) {
+		printk("ip_nat_init: can't register in hook.\n");
+		goto cleanup_nat;
+	}
+	ret = nf_register_hook(&ip_nat_out_ops);
+	if (ret < 0) {
+		printk("ip_nat_init: can't register out hook.\n");
+		goto cleanup_inops;
+	}
+	ret = nf_register_hook(&ip_nat_local_out_ops);
+	if (ret < 0) {
+		printk("ip_nat_init: can't register local out hook.\n");
+		goto cleanup_outops;
+	}
+	ret = nf_register_hook(&ip_nat_local_in_ops);
+	if (ret < 0) {
+		printk("ip_nat_init: can't register local in hook.\n");
+		goto cleanup_localoutops;
+	}
+	return ret;
+
+ cleanup:
+	nf_unregister_hook(&ip_nat_local_in_ops);
+ cleanup_localoutops:
+	nf_unregister_hook(&ip_nat_local_out_ops);
+ cleanup_outops:
+	nf_unregister_hook(&ip_nat_out_ops);
+ cleanup_inops:
+	nf_unregister_hook(&ip_nat_in_ops);
+ cleanup_nat:
+	ip_nat_cleanup();
+ cleanup_rule_init:
+	ip_nat_rule_cleanup();
+ cleanup_nothing:
+	MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
+	return ret;
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+EXPORT_SYMBOL(ip_nat_setup_info);
+EXPORT_SYMBOL(ip_nat_protocol_register);
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
+EXPORT_SYMBOL(ip_nat_cheat_check);
+EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
+EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
+EXPORT_SYMBOL(ip_nat_used_tuple);
+EXPORT_SYMBOL(ip_nat_follow_master);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
new file mode 100644
index 00000000000..0343e0d6467
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -0,0 +1,70 @@
+/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Version: 0.0.7
+ *
+ * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
+ * 	- Port to newnat API
+ *
+ * This module currently supports DNAT:
+ * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
+ *
+ * and SNAT:
+ * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
+ *
+ * It has not been tested with
+ * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
+ * If you do test this please let me know if it works or not.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/moduleparam.h>
+
+MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
+MODULE_DESCRIPTION("tftp NAT helper");
+MODULE_LICENSE("GPL");
+
+static unsigned int help(struct sk_buff **pskb,
+			 enum ip_conntrack_info ctinfo,
+			 struct ip_conntrack_expect *exp)
+{
+	exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+	exp->dir = IP_CT_DIR_REPLY;
+	exp->expectfn = ip_nat_follow_master;
+	if (ip_conntrack_expect_related(exp) != 0) {
+		ip_conntrack_expect_free(exp);
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+static void __exit fini(void)
+{
+	ip_nat_tftp_hook = NULL;
+	/* Make sure noone calls it, meanwhile. */
+	synchronize_net();
+}
+
+static int __init init(void)
+{
+	BUG_ON(ip_nat_tftp_hook);
+	ip_nat_tftp_hook = help;
+	return 0;
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
new file mode 100644
index 00000000000..9e40dffc204
--- /dev/null
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -0,0 +1,741 @@
+/*
+ * This is a module which is used for queueing IPv4 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
+ * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
+ * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian 
+ *             Zander).
+ * 2000-08-01: Added Nick Williams' MAC support.
+ * 2002-06-25: Code cleanup.
+ * 2005-01-10: Added /proc counter for dropped packets; fixed so
+ *             packets aren't delivered to user space if they're going 
+ *             to be dropped. 
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/route.h>
+
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip_queue"
+#define NET_IPQ_QMAX 2088
+#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
+
+struct ipq_rt_info {
+	__u8 tos;
+	__u32 daddr;
+	__u32 saddr;
+};
+
+struct ipq_queue_entry {
+	struct list_head list;
+	struct nf_info *info;
+	struct sk_buff *skb;
+	struct ipq_rt_info rt_info;
+};
+
+typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+
+static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static DEFINE_RWLOCK(queue_lock);
+static int peer_pid;
+static unsigned int copy_range;
+static unsigned int queue_total;
+static unsigned int queue_dropped = 0;
+static unsigned int queue_user_dropped = 0;
+static struct sock *ipqnl;
+static LIST_HEAD(queue_list);
+static DECLARE_MUTEX(ipqnl_sem);
+
+static void
+ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+{
+	nf_reinject(entry->skb, entry->info, verdict);
+	kfree(entry);
+}
+
+static inline void
+__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+{
+       list_add(&entry->list, &queue_list);
+       queue_total++;
+}
+
+/*
+ * Find and return a queued entry matched by cmpfn, or return the last
+ * entry if cmpfn is NULL.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct list_head *p;
+
+	list_for_each_prev(p, &queue_list) {
+		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+		
+		if (!cmpfn || cmpfn(entry, data))
+			return entry;
+	}
+	return NULL;
+}
+
+static inline void
+__ipq_dequeue_entry(struct ipq_queue_entry *entry)
+{
+	list_del(&entry->list);
+	queue_total--;
+}
+
+static inline struct ipq_queue_entry *
+__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry;
+
+	entry = __ipq_find_entry(cmpfn, data);
+	if (entry == NULL)
+		return NULL;
+
+	__ipq_dequeue_entry(entry);
+	return entry;
+}
+
+
+static inline void
+__ipq_flush(int verdict)
+{
+	struct ipq_queue_entry *entry;
+	
+	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
+		ipq_issue_verdict(entry, verdict);
+}
+
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int status = 0;
+	
+	switch(mode) {
+	case IPQ_COPY_NONE:
+	case IPQ_COPY_META:
+		copy_mode = mode;
+		copy_range = 0;
+		break;
+		
+	case IPQ_COPY_PACKET:
+		copy_mode = mode;
+		copy_range = range;
+		if (copy_range > 0xFFFF)
+			copy_range = 0xFFFF;
+		break;
+		
+	default:
+		status = -EINVAL;
+
+	}
+	return status;
+}
+
+static inline void
+__ipq_reset(void)
+{
+	peer_pid = 0;
+	net_disable_timestamp();
+	__ipq_set_mode(IPQ_COPY_NONE, 0);
+	__ipq_flush(NF_DROP);
+}
+
+static struct ipq_queue_entry *
+ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry;
+	
+	write_lock_bh(&queue_lock);
+	entry = __ipq_find_dequeue_entry(cmpfn, data);
+	write_unlock_bh(&queue_lock);
+	return entry;
+}
+
+static void
+ipq_flush(int verdict)
+{
+	write_lock_bh(&queue_lock);
+	__ipq_flush(verdict);
+	write_unlock_bh(&queue_lock);
+}
+
+static struct sk_buff *
+ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+{
+	unsigned char *old_tail;
+	size_t size = 0;
+	size_t data_len = 0;
+	struct sk_buff *skb;
+	struct ipq_packet_msg *pmsg;
+	struct nlmsghdr *nlh;
+
+	read_lock_bh(&queue_lock);
+	
+	switch (copy_mode) {
+	case IPQ_COPY_META:
+	case IPQ_COPY_NONE:
+		size = NLMSG_SPACE(sizeof(*pmsg));
+		data_len = 0;
+		break;
+	
+	case IPQ_COPY_PACKET:
+		if (copy_range == 0 || copy_range > entry->skb->len)
+			data_len = entry->skb->len;
+		else
+			data_len = copy_range;
+		
+		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+		break;
+	
+	default:
+		*errp = -EINVAL;
+		read_unlock_bh(&queue_lock);
+		return NULL;
+	}
+
+	read_unlock_bh(&queue_lock);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+		
+	old_tail= skb->tail;
+	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+	pmsg = NLMSG_DATA(nlh);
+	memset(pmsg, 0, sizeof(*pmsg));
+
+	pmsg->packet_id       = (unsigned long )entry;
+	pmsg->data_len        = data_len;
+	pmsg->timestamp_sec   = entry->skb->stamp.tv_sec;
+	pmsg->timestamp_usec  = entry->skb->stamp.tv_usec;
+	pmsg->mark            = entry->skb->nfmark;
+	pmsg->hook            = entry->info->hook;
+	pmsg->hw_protocol     = entry->skb->protocol;
+	
+	if (entry->info->indev)
+		strcpy(pmsg->indev_name, entry->info->indev->name);
+	else
+		pmsg->indev_name[0] = '\0';
+	
+	if (entry->info->outdev)
+		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	else
+		pmsg->outdev_name[0] = '\0';
+	
+	if (entry->info->indev && entry->skb->dev) {
+		pmsg->hw_type = entry->skb->dev->type;
+		if (entry->skb->dev->hard_header_parse)
+			pmsg->hw_addrlen =
+				entry->skb->dev->hard_header_parse(entry->skb,
+				                                   pmsg->hw_addr);
+	}
+	
+	if (data_len)
+		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+			BUG();
+		
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+	if (skb)
+		kfree_skb(skb);
+	*errp = -EINVAL;
+	printk(KERN_ERR "ip_queue: error creating packet message\n");
+	return NULL;
+}
+
+static int
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+{
+	int status = -EINVAL;
+	struct sk_buff *nskb;
+	struct ipq_queue_entry *entry;
+
+	if (copy_mode == IPQ_COPY_NONE)
+		return -EAGAIN;
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL) {
+		printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
+		return -ENOMEM;
+	}
+
+	entry->info = info;
+	entry->skb = skb;
+
+	if (entry->info->hook == NF_IP_LOCAL_OUT) {
+		struct iphdr *iph = skb->nh.iph;
+
+		entry->rt_info.tos = iph->tos;
+		entry->rt_info.daddr = iph->daddr;
+		entry->rt_info.saddr = iph->saddr;
+	}
+
+	nskb = ipq_build_packet_message(entry, &status);
+	if (nskb == NULL)
+		goto err_out_free;
+		
+	write_lock_bh(&queue_lock);
+	
+	if (!peer_pid)
+		goto err_out_free_nskb; 
+
+	if (queue_total >= queue_maxlen) {
+                queue_dropped++;
+		status = -ENOSPC;
+		if (net_ratelimit())
+		          printk (KERN_WARNING "ip_queue: full at %d entries, "
+				  "dropping packets(s). Dropped: %d\n", queue_total,
+				  queue_dropped);
+		goto err_out_free_nskb;
+	}
+
+ 	/* netlink_unicast will either free the nskb or attach it to a socket */ 
+	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+	if (status < 0) {
+	        queue_user_dropped++;
+		goto err_out_unlock;
+	}
+
+	__ipq_enqueue_entry(entry);
+
+	write_unlock_bh(&queue_lock);
+	return status;
+
+err_out_free_nskb:
+	kfree_skb(nskb); 
+	
+err_out_unlock:
+	write_unlock_bh(&queue_lock);
+
+err_out_free:
+	kfree(entry);
+	return status;
+}
+
+static int
+ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+{
+	int diff;
+	struct iphdr *user_iph = (struct iphdr *)v->payload;
+
+	if (v->data_len < sizeof(*user_iph))
+		return 0;
+	diff = v->data_len - e->skb->len;
+	if (diff < 0)
+		skb_trim(e->skb, v->data_len);
+	else if (diff > 0) {
+		if (v->data_len > 0xFFFF)
+			return -EINVAL;
+		if (diff > skb_tailroom(e->skb)) {
+			struct sk_buff *newskb;
+			
+			newskb = skb_copy_expand(e->skb,
+			                         skb_headroom(e->skb),
+			                         diff,
+			                         GFP_ATOMIC);
+			if (newskb == NULL) {
+				printk(KERN_WARNING "ip_queue: OOM "
+				      "in mangle, dropping packet\n");
+				return -ENOMEM;
+			}
+			if (e->skb->sk)
+				skb_set_owner_w(newskb, e->skb->sk);
+			kfree_skb(e->skb);
+			e->skb = newskb;
+		}
+		skb_put(e->skb, diff);
+	}
+	if (!skb_ip_make_writable(&e->skb, v->data_len))
+		return -ENOMEM;
+	memcpy(e->skb->data, v->payload, v->data_len);
+	e->skb->nfcache |= NFC_ALTERED;
+
+	/*
+	 * Extra routing may needed on local out, as the QUEUE target never
+	 * returns control to the table.
+	 */
+	if (e->info->hook == NF_IP_LOCAL_OUT) {
+		struct iphdr *iph = e->skb->nh.iph;
+
+		if (!(iph->tos == e->rt_info.tos
+		      && iph->daddr == e->rt_info.daddr
+		      && iph->saddr == e->rt_info.saddr))
+			return ip_route_me_harder(&e->skb);
+	}
+	return 0;
+}
+
+static inline int
+id_cmp(struct ipq_queue_entry *e, unsigned long id)
+{
+	return (id == (unsigned long )e);
+}
+
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+	struct ipq_queue_entry *entry;
+
+	if (vmsg->value > NF_MAX_VERDICT)
+		return -EINVAL;
+
+	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	if (entry == NULL)
+		return -ENOENT;
+	else {
+		int verdict = vmsg->value;
+		
+		if (vmsg->data_len && vmsg->data_len == len)
+			if (ipq_mangle_ipv4(vmsg, entry) < 0)
+				verdict = NF_DROP;
+		
+		ipq_issue_verdict(entry, verdict);
+		return 0;
+	}
+}
+
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int status;
+
+	write_lock_bh(&queue_lock);
+	status = __ipq_set_mode(mode, range);
+	write_unlock_bh(&queue_lock);
+	return status;
+}
+
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+                 unsigned char type, unsigned int len)
+{
+	int status = 0;
+
+	if (len < sizeof(*pmsg))
+		return -EINVAL;
+
+	switch (type) {
+	case IPQM_MODE:
+		status = ipq_set_mode(pmsg->msg.mode.value,
+		                      pmsg->msg.mode.range);
+		break;
+		
+	case IPQM_VERDICT:
+		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+			status = -EINVAL;
+		else
+			status = ipq_set_verdict(&pmsg->msg.verdict,
+			                         len - sizeof(*pmsg));
+			break;
+	default:
+		status = -EINVAL;
+	}
+	return status;
+}
+
+static int
+dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+{
+	if (entry->info->indev)
+		if (entry->info->indev->ifindex == ifindex)
+			return 1;
+			
+	if (entry->info->outdev)
+		if (entry->info->outdev->ifindex == ifindex)
+			return 1;
+
+	return 0;
+}
+
+static void
+ipq_dev_drop(int ifindex)
+{
+	struct ipq_queue_entry *entry;
+	
+	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
+		ipq_issue_verdict(entry, NF_DROP);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void
+ipq_rcv_skb(struct sk_buff *skb)
+{
+	int status, type, pid, flags, nlmsglen, skblen;
+	struct nlmsghdr *nlh;
+
+	skblen = skb->len;
+	if (skblen < sizeof(*nlh))
+		return;
+
+	nlh = (struct nlmsghdr *)skb->data;
+	nlmsglen = nlh->nlmsg_len;
+	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+		return;
+
+	pid = nlh->nlmsg_pid;
+	flags = nlh->nlmsg_flags;
+	
+	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
+		RCV_SKB_FAIL(-EINVAL);
+		
+	if (flags & MSG_TRUNC)
+		RCV_SKB_FAIL(-ECOMM);
+		
+	type = nlh->nlmsg_type;
+	if (type < NLMSG_NOOP || type >= IPQM_MAX)
+		RCV_SKB_FAIL(-EINVAL);
+		
+	if (type <= IPQM_BASE)
+		return;
+		
+	if (security_netlink_recv(skb))
+		RCV_SKB_FAIL(-EPERM);
+	
+	write_lock_bh(&queue_lock);
+	
+	if (peer_pid) {
+		if (peer_pid != pid) {
+			write_unlock_bh(&queue_lock);
+			RCV_SKB_FAIL(-EBUSY);
+		}
+	} else {
+		net_enable_timestamp();
+		peer_pid = pid;
+	}
+		
+	write_unlock_bh(&queue_lock);
+	
+	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+	                          skblen - NLMSG_LENGTH(0));
+	if (status < 0)
+		RCV_SKB_FAIL(status);
+		
+	if (flags & NLM_F_ACK)
+		netlink_ack(skb, nlh, 0);
+        return;
+}
+
+static void
+ipq_rcv_sk(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		if (down_trylock(&ipqnl_sem))
+			return;
+			
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			ipq_rcv_skb(skb);
+			kfree_skb(skb);
+		}
+		
+		up(&ipqnl_sem);
+
+	} while (ipqnl && ipqnl->sk_receive_queue.qlen);
+}
+
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+                  unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	/* Drop any packets associated with the downed device */
+	if (event == NETDEV_DOWN)
+		ipq_dev_drop(dev->ifindex);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_dev_notifier = {
+	.notifier_call	= ipq_rcv_dev_event,
+};
+
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+                 unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (event == NETLINK_URELEASE &&
+	    n->protocol == NETLINK_FIREWALL && n->pid) {
+		write_lock_bh(&queue_lock);
+		if (n->pid == peer_pid)
+			__ipq_reset();
+		write_unlock_bh(&queue_lock);
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_nl_notifier = {
+	.notifier_call	= ipq_rcv_nl_event,
+};
+
+static struct ctl_table_header *ipq_sysctl_header;
+
+static ctl_table ipq_table[] = {
+	{
+		.ctl_name	= NET_IPQ_QMAX,
+		.procname	= NET_IPQ_QMAX_NAME,
+		.data		= &queue_maxlen,
+		.maxlen		= sizeof(queue_maxlen),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+ 	{ .ctl_name = 0 }
+};
+
+static ctl_table ipq_dir_table[] = {
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= ipq_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipq_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ipq_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+
+#ifdef CONFIG_PROC_FS
+static int
+ipq_get_info(char *buffer, char **start, off_t offset, int length)
+{
+	int len;
+
+	read_lock_bh(&queue_lock);
+	
+	len = sprintf(buffer,
+	              "Peer PID          : %d\n"
+	              "Copy mode         : %hu\n"
+	              "Copy range        : %u\n"
+	              "Queue length      : %u\n"
+	              "Queue max. length : %u\n"
+		      "Queue dropped     : %u\n"
+		      "Netlink dropped   : %u\n",
+	              peer_pid,
+	              copy_mode,
+	              copy_range,
+	              queue_total,
+	              queue_maxlen,
+		      queue_dropped,
+		      queue_user_dropped);
+
+	read_unlock_bh(&queue_lock);
+	
+	*start = buffer + offset;
+	len -= offset;
+	if (len > length)
+		len = length;
+	else if (len < 0)
+		len = 0;
+	return len;
+}
+#endif /* CONFIG_PROC_FS */
+
+static int
+init_or_cleanup(int init)
+{
+	int status = -ENOMEM;
+	struct proc_dir_entry *proc;
+	
+	if (!init)
+		goto cleanup;
+
+	netlink_register_notifier(&ipq_nl_notifier);
+	ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk);
+	if (ipqnl == NULL) {
+		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
+		goto cleanup_netlink_notifier;
+	}
+
+	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
+	if (proc)
+		proc->owner = THIS_MODULE;
+	else {
+		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
+		goto cleanup_ipqnl;
+	}
+	
+	register_netdevice_notifier(&ipq_dev_notifier);
+	ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
+	
+	status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL);
+	if (status < 0) {
+		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
+		goto cleanup_sysctl;
+	}
+	return status;
+
+cleanup:
+	nf_unregister_queue_handler(PF_INET);
+	synchronize_net();
+	ipq_flush(NF_DROP);
+	
+cleanup_sysctl:
+	unregister_sysctl_table(ipq_sysctl_header);
+	unregister_netdevice_notifier(&ipq_dev_notifier);
+	proc_net_remove(IPQ_PROC_FS_NAME);
+	
+cleanup_ipqnl:
+	sock_release(ipqnl->sk_socket);
+	down(&ipqnl_sem);
+	up(&ipqnl_sem);
+	
+cleanup_netlink_notifier:
+	netlink_unregister_notifier(&ipq_nl_notifier);
+	return status;
+}
+
+static int __init init(void)
+{
+	
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+MODULE_DESCRIPTION("IPv4 packet queue handler");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_LICENSE("GPL");
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
new file mode 100644
index 00000000000..8a54f92b849
--- /dev/null
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -0,0 +1,1964 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
+ * 	- increase module usage count as soon as we have rules inside
+ * 	  a table
+ */
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
+#include <linux/err.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv4 packet filter");
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...)  printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x)						\
+do {								\
+	if (!(x))						\
+		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
+		       __FUNCTION__, __FILE__, __LINE__);	\
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(ipt_mutex);
+
+/* Must have mutex */
+#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/*
+   We keep a set of rules for each CPU, so we can avoid write-locking
+   them in the softirq when updating the counters and therefore
+   only need to read-lock in the softirq; doing a write_lock_bh() in user
+   context stops packets coming through and allows user context to read
+   the counters or update the rules.
+
+   To be cache friendly on SMP, we arrange them like so:
+   [ n-entries ]
+   ... cache-align padding ...
+   [ n-entries ]
+
+   Hence the start of any table is given by get_table() below.  */
+
+/* The table itself */
+struct ipt_table_info
+{
+	/* Size per table */
+	unsigned int size;
+	/* Number of entries: FIXME. --RR */
+	unsigned int number;
+	/* Initial number of entries. Needed for module usage count */
+	unsigned int initial_entries;
+
+	/* Entry points and underflows */
+	unsigned int hook_entry[NF_IP_NUMHOOKS];
+	unsigned int underflow[NF_IP_NUMHOOKS];
+
+	/* ipt_entry tables: one per CPU */
+	char entries[0] ____cacheline_aligned;
+};
+
+static LIST_HEAD(ipt_target);
+static LIST_HEAD(ipt_match);
+static LIST_HEAD(ipt_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+#if 0
+#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
+#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
+#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+#endif
+
+/* Returns whether matches rule or not. */
+static inline int
+ip_packet_match(const struct iphdr *ip,
+		const char *indev,
+		const char *outdev,
+		const struct ipt_ip *ipinfo,
+		int isfrag)
+{
+	size_t i;
+	unsigned long ret;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
+
+	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+		  IPT_INV_SRCIP)
+	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+		     IPT_INV_DSTIP)) {
+		dprintf("Source or dest mismatch.\n");
+
+		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
+			NIPQUAD(ip->saddr),
+			NIPQUAD(ipinfo->smsk.s_addr),
+			NIPQUAD(ipinfo->src.s_addr),
+			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
+		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
+			NIPQUAD(ip->daddr),
+			NIPQUAD(ipinfo->dmsk.s_addr),
+			NIPQUAD(ipinfo->dst.s_addr),
+			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+		return 0;
+	}
+
+	/* Look for ifname matches; this should unroll nicely. */
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)indev)[i]
+			^ ((const unsigned long *)ipinfo->iniface)[i])
+			& ((const unsigned long *)ipinfo->iniface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
+		dprintf("VIA in mismatch (%s vs %s).%s\n",
+			indev, ipinfo->iniface,
+			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+		return 0;
+	}
+
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)outdev)[i]
+			^ ((const unsigned long *)ipinfo->outiface)[i])
+			& ((const unsigned long *)ipinfo->outiface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
+		dprintf("VIA out mismatch (%s vs %s).%s\n",
+			outdev, ipinfo->outiface,
+			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+		return 0;
+	}
+
+	/* Check specific protocol */
+	if (ipinfo->proto
+	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+		dprintf("Packet protocol %hi does not match %hi.%s\n",
+			ip->protocol, ipinfo->proto,
+			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+		return 0;
+	}
+
+	/* If we have a fragment rule but the packet is not a fragment
+	 * then we return zero */
+	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
+		dprintf("Fragment rule but not fragment.%s\n",
+			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+		return 0;
+	}
+
+	return 1;
+}
+
+static inline int
+ip_checkentry(const struct ipt_ip *ip)
+{
+	if (ip->flags & ~IPT_F_MASK) {
+		duprintf("Unknown flag bits set: %08X\n",
+			 ip->flags & ~IPT_F_MASK);
+		return 0;
+	}
+	if (ip->invflags & ~IPT_INV_MASK) {
+		duprintf("Unknown invflag bits set: %08X\n",
+			 ip->invflags & ~IPT_INV_MASK);
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned int
+ipt_error(struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const void *targinfo,
+	  void *userinfo)
+{
+	if (net_ratelimit())
+		printk("ip_tables: error: `%s'\n", (char *)targinfo);
+
+	return NF_DROP;
+}
+
+static inline
+int do_match(struct ipt_entry_match *m,
+	     const struct sk_buff *skb,
+	     const struct net_device *in,
+	     const struct net_device *out,
+	     int offset,
+	     int *hotdrop)
+{
+	/* Stop iteration if it doesn't match */
+	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
+		return 1;
+	else
+		return 0;
+}
+
+static inline struct ipt_entry *
+get_entry(void *base, unsigned int offset)
+{
+	return (struct ipt_entry *)(base + offset);
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ipt_do_table(struct sk_buff **pskb,
+	     unsigned int hook,
+	     const struct net_device *in,
+	     const struct net_device *out,
+	     struct ipt_table *table,
+	     void *userdata)
+{
+	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
+	u_int16_t offset;
+	struct iphdr *ip;
+	u_int16_t datalen;
+	int hotdrop = 0;
+	/* Initializing verdict to NF_DROP keeps gcc happy. */
+	unsigned int verdict = NF_DROP;
+	const char *indev, *outdev;
+	void *table_base;
+	struct ipt_entry *e, *back;
+
+	/* Initialization */
+	ip = (*pskb)->nh.iph;
+	datalen = (*pskb)->len - ip->ihl * 4;
+	indev = in ? in->name : nulldevname;
+	outdev = out ? out->name : nulldevname;
+	/* We handle fragments by dealing with the first fragment as
+	 * if it was a normal packet.  All other fragments are treated
+	 * normally, except that they will NEVER match rules that ask
+	 * things we don't know, ie. tcp syn flag or ports).  If the
+	 * rule is also a fragment-specific rule, non-fragments won't
+	 * match it. */
+	offset = ntohs(ip->frag_off) & IP_OFFSET;
+
+	read_lock_bh(&table->lock);
+	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	table_base = (void *)table->private->entries
+		+ TABLE_OFFSET(table->private, smp_processor_id());
+	e = get_entry(table_base, table->private->hook_entry[hook]);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Check noone else using our table */
+	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+		       smp_processor_id(),
+		       table->name,
+		       &((struct ipt_entry *)table_base)->comefrom,
+		       ((struct ipt_entry *)table_base)->comefrom);
+	}
+	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
+#endif
+
+	/* For return from builtin chain */
+	back = get_entry(table_base, table->private->underflow[hook]);
+
+	do {
+		IP_NF_ASSERT(e);
+		IP_NF_ASSERT(back);
+		(*pskb)->nfcache |= e->nfcache;
+		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
+			struct ipt_entry_target *t;
+
+			if (IPT_MATCH_ITERATE(e, do_match,
+					      *pskb, in, out,
+					      offset, &hotdrop) != 0)
+				goto no_match;
+
+			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+
+			t = ipt_get_target(e);
+			IP_NF_ASSERT(t->u.kernel.target);
+			/* Standard target? */
+			if (!t->u.kernel.target->target) {
+				int v;
+
+				v = ((struct ipt_standard_target *)t)->verdict;
+				if (v < 0) {
+					/* Pop from stack? */
+					if (v != IPT_RETURN) {
+						verdict = (unsigned)(-v) - 1;
+						break;
+					}
+					e = back;
+					back = get_entry(table_base,
+							 back->comefrom);
+					continue;
+				}
+				if (table_base + v
+				    != (void *)e + e->next_offset) {
+					/* Save old back ptr in next entry */
+					struct ipt_entry *next
+						= (void *)e + e->next_offset;
+					next->comefrom
+						= (void *)back - table_base;
+					/* set back pointer to next entry */
+					back = next;
+				}
+
+				e = get_entry(table_base, v);
+			} else {
+				/* Targets which reenter must return
+                                   abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+				((struct ipt_entry *)table_base)->comefrom
+					= 0xeeeeeeec;
+#endif
+				verdict = t->u.kernel.target->target(pskb,
+								     in, out,
+								     hook,
+								     t->data,
+								     userdata);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+				if (((struct ipt_entry *)table_base)->comefrom
+				    != 0xeeeeeeec
+				    && verdict == IPT_CONTINUE) {
+					printk("Target %s reentered!\n",
+					       t->u.kernel.target->name);
+					verdict = NF_DROP;
+				}
+				((struct ipt_entry *)table_base)->comefrom
+					= 0x57acc001;
+#endif
+				/* Target might have changed stuff. */
+				ip = (*pskb)->nh.iph;
+				datalen = (*pskb)->len - ip->ihl * 4;
+
+				if (verdict == IPT_CONTINUE)
+					e = (void *)e + e->next_offset;
+				else
+					/* Verdict */
+					break;
+			}
+		} else {
+
+		no_match:
+			e = (void *)e + e->next_offset;
+		}
+	} while (!hotdrop);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
+#endif
+	read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+	return NF_ACCEPT;
+#else
+	if (hotdrop)
+		return NF_DROP;
+	else return verdict;
+#endif
+}
+
+/*
+ * These are weird, but module loading must not be done with mutex
+ * held (since they will register), and we have to have a single
+ * function to use try_then_request_module().
+ */
+
+/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
+static inline struct ipt_table *find_table_lock(const char *name)
+{
+	struct ipt_table *t;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(t, &ipt_tables, list)
+		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+			return t;
+	up(&ipt_mutex);
+	return NULL;
+}
+
+/* Find match, grabs ref.  Returns ERR_PTR() on error. */
+static inline struct ipt_match *find_match(const char *name, u8 revision)
+{
+	struct ipt_match *m;
+	int err = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(m, &ipt_match, list) {
+		if (strcmp(m->name, name) == 0) {
+			if (m->revision == revision) {
+				if (try_module_get(m->me)) {
+					up(&ipt_mutex);
+					return m;
+				}
+			} else
+				err = -EPROTOTYPE; /* Found something. */
+		}
+	}
+	up(&ipt_mutex);
+	return ERR_PTR(err);
+}
+
+/* Find target, grabs ref.  Returns ERR_PTR() on error. */
+static inline struct ipt_target *find_target(const char *name, u8 revision)
+{
+	struct ipt_target *t;
+	int err = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return ERR_PTR(-EINTR);
+
+	list_for_each_entry(t, &ipt_target, list) {
+		if (strcmp(t->name, name) == 0) {
+			if (t->revision == revision) {
+				if (try_module_get(t->me)) {
+					up(&ipt_mutex);
+					return t;
+				}
+			} else
+				err = -EPROTOTYPE; /* Found something. */
+		}
+	}
+	up(&ipt_mutex);
+	return ERR_PTR(err);
+}
+
+struct ipt_target *ipt_find_target(const char *name, u8 revision)
+{
+	struct ipt_target *target;
+
+	target = try_then_request_module(find_target(name, revision),
+					 "ipt_%s", name);
+	if (IS_ERR(target) || !target)
+		return NULL;
+	return target;
+}
+
+static int match_revfn(const char *name, u8 revision, int *bestp)
+{
+	struct ipt_match *m;
+	int have_rev = 0;
+
+	list_for_each_entry(m, &ipt_match, list) {
+		if (strcmp(m->name, name) == 0) {
+			if (m->revision > *bestp)
+				*bestp = m->revision;
+			if (m->revision == revision)
+				have_rev = 1;
+		}
+	}
+	return have_rev;
+}
+
+static int target_revfn(const char *name, u8 revision, int *bestp)
+{
+	struct ipt_target *t;
+	int have_rev = 0;
+
+	list_for_each_entry(t, &ipt_target, list) {
+		if (strcmp(t->name, name) == 0) {
+			if (t->revision > *bestp)
+				*bestp = t->revision;
+			if (t->revision == revision)
+				have_rev = 1;
+		}
+	}
+	return have_rev;
+}
+
+/* Returns true or false (if no such extension at all) */
+static inline int find_revision(const char *name, u8 revision,
+				int (*revfn)(const char *, u8, int *),
+				int *err)
+{
+	int have_rev, best = -1;
+
+	if (down_interruptible(&ipt_mutex) != 0) {
+		*err = -EINTR;
+		return 1;
+	}
+	have_rev = revfn(name, revision, &best);
+	up(&ipt_mutex);
+
+	/* Nothing at all?  Return 0 to try loading module. */
+	if (best == -1) {
+		*err = -ENOENT;
+		return 0;
+	}
+
+	*err = best;
+	if (!have_rev)
+		*err = -EPROTONOSUPPORT;
+	return 1;
+}
+
+
+/* All zeroes == unconditional rule. */
+static inline int
+unconditional(const struct ipt_ip *ip)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
+		if (((__u32 *)ip)[i])
+			return 0;
+
+	return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+   there are loops.  Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
+{
+	unsigned int hook;
+
+	/* No recursion; use packet counter to save back ptrs (reset
+	   to 0 as we leave), and comefrom to save source hook bitmask */
+	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
+		unsigned int pos = newinfo->hook_entry[hook];
+		struct ipt_entry *e
+			= (struct ipt_entry *)(newinfo->entries + pos);
+
+		if (!(valid_hooks & (1 << hook)))
+			continue;
+
+		/* Set initial back pointer. */
+		e->counters.pcnt = pos;
+
+		for (;;) {
+			struct ipt_standard_target *t
+				= (void *)ipt_get_target(e);
+
+			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
+				printk("iptables: loop hook %u pos %u %08X.\n",
+				       hook, pos, e->comefrom);
+				return 0;
+			}
+			e->comefrom
+				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
+
+			/* Unconditional return/END. */
+			if (e->target_offset == sizeof(struct ipt_entry)
+			    && (strcmp(t->target.u.user.name,
+				       IPT_STANDARD_TARGET) == 0)
+			    && t->verdict < 0
+			    && unconditional(&e->ip)) {
+				unsigned int oldpos, size;
+
+				/* Return: backtrack through the last
+				   big jump. */
+				do {
+					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+					if (e->comefrom
+					    & (1 << NF_IP_NUMHOOKS)) {
+						duprintf("Back unset "
+							 "on hook %u "
+							 "rule %u\n",
+							 hook, pos);
+					}
+#endif
+					oldpos = pos;
+					pos = e->counters.pcnt;
+					e->counters.pcnt = 0;
+
+					/* We're at the start. */
+					if (pos == oldpos)
+						goto next;
+
+					e = (struct ipt_entry *)
+						(newinfo->entries + pos);
+				} while (oldpos == pos + e->next_offset);
+
+				/* Move along one */
+				size = e->next_offset;
+				e = (struct ipt_entry *)
+					(newinfo->entries + pos + size);
+				e->counters.pcnt = pos;
+				pos += size;
+			} else {
+				int newpos = t->verdict;
+
+				if (strcmp(t->target.u.user.name,
+					   IPT_STANDARD_TARGET) == 0
+				    && newpos >= 0) {
+					/* This a jump; chase it. */
+					duprintf("Jump rule %u -> %u\n",
+						 pos, newpos);
+				} else {
+					/* ... this is a fallthru */
+					newpos = pos + e->next_offset;
+				}
+				e = (struct ipt_entry *)
+					(newinfo->entries + newpos);
+				e->counters.pcnt = pos;
+				pos = newpos;
+			}
+		}
+		next:
+		duprintf("Finished chain %u\n", hook);
+	}
+	return 1;
+}
+
+static inline int
+cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	if (m->u.kernel.match->destroy)
+		m->u.kernel.match->destroy(m->data,
+					   m->u.match_size - sizeof(*m));
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+standard_check(const struct ipt_entry_target *t,
+	       unsigned int max_offset)
+{
+	struct ipt_standard_target *targ = (void *)t;
+
+	/* Check standard info. */
+	if (t->u.target_size
+	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
+		duprintf("standard_check: target size %u != %u\n",
+			 t->u.target_size,
+			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
+		return 0;
+	}
+
+	if (targ->verdict >= 0
+	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
+		duprintf("ipt_standard_check: bad verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+
+	if (targ->verdict < -NF_MAX_VERDICT - 1) {
+		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+	return 1;
+}
+
+static inline int
+check_match(struct ipt_entry_match *m,
+	    const char *name,
+	    const struct ipt_ip *ip,
+	    unsigned int hookmask,
+	    unsigned int *i)
+{
+	struct ipt_match *match;
+
+	match = try_then_request_module(find_match(m->u.user.name,
+						   m->u.user.revision),
+					"ipt_%s", m->u.user.name);
+	if (IS_ERR(match) || !match) {
+		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		return match ? PTR_ERR(match) : -ENOENT;
+	}
+	m->u.kernel.match = match;
+
+	if (m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ip, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		module_put(m->u.kernel.match->me);
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		return -EINVAL;
+	}
+
+	(*i)++;
+	return 0;
+}
+
+static struct ipt_target ipt_standard_target;
+
+static inline int
+check_entry(struct ipt_entry *e, const char *name, unsigned int size,
+	    unsigned int *i)
+{
+	struct ipt_entry_target *t;
+	struct ipt_target *target;
+	int ret;
+	unsigned int j;
+
+	if (!ip_checkentry(&e->ip)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
+	if (ret != 0)
+		goto cleanup_matches;
+
+	t = ipt_get_target(e);
+	target = try_then_request_module(find_target(t->u.user.name,
+						     t->u.user.revision),
+					 "ipt_%s", t->u.user.name);
+	if (IS_ERR(target) || !target) {
+		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		ret = target ? PTR_ERR(target) : -ENOENT;
+		goto cleanup_matches;
+	}
+	t->u.kernel.target = target;
+
+	if (t->u.kernel.target == &ipt_standard_target) {
+		if (!standard_check(t, size)) {
+			ret = -EINVAL;
+			goto cleanup_matches;
+		}
+	} else if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		module_put(t->u.kernel.target->me);
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+		goto cleanup_matches;
+	}
+
+	(*i)++;
+	return 0;
+
+ cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static inline int
+check_entry_size_and_hooks(struct ipt_entry *e,
+			   struct ipt_table_info *newinfo,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   const unsigned int *hook_entries,
+			   const unsigned int *underflows,
+			   unsigned int *i)
+{
+	unsigned int h;
+
+	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
+	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	if (e->next_offset
+	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+           < 0 (not IPT_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct ipt_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+cleanup_entry(struct ipt_entry *e, unsigned int *i)
+{
+	struct ipt_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+	t = ipt_get_target(e);
+	if (t->u.kernel.target->destroy)
+		t->u.kernel.target->destroy(t->data,
+					    t->u.target_size - sizeof(*t));
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+   newinfo) */
+static int
+translate_table(const char *name,
+		unsigned int valid_hooks,
+		struct ipt_table_info *newinfo,
+		unsigned int size,
+		unsigned int number,
+		const unsigned int *hook_entries,
+		const unsigned int *underflows)
+{
+	unsigned int i;
+	int ret;
+
+	newinfo->size = size;
+	newinfo->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = 0xFFFFFFFF;
+		newinfo->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_table: size %u\n", newinfo->size);
+	i = 0;
+	/* Walk through entries, checking offsets. */
+	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				check_entry_size_and_hooks,
+				newinfo,
+				newinfo->entries,
+				newinfo->entries + size,
+				hook_entries, underflows, &i);
+	if (ret != 0)
+		return ret;
+
+	if (i != number) {
+		duprintf("translate_table: %u not %u entries\n",
+			 i, number);
+		return -EINVAL;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			return -EINVAL;
+		}
+		if (newinfo->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			return -EINVAL;
+		}
+	}
+
+	if (!mark_source_chains(newinfo, valid_hooks))
+		return -ELOOP;
+
+	/* Finally, each sanity check must pass */
+	i = 0;
+	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				check_entry, name, size, &i);
+
+	if (ret != 0) {
+		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				  cleanup_entry, &i);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for (i = 1; i < num_possible_cpus(); i++) {
+		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+		       newinfo->entries,
+		       SMP_ALIGN(newinfo->size));
+	}
+
+	return ret;
+}
+
+static struct ipt_table_info *
+replace_table(struct ipt_table *table,
+	      unsigned int num_counters,
+	      struct ipt_table_info *newinfo,
+	      int *error)
+{
+	struct ipt_table_info *oldinfo;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	{
+		struct ipt_entry *table_base;
+		unsigned int i;
+
+		for (i = 0; i < num_possible_cpus(); i++) {
+			table_base =
+				(void *)newinfo->entries
+				+ TABLE_OFFSET(newinfo, i);
+
+			table_base->comefrom = 0xdead57ac;
+		}
+	}
+#endif
+
+	/* Do the substitution. */
+	write_lock_bh(&table->lock);
+	/* Check inside lock: is the old number correct? */
+	if (num_counters != table->private->number) {
+		duprintf("num_counters != table->private->number (%u/%u)\n",
+			 num_counters, table->private->number);
+		write_unlock_bh(&table->lock);
+		*error = -EAGAIN;
+		return NULL;
+	}
+	oldinfo = table->private;
+	table->private = newinfo;
+	newinfo->initial_entries = oldinfo->initial_entries;
+	write_unlock_bh(&table->lock);
+
+	return oldinfo;
+}
+
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ipt_entry *e,
+		     struct ipt_counters total[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static void
+get_counters(const struct ipt_table_info *t,
+	     struct ipt_counters counters[])
+{
+	unsigned int cpu;
+	unsigned int i;
+
+	for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+		i = 0;
+		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+				  t->size,
+				  add_entry_to_counter,
+				  counters,
+				  &i);
+	}
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct ipt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num, countersize;
+	struct ipt_entry *e;
+	struct ipt_counters *counters;
+	int ret = 0;
+
+	/* We need atomic snapshot of counters: rest doesn't change
+	   (other than comefrom, which userspace doesn't care
+	   about). */
+	countersize = sizeof(struct ipt_counters) * table->private->number;
+	counters = vmalloc(countersize);
+
+	if (counters == NULL)
+		return -ENOMEM;
+
+	/* First, sum counters... */
+	memset(counters, 0, countersize);
+	write_lock_bh(&table->lock);
+	get_counters(table->private, counters);
+	write_unlock_bh(&table->lock);
+
+	/* ... then copy entire thing from CPU 0... */
+	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+		ret = -EFAULT;
+		goto free_counters;
+	}
+
+	/* FIXME: use iterator macros --RR */
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+		unsigned int i;
+		struct ipt_entry_match *m;
+		struct ipt_entry_target *t;
+
+		e = (struct ipt_entry *)(table->private->entries + off);
+		if (copy_to_user(userptr + off
+				 + offsetof(struct ipt_entry, counters),
+				 &counters[num],
+				 sizeof(counters[num])) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+
+		for (i = sizeof(struct ipt_entry);
+		     i < e->target_offset;
+		     i += m->u.match_size) {
+			m = (void *)e + i;
+
+			if (copy_to_user(userptr + off + i
+					 + offsetof(struct ipt_entry_match,
+						    u.user.name),
+					 m->u.kernel.match->name,
+					 strlen(m->u.kernel.match->name)+1)
+			    != 0) {
+				ret = -EFAULT;
+				goto free_counters;
+			}
+		}
+
+		t = ipt_get_target(e);
+		if (copy_to_user(userptr + off + e->target_offset
+				 + offsetof(struct ipt_entry_target,
+					    u.user.name),
+				 t->u.kernel.target->name,
+				 strlen(t->u.kernel.target->name)+1) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+	}
+
+ free_counters:
+	vfree(counters);
+	return ret;
+}
+
+static int
+get_entries(const struct ipt_get_entries *entries,
+	    struct ipt_get_entries __user *uptr)
+{
+	int ret;
+	struct ipt_table *t;
+
+	t = find_table_lock(entries->name);
+	if (t && !IS_ERR(t)) {
+		duprintf("t->private->number = %u\n",
+			 t->private->number);
+		if (entries->size == t->private->size)
+			ret = copy_entries_to_user(t->private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 t->private->size,
+				 entries->size);
+			ret = -EINVAL;
+		}
+		module_put(t->me);
+		up(&ipt_mutex);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ipt_replace tmp;
+	struct ipt_table *t;
+	struct ipt_table_info *newinfo, *oldinfo;
+	struct ipt_counters *counters;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Hack: Causes ipchains to give correct error msg --RR */
+	if (len != sizeof(tmp) + tmp.size)
+		return -ENOPROTOOPT;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return -ENOMEM;
+
+	newinfo = vmalloc(sizeof(struct ipt_table_info)
+			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto free_newinfo;
+	}
+	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo_counters;
+
+	duprintf("ip_tables: Translated table\n");
+
+	t = try_then_request_module(find_table_lock(tmp.name),
+				    "iptable_%s", tmp.name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free_newinfo_counters_untrans;
+	}
+
+	/* You lied! */
+	if (tmp.valid_hooks != t->valid_hooks) {
+		duprintf("Valid hook crap: %08X vs %08X\n",
+			 tmp.valid_hooks, t->valid_hooks);
+		ret = -EINVAL;
+		goto put_module;
+	}
+
+	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) || 
+	    (newinfo->number <= oldinfo->initial_entries)) 
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+	vfree(oldinfo);
+	if (copy_to_user(tmp.counters, counters,
+			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
+		ret = -EFAULT;
+	vfree(counters);
+	up(&ipt_mutex);
+	return ret;
+
+ put_module:
+	module_put(t->me);
+	up(&ipt_mutex);
+ free_newinfo_counters_untrans:
+	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ free_newinfo_counters:
+	vfree(counters);
+ free_newinfo:
+	vfree(newinfo);
+	return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static inline int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct ipt_counters addme[],
+		     unsigned int *i)
+{
+#if 0
+	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+		 *i,
+		 (long unsigned int)e->counters.pcnt,
+		 (long unsigned int)e->counters.bcnt,
+		 (long unsigned int)addme[*i].pcnt,
+		 (long unsigned int)addme[*i].bcnt);
+#endif
+
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static int
+do_add_counters(void __user *user, unsigned int len)
+{
+	unsigned int i;
+	struct ipt_counters_info tmp, *paddc;
+	struct ipt_table *t;
+	int ret = 0;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
+		return -EINVAL;
+
+	paddc = vmalloc(len);
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user, len) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = find_table_lock(tmp.name);
+	if (!t || IS_ERR(t)) {
+		ret = t ? PTR_ERR(t) : -ENOENT;
+		goto free;
+	}
+
+	write_lock_bh(&t->lock);
+	if (t->private->number != paddc->num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	IPT_ENTRY_ITERATE(t->private->entries,
+			  t->private->size,
+			  add_counter_to_entry,
+			  paddc->counters,
+			  &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	up(&ipt_mutex);
+	module_put(t->me);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+static int
+do_ipt_set_ctl(struct sock *sk,	int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IPT_SO_SET_REPLACE:
+		ret = do_replace(user, len);
+		break;
+
+	case IPT_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len);
+		break;
+
+	default:
+		duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IPT_SO_GET_INFO: {
+		char name[IPT_TABLE_MAXNAMELEN];
+		struct ipt_table *t;
+
+		if (*len != sizeof(struct ipt_getinfo)) {
+			duprintf("length %u != %u\n", *len,
+				 sizeof(struct ipt_getinfo));
+			ret = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(name, user, sizeof(name)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+
+		t = try_then_request_module(find_table_lock(name),
+					    "iptable_%s", name);
+		if (t && !IS_ERR(t)) {
+			struct ipt_getinfo info;
+
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, t->private->hook_entry,
+			       sizeof(info.hook_entry));
+			memcpy(info.underflow, t->private->underflow,
+			       sizeof(info.underflow));
+			info.num_entries = t->private->number;
+			info.size = t->private->size;
+			memcpy(info.name, name, sizeof(info.name));
+
+			if (copy_to_user(user, &info, *len) != 0)
+				ret = -EFAULT;
+			else
+				ret = 0;
+			up(&ipt_mutex);
+			module_put(t->me);
+		} else
+			ret = t ? PTR_ERR(t) : -ENOENT;
+	}
+	break;
+
+	case IPT_SO_GET_ENTRIES: {
+		struct ipt_get_entries get;
+
+		if (*len < sizeof(get)) {
+			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+			ret = -EFAULT;
+		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
+			duprintf("get_entries: %u != %u\n", *len,
+				 sizeof(struct ipt_get_entries) + get.size);
+			ret = -EINVAL;
+		} else
+			ret = get_entries(&get, user);
+		break;
+	}
+
+	case IPT_SO_GET_REVISION_MATCH:
+	case IPT_SO_GET_REVISION_TARGET: {
+		struct ipt_get_revision rev;
+		int (*revfn)(const char *, u8, int *);
+
+		if (*len != sizeof(rev)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (cmd == IPT_SO_GET_REVISION_TARGET)
+			revfn = target_revfn;
+		else
+			revfn = match_revfn;
+
+		try_then_request_module(find_revision(rev.name, rev.revision,
+						      revfn, &ret),
+					"ipt_%s", rev.name);
+		break;
+	}
+
+	default:
+		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Registration hooks for targets. */
+int
+ipt_register_target(struct ipt_target *target)
+{
+	int ret;
+
+	ret = down_interruptible(&ipt_mutex);
+	if (ret != 0)
+		return ret;
+	list_add(&target->list, &ipt_target);
+	up(&ipt_mutex);
+	return ret;
+}
+
+void
+ipt_unregister_target(struct ipt_target *target)
+{
+	down(&ipt_mutex);
+	LIST_DELETE(&ipt_target, target);
+	up(&ipt_mutex);
+}
+
+int
+ipt_register_match(struct ipt_match *match)
+{
+	int ret;
+
+	ret = down_interruptible(&ipt_mutex);
+	if (ret != 0)
+		return ret;
+
+	list_add(&match->list, &ipt_match);
+	up(&ipt_mutex);
+
+	return ret;
+}
+
+void
+ipt_unregister_match(struct ipt_match *match)
+{
+	down(&ipt_mutex);
+	LIST_DELETE(&ipt_match, match);
+	up(&ipt_mutex);
+}
+
+int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
+{
+	int ret;
+	struct ipt_table_info *newinfo;
+	static struct ipt_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+
+	newinfo = vmalloc(sizeof(struct ipt_table_info)
+			  + SMP_ALIGN(repl->size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	ret = translate_table(table->name, table->valid_hooks,
+			      newinfo, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	ret = down_interruptible(&ipt_mutex);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	/* Don't autoload: we'd eat our tail... */
+	if (list_named_find(&ipt_tables, table->name)) {
+		ret = -EEXIST;
+		goto free_unlock;
+	}
+
+	/* Simplifies replace_table code. */
+	table->private = &bootstrap;
+	if (!replace_table(table, 0, newinfo, &ret))
+		goto free_unlock;
+
+	duprintf("table->private->number = %u\n",
+		 table->private->number);
+	
+	/* save number of initial entries */
+	table->private->initial_entries = table->private->number;
+
+	rwlock_init(&table->lock);
+	list_prepend(&ipt_tables, table);
+
+ unlock:
+	up(&ipt_mutex);
+	return ret;
+
+ free_unlock:
+	vfree(newinfo);
+	goto unlock;
+}
+
+void ipt_unregister_table(struct ipt_table *table)
+{
+	down(&ipt_mutex);
+	LIST_DELETE(&ipt_tables, table);
+	up(&ipt_mutex);
+
+	/* Decrease module usage counts and free resources */
+	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
+			  cleanup_entry, NULL);
+	vfree(table->private);
+}
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+	int ret;
+
+	ret = (port >= min && port <= max) ^ invert;
+	return ret;
+}
+
+static int
+tcp_find_option(u_int8_t option,
+		const struct sk_buff *skb,
+		unsigned int optlen,
+		int invert,
+		int *hotdrop)
+{
+	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+	unsigned int i;
+
+	duprintf("tcp_match: finding option\n");
+
+	if (!optlen)
+		return invert;
+
+	/* If we don't have the whole header, drop packet. */
+	op = skb_header_pointer(skb,
+				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
+				optlen, _opt);
+	if (op == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == option) return !invert;
+		if (op[i] < 2) i++;
+		else i += op[i+1]?:1;
+	}
+
+	return invert;
+}
+
+static int
+tcp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  int *hotdrop)
+{
+	struct tcphdr _tcph, *th;
+	const struct ipt_tcp *tcpinfo = matchinfo;
+
+	if (offset) {
+		/* To quote Alan:
+
+		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
+		   causes this. Its a cracker trying to break in by doing a
+		   flag overwrite to pass the direction checks.
+		*/
+		if (offset == 1) {
+			duprintf("Dropping evil TCP offset=1 frag.\n");
+			*hotdrop = 1;
+		}
+		/* Must not be a fragment. */
+		return 0;
+	}
+
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+			ntohs(th->source),
+			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
+		return 0;
+	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+			ntohs(th->dest),
+			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
+		return 0;
+	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
+		      == tcpinfo->flg_cmp,
+		      IPT_TCP_INV_FLAGS))
+		return 0;
+	if (tcpinfo->option) {
+		if (th->doff * 4 < sizeof(_tcph)) {
+			*hotdrop = 1;
+			return 0;
+		}
+		if (!tcp_find_option(tcpinfo->option, skb,
+				     th->doff*4 - sizeof(_tcph),
+				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
+				     hotdrop))
+			return 0;
+	}
+	return 1;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp_checkentry(const char *tablename,
+	       const struct ipt_ip *ip,
+	       void *matchinfo,
+	       unsigned int matchsize,
+	       unsigned int hook_mask)
+{
+	const struct ipt_tcp *tcpinfo = matchinfo;
+
+	/* Must specify proto == TCP, and no unknown invflags */
+	return ip->proto == IPPROTO_TCP
+		&& !(ip->invflags & IPT_INV_PROTO)
+		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
+		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
+}
+
+static int
+udp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  int *hotdrop)
+{
+	struct udphdr _udph, *uh;
+	const struct ipt_udp *udpinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil UDP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return port_match(udpinfo->spts[0], udpinfo->spts[1],
+			  ntohs(uh->source),
+			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
+		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+			      ntohs(uh->dest),
+			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp_checkentry(const char *tablename,
+	       const struct ipt_ip *ip,
+	       void *matchinfo,
+	       unsigned int matchinfosize,
+	       unsigned int hook_mask)
+{
+	const struct ipt_udp *udpinfo = matchinfo;
+
+	/* Must specify proto == UDP, and no unknown invflags */
+	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
+		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
+			 IPPROTO_UDP);
+		return 0;
+	}
+	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
+		duprintf("ipt_udp: matchsize %u != %u\n",
+			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
+		return 0;
+	}
+	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
+		duprintf("ipt_udp: unknown flags %X\n",
+			 udpinfo->invflags);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+static inline int
+icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+		     u_int8_t type, u_int8_t code,
+		     int invert)
+{
+	return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
+		^ invert;
+}
+
+static int
+icmp_match(const struct sk_buff *skb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   const void *matchinfo,
+	   int offset,
+	   int *hotdrop)
+{
+	struct icmphdr _icmph, *ic;
+	const struct ipt_icmp *icmpinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+				sizeof(_icmph), &_icmph);
+	if (ic == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("Dropping evil ICMP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return icmp_type_code_match(icmpinfo->type,
+				    icmpinfo->code[0],
+				    icmpinfo->code[1],
+				    ic->type, ic->code,
+				    !!(icmpinfo->invflags&IPT_ICMP_INV));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+icmp_checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_icmp *icmpinfo = matchinfo;
+
+	/* Must specify proto == ICMP, and no unknown invflags */
+	return ip->proto == IPPROTO_ICMP
+		&& !(ip->invflags & IPT_INV_PROTO)
+		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
+		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct ipt_target ipt_standard_target = {
+	.name		= IPT_STANDARD_TARGET,
+};
+
+static struct ipt_target ipt_error_target = {
+	.name		= IPT_ERROR_TARGET,
+	.target		= ipt_error,
+};
+
+static struct nf_sockopt_ops ipt_sockopts = {
+	.pf		= PF_INET,
+	.set_optmin	= IPT_BASE_CTL,
+	.set_optmax	= IPT_SO_SET_MAX+1,
+	.set		= do_ipt_set_ctl,
+	.get_optmin	= IPT_BASE_CTL,
+	.get_optmax	= IPT_SO_GET_MAX+1,
+	.get		= do_ipt_get_ctl,
+};
+
+static struct ipt_match tcp_matchstruct = {
+	.name		= "tcp",
+	.match		= &tcp_match,
+	.checkentry	= &tcp_checkentry,
+};
+
+static struct ipt_match udp_matchstruct = {
+	.name		= "udp",
+	.match		= &udp_match,
+	.checkentry	= &udp_checkentry,
+};
+
+static struct ipt_match icmp_matchstruct = {
+	.name		= "icmp",
+	.match		= &icmp_match,
+	.checkentry	= &icmp_checkentry,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const char *i,
+			     off_t start_offset, char *buffer, int length,
+			     off_t *pos, unsigned int *count)
+{
+	if ((*count)++ >= start_offset) {
+		unsigned int namelen;
+
+		namelen = sprintf(buffer + *pos, "%s\n",
+				  i + sizeof(struct list_head));
+		if (*pos + namelen > length) {
+			/* Stop iterating */
+			return 1;
+		}
+		*pos += namelen;
+	}
+	return 0;
+}
+
+static inline int print_target(const struct ipt_target *t,
+                               off_t start_offset, char *buffer, int length,
+                               off_t *pos, unsigned int *count)
+{
+	if (t == &ipt_standard_target || t == &ipt_error_target)
+		return 0;
+	return print_name((char *)t, start_offset, buffer, length, pos, count);
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ipt_tables, print_name, void *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ipt_mutex);
+
+	/* `start' hack - see fs/proc/generic.c line ~105 */
+	*start=(char *)((unsigned long)count-offset);
+	return pos;
+}
+
+static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
+		  offset, buffer, length, &pos, &count);
+	
+	up(&ipt_mutex);
+
+	*start = (char *)((unsigned long)count - offset);
+	return pos;
+}
+
+static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return 0;
+	
+	LIST_FIND(&ipt_match, print_name, void *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ipt_mutex);
+
+	*start = (char *)((unsigned long)count - offset);
+	return pos;
+}
+
+static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
+{ { "ip_tables_names", ipt_get_tables },
+  { "ip_tables_targets", ipt_get_targets },
+  { "ip_tables_matches", ipt_get_matches },
+  { NULL, NULL} };
+#endif /*CONFIG_PROC_FS*/
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Noone else will be downing sem now, so we won't sleep */
+	down(&ipt_mutex);
+	list_append(&ipt_target, &ipt_standard_target);
+	list_append(&ipt_target, &ipt_error_target);
+	list_append(&ipt_match, &tcp_matchstruct);
+	list_append(&ipt_match, &udp_matchstruct);
+	list_append(&ipt_match, &icmp_matchstruct);
+	up(&ipt_mutex);
+
+	/* Register setsockopt */
+	ret = nf_register_sockopt(&ipt_sockopts);
+	if (ret < 0) {
+		duprintf("Unable to register sockopts.\n");
+		return ret;
+	}
+
+#ifdef CONFIG_PROC_FS
+	{
+	struct proc_dir_entry *proc;
+	int i;
+
+	for (i = 0; ipt_proc_entry[i].name; i++) {
+		proc = proc_net_create(ipt_proc_entry[i].name, 0,
+				       ipt_proc_entry[i].get_info);
+		if (!proc) {
+			while (--i >= 0)
+				proc_net_remove(ipt_proc_entry[i].name);
+			nf_unregister_sockopt(&ipt_sockopts);
+			return -ENOMEM;
+		}
+		proc->owner = THIS_MODULE;
+	}
+	}
+#endif
+
+	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+	{
+	int i;
+	for (i = 0; ipt_proc_entry[i].name; i++)
+		proc_net_remove(ipt_proc_entry[i].name);
+	}
+#endif
+}
+
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_register_match);
+EXPORT_SYMBOL(ipt_unregister_match);
+EXPORT_SYMBOL(ipt_do_table);
+EXPORT_SYMBOL(ipt_register_target);
+EXPORT_SYMBOL(ipt_unregister_target);
+EXPORT_SYMBOL(ipt_find_target);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
new file mode 100644
index 00000000000..9842e6e2318
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -0,0 +1,92 @@
+/*
+ * This is a module which is used for setting the skb->priority field
+ * of an skb for qdisc classification.
+ */
+
+/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables qdisc classification target module");
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_classify_target_info *clinfo = targinfo;
+
+	if((*pskb)->priority != clinfo->priority) {
+		(*pskb)->priority = clinfo->priority;
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+		printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+		return 0;
+	}
+	
+	if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
+	                  (1 << NF_IP_POST_ROUTING))) {
+		printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
+		                "and POST_ROUTING.\n");
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_ERR "CLASSIFY: can only be called from "
+		                "\"mangle\" table, not \"%s\".\n",
+		                tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_classify_reg = { 
+	.name 		= "CLASSIFY", 
+	.target 	= target,
+	.checkentry	= checkentry,
+	.me 		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_classify_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_classify_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
new file mode 100644
index 00000000000..0f12e3a3dc7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -0,0 +1,761 @@
+/* Cluster IP hashmark target 
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * based on ideas of Fabio Olive Leite <olive@unixforge.org>
+ *
+ * Development of this code funded by SuSE Linux AG, http://www.suse.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/proc_fs.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/checksum.h>
+
+#include <linux/netfilter_arp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+#define CLUSTERIP_VERSION "0.6"
+
+#define DEBUG_CLUSTERIP
+
+#ifdef DEBUG_CLUSTERIP
+#define DEBUGP	printk
+#else
+#define DEBUGP
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables target for CLUSTERIP");
+
+struct clusterip_config {
+	struct list_head list;			/* list of all configs */
+	atomic_t refcount;			/* reference count */
+
+	u_int32_t clusterip;			/* the IP address */
+	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
+	struct net_device *dev;			/* device */
+	u_int16_t num_total_nodes;		/* total number of nodes */
+	u_int16_t num_local_nodes;		/* number of local nodes */
+	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];	/* node number array */
+
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *pde;		/* proc dir entry */
+#endif
+	enum clusterip_hashmode hash_mode;	/* which hashing mode */
+	u_int32_t hash_initval;			/* hash initialization */
+};
+
+static LIST_HEAD(clusterip_configs);
+
+/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
+ * data within all structurses (num_local_nodes, local_nodes[]) */
+static DECLARE_RWLOCK(clusterip_lock);
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations clusterip_proc_fops;
+static struct proc_dir_entry *clusterip_procdir;
+#endif
+
+static inline void
+clusterip_config_get(struct clusterip_config *c) {
+	atomic_inc(&c->refcount);
+}
+
+static inline void
+clusterip_config_put(struct clusterip_config *c) {
+	if (atomic_dec_and_test(&c->refcount)) {
+		WRITE_LOCK(&clusterip_lock);
+		list_del(&c->list);
+		WRITE_UNLOCK(&clusterip_lock);
+		dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+		dev_put(c->dev);
+		kfree(c);
+	}
+}
+
+
+static struct clusterip_config *
+__clusterip_config_find(u_int32_t clusterip)
+{
+	struct list_head *pos;
+
+	MUST_BE_READ_LOCKED(&clusterip_lock);
+	list_for_each(pos, &clusterip_configs) {
+		struct clusterip_config *c = list_entry(pos, 
+					struct clusterip_config, list);
+		if (c->clusterip == clusterip) {
+			return c;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct clusterip_config *
+clusterip_config_find_get(u_int32_t clusterip)
+{
+	struct clusterip_config *c;
+
+	READ_LOCK(&clusterip_lock);
+	c = __clusterip_config_find(clusterip);
+	if (!c) {
+		READ_UNLOCK(&clusterip_lock);
+		return NULL;
+	}
+	atomic_inc(&c->refcount);
+	READ_UNLOCK(&clusterip_lock);
+
+	return c;
+}
+
+static struct clusterip_config *
+clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
+			struct net_device *dev)
+{
+	struct clusterip_config *c;
+	char buffer[16];
+
+	c = kmalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c)
+		return NULL;
+
+	memset(c, 0, sizeof(*c));
+	c->dev = dev;
+	c->clusterip = ip;
+	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+	c->num_total_nodes = i->num_total_nodes;
+	c->num_local_nodes = i->num_local_nodes;
+	memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes));
+	c->hash_mode = i->hash_mode;
+	c->hash_initval = i->hash_initval;
+	atomic_set(&c->refcount, 1);
+
+#ifdef CONFIG_PROC_FS
+	/* create proc dir entry */
+	sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
+	c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir);
+	if (!c->pde) {
+		kfree(c);
+		return NULL;
+	}
+	c->pde->proc_fops = &clusterip_proc_fops;
+	c->pde->data = c;
+#endif
+
+	WRITE_LOCK(&clusterip_lock);
+	list_add(&c->list, &clusterip_configs);
+	WRITE_UNLOCK(&clusterip_lock);
+
+	return c;
+}
+
+static int
+clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+	int i;
+
+	WRITE_LOCK(&clusterip_lock);
+
+	if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
+	    || nodenum > CLUSTERIP_MAX_NODES) {
+		WRITE_UNLOCK(&clusterip_lock);
+		return 1;
+	}
+
+	/* check if we alrady have this number in our array */
+	for (i = 0; i < c->num_local_nodes; i++) {
+		if (c->local_nodes[i] == nodenum) {
+			WRITE_UNLOCK(&clusterip_lock);
+			return 1;
+		}
+	}
+
+	c->local_nodes[c->num_local_nodes++] = nodenum;
+
+	WRITE_UNLOCK(&clusterip_lock);
+	return 0;
+}
+
+static int
+clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+	int i;
+
+	WRITE_LOCK(&clusterip_lock);
+
+	if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
+		WRITE_UNLOCK(&clusterip_lock);
+		return 1;
+	}
+		
+	for (i = 0; i < c->num_local_nodes; i++) {
+		if (c->local_nodes[i] == nodenum) {
+			int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
+			memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
+			c->num_local_nodes--;
+			WRITE_UNLOCK(&clusterip_lock);
+			return 0;
+		}
+	}
+
+	WRITE_UNLOCK(&clusterip_lock);
+	return 1;
+}
+
+static inline u_int32_t
+clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
+{
+	struct iphdr *iph = skb->nh.iph;
+	unsigned long hashval;
+	u_int16_t sport, dport;
+	struct tcphdr *th;
+	struct udphdr *uh;
+	struct icmphdr *ih;
+
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		th = (void *)iph+iph->ihl*4;
+		sport = ntohs(th->source);
+		dport = ntohs(th->dest);
+		break;
+	case IPPROTO_UDP:
+		uh = (void *)iph+iph->ihl*4;
+		sport = ntohs(uh->source);
+		dport = ntohs(uh->dest);
+		break;
+	case IPPROTO_ICMP:
+		ih = (void *)iph+iph->ihl*4;
+		sport = ntohs(ih->un.echo.id);
+		dport = (ih->type<<8)|ih->code;
+		break;
+	default:
+		if (net_ratelimit()) {
+			printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
+				iph->protocol);
+		}
+		sport = dport = 0;
+	}
+
+	switch (config->hash_mode) {
+	case CLUSTERIP_HASHMODE_SIP:
+		hashval = jhash_1word(ntohl(iph->saddr),
+				      config->hash_initval);
+		break;
+	case CLUSTERIP_HASHMODE_SIP_SPT:
+		hashval = jhash_2words(ntohl(iph->saddr), sport, 
+				       config->hash_initval);
+		break;
+	case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
+		hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
+				       config->hash_initval);
+		break;
+	default:
+		/* to make gcc happy */
+		hashval = 0;
+		/* This cannot happen, unless the check function wasn't called
+		 * at rule load time */
+		printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+		BUG();
+		break;
+	}
+
+	/* node numbers are 1..n, not 0..n */
+	return ((hashval % config->num_total_nodes)+1);
+}
+
+static inline int
+clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
+{
+	int i;
+
+	READ_LOCK(&clusterip_lock);
+
+	if (config->num_local_nodes == 0) {
+		READ_UNLOCK(&clusterip_lock);
+		return 0;
+	}
+
+	for (i = 0; i < config->num_local_nodes; i++) {
+		if (config->local_nodes[i] == hash) {
+			READ_UNLOCK(&clusterip_lock);
+			return 1;
+		}
+	}
+
+	READ_UNLOCK(&clusterip_lock);
+
+	return 0;
+}
+
+/*********************************************************************** 
+ * IPTABLES TARGET 
+ ***********************************************************************/
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
+	u_int32_t hash;
+
+	/* don't need to clusterip_config_get() here, since refcount
+	 * is only decremented by destroy() - and ip_tables guarantees
+	 * that the ->target() function isn't called after ->destroy() */
+
+	if (!ct) {
+		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
+			/* FIXME: need to drop invalid ones, since replies
+			 * to outgoing connections of other nodes will be 
+			 * marked as INVALID */
+		return NF_DROP;
+	}
+
+	/* special case: ICMP error handling. conntrack distinguishes between
+	 * error messages (RELATED) and information requests (see below) */
+	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+	    && (ctinfo == IP_CT_RELATED 
+		|| ctinfo == IP_CT_IS_REPLY+IP_CT_IS_REPLY))
+		return IPT_CONTINUE;
+
+	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, 
+	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+	 * on, which all have an ID field [relevant for hashing]. */
+
+	hash = clusterip_hashfn(*pskb, cipinfo->config);
+
+	switch (ctinfo) {
+		case IP_CT_NEW:
+			ct->mark = hash;
+			break;
+		case IP_CT_RELATED:
+		case IP_CT_RELATED+IP_CT_IS_REPLY:
+			/* FIXME: we don't handle expectations at the
+			 * moment.  they can arrive on a different node than
+			 * the master connection (e.g. FTP passive mode) */
+		case IP_CT_ESTABLISHED:
+		case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
+			break;
+		default:
+			break;
+	}
+
+#ifdef DEBUG_CLUSTERP
+	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+#endif
+	DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark);
+	if (!clusterip_responsible(cipinfo->config, hash)) {
+		DEBUGP("not responsible\n");
+		return NF_DROP;
+	}
+	DEBUGP("responsible\n");
+
+	/* despite being received via linklayer multicast, this is
+	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
+	(*pskb)->pkt_type = PACKET_HOST;
+
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+
+	struct clusterip_config *config;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) {
+		printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info)));
+		return 0;
+	}
+
+	if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
+	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
+	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
+		printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
+			cipinfo->hash_mode);
+		return 0;
+
+	}
+	if (e->ip.dmsk.s_addr != 0xffffffff
+	    || e->ip.dst.s_addr == 0) {
+		printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
+		return 0;
+	}
+
+	/* FIXME: further sanity checks */
+
+	config = clusterip_config_find_get(e->ip.dst.s_addr);
+	if (!config) {
+		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
+			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
+			return 0;
+		} else {
+			struct net_device *dev;
+
+			if (e->ip.iniface[0] == '\0') {
+				printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
+				return 0;
+			}
+
+			dev = dev_get_by_name(e->ip.iniface);
+			if (!dev) {
+				printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+				return 0;
+			}
+
+			config = clusterip_config_init(cipinfo, 
+							e->ip.dst.s_addr, dev);
+			if (!config) {
+				printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
+				dev_put(dev);
+				return 0;
+			}
+			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+		}
+	}
+
+	cipinfo->config = config;
+
+	return 1;
+}
+
+/* drop reference count of cluster config when rule is deleted */
+static void destroy(void *matchinfo, unsigned int matchinfosize)
+{
+	struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
+
+	/* we first remove the proc entry and then drop the reference
+	 * count.  In case anyone still accesses the file, the open/close
+	 * functions are also incrementing the refcount on their own */
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry(cipinfo->config->pde->name,
+			  cipinfo->config->pde->parent);
+#endif
+	clusterip_config_put(cipinfo->config);
+}
+
+static struct ipt_target clusterip_tgt = { 
+	.name = "CLUSTERIP",
+	.target = &target, 
+	.checkentry = &checkentry, 
+	.destroy = &destroy,
+	.me = THIS_MODULE
+};
+
+
+/*********************************************************************** 
+ * ARP MANGLING CODE 
+ ***********************************************************************/
+
+/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
+struct arp_payload {
+	u_int8_t src_hw[ETH_ALEN];
+	u_int32_t src_ip;
+	u_int8_t dst_hw[ETH_ALEN];
+	u_int32_t dst_ip;
+} __attribute__ ((packed));
+
+#ifdef CLUSTERIP_DEBUG
+static void arp_print(struct arp_payload *payload) 
+{
+#define HBUFFERLEN 30
+	char hbuffer[HBUFFERLEN];
+	int j,k;
+	const char hexbuf[]= "0123456789abcdef";
+
+	for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
+		hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15];
+		hbuffer[k++]=hexbuf[payload->src_hw[j]&15];
+		hbuffer[k++]=':';
+	}
+	hbuffer[--k]='\0';
+
+	printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", 
+		NIPQUAD(payload->src_ip), hbuffer,
+		NIPQUAD(payload->dst_ip));
+}
+#endif
+
+static unsigned int
+arp_mangle(unsigned int hook,
+	   struct sk_buff **pskb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   int (*okfn)(struct sk_buff *))
+{
+	struct arphdr *arp = (*pskb)->nh.arph;
+	struct arp_payload *payload;
+	struct clusterip_config *c;
+
+	/* we don't care about non-ethernet and non-ipv4 ARP */
+	if (arp->ar_hrd != htons(ARPHRD_ETHER)
+	    || arp->ar_pro != htons(ETH_P_IP)
+	    || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+		return NF_ACCEPT;
+
+	/* we only want to mangle arp replies */
+	if (arp->ar_op != htons(ARPOP_REPLY))
+		return NF_ACCEPT;
+
+	payload = (void *)(arp+1);
+
+	/* if there is no clusterip configuration for the arp reply's 
+	 * source ip, we don't want to mangle it */
+	c = clusterip_config_find_get(payload->src_ip);
+	if (!c)
+		return NF_ACCEPT;
+
+	/* normally the linux kernel always replies to arp queries of 
+	 * addresses on different interfacs.  However, in the CLUSTERIP case
+	 * this wouldn't work, since we didn't subscribe the mcast group on
+	 * other interfaces */
+	if (c->dev != out) {
+		DEBUGP("CLUSTERIP: not mangling arp reply on different "
+		       "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name);
+		clusterip_config_put(c);
+		return NF_ACCEPT;
+	}
+
+	/* mangle reply hardware address */
+	memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
+
+#ifdef CLUSTERIP_DEBUG
+	DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+	arp_print(payload);
+#endif
+
+	clusterip_config_put(c);
+
+	return NF_ACCEPT;
+}
+
+static struct nf_hook_ops cip_arp_ops = {
+	.hook = arp_mangle,
+	.pf = NF_ARP,
+	.hooknum = NF_ARP_OUT,
+	.priority = -1
+};
+
+/*********************************************************************** 
+ * PROC DIR HANDLING 
+ ***********************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct clusterip_config *c = pde->data;
+	unsigned int *nodeidx;
+
+	READ_LOCK(&clusterip_lock);
+	if (*pos >= c->num_local_nodes)
+		return NULL;
+
+	nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
+	if (!nodeidx)
+		return ERR_PTR(-ENOMEM);
+
+	*nodeidx = *pos;
+	return nodeidx;
+}
+
+static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct clusterip_config *c = pde->data;
+	unsigned int *nodeidx = (unsigned int *)v;
+
+	*pos = ++(*nodeidx);
+	if (*pos >= c->num_local_nodes) {
+		kfree(v);
+		return NULL;
+	}
+	return nodeidx;
+}
+
+static void clusterip_seq_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+
+	READ_UNLOCK(&clusterip_lock);
+}
+
+static int clusterip_seq_show(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct clusterip_config *c = pde->data;
+	unsigned int *nodeidx = (unsigned int *)v;
+
+	if (*nodeidx != 0) 
+		seq_putc(s, ',');
+	seq_printf(s, "%u", c->local_nodes[*nodeidx]);
+
+	if (*nodeidx == c->num_local_nodes-1)
+		seq_putc(s, '\n');
+
+	return 0;
+}
+
+static struct seq_operations clusterip_seq_ops = {
+	.start	= clusterip_seq_start,
+	.next	= clusterip_seq_next,
+	.stop	= clusterip_seq_stop,
+	.show	= clusterip_seq_show,
+};
+
+static int clusterip_proc_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &clusterip_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		struct proc_dir_entry *pde = PDE(inode);
+		struct clusterip_config *c = pde->data;
+
+		sf->private = pde;
+
+		clusterip_config_get(c);
+	}
+
+	return ret;
+}
+
+static int clusterip_proc_release(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *pde = PDE(inode);
+	struct clusterip_config *c = pde->data;
+	int ret;
+
+	ret = seq_release(inode, file);
+
+	if (!ret)
+		clusterip_config_put(c);
+
+	return ret;
+}
+
+static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
+				size_t size, loff_t *ofs)
+{
+#define PROC_WRITELEN	10
+	char buffer[PROC_WRITELEN+1];
+	struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode);
+	struct clusterip_config *c = pde->data;
+	unsigned long nodenum;
+
+	if (copy_from_user(buffer, input, PROC_WRITELEN))
+		return -EFAULT;
+
+	if (*buffer == '+') {
+		nodenum = simple_strtoul(buffer+1, NULL, 10);
+		if (clusterip_add_node(c, nodenum))
+			return -ENOMEM;
+	} else if (*buffer == '-') {
+		nodenum = simple_strtoul(buffer+1, NULL,10);
+		if (clusterip_del_node(c, nodenum))
+			return -ENOENT;
+	} else
+		return -EIO;
+
+	return size;
+}
+
+static struct file_operations clusterip_proc_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = clusterip_proc_open,
+	.read	 = seq_read,
+	.write	 = clusterip_proc_write,
+	.llseek	 = seq_lseek,
+	.release = clusterip_proc_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+static int init_or_cleanup(int fini)
+{
+	int ret;
+
+	if (fini)
+		goto cleanup;
+
+	if (ipt_register_target(&clusterip_tgt)) {
+		ret = -EINVAL;
+		goto cleanup_none;
+	}
+
+	if (nf_register_hook(&cip_arp_ops) < 0) {
+		ret = -EINVAL;
+		goto cleanup_target;
+	}
+
+#ifdef CONFIG_PROC_FS
+	clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
+	if (!clusterip_procdir) {
+		printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+		ret = -ENOMEM;
+		goto cleanup_hook;
+	}
+#endif /* CONFIG_PROC_FS */
+
+	printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
+		CLUSTERIP_VERSION);
+
+	return 0;
+
+cleanup:
+	printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
+		CLUSTERIP_VERSION);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
+#endif
+cleanup_hook:
+	nf_unregister_hook(&cip_arp_ops);
+cleanup_target:
+	ipt_unregister_target(&clusterip_tgt);
+cleanup_none:
+	return -EINVAL;
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(0);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
new file mode 100644
index 00000000000..30ddd3e18eb
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -0,0 +1,118 @@
+/* This kernel module is used to modify the connection mark values, or
+ * to optionally restore the skb nfmark from the connection mark
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
+MODULE_DESCRIPTION("IP tables CONNMARK matching module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_connmark_target_info *markinfo = targinfo;
+	unsigned long diff;
+	unsigned long nfmark;
+	unsigned long newmark;
+
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
+	if (ct) {
+	    switch(markinfo->mode) {
+	    case IPT_CONNMARK_SET:
+		newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+		if (newmark != ct->mark)
+		    ct->mark = newmark;
+		break;
+	    case IPT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
+		if (ct->mark != newmark)
+		    ct->mark = newmark;
+		break;
+	    case IPT_CONNMARK_RESTORE:
+		nfmark = (*pskb)->nfmark;
+		diff = (ct->mark ^ nfmark) & markinfo->mask;
+		if (diff != 0) {
+		    (*pskb)->nfmark = nfmark ^ diff;
+		    (*pskb)->nfcache |= NFC_ALTERED;
+		}
+		break;
+	    }
+	}
+
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+	   void *targinfo,
+	   unsigned int targinfosize,
+	   unsigned int hook_mask)
+{
+	struct ipt_connmark_target_info *matchinfo = targinfo;
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
+		printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
+		return 0;
+	}
+
+	if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
+	    if (strcmp(tablename, "mangle") != 0) {
+		    printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		    return 0;
+	    }
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_connmark_reg = {
+	.name = "CONNMARK",
+	.target = &target,
+	.checkentry = &checkentry,
+	.me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_connmark_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_connmark_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
new file mode 100644
index 00000000000..3ea4509099f
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -0,0 +1,106 @@
+/* iptables module for setting the IPv4 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ * 
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables DSCP modification module");
+MODULE_LICENSE("GPL");
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_DSCP_info *dinfo = targinfo;
+	u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
+
+
+	if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
+		u_int16_t diffs[2];
+
+		if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+			return NF_DROP;
+
+		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+		(*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
+			| sh_dscp;
+		diffs[1] = htons((*pskb)->nh.iph->tos);
+		(*pskb)->nh.iph->check
+			= csum_fold(csum_partial((char *)diffs,
+						 sizeof(diffs),
+						 (*pskb)->nh.iph->check
+						 ^ 0xFFFF));
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_DSCP_info))) {
+		printk(KERN_WARNING "DSCP: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_DSCP_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "DSCP: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	if ((dscp > IPT_DSCP_MAX)) {
+		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_dscp_reg = {
+	.name		= "DSCP",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_dscp_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_dscp_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
new file mode 100644
index 00000000000..ada9911118e
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -0,0 +1,175 @@
+/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as 
+ * published by the Free Software Foundation.
+ *
+ * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ECN.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables ECN modification module");
+
+/* set ECT codepoint from IP header.
+ * 	return 0 if there was an error. */
+static inline int
+set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+{
+	if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
+	    != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
+		u_int16_t diffs[2];
+
+		if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+			return 0;
+
+		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+		(*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
+		(*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+		diffs[1] = htons((*pskb)->nh.iph->tos);
+		(*pskb)->nh.iph->check
+			= csum_fold(csum_partial((char *)diffs,
+						 sizeof(diffs),
+						 (*pskb)->nh.iph->check
+						 ^0xFFFF));
+		(*pskb)->nfcache |= NFC_ALTERED;
+	} 
+	return 1;
+}
+
+/* Return 0 if there was an error. */
+static inline int
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+{
+	struct tcphdr _tcph, *tcph;
+	u_int16_t diffs[2];
+
+	/* Not enought header? */
+	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+				  sizeof(_tcph), &_tcph);
+	if (!tcph)
+		return 0;
+
+	if (!(einfo->operation & IPT_ECN_OP_SET_ECE
+	      || tcph->ece == einfo->proto.tcp.ece)
+	    && (!(einfo->operation & IPT_ECN_OP_SET_CWR
+		  || tcph->cwr == einfo->proto.tcp.cwr)))
+		return 1;
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+		return 0;
+	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+
+	diffs[0] = ((u_int16_t *)tcph)[6];
+	if (einfo->operation & IPT_ECN_OP_SET_ECE)
+		tcph->ece = einfo->proto.tcp.ece;
+	if (einfo->operation & IPT_ECN_OP_SET_CWR)
+		tcph->cwr = einfo->proto.tcp.cwr;
+	diffs[1] = ((u_int16_t *)tcph)[6];
+	diffs[0] = diffs[0] ^ 0xFFFF;
+
+	if ((*pskb)->ip_summed != CHECKSUM_HW)
+		tcph->check = csum_fold(csum_partial((char *)diffs,
+						     sizeof(diffs),
+						     tcph->check^0xFFFF));
+	else
+		if (skb_checksum_help(*pskb, inward))
+			return 0;
+	(*pskb)->nfcache |= NFC_ALTERED;
+	return 1;
+}
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_ECN_info *einfo = targinfo;
+
+	if (einfo->operation & IPT_ECN_OP_SET_IP)
+		if (!set_ect_ip(pskb, einfo))
+			return NF_DROP;
+
+	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
+	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+		if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+			return NF_DROP;
+
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
+		printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_ECN_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "ECN: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MASK) {
+		printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
+			einfo->operation);
+		return 0;
+	}
+	if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
+		printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
+			einfo->ip_ect);
+		return 0;
+	}
+
+	if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
+	    && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) {
+		printk(KERN_WARNING "ECN: cannot use TCP operations on a "
+		       "non-tcp rule\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_ecn_reg = {
+	.name		= "ECN",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_ecn_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_ecn_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 00000000000..ef08733d26d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,485 @@
+/*
+ * This is a module which is used for logging packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_LOG.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables syslog logging module");
+
+static unsigned int nflog = 1;
+module_param(nflog, int, 0400);
+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+ 
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Use lock to serialize, so printks don't overlap */
+static DEFINE_SPINLOCK(log_lock);
+
+/* One level of recursion won't kill us */
+static void dump_packet(const struct ipt_log_info *info,
+			const struct sk_buff *skb,
+			unsigned int iphoff)
+{
+	struct iphdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+	if (ih == NULL) {
+		printk("TRUNCATED");
+		return;
+	}
+
+	/* Important fields:
+	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+	printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
+	       NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
+
+	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+	/* Max length: 6 "CE DF MF " */
+	if (ntohs(ih->frag_off) & IP_CE)
+		printk("CE ");
+	if (ntohs(ih->frag_off) & IP_DF)
+		printk("DF ");
+	if (ntohs(ih->frag_off) & IP_MF)
+		printk("MF ");
+
+	/* Max length: 11 "FRAG:65535 " */
+	if (ntohs(ih->frag_off) & IP_OFFSET)
+		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+	if ((info->logflags & IPT_LOG_IPOPT)
+	    && ih->ihl * 4 > sizeof(struct iphdr)) {
+		unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
+		unsigned int i, optsize;
+
+		optsize = ih->ihl * 4 - sizeof(struct iphdr);
+		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+					optsize, _opt);
+		if (op == NULL) {
+			printk("TRUNCATED");
+			return;
+		}
+
+		/* Max length: 127 "OPT (" 15*4*2chars ") " */
+		printk("OPT (");
+		for (i = 0; i < optsize; i++)
+			printk("%02X", op[i]);
+		printk(") ");
+	}
+
+	switch (ih->protocol) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph, *th;
+
+		/* Max length: 10 "PROTO=TCP " */
+		printk("PROTO=TCP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+					sizeof(_tcph), &_tcph);
+		if (th == NULL) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		/* Max length: 20 "SPT=65535 DPT=65535 " */
+		printk("SPT=%u DPT=%u ",
+		       ntohs(th->source), ntohs(th->dest));
+		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+		if (info->logflags & IPT_LOG_TCPSEQ)
+			printk("SEQ=%u ACK=%u ",
+			       ntohl(th->seq), ntohl(th->ack_seq));
+		/* Max length: 13 "WINDOW=65535 " */
+		printk("WINDOW=%u ", ntohs(th->window));
+		/* Max length: 9 "RES=0x3F " */
+		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+		if (th->cwr)
+			printk("CWR ");
+		if (th->ece)
+			printk("ECE ");
+		if (th->urg)
+			printk("URG ");
+		if (th->ack)
+			printk("ACK ");
+		if (th->psh)
+			printk("PSH ");
+		if (th->rst)
+			printk("RST ");
+		if (th->syn)
+			printk("SYN ");
+		if (th->fin)
+			printk("FIN ");
+		/* Max length: 11 "URGP=65535 " */
+		printk("URGP=%u ", ntohs(th->urg_ptr));
+
+		if ((info->logflags & IPT_LOG_TCPOPT)
+		    && th->doff * 4 > sizeof(struct tcphdr)) {
+			unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
+			unsigned char *op;
+			unsigned int i, optsize;
+
+			optsize = th->doff * 4 - sizeof(struct tcphdr);
+			op = skb_header_pointer(skb,
+						iphoff+ih->ihl*4+sizeof(_tcph),
+						optsize, _opt);
+			if (op == NULL) {
+				printk("TRUNCATED");
+				return;
+			}
+
+			/* Max length: 127 "OPT (" 15*4*2chars ") " */
+			printk("OPT (");
+			for (i = 0; i < optsize; i++)
+				printk("%02X", op[i]);
+			printk(") ");
+		}
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph, *uh;
+
+		/* Max length: 10 "PROTO=UDP " */
+		printk("PROTO=UDP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+					sizeof(_udph), &_udph);
+		if (uh == NULL) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		/* Max length: 20 "SPT=65535 DPT=65535 " */
+		printk("SPT=%u DPT=%u LEN=%u ",
+		       ntohs(uh->source), ntohs(uh->dest),
+		       ntohs(uh->len));
+		break;
+	}
+	case IPPROTO_ICMP: {
+		struct icmphdr _icmph, *ich;
+		static size_t required_len[NR_ICMP_TYPES+1]
+			= { [ICMP_ECHOREPLY] = 4,
+			    [ICMP_DEST_UNREACH]
+			    = 8 + sizeof(struct iphdr),
+			    [ICMP_SOURCE_QUENCH]
+			    = 8 + sizeof(struct iphdr),
+			    [ICMP_REDIRECT]
+			    = 8 + sizeof(struct iphdr),
+			    [ICMP_ECHO] = 4,
+			    [ICMP_TIME_EXCEEDED]
+			    = 8 + sizeof(struct iphdr),
+			    [ICMP_PARAMETERPROB]
+			    = 8 + sizeof(struct iphdr),
+			    [ICMP_TIMESTAMP] = 20,
+			    [ICMP_TIMESTAMPREPLY] = 20,
+			    [ICMP_ADDRESS] = 12,
+			    [ICMP_ADDRESSREPLY] = 12 };
+
+		/* Max length: 11 "PROTO=ICMP " */
+		printk("PROTO=ICMP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+					 sizeof(_icmph), &_icmph);
+		if (ich == NULL) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		/* Max length: 18 "TYPE=255 CODE=255 " */
+		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		if (ich->type <= NR_ICMP_TYPES
+		    && required_len[ich->type]
+		    && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		switch (ich->type) {
+		case ICMP_ECHOREPLY:
+		case ICMP_ECHO:
+			/* Max length: 19 "ID=65535 SEQ=65535 " */
+			printk("ID=%u SEQ=%u ",
+			       ntohs(ich->un.echo.id),
+			       ntohs(ich->un.echo.sequence));
+			break;
+
+		case ICMP_PARAMETERPROB:
+			/* Max length: 14 "PARAMETER=255 " */
+			printk("PARAMETER=%u ",
+			       ntohl(ich->un.gateway) >> 24);
+			break;
+		case ICMP_REDIRECT:
+			/* Max length: 24 "GATEWAY=255.255.255.255 " */
+			printk("GATEWAY=%u.%u.%u.%u ",
+			       NIPQUAD(ich->un.gateway));
+			/* Fall through */
+		case ICMP_DEST_UNREACH:
+		case ICMP_SOURCE_QUENCH:
+		case ICMP_TIME_EXCEEDED:
+			/* Max length: 3+maxlen */
+			if (!iphoff) { /* Only recurse once. */
+				printk("[");
+				dump_packet(info, skb,
+					    iphoff + ih->ihl*4+sizeof(_icmph));
+				printk("] ");
+			}
+
+			/* Max length: 10 "MTU=65535 " */
+			if (ich->type == ICMP_DEST_UNREACH
+			    && ich->code == ICMP_FRAG_NEEDED)
+				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+		}
+		break;
+	}
+	/* Max Length */
+	case IPPROTO_AH: {
+		struct ip_auth_hdr _ahdr, *ah;
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+		
+		/* Max length: 9 "PROTO=AH " */
+		printk("PROTO=AH ");
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+					sizeof(_ahdr), &_ahdr);
+		if (ah == NULL) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		/* Length: 15 "SPI=0xF1234567 " */
+		printk("SPI=0x%x ", ntohl(ah->spi));
+		break;
+	}
+	case IPPROTO_ESP: {
+		struct ip_esp_hdr _esph, *eh;
+
+		/* Max length: 10 "PROTO=ESP " */
+		printk("PROTO=ESP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+					sizeof(_esph), &_esph);
+		if (eh == NULL) {
+			printk("INCOMPLETE [%u bytes] ",
+			       skb->len - iphoff - ih->ihl*4);
+			break;
+		}
+
+		/* Length: 15 "SPI=0xF1234567 " */
+		printk("SPI=0x%x ", ntohl(eh->spi));
+		break;
+	}
+	/* Max length: 10 "PROTO 255 " */
+	default:
+		printk("PROTO=%u ", ih->protocol);
+	}
+
+	/* Max length: 15 "UID=4294967295 " */
+ 	if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
+ 			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+	}
+
+	/* Proto    Max log string length */
+	/* IP:      40+46+6+11+127 = 230 */
+	/* TCP:     10+max(25,20+30+13+9+32+11+127) = 252 */
+	/* UDP:     10+max(25,20) = 35 */
+	/* ICMP:    11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+	/* ESP:     10+max(25)+15 = 50 */
+	/* AH:      9+max(25)+15 = 49 */
+	/* unknown: 10 */
+
+	/* (ICMP allows recursion one level deep) */
+	/* maxlen =  IP + ICMP +  IP + max(TCP,UDP,ICMP,unknown) */
+	/* maxlen = 230+   91  + 230 + 252 = 803 */
+}
+
+static void
+ipt_log_packet(unsigned int hooknum,
+	       const struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       const struct ipt_log_info *loginfo,
+	       const char *level_string,
+	       const char *prefix)
+{
+	spin_lock_bh(&log_lock);
+	printk(level_string);
+	printk("%sIN=%s OUT=%s ",
+	       prefix == NULL ? loginfo->prefix : prefix,
+	       in ? in->name : "",
+	       out ? out->name : "");
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge) {
+		struct net_device *physindev = skb->nf_bridge->physindev;
+		struct net_device *physoutdev = skb->nf_bridge->physoutdev;
+
+		if (physindev && in != physindev)
+			printk("PHYSIN=%s ", physindev->name);
+		if (physoutdev && out != physoutdev)
+			printk("PHYSOUT=%s ", physoutdev->name);
+	}
+#endif
+
+	if (in && !out) {
+		/* MAC logging for input chain only. */
+		printk("MAC=");
+		if (skb->dev && skb->dev->hard_header_len
+		    && skb->mac.raw != (void*)skb->nh.iph) {
+			int i;
+			unsigned char *p = skb->mac.raw;
+			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+				printk("%02x%c", *p,
+				       i==skb->dev->hard_header_len - 1
+				       ? ' ':':');
+		} else
+			printk(" ");
+	}
+
+	dump_packet(loginfo, skb, 0);
+	printk("\n");
+	spin_unlock_bh(&log_lock);
+}
+
+static unsigned int
+ipt_log_target(struct sk_buff **pskb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       unsigned int hooknum,
+	       const void *targinfo,
+	       void *userinfo)
+{
+	const struct ipt_log_info *loginfo = targinfo;
+	char level_string[4] = "< >";
+
+	level_string[1] = '0' + (loginfo->level % 8);
+	ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
+
+	return IPT_CONTINUE;
+}
+
+static void
+ipt_logfn(unsigned int hooknum,
+	  const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const char *prefix)
+{
+	struct ipt_log_info loginfo = { 
+		.level = 0, 
+		.logflags = IPT_LOG_MASK, 
+		.prefix = "" 
+	};
+
+	ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
+}
+
+static int ipt_log_checkentry(const char *tablename,
+			      const struct ipt_entry *e,
+			      void *targinfo,
+			      unsigned int targinfosize,
+			      unsigned int hook_mask)
+{
+	const struct ipt_log_info *loginfo = targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) {
+		DEBUGP("LOG: targinfosize %u != %u\n",
+		       targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info)));
+		return 0;
+	}
+
+	if (loginfo->level >= 8) {
+		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+		return 0;
+	}
+
+	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
+		DEBUGP("LOG: prefix term %i\n",
+		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_log_reg = {
+	.name		= "LOG",
+	.target		= ipt_log_target,
+	.checkentry	= ipt_log_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	if (ipt_register_target(&ipt_log_reg))
+		return -EINVAL;
+	if (nflog)
+		nf_log_register(PF_INET, &ipt_logfn);
+	
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	if (nflog)
+		nf_log_unregister(PF_INET, &ipt_logfn);
+	ipt_unregister_target(&ipt_log_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
new file mode 100644
index 00000000000..33c6f9b63b8
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -0,0 +1,162 @@
+/* This is a module which is used for setting the NFMARK field of an skb. */
+
+/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_MARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables MARK modification module");
+
+static unsigned int
+target_v0(struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const void *targinfo,
+	  void *userinfo)
+{
+	const struct ipt_mark_target_info *markinfo = targinfo;
+
+	if((*pskb)->nfmark != markinfo->mark) {
+		(*pskb)->nfmark = markinfo->mark;
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return IPT_CONTINUE;
+}
+
+static unsigned int
+target_v1(struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const void *targinfo,
+	  void *userinfo)
+{
+	const struct ipt_mark_target_info_v1 *markinfo = targinfo;
+	int mark = 0;
+
+	switch (markinfo->mode) {
+	case IPT_MARK_SET:
+		mark = markinfo->mark;
+		break;
+		
+	case IPT_MARK_AND:
+		mark = (*pskb)->nfmark & markinfo->mark;
+		break;
+		
+	case IPT_MARK_OR:
+		mark = (*pskb)->nfmark | markinfo->mark;
+		break;
+	}
+
+	if((*pskb)->nfmark != mark) {
+		(*pskb)->nfmark = mark;
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return IPT_CONTINUE;
+}
+
+
+static int
+checkentry_v0(const char *tablename,
+	      const struct ipt_entry *e,
+	      void *targinfo,
+	      unsigned int targinfosize,
+	      unsigned int hook_mask)
+{
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
+		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int
+checkentry_v1(const char *tablename,
+	      const struct ipt_entry *e,
+	      void *targinfo,
+	      unsigned int targinfosize,
+	      unsigned int hook_mask)
+{
+	struct ipt_mark_target_info_v1 *markinfo = targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
+		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	if (markinfo->mode != IPT_MARK_SET
+	    && markinfo->mode != IPT_MARK_AND
+	    && markinfo->mode != IPT_MARK_OR) {
+		printk(KERN_WARNING "MARK: unknown mode %u\n",
+		       markinfo->mode);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_mark_reg_v0 = {
+	.name		= "MARK",
+	.target		= target_v0,
+	.checkentry	= checkentry_v0,
+	.me		= THIS_MODULE,
+	.revision	= 0,
+};
+
+static struct ipt_target ipt_mark_reg_v1 = {
+	.name		= "MARK",
+	.target		= target_v1,
+	.checkentry	= checkentry_v1,
+	.me		= THIS_MODULE,
+	.revision	= 1,
+};
+
+static int __init init(void)
+{
+	int err;
+
+	err = ipt_register_target(&ipt_mark_reg_v0);
+	if (!err) {
+		err = ipt_register_target(&ipt_mark_reg_v1);
+		if (err)
+			ipt_unregister_target(&ipt_mark_reg_v0);
+	}
+	return err;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_mark_reg_v0);
+	ipt_unregister_target(&ipt_mark_reg_v1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
new file mode 100644
index 00000000000..57e9f6cf1c3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -0,0 +1,207 @@
+/* Masquerade.  Simple mapping which alters range to a local IP address
+   (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables MASQUERADE target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Lock protects masq region inside conntrack */
+static DECLARE_RWLOCK(masq_lock);
+
+/* FIXME: Multiple targets. --RR */
+static int
+masquerade_check(const char *tablename,
+		 const struct ipt_entry *e,
+		 void *targinfo,
+		 unsigned int targinfosize,
+		 unsigned int hook_mask)
+{
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP("masquerade_check: bad table `%s'.\n", tablename);
+		return 0;
+	}
+	if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+		DEBUGP("masquerade_check: size %u != %u.\n",
+		       targinfosize, sizeof(*mr));
+		return 0;
+	}
+	if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
+		DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask);
+		return 0;
+	}
+	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+		DEBUGP("masquerade_check: bad MAP_IPS.\n");
+		return 0;
+	}
+	if (mr->rangesize != 1) {
+		DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize);
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned int
+masquerade_target(struct sk_buff **pskb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  unsigned int hooknum,
+		  const void *targinfo,
+		  void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	const struct ip_nat_multi_range_compat *mr;
+	struct ip_nat_range newrange;
+	struct rtable *rt;
+	u_int32_t newsrc;
+
+	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+
+	/* FIXME: For the moment, don't do local packets, breaks
+	   testsuite for 2.3.49 --RR */
+	if ((*pskb)->sk)
+		return NF_ACCEPT;
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+	                    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+
+	mr = targinfo;
+	rt = (struct rtable *)(*pskb)->dst;
+	newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+	if (!newsrc) {
+		printk("MASQUERADE: %s ate my IP address\n", out->name);
+		return NF_DROP;
+	}
+
+	WRITE_LOCK(&masq_lock);
+	ct->nat.masq_index = out->ifindex;
+	WRITE_UNLOCK(&masq_lock);
+
+	/* Transfer from original range. */
+	newrange = ((struct ip_nat_range)
+		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+		  newsrc, newsrc,
+		  mr->range[0].min, mr->range[0].max });
+
+	/* Hand modified range to generic setup. */
+	return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static inline int
+device_cmp(struct ip_conntrack *i, void *ifindex)
+{
+	int ret;
+
+	READ_LOCK(&masq_lock);
+	ret = (i->nat.masq_index == (int)(long)ifindex);
+	READ_UNLOCK(&masq_lock);
+
+	return ret;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event,
+			     void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	if (event == NETDEV_DOWN) {
+		/* Device was downed.  Search entire table for
+		   conntracks which were associated with that device,
+		   and forget them. */
+		IP_NF_ASSERT(dev->ifindex != 0);
+
+		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+
+	if (event == NETDEV_DOWN) {
+		/* IP address was deleted.  Search entire table for
+		   conntracks which were associated with that device,
+		   and forget them. */
+		IP_NF_ASSERT(dev->ifindex != 0);
+
+		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+static struct ipt_target masquerade = {
+	.name		= "MASQUERADE",
+	.target		= masquerade_target,
+	.checkentry	= masquerade_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	ret = ipt_register_target(&masquerade);
+
+	if (ret == 0) {
+		/* Register for device down reports */
+		register_netdevice_notifier(&masq_dev_notifier);
+		/* Register IP address change reports */
+		register_inetaddr_notifier(&masq_inet_notifier);
+	}
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&masquerade);
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);	
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
new file mode 100644
index 00000000000..06254b29d03
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -0,0 +1,117 @@
+/* NETMAP - static NAT mapping of IP network addresses (1:1).
+ * The mapping can be applied to source (POSTROUTING),
+ * destination (PREROUTING), or both (with separate rules).
+ */
+
+/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+
+#define MODULENAME "NETMAP"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
+MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+check(const char *tablename,
+      const struct ipt_entry *e,
+      void *targinfo,
+      unsigned int targinfosize,
+      unsigned int hook_mask)
+{
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP(MODULENAME":check: bad table `%s'.\n", tablename);
+		return 0;
+	}
+	if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+		DEBUGP(MODULENAME":check: size %u.\n", targinfosize);
+		return 0;
+	}
+	if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) {
+		DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask);
+		return 0;
+	}
+	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+		DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
+		return 0;
+	}
+	if (mr->rangesize != 1) {
+		DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize);
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	u_int32_t new_ip, netmask;
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+	struct ip_nat_range newrange;
+
+	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+		     || hooknum == NF_IP_POST_ROUTING);
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
+
+	if (hooknum == NF_IP_PRE_ROUTING)
+		new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+	else
+		new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+	new_ip |= mr->range[0].min_ip & netmask;
+
+	newrange = ((struct ip_nat_range)
+		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+		  new_ip, new_ip,
+		  mr->range[0].min, mr->range[0].max });
+
+	/* Hand modified range to generic setup. */
+	return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target target_module = { 
+	.name 		= MODULENAME,
+	.target 	= target, 
+	.checkentry 	= check,
+    	.me 		= THIS_MODULE 
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&target_module);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&target_module);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
new file mode 100644
index 00000000000..a4bb9b3bc29
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NOTRACK.c
@@ -0,0 +1,76 @@
+/* This is a module which is used for setting up fake conntracks
+ * on packets so that they are not seen by the conntrack/NAT code.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	/* Previously seen (loopback)? Ignore. */
+	if ((*pskb)->nfct != NULL)
+		return IPT_CONTINUE;
+
+	/* Attach fake conntrack entry. 
+	   If there is a real ct entry correspondig to this packet, 
+	   it'll hang aroun till timing out. We don't deal with it
+	   for performance reasons. JK */
+	(*pskb)->nfct = &ip_conntrack_untracked.ct_general;
+	(*pskb)->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get((*pskb)->nfct);
+
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	if (targinfosize != 0) {
+		printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
+		       targinfosize);
+		return 0;
+	}
+
+	if (strcmp(tablename, "raw") != 0) {
+		printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_notrack_reg = { 
+	.name = "NOTRACK", 
+	.target = target, 
+	.checkentry = checkentry,
+	.me = THIS_MODULE 
+};
+
+static int __init init(void)
+{
+	if (ipt_register_target(&ipt_notrack_reg))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_notrack_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
new file mode 100644
index 00000000000..d2e13447678
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -0,0 +1,129 @@
+/* Redirect.  Simple mapping which alters dst to a local IP address. */
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <net/protocol.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables REDIRECT target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* FIXME: Take multiple ranges --RR */
+static int
+redirect_check(const char *tablename,
+	       const struct ipt_entry *e,
+	       void *targinfo,
+	       unsigned int targinfosize,
+	       unsigned int hook_mask)
+{
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP("redirect_check: bad table `%s'.\n", table);
+		return 0;
+	}
+	if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+		DEBUGP("redirect_check: size %u.\n", targinfosize);
+		return 0;
+	}
+	if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
+		DEBUGP("redirect_check: bad hooks %x.\n", hook_mask);
+		return 0;
+	}
+	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+		DEBUGP("redirect_check: bad MAP_IPS.\n");
+		return 0;
+	}
+	if (mr->rangesize != 1) {
+		DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize);
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned int
+redirect_target(struct sk_buff **pskb,
+		const struct net_device *in,
+		const struct net_device *out,
+		unsigned int hooknum,
+		const void *targinfo,
+		void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	u_int32_t newdst;
+	const struct ip_nat_multi_range_compat *mr = targinfo;
+	struct ip_nat_range newrange;
+
+	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+		     || hooknum == NF_IP_LOCAL_OUT);
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	/* Local packets: make them go to loopback */
+	if (hooknum == NF_IP_LOCAL_OUT)
+		newdst = htonl(0x7F000001);
+	else {
+		struct in_device *indev;
+
+		/* Device might not have an associated in_device. */
+		indev = (struct in_device *)(*pskb)->dev->ip_ptr;
+		if (indev == NULL || indev->ifa_list == NULL)
+			return NF_DROP;
+
+		/* Grab first address on interface. */
+		newdst = indev->ifa_list->ifa_local;
+	}
+
+	/* Transfer from original range. */
+	newrange = ((struct ip_nat_range)
+		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+		  newdst, newdst,
+		  mr->range[0].min, mr->range[0].max });
+
+	/* Hand modified range to generic setup. */
+	return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target redirect_reg = {
+	.name		= "REDIRECT",
+	.target		= redirect_target,
+	.checkentry	= redirect_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&redirect_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&redirect_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
new file mode 100644
index 00000000000..266d6497928
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -0,0 +1,335 @@
+/*
+ * This is a module which is used for rejecting packets.
+ * Added support for customized reject packets (Jozsef Kadlecsik).
+ * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_REJECT.h>
+#ifdef CONFIG_BRIDGE_NETFILTER
+#include <linux/netfilter_bridge.h>
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables REJECT target module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static inline struct rtable *route_reverse(struct sk_buff *skb, 
+					   struct tcphdr *tcph, int hook)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct dst_entry *odst;
+	struct flowi fl = {};
+	struct rtable *rt;
+
+	/* We don't require ip forwarding to be enabled to be able to
+	 * send a RST reply for bridged traffic. */
+	if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+	    || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+	   ) {
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		if (hook == NF_IP_LOCAL_IN)
+			fl.nl_u.ip4_u.saddr = iph->daddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return NULL;
+	} else {
+		/* non-local src, find valid iif to satisfy
+		 * rp-filter when calling ip_route_input. */
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		if (ip_route_output_key(&rt, &fl) != 0)
+			return NULL;
+
+		odst = skb->dst;
+		if (ip_route_input(skb, iph->saddr, iph->daddr,
+		                   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
+			dst_release(&rt->u.dst);
+			return NULL;
+		}
+		dst_release(&rt->u.dst);
+		rt = (struct rtable *)skb->dst;
+		skb->dst = odst;
+
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		fl.nl_u.ip4_u.saddr = iph->daddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+	}
+
+	if (rt->u.dst.error) {
+		dst_release(&rt->u.dst);
+		return NULL;
+	}
+
+	fl.proto = IPPROTO_TCP;
+	fl.fl_ip_sport = tcph->dest;
+	fl.fl_ip_dport = tcph->source;
+
+	if (xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0)) {
+		dst_release(&rt->u.dst);
+		rt = NULL;
+	}
+
+	return rt;
+}
+
+/* Send RST reply */
+static void send_reset(struct sk_buff *oldskb, int hook)
+{
+	struct sk_buff *nskb;
+	struct tcphdr _otcph, *oth, *tcph;
+	struct rtable *rt;
+	u_int16_t tmp_port;
+	u_int32_t tmp_addr;
+	int needs_ack;
+	int hh_len;
+
+	/* IP header checks: fragment. */
+	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+		return;
+
+	oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+				 sizeof(_otcph), &_otcph);
+	if (oth == NULL)
+ 		return;
+
+	/* No RST for RST. */
+	if (oth->rst)
+		return;
+
+	/* FIXME: Check checksum --RR */
+	if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
+		return;
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	/* We need a linear, writeable skb.  We also need to expand
+	   headroom in case hh_len of incoming interface < hh_len of
+	   outgoing interface */
+	nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+			       GFP_ATOMIC);
+	if (!nskb) {
+		dst_release(&rt->u.dst);
+		return;
+	}
+
+	dst_release(nskb->dst);
+	nskb->dst = &rt->u.dst;
+
+	/* This packet will not be the same as the other: clear nf fields */
+	nf_reset(nskb);
+	nskb->nfcache = 0;
+	nskb->nfmark = 0;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(nskb->nf_bridge);
+	nskb->nf_bridge = NULL;
+#endif
+
+	tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+
+	/* Swap source and dest */
+	tmp_addr = nskb->nh.iph->saddr;
+	nskb->nh.iph->saddr = nskb->nh.iph->daddr;
+	nskb->nh.iph->daddr = tmp_addr;
+	tmp_port = tcph->source;
+	tcph->source = tcph->dest;
+	tcph->dest = tmp_port;
+
+	/* Truncate to length (no data) */
+	tcph->doff = sizeof(struct tcphdr)/4;
+	skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
+	nskb->nh.iph->tot_len = htons(nskb->len);
+
+	if (tcph->ack) {
+		needs_ack = 0;
+		tcph->seq = oth->ack_seq;
+		tcph->ack_seq = 0;
+	} else {
+		needs_ack = 1;
+		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
+				      + oldskb->len - oldskb->nh.iph->ihl*4
+				      - (oth->doff<<2));
+		tcph->seq = 0;
+	}
+
+	/* Reset flags */
+	((u_int8_t *)tcph)[13] = 0;
+	tcph->rst = 1;
+	tcph->ack = needs_ack;
+
+	tcph->window = 0;
+	tcph->urg_ptr = 0;
+
+	/* Adjust TCP checksum */
+	tcph->check = 0;
+	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
+				   nskb->nh.iph->saddr,
+				   nskb->nh.iph->daddr,
+				   csum_partial((char *)tcph,
+						sizeof(struct tcphdr), 0));
+
+	/* Adjust IP TTL, DF */
+	nskb->nh.iph->ttl = MAXTTL;
+	/* Set DF, id = 0 */
+	nskb->nh.iph->frag_off = htons(IP_DF);
+	nskb->nh.iph->id = 0;
+
+	/* Adjust IP checksum */
+	nskb->nh.iph->check = 0;
+	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
+					   nskb->nh.iph->ihl);
+
+	/* "Never happens" */
+	if (nskb->len > dst_mtu(nskb->dst))
+		goto free_nskb;
+
+	nf_ct_attach(nskb, oldskb);
+
+	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
+		dst_output);
+	return;
+
+ free_nskb:
+	kfree_skb(nskb);
+}
+
+static inline void send_unreach(struct sk_buff *skb_in, int code)
+{
+	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}	
+
+static unsigned int reject(struct sk_buff **pskb,
+			   const struct net_device *in,
+			   const struct net_device *out,
+			   unsigned int hooknum,
+			   const void *targinfo,
+			   void *userinfo)
+{
+	const struct ipt_reject_info *reject = targinfo;
+
+	/* Our naive response construction doesn't deal with IP
+           options, and probably shouldn't try. */
+	if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+		return NF_DROP;
+
+	/* WARNING: This code causes reentry within iptables.
+	   This means that the iptables jump stack is now crap.  We
+	   must return an absolute verdict. --RR */
+    	switch (reject->with) {
+    	case IPT_ICMP_NET_UNREACHABLE:
+    		send_unreach(*pskb, ICMP_NET_UNREACH);
+    		break;
+    	case IPT_ICMP_HOST_UNREACHABLE:
+    		send_unreach(*pskb, ICMP_HOST_UNREACH);
+    		break;
+    	case IPT_ICMP_PROT_UNREACHABLE:
+    		send_unreach(*pskb, ICMP_PROT_UNREACH);
+    		break;
+    	case IPT_ICMP_PORT_UNREACHABLE:
+    		send_unreach(*pskb, ICMP_PORT_UNREACH);
+    		break;
+    	case IPT_ICMP_NET_PROHIBITED:
+    		send_unreach(*pskb, ICMP_NET_ANO);
+    		break;
+	case IPT_ICMP_HOST_PROHIBITED:
+    		send_unreach(*pskb, ICMP_HOST_ANO);
+    		break;
+    	case IPT_ICMP_ADMIN_PROHIBITED:
+		send_unreach(*pskb, ICMP_PKT_FILTERED);
+		break;
+	case IPT_TCP_RESET:
+		send_reset(*pskb, hooknum);
+	case IPT_ICMP_ECHOREPLY:
+		/* Doesn't happen. */
+		break;
+	}
+
+	return NF_DROP;
+}
+
+static int check(const char *tablename,
+		 const struct ipt_entry *e,
+		 void *targinfo,
+		 unsigned int targinfosize,
+		 unsigned int hook_mask)
+{
+ 	const struct ipt_reject_info *rejinfo = targinfo;
+
+ 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
+  		DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
+  		return 0;
+  	}
+
+	/* Only allow these for packet filtering. */
+	if (strcmp(tablename, "filter") != 0) {
+		DEBUGP("REJECT: bad table `%s'.\n", tablename);
+		return 0;
+	}
+	if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
+			   | (1 << NF_IP_FORWARD)
+			   | (1 << NF_IP_LOCAL_OUT))) != 0) {
+		DEBUGP("REJECT: bad hook mask %X\n", hook_mask);
+		return 0;
+	}
+
+	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+		printk("REJECT: ECHOREPLY no longer supported.\n");
+		return 0;
+	} else if (rejinfo->with == IPT_TCP_RESET) {
+		/* Must specify that it's a TCP packet */
+		if (e->ip.proto != IPPROTO_TCP
+		    || (e->ip.invflags & IPT_INV_PROTO)) {
+			DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_reject_reg = {
+	.name		= "REJECT",
+	.target		= reject,
+	.checkentry	= check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_reject_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_reject_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
new file mode 100644
index 00000000000..7a0536d864a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -0,0 +1,211 @@
+/* Same.  Just like SNAT, only try to make the connections
+ * 	  between client A and server B always have the same source ip.
+ *
+ * (C) 2000 Paul `Rusty' Russell
+ * (C) 2001 Martin Josefsson
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
+ * 	* copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
+ * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
+ * 	* added --nodst to not include destination-ip in new source
+ * 	  calculations.
+ *	* added some more sanity-checks.
+ * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
+ * 	* fixed a buggy if-statement in same_check(), should have
+ * 	  used ntohl() but didn't.
+ * 	* added support for multiple ranges. IPT_SAME_MAX_RANGE is
+ * 	  defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
+ * 	  and is currently set to 10.
+ * 	* added support for 1-address range, nice to have now that
+ * 	  we have multiple ranges.
+ */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <net/protocol.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ipt_SAME.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
+MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+same_check(const char *tablename,
+	      const struct ipt_entry *e,
+	      void *targinfo,
+	      unsigned int targinfosize,
+	      unsigned int hook_mask)
+{
+	unsigned int count, countess, rangeip, index = 0;
+	struct ipt_same_info *mr = targinfo;
+
+	mr->ipnum = 0;
+
+	if (strcmp(tablename, "nat") != 0) {
+		DEBUGP("same_check: bad table `%s'.\n", tablename);
+		return 0;
+	}
+	if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
+		DEBUGP("same_check: size %u.\n", targinfosize);
+		return 0;
+	}
+	if (hook_mask & ~(1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING)) {
+		DEBUGP("same_check: bad hooks %x.\n", hook_mask);
+		return 0;
+	}
+	if (mr->rangesize < 1) {
+		DEBUGP("same_check: need at least one dest range.\n");
+		return 0;
+	}
+	if (mr->rangesize > IPT_SAME_MAX_RANGE) {
+		DEBUGP("same_check: too many ranges specified, maximum "
+				"is %u ranges\n",
+				IPT_SAME_MAX_RANGE);
+		return 0;
+	}
+	for (count = 0; count < mr->rangesize; count++) {
+		if (ntohl(mr->range[count].min_ip) >
+				ntohl(mr->range[count].max_ip)) {
+			DEBUGP("same_check: min_ip is larger than max_ip in "
+				"range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
+				NIPQUAD(mr->range[count].min_ip),
+				NIPQUAD(mr->range[count].max_ip));
+			return 0;
+		}
+		if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
+			DEBUGP("same_check: bad MAP_IPS.\n");
+			return 0;
+		}
+		rangeip = (ntohl(mr->range[count].max_ip) - 
+					ntohl(mr->range[count].min_ip) + 1);
+		mr->ipnum += rangeip;
+		
+		DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip);
+	}
+	DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum);
+	
+	mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
+	if (!mr->iparray) {
+		DEBUGP("same_check: Couldn't allocate %u bytes "
+			"for %u ipaddresses!\n", 
+			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+		return 0;
+	}
+	DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n",
+			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+	
+	for (count = 0; count < mr->rangesize; count++) {
+		for (countess = ntohl(mr->range[count].min_ip);
+				countess <= ntohl(mr->range[count].max_ip);
+					countess++) {
+			mr->iparray[index] = countess;
+			DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' "
+				"in index %u.\n",
+				HIPQUAD(countess), index);
+			index++;
+		}
+	}
+	return 1;
+}
+
+static void 
+same_destroy(void *targinfo,
+		unsigned int targinfosize)
+{
+	struct ipt_same_info *mr = targinfo;
+
+	kfree(mr->iparray);
+	
+	DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n",
+			(sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
+}
+
+static unsigned int
+same_target(struct sk_buff **pskb,
+		const struct net_device *in,
+		const struct net_device *out,
+		unsigned int hooknum,
+		const void *targinfo,
+		void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	u_int32_t tmpip, aindex, new_ip;
+	const struct ipt_same_info *same = targinfo;
+	struct ip_nat_range newrange;
+	const struct ip_conntrack_tuple *t;
+
+	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+			hooknum == NF_IP_POST_ROUTING);
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+
+	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
+	/* Base new source on real src ip and optionally dst ip,
+	   giving some hope for consistency across reboots.
+	   Here we calculate the index in same->iparray which
+	   holds the ipaddress we should use */
+	
+	tmpip = ntohl(t->src.ip);
+
+	if (!(same->info & IPT_SAME_NODST))
+		tmpip += ntohl(t->dst.ip);
+	
+	aindex = tmpip % same->ipnum;
+
+	new_ip = htonl(same->iparray[aindex]);
+
+	DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
+			"new src=%u.%u.%u.%u\n",
+			NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip),
+			NIPQUAD(new_ip));
+
+	/* Transfer from original range. */
+	newrange = ((struct ip_nat_range)
+		{ same->range[0].flags, new_ip, new_ip,
+		  /* FIXME: Use ports from correct range! */
+		  same->range[0].min, same->range[0].max });
+
+	/* Hand modified range to generic setup. */
+	return ip_nat_setup_info(ct, &newrange, hooknum);
+}
+
+static struct ipt_target same_reg = { 
+	.name		= "SAME",
+	.target		= same_target,
+	.checkentry	= same_check,
+	.destroy	= same_destroy,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&same_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&same_reg);
+}
+
+module_init(init);
+module_exit(fini);
+
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
new file mode 100644
index 00000000000..1049050b2bf
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -0,0 +1,262 @@
+/*
+ * This is a module which is used for setting the MSS option in TCP packets.
+ *
+ * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TCPMSS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables TCP MSS modification module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static u_int16_t
+cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+	u_int32_t diffs[] = { oldvalinv, newval };
+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+                                      oldcheck^0xFFFF));
+}
+
+static inline unsigned int
+optlen(const u_int8_t *opt, unsigned int offset)
+{
+	/* Beware zero-length options: make finite progress */
+	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
+	else return opt[offset+1];
+}
+
+static unsigned int
+ipt_tcpmss_target(struct sk_buff **pskb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  unsigned int hooknum,
+		  const void *targinfo,
+		  void *userinfo)
+{
+	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	u_int16_t tcplen, newtotlen, oldval, newmss;
+	unsigned int i;
+	u_int8_t *opt;
+
+	if (!skb_ip_make_writable(pskb, (*pskb)->len))
+		return NF_DROP;
+
+	iph = (*pskb)->nh.iph;
+	tcplen = (*pskb)->len - iph->ihl*4;
+
+	tcph = (void *)iph + iph->ihl*4;
+
+	/* Since it passed flags test in tcp match, we know it is is
+	   not a fragment, and has data >= tcp header length.  SYN
+	   packets should not contain data: if they did, then we risk
+	   running over MTU, sending Frag Needed and breaking things
+	   badly. --RR */
+	if (tcplen != tcph->doff*4) {
+		if (net_ratelimit())
+			printk(KERN_ERR
+			       "ipt_tcpmss_target: bad length (%d bytes)\n",
+			       (*pskb)->len);
+		return NF_DROP;
+	}
+
+	if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+		if(!(*pskb)->dst) {
+			if (net_ratelimit())
+				printk(KERN_ERR
+			       		"ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+			return NF_DROP; /* or IPT_CONTINUE ?? */
+		}
+
+		if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
+			if (net_ratelimit())
+				printk(KERN_ERR
+		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
+			return NF_DROP; /* or IPT_CONTINUE ?? */
+		}
+
+		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+	} else
+		newmss = tcpmssinfo->mss;
+
+ 	opt = (u_int8_t *)tcph;
+	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
+		if ((opt[i] == TCPOPT_MSS) &&
+		    ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
+		    (opt[i+1] == TCPOLEN_MSS)) {
+			u_int16_t oldmss;
+
+			oldmss = (opt[i+2] << 8) | opt[i+3];
+
+			if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
+				(oldmss <= newmss))
+					return IPT_CONTINUE;
+
+			opt[i+2] = (newmss & 0xff00) >> 8;
+			opt[i+3] = (newmss & 0x00ff);
+
+			tcph->check = cheat_check(htons(oldmss)^0xFFFF,
+						  htons(newmss),
+						  tcph->check);
+
+			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
+			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
+			       " (from %u to %u)\n", 
+			       NIPQUAD((*pskb)->nh.iph->saddr),
+			       ntohs(tcph->source),
+			       NIPQUAD((*pskb)->nh.iph->daddr),
+			       ntohs(tcph->dest),
+			       oldmss, newmss);
+			goto retmodified;
+		}
+	}
+
+	/*
+	 * MSS Option not found ?! add it..
+	 */
+	if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
+		struct sk_buff *newskb;
+
+		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
+					 TCPOLEN_MSS, GFP_ATOMIC);
+		if (!newskb) {
+			if (net_ratelimit())
+				printk(KERN_ERR "ipt_tcpmss_target:"
+				       " unable to allocate larger skb\n");
+			return NF_DROP;
+		}
+
+		kfree_skb(*pskb);
+		*pskb = newskb;
+		iph = (*pskb)->nh.iph;
+		tcph = (void *)iph + iph->ihl*4;
+	}
+
+	skb_put((*pskb), TCPOLEN_MSS);
+
+ 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
+	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+
+	tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
+				  htons(tcplen + TCPOLEN_MSS), tcph->check);
+	tcplen += TCPOLEN_MSS;
+
+	opt[0] = TCPOPT_MSS;
+	opt[1] = TCPOLEN_MSS;
+	opt[2] = (newmss & 0xff00) >> 8;
+	opt[3] = (newmss & 0x00ff);
+
+	tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+
+	oldval = ((u_int16_t *)tcph)[6];
+	tcph->doff += TCPOLEN_MSS/4;
+	tcph->check = cheat_check(oldval ^ 0xFFFF,
+				  ((u_int16_t *)tcph)[6], tcph->check);
+
+	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
+	iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
+				 newtotlen, iph->check);
+	iph->tot_len = newtotlen;
+
+	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
+	       "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
+	       NIPQUAD((*pskb)->nh.iph->saddr),
+	       ntohs(tcph->source),
+	       NIPQUAD((*pskb)->nh.iph->daddr),
+	       ntohs(tcph->dest),
+	       newmss);
+
+ retmodified:
+	/* We never hw checksum SYN packets.  */
+	BUG_ON((*pskb)->ip_summed == CHECKSUM_HW);
+
+	(*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
+	return IPT_CONTINUE;
+}
+
+#define TH_SYN 0x02
+
+static inline int find_syn_match(const struct ipt_entry_match *m)
+{
+	const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
+
+	if (strcmp(m->u.kernel.match->name, "tcp") == 0
+	    && (tcpinfo->flg_cmp & TH_SYN)
+	    && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+		return 1;
+
+	return 0;
+}
+
+/* Must specify -p tcp --syn/--tcp-flags SYN */
+static int
+ipt_tcpmss_checkentry(const char *tablename,
+		      const struct ipt_entry *e,
+		      void *targinfo,
+		      unsigned int targinfosize,
+		      unsigned int hook_mask)
+{
+	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
+		DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
+		       targinfosize, IPT_ALIGN(sizeof(struct ipt_tcpmss_info)));
+		return 0;
+	}
+
+
+	if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 
+			((hook_mask & ~((1 << NF_IP_FORWARD)
+			   	| (1 << NF_IP_LOCAL_OUT)
+			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
+		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+		return 0;
+	}
+
+	if (e->ip.proto == IPPROTO_TCP
+	    && !(e->ip.invflags & IPT_INV_PROTO)
+	    && IPT_MATCH_ITERATE(e, find_syn_match))
+		return 1;
+
+	printk("TCPMSS: Only works on TCP SYN packets\n");
+	return 0;
+}
+
+static struct ipt_target ipt_tcpmss_reg = {
+	.name		= "TCPMSS",
+	.target		= ipt_tcpmss_target,
+	.checkentry	= ipt_tcpmss_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_tcpmss_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_tcpmss_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
new file mode 100644
index 00000000000..85c70d240f8
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -0,0 +1,105 @@
+/* This is a module which is used for setting the TOS field of a packet. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TOS.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables TOS mangling module");
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_tos_target_info *tosinfo = targinfo;
+
+	if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
+		u_int16_t diffs[2];
+
+		if (!skb_ip_make_writable(pskb, sizeof(struct iphdr)))
+			return NF_DROP;
+
+		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
+		(*pskb)->nh.iph->tos
+			= ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
+			| tosinfo->tos;
+		diffs[1] = htons((*pskb)->nh.iph->tos);
+		(*pskb)->nh.iph->check
+			= csum_fold(csum_partial((char *)diffs,
+						 sizeof(diffs),
+						 (*pskb)->nh.iph->check
+						 ^0xFFFF));
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return IPT_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
+		printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	if (tos != IPTOS_LOWDELAY
+	    && tos != IPTOS_THROUGHPUT
+	    && tos != IPTOS_RELIABILITY
+	    && tos != IPTOS_MINCOST
+	    && tos != IPTOS_NORMALSVC) {
+		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_tos_reg = {
+	.name		= "TOS",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_tos_reg);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_tos_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
new file mode 100644
index 00000000000..6f2cefbe16c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -0,0 +1,419 @@
+/*
+ * netfilter module for userspace packet logging daemons
+ *
+ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * 2000/09/22 ulog-cprange feature added
+ * 2001/01/04 in-kernel queue as proposed by Sebastian Zander 
+ * 						<zander@fokus.gmd.de>
+ * 2001/01/30 per-rule nlgroup conflicts with global queue. 
+ *            nlgroup now global (sysctl)
+ * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
+ * 	      module loadtime -HW
+ * 2002/07/07 remove broken nflog_rcv() function -HW
+ * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
+ * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
+ * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
+ *	      resulting in bogus 'error during NLMSG_PUT' messages.
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This module accepts two parameters: 
+ * 
+ * nlbufsiz:
+ *   The parameter specifies how big the buffer for each netlink multicast
+ * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
+ * get accumulated in the kernel until they are sent to userspace. It is
+ * NOT possible to allocate more than 128kB, and it is strongly discouraged,
+ * because atomically allocating 128kB inside the network rx softirq is not
+ * reliable. Please also keep in mind that this buffer size is allocated for
+ * each nlgroup you are using, so the total kernel memory usage increases
+ * by that factor.
+ *
+ * flushtimeout:
+ *   Specify, after how many hundredths of a second the queue should be
+ *   flushed even if it is not full yet.
+ *
+ * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <net/sock.h>
+#include <linux/bitops.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("iptables userspace logging module");
+
+#define ULOG_NL_EVENT		111		/* Harald's favorite number */
+#define ULOG_MAXNLGROUPS	32		/* numer of nlgroups */
+
+#if 0
+#define DEBUGP(format, args...) printk("%s:%s:" format, \
+                                       __FILE__, __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
+
+static unsigned int nlbufsiz = 4096;
+module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */
+MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
+
+static unsigned int flushtimeout = 10;
+module_param(flushtimeout, int, 0600);
+MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
+
+static unsigned int nflog = 1;
+module_param(nflog, int, 0400);
+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+
+/* global data structures */
+
+typedef struct {
+	unsigned int qlen;		/* number of nlmsgs' in the skb */
+	struct nlmsghdr *lastnlh;	/* netlink header of last msg in skb */
+	struct sk_buff *skb;		/* the pre-allocated skb */
+	struct timer_list timer;	/* the timer function */
+} ulog_buff_t;
+
+static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];	/* array of buffers */
+
+static struct sock *nflognl;	/* our socket */
+static DECLARE_LOCK(ulog_lock);	/* spinlock */
+
+/* send one ulog_buff_t to userspace */
+static void ulog_send(unsigned int nlgroupnum)
+{
+	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+
+	if (timer_pending(&ub->timer)) {
+		DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+		del_timer(&ub->timer);
+	}
+
+	/* last nlmsg needs NLMSG_DONE */
+	if (ub->qlen > 1)
+		ub->lastnlh->nlmsg_type = NLMSG_DONE;
+
+	NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum);
+	DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n",
+		ub->qlen, nlgroupnum);
+	netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC);
+
+	ub->qlen = 0;
+	ub->skb = NULL;
+	ub->lastnlh = NULL;
+
+}
+
+
+/* timer function to flush queue in flushtimeout time */
+static void ulog_timer(unsigned long data)
+{
+	DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
+
+	/* lock to protect against somebody modifying our structure
+	 * from ipt_ulog_target at the same time */
+	LOCK_BH(&ulog_lock);
+	ulog_send(data);
+	UNLOCK_BH(&ulog_lock);
+}
+
+static struct sk_buff *ulog_alloc_skb(unsigned int size)
+{
+	struct sk_buff *skb;
+
+	/* alloc skb which should be big enough for a whole
+	 * multipart message. WARNING: has to be <= 131000
+	 * due to slab allocator restrictions */
+
+	skb = alloc_skb(nlbufsiz, GFP_ATOMIC);
+	if (!skb) {
+		PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n",
+			nlbufsiz);
+
+		/* try to allocate only as much as we need for 
+		 * current packet */
+
+		skb = alloc_skb(size, GFP_ATOMIC);
+		if (!skb)
+			PRINTR("ipt_ULOG: can't even allocate %ub\n", size);
+	}
+
+	return skb;
+}
+
+static void ipt_ulog_packet(unsigned int hooknum,
+			    const struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    const struct ipt_ulog_info *loginfo,
+			    const char *prefix)
+{
+	ulog_buff_t *ub;
+	ulog_packet_msg_t *pm;
+	size_t size, copy_len;
+	struct nlmsghdr *nlh;
+
+	/* ffs == find first bit set, necessary because userspace
+	 * is already shifting groupnumber, but we need unshifted.
+	 * ffs() returns [1..32], we need [0..31] */
+	unsigned int groupnum = ffs(loginfo->nl_group) - 1;
+
+	/* calculate the size of the skb needed */
+	if ((loginfo->copy_range == 0) ||
+	    (loginfo->copy_range > skb->len)) {
+		copy_len = skb->len;
+	} else {
+		copy_len = loginfo->copy_range;
+	}
+
+	size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+
+	ub = &ulog_buffers[groupnum];
+	
+	LOCK_BH(&ulog_lock);
+
+	if (!ub->skb) {
+		if (!(ub->skb = ulog_alloc_skb(size)))
+			goto alloc_failure;
+	} else if (ub->qlen >= loginfo->qthreshold ||
+		   size > skb_tailroom(ub->skb)) {
+		/* either the queue len is too high or we don't have 
+		 * enough room in nlskb left. send it to userspace. */
+
+		ulog_send(groupnum);
+
+		if (!(ub->skb = ulog_alloc_skb(size)))
+			goto alloc_failure;
+	}
+
+	DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen, 
+		loginfo->qthreshold);
+
+	/* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
+	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, 
+			sizeof(*pm)+copy_len);
+	ub->qlen++;
+
+	pm = NLMSG_DATA(nlh);
+
+	/* We might not have a timestamp, get one */
+	if (skb->stamp.tv_sec == 0)
+		do_gettimeofday((struct timeval *)&skb->stamp);
+
+	/* copy hook, prefix, timestamp, payload, etc. */
+	pm->data_len = copy_len;
+	pm->timestamp_sec = skb->stamp.tv_sec;
+	pm->timestamp_usec = skb->stamp.tv_usec;
+	pm->mark = skb->nfmark;
+	pm->hook = hooknum;
+	if (prefix != NULL)
+		strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+	else if (loginfo->prefix[0] != '\0')
+		strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
+	else
+		*(pm->prefix) = '\0';
+
+	if (in && in->hard_header_len > 0
+	    && skb->mac.raw != (void *) skb->nh.iph
+	    && in->hard_header_len <= ULOG_MAC_LEN) {
+		memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+		pm->mac_len = in->hard_header_len;
+	} else
+		pm->mac_len = 0;
+
+	if (in)
+		strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+	else
+		pm->indev_name[0] = '\0';
+
+	if (out)
+		strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+	else
+		pm->outdev_name[0] = '\0';
+
+	/* copy_len <= skb->len, so can't fail. */
+	if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
+		BUG();
+	
+	/* check if we are building multi-part messages */
+	if (ub->qlen > 1) {
+		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
+	}
+
+	ub->lastnlh = nlh;
+
+	/* if timer isn't already running, start it */
+	if (!timer_pending(&ub->timer)) {
+		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
+		add_timer(&ub->timer);
+	}
+
+	/* if threshold is reached, send message to userspace */
+	if (ub->qlen >= loginfo->qthreshold) {
+		if (loginfo->qthreshold > 1)
+			nlh->nlmsg_type = NLMSG_DONE;
+		ulog_send(groupnum);
+	}
+
+	UNLOCK_BH(&ulog_lock);
+
+	return;
+
+nlmsg_failure:
+	PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
+
+alloc_failure:
+	PRINTR("ipt_ULOG: Error building netlink message\n");
+
+	UNLOCK_BH(&ulog_lock);
+}
+
+static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    unsigned int hooknum,
+				    const void *targinfo, void *userinfo)
+{
+	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+
+	ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+ 
+ 	return IPT_CONTINUE;
+}
+ 
+static void ipt_logfn(unsigned int hooknum,
+		      const struct sk_buff *skb,
+		      const struct net_device *in,
+		      const struct net_device *out,
+		      const char *prefix)
+{
+	struct ipt_ulog_info loginfo = { 
+		.nl_group = ULOG_DEFAULT_NLGROUP,
+		.copy_range = 0,
+		.qthreshold = ULOG_DEFAULT_QTHRESHOLD,
+		.prefix = ""
+	};
+
+	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+}
+
+static int ipt_ulog_checkentry(const char *tablename,
+			       const struct ipt_entry *e,
+			       void *targinfo,
+			       unsigned int targinfosize,
+			       unsigned int hookmask)
+{
+	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ulog_info))) {
+		DEBUGP("ipt_ULOG: targinfosize %u != 0\n", targinfosize);
+		return 0;
+	}
+
+	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
+		DEBUGP("ipt_ULOG: prefix term %i\n",
+		       loginfo->prefix[sizeof(loginfo->prefix) - 1]);
+		return 0;
+	}
+
+	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
+		DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n",
+			loginfo->qthreshold);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_target ipt_ulog_reg = {
+	.name		= "ULOG",
+	.target		= ipt_ulog_target,
+	.checkentry	= ipt_ulog_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int i;
+
+	DEBUGP("ipt_ULOG: init module\n");
+
+	if (nlbufsiz >= 128*1024) {
+		printk("Netlink buffer has to be <= 128kB\n");
+		return -EINVAL;
+	}
+
+	/* initialize ulog_buffers */
+	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+		init_timer(&ulog_buffers[i].timer);
+		ulog_buffers[i].timer.function = ulog_timer;
+		ulog_buffers[i].timer.data = i;
+	}
+
+	nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+	if (!nflognl)
+		return -ENOMEM;
+
+	if (ipt_register_target(&ipt_ulog_reg) != 0) {
+		sock_release(nflognl->sk_socket);
+		return -EINVAL;
+	}
+	if (nflog)
+		nf_log_register(PF_INET, &ipt_logfn);
+	
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ulog_buff_t *ub;
+	int i;
+
+	DEBUGP("ipt_ULOG: cleanup_module\n");
+
+	if (nflog)
+		nf_log_unregister(PF_INET, &ipt_logfn);
+	ipt_unregister_target(&ipt_ulog_reg);
+	sock_release(nflognl->sk_socket);
+
+	/* remove pending timers and free allocated skb's */
+	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+		ub = &ulog_buffers[i];
+		if (timer_pending(&ub->timer)) {
+			DEBUGP("timer was pending, deleting\n");
+			del_timer(&ub->timer);
+		}
+
+		if (ub->skb) {
+			kfree_skb(ub->skb);
+			ub->skb = NULL;
+		}
+	}
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
new file mode 100644
index 00000000000..f5909a4c3fc
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -0,0 +1,77 @@
+/*
+ *  iptables module to match inet_addr_type() of an ip.
+ *
+ *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4/ipt_addrtype.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("iptables addrtype match");
+
+static inline int match_type(u_int32_t addr, u_int16_t mask)
+{
+	return !!(mask & (1 << inet_addr_type(addr)));
+}
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+		 const struct net_device *out, const void *matchinfo,
+		 int offset, int *hotdrop)
+{
+	const struct ipt_addrtype_info *info = matchinfo;
+	const struct iphdr *iph = skb->nh.iph;
+	int ret = 1;
+
+	if (info->source)
+		ret &= match_type(iph->saddr, info->source)^info->invert_source;
+	if (info->dest)
+		ret &= match_type(iph->daddr, info->dest)^info->invert_dest;
+	
+	return ret;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+		      void *matchinfo, unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
+		printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+		       matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match addrtype_match = {
+	.name		= "addrtype",
+	.match		= match,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&addrtype_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&addrtype_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
new file mode 100644
index 00000000000..a0fea847cb7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -0,0 +1,117 @@
+/* Kernel module to match AH parameters. */
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_ah.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("iptables AH SPI match module");
+
+#ifdef DEBUG_CONNTRACK
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+	int r=0;
+        duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+        	min,spi,max);
+	r=(spi >= min && spi <= max) ^ invert;
+	duprintf(" result %s\n",r? "PASS" : "FAILED");
+	return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	struct ip_auth_hdr _ahdr, *ah;
+	const struct ipt_ah *ahinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				sizeof(_ahdr), &_ahdr);
+	if (ah == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("Dropping evil AH tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return spi_match(ahinfo->spis[0], ahinfo->spis[1],
+			 ntohl(ah->spi),
+			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_ah *ahinfo = matchinfo;
+
+	/* Must specify proto == AH, and no unknown invflags */
+	if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
+		duprintf("ipt_ah: Protocol %u != %u\n", ip->proto,
+			 IPPROTO_AH);
+		return 0;
+	}
+	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_ah))) {
+		duprintf("ipt_ah: matchsize %u != %u\n",
+			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_ah)));
+		return 0;
+	}
+	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
+		duprintf("ipt_ah: unknown flags %X\n",
+			 ahinfo->invflags);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match ah_match = {
+	.name		= "ah",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&ah_match);
+}
+
+static void __exit cleanup(void)
+{
+	ipt_unregister_match(&ah_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/ipv4/netfilter/ipt_comment.c
new file mode 100644
index 00000000000..6b76a1ea524
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_comment.c
@@ -0,0 +1,59 @@
+/*
+ * Implements a dummy match to allow attaching comments to rules
+ *
+ * 2003-05-13 Brad Fisher (brad@info-link.net)
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_comment.h>
+
+MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
+MODULE_DESCRIPTION("iptables comment match module");
+MODULE_LICENSE("GPL");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	/* We always match */
+	return 1;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	/* Check the size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
+		return 0;
+	return 1;
+}
+
+static struct ipt_match comment_match = {
+	.name		= "comment",
+	.match		= match,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&comment_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&comment_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
new file mode 100644
index 00000000000..2706f96cea5
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -0,0 +1,81 @@
+/* This kernel module matches connection mark values set by the
+ * CONNMARK target
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
+MODULE_DESCRIPTION("IP tables connmark match module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_connmark.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_connmark_info *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (!ct)
+		return 0;
+
+	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match connmark_match = {
+	.name = "connmark",
+	.match = &match,
+	.checkentry = &checkentry,
+	.me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&connmark_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&connmark_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
new file mode 100644
index 00000000000..c1d22801b7c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_conntrack.c
@@ -0,0 +1,136 @@
+/* Kernel module to match connection tracking information.
+ * Superset of Rusty's minimalistic state match.
+ *
+ * (C) 2001  Marc Boucher (marc@mbsi.ca).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables connection tracking match module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_conntrack_info *sinfo = matchinfo;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int statebit;
+
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+
+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+
+	if (ct == &ip_conntrack_untracked)
+		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+	else if (ct)
+ 		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ 	else
+ 		statebit = IPT_CONNTRACK_STATE_INVALID;
+ 
+	if(sinfo->flags & IPT_CONNTRACK_STATE) {
+		if (ct) {
+			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
+				statebit |= IPT_CONNTRACK_STATE_SNAT;
+
+			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
+				statebit |= IPT_CONNTRACK_STATE_DNAT;
+		}
+
+		if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_PROTO) {
+		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+                	return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
+		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_STATUS) {
+		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+			return 0;
+	}
+
+	if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+		unsigned long expires;
+
+		if(!ct)
+			return 0;
+
+		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+
+		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match conntrack_match = {
+	.name		= "conntrack",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	need_ip_conntrack();
+	return ipt_register_match(&conntrack_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&conntrack_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
new file mode 100644
index 00000000000..5df52a64a5d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -0,0 +1,63 @@
+/* IP tables module for matching the value of the IPv4 DSCP field
+ *
+ * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_dscp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables DSCP matching module");
+MODULE_LICENSE("GPL");
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+		 const struct net_device *out, const void *matchinfo,
+		 int offset, int *hotdrop)
+{
+	const struct ipt_dscp_info *info = matchinfo;
+	const struct iphdr *iph = skb->nh.iph;
+
+	u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
+
+	return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+		      void *matchinfo, unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_dscp_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match dscp_match = {
+	.name		= "dscp",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&dscp_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&dscp_match);
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
new file mode 100644
index 00000000000..b6f7181e89c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -0,0 +1,131 @@
+/* IP tables module for matching the value of the IPv4 and TCP ECN bits
+ *
+ * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ecn.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables ECN matching module");
+MODULE_LICENSE("GPL");
+
+static inline int match_ip(const struct sk_buff *skb,
+			   const struct ipt_ecn_info *einfo)
+{
+	return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+}
+
+static inline int match_tcp(const struct sk_buff *skb,
+			    const struct ipt_ecn_info *einfo,
+			    int *hotdrop)
+{
+	struct tcphdr _tcph, *th;
+
+	/* In practice, TCP match does this, so can't fail.  But let's
+	 * be good citizens.
+	 */
+	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*hotdrop = 0;
+		return 0;
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+			if (th->ece == 1)
+				return 0;
+		} else {
+			if (th->ece == 0)
+				return 0;
+		}
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+			if (th->cwr == 1)
+				return 0;
+		} else {
+			if (th->cwr == 0)
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+		 const struct net_device *out, const void *matchinfo,
+		 int offset, int *hotdrop)
+{
+	const struct ipt_ecn_info *info = matchinfo;
+
+	if (info->operation & IPT_ECN_OP_MATCH_IP)
+		if (!match_ip(skb, info))
+			return 0;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+		if (skb->nh.iph->protocol != IPPROTO_TCP)
+			return 0;
+		if (!match_tcp(skb, info, hotdrop))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+		      void *matchinfo, unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	const struct ipt_ecn_info *info = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
+		return 0;
+
+	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+		return 0;
+
+	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+		return 0;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
+	    && ip->proto != IPPROTO_TCP) {
+		printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
+		       " non-tcp packets\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match ecn_match = {
+	.name		= "ecn",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&ecn_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&ecn_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
new file mode 100644
index 00000000000..e1d0dd31e11
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -0,0 +1,118 @@
+/* Kernel module to match ESP parameters. */
+
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_esp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("iptables ESP SPI match module");
+
+#ifdef DEBUG_CONNTRACK
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+	int r=0;
+        duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+        	min,spi,max);
+	r=(spi >= min && spi <= max) ^ invert;
+	duprintf(" result %s\n",r? "PASS" : "FAILED");
+	return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	struct ip_esp_hdr _esp, *eh;
+	const struct ipt_esp *espinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				sizeof(_esp), &_esp);
+	if (eh == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("Dropping evil ESP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return spi_match(espinfo->spis[0], espinfo->spis[1],
+			 ntohl(eh->spi),
+			 !!(espinfo->invflags & IPT_ESP_INV_SPI));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_esp *espinfo = matchinfo;
+
+	/* Must specify proto == ESP, and no unknown invflags */
+	if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
+		duprintf("ipt_esp: Protocol %u != %u\n", ip->proto,
+			 IPPROTO_ESP);
+		return 0;
+	}
+	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_esp))) {
+		duprintf("ipt_esp: matchsize %u != %u\n",
+			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_esp)));
+		return 0;
+	}
+	if (espinfo->invflags & ~IPT_ESP_INV_MASK) {
+		duprintf("ipt_esp: unknown flags %X\n",
+			 espinfo->invflags);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match esp_match = {
+	.name		= "esp",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&esp_match);
+}
+
+static void __exit cleanup(void)
+{
+	ipt_unregister_match(&esp_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
new file mode 100644
index 00000000000..f1937190cd7
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -0,0 +1,731 @@
+/* iptables match extension to limit the number of packets per second
+ * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ *
+ * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
+ *
+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
+ *
+ * based on ipt_limit.c by:
+ * J�r�me de Vivie	<devivie@info.enserb.u-bordeaux.fr>
+ * Herv� Eychenne	<eychenne@info.enserb.u-bordeaux.fr>
+ * Rusty Russell	<rusty@rustcorp.com.au>
+ *
+ * The general idea is to create a hash table for every dstip and have a
+ * seperate limit counter per tuple.  This way you can do something like 'limit
+ * the number of syn packets for each of my internal addresses.
+ *
+ * Ideally this would just be implemented as a general 'hash' match, which would
+ * allow us to attach any iptables target to it's hash buckets.  But this is
+ * not possible in the current iptables architecture.  As always, pkttables for
+ * 2.7.x will help ;)
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_hashlimit.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+
+/* FIXME: this is just for IP_NF_ASSERRT */
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
+
+/* need to declare this at the top */
+static struct proc_dir_entry *hashlimit_procdir;
+static struct file_operations dl_file_ops;
+
+/* hash table crap */
+
+struct dsthash_dst {
+	u_int32_t src_ip;
+	u_int32_t dst_ip;
+	/* ports have to be consecutive !!! */
+	u_int16_t src_port;
+	u_int16_t dst_port;
+};
+
+struct dsthash_ent {
+	/* static / read-only parts in the beginning */
+	struct hlist_node node;
+	struct dsthash_dst dst;
+
+	/* modified structure members in the end */
+	unsigned long expires;		/* precalculated expiry time */
+	struct {
+		unsigned long prev;	/* last modification */
+		u_int32_t credit;
+		u_int32_t credit_cap, cost;
+	} rateinfo;
+};
+
+struct ipt_hashlimit_htable {
+	struct hlist_node node;		/* global list of all htables */
+	atomic_t use;
+
+	struct hashlimit_cfg cfg;	/* config */
+
+	/* used internally */
+	spinlock_t lock;		/* lock for list_head */
+	u_int32_t rnd;			/* random seed for hash */
+	struct timer_list timer;	/* timer for gc */
+	atomic_t count;			/* number entries in table */
+
+	/* seq_file stuff */
+	struct proc_dir_entry *pde;
+
+	struct hlist_head hash[0];	/* hashtable itself */
+};
+
+static DECLARE_LOCK(hashlimit_lock);	/* protects htables list */
+static DECLARE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
+static HLIST_HEAD(hashlimit_htables);
+static kmem_cache_t *hashlimit_cachep;
+
+static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
+{
+	return (ent->dst.dst_ip == b->dst_ip 
+		&& ent->dst.dst_port == b->dst_port
+		&& ent->dst.src_port == b->src_port
+		&& ent->dst.src_ip == b->src_ip);
+}
+
+static inline u_int32_t
+hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
+{
+	return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), 
+			     dst->src_ip, ht->rnd) % ht->cfg.size);
+}
+
+static inline struct dsthash_ent *
+__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+{
+	struct dsthash_ent *ent;
+	struct hlist_node *pos;
+	u_int32_t hash = hash_dst(ht, dst);
+
+	if (!hlist_empty(&ht->hash[hash]))
+		hlist_for_each_entry(ent, pos, &ht->hash[hash], node) {
+			if (dst_cmp(ent, dst)) {
+				return ent;
+			}
+		}
+	
+	return NULL;
+}
+
+/* allocate dsthash_ent, initialize dst, put in htable and lock it */
+static struct dsthash_ent *
+__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
+{
+	struct dsthash_ent *ent;
+
+	/* initialize hash with random val at the time we allocate
+	 * the first hashtable entry */
+	if (!ht->rnd)
+		get_random_bytes(&ht->rnd, 4);
+
+	if (ht->cfg.max &&
+	    atomic_read(&ht->count) >= ht->cfg.max) {
+		/* FIXME: do something. question is what.. */
+		if (net_ratelimit())
+			printk(KERN_WARNING 
+				"ipt_hashlimit: max count of %u reached\n", 
+				ht->cfg.max);
+		return NULL;
+	}
+
+	ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
+	if (!ent) {
+		if (net_ratelimit())
+			printk(KERN_ERR 
+				"ipt_hashlimit: can't allocate dsthash_ent\n");
+		return NULL;
+	}
+
+	atomic_inc(&ht->count);
+
+	ent->dst.dst_ip = dst->dst_ip;
+	ent->dst.dst_port = dst->dst_port;
+	ent->dst.src_ip = dst->src_ip;
+	ent->dst.src_port = dst->src_port;
+
+	hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
+
+	return ent;
+}
+
+static inline void 
+__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent)
+{
+	hlist_del(&ent->node);
+	kmem_cache_free(hashlimit_cachep, ent);
+	atomic_dec(&ht->count);
+}
+static void htable_gc(unsigned long htlong);
+
+static int htable_create(struct ipt_hashlimit_info *minfo)
+{
+	int i;
+	unsigned int size;
+	struct ipt_hashlimit_htable *hinfo;
+
+	if (minfo->cfg.size)
+		size = minfo->cfg.size;
+	else {
+		size = (((num_physpages << PAGE_SHIFT) / 16384)
+			 / sizeof(struct list_head));
+		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+			size = 8192;
+		if (size < 16)
+			size = 16;
+	}
+	/* FIXME: don't use vmalloc() here or anywhere else -HW */
+	hinfo = vmalloc(sizeof(struct ipt_hashlimit_htable)
+			+ (sizeof(struct list_head) * size));
+	if (!hinfo) {
+		printk(KERN_ERR "ipt_hashlimit: Unable to create hashtable\n");
+		return -1;
+	}
+	minfo->hinfo = hinfo;
+
+	/* copy match config into hashtable config */
+	memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+	hinfo->cfg.size = size;
+	if (!hinfo->cfg.max)
+		hinfo->cfg.max = 8 * hinfo->cfg.size;
+	else if (hinfo->cfg.max < hinfo->cfg.size)
+		hinfo->cfg.max = hinfo->cfg.size;
+
+	for (i = 0; i < hinfo->cfg.size; i++)
+		INIT_HLIST_HEAD(&hinfo->hash[i]);
+
+	atomic_set(&hinfo->count, 0);
+	atomic_set(&hinfo->use, 1);
+	hinfo->rnd = 0;
+	spin_lock_init(&hinfo->lock);
+	hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir);
+	if (!hinfo->pde) {
+		vfree(hinfo);
+		return -1;
+	}
+	hinfo->pde->proc_fops = &dl_file_ops;
+	hinfo->pde->data = hinfo;
+
+	init_timer(&hinfo->timer);
+	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
+	hinfo->timer.data = (unsigned long )hinfo;
+	hinfo->timer.function = htable_gc;
+	add_timer(&hinfo->timer);
+
+	LOCK_BH(&hashlimit_lock);
+	hlist_add_head(&hinfo->node, &hashlimit_htables);
+	UNLOCK_BH(&hashlimit_lock);
+
+	return 0;
+}
+
+static int select_all(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
+{
+	return 1;
+}
+
+static int select_gc(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
+{
+	return (jiffies >= he->expires);
+}
+
+static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht,
+		 		int (*select)(struct ipt_hashlimit_htable *ht, 
+					      struct dsthash_ent *he))
+{
+	int i;
+
+	IP_NF_ASSERT(ht->cfg.size && ht->cfg.max);
+
+	/* lock hash table and iterate over it */
+	spin_lock_bh(&ht->lock);
+	for (i = 0; i < ht->cfg.size; i++) {
+		struct dsthash_ent *dh;
+		struct hlist_node *pos, *n;
+		hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
+			if ((*select)(ht, dh))
+				__dsthash_free(ht, dh);
+		}
+	}
+	spin_unlock_bh(&ht->lock);
+}
+
+/* hash table garbage collector, run by timer */
+static void htable_gc(unsigned long htlong)
+{
+	struct ipt_hashlimit_htable *ht = (struct ipt_hashlimit_htable *)htlong;
+
+	htable_selective_cleanup(ht, select_gc);
+
+	/* re-add the timer accordingly */
+	ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
+	add_timer(&ht->timer);
+}
+
+static void htable_destroy(struct ipt_hashlimit_htable *hinfo)
+{
+	/* remove timer, if it is pending */
+	if (timer_pending(&hinfo->timer))
+		del_timer(&hinfo->timer);
+
+	/* remove proc entry */
+	remove_proc_entry(hinfo->pde->name, hashlimit_procdir);
+
+	htable_selective_cleanup(hinfo, select_all);
+	vfree(hinfo);
+}
+
+static struct ipt_hashlimit_htable *htable_find_get(char *name)
+{
+	struct ipt_hashlimit_htable *hinfo;
+	struct hlist_node *pos;
+
+	LOCK_BH(&hashlimit_lock);
+	hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
+		if (!strcmp(name, hinfo->pde->name)) {
+			atomic_inc(&hinfo->use);
+			UNLOCK_BH(&hashlimit_lock);
+			return hinfo;
+		}
+	}
+	UNLOCK_BH(&hashlimit_lock);
+
+	return NULL;
+}
+
+static void htable_put(struct ipt_hashlimit_htable *hinfo)
+{
+	if (atomic_dec_and_test(&hinfo->use)) {
+		LOCK_BH(&hashlimit_lock);
+		hlist_del(&hinfo->node);
+		UNLOCK_BH(&hashlimit_lock);
+		htable_destroy(hinfo);
+	}
+}
+
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+   this algorithm.  The `average rate' in jiffies becomes your initial
+   amount of credit `credit' and the most credit you can ever have
+   `credit_cap'.  The `peak rate' becomes the cost of passing the
+   test, `cost'.
+
+   `prev' tracks the last packet hit: you gain one credit per jiffy.
+   If you get credit balance more than this, the extra credit is
+   discarded.  Every time the match passes, you lose `cost' credits;
+   if you don't have that many, the test fails.
+
+   See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+   To get the maximum range, we multiply by this factor (ie. you get N
+   credits per jiffy).  We want to allow a rate as low as 1 per day
+   (slowest userspace tool allows), which means
+   CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
+*/
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+/* Precision saver. */
+static inline u_int32_t
+user2credits(u_int32_t user)
+{
+	/* If multiplying would overflow... */
+	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		/* Divide first. */
+		return (user / IPT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+
+	return (user * HZ * CREDITS_PER_JIFFY) / IPT_HASHLIMIT_SCALE;
+}
+
+static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
+{
+	dh->rateinfo.credit += (now - xchg(&dh->rateinfo.prev, now)) 
+					* CREDITS_PER_JIFFY;
+	if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
+		dh->rateinfo.credit = dh->rateinfo.credit_cap;
+}
+
+static inline int get_ports(const struct sk_buff *skb, int offset, 
+			    u16 ports[2])
+{
+	union {
+		struct tcphdr th;
+		struct udphdr uh;
+		sctp_sctphdr_t sctph;
+	} hdr_u, *ptr_u;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 1;
+
+	/* Must be big enough to read ports (both UDP and TCP have
+	   them at the start). */
+	ptr_u = skb_header_pointer(skb, skb->nh.iph->ihl*4, 8, &hdr_u); 
+	if (!ptr_u)
+		return 1;
+
+	switch (skb->nh.iph->protocol) {
+		case IPPROTO_TCP:
+			ports[0] = ptr_u->th.source;
+			ports[1] = ptr_u->th.dest;
+			break;
+		case IPPROTO_UDP:
+			ports[0] = ptr_u->uh.source;
+			ports[1] = ptr_u->uh.dest;
+			break;
+		case IPPROTO_SCTP:
+			ports[0] = ptr_u->sctph.source;
+			ports[1] = ptr_u->sctph.dest;
+			break;
+		default:
+			/* all other protocols don't supprot per-port hash
+			 * buckets */
+			ports[0] = ports[1] = 0;
+			break;
+	}
+
+	return 0;
+}
+
+
+static int
+hashlimit_match(const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const void *matchinfo,
+		int offset,
+		int *hotdrop)
+{
+	struct ipt_hashlimit_info *r = 
+		((struct ipt_hashlimit_info *)matchinfo)->u.master;
+	struct ipt_hashlimit_htable *hinfo = r->hinfo;
+	unsigned long now = jiffies;
+	struct dsthash_ent *dh;
+	struct dsthash_dst dst;
+
+	/* build 'dst' according to hinfo->cfg and current packet */
+	memset(&dst, 0, sizeof(dst));
+	if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DIP)
+		dst.dst_ip = skb->nh.iph->daddr;
+	if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SIP)
+		dst.src_ip = skb->nh.iph->saddr;
+	if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT
+	    ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) {
+		u_int16_t ports[2];
+		if (get_ports(skb, offset, ports)) {
+			/* We've been asked to examine this packet, and we
+		 	  can't.  Hence, no choice but to drop. */
+			*hotdrop = 1;
+			return 0;
+		}
+		if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT)
+			dst.src_port = ports[0];
+		if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT)
+			dst.dst_port = ports[1];
+	} 
+
+	spin_lock_bh(&hinfo->lock);
+	dh = __dsthash_find(hinfo, &dst);
+	if (!dh) {
+		dh = __dsthash_alloc_init(hinfo, &dst);
+
+		if (!dh) {
+			/* enomem... don't match == DROP */
+			if (net_ratelimit())
+				printk(KERN_ERR "%s: ENOMEM\n", __FUNCTION__);
+			spin_unlock_bh(&hinfo->lock);
+			return 0;
+		}
+
+		dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
+
+		dh->rateinfo.prev = jiffies;
+		dh->rateinfo.credit = user2credits(hinfo->cfg.avg * 
+							hinfo->cfg.burst);
+		dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * 
+							hinfo->cfg.burst);
+		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+
+		spin_unlock_bh(&hinfo->lock);
+		return 1;
+	}
+
+	/* update expiration timeout */
+	dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+
+	rateinfo_recalc(dh, now);
+	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
+		/* We're underlimit. */
+		dh->rateinfo.credit -= dh->rateinfo.cost;
+		spin_unlock_bh(&hinfo->lock);
+		return 1;
+	}
+
+       	spin_unlock_bh(&hinfo->lock);
+
+	/* default case: we're overlimit, thus don't match */
+	return 0;
+}
+
+static int
+hashlimit_checkentry(const char *tablename,
+		     const struct ipt_ip *ip,
+		     void *matchinfo,
+		     unsigned int matchsize,
+		     unsigned int hook_mask)
+{
+	struct ipt_hashlimit_info *r = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_hashlimit_info)))
+		return 0;
+
+	/* Check for overflow. */
+	if (r->cfg.burst == 0
+	    || user2credits(r->cfg.avg * r->cfg.burst) < 
+	    				user2credits(r->cfg.avg)) {
+		printk(KERN_ERR "ipt_hashlimit: Overflow, try lower: %u/%u\n",
+		       r->cfg.avg, r->cfg.burst);
+		return 0;
+	}
+
+	if (r->cfg.mode == 0 
+	    || r->cfg.mode > (IPT_HASHLIMIT_HASH_DPT
+		          |IPT_HASHLIMIT_HASH_DIP
+			  |IPT_HASHLIMIT_HASH_SIP
+			  |IPT_HASHLIMIT_HASH_SPT))
+		return 0;
+
+	if (!r->cfg.gc_interval)
+		return 0;
+	
+	if (!r->cfg.expire)
+		return 0;
+
+	/* This is the best we've got: We cannot release and re-grab lock,
+	 * since checkentry() is called before ip_tables.c grabs ipt_mutex.  
+	 * We also cannot grab the hashtable spinlock, since htable_create will 
+	 * call vmalloc, and that can sleep.  And we cannot just re-search
+	 * the list of htable's in htable_create(), since then we would
+	 * create duplicate proc files. -HW */
+	down(&hlimit_mutex);
+	r->hinfo = htable_find_get(r->name);
+	if (!r->hinfo && (htable_create(r) != 0)) {
+		up(&hlimit_mutex);
+		return 0;
+	}
+	up(&hlimit_mutex);
+
+	/* Ugly hack: For SMP, we only want to use one set */
+	r->u.master = r;
+
+	return 1;
+}
+
+static void
+hashlimit_destroy(void *matchinfo, unsigned int matchsize)
+{
+	struct ipt_hashlimit_info *r = (struct ipt_hashlimit_info *) matchinfo;
+
+	htable_put(r->hinfo);
+}
+
+static struct ipt_match ipt_hashlimit = { 
+	.name = "hashlimit", 
+	.match = hashlimit_match, 
+	.checkentry = hashlimit_checkentry, 
+	.destroy = hashlimit_destroy,
+	.me = THIS_MODULE 
+};
+
+/* PROC stuff */
+
+static void *dl_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct ipt_hashlimit_htable *htable = pde->data;
+	unsigned int *bucket;
+
+	spin_lock_bh(&htable->lock);
+	if (*pos >= htable->cfg.size)
+		return NULL;
+
+	bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
+	if (!bucket)
+		return ERR_PTR(-ENOMEM);
+
+	*bucket = *pos;
+	return bucket;
+}
+
+static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct ipt_hashlimit_htable *htable = pde->data;
+	unsigned int *bucket = (unsigned int *)v;
+
+	*pos = ++(*bucket);
+	if (*pos >= htable->cfg.size) {
+		kfree(v);
+		return NULL;
+	}
+	return bucket;
+}
+
+static void dl_seq_stop(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct ipt_hashlimit_htable *htable = pde->data;
+	unsigned int *bucket = (unsigned int *)v;
+
+	kfree(bucket);
+
+	spin_unlock_bh(&htable->lock);
+}
+
+static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
+{
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies);
+
+	return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
+			(long)(ent->expires - jiffies)/HZ,
+			NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
+			NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
+			ent->rateinfo.credit, ent->rateinfo.credit_cap,
+			ent->rateinfo.cost);
+}
+
+static int dl_seq_show(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct ipt_hashlimit_htable *htable = pde->data;
+	unsigned int *bucket = (unsigned int *)v;
+	struct dsthash_ent *ent;
+	struct hlist_node *pos;
+
+	if (!hlist_empty(&htable->hash[*bucket]))
+		hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) {
+			if (dl_seq_real_show(ent, s)) {
+				/* buffer was filled and unable to print that tuple */
+				return 1;
+			}
+		}
+	
+	return 0;
+}
+
+static struct seq_operations dl_seq_ops = {
+	.start = dl_seq_start,
+	.next  = dl_seq_next,
+	.stop  = dl_seq_stop,
+	.show  = dl_seq_show
+};
+
+static int dl_proc_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &dl_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		sf->private = PDE(inode);
+	}
+	return ret;
+}
+
+static struct file_operations dl_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = dl_proc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+static int init_or_fini(int fini)
+{
+	int ret = 0;
+
+	if (fini)
+		goto cleanup;
+
+	if (ipt_register_match(&ipt_hashlimit)) {
+		ret = -EINVAL;
+		goto cleanup_nothing;
+	}
+
+	hashlimit_cachep = kmem_cache_create("ipt_hashlimit",
+					    sizeof(struct dsthash_ent), 0,
+					    0, NULL, NULL);
+	if (!hashlimit_cachep) {
+		printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n");
+		ret = -ENOMEM;
+		goto cleanup_unreg_match;
+	}
+
+	hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net);
+	if (!hashlimit_procdir) {
+		printk(KERN_ERR "Unable to create proc dir entry\n");
+		ret = -ENOMEM;
+		goto cleanup_free_slab;
+	}
+
+	return ret;
+
+cleanup:
+	remove_proc_entry("ipt_hashlimit", proc_net);
+cleanup_free_slab:
+	kmem_cache_destroy(hashlimit_cachep);
+cleanup_unreg_match:
+	ipt_unregister_match(&ipt_hashlimit);
+cleanup_nothing:
+	return ret;
+	
+}
+
+static int __init init(void)
+{
+	return init_or_fini(0);
+}
+
+static void __exit fini(void)
+{
+	init_or_fini(1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
new file mode 100644
index 00000000000..33fdf364d3d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -0,0 +1,113 @@
+/* iptables module to match on related connections */
+/*
+ * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *   19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
+ *   		 - Port to newnat infrastructure
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_helper.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
+MODULE_DESCRIPTION("iptables helper match module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_helper_info *info = matchinfo;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+	int ret = info->invert;
+	
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (!ct) {
+		DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+		return ret;
+	}
+
+	if (!ct->master) {
+		DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+		return ret;
+	}
+
+	READ_LOCK(&ip_conntrack_lock);
+	if (!ct->master->helper) {
+		DEBUGP("ipt_helper: master ct %p has no helper\n", 
+			exp->expectant);
+		goto out_unlock;
+	}
+
+	DEBUGP("master's name = %s , info->name = %s\n", 
+		ct->master->helper->name, info->name);
+
+	if (info->name[0] == '\0')
+		ret ^= 1;
+	else
+		ret ^= !strncmp(ct->master->helper->name, info->name, 
+		                strlen(ct->master->helper->name));
+out_unlock:
+	READ_UNLOCK(&ip_conntrack_lock);
+	return ret;
+}
+
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	struct ipt_helper_info *info = matchinfo;
+
+	info->name[29] = '\0';
+
+	/* verify size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_helper_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match helper_match = {
+	.name		= "helper",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	need_ip_conntrack();
+	return ipt_register_match(&helper_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&helper_match);
+}
+
+module_init(init);
+module_exit(fini);
+
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
new file mode 100644
index 00000000000..b835b7b2e56
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -0,0 +1,99 @@
+/*
+ * iptables module to match IP address ranges
+ *
+ * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_iprange.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("iptables arbitrary IP range match module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset, int *hotdrop)
+{
+	const struct ipt_iprange_info *info = matchinfo;
+	const struct iphdr *iph = skb->nh.iph;
+
+	if (info->flags & IPRANGE_SRC) {
+		if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
+			  || (ntohl(iph->saddr) > ntohl(info->src.max_ip)))
+			 ^ !!(info->flags & IPRANGE_SRC_INV)) {
+			DEBUGP("src IP %u.%u.%u.%u NOT in range %s"
+			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				NIPQUAD(iph->saddr),
+			        info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
+				NIPQUAD(info->src.min_ip),
+				NIPQUAD(info->src.max_ip));
+			return 0;
+		}
+	}
+	if (info->flags & IPRANGE_DST) {
+		if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip))
+			  || (ntohl(iph->daddr) > ntohl(info->dst.max_ip)))
+			 ^ !!(info->flags & IPRANGE_DST_INV)) {
+			DEBUGP("dst IP %u.%u.%u.%u NOT in range %s"
+			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				NIPQUAD(iph->daddr),
+			        info->flags & IPRANGE_DST_INV ? "(INV) " : "",
+				NIPQUAD(info->dst.min_ip),
+				NIPQUAD(info->dst.max_ip));
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	/* verify size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_iprange_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match iprange_match = 
+{ 
+	.list = { NULL, NULL }, 
+	.name = "iprange", 
+	.match = &match, 
+	.checkentry = &check, 
+	.destroy = NULL, 
+	.me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&iprange_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&iprange_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c
new file mode 100644
index 00000000000..4eabcfbda9d
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_length.c
@@ -0,0 +1,64 @@
+/* Kernel module to match packet length. */
+/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_length.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("IP tables packet length matching module");
+MODULE_LICENSE("GPL");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_length_info *info = matchinfo;
+	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+	
+	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_length_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match length_match = {
+	.name		= "length",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&length_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&length_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
new file mode 100644
index 00000000000..0c24dcc703a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_limit.c
@@ -0,0 +1,157 @@
+/* Kernel module to control the rate
+ *
+ * 2 September 1999: Changed from the target RATE to the match
+ *                   `limit', removed logging.  Did I mention that
+ *                   Alexey is a fucking genius?
+ *                   Rusty Russell (rusty@rustcorp.com.au).  */
+
+/* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
+ * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_limit.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+MODULE_DESCRIPTION("iptables rate limit match");
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+static DEFINE_SPINLOCK(limit_lock);
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+   this algorithm.  The `average rate' in jiffies becomes your initial
+   amount of credit `credit' and the most credit you can ever have
+   `credit_cap'.  The `peak rate' becomes the cost of passing the
+   test, `cost'.
+
+   `prev' tracks the last packet hit: you gain one credit per jiffy.
+   If you get credit balance more than this, the extra credit is
+   discarded.  Every time the match passes, you lose `cost' credits;
+   if you don't have that many, the test fails.
+
+   See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+   To get the maxmum range, we multiply by this factor (ie. you get N
+   credits per jiffy).  We want to allow a rate as low as 1 per day
+   (slowest userspace tool allows), which means
+   CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
+#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+
+/* Repeated shift and or gives us all 1s, final shift and add 1 gives
+ * us the power of 2 below the theoretical max, so GCC simply does a
+ * shift. */
+#define _POW2_BELOW2(x) ((x)|((x)>>1))
+#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
+#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
+#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
+#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+
+#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+
+static int
+ipt_limit_match(const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const void *matchinfo,
+		int offset,
+		int *hotdrop)
+{
+	struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master;
+	unsigned long now = jiffies;
+
+	spin_lock_bh(&limit_lock);
+	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
+	if (r->credit > r->credit_cap)
+		r->credit = r->credit_cap;
+
+	if (r->credit >= r->cost) {
+		/* We're not limited. */
+		r->credit -= r->cost;
+		spin_unlock_bh(&limit_lock);
+		return 1;
+	}
+
+       	spin_unlock_bh(&limit_lock);
+	return 0;
+}
+
+/* Precision saver. */
+static u_int32_t
+user2credits(u_int32_t user)
+{
+	/* If multiplying would overflow... */
+	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		/* Divide first. */
+		return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+
+	return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE;
+}
+
+static int
+ipt_limit_checkentry(const char *tablename,
+		     const struct ipt_ip *ip,
+		     void *matchinfo,
+		     unsigned int matchsize,
+		     unsigned int hook_mask)
+{
+	struct ipt_rateinfo *r = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo)))
+		return 0;
+
+	/* Check for overflow. */
+	if (r->burst == 0
+	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
+		printk("Overflow in ipt_limit, try lower: %u/%u\n",
+		       r->avg, r->burst);
+		return 0;
+	}
+
+	/* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies *
+	   128. */
+	r->prev = jiffies;
+	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
+	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
+	r->cost = user2credits(r->avg);
+
+	/* For SMP, we only want to use one set of counters. */
+	r->master = r;
+
+	return 1;
+}
+
+static struct ipt_match ipt_limit_reg = {
+	.name		= "limit",
+	.match		= ipt_limit_match,
+	.checkentry	= ipt_limit_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	if (ipt_register_match(&ipt_limit_reg))
+		return -EINVAL;
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&ipt_limit_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
new file mode 100644
index 00000000000..11a459e33f2
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_mac.c
@@ -0,0 +1,79 @@
+/* Kernel module to match MAC address parameters. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv4/ipt_mac.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables mac matching module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+    const struct ipt_mac_info *info = matchinfo;
+
+    /* Is mac pointer valid? */
+    return (skb->mac.raw >= skb->head
+	    && (skb->mac.raw + ETH_HLEN) <= skb->data
+	    /* If so, compare... */
+	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
+		== 0) ^ info->invert));
+}
+
+static int
+ipt_mac_checkentry(const char *tablename,
+		   const struct ipt_ip *ip,
+		   void *matchinfo,
+		   unsigned int matchsize,
+		   unsigned int hook_mask)
+{
+	/* FORWARD isn't always valid, but it's nice to be able to do --RR */
+	if (hook_mask
+	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
+		| (1 << NF_IP_FORWARD))) {
+		printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
+		return 0;
+	}
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match mac_match = {
+	.name		= "mac",
+	.match		= &match,
+	.checkentry	= &ipt_mac_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&mac_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&mac_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
new file mode 100644
index 00000000000..8955728127b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -0,0 +1,64 @@
+/* Kernel module to match NFMARK values. */
+
+/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_mark.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables mark matching module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_mark_info *info = matchinfo;
+
+	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match mark_match = {
+	.name		= "mark",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&mark_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&mark_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
new file mode 100644
index 00000000000..99e8188162e
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -0,0 +1,212 @@
+/* Kernel module to match one of a list of TCP/UDP ports: ports are in
+   the same place so we can treat them as equal. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_multiport.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables multiple port match module");
+
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the port is matched by the test, 0 otherwise. */
+static inline int
+ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags,
+	    u_int8_t count, u_int16_t src, u_int16_t dst)
+{
+	unsigned int i;
+	for (i=0; i<count; i++) {
+		if (flags != IPT_MULTIPORT_DESTINATION
+		    && portlist[i] == src)
+			return 1;
+
+		if (flags != IPT_MULTIPORT_SOURCE
+		    && portlist[i] == dst)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Returns 1 if the port is matched by the test, 0 otherwise. */
+static inline int
+ports_match_v1(const struct ipt_multiport_v1 *minfo,
+	       u_int16_t src, u_int16_t dst)
+{
+	unsigned int i;
+	u_int16_t s, e;
+
+	for (i=0; i < minfo->count; i++) {
+		s = minfo->ports[i];
+
+		if (minfo->pflags[i]) {
+			/* range port matching */
+			e = minfo->ports[++i];
+			duprintf("src or dst matches with %d-%d?\n", s, e);
+
+			if (minfo->flags == IPT_MULTIPORT_SOURCE
+			    && src >= s && src <= e)
+				return 1 ^ minfo->invert;
+			if (minfo->flags == IPT_MULTIPORT_DESTINATION
+			    && dst >= s && dst <= e)
+				return 1 ^ minfo->invert;
+			if (minfo->flags == IPT_MULTIPORT_EITHER
+			    && ((dst >= s && dst <= e)
+				|| (src >= s && src <= e)))
+				return 1 ^ minfo->invert;
+		} else {
+			/* exact port matching */
+			duprintf("src or dst matches with %d?\n", s);
+
+			if (minfo->flags == IPT_MULTIPORT_SOURCE
+			    && src == s)
+				return 1 ^ minfo->invert;
+			if (minfo->flags == IPT_MULTIPORT_DESTINATION
+			    && dst == s)
+				return 1 ^ minfo->invert;
+			if (minfo->flags == IPT_MULTIPORT_EITHER
+			    && (src == s || dst == s))
+				return 1 ^ minfo->invert;
+		}
+	}
+ 
+ 	return minfo->invert;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	u16 _ports[2], *pptr;
+	const struct ipt_multiport *multiinfo = matchinfo;
+
+	if (offset)
+		return 0;
+
+	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				  sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("ipt_multiport:"
+			 " Dropping evil offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return ports_match(multiinfo->ports,
+			   multiinfo->flags, multiinfo->count,
+			   ntohs(pptr[0]), ntohs(pptr[1]));
+}
+
+static int
+match_v1(const struct sk_buff *skb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 const void *matchinfo,
+	 int offset,
+	 int *hotdrop)
+{
+	u16 _ports[2], *pptr;
+	const struct ipt_multiport_v1 *multiinfo = matchinfo;
+
+	if (offset)
+		return 0;
+
+	pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				  sizeof(_ports), _ports);
+	if (pptr == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("ipt_multiport:"
+			 " Dropping evil offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1]));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport)));
+}
+
+static int
+checkentry_v1(const char *tablename,
+	      const struct ipt_ip *ip,
+	      void *matchinfo,
+	      unsigned int matchsize,
+	      unsigned int hook_mask)
+{
+	return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1)));
+}
+
+static struct ipt_match multiport_match = {
+	.name		= "multiport",
+	.revision	= 0,
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static struct ipt_match multiport_match_v1 = {
+	.name		= "multiport",
+	.revision	= 1,
+	.match		= &match_v1,
+	.checkentry	= &checkentry_v1,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	int err;
+
+	err = ipt_register_match(&multiport_match);
+	if (!err) {
+		err = ipt_register_match(&multiport_match_v1);
+		if (err)
+			ipt_unregister_match(&multiport_match);
+	}
+
+	return err;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&multiport_match);
+	ipt_unregister_match(&multiport_match_v1);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
new file mode 100644
index 00000000000..3b9065e0638
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -0,0 +1,217 @@
+/* Kernel module to match various things tied to sockets associated with
+   locally generated outgoing packets. */
+
+/* (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+
+#include <linux/netfilter_ipv4/ipt_owner.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables owner match");
+
+static int
+match_comm(const struct sk_buff *skb, const char *comm)
+{
+	struct task_struct *g, *p;
+	struct files_struct *files;
+	int i;
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if(strncmp(p->comm, comm, sizeof(p->comm)))
+			continue;
+
+		task_lock(p);
+		files = p->files;
+		if(files) {
+			spin_lock(&files->file_lock);
+			for (i=0; i < files->max_fds; i++) {
+				if (fcheck_files(files, i) ==
+				    skb->sk->sk_socket->file) {
+					spin_unlock(&files->file_lock);
+					task_unlock(p);
+					read_unlock(&tasklist_lock);
+					return 1;
+				}
+			}
+			spin_unlock(&files->file_lock);
+		}
+		task_unlock(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+static int
+match_pid(const struct sk_buff *skb, pid_t pid)
+{
+	struct task_struct *p;
+	struct files_struct *files;
+	int i;
+
+	read_lock(&tasklist_lock);
+	p = find_task_by_pid(pid);
+	if (!p)
+		goto out;
+	task_lock(p);
+	files = p->files;
+	if(files) {
+		spin_lock(&files->file_lock);
+		for (i=0; i < files->max_fds; i++) {
+			if (fcheck_files(files, i) ==
+			    skb->sk->sk_socket->file) {
+				spin_unlock(&files->file_lock);
+				task_unlock(p);
+				read_unlock(&tasklist_lock);
+				return 1;
+			}
+		}
+		spin_unlock(&files->file_lock);
+	}
+	task_unlock(p);
+out:
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+static int
+match_sid(const struct sk_buff *skb, pid_t sid)
+{
+	struct task_struct *g, *p;
+	struct file *file = skb->sk->sk_socket->file;
+	int i, found=0;
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		struct files_struct *files;
+		if (p->signal->session != sid)
+			continue;
+
+		task_lock(p);
+		files = p->files;
+		if (files) {
+			spin_lock(&files->file_lock);
+			for (i=0; i < files->max_fds; i++) {
+				if (fcheck_files(files, i) == file) {
+					found = 1;
+					break;
+				}
+			}
+			spin_unlock(&files->file_lock);
+		}
+		task_unlock(p);
+		if (found)
+			goto out;
+	} while_each_thread(g, p);
+out:
+	read_unlock(&tasklist_lock);
+
+	return found;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_owner_info *info = matchinfo;
+
+	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
+		return 0;
+
+	if(info->match & IPT_OWNER_UID) {
+		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
+		    !!(info->invert & IPT_OWNER_UID))
+			return 0;
+	}
+
+	if(info->match & IPT_OWNER_GID) {
+		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
+		    !!(info->invert & IPT_OWNER_GID))
+			return 0;
+	}
+
+	if(info->match & IPT_OWNER_PID) {
+		if (!match_pid(skb, info->pid) ^
+		    !!(info->invert & IPT_OWNER_PID))
+			return 0;
+	}
+
+	if(info->match & IPT_OWNER_SID) {
+		if (!match_sid(skb, info->sid) ^
+		    !!(info->invert & IPT_OWNER_SID))
+			return 0;
+	}
+
+	if(info->match & IPT_OWNER_COMM) {
+		if (!match_comm(skb, info->comm) ^
+		    !!(info->invert & IPT_OWNER_COMM))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+        if (hook_mask
+            & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) {
+                printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
+                return 0;
+        }
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) {
+		printk("Matchsize %u != %Zu\n", matchsize,
+		       IPT_ALIGN(sizeof(struct ipt_owner_info)));
+		return 0;
+	}
+#ifdef CONFIG_SMP
+	/* files->file_lock can not be used in a BH */
+	if (((struct ipt_owner_info *)matchinfo)->match
+	    & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
+		printk("ipt_owner: pid, sid and command matching is broken "
+		       "on SMP.\n");
+		return 0;
+	}
+#endif
+	return 1;
+}
+
+static struct ipt_match owner_match = {
+	.name		= "owner",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&owner_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&owner_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
new file mode 100644
index 00000000000..1a53924041f
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_physdev.c
@@ -0,0 +1,134 @@
+/* Kernel module to match the bridge port in and
+ * out device for IP packets coming into contact with a bridge. */
+
+/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ipt_physdev.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_bridge.h>
+#define MATCH   1
+#define NOMATCH 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("iptables bridge physical device match module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	int i;
+	static const char nulldevname[IFNAMSIZ];
+	const struct ipt_physdev_info *info = matchinfo;
+	unsigned int ret;
+	const char *indev, *outdev;
+	struct nf_bridge_info *nf_bridge;
+
+	/* Not a bridged IP packet or no info available yet:
+	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
+	 * the destination device will be a bridge. */
+	if (!(nf_bridge = skb->nf_bridge)) {
+		/* Return MATCH if the invert flags of the used options are on */
+		if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
+		    !(info->invert & IPT_PHYSDEV_OP_BRIDGED))
+			return NOMATCH;
+		if ((info->bitmask & IPT_PHYSDEV_OP_ISIN) &&
+		    !(info->invert & IPT_PHYSDEV_OP_ISIN))
+			return NOMATCH;
+		if ((info->bitmask & IPT_PHYSDEV_OP_ISOUT) &&
+		    !(info->invert & IPT_PHYSDEV_OP_ISOUT))
+			return NOMATCH;
+		if ((info->bitmask & IPT_PHYSDEV_OP_IN) &&
+		    !(info->invert & IPT_PHYSDEV_OP_IN))
+			return NOMATCH;
+		if ((info->bitmask & IPT_PHYSDEV_OP_OUT) &&
+		    !(info->invert & IPT_PHYSDEV_OP_OUT))
+			return NOMATCH;
+		return MATCH;
+	}
+
+	/* This only makes sense in the FORWARD and POSTROUTING chains */
+	if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
+	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
+	    !(info->invert & IPT_PHYSDEV_OP_BRIDGED)))
+		return NOMATCH;
+
+	if ((info->bitmask & IPT_PHYSDEV_OP_ISIN &&
+	    (!nf_bridge->physindev ^ !!(info->invert & IPT_PHYSDEV_OP_ISIN))) ||
+	    (info->bitmask & IPT_PHYSDEV_OP_ISOUT &&
+	    (!nf_bridge->physoutdev ^ !!(info->invert & IPT_PHYSDEV_OP_ISOUT))))
+		return NOMATCH;
+
+	if (!(info->bitmask & IPT_PHYSDEV_OP_IN))
+		goto match_outdev;
+	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)indev)[i]
+			^ ((const unsigned int *)info->physindev)[i])
+			& ((const unsigned int *)info->in_mask)[i];
+	}
+
+	if ((ret == 0) ^ !(info->invert & IPT_PHYSDEV_OP_IN))
+		return NOMATCH;
+
+match_outdev:
+	if (!(info->bitmask & IPT_PHYSDEV_OP_OUT))
+		return MATCH;
+	outdev = nf_bridge->physoutdev ?
+		 nf_bridge->physoutdev->name : nulldevname;
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)outdev)[i]
+			^ ((const unsigned int *)info->physoutdev)[i])
+			& ((const unsigned int *)info->out_mask)[i];
+	}
+
+	return (ret != 0) ^ !(info->invert & IPT_PHYSDEV_OP_OUT);
+}
+
+static int
+checkentry(const char *tablename,
+		       const struct ipt_ip *ip,
+		       void *matchinfo,
+		       unsigned int matchsize,
+		       unsigned int hook_mask)
+{
+	const struct ipt_physdev_info *info = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_physdev_info)))
+		return 0;
+	if (!(info->bitmask & IPT_PHYSDEV_OP_MASK) ||
+	    info->bitmask & ~IPT_PHYSDEV_OP_MASK)
+		return 0;
+	return 1;
+}
+
+static struct ipt_match physdev_match = {
+	.name		= "physdev",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&physdev_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&physdev_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/ipv4/netfilter/ipt_pkttype.c
new file mode 100644
index 00000000000..8ddb1dc5e5a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_pkttype.c
@@ -0,0 +1,70 @@
+/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+
+#include <linux/netfilter_ipv4/ipt_pkttype.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
+MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
+
+static int match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+    const struct ipt_pkttype_info *info = matchinfo;
+
+    return (skb->pkt_type == info->pkttype) ^ info->invert;
+}
+
+static int checkentry(const char *tablename,
+		   const struct ipt_ip *ip,
+		   void *matchinfo,
+		   unsigned int matchsize,
+		   unsigned int hook_mask)
+{
+/*
+	if (hook_mask
+	    & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
+		| (1 << NF_IP_FORWARD))) {
+		printk("ipt_pkttype: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
+		return 0;
+	}
+*/
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_pkttype_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match pkttype_match = {
+	.name		= "pkttype",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&pkttype_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&pkttype_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_realm.c b/net/ipv4/netfilter/ipt_realm.c
new file mode 100644
index 00000000000..54a6897ebaa
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_realm.c
@@ -0,0 +1,76 @@
+/* IP tables module for matching the routing realm
+ *
+ * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
+ *
+ * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4/ipt_realm.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables realm match");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_realm_info *info = matchinfo;
+	struct dst_entry *dst = skb->dst;
+    
+	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
+}
+
+static int check(const char *tablename,
+                 const struct ipt_ip *ip,
+                 void *matchinfo,
+                 unsigned int matchsize,
+                 unsigned int hook_mask)
+{
+	if (hook_mask
+	    & ~((1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
+	        (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN))) {
+		printk("ipt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
+		       "LOCAL_IN or FORWARD.\n");
+		return 0;
+	}
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_realm_info))) {
+		printk("ipt_realm: invalid matchsize.\n");
+		return 0;
+	}
+	return 1;
+}
+
+static struct ipt_match realm_match = {
+	.name		= "realm",
+	.match		= match, 
+	.checkentry	= check,
+	.me		= THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&realm_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&realm_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
new file mode 100644
index 00000000000..25ab9fabdcb
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -0,0 +1,1002 @@
+/* Kernel module to check if the source address has been seen recently. */
+/* Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */
+/* Author: Stephen Frost <sfrost@snowman.net> */
+/* Project Page: http://snowman.net/projects/ipt_recent/ */
+/* This software is distributed under the terms of the GPL, Version 2 */
+/* This copyright does not cover user programs that use kernel services
+ * by normal system calls. */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/ip.h>
+#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_recent.h>
+
+#undef DEBUG
+#define HASH_LOG 9
+
+/* Defaults, these can be overridden on the module command-line. */
+static int ip_list_tot = 100;
+static int ip_pkt_list_tot = 20;
+static int ip_list_hash_size = 0;
+static int ip_list_perms = 0644;
+#ifdef DEBUG
+static int debug = 1;
+#endif
+
+static char version[] =
+KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>.  http://snowman.net/projects/ipt_recent/\n";
+
+MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
+MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
+MODULE_LICENSE("GPL");
+module_param(ip_list_tot, int, 0400);
+module_param(ip_pkt_list_tot, int, 0400);
+module_param(ip_list_hash_size, int, 0400);
+module_param(ip_list_perms, int, 0400);
+#ifdef DEBUG
+module_param(debug, int, 0600);
+MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
+#endif
+MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
+MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
+MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files");
+
+/* Structure of our list of recently seen addresses. */
+struct recent_ip_list {
+	u_int32_t addr;
+	u_int8_t  ttl;
+	unsigned long last_seen;
+	unsigned long *last_pkts;
+	u_int32_t oldest_pkt;
+	u_int32_t hash_entry;
+	u_int32_t time_pos;
+};
+
+struct time_info_list {
+	u_int32_t position;
+	u_int32_t time;
+};
+
+/* Structure of our linked list of tables of recent lists. */
+struct recent_ip_tables {
+	char name[IPT_RECENT_NAME_LEN];
+	int count;
+	int time_pos;
+	struct recent_ip_list *table;
+	struct recent_ip_tables *next;
+	spinlock_t list_lock;
+	int *hash_table;
+	struct time_info_list *time_info;
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *status_proc;
+#endif /* CONFIG_PROC_FS */
+};
+
+/* Our current list of addresses we have recently seen.
+ * Only added to on a --set, and only updated on --set || --update 
+ */
+static struct recent_ip_tables *r_tables = NULL;
+
+/* We protect r_list with this spinlock so two processors are not modifying
+ * the list at the same time. 
+ */
+static DEFINE_SPINLOCK(recent_lock);
+
+#ifdef CONFIG_PROC_FS
+/* Our /proc/net/ipt_recent entry */
+static struct proc_dir_entry *proc_net_ipt_recent = NULL;
+#endif
+
+/* Function declaration for later. */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop);
+
+/* Function to hash a given address into the hash table of table_size size */
+static int hash_func(unsigned int addr, int table_size)
+{
+	int result = 0;
+	unsigned int value = addr;
+	do { result ^= value; } while((value >>= HASH_LOG));
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n",
+			 result & (table_size - 1),
+			 addr,
+			 table_size);
+#endif
+
+	return(result & (table_size - 1));
+}
+
+#ifdef CONFIG_PROC_FS
+/* This is the function which produces the output for our /proc output
+ * interface which lists each IP address, the last seen time and the 
+ * other recent times the address was seen.
+ */
+
+static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
+{
+	int len = 0, count, last_len = 0, pkt_count;
+	off_t pos = 0;
+	off_t begin = 0;
+	struct recent_ip_tables *curr_table;
+
+	curr_table = (struct recent_ip_tables*) data;
+
+	spin_lock_bh(&curr_table->list_lock);
+	for(count = 0; count < ip_list_tot; count++) {
+		if(!curr_table->table[count].addr) continue;
+		last_len = len;
+		len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr));
+		len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl);
+		len += sprintf(buffer+len,"last_seen: %lu ",curr_table->table[count].last_seen);
+		len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt);
+		len += sprintf(buffer+len,"last_pkts: %lu",curr_table->table[count].last_pkts[0]);
+		for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) {
+			if(!curr_table->table[count].last_pkts[pkt_count]) break;
+			len += sprintf(buffer+len,", %lu",curr_table->table[count].last_pkts[pkt_count]);
+		}
+		len += sprintf(buffer+len,"\n");
+		pos = begin + len;
+		if(pos < offset) { len = 0; begin = pos; }
+		if(pos > offset + length) { len = last_len; break; }
+	}
+
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+	if(len > length) len = length;
+
+	spin_unlock_bh(&curr_table->list_lock);
+	return len;
+}
+
+/* ip_recent_ctrl provides an interface for users to modify the table
+ * directly.  This allows adding entries, removing entries, and
+ * flushing the entire table.
+ * This is done by opening up the appropriate table for writing and
+ * sending one of:
+ * xx.xx.xx.xx   -- Add entry to table with current time
+ * +xx.xx.xx.xx  -- Add entry to table with current time
+ * -xx.xx.xx.xx  -- Remove entry from table
+ * clear         -- Flush table, remove all entries
+ */
+
+static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned long size, void *data)
+{
+	static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff };
+	u_int32_t val;
+	int base, used = 0;
+	char c, *cp;
+	union iaddr {
+		uint8_t bytes[4];
+		uint32_t word;
+	} res;
+	uint8_t *pp = res.bytes;
+	int digit;
+
+	char buffer[20];
+	int len, check_set = 0, count;
+	u_int32_t addr = 0;
+	struct sk_buff *skb;
+	struct ipt_recent_info *info;
+	struct recent_ip_tables *curr_table;
+
+	curr_table = (struct recent_ip_tables*) data;
+
+	if(size > 20) len = 20; else len = size;
+
+	if(copy_from_user(buffer,input,len)) return -EFAULT;
+
+	if(len < 20) buffer[len] = '\0';
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer);
+#endif
+
+	cp = buffer;
+	while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; }
+
+	/* Check if we are asked to flush the entire table */
+	if(!memcmp(cp,"clear",5)) {
+		used += 5;
+		spin_lock_bh(&curr_table->list_lock);
+		curr_table->time_pos = 0;
+		for(count = 0; count < ip_list_hash_size; count++) {
+			curr_table->hash_table[count] = -1;
+		}
+		for(count = 0; count < ip_list_tot; count++) {
+			curr_table->table[count].last_seen = 0;
+			curr_table->table[count].addr = 0;
+			curr_table->table[count].ttl = 0;
+			memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			curr_table->table[count].oldest_pkt = 0;
+			curr_table->table[count].time_pos = 0;
+			curr_table->time_info[count].position = count;
+			curr_table->time_info[count].time = 0;
+		}
+		spin_unlock_bh(&curr_table->list_lock);
+		return used;
+	}
+
+        check_set = IPT_RECENT_SET;
+	switch(*cp) {
+		case '+': check_set = IPT_RECENT_SET; cp++; used++; break;
+		case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break;
+		default: if(!isdigit(*cp)) return (used+1); break;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set);
+#endif
+	/* Get addr (effectively inet_aton()) */
+	/* Shamelessly stolen from libc, a function in the kernel for doing
+	 * this would, of course, be greatly preferred, but our options appear
+	 * to be rather limited, so we will just do it ourselves here.
+	 */
+	res.word = 0;
+
+	c = *cp;
+	for(;;) {
+		if(!isdigit(c)) return used;
+		val = 0; base = 10; digit = 0;
+		if(c == '0') {
+			c = *++cp;
+			if(c == 'x' || c == 'X') base = 16, c = *++cp;
+			else { base = 8; digit = 1; }
+		}
+		for(;;) {
+			if(isascii(c) && isdigit(c)) {
+				if(base == 8 && (c == '8' || c == '0')) return used;
+				val = (val * base) + (c - '0');
+				c = *++cp;
+				digit = 1;
+			} else if(base == 16 && isascii(c) && isxdigit(c)) {
+				val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A'));
+				c = *++cp;
+				digit = 1;
+			} else break;
+		}
+		if(c == '.') {
+			if(pp > res.bytes + 2 || val > 0xff) return used;
+			*pp++ = val;
+			c = *++cp;
+		} else break;
+	}
+	used = cp - buffer;
+	if(c != '\0' && (!isascii(c) || !isspace(c))) return used;
+	if(c == '\n') used++;
+	if(!digit) return used;
+
+	if(val > max[pp - res.bytes]) return used;
+	addr = res.word | htonl(val);
+
+	if(!addr && check_set == IPT_RECENT_SET) return used;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used);
+#endif
+
+	/* Set up and just call match */
+	info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL);
+	if(!info) { return -ENOMEM; }
+	info->seconds = 0;
+	info->hit_count = 0;
+	info->check_set = check_set;
+	info->invert = 0;
+	info->side = IPT_RECENT_SOURCE;
+	strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN);
+	info->name[IPT_RECENT_NAME_LEN-1] = '\0';
+
+	skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL);
+	if (!skb) {
+		used = -ENOMEM;
+		goto out_free_info;
+	}
+	skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL);
+	if (!skb->nh.iph) {
+		used = -ENOMEM;
+		goto out_free_skb;
+	}
+
+	skb->nh.iph->saddr = addr;
+	skb->nh.iph->daddr = 0;
+	/* Clear ttl since we have no way of knowing it */
+	skb->nh.iph->ttl = 0;
+	match(skb,NULL,NULL,info,0,NULL);
+
+	kfree(skb->nh.iph);
+out_free_skb:
+	kfree(skb);
+out_free_info:
+	kfree(info);
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used);
+#endif
+	return used;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/* 'match' is our primary function, called by the kernel whenever a rule is
+ * hit with our module as an option to it.
+ * What this function does depends on what was specifically asked of it by
+ * the user:
+ * --set -- Add or update last seen time of the source address of the packet
+ *   -- matchinfo->check_set == IPT_RECENT_SET
+ * --rcheck -- Just check if the source address is in the list
+ *   -- matchinfo->check_set == IPT_RECENT_CHECK
+ * --update -- If the source address is in the list, update last_seen
+ *   -- matchinfo->check_set == IPT_RECENT_UPDATE
+ * --remove -- If the source address is in the list, remove it
+ *   -- matchinfo->check_set == IPT_RECENT_REMOVE
+ * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds
+ *   -- matchinfo->seconds
+ * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times
+ *   -- matchinfo->hit_count
+ * --seconds and --hitcount can be combined
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	int pkt_count, hits_found, ans;
+	unsigned long now;
+	const struct ipt_recent_info *info = matchinfo;
+	u_int32_t addr = 0, time_temp;
+	u_int8_t ttl = skb->nh.iph->ttl;
+	int *hash_table;
+	int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1;
+	struct time_info_list *time_info;
+	struct recent_ip_tables *curr_table;
+	struct recent_ip_tables *last_table;
+	struct recent_ip_list *r_list;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n");
+#endif
+
+	/* Default is false ^ info->invert */
+	ans = info->invert;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name);
+#endif
+
+	/* if out != NULL then routing has been done and TTL changed.
+	 * We change it back here internally for match what came in before routing. */
+	if(out) ttl++;
+
+	/* Find the right table */
+	spin_lock_bh(&recent_lock);
+	curr_table = r_tables;
+	while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) );
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name);
+#endif
+
+	spin_unlock_bh(&recent_lock);
+
+	/* Table with this name not found, match impossible */
+	if(!curr_table) { return ans; }
+
+	/* Make sure no one is changing the list while we work with it */
+	spin_lock_bh(&curr_table->list_lock);
+
+	r_list = curr_table->table;
+	if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr;
+
+	if(!addr) { 
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr);
+#endif
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl);
+#endif
+
+	/* Get jiffies now in case they changed while we were waiting for a lock */
+	now = jiffies;
+	hash_table = curr_table->hash_table;
+	time_info = curr_table->time_info;
+
+	orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size);
+	/* Hash entry at this result used */
+	/* Check for TTL match if requested.  If TTL is zero then a match would never
+	 * happen, so match regardless of existing TTL in that case.  Zero means the
+	 * entry was added via the /proc interface anyway, so we will just use the
+	 * first TTL we get for that IP address. */
+	if(info->check_set & IPT_RECENT_TTL) {
+		while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr &&
+			(!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) {
+			/* Collision in hash table */
+			hash_result = (hash_result + 1) % ip_list_hash_size;
+		}
+	} else {
+		while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) {
+			/* Collision in hash table */
+			hash_result = (hash_result + 1) % ip_list_hash_size;
+		}
+	}
+
+	if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) {
+		/* IP not in list and not asked to SET */
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+	/* Check if we need to handle the collision, do not need to on REMOVE */
+	if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n",
+				 orig_hash_result,
+				 hash_result,
+				 r_list[hash_table[orig_hash_result]].addr,
+				 addr);
+#endif
+
+		/* We had a collision.
+		 * orig_hash_result is where we started, hash_result is where we ended up.
+		 * So, swap them because we are likely to see the same guy again sooner */
+#ifdef DEBUG
+		if(debug) {
+		  printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]);
+		  printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n",
+				r_list[hash_table[orig_hash_result]].hash_entry);
+		}
+#endif
+
+		r_list[hash_table[orig_hash_result]].hash_entry = hash_result;
+
+
+		temp = hash_table[orig_hash_result];
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]);
+#endif
+		hash_table[orig_hash_result] = hash_table[hash_result];
+		hash_table[hash_result] = temp;
+		temp = hash_result;
+		hash_result = orig_hash_result;
+		orig_hash_result = temp;
+		time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result];
+		if(hash_table[hash_result] != -1) {
+			r_list[hash_table[hash_result]].hash_entry = hash_result;
+			time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+		}
+
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n");
+#endif
+	}
+
+	if(hash_table[hash_result] == -1) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n",
+				 hash_result, addr);
+#endif
+
+		/* New item found and IPT_RECENT_SET, so we need to add it */
+		location = time_info[curr_table->time_pos].position;
+		hash_table[r_list[location].hash_entry] = -1;
+		hash_table[hash_result] = location;
+		memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+		r_list[location].time_pos = curr_table->time_pos;
+		r_list[location].addr = addr;
+		r_list[location].ttl = ttl;
+		r_list[location].last_seen = now;
+		r_list[location].oldest_pkt = 1;
+		r_list[location].last_pkts[0] = now;
+		r_list[location].hash_entry = hash_result;
+		time_info[curr_table->time_pos].time = r_list[location].last_seen;
+		curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot;
+
+		ans = !info->invert;
+	} else {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n",
+				 hash_result,
+				 addr);
+#endif
+
+		/* Existing item found */
+		location = hash_table[hash_result];
+		/* We have a match on address, now to make sure it meets all requirements for a
+		 * full match. */
+		if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) {
+			if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert;
+			if(info->seconds && !info->hit_count) {
+				if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert;
+			}
+			if(info->seconds && info->hit_count) {
+				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
+				}
+				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
+			}
+			if(info->hit_count && !info->seconds) {
+				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(r_list[location].last_pkts[pkt_count] == 0) break;
+					hits_found++;
+				}
+				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
+			}
+		}
+#ifdef DEBUG
+		if(debug) {
+			if(ans)
+				printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr);
+			else
+				printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr);
+		}
+#endif
+
+		/* If and only if we have been asked to SET, or to UPDATE (on match) do we add the
+		 * current timestamp to the last_seen. */
+		if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n");
+#endif
+			/* Have to update our time info */
+			time_loc = r_list[location].time_pos;
+			time_info[time_loc].time = now;
+			time_info[time_loc].position = location;
+			while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
+				time_temp = time_info[time_loc].time;
+				time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
+				time_info[(time_loc+1)%ip_list_tot].time = time_temp;
+				time_temp = time_info[time_loc].position;
+				time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
+				time_info[(time_loc+1)%ip_list_tot].position = time_temp;
+				r_list[time_info[time_loc].position].time_pos = time_loc;
+				r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
+				time_loc = (time_loc+1) % ip_list_tot;
+			}
+			r_list[location].time_pos = time_loc;
+			r_list[location].ttl = ttl;
+			r_list[location].last_pkts[r_list[location].oldest_pkt] = now;
+			r_list[location].oldest_pkt = ++r_list[location].oldest_pkt % ip_pkt_list_tot;
+			r_list[location].last_seen = now;
+		}
+		/* If we have been asked to remove the entry from the list, just set it to 0 */
+		if(info->check_set & IPT_RECENT_REMOVE) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result);
+#endif
+			/* Check if this is part of a collision chain */
+			while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) {
+				orig_hash_result++;
+				if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) {
+					/* Found collision chain, how deep does this rabbit hole go? */
+#ifdef DEBUG
+					if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n");
+#endif
+					end_collision_chain = orig_hash_result;
+				}
+			}
+			if(end_collision_chain != -1) {
+#ifdef DEBUG
+				if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n");
+#endif
+				/* Part of a collision chain, swap it with the end of the chain
+				 * before removing. */
+				r_list[hash_table[end_collision_chain]].hash_entry = hash_result;
+				temp = hash_table[end_collision_chain];
+				hash_table[end_collision_chain] = hash_table[hash_result];
+				hash_table[hash_result] = temp;
+				time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+				hash_result = end_collision_chain;
+				r_list[hash_table[hash_result]].hash_entry = hash_result;
+				time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+			}
+			location = hash_table[hash_result];
+			hash_table[r_list[location].hash_entry] = -1;
+			time_loc = r_list[location].time_pos;
+			time_info[time_loc].time = 0;
+			time_info[time_loc].position = location;
+			while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
+				time_temp = time_info[time_loc].time;
+				time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
+				time_info[(time_loc+1)%ip_list_tot].time = time_temp;
+				time_temp = time_info[time_loc].position;
+				time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
+				time_info[(time_loc+1)%ip_list_tot].position = time_temp;
+				r_list[time_info[time_loc].position].time_pos = time_loc;
+				r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
+				time_loc = (time_loc+1) % ip_list_tot;
+			}
+			r_list[location].time_pos = time_loc;
+			r_list[location].last_seen = 0;
+			r_list[location].addr = 0;
+			r_list[location].ttl = 0;
+			memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			r_list[location].oldest_pkt = 0;
+			ans = !info->invert;
+		}
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+	spin_unlock_bh(&curr_table->list_lock);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n");
+#endif
+	return ans;
+}
+
+/* This function is to verify that the rule given during the userspace iptables
+ * command is correct.
+ * If the command is valid then we check if the table name referred to by the
+ * rule exists, if not it is created.
+ */
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	int flag = 0, c;
+	unsigned long *hold;
+	const struct ipt_recent_info *info = matchinfo;
+	struct recent_ip_tables *curr_table, *find_table, *last_table;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n");
+#endif
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return 0;
+
+	/* seconds and hit_count only valid for CHECK/UPDATE */
+	if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; }
+	if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; }
+	if(info->check_set & IPT_RECENT_CHECK) flag++;
+	if(info->check_set & IPT_RECENT_UPDATE) flag++;
+
+	/* One and only one of these should ever be set */
+	if(flag != 1) return 0;
+
+	/* Name must be set to something */
+	if(!info->name || !info->name[0]) return 0;
+
+	/* Things look good, create a list for this if it does not exist */
+	/* Lock the linked list while we play with it */
+	spin_lock_bh(&recent_lock);
+
+	/* Look for an entry with this name already created */
+	/* Finds the end of the list and the entry before the end if current name does not exist */
+	find_table = r_tables;
+	while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
+
+	/* If a table already exists just increment the count on that table and return */
+	if(find_table) { 
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name);
+#endif
+		find_table->count++;
+		spin_unlock_bh(&recent_lock);
+		return 1;
+	}
+
+	spin_unlock_bh(&recent_lock);
+
+	/* Table with this name not found */
+	/* Allocate memory for new linked list item */
+
+#ifdef DEBUG
+	if(debug) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name);
+		printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables));
+	}
+#endif
+
+	curr_table = vmalloc(sizeof(struct recent_ip_tables));
+	if(curr_table == NULL) return 0;
+
+	spin_lock_init(&curr_table->list_lock);
+	curr_table->next = NULL;
+	curr_table->count = 1;
+	curr_table->time_pos = 0;
+	strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN);
+	curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0';
+
+	/* Allocate memory for this table and the list of packets in each entry. */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n",
+			sizeof(struct recent_ip_list)*ip_list_tot,
+			info->name);
+#endif
+
+	curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot);
+	if(curr_table->table == NULL) { vfree(curr_table); return 0; }
+	memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
+			sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+#endif
+
+	hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
+#endif
+	if(hold == NULL) { 
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n");
+		vfree(curr_table->table); 
+		vfree(curr_table);
+		return 0;
+	}
+	for(c = 0; c < ip_list_tot; c++) {
+		curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot;
+	}
+
+	/* Allocate memory for the hash table */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n",
+			sizeof(int)*ip_list_hash_size);
+#endif
+
+	curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size);
+	if(!curr_table->hash_table) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n");
+		vfree(hold);
+		vfree(curr_table->table); 
+		vfree(curr_table);
+		return 0;
+	}
+
+	for(c = 0; c < ip_list_hash_size; c++) {
+		curr_table->hash_table[c] = -1;
+	}
+
+	/* Allocate memory for the time info */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n",
+			sizeof(struct time_info_list)*ip_list_tot);
+#endif
+
+	curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot);
+	if(!curr_table->time_info) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n");
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table); 
+		vfree(curr_table);
+		return 0;
+	}
+	for(c = 0; c < ip_list_tot; c++) {
+		curr_table->time_info[c].position = c;
+		curr_table->time_info[c].time = 0;
+	}
+
+	/* Put the new table in place */
+	spin_lock_bh(&recent_lock);
+	find_table = r_tables;
+	while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
+
+	/* If a table already exists just increment the count on that table and return */
+	if(find_table) { 
+		find_table->count++;	
+		spin_unlock_bh(&recent_lock);
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name);
+#endif
+		vfree(curr_table->time_info);
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 1;
+	}
+	if(!last_table) r_tables = curr_table; else last_table->next = curr_table;
+
+	spin_unlock_bh(&recent_lock);
+
+#ifdef CONFIG_PROC_FS
+	/* Create our proc 'status' entry. */
+	curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent);
+	if (!curr_table->status_proc) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n");
+		/* Destroy the created table */
+		spin_lock_bh(&recent_lock);
+		last_table = NULL;
+		curr_table = r_tables;
+		if(!curr_table) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n");
+#endif
+			spin_unlock_bh(&recent_lock);
+			return 0;
+		}
+		while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) );
+		if(!curr_table) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n");
+#endif
+			spin_unlock_bh(&recent_lock);
+			return 0;
+		}
+		if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next;
+		spin_unlock_bh(&recent_lock);
+		vfree(curr_table->time_info);
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 0;
+	}
+	
+	curr_table->status_proc->owner = THIS_MODULE;
+	curr_table->status_proc->data = curr_table;
+	wmb();
+	curr_table->status_proc->read_proc = ip_recent_get_info;
+	curr_table->status_proc->write_proc = ip_recent_ctrl;
+#endif /* CONFIG_PROC_FS */
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n");
+#endif
+
+	return 1;
+}
+
+/* This function is called in the event that a rule matching this module is
+ * removed.
+ * When this happens we need to check if there are no other rules matching
+ * the table given.  If that is the case then we remove the table and clean
+ * up its memory.
+ */
+static void
+destroy(void *matchinfo, unsigned int matchsize)
+{
+	const struct ipt_recent_info *info = matchinfo;
+	struct recent_ip_tables *curr_table, *last_table;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n");
+#endif
+
+	if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return;
+
+	/* Lock the linked list while we play with it */
+	spin_lock_bh(&recent_lock);
+
+	/* Look for an entry with this name already created */
+	/* Finds the end of the list and the entry before the end if current name does not exist */
+	last_table = NULL;
+	curr_table = r_tables;
+	if(!curr_table) { 
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n");
+#endif
+		spin_unlock_bh(&recent_lock);
+		return;
+	}
+	while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) );
+
+	/* If a table does not exist then do nothing and return */
+	if(!curr_table) { 
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n");
+#endif
+		spin_unlock_bh(&recent_lock);
+		return;
+	}
+
+	curr_table->count--;
+
+	/* If count is still non-zero then there are still rules referenceing it so we do nothing */
+	if(curr_table->count) { 
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n");
+#endif
+		spin_unlock_bh(&recent_lock);
+		return;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n");
+#endif
+
+	/* Count must be zero so we remove this table from the list */
+	if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next;
+
+	spin_unlock_bh(&recent_lock);
+
+	/* lock to make sure any late-runners still using this after we removed it from
+	 * the list finish up then remove everything */
+	spin_lock_bh(&curr_table->list_lock);
+	spin_unlock_bh(&curr_table->list_lock);
+
+#ifdef CONFIG_PROC_FS
+	if(curr_table->status_proc) remove_proc_entry(curr_table->name,proc_net_ipt_recent);
+#endif /* CONFIG_PROC_FS */
+	vfree(curr_table->table[0].last_pkts);
+	vfree(curr_table->table);
+	vfree(curr_table->hash_table);
+	vfree(curr_table->time_info);
+	vfree(curr_table);
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n");
+#endif
+
+	return;
+}
+
+/* This is the structure we pass to ipt_register to register our
+ * module with iptables.
+ */
+static struct ipt_match recent_match = { 
+  .name = "recent", 
+  .match = &match, 
+  .checkentry = &checkentry, 
+  .destroy = &destroy, 
+  .me = THIS_MODULE
+};
+
+/* Kernel module initialization. */
+static int __init init(void)
+{
+	int err, count;
+
+	printk(version);
+#ifdef CONFIG_PROC_FS
+	proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net);
+	if(!proc_net_ipt_recent) return -ENOMEM;
+#endif
+
+	if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) {
+	  printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n");
+	  ip_list_hash_size = 0;
+	}
+
+	if(!ip_list_hash_size) {
+		ip_list_hash_size = ip_list_tot*3;
+		count = 2*2;
+		while(ip_list_hash_size > count) count = count*2;
+		ip_list_hash_size = count;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size);
+#endif
+
+	err = ipt_register_match(&recent_match);
+	if (err)
+		remove_proc_entry("ipt_recent", proc_net);
+	return err;
+}
+
+/* Kernel module destruction. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&recent_match);
+
+	remove_proc_entry("ipt_recent",proc_net);
+}
+
+/* Register our module with the kernel. */
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c
new file mode 100644
index 00000000000..fe2b327bcaa
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_sctp.c
@@ -0,0 +1,203 @@
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <linux/sctp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_sctp.h>
+
+#ifdef DEBUG_SCTP
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
+					      || (!!((invflag) & (option)) ^ (cond)))
+
+static int
+match_flags(const struct ipt_sctp_flag_info *flag_info,
+	    const int flag_count,
+	    u_int8_t chunktype,
+	    u_int8_t chunkflags)
+{
+	int i;
+
+	for (i = 0; i < flag_count; i++) {
+		if (flag_info[i].chunktype == chunktype) {
+			return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
+		}
+	}
+
+	return 1;
+}
+
+static int
+match_packet(const struct sk_buff *skb,
+	     const u_int32_t *chunkmap,
+	     int chunk_match_type,
+	     const struct ipt_sctp_flag_info *flag_info,
+	     const int flag_count,
+	     int *hotdrop)
+{
+	int offset;
+	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
+	sctp_chunkhdr_t _sch, *sch;
+
+#ifdef DEBUG_SCTP
+	int i = 0;
+#endif
+
+	if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
+		SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
+	}
+
+	offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
+	do {
+		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
+		if (sch == NULL) {
+			duprintf("Dropping invalid SCTP packet.\n");
+			*hotdrop = 1;
+			return 0;
+        	}
+
+		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", 
+				++i, offset, sch->type, htons(sch->length), sch->flags);
+
+		offset += (htons(sch->length) + 3) & ~3;
+
+		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
+
+		if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) {
+			switch (chunk_match_type) {
+			case SCTP_CHUNK_MATCH_ANY:
+				if (match_flags(flag_info, flag_count, 
+					sch->type, sch->flags)) {
+					return 1;
+				}
+				break;
+
+			case SCTP_CHUNK_MATCH_ALL:
+				if (match_flags(flag_info, flag_count, 
+					sch->type, sch->flags)) {
+					SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
+				}
+				break;
+
+			case SCTP_CHUNK_MATCH_ONLY:
+				if (!match_flags(flag_info, flag_count, 
+					sch->type, sch->flags)) {
+					return 0;
+				}
+				break;
+			}
+		} else {
+			switch (chunk_match_type) {
+			case SCTP_CHUNK_MATCH_ONLY:
+				return 0;
+			}
+		}
+	} while (offset < skb->len);
+
+	switch (chunk_match_type) {
+	case SCTP_CHUNK_MATCH_ALL:
+		return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
+	case SCTP_CHUNK_MATCH_ANY:
+		return 0;
+	case SCTP_CHUNK_MATCH_ONLY:
+		return 1;
+	}
+
+	/* This will never be reached, but required to stop compiler whine */
+	return 0;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_sctp_info *info;
+	sctp_sctphdr_t _sh, *sh;
+
+	info = (const struct ipt_sctp_info *)matchinfo;
+
+	if (offset) {
+		duprintf("Dropping non-first fragment.. FIXME\n");
+		return 0;
+	}
+	
+	sh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_sh), &_sh);
+	if (sh == NULL) {
+		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+       	}
+	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
+
+	return  SCCHECK(((ntohs(sh->source) >= info->spts[0]) 
+			&& (ntohs(sh->source) <= info->spts[1])), 
+		   	IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
+		&& SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) 
+			&& (ntohs(sh->dest) <= info->dpts[1])), 
+			IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
+		&& SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
+ 					info->flag_info, info->flag_count, 
+					hotdrop),
+			   IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_sctp_info *info;
+
+	info = (const struct ipt_sctp_info *)matchinfo;
+
+	return ip->proto == IPPROTO_SCTP
+		&& !(ip->invflags & IPT_INV_PROTO)
+		&& matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
+		&& !(info->flags & ~IPT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
+		&& !(info->invflags & ~info->flags)
+		&& ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) || 
+			(info->chunk_match_type &
+				(SCTP_CHUNK_MATCH_ALL 
+				| SCTP_CHUNK_MATCH_ANY
+				| SCTP_CHUNK_MATCH_ONLY)));
+}
+
+static struct ipt_match sctp_match = 
+{ 
+	.list = { NULL, NULL},
+	.name = "sctp",
+	.match = &match,
+	.checkentry = &checkentry,
+	.destroy = NULL,
+	.me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&sctp_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&sctp_match);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kiran Kumar Immidi");
+MODULE_DESCRIPTION("Match for SCTP protocol packets");
+
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
new file mode 100644
index 00000000000..b1511b97ea5
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -0,0 +1,74 @@
+/* Kernel module to match connection tracking information. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_state.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("iptables connection tracking state match module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_state_info *sinfo = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	unsigned int statebit;
+
+	if (skb->nfct == &ip_conntrack_untracked.ct_general)
+		statebit = IPT_STATE_UNTRACKED;
+	else if (!ip_conntrack_get(skb, &ctinfo))
+		statebit = IPT_STATE_INVALID;
+	else
+		statebit = IPT_STATE_BIT(ctinfo);
+
+	return (sinfo->statemask & statebit);
+}
+
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match state_match = {
+	.name		= "state",
+	.match		= &match,
+	.checkentry	= &check,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	need_ip_conntrack();
+	return ipt_register_match(&state_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&state_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c
new file mode 100644
index 00000000000..4dc9b16ab4a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_tcpmss.c
@@ -0,0 +1,127 @@
+/* Kernel module to match TCP MSS values. */
+
+/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ipt_tcpmss.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#define TH_SYN 0x02
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("iptables TCP MSS match module");
+
+/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
+static inline int
+mssoption_match(u_int16_t min, u_int16_t max,
+		const struct sk_buff *skb,
+		int invert,
+		int *hotdrop)
+{
+	struct tcphdr _tcph, *th;
+	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+	u8 _opt[15 * 4 - sizeof(_tcph)], *op;
+	unsigned int i, optlen;
+
+	/* If we don't have the whole header, drop packet. */
+	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
+				sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		goto dropit;
+
+	/* Malformed. */
+	if (th->doff*4 < sizeof(*th))
+		goto dropit;
+
+	optlen = th->doff*4 - sizeof(*th);
+	if (!optlen)
+		goto out;
+
+	/* Truncated options. */
+	op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
+				optlen, _opt);
+	if (op == NULL)
+		goto dropit;
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == TCPOPT_MSS
+		    && (optlen - i) >= TCPOLEN_MSS
+		    && op[i+1] == TCPOLEN_MSS) {
+			u_int16_t mssval;
+
+			mssval = (op[i+2] << 8) | op[i+3];
+			
+			return (mssval >= min && mssval <= max) ^ invert;
+		}
+		if (op[i] < 2) i++;
+		else i += op[i+1]?:1;
+	}
+out:
+	return invert;
+
+ dropit:
+	*hotdrop = 1;
+	return 0;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_tcpmss_match_info *info = matchinfo;
+
+	return mssoption_match(info->mss_min, info->mss_max, skb,
+			       info->invert, hotdrop);
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)))
+		return 0;
+
+	/* Must specify -p tcp */
+	if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
+		printk("tcpmss: Only works on TCP packets\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ipt_match tcpmss_match = {
+	.name		= "tcpmss",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&tcpmss_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&tcpmss_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
new file mode 100644
index 00000000000..086a1bb61e3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -0,0 +1,64 @@
+/* Kernel module to match TOS values. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_tos.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables TOS match module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_tos_info *info = matchinfo;
+
+	return (skb->nh.iph->tos == info->tos) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_tos_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match tos_match = {
+	.name		= "tos",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&tos_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&tos_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
new file mode 100644
index 00000000000..219aa9de88c
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -0,0 +1,79 @@
+/* IP tables module for matching the value of the TTL 
+ *
+ * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
+ *
+ * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_ttl.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("IP tables TTL matching module");
+MODULE_LICENSE("GPL");
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+		 const struct net_device *out, const void *matchinfo,
+		 int offset, int *hotdrop)
+{
+	const struct ipt_ttl_info *info = matchinfo;
+
+	switch (info->mode) {
+		case IPT_TTL_EQ:
+			return (skb->nh.iph->ttl == info->ttl);
+			break;
+		case IPT_TTL_NE:
+			return (!(skb->nh.iph->ttl == info->ttl));
+			break;
+		case IPT_TTL_LT:
+			return (skb->nh.iph->ttl < info->ttl);
+			break;
+		case IPT_TTL_GT:
+			return (skb->nh.iph->ttl > info->ttl);
+			break;
+		default:
+			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", 
+				info->mode);
+			return 0;
+	}
+
+	return 0;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+		      void *matchinfo, unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_ttl_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ipt_match ttl_match = {
+	.name		= "ttl",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ipt_register_match(&ttl_match);
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_match(&ttl_match);
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
new file mode 100644
index 00000000000..260a4f0a2a9
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -0,0 +1,194 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[3];
+	struct ipt_error term;
+} initial_table __initdata 
+= { { "filter", FILTER_VALID_HOOKS, 4,
+      sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+      { [NF_IP_LOCAL_IN] = 0,
+	[NF_IP_FORWARD] = sizeof(struct ipt_standard),
+	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+      { [NF_IP_LOCAL_IN] = 0,
+	[NF_IP_FORWARD] = sizeof(struct ipt_standard),
+	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
+      0, NULL, { } },
+    {
+	    /* LOCAL_IN */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* FORWARD */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_OUT */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } }
+    },
+    /* ERROR */
+    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+	0,
+	sizeof(struct ipt_entry),
+	sizeof(struct ipt_error),
+	0, { 0, 0 }, { } },
+      { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
+	  { } },
+	"ERROR"
+      }
+    }
+};
+
+static struct ipt_table packet_filter = {
+	.name		= "filter",
+	.valid_hooks	= FILTER_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.me		= THIS_MODULE
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ipt_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static unsigned int
+ipt_local_out_hook(unsigned int hook,
+		   struct sk_buff **pskb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ipt_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+
+	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static struct nf_hook_ops ipt_ops[] = {
+	{
+		.hook		= ipt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_LOCAL_IN,
+		.priority	= NF_IP_PRI_FILTER,
+	},
+	{
+		.hook		= ipt_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_FORWARD,
+		.priority	= NF_IP_PRI_FILTER,
+	},
+	{
+		.hook		= ipt_local_out_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_LOCAL_OUT,
+		.priority	= NF_IP_PRI_FILTER,
+	},
+};
+
+/* Default to forward because I got too much mail already. */
+static int forward = NF_ACCEPT;
+module_param(forward, bool, 0000);
+
+static int __init init(void)
+{
+	int ret;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	/* Register table */
+	ret = ipt_register_table(&packet_filter, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ipt_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ipt_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	ret = nf_register_hook(&ipt_ops[2]);
+	if (ret < 0)
+		goto cleanup_hook1;
+
+	return ret;
+
+ cleanup_hook1:
+	nf_unregister_hook(&ipt_ops[1]);
+ cleanup_hook0:
+	nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+	ipt_unregister_table(&packet_filter);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ipt_ops[i]);
+
+	ipt_unregister_table(&packet_filter);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
new file mode 100644
index 00000000000..160eb11b6e2
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -0,0 +1,260 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables mangle table");
+
+#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \
+			    (1 << NF_IP_LOCAL_IN) | \
+			    (1 << NF_IP_FORWARD) | \
+			    (1 << NF_IP_LOCAL_OUT) | \
+			    (1 << NF_IP_POST_ROUTING))
+
+/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[5];
+	struct ipt_error term;
+} initial_table __initdata
+= { { "mangle", MANGLE_VALID_HOOKS, 6,
+      sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
+      { [NF_IP_PRE_ROUTING] 	= 0,
+	[NF_IP_LOCAL_IN] 	= sizeof(struct ipt_standard),
+	[NF_IP_FORWARD] 	= sizeof(struct ipt_standard) * 2,
+	[NF_IP_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
+	[NF_IP_POST_ROUTING] 	= sizeof(struct ipt_standard) * 4 },
+      { [NF_IP_PRE_ROUTING] 	= 0,
+	[NF_IP_LOCAL_IN] 	= sizeof(struct ipt_standard),
+	[NF_IP_FORWARD] 	= sizeof(struct ipt_standard) * 2,
+	[NF_IP_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
+	[NF_IP_POST_ROUTING]	= sizeof(struct ipt_standard) * 4 },
+      0, NULL, { } },
+    {
+	    /* PRE_ROUTING */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_IN */
+ 	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* FORWARD */
+ 	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_OUT */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* POST_ROUTING */
+	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+    },
+    /* ERROR */
+    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+	0,
+	sizeof(struct ipt_entry),
+	sizeof(struct ipt_error),
+	0, { 0, 0 }, { } },
+      { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
+	  { } },
+	"ERROR"
+      }
+    }
+};
+
+static struct ipt_table packet_mangler = {
+	.name		= "mangle",
+	.valid_hooks	= MANGLE_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.me		= THIS_MODULE,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ipt_route_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+}
+
+static unsigned int
+ipt_local_hook(unsigned int hook,
+		   struct sk_buff **pskb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	u_int8_t tos;
+	u_int32_t saddr, daddr;
+	unsigned long nfmark;
+
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ipt_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+
+	/* Save things which could affect route */
+	nfmark = (*pskb)->nfmark;
+	saddr = (*pskb)->nh.iph->saddr;
+	daddr = (*pskb)->nh.iph->daddr;
+	tos = (*pskb)->nh.iph->tos;
+
+	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	/* Reroute for ANY change. */
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
+	    && ((*pskb)->nh.iph->saddr != saddr
+		|| (*pskb)->nh.iph->daddr != daddr
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		|| (*pskb)->nfmark != nfmark
+#endif
+		|| (*pskb)->nh.iph->tos != tos))
+		return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+
+	return ret;
+}
+
+static struct nf_hook_ops ipt_ops[] = {
+	{
+		.hook		= ipt_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_PRE_ROUTING, 
+		.priority	= NF_IP_PRI_MANGLE,
+	},
+	{
+		.hook		= ipt_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_LOCAL_IN,
+		.priority	= NF_IP_PRI_MANGLE,
+	},
+	{
+		.hook		= ipt_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_FORWARD,
+		.priority	= NF_IP_PRI_MANGLE,
+	},
+	{
+		.hook		= ipt_local_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_LOCAL_OUT,
+		.priority	= NF_IP_PRI_MANGLE,
+	},
+	{
+		.hook		= ipt_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_IP_POST_ROUTING,
+		.priority	= NF_IP_PRI_MANGLE,
+	},
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Register table */
+	ret = ipt_register_table(&packet_mangler, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ipt_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ipt_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	ret = nf_register_hook(&ipt_ops[2]);
+	if (ret < 0)
+		goto cleanup_hook1;
+
+	ret = nf_register_hook(&ipt_ops[3]);
+	if (ret < 0)
+		goto cleanup_hook2;
+
+	ret = nf_register_hook(&ipt_ops[4]);
+	if (ret < 0)
+		goto cleanup_hook3;
+
+	return ret;
+
+ cleanup_hook3:
+        nf_unregister_hook(&ipt_ops[3]);
+ cleanup_hook2:
+        nf_unregister_hook(&ipt_ops[2]);
+ cleanup_hook1:
+	nf_unregister_hook(&ipt_ops[1]);
+ cleanup_hook0:
+	nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+	ipt_unregister_table(&packet_mangler);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ipt_ops[i]);
+
+	ipt_unregister_table(&packet_mangler);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
new file mode 100644
index 00000000000..01b4a3c814d
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -0,0 +1,156 @@
+/* 
+ * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT .
+ *
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
+
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[2];
+	struct ipt_error term;
+} initial_table __initdata = {
+	.repl = {
+		.name = "raw", 
+		.valid_hooks = RAW_VALID_HOOKS, 
+		.num_entries = 3,
+		.size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
+		.hook_entry = { 
+			[NF_IP_PRE_ROUTING] = 0,
+			[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) },
+		.underflow = { 
+			[NF_IP_PRE_ROUTING] = 0,
+			[NF_IP_LOCAL_OUT]  = sizeof(struct ipt_standard) },
+	},
+	.entries = {
+	     /* PRE_ROUTING */
+	     { 
+		     .entry = { 
+			     .target_offset = sizeof(struct ipt_entry),
+			     .next_offset = sizeof(struct ipt_standard),
+		     },
+		     .target = { 
+			  .target = { 
+				  .u = {
+					  .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
+				  },
+			  },
+			  .verdict = -NF_ACCEPT - 1,
+		     },
+	     },
+
+	     /* LOCAL_OUT */
+	     {
+		     .entry = {
+			     .target_offset = sizeof(struct ipt_entry),
+			     .next_offset = sizeof(struct ipt_standard),
+		     },
+		     .target = {
+			     .target = {
+				     .u = {
+					     .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
+				     },
+			     },
+			     .verdict = -NF_ACCEPT - 1,
+		     },
+	     },
+	},
+	/* ERROR */
+	.term = {
+		.entry = {
+			.target_offset = sizeof(struct ipt_entry),
+			.next_offset = sizeof(struct ipt_error),
+		},
+		.target = {
+			.target = {
+				.u = {
+					.user = {
+						.target_size = IPT_ALIGN(sizeof(struct ipt_error_target)), 
+						.name = IPT_ERROR_TARGET,
+					},
+				},
+			},
+			.errorname = "ERROR",
+		},
+	}
+};
+
+static struct ipt_table packet_raw = { 
+	.name = "raw", 
+	.valid_hooks =  RAW_VALID_HOOKS, 
+	.lock = RW_LOCK_UNLOCKED, 
+	.me = THIS_MODULE
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ipt_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+}
+
+/* 'raw' is the very first table. */
+static struct nf_hook_ops ipt_ops[] = {
+	{
+	  .hook = ipt_hook, 
+	  .pf = PF_INET, 
+	  .hooknum = NF_IP_PRE_ROUTING, 
+	  .priority = NF_IP_PRI_RAW
+	},
+	{
+	  .hook = ipt_hook, 
+	  .pf = PF_INET, 
+	  .hooknum = NF_IP_LOCAL_OUT, 
+	  .priority = NF_IP_PRI_RAW
+	},
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Register table */
+	ret = ipt_register_table(&packet_raw, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ipt_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ipt_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	return ret;
+
+ cleanup_hook0:
+	nf_unregister_hook(&ipt_ops[0]);
+ cleanup_table:
+	ipt_unregister_table(&packet_raw);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ipt_ops[i]);
+
+	ipt_unregister_table(&packet_raw);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
new file mode 100644
index 00000000000..912bbcc7f41
--- /dev/null
+++ b/net/ipv4/proc.c
@@ -0,0 +1,382 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		This file implements the various access functions for the
+ *		PROC file system.  It is mainly used for debugging and
+ *		statistics.
+ *
+ * Version:	$Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $
+ *
+ * Authors:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
+ *		Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de>
+ *		Erik Schoenfelder, <schoenfr@ibr.cs.tu-bs.de>
+ *
+ * Fixes:
+ *		Alan Cox	:	UDP sockets show the rxqueue/txqueue
+ *					using hint flag for the netinfo.
+ *	Pauline Middelink	:	identd support
+ *		Alan Cox	:	Make /proc safer.
+ *	Erik Schoenfelder	:	/proc/net/snmp
+ *		Alan Cox	:	Handle dead sockets properly.
+ *	Gerhard Koerting	:	Show both timers
+ *		Alan Cox	:	Allow inode to be NULL (kernel socket)
+ *	Andi Kleen		:	Add support for open_requests and
+ *					split functions for more readibility.
+ *	Andi Kleen		:	Add support for /proc/net/netstat
+ *	Arnaldo C. Melo		:	Convert to seq_file
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <net/raw.h>
+
+static int fold_prot_inuse(struct proto *proto)
+{
+	int res = 0;
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		res += proto->stats[cpu].inuse;
+
+	return res;
+}
+
+/*
+ *	Report socket allocation statistics [mea@utu.fi]
+ */
+static int sockstat_seq_show(struct seq_file *seq, void *v)
+{
+	/* From net/socket.c */
+	extern void socket_seq_show(struct seq_file *seq);
+
+	socket_seq_show(seq);
+	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+		   fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+		   tcp_tw_count, atomic_read(&tcp_sockets_allocated),
+		   atomic_read(&tcp_memory_allocated));
+	seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
+	seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+	seq_printf(seq,  "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
+		   atomic_read(&ip_frag_mem));
+	return 0;
+}
+
+static int sockstat_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sockstat_seq_show, NULL);
+}
+
+static struct file_operations sockstat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = sockstat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+static unsigned long
+fold_field(void *mib[], int offt)
+{
+	unsigned long res = 0;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+	}
+	return res;
+}
+
+/* snmp items */
+static struct snmp_mib snmp4_ipstats_list[] = {
+	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
+	SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS),
+	SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS),
+	SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
+	SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
+	SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS),
+	SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS),
+	SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS),
+	SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS),
+	SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
+	SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
+	SNMP_MIB_ITEM("ReasmReqds", IPSTATS_MIB_REASMREQDS),
+	SNMP_MIB_ITEM("ReasmOKs", IPSTATS_MIB_REASMOKS),
+	SNMP_MIB_ITEM("ReasmFails", IPSTATS_MIB_REASMFAILS),
+	SNMP_MIB_ITEM("FragOKs", IPSTATS_MIB_FRAGOKS),
+	SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS),
+	SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp4_icmp_list[] = {
+	SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS),
+	SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS),
+	SNMP_MIB_ITEM("InDestUnreachs", ICMP_MIB_INDESTUNREACHS),
+	SNMP_MIB_ITEM("InTimeExcds", ICMP_MIB_INTIMEEXCDS),
+	SNMP_MIB_ITEM("InParmProbs", ICMP_MIB_INPARMPROBS),
+	SNMP_MIB_ITEM("InSrcQuenchs", ICMP_MIB_INSRCQUENCHS),
+	SNMP_MIB_ITEM("InRedirects", ICMP_MIB_INREDIRECTS),
+	SNMP_MIB_ITEM("InEchos", ICMP_MIB_INECHOS),
+	SNMP_MIB_ITEM("InEchoReps", ICMP_MIB_INECHOREPS),
+	SNMP_MIB_ITEM("InTimestamps", ICMP_MIB_INTIMESTAMPS),
+	SNMP_MIB_ITEM("InTimestampReps", ICMP_MIB_INTIMESTAMPREPS),
+	SNMP_MIB_ITEM("InAddrMasks", ICMP_MIB_INADDRMASKS),
+	SNMP_MIB_ITEM("InAddrMaskReps", ICMP_MIB_INADDRMASKREPS),
+	SNMP_MIB_ITEM("OutMsgs", ICMP_MIB_OUTMSGS),
+	SNMP_MIB_ITEM("OutErrors", ICMP_MIB_OUTERRORS),
+	SNMP_MIB_ITEM("OutDestUnreachs", ICMP_MIB_OUTDESTUNREACHS),
+	SNMP_MIB_ITEM("OutTimeExcds", ICMP_MIB_OUTTIMEEXCDS),
+	SNMP_MIB_ITEM("OutParmProbs", ICMP_MIB_OUTPARMPROBS),
+	SNMP_MIB_ITEM("OutSrcQuenchs", ICMP_MIB_OUTSRCQUENCHS),
+	SNMP_MIB_ITEM("OutRedirects", ICMP_MIB_OUTREDIRECTS),
+	SNMP_MIB_ITEM("OutEchos", ICMP_MIB_OUTECHOS),
+	SNMP_MIB_ITEM("OutEchoReps", ICMP_MIB_OUTECHOREPS),
+	SNMP_MIB_ITEM("OutTimestamps", ICMP_MIB_OUTTIMESTAMPS),
+	SNMP_MIB_ITEM("OutTimestampReps", ICMP_MIB_OUTTIMESTAMPREPS),
+	SNMP_MIB_ITEM("OutAddrMasks", ICMP_MIB_OUTADDRMASKS),
+	SNMP_MIB_ITEM("OutAddrMaskReps", ICMP_MIB_OUTADDRMASKREPS),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp4_tcp_list[] = {
+	SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM),
+	SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN),
+	SNMP_MIB_ITEM("RtoMax", TCP_MIB_RTOMAX),
+	SNMP_MIB_ITEM("MaxConn", TCP_MIB_MAXCONN),
+	SNMP_MIB_ITEM("ActiveOpens", TCP_MIB_ACTIVEOPENS),
+	SNMP_MIB_ITEM("PassiveOpens", TCP_MIB_PASSIVEOPENS),
+	SNMP_MIB_ITEM("AttemptFails", TCP_MIB_ATTEMPTFAILS),
+	SNMP_MIB_ITEM("EstabResets", TCP_MIB_ESTABRESETS),
+	SNMP_MIB_ITEM("CurrEstab", TCP_MIB_CURRESTAB),
+	SNMP_MIB_ITEM("InSegs", TCP_MIB_INSEGS),
+	SNMP_MIB_ITEM("OutSegs", TCP_MIB_OUTSEGS),
+	SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS),
+	SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
+	SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp4_udp_list[] = {
+	SNMP_MIB_ITEM("InDatagrams", UDP_MIB_INDATAGRAMS),
+	SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
+	SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
+	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp4_net_list[] = {
+	SNMP_MIB_ITEM("SyncookiesSent", LINUX_MIB_SYNCOOKIESSENT),
+	SNMP_MIB_ITEM("SyncookiesRecv", LINUX_MIB_SYNCOOKIESRECV),
+	SNMP_MIB_ITEM("SyncookiesFailed", LINUX_MIB_SYNCOOKIESFAILED),
+	SNMP_MIB_ITEM("EmbryonicRsts", LINUX_MIB_EMBRYONICRSTS),
+	SNMP_MIB_ITEM("PruneCalled", LINUX_MIB_PRUNECALLED),
+	SNMP_MIB_ITEM("RcvPruned", LINUX_MIB_RCVPRUNED),
+	SNMP_MIB_ITEM("OfoPruned", LINUX_MIB_OFOPRUNED),
+	SNMP_MIB_ITEM("OutOfWindowIcmps", LINUX_MIB_OUTOFWINDOWICMPS),
+	SNMP_MIB_ITEM("LockDroppedIcmps", LINUX_MIB_LOCKDROPPEDICMPS),
+	SNMP_MIB_ITEM("ArpFilter", LINUX_MIB_ARPFILTER),
+	SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
+	SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
+	SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
+	SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
+	SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
+	SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+	SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
+	SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
+	SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
+	SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
+	SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
+	SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED),
+	SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG),
+	SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE),
+	SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED),
+	SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
+	SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER),
+	SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
+	SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
+	SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
+	SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
+	SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
+	SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
+	SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
+	SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
+	SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
+	SNMP_MIB_ITEM("TCPFullUndo", LINUX_MIB_TCPFULLUNDO),
+	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
+	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
+	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
+	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
+	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
+	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
+	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
+	SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES),
+	SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS),
+	SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
+	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
+	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
+	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
+	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
+	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
+	SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
+	SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
+	SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
+	SNMP_MIB_ITEM("TCPDSACKRecv", LINUX_MIB_TCPDSACKRECV),
+	SNMP_MIB_ITEM("TCPDSACKOfoRecv", LINUX_MIB_TCPDSACKOFORECV),
+	SNMP_MIB_ITEM("TCPAbortOnSyn", LINUX_MIB_TCPABORTONSYN),
+	SNMP_MIB_ITEM("TCPAbortOnData", LINUX_MIB_TCPABORTONDATA),
+	SNMP_MIB_ITEM("TCPAbortOnClose", LINUX_MIB_TCPABORTONCLOSE),
+	SNMP_MIB_ITEM("TCPAbortOnMemory", LINUX_MIB_TCPABORTONMEMORY),
+	SNMP_MIB_ITEM("TCPAbortOnTimeout", LINUX_MIB_TCPABORTONTIMEOUT),
+	SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER),
+	SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED),
+	SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES),
+	SNMP_MIB_SENTINEL
+};
+
+/*
+ *	Called from the PROCfs module. This outputs /proc/net/snmp.
+ */
+static int snmp_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	seq_puts(seq, "Ip: Forwarding DefaultTTL");
+
+	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
+
+	seq_printf(seq, "\nIp: %d %d",
+			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+
+	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+		seq_printf(seq, " %lu",
+			   fold_field((void **) ip_statistics, 
+				      snmp4_ipstats_list[i].entry));
+
+	seq_puts(seq, "\nIcmp:");
+	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
+		seq_printf(seq, " %s", snmp4_icmp_list[i].name);
+
+	seq_puts(seq, "\nIcmp:");
+	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
+		seq_printf(seq, " %lu",
+			   fold_field((void **) icmp_statistics, 
+				      snmp4_icmp_list[i].entry));
+
+	seq_puts(seq, "\nTcp:");
+	for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
+		seq_printf(seq, " %s", snmp4_tcp_list[i].name);
+
+	seq_puts(seq, "\nTcp:");
+	for (i = 0; snmp4_tcp_list[i].name != NULL; i++) {
+		/* MaxConn field is signed, RFC 2012 */
+		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
+			seq_printf(seq, " %ld",
+				   fold_field((void **) tcp_statistics, 
+					      snmp4_tcp_list[i].entry));
+		else
+			seq_printf(seq, " %lu",
+				   fold_field((void **) tcp_statistics,
+					      snmp4_tcp_list[i].entry));
+	}
+
+	seq_puts(seq, "\nUdp:");
+	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+		seq_printf(seq, " %s", snmp4_udp_list[i].name);
+
+	seq_puts(seq, "\nUdp:");
+	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+		seq_printf(seq, " %lu",
+			   fold_field((void **) udp_statistics, 
+				      snmp4_udp_list[i].entry));
+
+	seq_putc(seq, '\n');
+	return 0;
+}
+
+static int snmp_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, snmp_seq_show, NULL);
+}
+
+static struct file_operations snmp_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = snmp_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *	Output /proc/net/netstat
+ */
+static int netstat_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	seq_puts(seq, "TcpExt:");
+	for (i = 0; snmp4_net_list[i].name != NULL; i++)
+		seq_printf(seq, " %s", snmp4_net_list[i].name);
+
+	seq_puts(seq, "\nTcpExt:");
+	for (i = 0; snmp4_net_list[i].name != NULL; i++)
+		seq_printf(seq, " %lu",
+			   fold_field((void **) net_statistics, 
+				      snmp4_net_list[i].entry));
+
+	seq_putc(seq, '\n');
+	return 0;
+}
+
+static int netstat_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, netstat_seq_show, NULL);
+}
+
+static struct file_operations netstat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = netstat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+int __init ip_misc_proc_init(void)
+{
+	int rc = 0;
+
+	if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
+		goto out_netstat;
+
+	if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
+		goto out_snmp;
+
+	if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
+		goto out_sockstat;
+out:
+	return rc;
+out_sockstat:
+	proc_net_remove("snmp");
+out_snmp:
+	proc_net_remove("netstat");
+out_netstat:
+	rc = -ENOMEM;
+	goto out;
+}
+
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
new file mode 100644
index 00000000000..90a587cacaa
--- /dev/null
+++ b/net/ipv4/protocol.c
@@ -0,0 +1,101 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		INET protocol dispatch tables.
+ *
+ * Version:	$Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Fixes:
+ *		Alan Cox	: Ahah! udp icmp errors don't work because
+ *				  udp_err is never called!
+ *		Alan Cox	: Added new fields for init and ready for
+ *				  proper fragmentation (_NO_ 4K limits!)
+ *		Richard Colella	: Hang on hash collision
+ *		Vince Laviano	: Modified inet_del_protocol() to correctly
+ *				  maintain copy bit.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/ipip.h>
+#include <linux/igmp.h>
+
+struct net_protocol *inet_protos[MAX_INET_PROTOS];
+static DEFINE_SPINLOCK(inet_proto_lock);
+
+/*
+ *	Add a protocol handler to the hash tables
+ */
+
+int inet_add_protocol(struct net_protocol *prot, unsigned char protocol)
+{
+	int hash, ret;
+
+	hash = protocol & (MAX_INET_PROTOS - 1);
+
+	spin_lock_bh(&inet_proto_lock);
+	if (inet_protos[hash]) {
+		ret = -1;
+	} else {
+		inet_protos[hash] = prot;
+		ret = 0;
+	}
+	spin_unlock_bh(&inet_proto_lock);
+
+	return ret;
+}
+
+/*
+ *	Remove a protocol from the hash tables.
+ */
+ 
+int inet_del_protocol(struct net_protocol *prot, unsigned char protocol)
+{
+	int hash, ret;
+
+	hash = protocol & (MAX_INET_PROTOS - 1);
+
+	spin_lock_bh(&inet_proto_lock);
+	if (inet_protos[hash] == prot) {
+		inet_protos[hash] = NULL;
+		ret = 0;
+	} else {
+		ret = -1;
+	}
+	spin_unlock_bh(&inet_proto_lock);
+
+	synchronize_net();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(inet_add_protocol);
+EXPORT_SYMBOL(inet_del_protocol);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
new file mode 100644
index 00000000000..93624a32eb9
--- /dev/null
+++ b/net/ipv4/raw.c
@@ -0,0 +1,888 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		RAW - implementation of IP "raw" sockets.
+ *
+ * Version:	$Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Fixes:
+ *		Alan Cox	:	verify_area() fixed up
+ *		Alan Cox	:	ICMP error handling
+ *		Alan Cox	:	EMSGSIZE if you send too big a packet
+ *		Alan Cox	: 	Now uses generic datagrams and shared
+ *					skbuff library. No more peek crashes,
+ *					no more backlogs
+ *		Alan Cox	:	Checks sk->broadcast.
+ *		Alan Cox	:	Uses skb_free_datagram/skb_copy_datagram
+ *		Alan Cox	:	Raw passes ip options too
+ *		Alan Cox	:	Setsocketopt added
+ *		Alan Cox	:	Fixed error return for broadcasts
+ *		Alan Cox	:	Removed wake_up calls
+ *		Alan Cox	:	Use ttl/tos
+ *		Alan Cox	:	Cleaned up old debugging
+ *		Alan Cox	:	Use new kernel side addresses
+ *	Arnt Gulbrandsen	:	Fixed MSG_DONTROUTE in raw sockets.
+ *		Alan Cox	:	BSD style RAW socket demultiplexing.
+ *		Alan Cox	:	Beginnings of mrouted support.
+ *		Alan Cox	:	Added IP_HDRINCL option.
+ *		Alan Cox	:	Skip broadcast check if BSDism set.
+ *		David S. Miller	:	New socket lookup architecture.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+ 
+#include <linux/config.h> 
+#include <asm/atomic.h>
+#include <asm/byteorder.h>
+#include <asm/current.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/aio.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/sockios.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/mroute.h>
+#include <linux/netdevice.h>
+#include <linux/in_route.h>
+#include <linux/route.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <linux/gfp.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <net/snmp.h>
+#include <net/inet_common.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+
+struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE];
+DEFINE_RWLOCK(raw_v4_lock);
+
+static void raw_v4_hash(struct sock *sk)
+{
+	struct hlist_head *head = &raw_v4_htable[inet_sk(sk)->num &
+						 (RAWV4_HTABLE_SIZE - 1)];
+
+	write_lock_bh(&raw_v4_lock);
+	sk_add_node(sk, head);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock_bh(&raw_v4_lock);
+}
+
+static void raw_v4_unhash(struct sock *sk)
+{
+ 	write_lock_bh(&raw_v4_lock);
+	if (sk_del_node_init(sk))
+		sock_prot_dec_use(sk->sk_prot);
+	write_unlock_bh(&raw_v4_lock);
+}
+
+struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+			     unsigned long raddr, unsigned long laddr,
+			     int dif)
+{
+	struct hlist_node *node;
+
+	sk_for_each_from(sk, node) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (inet->num == num 					&&
+		    !(inet->daddr && inet->daddr != raddr) 		&&
+		    !(inet->rcv_saddr && inet->rcv_saddr != laddr)	&&
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+			goto found; /* gotcha */
+	}
+	sk = NULL;
+found:
+	return sk;
+}
+
+/*
+ *	0 - deliver
+ *	1 - block
+ */
+static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+{
+	int type;
+
+	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+		return 1;
+
+	type = skb->h.icmph->type;
+	if (type < 32) {
+		__u32 data = raw_sk(sk)->filter.data;
+
+		return ((1 << type) & data) != 0;
+	}
+
+	/* Do not block unknown ICMP types */
+	return 0;
+}
+
+/* IP input processing comes here for RAW socket delivery.
+ * Caller owns SKB, so we must make clones.
+ *
+ * RFC 1122: SHOULD pass TOS value up to the transport layer.
+ * -> It does. And not only TOS, but all IP header.
+ */
+void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+{
+	struct sock *sk;
+	struct hlist_head *head;
+
+	read_lock(&raw_v4_lock);
+	head = &raw_v4_htable[hash];
+	if (hlist_empty(head))
+		goto out;
+	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
+			     iph->saddr, iph->daddr,
+			     skb->dev->ifindex);
+
+	while (sk) {
+		if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
+			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+			/* Not releasing hash table! */
+			if (clone)
+				raw_rcv(sk, clone);
+		}
+		sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
+				     iph->saddr, iph->daddr,
+				     skb->dev->ifindex);
+	}
+out:
+	read_unlock(&raw_v4_lock);
+}
+
+void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	int err = 0;
+	int harderr = 0;
+
+	/* Report error on raw socket, if:
+	   1. User requested ip_recverr.
+	   2. Socket is connected (otherwise the error indication
+	      is useless without ip_recverr and error is hard.
+	 */
+	if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
+		return;
+
+	switch (type) {
+	default:
+	case ICMP_TIME_EXCEEDED:
+		err = EHOSTUNREACH;
+		break;
+	case ICMP_SOURCE_QUENCH:
+		return;
+	case ICMP_PARAMETERPROB:
+		err = EPROTO;
+		harderr = 1;
+		break;
+	case ICMP_DEST_UNREACH:
+		err = EHOSTUNREACH;
+		if (code > NR_ICMP_UNREACH)
+			break;
+		err = icmp_err_convert[code].errno;
+		harderr = icmp_err_convert[code].fatal;
+		if (code == ICMP_FRAG_NEEDED) {
+			harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
+			err = EMSGSIZE;
+		}
+	}
+
+	if (inet->recverr) {
+		struct iphdr *iph = (struct iphdr*)skb->data;
+		u8 *payload = skb->data + (iph->ihl << 2);
+
+		if (inet->hdrincl)
+			payload = skb->data;
+		ip_icmp_error(sk, skb, err, 0, info, payload);
+	}
+
+	if (inet->recverr || harderr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	}
+}
+
+static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
+{
+	/* Charge it to the socket. */
+	
+	if (sock_queue_rcv_skb(sk, skb) < 0) {
+		/* FIXME: increment a raw drops counter here */
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	return NET_RX_SUCCESS;
+}
+
+int raw_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb_push(skb, skb->data - skb->nh.raw);
+
+	raw_rcv_skb(sk, skb);
+	return 0;
+}
+
+static int raw_send_hdrinc(struct sock *sk, void *from, int length,
+			struct rtable *rt, 
+			unsigned int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int hh_len;
+	struct iphdr *iph;
+	struct sk_buff *skb;
+	int err;
+
+	if (length > rt->u.dst.dev->mtu) {
+		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
+			       rt->u.dst.dev->mtu);
+		return -EMSGSIZE;
+	}
+	if (flags&MSG_PROBE)
+		goto out;
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	skb = sock_alloc_send_skb(sk, length+hh_len+15,
+				  flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto error; 
+	skb_reserve(skb, hh_len);
+
+	skb->priority = sk->sk_priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb->h.raw = skb->nh.raw;
+	err = memcpy_fromiovecend((void *)iph, from, 0, length);
+	if (err)
+		goto error_fault;
+
+	/* We don't modify invalid header */
+	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
+		if (!iph->saddr)
+			iph->saddr = rt->rt_src;
+		iph->check   = 0;
+		iph->tot_len = htons(length);
+		if (!iph->id)
+			ip_select_ident(iph, &rt->u.dst, NULL);
+
+		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+	}
+
+	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		      dst_output);
+	if (err > 0)
+		err = inet->recverr ? net_xmit_errno(err) : 0;
+	if (err)
+		goto error;
+out:
+	return 0;
+
+error_fault:
+	err = -EFAULT;
+	kfree_skb(skb);
+error:
+	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	return err; 
+}
+
+static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+{
+	struct iovec *iov;
+	u8 __user *type = NULL;
+	u8 __user *code = NULL;
+	int probed = 0;
+	int i;
+
+	if (!msg->msg_iov)
+		return;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		iov = &msg->msg_iov[i];
+		if (!iov)
+			continue;
+
+		switch (fl->proto) {
+		case IPPROTO_ICMP:
+			/* check if one-byte field is readable or not. */
+			if (iov->iov_base && iov->iov_len < 1)
+				break;
+
+			if (!type) {
+				type = iov->iov_base;
+				/* check if code field is readable or not. */
+				if (iov->iov_len > 1)
+					code = type + 1;
+			} else if (!code)
+				code = iov->iov_base;
+
+			if (type && code) {
+				get_user(fl->fl_icmp_type, type);
+				__get_user(fl->fl_icmp_code, code);
+				probed = 1;
+			}
+			break;
+		default:
+			probed = 1;
+			break;
+		}
+		if (probed)
+			break;
+	}
+}
+
+static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipcm_cookie ipc;
+	struct rtable *rt = NULL;
+	int free = 0;
+	u32 daddr;
+	u32 saddr;
+	u8  tos;
+	int err;
+
+	err = -EMSGSIZE;
+	if (len < 0 || len > 0xFFFF)
+		goto out;
+
+	/*
+	 *	Check the flags.
+	 */
+
+	err = -EOPNOTSUPP;
+	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
+		goto out;               /* compatibility */
+			 
+	/*
+	 *	Get and verify the address. 
+	 */
+
+	if (msg->msg_namelen) {
+		struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
+		err = -EINVAL;
+		if (msg->msg_namelen < sizeof(*usin))
+			goto out;
+		if (usin->sin_family != AF_INET) {
+			static int complained;
+			if (!complained++)
+				printk(KERN_INFO "%s forgot to set AF_INET in "
+						 "raw sendmsg. Fix it!\n",
+						 current->comm);
+			err = -EAFNOSUPPORT;
+			if (usin->sin_family)
+				goto out;
+		}
+		daddr = usin->sin_addr.s_addr;
+		/* ANK: I did not forget to get protocol from port field.
+		 * I just do not know, who uses this weirdness.
+		 * IP_HDRINCL is much more convenient.
+		 */
+	} else {
+		err = -EDESTADDRREQ;
+		if (sk->sk_state != TCP_ESTABLISHED) 
+			goto out;
+		daddr = inet->daddr;
+	}
+
+	ipc.addr = inet->saddr;
+	ipc.opt = NULL;
+	ipc.oif = sk->sk_bound_dev_if;
+
+	if (msg->msg_controllen) {
+		err = ip_cmsg_send(msg, &ipc);
+		if (err)
+			goto out;
+		if (ipc.opt)
+			free = 1;
+	}
+
+	saddr = ipc.addr;
+	ipc.addr = daddr;
+
+	if (!ipc.opt)
+		ipc.opt = inet->opt;
+
+	if (ipc.opt) {
+		err = -EINVAL;
+		/* Linux does not mangle headers on raw sockets,
+		 * so that IP options + IP_HDRINCL is non-sense.
+		 */
+		if (inet->hdrincl)
+			goto done;
+		if (ipc.opt->srr) {
+			if (!daddr)
+				goto done;
+			daddr = ipc.opt->faddr;
+		}
+	}
+	tos = RT_CONN_FLAGS(sk);
+	if (msg->msg_flags & MSG_DONTROUTE)
+		tos |= RTO_ONLINK;
+
+	if (MULTICAST(daddr)) {
+		if (!ipc.oif)
+			ipc.oif = inet->mc_index;
+		if (!saddr)
+			saddr = inet->mc_addr;
+	}
+
+	{
+		struct flowi fl = { .oif = ipc.oif,
+				    .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = saddr,
+						.tos = tos } },
+				    .proto = inet->hdrincl ? IPPROTO_RAW :
+					    		     sk->sk_protocol,
+				  };
+		if (!inet->hdrincl)
+			raw_probe_proto_opt(&fl, msg);
+
+		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+	}
+	if (err)
+		goto done;
+
+	err = -EACCES;
+	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
+		goto done;
+
+	if (msg->msg_flags & MSG_CONFIRM)
+		goto do_confirm;
+back_from_confirm:
+
+	if (inet->hdrincl)
+		err = raw_send_hdrinc(sk, msg->msg_iov, len, 
+					rt, msg->msg_flags);
+	
+	 else {
+		if (!ipc.addr)
+			ipc.addr = rt->rt_dst;
+		lock_sock(sk);
+		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
+					&ipc, rt, msg->msg_flags);
+		if (err)
+			ip_flush_pending_frames(sk);
+		else if (!(msg->msg_flags & MSG_MORE))
+			err = ip_push_pending_frames(sk);
+		release_sock(sk);
+	}
+done:
+	if (free)
+		kfree(ipc.opt);
+	ip_rt_put(rt);
+
+out:	return err < 0 ? err : len;
+
+do_confirm:
+	dst_confirm(&rt->u.dst);
+	if (!(msg->msg_flags & MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto done;
+}
+
+static void raw_close(struct sock *sk, long timeout)
+{
+        /*
+	 * Raw sockets may have direct kernel refereneces. Kill them.
+	 */
+	ip_ra_control(sk, 0, NULL);
+
+	sk_common_release(sk);
+}
+
+/* This gets rid of all the nasties in af_inet. -DaveM */
+static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+	int ret = -EINVAL;
+	int chk_addr_ret;
+
+	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
+		goto out;
+	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+	ret = -EADDRNOTAVAIL;
+	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
+	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+		goto out;
+	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
+		inet->saddr = 0;  /* Use device */
+	sk_dst_reset(sk);
+	ret = 0;
+out:	return ret;
+}
+
+/*
+ *	This should be easy, if there is something there
+ *	we return it, otherwise we block.
+ */
+
+static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len, int noblock, int flags, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	size_t copied = 0;
+	int err = -EOPNOTSUPP;
+	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+	struct sk_buff *skb;
+
+	if (flags & MSG_OOB)
+		goto out;
+
+	if (addr_len)
+		*addr_len = sizeof(*sin);
+
+	if (flags & MSG_ERRQUEUE) {
+		err = ip_recv_error(sk, msg, len);
+		goto out;
+	}
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto done;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	/* Copy the address. */
+	if (sin) {
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+	if (inet->cmsg_flags)
+		ip_cmsg_recv(msg, skb);
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+done:
+	skb_free_datagram(sk, skb);
+out:	return err ? err : copied;
+}
+
+static int raw_init(struct sock *sk)
+{
+	struct raw_sock *rp = raw_sk(sk);
+
+	if (inet_sk(sk)->num == IPPROTO_ICMP)
+		memset(&rp->filter, 0, sizeof(rp->filter));
+	return 0;
+}
+
+static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen)
+{
+	if (optlen > sizeof(struct icmp_filter))
+		optlen = sizeof(struct icmp_filter);
+	if (copy_from_user(&raw_sk(sk)->filter, optval, optlen))
+		return -EFAULT;
+	return 0;
+}
+
+static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen)
+{
+	int len, ret = -EFAULT;
+
+	if (get_user(len, optlen))
+		goto out;
+	ret = -EINVAL;
+	if (len < 0)
+		goto out;
+	if (len > sizeof(struct icmp_filter))
+		len = sizeof(struct icmp_filter);
+	ret = -EFAULT;
+	if (put_user(len, optlen) ||
+	    copy_to_user(optval, &raw_sk(sk)->filter, len))
+		goto out;
+	ret = 0;
+out:	return ret;
+}
+
+static int raw_setsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int optlen)
+{
+	if (level != SOL_RAW)
+		return ip_setsockopt(sk, level, optname, optval, optlen);
+
+	if (optname == ICMP_FILTER) {
+		if (inet_sk(sk)->num != IPPROTO_ICMP)
+			return -EOPNOTSUPP;
+		else
+			return raw_seticmpfilter(sk, optval, optlen);
+	}
+	return -ENOPROTOOPT;
+}
+
+static int raw_getsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int __user *optlen)
+{
+	if (level != SOL_RAW)
+		return ip_getsockopt(sk, level, optname, optval, optlen);
+
+	if (optname == ICMP_FILTER) {
+		if (inet_sk(sk)->num != IPPROTO_ICMP)
+			return -EOPNOTSUPP;
+		else
+			return raw_geticmpfilter(sk, optval, optlen);
+	}
+	return -ENOPROTOOPT;
+}
+
+static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	switch (cmd) {
+		case SIOCOUTQ: {
+			int amount = atomic_read(&sk->sk_wmem_alloc);
+			return put_user(amount, (int __user *)arg);
+		}
+		case SIOCINQ: {
+			struct sk_buff *skb;
+			int amount = 0;
+
+			spin_lock_irq(&sk->sk_receive_queue.lock);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb != NULL)
+				amount = skb->len;
+			spin_unlock_irq(&sk->sk_receive_queue.lock);
+			return put_user(amount, (int __user *)arg);
+		}
+
+		default:
+#ifdef CONFIG_IP_MROUTE
+			return ipmr_ioctl(sk, cmd, (void __user *)arg);
+#else
+			return -ENOIOCTLCMD;
+#endif
+	}
+}
+
+struct proto raw_prot = {
+	.name =		"RAW",
+	.owner =	THIS_MODULE,
+	.close =	raw_close,
+	.connect =	ip4_datagram_connect,
+	.disconnect =	udp_disconnect,
+	.ioctl =	raw_ioctl,
+	.init =		raw_init,
+	.setsockopt =	raw_setsockopt,
+	.getsockopt =	raw_getsockopt,
+	.sendmsg =	raw_sendmsg,
+	.recvmsg =	raw_recvmsg,
+	.bind =		raw_bind,
+	.backlog_rcv =	raw_rcv_skb,
+	.hash =		raw_v4_hash,
+	.unhash =	raw_v4_unhash,
+	.obj_size =	sizeof(struct raw_sock),
+};
+
+#ifdef CONFIG_PROC_FS
+struct raw_iter_state {
+	int bucket;
+};
+
+#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private)
+
+static struct sock *raw_get_first(struct seq_file *seq)
+{
+	struct sock *sk;
+	struct raw_iter_state* state = raw_seq_private(seq);
+
+	for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
+		struct hlist_node *node;
+
+		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
+			if (sk->sk_family == PF_INET)
+				goto found;
+	}
+	sk = NULL;
+found:
+	return sk;
+}
+
+static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
+{
+	struct raw_iter_state* state = raw_seq_private(seq);
+
+	do {
+		sk = sk_next(sk);
+try_again:
+		;
+	} while (sk && sk->sk_family != PF_INET);
+
+	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
+		sk = sk_head(&raw_v4_htable[state->bucket]);
+		goto try_again;
+	}
+	return sk;
+}
+
+static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct sock *sk = raw_get_first(seq);
+
+	if (sk)
+		while (pos && (sk = raw_get_next(seq, sk)) != NULL)
+			--pos;
+	return pos ? NULL : sk;
+}
+
+static void *raw_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&raw_v4_lock);
+	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *sk;
+
+	if (v == SEQ_START_TOKEN)
+		sk = raw_get_first(seq);
+	else
+		sk = raw_get_next(seq, v);
+	++*pos;
+	return sk;
+}
+
+static void raw_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&raw_v4_lock);
+}
+
+static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+{
+	struct inet_sock *inet = inet_sk(sp);
+	unsigned int dest = inet->daddr,
+		     src = inet->rcv_saddr;
+	__u16 destp = 0,
+	      srcp  = inet->num;
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
+		i, src, srcp, dest, destp, sp->sk_state, 
+		atomic_read(&sp->sk_wmem_alloc),
+		atomic_read(&sp->sk_rmem_alloc),
+		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
+		atomic_read(&sp->sk_refcnt), sp);
+	return tmpbuf;
+}
+
+static int raw_seq_show(struct seq_file *seq, void *v)
+{
+	char tmpbuf[129];
+
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "%-127s\n",
+			       "  sl  local_address rem_address   st tx_queue "
+			       "rx_queue tr tm->when retrnsmt   uid  timeout "
+			       "inode");
+	else {
+		struct raw_iter_state *state = raw_seq_private(seq);
+
+		seq_printf(seq, "%-127s\n",
+			   get_raw_sock(v, tmpbuf, state->bucket));
+	}
+	return 0;
+}
+
+static struct seq_operations raw_seq_ops = {
+	.start = raw_seq_start,
+	.next  = raw_seq_next,
+	.stop  = raw_seq_stop,
+	.show  = raw_seq_show,
+};
+
+static int raw_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct raw_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	rc = seq_open(file, &raw_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations raw_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = raw_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release_private,
+};
+
+int __init raw_proc_init(void)
+{
+	if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+void __init raw_proc_exit(void)
+{
+	proc_net_remove("raw");
+}
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
new file mode 100644
index 00000000000..9f91a116d91
--- /dev/null
+++ b/net/ipv4/route.c
@@ -0,0 +1,3177 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		ROUTE - implementation of the IP router.
+ *
+ * Version:	$Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Fixes:
+ *		Alan Cox	:	Verify area fixes.
+ *		Alan Cox	:	cli() protects routing changes
+ *		Rui Oliveira	:	ICMP routing table updates
+ *		(rco@di.uminho.pt)	Routing table insertion and update
+ *		Linus Torvalds	:	Rewrote bits to be sensible
+ *		Alan Cox	:	Added BSD route gw semantics
+ *		Alan Cox	:	Super /proc >4K 
+ *		Alan Cox	:	MTU in route table
+ *		Alan Cox	: 	MSS actually. Also added the window
+ *					clamper.
+ *		Sam Lantinga	:	Fixed route matching in rt_del()
+ *		Alan Cox	:	Routing cache support.
+ *		Alan Cox	:	Removed compatibility cruft.
+ *		Alan Cox	:	RTF_REJECT support.
+ *		Alan Cox	:	TCP irtt support.
+ *		Jonathan Naylor	:	Added Metric support.
+ *	Miquel van Smoorenburg	:	BSD API fixes.
+ *	Miquel van Smoorenburg	:	Metrics.
+ *		Alan Cox	:	Use __u32 properly
+ *		Alan Cox	:	Aligned routing errors more closely with BSD
+ *					our system is still very different.
+ *		Alan Cox	:	Faster /proc handling
+ *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
+ *					routing caches and better behaviour.
+ *		
+ *		Olaf Erb	:	irtt wasn't being copied right.
+ *		Bjorn Ekwall	:	Kerneld route support.
+ *		Alan Cox	:	Multicast fixed (I hope)
+ * 		Pavel Krauz	:	Limited broadcast fixed
+ *		Mike McLagan	:	Routing by source
+ *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
+ *					route.c and rewritten from scratch.
+ *		Andi Kleen	:	Load-limit warning messages.
+ *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
+ *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
+ *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
+ *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
+ *		Marc Boucher	:	routing by fwmark
+ *	Robert Olsson		:	Added rt_cache statistics
+ *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/pkt_sched.h>
+#include <linux/mroute.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/rcupdate.h>
+#include <linux/times.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/inetpeer.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/tcp.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/ip_mp_alg.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#define RT_FL_TOS(oldflp) \
+    ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+
+#define IP_MAX_MTU	0xFFF0
+
+#define RT_GC_TIMEOUT (300*HZ)
+
+static int ip_rt_min_delay		= 2 * HZ;
+static int ip_rt_max_delay		= 10 * HZ;
+static int ip_rt_max_size;
+static int ip_rt_gc_timeout		= RT_GC_TIMEOUT;
+static int ip_rt_gc_interval		= 60 * HZ;
+static int ip_rt_gc_min_interval	= HZ / 2;
+static int ip_rt_redirect_number	= 9;
+static int ip_rt_redirect_load		= HZ / 50;
+static int ip_rt_redirect_silence	= ((HZ / 50) << (9 + 1));
+static int ip_rt_error_cost		= HZ;
+static int ip_rt_error_burst		= 5 * HZ;
+static int ip_rt_gc_elasticity		= 8;
+static int ip_rt_mtu_expires		= 10 * 60 * HZ;
+static int ip_rt_min_pmtu		= 512 + 20 + 20;
+static int ip_rt_min_advmss		= 256;
+static int ip_rt_secret_interval	= 10 * 60 * HZ;
+static unsigned long rt_deadline;
+
+#define RTprint(a...)	printk(KERN_DEBUG a)
+
+static struct timer_list rt_flush_timer;
+static struct timer_list rt_periodic_timer;
+static struct timer_list rt_secret_timer;
+
+/*
+ *	Interface to generic destination cache.
+ */
+
+static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+static void		 ipv4_dst_destroy(struct dst_entry *dst);
+static void		 ipv4_dst_ifdown(struct dst_entry *dst,
+					 struct net_device *dev, int how);
+static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+static void		 ipv4_link_failure(struct sk_buff *skb);
+static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static int rt_garbage_collect(void);
+
+
+static struct dst_ops ipv4_dst_ops = {
+	.family =		AF_INET,
+	.protocol =		__constant_htons(ETH_P_IP),
+	.gc =			rt_garbage_collect,
+	.check =		ipv4_dst_check,
+	.destroy =		ipv4_dst_destroy,
+	.ifdown =		ipv4_dst_ifdown,
+	.negative_advice =	ipv4_negative_advice,
+	.link_failure =		ipv4_link_failure,
+	.update_pmtu =		ip_rt_update_pmtu,
+	.entry_size =		sizeof(struct rtable),
+};
+
+#define ECN_OR_COST(class)	TC_PRIO_##class
+
+__u8 ip_tos2prio[16] = {
+	TC_PRIO_BESTEFFORT,
+	ECN_OR_COST(FILLER),
+	TC_PRIO_BESTEFFORT,
+	ECN_OR_COST(BESTEFFORT),
+	TC_PRIO_BULK,
+	ECN_OR_COST(BULK),
+	TC_PRIO_BULK,
+	ECN_OR_COST(BULK),
+	TC_PRIO_INTERACTIVE,
+	ECN_OR_COST(INTERACTIVE),
+	TC_PRIO_INTERACTIVE,
+	ECN_OR_COST(INTERACTIVE),
+	TC_PRIO_INTERACTIVE_BULK,
+	ECN_OR_COST(INTERACTIVE_BULK),
+	TC_PRIO_INTERACTIVE_BULK,
+	ECN_OR_COST(INTERACTIVE_BULK)
+};
+
+
+/*
+ * Route cache.
+ */
+
+/* The locking scheme is rather straight forward:
+ *
+ * 1) Read-Copy Update protects the buckets of the central route hash.
+ * 2) Only writers remove entries, and they hold the lock
+ *    as they look at rtable reference counts.
+ * 3) Only readers acquire references to rtable entries,
+ *    they do so with atomic increments and with the
+ *    lock held.
+ */
+
+struct rt_hash_bucket {
+	struct rtable	*chain;
+	spinlock_t	lock;
+} __attribute__((__aligned__(8)));
+
+static struct rt_hash_bucket 	*rt_hash_table;
+static unsigned			rt_hash_mask;
+static int			rt_hash_log;
+static unsigned int		rt_hash_rnd;
+
+struct rt_cache_stat *rt_cache_stat;
+
+static int rt_intern_hash(unsigned hash, struct rtable *rth,
+				struct rtable **res);
+
+static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
+{
+	return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd)
+		& rt_hash_mask);
+}
+
+#ifdef CONFIG_PROC_FS
+struct rt_cache_iter_state {
+	int bucket;
+};
+
+static struct rtable *rt_cache_get_first(struct seq_file *seq)
+{
+	struct rtable *r = NULL;
+	struct rt_cache_iter_state *st = seq->private;
+
+	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
+		rcu_read_lock_bh();
+		r = rt_hash_table[st->bucket].chain;
+		if (r)
+			break;
+		rcu_read_unlock_bh();
+	}
+	return r;
+}
+
+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
+{
+	struct rt_cache_iter_state *st = rcu_dereference(seq->private);
+
+	r = r->u.rt_next;
+	while (!r) {
+		rcu_read_unlock_bh();
+		if (--st->bucket < 0)
+			break;
+		rcu_read_lock_bh();
+		r = rt_hash_table[st->bucket].chain;
+	}
+	return r;
+}
+
+static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct rtable *r = rt_cache_get_first(seq);
+
+	if (r)
+		while (pos && (r = rt_cache_get_next(seq, r)))
+			--pos;
+	return pos ? NULL : r;
+}
+
+static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct rtable *r = NULL;
+
+	if (v == SEQ_START_TOKEN)
+		r = rt_cache_get_first(seq);
+	else
+		r = rt_cache_get_next(seq, v);
+	++*pos;
+	return r;
+}
+
+static void rt_cache_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v && v != SEQ_START_TOKEN)
+		rcu_read_unlock_bh();
+}
+
+static int rt_cache_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "%-127s\n",
+			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
+			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
+			   "HHUptod\tSpecDst");
+	else {
+		struct rtable *r = v;
+		char temp[256];
+
+		sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
+			      "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
+			r->u.dst.dev ? r->u.dst.dev->name : "*",
+			(unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
+			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
+			r->u.dst.__use, 0, (unsigned long)r->rt_src,
+			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
+			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric(&r->u.dst, RTAX_WINDOW),
+			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
+			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
+			r->fl.fl4_tos,
+			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
+			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
+				       dev_queue_xmit) : 0,
+			r->rt_spec_dst);
+		seq_printf(seq, "%-127s\n", temp);
+        }
+  	return 0;
+}
+
+static struct seq_operations rt_cache_seq_ops = {
+	.start  = rt_cache_seq_start,
+	.next   = rt_cache_seq_next,
+	.stop   = rt_cache_seq_stop,
+	.show   = rt_cache_seq_show,
+};
+
+static int rt_cache_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	rc = seq_open(file, &rt_cache_seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq          = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations rt_cache_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = rt_cache_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release_private,
+};
+
+
+static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(rt_cache_stat, cpu);
+	}
+	return NULL;
+}
+
+static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	int cpu;
+
+	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(rt_cache_stat, cpu);
+	}
+	return NULL;
+	
+}
+
+static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+
+static int rt_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	struct rt_cache_stat *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries  in_hit in_slow_tot in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+		return 0;
+	}
+	
+	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
+		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+		   atomic_read(&ipv4_dst_ops.entries),
+		   st->in_hit,
+		   st->in_slow_tot,
+		   st->in_slow_mc,
+		   st->in_no_route,
+		   st->in_brd,
+		   st->in_martian_dst,
+		   st->in_martian_src,
+
+		   st->out_hit,
+		   st->out_slow_tot,
+		   st->out_slow_mc, 
+
+		   st->gc_total,
+		   st->gc_ignored,
+		   st->gc_goal_miss,
+		   st->gc_dst_overflow,
+		   st->in_hlist_search,
+		   st->out_hlist_search
+		);
+	return 0;
+}
+
+static struct seq_operations rt_cpu_seq_ops = {
+	.start  = rt_cpu_seq_start,
+	.next   = rt_cpu_seq_next,
+	.stop   = rt_cpu_seq_stop,
+	.show   = rt_cpu_seq_show,
+};
+
+
+static int rt_cpu_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rt_cpu_seq_ops);
+}
+
+static struct file_operations rt_cpu_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = rt_cpu_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+  
+static __inline__ void rt_free(struct rtable *rt)
+{
+	multipath_remove(rt);
+	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+}
+
+static __inline__ void rt_drop(struct rtable *rt)
+{
+	multipath_remove(rt);
+	ip_rt_put(rt);
+	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+}
+
+static __inline__ int rt_fast_clean(struct rtable *rth)
+{
+	/* Kill broadcast/multicast entries very aggresively, if they
+	   collide in hash table with more useful entries */
+	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
+		rth->fl.iif && rth->u.rt_next;
+}
+
+static __inline__ int rt_valuable(struct rtable *rth)
+{
+	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
+		rth->u.dst.expires;
+}
+
+static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
+{
+	unsigned long age;
+	int ret = 0;
+
+	if (atomic_read(&rth->u.dst.__refcnt))
+		goto out;
+
+	ret = 1;
+	if (rth->u.dst.expires &&
+	    time_after_eq(jiffies, rth->u.dst.expires))
+		goto out;
+
+	age = jiffies - rth->u.dst.lastuse;
+	ret = 0;
+	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
+	    (age <= tmo2 && rt_valuable(rth)))
+		goto out;
+	ret = 1;
+out:	return ret;
+}
+
+/* Bits of score are:
+ * 31: very valuable
+ * 30: not quite useless
+ * 29..0: usage counter
+ */
+static inline u32 rt_score(struct rtable *rt)
+{
+	u32 score = jiffies - rt->u.dst.lastuse;
+
+	score = ~score & ~(3<<30);
+
+	if (rt_valuable(rt))
+		score |= (1<<31);
+
+	if (!rt->fl.iif ||
+	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
+		score |= (1<<30);
+
+	return score;
+}
+
+static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+{
+	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
+	       fl1->oif     == fl2->oif &&
+	       fl1->iif     == fl2->iif;
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
+						struct rtable *expentry,
+						int *removed_count)
+{
+	int passedexpired = 0;
+	struct rtable **nextstep = NULL;
+	struct rtable **rthp = chain_head;
+	struct rtable *rth;
+
+	if (removed_count)
+		*removed_count = 0;
+
+	while ((rth = *rthp) != NULL) {
+		if (rth == expentry)
+			passedexpired = 1;
+
+		if (((*rthp)->u.dst.flags & DST_BALANCED) != 0  &&
+		    compare_keys(&(*rthp)->fl, &expentry->fl)) {
+			if (*rthp == expentry) {
+				*rthp = rth->u.rt_next;
+				continue;
+			} else {
+				*rthp = rth->u.rt_next;
+				rt_free(rth);
+				if (removed_count)
+					++(*removed_count);
+			}
+		} else {
+			if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
+			    passedexpired && !nextstep)
+				nextstep = &rth->u.rt_next;
+
+			rthp = &rth->u.rt_next;
+		}
+	}
+
+	rt_free(expentry);
+	if (removed_count)
+		++(*removed_count);
+
+	return nextstep;
+}
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+
+
+/* This runs via a timer and thus is always in BH context. */
+static void rt_check_expire(unsigned long dummy)
+{
+	static int rover;
+	int i = rover, t;
+	struct rtable *rth, **rthp;
+	unsigned long now = jiffies;
+
+	for (t = ip_rt_gc_interval << rt_hash_log; t >= 0;
+	     t -= ip_rt_gc_timeout) {
+		unsigned long tmo = ip_rt_gc_timeout;
+
+		i = (i + 1) & rt_hash_mask;
+		rthp = &rt_hash_table[i].chain;
+
+		spin_lock(&rt_hash_table[i].lock);
+		while ((rth = *rthp) != NULL) {
+			if (rth->u.dst.expires) {
+				/* Entry is expired even if it is in use */
+				if (time_before_eq(now, rth->u.dst.expires)) {
+					tmo >>= 1;
+					rthp = &rth->u.rt_next;
+					continue;
+				}
+			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
+				tmo >>= 1;
+				rthp = &rth->u.rt_next;
+				continue;
+			}
+
+			/* Cleanup aged off entries. */
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+			/* remove all related balanced entries if necessary */
+			if (rth->u.dst.flags & DST_BALANCED) {
+				rthp = rt_remove_balanced_route(
+					&rt_hash_table[i].chain,
+					rth, NULL);
+				if (!rthp)
+					break;
+			} else {
+				*rthp = rth->u.rt_next;
+				rt_free(rth);
+			}
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+ 			*rthp = rth->u.rt_next;
+ 			rt_free(rth);
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+		}
+		spin_unlock(&rt_hash_table[i].lock);
+
+		/* Fallback loop breaker. */
+		if (time_after(jiffies, now))
+			break;
+	}
+	rover = i;
+	mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
+}
+
+/* This can run from both BH and non-BH contexts, the latter
+ * in the case of a forced flush event.
+ */
+static void rt_run_flush(unsigned long dummy)
+{
+	int i;
+	struct rtable *rth, *next;
+
+	rt_deadline = 0;
+
+	get_random_bytes(&rt_hash_rnd, 4);
+
+	for (i = rt_hash_mask; i >= 0; i--) {
+		spin_lock_bh(&rt_hash_table[i].lock);
+		rth = rt_hash_table[i].chain;
+		if (rth)
+			rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(&rt_hash_table[i].lock);
+
+		for (; rth; rth = next) {
+			next = rth->u.rt_next;
+			rt_free(rth);
+		}
+	}
+}
+
+static DEFINE_SPINLOCK(rt_flush_lock);
+
+void rt_cache_flush(int delay)
+{
+	unsigned long now = jiffies;
+	int user_mode = !in_softirq();
+
+	if (delay < 0)
+		delay = ip_rt_min_delay;
+
+	/* flush existing multipath state*/
+	multipath_flush();
+
+	spin_lock_bh(&rt_flush_lock);
+
+	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
+		long tmo = (long)(rt_deadline - now);
+
+		/* If flush timer is already running
+		   and flush request is not immediate (delay > 0):
+
+		   if deadline is not achieved, prolongate timer to "delay",
+		   otherwise fire it at deadline time.
+		 */
+
+		if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
+			tmo = 0;
+		
+		if (delay > tmo)
+			delay = tmo;
+	}
+
+	if (delay <= 0) {
+		spin_unlock_bh(&rt_flush_lock);
+		rt_run_flush(0);
+		return;
+	}
+
+	if (rt_deadline == 0)
+		rt_deadline = now + ip_rt_max_delay;
+
+	mod_timer(&rt_flush_timer, now+delay);
+	spin_unlock_bh(&rt_flush_lock);
+}
+
+static void rt_secret_rebuild(unsigned long dummy)
+{
+	unsigned long now = jiffies;
+
+	rt_cache_flush(0);
+	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
+}
+
+/*
+   Short description of GC goals.
+
+   We want to build algorithm, which will keep routing cache
+   at some equilibrium point, when number of aged off entries
+   is kept approximately equal to newly generated ones.
+
+   Current expiration strength is variable "expire".
+   We try to adjust it dynamically, so that if networking
+   is idle expires is large enough to keep enough of warm entries,
+   and when load increases it reduces to limit cache size.
+ */
+
+static int rt_garbage_collect(void)
+{
+	static unsigned long expire = RT_GC_TIMEOUT;
+	static unsigned long last_gc;
+	static int rover;
+	static int equilibrium;
+	struct rtable *rth, **rthp;
+	unsigned long now = jiffies;
+	int goal;
+
+	/*
+	 * Garbage collection is pretty expensive,
+	 * do not make it too frequently.
+	 */
+
+	RT_CACHE_STAT_INC(gc_total);
+
+	if (now - last_gc < ip_rt_gc_min_interval &&
+	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+		RT_CACHE_STAT_INC(gc_ignored);
+		goto out;
+	}
+
+	/* Calculate number of entries, which we want to expire now. */
+	goal = atomic_read(&ipv4_dst_ops.entries) -
+		(ip_rt_gc_elasticity << rt_hash_log);
+	if (goal <= 0) {
+		if (equilibrium < ipv4_dst_ops.gc_thresh)
+			equilibrium = ipv4_dst_ops.gc_thresh;
+		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+		if (goal > 0) {
+			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
+			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+		}
+	} else {
+		/* We are in dangerous area. Try to reduce cache really
+		 * aggressively.
+		 */
+		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
+		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+	}
+
+	if (now - last_gc >= ip_rt_gc_min_interval)
+		last_gc = now;
+
+	if (goal <= 0) {
+		equilibrium += goal;
+		goto work_done;
+	}
+
+	do {
+		int i, k;
+
+		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
+			unsigned long tmo = expire;
+
+			k = (k + 1) & rt_hash_mask;
+			rthp = &rt_hash_table[k].chain;
+			spin_lock_bh(&rt_hash_table[k].lock);
+			while ((rth = *rthp) != NULL) {
+				if (!rt_may_expire(rth, tmo, expire)) {
+					tmo >>= 1;
+					rthp = &rth->u.rt_next;
+					continue;
+				}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+				/* remove all related balanced entries
+				 * if necessary
+				 */
+				if (rth->u.dst.flags & DST_BALANCED) {
+					int r;
+
+					rthp = rt_remove_balanced_route(
+						&rt_hash_table[i].chain,
+						rth,
+						&r);
+					goal -= r;
+					if (!rthp)
+						break;
+				} else {
+					*rthp = rth->u.rt_next;
+					rt_free(rth);
+					goal--;
+				}
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+				*rthp = rth->u.rt_next;
+				rt_free(rth);
+				goal--;
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+			}
+			spin_unlock_bh(&rt_hash_table[k].lock);
+			if (goal <= 0)
+				break;
+		}
+		rover = k;
+
+		if (goal <= 0)
+			goto work_done;
+
+		/* Goal is not achieved. We stop process if:
+
+		   - if expire reduced to zero. Otherwise, expire is halfed.
+		   - if table is not full.
+		   - if we are called from interrupt.
+		   - jiffies check is just fallback/debug loop breaker.
+		     We will not spin here for long time in any case.
+		 */
+
+		RT_CACHE_STAT_INC(gc_goal_miss);
+
+		if (expire == 0)
+			break;
+
+		expire >>= 1;
+#if RT_CACHE_DEBUG >= 2
+		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
+				atomic_read(&ipv4_dst_ops.entries), goal, i);
+#endif
+
+		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+			goto out;
+	} while (!in_softirq() && time_before_eq(jiffies, now));
+
+	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+		goto out;
+	if (net_ratelimit())
+		printk(KERN_WARNING "dst cache overflow\n");
+	RT_CACHE_STAT_INC(gc_dst_overflow);
+	return 1;
+
+work_done:
+	expire += ip_rt_gc_min_interval;
+	if (expire > ip_rt_gc_timeout ||
+	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+		expire = ip_rt_gc_timeout;
+#if RT_CACHE_DEBUG >= 2
+	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
+			atomic_read(&ipv4_dst_ops.entries), goal, rover);
+#endif
+out:	return 0;
+}
+
+static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+{
+	struct rtable	*rth, **rthp;
+	unsigned long	now;
+	struct rtable *cand, **candp;
+	u32 		min_score;
+	int		chain_length;
+	int attempts = !in_softirq();
+
+restart:
+	chain_length = 0;
+	min_score = ~(u32)0;
+	cand = NULL;
+	candp = NULL;
+	now = jiffies;
+
+	rthp = &rt_hash_table[hash].chain;
+
+	spin_lock_bh(&rt_hash_table[hash].lock);
+	while ((rth = *rthp) != NULL) {
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+		if (!(rth->u.dst.flags & DST_BALANCED) &&
+		    compare_keys(&rth->fl, &rt->fl)) {
+#else
+		if (compare_keys(&rth->fl, &rt->fl)) {
+#endif
+			/* Put it first */
+			*rthp = rth->u.rt_next;
+			/*
+			 * Since lookup is lockfree, the deletion
+			 * must be visible to another weakly ordered CPU before
+			 * the insertion at the start of the hash chain.
+			 */
+			rcu_assign_pointer(rth->u.rt_next,
+					   rt_hash_table[hash].chain);
+			/*
+			 * Since lookup is lockfree, the update writes
+			 * must be ordered for consistency on SMP.
+			 */
+			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
+
+			rth->u.dst.__use++;
+			dst_hold(&rth->u.dst);
+			rth->u.dst.lastuse = now;
+			spin_unlock_bh(&rt_hash_table[hash].lock);
+
+			rt_drop(rt);
+			*rp = rth;
+			return 0;
+		}
+
+		if (!atomic_read(&rth->u.dst.__refcnt)) {
+			u32 score = rt_score(rth);
+
+			if (score <= min_score) {
+				cand = rth;
+				candp = rthp;
+				min_score = score;
+			}
+		}
+
+		chain_length++;
+
+		rthp = &rth->u.rt_next;
+	}
+
+	if (cand) {
+		/* ip_rt_gc_elasticity used to be average length of chain
+		 * length, when exceeded gc becomes really aggressive.
+		 *
+		 * The second limit is less certain. At the moment it allows
+		 * only 2 entries per bucket. We will see.
+		 */
+		if (chain_length > ip_rt_gc_elasticity) {
+			*candp = cand->u.rt_next;
+			rt_free(cand);
+		}
+	}
+
+	/* Try to bind route to arp only if it is output
+	   route or unicast forwarding path.
+	 */
+	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+		int err = arp_bind_neighbour(&rt->u.dst);
+		if (err) {
+			spin_unlock_bh(&rt_hash_table[hash].lock);
+
+			if (err != -ENOBUFS) {
+				rt_drop(rt);
+				return err;
+			}
+
+			/* Neighbour tables are full and nothing
+			   can be released. Try to shrink route cache,
+			   it is most likely it holds some neighbour records.
+			 */
+			if (attempts-- > 0) {
+				int saved_elasticity = ip_rt_gc_elasticity;
+				int saved_int = ip_rt_gc_min_interval;
+				ip_rt_gc_elasticity	= 1;
+				ip_rt_gc_min_interval	= 0;
+				rt_garbage_collect();
+				ip_rt_gc_min_interval	= saved_int;
+				ip_rt_gc_elasticity	= saved_elasticity;
+				goto restart;
+			}
+
+			if (net_ratelimit())
+				printk(KERN_WARNING "Neighbour table overflow.\n");
+			rt_drop(rt);
+			return -ENOBUFS;
+		}
+	}
+
+	rt->u.rt_next = rt_hash_table[hash].chain;
+#if RT_CACHE_DEBUG >= 2
+	if (rt->u.rt_next) {
+		struct rtable *trt;
+		printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
+		       NIPQUAD(rt->rt_dst));
+		for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
+			printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
+		printk("\n");
+	}
+#endif
+	rt_hash_table[hash].chain = rt;
+	spin_unlock_bh(&rt_hash_table[hash].lock);
+	*rp = rt;
+	return 0;
+}
+
+void rt_bind_peer(struct rtable *rt, int create)
+{
+	static DEFINE_SPINLOCK(rt_peer_lock);
+	struct inet_peer *peer;
+
+	peer = inet_getpeer(rt->rt_dst, create);
+
+	spin_lock_bh(&rt_peer_lock);
+	if (rt->peer == NULL) {
+		rt->peer = peer;
+		peer = NULL;
+	}
+	spin_unlock_bh(&rt_peer_lock);
+	if (peer)
+		inet_putpeer(peer);
+}
+
+/*
+ * Peer allocation may fail only in serious out-of-memory conditions.  However
+ * we still can generate some output.
+ * Random ID selection looks a bit dangerous because we have no chances to
+ * select ID being unique in a reasonable period of time.
+ * But broken packet identifier may be better than no packet at all.
+ */
+static void ip_select_fb_ident(struct iphdr *iph)
+{
+	static DEFINE_SPINLOCK(ip_fb_id_lock);
+	static u32 ip_fallback_id;
+	u32 salt;
+
+	spin_lock_bh(&ip_fb_id_lock);
+	salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
+	iph->id = htons(salt & 0xFFFF);
+	ip_fallback_id = salt;
+	spin_unlock_bh(&ip_fb_id_lock);
+}
+
+void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+{
+	struct rtable *rt = (struct rtable *) dst;
+
+	if (rt) {
+		if (rt->peer == NULL)
+			rt_bind_peer(rt, 1);
+
+		/* If peer is attached to destination, it is never detached,
+		   so that we need not to grab a lock to dereference it.
+		 */
+		if (rt->peer) {
+			iph->id = htons(inet_getid(rt->peer, more));
+			return;
+		}
+	} else
+		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", NET_CALLER(iph));
+
+	ip_select_fb_ident(iph);
+}
+
+static void rt_del(unsigned hash, struct rtable *rt)
+{
+	struct rtable **rthp;
+
+	spin_lock_bh(&rt_hash_table[hash].lock);
+	ip_rt_put(rt);
+	for (rthp = &rt_hash_table[hash].chain; *rthp;
+	     rthp = &(*rthp)->u.rt_next)
+		if (*rthp == rt) {
+			*rthp = rt->u.rt_next;
+			rt_free(rt);
+			break;
+		}
+	spin_unlock_bh(&rt_hash_table[hash].lock);
+}
+
+void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
+		    u32 saddr, u8 tos, struct net_device *dev)
+{
+	int i, k;
+	struct in_device *in_dev = in_dev_get(dev);
+	struct rtable *rth, **rthp;
+	u32  skeys[2] = { saddr, 0 };
+	int  ikeys[2] = { dev->ifindex, 0 };
+
+	tos &= IPTOS_RT_MASK;
+
+	if (!in_dev)
+		return;
+
+	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
+	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
+		goto reject_redirect;
+
+	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
+		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
+			goto reject_redirect;
+		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
+			goto reject_redirect;
+	} else {
+		if (inet_addr_type(new_gw) != RTN_UNICAST)
+			goto reject_redirect;
+	}
+
+	for (i = 0; i < 2; i++) {
+		for (k = 0; k < 2; k++) {
+			unsigned hash = rt_hash_code(daddr,
+						     skeys[i] ^ (ikeys[k] << 5),
+						     tos);
+
+			rthp=&rt_hash_table[hash].chain;
+
+			rcu_read_lock();
+			while ((rth = rcu_dereference(*rthp)) != NULL) {
+				struct rtable *rt;
+
+				if (rth->fl.fl4_dst != daddr ||
+				    rth->fl.fl4_src != skeys[i] ||
+				    rth->fl.fl4_tos != tos ||
+				    rth->fl.oif != ikeys[k] ||
+				    rth->fl.iif != 0) {
+					rthp = &rth->u.rt_next;
+					continue;
+				}
+
+				if (rth->rt_dst != daddr ||
+				    rth->rt_src != saddr ||
+				    rth->u.dst.error ||
+				    rth->rt_gateway != old_gw ||
+				    rth->u.dst.dev != dev)
+					break;
+
+				dst_hold(&rth->u.dst);
+				rcu_read_unlock();
+
+				rt = dst_alloc(&ipv4_dst_ops);
+				if (rt == NULL) {
+					ip_rt_put(rth);
+					in_dev_put(in_dev);
+					return;
+				}
+
+				/* Copy all the information. */
+				*rt = *rth;
+ 				INIT_RCU_HEAD(&rt->u.dst.rcu_head);
+				rt->u.dst.__use		= 1;
+				atomic_set(&rt->u.dst.__refcnt, 1);
+				rt->u.dst.child		= NULL;
+				if (rt->u.dst.dev)
+					dev_hold(rt->u.dst.dev);
+				if (rt->idev)
+					in_dev_hold(rt->idev);
+				rt->u.dst.obsolete	= 0;
+				rt->u.dst.lastuse	= jiffies;
+				rt->u.dst.path		= &rt->u.dst;
+				rt->u.dst.neighbour	= NULL;
+				rt->u.dst.hh		= NULL;
+				rt->u.dst.xfrm		= NULL;
+
+				rt->rt_flags		|= RTCF_REDIRECTED;
+
+				/* Gateway is different ... */
+				rt->rt_gateway		= new_gw;
+
+				/* Redirect received -> path was valid */
+				dst_confirm(&rth->u.dst);
+
+				if (rt->peer)
+					atomic_inc(&rt->peer->refcnt);
+
+				if (arp_bind_neighbour(&rt->u.dst) ||
+				    !(rt->u.dst.neighbour->nud_state &
+					    NUD_VALID)) {
+					if (rt->u.dst.neighbour)
+						neigh_event_send(rt->u.dst.neighbour, NULL);
+					ip_rt_put(rth);
+					rt_drop(rt);
+					goto do_next;
+				}
+
+				rt_del(hash, rth);
+				if (!rt_intern_hash(hash, rt, &rt))
+					ip_rt_put(rt);
+				goto do_next;
+			}
+			rcu_read_unlock();
+		do_next:
+			;
+		}
+	}
+	in_dev_put(in_dev);
+	return;
+
+reject_redirect:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
+		printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
+			"%u.%u.%u.%u ignored.\n"
+			"  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
+			"tos %02x\n",
+		       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
+		       NIPQUAD(saddr), NIPQUAD(daddr), tos);
+#endif
+	in_dev_put(in_dev);
+}
+
+static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+{
+	struct rtable *rt = (struct rtable*)dst;
+	struct dst_entry *ret = dst;
+
+	if (rt) {
+		if (dst->obsolete) {
+			ip_rt_put(rt);
+			ret = NULL;
+		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+			   rt->u.dst.expires) {
+			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
+						     rt->fl.fl4_src ^
+							(rt->fl.oif << 5),
+						     rt->fl.fl4_tos);
+#if RT_CACHE_DEBUG >= 1
+			printk(KERN_DEBUG "ip_rt_advice: redirect to "
+					  "%u.%u.%u.%u/%02x dropped\n",
+				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
+#endif
+			rt_del(hash, rt);
+			ret = NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Algorithm:
+ *	1. The first ip_rt_redirect_number redirects are sent
+ *	   with exponential backoff, then we stop sending them at all,
+ *	   assuming that the host ignores our redirects.
+ *	2. If we did not see packets requiring redirects
+ *	   during ip_rt_redirect_silence, we assume that the host
+ *	   forgot redirected route and start to send redirects again.
+ *
+ * This algorithm is much cheaper and more intelligent than dumb load limiting
+ * in icmp.c.
+ *
+ * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
+ * and "frag. need" (breaks PMTU discovery) in icmp.c.
+ */
+
+void ip_rt_send_redirect(struct sk_buff *skb)
+{
+	struct rtable *rt = (struct rtable*)skb->dst;
+	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
+
+	if (!in_dev)
+		return;
+
+	if (!IN_DEV_TX_REDIRECTS(in_dev))
+		goto out;
+
+	/* No redirected packets during ip_rt_redirect_silence;
+	 * reset the algorithm.
+	 */
+	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
+		rt->u.dst.rate_tokens = 0;
+
+	/* Too many ignored redirects; do not send anything
+	 * set u.dst.rate_last to the last seen redirected packet.
+	 */
+	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
+		rt->u.dst.rate_last = jiffies;
+		goto out;
+	}
+
+	/* Check for load limit; set rate_last to the latest sent
+	 * redirect.
+	 */
+	if (time_after(jiffies,
+		       (rt->u.dst.rate_last +
+			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		rt->u.dst.rate_last = jiffies;
+		++rt->u.dst.rate_tokens;
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+		if (IN_DEV_LOG_MARTIANS(in_dev) &&
+		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
+		    net_ratelimit())
+			printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
+				"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
+				NIPQUAD(rt->rt_src), rt->rt_iif,
+				NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
+#endif
+	}
+out:
+        in_dev_put(in_dev);
+}
+
+static int ip_error(struct sk_buff *skb)
+{
+	struct rtable *rt = (struct rtable*)skb->dst;
+	unsigned long now;
+	int code;
+
+	switch (rt->u.dst.error) {
+		case EINVAL:
+		default:
+			goto out;
+		case EHOSTUNREACH:
+			code = ICMP_HOST_UNREACH;
+			break;
+		case ENETUNREACH:
+			code = ICMP_NET_UNREACH;
+			break;
+		case EACCES:
+			code = ICMP_PKT_FILTERED;
+			break;
+	}
+
+	now = jiffies;
+	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
+	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
+		rt->u.dst.rate_tokens = ip_rt_error_burst;
+	rt->u.dst.rate_last = now;
+	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
+		rt->u.dst.rate_tokens -= ip_rt_error_cost;
+		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+	}
+
+out:	kfree_skb(skb);
+	return 0;
+} 
+
+/*
+ *	The last two values are not from the RFC but
+ *	are needed for AMPRnet AX.25 paths.
+ */
+
+static unsigned short mtu_plateau[] =
+{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
+
+static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
+{
+	int i;
+	
+	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
+		if (old_mtu > mtu_plateau[i])
+			return mtu_plateau[i];
+	return 68;
+}
+
+unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
+{
+	int i;
+	unsigned short old_mtu = ntohs(iph->tot_len);
+	struct rtable *rth;
+	u32  skeys[2] = { iph->saddr, 0, };
+	u32  daddr = iph->daddr;
+	u8   tos = iph->tos & IPTOS_RT_MASK;
+	unsigned short est_mtu = 0;
+
+	if (ipv4_config.no_pmtu_disc)
+		return 0;
+
+	for (i = 0; i < 2; i++) {
+		unsigned hash = rt_hash_code(daddr, skeys[i], tos);
+
+		rcu_read_lock();
+		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+		     rth = rcu_dereference(rth->u.rt_next)) {
+			if (rth->fl.fl4_dst == daddr &&
+			    rth->fl.fl4_src == skeys[i] &&
+			    rth->rt_dst  == daddr &&
+			    rth->rt_src  == iph->saddr &&
+			    rth->fl.fl4_tos == tos &&
+			    rth->fl.iif == 0 &&
+			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
+				unsigned short mtu = new_mtu;
+
+				if (new_mtu < 68 || new_mtu >= old_mtu) {
+
+					/* BSD 4.2 compatibility hack :-( */
+					if (mtu == 0 &&
+					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
+					    old_mtu >= 68 + (iph->ihl << 2))
+						old_mtu -= iph->ihl << 2;
+
+					mtu = guess_mtu(old_mtu);
+				}
+				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
+					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { 
+						dst_confirm(&rth->u.dst);
+						if (mtu < ip_rt_min_pmtu) {
+							mtu = ip_rt_min_pmtu;
+							rth->u.dst.metrics[RTAX_LOCK-1] |=
+								(1 << RTAX_MTU);
+						}
+						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
+						dst_set_expires(&rth->u.dst,
+							ip_rt_mtu_expires);
+					}
+					est_mtu = mtu;
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return est_mtu ? : new_mtu;
+}
+
+static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
+	    !(dst_metric_locked(dst, RTAX_MTU))) {
+		if (mtu < ip_rt_min_pmtu) {
+			mtu = ip_rt_min_pmtu;
+			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+		}
+		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_set_expires(dst, ip_rt_mtu_expires);
+	}
+}
+
+static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	return NULL;
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer = rt->peer;
+	struct in_device *idev = rt->idev;
+
+	if (peer) {
+		rt->peer = NULL;
+		inet_putpeer(peer);
+	}
+
+	if (idev) {
+		rt->idev = NULL;
+		in_dev_put(idev);
+	}
+}
+
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int how)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct in_device *idev = rt->idev;
+	if (dev != &loopback_dev && idev && idev->dev == dev) {
+		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
+		if (loopback_idev) {
+			rt->idev = loopback_idev;
+			in_dev_put(idev);
+		}
+	}
+}
+
+static void ipv4_link_failure(struct sk_buff *skb)
+{
+	struct rtable *rt;
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+
+	rt = (struct rtable *) skb->dst;
+	if (rt)
+		dst_set_expires(&rt->u.dst, 0);
+}
+
+static int ip_rt_bug(struct sk_buff *skb)
+{
+	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
+		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+		skb->dev ? skb->dev->name : "?");
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+   We do not cache source address of outgoing interface,
+   because it is used only by IP RR, TS and SRR options,
+   so that it out of fast path.
+
+   BTW remember: "addr" is allowed to be not aligned
+   in IP options!
+ */
+
+void ip_rt_get_source(u8 *addr, struct rtable *rt)
+{
+	u32 src;
+	struct fib_result res;
+
+	if (rt->fl.iif == 0)
+		src = rt->rt_src;
+	else if (fib_lookup(&rt->fl, &res) == 0) {
+		src = FIB_RES_PREFSRC(res);
+		fib_res_put(&res);
+	} else
+		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+					RT_SCOPE_UNIVERSE);
+	memcpy(addr, &src, 4);
+}
+
+#ifdef CONFIG_NET_CLS_ROUTE
+static void set_class_tag(struct rtable *rt, u32 tag)
+{
+	if (!(rt->u.dst.tclassid & 0xFFFF))
+		rt->u.dst.tclassid |= tag & 0xFFFF;
+	if (!(rt->u.dst.tclassid & 0xFFFF0000))
+		rt->u.dst.tclassid |= tag & 0xFFFF0000;
+}
+#endif
+
+static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+{
+	struct fib_info *fi = res->fi;
+
+	if (fi) {
+		if (FIB_RES_GW(*res) &&
+		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+			rt->rt_gateway = FIB_RES_GW(*res);
+		memcpy(rt->u.dst.metrics, fi->fib_metrics,
+		       sizeof(rt->u.dst.metrics));
+		if (fi->fib_mtu == 0) {
+			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
+			    rt->rt_gateway != rt->rt_dst &&
+			    rt->u.dst.dev->mtu > 576)
+				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+		}
+#ifdef CONFIG_NET_CLS_ROUTE
+		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+#endif
+	} else
+		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+
+	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
+		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+				       ip_rt_min_advmss);
+	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	set_class_tag(rt, fib_rules_tclass(res));
+#endif
+	set_class_tag(rt, itag);
+#endif
+        rt->rt_type = res->type;
+}
+
+static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
+				u8 tos, struct net_device *dev, int our)
+{
+	unsigned hash;
+	struct rtable *rth;
+	u32 spec_dst;
+	struct in_device *in_dev = in_dev_get(dev);
+	u32 itag = 0;
+
+	/* Primary sanity checks. */
+
+	if (in_dev == NULL)
+		return -EINVAL;
+
+	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
+	    skb->protocol != htons(ETH_P_IP))
+		goto e_inval;
+
+	if (ZERONET(saddr)) {
+		if (!LOCAL_MCAST(daddr))
+			goto e_inval;
+		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+	} else if (fib_validate_source(saddr, 0, tos, 0,
+					dev, &spec_dst, &itag) < 0)
+		goto e_inval;
+
+	rth = dst_alloc(&ipv4_dst_ops);
+	if (!rth)
+		goto e_nobufs;
+
+	rth->u.dst.output= ip_rt_bug;
+
+	atomic_set(&rth->u.dst.__refcnt, 1);
+	rth->u.dst.flags= DST_HOST;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	rth->fl.fl4_dst	= daddr;
+	rth->rt_dst	= daddr;
+	rth->fl.fl4_tos	= tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	rth->fl.fl4_fwmark= skb->nfmark;
+#endif
+	rth->fl.fl4_src	= saddr;
+	rth->rt_src	= saddr;
+#ifdef CONFIG_NET_CLS_ROUTE
+	rth->u.dst.tclassid = itag;
+#endif
+	rth->rt_iif	=
+	rth->fl.iif	= dev->ifindex;
+	rth->u.dst.dev	= &loopback_dev;
+	dev_hold(rth->u.dst.dev);
+	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->fl.oif	= 0;
+	rth->rt_gateway	= daddr;
+	rth->rt_spec_dst= spec_dst;
+	rth->rt_type	= RTN_MULTICAST;
+	rth->rt_flags	= RTCF_MULTICAST;
+	if (our) {
+		rth->u.dst.input= ip_local_deliver;
+		rth->rt_flags |= RTCF_LOCAL;
+	}
+
+#ifdef CONFIG_IP_MROUTE
+	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+		rth->u.dst.input = ip_mr_input;
+#endif
+	RT_CACHE_STAT_INC(in_slow_mc);
+
+	in_dev_put(in_dev);
+	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
+	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
+
+e_nobufs:
+	in_dev_put(in_dev);
+	return -ENOBUFS;
+
+e_inval:
+	in_dev_put(in_dev);
+	return -EINVAL;
+}
+
+
+static void ip_handle_martian_source(struct net_device *dev,
+				     struct in_device *in_dev,
+				     struct sk_buff *skb,
+				     u32 daddr,
+				     u32 saddr) 
+{
+	RT_CACHE_STAT_INC(in_martian_src);
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
+		/*
+		 *	RFC1812 recommendation, if source is martian,
+		 *	the only hint is MAC header.
+		 */
+		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
+			"%u.%u.%u.%u, on dev %s\n",
+			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
+		if (dev->hard_header_len) {
+			int i;
+			unsigned char *p = skb->mac.raw;
+			printk(KERN_WARNING "ll header: ");
+			for (i = 0; i < dev->hard_header_len; i++, p++) {
+				printk("%02x", *p);
+				if (i < (dev->hard_header_len - 1))
+					printk(":");
+			}
+			printk("\n");
+		}
+	}
+#endif
+}
+
+static inline int __mkroute_input(struct sk_buff *skb, 
+				  struct fib_result* res, 
+				  struct in_device *in_dev, 
+				  u32 daddr, u32 saddr, u32 tos, 
+				  struct rtable **result) 
+{
+
+	struct rtable *rth;
+	int err;
+	struct in_device *out_dev;
+	unsigned flags = 0;
+	u32 spec_dst, itag;
+
+	/* get a working reference to the output device */
+	out_dev = in_dev_get(FIB_RES_DEV(*res));
+	if (out_dev == NULL) {
+		if (net_ratelimit())
+			printk(KERN_CRIT "Bug in ip_route_input" \
+			       "_slow(). Please, report\n");
+		return -EINVAL;
+	}
+
+
+	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 
+				  in_dev->dev, &spec_dst, &itag);
+	if (err < 0) {
+		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 
+					 saddr);
+		
+		err = -EINVAL;
+		goto cleanup;
+	}
+
+	if (err)
+		flags |= RTCF_DIRECTSRC;
+
+	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
+	    (IN_DEV_SHARED_MEDIA(out_dev) ||
+	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+		flags |= RTCF_DOREDIRECT;
+
+	if (skb->protocol != htons(ETH_P_IP)) {
+		/* Not IP (i.e. ARP). Do not create route, if it is
+		 * invalid for proxy arp. DNAT routes are always valid.
+		 */
+		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+	}
+
+
+	rth = dst_alloc(&ipv4_dst_ops);
+	if (!rth) {
+		err = -ENOBUFS;
+		goto cleanup;
+	}
+
+	rth->u.dst.flags= DST_HOST;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	if (res->fi->fib_nhs > 1)
+		rth->u.dst.flags |= DST_BALANCED;
+#endif
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	if (in_dev->cnf.no_xfrm)
+		rth->u.dst.flags |= DST_NOXFRM;
+	rth->fl.fl4_dst	= daddr;
+	rth->rt_dst	= daddr;
+	rth->fl.fl4_tos	= tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	rth->fl.fl4_fwmark= skb->nfmark;
+#endif
+	rth->fl.fl4_src	= saddr;
+	rth->rt_src	= saddr;
+	rth->rt_gateway	= daddr;
+	rth->rt_iif 	=
+		rth->fl.iif	= in_dev->dev->ifindex;
+	rth->u.dst.dev	= (out_dev)->dev;
+	dev_hold(rth->u.dst.dev);
+	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->fl.oif 	= 0;
+	rth->rt_spec_dst= spec_dst;
+
+	rth->u.dst.input = ip_forward;
+	rth->u.dst.output = ip_output;
+
+	rt_set_nexthop(rth, res, itag);
+
+	rth->rt_flags = flags;
+
+	*result = rth;
+	err = 0;
+ cleanup:
+	/* release the working reference to the output device */
+	in_dev_put(out_dev);
+	return err;
+}						
+
+static inline int ip_mkroute_input_def(struct sk_buff *skb, 
+				       struct fib_result* res, 
+				       const struct flowi *fl,
+				       struct in_device *in_dev,
+				       u32 daddr, u32 saddr, u32 tos)
+{
+	struct rtable* rth;
+	int err;
+	unsigned hash;
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
+		fib_select_multipath(fl, res);
+#endif
+
+	/* create a routing cache entry */
+	err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
+	if (err)
+		return err;
+	atomic_set(&rth->u.dst.__refcnt, 1);
+
+	/* put it into the cache */
+	hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
+	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);	
+}
+
+static inline int ip_mkroute_input(struct sk_buff *skb, 
+				   struct fib_result* res, 
+				   const struct flowi *fl,
+				   struct in_device *in_dev,
+				   u32 daddr, u32 saddr, u32 tos)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	struct rtable* rth;
+	unsigned char hop, hopcount, lasthop;
+	int err = -EINVAL;
+	unsigned int hash;
+
+	if (res->fi)
+		hopcount = res->fi->fib_nhs;
+	else
+		hopcount = 1;
+
+	lasthop = hopcount - 1;
+
+	/* distinguish between multipath and singlepath */
+	if (hopcount < 2)
+		return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
+					    saddr, tos);
+	
+	/* add all alternatives to the routing cache */
+	for (hop = 0; hop < hopcount; hop++) {
+		res->nh_sel = hop;
+
+		/* create a routing cache entry */
+		err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
+				      &rth);
+		if (err)
+			return err;
+
+		/* put it into the cache */
+		hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
+		err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+		if (err)
+			return err;
+
+		/* forward hop information to multipath impl. */
+		multipath_set_nhinfo(rth,
+				     FIB_RES_NETWORK(*res),
+				     FIB_RES_NETMASK(*res),
+				     res->prefixlen,
+				     &FIB_RES_NH(*res));
+
+		/* only for the last hop the reference count is handled
+		 * outside
+		 */
+		if (hop == lasthop)
+			atomic_set(&(skb->dst->__refcnt), 1);
+	}
+	return err;
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
+	return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
+#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
+}
+
+
+/*
+ *	NOTE. We drop all the packets that has local source
+ *	addresses, because every properly looped back packet
+ *	must have correct destination already attached by output routine.
+ *
+ *	Such approach solves two big problems:
+ *	1. Not simplex devices are handled properly.
+ *	2. IP spoofing attempts are filtered with 100% of guarantee.
+ */
+
+static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
+			       u8 tos, struct net_device *dev)
+{
+	struct fib_result res;
+	struct in_device *in_dev = in_dev_get(dev);
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = daddr,
+					.saddr = saddr,
+					.tos = tos,
+					.scope = RT_SCOPE_UNIVERSE,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+					.fwmark = skb->nfmark
+#endif
+				      } },
+			    .iif = dev->ifindex };
+	unsigned	flags = 0;
+	u32		itag = 0;
+	struct rtable * rth;
+	unsigned	hash;
+	u32		spec_dst;
+	int		err = -EINVAL;
+	int		free_res = 0;
+
+	/* IP on this device is disabled. */
+
+	if (!in_dev)
+		goto out;
+
+	/* Check for the most weird martians, which can be not detected
+	   by fib_lookup.
+	 */
+
+	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
+		goto martian_source;
+
+	if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
+		goto brd_input;
+
+	/* Accept zero addresses only to limited broadcast;
+	 * I even do not know to fix it or not. Waiting for complains :-)
+	 */
+	if (ZERONET(saddr))
+		goto martian_source;
+
+	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
+		goto martian_destination;
+
+	/*
+	 *	Now we are ready to route packet.
+	 */
+	if ((err = fib_lookup(&fl, &res)) != 0) {
+		if (!IN_DEV_FORWARD(in_dev))
+			goto e_inval;
+		goto no_route;
+	}
+	free_res = 1;
+
+	RT_CACHE_STAT_INC(in_slow_tot);
+
+	if (res.type == RTN_BROADCAST)
+		goto brd_input;
+
+	if (res.type == RTN_LOCAL) {
+		int result;
+		result = fib_validate_source(saddr, daddr, tos,
+					     loopback_dev.ifindex,
+					     dev, &spec_dst, &itag);
+		if (result < 0)
+			goto martian_source;
+		if (result)
+			flags |= RTCF_DIRECTSRC;
+		spec_dst = daddr;
+		goto local_input;
+	}
+
+	if (!IN_DEV_FORWARD(in_dev))
+		goto e_inval;
+	if (res.type != RTN_UNICAST)
+		goto martian_destination;
+
+	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
+	if (err == -ENOBUFS)
+		goto e_nobufs;
+	if (err == -EINVAL)
+		goto e_inval;
+	
+done:
+	in_dev_put(in_dev);
+	if (free_res)
+		fib_res_put(&res);
+out:	return err;
+
+brd_input:
+	if (skb->protocol != htons(ETH_P_IP))
+		goto e_inval;
+
+	if (ZERONET(saddr))
+		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+	else {
+		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
+					  &itag);
+		if (err < 0)
+			goto martian_source;
+		if (err)
+			flags |= RTCF_DIRECTSRC;
+	}
+	flags |= RTCF_BROADCAST;
+	res.type = RTN_BROADCAST;
+	RT_CACHE_STAT_INC(in_brd);
+
+local_input:
+	rth = dst_alloc(&ipv4_dst_ops);
+	if (!rth)
+		goto e_nobufs;
+
+	rth->u.dst.output= ip_rt_bug;
+
+	atomic_set(&rth->u.dst.__refcnt, 1);
+	rth->u.dst.flags= DST_HOST;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+	rth->fl.fl4_dst	= daddr;
+	rth->rt_dst	= daddr;
+	rth->fl.fl4_tos	= tos;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	rth->fl.fl4_fwmark= skb->nfmark;
+#endif
+	rth->fl.fl4_src	= saddr;
+	rth->rt_src	= saddr;
+#ifdef CONFIG_NET_CLS_ROUTE
+	rth->u.dst.tclassid = itag;
+#endif
+	rth->rt_iif	=
+	rth->fl.iif	= dev->ifindex;
+	rth->u.dst.dev	= &loopback_dev;
+	dev_hold(rth->u.dst.dev);
+	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->rt_gateway	= daddr;
+	rth->rt_spec_dst= spec_dst;
+	rth->u.dst.input= ip_local_deliver;
+	rth->rt_flags 	= flags|RTCF_LOCAL;
+	if (res.type == RTN_UNREACHABLE) {
+		rth->u.dst.input= ip_error;
+		rth->u.dst.error= -err;
+		rth->rt_flags 	&= ~RTCF_LOCAL;
+	}
+	rth->rt_type	= res.type;
+	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
+	err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+	goto done;
+
+no_route:
+	RT_CACHE_STAT_INC(in_no_route);
+	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+	res.type = RTN_UNREACHABLE;
+	goto local_input;
+
+	/*
+	 *	Do not cache martian addresses: they should be logged (RFC1812)
+	 */
+martian_destination:
+	RT_CACHE_STAT_INC(in_martian_dst);
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
+		printk(KERN_WARNING "martian destination %u.%u.%u.%u from "
+			"%u.%u.%u.%u, dev %s\n",
+			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
+#endif
+e_inval:
+	err = -EINVAL;
+	goto done;
+
+e_nobufs:
+	err = -ENOBUFS;
+	goto done;
+
+martian_source:
+	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
+	goto e_inval;
+}
+
+int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
+		   u8 tos, struct net_device *dev)
+{
+	struct rtable * rth;
+	unsigned	hash;
+	int iif = dev->ifindex;
+
+	tos &= IPTOS_RT_MASK;
+	hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
+
+	rcu_read_lock();
+	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+	     rth = rcu_dereference(rth->u.rt_next)) {
+		if (rth->fl.fl4_dst == daddr &&
+		    rth->fl.fl4_src == saddr &&
+		    rth->fl.iif == iif &&
+		    rth->fl.oif == 0 &&
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		    rth->fl.fl4_fwmark == skb->nfmark &&
+#endif
+		    rth->fl.fl4_tos == tos) {
+			rth->u.dst.lastuse = jiffies;
+			dst_hold(&rth->u.dst);
+			rth->u.dst.__use++;
+			RT_CACHE_STAT_INC(in_hit);
+			rcu_read_unlock();
+			skb->dst = (struct dst_entry*)rth;
+			return 0;
+		}
+		RT_CACHE_STAT_INC(in_hlist_search);
+	}
+	rcu_read_unlock();
+
+	/* Multicast recognition logic is moved from route cache to here.
+	   The problem was that too many Ethernet cards have broken/missing
+	   hardware multicast filters :-( As result the host on multicasting
+	   network acquires a lot of useless route cache entries, sort of
+	   SDR messages from all the world. Now we try to get rid of them.
+	   Really, provided software IP multicast filter is organized
+	   reasonably (at least, hashed), it does not result in a slowdown
+	   comparing with route cache reject entries.
+	   Note, that multicast routers are not affected, because
+	   route cache entry is created eventually.
+	 */
+	if (MULTICAST(daddr)) {
+		struct in_device *in_dev;
+
+		rcu_read_lock();
+		if ((in_dev = __in_dev_get(dev)) != NULL) {
+			int our = ip_check_mc(in_dev, daddr, saddr,
+				skb->nh.iph->protocol);
+			if (our
+#ifdef CONFIG_IP_MROUTE
+			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+#endif
+			    ) {
+				rcu_read_unlock();
+				return ip_route_input_mc(skb, daddr, saddr,
+							 tos, dev, our);
+			}
+		}
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
+}
+
+static inline int __mkroute_output(struct rtable **result,
+				   struct fib_result* res, 
+				   const struct flowi *fl,
+				   const struct flowi *oldflp, 
+				   struct net_device *dev_out, 
+				   unsigned flags) 
+{
+	struct rtable *rth;
+	struct in_device *in_dev;
+	u32 tos = RT_FL_TOS(oldflp);
+	int err = 0;
+
+	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+		return -EINVAL;
+
+	if (fl->fl4_dst == 0xFFFFFFFF)
+		res->type = RTN_BROADCAST;
+	else if (MULTICAST(fl->fl4_dst))
+		res->type = RTN_MULTICAST;
+	else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
+		return -EINVAL;
+
+	if (dev_out->flags & IFF_LOOPBACK)
+		flags |= RTCF_LOCAL;
+
+	/* get work reference to inet device */
+	in_dev = in_dev_get(dev_out);
+	if (!in_dev)
+		return -EINVAL;
+
+	if (res->type == RTN_BROADCAST) {
+		flags |= RTCF_BROADCAST | RTCF_LOCAL;
+		if (res->fi) {
+			fib_info_put(res->fi);
+			res->fi = NULL;
+		}
+	} else if (res->type == RTN_MULTICAST) {
+		flags |= RTCF_MULTICAST|RTCF_LOCAL;
+		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 
+				 oldflp->proto))
+			flags &= ~RTCF_LOCAL;
+		/* If multicast route do not exist use
+		   default one, but do not gateway in this case.
+		   Yes, it is hack.
+		 */
+		if (res->fi && res->prefixlen < 4) {
+			fib_info_put(res->fi);
+			res->fi = NULL;
+		}
+	}
+
+
+	rth = dst_alloc(&ipv4_dst_ops);
+	if (!rth) {
+		err = -ENOBUFS;
+		goto cleanup;
+	}		
+
+	rth->u.dst.flags= DST_HOST;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	if (res->fi) {
+		rth->rt_multipath_alg = res->fi->fib_mp_alg;
+		if (res->fi->fib_nhs > 1)
+			rth->u.dst.flags |= DST_BALANCED;
+	}
+#endif
+	if (in_dev->cnf.no_xfrm)
+		rth->u.dst.flags |= DST_NOXFRM;
+	if (in_dev->cnf.no_policy)
+		rth->u.dst.flags |= DST_NOPOLICY;
+
+	rth->fl.fl4_dst	= oldflp->fl4_dst;
+	rth->fl.fl4_tos	= tos;
+	rth->fl.fl4_src	= oldflp->fl4_src;
+	rth->fl.oif	= oldflp->oif;
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
+#endif
+	rth->rt_dst	= fl->fl4_dst;
+	rth->rt_src	= fl->fl4_src;
+	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
+	/* get references to the devices that are to be hold by the routing 
+	   cache entry */
+	rth->u.dst.dev	= dev_out;
+	dev_hold(dev_out);
+	rth->idev	= in_dev_get(dev_out);
+	rth->rt_gateway = fl->fl4_dst;
+	rth->rt_spec_dst= fl->fl4_src;
+
+	rth->u.dst.output=ip_output;
+
+	RT_CACHE_STAT_INC(out_slow_tot);
+
+	if (flags & RTCF_LOCAL) {
+		rth->u.dst.input = ip_local_deliver;
+		rth->rt_spec_dst = fl->fl4_dst;
+	}
+	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		rth->rt_spec_dst = fl->fl4_src;
+		if (flags & RTCF_LOCAL && 
+		    !(dev_out->flags & IFF_LOOPBACK)) {
+			rth->u.dst.output = ip_mc_output;
+			RT_CACHE_STAT_INC(out_slow_mc);
+		}
+#ifdef CONFIG_IP_MROUTE
+		if (res->type == RTN_MULTICAST) {
+			if (IN_DEV_MFORWARD(in_dev) &&
+			    !LOCAL_MCAST(oldflp->fl4_dst)) {
+				rth->u.dst.input = ip_mr_input;
+				rth->u.dst.output = ip_mc_output;
+			}
+		}
+#endif
+	}
+
+	rt_set_nexthop(rth, res, 0);
+
+	rth->rt_flags = flags;
+
+	*result = rth;
+ cleanup:
+	/* release work reference to inet device */
+	in_dev_put(in_dev);
+
+	return err;
+}
+
+static inline int ip_mkroute_output_def(struct rtable **rp,
+					struct fib_result* res,
+					const struct flowi *fl,
+					const struct flowi *oldflp,
+					struct net_device *dev_out,
+					unsigned flags)
+{
+	struct rtable *rth;
+	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
+	unsigned hash;
+	if (err == 0) {
+		u32 tos = RT_FL_TOS(oldflp);
+
+		atomic_set(&rth->u.dst.__refcnt, 1);
+		
+		hash = rt_hash_code(oldflp->fl4_dst, 
+				    oldflp->fl4_src ^ (oldflp->oif << 5), tos);
+		err = rt_intern_hash(hash, rth, rp);
+	}
+	
+	return err;
+}
+
+static inline int ip_mkroute_output(struct rtable** rp,
+				    struct fib_result* res,
+				    const struct flowi *fl,
+				    const struct flowi *oldflp,
+				    struct net_device *dev_out,
+				    unsigned flags)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	u32 tos = RT_FL_TOS(oldflp);
+	unsigned char hop;
+	unsigned hash;
+	int err = -EINVAL;
+	struct rtable *rth;
+
+	if (res->fi && res->fi->fib_nhs > 1) {
+		unsigned char hopcount = res->fi->fib_nhs;
+
+		for (hop = 0; hop < hopcount; hop++) {
+			struct net_device *dev2nexthop;
+
+			res->nh_sel = hop;
+
+			/* hold a work reference to the output device */
+			dev2nexthop = FIB_RES_DEV(*res);
+			dev_hold(dev2nexthop);
+
+			err = __mkroute_output(&rth, res, fl, oldflp,
+					       dev2nexthop, flags);
+
+			if (err != 0)
+				goto cleanup;
+
+			hash = rt_hash_code(oldflp->fl4_dst, 
+					    oldflp->fl4_src ^
+					    (oldflp->oif << 5), tos);
+			err = rt_intern_hash(hash, rth, rp);
+
+			/* forward hop information to multipath impl. */
+			multipath_set_nhinfo(rth,
+					     FIB_RES_NETWORK(*res),
+					     FIB_RES_NETMASK(*res),
+					     res->prefixlen,
+					     &FIB_RES_NH(*res));
+		cleanup:
+			/* release work reference to output device */
+			dev_put(dev2nexthop);
+
+			if (err != 0)
+				return err;
+		}
+		atomic_set(&(*rp)->u.dst.__refcnt, 1);
+		return err;
+	} else {
+		return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
+					     flags);
+	}
+#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+	return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
+#endif
+}
+
+/*
+ * Major route resolver routine.
+ */
+
+static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+{
+	u32 tos	= RT_FL_TOS(oldflp);
+	struct flowi fl = { .nl_u = { .ip4_u =
+				      { .daddr = oldflp->fl4_dst,
+					.saddr = oldflp->fl4_src,
+					.tos = tos & IPTOS_RT_MASK,
+					.scope = ((tos & RTO_ONLINK) ?
+						  RT_SCOPE_LINK :
+						  RT_SCOPE_UNIVERSE),
+#ifdef CONFIG_IP_ROUTE_FWMARK
+					.fwmark = oldflp->fl4_fwmark
+#endif
+				      } },
+			    .iif = loopback_dev.ifindex,
+			    .oif = oldflp->oif };
+	struct fib_result res;
+	unsigned flags = 0;
+	struct net_device *dev_out = NULL;
+	int free_res = 0;
+	int err;
+
+
+	res.fi		= NULL;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r		= NULL;
+#endif
+
+	if (oldflp->fl4_src) {
+		err = -EINVAL;
+		if (MULTICAST(oldflp->fl4_src) ||
+		    BADCLASS(oldflp->fl4_src) ||
+		    ZERONET(oldflp->fl4_src))
+			goto out;
+
+		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+		dev_out = ip_dev_find(oldflp->fl4_src);
+		if (dev_out == NULL)
+			goto out;
+
+		/* I removed check for oif == dev_out->oif here.
+		   It was wrong for two reasons:
+		   1. ip_dev_find(saddr) can return wrong iface, if saddr is
+		      assigned to multiple interfaces.
+		   2. Moreover, we are allowed to send packets with saddr
+		      of another iface. --ANK
+		 */
+
+		if (oldflp->oif == 0
+		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
+			/* Special hack: user can direct multicasts
+			   and limited broadcast via necessary interface
+			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
+			   This hack is not just for fun, it allows
+			   vic,vat and friends to work.
+			   They bind socket to loopback, set ttl to zero
+			   and expect that it will work.
+			   From the viewpoint of routing cache they are broken,
+			   because we are not allowed to build multicast path
+			   with loopback source addr (look, routing cache
+			   cannot know, that ttl is zero, so that packet
+			   will not leave this host and route is valid).
+			   Luckily, this hack is good workaround.
+			 */
+
+			fl.oif = dev_out->ifindex;
+			goto make_route;
+		}
+		if (dev_out)
+			dev_put(dev_out);
+		dev_out = NULL;
+	}
+
+
+	if (oldflp->oif) {
+		dev_out = dev_get_by_index(oldflp->oif);
+		err = -ENODEV;
+		if (dev_out == NULL)
+			goto out;
+		if (__in_dev_get(dev_out) == NULL) {
+			dev_put(dev_out);
+			goto out;	/* Wrong error code */
+		}
+
+		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
+			if (!fl.fl4_src)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_LINK);
+			goto make_route;
+		}
+		if (!fl.fl4_src) {
+			if (MULTICAST(oldflp->fl4_dst))
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      fl.fl4_scope);
+			else if (!oldflp->fl4_dst)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_HOST);
+		}
+	}
+
+	if (!fl.fl4_dst) {
+		fl.fl4_dst = fl.fl4_src;
+		if (!fl.fl4_dst)
+			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+		if (dev_out)
+			dev_put(dev_out);
+		dev_out = &loopback_dev;
+		dev_hold(dev_out);
+		fl.oif = loopback_dev.ifindex;
+		res.type = RTN_LOCAL;
+		flags |= RTCF_LOCAL;
+		goto make_route;
+	}
+
+	if (fib_lookup(&fl, &res)) {
+		res.fi = NULL;
+		if (oldflp->oif) {
+			/* Apparently, routing tables are wrong. Assume,
+			   that the destination is on link.
+
+			   WHY? DW.
+			   Because we are allowed to send to iface
+			   even if it has NO routes and NO assigned
+			   addresses. When oif is specified, routing
+			   tables are looked up with only one purpose:
+			   to catch if destination is gatewayed, rather than
+			   direct. Moreover, if MSG_DONTROUTE is set,
+			   we send packet, ignoring both routing tables
+			   and ifaddr state. --ANK
+
+
+			   We could make it even if oif is unknown,
+			   likely IPv6, but we do not.
+			 */
+
+			if (fl.fl4_src == 0)
+				fl.fl4_src = inet_select_addr(dev_out, 0,
+							      RT_SCOPE_LINK);
+			res.type = RTN_UNICAST;
+			goto make_route;
+		}
+		if (dev_out)
+			dev_put(dev_out);
+		err = -ENETUNREACH;
+		goto out;
+	}
+	free_res = 1;
+
+	if (res.type == RTN_LOCAL) {
+		if (!fl.fl4_src)
+			fl.fl4_src = fl.fl4_dst;
+		if (dev_out)
+			dev_put(dev_out);
+		dev_out = &loopback_dev;
+		dev_hold(dev_out);
+		fl.oif = dev_out->ifindex;
+		if (res.fi)
+			fib_info_put(res.fi);
+		res.fi = NULL;
+		flags |= RTCF_LOCAL;
+		goto make_route;
+	}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (res.fi->fib_nhs > 1 && fl.oif == 0)
+		fib_select_multipath(&fl, &res);
+	else
+#endif
+	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
+		fib_select_default(&fl, &res);
+
+	if (!fl.fl4_src)
+		fl.fl4_src = FIB_RES_PREFSRC(res);
+
+	if (dev_out)
+		dev_put(dev_out);
+	dev_out = FIB_RES_DEV(res);
+	dev_hold(dev_out);
+	fl.oif = dev_out->ifindex;
+
+
+make_route:
+	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
+
+
+	if (free_res)
+		fib_res_put(&res);
+	if (dev_out)
+		dev_put(dev_out);
+out:	return err;
+}
+
+int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
+{
+	unsigned hash;
+	struct rtable *rth;
+
+	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
+
+	rcu_read_lock_bh();
+	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+		rth = rcu_dereference(rth->u.rt_next)) {
+		if (rth->fl.fl4_dst == flp->fl4_dst &&
+		    rth->fl.fl4_src == flp->fl4_src &&
+		    rth->fl.iif == 0 &&
+		    rth->fl.oif == flp->oif &&
+#ifdef CONFIG_IP_ROUTE_FWMARK
+		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
+#endif
+		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+
+			/* check for multipath routes and choose one if
+			 * necessary
+			 */
+			if (multipath_select_route(flp, rth, rp)) {
+				dst_hold(&(*rp)->u.dst);
+				RT_CACHE_STAT_INC(out_hit);
+				rcu_read_unlock_bh();
+				return 0;
+			}
+
+			rth->u.dst.lastuse = jiffies;
+			dst_hold(&rth->u.dst);
+			rth->u.dst.__use++;
+			RT_CACHE_STAT_INC(out_hit);
+			rcu_read_unlock_bh();
+			*rp = rth;
+			return 0;
+		}
+		RT_CACHE_STAT_INC(out_hlist_search);
+	}
+	rcu_read_unlock_bh();
+
+	return ip_route_output_slow(rp, flp);
+}
+
+int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
+{
+	int err;
+
+	if ((err = __ip_route_output_key(rp, flp)) != 0)
+		return err;
+
+	if (flp->proto) {
+		if (!flp->fl4_src)
+			flp->fl4_src = (*rp)->rt_src;
+		if (!flp->fl4_dst)
+			flp->fl4_dst = (*rp)->rt_dst;
+		return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+	}
+
+	return 0;
+}
+
+int ip_route_output_key(struct rtable **rp, struct flowi *flp)
+{
+	return ip_route_output_flow(rp, flp, NULL, 0);
+}
+
+static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+			int nowait)
+{
+	struct rtable *rt = (struct rtable*)skb->dst;
+	struct rtmsg *r;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+	struct rta_cacheinfo ci;
+#ifdef CONFIG_IP_MROUTE
+	struct rtattr *eptr;
+#endif
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	r = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+	r->rtm_family	 = AF_INET;
+	r->rtm_dst_len	= 32;
+	r->rtm_src_len	= 0;
+	r->rtm_tos	= rt->fl.fl4_tos;
+	r->rtm_table	= RT_TABLE_MAIN;
+	r->rtm_type	= rt->rt_type;
+	r->rtm_scope	= RT_SCOPE_UNIVERSE;
+	r->rtm_protocol = RTPROT_UNSPEC;
+	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
+	if (rt->rt_flags & RTCF_NOTIFY)
+		r->rtm_flags |= RTM_F_NOTIFY;
+	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+	if (rt->fl.fl4_src) {
+		r->rtm_src_len = 32;
+		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+	}
+	if (rt->u.dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (rt->u.dst.tclassid)
+		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
+		__u32 alg = rt->rt_multipath_alg;
+
+		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
+	}
+#endif
+	if (rt->fl.iif)
+		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+	else if (rt->rt_src != rt->fl.fl4_src)
+		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+	if (rt->rt_dst != rt->rt_gateway)
+		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+		goto rtattr_failure;
+	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
+	ci.rta_used	= rt->u.dst.__use;
+	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
+	if (rt->u.dst.expires)
+		ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies);
+	else
+		ci.rta_expires = 0;
+	ci.rta_error	= rt->u.dst.error;
+	ci.rta_id	= ci.rta_ts = ci.rta_tsage = 0;
+	if (rt->peer) {
+		ci.rta_id = rt->peer->ip_id_count;
+		if (rt->peer->tcp_ts_stamp) {
+			ci.rta_ts = rt->peer->tcp_ts;
+			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+		}
+	}
+#ifdef CONFIG_IP_MROUTE
+	eptr = (struct rtattr*)skb->tail;
+#endif
+	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+	if (rt->fl.iif) {
+#ifdef CONFIG_IP_MROUTE
+		u32 dst = rt->rt_dst;
+
+		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+		    ipv4_devconf.mc_forwarding) {
+			int err = ipmr_get_route(skb, r, nowait);
+			if (err <= 0) {
+				if (!nowait) {
+					if (err == 0)
+						return 0;
+					goto nlmsg_failure;
+				} else {
+					if (err == -EMSGSIZE)
+						goto nlmsg_failure;
+					((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+				}
+			}
+		} else
+#endif
+			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+	}
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct rtable *rt = NULL;
+	u32 dst = 0;
+	u32 src = 0;
+	int iif = 0;
+	int err = -ENOBUFS;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	/* Reserve room for dummy headers, this skb can pass
+	   through good chunk of routing engine.
+	 */
+	skb->mac.raw = skb->data;
+	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
+	if (rta[RTA_SRC - 1])
+		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
+	if (rta[RTA_DST - 1])
+		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
+	if (rta[RTA_IIF - 1])
+		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+
+	if (iif) {
+		struct net_device *dev = __dev_get_by_index(iif);
+		err = -ENODEV;
+		if (!dev)
+			goto out_free;
+		skb->protocol	= htons(ETH_P_IP);
+		skb->dev	= dev;
+		local_bh_disable();
+		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+		local_bh_enable();
+		rt = (struct rtable*)skb->dst;
+		if (!err && rt->u.dst.error)
+			err = -rt->u.dst.error;
+	} else {
+		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
+							 .saddr = src,
+							 .tos = rtm->rtm_tos } } };
+		int oif = 0;
+		if (rta[RTA_OIF - 1])
+			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
+		fl.oif = oif;
+		err = ip_route_output_key(&rt, &fl);
+	}
+	if (err)
+		goto out_free;
+
+	skb->dst = &rt->u.dst;
+	if (rtm->rtm_flags & RTM_F_NOTIFY)
+		rt->rt_flags |= RTCF_NOTIFY;
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+
+	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+				RTM_NEWROUTE, 0);
+	if (!err)
+		goto out_free;
+	if (err < 0) {
+		err = -EMSGSIZE;
+		goto out_free;
+	}
+
+	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+out:	return err;
+
+out_free:
+	kfree_skb(skb);
+	goto out;
+}
+
+int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
+{
+	struct rtable *rt;
+	int h, s_h;
+	int idx, s_idx;
+
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+	for (h = 0; h <= rt_hash_mask; h++) {
+		if (h < s_h) continue;
+		if (h > s_h)
+			s_idx = 0;
+		rcu_read_lock_bh();
+		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
+		     rt = rcu_dereference(rt->u.rt_next), idx++) {
+			if (idx < s_idx)
+				continue;
+			skb->dst = dst_clone(&rt->u.dst);
+			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq,
+					 RTM_NEWROUTE, 1) <= 0) {
+				dst_release(xchg(&skb->dst, NULL));
+				rcu_read_unlock_bh();
+				goto done;
+			}
+			dst_release(xchg(&skb->dst, NULL));
+		}
+		rcu_read_unlock_bh();
+	}
+
+done:
+	cb->args[0] = h;
+	cb->args[1] = idx;
+	return skb->len;
+}
+
+void ip_rt_multicast_event(struct in_device *in_dev)
+{
+	rt_cache_flush(0);
+}
+
+#ifdef CONFIG_SYSCTL
+static int flush_delay;
+
+static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+					struct file *filp, void __user *buffer,
+					size_t *lenp, loff_t *ppos)
+{
+	if (write) {
+		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+		rt_cache_flush(flush_delay);
+		return 0;
+	} 
+
+	return -EINVAL;
+}
+
+static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
+						int __user *name,
+						int nlen,
+						void __user *oldval,
+						size_t __user *oldlenp,
+						void __user *newval,
+						size_t newlen,
+						void **context)
+{
+	int delay;
+	if (newlen != sizeof(int))
+		return -EINVAL;
+	if (get_user(delay, (int __user *)newval))
+		return -EFAULT; 
+	rt_cache_flush(delay); 
+	return 0;
+}
+
+ctl_table ipv4_route_table[] = {
+        {
+		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.data		= &flush_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MIN_DELAY,
+		.procname	= "min_delay",
+		.data		= &ip_rt_min_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MAX_DELAY,
+		.procname	= "max_delay",
+		.data		= &ip_rt_max_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
+		.procname	= "gc_thresh",
+		.data		= &ipv4_dst_ops.gc_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MAX_SIZE,
+		.procname	= "max_size",
+		.data		= &ip_rt_max_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		/*  Deprecated. Use gc_min_interval_ms */
+ 
+		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL,
+		.procname	= "gc_min_interval",
+		.data		= &ip_rt_gc_min_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
+		.procname	= "gc_min_interval_ms",
+		.data		= &ip_rt_gc_min_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_ms_jiffies,
+		.strategy	= &sysctl_ms_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_GC_TIMEOUT,
+		.procname	= "gc_timeout",
+		.data		= &ip_rt_gc_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_GC_INTERVAL,
+		.procname	= "gc_interval",
+		.data		= &ip_rt_gc_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_LOAD,
+		.procname	= "redirect_load",
+		.data		= &ip_rt_redirect_load,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_NUMBER,
+		.procname	= "redirect_number",
+		.data		= &ip_rt_redirect_number,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_SILENCE,
+		.procname	= "redirect_silence",
+		.data		= &ip_rt_redirect_silence,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_ERROR_COST,
+		.procname	= "error_cost",
+		.data		= &ip_rt_error_cost,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_ERROR_BURST,
+		.procname	= "error_burst",
+		.data		= &ip_rt_error_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_GC_ELASTICITY,
+		.procname	= "gc_elasticity",
+		.data		= &ip_rt_gc_elasticity,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MTU_EXPIRES,
+		.procname	= "mtu_expires",
+		.data		= &ip_rt_mtu_expires,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MIN_PMTU,
+		.procname	= "min_pmtu",
+		.data		= &ip_rt_min_pmtu,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_MIN_ADVMSS,
+		.procname	= "min_adv_mss",
+		.data		= &ip_rt_min_advmss,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE_SECRET_INTERVAL,
+		.procname	= "secret_interval",
+		.data		= &ip_rt_secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+#ifdef CONFIG_NET_CLS_ROUTE
+struct ip_rt_acct *ip_rt_acct;
+
+/* This code sucks.  But you should have seen it before! --RR */
+
+/* IP route accounting ptr for this logical cpu number. */
+#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
+
+#ifdef CONFIG_PROC_FS
+static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
+			   int length, int *eof, void *data)
+{
+	unsigned int i;
+
+	if ((offset & 3) || (length & 3))
+		return -EIO;
+
+	if (offset >= sizeof(struct ip_rt_acct) * 256) {
+		*eof = 1;
+		return 0;
+	}
+
+	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
+		length = sizeof(struct ip_rt_acct) * 256 - offset;
+		*eof = 1;
+	}
+
+	offset /= sizeof(u32);
+
+	if (length > 0) {
+		u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
+		u32 *dst = (u32 *) buffer;
+
+		/* Copy first cpu. */
+		*start = buffer;
+		memcpy(dst, src, length);
+
+		/* Add the other cpus in, one int at a time */
+		for_each_cpu(i) {
+			unsigned int j;
+
+			src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
+
+			for (j = 0; j < length/4; j++)
+				dst[j] += src[j];
+		}
+	}
+	return length;
+}
+#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NET_CLS_ROUTE */
+
+static __initdata unsigned long rhash_entries;
+static int __init set_rhash_entries(char *str)
+{
+	if (!str)
+		return 0;
+	rhash_entries = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("rhash_entries=", set_rhash_entries);
+
+int __init ip_rt_init(void)
+{
+	int i, order, goal, rc = 0;
+
+	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
+			     (jiffies ^ (jiffies >> 7)));
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	for (order = 0;
+	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
+		/* NOTHING */;
+	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
+	if (!ip_rt_acct)
+		panic("IP: failed to allocate ip_rt_acct\n");
+	memset(ip_rt_acct, 0, PAGE_SIZE << order);
+#endif
+
+	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
+						     sizeof(struct rtable),
+						     0, SLAB_HWCACHE_ALIGN,
+						     NULL, NULL);
+
+	if (!ipv4_dst_ops.kmem_cachep)
+		panic("IP: failed to allocate ip_dst_cache\n");
+
+	goal = num_physpages >> (26 - PAGE_SHIFT);
+	if (rhash_entries)
+		goal = (rhash_entries * sizeof(struct rt_hash_bucket)) >> PAGE_SHIFT;
+	for (order = 0; (1UL << order) < goal; order++)
+		/* NOTHING */;
+
+	do {
+		rt_hash_mask = (1UL << order) * PAGE_SIZE /
+			sizeof(struct rt_hash_bucket);
+		while (rt_hash_mask & (rt_hash_mask - 1))
+			rt_hash_mask--;
+		rt_hash_table = (struct rt_hash_bucket *)
+			__get_free_pages(GFP_ATOMIC, order);
+	} while (rt_hash_table == NULL && --order > 0);
+
+	if (!rt_hash_table)
+		panic("Failed to allocate IP route cache hash table\n");
+
+	printk(KERN_INFO "IP: routing cache hash table of %u buckets, %ldKbytes\n",
+	       rt_hash_mask,
+	       (long) (rt_hash_mask * sizeof(struct rt_hash_bucket)) / 1024);
+
+	for (rt_hash_log = 0; (1 << rt_hash_log) != rt_hash_mask; rt_hash_log++)
+		/* NOTHING */;
+
+	rt_hash_mask--;
+	for (i = 0; i <= rt_hash_mask; i++) {
+		spin_lock_init(&rt_hash_table[i].lock);
+		rt_hash_table[i].chain = NULL;
+	}
+
+	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
+	ip_rt_max_size = (rt_hash_mask + 1) * 16;
+
+	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
+	if (!rt_cache_stat)
+		return -ENOMEM;
+
+	devinet_init();
+	ip_fib_init();
+
+	init_timer(&rt_flush_timer);
+	rt_flush_timer.function = rt_run_flush;
+	init_timer(&rt_periodic_timer);
+	rt_periodic_timer.function = rt_check_expire;
+	init_timer(&rt_secret_timer);
+	rt_secret_timer.function = rt_secret_rebuild;
+
+	/* All the timers, started at system startup tend
+	   to synchronize. Perturb it a bit.
+	 */
+	rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval +
+					ip_rt_gc_interval;
+	add_timer(&rt_periodic_timer);
+
+	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
+		ip_rt_secret_interval;
+	add_timer(&rt_secret_timer);
+
+#ifdef CONFIG_PROC_FS
+	{
+	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
+	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
+	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 
+			    		     proc_net_stat))) {
+		free_percpu(rt_cache_stat);
+		return -ENOMEM;
+	}
+	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
+	}
+#ifdef CONFIG_NET_CLS_ROUTE
+	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+#endif
+#endif
+#ifdef CONFIG_XFRM
+	xfrm_init();
+	xfrm4_init();
+#endif
+	return rc;
+}
+
+EXPORT_SYMBOL(__ip_select_ident);
+EXPORT_SYMBOL(ip_route_input);
+EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
new file mode 100644
index 00000000000..e923d2f021a
--- /dev/null
+++ b/net/ipv4/syncookies.c
@@ -0,0 +1,279 @@
+/*
+ *  Syncookies implementation for the Linux kernel
+ *
+ *  Copyright (C) 1997 Andi Kleen
+ *  Based on ideas by D.J.Bernstein and Eric Schenk. 
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ * 
+ *  $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
+ *
+ *  Missing: IPv6 support. 
+ */
+
+#include <linux/tcp.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+
+static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+
+static __init int init_syncookies(void)
+{
+	get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
+	return 0;
+}
+module_init(init_syncookies);
+
+#define COOKIEBITS 24	/* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+static u32 cookie_hash(u32 saddr, u32 daddr, u32 sport, u32 dport,
+		       u32 count, int c)
+{
+	__u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS];
+
+	memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c]));
+	tmp[0] = saddr;
+	tmp[1] = daddr;
+	tmp[2] = (sport << 16) + dport;
+	tmp[3] = count;
+	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+	return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
+				   __u16 dport, __u32 sseq, __u32 count,
+				   __u32 data)
+{
+	/*
+	 * Compute the secure sequence number.
+	 * The output should be:
+   	 *   HASH(sec1,saddr,sport,daddr,dport,sec1) + sseq + (count * 2^24)
+	 *      + (HASH(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24).
+	 * Where sseq is their sequence number and count increases every
+	 * minute by 1.
+	 * As an extra hack, we add a small "data" value that encodes the
+	 * MSS into the second hash value.
+	 */
+
+	return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+		sseq + (count << COOKIEBITS) +
+		((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+		 & COOKIEMASK));
+}
+
+/*
+ * This retrieves the small "data" value from the syncookie.
+ * If the syncookie is bad, the data returned will be out of
+ * range.  This must be checked by the caller.
+ *
+ * The count value used to generate the cookie must be within
+ * "maxdiff" if the current (passed-in) "count".  The return value
+ * is (__u32)-1 if this test fails.
+ */
+static __u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr,
+				  __u16 sport, __u16 dport, __u32 sseq,
+				  __u32 count, __u32 maxdiff)
+{
+	__u32 diff;
+
+	/* Strip away the layers from the cookie */
+	cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+	/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */
+	diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS);
+	if (diff >= maxdiff)
+		return (__u32)-1;
+
+	return (cookie -
+		cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+		& COOKIEMASK;	/* Leaving the data behind */
+}
+
+/* 
+ * This table has to be sorted and terminated with (__u16)-1.
+ * XXX generate a better table.
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ */
+static __u16 const msstab[] = {
+	64 - 1,
+	256 - 1,	
+	512 - 1,
+	536 - 1,
+	1024 - 1,	
+	1440 - 1,
+	1460 - 1,
+	4312 - 1,
+	(__u16)-1
+};
+/* The number doesn't include the -1 terminator */
+#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
+
+/*
+ * Generate a syncookie.  mssp points to the mss, which is returned
+ * rounded down to the value encoded in the cookie.
+ */
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int mssind;
+	const __u16 mss = *mssp;
+
+	
+	tp->last_synq_overflow = jiffies;
+
+	/* XXX sort msstab[] by probability?  Binary search? */
+	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
+		;
+	*mssp = msstab[mssind] + 1;
+
+	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+
+	return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				     skb->h.th->source, skb->h.th->dest,
+				     ntohl(skb->h.th->seq),
+				     jiffies / (HZ * 60), mssind);
+}
+
+/* 
+ * This (misnamed) value is the age of syncookie which is permitted.
+ * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula (exponential
+ * backoff) to compute at runtime so it's currently hardcoded here.
+ */
+#define COUNTER_TRIES 4
+/*  
+ * Check if a ack sequence number is a valid syncookie. 
+ * Return the decoded mss if it is, or 0 if not.
+ */
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+	__u32 seq; 
+	__u32 mssind;
+
+	seq = ntohl(skb->h.th->seq)-1; 
+	mssind = check_tcp_syn_cookie(cookie,
+				      skb->nh.iph->saddr, skb->nh.iph->daddr,
+				      skb->h.th->source, skb->h.th->dest,
+				      seq, jiffies / (HZ * 60), COUNTER_TRIES);
+
+	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+}
+
+extern struct or_calltable or_ipv4;
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+					   struct open_request *req,
+					   struct dst_entry *dst)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sock *child;
+
+	child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+	if (child)
+		tcp_acceptq_queue(sk, req, child);
+	else
+		tcp_openreq_free(req);
+
+	return child;
+}
+
+struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
+			     struct ip_options *opt)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1; 
+	struct sock *ret = sk;
+	struct open_request *req; 
+	int mss; 
+	struct rtable *rt; 
+	__u8 rcv_wscale;
+
+	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+		goto out;
+
+  	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+	    (mss = cookie_check(skb, cookie)) == 0) {
+	 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+		goto out;
+	}
+
+	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+
+	req = tcp_openreq_alloc();
+	ret = NULL;
+	if (!req)
+		goto out;
+
+	req->rcv_isn		= htonl(skb->h.th->seq) - 1;
+	req->snt_isn		= cookie; 
+	req->mss		= mss;
+ 	req->rmt_port		= skb->h.th->source;
+	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
+	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
+	req->class		= &or_ipv4; /* for savety */
+	req->af.v4_req.opt	= NULL;
+
+	/* We throwed the options of the initial SYN away, so we hope
+	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
+	 */
+	if (opt && opt->optlen) {
+		int opt_size = sizeof(struct ip_options) + opt->optlen;
+
+		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (req->af.v4_req.opt) {
+			if (ip_options_echo(req->af.v4_req.opt, skb)) {
+				kfree(req->af.v4_req.opt);
+				req->af.v4_req.opt = NULL;
+			}
+		}
+	}
+
+	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
+	req->wscale_ok	= req->sack_ok = 0; 
+	req->expires	= 0UL; 
+	req->retrans	= 0; 
+	
+	/*
+	 * We need to lookup the route here to get at the correct
+	 * window size. We should better make sure that the window size
+	 * hasn't changed since we received the original syn, but I see
+	 * no easy way to do this. 
+	 */
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = ((opt && opt->srr) ?
+							  opt->faddr :
+							  req->af.v4_req.rmt_addr),
+						.saddr = req->af.v4_req.loc_addr,
+						.tos = RT_CONN_FLAGS(sk) } },
+				    .proto = IPPROTO_TCP,
+				    .uli_u = { .ports =
+					       { .sport = skb->h.th->dest,
+						 .dport = skb->h.th->source } } };
+		if (ip_route_output_key(&rt, &fl)) {
+			tcp_openreq_free(req);
+			goto out; 
+		}
+	}
+
+	/* Try to redo what tcp_v4_send_synack did. */
+	req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW);
+	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+				  &req->rcv_wnd, &req->window_clamp, 
+				  0, &rcv_wscale);
+	/* BTW win scale with syncookies is 0 by definition */
+	req->rcv_wscale	  = rcv_wscale; 
+
+	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
+out:	return ret;
+}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
new file mode 100644
index 00000000000..3aafb298c1c
--- /dev/null
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -0,0 +1,698 @@
+/*
+ * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
+ *
+ * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/tcp.h>
+
+/* From af_inet.c */
+extern int sysctl_ip_nonlocal_bind;
+
+/* From icmp.c */
+extern int sysctl_icmp_echo_ignore_all;
+extern int sysctl_icmp_echo_ignore_broadcasts;
+extern int sysctl_icmp_ignore_bogus_error_responses;
+
+/* From ip_fragment.c */
+extern int sysctl_ipfrag_low_thresh;
+extern int sysctl_ipfrag_high_thresh; 
+extern int sysctl_ipfrag_time;
+extern int sysctl_ipfrag_secret_interval;
+
+/* From ip_output.c */
+extern int sysctl_ip_dynaddr;
+
+/* From icmp.c */
+extern int sysctl_icmp_ratelimit;
+extern int sysctl_icmp_ratemask;
+
+/* From igmp.c */
+extern int sysctl_igmp_max_memberships;
+extern int sysctl_igmp_max_msf;
+
+/* From inetpeer.c */
+extern int inet_peer_threshold;
+extern int inet_peer_minttl;
+extern int inet_peer_maxttl;
+extern int inet_peer_gc_mintime;
+extern int inet_peer_gc_maxtime;
+
+#ifdef CONFIG_SYSCTL
+static int tcp_retr1_max = 255; 
+static int ip_local_port_range_min[] = { 1, 1 };
+static int ip_local_port_range_max[] = { 65535, 65535 };
+#endif
+
+struct ipv4_config ipv4_config;
+
+extern ctl_table ipv4_route_table[];
+
+#ifdef CONFIG_SYSCTL
+
+static
+int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int val = ipv4_devconf.forwarding;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && ipv4_devconf.forwarding != val)
+		inet_forward_change();
+
+	return ret;
+}
+
+static int ipv4_sysctl_forward_strategy(ctl_table *table,
+			 int __user *name, int nlen,
+			 void __user *oldval, size_t __user *oldlenp,
+			 void __user *newval, size_t newlen, 
+			 void **context)
+{
+	int *valp = table->data;
+	int new;
+
+	if (!newval || !newlen)
+		return 0;
+
+	if (newlen != sizeof(int))
+		return -EINVAL;
+
+	if (get_user(new, (int __user *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;
+
+	if (oldval && oldlenp) {
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	*valp = new;
+	inet_forward_change();
+	return 1;
+}
+
+ctl_table ipv4_table[] = {
+        {
+		.ctl_name	= NET_IPV4_TCP_TIMESTAMPS,
+		.procname	= "tcp_timestamps",
+		.data		= &sysctl_tcp_timestamps,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= NET_IPV4_TCP_WINDOW_SCALING,
+		.procname	= "tcp_window_scaling",
+		.data		= &sysctl_tcp_window_scaling,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= NET_IPV4_TCP_SACK,
+		.procname	= "tcp_sack",
+		.data		= &sysctl_tcp_sack,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= NET_IPV4_TCP_RETRANS_COLLAPSE,
+		.procname	= "tcp_retrans_collapse",
+		.data		= &sysctl_tcp_retrans_collapse,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.forwarding,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_sysctl_forward,
+		.strategy	= &ipv4_sysctl_forward_strategy
+	},
+        {
+		.ctl_name	= NET_IPV4_DEFAULT_TTL,
+		.procname	= "ip_default_ttl",
+ 		.data		= &sysctl_ip_default_ttl,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &ipv4_doint_and_flush,
+		.strategy	= &ipv4_doint_and_flush_strategy,
+	},
+        {
+		.ctl_name	= NET_IPV4_AUTOCONFIG,
+		.procname	= "ip_autoconfig",
+		.data		= &ipv4_config.autoconfig,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= NET_IPV4_NO_PMTU_DISC,
+		.procname	= "ip_no_pmtu_disc",
+		.data		= &ipv4_config.no_pmtu_disc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_NONLOCAL_BIND,
+		.procname	= "ip_nonlocal_bind",
+		.data		= &sysctl_ip_nonlocal_bind,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_SYN_RETRIES,
+		.procname	= "tcp_syn_retries",
+		.data		= &sysctl_tcp_syn_retries,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_SYNACK_RETRIES,
+		.procname	= "tcp_synack_retries",
+		.data		= &sysctl_tcp_synack_retries,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_MAX_ORPHANS,
+		.procname	= "tcp_max_orphans",
+		.data		= &sysctl_tcp_max_orphans,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_MAX_TW_BUCKETS,
+		.procname	= "tcp_max_tw_buckets",
+		.data		= &sysctl_tcp_max_tw_buckets,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
+		.procname	= "ipfrag_high_thresh",
+		.data		= &sysctl_ipfrag_high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
+		.procname	= "ipfrag_low_thresh",
+		.data		= &sysctl_ipfrag_low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_DYNADDR,
+		.procname	= "ip_dynaddr",
+		.data		= &sysctl_ip_dynaddr,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_TIME,
+		.procname	= "ipfrag_time",
+		.data		= &sysctl_ipfrag_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_KEEPALIVE_TIME,
+		.procname	= "tcp_keepalive_time",
+		.data		= &sysctl_tcp_keepalive_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_KEEPALIVE_PROBES,
+		.procname	= "tcp_keepalive_probes",
+		.data		= &sysctl_tcp_keepalive_probes,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_KEEPALIVE_INTVL,
+		.procname	= "tcp_keepalive_intvl",
+		.data		= &sysctl_tcp_keepalive_intvl,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_RETRIES1,
+		.procname	= "tcp_retries1",
+		.data		= &sysctl_tcp_retries1,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra2		= &tcp_retr1_max
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_RETRIES2,
+		.procname	= "tcp_retries2",
+		.data		= &sysctl_tcp_retries2,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_TCP_FIN_TIMEOUT,
+		.procname	= "tcp_fin_timeout",
+		.data		= &sysctl_tcp_fin_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+#ifdef CONFIG_SYN_COOKIES
+	{
+		.ctl_name	= NET_TCP_SYNCOOKIES,
+		.procname	= "tcp_syncookies",
+		.data		= &sysctl_tcp_syncookies,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif
+	{
+		.ctl_name	= NET_TCP_TW_RECYCLE,
+		.procname	= "tcp_tw_recycle",
+		.data		= &sysctl_tcp_tw_recycle,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_ABORT_ON_OVERFLOW,
+		.procname	= "tcp_abort_on_overflow",
+		.data		= &sysctl_tcp_abort_on_overflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_STDURG,
+		.procname	= "tcp_stdurg",
+		.data		= &sysctl_tcp_stdurg,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_RFC1337,
+		.procname	= "tcp_rfc1337",
+		.data		= &sysctl_tcp_rfc1337,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_MAX_SYN_BACKLOG,
+		.procname	= "tcp_max_syn_backlog",
+		.data		= &sysctl_max_syn_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_LOCAL_PORT_RANGE,
+		.procname	= "ip_local_port_range",
+		.data		= &sysctl_local_port_range,
+		.maxlen		= sizeof(sysctl_local_port_range),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= ip_local_port_range_min,
+		.extra2		= ip_local_port_range_max
+	},
+	{
+		.ctl_name	= NET_IPV4_ICMP_ECHO_IGNORE_ALL,
+		.procname	= "icmp_echo_ignore_all",
+		.data		= &sysctl_icmp_echo_ignore_all,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
+		.procname	= "icmp_echo_ignore_broadcasts",
+		.data		= &sysctl_icmp_echo_ignore_broadcasts,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
+		.procname	= "icmp_ignore_bogus_error_responses",
+		.data		= &sysctl_icmp_ignore_bogus_error_responses,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= ipv4_route_table
+	},
+#ifdef CONFIG_IP_MULTICAST
+	{
+		.ctl_name	= NET_IPV4_IGMP_MAX_MEMBERSHIPS,
+		.procname	= "igmp_max_memberships",
+		.data		= &sysctl_igmp_max_memberships,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+
+#endif
+	{
+		.ctl_name	= NET_IPV4_IGMP_MAX_MSF,
+		.procname	= "igmp_max_msf",
+		.data		= &sysctl_igmp_max_msf,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_INET_PEER_THRESHOLD,
+		.procname	= "inet_peer_threshold",
+		.data		= &inet_peer_threshold,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_INET_PEER_MINTTL,
+		.procname	= "inet_peer_minttl",
+		.data		= &inet_peer_minttl,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_INET_PEER_MAXTTL,
+		.procname	= "inet_peer_maxttl",
+		.data		= &inet_peer_maxttl,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_INET_PEER_GC_MINTIME,
+		.procname	= "inet_peer_gc_mintime",
+		.data		= &inet_peer_gc_mintime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_INET_PEER_GC_MAXTIME,
+		.procname	= "inet_peer_gc_maxtime",
+		.data		= &inet_peer_gc_maxtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_TCP_ORPHAN_RETRIES,
+		.procname	= "tcp_orphan_retries",
+		.data		= &sysctl_tcp_orphan_retries,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_FACK,
+		.procname	= "tcp_fack",
+		.data		= &sysctl_tcp_fack,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_REORDERING,
+		.procname	= "tcp_reordering",
+		.data		= &sysctl_tcp_reordering,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_ECN,
+		.procname	= "tcp_ecn",
+		.data		= &sysctl_tcp_ecn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_DSACK,
+		.procname	= "tcp_dsack",
+		.data		= &sysctl_tcp_dsack,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_MEM,
+		.procname	= "tcp_mem",
+		.data		= &sysctl_tcp_mem,
+		.maxlen		= sizeof(sysctl_tcp_mem),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_WMEM,
+		.procname	= "tcp_wmem",
+		.data		= &sysctl_tcp_wmem,
+		.maxlen		= sizeof(sysctl_tcp_wmem),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_RMEM,
+		.procname	= "tcp_rmem",
+		.data		= &sysctl_tcp_rmem,
+		.maxlen		= sizeof(sysctl_tcp_rmem),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_APP_WIN,
+		.procname	= "tcp_app_win",
+		.data		= &sysctl_tcp_app_win,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_ADV_WIN_SCALE,
+		.procname	= "tcp_adv_win_scale",
+		.data		= &sysctl_tcp_adv_win_scale,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_ICMP_RATELIMIT,
+		.procname	= "icmp_ratelimit",
+		.data		= &sysctl_icmp_ratelimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_ICMP_RATEMASK,
+		.procname	= "icmp_ratemask",
+		.data		= &sysctl_icmp_ratemask,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_TW_REUSE,
+		.procname	= "tcp_tw_reuse",
+		.data		= &sysctl_tcp_tw_reuse,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_FRTO,
+		.procname	= "tcp_frto",
+		.data		= &sysctl_tcp_frto,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_TCP_LOW_LATENCY,
+		.procname	= "tcp_low_latency",
+		.data		= &sysctl_tcp_low_latency,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
+		.procname	= "ipfrag_secret_interval",
+		.data		= &sysctl_ipfrag_secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
+		.procname	= "tcp_no_metrics_save",
+		.data		= &sysctl_tcp_nometrics_save,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_WESTWOOD, 
+		.procname	= "tcp_westwood",
+		.data		= &sysctl_tcp_westwood,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_VEGAS,
+		.procname	= "tcp_vegas_cong_avoid",
+		.data		= &sysctl_tcp_vegas_cong_avoid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_VEGAS_ALPHA,
+		.procname	= "tcp_vegas_alpha",
+		.data		= &sysctl_tcp_vegas_alpha,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_VEGAS_BETA,
+		.procname	= "tcp_vegas_beta",
+		.data		= &sysctl_tcp_vegas_beta,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_VEGAS_GAMMA,
+		.procname	= "tcp_vegas_gamma",
+		.data		= &sysctl_tcp_vegas_gamma,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_BIC,
+		.procname	= "tcp_bic",
+		.data		= &sysctl_tcp_bic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_BIC_FAST_CONVERGENCE,
+		.procname	= "tcp_bic_fast_convergence",
+		.data		= &sysctl_tcp_bic_fast_convergence,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_BIC_LOW_WINDOW,
+		.procname	= "tcp_bic_low_window",
+		.data		= &sysctl_tcp_bic_low_window,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_MODERATE_RCVBUF,
+		.procname	= "tcp_moderate_rcvbuf",
+		.data		= &sysctl_tcp_moderate_rcvbuf,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_TSO_WIN_DIVISOR,
+		.procname	= "tcp_tso_win_divisor",
+		.data		= &sysctl_tcp_tso_win_divisor,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_TCP_BIC_BETA,
+		.procname	= "tcp_bic_beta",
+		.data		= &sysctl_tcp_bic_beta,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+
+#endif /* CONFIG_SYSCTL */
+
+EXPORT_SYMBOL(ipv4_config);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
new file mode 100644
index 00000000000..5cff56af785
--- /dev/null
+++ b/net/ipv4/tcp.c
@@ -0,0 +1,2386 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *		Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Matthew Dillon, <dillon@apollo.west.oic.com>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ *
+ * Fixes:
+ *		Alan Cox	:	Numerous verify_area() calls
+ *		Alan Cox	:	Set the ACK bit on a reset
+ *		Alan Cox	:	Stopped it crashing if it closed while
+ *					sk->inuse=1 and was trying to connect
+ *					(tcp_err()).
+ *		Alan Cox	:	All icmp error handling was broken
+ *					pointers passed where wrong and the
+ *					socket was looked up backwards. Nobody
+ *					tested any icmp error code obviously.
+ *		Alan Cox	:	tcp_err() now handled properly. It
+ *					wakes people on errors. poll
+ *					behaves and the icmp error race
+ *					has gone by moving it into sock.c
+ *		Alan Cox	:	tcp_send_reset() fixed to work for
+ *					everything not just packets for
+ *					unknown sockets.
+ *		Alan Cox	:	tcp option processing.
+ *		Alan Cox	:	Reset tweaked (still not 100%) [Had
+ *					syn rule wrong]
+ *		Herp Rosmanith  :	More reset fixes
+ *		Alan Cox	:	No longer acks invalid rst frames.
+ *					Acking any kind of RST is right out.
+ *		Alan Cox	:	Sets an ignore me flag on an rst
+ *					receive otherwise odd bits of prattle
+ *					escape still
+ *		Alan Cox	:	Fixed another acking RST frame bug.
+ *					Should stop LAN workplace lockups.
+ *		Alan Cox	: 	Some tidyups using the new skb list
+ *					facilities
+ *		Alan Cox	:	sk->keepopen now seems to work
+ *		Alan Cox	:	Pulls options out correctly on accepts
+ *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
+ *		Alan Cox	:	PSH doesn't end a TCP read. Switched a
+ *					bit to skb ops.
+ *		Alan Cox	:	Tidied tcp_data to avoid a potential
+ *					nasty.
+ *		Alan Cox	:	Added some better commenting, as the
+ *					tcp is hard to follow
+ *		Alan Cox	:	Removed incorrect check for 20 * psh
+ *	Michael O'Reilly	:	ack < copied bug fix.
+ *	Johannes Stille		:	Misc tcp fixes (not all in yet).
+ *		Alan Cox	:	FIN with no memory -> CRASH
+ *		Alan Cox	:	Added socket option proto entries.
+ *					Also added awareness of them to accept.
+ *		Alan Cox	:	Added TCP options (SOL_TCP)
+ *		Alan Cox	:	Switched wakeup calls to callbacks,
+ *					so the kernel can layer network
+ *					sockets.
+ *		Alan Cox	:	Use ip_tos/ip_ttl settings.
+ *		Alan Cox	:	Handle FIN (more) properly (we hope).
+ *		Alan Cox	:	RST frames sent on unsynchronised
+ *					state ack error.
+ *		Alan Cox	:	Put in missing check for SYN bit.
+ *		Alan Cox	:	Added tcp_select_window() aka NET2E
+ *					window non shrink trick.
+ *		Alan Cox	:	Added a couple of small NET2E timer
+ *					fixes
+ *		Charles Hedrick :	TCP fixes
+ *		Toomas Tamm	:	TCP window fixes
+ *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
+ *		Charles Hedrick	:	Rewrote most of it to actually work
+ *		Linus		:	Rewrote tcp_read() and URG handling
+ *					completely
+ *		Gerhard Koerting:	Fixed some missing timer handling
+ *		Matthew Dillon  :	Reworked TCP machine states as per RFC
+ *		Gerhard Koerting:	PC/TCP workarounds
+ *		Adam Caldwell	:	Assorted timer/timing errors
+ *		Matthew Dillon	:	Fixed another RST bug
+ *		Alan Cox	:	Move to kernel side addressing changes.
+ *		Alan Cox	:	Beginning work on TCP fastpathing
+ *					(not yet usable)
+ *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
+ *		Alan Cox	:	TCP fast path debugging
+ *		Alan Cox	:	Window clamping
+ *		Michael Riepe	:	Bug in tcp_check()
+ *		Matt Dillon	:	More TCP improvements and RST bug fixes
+ *		Matt Dillon	:	Yet more small nasties remove from the
+ *					TCP code (Be very nice to this man if
+ *					tcp finally works 100%) 8)
+ *		Alan Cox	:	BSD accept semantics.
+ *		Alan Cox	:	Reset on closedown bug.
+ *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendto().
+ *		Michael Pall	:	Handle poll() after URG properly in
+ *					all cases.
+ *		Michael Pall	:	Undo the last fix in tcp_read_urg()
+ *					(multi URG PUSH broke rlogin).
+ *		Michael Pall	:	Fix the multi URG PUSH problem in
+ *					tcp_readable(), poll() after URG
+ *					works now.
+ *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the
+ *					BSD api.
+ *		Alan Cox	:	Changed the semantics of sk->socket to
+ *					fix a race and a signal problem with
+ *					accept() and async I/O.
+ *		Alan Cox	:	Relaxed the rules on tcp_sendto().
+ *		Yury Shevchuk	:	Really fixed accept() blocking problem.
+ *		Craig I. Hagan  :	Allow for BSD compatible TIME_WAIT for
+ *					clients/servers which listen in on
+ *					fixed ports.
+ *		Alan Cox	:	Cleaned the above up and shrank it to
+ *					a sensible code size.
+ *		Alan Cox	:	Self connect lockup fix.
+ *		Alan Cox	:	No connect to multicast.
+ *		Ross Biro	:	Close unaccepted children on master
+ *					socket close.
+ *		Alan Cox	:	Reset tracing code.
+ *		Alan Cox	:	Spurious resets on shutdown.
+ *		Alan Cox	:	Giant 15 minute/60 second timer error
+ *		Alan Cox	:	Small whoops in polling before an
+ *					accept.
+ *		Alan Cox	:	Kept the state trace facility since
+ *					it's handy for debugging.
+ *		Alan Cox	:	More reset handler fixes.
+ *		Alan Cox	:	Started rewriting the code based on
+ *					the RFC's for other useful protocol
+ *					references see: Comer, KA9Q NOS, and
+ *					for a reference on the difference
+ *					between specifications and how BSD
+ *					works see the 4.4lite source.
+ *		A.N.Kuznetsov	:	Don't time wait on completion of tidy
+ *					close.
+ *		Linus Torvalds	:	Fin/Shutdown & copied_seq changes.
+ *		Linus Torvalds	:	Fixed BSD port reuse to work first syn
+ *		Alan Cox	:	Reimplemented timers as per the RFC
+ *					and using multiple timers for sanity.
+ *		Alan Cox	:	Small bug fixes, and a lot of new
+ *					comments.
+ *		Alan Cox	:	Fixed dual reader crash by locking
+ *					the buffers (much like datagram.c)
+ *		Alan Cox	:	Fixed stuck sockets in probe. A probe
+ *					now gets fed up of retrying without
+ *					(even a no space) answer.
+ *		Alan Cox	:	Extracted closing code better
+ *		Alan Cox	:	Fixed the closing state machine to
+ *					resemble the RFC.
+ *		Alan Cox	:	More 'per spec' fixes.
+ *		Jorge Cwik	:	Even faster checksumming.
+ *		Alan Cox	:	tcp_data() doesn't ack illegal PSH
+ *					only frames. At least one pc tcp stack
+ *					generates them.
+ *		Alan Cox	:	Cache last socket.
+ *		Alan Cox	:	Per route irtt.
+ *		Matt Day	:	poll()->select() match BSD precisely on error
+ *		Alan Cox	:	New buffers
+ *		Marc Tamsky	:	Various sk->prot->retransmits and
+ *					sk->retransmits misupdating fixed.
+ *					Fixed tcp_write_timeout: stuck close,
+ *					and TCP syn retries gets used now.
+ *		Mark Yarvis	:	In tcp_read_wakeup(), don't send an
+ *					ack if state is TCP_CLOSED.
+ *		Alan Cox	:	Look up device on a retransmit - routes may
+ *					change. Doesn't yet cope with MSS shrink right
+ *					but it's a start!
+ *		Marc Tamsky	:	Closing in closing fixes.
+ *		Mike Shaver	:	RFC1122 verifications.
+ *		Alan Cox	:	rcv_saddr errors.
+ *		Alan Cox	:	Block double connect().
+ *		Alan Cox	:	Small hooks for enSKIP.
+ *		Alexey Kuznetsov:	Path MTU discovery.
+ *		Alan Cox	:	Support soft errors.
+ *		Alan Cox	:	Fix MTU discovery pathological case
+ *					when the remote claims no mtu!
+ *		Marc Tamsky	:	TCP_CLOSE fix.
+ *		Colin (G3TNE)	:	Send a reset on syn ack replies in
+ *					window but wrong (fixes NT lpd problems)
+ *		Pedro Roque	:	Better TCP window handling, delayed ack.
+ *		Joerg Reuter	:	No modification of locked buffers in
+ *					tcp_do_retransmit()
+ *		Eric Schenk	:	Changed receiver side silly window
+ *					avoidance algorithm to BSD style
+ *					algorithm. This doubles throughput
+ *					against machines running Solaris,
+ *					and seems to result in general
+ *					improvement.
+ *	Stefan Magdalinski	:	adjusted tcp_readable() to fix FIONREAD
+ *	Willy Konynenberg	:	Transparent proxying support.
+ *	Mike McLagan		:	Routing by source
+ *		Keith Owens	:	Do proper merging with partial SKB's in
+ *					tcp_do_sendmsg to avoid burstiness.
+ *		Eric Schenk	:	Fix fast close down bug with
+ *					shutdown() followed by close().
+ *		Andi Kleen 	:	Make poll agree with SIGIO
+ *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
+ *					lingertime == 0 (RFC 793 ABORT Call)
+ *	Hirokazu Takahashi	:	Use copy_from_user() instead of
+ *					csum_and_copy_from_user() if possible.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or(at your option) any later version.
+ *
+ * Description of States:
+ *
+ *	TCP_SYN_SENT		sent a connection request, waiting for ack
+ *
+ *	TCP_SYN_RECV		received a connection request, sent ack,
+ *				waiting for final ack in three-way handshake.
+ *
+ *	TCP_ESTABLISHED		connection established
+ *
+ *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
+ *				transmission of remaining buffered data
+ *
+ *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
+ *				to shutdown
+ *
+ *	TCP_CLOSING		both sides have shutdown but we still have
+ *				data we have to finish sending
+ *
+ *	TCP_TIME_WAIT		timeout to catch resent junk before entering
+ *				closed, can only be entered from FIN_WAIT2
+ *				or CLOSING.  Required because the other end
+ *				may not have gotten our last ACK causing it
+ *				to retransmit the data packet (which we ignore)
+ *
+ *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
+ *				us to finish writing our data and to shutdown
+ *				(we have to close() to move on to LAST_ACK)
+ *
+ *	TCP_LAST_ACK		out side has shutdown after remote has
+ *				shutdown.  There may still be data in our
+ *				buffer that we have to finish sending
+ *
+ *	TCP_CLOSE		socket is finished
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/random.h>
+#include <linux/bootmem.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+
+int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+
+DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
+
+kmem_cache_t *tcp_openreq_cachep;
+kmem_cache_t *tcp_bucket_cachep;
+kmem_cache_t *tcp_timewait_cachep;
+
+atomic_t tcp_orphan_count = ATOMIC_INIT(0);
+
+int sysctl_tcp_mem[3];
+int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
+EXPORT_SYMBOL(sysctl_tcp_mem);
+EXPORT_SYMBOL(sysctl_tcp_rmem);
+EXPORT_SYMBOL(sysctl_tcp_wmem);
+
+atomic_t tcp_memory_allocated;	/* Current allocated memory. */
+atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
+
+EXPORT_SYMBOL(tcp_memory_allocated);
+EXPORT_SYMBOL(tcp_sockets_allocated);
+
+/*
+ * Pressure flag: try to collapse.
+ * Technical note: it is used by multiple contexts non atomically.
+ * All the sk_stream_mem_schedule() is of this nature: accounting
+ * is strict, actions are advisory and have some latency.
+ */
+int tcp_memory_pressure;
+
+EXPORT_SYMBOL(tcp_memory_pressure);
+
+void tcp_enter_memory_pressure(void)
+{
+	if (!tcp_memory_pressure) {
+		NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+		tcp_memory_pressure = 1;
+	}
+}
+
+EXPORT_SYMBOL(tcp_enter_memory_pressure);
+
+/*
+ * LISTEN is a special case for poll..
+ */
+static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
+					       poll_table *wait)
+{
+	return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
+}
+
+/*
+ *	Wait for a TCP event.
+ *
+ *	Note that we don't need to lock the socket, as the upper poll layers
+ *	take care of normal races (between the test and the event) and we don't
+ *	go look at any of the socket buffers directly.
+ */
+unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	unsigned int mask;
+	struct sock *sk = sock->sk;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	poll_wait(file, sk->sk_sleep, wait);
+	if (sk->sk_state == TCP_LISTEN)
+		return tcp_listen_poll(sk, wait);
+
+	/* Socket is not locked. We are protected from async events
+	   by poll logic and correct handling of state changes
+	   made by another threads is impossible in any case.
+	 */
+
+	mask = 0;
+	if (sk->sk_err)
+		mask = POLLERR;
+
+	/*
+	 * POLLHUP is certainly not done right. But poll() doesn't
+	 * have a notion of HUP in just one direction, and for a
+	 * socket the read side is more interesting.
+	 *
+	 * Some poll() documentation says that POLLHUP is incompatible
+	 * with the POLLOUT/POLLWR flags, so somebody should check this
+	 * all. But careful, it tends to be safer to return too many
+	 * bits than too few, and you can easily break real applications
+	 * if you don't tell them that something has hung up!
+	 *
+	 * Check-me.
+	 *
+	 * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and
+	 * our fs/select.c). It means that after we received EOF,
+	 * poll always returns immediately, making impossible poll() on write()
+	 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
+	 * if and only if shutdown has been made in both directions.
+	 * Actually, it is interesting to look how Solaris and DUX
+	 * solve this dilemma. I would prefer, if PULLHUP were maskable,
+	 * then we could set it on SND_SHUTDOWN. BTW examples given
+	 * in Stevens' books assume exactly this behaviour, it explains
+	 * why PULLHUP is incompatible with POLLOUT.	--ANK
+	 *
+	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
+	 * blocking on fresh not-connected or disconnected socket. --ANK
+	 */
+	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+		mask |= POLLHUP;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connected? */
+	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		/* Potential race condition. If read of tp below will
+		 * escape above sk->sk_state, we can be illegally awaken
+		 * in SYN_* states. */
+		if ((tp->rcv_nxt != tp->copied_seq) &&
+		    (tp->urg_seq != tp->copied_seq ||
+		     tp->rcv_nxt != tp->copied_seq + 1 ||
+		     sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
+			mask |= POLLIN | POLLRDNORM;
+
+		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+				mask |= POLLOUT | POLLWRNORM;
+			} else {  /* send SIGIO later */
+				set_bit(SOCK_ASYNC_NOSPACE,
+					&sk->sk_socket->flags);
+				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+				/* Race breaker. If space is freed after
+				 * wspace test but before the flags are set,
+				 * IO signal will be lost.
+				 */
+				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+					mask |= POLLOUT | POLLWRNORM;
+			}
+		}
+
+		if (tp->urg_data & TCP_URG_VALID)
+			mask |= POLLPRI;
+	}
+	return mask;
+}
+
+int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int answ;
+
+	switch (cmd) {
+	case SIOCINQ:
+		if (sk->sk_state == TCP_LISTEN)
+			return -EINVAL;
+
+		lock_sock(sk);
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			answ = 0;
+		else if (sock_flag(sk, SOCK_URGINLINE) ||
+			 !tp->urg_data ||
+			 before(tp->urg_seq, tp->copied_seq) ||
+			 !before(tp->urg_seq, tp->rcv_nxt)) {
+			answ = tp->rcv_nxt - tp->copied_seq;
+
+			/* Subtract 1, if FIN is in queue. */
+			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
+				answ -=
+		       ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+		} else
+			answ = tp->urg_seq - tp->copied_seq;
+		release_sock(sk);
+		break;
+	case SIOCATMARK:
+		answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+		break;
+	case SIOCOUTQ:
+		if (sk->sk_state == TCP_LISTEN)
+			return -EINVAL;
+
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			answ = 0;
+		else
+			answ = tp->write_seq - tp->snd_una;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	};
+
+	return put_user(answ, (int __user *)arg);
+}
+
+
+int tcp_listen_start(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt;
+
+	sk->sk_max_ack_backlog = 0;
+	sk->sk_ack_backlog = 0;
+	tp->accept_queue = tp->accept_queue_tail = NULL;
+	rwlock_init(&tp->syn_wait_lock);
+	tcp_delack_init(tp);
+
+	lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
+	if (!lopt)
+		return -ENOMEM;
+
+	memset(lopt, 0, sizeof(struct tcp_listen_opt));
+	for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
+		if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
+			break;
+	get_random_bytes(&lopt->hash_rnd, 4);
+
+	write_lock_bh(&tp->syn_wait_lock);
+	tp->listen_opt = lopt;
+	write_unlock_bh(&tp->syn_wait_lock);
+
+	/* There is race window here: we announce ourselves listening,
+	 * but this transition is still not validated by get_port().
+	 * It is OK, because this socket enters to hash table only
+	 * after validation is complete.
+	 */
+	sk->sk_state = TCP_LISTEN;
+	if (!sk->sk_prot->get_port(sk, inet->num)) {
+		inet->sport = htons(inet->num);
+
+		sk_dst_reset(sk);
+		sk->sk_prot->hash(sk);
+
+		return 0;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+	write_lock_bh(&tp->syn_wait_lock);
+	tp->listen_opt = NULL;
+	write_unlock_bh(&tp->syn_wait_lock);
+	kfree(lopt);
+	return -EADDRINUSE;
+}
+
+/*
+ *	This routine closes sockets which have been at least partially
+ *	opened, but not yet accepted.
+ */
+
+static void tcp_listen_stop (struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct open_request *acc_req = tp->accept_queue;
+	struct open_request *req;
+	int i;
+
+	tcp_delete_keepalive_timer(sk);
+
+	/* make all the listen_opt local to us */
+	write_lock_bh(&tp->syn_wait_lock);
+	tp->listen_opt = NULL;
+	write_unlock_bh(&tp->syn_wait_lock);
+	tp->accept_queue = tp->accept_queue_tail = NULL;
+
+	if (lopt->qlen) {
+		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
+			while ((req = lopt->syn_table[i]) != NULL) {
+				lopt->syn_table[i] = req->dl_next;
+				lopt->qlen--;
+				tcp_openreq_free(req);
+
+		/* Following specs, it would be better either to send FIN
+		 * (and enter FIN-WAIT-1, it is normal close)
+		 * or to send active reset (abort).
+		 * Certainly, it is pretty dangerous while synflood, but it is
+		 * bad justification for our negligence 8)
+		 * To be honest, we are not able to make either
+		 * of the variants now.			--ANK
+		 */
+			}
+		}
+	}
+	BUG_TRAP(!lopt->qlen);
+
+	kfree(lopt);
+
+	while ((req = acc_req) != NULL) {
+		struct sock *child = req->sk;
+
+		acc_req = req->dl_next;
+
+		local_bh_disable();
+		bh_lock_sock(child);
+		BUG_TRAP(!sock_owned_by_user(child));
+		sock_hold(child);
+
+		tcp_disconnect(child, O_NONBLOCK);
+
+		sock_orphan(child);
+
+		atomic_inc(&tcp_orphan_count);
+
+		tcp_destroy_sock(child);
+
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+
+		sk_acceptq_removed(sk);
+		tcp_openreq_fastfree(req);
+	}
+	BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+	tp->pushed_seq = tp->write_seq;
+}
+
+static inline int forced_push(struct tcp_sock *tp)
+{
+	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
+}
+
+static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
+			      struct sk_buff *skb)
+{
+	skb->csum = 0;
+	TCP_SKB_CB(skb)->seq = tp->write_seq;
+	TCP_SKB_CB(skb)->end_seq = tp->write_seq;
+	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(skb)->sacked = 0;
+	skb_header_release(skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);
+	sk_charge_skb(sk, skb);
+	if (!sk->sk_send_head)
+		sk->sk_send_head = skb;
+	else if (tp->nonagle&TCP_NAGLE_PUSH)
+		tp->nonagle &= ~TCP_NAGLE_PUSH; 
+}
+
+static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
+				struct sk_buff *skb)
+{
+	if (flags & MSG_OOB) {
+		tp->urg_mode = 1;
+		tp->snd_up = tp->write_seq;
+		TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
+	}
+}
+
+static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
+			    int mss_now, int nonagle)
+{
+	if (sk->sk_send_head) {
+		struct sk_buff *skb = sk->sk_write_queue.prev;
+		if (!(flags & MSG_MORE) || forced_push(tp))
+			tcp_mark_push(tp, skb);
+		tcp_mark_urg(tp, flags, skb);
+		__tcp_push_pending_frames(sk, tp, mss_now,
+					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
+	}
+}
+
+static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
+			 size_t psize, int flags)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int mss_now;
+	int err;
+	ssize_t copied;
+	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	/* Wait for a connection to finish. */
+	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+			goto out_err;
+
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	copied = 0;
+
+	err = -EPIPE;
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		goto do_error;
+
+	while (psize > 0) {
+		struct sk_buff *skb = sk->sk_write_queue.prev;
+		struct page *page = pages[poffset / PAGE_SIZE];
+		int copy, i, can_coalesce;
+		int offset = poffset % PAGE_SIZE;
+		int size = min_t(size_t, psize, PAGE_SIZE - offset);
+
+		if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
+new_segment:
+			if (!sk_stream_memory_free(sk))
+				goto wait_for_sndbuf;
+
+			skb = sk_stream_alloc_pskb(sk, 0, 0,
+						   sk->sk_allocation);
+			if (!skb)
+				goto wait_for_memory;
+
+			skb_entail(sk, tp, skb);
+			copy = mss_now;
+		}
+
+		if (copy > size)
+			copy = size;
+
+		i = skb_shinfo(skb)->nr_frags;
+		can_coalesce = skb_can_coalesce(skb, i, page, offset);
+		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+			tcp_mark_push(tp, skb);
+			goto new_segment;
+		}
+		if (sk->sk_forward_alloc < copy &&
+		    !sk_stream_mem_schedule(sk, copy, 0))
+			goto wait_for_memory;
+		
+		if (can_coalesce) {
+			skb_shinfo(skb)->frags[i - 1].size += copy;
+		} else {
+			get_page(page);
+			skb_fill_page_desc(skb, i, page, offset, copy);
+		}
+
+		skb->len += copy;
+		skb->data_len += copy;
+		skb->truesize += copy;
+		sk->sk_wmem_queued += copy;
+		sk->sk_forward_alloc -= copy;
+		skb->ip_summed = CHECKSUM_HW;
+		tp->write_seq += copy;
+		TCP_SKB_CB(skb)->end_seq += copy;
+		skb_shinfo(skb)->tso_segs = 0;
+
+		if (!copied)
+			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+
+		copied += copy;
+		poffset += copy;
+		if (!(psize -= copy))
+			goto out;
+
+		if (skb->len != mss_now || (flags & MSG_OOB))
+			continue;
+
+		if (forced_push(tp)) {
+			tcp_mark_push(tp, skb);
+			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+		} else if (skb == sk->sk_send_head)
+			tcp_push_one(sk, mss_now);
+		continue;
+
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		if (copied)
+			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			goto do_error;
+
+		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	}
+
+out:
+	if (copied)
+		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+	return copied;
+
+do_error:
+	if (copied)
+		goto out;
+out_err:
+	return sk_stream_error(sk, flags, err);
+}
+
+ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
+		     size_t size, int flags)
+{
+	ssize_t res;
+	struct sock *sk = sock->sk;
+
+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
+
+	if (!(sk->sk_route_caps & NETIF_F_SG) ||
+	    !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+		return sock_no_sendpage(sock, page, offset, size, flags);
+
+#undef TCP_ZC_CSUM_FLAGS
+
+	lock_sock(sk);
+	TCP_CHECK_TIMER(sk);
+	res = do_tcp_sendpages(sk, &page, offset, size, flags);
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	return res;
+}
+
+#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
+#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
+
+static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+{
+	int tmp = tp->mss_cache_std;
+
+	if (sk->sk_route_caps & NETIF_F_SG) {
+		int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
+
+		if (tmp >= pgbreak &&
+		    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
+			tmp = pgbreak;
+	}
+	return tmp;
+}
+
+int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		size_t size)
+{
+	struct iovec *iov;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int iovlen, flags;
+	int mss_now;
+	int err, copied;
+	long timeo;
+
+	lock_sock(sk);
+	TCP_CHECK_TIMER(sk);
+
+	flags = msg->msg_flags;
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	/* Wait for a connection to finish. */
+	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+			goto out_err;
+
+	/* This should be in poll */
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+
+	/* Ok commence sending. */
+	iovlen = msg->msg_iovlen;
+	iov = msg->msg_iov;
+	copied = 0;
+
+	err = -EPIPE;
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		goto do_error;
+
+	while (--iovlen >= 0) {
+		int seglen = iov->iov_len;
+		unsigned char __user *from = iov->iov_base;
+
+		iov++;
+
+		while (seglen > 0) {
+			int copy;
+
+			skb = sk->sk_write_queue.prev;
+
+			if (!sk->sk_send_head ||
+			    (copy = mss_now - skb->len) <= 0) {
+
+new_segment:
+				/* Allocate new segment. If the interface is SG,
+				 * allocate skb fitting to single page.
+				 */
+				if (!sk_stream_memory_free(sk))
+					goto wait_for_sndbuf;
+
+				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+							   0, sk->sk_allocation);
+				if (!skb)
+					goto wait_for_memory;
+
+				/*
+				 * Check whether we can use HW checksum.
+				 */
+				if (sk->sk_route_caps &
+				    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
+				     NETIF_F_HW_CSUM))
+					skb->ip_summed = CHECKSUM_HW;
+
+				skb_entail(sk, tp, skb);
+				copy = mss_now;
+			}
+
+			/* Try to append data to the end of skb. */
+			if (copy > seglen)
+				copy = seglen;
+
+			/* Where to copy to? */
+			if (skb_tailroom(skb) > 0) {
+				/* We have some space in skb head. Superb! */
+				if (copy > skb_tailroom(skb))
+					copy = skb_tailroom(skb);
+				if ((err = skb_add_data(skb, from, copy)) != 0)
+					goto do_fault;
+			} else {
+				int merge = 0;
+				int i = skb_shinfo(skb)->nr_frags;
+				struct page *page = TCP_PAGE(sk);
+				int off = TCP_OFF(sk);
+
+				if (skb_can_coalesce(skb, i, page, off) &&
+				    off != PAGE_SIZE) {
+					/* We can extend the last page
+					 * fragment. */
+					merge = 1;
+				} else if (i == MAX_SKB_FRAGS ||
+					   (!i &&
+					   !(sk->sk_route_caps & NETIF_F_SG))) {
+					/* Need to add new fragment and cannot
+					 * do this because interface is non-SG,
+					 * or because all the page slots are
+					 * busy. */
+					tcp_mark_push(tp, skb);
+					goto new_segment;
+				} else if (page) {
+					/* If page is cached, align
+					 * offset to L1 cache boundary
+					 */
+					off = (off + L1_CACHE_BYTES - 1) &
+					      ~(L1_CACHE_BYTES - 1);
+					if (off == PAGE_SIZE) {
+						put_page(page);
+						TCP_PAGE(sk) = page = NULL;
+					}
+				}
+
+				if (!page) {
+					/* Allocate new cache page. */
+					if (!(page = sk_stream_alloc_page(sk)))
+						goto wait_for_memory;
+					off = 0;
+				}
+
+				if (copy > PAGE_SIZE - off)
+					copy = PAGE_SIZE - off;
+
+				/* Time to copy data. We are close to
+				 * the end! */
+				err = skb_copy_to_page(sk, from, skb, page,
+						       off, copy);
+				if (err) {
+					/* If this page was new, give it to the
+					 * socket so it does not get leaked.
+					 */
+					if (!TCP_PAGE(sk)) {
+						TCP_PAGE(sk) = page;
+						TCP_OFF(sk) = 0;
+					}
+					goto do_error;
+				}
+
+				/* Update the skb. */
+				if (merge) {
+					skb_shinfo(skb)->frags[i - 1].size +=
+									copy;
+				} else {
+					skb_fill_page_desc(skb, i, page, off, copy);
+					if (TCP_PAGE(sk)) {
+						get_page(page);
+					} else if (off + copy < PAGE_SIZE) {
+						get_page(page);
+						TCP_PAGE(sk) = page;
+					}
+				}
+
+				TCP_OFF(sk) = off + copy;
+			}
+
+			if (!copied)
+				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+
+			tp->write_seq += copy;
+			TCP_SKB_CB(skb)->end_seq += copy;
+			skb_shinfo(skb)->tso_segs = 0;
+
+			from += copy;
+			copied += copy;
+			if ((seglen -= copy) == 0 && iovlen == 0)
+				goto out;
+
+			if (skb->len != mss_now || (flags & MSG_OOB))
+				continue;
+
+			if (forced_push(tp)) {
+				tcp_mark_push(tp, skb);
+				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+			} else if (skb == sk->sk_send_head)
+				tcp_push_one(sk, mss_now);
+			continue;
+
+wait_for_sndbuf:
+			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+			if (copied)
+				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+
+			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+				goto do_error;
+
+			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+		}
+	}
+
+out:
+	if (copied)
+		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	return copied;
+
+do_fault:
+	if (!skb->len) {
+		if (sk->sk_send_head == skb)
+			sk->sk_send_head = NULL;
+		__skb_unlink(skb, skb->list);
+		sk_stream_free_skb(sk, skb);
+	}
+
+do_error:
+	if (copied)
+		goto out;
+out_err:
+	err = sk_stream_error(sk, flags, err);
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	return err;
+}
+
+/*
+ *	Handle reading urgent data. BSD has very simple semantics for
+ *	this, no blocking and very strange errors 8)
+ */
+
+static int tcp_recv_urg(struct sock *sk, long timeo,
+			struct msghdr *msg, int len, int flags,
+			int *addr_len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* No URG data to read. */
+	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
+	    tp->urg_data == TCP_URG_READ)
+		return -EINVAL;	/* Yes this is right ! */
+
+	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
+		return -ENOTCONN;
+
+	if (tp->urg_data & TCP_URG_VALID) {
+		int err = 0;
+		char c = tp->urg_data;
+
+		if (!(flags & MSG_PEEK))
+			tp->urg_data = TCP_URG_READ;
+
+		/* Read urgent data. */
+		msg->msg_flags |= MSG_OOB;
+
+		if (len > 0) {
+			if (!(flags & MSG_TRUNC))
+				err = memcpy_toiovec(msg->msg_iov, &c, 1);
+			len = 1;
+		} else
+			msg->msg_flags |= MSG_TRUNC;
+
+		return err ? -EFAULT : len;
+	}
+
+	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
+		return 0;
+
+	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
+	 * the available implementations agree in this case:
+	 * this call should never block, independent of the
+	 * blocking state of the socket.
+	 * Mike <pall@rz.uni-karlsruhe.de>
+	 */
+	return -EAGAIN;
+}
+
+/* Clean up the receive buffer for full frames taken by the user,
+ * then send an ACK if necessary.  COPIED is the number of bytes
+ * tcp_recvmsg has given to the user so far, it speeds up the
+ * calculation of whether or not we must ACK for the sake of
+ * a window update.
+ */
+static void cleanup_rbuf(struct sock *sk, int copied)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int time_to_ack = 0;
+
+#if TCP_DEBUG
+	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
+#endif
+
+	if (tcp_ack_scheduled(tp)) {
+		   /* Delayed ACKs frequently hit locked sockets during bulk
+		    * receive. */
+		if (tp->ack.blocked ||
+		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
+		    tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss ||
+		    /*
+		     * If this read emptied read buffer, we send ACK, if
+		     * connection is not bidirectional, user drained
+		     * receive buffer and there was a small segment
+		     * in queue.
+		     */
+		    (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) &&
+		     !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+			time_to_ack = 1;
+	}
+
+	/* We send an ACK if we can now advertise a non-zero window
+	 * which has been raised "significantly".
+	 *
+	 * Even if window raised up to infinity, do not send window open ACK
+	 * in states, where we will not receive more. It is useless.
+	 */
+	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		__u32 rcv_window_now = tcp_receive_window(tp);
+
+		/* Optimize, __tcp_select_window() is not cheap. */
+		if (2*rcv_window_now <= tp->window_clamp) {
+			__u32 new_window = __tcp_select_window(sk);
+
+			/* Send ACK now, if this read freed lots of space
+			 * in our buffer. Certainly, new_window is new window.
+			 * We can advertise it now, if it is not less than current one.
+			 * "Lots" means "at least twice" here.
+			 */
+			if (new_window && new_window >= 2 * rcv_window_now)
+				time_to_ack = 1;
+		}
+	}
+	if (time_to_ack)
+		tcp_send_ack(sk);
+}
+
+static void tcp_prequeue_process(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	NET_ADD_STATS_USER(LINUX_MIB_TCPPREQUEUED, skb_queue_len(&tp->ucopy.prequeue));
+
+	/* RX process wants to run with disabled BHs, though it is not
+	 * necessary */
+	local_bh_disable();
+	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
+		sk->sk_backlog_rcv(sk, skb);
+	local_bh_enable();
+
+	/* Clear memory counter. */
+	tp->ucopy.memory = 0;
+}
+
+static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+{
+	struct sk_buff *skb;
+	u32 offset;
+
+	skb_queue_walk(&sk->sk_receive_queue, skb) {
+		offset = seq - TCP_SKB_CB(skb)->seq;
+		if (skb->h.th->syn)
+			offset--;
+		if (offset < skb->len || skb->h.th->fin) {
+			*off = offset;
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * This routine provides an alternative to tcp_recvmsg() for routines
+ * that would like to handle copying from skbuffs directly in 'sendfile'
+ * fashion.
+ * Note:
+ *	- It is assumed that the socket was locked by the caller.
+ *	- The routine does not block.
+ *	- At present, there is no support for reading OOB data
+ *	  or for 'peeking' the socket using this routine
+ *	  (although both would be easy to implement).
+ */
+int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor)
+{
+	struct sk_buff *skb;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 seq = tp->copied_seq;
+	u32 offset;
+	int copied = 0;
+
+	if (sk->sk_state == TCP_LISTEN)
+		return -ENOTCONN;
+	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+		if (offset < skb->len) {
+			size_t used, len;
+
+			len = skb->len - offset;
+			/* Stop reading if we hit a patch of urgent data */
+			if (tp->urg_data) {
+				u32 urg_offset = tp->urg_seq - seq;
+				if (urg_offset < len)
+					len = urg_offset;
+				if (!len)
+					break;
+			}
+			used = recv_actor(desc, skb, offset, len);
+			if (used <= len) {
+				seq += used;
+				copied += used;
+				offset += used;
+			}
+			if (offset != skb->len)
+				break;
+		}
+		if (skb->h.th->fin) {
+			sk_eat_skb(sk, skb);
+			++seq;
+			break;
+		}
+		sk_eat_skb(sk, skb);
+		if (!desc->count)
+			break;
+	}
+	tp->copied_seq = seq;
+
+	tcp_rcv_space_adjust(sk);
+
+	/* Clean up data we have read: This will do ACK frames. */
+	if (copied)
+		cleanup_rbuf(sk, copied);
+	return copied;
+}
+
+/*
+ *	This routine copies from a sock struct into the user buffer.
+ *
+ *	Technical note: in 2.3 we work on _locked_ socket, so that
+ *	tricks with *seq access order and skb->users are not required.
+ *	Probably, code can be easily improved even more.
+ */
+
+int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		size_t len, int nonblock, int flags, int *addr_len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int copied = 0;
+	u32 peek_seq;
+	u32 *seq;
+	unsigned long used;
+	int err;
+	int target;		/* Read at least this many bytes */
+	long timeo;
+	struct task_struct *user_recv = NULL;
+
+	lock_sock(sk);
+
+	TCP_CHECK_TIMER(sk);
+
+	err = -ENOTCONN;
+	if (sk->sk_state == TCP_LISTEN)
+		goto out;
+
+	timeo = sock_rcvtimeo(sk, nonblock);
+
+	/* Urgent data needs to be handled specially. */
+	if (flags & MSG_OOB)
+		goto recv_urg;
+
+	seq = &tp->copied_seq;
+	if (flags & MSG_PEEK) {
+		peek_seq = tp->copied_seq;
+		seq = &peek_seq;
+	}
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+
+	do {
+		struct sk_buff *skb;
+		u32 offset;
+
+		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
+		if (tp->urg_data && tp->urg_seq == *seq) {
+			if (copied)
+				break;
+			if (signal_pending(current)) {
+				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
+				break;
+			}
+		}
+
+		/* Next get a buffer. */
+
+		skb = skb_peek(&sk->sk_receive_queue);
+		do {
+			if (!skb)
+				break;
+
+			/* Now that we have two receive queues this
+			 * shouldn't happen.
+			 */
+			if (before(*seq, TCP_SKB_CB(skb)->seq)) {
+				printk(KERN_INFO "recvmsg bug: copied %X "
+				       "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
+				break;
+			}
+			offset = *seq - TCP_SKB_CB(skb)->seq;
+			if (skb->h.th->syn)
+				offset--;
+			if (offset < skb->len)
+				goto found_ok_skb;
+			if (skb->h.th->fin)
+				goto found_fin_ok;
+			BUG_TRAP(flags & MSG_PEEK);
+			skb = skb->next;
+		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);
+
+		/* Well, if we have backlog, try to process it now yet. */
+
+		if (copied >= target && !sk->sk_backlog.tail)
+			break;
+
+		if (copied) {
+			if (sk->sk_err ||
+			    sk->sk_state == TCP_CLOSE ||
+			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+			    !timeo ||
+			    signal_pending(current) ||
+			    (flags & MSG_PEEK))
+				break;
+		} else {
+			if (sock_flag(sk, SOCK_DONE))
+				break;
+
+			if (sk->sk_err) {
+				copied = sock_error(sk);
+				break;
+			}
+
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			if (sk->sk_state == TCP_CLOSE) {
+				if (!sock_flag(sk, SOCK_DONE)) {
+					/* This occurs when user tries to read
+					 * from never connected socket.
+					 */
+					copied = -ENOTCONN;
+					break;
+				}
+				break;
+			}
+
+			if (!timeo) {
+				copied = -EAGAIN;
+				break;
+			}
+
+			if (signal_pending(current)) {
+				copied = sock_intr_errno(timeo);
+				break;
+			}
+		}
+
+		cleanup_rbuf(sk, copied);
+
+		if (tp->ucopy.task == user_recv) {
+			/* Install new reader */
+			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
+				user_recv = current;
+				tp->ucopy.task = user_recv;
+				tp->ucopy.iov = msg->msg_iov;
+			}
+
+			tp->ucopy.len = len;
+
+			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
+				 (flags & (MSG_PEEK | MSG_TRUNC)));
+
+			/* Ugly... If prequeue is not empty, we have to
+			 * process it before releasing socket, otherwise
+			 * order will be broken at second iteration.
+			 * More elegant solution is required!!!
+			 *
+			 * Look: we have the following (pseudo)queues:
+			 *
+			 * 1. packets in flight
+			 * 2. backlog
+			 * 3. prequeue
+			 * 4. receive_queue
+			 *
+			 * Each queue can be processed only if the next ones
+			 * are empty. At this point we have empty receive_queue.
+			 * But prequeue _can_ be not empty after 2nd iteration,
+			 * when we jumped to start of loop because backlog
+			 * processing added something to receive_queue.
+			 * We cannot release_sock(), because backlog contains
+			 * packets arrived _after_ prequeued ones.
+			 *
+			 * Shortly, algorithm is clear --- to process all
+			 * the queues in order. We could make it more directly,
+			 * requeueing packets from backlog to prequeue, if
+			 * is not empty. It is more elegant, but eats cycles,
+			 * unfortunately.
+			 */
+			if (skb_queue_len(&tp->ucopy.prequeue))
+				goto do_prequeue;
+
+			/* __ Set realtime policy in scheduler __ */
+		}
+
+		if (copied >= target) {
+			/* Do not sleep, just process backlog. */
+			release_sock(sk);
+			lock_sock(sk);
+		} else
+			sk_wait_data(sk, &timeo);
+
+		if (user_recv) {
+			int chunk;
+
+			/* __ Restore normal policy in scheduler __ */
+
+			if ((chunk = len - tp->ucopy.len) != 0) {
+				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+				len -= chunk;
+				copied += chunk;
+			}
+
+			if (tp->rcv_nxt == tp->copied_seq &&
+			    skb_queue_len(&tp->ucopy.prequeue)) {
+do_prequeue:
+				tcp_prequeue_process(sk);
+
+				if ((chunk = len - tp->ucopy.len) != 0) {
+					NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+					len -= chunk;
+					copied += chunk;
+				}
+			}
+		}
+		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
+			if (net_ratelimit())
+				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
+				       current->comm, current->pid);
+			peek_seq = tp->copied_seq;
+		}
+		continue;
+
+	found_ok_skb:
+		/* Ok so how much can we use? */
+		used = skb->len - offset;
+		if (len < used)
+			used = len;
+
+		/* Do we have urgent data here? */
+		if (tp->urg_data) {
+			u32 urg_offset = tp->urg_seq - *seq;
+			if (urg_offset < used) {
+				if (!urg_offset) {
+					if (!sock_flag(sk, SOCK_URGINLINE)) {
+						++*seq;
+						offset++;
+						used--;
+						if (!used)
+							goto skip_copy;
+					}
+				} else
+					used = urg_offset;
+			}
+		}
+
+		if (!(flags & MSG_TRUNC)) {
+			err = skb_copy_datagram_iovec(skb, offset,
+						      msg->msg_iov, used);
+			if (err) {
+				/* Exception. Bailout! */
+				if (!copied)
+					copied = -EFAULT;
+				break;
+			}
+		}
+
+		*seq += used;
+		copied += used;
+		len -= used;
+
+		tcp_rcv_space_adjust(sk);
+
+skip_copy:
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
+			tp->urg_data = 0;
+			tcp_fast_path_check(sk, tp);
+		}
+		if (used + offset < skb->len)
+			continue;
+
+		if (skb->h.th->fin)
+			goto found_fin_ok;
+		if (!(flags & MSG_PEEK))
+			sk_eat_skb(sk, skb);
+		continue;
+
+	found_fin_ok:
+		/* Process the FIN. */
+		++*seq;
+		if (!(flags & MSG_PEEK))
+			sk_eat_skb(sk, skb);
+		break;
+	} while (len > 0);
+
+	if (user_recv) {
+		if (skb_queue_len(&tp->ucopy.prequeue)) {
+			int chunk;
+
+			tp->ucopy.len = copied > 0 ? len : 0;
+
+			tcp_prequeue_process(sk);
+
+			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
+				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+				len -= chunk;
+				copied += chunk;
+			}
+		}
+
+		tp->ucopy.task = NULL;
+		tp->ucopy.len = 0;
+	}
+
+	/* According to UNIX98, msg_name/msg_namelen are ignored
+	 * on connected socket. I was just happy when found this 8) --ANK
+	 */
+
+	/* Clean up data we have read: This will do ACK frames. */
+	cleanup_rbuf(sk, copied);
+
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	return copied;
+
+out:
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	return err;
+
+recv_urg:
+	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
+	goto out;
+}
+
+/*
+ *	State processing on a close. This implements the state shift for
+ *	sending our FIN frame. Note that we only send a FIN for some
+ *	states. A shutdown() may have already sent the FIN, or we may be
+ *	closed.
+ */
+
+static unsigned char new_state[16] = {
+  /* current state:        new state:      action:	*/
+  /* (Invalid)		*/ TCP_CLOSE,
+  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+  /* TCP_SYN_SENT	*/ TCP_CLOSE,
+  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
+  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
+  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
+  /* TCP_CLOSE		*/ TCP_CLOSE,
+  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK  | TCP_ACTION_FIN,
+  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
+  /* TCP_LISTEN		*/ TCP_CLOSE,
+  /* TCP_CLOSING	*/ TCP_CLOSING,
+};
+
+static int tcp_close_state(struct sock *sk)
+{
+	int next = (int)new_state[sk->sk_state];
+	int ns = next & TCP_STATE_MASK;
+
+	tcp_set_state(sk, ns);
+
+	return next & TCP_ACTION_FIN;
+}
+
+/*
+ *	Shutdown the sending side of a connection. Much like close except
+ *	that we don't receive shut down or set_sock_flag(sk, SOCK_DEAD).
+ */
+
+void tcp_shutdown(struct sock *sk, int how)
+{
+	/*	We need to grab some memory, and put together a FIN,
+	 *	and then put it into the queue to be sent.
+	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
+	 */
+	if (!(how & SEND_SHUTDOWN))
+		return;
+
+	/* If we've already sent a FIN, or it's a closed state, skip this. */
+	if ((1 << sk->sk_state) &
+	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
+		/* Clear out any half completed packets.  FIN if needed. */
+		if (tcp_close_state(sk))
+			tcp_send_fin(sk);
+	}
+}
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all.  Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void tcp_destroy_sock(struct sock *sk)
+{
+	BUG_TRAP(sk->sk_state == TCP_CLOSE);
+	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+	/* It cannot be in hash table! */
+	BUG_TRAP(sk_unhashed(sk));
+
+	/* If it has not 0 inet_sk(sk)->num, it must be bound */
+	BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);
+
+	sk->sk_prot->destroy(sk);
+
+	sk_stream_kill_queues(sk);
+
+	xfrm_sk_free_policy(sk);
+
+#ifdef INET_REFCNT_DEBUG
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
+		       sk, atomic_read(&sk->sk_refcnt));
+	}
+#endif
+
+	atomic_dec(&tcp_orphan_count);
+	sock_put(sk);
+}
+
+void tcp_close(struct sock *sk, long timeout)
+{
+	struct sk_buff *skb;
+	int data_was_unread = 0;
+
+	lock_sock(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		tcp_set_state(sk, TCP_CLOSE);
+
+		/* Special case. */
+		tcp_listen_stop(sk);
+
+		goto adjudge_to_death;
+	}
+
+	/*  We need to flush the recv. buffs.  We do this only on the
+	 *  descriptor close, not protocol-sourced closes, because the
+	 *  reader process may not have drained the data yet!
+	 */
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
+			  skb->h.th->fin;
+		data_was_unread += len;
+		__kfree_skb(skb);
+	}
+
+	sk_stream_mem_reclaim(sk);
+
+	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
+	 * 3.10, we send a RST here because data was lost.  To
+	 * witness the awful effects of the old behavior of always
+	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
+	 * a bulk GET in an FTP client, suspend the process, wait
+	 * for the client to advertise a zero window, then kill -9
+	 * the FTP client, wheee...  Note: timeout is always zero
+	 * in such a case.
+	 */
+	if (data_was_unread) {
+		/* Unread data was tossed, zap the connection. */
+		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+		tcp_set_state(sk, TCP_CLOSE);
+		tcp_send_active_reset(sk, GFP_KERNEL);
+	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		/* Check zero linger _after_ checking for unread data. */
+		sk->sk_prot->disconnect(sk, 0);
+		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+	} else if (tcp_close_state(sk)) {
+		/* We FIN if the application ate all the data before
+		 * zapping the connection.
+		 */
+
+		/* RED-PEN. Formally speaking, we have broken TCP state
+		 * machine. State transitions:
+		 *
+		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
+		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
+		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
+		 *
+		 * are legal only when FIN has been sent (i.e. in window),
+		 * rather than queued out of window. Purists blame.
+		 *
+		 * F.e. "RFC state" is ESTABLISHED,
+		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
+		 *
+		 * The visible declinations are that sometimes
+		 * we enter time-wait state, when it is not required really
+		 * (harmless), do not send active resets, when they are
+		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
+		 * they look as CLOSING or LAST_ACK for Linux)
+		 * Probably, I missed some more holelets.
+		 * 						--ANK
+		 */
+		tcp_send_fin(sk);
+	}
+
+	sk_stream_wait_close(sk, timeout);
+
+adjudge_to_death:
+	/* It is the last release_sock in its life. It will remove backlog. */
+	release_sock(sk);
+
+
+	/* Now socket is owned by kernel and we acquire BH lock
+	   to finish close. No need to check for user refs.
+	 */
+	local_bh_disable();
+	bh_lock_sock(sk);
+	BUG_TRAP(!sock_owned_by_user(sk));
+
+	sock_hold(sk);
+	sock_orphan(sk);
+
+	/*	This is a (useful) BSD violating of the RFC. There is a
+	 *	problem with TCP as specified in that the other end could
+	 *	keep a socket open forever with no application left this end.
+	 *	We use a 3 minute timeout (about the same as BSD) then kill
+	 *	our end. If they send after that then tough - BUT: long enough
+	 *	that we won't make the old 4*rto = almost no time - whoops
+	 *	reset mistake.
+	 *
+	 *	Nope, it was not mistake. It is really desired behaviour
+	 *	f.e. on http servers, when such sockets are useless, but
+	 *	consume significant resources. Let's do it with special
+	 *	linger2	option.					--ANK
+	 */
+
+	if (sk->sk_state == TCP_FIN_WAIT2) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		if (tp->linger2 < 0) {
+			tcp_set_state(sk, TCP_CLOSE);
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+		} else {
+			int tmo = tcp_fin_time(tp);
+
+			if (tmo > TCP_TIMEWAIT_LEN) {
+				tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
+			} else {
+				atomic_inc(&tcp_orphan_count);
+				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+				goto out;
+			}
+		}
+	}
+	if (sk->sk_state != TCP_CLOSE) {
+		sk_stream_mem_reclaim(sk);
+		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+			if (net_ratelimit())
+				printk(KERN_INFO "TCP: too many of orphaned "
+				       "sockets\n");
+			tcp_set_state(sk, TCP_CLOSE);
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+		}
+	}
+	atomic_inc(&tcp_orphan_count);
+
+	if (sk->sk_state == TCP_CLOSE)
+		tcp_destroy_sock(sk);
+	/* Otherwise, socket is reprieved until protocol close. */
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	sock_put(sk);
+}
+
+/* These states need RST on ABORT according to RFC793 */
+
+static inline int tcp_need_reset(int state)
+{
+	return (1 << state) &
+	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
+		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
+}
+
+int tcp_disconnect(struct sock *sk, int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	int err = 0;
+	int old_state = sk->sk_state;
+
+	if (old_state != TCP_CLOSE)
+		tcp_set_state(sk, TCP_CLOSE);
+
+	/* ABORT function of RFC793 */
+	if (old_state == TCP_LISTEN) {
+		tcp_listen_stop(sk);
+	} else if (tcp_need_reset(old_state) ||
+		   (tp->snd_nxt != tp->write_seq &&
+		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
+		/* The last check adjusts for discrepance of Linux wrt. RFC
+		 * states
+		 */
+		tcp_send_active_reset(sk, gfp_any());
+		sk->sk_err = ECONNRESET;
+	} else if (old_state == TCP_SYN_SENT)
+		sk->sk_err = ECONNRESET;
+
+	tcp_clear_xmit_timers(sk);
+	__skb_queue_purge(&sk->sk_receive_queue);
+	sk_stream_writequeue_purge(sk);
+	__skb_queue_purge(&tp->out_of_order_queue);
+
+	inet->dport = 0;
+
+	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+		inet_reset_saddr(sk);
+
+	sk->sk_shutdown = 0;
+	sock_reset_flag(sk, SOCK_DONE);
+	tp->srtt = 0;
+	if ((tp->write_seq += tp->max_window + 2) == 0)
+		tp->write_seq = 1;
+	tp->backoff = 0;
+	tp->snd_cwnd = 2;
+	tp->probes_out = 0;
+	tp->packets_out = 0;
+	tp->snd_ssthresh = 0x7fffffff;
+	tp->snd_cwnd_cnt = 0;
+	tcp_set_ca_state(tp, TCP_CA_Open);
+	tcp_clear_retrans(tp);
+	tcp_delack_init(tp);
+	sk->sk_send_head = NULL;
+	tp->rx_opt.saw_tstamp = 0;
+	tcp_sack_reset(&tp->rx_opt);
+	__sk_dst_reset(sk);
+
+	BUG_TRAP(!inet->num || tp->bind_hash);
+
+	sk->sk_error_report(sk);
+	return err;
+}
+
+/*
+ *	Wait for an incoming connection, avoid race
+ *	conditions. This must be called with the socket locked.
+ */
+static int wait_for_connect(struct sock *sk, long timeo)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	DEFINE_WAIT(wait);
+	int err;
+
+	/*
+	 * True wake-one mechanism for incoming connections: only
+	 * one process gets woken up, not the 'whole herd'.
+	 * Since we do not 'race & poll' for established sockets
+	 * anymore, the common case will execute the loop only once.
+	 *
+	 * Subtle issue: "add_wait_queue_exclusive()" will be added
+	 * after any current non-exclusive waiters, and we know that
+	 * it will always _stay_ after any new non-exclusive waiters
+	 * because all non-exclusive waiters are added at the
+	 * beginning of the wait-queue. As such, it's ok to "drop"
+	 * our exclusiveness temporarily when we get woken up without
+	 * having to remove and re-insert us on the wait queue.
+	 */
+	for (;;) {
+		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+					  TASK_INTERRUPTIBLE);
+		release_sock(sk);
+		if (!tp->accept_queue)
+			timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		err = 0;
+		if (tp->accept_queue)
+			break;
+		err = -EINVAL;
+		if (sk->sk_state != TCP_LISTEN)
+			break;
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			break;
+		err = -EAGAIN;
+		if (!timeo)
+			break;
+	}
+	finish_wait(sk->sk_sleep, &wait);
+	return err;
+}
+
+/*
+ *	This will accept the next outstanding connection.
+ */
+
+struct sock *tcp_accept(struct sock *sk, int flags, int *err)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct open_request *req;
+	struct sock *newsk;
+	int error;
+
+	lock_sock(sk);
+
+	/* We need to make sure that this socket is listening,
+	 * and that it has something pending.
+	 */
+	error = -EINVAL;
+	if (sk->sk_state != TCP_LISTEN)
+		goto out;
+
+	/* Find already established connection */
+	if (!tp->accept_queue) {
+		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+		/* If this is a non blocking socket don't sleep */
+		error = -EAGAIN;
+		if (!timeo)
+			goto out;
+
+		error = wait_for_connect(sk, timeo);
+		if (error)
+			goto out;
+	}
+
+	req = tp->accept_queue;
+	if ((tp->accept_queue = req->dl_next) == NULL)
+		tp->accept_queue_tail = NULL;
+
+ 	newsk = req->sk;
+	sk_acceptq_removed(sk);
+	tcp_openreq_fastfree(req);
+	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
+	release_sock(sk);
+	return newsk;
+
+out:
+	release_sock(sk);
+	*err = error;
+	return NULL;
+}
+
+/*
+ *	Socket option code for TCP.
+ */
+int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
+		   int optlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int val;
+	int err = 0;
+
+	if (level != SOL_TCP)
+		return tp->af_specific->setsockopt(sk, level, optname,
+						   optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case TCP_MAXSEG:
+		/* Values greater than interface MTU won't take effect. However
+		 * at the point when this call is done we typically don't yet
+		 * know which interface is going to be used */
+		if (val < 8 || val > MAX_TCP_WINDOW) {
+			err = -EINVAL;
+			break;
+		}
+		tp->rx_opt.user_mss = val;
+		break;
+
+	case TCP_NODELAY:
+		if (val) {
+			/* TCP_NODELAY is weaker than TCP_CORK, so that
+			 * this option on corked socket is remembered, but
+			 * it is not activated until cork is cleared.
+			 *
+			 * However, when TCP_NODELAY is set we make
+			 * an explicit push, which overrides even TCP_CORK
+			 * for currently queued segments.
+			 */
+			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+			tcp_push_pending_frames(sk, tp);
+		} else {
+			tp->nonagle &= ~TCP_NAGLE_OFF;
+		}
+		break;
+
+	case TCP_CORK:
+		/* When set indicates to always queue non-full frames.
+		 * Later the user clears this option and we transmit
+		 * any pending partial frames in the queue.  This is
+		 * meant to be used alongside sendfile() to get properly
+		 * filled frames when the user (for example) must write
+		 * out headers with a write() call first and then use
+		 * sendfile to send out the data parts.
+		 *
+		 * TCP_CORK can be set together with TCP_NODELAY and it is
+		 * stronger than TCP_NODELAY.
+		 */
+		if (val) {
+			tp->nonagle |= TCP_NAGLE_CORK;
+		} else {
+			tp->nonagle &= ~TCP_NAGLE_CORK;
+			if (tp->nonagle&TCP_NAGLE_OFF)
+				tp->nonagle |= TCP_NAGLE_PUSH;
+			tcp_push_pending_frames(sk, tp);
+		}
+		break;
+
+	case TCP_KEEPIDLE:
+		if (val < 1 || val > MAX_TCP_KEEPIDLE)
+			err = -EINVAL;
+		else {
+			tp->keepalive_time = val * HZ;
+			if (sock_flag(sk, SOCK_KEEPOPEN) &&
+			    !((1 << sk->sk_state) &
+			      (TCPF_CLOSE | TCPF_LISTEN))) {
+				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
+				if (tp->keepalive_time > elapsed)
+					elapsed = tp->keepalive_time - elapsed;
+				else
+					elapsed = 0;
+				tcp_reset_keepalive_timer(sk, elapsed);
+			}
+		}
+		break;
+	case TCP_KEEPINTVL:
+		if (val < 1 || val > MAX_TCP_KEEPINTVL)
+			err = -EINVAL;
+		else
+			tp->keepalive_intvl = val * HZ;
+		break;
+	case TCP_KEEPCNT:
+		if (val < 1 || val > MAX_TCP_KEEPCNT)
+			err = -EINVAL;
+		else
+			tp->keepalive_probes = val;
+		break;
+	case TCP_SYNCNT:
+		if (val < 1 || val > MAX_TCP_SYNCNT)
+			err = -EINVAL;
+		else
+			tp->syn_retries = val;
+		break;
+
+	case TCP_LINGER2:
+		if (val < 0)
+			tp->linger2 = -1;
+		else if (val > sysctl_tcp_fin_timeout / HZ)
+			tp->linger2 = 0;
+		else
+			tp->linger2 = val * HZ;
+		break;
+
+	case TCP_DEFER_ACCEPT:
+		tp->defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (tp->defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				       tp->defer_accept))
+				tp->defer_accept++;
+			tp->defer_accept++;
+		}
+		break;
+
+	case TCP_WINDOW_CLAMP:
+		if (!val) {
+			if (sk->sk_state != TCP_CLOSE) {
+				err = -EINVAL;
+				break;
+			}
+			tp->window_clamp = 0;
+		} else
+			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
+						SOCK_MIN_RCVBUF / 2 : val;
+		break;
+
+	case TCP_QUICKACK:
+		if (!val) {
+			tp->ack.pingpong = 1;
+		} else {
+			tp->ack.pingpong = 0;
+			if ((1 << sk->sk_state) &
+			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+			    tcp_ack_scheduled(tp)) {
+				tp->ack.pending |= TCP_ACK_PUSHED;
+				cleanup_rbuf(sk, 1);
+				if (!(val & 1))
+					tp->ack.pingpong = 1;
+			}
+		}
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+	release_sock(sk);
+	return err;
+}
+
+/* Return information about state of tcp endpoint in API format. */
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 now = tcp_time_stamp;
+
+	memset(info, 0, sizeof(*info));
+
+	info->tcpi_state = sk->sk_state;
+	info->tcpi_ca_state = tp->ca_state;
+	info->tcpi_retransmits = tp->retransmits;
+	info->tcpi_probes = tp->probes_out;
+	info->tcpi_backoff = tp->backoff;
+
+	if (tp->rx_opt.tstamp_ok)
+		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+	if (tp->rx_opt.sack_ok)
+		info->tcpi_options |= TCPI_OPT_SACK;
+	if (tp->rx_opt.wscale_ok) {
+		info->tcpi_options |= TCPI_OPT_WSCALE;
+		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
+		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
+	} 
+
+	if (tp->ecn_flags&TCP_ECN_OK)
+		info->tcpi_options |= TCPI_OPT_ECN;
+
+	info->tcpi_rto = jiffies_to_usecs(tp->rto);
+	info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
+	info->tcpi_snd_mss = tp->mss_cache_std;
+	info->tcpi_rcv_mss = tp->ack.rcv_mss;
+
+	info->tcpi_unacked = tp->packets_out;
+	info->tcpi_sacked = tp->sacked_out;
+	info->tcpi_lost = tp->lost_out;
+	info->tcpi_retrans = tp->retrans_out;
+	info->tcpi_fackets = tp->fackets_out;
+
+	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
+	info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime);
+	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
+
+	info->tcpi_pmtu = tp->pmtu_cookie;
+	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
+	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
+	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
+	info->tcpi_snd_cwnd = tp->snd_cwnd;
+	info->tcpi_advmss = tp->advmss;
+	info->tcpi_reordering = tp->reordering;
+
+	info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
+	info->tcpi_rcv_space = tp->rcvq_space.space;
+
+	info->tcpi_total_retrans = tp->total_retrans;
+}
+
+EXPORT_SYMBOL_GPL(tcp_get_info);
+
+int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
+		   int __user *optlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int val, len;
+
+	if (level != SOL_TCP)
+		return tp->af_specific->getsockopt(sk, level, optname,
+						   optval, optlen);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	if (len < 0)
+		return -EINVAL;
+
+	switch (optname) {
+	case TCP_MAXSEG:
+		val = tp->mss_cache_std;
+		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+			val = tp->rx_opt.user_mss;
+		break;
+	case TCP_NODELAY:
+		val = !!(tp->nonagle&TCP_NAGLE_OFF);
+		break;
+	case TCP_CORK:
+		val = !!(tp->nonagle&TCP_NAGLE_CORK);
+		break;
+	case TCP_KEEPIDLE:
+		val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
+		break;
+	case TCP_KEEPINTVL:
+		val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
+		break;
+	case TCP_KEEPCNT:
+		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+		break;
+	case TCP_SYNCNT:
+		val = tp->syn_retries ? : sysctl_tcp_syn_retries;
+		break;
+	case TCP_LINGER2:
+		val = tp->linger2;
+		if (val >= 0)
+			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
+		break;
+	case TCP_DEFER_ACCEPT:
+		val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) <<
+					       (tp->defer_accept - 1));
+		break;
+	case TCP_WINDOW_CLAMP:
+		val = tp->window_clamp;
+		break;
+	case TCP_INFO: {
+		struct tcp_info info;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		tcp_get_info(sk, &info);
+
+		len = min_t(unsigned int, len, sizeof(info));
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &info, len))
+			return -EFAULT;
+		return 0;
+	}
+	case TCP_QUICKACK:
+		val = !tp->ack.pingpong;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	};
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+}
+
+
+extern void __skb_cb_too_small_for_tcp(int, int);
+extern void tcpdiag_init(void);
+
+static __initdata unsigned long thash_entries;
+static int __init set_thash_entries(char *str)
+{
+	if (!str)
+		return 0;
+	thash_entries = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("thash_entries=", set_thash_entries);
+
+void __init tcp_init(void)
+{
+	struct sk_buff *skb = NULL;
+	int order, i;
+
+	if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
+		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
+					   sizeof(skb->cb));
+
+	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
+						   sizeof(struct open_request),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+	if (!tcp_openreq_cachep)
+		panic("tcp_init: Cannot alloc open_request cache.");
+
+	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
+					      sizeof(struct tcp_bind_bucket),
+					      0, SLAB_HWCACHE_ALIGN,
+					      NULL, NULL);
+	if (!tcp_bucket_cachep)
+		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+
+	tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
+						sizeof(struct tcp_tw_bucket),
+						0, SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!tcp_timewait_cachep)
+		panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
+
+	/* Size and allocate the main established and bind bucket
+	 * hash tables.
+	 *
+	 * The methodology is similar to that of the buffer cache.
+	 */
+	tcp_ehash = (struct tcp_ehash_bucket *)
+		alloc_large_system_hash("TCP established",
+					sizeof(struct tcp_ehash_bucket),
+					thash_entries,
+					(num_physpages >= 128 * 1024) ?
+						(25 - PAGE_SHIFT) :
+						(27 - PAGE_SHIFT),
+					HASH_HIGHMEM,
+					&tcp_ehash_size,
+					NULL,
+					0);
+	tcp_ehash_size = (1 << tcp_ehash_size) >> 1;
+	for (i = 0; i < (tcp_ehash_size << 1); i++) {
+		rwlock_init(&tcp_ehash[i].lock);
+		INIT_HLIST_HEAD(&tcp_ehash[i].chain);
+	}
+
+	tcp_bhash = (struct tcp_bind_hashbucket *)
+		alloc_large_system_hash("TCP bind",
+					sizeof(struct tcp_bind_hashbucket),
+					tcp_ehash_size,
+					(num_physpages >= 128 * 1024) ?
+						(25 - PAGE_SHIFT) :
+						(27 - PAGE_SHIFT),
+					HASH_HIGHMEM,
+					&tcp_bhash_size,
+					NULL,
+					64 * 1024);
+	tcp_bhash_size = 1 << tcp_bhash_size;
+	for (i = 0; i < tcp_bhash_size; i++) {
+		spin_lock_init(&tcp_bhash[i].lock);
+		INIT_HLIST_HEAD(&tcp_bhash[i].chain);
+	}
+
+	/* Try to be a bit smarter and adjust defaults depending
+	 * on available memory.
+	 */
+	for (order = 0; ((1 << order) << PAGE_SHIFT) <
+			(tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
+			order++)
+		;
+	if (order > 4) {
+		sysctl_local_port_range[0] = 32768;
+		sysctl_local_port_range[1] = 61000;
+		sysctl_tcp_max_tw_buckets = 180000;
+		sysctl_tcp_max_orphans = 4096 << (order - 4);
+		sysctl_max_syn_backlog = 1024;
+	} else if (order < 3) {
+		sysctl_local_port_range[0] = 1024 * (3 - order);
+		sysctl_tcp_max_tw_buckets >>= (3 - order);
+		sysctl_tcp_max_orphans >>= (3 - order);
+		sysctl_max_syn_backlog = 128;
+	}
+	tcp_port_rover = sysctl_local_port_range[0] - 1;
+
+	sysctl_tcp_mem[0] =  768 << order;
+	sysctl_tcp_mem[1] = 1024 << order;
+	sysctl_tcp_mem[2] = 1536 << order;
+
+	if (order < 3) {
+		sysctl_tcp_wmem[2] = 64 * 1024;
+		sysctl_tcp_rmem[0] = PAGE_SIZE;
+		sysctl_tcp_rmem[1] = 43689;
+		sysctl_tcp_rmem[2] = 2 * 43689;
+	}
+
+	printk(KERN_INFO "TCP: Hash tables configured "
+	       "(established %d bind %d)\n",
+	       tcp_ehash_size << 1, tcp_bhash_size);
+}
+
+EXPORT_SYMBOL(tcp_accept);
+EXPORT_SYMBOL(tcp_close);
+EXPORT_SYMBOL(tcp_destroy_sock);
+EXPORT_SYMBOL(tcp_disconnect);
+EXPORT_SYMBOL(tcp_getsockopt);
+EXPORT_SYMBOL(tcp_ioctl);
+EXPORT_SYMBOL(tcp_openreq_cachep);
+EXPORT_SYMBOL(tcp_poll);
+EXPORT_SYMBOL(tcp_read_sock);
+EXPORT_SYMBOL(tcp_recvmsg);
+EXPORT_SYMBOL(tcp_sendmsg);
+EXPORT_SYMBOL(tcp_sendpage);
+EXPORT_SYMBOL(tcp_setsockopt);
+EXPORT_SYMBOL(tcp_shutdown);
+EXPORT_SYMBOL(tcp_statistics);
+EXPORT_SYMBOL(tcp_timewait_cachep);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
new file mode 100644
index 00000000000..313c1408da3
--- /dev/null
+++ b/net/ipv4/tcp_diag.c
@@ -0,0 +1,802 @@
+/*
+ * tcp_diag.c	Module for monitoring TCP sockets.
+ *
+ * Version:	$Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/random.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/time.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <net/inet_common.h>
+
+#include <linux/inet.h>
+#include <linux/stddef.h>
+
+#include <linux/tcp_diag.h>
+
+struct tcpdiag_entry
+{
+	u32 *saddr;
+	u32 *daddr;
+	u16 sport;
+	u16 dport;
+	u16 family;
+	u16 userlocks;
+};
+
+static struct sock *tcpnl;
+
+
+#define TCPDIAG_PUT(skb, attrtype, attrlen) \
+({ int rtalen = RTA_LENGTH(attrlen);        \
+   struct rtattr *rta;                      \
+   if (skb_tailroom(skb) < RTA_ALIGN(rtalen)) goto nlmsg_failure; \
+   rta = (void*)__skb_put(skb, RTA_ALIGN(rtalen)); \
+   rta->rta_type = attrtype;                \
+   rta->rta_len = rtalen;                   \
+   RTA_DATA(rta); })
+
+static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
+			int ext, u32 pid, u32 seq, u16 nlmsg_flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcpdiagmsg *r;
+	struct nlmsghdr  *nlh;
+	struct tcp_info  *info = NULL;
+	struct tcpdiag_meminfo  *minfo = NULL;
+	struct tcpvegas_info *vinfo = NULL;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
+	nlh->nlmsg_flags = nlmsg_flags;
+	r = NLMSG_DATA(nlh);
+	if (sk->sk_state != TCP_TIME_WAIT) {
+		if (ext & (1<<(TCPDIAG_MEMINFO-1)))
+			minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
+		if (ext & (1<<(TCPDIAG_INFO-1)))
+			info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
+		
+		if ((tcp_is_westwood(tp) || tcp_is_vegas(tp))
+		    && (ext & (1<<(TCPDIAG_VEGASINFO-1))))
+			vinfo = TCPDIAG_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*vinfo));
+	}
+	r->tcpdiag_family = sk->sk_family;
+	r->tcpdiag_state = sk->sk_state;
+	r->tcpdiag_timer = 0;
+	r->tcpdiag_retrans = 0;
+
+	r->id.tcpdiag_if = sk->sk_bound_dev_if;
+	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
+	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+
+	if (r->tcpdiag_state == TCP_TIME_WAIT) {
+		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk;
+		long tmo = tw->tw_ttd - jiffies;
+		if (tmo < 0)
+			tmo = 0;
+
+		r->id.tcpdiag_sport = tw->tw_sport;
+		r->id.tcpdiag_dport = tw->tw_dport;
+		r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
+		r->id.tcpdiag_dst[0] = tw->tw_daddr;
+		r->tcpdiag_state = tw->tw_substate;
+		r->tcpdiag_timer = 3;
+		r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
+		r->tcpdiag_rqueue = 0;
+		r->tcpdiag_wqueue = 0;
+		r->tcpdiag_uid = 0;
+		r->tcpdiag_inode = 0;
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+		if (r->tcpdiag_family == AF_INET6) {
+			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
+				       &tw->tw_v6_rcv_saddr);
+			ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
+				       &tw->tw_v6_daddr);
+		}
+#endif
+		nlh->nlmsg_len = skb->tail - b;
+		return skb->len;
+	}
+
+	r->id.tcpdiag_sport = inet->sport;
+	r->id.tcpdiag_dport = inet->dport;
+	r->id.tcpdiag_src[0] = inet->rcv_saddr;
+	r->id.tcpdiag_dst[0] = inet->daddr;
+
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+	if (r->tcpdiag_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
+			       &np->rcv_saddr);
+		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
+			       &np->daddr);
+	}
+#endif
+
+#define EXPIRES_IN_MS(tmo)  ((tmo-jiffies)*1000+HZ-1)/HZ
+
+	if (tp->pending == TCP_TIME_RETRANS) {
+		r->tcpdiag_timer = 1;
+		r->tcpdiag_retrans = tp->retransmits;
+		r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
+	} else if (tp->pending == TCP_TIME_PROBE0) {
+		r->tcpdiag_timer = 4;
+		r->tcpdiag_retrans = tp->probes_out;
+		r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
+	} else if (timer_pending(&sk->sk_timer)) {
+		r->tcpdiag_timer = 2;
+		r->tcpdiag_retrans = tp->probes_out;
+		r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
+	} else {
+		r->tcpdiag_timer = 0;
+		r->tcpdiag_expires = 0;
+	}
+#undef EXPIRES_IN_MS
+
+	r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
+	r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
+	r->tcpdiag_uid = sock_i_uid(sk);
+	r->tcpdiag_inode = sock_i_ino(sk);
+
+	if (minfo) {
+		minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
+		minfo->tcpdiag_wmem = sk->sk_wmem_queued;
+		minfo->tcpdiag_fmem = sk->sk_forward_alloc;
+		minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
+	}
+
+	if (info) 
+		tcp_get_info(sk, info);
+
+	if (vinfo) {
+		if (tcp_is_vegas(tp)) {
+			vinfo->tcpv_enabled = tp->vegas.doing_vegas_now;
+			vinfo->tcpv_rttcnt = tp->vegas.cntRTT;
+			vinfo->tcpv_rtt = jiffies_to_usecs(tp->vegas.baseRTT);
+			vinfo->tcpv_minrtt = jiffies_to_usecs(tp->vegas.minRTT);
+		} else {
+			vinfo->tcpv_enabled = 0;
+			vinfo->tcpv_rttcnt = 0;
+			vinfo->tcpv_rtt = jiffies_to_usecs(tp->westwood.rtt);
+			vinfo->tcpv_minrtt = jiffies_to_usecs(tp->westwood.rtt_min);
+		}
+	}
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
+				  int dif);
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+				  struct in6_addr *daddr, u16 dport,
+				  int dif);
+#else
+static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+					 struct in6_addr *daddr, u16 dport,
+					 int dif)
+{
+	return NULL;
+}
+#endif
+
+static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+{
+	int err;
+	struct sock *sk;
+	struct tcpdiagreq *req = NLMSG_DATA(nlh);
+	struct sk_buff *rep;
+
+	if (req->tcpdiag_family == AF_INET) {
+		sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
+				   req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
+				   req->id.tcpdiag_if);
+	}
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+	else if (req->tcpdiag_family == AF_INET6) {
+		sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
+				   (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
+				   req->id.tcpdiag_if);
+	}
+#endif
+	else {
+		return -EINVAL;
+	}
+
+	if (sk == NULL)
+		return -ENOENT;
+
+	err = -ESTALE;
+	if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
+	     req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1]))
+		goto out;
+
+	err = -ENOMEM;
+	rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
+				    sizeof(struct tcpdiag_meminfo)+
+				    sizeof(struct tcp_info)+64), GFP_KERNEL);
+	if (!rep)
+		goto out;
+
+	if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
+			 NETLINK_CB(in_skb).pid,
+			 nlh->nlmsg_seq, 0) <= 0)
+		BUG();
+
+	err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+
+out:
+	if (sk) {
+		if (sk->sk_state == TCP_TIME_WAIT)
+			tcp_tw_put((struct tcp_tw_bucket*)sk);
+		else
+			sock_put(sk);
+	}
+	return err;
+}
+
+static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
+{
+	int words = bits >> 5;
+
+	bits &= 0x1f;
+
+	if (words) {
+		if (memcmp(a1, a2, words << 2))
+			return 0;
+	}
+	if (bits) {
+		__u32 w1, w2;
+		__u32 mask;
+
+		w1 = a1[words];
+		w2 = a2[words];
+
+		mask = htonl((0xffffffff) << (32 - bits));
+
+		if ((w1 ^ w2) & mask)
+			return 0;
+	}
+
+	return 1;
+}
+
+
+static int tcpdiag_bc_run(const void *bc, int len,
+			  const struct tcpdiag_entry *entry)
+{
+	while (len > 0) {
+		int yes = 1;
+		const struct tcpdiag_bc_op *op = bc;
+
+		switch (op->code) {
+		case TCPDIAG_BC_NOP:
+			break;
+		case TCPDIAG_BC_JMP:
+			yes = 0;
+			break;
+		case TCPDIAG_BC_S_GE:
+			yes = entry->sport >= op[1].no;
+			break;
+		case TCPDIAG_BC_S_LE:
+			yes = entry->dport <= op[1].no;
+			break;
+		case TCPDIAG_BC_D_GE:
+			yes = entry->dport >= op[1].no;
+			break;
+		case TCPDIAG_BC_D_LE:
+			yes = entry->dport <= op[1].no;
+			break;
+		case TCPDIAG_BC_AUTO:
+			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
+			break;
+		case TCPDIAG_BC_S_COND:
+		case TCPDIAG_BC_D_COND:
+		{
+			struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
+			u32 *addr;
+
+			if (cond->port != -1 &&
+			    cond->port != (op->code == TCPDIAG_BC_S_COND ?
+					     entry->sport : entry->dport)) {
+				yes = 0;
+				break;
+			}
+			
+			if (cond->prefix_len == 0)
+				break;
+
+			if (op->code == TCPDIAG_BC_S_COND)
+				addr = entry->saddr;
+			else
+				addr = entry->daddr;
+
+			if (bitstring_match(addr, cond->addr, cond->prefix_len))
+				break;
+			if (entry->family == AF_INET6 &&
+			    cond->family == AF_INET) {
+				if (addr[0] == 0 && addr[1] == 0 &&
+				    addr[2] == htonl(0xffff) &&
+				    bitstring_match(addr+3, cond->addr, cond->prefix_len))
+					break;
+			}
+			yes = 0;
+			break;
+		}
+		}
+
+		if (yes) { 
+			len -= op->yes;
+			bc += op->yes;
+		} else {
+			len -= op->no;
+			bc += op->no;
+		}
+	}
+	return (len == 0);
+}
+
+static int valid_cc(const void *bc, int len, int cc)
+{
+	while (len >= 0) {
+		const struct tcpdiag_bc_op *op = bc;
+
+		if (cc > len)
+			return 0;
+		if (cc == len)
+			return 1;
+		if (op->yes < 4)
+			return 0;
+		len -= op->yes;
+		bc  += op->yes;
+	}
+	return 0;
+}
+
+static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
+{
+	const unsigned char *bc = bytecode;
+	int  len = bytecode_len;
+
+	while (len > 0) {
+		struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
+
+//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
+		switch (op->code) {
+		case TCPDIAG_BC_AUTO:
+		case TCPDIAG_BC_S_COND:
+		case TCPDIAG_BC_D_COND:
+		case TCPDIAG_BC_S_GE:
+		case TCPDIAG_BC_S_LE:
+		case TCPDIAG_BC_D_GE:
+		case TCPDIAG_BC_D_LE:
+			if (op->yes < 4 || op->yes > len+4)
+				return -EINVAL;
+		case TCPDIAG_BC_JMP:
+			if (op->no < 4 || op->no > len+4)
+				return -EINVAL;
+			if (op->no < len &&
+			    !valid_cc(bytecode, bytecode_len, len-op->no))
+				return -EINVAL;
+			break;
+		case TCPDIAG_BC_NOP:
+			if (op->yes < 4 || op->yes > len+4)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+		bc += op->yes;
+		len -= op->yes;
+	}
+	return len == 0 ? 0 : -EINVAL;
+}
+
+static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
+			     struct netlink_callback *cb)
+{
+	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+
+	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+		struct tcpdiag_entry entry;
+		struct rtattr *bc = (struct rtattr *)(r + 1);
+		struct inet_sock *inet = inet_sk(sk);
+
+		entry.family = sk->sk_family;
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+		if (entry.family == AF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			entry.saddr = np->rcv_saddr.s6_addr32;
+			entry.daddr = np->daddr.s6_addr32;
+		} else
+#endif
+		{
+			entry.saddr = &inet->rcv_saddr;
+			entry.daddr = &inet->daddr;
+		}
+		entry.sport = inet->num;
+		entry.dport = ntohs(inet->dport);
+		entry.userlocks = sk->sk_userlocks;
+
+		if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+			return 0;
+	}
+
+	return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
+			    cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
+			    struct open_request *req,
+			    u32 pid, u32 seq)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	unsigned char *b = skb->tail;
+	struct tcpdiagmsg *r;
+	struct nlmsghdr *nlh;
+	long tmo;
+
+	nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
+	nlh->nlmsg_flags = NLM_F_MULTI;
+	r = NLMSG_DATA(nlh);
+
+	r->tcpdiag_family = sk->sk_family;
+	r->tcpdiag_state = TCP_SYN_RECV;
+	r->tcpdiag_timer = 1;
+	r->tcpdiag_retrans = req->retrans;
+
+	r->id.tcpdiag_if = sk->sk_bound_dev_if;
+	r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
+	r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+
+	tmo = req->expires - jiffies;
+	if (tmo < 0)
+		tmo = 0;
+
+	r->id.tcpdiag_sport = inet->sport;
+	r->id.tcpdiag_dport = req->rmt_port;
+	r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr;
+	r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr;
+	r->tcpdiag_expires = jiffies_to_msecs(tmo),
+	r->tcpdiag_rqueue = 0;
+	r->tcpdiag_wqueue = 0;
+	r->tcpdiag_uid = sock_i_uid(sk);
+	r->tcpdiag_inode = 0;
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+	if (r->tcpdiag_family == AF_INET6) {
+		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
+			       &req->af.v6_req.loc_addr);
+		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
+			       &req->af.v6_req.rmt_addr);
+	}
+#endif
+	nlh->nlmsg_len = skb->tail - b;
+
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
+			     struct netlink_callback *cb)
+{
+	struct tcpdiag_entry entry;
+	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt;
+	struct rtattr *bc = NULL;
+	struct inet_sock *inet = inet_sk(sk);
+	int j, s_j;
+	int reqnum, s_reqnum;
+	int err = 0;
+
+	s_j = cb->args[3];
+	s_reqnum = cb->args[4];
+
+	if (s_j > 0)
+		s_j--;
+
+	entry.family = sk->sk_family;
+
+	read_lock_bh(&tp->syn_wait_lock);
+
+	lopt = tp->listen_opt;
+	if (!lopt || !lopt->qlen)
+		goto out;
+
+	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+		bc = (struct rtattr *)(r + 1);
+		entry.sport = inet->num;
+		entry.userlocks = sk->sk_userlocks;
+	}
+
+	for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
+		struct open_request *req, *head = lopt->syn_table[j];
+
+		reqnum = 0;
+		for (req = head; req; reqnum++, req = req->dl_next) {
+			if (reqnum < s_reqnum)
+				continue;
+			if (r->id.tcpdiag_dport != req->rmt_port &&
+			    r->id.tcpdiag_dport)
+				continue;
+
+			if (bc) {
+				entry.saddr =
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+					(entry.family == AF_INET6) ?
+					req->af.v6_req.loc_addr.s6_addr32 :
+#endif
+					&req->af.v4_req.loc_addr;
+				entry.daddr = 
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+					(entry.family == AF_INET6) ?
+					req->af.v6_req.rmt_addr.s6_addr32 :
+#endif
+					&req->af.v4_req.rmt_addr;
+				entry.dport = ntohs(req->rmt_port);
+
+				if (!tcpdiag_bc_run(RTA_DATA(bc),
+						    RTA_PAYLOAD(bc), &entry))
+					continue;
+			}
+
+			err = tcpdiag_fill_req(skb, sk, req,
+					       NETLINK_CB(cb->skb).pid,
+					       cb->nlh->nlmsg_seq);
+			if (err < 0) {
+				cb->args[3] = j + 1;
+				cb->args[4] = reqnum;
+				goto out;
+			}
+		}
+
+		s_reqnum = 0;
+	}
+
+out:
+	read_unlock_bh(&tp->syn_wait_lock);
+
+	return err;
+}
+
+static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int i, num;
+	int s_i, s_num;
+	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+
+	s_i = cb->args[1];
+	s_num = num = cb->args[2];
+
+	if (cb->args[0] == 0) {
+		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
+			goto skip_listen_ht;
+		tcp_listen_lock();
+		for (i = s_i; i < TCP_LHTABLE_SIZE; i++) {
+			struct sock *sk;
+			struct hlist_node *node;
+
+			num = 0;
+			sk_for_each(sk, node, &tcp_listening_hash[i]) {
+				struct inet_sock *inet = inet_sk(sk);
+
+				if (num < s_num) {
+					num++;
+					continue;
+				}
+
+				if (r->id.tcpdiag_sport != inet->sport &&
+				    r->id.tcpdiag_sport)
+					goto next_listen;
+
+				if (!(r->tcpdiag_states&TCPF_LISTEN) ||
+				    r->id.tcpdiag_dport ||
+				    cb->args[3] > 0)
+					goto syn_recv;
+
+				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
+					tcp_listen_unlock();
+					goto done;
+				}
+
+syn_recv:
+				if (!(r->tcpdiag_states&TCPF_SYN_RECV))
+					goto next_listen;
+
+				if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
+					tcp_listen_unlock();
+					goto done;
+				}
+
+next_listen:
+				cb->args[3] = 0;
+				cb->args[4] = 0;
+				++num;
+			}
+
+			s_num = 0;
+			cb->args[3] = 0;
+			cb->args[4] = 0;
+		}
+		tcp_listen_unlock();
+skip_listen_ht:
+		cb->args[0] = 1;
+		s_i = num = s_num = 0;
+	}
+
+	if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
+		return skb->len;
+
+	for (i = s_i; i < tcp_ehash_size; i++) {
+		struct tcp_ehash_bucket *head = &tcp_ehash[i];
+		struct sock *sk;
+		struct hlist_node *node;
+
+		if (i > s_i)
+			s_num = 0;
+
+		read_lock_bh(&head->lock);
+
+		num = 0;
+		sk_for_each(sk, node, &head->chain) {
+			struct inet_sock *inet = inet_sk(sk);
+
+			if (num < s_num)
+				goto next_normal;
+			if (!(r->tcpdiag_states & (1 << sk->sk_state)))
+				goto next_normal;
+			if (r->id.tcpdiag_sport != inet->sport &&
+			    r->id.tcpdiag_sport)
+				goto next_normal;
+			if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
+				goto next_normal;
+			if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
+				read_unlock_bh(&head->lock);
+				goto done;
+			}
+next_normal:
+			++num;
+		}
+
+		if (r->tcpdiag_states&TCPF_TIME_WAIT) {
+			sk_for_each(sk, node,
+				    &tcp_ehash[i + tcp_ehash_size].chain) {
+				struct inet_sock *inet = inet_sk(sk);
+
+				if (num < s_num)
+					goto next_dying;
+				if (r->id.tcpdiag_sport != inet->sport &&
+				    r->id.tcpdiag_sport)
+					goto next_dying;
+				if (r->id.tcpdiag_dport != inet->dport &&
+				    r->id.tcpdiag_dport)
+					goto next_dying;
+				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
+					read_unlock_bh(&head->lock);
+					goto done;
+				}
+next_dying:
+				++num;
+			}
+		}
+		read_unlock_bh(&head->lock);
+	}
+
+done:
+	cb->args[1] = i;
+	cb->args[2] = num;
+	return skb->len;
+}
+
+static int tcpdiag_dump_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+
+static __inline__ int
+tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+		return 0;
+
+	if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
+		goto err_inval;
+
+	if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
+		goto err_inval;
+
+	if (nlh->nlmsg_flags&NLM_F_DUMP) {
+		if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
+			struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
+			if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
+			    rta->rta_len < 8 ||
+			    rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
+				goto err_inval;
+			if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
+				goto err_inval;
+		}
+		return netlink_dump_start(tcpnl, skb, nlh,
+					  tcpdiag_dump,
+					  tcpdiag_dump_done);
+	} else {
+		return tcpdiag_get_exact(skb, nlh);
+	}
+
+err_inval:
+	return -EINVAL;
+}
+
+
+static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr * nlh;
+
+	if (skb->len >= NLMSG_SPACE(0)) {
+		nlh = (struct nlmsghdr *)skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+			return;
+		err = tcpdiag_rcv_msg(skb, nlh);
+		if (err || nlh->nlmsg_flags & NLM_F_ACK) 
+			netlink_ack(skb, nlh, err);
+	}
+}
+
+static void tcpdiag_rcv(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		tcpdiag_rcv_skb(skb);
+		kfree_skb(skb);
+	}
+}
+
+static int __init tcpdiag_init(void)
+{
+	tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);
+	if (tcpnl == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void __exit tcpdiag_exit(void)
+{
+	sock_release(tcpnl->sk_socket);
+}
+
+module_init(tcpdiag_init);
+module_exit(tcpdiag_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
new file mode 100644
index 00000000000..25049273590
--- /dev/null
+++ b/net/ipv4/tcp_input.c
@@ -0,0 +1,4959 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *		Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Matthew Dillon, <dillon@apollo.west.oic.com>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+/*
+ * Changes:
+ *		Pedro Roque	:	Fast Retransmit/Recovery.
+ *					Two receive queues.
+ *					Retransmit queue handled by TCP.
+ *					Better retransmit timer handling.
+ *					New congestion avoidance.
+ *					Header prediction.
+ *					Variable renaming.
+ *
+ *		Eric		:	Fast Retransmit.
+ *		Randy Scott	:	MSS option defines.
+ *		Eric Schenk	:	Fixes to slow start algorithm.
+ *		Eric Schenk	:	Yet another double ACK bug.
+ *		Eric Schenk	:	Delayed ACK bug fixes.
+ *		Eric Schenk	:	Floyd style fast retrans war avoidance.
+ *		David S. Miller	:	Don't allow zero congestion window.
+ *		Eric Schenk	:	Fix retransmitter so that it sends
+ *					next packet on ack of previous packet.
+ *		Andi Kleen	:	Moved open_request checking here
+ *					and process RSTs for open_requests.
+ *		Andi Kleen	:	Better prune_queue, and other fixes.
+ *		Andrey Savochkin:	Fix RTT measurements in the presnce of
+ *					timestamps.
+ *		Andrey Savochkin:	Check sequence numbers correctly when
+ *					removing SACKs due to in sequence incoming
+ *					data segments.
+ *		Andi Kleen:		Make sure we never ack data there is not
+ *					enough room for. Also make this condition
+ *					a fatal error if it might still happen.
+ *		Andi Kleen:		Add tcp_measure_rcv_mss to make 
+ *					connections with MSS<min(MTU,ann. MSS)
+ *					work without delayed acks. 
+ *		Andi Kleen:		Process packets with PSH set in the
+ *					fast path.
+ *		J Hadi Salim:		ECN support
+ *	 	Andrei Gurtov,
+ *		Pasi Sarolahti,
+ *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
+ *					engine. Lots of bugs are found.
+ *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
+ *		Angelo Dell'Aera:	TCP Westwood+ support
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <net/tcp.h>
+#include <net/inet_common.h>
+#include <linux/ipsec.h>
+#include <asm/unaligned.h>
+
+int sysctl_tcp_timestamps = 1;
+int sysctl_tcp_window_scaling = 1;
+int sysctl_tcp_sack = 1;
+int sysctl_tcp_fack = 1;
+int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn;
+int sysctl_tcp_dsack = 1;
+int sysctl_tcp_app_win = 31;
+int sysctl_tcp_adv_win_scale = 2;
+
+int sysctl_tcp_stdurg;
+int sysctl_tcp_rfc1337;
+int sysctl_tcp_max_orphans = NR_FILE;
+int sysctl_tcp_frto;
+int sysctl_tcp_nometrics_save;
+int sysctl_tcp_westwood;
+int sysctl_tcp_vegas_cong_avoid;
+
+int sysctl_tcp_moderate_rcvbuf = 1;
+
+/* Default values of the Vegas variables, in fixed-point representation
+ * with V_PARAM_SHIFT bits to the right of the binary point.
+ */
+#define V_PARAM_SHIFT 1
+int sysctl_tcp_vegas_alpha = 1<<V_PARAM_SHIFT;
+int sysctl_tcp_vegas_beta  = 3<<V_PARAM_SHIFT;
+int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT;
+int sysctl_tcp_bic = 1;
+int sysctl_tcp_bic_fast_convergence = 1;
+int sysctl_tcp_bic_low_window = 14;
+int sysctl_tcp_bic_beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
+
+#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
+#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
+#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
+#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
+#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
+#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
+#define FLAG_ECE		0x40 /* ECE in this ACK				*/
+#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
+#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
+
+#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
+#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
+#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
+
+#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)
+#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
+#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
+
+#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
+
+/* Adapt the MSS value used to make delayed ack decision to the 
+ * real world.
+ */ 
+static inline void tcp_measure_rcv_mss(struct tcp_sock *tp,
+				       struct sk_buff *skb)
+{
+	unsigned int len, lss;
+
+	lss = tp->ack.last_seg_size; 
+	tp->ack.last_seg_size = 0; 
+
+	/* skb->len may jitter because of SACKs, even if peer
+	 * sends good full-sized frames.
+	 */
+	len = skb->len;
+	if (len >= tp->ack.rcv_mss) {
+		tp->ack.rcv_mss = len;
+	} else {
+		/* Otherwise, we make more careful check taking into account,
+		 * that SACKs block is variable.
+		 *
+		 * "len" is invariant segment length, including TCP header.
+		 */
+		len += skb->data - skb->h.raw;
+		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
+		    /* If PSH is not set, packet should be
+		     * full sized, provided peer TCP is not badly broken.
+		     * This observation (if it is correct 8)) allows
+		     * to handle super-low mtu links fairly.
+		     */
+		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
+		     !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+			/* Subtract also invariant (if peer is RFC compliant),
+			 * tcp header plus fixed timestamp option length.
+			 * Resulting "len" is MSS free of SACK jitter.
+			 */
+			len -= tp->tcp_header_len;
+			tp->ack.last_seg_size = len;
+			if (len == lss) {
+				tp->ack.rcv_mss = len;
+				return;
+			}
+		}
+		tp->ack.pending |= TCP_ACK_PUSHED;
+	}
+}
+
+static void tcp_incr_quickack(struct tcp_sock *tp)
+{
+	unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss);
+
+	if (quickacks==0)
+		quickacks=2;
+	if (quickacks > tp->ack.quick)
+		tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+}
+
+void tcp_enter_quickack_mode(struct tcp_sock *tp)
+{
+	tcp_incr_quickack(tp);
+	tp->ack.pingpong = 0;
+	tp->ack.ato = TCP_ATO_MIN;
+}
+
+/* Send ACKs quickly, if "quick" count is not exhausted
+ * and the session is not interactive.
+ */
+
+static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp)
+{
+	return (tp->ack.quick && !tp->ack.pingpong);
+}
+
+/* Buffer size and advertised window tuning.
+ *
+ * 1. Tuning sk->sk_sndbuf, when connection enters established state.
+ */
+
+static void tcp_fixup_sndbuf(struct sock *sk)
+{
+	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
+		     sizeof(struct sk_buff);
+
+	if (sk->sk_sndbuf < 3 * sndmem)
+		sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+}
+
+/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+ *
+ * All tcp_full_space() is split to two parts: "network" buffer, allocated
+ * forward and advertised in receiver window (tp->rcv_wnd) and
+ * "application buffer", required to isolate scheduling/application
+ * latencies from network.
+ * window_clamp is maximal advertised window. It can be less than
+ * tcp_full_space(), in this case tcp_full_space() - window_clamp
+ * is reserved for "application" buffer. The less window_clamp is
+ * the smoother our behaviour from viewpoint of network, but the lower
+ * throughput and the higher sensitivity of the connection to losses. 8)
+ *
+ * rcv_ssthresh is more strict window_clamp used at "slow start"
+ * phase to predict further behaviour of this connection.
+ * It is used for two goals:
+ * - to enforce header prediction at sender, even when application
+ *   requires some significant "application buffer". It is check #1.
+ * - to prevent pruning of receive queue because of misprediction
+ *   of receiver window. Check #2.
+ *
+ * The scheme does not work when sender sends good segments opening
+ * window and then starts to feed us spagetti. But it should work
+ * in common situations. Otherwise, we have to rely on queue collapsing.
+ */
+
+/* Slow part of check#2. */
+static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+			     struct sk_buff *skb)
+{
+	/* Optimize this! */
+	int truesize = tcp_win_from_space(skb->truesize)/2;
+	int window = tcp_full_space(sk)/2;
+
+	while (tp->rcv_ssthresh <= window) {
+		if (truesize <= skb->len)
+			return 2*tp->ack.rcv_mss;
+
+		truesize >>= 1;
+		window >>= 1;
+	}
+	return 0;
+}
+
+static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+				   struct sk_buff *skb)
+{
+	/* Check #1 */
+	if (tp->rcv_ssthresh < tp->window_clamp &&
+	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
+	    !tcp_memory_pressure) {
+		int incr;
+
+		/* Check #2. Increase window, if skb with such overhead
+		 * will fit to rcvbuf in future.
+		 */
+		if (tcp_win_from_space(skb->truesize) <= skb->len)
+			incr = 2*tp->advmss;
+		else
+			incr = __tcp_grow_window(sk, tp, skb);
+
+		if (incr) {
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
+			tp->ack.quick |= 1;
+		}
+	}
+}
+
+/* 3. Tuning rcvbuf, when connection enters established state. */
+
+static void tcp_fixup_rcvbuf(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+
+	/* Try to select rcvbuf so that 4 mss-sized segments
+	 * will fit to window and correspoding skbs will fit to our rcvbuf.
+	 * (was 3; 4 is minimum to allow fast retransmit to work.)
+	 */
+	while (tcp_win_from_space(rcvmem) < tp->advmss)
+		rcvmem += 128;
+	if (sk->sk_rcvbuf < 4 * rcvmem)
+		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+}
+
+/* 4. Try to fixup all. It is made iimediately after connection enters
+ *    established state.
+ */
+static void tcp_init_buffer_space(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int maxwin;
+
+	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+		tcp_fixup_rcvbuf(sk);
+	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
+		tcp_fixup_sndbuf(sk);
+
+	tp->rcvq_space.space = tp->rcv_wnd;
+
+	maxwin = tcp_full_space(sk);
+
+	if (tp->window_clamp >= maxwin) {
+		tp->window_clamp = maxwin;
+
+		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+			tp->window_clamp = max(maxwin -
+					       (maxwin >> sysctl_tcp_app_win),
+					       4 * tp->advmss);
+	}
+
+	/* Force reservation of one segment. */
+	if (sysctl_tcp_app_win &&
+	    tp->window_clamp > 2 * tp->advmss &&
+	    tp->window_clamp + tp->advmss > maxwin)
+		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+
+	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void init_bictcp(struct tcp_sock *tp)
+{
+	tp->bictcp.cnt = 0;
+
+	tp->bictcp.last_max_cwnd = 0;
+	tp->bictcp.last_cwnd = 0;
+	tp->bictcp.last_stamp = 0;
+}
+
+/* 5. Recalculate window clamp after socket hit its memory bounds. */
+static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+{
+	struct sk_buff *skb;
+	unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
+	int ofo_win = 0;
+
+	tp->ack.quick = 0;
+
+	skb_queue_walk(&tp->out_of_order_queue, skb) {
+		ofo_win += skb->len;
+	}
+
+	/* If overcommit is due to out of order segments,
+	 * do not clamp window. Try to expand rcvbuf instead.
+	 */
+	if (ofo_win) {
+		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+		    !tcp_memory_pressure &&
+		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
+			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+					    sysctl_tcp_rmem[2]);
+	}
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
+		app_win += ofo_win;
+		if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
+			app_win >>= 1;
+		if (app_win > tp->ack.rcv_mss)
+			app_win -= tp->ack.rcv_mss;
+		app_win = max(app_win, 2U*tp->advmss);
+
+		if (!ofo_win)
+			tp->window_clamp = min(tp->window_clamp, app_win);
+		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
+	}
+}
+
+/* Receiver "autotuning" code.
+ *
+ * The algorithm for RTT estimation w/o timestamps is based on
+ * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
+ * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ *
+ * More detail on this code can be found at
+ * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * though this reference is out of date.  A new paper
+ * is pending.
+ */
+static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
+{
+	u32 new_sample = tp->rcv_rtt_est.rtt;
+	long m = sample;
+
+	if (m == 0)
+		m = 1;
+
+	if (new_sample != 0) {
+		/* If we sample in larger samples in the non-timestamp
+		 * case, we could grossly overestimate the RTT especially
+		 * with chatty applications or bulk transfer apps which
+		 * are stalled on filesystem I/O.
+		 *
+		 * Also, since we are only going for a minimum in the
+		 * non-timestamp case, we do not smoothe things out
+		 * else with timestamps disabled convergance takes too
+		 * long.
+		 */
+		if (!win_dep) {
+			m -= (new_sample >> 3);
+			new_sample += m;
+		} else if (m < new_sample)
+			new_sample = m << 3;
+	} else {
+		/* No previous mesaure. */
+		new_sample = m << 3;
+	}
+
+	if (tp->rcv_rtt_est.rtt != new_sample)
+		tp->rcv_rtt_est.rtt = new_sample;
+}
+
+static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
+{
+	if (tp->rcv_rtt_est.time == 0)
+		goto new_measure;
+	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
+		return;
+	tcp_rcv_rtt_update(tp,
+			   jiffies - tp->rcv_rtt_est.time,
+			   1);
+
+new_measure:
+	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
+	tp->rcv_rtt_est.time = tcp_time_stamp;
+}
+
+static inline void tcp_rcv_rtt_measure_ts(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	if (tp->rx_opt.rcv_tsecr &&
+	    (TCP_SKB_CB(skb)->end_seq -
+	     TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
+		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
+}
+
+/*
+ * This function should be called every time data is copied to user space.
+ * It calculates the appropriate TCP receive buffer space.
+ */
+void tcp_rcv_space_adjust(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int time;
+	int space;
+	
+	if (tp->rcvq_space.time == 0)
+		goto new_measure;
+	
+	time = tcp_time_stamp - tp->rcvq_space.time;
+	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
+	    tp->rcv_rtt_est.rtt == 0)
+		return;
+	
+	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+
+	space = max(tp->rcvq_space.space, space);
+
+	if (tp->rcvq_space.space != space) {
+		int rcvmem;
+
+		tp->rcvq_space.space = space;
+
+		if (sysctl_tcp_moderate_rcvbuf) {
+			int new_clamp = space;
+
+			/* Receive space grows, normalize in order to
+			 * take into account packet headers and sk_buff
+			 * structure overhead.
+			 */
+			space /= tp->advmss;
+			if (!space)
+				space = 1;
+			rcvmem = (tp->advmss + MAX_TCP_HEADER +
+				  16 + sizeof(struct sk_buff));
+			while (tcp_win_from_space(rcvmem) < tp->advmss)
+				rcvmem += 128;
+			space *= rcvmem;
+			space = min(space, sysctl_tcp_rmem[2]);
+			if (space > sk->sk_rcvbuf) {
+				sk->sk_rcvbuf = space;
+
+				/* Make the window clamp follow along.  */
+				tp->window_clamp = new_clamp;
+			}
+		}
+	}
+	
+new_measure:
+	tp->rcvq_space.seq = tp->copied_seq;
+	tp->rcvq_space.time = tcp_time_stamp;
+}
+
+/* There is something which you must keep in mind when you analyze the
+ * behavior of the tp->ato delayed ack timeout interval.  When a
+ * connection starts up, we want to ack as quickly as possible.  The
+ * problem is that "good" TCP's do slow start at the beginning of data
+ * transmission.  The means that until we send the first few ACK's the
+ * sender will sit on his end and only queue most of his data, because
+ * he can only send snd_cwnd unacked packets at any given time.  For
+ * each ACK we send, he increments snd_cwnd and transmits more of his
+ * queue.  -DaveM
+ */
+static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 now;
+
+	tcp_schedule_ack(tp);
+
+	tcp_measure_rcv_mss(tp, skb);
+
+	tcp_rcv_rtt_measure(tp);
+	
+	now = tcp_time_stamp;
+
+	if (!tp->ack.ato) {
+		/* The _first_ data packet received, initialize
+		 * delayed ACK engine.
+		 */
+		tcp_incr_quickack(tp);
+		tp->ack.ato = TCP_ATO_MIN;
+	} else {
+		int m = now - tp->ack.lrcvtime;
+
+		if (m <= TCP_ATO_MIN/2) {
+			/* The fastest case is the first. */
+			tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2;
+		} else if (m < tp->ack.ato) {
+			tp->ack.ato = (tp->ack.ato>>1) + m;
+			if (tp->ack.ato > tp->rto)
+				tp->ack.ato = tp->rto;
+		} else if (m > tp->rto) {
+			/* Too long gap. Apparently sender falled to
+			 * restart window, so that we send ACKs quickly.
+			 */
+			tcp_incr_quickack(tp);
+			sk_stream_mem_reclaim(sk);
+		}
+	}
+	tp->ack.lrcvtime = now;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (skb->len >= 128)
+		tcp_grow_window(sk, tp, skb);
+}
+
+/* When starting a new connection, pin down the current choice of 
+ * congestion algorithm.
+ */
+void tcp_ca_init(struct tcp_sock *tp)
+{
+	if (sysctl_tcp_westwood) 
+		tp->adv_cong = TCP_WESTWOOD;
+	else if (sysctl_tcp_bic)
+		tp->adv_cong = TCP_BIC;
+	else if (sysctl_tcp_vegas_cong_avoid) {
+		tp->adv_cong = TCP_VEGAS;
+		tp->vegas.baseRTT = 0x7fffffff;
+		tcp_vegas_enable(tp);
+	} 
+}
+
+/* Do RTT sampling needed for Vegas.
+ * Basically we:
+ *   o min-filter RTT samples from within an RTT to get the current
+ *     propagation delay + queuing delay (we are min-filtering to try to
+ *     avoid the effects of delayed ACKs)
+ *   o min-filter RTT samples from a much longer window (forever for now)
+ *     to find the propagation delay (baseRTT)
+ */
+static inline void vegas_rtt_calc(struct tcp_sock *tp, __u32 rtt)
+{
+	__u32 vrtt = rtt + 1; /* Never allow zero rtt or baseRTT */
+
+	/* Filter to find propagation delay: */
+	if (vrtt < tp->vegas.baseRTT) 
+		tp->vegas.baseRTT = vrtt;
+
+	/* Find the min RTT during the last RTT to find
+	 * the current prop. delay + queuing delay:
+	 */
+	tp->vegas.minRTT = min(tp->vegas.minRTT, vrtt);
+	tp->vegas.cntRTT++;
+}
+
+/* Called to compute a smoothed rtt estimate. The data fed to this
+ * routine either comes from timestamps, or from segments that were
+ * known _not_ to have been retransmitted [see Karn/Partridge
+ * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
+ * piece by Van Jacobson.
+ * NOTE: the next three routines used to be one big routine.
+ * To save cycles in the RFC 1323 implementation it was better to break
+ * it up into three procedures. -- erics
+ */
+static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt)
+{
+	long m = mrtt; /* RTT */
+
+	if (tcp_vegas_enabled(tp))
+		vegas_rtt_calc(tp, mrtt);
+
+	/*	The following amusing code comes from Jacobson's
+	 *	article in SIGCOMM '88.  Note that rtt and mdev
+	 *	are scaled versions of rtt and mean deviation.
+	 *	This is designed to be as fast as possible 
+	 *	m stands for "measurement".
+	 *
+	 *	On a 1990 paper the rto value is changed to:
+	 *	RTO = rtt + 4 * mdev
+	 *
+	 * Funny. This algorithm seems to be very broken.
+	 * These formulae increase RTO, when it should be decreased, increase
+	 * too slowly, when it should be incresed fastly, decrease too fastly
+	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
+	 * does not matter how to _calculate_ it. Seems, it was trap
+	 * that VJ failed to avoid. 8)
+	 */
+	if(m == 0)
+		m = 1;
+	if (tp->srtt != 0) {
+		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
+		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+		if (m < 0) {
+			m = -m;		/* m is now abs(error) */
+			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			/* This is similar to one of Eifel findings.
+			 * Eifel blocks mdev updates when rtt decreases.
+			 * This solution is a bit different: we use finer gain
+			 * for mdev in this case (alpha*beta).
+			 * Like Eifel it also prevents growth of rto,
+			 * but also it limits too fast rto decreases,
+			 * happening in pure Eifel.
+			 */
+			if (m > 0)
+				m >>= 3;
+		} else {
+			m -= (tp->mdev >> 2);   /* similar update on mdev */
+		}
+		tp->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
+		if (tp->mdev > tp->mdev_max) {
+			tp->mdev_max = tp->mdev;
+			if (tp->mdev_max > tp->rttvar)
+				tp->rttvar = tp->mdev_max;
+		}
+		if (after(tp->snd_una, tp->rtt_seq)) {
+			if (tp->mdev_max < tp->rttvar)
+				tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+			tp->rtt_seq = tp->snd_nxt;
+			tp->mdev_max = TCP_RTO_MIN;
+		}
+	} else {
+		/* no previous measure. */
+		tp->srtt = m<<3;	/* take the measured time to be rtt */
+		tp->mdev = m<<1;	/* make sure rto = 3*rtt */
+		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+		tp->rtt_seq = tp->snd_nxt;
+	}
+
+	tcp_westwood_update_rtt(tp, tp->srtt >> 3);
+}
+
+/* Calculate rto without backoff.  This is the second half of Van Jacobson's
+ * routine referred to above.
+ */
+static inline void tcp_set_rto(struct tcp_sock *tp)
+{
+	/* Old crap is replaced with new one. 8)
+	 *
+	 * More seriously:
+	 * 1. If rtt variance happened to be less 50msec, it is hallucination.
+	 *    It cannot be less due to utterly erratic ACK generation made
+	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+	 *    to do with delayed acks, because at cwnd>2 true delack timeout
+	 *    is invisible. Actually, Linux-2.4 also generates erratic
+	 *    ACKs in some curcumstances.
+	 */
+	tp->rto = (tp->srtt >> 3) + tp->rttvar;
+
+	/* 2. Fixups made earlier cannot be right.
+	 *    If we do not estimate RTO correctly without them,
+	 *    all the algo is pure shit and should be replaced
+	 *    with correct one. It is exaclty, which we pretend to do.
+	 */
+}
+
+/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
+ */
+static inline void tcp_bound_rto(struct tcp_sock *tp)
+{
+	if (tp->rto > TCP_RTO_MAX)
+		tp->rto = TCP_RTO_MAX;
+}
+
+/* Save metrics learned by this TCP session.
+   This function is called only, when TCP finishes successfully
+   i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
+ */
+void tcp_update_metrics(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+
+	if (sysctl_tcp_nometrics_save)
+		return;
+
+	dst_confirm(dst);
+
+	if (dst && (dst->flags&DST_HOST)) {
+		int m;
+
+		if (tp->backoff || !tp->srtt) {
+			/* This session failed to estimate rtt. Why?
+			 * Probably, no packets returned in time.
+			 * Reset our results.
+			 */
+			if (!(dst_metric_locked(dst, RTAX_RTT)))
+				dst->metrics[RTAX_RTT-1] = 0;
+			return;
+		}
+
+		m = dst_metric(dst, RTAX_RTT) - tp->srtt;
+
+		/* If newly calculated rtt larger than stored one,
+		 * store new one. Otherwise, use EWMA. Remember,
+		 * rtt overestimation is always better than underestimation.
+		 */
+		if (!(dst_metric_locked(dst, RTAX_RTT))) {
+			if (m <= 0)
+				dst->metrics[RTAX_RTT-1] = tp->srtt;
+			else
+				dst->metrics[RTAX_RTT-1] -= (m>>3);
+		}
+
+		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
+			if (m < 0)
+				m = -m;
+
+			/* Scale deviation to rttvar fixed point */
+			m >>= 1;
+			if (m < tp->mdev)
+				m = tp->mdev;
+
+			if (m >= dst_metric(dst, RTAX_RTTVAR))
+				dst->metrics[RTAX_RTTVAR-1] = m;
+			else
+				dst->metrics[RTAX_RTTVAR-1] -=
+					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
+		}
+
+		if (tp->snd_ssthresh >= 0xFFFF) {
+			/* Slow start still did not finish. */
+			if (dst_metric(dst, RTAX_SSTHRESH) &&
+			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
+			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
+				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+			if (!dst_metric_locked(dst, RTAX_CWND) &&
+			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
+		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
+			   tp->ca_state == TCP_CA_Open) {
+			/* Cong. avoidance phase, cwnd is reliable. */
+			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
+				dst->metrics[RTAX_SSTHRESH-1] =
+					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+			if (!dst_metric_locked(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
+		} else {
+			/* Else slow start did not finish, cwnd is non-sense,
+			   ssthresh may be also invalid.
+			 */
+			if (!dst_metric_locked(dst, RTAX_CWND))
+				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
+			if (dst->metrics[RTAX_SSTHRESH-1] &&
+			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
+			    tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
+				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+		}
+
+		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
+			if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
+			    tp->reordering != sysctl_tcp_reordering)
+				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+		}
+	}
+}
+
+/* Numbers are taken from RFC2414.  */
+__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+{
+	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
+
+	if (!cwnd) {
+		if (tp->mss_cache_std > 1460)
+			cwnd = 2;
+		else
+			cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
+	}
+	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
+}
+
+/* Initialize metrics on socket. */
+
+static void tcp_init_metrics(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+
+	if (dst == NULL)
+		goto reset;
+
+	dst_confirm(dst);
+
+	if (dst_metric_locked(dst, RTAX_CWND))
+		tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
+	if (dst_metric(dst, RTAX_SSTHRESH)) {
+		tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
+		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
+			tp->snd_ssthresh = tp->snd_cwnd_clamp;
+	}
+	if (dst_metric(dst, RTAX_REORDERING) &&
+	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
+		tp->rx_opt.sack_ok &= ~2;
+		tp->reordering = dst_metric(dst, RTAX_REORDERING);
+	}
+
+	if (dst_metric(dst, RTAX_RTT) == 0)
+		goto reset;
+
+	if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+		goto reset;
+
+	/* Initial rtt is determined from SYN,SYN-ACK.
+	 * The segment is small and rtt may appear much
+	 * less than real one. Use per-dst memory
+	 * to make it more realistic.
+	 *
+	 * A bit of theory. RTT is time passed after "normal" sized packet
+	 * is sent until it is ACKed. In normal curcumstances sending small
+	 * packets force peer to delay ACKs and calculation is correct too.
+	 * The algorithm is adaptive and, provided we follow specs, it
+	 * NEVER underestimate RTT. BUT! If peer tries to make some clever
+	 * tricks sort of "quick acks" for time long enough to decrease RTT
+	 * to low value, and then abruptly stops to do it and starts to delay
+	 * ACKs, wait for troubles.
+	 */
+	if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
+		tp->srtt = dst_metric(dst, RTAX_RTT);
+		tp->rtt_seq = tp->snd_nxt;
+	}
+	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
+		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
+		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+	}
+	tcp_set_rto(tp);
+	tcp_bound_rto(tp);
+	if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
+		goto reset;
+	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+	return;
+
+reset:
+	/* Play conservative. If timestamps are not
+	 * supported, TCP will fail to recalculate correct
+	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
+	 */
+	if (!tp->rx_opt.saw_tstamp && tp->srtt) {
+		tp->srtt = 0;
+		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+		tp->rto = TCP_TIMEOUT_INIT;
+	}
+}
+
+static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
+{
+	if (metric > tp->reordering) {
+		tp->reordering = min(TCP_MAX_REORDERING, metric);
+
+		/* This exciting event is worth to be remembered. 8) */
+		if (ts)
+			NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
+		else if (IsReno(tp))
+			NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
+		else if (IsFack(tp))
+			NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
+#if FASTRETRANS_DEBUG > 1
+		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
+		       tp->rx_opt.sack_ok, tp->ca_state,
+		       tp->reordering,
+		       tp->fackets_out,
+		       tp->sacked_out,
+		       tp->undo_marker ? tp->undo_retrans : 0);
+#endif
+		/* Disable FACK yet. */
+		tp->rx_opt.sack_ok &= ~2;
+	}
+}
+
+/* This procedure tags the retransmission queue when SACKs arrive.
+ *
+ * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
+ * Packets in queue with these bits set are counted in variables
+ * sacked_out, retrans_out and lost_out, correspondingly.
+ *
+ * Valid combinations are:
+ * Tag  InFlight	Description
+ * 0	1		- orig segment is in flight.
+ * S	0		- nothing flies, orig reached receiver.
+ * L	0		- nothing flies, orig lost by net.
+ * R	2		- both orig and retransmit are in flight.
+ * L|R	1		- orig is lost, retransmit is in flight.
+ * S|R  1		- orig reached receiver, retrans is still in flight.
+ * (L|S|R is logically valid, it could occur when L|R is sacked,
+ *  but it is equivalent to plain S and code short-curcuits it to S.
+ *  L|S is logically invalid, it would mean -1 packet in flight 8))
+ *
+ * These 6 states form finite state machine, controlled by the following events:
+ * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
+ * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
+ * 3. Loss detection event of one of three flavors:
+ *	A. Scoreboard estimator decided the packet is lost.
+ *	   A'. Reno "three dupacks" marks head of queue lost.
+ *	   A''. Its FACK modfication, head until snd.fack is lost.
+ *	B. SACK arrives sacking data transmitted after never retransmitted
+ *	   hole was sent out.
+ *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   segment was retransmitted.
+ * 4. D-SACK added new rule: D-SACK changes any tag to S.
+ *
+ * It is pleasant to note, that state diagram turns out to be commutative,
+ * so that we are allowed not to be bothered by order of our actions,
+ * when multiple events arrive simultaneously. (see the function below).
+ *
+ * Reordering detection.
+ * --------------------
+ * Reordering metric is maximal distance, which a packet can be displaced
+ * in packet stream. With SACKs we can estimate it:
+ *
+ * 1. SACK fills old hole and the corresponding segment was not
+ *    ever retransmitted -> reordering. Alas, we cannot use it
+ *    when segment was retransmitted.
+ * 2. The last flaw is solved with D-SACK. D-SACK arrives
+ *    for retransmitted and already SACKed segment -> reordering..
+ * Both of these heuristics are not used in Loss state, when we cannot
+ * account for retransmits accurately.
+ */
+static int
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+	int reord = tp->packets_out;
+	int prior_fackets;
+	u32 lost_retrans = 0;
+	int flag = 0;
+	int i;
+
+	/* So, SACKs for already sent large segments will be lost.
+	 * Not good, but alternative is to resegment the queue. */
+	if (sk->sk_route_caps & NETIF_F_TSO) {
+		sk->sk_route_caps &= ~NETIF_F_TSO;
+		sock_set_flag(sk, SOCK_NO_LARGESEND);
+		tp->mss_cache = tp->mss_cache_std;
+	}
+
+	if (!tp->sacked_out)
+		tp->fackets_out = 0;
+	prior_fackets = tp->fackets_out;
+
+	for (i=0; i<num_sacks; i++, sp++) {
+		struct sk_buff *skb;
+		__u32 start_seq = ntohl(sp->start_seq);
+		__u32 end_seq = ntohl(sp->end_seq);
+		int fack_count = 0;
+		int dup_sack = 0;
+
+		/* Check for D-SACK. */
+		if (i == 0) {
+			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
+
+			if (before(start_seq, ack)) {
+				dup_sack = 1;
+				tp->rx_opt.sack_ok |= 4;
+				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+			} else if (num_sacks > 1 &&
+				   !after(end_seq, ntohl(sp[1].end_seq)) &&
+				   !before(start_seq, ntohl(sp[1].start_seq))) {
+				dup_sack = 1;
+				tp->rx_opt.sack_ok |= 4;
+				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+			}
+
+			/* D-SACK for already forgotten data...
+			 * Do dumb counting. */
+			if (dup_sack &&
+			    !after(end_seq, prior_snd_una) &&
+			    after(end_seq, tp->undo_marker))
+				tp->undo_retrans--;
+
+			/* Eliminate too old ACKs, but take into
+			 * account more or less fresh ones, they can
+			 * contain valid SACK info.
+			 */
+			if (before(ack, prior_snd_una - tp->max_window))
+				return 0;
+		}
+
+		/* Event "B" in the comment above. */
+		if (after(end_seq, tp->high_seq))
+			flag |= FLAG_DATA_LOST;
+
+		sk_stream_for_retrans_queue(skb, sk) {
+			u8 sacked = TCP_SKB_CB(skb)->sacked;
+			int in_sack;
+
+			/* The retransmission queue is always in order, so
+			 * we can short-circuit the walk early.
+			 */
+			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+				break;
+
+			fack_count += tcp_skb_pcount(skb);
+
+			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+			/* Account D-SACK for retransmitted packet. */
+			if ((dup_sack && in_sack) &&
+			    (sacked & TCPCB_RETRANS) &&
+			    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+				tp->undo_retrans--;
+
+			/* The frame is ACKed. */
+			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
+				if (sacked&TCPCB_RETRANS) {
+					if ((dup_sack && in_sack) &&
+					    (sacked&TCPCB_SACKED_ACKED))
+						reord = min(fack_count, reord);
+				} else {
+					/* If it was in a hole, we detected reordering. */
+					if (fack_count < prior_fackets &&
+					    !(sacked&TCPCB_SACKED_ACKED))
+						reord = min(fack_count, reord);
+				}
+
+				/* Nothing to do; acked frame is about to be dropped. */
+				continue;
+			}
+
+			if ((sacked&TCPCB_SACKED_RETRANS) &&
+			    after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
+			    (!lost_retrans || after(end_seq, lost_retrans)))
+				lost_retrans = end_seq;
+
+			if (!in_sack)
+				continue;
+
+			if (!(sacked&TCPCB_SACKED_ACKED)) {
+				if (sacked & TCPCB_SACKED_RETRANS) {
+					/* If the segment is not tagged as lost,
+					 * we do not clear RETRANS, believing
+					 * that retransmission is still in flight.
+					 */
+					if (sacked & TCPCB_LOST) {
+						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+						tp->lost_out -= tcp_skb_pcount(skb);
+						tp->retrans_out -= tcp_skb_pcount(skb);
+					}
+				} else {
+					/* New sack for not retransmitted frame,
+					 * which was in hole. It is reordering.
+					 */
+					if (!(sacked & TCPCB_RETRANS) &&
+					    fack_count < prior_fackets)
+						reord = min(fack_count, reord);
+
+					if (sacked & TCPCB_LOST) {
+						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+						tp->lost_out -= tcp_skb_pcount(skb);
+					}
+				}
+
+				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+				flag |= FLAG_DATA_SACKED;
+				tp->sacked_out += tcp_skb_pcount(skb);
+
+				if (fack_count > tp->fackets_out)
+					tp->fackets_out = fack_count;
+			} else {
+				if (dup_sack && (sacked&TCPCB_RETRANS))
+					reord = min(fack_count, reord);
+			}
+
+			/* D-SACK. We can detect redundant retransmission
+			 * in S|R and plain R frames and clear it.
+			 * undo_retrans is decreased above, L|R frames
+			 * are accounted above as well.
+			 */
+			if (dup_sack &&
+			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+			}
+		}
+	}
+
+	/* Check for lost retransmit. This superb idea is
+	 * borrowed from "ratehalving". Event "C".
+	 * Later note: FACK people cheated me again 8),
+	 * we have to account for reordering! Ugly,
+	 * but should help.
+	 */
+	if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
+		struct sk_buff *skb;
+
+		sk_stream_for_retrans_queue(skb, sk) {
+			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
+				break;
+			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+				continue;
+			if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
+			    after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
+			    (IsFack(tp) ||
+			     !before(lost_retrans,
+				     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
+				     tp->mss_cache_std))) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+
+				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+					tp->lost_out += tcp_skb_pcount(skb);
+					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+					flag |= FLAG_DATA_SACKED;
+					NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
+				}
+			}
+		}
+	}
+
+	tp->left_out = tp->sacked_out + tp->lost_out;
+
+	if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss)
+		tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0);
+
+#if FASTRETRANS_DEBUG > 0
+	BUG_TRAP((int)tp->sacked_out >= 0);
+	BUG_TRAP((int)tp->lost_out >= 0);
+	BUG_TRAP((int)tp->retrans_out >= 0);
+	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
+#endif
+	return flag;
+}
+
+/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
+ * segments to see from the next ACKs whether any data was really missing.
+ * If the RTO was spurious, new ACKs should arrive.
+ */
+void tcp_enter_frto(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	tp->frto_counter = 1;
+
+	if (tp->ca_state <= TCP_CA_Disorder ||
+            tp->snd_una == tp->high_seq ||
+            (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
+		tp->prior_ssthresh = tcp_current_ssthresh(tp);
+		if (!tcp_westwood_ssthresh(tp))
+			tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+	}
+
+	/* Have to clear retransmission markers here to keep the bookkeeping
+	 * in shape, even though we are not yet in Loss state.
+	 * If something was really lost, it is eventually caught up
+	 * in tcp_enter_frto_loss.
+	 */
+	tp->retrans_out = 0;
+	tp->undo_marker = tp->snd_una;
+	tp->undo_retrans = 0;
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+	}
+	tcp_sync_left_out(tp);
+
+	tcp_set_ca_state(tp, TCP_CA_Open);
+	tp->frto_highmark = tp->snd_nxt;
+}
+
+/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
+ * which indicates that we should follow the traditional RTO recovery,
+ * i.e. mark everything lost and do go-back-N retransmission.
+ */
+static void tcp_enter_frto_loss(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int cnt = 0;
+
+	tp->sacked_out = 0;
+	tp->lost_out = 0;
+	tp->fackets_out = 0;
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		cnt += tcp_skb_pcount(skb);
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
+
+			/* Do not mark those segments lost that were
+			 * forward transmitted after RTO
+			 */
+			if (!after(TCP_SKB_CB(skb)->end_seq,
+				   tp->frto_highmark)) {
+				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+				tp->lost_out += tcp_skb_pcount(skb);
+			}
+		} else {
+			tp->sacked_out += tcp_skb_pcount(skb);
+			tp->fackets_out = cnt;
+		}
+	}
+	tcp_sync_left_out(tp);
+
+	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+	tp->snd_cwnd_cnt = 0;
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->undo_marker = 0;
+	tp->frto_counter = 0;
+
+	tp->reordering = min_t(unsigned int, tp->reordering,
+					     sysctl_tcp_reordering);
+	tcp_set_ca_state(tp, TCP_CA_Loss);
+	tp->high_seq = tp->frto_highmark;
+	TCP_ECN_queue_cwr(tp);
+
+	init_bictcp(tp);
+}
+
+void tcp_clear_retrans(struct tcp_sock *tp)
+{
+	tp->left_out = 0;
+	tp->retrans_out = 0;
+
+	tp->fackets_out = 0;
+	tp->sacked_out = 0;
+	tp->lost_out = 0;
+
+	tp->undo_marker = 0;
+	tp->undo_retrans = 0;
+}
+
+/* Enter Loss state. If "how" is not zero, forget all SACK information
+ * and reset tags completely, otherwise preserve SACKs. If receiver
+ * dropped its ofo queue, we will know this due to reneging detection.
+ */
+void tcp_enter_loss(struct sock *sk, int how)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int cnt = 0;
+
+	/* Reduce ssthresh if it has not yet been made inside this window. */
+	if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+	    (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
+		tp->prior_ssthresh = tcp_current_ssthresh(tp);
+		tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+	}
+	tp->snd_cwnd	   = 1;
+	tp->snd_cwnd_cnt   = 0;
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	tcp_clear_retrans(tp);
+
+	/* Push undo marker, if it was plain RTO and nothing
+	 * was retransmitted. */
+	if (!how)
+		tp->undo_marker = tp->snd_una;
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		cnt += tcp_skb_pcount(skb);
+		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+			tp->undo_marker = 0;
+		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
+			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
+			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+			tp->lost_out += tcp_skb_pcount(skb);
+		} else {
+			tp->sacked_out += tcp_skb_pcount(skb);
+			tp->fackets_out = cnt;
+		}
+	}
+	tcp_sync_left_out(tp);
+
+	tp->reordering = min_t(unsigned int, tp->reordering,
+					     sysctl_tcp_reordering);
+	tcp_set_ca_state(tp, TCP_CA_Loss);
+	tp->high_seq = tp->snd_nxt;
+	TCP_ECN_queue_cwr(tp);
+}
+
+static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
+{
+	struct sk_buff *skb;
+
+	/* If ACK arrived pointing to a remembered SACK,
+	 * it means that our remembered SACKs do not reflect
+	 * real state of receiver i.e.
+	 * receiver _host_ is heavily congested (or buggy).
+	 * Do processing similar to RTO timeout.
+	 */
+	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
+
+		tcp_enter_loss(sk, 1);
+		tp->retransmits++;
+		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+		return 1;
+	}
+	return 0;
+}
+
+static inline int tcp_fackets_out(struct tcp_sock *tp)
+{
+	return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+}
+
+static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto);
+}
+
+static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+{
+	return tp->packets_out &&
+	       tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
+}
+
+/* Linux NewReno/SACK/FACK/ECN state machine.
+ * --------------------------------------
+ *
+ * "Open"	Normal state, no dubious events, fast path.
+ * "Disorder"   In all the respects it is "Open",
+ *		but requires a bit more attention. It is entered when
+ *		we see some SACKs or dupacks. It is split of "Open"
+ *		mainly to move some processing from fast path to slow one.
+ * "CWR"	CWND was reduced due to some Congestion Notification event.
+ *		It can be ECN, ICMP source quench, local device congestion.
+ * "Recovery"	CWND was reduced, we are fast-retransmitting.
+ * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
+ *
+ * tcp_fastretrans_alert() is entered:
+ * - each incoming ACK, if state is not "Open"
+ * - when arrived ACK is unusual, namely:
+ *	* SACK
+ *	* Duplicate ACK.
+ *	* ECN ECE.
+ *
+ * Counting packets in flight is pretty simple.
+ *
+ *	in_flight = packets_out - left_out + retrans_out
+ *
+ *	packets_out is SND.NXT-SND.UNA counted in packets.
+ *
+ *	retrans_out is number of retransmitted segments.
+ *
+ *	left_out is number of segments left network, but not ACKed yet.
+ *
+ *		left_out = sacked_out + lost_out
+ *
+ *     sacked_out: Packets, which arrived to receiver out of order
+ *		   and hence not ACKed. With SACKs this number is simply
+ *		   amount of SACKed data. Even without SACKs
+ *		   it is easy to give pretty reliable estimate of this number,
+ *		   counting duplicate ACKs.
+ *
+ *       lost_out: Packets lost by network. TCP has no explicit
+ *		   "loss notification" feedback from network (for now).
+ *		   It means that this number can be only _guessed_.
+ *		   Actually, it is the heuristics to predict lossage that
+ *		   distinguishes different algorithms.
+ *
+ *	F.e. after RTO, when all the queue is considered as lost,
+ *	lost_out = packets_out and in_flight = retrans_out.
+ *
+ *		Essentially, we have now two algorithms counting
+ *		lost packets.
+ *
+ *		FACK: It is the simplest heuristics. As soon as we decided
+ *		that something is lost, we decide that _all_ not SACKed
+ *		packets until the most forward SACK are lost. I.e.
+ *		lost_out = fackets_out - sacked_out and left_out = fackets_out.
+ *		It is absolutely correct estimate, if network does not reorder
+ *		packets. And it loses any connection to reality when reordering
+ *		takes place. We use FACK by default until reordering
+ *		is suspected on the path to this destination.
+ *
+ *		NewReno: when Recovery is entered, we assume that one segment
+ *		is lost (classic Reno). While we are in Recovery and
+ *		a partial ACK arrives, we assume that one more packet
+ *		is lost (NewReno). This heuristics are the same in NewReno
+ *		and SACK.
+ *
+ *  Imagine, that's all! Forget about all this shamanism about CWND inflation
+ *  deflation etc. CWND is real congestion window, never inflated, changes
+ *  only according to classic VJ rules.
+ *
+ * Really tricky (and requiring careful tuning) part of algorithm
+ * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
+ * The first determines the moment _when_ we should reduce CWND and,
+ * hence, slow down forward transmission. In fact, it determines the moment
+ * when we decide that hole is caused by loss, rather than by a reorder.
+ *
+ * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill
+ * holes, caused by lost packets.
+ *
+ * And the most logically complicated part of algorithm is undo
+ * heuristics. We detect false retransmits due to both too early
+ * fast retransmit (reordering) and underestimated RTO, analyzing
+ * timestamps and D-SACKs. When we detect that some segments were
+ * retransmitted by mistake and CWND reduction was wrong, we undo
+ * window reduction and abort recovery phase. This logic is hidden
+ * inside several functions named tcp_try_undo_<something>.
+ */
+
+/* This function decides, when we should leave Disordered state
+ * and enter Recovery phase, reducing congestion window.
+ *
+ * Main question: may we further continue forward transmission
+ * with the same cwnd?
+ */
+static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+{
+	__u32 packets_out;
+
+	/* Trick#1: The loss is proven. */
+	if (tp->lost_out)
+		return 1;
+
+	/* Not-A-Trick#2 : Classic rule... */
+	if (tcp_fackets_out(tp) > tp->reordering)
+		return 1;
+
+	/* Trick#3 : when we use RFC2988 timer restart, fast
+	 * retransmit can be triggered by timeout of queue head.
+	 */
+	if (tcp_head_timedout(sk, tp))
+		return 1;
+
+	/* Trick#4: It is still not OK... But will it be useful to delay
+	 * recovery more?
+	 */
+	packets_out = tp->packets_out;
+	if (packets_out <= tp->reordering &&
+	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
+	    !tcp_may_send_now(sk, tp)) {
+		/* We have nothing to send. This connection is limited
+		 * either by receiver window or by application.
+		 */
+		return 1;
+	}
+
+	return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
+{
+	u32 holes;
+
+	holes = max(tp->lost_out, 1U);
+	holes = min(holes, tp->packets_out);
+
+	if ((tp->sacked_out + holes) > tp->packets_out) {
+		tp->sacked_out = tp->packets_out - holes;
+		tcp_update_reordering(tp, tp->packets_out+addend, 0);
+	}
+}
+
+/* Emulate SACKs for SACKless connection: account for a new dupack. */
+
+static void tcp_add_reno_sack(struct tcp_sock *tp)
+{
+	tp->sacked_out++;
+	tcp_check_reno_reordering(tp, 0);
+	tcp_sync_left_out(tp);
+}
+
+/* Account for ACK, ACKing some data in Reno Recovery phase. */
+
+static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+{
+	if (acked > 0) {
+		/* One ACK acked hole. The rest eat duplicate ACKs. */
+		if (acked-1 >= tp->sacked_out)
+			tp->sacked_out = 0;
+		else
+			tp->sacked_out -= acked-1;
+	}
+	tcp_check_reno_reordering(tp, acked);
+	tcp_sync_left_out(tp);
+}
+
+static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
+{
+	tp->sacked_out = 0;
+	tp->left_out = tp->lost_out;
+}
+
+/* Mark head of queue up as lost. */
+static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+			       int packets, u32 high_seq)
+{
+	struct sk_buff *skb;
+	int cnt = packets;
+
+	BUG_TRAP(cnt <= tp->packets_out);
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		cnt -= tcp_skb_pcount(skb);
+		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+			break;
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+			tp->lost_out += tcp_skb_pcount(skb);
+		}
+	}
+	tcp_sync_left_out(tp);
+}
+
+/* Account newly detected lost packet(s) */
+
+static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+{
+	if (IsFack(tp)) {
+		int lost = tp->fackets_out - tp->reordering;
+		if (lost <= 0)
+			lost = 1;
+		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+	} else {
+		tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+	}
+
+	/* New heuristics: it is possible only after we switched
+	 * to restart timer each time when something is ACKed.
+	 * Hence, we can detect timed out packets during fast
+	 * retransmit without falling to slow start.
+	 */
+	if (tcp_head_timedout(sk, tp)) {
+		struct sk_buff *skb;
+
+		sk_stream_for_retrans_queue(skb, sk) {
+			if (tcp_skb_timedout(tp, skb) &&
+			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+				tp->lost_out += tcp_skb_pcount(skb);
+			}
+		}
+		tcp_sync_left_out(tp);
+	}
+}
+
+/* CWND moderation, preventing bursts due to too big ACKs
+ * in dubious situations.
+ */
+static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
+{
+	tp->snd_cwnd = min(tp->snd_cwnd,
+			   tcp_packets_in_flight(tp)+tcp_max_burst(tp));
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* Decrease cwnd each second ack. */
+
+static void tcp_cwnd_down(struct tcp_sock *tp)
+{
+	int decr = tp->snd_cwnd_cnt + 1;
+	__u32 limit;
+
+	/*
+	 * TCP Westwood
+	 * Here limit is evaluated as BWestimation*RTTmin (for obtaining it
+	 * in packets we use mss_cache). If sysctl_tcp_westwood is off
+	 * tcp_westwood_bw_rttmin() returns 0. In such case snd_ssthresh is
+	 * still used as usual. It prevents other strange cases in which
+	 * BWE*RTTmin could assume value 0. It should not happen but...
+	 */
+
+	if (!(limit = tcp_westwood_bw_rttmin(tp)))
+		limit = tp->snd_ssthresh/2;
+
+	tp->snd_cwnd_cnt = decr&1;
+	decr >>= 1;
+
+	if (decr && tp->snd_cwnd > limit)
+		tp->snd_cwnd -= decr;
+
+	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* Nothing was retransmitted or returned timestamp is less
+ * than timestamp of the first retransmission.
+ */
+static inline int tcp_packet_delayed(struct tcp_sock *tp)
+{
+	return !tp->retrans_stamp ||
+		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+		 (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
+}
+
+/* Undo procedures. */
+
+#if FASTRETRANS_DEBUG > 1
+static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
+	       msg,
+	       NIPQUAD(inet->daddr), ntohs(inet->dport),
+	       tp->snd_cwnd, tp->left_out,
+	       tp->snd_ssthresh, tp->prior_ssthresh,
+	       tp->packets_out);
+}
+#else
+#define DBGUNDO(x...) do { } while (0)
+#endif
+
+static void tcp_undo_cwr(struct tcp_sock *tp, int undo)
+{
+	if (tp->prior_ssthresh) {
+		if (tcp_is_bic(tp))
+			tp->snd_cwnd = max(tp->snd_cwnd, tp->bictcp.last_max_cwnd);
+		else
+			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
+
+		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+			tp->snd_ssthresh = tp->prior_ssthresh;
+			TCP_ECN_withdraw_cwr(tp);
+		}
+	} else {
+		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
+	}
+	tcp_moderate_cwnd(tp);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static inline int tcp_may_undo(struct tcp_sock *tp)
+{
+	return tp->undo_marker &&
+		(!tp->undo_retrans || tcp_packet_delayed(tp));
+}
+
+/* People celebrate: "We love our President!" */
+static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+{
+	if (tcp_may_undo(tp)) {
+		/* Happy end! We did not retransmit anything
+		 * or our original transmission succeeded.
+		 */
+		DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
+		tcp_undo_cwr(tp, 1);
+		if (tp->ca_state == TCP_CA_Loss)
+			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
+		tp->undo_marker = 0;
+	}
+	if (tp->snd_una == tp->high_seq && IsReno(tp)) {
+		/* Hold old state until something *above* high_seq
+		 * is ACKed. For Reno it is MUST to prevent false
+		 * fast retransmits (RFC2582). SACK TCP is safe. */
+		tcp_moderate_cwnd(tp);
+		return 1;
+	}
+	tcp_set_ca_state(tp, TCP_CA_Open);
+	return 0;
+}
+
+/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
+static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+{
+	if (tp->undo_marker && !tp->undo_retrans) {
+		DBGUNDO(sk, tp, "D-SACK");
+		tcp_undo_cwr(tp, 1);
+		tp->undo_marker = 0;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
+	}
+}
+
+/* Undo during fast recovery after partial ACK. */
+
+static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
+				int acked)
+{
+	/* Partial ACK arrived. Force Hoe's retransmit. */
+	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+
+	if (tcp_may_undo(tp)) {
+		/* Plain luck! Hole if filled with delayed
+		 * packet, rather than with a retransmit.
+		 */
+		if (tp->retrans_out == 0)
+			tp->retrans_stamp = 0;
+
+		tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
+
+		DBGUNDO(sk, tp, "Hoe");
+		tcp_undo_cwr(tp, 0);
+		NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
+
+		/* So... Do not make Hoe's retransmit yet.
+		 * If the first packet was delayed, the rest
+		 * ones are most probably delayed as well.
+		 */
+		failed = 0;
+	}
+	return failed;
+}
+
+/* Undo during loss recovery after partial ACK. */
+static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+{
+	if (tcp_may_undo(tp)) {
+		struct sk_buff *skb;
+		sk_stream_for_retrans_queue(skb, sk) {
+			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		}
+		DBGUNDO(sk, tp, "partial loss");
+		tp->lost_out = 0;
+		tp->left_out = tp->sacked_out;
+		tcp_undo_cwr(tp, 1);
+		NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+		tp->retransmits = 0;
+		tp->undo_marker = 0;
+		if (!IsReno(tp))
+			tcp_set_ca_state(tp, TCP_CA_Open);
+		return 1;
+	}
+	return 0;
+}
+
+static inline void tcp_complete_cwr(struct tcp_sock *tp)
+{
+	if (tcp_westwood_cwnd(tp)) 
+		tp->snd_ssthresh = tp->snd_cwnd;
+	else
+		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+{
+	tp->left_out = tp->sacked_out;
+
+	if (tp->retrans_out == 0)
+		tp->retrans_stamp = 0;
+
+	if (flag&FLAG_ECE)
+		tcp_enter_cwr(tp);
+
+	if (tp->ca_state != TCP_CA_CWR) {
+		int state = TCP_CA_Open;
+
+		if (tp->left_out || tp->retrans_out || tp->undo_marker)
+			state = TCP_CA_Disorder;
+
+		if (tp->ca_state != state) {
+			tcp_set_ca_state(tp, state);
+			tp->high_seq = tp->snd_nxt;
+		}
+		tcp_moderate_cwnd(tp);
+	} else {
+		tcp_cwnd_down(tp);
+	}
+}
+
+/* Process an event, which can update packets-in-flight not trivially.
+ * Main goal of this function is to calculate new estimate for left_out,
+ * taking into account both packets sitting in receiver's buffer and
+ * packets lost by network.
+ *
+ * Besides that it does CWND reduction, when packet loss is detected
+ * and changes state of machine.
+ *
+ * It does _not_ decide what to send, it is made in function
+ * tcp_xmit_retransmit_queue().
+ */
+static void
+tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
+		      int prior_packets, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
+
+	/* Some technical things:
+	 * 1. Reno does not count dupacks (sacked_out) automatically. */
+	if (!tp->packets_out)
+		tp->sacked_out = 0;
+        /* 2. SACK counts snd_fack in packets inaccurately. */
+	if (tp->sacked_out == 0)
+		tp->fackets_out = 0;
+
+        /* Now state machine starts.
+	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
+	if (flag&FLAG_ECE)
+		tp->prior_ssthresh = 0;
+
+	/* B. In all the states check for reneging SACKs. */
+	if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+		return;
+
+	/* C. Process data loss notification, provided it is valid. */
+	if ((flag&FLAG_DATA_LOST) &&
+	    before(tp->snd_una, tp->high_seq) &&
+	    tp->ca_state != TCP_CA_Open &&
+	    tp->fackets_out > tp->reordering) {
+		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
+	}
+
+	/* D. Synchronize left_out to current state. */
+	tcp_sync_left_out(tp);
+
+	/* E. Check state exit conditions. State can be terminated
+	 *    when high_seq is ACKed. */
+	if (tp->ca_state == TCP_CA_Open) {
+		if (!sysctl_tcp_frto)
+			BUG_TRAP(tp->retrans_out == 0);
+		tp->retrans_stamp = 0;
+	} else if (!before(tp->snd_una, tp->high_seq)) {
+		switch (tp->ca_state) {
+		case TCP_CA_Loss:
+			tp->retransmits = 0;
+			if (tcp_try_undo_recovery(sk, tp))
+				return;
+			break;
+
+		case TCP_CA_CWR:
+			/* CWR is to be held something *above* high_seq
+			 * is ACKed for CWR bit to reach receiver. */
+			if (tp->snd_una != tp->high_seq) {
+				tcp_complete_cwr(tp);
+				tcp_set_ca_state(tp, TCP_CA_Open);
+			}
+			break;
+
+		case TCP_CA_Disorder:
+			tcp_try_undo_dsack(sk, tp);
+			if (!tp->undo_marker ||
+			    /* For SACK case do not Open to allow to undo
+			     * catching for all duplicate ACKs. */
+			    IsReno(tp) || tp->snd_una != tp->high_seq) {
+				tp->undo_marker = 0;
+				tcp_set_ca_state(tp, TCP_CA_Open);
+			}
+			break;
+
+		case TCP_CA_Recovery:
+			if (IsReno(tp))
+				tcp_reset_reno_sack(tp);
+			if (tcp_try_undo_recovery(sk, tp))
+				return;
+			tcp_complete_cwr(tp);
+			break;
+		}
+	}
+
+	/* F. Process state. */
+	switch (tp->ca_state) {
+	case TCP_CA_Recovery:
+		if (prior_snd_una == tp->snd_una) {
+			if (IsReno(tp) && is_dupack)
+				tcp_add_reno_sack(tp);
+		} else {
+			int acked = prior_packets - tp->packets_out;
+			if (IsReno(tp))
+				tcp_remove_reno_sacks(sk, tp, acked);
+			is_dupack = tcp_try_undo_partial(sk, tp, acked);
+		}
+		break;
+	case TCP_CA_Loss:
+		if (flag&FLAG_DATA_ACKED)
+			tp->retransmits = 0;
+		if (!tcp_try_undo_loss(sk, tp)) {
+			tcp_moderate_cwnd(tp);
+			tcp_xmit_retransmit_queue(sk);
+			return;
+		}
+		if (tp->ca_state != TCP_CA_Open)
+			return;
+		/* Loss is undone; fall through to processing in Open state. */
+	default:
+		if (IsReno(tp)) {
+			if (tp->snd_una != prior_snd_una)
+				tcp_reset_reno_sack(tp);
+			if (is_dupack)
+				tcp_add_reno_sack(tp);
+		}
+
+		if (tp->ca_state == TCP_CA_Disorder)
+			tcp_try_undo_dsack(sk, tp);
+
+		if (!tcp_time_to_recover(sk, tp)) {
+			tcp_try_to_open(sk, tp, flag);
+			return;
+		}
+
+		/* Otherwise enter Recovery state */
+
+		if (IsReno(tp))
+			NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
+
+		tp->high_seq = tp->snd_nxt;
+		tp->prior_ssthresh = 0;
+		tp->undo_marker = tp->snd_una;
+		tp->undo_retrans = tp->retrans_out;
+
+		if (tp->ca_state < TCP_CA_CWR) {
+			if (!(flag&FLAG_ECE))
+				tp->prior_ssthresh = tcp_current_ssthresh(tp);
+			tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+			TCP_ECN_queue_cwr(tp);
+		}
+
+		tp->snd_cwnd_cnt = 0;
+		tcp_set_ca_state(tp, TCP_CA_Recovery);
+	}
+
+	if (is_dupack || tcp_head_timedout(sk, tp))
+		tcp_update_scoreboard(sk, tp);
+	tcp_cwnd_down(tp);
+	tcp_xmit_retransmit_queue(sk);
+}
+
+/* Read draft-ietf-tcplw-high-performance before mucking
+ * with this code. (Superceeds RFC1323)
+ */
+static void tcp_ack_saw_tstamp(struct tcp_sock *tp, int flag)
+{
+	__u32 seq_rtt;
+
+	/* RTTM Rule: A TSecr value received in a segment is used to
+	 * update the averaged RTT measurement only if the segment
+	 * acknowledges some new data, i.e., only if it advances the
+	 * left edge of the send window.
+	 *
+	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
+	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
+	 *
+	 * Changed: reset backoff as soon as we see the first valid sample.
+	 * If we do not, we get strongly overstimated rto. With timestamps
+	 * samples are accepted even from very old segments: f.e., when rtt=1
+	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
+	 * answer arrives rto becomes 120 seconds! If at least one of segments
+	 * in window is lost... Voila.	 			--ANK (010210)
+	 */
+	seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+	tcp_rtt_estimator(tp, seq_rtt);
+	tcp_set_rto(tp);
+	tp->backoff = 0;
+	tcp_bound_rto(tp);
+}
+
+static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, int flag)
+{
+	/* We don't have a timestamp. Can only use
+	 * packets that are not retransmitted to determine
+	 * rtt estimates. Also, we must not reset the
+	 * backoff for rto until we get a non-retransmitted
+	 * packet. This allows us to deal with a situation
+	 * where the network delay has increased suddenly.
+	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
+	 */
+
+	if (flag & FLAG_RETRANS_DATA_ACKED)
+		return;
+
+	tcp_rtt_estimator(tp, seq_rtt);
+	tcp_set_rto(tp);
+	tp->backoff = 0;
+	tcp_bound_rto(tp);
+}
+
+static inline void tcp_ack_update_rtt(struct tcp_sock *tp,
+				      int flag, s32 seq_rtt)
+{
+	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
+	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+		tcp_ack_saw_tstamp(tp, flag);
+	else if (seq_rtt >= 0)
+		tcp_ack_no_tstamp(tp, seq_rtt, flag);
+}
+
+/*
+ * Compute congestion window to use.
+ *
+ * This is from the implementation of BICTCP in
+ * Lison-Xu, Kahaled Harfoush, and Injog Rhee.
+ *  "Binary Increase Congestion Control for Fast, Long Distance
+ *  Networks" in InfoComm 2004
+ * Available from:
+ *  http://www.csc.ncsu.edu/faculty/rhee/export/bitcp.pdf
+ *
+ * Unless BIC is enabled and congestion window is large
+ * this behaves the same as the original Reno.
+ */
+static inline __u32 bictcp_cwnd(struct tcp_sock *tp)
+{
+	/* orignal Reno behaviour */
+	if (!tcp_is_bic(tp))
+		return tp->snd_cwnd;
+
+	if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
+	   (s32)(tcp_time_stamp - tp->bictcp.last_stamp) <= (HZ>>5))
+		return tp->bictcp.cnt;
+
+	tp->bictcp.last_cwnd = tp->snd_cwnd;
+	tp->bictcp.last_stamp = tcp_time_stamp;
+      
+	/* start off normal */
+	if (tp->snd_cwnd <= sysctl_tcp_bic_low_window)
+		tp->bictcp.cnt = tp->snd_cwnd;
+
+	/* binary increase */
+	else if (tp->snd_cwnd < tp->bictcp.last_max_cwnd) {
+		__u32 	dist = (tp->bictcp.last_max_cwnd - tp->snd_cwnd)
+			/ BICTCP_B;
+
+		if (dist > BICTCP_MAX_INCREMENT)
+			/* linear increase */
+			tp->bictcp.cnt = tp->snd_cwnd / BICTCP_MAX_INCREMENT;
+		else if (dist <= 1U)
+			/* binary search increase */
+			tp->bictcp.cnt = tp->snd_cwnd * BICTCP_FUNC_OF_MIN_INCR
+				/ BICTCP_B;
+		else
+			/* binary search increase */
+			tp->bictcp.cnt = tp->snd_cwnd / dist;
+	} else {
+		/* slow start amd linear increase */
+		if (tp->snd_cwnd < tp->bictcp.last_max_cwnd + BICTCP_B)
+			/* slow start */
+			tp->bictcp.cnt = tp->snd_cwnd * BICTCP_FUNC_OF_MIN_INCR
+				/ BICTCP_B;
+		else if (tp->snd_cwnd < tp->bictcp.last_max_cwnd
+			 		+ BICTCP_MAX_INCREMENT*(BICTCP_B-1))
+			/* slow start */
+			tp->bictcp.cnt = tp->snd_cwnd * (BICTCP_B-1)
+				/ (tp->snd_cwnd-tp->bictcp.last_max_cwnd);
+		else
+			/* linear increase */
+			tp->bictcp.cnt = tp->snd_cwnd / BICTCP_MAX_INCREMENT;
+	}
+	return tp->bictcp.cnt;
+}
+
+/* This is Jacobson's slow start and congestion avoidance. 
+ * SIGCOMM '88, p. 328.
+ */
+static inline void reno_cong_avoid(struct tcp_sock *tp)
+{
+        if (tp->snd_cwnd <= tp->snd_ssthresh) {
+                /* In "safe" area, increase. */
+		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
+	} else {
+                /* In dangerous area, increase slowly.
+		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
+		 */
+		if (tp->snd_cwnd_cnt >= bictcp_cwnd(tp)) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt=0;
+		} else
+			tp->snd_cwnd_cnt++;
+        }
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* This is based on the congestion detection/avoidance scheme described in
+ *    Lawrence S. Brakmo and Larry L. Peterson.
+ *    "TCP Vegas: End to end congestion avoidance on a global internet."
+ *    IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
+ *    October 1995. Available from:
+ *	ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
+ *
+ * See http://www.cs.arizona.edu/xkernel/ for their implementation.
+ * The main aspects that distinguish this implementation from the
+ * Arizona Vegas implementation are:
+ *   o We do not change the loss detection or recovery mechanisms of
+ *     Linux in any way. Linux already recovers from losses quite well,
+ *     using fine-grained timers, NewReno, and FACK.
+ *   o To avoid the performance penalty imposed by increasing cwnd
+ *     only every-other RTT during slow start, we increase during
+ *     every RTT during slow start, just like Reno.
+ *   o Largely to allow continuous cwnd growth during slow start,
+ *     we use the rate at which ACKs come back as the "actual"
+ *     rate, rather than the rate at which data is sent.
+ *   o To speed convergence to the right rate, we set the cwnd
+ *     to achieve the right ("actual") rate when we exit slow start.
+ *   o To filter out the noise caused by delayed ACKs, we use the
+ *     minimum RTT sample observed during the last RTT to calculate
+ *     the actual rate.
+ *   o When the sender re-starts from idle, it waits until it has
+ *     received ACKs for an entire flight of new data before making
+ *     a cwnd adjustment decision. The original Vegas implementation
+ *     assumed senders never went idle.
+ */
+static void vegas_cong_avoid(struct tcp_sock *tp, u32 ack, u32 seq_rtt)
+{
+	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
+	 *
+	 * These are so named because they represent the approximate values
+	 * of snd_una and snd_nxt at the beginning of the current RTT. More
+	 * precisely, they represent the amount of data sent during the RTT.
+	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
+	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
+	 * bytes of data have been ACKed during the course of the RTT, giving
+	 * an "actual" rate of:
+	 *
+	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
+	 *
+	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
+	 * because delayed ACKs can cover more than one segment, so they
+	 * don't line up nicely with the boundaries of RTTs.
+	 *
+	 * Another unfortunate fact of life is that delayed ACKs delay the
+	 * advance of the left edge of our send window, so that the number
+	 * of bytes we send in an RTT is often less than our cwnd will allow.
+	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
+	 */
+
+	if (after(ack, tp->vegas.beg_snd_nxt)) {
+		/* Do the Vegas once-per-RTT cwnd adjustment. */
+		u32 old_wnd, old_snd_cwnd;
+
+		
+		/* Here old_wnd is essentially the window of data that was
+		 * sent during the previous RTT, and has all
+		 * been acknowledged in the course of the RTT that ended
+		 * with the ACK we just received. Likewise, old_snd_cwnd
+		 * is the cwnd during the previous RTT.
+		 */
+		old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
+			tp->mss_cache_std;
+		old_snd_cwnd = tp->vegas.beg_snd_cwnd;
+
+		/* Save the extent of the current window so we can use this
+		 * at the end of the next RTT.
+		 */
+		tp->vegas.beg_snd_una  = tp->vegas.beg_snd_nxt;
+		tp->vegas.beg_snd_nxt  = tp->snd_nxt;
+		tp->vegas.beg_snd_cwnd = tp->snd_cwnd;
+
+		/* Take into account the current RTT sample too, to
+		 * decrease the impact of delayed acks. This double counts
+		 * this sample since we count it for the next window as well,
+		 * but that's not too awful, since we're taking the min,
+		 * rather than averaging.
+		 */
+		vegas_rtt_calc(tp, seq_rtt);
+
+		/* We do the Vegas calculations only if we got enough RTT
+		 * samples that we can be reasonably sure that we got
+		 * at least one RTT sample that wasn't from a delayed ACK.
+		 * If we only had 2 samples total,
+		 * then that means we're getting only 1 ACK per RTT, which
+		 * means they're almost certainly delayed ACKs.
+		 * If  we have 3 samples, we should be OK.
+		 */
+
+		if (tp->vegas.cntRTT <= 2) {
+			/* We don't have enough RTT samples to do the Vegas
+			 * calculation, so we'll behave like Reno.
+			 */
+			if (tp->snd_cwnd > tp->snd_ssthresh)
+				tp->snd_cwnd++;
+		} else {
+			u32 rtt, target_cwnd, diff;
+
+			/* We have enough RTT samples, so, using the Vegas
+			 * algorithm, we determine if we should increase or
+			 * decrease cwnd, and by how much.
+			 */
+
+			/* Pluck out the RTT we are using for the Vegas
+			 * calculations. This is the min RTT seen during the
+			 * last RTT. Taking the min filters out the effects
+			 * of delayed ACKs, at the cost of noticing congestion
+			 * a bit later.
+			 */
+			rtt = tp->vegas.minRTT;
+
+			/* Calculate the cwnd we should have, if we weren't
+			 * going too fast.
+			 *
+			 * This is:
+			 *     (actual rate in segments) * baseRTT
+			 * We keep it as a fixed point number with
+			 * V_PARAM_SHIFT bits to the right of the binary point.
+			 */
+			target_cwnd = ((old_wnd * tp->vegas.baseRTT)
+				       << V_PARAM_SHIFT) / rtt;
+
+			/* Calculate the difference between the window we had,
+			 * and the window we would like to have. This quantity
+			 * is the "Diff" from the Arizona Vegas papers.
+			 *
+			 * Again, this is a fixed point number with
+			 * V_PARAM_SHIFT bits to the right of the binary
+			 * point.
+			 */
+			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
+
+			if (tp->snd_cwnd < tp->snd_ssthresh) {
+				/* Slow start.  */
+				if (diff > sysctl_tcp_vegas_gamma) {
+					/* Going too fast. Time to slow down
+					 * and switch to congestion avoidance.
+					 */
+					tp->snd_ssthresh = 2;
+
+					/* Set cwnd to match the actual rate
+					 * exactly:
+					 *   cwnd = (actual rate) * baseRTT
+					 * Then we add 1 because the integer
+					 * truncation robs us of full link
+					 * utilization.
+					 */
+					tp->snd_cwnd = min(tp->snd_cwnd,
+							   (target_cwnd >>
+							    V_PARAM_SHIFT)+1);
+
+				}
+			} else {
+				/* Congestion avoidance. */
+				u32 next_snd_cwnd;
+
+				/* Figure out where we would like cwnd
+				 * to be.
+				 */
+				if (diff > sysctl_tcp_vegas_beta) {
+					/* The old window was too fast, so
+					 * we slow down.
+					 */
+					next_snd_cwnd = old_snd_cwnd - 1;
+				} else if (diff < sysctl_tcp_vegas_alpha) {
+					/* We don't have enough extra packets
+					 * in the network, so speed up.
+					 */
+					next_snd_cwnd = old_snd_cwnd + 1;
+				} else {
+					/* Sending just as fast as we
+					 * should be.
+					 */
+					next_snd_cwnd = old_snd_cwnd;
+				}
+
+				/* Adjust cwnd upward or downward, toward the
+				 * desired value.
+				 */
+				if (next_snd_cwnd > tp->snd_cwnd)
+					tp->snd_cwnd++;
+				else if (next_snd_cwnd < tp->snd_cwnd)
+					tp->snd_cwnd--;
+			}
+		}
+
+		/* Wipe the slate clean for the next RTT. */
+		tp->vegas.cntRTT = 0;
+		tp->vegas.minRTT = 0x7fffffff;
+	}
+
+	/* The following code is executed for every ack we receive,
+	 * except for conditions checked in should_advance_cwnd()
+	 * before the call to tcp_cong_avoid(). Mainly this means that
+	 * we only execute this code if the ack actually acked some
+	 * data.
+	 */
+
+	/* If we are in slow start, increase our cwnd in response to this ACK.
+	 * (If we are not in slow start then we are in congestion avoidance,
+	 * and adjust our congestion window only once per RTT. See the code
+	 * above.)
+	 */
+	if (tp->snd_cwnd <= tp->snd_ssthresh) 
+		tp->snd_cwnd++;
+
+	/* to keep cwnd from growing without bound */
+	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+
+	/* Make sure that we are never so timid as to reduce our cwnd below
+	 * 2 MSS.
+	 *
+	 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
+	 */
+	tp->snd_cwnd = max(tp->snd_cwnd, 2U);
+
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 seq_rtt)
+{
+	if (tcp_vegas_enabled(tp))
+		vegas_cong_avoid(tp, ack, seq_rtt);
+	else
+		reno_cong_avoid(tp);
+}
+
+/* Restart timer after forward progress on connection.
+ * RFC2988 recommends to restart timer to now+rto.
+ */
+
+static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+{
+	if (!tp->packets_out) {
+		tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
+	} else {
+		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	}
+}
+
+/* There is one downside to this scheme.  Although we keep the
+ * ACK clock ticking, adjusting packet counters and advancing
+ * congestion window, we do not liberate socket send buffer
+ * space.
+ *
+ * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
+ * then making a write space wakeup callback is a possible
+ * future enhancement.  WARNING: it is not trivial to make.
+ */
+static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
+			 __u32 now, __s32 *seq_rtt)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
+	__u32 seq = tp->snd_una;
+	__u32 packets_acked;
+	int acked = 0;
+
+	/* If we get here, the whole TSO packet has not been
+	 * acked.
+	 */
+	BUG_ON(!after(scb->end_seq, seq));
+
+	packets_acked = tcp_skb_pcount(skb);
+	if (tcp_trim_head(sk, skb, seq - scb->seq))
+		return 0;
+	packets_acked -= tcp_skb_pcount(skb);
+
+	if (packets_acked) {
+		__u8 sacked = scb->sacked;
+
+		acked |= FLAG_DATA_ACKED;
+		if (sacked) {
+			if (sacked & TCPCB_RETRANS) {
+				if (sacked & TCPCB_SACKED_RETRANS)
+					tp->retrans_out -= packets_acked;
+				acked |= FLAG_RETRANS_DATA_ACKED;
+				*seq_rtt = -1;
+			} else if (*seq_rtt < 0)
+				*seq_rtt = now - scb->when;
+			if (sacked & TCPCB_SACKED_ACKED)
+				tp->sacked_out -= packets_acked;
+			if (sacked & TCPCB_LOST)
+				tp->lost_out -= packets_acked;
+			if (sacked & TCPCB_URG) {
+				if (tp->urg_mode &&
+				    !before(seq, tp->snd_up))
+					tp->urg_mode = 0;
+			}
+		} else if (*seq_rtt < 0)
+			*seq_rtt = now - scb->when;
+
+		if (tp->fackets_out) {
+			__u32 dval = min(tp->fackets_out, packets_acked);
+			tp->fackets_out -= dval;
+		}
+		tp->packets_out -= packets_acked;
+
+		BUG_ON(tcp_skb_pcount(skb) == 0);
+		BUG_ON(!before(scb->seq, scb->end_seq));
+	}
+
+	return acked;
+}
+
+
+/* Remove acknowledged frames from the retransmission queue. */
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	__u32 now = tcp_time_stamp;
+	int acked = 0;
+	__s32 seq_rtt = -1;
+
+	while ((skb = skb_peek(&sk->sk_write_queue)) &&
+	       skb != sk->sk_send_head) {
+		struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
+		__u8 sacked = scb->sacked;
+
+		/* If our packet is before the ack sequence we can
+		 * discard it as it's confirmed to have arrived at
+		 * the other end.
+		 */
+		if (after(scb->end_seq, tp->snd_una)) {
+			if (tcp_skb_pcount(skb) > 1)
+				acked |= tcp_tso_acked(sk, skb,
+						       now, &seq_rtt);
+			break;
+		}
+
+		/* Initial outgoing SYN's get put onto the write_queue
+		 * just like anything else we transmit.  It is not
+		 * true data, and if we misinform our callers that
+		 * this ACK acks real data, we will erroneously exit
+		 * connection startup slow start one packet too
+		 * quickly.  This is severely frowned upon behavior.
+		 */
+		if (!(scb->flags & TCPCB_FLAG_SYN)) {
+			acked |= FLAG_DATA_ACKED;
+		} else {
+			acked |= FLAG_SYN_ACKED;
+			tp->retrans_stamp = 0;
+		}
+
+		if (sacked) {
+			if (sacked & TCPCB_RETRANS) {
+				if(sacked & TCPCB_SACKED_RETRANS)
+					tp->retrans_out -= tcp_skb_pcount(skb);
+				acked |= FLAG_RETRANS_DATA_ACKED;
+				seq_rtt = -1;
+			} else if (seq_rtt < 0)
+				seq_rtt = now - scb->when;
+			if (sacked & TCPCB_SACKED_ACKED)
+				tp->sacked_out -= tcp_skb_pcount(skb);
+			if (sacked & TCPCB_LOST)
+				tp->lost_out -= tcp_skb_pcount(skb);
+			if (sacked & TCPCB_URG) {
+				if (tp->urg_mode &&
+				    !before(scb->end_seq, tp->snd_up))
+					tp->urg_mode = 0;
+			}
+		} else if (seq_rtt < 0)
+			seq_rtt = now - scb->when;
+		tcp_dec_pcount_approx(&tp->fackets_out, skb);
+		tcp_packets_out_dec(tp, skb);
+		__skb_unlink(skb, skb->list);
+		sk_stream_free_skb(sk, skb);
+	}
+
+	if (acked&FLAG_ACKED) {
+		tcp_ack_update_rtt(tp, acked, seq_rtt);
+		tcp_ack_packets_out(sk, tp);
+	}
+
+#if FASTRETRANS_DEBUG > 0
+	BUG_TRAP((int)tp->sacked_out >= 0);
+	BUG_TRAP((int)tp->lost_out >= 0);
+	BUG_TRAP((int)tp->retrans_out >= 0);
+	if (!tp->packets_out && tp->rx_opt.sack_ok) {
+		if (tp->lost_out) {
+			printk(KERN_DEBUG "Leak l=%u %d\n",
+			       tp->lost_out, tp->ca_state);
+			tp->lost_out = 0;
+		}
+		if (tp->sacked_out) {
+			printk(KERN_DEBUG "Leak s=%u %d\n",
+			       tp->sacked_out, tp->ca_state);
+			tp->sacked_out = 0;
+		}
+		if (tp->retrans_out) {
+			printk(KERN_DEBUG "Leak r=%u %d\n",
+			       tp->retrans_out, tp->ca_state);
+			tp->retrans_out = 0;
+		}
+	}
+#endif
+	*seq_rtt_p = seq_rtt;
+	return acked;
+}
+
+static void tcp_ack_probe(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Was it a usable window open? */
+
+	if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+		   tp->snd_una + tp->snd_wnd)) {
+		tp->backoff = 0;
+		tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
+		/* Socket must be waked up by subsequent tcp_data_snd_check().
+		 * This function is not for random using!
+		 */
+	} else {
+		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
+				     min(tp->rto << tp->backoff, TCP_RTO_MAX));
+	}
+}
+
+static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag)
+{
+	return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+		tp->ca_state != TCP_CA_Open);
+}
+
+static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag)
+{
+	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
+		!((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
+}
+
+/* Check that window update is acceptable.
+ * The function assumes that snd_una<=ack<=snd_next.
+ */
+static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack,
+					u32 ack_seq, u32 nwin)
+{
+	return (after(ack, tp->snd_una) ||
+		after(ack_seq, tp->snd_wl1) ||
+		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+}
+
+/* Update our send window.
+ *
+ * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
+ * and in FreeBSD. NetBSD's one is even worse.) is wrong.
+ */
+static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
+				 struct sk_buff *skb, u32 ack, u32 ack_seq)
+{
+	int flag = 0;
+	u32 nwin = ntohs(skb->h.th->window);
+
+	if (likely(!skb->h.th->syn))
+		nwin <<= tp->rx_opt.snd_wscale;
+
+	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
+		flag |= FLAG_WIN_UPDATE;
+		tcp_update_wl(tp, ack, ack_seq);
+
+		if (tp->snd_wnd != nwin) {
+			tp->snd_wnd = nwin;
+
+			/* Note, it is the only place, where
+			 * fast path is recovered for sending TCP.
+			 */
+			tcp_fast_path_check(sk, tp);
+
+			if (nwin > tp->max_window) {
+				tp->max_window = nwin;
+				tcp_sync_mss(sk, tp->pmtu_cookie);
+			}
+		}
+	}
+
+	tp->snd_una = ack;
+
+	return flag;
+}
+
+static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	
+	tcp_sync_left_out(tp);
+	
+	if (tp->snd_una == prior_snd_una ||
+	    !before(tp->snd_una, tp->frto_highmark)) {
+		/* RTO was caused by loss, start retransmitting in
+		 * go-back-N slow start
+		 */
+		tcp_enter_frto_loss(sk);
+		return;
+	}
+
+	if (tp->frto_counter == 1) {
+		/* First ACK after RTO advances the window: allow two new
+		 * segments out.
+		 */
+		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+	} else {
+		/* Also the second ACK after RTO advances the window.
+		 * The RTO was likely spurious. Reduce cwnd and continue
+		 * in congestion avoidance
+		 */
+		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		tcp_moderate_cwnd(tp);
+	}
+
+	/* F-RTO affects on two new ACKs following RTO.
+	 * At latest on third ACK the TCP behavor is back to normal.
+	 */
+	tp->frto_counter = (tp->frto_counter + 1) % 3;
+}
+
+/*
+ * TCP Westwood+
+ */
+
+/*
+ * @init_westwood
+ * This function initializes fields used in TCP Westwood+. We can't
+ * get no information about RTTmin at this time so we simply set it to
+ * TCP_WESTWOOD_INIT_RTT. This value was chosen to be too conservative
+ * since in this way we're sure it will be updated in a consistent
+ * way as soon as possible. It will reasonably happen within the first
+ * RTT period of the connection lifetime.
+ */
+
+static void init_westwood(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+
+        tp->westwood.bw_ns_est = 0;
+        tp->westwood.bw_est = 0;
+        tp->westwood.accounted = 0;
+        tp->westwood.cumul_ack = 0;
+        tp->westwood.rtt_win_sx = tcp_time_stamp;
+        tp->westwood.rtt = TCP_WESTWOOD_INIT_RTT;
+        tp->westwood.rtt_min = TCP_WESTWOOD_INIT_RTT;
+        tp->westwood.snd_una = tp->snd_una;
+}
+
+/*
+ * @westwood_do_filter
+ * Low-pass filter. Implemented using constant coeffients.
+ */
+
+static inline __u32 westwood_do_filter(__u32 a, __u32 b)
+{
+	return (((7 * a) + b) >> 3);
+}
+
+static void westwood_filter(struct sock *sk, __u32 delta)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->westwood.bw_ns_est =
+		westwood_do_filter(tp->westwood.bw_ns_est, 
+				   tp->westwood.bk / delta);
+	tp->westwood.bw_est =
+		westwood_do_filter(tp->westwood.bw_est,
+				   tp->westwood.bw_ns_est);
+}
+
+/* 
+ * @westwood_update_rttmin
+ * It is used to update RTTmin. In this case we MUST NOT use
+ * WESTWOOD_RTT_MIN minimum bound since we could be on a LAN!
+ */
+
+static inline __u32 westwood_update_rttmin(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	__u32 rttmin = tp->westwood.rtt_min;
+
+	if (tp->westwood.rtt != 0 &&
+	    (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin))
+		rttmin = tp->westwood.rtt;
+
+	return rttmin;
+}
+
+/*
+ * @westwood_acked
+ * Evaluate increases for dk. 
+ */
+
+static inline __u32 westwood_acked(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->snd_una - tp->westwood.snd_una;
+}
+
+/*
+ * @westwood_new_window
+ * It evaluates if we are receiving data inside the same RTT window as
+ * when we started.
+ * Return value:
+ * It returns 0 if we are still evaluating samples in the same RTT
+ * window, 1 if the sample has to be considered in the next window.
+ */
+
+static int westwood_new_window(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	__u32 left_bound;
+	__u32 rtt;
+	int ret = 0;
+
+	left_bound = tp->westwood.rtt_win_sx;
+	rtt = max(tp->westwood.rtt, (u32) TCP_WESTWOOD_RTT_MIN);
+
+	/*
+	 * A RTT-window has passed. Be careful since if RTT is less than
+	 * 50ms we don't filter but we continue 'building the sample'.
+	 * This minimum limit was choosen since an estimation on small
+	 * time intervals is better to avoid...
+	 * Obvioulsy on a LAN we reasonably will always have
+	 * right_bound = left_bound + WESTWOOD_RTT_MIN
+         */
+
+	if ((left_bound + rtt) < tcp_time_stamp)
+		ret = 1;
+
+	return ret;
+}
+
+/*
+ * @westwood_update_window
+ * It updates RTT evaluation window if it is the right moment to do
+ * it. If so it calls filter for evaluating bandwidth. 
+ */
+
+static void __westwood_update_window(struct sock *sk, __u32 now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u32 delta = now - tp->westwood.rtt_win_sx;
+
+        if (delta) {
+		if (tp->westwood.rtt)
+			westwood_filter(sk, delta);
+
+		tp->westwood.bk = 0;
+		tp->westwood.rtt_win_sx = tcp_time_stamp;
+	}
+}
+
+
+static void westwood_update_window(struct sock *sk, __u32 now)
+{
+	if (westwood_new_window(sk)) 
+		__westwood_update_window(sk, now);
+}
+
+/*
+ * @__tcp_westwood_fast_bw
+ * It is called when we are in fast path. In particular it is called when
+ * header prediction is successfull. In such case infact update is
+ * straight forward and doesn't need any particular care.
+ */
+
+static void __tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	westwood_update_window(sk, tcp_time_stamp);
+
+	tp->westwood.bk += westwood_acked(sk);
+	tp->westwood.snd_una = tp->snd_una;
+	tp->westwood.rtt_min = westwood_update_rttmin(sk);
+}
+
+static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
+{
+        if (tcp_is_westwood(tcp_sk(sk)))
+                __tcp_westwood_fast_bw(sk, skb);
+}
+
+
+/*
+ * @westwood_dupack_update
+ * It updates accounted and cumul_ack when receiving a dupack.
+ */
+
+static void westwood_dupack_update(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->westwood.accounted += tp->mss_cache_std;
+	tp->westwood.cumul_ack = tp->mss_cache_std;
+}
+
+static inline int westwood_may_change_cumul(struct tcp_sock *tp)
+{
+	return (tp->westwood.cumul_ack > tp->mss_cache_std);
+}
+
+static inline void westwood_partial_update(struct tcp_sock *tp)
+{
+	tp->westwood.accounted -= tp->westwood.cumul_ack;
+	tp->westwood.cumul_ack = tp->mss_cache_std;
+}
+
+static inline void westwood_complete_update(struct tcp_sock *tp)
+{
+	tp->westwood.cumul_ack -= tp->westwood.accounted;
+	tp->westwood.accounted = 0;
+}
+
+/*
+ * @westwood_acked_count
+ * This function evaluates cumul_ack for evaluating dk in case of
+ * delayed or partial acks.
+ */
+
+static inline __u32 westwood_acked_count(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->westwood.cumul_ack = westwood_acked(sk);
+
+        /* If cumul_ack is 0 this is a dupack since it's not moving
+         * tp->snd_una.
+         */
+        if (!(tp->westwood.cumul_ack))
+                westwood_dupack_update(sk);
+
+        if (westwood_may_change_cumul(tp)) {
+		/* Partial or delayed ack */
+		if (tp->westwood.accounted >= tp->westwood.cumul_ack)
+			westwood_partial_update(tp);
+		else
+			westwood_complete_update(tp);
+	}
+
+	tp->westwood.snd_una = tp->snd_una;
+
+	return tp->westwood.cumul_ack;
+}
+
+
+/*
+ * @__tcp_westwood_slow_bw
+ * It is called when something is going wrong..even if there could
+ * be no problems! Infact a simple delayed packet may trigger a
+ * dupack. But we need to be careful in such case.
+ */
+
+static void __tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	westwood_update_window(sk, tcp_time_stamp);
+
+	tp->westwood.bk += westwood_acked_count(sk);
+	tp->westwood.rtt_min = westwood_update_rttmin(sk);
+}
+
+static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
+{
+        if (tcp_is_westwood(tcp_sk(sk)))
+                __tcp_westwood_slow_bw(sk, skb);
+}
+
+/* This routine deals with incoming acks, but not outgoing ones. */
+static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 prior_snd_una = tp->snd_una;
+	u32 ack_seq = TCP_SKB_CB(skb)->seq;
+	u32 ack = TCP_SKB_CB(skb)->ack_seq;
+	u32 prior_in_flight;
+	s32 seq_rtt;
+	int prior_packets;
+
+	/* If the ack is newer than sent or older than previous acks
+	 * then we can probably ignore it.
+	 */
+	if (after(ack, tp->snd_nxt))
+		goto uninteresting_ack;
+
+	if (before(ack, prior_snd_una))
+		goto old_ack;
+
+	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+		/* Window is constant, pure forward advance.
+		 * No more checks are required.
+		 * Note, we use the fact that SND.UNA>=SND.WL2.
+		 */
+		tcp_update_wl(tp, ack, ack_seq);
+		tp->snd_una = ack;
+		tcp_westwood_fast_bw(sk, skb);
+		flag |= FLAG_WIN_UPDATE;
+
+		NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
+	} else {
+		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+			flag |= FLAG_DATA;
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
+
+		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+
+		if (TCP_SKB_CB(skb)->sacked)
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+
+		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+			flag |= FLAG_ECE;
+
+		tcp_westwood_slow_bw(sk,skb);
+	}
+
+	/* We passed data and got it acked, remove any soft error
+	 * log. Something worked...
+	 */
+	sk->sk_err_soft = 0;
+	tp->rcv_tstamp = tcp_time_stamp;
+	prior_packets = tp->packets_out;
+	if (!prior_packets)
+		goto no_queue;
+
+	prior_in_flight = tcp_packets_in_flight(tp);
+
+	/* See if we can take anything off of the retransmit queue. */
+	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+
+	if (tp->frto_counter)
+		tcp_process_frto(sk, prior_snd_una);
+
+	if (tcp_ack_is_dubious(tp, flag)) {
+		/* Advanve CWND, if state allows this. */
+		if ((flag & FLAG_DATA_ACKED) &&
+		    (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd) &&
+		    tcp_may_raise_cwnd(tp, flag))
+			tcp_cong_avoid(tp, ack, seq_rtt);
+		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
+	} else {
+		if ((flag & FLAG_DATA_ACKED) && 
+		    (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd))
+			tcp_cong_avoid(tp, ack, seq_rtt);
+	}
+
+	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
+		dst_confirm(sk->sk_dst_cache);
+
+	return 1;
+
+no_queue:
+	tp->probes_out = 0;
+
+	/* If this ack opens up a zero window, clear backoff.  It was
+	 * being used to time the probes, and is probably far higher than
+	 * it needs to be for normal retransmission.
+	 */
+	if (sk->sk_send_head)
+		tcp_ack_probe(sk);
+	return 1;
+
+old_ack:
+	if (TCP_SKB_CB(skb)->sacked)
+		tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+
+uninteresting_ack:
+	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+	return 0;
+}
+
+
+/* Look for tcp options. Normally only called on SYN and SYNACK packets.
+ * But, this can also be called on packets in the established flow when
+ * the fast version below fails.
+ */
+void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
+{
+	unsigned char *ptr;
+	struct tcphdr *th = skb->h.th;
+	int length=(th->doff*4)-sizeof(struct tcphdr);
+
+	ptr = (unsigned char *)(th + 1);
+	opt_rx->saw_tstamp = 0;
+
+	while(length>0) {
+	  	int opcode=*ptr++;
+		int opsize;
+
+		switch (opcode) {
+			case TCPOPT_EOL:
+				return;
+			case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+				length--;
+				continue;
+			default:
+				opsize=*ptr++;
+				if (opsize < 2) /* "silly options" */
+					return;
+				if (opsize > length)
+					return;	/* don't parse partial options */
+	  			switch(opcode) {
+				case TCPOPT_MSS:
+					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+						u16 in_mss = ntohs(get_unaligned((__u16 *)ptr));
+						if (in_mss) {
+							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
+								in_mss = opt_rx->user_mss;
+							opt_rx->mss_clamp = in_mss;
+						}
+					}
+					break;
+				case TCPOPT_WINDOW:
+					if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+						if (sysctl_tcp_window_scaling) {
+							__u8 snd_wscale = *(__u8 *) ptr;
+							opt_rx->wscale_ok = 1;
+							if (snd_wscale > 14) {
+								if(net_ratelimit())
+									printk(KERN_INFO "tcp_parse_options: Illegal window "
+									       "scaling value %d >14 received.\n",
+									       snd_wscale);
+								snd_wscale = 14;
+							}
+							opt_rx->snd_wscale = snd_wscale;
+						}
+					break;
+				case TCPOPT_TIMESTAMP:
+					if(opsize==TCPOLEN_TIMESTAMP) {
+						if ((estab && opt_rx->tstamp_ok) ||
+						    (!estab && sysctl_tcp_timestamps)) {
+							opt_rx->saw_tstamp = 1;
+							opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr));
+							opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4)));
+						}
+					}
+					break;
+				case TCPOPT_SACK_PERM:
+					if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+						if (sysctl_tcp_sack) {
+							opt_rx->sack_ok = 1;
+							tcp_sack_reset(opt_rx);
+						}
+					}
+					break;
+
+				case TCPOPT_SACK:
+					if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+					   opt_rx->sack_ok) {
+						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+					}
+	  			};
+	  			ptr+=opsize-2;
+	  			length-=opsize;
+	  	};
+	}
+}
+
+/* Fast parse options. This hopes to only see timestamps.
+ * If it is wrong it falls back on tcp_parse_options().
+ */
+static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
+					 struct tcp_sock *tp)
+{
+	if (th->doff == sizeof(struct tcphdr)>>2) {
+		tp->rx_opt.saw_tstamp = 0;
+		return 0;
+	} else if (tp->rx_opt.tstamp_ok &&
+		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+		__u32 *ptr = (__u32 *)(th + 1);
+		if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+				  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+			tp->rx_opt.saw_tstamp = 1;
+			++ptr;
+			tp->rx_opt.rcv_tsval = ntohl(*ptr);
+			++ptr;
+			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+			return 1;
+		}
+	}
+	tcp_parse_options(skb, &tp->rx_opt, 1);
+	return 1;
+}
+
+static inline void tcp_store_ts_recent(struct tcp_sock *tp)
+{
+	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
+	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+}
+
+static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
+{
+	if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
+		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
+		 * extra check below makes sure this can only happen
+		 * for pure ACK frames.  -DaveM
+		 *
+		 * Not only, also it occurs for expired timestamps.
+		 */
+
+		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+			tcp_store_ts_recent(tp);
+	}
+}
+
+/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
+ *
+ * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
+ * it can pass through stack. So, the following predicate verifies that
+ * this segment is not used for anything but congestion avoidance or
+ * fast retransmit. Moreover, we even are able to eliminate most of such
+ * second order effects, if we apply some small "replay" window (~RTO)
+ * to timestamp space.
+ *
+ * All these measures still do not guarantee that we reject wrapped ACKs
+ * on networks with high bandwidth, when sequence space is recycled fastly,
+ * but it guarantees that such events will be very rare and do not affect
+ * connection seriously. This doesn't look nice, but alas, PAWS is really
+ * buggy extension.
+ *
+ * [ Later note. Even worse! It is buggy for segments _with_ data. RFC
+ * states that events when retransmit arrives after original data are rare.
+ * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
+ * the biggest problem on large power networks even with minor reordering.
+ * OK, let's give it small replay window. If peer clock is even 1hz, it is safe
+ * up to bandwidth of 18Gigabit/sec. 8) ]
+ */
+
+static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	struct tcphdr *th = skb->h.th;
+	u32 seq = TCP_SKB_CB(skb)->seq;
+	u32 ack = TCP_SKB_CB(skb)->ack_seq;
+
+	return (/* 1. Pure ACK with correct sequence number. */
+		(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
+
+		/* 2. ... and duplicate ACK. */
+		ack == tp->snd_una &&
+
+		/* 3. ... and does not update window. */
+		!tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
+
+		/* 4. ... and sits in replay window. */
+		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ);
+}
+
+static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
+		xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+		!tcp_disordered_ack(tp, skb));
+}
+
+/* Check segment sequence number for validity.
+ *
+ * Segment controls are considered valid, if the segment
+ * fits to the window after truncation to the window. Acceptability
+ * of data (and SYN, FIN, of course) is checked separately.
+ * See tcp_data_queue(), for example.
+ *
+ * Also, controls (RST is main one) are accepted using RCV.WUP instead
+ * of RCV.NXT. Peer still did not advance his SND.UNA when we
+ * delayed ACK, so that hisSND.UNA<=ourRCV.WUP.
+ * (borrowed from freebsd)
+ */
+
+static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+{
+	return	!before(end_seq, tp->rcv_wup) &&
+		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+}
+
+/* When we get a reset we do this. */
+static void tcp_reset(struct sock *sk)
+{
+	/* We want the right error as BSD sees it (and indeed as we do). */
+	switch (sk->sk_state) {
+		case TCP_SYN_SENT:
+			sk->sk_err = ECONNREFUSED;
+			break;
+		case TCP_CLOSE_WAIT:
+			sk->sk_err = EPIPE;
+			break;
+		case TCP_CLOSE:
+			return;
+		default:
+			sk->sk_err = ECONNRESET;
+	}
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_error_report(sk);
+
+	tcp_done(sk);
+}
+
+/*
+ * 	Process the FIN bit. This now behaves as it is supposed to work
+ *	and the FIN takes effect when it is validly part of sequence
+ *	space. Not before when we get holes.
+ *
+ *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
+ *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
+ *	TIME-WAIT)
+ *
+ *	If we are in FINWAIT-1, a received FIN indicates simultaneous
+ *	close and we go into CLOSING (and later onto TIME-WAIT)
+ *
+ *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
+ */
+static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_schedule_ack(tp);
+
+	sk->sk_shutdown |= RCV_SHUTDOWN;
+	sock_set_flag(sk, SOCK_DONE);
+
+	switch (sk->sk_state) {
+		case TCP_SYN_RECV:
+		case TCP_ESTABLISHED:
+			/* Move to CLOSE_WAIT */
+			tcp_set_state(sk, TCP_CLOSE_WAIT);
+			tp->ack.pingpong = 1;
+			break;
+
+		case TCP_CLOSE_WAIT:
+		case TCP_CLOSING:
+			/* Received a retransmission of the FIN, do
+			 * nothing.
+			 */
+			break;
+		case TCP_LAST_ACK:
+			/* RFC793: Remain in the LAST-ACK state. */
+			break;
+
+		case TCP_FIN_WAIT1:
+			/* This case occurs when a simultaneous close
+			 * happens, we must ack the received FIN and
+			 * enter the CLOSING state.
+			 */
+			tcp_send_ack(sk);
+			tcp_set_state(sk, TCP_CLOSING);
+			break;
+		case TCP_FIN_WAIT2:
+			/* Received a FIN -- send ACK and enter TIME_WAIT. */
+			tcp_send_ack(sk);
+			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+			break;
+		default:
+			/* Only TCP_LISTEN and TCP_CLOSE are left, in these
+			 * cases we should never reach this piece of code.
+			 */
+			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+			       __FUNCTION__, sk->sk_state);
+			break;
+	};
+
+	/* It _is_ possible, that we have something out-of-order _after_ FIN.
+	 * Probably, we should reset in this case. For now drop them.
+	 */
+	__skb_queue_purge(&tp->out_of_order_queue);
+	if (tp->rx_opt.sack_ok)
+		tcp_sack_reset(&tp->rx_opt);
+	sk_stream_mem_reclaim(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+
+		/* Do not send POLL_HUP for half duplex close. */
+		if (sk->sk_shutdown == SHUTDOWN_MASK ||
+		    sk->sk_state == TCP_CLOSE)
+			sk_wake_async(sk, 1, POLL_HUP);
+		else
+			sk_wake_async(sk, 1, POLL_IN);
+	}
+}
+
+static __inline__ int
+tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+{
+	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
+		if (before(seq, sp->start_seq))
+			sp->start_seq = seq;
+		if (after(end_seq, sp->end_seq))
+			sp->end_seq = end_seq;
+		return 1;
+	}
+	return 0;
+}
+
+static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+{
+	if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+		if (before(seq, tp->rcv_nxt))
+			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
+		else
+			NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
+
+		tp->rx_opt.dsack = 1;
+		tp->duplicate_sack[0].start_seq = seq;
+		tp->duplicate_sack[0].end_seq = end_seq;
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
+	}
+}
+
+static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+{
+	if (!tp->rx_opt.dsack)
+		tcp_dsack_set(tp, seq, end_seq);
+	else
+		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
+}
+
+static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+		tcp_enter_quickack_mode(tp);
+
+		if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
+			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
+				end_seq = tp->rcv_nxt;
+			tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+		}
+	}
+
+	tcp_send_ack(sk);
+}
+
+/* These routines update the SACK block as out-of-order packets arrive or
+ * in-order packets close up the sequence space.
+ */
+static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
+{
+	int this_sack;
+	struct tcp_sack_block *sp = &tp->selective_acks[0];
+	struct tcp_sack_block *swalk = sp+1;
+
+	/* See if the recent change to the first SACK eats into
+	 * or hits the sequence space of other SACK blocks, if so coalesce.
+	 */
+	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
+		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
+			int i;
+
+			/* Zap SWALK, by moving every further SACK up by one slot.
+			 * Decrease num_sacks.
+			 */
+			tp->rx_opt.num_sacks--;
+			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+			for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+				sp[i] = sp[i+1];
+			continue;
+		}
+		this_sack++, swalk++;
+	}
+}
+
+static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+{
+	__u32 tmp;
+
+	tmp = sack1->start_seq;
+	sack1->start_seq = sack2->start_seq;
+	sack2->start_seq = tmp;
+
+	tmp = sack1->end_seq;
+	sack1->end_seq = sack2->end_seq;
+	sack2->end_seq = tmp;
+}
+
+static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_sack_block *sp = &tp->selective_acks[0];
+	int cur_sacks = tp->rx_opt.num_sacks;
+	int this_sack;
+
+	if (!cur_sacks)
+		goto new_sack;
+
+	for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+		if (tcp_sack_extend(sp, seq, end_seq)) {
+			/* Rotate this_sack to the first one. */
+			for (; this_sack>0; this_sack--, sp--)
+				tcp_sack_swap(sp, sp-1);
+			if (cur_sacks > 1)
+				tcp_sack_maybe_coalesce(tp);
+			return;
+		}
+	}
+
+	/* Could not find an adjacent existing SACK, build a new one,
+	 * put it at the front, and shift everyone else down.  We
+	 * always know there is at least one SACK present already here.
+	 *
+	 * If the sack array is full, forget about the last one.
+	 */
+	if (this_sack >= 4) {
+		this_sack--;
+		tp->rx_opt.num_sacks--;
+		sp--;
+	}
+	for(; this_sack > 0; this_sack--, sp--)
+		*sp = *(sp-1);
+
+new_sack:
+	/* Build the new head SACK, and we're done. */
+	sp->start_seq = seq;
+	sp->end_seq = end_seq;
+	tp->rx_opt.num_sacks++;
+	tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+}
+
+/* RCV.NXT advances, some SACKs should be eaten. */
+
+static void tcp_sack_remove(struct tcp_sock *tp)
+{
+	struct tcp_sack_block *sp = &tp->selective_acks[0];
+	int num_sacks = tp->rx_opt.num_sacks;
+	int this_sack;
+
+	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
+	if (skb_queue_len(&tp->out_of_order_queue) == 0) {
+		tp->rx_opt.num_sacks = 0;
+		tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
+		return;
+	}
+
+	for(this_sack = 0; this_sack < num_sacks; ) {
+		/* Check if the start of the sack is covered by RCV.NXT. */
+		if (!before(tp->rcv_nxt, sp->start_seq)) {
+			int i;
+
+			/* RCV.NXT must cover all the block! */
+			BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq));
+
+			/* Zap this SACK, by moving forward any other SACKS. */
+			for (i=this_sack+1; i < num_sacks; i++)
+				tp->selective_acks[i-1] = tp->selective_acks[i];
+			num_sacks--;
+			continue;
+		}
+		this_sack++;
+		sp++;
+	}
+	if (num_sacks != tp->rx_opt.num_sacks) {
+		tp->rx_opt.num_sacks = num_sacks;
+		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+	}
+}
+
+/* This one checks to see if we can put data from the
+ * out_of_order queue into the receive_queue.
+ */
+static void tcp_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u32 dsack_high = tp->rcv_nxt;
+	struct sk_buff *skb;
+
+	while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
+			break;
+
+		if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
+			__u32 dsack = dsack_high;
+			if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
+				dsack_high = TCP_SKB_CB(skb)->end_seq;
+			tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
+		}
+
+		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+			SOCK_DEBUG(sk, "ofo packet was already received \n");
+			__skb_unlink(skb, skb->list);
+			__kfree_skb(skb);
+			continue;
+		}
+		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
+			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(skb)->end_seq);
+
+		__skb_unlink(skb, skb->list);
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		if(skb->h.th->fin)
+			tcp_fin(skb, sk, skb->h.th);
+	}
+}
+
+static int tcp_prune_queue(struct sock *sk);
+
+static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcphdr *th = skb->h.th;
+	struct tcp_sock *tp = tcp_sk(sk);
+	int eaten = -1;
+
+	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
+		goto drop;
+
+	th = skb->h.th;
+	__skb_pull(skb, th->doff*4);
+
+	TCP_ECN_accept_cwr(tp, skb);
+
+	if (tp->rx_opt.dsack) {
+		tp->rx_opt.dsack = 0;
+		tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
+						    4 - tp->rx_opt.tstamp_ok);
+	}
+
+	/*  Queue data for delivery to the user.
+	 *  Packets in sequence go to the receive queue.
+	 *  Out of sequence packets to the out_of_order_queue.
+	 */
+	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+		if (tcp_receive_window(tp) == 0)
+			goto out_of_window;
+
+		/* Ok. In sequence. In window. */
+		if (tp->ucopy.task == current &&
+		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
+		    sock_owned_by_user(sk) && !tp->urg_data) {
+			int chunk = min_t(unsigned int, skb->len,
+							tp->ucopy.len);
+
+			__set_current_state(TASK_RUNNING);
+
+			local_bh_enable();
+			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
+				tp->ucopy.len -= chunk;
+				tp->copied_seq += chunk;
+				eaten = (chunk == skb->len && !th->fin);
+				tcp_rcv_space_adjust(sk);
+			}
+			local_bh_disable();
+		}
+
+		if (eaten <= 0) {
+queue_and_out:
+			if (eaten < 0 &&
+			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+			     !sk_stream_rmem_schedule(sk, skb))) {
+				if (tcp_prune_queue(sk) < 0 ||
+				    !sk_stream_rmem_schedule(sk, skb))
+					goto drop;
+			}
+			sk_stream_set_owner_r(skb, sk);
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
+		}
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		if(skb->len)
+			tcp_event_data_recv(sk, tp, skb);
+		if(th->fin)
+			tcp_fin(skb, sk, th);
+
+		if (skb_queue_len(&tp->out_of_order_queue)) {
+			tcp_ofo_queue(sk);
+
+			/* RFC2581. 4.2. SHOULD send immediate ACK, when
+			 * gap in queue is filled.
+			 */
+			if (!skb_queue_len(&tp->out_of_order_queue))
+				tp->ack.pingpong = 0;
+		}
+
+		if (tp->rx_opt.num_sacks)
+			tcp_sack_remove(tp);
+
+		tcp_fast_path_check(sk, tp);
+
+		if (eaten > 0)
+			__kfree_skb(skb);
+		else if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, 0);
+		return;
+	}
+
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+		/* A retransmit, 2nd most common case.  Force an immediate ack. */
+		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+out_of_window:
+		tcp_enter_quickack_mode(tp);
+		tcp_schedule_ack(tp);
+drop:
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Out of window. F.e. zero window probe. */
+	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+		goto out_of_window;
+
+	tcp_enter_quickack_mode(tp);
+
+	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		/* Partial packet, seq < rcv_next < end_seq */
+		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
+			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(skb)->end_seq);
+
+		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
+		
+		/* If window is closed, drop tail of packet. But after
+		 * remembering D-SACK for its head made in previous line.
+		 */
+		if (!tcp_receive_window(tp))
+			goto out_of_window;
+		goto queue_and_out;
+	}
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_stream_rmem_schedule(sk, skb)) {
+		if (tcp_prune_queue(sk) < 0 ||
+		    !sk_stream_rmem_schedule(sk, skb))
+			goto drop;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	tcp_schedule_ack(tp);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	sk_stream_set_owner_r(skb, sk);
+
+	if (!skb_peek(&tp->out_of_order_queue)) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tp->rx_opt.sack_ok) {
+			tp->rx_opt.num_sacks = 1;
+			tp->rx_opt.dsack     = 0;
+			tp->rx_opt.eff_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue,skb);
+	} else {
+		struct sk_buff *skb1 = tp->out_of_order_queue.prev;
+		u32 seq = TCP_SKB_CB(skb)->seq;
+		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+		if (seq == TCP_SKB_CB(skb1)->end_seq) {
+			__skb_append(skb1, skb);
+
+			if (!tp->rx_opt.num_sacks ||
+			    tp->selective_acks[0].end_seq != seq)
+				goto add_sack;
+
+			/* Common case: data arrive in order after hole. */
+			tp->selective_acks[0].end_seq = end_seq;
+			return;
+		}
+
+		/* Find place to insert this segment. */
+		do {
+			if (!after(TCP_SKB_CB(skb1)->seq, seq))
+				break;
+		} while ((skb1 = skb1->prev) !=
+			 (struct sk_buff*)&tp->out_of_order_queue);
+
+		/* Do skb overlap to previous one? */
+		if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
+		    before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+				/* All the bits are present. Drop. */
+				__kfree_skb(skb);
+				tcp_dsack_set(tp, seq, end_seq);
+				goto add_sack;
+			}
+			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+				/* Partial overlap. */
+				tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
+			} else {
+				skb1 = skb1->prev;
+			}
+		}
+		__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
+		
+		/* And clean segments covered by new one as whole. */
+		while ((skb1 = skb->next) !=
+		       (struct sk_buff*)&tp->out_of_order_queue &&
+		       after(end_seq, TCP_SKB_CB(skb1)->seq)) {
+		       if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
+			       break;
+		       }
+		       __skb_unlink(skb1, skb1->list);
+		       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
+		       __kfree_skb(skb1);
+		}
+
+add_sack:
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_new_ofo_skb(sk, seq, end_seq);
+	}
+}
+
+/* Collapse contiguous sequence of skbs head..tail with
+ * sequence numbers start..end.
+ * Segments with FIN/SYN are not collapsed (only because this
+ * simplifies code)
+ */
+static void
+tcp_collapse(struct sock *sk, struct sk_buff *head,
+	     struct sk_buff *tail, u32 start, u32 end)
+{
+	struct sk_buff *skb;
+
+	/* First, check that queue is collapsable and find
+	 * the point where collapsing can be useful. */
+	for (skb = head; skb != tail; ) {
+		/* No new bits? It is possible on ofo queue. */
+		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
+			struct sk_buff *next = skb->next;
+			__skb_unlink(skb, skb->list);
+			__kfree_skb(skb);
+			NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+			skb = next;
+			continue;
+		}
+
+		/* The first skb to collapse is:
+		 * - not SYN/FIN and
+		 * - bloated or contains data before "start" or
+		 *   overlaps to the next one.
+		 */
+		if (!skb->h.th->syn && !skb->h.th->fin &&
+		    (tcp_win_from_space(skb->truesize) > skb->len ||
+		     before(TCP_SKB_CB(skb)->seq, start) ||
+		     (skb->next != tail &&
+		      TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
+			break;
+
+		/* Decided to skip this, advance start seq. */
+		start = TCP_SKB_CB(skb)->end_seq;
+		skb = skb->next;
+	}
+	if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+		return;
+
+	while (before(start, end)) {
+		struct sk_buff *nskb;
+		int header = skb_headroom(skb);
+		int copy = SKB_MAX_ORDER(header, 0);
+
+		/* Too big header? This can happen with IPv6. */
+		if (copy < 0)
+			return;
+		if (end-start < copy)
+			copy = end-start;
+		nskb = alloc_skb(copy+header, GFP_ATOMIC);
+		if (!nskb)
+			return;
+		skb_reserve(nskb, header);
+		memcpy(nskb->head, skb->head, header);
+		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
+		nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
+		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
+		__skb_insert(nskb, skb->prev, skb, skb->list);
+		sk_stream_set_owner_r(nskb, sk);
+
+		/* Copy data, releasing collapsed skbs. */
+		while (copy > 0) {
+			int offset = start - TCP_SKB_CB(skb)->seq;
+			int size = TCP_SKB_CB(skb)->end_seq - start;
+
+			if (offset < 0) BUG();
+			if (size > 0) {
+				size = min(copy, size);
+				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
+					BUG();
+				TCP_SKB_CB(nskb)->end_seq += size;
+				copy -= size;
+				start += size;
+			}
+			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
+				struct sk_buff *next = skb->next;
+				__skb_unlink(skb, skb->list);
+				__kfree_skb(skb);
+				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+				skb = next;
+				if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+					return;
+			}
+		}
+	}
+}
+
+/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
+ * and tcp_collapse() them until all the queue is collapsed.
+ */
+static void tcp_collapse_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
+	struct sk_buff *head;
+	u32 start, end;
+
+	if (skb == NULL)
+		return;
+
+	start = TCP_SKB_CB(skb)->seq;
+	end = TCP_SKB_CB(skb)->end_seq;
+	head = skb;
+
+	for (;;) {
+		skb = skb->next;
+
+		/* Segment is terminated when we see gap or when
+		 * we are at the end of all the queue. */
+		if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
+		    after(TCP_SKB_CB(skb)->seq, end) ||
+		    before(TCP_SKB_CB(skb)->end_seq, start)) {
+			tcp_collapse(sk, head, skb, start, end);
+			head = skb;
+			if (skb == (struct sk_buff *)&tp->out_of_order_queue)
+				break;
+			/* Start new segment */
+			start = TCP_SKB_CB(skb)->seq;
+			end = TCP_SKB_CB(skb)->end_seq;
+		} else {
+			if (before(TCP_SKB_CB(skb)->seq, start))
+				start = TCP_SKB_CB(skb)->seq;
+			if (after(TCP_SKB_CB(skb)->end_seq, end))
+				end = TCP_SKB_CB(skb)->end_seq;
+		}
+	}
+}
+
+/* Reduce allocated memory if we can, trying to get
+ * the socket within its memory limits again.
+ *
+ * Return less than zero if we should start dropping frames
+ * until the socket owning process reads some of the data
+ * to stabilize the situation.
+ */
+static int tcp_prune_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk); 
+
+	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
+
+	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
+
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+		tcp_clamp_window(sk, tp);
+	else if (tcp_memory_pressure)
+		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+
+	tcp_collapse_ofo_queue(sk);
+	tcp_collapse(sk, sk->sk_receive_queue.next,
+		     (struct sk_buff*)&sk->sk_receive_queue,
+		     tp->copied_seq, tp->rcv_nxt);
+	sk_stream_mem_reclaim(sk);
+
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
+	/* Collapsing did not help, destructive actions follow.
+	 * This must not ever occur. */
+
+	/* First, purge the out_of_order queue. */
+	if (skb_queue_len(&tp->out_of_order_queue)) {
+		NET_ADD_STATS_BH(LINUX_MIB_OFOPRUNED, 
+				 skb_queue_len(&tp->out_of_order_queue));
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_stream_mem_reclaim(sk);
+	}
+
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
+	/* If we are really being abused, tell the caller to silently
+	 * drop receive data on the floor.  It will get retransmitted
+	 * and hopefully then we'll have sufficient space.
+	 */
+	NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
+
+	/* Massive buffer overcommit. */
+	tp->pred_flags = 0;
+	return -1;
+}
+
+
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+void tcp_cwnd_application_limited(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->ca_state == TCP_CA_Open &&
+	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		/* Limited by application or receiver window. */
+		u32 win_used = max(tp->snd_cwnd_used, 2U);
+		if (win_used < tp->snd_cwnd) {
+			tp->snd_ssthresh = tcp_current_ssthresh(tp);
+			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+		}
+		tp->snd_cwnd_used = 0;
+	}
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+
+/* When incoming ACK allowed to free some skb from write_queue,
+ * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
+ * on the exit from tcp input handler.
+ *
+ * PROBLEM: sndbuf expansion does not work well with largesend.
+ */
+static void tcp_new_space(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->packets_out < tp->snd_cwnd &&
+	    !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
+	    !tcp_memory_pressure &&
+	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
+			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
+		    demanded = max_t(unsigned int, tp->snd_cwnd,
+						   tp->reordering + 1);
+		sndmem *= 2*demanded;
+		if (sndmem > sk->sk_sndbuf)
+			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	}
+
+	sk->sk_write_space(sk);
+}
+
+static inline void tcp_check_space(struct sock *sk)
+{
+	if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
+		sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
+		if (sk->sk_socket &&
+		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+			tcp_new_space(sk);
+	}
+}
+
+static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) ||
+	    tcp_packets_in_flight(tp) >= tp->snd_cwnd ||
+	    tcp_write_xmit(sk, tp->nonagle))
+		tcp_check_probe_timer(sk, tp);
+}
+
+static __inline__ void tcp_data_snd_check(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_send_head;
+
+	if (skb != NULL)
+		__tcp_data_snd_check(sk, skb);
+	tcp_check_space(sk);
+}
+
+/*
+ * Check if sending an ack is needed.
+ */
+static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	    /* More than one full frame received... */
+	if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+	     /* ... and right edge of window advances far enough.
+	      * (tcp_recvmsg() will send ACK otherwise). Or...
+	      */
+	     && __tcp_select_window(sk) >= tp->rcv_wnd) ||
+	    /* We ACK each frame or... */
+	    tcp_in_quickack_mode(tp) ||
+	    /* We have out of order data. */
+	    (ofo_possible &&
+	     skb_peek(&tp->out_of_order_queue))) {
+		/* Then ack it now */
+		tcp_send_ack(sk);
+	} else {
+		/* Else, send delayed ack. */
+		tcp_send_delayed_ack(sk);
+	}
+}
+
+static __inline__ void tcp_ack_snd_check(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	if (!tcp_ack_scheduled(tp)) {
+		/* We sent a data segment already. */
+		return;
+	}
+	__tcp_ack_snd_check(sk, 1);
+}
+
+/*
+ *	This routine is only called when we have urgent data
+ *	signalled. Its the 'slow' part of tcp_urg. It could be
+ *	moved inline now as tcp_urg is only called from one
+ *	place. We handle URGent data wrong. We have to - as
+ *	BSD still doesn't use the correction from RFC961.
+ *	For 1003.1g we should support a new option TCP_STDURG to permit
+ *	either form (or just set the sysctl tcp_stdurg).
+ */
+ 
+static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 ptr = ntohs(th->urg_ptr);
+
+	if (ptr && !sysctl_tcp_stdurg)
+		ptr--;
+	ptr += ntohl(th->seq);
+
+	/* Ignore urgent data that we've already seen and read. */
+	if (after(tp->copied_seq, ptr))
+		return;
+
+	/* Do not replay urg ptr.
+	 *
+	 * NOTE: interesting situation not covered by specs.
+	 * Misbehaving sender may send urg ptr, pointing to segment,
+	 * which we already have in ofo queue. We are not able to fetch
+	 * such data and will stay in TCP_URG_NOTYET until will be eaten
+	 * by recvmsg(). Seems, we are not obliged to handle such wicked
+	 * situations. But it is worth to think about possibility of some
+	 * DoSes using some hypothetical application level deadlock.
+	 */
+	if (before(ptr, tp->rcv_nxt))
+		return;
+
+	/* Do we already have a newer (or duplicate) urgent pointer? */
+	if (tp->urg_data && !after(ptr, tp->urg_seq))
+		return;
+
+	/* Tell the world about our new urgent pointer. */
+	sk_send_sigurg(sk);
+
+	/* We may be adding urgent data when the last byte read was
+	 * urgent. To do this requires some care. We cannot just ignore
+	 * tp->copied_seq since we would read the last urgent byte again
+	 * as data, nor can we alter copied_seq until this data arrives
+	 * or we break the sematics of SIOCATMARK (and thus sockatmark())
+	 *
+	 * NOTE. Double Dutch. Rendering to plain English: author of comment
+	 * above did something sort of 	send("A", MSG_OOB); send("B", MSG_OOB);
+	 * and expect that both A and B disappear from stream. This is _wrong_.
+	 * Though this happens in BSD with high probability, this is occasional.
+	 * Any application relying on this is buggy. Note also, that fix "works"
+	 * only in this artificial test. Insert some normal data between A and B and we will
+	 * decline of BSD again. Verdict: it is better to remove to trap
+	 * buggy users.
+	 */
+	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
+	    !sock_flag(sk, SOCK_URGINLINE) &&
+	    tp->copied_seq != tp->rcv_nxt) {
+		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+		tp->copied_seq++;
+		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
+			__skb_unlink(skb, skb->list);
+			__kfree_skb(skb);
+		}
+	}
+
+	tp->urg_data   = TCP_URG_NOTYET;
+	tp->urg_seq    = ptr;
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+}
+
+/* This is the 'fast' part of urgent handling. */
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Check if we get a new urgent pointer - normally not. */
+	if (th->urg)
+		tcp_check_urg(sk,th);
+
+	/* Do we wait for any urgent data? - normally not... */
+	if (tp->urg_data == TCP_URG_NOTYET) {
+		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
+			  th->syn;
+
+		/* Is the urgent pointer pointing into this packet? */	 
+		if (ptr < skb->len) {
+			u8 tmp;
+			if (skb_copy_bits(skb, ptr, &tmp, 1))
+				BUG();
+			tp->urg_data = TCP_URG_VALID | tmp;
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_data_ready(sk, 0);
+		}
+	}
+}
+
+static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int chunk = skb->len - hlen;
+	int err;
+
+	local_bh_enable();
+	if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+		err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
+	else
+		err = skb_copy_and_csum_datagram_iovec(skb, hlen,
+						       tp->ucopy.iov);
+
+	if (!err) {
+		tp->ucopy.len -= chunk;
+		tp->copied_seq += chunk;
+		tcp_rcv_space_adjust(sk);
+	}
+
+	local_bh_disable();
+	return err;
+}
+
+static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+{
+	int result;
+
+	if (sock_owned_by_user(sk)) {
+		local_bh_enable();
+		result = __tcp_checksum_complete(skb);
+		local_bh_disable();
+	} else {
+		result = __tcp_checksum_complete(skb);
+	}
+	return result;
+}
+
+static __inline__ int
+tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+{
+	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		__tcp_checksum_complete_user(sk, skb);
+}
+
+/*
+ *	TCP receive function for the ESTABLISHED state. 
+ *
+ *	It is split into a fast path and a slow path. The fast path is 
+ * 	disabled when:
+ *	- A zero window was announced from us - zero window probing
+ *        is only handled properly in the slow path. 
+ *	- Out of order segments arrived.
+ *	- Urgent data is expected.
+ *	- There is no buffer space left
+ *	- Unexpected TCP flags/window values/header lengths are received
+ *	  (detected by checking the TCP header against pred_flags) 
+ *	- Data is sent in both directions. Fast path only supports pure senders
+ *	  or pure receivers (this means either the sequence number or the ack
+ *	  value must stay constant)
+ *	- Unexpected TCP option.
+ *
+ *	When these conditions are not satisfied it drops into a standard 
+ *	receive procedure patterned after RFC793 to handle all cases.
+ *	The first three cases are guaranteed by proper pred_flags setting,
+ *	the rest is checked inline. Fast processing is turned on in 
+ *	tcp_data_queue when everything is OK.
+ */
+int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			struct tcphdr *th, unsigned len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/*
+	 *	Header prediction.
+	 *	The code loosely follows the one in the famous 
+	 *	"30 instruction TCP receive" Van Jacobson mail.
+	 *	
+	 *	Van's trick is to deposit buffers into socket queue 
+	 *	on a device interrupt, to call tcp_recv function
+	 *	on the receive process context and checksum and copy
+	 *	the buffer to user space. smart...
+	 *
+	 *	Our current scheme is not silly either but we take the 
+	 *	extra cost of the net_bh soft interrupt processing...
+	 *	We do checksum and copy also but from device to kernel.
+	 */
+
+	tp->rx_opt.saw_tstamp = 0;
+
+	/*	pred_flags is 0xS?10 << 16 + snd_wnd
+	 *	if header_predition is to be made
+	 *	'S' will always be tp->tcp_header_len >> 2
+	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
+	 *  turn it off	(when there are holes in the receive 
+	 *	 space for instance)
+	 *	PSH flag is ignored.
+	 */
+
+	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
+		TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+		int tcp_header_len = tp->tcp_header_len;
+
+		/* Timestamp header prediction: tcp_header_len
+		 * is automatically equal to th->doff*4 due to pred_flags
+		 * match.
+		 */
+
+		/* Check timestamp */
+		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
+			__u32 *ptr = (__u32 *)(th + 1);
+
+			/* No? Slow path! */
+			if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+					  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+				goto slow_path;
+
+			tp->rx_opt.saw_tstamp = 1;
+			++ptr; 
+			tp->rx_opt.rcv_tsval = ntohl(*ptr);
+			++ptr;
+			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+
+			/* If PAWS failed, check it more carefully in slow path */
+			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
+				goto slow_path;
+
+			/* DO NOT update ts_recent here, if checksum fails
+			 * and timestamp was corrupted part, it will result
+			 * in a hung connection since we will drop all
+			 * future packets due to the PAWS test.
+			 */
+		}
+
+		if (len <= tcp_header_len) {
+			/* Bulk data transfer: sender */
+			if (len == tcp_header_len) {
+				/* Predicted packet is in window by definition.
+				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+				 * Hence, check seq<=rcv_wup reduces to:
+				 */
+				if (tcp_header_len ==
+				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+				    tp->rcv_nxt == tp->rcv_wup)
+					tcp_store_ts_recent(tp);
+
+				tcp_rcv_rtt_measure_ts(tp, skb);
+
+				/* We know that such packets are checksummed
+				 * on entry.
+				 */
+				tcp_ack(sk, skb, 0);
+				__kfree_skb(skb); 
+				tcp_data_snd_check(sk);
+				return 0;
+			} else { /* Header too small */
+				TCP_INC_STATS_BH(TCP_MIB_INERRS);
+				goto discard;
+			}
+		} else {
+			int eaten = 0;
+
+			if (tp->ucopy.task == current &&
+			    tp->copied_seq == tp->rcv_nxt &&
+			    len - tcp_header_len <= tp->ucopy.len &&
+			    sock_owned_by_user(sk)) {
+				__set_current_state(TASK_RUNNING);
+
+				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
+					/* Predicted packet is in window by definition.
+					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+					 * Hence, check seq<=rcv_wup reduces to:
+					 */
+					if (tcp_header_len ==
+					    (sizeof(struct tcphdr) +
+					     TCPOLEN_TSTAMP_ALIGNED) &&
+					    tp->rcv_nxt == tp->rcv_wup)
+						tcp_store_ts_recent(tp);
+
+					tcp_rcv_rtt_measure_ts(tp, skb);
+
+					__skb_pull(skb, tcp_header_len);
+					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+					NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
+					eaten = 1;
+				}
+			}
+			if (!eaten) {
+				if (tcp_checksum_complete_user(sk, skb))
+					goto csum_error;
+
+				/* Predicted packet is in window by definition.
+				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+				 * Hence, check seq<=rcv_wup reduces to:
+				 */
+				if (tcp_header_len ==
+				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+				    tp->rcv_nxt == tp->rcv_wup)
+					tcp_store_ts_recent(tp);
+
+				tcp_rcv_rtt_measure_ts(tp, skb);
+
+				if ((int)skb->truesize > sk->sk_forward_alloc)
+					goto step5;
+
+				NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+
+				/* Bulk data transfer: receiver */
+				__skb_pull(skb,tcp_header_len);
+				__skb_queue_tail(&sk->sk_receive_queue, skb);
+				sk_stream_set_owner_r(skb, sk);
+				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			}
+
+			tcp_event_data_recv(sk, tp, skb);
+
+			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
+				/* Well, only one small jumplet in fast path... */
+				tcp_ack(sk, skb, FLAG_DATA);
+				tcp_data_snd_check(sk);
+				if (!tcp_ack_scheduled(tp))
+					goto no_ack;
+			}
+
+			if (eaten) {
+				if (tcp_in_quickack_mode(tp)) {
+					tcp_send_ack(sk);
+				} else {
+					tcp_send_delayed_ack(sk);
+				}
+			} else {
+				__tcp_ack_snd_check(sk, 0);
+			}
+
+no_ack:
+			if (eaten)
+				__kfree_skb(skb);
+			else
+				sk->sk_data_ready(sk, 0);
+			return 0;
+		}
+	}
+
+slow_path:
+	if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
+		goto csum_error;
+
+	/*
+	 * RFC1323: H1. Apply PAWS check first.
+	 */
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_discard(tp, skb)) {
+		if (!th->rst) {
+			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+			tcp_send_dupack(sk, skb);
+			goto discard;
+		}
+		/* Resets are accepted even if PAWS failed.
+
+		   ts_recent update must be made after we are sure
+		   that the packet is in window.
+		 */
+	}
+
+	/*
+	 *	Standard slow path.
+	 */
+
+	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+		/* RFC793, page 37: "In all states except SYN-SENT, all reset
+		 * (RST) segments are validated by checking their SEQ-fields."
+		 * And page 69: "If an incoming segment is not acceptable,
+		 * an acknowledgment should be sent in reply (unless the RST bit
+		 * is set, if so drop the segment and return)".
+		 */
+		if (!th->rst)
+			tcp_send_dupack(sk, skb);
+		goto discard;
+	}
+
+	if(th->rst) {
+		tcp_reset(sk);
+		goto discard;
+	}
+
+	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
+	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+		tcp_reset(sk);
+		return 1;
+	}
+
+step5:
+	if(th->ack)
+		tcp_ack(sk, skb, FLAG_SLOWPATH);
+
+	tcp_rcv_rtt_measure_ts(tp, skb);
+
+	/* Process urgent data. */
+	tcp_urg(sk, skb, th);
+
+	/* step 7: process the segment text */
+	tcp_data_queue(sk, skb);
+
+	tcp_data_snd_check(sk);
+	tcp_ack_snd_check(sk);
+	return 0;
+
+csum_error:
+	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+
+discard:
+	__kfree_skb(skb);
+	return 0;
+}
+
+static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
+					 struct tcphdr *th, unsigned len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int saved_clamp = tp->rx_opt.mss_clamp;
+
+	tcp_parse_options(skb, &tp->rx_opt, 0);
+
+	if (th->ack) {
+		/* rfc793:
+		 * "If the state is SYN-SENT then
+		 *    first check the ACK bit
+		 *      If the ACK bit is set
+		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
+		 *        a reset (unless the RST bit is set, if so drop
+		 *        the segment and return)"
+		 *
+		 *  We do not send data with SYN, so that RFC-correct
+		 *  test reduces to:
+		 */
+		if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
+			goto reset_and_undo;
+
+		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
+			     tcp_time_stamp)) {
+			NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
+			goto reset_and_undo;
+		}
+
+		/* Now ACK is acceptable.
+		 *
+		 * "If the RST bit is set
+		 *    If the ACK was acceptable then signal the user "error:
+		 *    connection reset", drop the segment, enter CLOSED state,
+		 *    delete TCB, and return."
+		 */
+
+		if (th->rst) {
+			tcp_reset(sk);
+			goto discard;
+		}
+
+		/* rfc793:
+		 *   "fifth, if neither of the SYN or RST bits is set then
+		 *    drop the segment and return."
+		 *
+		 *    See note below!
+		 *                                        --ANK(990513)
+		 */
+		if (!th->syn)
+			goto discard_and_undo;
+
+		/* rfc793:
+		 *   "If the SYN bit is on ...
+		 *    are acceptable then ...
+		 *    (our SYN has been ACKed), change the connection
+		 *    state to ESTABLISHED..."
+		 */
+
+		TCP_ECN_rcv_synack(tp, th);
+		if (tp->ecn_flags&TCP_ECN_OK)
+			sock_set_flag(sk, SOCK_NO_LARGESEND);
+
+		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
+		tcp_ack(sk, skb, FLAG_SLOWPATH);
+
+		/* Ok.. it's good. Set up sequence numbers and
+		 * move to established.
+		 */
+		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+
+		/* RFC1323: The window in SYN & SYN/ACK segments is
+		 * never scaled.
+		 */
+		tp->snd_wnd = ntohs(th->window);
+		tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
+
+		if (!tp->rx_opt.wscale_ok) {
+			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
+			tp->window_clamp = min(tp->window_clamp, 65535U);
+		}
+
+		if (tp->rx_opt.saw_tstamp) {
+			tp->rx_opt.tstamp_ok	   = 1;
+			tp->tcp_header_len =
+				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
+			tcp_store_ts_recent(tp);
+		} else {
+			tp->tcp_header_len = sizeof(struct tcphdr);
+		}
+
+		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
+			tp->rx_opt.sack_ok |= 2;
+
+		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_initialize_rcv_mss(sk);
+
+		/* Remember, tcp_poll() does not lock socket!
+		 * Change state from SYN-SENT only after copied_seq
+		 * is initialized. */
+		tp->copied_seq = tp->rcv_nxt;
+		mb();
+		tcp_set_state(sk, TCP_ESTABLISHED);
+
+		/* Make sure socket is routed, for correct metrics.  */
+		tp->af_specific->rebuild_header(sk);
+
+		tcp_init_metrics(sk);
+
+		/* Prevent spurious tcp_cwnd_restart() on first data
+		 * packet.
+		 */
+		tp->lsndtime = tcp_time_stamp;
+
+		tcp_init_buffer_space(sk);
+
+		if (sock_flag(sk, SOCK_KEEPOPEN))
+			tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
+
+		if (!tp->rx_opt.snd_wscale)
+			__tcp_fast_path_on(tp, tp->snd_wnd);
+		else
+			tp->pred_flags = 0;
+
+		if (!sock_flag(sk, SOCK_DEAD)) {
+			sk->sk_state_change(sk);
+			sk_wake_async(sk, 0, POLL_OUT);
+		}
+
+		if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) {
+			/* Save one ACK. Data will be ready after
+			 * several ticks, if write_pending is set.
+			 *
+			 * It may be deleted, but with this feature tcpdumps
+			 * look so _wonderfully_ clever, that I was not able
+			 * to stand against the temptation 8)     --ANK
+			 */
+			tcp_schedule_ack(tp);
+			tp->ack.lrcvtime = tcp_time_stamp;
+			tp->ack.ato	 = TCP_ATO_MIN;
+			tcp_incr_quickack(tp);
+			tcp_enter_quickack_mode(tp);
+			tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+
+discard:
+			__kfree_skb(skb);
+			return 0;
+		} else {
+			tcp_send_ack(sk);
+		}
+		return -1;
+	}
+
+	/* No ACK in the segment */
+
+	if (th->rst) {
+		/* rfc793:
+		 * "If the RST bit is set
+		 *
+		 *      Otherwise (no ACK) drop the segment and return."
+		 */
+
+		goto discard_and_undo;
+	}
+
+	/* PAWS check. */
+	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
+		goto discard_and_undo;
+
+	if (th->syn) {
+		/* We see SYN without ACK. It is attempt of
+		 * simultaneous connect with crossed SYNs.
+		 * Particularly, it can be connect to self.
+		 */
+		tcp_set_state(sk, TCP_SYN_RECV);
+
+		if (tp->rx_opt.saw_tstamp) {
+			tp->rx_opt.tstamp_ok = 1;
+			tcp_store_ts_recent(tp);
+			tp->tcp_header_len =
+				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+		} else {
+			tp->tcp_header_len = sizeof(struct tcphdr);
+		}
+
+		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+
+		/* RFC1323: The window in SYN & SYN/ACK segments is
+		 * never scaled.
+		 */
+		tp->snd_wnd    = ntohs(th->window);
+		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
+		tp->max_window = tp->snd_wnd;
+
+		TCP_ECN_rcv_syn(tp, th);
+		if (tp->ecn_flags&TCP_ECN_OK)
+			sock_set_flag(sk, SOCK_NO_LARGESEND);
+
+		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_initialize_rcv_mss(sk);
+
+
+		tcp_send_synack(sk);
+#if 0
+		/* Note, we could accept data and URG from this segment.
+		 * There are no obstacles to make this.
+		 *
+		 * However, if we ignore data in ACKless segments sometimes,
+		 * we have no reasons to accept it sometimes.
+		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
+		 * is not flawless. So, discard packet for sanity.
+		 * Uncomment this return to process the data.
+		 */
+		return -1;
+#else
+		goto discard;
+#endif
+	}
+	/* "fifth, if neither of the SYN or RST bits is set then
+	 * drop the segment and return."
+	 */
+
+discard_and_undo:
+	tcp_clear_options(&tp->rx_opt);
+	tp->rx_opt.mss_clamp = saved_clamp;
+	goto discard;
+
+reset_and_undo:
+	tcp_clear_options(&tp->rx_opt);
+	tp->rx_opt.mss_clamp = saved_clamp;
+	return 1;
+}
+
+
+/*
+ *	This function implements the receiving procedure of RFC 793 for
+ *	all states except ESTABLISHED and TIME_WAIT. 
+ *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
+ *	address independent.
+ */
+	
+int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+			  struct tcphdr *th, unsigned len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int queued = 0;
+
+	tp->rx_opt.saw_tstamp = 0;
+
+	switch (sk->sk_state) {
+	case TCP_CLOSE:
+		goto discard;
+
+	case TCP_LISTEN:
+		if(th->ack)
+			return 1;
+
+		if(th->rst)
+			goto discard;
+
+		if(th->syn) {
+			if(tp->af_specific->conn_request(sk, skb) < 0)
+				return 1;
+
+			init_westwood(sk);
+			init_bictcp(tp);
+
+			/* Now we have several options: In theory there is 
+			 * nothing else in the frame. KA9Q has an option to 
+			 * send data with the syn, BSD accepts data with the
+			 * syn up to the [to be] advertised window and 
+			 * Solaris 2.1 gives you a protocol error. For now 
+			 * we just ignore it, that fits the spec precisely 
+			 * and avoids incompatibilities. It would be nice in
+			 * future to drop through and process the data.
+			 *
+			 * Now that TTCP is starting to be used we ought to 
+			 * queue this data.
+			 * But, this leaves one open to an easy denial of
+		 	 * service attack, and SYN cookies can't defend
+			 * against this problem. So, we drop the data
+			 * in the interest of security over speed.
+			 */
+			goto discard;
+		}
+		goto discard;
+
+	case TCP_SYN_SENT:
+		init_westwood(sk);
+		init_bictcp(tp);
+
+		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
+		if (queued >= 0)
+			return queued;
+
+		/* Do step6 onward by hand. */
+		tcp_urg(sk, skb, th);
+		__kfree_skb(skb);
+		tcp_data_snd_check(sk);
+		return 0;
+	}
+
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+	    tcp_paws_discard(tp, skb)) {
+		if (!th->rst) {
+			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+			tcp_send_dupack(sk, skb);
+			goto discard;
+		}
+		/* Reset is accepted even if it did not pass PAWS. */
+	}
+
+	/* step 1: check sequence number */
+	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+		if (!th->rst)
+			tcp_send_dupack(sk, skb);
+		goto discard;
+	}
+
+	/* step 2: check RST bit */
+	if(th->rst) {
+		tcp_reset(sk);
+		goto discard;
+	}
+
+	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
+	/* step 3: check security and precedence [ignored] */
+
+	/*	step 4:
+	 *
+	 *	Check for a SYN in window.
+	 */
+	if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+		tcp_reset(sk);
+		return 1;
+	}
+
+	/* step 5: check the ACK field */
+	if (th->ack) {
+		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
+
+		switch(sk->sk_state) {
+		case TCP_SYN_RECV:
+			if (acceptable) {
+				tp->copied_seq = tp->rcv_nxt;
+				mb();
+				tcp_set_state(sk, TCP_ESTABLISHED);
+				sk->sk_state_change(sk);
+
+				/* Note, that this wakeup is only for marginal
+				 * crossed SYN case. Passively open sockets
+				 * are not waked up, because sk->sk_sleep ==
+				 * NULL and sk->sk_socket == NULL.
+				 */
+				if (sk->sk_socket) {
+					sk_wake_async(sk,0,POLL_OUT);
+				}
+
+				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+				tp->snd_wnd = ntohs(th->window) <<
+					      tp->rx_opt.snd_wscale;
+				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
+					    TCP_SKB_CB(skb)->seq);
+
+				/* tcp_ack considers this ACK as duplicate
+				 * and does not calculate rtt.
+				 * Fix it at least with timestamps.
+				 */
+				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+				    !tp->srtt)
+					tcp_ack_saw_tstamp(tp, 0);
+
+				if (tp->rx_opt.tstamp_ok)
+					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
+				/* Make sure socket is routed, for
+				 * correct metrics.
+				 */
+				tp->af_specific->rebuild_header(sk);
+
+				tcp_init_metrics(sk);
+
+				/* Prevent spurious tcp_cwnd_restart() on
+				 * first data packet.
+				 */
+				tp->lsndtime = tcp_time_stamp;
+
+				tcp_initialize_rcv_mss(sk);
+				tcp_init_buffer_space(sk);
+				tcp_fast_path_on(tp);
+			} else {
+				return 1;
+			}
+			break;
+
+		case TCP_FIN_WAIT1:
+			if (tp->snd_una == tp->write_seq) {
+				tcp_set_state(sk, TCP_FIN_WAIT2);
+				sk->sk_shutdown |= SEND_SHUTDOWN;
+				dst_confirm(sk->sk_dst_cache);
+
+				if (!sock_flag(sk, SOCK_DEAD))
+					/* Wake up lingering close() */
+					sk->sk_state_change(sk);
+				else {
+					int tmo;
+
+					if (tp->linger2 < 0 ||
+					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+						tcp_done(sk);
+						NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+						return 1;
+					}
+
+					tmo = tcp_fin_time(tp);
+					if (tmo > TCP_TIMEWAIT_LEN) {
+						tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+					} else if (th->fin || sock_owned_by_user(sk)) {
+						/* Bad case. We could lose such FIN otherwise.
+						 * It is not a big problem, but it looks confusing
+						 * and not so rare event. We still can lose it now,
+						 * if it spins in bh_lock_sock(), but it is really
+						 * marginal case.
+						 */
+						tcp_reset_keepalive_timer(sk, tmo);
+					} else {
+						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+						goto discard;
+					}
+				}
+			}
+			break;
+
+		case TCP_CLOSING:
+			if (tp->snd_una == tp->write_seq) {
+				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+				goto discard;
+			}
+			break;
+
+		case TCP_LAST_ACK:
+			if (tp->snd_una == tp->write_seq) {
+				tcp_update_metrics(sk);
+				tcp_done(sk);
+				goto discard;
+			}
+			break;
+		}
+	} else
+		goto discard;
+
+	/* step 6: check the URG bit */
+	tcp_urg(sk, skb, th);
+
+	/* step 7: process the segment text */
+	switch (sk->sk_state) {
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSING:
+	case TCP_LAST_ACK:
+		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
+			break;
+	case TCP_FIN_WAIT1:
+	case TCP_FIN_WAIT2:
+		/* RFC 793 says to queue data in these states,
+		 * RFC 1122 says we MUST send a reset. 
+		 * BSD 4.4 also does reset.
+		 */
+		if (sk->sk_shutdown & RCV_SHUTDOWN) {
+			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+				NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+				tcp_reset(sk);
+				return 1;
+			}
+		}
+		/* Fall through */
+	case TCP_ESTABLISHED: 
+		tcp_data_queue(sk, skb);
+		queued = 1;
+		break;
+	}
+
+	/* tcp_data could move socket to TIME-WAIT */
+	if (sk->sk_state != TCP_CLOSE) {
+		tcp_data_snd_check(sk);
+		tcp_ack_snd_check(sk);
+	}
+
+	if (!queued) { 
+discard:
+		__kfree_skb(skb);
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(sysctl_tcp_ecn);
+EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(tcp_parse_options);
+EXPORT_SYMBOL(tcp_rcv_established);
+EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
new file mode 100644
index 00000000000..3ac6659869c
--- /dev/null
+++ b/net/ipv4/tcp_ipv4.c
@@ -0,0 +1,2663 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
+ *
+ *		IPv4 specific functions
+ *
+ *
+ *		code split from:
+ *		linux/ipv4/tcp.c
+ *		linux/ipv4/tcp_input.c
+ *		linux/ipv4/tcp_output.c
+ *
+ *		See tcp.c for author information
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *		David S. Miller	:	New socket lookup architecture.
+ *					This code is dedicated to John Dyson.
+ *		David S. Miller :	Change semantics of established hash,
+ *					half is devoted to TIME_WAIT sockets
+ *					and the rest go in the other half.
+ *		Andi Kleen :		Add support for syncookies and fixed
+ *					some bugs: ip options weren't passed to
+ *					the TCP layer, missed a check for an
+ *					ACK bit.
+ *		Andi Kleen :		Implemented fast path mtu discovery.
+ *	     				Fixed many serious bugs in the
+ *					open_request handling and moved
+ *					most of it into the af independent code.
+ *					Added tail drop and some other bugfixes.
+ *					Added new listen sematics.
+ *		Mike McLagan	:	Routing by source
+ *	Juan Jose Ciarlante:		ip_dynaddr bits
+ *		Andi Kleen:		various fixes.
+ *	Vitaly E. Lavrov	:	Transparent proxy revived after year
+ *					coma.
+ *	Andi Kleen		:	Fix new listen.
+ *	Andi Kleen		:	Fix accept error reporting.
+ *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+ *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
+ *					a single port at the same time.
+ */
+
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/cache.h>
+#include <linux/jhash.h>
+#include <linux/init.h>
+#include <linux/times.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <net/inet_common.h>
+#include <net/xfrm.h>
+
+#include <linux/inet.h>
+#include <linux/ipv6.h>
+#include <linux/stddef.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+extern int sysctl_ip_dynaddr;
+int sysctl_tcp_tw_reuse;
+int sysctl_tcp_low_latency;
+
+/* Check TCP sequence numbers in ICMP packets. */
+#define ICMP_MIN_LENGTH 8
+
+/* Socket used for sending RSTs */
+static struct socket *tcp_socket;
+
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
+		       struct sk_buff *skb);
+
+struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
+	.__tcp_lhash_lock	=	RW_LOCK_UNLOCKED,
+	.__tcp_lhash_users	=	ATOMIC_INIT(0),
+	.__tcp_lhash_wait
+	  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
+	.__tcp_portalloc_lock	=	SPIN_LOCK_UNLOCKED
+};
+
+/*
+ * This array holds the first and last local port number.
+ * For high-usage systems, use sysctl to change this to
+ * 32768-61000
+ */
+int sysctl_local_port_range[2] = { 1024, 4999 };
+int tcp_port_rover = 1024 - 1;
+
+static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
+				 __u32 faddr, __u16 fport)
+{
+	int h = (laddr ^ lport) ^ (faddr ^ fport);
+	h ^= h >> 16;
+	h ^= h >> 8;
+	return h & (tcp_ehash_size - 1);
+}
+
+static __inline__ int tcp_sk_hashfn(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	__u32 laddr = inet->rcv_saddr;
+	__u16 lport = inet->num;
+	__u32 faddr = inet->daddr;
+	__u16 fport = inet->dport;
+
+	return tcp_hashfn(laddr, lport, faddr, fport);
+}
+
+/* Allocate and initialize a new TCP local port bind bucket.
+ * The bindhash mutex for snum's hash chain must be held here.
+ */
+struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
+					  unsigned short snum)
+{
+	struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
+						      SLAB_ATOMIC);
+	if (tb) {
+		tb->port = snum;
+		tb->fastreuse = 0;
+		INIT_HLIST_HEAD(&tb->owners);
+		hlist_add_head(&tb->node, &head->chain);
+	}
+	return tb;
+}
+
+/* Caller must hold hashbucket lock for this tb with local BH disabled */
+void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
+{
+	if (hlist_empty(&tb->owners)) {
+		__hlist_del(&tb->node);
+		kmem_cache_free(tcp_bucket_cachep, tb);
+	}
+}
+
+/* Caller must disable local BH processing. */
+static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+	struct tcp_bind_hashbucket *head =
+				&tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
+	struct tcp_bind_bucket *tb;
+
+	spin_lock(&head->lock);
+	tb = tcp_sk(sk)->bind_hash;
+	sk_add_bind_node(child, &tb->owners);
+	tcp_sk(child)->bind_hash = tb;
+	spin_unlock(&head->lock);
+}
+
+inline void tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+	local_bh_disable();
+	__tcp_inherit_port(sk, child);
+	local_bh_enable();
+}
+
+void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
+		   unsigned short snum)
+{
+	inet_sk(sk)->num = snum;
+	sk_add_bind_node(sk, &tb->owners);
+	tcp_sk(sk)->bind_hash = tb;
+}
+
+static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
+{
+	const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
+	struct sock *sk2;
+	struct hlist_node *node;
+	int reuse = sk->sk_reuse;
+
+	sk_for_each_bound(sk2, node, &tb->owners) {
+		if (sk != sk2 &&
+		    !tcp_v6_ipv6only(sk2) &&
+		    (!sk->sk_bound_dev_if ||
+		     !sk2->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+			if (!reuse || !sk2->sk_reuse ||
+			    sk2->sk_state == TCP_LISTEN) {
+				const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
+				if (!sk2_rcv_saddr || !sk_rcv_saddr ||
+				    sk2_rcv_saddr == sk_rcv_saddr)
+					break;
+			}
+		}
+	}
+	return node != NULL;
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ */
+static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	struct tcp_bind_hashbucket *head;
+	struct hlist_node *node;
+	struct tcp_bind_bucket *tb;
+	int ret;
+
+	local_bh_disable();
+	if (!snum) {
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int remaining = (high - low) + 1;
+		int rover;
+
+		spin_lock(&tcp_portalloc_lock);
+		rover = tcp_port_rover;
+		do {
+			rover++;
+			if (rover < low || rover > high)
+				rover = low;
+			head = &tcp_bhash[tcp_bhashfn(rover)];
+			spin_lock(&head->lock);
+			tb_for_each(tb, node, &head->chain)
+				if (tb->port == rover)
+					goto next;
+			break;
+		next:
+			spin_unlock(&head->lock);
+		} while (--remaining > 0);
+		tcp_port_rover = rover;
+		spin_unlock(&tcp_portalloc_lock);
+
+		/* Exhausted local port range during search? */
+		ret = 1;
+		if (remaining <= 0)
+			goto fail;
+
+		/* OK, here is the one we will use.  HEAD is
+		 * non-NULL and we hold it's mutex.
+		 */
+		snum = rover;
+	} else {
+		head = &tcp_bhash[tcp_bhashfn(snum)];
+		spin_lock(&head->lock);
+		tb_for_each(tb, node, &head->chain)
+			if (tb->port == snum)
+				goto tb_found;
+	}
+	tb = NULL;
+	goto tb_not_found;
+tb_found:
+	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse > 1)
+			goto success;
+		if (tb->fastreuse > 0 &&
+		    sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
+			goto success;
+		} else {
+			ret = 1;
+			if (tcp_bind_conflict(sk, tb))
+				goto fail_unlock;
+		}
+	}
+tb_not_found:
+	ret = 1;
+	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
+		goto fail_unlock;
+	if (hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+			tb->fastreuse = 1;
+		else
+			tb->fastreuse = 0;
+	} else if (tb->fastreuse &&
+		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+		tb->fastreuse = 0;
+success:
+	if (!tcp_sk(sk)->bind_hash)
+		tcp_bind_hash(sk, tb, snum);
+	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
+ 	ret = 0;
+
+fail_unlock:
+	spin_unlock(&head->lock);
+fail:
+	local_bh_enable();
+	return ret;
+}
+
+/* Get rid of any references to a local port held by the
+ * given sock.
+ */
+static void __tcp_put_port(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
+	struct tcp_bind_bucket *tb;
+
+	spin_lock(&head->lock);
+	tb = tcp_sk(sk)->bind_hash;
+	__sk_del_bind_node(sk);
+	tcp_sk(sk)->bind_hash = NULL;
+	inet->num = 0;
+	tcp_bucket_destroy(tb);
+	spin_unlock(&head->lock);
+}
+
+void tcp_put_port(struct sock *sk)
+{
+	local_bh_disable();
+	__tcp_put_port(sk);
+	local_bh_enable();
+}
+
+/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
+ * Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+ * this, _but_ remember, it adds useless work on UP machines (wake up each
+ * exclusive lock release). It should be ifdefed really.
+ */
+
+void tcp_listen_wlock(void)
+{
+	write_lock(&tcp_lhash_lock);
+
+	if (atomic_read(&tcp_lhash_users)) {
+		DEFINE_WAIT(wait);
+
+		for (;;) {
+			prepare_to_wait_exclusive(&tcp_lhash_wait,
+						&wait, TASK_UNINTERRUPTIBLE);
+			if (!atomic_read(&tcp_lhash_users))
+				break;
+			write_unlock_bh(&tcp_lhash_lock);
+			schedule();
+			write_lock_bh(&tcp_lhash_lock);
+		}
+
+		finish_wait(&tcp_lhash_wait, &wait);
+	}
+}
+
+static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	BUG_TRAP(sk_unhashed(sk));
+	if (listen_possible && sk->sk_state == TCP_LISTEN) {
+		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+		lock = &tcp_lhash_lock;
+		tcp_listen_wlock();
+	} else {
+		list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
+		lock = &tcp_ehash[sk->sk_hashent].lock;
+		write_lock(lock);
+	}
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+	if (listen_possible && sk->sk_state == TCP_LISTEN)
+		wake_up(&tcp_lhash_wait);
+}
+
+static void tcp_v4_hash(struct sock *sk)
+{
+	if (sk->sk_state != TCP_CLOSE) {
+		local_bh_disable();
+		__tcp_v4_hash(sk, 1);
+		local_bh_enable();
+	}
+}
+
+void tcp_unhash(struct sock *sk)
+{
+	rwlock_t *lock;
+
+	if (sk_unhashed(sk))
+		goto ende;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		local_bh_disable();
+		tcp_listen_wlock();
+		lock = &tcp_lhash_lock;
+	} else {
+		struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
+		lock = &head->lock;
+		write_lock_bh(&head->lock);
+	}
+
+	if (__sk_del_node_init(sk))
+		sock_prot_dec_use(sk->sk_prot);
+	write_unlock_bh(lock);
+
+ ende:
+	if (sk->sk_state == TCP_LISTEN)
+		wake_up(&tcp_lhash_wait);
+}
+
+/* Don't inline this cruft.  Here are some nice properties to
+ * exploit here.  The BSD API does not allow a listening TCP
+ * to specify the remote port nor the remote address for the
+ * connection.  So always assume those are both wildcarded
+ * during the search since they can never be otherwise.
+ */
+static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
+					     unsigned short hnum, int dif)
+{
+	struct sock *result = NULL, *sk;
+	struct hlist_node *node;
+	int score, hiscore;
+
+	hiscore=-1;
+	sk_for_each(sk, node, head) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+			__u32 rcv_saddr = inet->rcv_saddr;
+
+			score = (sk->sk_family == PF_INET ? 1 : 0);
+			if (rcv_saddr) {
+				if (rcv_saddr != daddr)
+					continue;
+				score+=2;
+			}
+			if (sk->sk_bound_dev_if) {
+				if (sk->sk_bound_dev_if != dif)
+					continue;
+				score+=2;
+			}
+			if (score == 5)
+				return sk;
+			if (score > hiscore) {
+				hiscore = score;
+				result = sk;
+			}
+		}
+	}
+	return result;
+}
+
+/* Optimize the common listener case. */
+static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
+		unsigned short hnum, int dif)
+{
+	struct sock *sk = NULL;
+	struct hlist_head *head;
+
+	read_lock(&tcp_lhash_lock);
+	head = &tcp_listening_hash[tcp_lhashfn(hnum)];
+	if (!hlist_empty(head)) {
+		struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+		if (inet->num == hnum && !sk->sk_node.next &&
+		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+		    !sk->sk_bound_dev_if)
+			goto sherry_cache;
+		sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
+	}
+	if (sk) {
+sherry_cache:
+		sock_hold(sk);
+	}
+	read_unlock(&tcp_lhash_lock);
+	return sk;
+}
+
+/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
+ * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
+ *
+ * Local BH must be disabled here.
+ */
+
+static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
+						       u32 daddr, u16 hnum,
+						       int dif)
+{
+	struct tcp_ehash_bucket *head;
+	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
+	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
+	struct sock *sk;
+	struct hlist_node *node;
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	int hash = tcp_hashfn(daddr, hnum, saddr, sport);
+	head = &tcp_ehash[hash];
+	read_lock(&head->lock);
+	sk_for_each(sk, node, &head->chain) {
+		if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
+			goto hit; /* You sunk my battleship! */
+	}
+
+	/* Must check for a TIME_WAIT'er before going to listener hash. */
+	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
+		if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
+			goto hit;
+	}
+	sk = NULL;
+out:
+	read_unlock(&head->lock);
+	return sk;
+hit:
+	sock_hold(sk);
+	goto out;
+}
+
+static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
+					   u32 daddr, u16 hnum, int dif)
+{
+	struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
+						      daddr, hnum, dif);
+
+	return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
+}
+
+inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
+				  u16 dport, int dif)
+{
+	struct sock *sk;
+
+	local_bh_disable();
+	sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
+	local_bh_enable();
+
+	return sk;
+}
+
+EXPORT_SYMBOL_GPL(tcp_v4_lookup);
+
+static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+	return secure_tcp_sequence_number(skb->nh.iph->daddr,
+					  skb->nh.iph->saddr,
+					  skb->h.th->dest,
+					  skb->h.th->source);
+}
+
+/* called with local bh disabled */
+static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
+				      struct tcp_tw_bucket **twp)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	u32 daddr = inet->rcv_saddr;
+	u32 saddr = inet->daddr;
+	int dif = sk->sk_bound_dev_if;
+	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
+	__u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
+	int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
+	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+	struct sock *sk2;
+	struct hlist_node *node;
+	struct tcp_tw_bucket *tw;
+
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
+		tw = (struct tcp_tw_bucket *)sk2;
+
+		if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			/* With PAWS, it is safe from the viewpoint
+			   of data integrity. Even without PAWS it
+			   is safe provided sequence spaces do not
+			   overlap i.e. at data rates <= 80Mbit/sec.
+
+			   Actually, the idea is close to VJ's one,
+			   only timestamp cache is held not per host,
+			   but per port pair and TW bucket is used
+			   as state holder.
+
+			   If TW bucket has been already destroyed we
+			   fall back to VJ's scheme and use initial
+			   timestamp retrieved from peer table.
+			 */
+			if (tw->tw_ts_recent_stamp &&
+			    (!twp || (sysctl_tcp_tw_reuse &&
+				      xtime.tv_sec -
+				      tw->tw_ts_recent_stamp > 1))) {
+				if ((tp->write_seq =
+						tw->tw_snd_nxt + 65535 + 2) == 0)
+					tp->write_seq = 1;
+				tp->rx_opt.ts_recent	   = tw->tw_ts_recent;
+				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+				sock_hold(sk2);
+				goto unique;
+			} else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	/* Must record num and sport now. Otherwise we will see
+	 * in hash table socket with a funny identity. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hashent = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		tcp_tw_deschedule(tw);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		tcp_tw_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 connect_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+
+	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, 
+					 inet->dport);
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ */
+static inline int tcp_v4_hash_connect(struct sock *sk)
+{
+	unsigned short snum = inet_sk(sk)->num;
+ 	struct tcp_bind_hashbucket *head;
+ 	struct tcp_bind_bucket *tb;
+	int ret;
+
+ 	if (!snum) {
+ 		int low = sysctl_local_port_range[0];
+ 		int high = sysctl_local_port_range[1];
+		int range = high - low;
+ 		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + connect_port_offset(sk);
+		struct hlist_node *node;
+ 		struct tcp_tw_bucket *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &tcp_bhash[tcp_bhashfn(port)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			tb_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__tcp_v4_check_established(sk,
+									port,
+									&tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+
+ 			tb = tcp_bucket_create(head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		tcp_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__tcp_v4_hash(sk, 0);
+ 		}
+ 		spin_unlock(&head->lock);
+
+ 		if (tw) {
+ 			tcp_tw_deschedule(tw);
+ 			tcp_tw_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
+
+ 	head  = &tcp_bhash[tcp_bhashfn(snum)];
+ 	tb  = tcp_sk(sk)->bind_hash;
+	spin_lock_bh(&head->lock);
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__tcp_v4_hash(sk, 0);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __tcp_v4_check_established(sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+/* This will initiate an outgoing connection. */
+int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	struct rtable *rt;
+	u32 daddr, nexthop;
+	int tmp;
+	int err;
+
+	if (addr_len < sizeof(struct sockaddr_in))
+		return -EINVAL;
+
+	if (usin->sin_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	nexthop = daddr = usin->sin_addr.s_addr;
+	if (inet->opt && inet->opt->srr) {
+		if (!daddr)
+			return -EINVAL;
+		nexthop = inet->opt->faddr;
+	}
+
+	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			       IPPROTO_TCP,
+			       inet->sport, usin->sin_port, sk);
+	if (tmp < 0)
+		return tmp;
+
+	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+		ip_rt_put(rt);
+		return -ENETUNREACH;
+	}
+
+	if (!inet->opt || !inet->opt->srr)
+		daddr = rt->rt_dst;
+
+	if (!inet->saddr)
+		inet->saddr = rt->rt_src;
+	inet->rcv_saddr = inet->saddr;
+
+	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
+		/* Reset inherited state */
+		tp->rx_opt.ts_recent	   = 0;
+		tp->rx_opt.ts_recent_stamp = 0;
+		tp->write_seq		   = 0;
+	}
+
+	if (sysctl_tcp_tw_recycle &&
+	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
+		struct inet_peer *peer = rt_get_peer(rt);
+
+		/* VJ's idea. We save last timestamp seen from
+		 * the destination in peer table, when entering state TIME-WAIT
+		 * and initialize rx_opt.ts_recent from it, when trying new connection.
+		 */
+
+		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+			tp->rx_opt.ts_recent = peer->tcp_ts;
+		}
+	}
+
+	inet->dport = usin->sin_port;
+	inet->daddr = daddr;
+
+	tp->ext_header_len = 0;
+	if (inet->opt)
+		tp->ext_header_len = inet->opt->optlen;
+
+	tp->rx_opt.mss_clamp = 536;
+
+	/* Socket identity is still unknown (sport may be zero).
+	 * However we set state to SYN-SENT and not releasing socket
+	 * lock select source port, enter ourselves into the hash tables and
+	 * complete initialization after this.
+	 */
+	tcp_set_state(sk, TCP_SYN_SENT);
+	err = tcp_v4_hash_connect(sk);
+	if (err)
+		goto failure;
+
+	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+	if (err)
+		goto failure;
+
+	/* OK, now commit destination to socket.  */
+	__sk_dst_set(sk, &rt->u.dst);
+	tcp_v4_setup_caps(sk, &rt->u.dst);
+
+	if (!tp->write_seq)
+		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
+							   inet->daddr,
+							   inet->sport,
+							   usin->sin_port);
+
+	inet->id = tp->write_seq ^ jiffies;
+
+	err = tcp_connect(sk);
+	rt = NULL;
+	if (err)
+		goto failure;
+
+	return 0;
+
+failure:
+	/* This unhashes the socket and releases the local port, if necessary. */
+	tcp_set_state(sk, TCP_CLOSE);
+	ip_rt_put(rt);
+	sk->sk_route_caps = 0;
+	inet->dport = 0;
+	return err;
+}
+
+static __inline__ int tcp_v4_iif(struct sk_buff *skb)
+{
+	return ((struct rtable *)skb->dst)->rt_iif;
+}
+
+static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
+{
+	return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
+}
+
+static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
+					      struct open_request ***prevp,
+					      __u16 rport,
+					      __u32 raddr, __u32 laddr)
+{
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct open_request *req, **prev;
+
+	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
+	     (req = *prev) != NULL;
+	     prev = &req->dl_next) {
+		if (req->rmt_port == rport &&
+		    req->af.v4_req.rmt_addr == raddr &&
+		    req->af.v4_req.loc_addr == laddr &&
+		    TCP_INET_FAMILY(req->class->family)) {
+			BUG_TRAP(!req->sk);
+			*prevp = prev;
+			break;
+		}
+	}
+
+	return req;
+}
+
+static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+
+	req->expires = jiffies + TCP_TIMEOUT_INIT;
+	req->retrans = 0;
+	req->sk = NULL;
+	req->dl_next = lopt->syn_table[h];
+
+	write_lock(&tp->syn_wait_lock);
+	lopt->syn_table[h] = req;
+	write_unlock(&tp->syn_wait_lock);
+
+	tcp_synq_added(sk);
+}
+
+
+/*
+ * This routine does path mtu discovery as defined in RFC1191.
+ */
+static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
+				     u32 mtu)
+{
+	struct dst_entry *dst;
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
+	 * send out by Linux are always <576bytes so they should go through
+	 * unfragmented).
+	 */
+	if (sk->sk_state == TCP_LISTEN)
+		return;
+
+	/* We don't check in the destentry if pmtu discovery is forbidden
+	 * on this route. We just assume that no packet_to_big packets
+	 * are send back when pmtu discovery is not active.
+     	 * There is a small race when the user changes this flag in the
+	 * route, but I think that's acceptable.
+	 */
+	if ((dst = __sk_dst_check(sk, 0)) == NULL)
+		return;
+
+	dst->ops->update_pmtu(dst, mtu);
+
+	/* Something is about to be wrong... Remember soft error
+	 * for the case, if this connection will not able to recover.
+	 */
+	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
+		sk->sk_err_soft = EMSGSIZE;
+
+	mtu = dst_mtu(dst);
+
+	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
+	    tp->pmtu_cookie > mtu) {
+		tcp_sync_mss(sk, mtu);
+
+		/* Resend the TCP packet because it's
+		 * clear that the old packet has been
+		 * dropped. This is the new "fast" path mtu
+		 * discovery.
+		 */
+		tcp_simple_retransmit(sk);
+	} /* else let the usual retransmit timer handle it */
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.  After adjustment
+ * header points to the first 8 bytes of the tcp header.  We need
+ * to find the appropriate port.
+ *
+ * The locking strategy used here is very "optimistic". When
+ * someone else accesses the socket the ICMP is just dropped
+ * and for some paths there is no check at all.
+ * A more general error queue to queue errors for later handling
+ * is probably better.
+ *
+ */
+
+void tcp_v4_err(struct sk_buff *skb, u32 info)
+{
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+	struct tcp_sock *tp;
+	struct inet_sock *inet;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct sock *sk;
+	__u32 seq;
+	int err;
+
+	if (skb->len < (iph->ihl << 2) + 8) {
+		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		return;
+	}
+
+	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
+			   th->source, tcp_v4_iif(skb));
+	if (!sk) {
+		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		return;
+	}
+	if (sk->sk_state == TCP_TIME_WAIT) {
+		tcp_tw_put((struct tcp_tw_bucket *)sk);
+		return;
+	}
+
+	bh_lock_sock(sk);
+	/* If too many ICMPs get dropped on busy
+	 * servers this needs to be solved differently.
+	 */
+	if (sock_owned_by_user(sk))
+		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+	if (sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	tp = tcp_sk(sk);
+	seq = ntohl(th->seq);
+	if (sk->sk_state != TCP_LISTEN &&
+	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+		goto out;
+	}
+
+	switch (type) {
+	case ICMP_SOURCE_QUENCH:
+		/* Just silently ignore these. */
+		goto out;
+	case ICMP_PARAMETERPROB:
+		err = EPROTO;
+		break;
+	case ICMP_DEST_UNREACH:
+		if (code > NR_ICMP_UNREACH)
+			goto out;
+
+		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+			if (!sock_owned_by_user(sk))
+				do_pmtu_discovery(sk, iph, info);
+			goto out;
+		}
+
+		err = icmp_err_convert[code].errno;
+		break;
+	case ICMP_TIME_EXCEEDED:
+		err = EHOSTUNREACH;
+		break;
+	default:
+		goto out;
+	}
+
+	switch (sk->sk_state) {
+		struct open_request *req, **prev;
+	case TCP_LISTEN:
+		if (sock_owned_by_user(sk))
+			goto out;
+
+		req = tcp_v4_search_req(tp, &prev, th->dest,
+					iph->daddr, iph->saddr);
+		if (!req)
+			goto out;
+
+		/* ICMPs are not backlogged, hence we cannot get
+		   an established socket here.
+		 */
+		BUG_TRAP(!req->sk);
+
+		if (seq != req->snt_isn) {
+			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			goto out;
+		}
+
+		/*
+		 * Still in SYN_RECV, just remove it silently.
+		 * There is no good way to pass the error to the newly
+		 * created socket, and POSIX does not want network
+		 * errors returned from accept().
+		 */
+		tcp_synq_drop(sk, req, prev);
+		goto out;
+
+	case TCP_SYN_SENT:
+	case TCP_SYN_RECV:  /* Cannot happen.
+			       It can f.e. if SYNs crossed.
+			     */
+		if (!sock_owned_by_user(sk)) {
+			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+			sk->sk_err = err;
+
+			sk->sk_error_report(sk);
+
+			tcp_done(sk);
+		} else {
+			sk->sk_err_soft = err;
+		}
+		goto out;
+	}
+
+	/* If we've already connected we will keep trying
+	 * until we time out, or the user gives up.
+	 *
+	 * rfc1122 4.2.3.9 allows to consider as hard errors
+	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
+	 * but it is obsoleted by pmtu discovery).
+	 *
+	 * Note, that in modern internet, where routing is unreliable
+	 * and in each dark corner broken firewalls sit, sending random
+	 * errors ordered by their masters even this two messages finally lose
+	 * their original sense (even Linux sends invalid PORT_UNREACHs)
+	 *
+	 * Now we are in compliance with RFCs.
+	 *							--ANK (980905)
+	 */
+
+	inet = inet_sk(sk);
+	if (!sock_owned_by_user(sk) && inet->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else	{ /* Only an error on timeout */
+		sk->sk_err_soft = err;
+	}
+
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/* This routine computes an IPv4 TCP checksum. */
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
+		       struct sk_buff *skb)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
+		skb->csum = offsetof(struct tcphdr, check);
+	} else {
+		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
+					 csum_partial((char *)th,
+						      th->doff << 2,
+						      skb->csum));
+	}
+}
+
+/*
+ *	This routine will send an RST to the other tcp.
+ *
+ *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
+ *		      for reset.
+ *	Answer: if a packet caused RST, it is not for a socket
+ *		existing in our system, if it is matched to a socket,
+ *		it is just duplicate segment or bug in other side's TCP.
+ *		So that we build reply only basing on parameters
+ *		arrived with segment.
+ *	Exception: precedence violation. We do not implement it in any case.
+ */
+
+static void tcp_v4_send_reset(struct sk_buff *skb)
+{
+	struct tcphdr *th = skb->h.th;
+	struct tcphdr rth;
+	struct ip_reply_arg arg;
+
+	/* Never send a reset in response to a reset. */
+	if (th->rst)
+		return;
+
+	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
+		return;
+
+	/* Swap the send and the receive. */
+	memset(&rth, 0, sizeof(struct tcphdr));
+	rth.dest   = th->source;
+	rth.source = th->dest;
+	rth.doff   = sizeof(struct tcphdr) / 4;
+	rth.rst    = 1;
+
+	if (th->ack) {
+		rth.seq = th->ack_seq;
+	} else {
+		rth.ack = 1;
+		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
+				    skb->len - (th->doff << 2));
+	}
+
+	memset(&arg, 0, sizeof arg);
+	arg.iov[0].iov_base = (unsigned char *)&rth;
+	arg.iov[0].iov_len  = sizeof rth;
+	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
+				      skb->nh.iph->saddr, /*XXX*/
+				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
+	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+
+	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
+
+	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+}
+
+/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
+   outside socket context is ugly, certainly. What can I do?
+ */
+
+static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+			    u32 win, u32 ts)
+{
+	struct tcphdr *th = skb->h.th;
+	struct {
+		struct tcphdr th;
+		u32 tsopt[3];
+	} rep;
+	struct ip_reply_arg arg;
+
+	memset(&rep.th, 0, sizeof(struct tcphdr));
+	memset(&arg, 0, sizeof arg);
+
+	arg.iov[0].iov_base = (unsigned char *)&rep;
+	arg.iov[0].iov_len  = sizeof(rep.th);
+	if (ts) {
+		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+				     (TCPOPT_TIMESTAMP << 8) |
+				     TCPOLEN_TIMESTAMP);
+		rep.tsopt[1] = htonl(tcp_time_stamp);
+		rep.tsopt[2] = htonl(ts);
+		arg.iov[0].iov_len = sizeof(rep);
+	}
+
+	/* Swap the send and the receive. */
+	rep.th.dest    = th->source;
+	rep.th.source  = th->dest;
+	rep.th.doff    = arg.iov[0].iov_len / 4;
+	rep.th.seq     = htonl(seq);
+	rep.th.ack_seq = htonl(ack);
+	rep.th.ack     = 1;
+	rep.th.window  = htons(win);
+
+	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
+				      skb->nh.iph->saddr, /*XXX*/
+				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
+	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+
+	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+
+	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+}
+
+static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+
+	tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
+			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+
+	tcp_tw_put(tw);
+}
+
+static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
+{
+	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
+			req->ts_recent);
+}
+
+static struct dst_entry* tcp_v4_route_req(struct sock *sk,
+					  struct open_request *req)
+{
+	struct rtable *rt;
+	struct ip_options *opt = req->af.v4_req.opt;
+	struct flowi fl = { .oif = sk->sk_bound_dev_if,
+			    .nl_u = { .ip4_u =
+				      { .daddr = ((opt && opt->srr) ?
+						  opt->faddr :
+						  req->af.v4_req.rmt_addr),
+					.saddr = req->af.v4_req.loc_addr,
+					.tos = RT_CONN_FLAGS(sk) } },
+			    .proto = IPPROTO_TCP,
+			    .uli_u = { .ports =
+				       { .sport = inet_sk(sk)->sport,
+					 .dport = req->rmt_port } } };
+
+	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		return NULL;
+	}
+	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
+		ip_rt_put(rt);
+		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		return NULL;
+	}
+	return &rt->u.dst;
+}
+
+/*
+ *	Send a SYN-ACK after having received an ACK.
+ *	This still operates on a open_request only, not on a big
+ *	socket.
+ */
+static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
+			      struct dst_entry *dst)
+{
+	int err = -1;
+	struct sk_buff * skb;
+
+	/* First, grab a route. */
+	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+		goto out;
+
+	skb = tcp_make_synack(sk, dst, req);
+
+	if (skb) {
+		struct tcphdr *th = skb->h.th;
+
+		th->check = tcp_v4_check(th, skb->len,
+					 req->af.v4_req.loc_addr,
+					 req->af.v4_req.rmt_addr,
+					 csum_partial((char *)th, skb->len,
+						      skb->csum));
+
+		err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
+					    req->af.v4_req.rmt_addr,
+					    req->af.v4_req.opt);
+		if (err == NET_XMIT_CN)
+			err = 0;
+	}
+
+out:
+	dst_release(dst);
+	return err;
+}
+
+/*
+ *	IPv4 open_request destructor.
+ */
+static void tcp_v4_or_free(struct open_request *req)
+{
+	if (req->af.v4_req.opt)
+		kfree(req->af.v4_req.opt);
+}
+
+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+	static unsigned long warntime;
+
+	if (time_after(jiffies, (warntime + HZ * 60))) {
+		warntime = jiffies;
+		printk(KERN_INFO
+		       "possible SYN flooding on port %d. Sending cookies.\n",
+		       ntohs(skb->h.th->dest));
+	}
+}
+
+/*
+ * Save and compile IPv4 options into the open_request if needed.
+ */
+static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
+						     struct sk_buff *skb)
+{
+	struct ip_options *opt = &(IPCB(skb)->opt);
+	struct ip_options *dopt = NULL;
+
+	if (opt && opt->optlen) {
+		int opt_size = optlength(opt);
+		dopt = kmalloc(opt_size, GFP_ATOMIC);
+		if (dopt) {
+			if (ip_options_echo(dopt, skb)) {
+				kfree(dopt);
+				dopt = NULL;
+			}
+		}
+	}
+	return dopt;
+}
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+
+struct or_calltable or_ipv4 = {
+	.family		=	PF_INET,
+	.rtx_syn_ack	=	tcp_v4_send_synack,
+	.send_ack	=	tcp_v4_or_send_ack,
+	.destructor	=	tcp_v4_or_free,
+	.send_reset	=	tcp_v4_send_reset,
+};
+
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_options_received tmp_opt;
+	struct open_request *req;
+	__u32 saddr = skb->nh.iph->saddr;
+	__u32 daddr = skb->nh.iph->daddr;
+	__u32 isn = TCP_SKB_CB(skb)->when;
+	struct dst_entry *dst = NULL;
+#ifdef CONFIG_SYN_COOKIES
+	int want_cookie = 0;
+#else
+#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
+#endif
+
+	/* Never answer to SYNs send to broadcast or multicast */
+	if (((struct rtable *)skb->dst)->rt_flags &
+	    (RTCF_BROADCAST | RTCF_MULTICAST))
+		goto drop;
+
+	/* TW buckets are converted to open requests without
+	 * limitations, they conserve resources and peer is
+	 * evidently real one.
+	 */
+	if (tcp_synq_is_full(sk) && !isn) {
+#ifdef CONFIG_SYN_COOKIES
+		if (sysctl_tcp_syncookies) {
+			want_cookie = 1;
+		} else
+#endif
+		goto drop;
+	}
+
+	/* Accept backlog is full. If we have already queued enough
+	 * of warm entries in syn queue, drop request. It is better than
+	 * clogging syn queue with openreqs with exponentially increasing
+	 * timeout.
+	 */
+	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+		goto drop;
+
+	req = tcp_openreq_alloc();
+	if (!req)
+		goto drop;
+
+	tcp_clear_options(&tmp_opt);
+	tmp_opt.mss_clamp = 536;
+	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
+
+	tcp_parse_options(skb, &tmp_opt, 0);
+
+	if (want_cookie) {
+		tcp_clear_options(&tmp_opt);
+		tmp_opt.saw_tstamp = 0;
+	}
+
+	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
+		/* Some OSes (unknown ones, but I see them on web server, which
+		 * contains information interesting only for windows'
+		 * users) do not send their stamp in SYN. It is easy case.
+		 * We simply do not advertise TS support.
+		 */
+		tmp_opt.saw_tstamp = 0;
+		tmp_opt.tstamp_ok  = 0;
+	}
+	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+
+	tcp_openreq_init(req, &tmp_opt, skb);
+
+	req->af.v4_req.loc_addr = daddr;
+	req->af.v4_req.rmt_addr = saddr;
+	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
+	req->class = &or_ipv4;
+	if (!want_cookie)
+		TCP_ECN_create_request(req, skb->h.th);
+
+	if (want_cookie) {
+#ifdef CONFIG_SYN_COOKIES
+		syn_flood_warning(skb);
+#endif
+		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+	} else if (!isn) {
+		struct inet_peer *peer = NULL;
+
+		/* VJ's idea. We save last timestamp seen
+		 * from the destination in peer table, when entering
+		 * state TIME-WAIT, and check against it before
+		 * accepting new connection request.
+		 *
+		 * If "isn" is not zero, this request hit alive
+		 * timewait bucket, so that all the necessary checks
+		 * are made in the function processing timewait state.
+		 */
+		if (tmp_opt.saw_tstamp &&
+		    sysctl_tcp_tw_recycle &&
+		    (dst = tcp_v4_route_req(sk, req)) != NULL &&
+		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
+		    peer->v4daddr == saddr) {
+			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+			    (s32)(peer->tcp_ts - req->ts_recent) >
+							TCP_PAWS_WINDOW) {
+				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
+				dst_release(dst);
+				goto drop_and_free;
+			}
+		}
+		/* Kill the following clause, if you dislike this way. */
+		else if (!sysctl_tcp_syncookies &&
+			 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
+			  (sysctl_max_syn_backlog >> 2)) &&
+			 (!peer || !peer->tcp_ts_stamp) &&
+			 (!dst || !dst_metric(dst, RTAX_RTT))) {
+			/* Without syncookies last quarter of
+			 * backlog is filled with destinations,
+			 * proven to be alive.
+			 * It means that we continue to communicate
+			 * to destinations, already remembered
+			 * to the moment of synflood.
+			 */
+			NETDEBUG(if (net_ratelimit()) \
+					printk(KERN_DEBUG "TCP: drop open "
+							  "request from %u.%u."
+							  "%u.%u/%u\n", \
+					       NIPQUAD(saddr),
+					       ntohs(skb->h.th->source)));
+			dst_release(dst);
+			goto drop_and_free;
+		}
+
+		isn = tcp_v4_init_sequence(sk, skb);
+	}
+	req->snt_isn = isn;
+
+	if (tcp_v4_send_synack(sk, req, dst))
+		goto drop_and_free;
+
+	if (want_cookie) {
+	   	tcp_openreq_free(req);
+	} else {
+		tcp_v4_synq_add(sk, req);
+	}
+	return 0;
+
+drop_and_free:
+	tcp_openreq_free(req);
+drop:
+	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+	return 0;
+}
+
+
+/*
+ * The three way handshake has completed - we got a valid synack -
+ * now create the new socket.
+ */
+struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+				  struct open_request *req,
+				  struct dst_entry *dst)
+{
+	struct inet_sock *newinet;
+	struct tcp_sock *newtp;
+	struct sock *newsk;
+
+	if (sk_acceptq_is_full(sk))
+		goto exit_overflow;
+
+	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+		goto exit;
+
+	newsk = tcp_create_openreq_child(sk, req, skb);
+	if (!newsk)
+		goto exit;
+
+	newsk->sk_dst_cache = dst;
+	tcp_v4_setup_caps(newsk, dst);
+
+	newtp		      = tcp_sk(newsk);
+	newinet		      = inet_sk(newsk);
+	newinet->daddr	      = req->af.v4_req.rmt_addr;
+	newinet->rcv_saddr    = req->af.v4_req.loc_addr;
+	newinet->saddr	      = req->af.v4_req.loc_addr;
+	newinet->opt	      = req->af.v4_req.opt;
+	req->af.v4_req.opt    = NULL;
+	newinet->mc_index     = tcp_v4_iif(skb);
+	newinet->mc_ttl	      = skb->nh.iph->ttl;
+	newtp->ext_header_len = 0;
+	if (newinet->opt)
+		newtp->ext_header_len = newinet->opt->optlen;
+	newinet->id = newtp->write_seq ^ jiffies;
+
+	tcp_sync_mss(newsk, dst_mtu(dst));
+	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tcp_initialize_rcv_mss(newsk);
+
+	__tcp_v4_hash(newsk, 0);
+	__tcp_inherit_port(sk, newsk);
+
+	return newsk;
+
+exit_overflow:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+exit:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	dst_release(dst);
+	return NULL;
+}
+
+static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcphdr *th = skb->h.th;
+	struct iphdr *iph = skb->nh.iph;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sock *nsk;
+	struct open_request **prev;
+	/* Find possible connection requests. */
+	struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
+						     iph->saddr, iph->daddr);
+	if (req)
+		return tcp_check_req(sk, skb, req, prev);
+
+	nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
+					  th->source,
+					  skb->nh.iph->daddr,
+					  ntohs(th->dest),
+					  tcp_v4_iif(skb));
+
+	if (nsk) {
+		if (nsk->sk_state != TCP_TIME_WAIT) {
+			bh_lock_sock(nsk);
+			return nsk;
+		}
+		tcp_tw_put((struct tcp_tw_bucket *)nsk);
+		return NULL;
+	}
+
+#ifdef CONFIG_SYN_COOKIES
+	if (!th->rst && !th->syn && th->ack)
+		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+#endif
+	return sk;
+}
+
+static int tcp_v4_checksum_init(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_HW) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
+				  skb->nh.iph->daddr, skb->csum))
+			return 0;
+
+		NETDEBUG(if (net_ratelimit())
+				printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
+		skb->ip_summed = CHECKSUM_NONE;
+	}
+	if (skb->len <= 76) {
+		if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
+				 skb->nh.iph->daddr,
+				 skb_checksum(skb, 0, skb->len, 0)))
+			return -1;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else {
+		skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
+					  skb->nh.iph->saddr,
+					  skb->nh.iph->daddr, 0);
+	}
+	return 0;
+}
+
+
+/* The socket must have it's spinlock held when we get
+ * here.
+ *
+ * We have a potential double-lock case here, so even when
+ * doing backlog processing we use the BH locking scheme.
+ * This is because we cannot sleep with the original spinlock
+ * held.
+ */
+int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		TCP_CHECK_TIMER(sk);
+		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+			goto reset;
+		TCP_CHECK_TIMER(sk);
+		return 0;
+	}
+
+	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+		goto csum_err;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
+		if (!nsk)
+			goto discard;
+
+		if (nsk != sk) {
+			if (tcp_child_process(sk, nsk, skb))
+				goto reset;
+			return 0;
+		}
+	}
+
+	TCP_CHECK_TIMER(sk);
+	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+		goto reset;
+	TCP_CHECK_TIMER(sk);
+	return 0;
+
+reset:
+	tcp_v4_send_reset(skb);
+discard:
+	kfree_skb(skb);
+	/* Be careful here. If this function gets more complicated and
+	 * gcc suffers from register pressure on the x86, sk (in %ebx)
+	 * might be destroyed here. This current version compiles correctly,
+	 * but you have been warned.
+	 */
+	return 0;
+
+csum_err:
+	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	goto discard;
+}
+
+/*
+ *	From tcp_input.c
+ */
+
+int tcp_v4_rcv(struct sk_buff *skb)
+{
+	struct tcphdr *th;
+	struct sock *sk;
+	int ret;
+
+	if (skb->pkt_type != PACKET_HOST)
+		goto discard_it;
+
+	/* Count it even if it's bad */
+	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+
+	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+		goto discard_it;
+
+	th = skb->h.th;
+
+	if (th->doff < sizeof(struct tcphdr) / 4)
+		goto bad_packet;
+	if (!pskb_may_pull(skb, th->doff * 4))
+		goto discard_it;
+
+	/* An explanation is required here, I think.
+	 * Packet length and doff are validated by header prediction,
+	 * provided case of th->doff==0 is elimineted.
+	 * So, we defer the checks. */
+	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	     tcp_v4_checksum_init(skb) < 0))
+		goto bad_packet;
+
+	th = skb->h.th;
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff * 4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->when	 = 0;
+	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
+	TCP_SKB_CB(skb)->sacked	 = 0;
+
+	sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
+			     skb->nh.iph->daddr, ntohs(th->dest),
+			     tcp_v4_iif(skb));
+
+	if (!sk)
+		goto no_tcp_socket;
+
+process:
+	if (sk->sk_state == TCP_TIME_WAIT)
+		goto do_time_wait;
+
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
+	if (sk_filter(sk, skb, 0))
+		goto discard_and_relse;
+
+	skb->dev = NULL;
+
+	bh_lock_sock(sk);
+	ret = 0;
+	if (!sock_owned_by_user(sk)) {
+		if (!tcp_prequeue(sk, skb))
+			ret = tcp_v4_do_rcv(sk, skb);
+	} else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	sock_put(sk);
+
+	return ret;
+
+no_tcp_socket:
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_it;
+
+	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+bad_packet:
+		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	} else {
+		tcp_v4_send_reset(skb);
+	}
+
+discard_it:
+	/* Discard frame. */
+	kfree_skb(skb);
+  	return 0;
+
+discard_and_relse:
+	sock_put(sk);
+	goto discard_it;
+
+do_time_wait:
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		tcp_tw_put((struct tcp_tw_bucket *) sk);
+		goto discard_it;
+	}
+
+	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		tcp_tw_put((struct tcp_tw_bucket *) sk);
+		goto discard_it;
+	}
+	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
+					   skb, th, skb->len)) {
+	case TCP_TW_SYN: {
+		struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
+							  ntohs(th->dest),
+							  tcp_v4_iif(skb));
+		if (sk2) {
+			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
+			tcp_tw_put((struct tcp_tw_bucket *)sk);
+			sk = sk2;
+			goto process;
+		}
+		/* Fall through to ACK */
+	}
+	case TCP_TW_ACK:
+		tcp_v4_timewait_ack(sk, skb);
+		break;
+	case TCP_TW_RST:
+		goto no_tcp_socket;
+	case TCP_TW_SUCCESS:;
+	}
+	goto discard_it;
+}
+
+/* With per-bucket locks this operation is not-atomic, so that
+ * this version is not worse.
+ */
+static void __tcp_v4_rehash(struct sock *sk)
+{
+	sk->sk_prot->unhash(sk);
+	sk->sk_prot->hash(sk);
+}
+
+static int tcp_v4_reselect_saddr(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int err;
+	struct rtable *rt;
+	__u32 old_saddr = inet->saddr;
+	__u32 new_saddr;
+	__u32 daddr = inet->daddr;
+
+	if (inet->opt && inet->opt->srr)
+		daddr = inet->opt->faddr;
+
+	/* Query new route. */
+	err = ip_route_connect(&rt, daddr, 0,
+			       RT_CONN_FLAGS(sk),
+			       sk->sk_bound_dev_if,
+			       IPPROTO_TCP,
+			       inet->sport, inet->dport, sk);
+	if (err)
+		return err;
+
+	__sk_dst_set(sk, &rt->u.dst);
+	tcp_v4_setup_caps(sk, &rt->u.dst);
+
+	new_saddr = rt->rt_src;
+
+	if (new_saddr == old_saddr)
+		return 0;
+
+	if (sysctl_ip_dynaddr > 1) {
+		printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
+				 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
+		       NIPQUAD(old_saddr),
+		       NIPQUAD(new_saddr));
+	}
+
+	inet->saddr = new_saddr;
+	inet->rcv_saddr = new_saddr;
+
+	/* XXX The only one ugly spot where we need to
+	 * XXX really change the sockets identity after
+	 * XXX it has entered the hashes. -DaveM
+	 *
+	 * Besides that, it does not check for connection
+	 * uniqueness. Wait for troubles.
+	 */
+	__tcp_v4_rehash(sk);
+	return 0;
+}
+
+int tcp_v4_rebuild_header(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
+	u32 daddr;
+	int err;
+
+	/* Route is OK, nothing to do. */
+	if (rt)
+		return 0;
+
+	/* Reroute. */
+	daddr = inet->daddr;
+	if (inet->opt && inet->opt->srr)
+		daddr = inet->opt->faddr;
+
+	{
+		struct flowi fl = { .oif = sk->sk_bound_dev_if,
+				    .nl_u = { .ip4_u =
+					      { .daddr = daddr,
+						.saddr = inet->saddr,
+						.tos = RT_CONN_FLAGS(sk) } },
+				    .proto = IPPROTO_TCP,
+				    .uli_u = { .ports =
+					       { .sport = inet->sport,
+						 .dport = inet->dport } } };
+						
+		err = ip_route_output_flow(&rt, &fl, sk, 0);
+	}
+	if (!err) {
+		__sk_dst_set(sk, &rt->u.dst);
+		tcp_v4_setup_caps(sk, &rt->u.dst);
+		return 0;
+	}
+
+	/* Routing failed... */
+	sk->sk_route_caps = 0;
+
+	if (!sysctl_ip_dynaddr ||
+	    sk->sk_state != TCP_SYN_SENT ||
+	    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
+	    (err = tcp_v4_reselect_saddr(sk)) != 0)
+		sk->sk_err_soft = -err;
+
+	return err;
+}
+
+static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
+	struct inet_sock *inet = inet_sk(sk);
+
+	sin->sin_family		= AF_INET;
+	sin->sin_addr.s_addr	= inet->daddr;
+	sin->sin_port		= inet->dport;
+}
+
+/* VJ's idea. Save last timestamp seen from this destination
+ * and hold it at least for normal timewait interval to use for duplicate
+ * segment detection in subsequent connections, before they enter synchronized
+ * state.
+ */
+
+int tcp_v4_remember_stamp(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
+	struct inet_peer *peer = NULL;
+	int release_it = 0;
+
+	if (!rt || rt->rt_dst != inet->daddr) {
+		peer = inet_getpeer(inet->daddr, 1);
+		release_it = 1;
+	} else {
+		if (!rt->peer)
+			rt_bind_peer(rt, 1);
+		peer = rt->peer;
+	}
+
+	if (peer) {
+		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
+			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
+			peer->tcp_ts = tp->rx_opt.ts_recent;
+		}
+		if (release_it)
+			inet_putpeer(peer);
+		return 1;
+	}
+
+	return 0;
+}
+
+int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
+{
+	struct inet_peer *peer = NULL;
+
+	peer = inet_getpeer(tw->tw_daddr, 1);
+
+	if (peer) {
+		if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		     peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
+			peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
+			peer->tcp_ts = tw->tw_ts_recent;
+		}
+		inet_putpeer(peer);
+		return 1;
+	}
+
+	return 0;
+}
+
+struct tcp_func ipv4_specific = {
+	.queue_xmit	=	ip_queue_xmit,
+	.send_check	=	tcp_v4_send_check,
+	.rebuild_header	=	tcp_v4_rebuild_header,
+	.conn_request	=	tcp_v4_conn_request,
+	.syn_recv_sock	=	tcp_v4_syn_recv_sock,
+	.remember_stamp	=	tcp_v4_remember_stamp,
+	.net_header_len	=	sizeof(struct iphdr),
+	.setsockopt	=	ip_setsockopt,
+	.getsockopt	=	ip_getsockopt,
+	.addr2sockaddr	=	v4_addr2sockaddr,
+	.sockaddr_len	=	sizeof(struct sockaddr_in),
+};
+
+/* NOTE: A lot of things set to zero explicitly by call to
+ *       sk_alloc() so need not be done here.
+ */
+static int tcp_v4_init_sock(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	skb_queue_head_init(&tp->out_of_order_queue);
+	tcp_init_xmit_timers(sk);
+	tcp_prequeue_init(tp);
+
+	tp->rto  = TCP_TIMEOUT_INIT;
+	tp->mdev = TCP_TIMEOUT_INIT;
+
+	/* So many TCP implementations out there (incorrectly) count the
+	 * initial SYN frame in their delayed-ACK and congestion control
+	 * algorithms that we must have the following bandaid to talk
+	 * efficiently to them.  -DaveM
+	 */
+	tp->snd_cwnd = 2;
+
+	/* See draft-stevens-tcpca-spec-01 for discussion of the
+	 * initialization of these values.
+	 */
+	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
+	tp->snd_cwnd_clamp = ~0;
+	tp->mss_cache_std = tp->mss_cache = 536;
+
+	tp->reordering = sysctl_tcp_reordering;
+
+	sk->sk_state = TCP_CLOSE;
+
+	sk->sk_write_space = sk_stream_write_space;
+	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+	tp->af_specific = &ipv4_specific;
+
+	sk->sk_sndbuf = sysctl_tcp_wmem[1];
+	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+	atomic_inc(&tcp_sockets_allocated);
+
+	return 0;
+}
+
+int tcp_v4_destroy_sock(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_clear_xmit_timers(sk);
+
+	/* Cleanup up the write buffer. */
+  	sk_stream_writequeue_purge(sk);
+
+	/* Cleans up our, hopefully empty, out_of_order_queue. */
+  	__skb_queue_purge(&tp->out_of_order_queue);
+
+	/* Clean prequeue, it must be empty really */
+	__skb_queue_purge(&tp->ucopy.prequeue);
+
+	/* Clean up a referenced TCP bind bucket. */
+	if (tp->bind_hash)
+		tcp_put_port(sk);
+
+	/*
+	 * If sendmsg cached page exists, toss it.
+	 */
+	if (sk->sk_sndmsg_page) {
+		__free_page(sk->sk_sndmsg_page);
+		sk->sk_sndmsg_page = NULL;
+	}
+
+	atomic_dec(&tcp_sockets_allocated);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(tcp_v4_destroy_sock);
+
+#ifdef CONFIG_PROC_FS
+/* Proc filesystem TCP sock list dumping. */
+
+static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
+{
+	return hlist_empty(head) ? NULL :
+		list_entry(head->first, struct tcp_tw_bucket, tw_node);
+}
+
+static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
+{
+	return tw->tw_node.next ?
+		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+}
+
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+	struct tcp_sock *tp;
+	struct hlist_node *node;
+	struct sock *sk = cur;
+	struct tcp_iter_state* st = seq->private;
+
+	if (!sk) {
+		st->bucket = 0;
+		sk = sk_head(&tcp_listening_hash[0]);
+		goto get_sk;
+	}
+
+	++st->num;
+
+	if (st->state == TCP_SEQ_STATE_OPENREQ) {
+		struct open_request *req = cur;
+
+	       	tp = tcp_sk(st->syn_wait_sk);
+		req = req->dl_next;
+		while (1) {
+			while (req) {
+				if (req->class->family == st->family) {
+					cur = req;
+					goto out;
+				}
+				req = req->dl_next;
+			}
+			if (++st->sbucket >= TCP_SYNQ_HSIZE)
+				break;
+get_req:
+			req = tp->listen_opt->syn_table[st->sbucket];
+		}
+		sk	  = sk_next(st->syn_wait_sk);
+		st->state = TCP_SEQ_STATE_LISTENING;
+		read_unlock_bh(&tp->syn_wait_lock);
+	} else {
+	       	tp = tcp_sk(sk);
+		read_lock_bh(&tp->syn_wait_lock);
+		if (tp->listen_opt && tp->listen_opt->qlen)
+			goto start_req;
+		read_unlock_bh(&tp->syn_wait_lock);
+		sk = sk_next(sk);
+	}
+get_sk:
+	sk_for_each_from(sk, node) {
+		if (sk->sk_family == st->family) {
+			cur = sk;
+			goto out;
+		}
+	       	tp = tcp_sk(sk);
+		read_lock_bh(&tp->syn_wait_lock);
+		if (tp->listen_opt && tp->listen_opt->qlen) {
+start_req:
+			st->uid		= sock_i_uid(sk);
+			st->syn_wait_sk = sk;
+			st->state	= TCP_SEQ_STATE_OPENREQ;
+			st->sbucket	= 0;
+			goto get_req;
+		}
+		read_unlock_bh(&tp->syn_wait_lock);
+	}
+	if (++st->bucket < TCP_LHTABLE_SIZE) {
+		sk = sk_head(&tcp_listening_hash[st->bucket]);
+		goto get_sk;
+	}
+	cur = NULL;
+out:
+	return cur;
+}
+
+static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
+{
+	void *rc = listening_get_next(seq, NULL);
+
+	while (rc && *pos) {
+		rc = listening_get_next(seq, rc);
+		--*pos;
+	}
+	return rc;
+}
+
+static void *established_get_first(struct seq_file *seq)
+{
+	struct tcp_iter_state* st = seq->private;
+	void *rc = NULL;
+
+	for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
+		struct sock *sk;
+		struct hlist_node *node;
+		struct tcp_tw_bucket *tw;
+
+		/* We can reschedule _before_ having picked the target: */
+		cond_resched_softirq();
+
+		read_lock(&tcp_ehash[st->bucket].lock);
+		sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
+			if (sk->sk_family != st->family) {
+				continue;
+			}
+			rc = sk;
+			goto out;
+		}
+		st->state = TCP_SEQ_STATE_TIME_WAIT;
+		tw_for_each(tw, node,
+			    &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
+			if (tw->tw_family != st->family) {
+				continue;
+			}
+			rc = tw;
+			goto out;
+		}
+		read_unlock(&tcp_ehash[st->bucket].lock);
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+	}
+out:
+	return rc;
+}
+
+static void *established_get_next(struct seq_file *seq, void *cur)
+{
+	struct sock *sk = cur;
+	struct tcp_tw_bucket *tw;
+	struct hlist_node *node;
+	struct tcp_iter_state* st = seq->private;
+
+	++st->num;
+
+	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
+		tw = cur;
+		tw = tw_next(tw);
+get_tw:
+		while (tw && tw->tw_family != st->family) {
+			tw = tw_next(tw);
+		}
+		if (tw) {
+			cur = tw;
+			goto out;
+		}
+		read_unlock(&tcp_ehash[st->bucket].lock);
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+
+		/* We can reschedule between buckets: */
+		cond_resched_softirq();
+
+		if (++st->bucket < tcp_ehash_size) {
+			read_lock(&tcp_ehash[st->bucket].lock);
+			sk = sk_head(&tcp_ehash[st->bucket].chain);
+		} else {
+			cur = NULL;
+			goto out;
+		}
+	} else
+		sk = sk_next(sk);
+
+	sk_for_each_from(sk, node) {
+		if (sk->sk_family == st->family)
+			goto found;
+	}
+
+	st->state = TCP_SEQ_STATE_TIME_WAIT;
+	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
+	goto get_tw;
+found:
+	cur = sk;
+out:
+	return cur;
+}
+
+static void *established_get_idx(struct seq_file *seq, loff_t pos)
+{
+	void *rc = established_get_first(seq);
+
+	while (rc && pos) {
+		rc = established_get_next(seq, rc);
+		--pos;
+	}		
+	return rc;
+}
+
+static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
+{
+	void *rc;
+	struct tcp_iter_state* st = seq->private;
+
+	tcp_listen_lock();
+	st->state = TCP_SEQ_STATE_LISTENING;
+	rc	  = listening_get_idx(seq, &pos);
+
+	if (!rc) {
+		tcp_listen_unlock();
+		local_bh_disable();
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		rc	  = established_get_idx(seq, pos);
+	}
+
+	return rc;
+}
+
+static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct tcp_iter_state* st = seq->private;
+	st->state = TCP_SEQ_STATE_LISTENING;
+	st->num = 0;
+	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	void *rc = NULL;
+	struct tcp_iter_state* st;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = tcp_get_idx(seq, 0);
+		goto out;
+	}
+	st = seq->private;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+	case TCP_SEQ_STATE_LISTENING:
+		rc = listening_get_next(seq, v);
+		if (!rc) {
+			tcp_listen_unlock();
+			local_bh_disable();
+			st->state = TCP_SEQ_STATE_ESTABLISHED;
+			rc	  = established_get_first(seq);
+		}
+		break;
+	case TCP_SEQ_STATE_ESTABLISHED:
+	case TCP_SEQ_STATE_TIME_WAIT:
+		rc = established_get_next(seq, v);
+		break;
+	}
+out:
+	++*pos;
+	return rc;
+}
+
+static void tcp_seq_stop(struct seq_file *seq, void *v)
+{
+	struct tcp_iter_state* st = seq->private;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+		if (v) {
+			struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
+			read_unlock_bh(&tp->syn_wait_lock);
+		}
+	case TCP_SEQ_STATE_LISTENING:
+		if (v != SEQ_START_TOKEN)
+			tcp_listen_unlock();
+		break;
+	case TCP_SEQ_STATE_TIME_WAIT:
+	case TCP_SEQ_STATE_ESTABLISHED:
+		if (v)
+			read_unlock(&tcp_ehash[st->bucket].lock);
+		local_bh_enable();
+		break;
+	}
+}
+
+static int tcp_seq_open(struct inode *inode, struct file *file)
+{
+	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
+	struct seq_file *seq;
+	struct tcp_iter_state *s;
+	int rc;
+
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	memset(s, 0, sizeof(*s));
+	s->family		= afinfo->family;
+	s->seq_ops.start	= tcp_seq_start;
+	s->seq_ops.next		= tcp_seq_next;
+	s->seq_ops.show		= afinfo->seq_show;
+	s->seq_ops.stop		= tcp_seq_stop;
+
+	rc = seq_open(file, &s->seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq	     = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
+{
+	int rc = 0;
+	struct proc_dir_entry *p;
+
+	if (!afinfo)
+		return -EINVAL;
+	afinfo->seq_fops->owner		= afinfo->owner;
+	afinfo->seq_fops->open		= tcp_seq_open;
+	afinfo->seq_fops->read		= seq_read;
+	afinfo->seq_fops->llseek	= seq_lseek;
+	afinfo->seq_fops->release	= seq_release_private;
+	
+	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	if (p)
+		p->data = afinfo;
+	else
+		rc = -ENOMEM;
+	return rc;
+}
+
+void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
+{
+	if (!afinfo)
+		return;
+	proc_net_remove(afinfo->name);
+	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 
+}
+
+static void get_openreq4(struct sock *sk, struct open_request *req,
+			 char *tmpbuf, int i, int uid)
+{
+	int ttd = req->expires - jiffies;
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
+		i,
+		req->af.v4_req.loc_addr,
+		ntohs(inet_sk(sk)->sport),
+		req->af.v4_req.rmt_addr,
+		ntohs(req->rmt_port),
+		TCP_SYN_RECV,
+		0, 0, /* could print option size, but that is af dependent. */
+		1,    /* timers active (only the expire timer) */
+		jiffies_to_clock_t(ttd),
+		req->retrans,
+		uid,
+		0,  /* non standard timer */
+		0, /* open_requests have no inode */
+		atomic_read(&sk->sk_refcnt),
+		req);
+}
+
+static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
+{
+	int timer_active;
+	unsigned long timer_expires;
+	struct tcp_sock *tp = tcp_sk(sp);
+	struct inet_sock *inet = inet_sk(sp);
+	unsigned int dest = inet->daddr;
+	unsigned int src = inet->rcv_saddr;
+	__u16 destp = ntohs(inet->dport);
+	__u16 srcp = ntohs(inet->sport);
+
+	if (tp->pending == TCP_TIME_RETRANS) {
+		timer_active	= 1;
+		timer_expires	= tp->timeout;
+	} else if (tp->pending == TCP_TIME_PROBE0) {
+		timer_active	= 4;
+		timer_expires	= tp->timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active	= 2;
+		timer_expires	= sp->sk_timer.expires;
+	} else {
+		timer_active	= 0;
+		timer_expires = jiffies;
+	}
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
+			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
+		i, src, srcp, dest, destp, sp->sk_state,
+		tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
+		timer_active,
+		jiffies_to_clock_t(timer_expires - jiffies),
+		tp->retransmits,
+		sock_i_uid(sp),
+		tp->probes_out,
+		sock_i_ino(sp),
+		atomic_read(&sp->sk_refcnt), sp,
+		tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
+		tp->snd_cwnd,
+		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
+}
+
+static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+	unsigned int dest, src;
+	__u16 destp, srcp;
+	int ttd = tw->tw_ttd - jiffies;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	dest  = tw->tw_daddr;
+	src   = tw->tw_rcv_saddr;
+	destp = ntohs(tw->tw_dport);
+	srcp  = ntohs(tw->tw_sport);
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
+		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
+		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+		atomic_read(&tw->tw_refcnt), tw);
+}
+
+#define TMPSZ 150
+
+static int tcp4_seq_show(struct seq_file *seq, void *v)
+{
+	struct tcp_iter_state* st;
+	char tmpbuf[TMPSZ + 1];
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "%-*s\n", TMPSZ - 1,
+			   "  sl  local_address rem_address   st tx_queue "
+			   "rx_queue tr tm->when retrnsmt   uid  timeout "
+			   "inode");
+		goto out;
+	}
+	st = seq->private;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_LISTENING:
+	case TCP_SEQ_STATE_ESTABLISHED:
+		get_tcp4_sock(v, tmpbuf, st->num);
+		break;
+	case TCP_SEQ_STATE_OPENREQ:
+		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
+		break;
+	case TCP_SEQ_STATE_TIME_WAIT:
+		get_timewait4_sock(v, tmpbuf, st->num);
+		break;
+	}
+	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
+out:
+	return 0;
+}
+
+static struct file_operations tcp4_seq_fops;
+static struct tcp_seq_afinfo tcp4_seq_afinfo = {
+	.owner		= THIS_MODULE,
+	.name		= "tcp",
+	.family		= AF_INET,
+	.seq_show	= tcp4_seq_show,
+	.seq_fops	= &tcp4_seq_fops,
+};
+
+int __init tcp4_proc_init(void)
+{
+	return tcp_proc_register(&tcp4_seq_afinfo);
+}
+
+void tcp4_proc_exit(void)
+{
+	tcp_proc_unregister(&tcp4_seq_afinfo);
+}
+#endif /* CONFIG_PROC_FS */
+
+struct proto tcp_prot = {
+	.name			= "TCP",
+	.owner			= THIS_MODULE,
+	.close			= tcp_close,
+	.connect		= tcp_v4_connect,
+	.disconnect		= tcp_disconnect,
+	.accept			= tcp_accept,
+	.ioctl			= tcp_ioctl,
+	.init			= tcp_v4_init_sock,
+	.destroy		= tcp_v4_destroy_sock,
+	.shutdown		= tcp_shutdown,
+	.setsockopt		= tcp_setsockopt,
+	.getsockopt		= tcp_getsockopt,
+	.sendmsg		= tcp_sendmsg,
+	.recvmsg		= tcp_recvmsg,
+	.backlog_rcv		= tcp_v4_do_rcv,
+	.hash			= tcp_v4_hash,
+	.unhash			= tcp_unhash,
+	.get_port		= tcp_v4_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.sockets_allocated	= &tcp_sockets_allocated,
+	.memory_allocated	= &tcp_memory_allocated,
+	.memory_pressure	= &tcp_memory_pressure,
+	.sysctl_mem		= sysctl_tcp_mem,
+	.sysctl_wmem		= sysctl_tcp_wmem,
+	.sysctl_rmem		= sysctl_tcp_rmem,
+	.max_header		= MAX_TCP_HEADER,
+	.obj_size		= sizeof(struct tcp_sock),
+};
+
+
+
+void __init tcp_v4_init(struct net_proto_family *ops)
+{
+	int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
+	if (err < 0)
+		panic("Failed to create the TCP control socket.\n");
+	tcp_socket->sk->sk_allocation   = GFP_ATOMIC;
+	inet_sk(tcp_socket->sk)->uc_ttl = -1;
+
+	/* Unhash it so that IP input processing does not even
+	 * see it, we do not wish this socket to see incoming
+	 * packets.
+	 */
+	tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
+}
+
+EXPORT_SYMBOL(ipv4_specific);
+EXPORT_SYMBOL(tcp_bind_hash);
+EXPORT_SYMBOL(tcp_bucket_create);
+EXPORT_SYMBOL(tcp_hashinfo);
+EXPORT_SYMBOL(tcp_inherit_port);
+EXPORT_SYMBOL(tcp_listen_wlock);
+EXPORT_SYMBOL(tcp_port_rover);
+EXPORT_SYMBOL(tcp_prot);
+EXPORT_SYMBOL(tcp_put_port);
+EXPORT_SYMBOL(tcp_unhash);
+EXPORT_SYMBOL(tcp_v4_conn_request);
+EXPORT_SYMBOL(tcp_v4_connect);
+EXPORT_SYMBOL(tcp_v4_do_rcv);
+EXPORT_SYMBOL(tcp_v4_rebuild_header);
+EXPORT_SYMBOL(tcp_v4_remember_stamp);
+EXPORT_SYMBOL(tcp_v4_send_check);
+EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
+
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(tcp_proc_register);
+EXPORT_SYMBOL(tcp_proc_unregister);
+#endif
+EXPORT_SYMBOL(sysctl_local_port_range);
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+EXPORT_SYMBOL(sysctl_tcp_low_latency);
+EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
+
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
new file mode 100644
index 00000000000..fd70509f0d5
--- /dev/null
+++ b/net/ipv4/tcp_minisocks.c
@@ -0,0 +1,1077 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *		Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Matthew Dillon, <dillon@apollo.west.oic.com>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/workqueue.h>
+#include <net/tcp.h>
+#include <net/inet_common.h>
+#include <net/xfrm.h>
+
+#ifdef CONFIG_SYSCTL
+#define SYNC_INIT 0 /* let the user enable it */
+#else
+#define SYNC_INIT 1
+#endif
+
+int sysctl_tcp_tw_recycle;
+int sysctl_tcp_max_tw_buckets = NR_FILE*2;
+
+int sysctl_tcp_syncookies = SYNC_INIT; 
+int sysctl_tcp_abort_on_overflow;
+
+static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
+
+static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
+{
+	if (seq == s_win)
+		return 1;
+	if (after(end_seq, s_win) && before(seq, e_win))
+		return 1;
+	return (seq == e_win && seq == end_seq);
+}
+
+/* New-style handling of TIME_WAIT sockets. */
+
+int tcp_tw_count;
+
+
+/* Must be called with locally disabled BHs. */
+static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
+{
+	struct tcp_ehash_bucket *ehead;
+	struct tcp_bind_hashbucket *bhead;
+	struct tcp_bind_bucket *tb;
+
+	/* Unlink from established hashes. */
+	ehead = &tcp_ehash[tw->tw_hashent];
+	write_lock(&ehead->lock);
+	if (hlist_unhashed(&tw->tw_node)) {
+		write_unlock(&ehead->lock);
+		return;
+	}
+	__hlist_del(&tw->tw_node);
+	sk_node_init(&tw->tw_node);
+	write_unlock(&ehead->lock);
+
+	/* Disassociate with bind bucket. */
+	bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
+	spin_lock(&bhead->lock);
+	tb = tw->tw_tb;
+	__hlist_del(&tw->tw_bind_node);
+	tw->tw_tb = NULL;
+	tcp_bucket_destroy(tb);
+	spin_unlock(&bhead->lock);
+
+#ifdef INET_REFCNT_DEBUG
+	if (atomic_read(&tw->tw_refcnt) != 1) {
+		printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw,
+		       atomic_read(&tw->tw_refcnt));
+	}
+#endif
+	tcp_tw_put(tw);
+}
+
+/* 
+ * * Main purpose of TIME-WAIT state is to close connection gracefully,
+ *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
+ *   (and, probably, tail of data) and one or more our ACKs are lost.
+ * * What is TIME-WAIT timeout? It is associated with maximal packet
+ *   lifetime in the internet, which results in wrong conclusion, that
+ *   it is set to catch "old duplicate segments" wandering out of their path.
+ *   It is not quite correct. This timeout is calculated so that it exceeds
+ *   maximal retransmission timeout enough to allow to lose one (or more)
+ *   segments sent by peer and our ACKs. This time may be calculated from RTO.
+ * * When TIME-WAIT socket receives RST, it means that another end
+ *   finally closed and we are allowed to kill TIME-WAIT too.
+ * * Second purpose of TIME-WAIT is catching old duplicate segments.
+ *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
+ *   with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
+ * * If we invented some more clever way to catch duplicates
+ *   (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
+ *
+ * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
+ * When you compare it to RFCs, please, read section SEGMENT ARRIVES
+ * from the very beginning.
+ *
+ * NOTE. With recycling (and later with fin-wait-2) TW bucket
+ * is _not_ stateless. It means, that strictly speaking we must
+ * spinlock it. I do not want! Well, probability of misbehaviour
+ * is ridiculously low and, seems, we could use some mb() tricks
+ * to avoid misread sequence numbers, states etc.  --ANK
+ */
+enum tcp_tw_status
+tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
+			   struct tcphdr *th, unsigned len)
+{
+	struct tcp_options_received tmp_opt;
+	int paws_reject = 0;
+
+	tmp_opt.saw_tstamp = 0;
+	if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
+		tcp_parse_options(skb, &tmp_opt, 0);
+
+		if (tmp_opt.saw_tstamp) {
+			tmp_opt.ts_recent	   = tw->tw_ts_recent;
+			tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+		}
+	}
+
+	if (tw->tw_substate == TCP_FIN_WAIT2) {
+		/* Just repeat all the checks of tcp_rcv_state_process() */
+
+		/* Out of window, send ACK */
+		if (paws_reject ||
+		    !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+				   tw->tw_rcv_nxt,
+				   tw->tw_rcv_nxt + tw->tw_rcv_wnd))
+			return TCP_TW_ACK;
+
+		if (th->rst)
+			goto kill;
+
+		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt))
+			goto kill_with_rst;
+
+		/* Dup ACK? */
+		if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) ||
+		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
+			tcp_tw_put(tw);
+			return TCP_TW_SUCCESS;
+		}
+
+		/* New data or FIN. If new data arrive after half-duplex close,
+		 * reset.
+		 */
+		if (!th->fin ||
+		    TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) {
+kill_with_rst:
+			tcp_tw_deschedule(tw);
+			tcp_tw_put(tw);
+			return TCP_TW_RST;
+		}
+
+		/* FIN arrived, enter true time-wait state. */
+		tw->tw_substate	= TCP_TIME_WAIT;
+		tw->tw_rcv_nxt	= TCP_SKB_CB(skb)->end_seq;
+		if (tmp_opt.saw_tstamp) {
+			tw->tw_ts_recent_stamp	= xtime.tv_sec;
+			tw->tw_ts_recent	= tmp_opt.rcv_tsval;
+		}
+
+		/* I am shamed, but failed to make it more elegant.
+		 * Yes, it is direct reference to IP, which is impossible
+		 * to generalize to IPv6. Taking into account that IPv6
+		 * do not undertsnad recycling in any case, it not
+		 * a big problem in practice. --ANK */
+		if (tw->tw_family == AF_INET &&
+		    sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp &&
+		    tcp_v4_tw_remember_stamp(tw))
+			tcp_tw_schedule(tw, tw->tw_timeout);
+		else
+			tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+		return TCP_TW_ACK;
+	}
+
+	/*
+	 *	Now real TIME-WAIT state.
+	 *
+	 *	RFC 1122:
+	 *	"When a connection is [...] on TIME-WAIT state [...]
+	 *	[a TCP] MAY accept a new SYN from the remote TCP to
+	 *	reopen the connection directly, if it:
+	 *	
+	 *	(1)  assigns its initial sequence number for the new
+	 *	connection to be larger than the largest sequence
+	 *	number it used on the previous connection incarnation,
+	 *	and
+	 *
+	 *	(2)  returns to TIME-WAIT state if the SYN turns out 
+	 *	to be an old duplicate".
+	 */
+
+	if (!paws_reject &&
+	    (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt &&
+	     (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
+		/* In window segment, it may be only reset or bare ack. */
+
+		if (th->rst) {
+			/* This is TIME_WAIT assasination, in two flavors.
+			 * Oh well... nobody has a sufficient solution to this
+			 * protocol bug yet.
+			 */
+			if (sysctl_tcp_rfc1337 == 0) {
+kill:
+				tcp_tw_deschedule(tw);
+				tcp_tw_put(tw);
+				return TCP_TW_SUCCESS;
+			}
+		}
+		tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+		if (tmp_opt.saw_tstamp) {
+			tw->tw_ts_recent	= tmp_opt.rcv_tsval;
+			tw->tw_ts_recent_stamp	= xtime.tv_sec;
+		}
+
+		tcp_tw_put(tw);
+		return TCP_TW_SUCCESS;
+	}
+
+	/* Out of window segment.
+
+	   All the segments are ACKed immediately.
+
+	   The only exception is new SYN. We accept it, if it is
+	   not old duplicate and we are not in danger to be killed
+	   by delayed old duplicates. RFC check is that it has
+	   newer sequence number works at rates <40Mbit/sec.
+	   However, if paws works, it is reliable AND even more,
+	   we even may relax silly seq space cutoff.
+
+	   RED-PEN: we violate main RFC requirement, if this SYN will appear
+	   old duplicate (i.e. we receive RST in reply to SYN-ACK),
+	   we must return socket to time-wait state. It is not good,
+	   but not fatal yet.
+	 */
+
+	if (th->syn && !th->rst && !th->ack && !paws_reject &&
+	    (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) ||
+	     (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
+		u32 isn = tw->tw_snd_nxt + 65535 + 2;
+		if (isn == 0)
+			isn++;
+		TCP_SKB_CB(skb)->when = isn;
+		return TCP_TW_SYN;
+	}
+
+	if (paws_reject)
+		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+
+	if(!th->rst) {
+		/* In this case we must reset the TIMEWAIT timer.
+		 *
+		 * If it is ACKless SYN it may be both old duplicate
+		 * and new good SYN with random sequence number <rcv_nxt.
+		 * Do not reschedule in the last case.
+		 */
+		if (paws_reject || th->ack)
+			tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+		/* Send ACK. Note, we do not put the bucket,
+		 * it will be released by caller.
+		 */
+		return TCP_TW_ACK;
+	}
+	tcp_tw_put(tw);
+	return TCP_TW_SUCCESS;
+}
+
+/* Enter the time wait state.  This is called with locally disabled BH.
+ * Essentially we whip up a timewait bucket, copy the
+ * relevant info into it from the SK, and mess with hash chains
+ * and list linkage.
+ */
+static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
+{
+	struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent];
+	struct tcp_bind_hashbucket *bhead;
+
+	/* Step 1: Put TW into bind hash. Original socket stays there too.
+	   Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in
+	   binding cache, even if it is closed.
+	 */
+	bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
+	spin_lock(&bhead->lock);
+	tw->tw_tb = tcp_sk(sk)->bind_hash;
+	BUG_TRAP(tcp_sk(sk)->bind_hash);
+	tw_add_bind_node(tw, &tw->tw_tb->owners);
+	spin_unlock(&bhead->lock);
+
+	write_lock(&ehead->lock);
+
+	/* Step 2: Remove SK from established hash. */
+	if (__sk_del_node_init(sk))
+		sock_prot_dec_use(sk->sk_prot);
+
+	/* Step 3: Hash TW into TIMEWAIT half of established hash table. */
+	tw_add_node(tw, &(ehead + tcp_ehash_size)->chain);
+	atomic_inc(&tw->tw_refcnt);
+
+	write_unlock(&ehead->lock);
+}
+
+/* 
+ * Move a socket to time-wait or dead fin-wait-2 state.
+ */ 
+void tcp_time_wait(struct sock *sk, int state, int timeo)
+{
+	struct tcp_tw_bucket *tw = NULL;
+	struct tcp_sock *tp = tcp_sk(sk);
+	int recycle_ok = 0;
+
+	if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp)
+		recycle_ok = tp->af_specific->remember_stamp(sk);
+
+	if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
+		tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
+
+	if(tw != NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		int rto = (tp->rto<<2) - (tp->rto>>1);
+
+		/* Give us an identity. */
+		tw->tw_daddr		= inet->daddr;
+		tw->tw_rcv_saddr	= inet->rcv_saddr;
+		tw->tw_bound_dev_if	= sk->sk_bound_dev_if;
+		tw->tw_num		= inet->num;
+		tw->tw_state		= TCP_TIME_WAIT;
+		tw->tw_substate		= state;
+		tw->tw_sport		= inet->sport;
+		tw->tw_dport		= inet->dport;
+		tw->tw_family		= sk->sk_family;
+		tw->tw_reuse		= sk->sk_reuse;
+		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
+		atomic_set(&tw->tw_refcnt, 1);
+
+		tw->tw_hashent		= sk->sk_hashent;
+		tw->tw_rcv_nxt		= tp->rcv_nxt;
+		tw->tw_snd_nxt		= tp->snd_nxt;
+		tw->tw_rcv_wnd		= tcp_receive_window(tp);
+		tw->tw_ts_recent	= tp->rx_opt.ts_recent;
+		tw->tw_ts_recent_stamp	= tp->rx_opt.ts_recent_stamp;
+		tw_dead_node_init(tw);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		if (tw->tw_family == PF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr);
+			ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_v6_ipv6only = np->ipv6only;
+		} else {
+			memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr));
+			memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr));
+			tw->tw_v6_ipv6only = 0;
+		}
+#endif
+		/* Linkage updates. */
+		__tcp_tw_hashdance(sk, tw);
+
+		/* Get the TIME_WAIT timeout firing. */
+		if (timeo < rto)
+			timeo = rto;
+
+		if (recycle_ok) {
+			tw->tw_timeout = rto;
+		} else {
+			tw->tw_timeout = TCP_TIMEWAIT_LEN;
+			if (state == TCP_TIME_WAIT)
+				timeo = TCP_TIMEWAIT_LEN;
+		}
+
+		tcp_tw_schedule(tw, timeo);
+		tcp_tw_put(tw);
+	} else {
+		/* Sorry, if we're out of memory, just CLOSE this
+		 * socket up.  We've got bigger problems than
+		 * non-graceful socket closings.
+		 */
+		if (net_ratelimit())
+			printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+	}
+
+	tcp_update_metrics(sk);
+	tcp_done(sk);
+}
+
+/* Kill off TIME_WAIT sockets once their lifetime has expired. */
+static int tcp_tw_death_row_slot;
+
+static void tcp_twkill(unsigned long);
+
+/* TIME_WAIT reaping mechanism. */
+#define TCP_TWKILL_SLOTS	8	/* Please keep this a power of 2. */
+#define TCP_TWKILL_PERIOD	(TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
+
+#define TCP_TWKILL_QUOTA	100
+
+static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
+static DEFINE_SPINLOCK(tw_death_lock);
+static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
+static void twkill_work(void *);
+static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
+static u32 twkill_thread_slots;
+
+/* Returns non-zero if quota exceeded.  */
+static int tcp_do_twkill_work(int slot, unsigned int quota)
+{
+	struct tcp_tw_bucket *tw;
+	struct hlist_node *node;
+	unsigned int killed;
+	int ret;
+
+	/* NOTE: compare this to previous version where lock
+	 * was released after detaching chain. It was racy,
+	 * because tw buckets are scheduled in not serialized context
+	 * in 2.3 (with netfilter), and with softnet it is common, because
+	 * soft irqs are not sequenced.
+	 */
+	killed = 0;
+	ret = 0;
+rescan:
+	tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
+		__tw_del_dead_node(tw);
+		spin_unlock(&tw_death_lock);
+		tcp_timewait_kill(tw);
+		tcp_tw_put(tw);
+		killed++;
+		spin_lock(&tw_death_lock);
+		if (killed > quota) {
+			ret = 1;
+			break;
+		}
+
+		/* While we dropped tw_death_lock, another cpu may have
+		 * killed off the next TW bucket in the list, therefore
+		 * do a fresh re-read of the hlist head node with the
+		 * lock reacquired.  We still use the hlist traversal
+		 * macro in order to get the prefetches.
+		 */
+		goto rescan;
+	}
+
+	tcp_tw_count -= killed;
+	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
+
+	return ret;
+}
+
+static void tcp_twkill(unsigned long dummy)
+{
+	int need_timer, ret;
+
+	spin_lock(&tw_death_lock);
+
+	if (tcp_tw_count == 0)
+		goto out;
+
+	need_timer = 0;
+	ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA);
+	if (ret) {
+		twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
+		mb();
+		schedule_work(&tcp_twkill_work);
+		need_timer = 1;
+	} else {
+		/* We purged the entire slot, anything left?  */
+		if (tcp_tw_count)
+			need_timer = 1;
+	}
+	tcp_tw_death_row_slot =
+		((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+	if (need_timer)
+		mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
+out:
+	spin_unlock(&tw_death_lock);
+}
+
+extern void twkill_slots_invalid(void);
+
+static void twkill_work(void *dummy)
+{
+	int i;
+
+	if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8))
+		twkill_slots_invalid();
+
+	while (twkill_thread_slots) {
+		spin_lock_bh(&tw_death_lock);
+		for (i = 0; i < TCP_TWKILL_SLOTS; i++) {
+			if (!(twkill_thread_slots & (1 << i)))
+				continue;
+
+			while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) {
+				if (need_resched()) {
+					spin_unlock_bh(&tw_death_lock);
+					schedule();
+					spin_lock_bh(&tw_death_lock);
+				}
+			}
+
+			twkill_thread_slots &= ~(1 << i);
+		}
+		spin_unlock_bh(&tw_death_lock);
+	}
+}
+
+/* These are always called from BH context.  See callers in
+ * tcp_input.c to verify this.
+ */
+
+/* This is for handling early-kills of TIME_WAIT sockets. */
+void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
+{
+	spin_lock(&tw_death_lock);
+	if (tw_del_dead_node(tw)) {
+		tcp_tw_put(tw);
+		if (--tcp_tw_count == 0)
+			del_timer(&tcp_tw_timer);
+	}
+	spin_unlock(&tw_death_lock);
+	tcp_timewait_kill(tw);
+}
+
+/* Short-time timewait calendar */
+
+static int tcp_twcal_hand = -1;
+static int tcp_twcal_jiffie;
+static void tcp_twcal_tick(unsigned long);
+static struct timer_list tcp_twcal_timer =
+		TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
+static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
+
+static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
+{
+	struct hlist_head *list;
+	int slot;
+
+	/* timeout := RTO * 3.5
+	 *
+	 * 3.5 = 1+2+0.5 to wait for two retransmits.
+	 *
+	 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
+	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
+	 * FINs (or previous seqments) are lost (probability of such event
+	 * is p^(N+1), where p is probability to lose single packet and
+	 * time to detect the loss is about RTO*(2^N - 1) with exponential
+	 * backoff). Normal timewait length is calculated so, that we
+	 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
+	 * [ BTW Linux. following BSD, violates this requirement waiting
+	 *   only for 60sec, we should wait at least for 240 secs.
+	 *   Well, 240 consumes too much of resources 8)
+	 * ]
+	 * This interval is not reduced to catch old duplicate and
+	 * responces to our wandering segments living for two MSLs.
+	 * However, if we use PAWS to detect
+	 * old duplicates, we can reduce the interval to bounds required
+	 * by RTO, rather than MSL. So, if peer understands PAWS, we
+	 * kill tw bucket after 3.5*RTO (it is important that this number
+	 * is greater than TS tick!) and detect old duplicates with help
+	 * of PAWS.
+	 */
+	slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
+
+	spin_lock(&tw_death_lock);
+
+	/* Unlink it, if it was scheduled */
+	if (tw_del_dead_node(tw))
+		tcp_tw_count--;
+	else
+		atomic_inc(&tw->tw_refcnt);
+
+	if (slot >= TCP_TW_RECYCLE_SLOTS) {
+		/* Schedule to slow timer */
+		if (timeo >= TCP_TIMEWAIT_LEN) {
+			slot = TCP_TWKILL_SLOTS-1;
+		} else {
+			slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
+			if (slot >= TCP_TWKILL_SLOTS)
+				slot = TCP_TWKILL_SLOTS-1;
+		}
+		tw->tw_ttd = jiffies + timeo;
+		slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
+		list = &tcp_tw_death_row[slot];
+	} else {
+		tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK);
+
+		if (tcp_twcal_hand < 0) {
+			tcp_twcal_hand = 0;
+			tcp_twcal_jiffie = jiffies;
+			tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
+			add_timer(&tcp_twcal_timer);
+		} else {
+			if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK)))
+				mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
+			slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
+		}
+		list = &tcp_twcal_row[slot];
+	}
+
+	hlist_add_head(&tw->tw_death_node, list);
+
+	if (tcp_tw_count++ == 0)
+		mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
+	spin_unlock(&tw_death_lock);
+}
+
+void tcp_twcal_tick(unsigned long dummy)
+{
+	int n, slot;
+	unsigned long j;
+	unsigned long now = jiffies;
+	int killed = 0;
+	int adv = 0;
+
+	spin_lock(&tw_death_lock);
+	if (tcp_twcal_hand < 0)
+		goto out;
+
+	slot = tcp_twcal_hand;
+	j = tcp_twcal_jiffie;
+
+	for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
+		if (time_before_eq(j, now)) {
+			struct hlist_node *node, *safe;
+			struct tcp_tw_bucket *tw;
+
+			tw_for_each_inmate_safe(tw, node, safe,
+					   &tcp_twcal_row[slot]) {
+				__tw_del_dead_node(tw);
+				tcp_timewait_kill(tw);
+				tcp_tw_put(tw);
+				killed++;
+			}
+		} else {
+			if (!adv) {
+				adv = 1;
+				tcp_twcal_jiffie = j;
+				tcp_twcal_hand = slot;
+			}
+
+			if (!hlist_empty(&tcp_twcal_row[slot])) {
+				mod_timer(&tcp_twcal_timer, j);
+				goto out;
+			}
+		}
+		j += (1<<TCP_TW_RECYCLE_TICK);
+		slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
+	}
+	tcp_twcal_hand = -1;
+
+out:
+	if ((tcp_tw_count -= killed) == 0)
+		del_timer(&tcp_tw_timer);
+	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+	spin_unlock(&tw_death_lock);
+}
+
+/* This is not only more efficient than what we used to do, it eliminates
+ * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
+ *
+ * Actually, we could lots of memory writes here. tp of listening
+ * socket contains all necessary default parameters.
+ */
+struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
+{
+	/* allocate the newsk from the same slab of the master sock,
+	 * if not, at sk_free time we'll try to free it from the wrong
+	 * slabcache (i.e. is it TCPv4 or v6?), this is handled thru sk->sk_prot -acme */
+	struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
+
+	if(newsk != NULL) {
+		struct tcp_sock *newtp;
+		struct sk_filter *filter;
+
+		memcpy(newsk, sk, sizeof(struct tcp_sock));
+		newsk->sk_state = TCP_SYN_RECV;
+
+		/* SANITY */
+		sk_node_init(&newsk->sk_node);
+		tcp_sk(newsk)->bind_hash = NULL;
+
+		/* Clone the TCP header template */
+		inet_sk(newsk)->dport = req->rmt_port;
+
+		sock_lock_init(newsk);
+		bh_lock_sock(newsk);
+
+		rwlock_init(&newsk->sk_dst_lock);
+		atomic_set(&newsk->sk_rmem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_receive_queue);
+		atomic_set(&newsk->sk_wmem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_write_queue);
+		atomic_set(&newsk->sk_omem_alloc, 0);
+		newsk->sk_wmem_queued = 0;
+		newsk->sk_forward_alloc = 0;
+
+		sock_reset_flag(newsk, SOCK_DONE);
+		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+		newsk->sk_send_head = NULL;
+		rwlock_init(&newsk->sk_callback_lock);
+		skb_queue_head_init(&newsk->sk_error_queue);
+		newsk->sk_write_space = sk_stream_write_space;
+
+		if ((filter = newsk->sk_filter) != NULL)
+			sk_filter_charge(newsk, filter);
+
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			return NULL;
+		}
+
+		/* Now setup tcp_sock */
+		newtp = tcp_sk(newsk);
+		newtp->pred_flags = 0;
+		newtp->rcv_nxt = req->rcv_isn + 1;
+		newtp->snd_nxt = req->snt_isn + 1;
+		newtp->snd_una = req->snt_isn + 1;
+		newtp->snd_sml = req->snt_isn + 1;
+
+		tcp_prequeue_init(newtp);
+
+		tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);
+
+		newtp->retransmits = 0;
+		newtp->backoff = 0;
+		newtp->srtt = 0;
+		newtp->mdev = TCP_TIMEOUT_INIT;
+		newtp->rto = TCP_TIMEOUT_INIT;
+
+		newtp->packets_out = 0;
+		newtp->left_out = 0;
+		newtp->retrans_out = 0;
+		newtp->sacked_out = 0;
+		newtp->fackets_out = 0;
+		newtp->snd_ssthresh = 0x7fffffff;
+
+		/* So many TCP implementations out there (incorrectly) count the
+		 * initial SYN frame in their delayed-ACK and congestion control
+		 * algorithms that we must have the following bandaid to talk
+		 * efficiently to them.  -DaveM
+		 */
+		newtp->snd_cwnd = 2;
+		newtp->snd_cwnd_cnt = 0;
+
+		newtp->frto_counter = 0;
+		newtp->frto_highmark = 0;
+
+		tcp_set_ca_state(newtp, TCP_CA_Open);
+		tcp_init_xmit_timers(newsk);
+		skb_queue_head_init(&newtp->out_of_order_queue);
+		newtp->rcv_wup = req->rcv_isn + 1;
+		newtp->write_seq = req->snt_isn + 1;
+		newtp->pushed_seq = newtp->write_seq;
+		newtp->copied_seq = req->rcv_isn + 1;
+
+		newtp->rx_opt.saw_tstamp = 0;
+
+		newtp->rx_opt.dsack = 0;
+		newtp->rx_opt.eff_sacks = 0;
+
+		newtp->probes_out = 0;
+		newtp->rx_opt.num_sacks = 0;
+		newtp->urg_data = 0;
+		newtp->listen_opt = NULL;
+		newtp->accept_queue = newtp->accept_queue_tail = NULL;
+		/* Deinitialize syn_wait_lock to trap illegal accesses. */
+		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
+
+		/* Back to base struct sock members. */
+		newsk->sk_err = 0;
+		newsk->sk_priority = 0;
+		atomic_set(&newsk->sk_refcnt, 2);
+#ifdef INET_REFCNT_DEBUG
+		atomic_inc(&inet_sock_nr);
+#endif
+		atomic_inc(&tcp_sockets_allocated);
+
+		if (sock_flag(newsk, SOCK_KEEPOPEN))
+			tcp_reset_keepalive_timer(newsk,
+						  keepalive_time_when(newtp));
+		newsk->sk_socket = NULL;
+		newsk->sk_sleep = NULL;
+
+		newtp->rx_opt.tstamp_ok = req->tstamp_ok;
+		if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
+			if (sysctl_tcp_fack)
+				newtp->rx_opt.sack_ok |= 2;
+		}
+		newtp->window_clamp = req->window_clamp;
+		newtp->rcv_ssthresh = req->rcv_wnd;
+		newtp->rcv_wnd = req->rcv_wnd;
+		newtp->rx_opt.wscale_ok = req->wscale_ok;
+		if (newtp->rx_opt.wscale_ok) {
+			newtp->rx_opt.snd_wscale = req->snd_wscale;
+			newtp->rx_opt.rcv_wscale = req->rcv_wscale;
+		} else {
+			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
+			newtp->window_clamp = min(newtp->window_clamp, 65535U);
+		}
+		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+		newtp->max_window = newtp->snd_wnd;
+
+		if (newtp->rx_opt.tstamp_ok) {
+			newtp->rx_opt.ts_recent = req->ts_recent;
+			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+		} else {
+			newtp->rx_opt.ts_recent_stamp = 0;
+			newtp->tcp_header_len = sizeof(struct tcphdr);
+		}
+		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
+			newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
+		newtp->rx_opt.mss_clamp = req->mss;
+		TCP_ECN_openreq_child(newtp, req);
+		if (newtp->ecn_flags&TCP_ECN_OK)
+			sock_set_flag(newsk, SOCK_NO_LARGESEND);
+
+		tcp_ca_init(newtp);
+
+		TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
+	}
+	return newsk;
+}
+
+/* 
+ *	Process an incoming packet for SYN_RECV sockets represented
+ *	as an open_request.
+ */
+
+struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
+			   struct open_request *req,
+			   struct open_request **prev)
+{
+	struct tcphdr *th = skb->h.th;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+	int paws_reject = 0;
+	struct tcp_options_received tmp_opt;
+	struct sock *child;
+
+	tmp_opt.saw_tstamp = 0;
+	if (th->doff > (sizeof(struct tcphdr)>>2)) {
+		tcp_parse_options(skb, &tmp_opt, 0);
+
+		if (tmp_opt.saw_tstamp) {
+			tmp_opt.ts_recent = req->ts_recent;
+			/* We do not store true stamp, but it is not required,
+			 * it can be estimated (approximately)
+			 * from another data.
+			 */
+			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+		}
+	}
+
+	/* Check for pure retransmitted SYN. */
+	if (TCP_SKB_CB(skb)->seq == req->rcv_isn &&
+	    flg == TCP_FLAG_SYN &&
+	    !paws_reject) {
+		/*
+		 * RFC793 draws (Incorrectly! It was fixed in RFC1122)
+		 * this case on figure 6 and figure 8, but formal
+		 * protocol description says NOTHING.
+		 * To be more exact, it says that we should send ACK,
+		 * because this segment (at least, if it has no data)
+		 * is out of window.
+		 *
+		 *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
+		 *  describe SYN-RECV state. All the description
+		 *  is wrong, we cannot believe to it and should
+		 *  rely only on common sense and implementation
+		 *  experience.
+		 *
+		 * Enforce "SYN-ACK" according to figure 8, figure 6
+		 * of RFC793, fixed by RFC1122.
+		 */
+		req->class->rtx_syn_ack(sk, req, NULL);
+		return NULL;
+	}
+
+	/* Further reproduces section "SEGMENT ARRIVES"
+	   for state SYN-RECEIVED of RFC793.
+	   It is broken, however, it does not work only
+	   when SYNs are crossed.
+
+	   You would think that SYN crossing is impossible here, since
+	   we should have a SYN_SENT socket (from connect()) on our end,
+	   but this is not true if the crossed SYNs were sent to both
+	   ends by a malicious third party.  We must defend against this,
+	   and to do that we first verify the ACK (as per RFC793, page
+	   36) and reset if it is invalid.  Is this a true full defense?
+	   To convince ourselves, let us consider a way in which the ACK
+	   test can still pass in this 'malicious crossed SYNs' case.
+	   Malicious sender sends identical SYNs (and thus identical sequence
+	   numbers) to both A and B:
+
+		A: gets SYN, seq=7
+		B: gets SYN, seq=7
+
+	   By our good fortune, both A and B select the same initial
+	   send sequence number of seven :-)
+
+		A: sends SYN|ACK, seq=7, ack_seq=8
+		B: sends SYN|ACK, seq=7, ack_seq=8
+
+	   So we are now A eating this SYN|ACK, ACK test passes.  So
+	   does sequence test, SYN is truncated, and thus we consider
+	   it a bare ACK.
+
+	   If tp->defer_accept, we silently drop this bare ACK.  Otherwise,
+	   we create an established connection.  Both ends (listening sockets)
+	   accept the new incoming connection and try to talk to each other. 8-)
+
+	   Note: This case is both harmless, and rare.  Possibility is about the
+	   same as us discovering intelligent life on another plant tomorrow.
+
+	   But generally, we should (RFC lies!) to accept ACK
+	   from SYNACK both here and in tcp_rcv_state_process().
+	   tcp_rcv_state_process() does not, hence, we do not too.
+
+	   Note that the case is absolutely generic:
+	   we cannot optimize anything here without
+	   violating protocol. All the checks must be made
+	   before attempt to create socket.
+	 */
+
+	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
+	 *                  and the incoming segment acknowledges something not yet
+	 *                  sent (the segment carries an unaccaptable ACK) ...
+	 *                  a reset is sent."
+	 *
+	 * Invalid ACK: reset will be sent by listening socket
+	 */
+	if ((flg & TCP_FLAG_ACK) &&
+	    (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1))
+		return sk;
+
+	/* Also, it would be not so bad idea to check rcv_tsecr, which
+	 * is essentially ACK extension and too early or too late values
+	 * should cause reset in unsynchronized states.
+	 */
+
+	/* RFC793: "first check sequence number". */
+
+	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+					  req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
+		/* Out of window: send ACK and drop. */
+		if (!(flg & TCP_FLAG_RST))
+			req->class->send_ack(skb, req);
+		if (paws_reject)
+			NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+		return NULL;
+	}
+
+	/* In sequence, PAWS is OK. */
+
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
+			req->ts_recent = tmp_opt.rcv_tsval;
+
+		if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
+			/* Truncate SYN, it is out of window starting
+			   at req->rcv_isn+1. */
+			flg &= ~TCP_FLAG_SYN;
+		}
+
+		/* RFC793: "second check the RST bit" and
+		 *	   "fourth, check the SYN bit"
+		 */
+		if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+			goto embryonic_reset;
+
+		/* ACK sequence verified above, just make sure ACK is
+		 * set.  If ACK not set, just silently drop the packet.
+		 */
+		if (!(flg & TCP_FLAG_ACK))
+			return NULL;
+
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+		if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
+			req->acked = 1;
+			return NULL;
+		}
+
+		/* OK, ACK is valid, create big socket and
+		 * feed this segment to it. It will repeat all
+		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
+		 * ESTABLISHED STATE. If it will be dropped after
+		 * socket is created, wait for troubles.
+		 */
+		child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+		if (child == NULL)
+			goto listen_overflow;
+
+		tcp_synq_unlink(tp, req, prev);
+		tcp_synq_removed(sk, req);
+
+		tcp_acceptq_queue(sk, req, child);
+		return child;
+
+	listen_overflow:
+		if (!sysctl_tcp_abort_on_overflow) {
+			req->acked = 1;
+			return NULL;
+		}
+
+	embryonic_reset:
+		NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+		if (!(flg & TCP_FLAG_RST))
+			req->class->send_reset(skb);
+
+		tcp_synq_drop(sk, req, prev);
+		return NULL;
+}
+
+/*
+ * Queue segment on the new socket if the new socket is active,
+ * otherwise we just shortcircuit this and continue with
+ * the new socket.
+ */
+
+int tcp_child_process(struct sock *parent, struct sock *child,
+		      struct sk_buff *skb)
+{
+	int ret = 0;
+	int state = child->sk_state;
+
+	if (!sock_owned_by_user(child)) {
+		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
+
+		/* Wakeup parent, send SIGIO */
+		if (state == TCP_SYN_RECV && child->sk_state != state)
+			parent->sk_data_ready(parent, 0);
+	} else {
+		/* Alas, it is possible again, because we do lookup
+		 * in main socket hash table and lock on listening
+		 * socket does not protect us more.
+		 */
+		sk_add_backlog(child, skb);
+	}
+
+	bh_unlock_sock(child);
+	sock_put(child);
+	return ret;
+}
+
+EXPORT_SYMBOL(tcp_check_req);
+EXPORT_SYMBOL(tcp_child_process);
+EXPORT_SYMBOL(tcp_create_openreq_child);
+EXPORT_SYMBOL(tcp_timewait_state_process);
+EXPORT_SYMBOL(tcp_tw_deschedule);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
new file mode 100644
index 00000000000..13c14cb6dee
--- /dev/null
+++ b/net/ipv4/tcp_output.c
@@ -0,0 +1,1739 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *		Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Matthew Dillon, <dillon@apollo.west.oic.com>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+/*
+ * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
+ *				:	Fragmentation on mtu decrease
+ *				:	Segment collapse on retransmit
+ *				:	AF independence
+ *
+ *		Linus Torvalds	:	send_delayed_ack
+ *		David S. Miller	:	Charge memory using the right skb
+ *					during syn/ack processing.
+ *		David S. Miller :	Output engine completely rewritten.
+ *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
+ *		Cacophonix Gaul :	draft-minshall-nagle-01
+ *		J Hadi Salim	:	ECN support
+ *
+ */
+
+#include <net/tcp.h>
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+/* People can turn this off for buggy TCP's found in printers etc. */
+int sysctl_tcp_retrans_collapse = 1;
+
+/* This limits the percentage of the congestion window which we
+ * will allow a single TSO frame to consume.  Building TSO frames
+ * which are too large can cause TCP streams to be bursty.
+ */
+int sysctl_tcp_tso_win_divisor = 8;
+
+static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
+				    struct sk_buff *skb)
+{
+	sk->sk_send_head = skb->next;
+	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
+		sk->sk_send_head = NULL;
+	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_packets_out_inc(sk, tp, skb);
+}
+
+/* SND.NXT, if window was not shrunk.
+ * If window has been shrunk, what should we make? It is not clear at all.
+ * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
+ * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
+ * invalid. OK, let's make this for now:
+ */
+static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+{
+	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
+		return tp->snd_nxt;
+	else
+		return tp->snd_una+tp->snd_wnd;
+}
+
+/* Calculate mss to advertise in SYN segment.
+ * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
+ *
+ * 1. It is independent of path mtu.
+ * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
+ * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
+ *    attached devices, because some buggy hosts are confused by
+ *    large MSS.
+ * 4. We do not make 3, we advertise MSS, calculated from first
+ *    hop device mtu, but allow to raise it to ip_rt_min_advmss.
+ *    This may be overridden via information stored in routing table.
+ * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
+ *    probably even Jumbo".
+ */
+static __u16 tcp_advertise_mss(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+	int mss = tp->advmss;
+
+	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
+		mss = dst_metric(dst, RTAX_ADVMSS);
+		tp->advmss = mss;
+	}
+
+	return (__u16)mss;
+}
+
+/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
+ * This is the first part of cwnd validation mechanism. */
+static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
+{
+	s32 delta = tcp_time_stamp - tp->lsndtime;
+	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
+	u32 cwnd = tp->snd_cwnd;
+
+	if (tcp_is_vegas(tp)) 
+		tcp_vegas_enable(tp);
+
+	tp->snd_ssthresh = tcp_current_ssthresh(tp);
+	restart_cwnd = min(restart_cwnd, cwnd);
+
+	while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
+		cwnd >>= 1;
+	tp->snd_cwnd = max(cwnd, restart_cwnd);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_used = 0;
+}
+
+static inline void tcp_event_data_sent(struct tcp_sock *tp,
+				       struct sk_buff *skb, struct sock *sk)
+{
+	u32 now = tcp_time_stamp;
+
+	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
+		tcp_cwnd_restart(tp, __sk_dst_get(sk));
+
+	tp->lsndtime = now;
+
+	/* If it is a reply for ato after last received
+	 * packet, enter pingpong mode.
+	 */
+	if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
+		tp->ack.pingpong = 1;
+}
+
+static __inline__ void tcp_event_ack_sent(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_dec_quickack_mode(tp);
+	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
+}
+
+/* Determine a window scaling and initial window to offer.
+ * Based on the assumption that the given amount of space
+ * will be offered. Store the results in the tp structure.
+ * NOTE: for smooth operation initial space offering should
+ * be a multiple of mss if possible. We assume here that mss >= 1.
+ * This MUST be enforced by all callers.
+ */
+void tcp_select_initial_window(int __space, __u32 mss,
+			       __u32 *rcv_wnd, __u32 *window_clamp,
+			       int wscale_ok, __u8 *rcv_wscale)
+{
+	unsigned int space = (__space < 0 ? 0 : __space);
+
+	/* If no clamp set the clamp to the max possible scaled window */
+	if (*window_clamp == 0)
+		(*window_clamp) = (65535 << 14);
+	space = min(*window_clamp, space);
+
+	/* Quantize space offering to a multiple of mss if possible. */
+	if (space > mss)
+		space = (space / mss) * mss;
+
+	/* NOTE: offering an initial window larger than 32767
+	 * will break some buggy TCP stacks. We try to be nice.
+	 * If we are not window scaling, then this truncates
+	 * our initial window offering to 32k. There should also
+	 * be a sysctl option to stop being nice.
+	 */
+	(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
+	(*rcv_wscale) = 0;
+	if (wscale_ok) {
+		/* Set window scaling on max possible window
+		 * See RFC1323 for an explanation of the limit to 14 
+		 */
+		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+		while (space > 65535 && (*rcv_wscale) < 14) {
+			space >>= 1;
+			(*rcv_wscale)++;
+		}
+	}
+
+	/* Set initial window to value enough for senders,
+	 * following RFC1414. Senders, not following this RFC,
+	 * will be satisfied with 2.
+	 */
+	if (mss > (1<<*rcv_wscale)) {
+		int init_cwnd = 4;
+		if (mss > 1460*3)
+			init_cwnd = 2;
+		else if (mss > 1460)
+			init_cwnd = 3;
+		if (*rcv_wnd > init_cwnd*mss)
+			*rcv_wnd = init_cwnd*mss;
+	}
+
+	/* Set the clamp no higher than max representable value */
+	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
+}
+
+/* Chose a new window to advertise, update state in tcp_sock for the
+ * socket, and return result with RFC1323 scaling applied.  The return
+ * value can be stuffed directly into th->window for an outgoing
+ * frame.
+ */
+static __inline__ u16 tcp_select_window(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 cur_win = tcp_receive_window(tp);
+	u32 new_win = __tcp_select_window(sk);
+
+	/* Never shrink the offered window */
+	if(new_win < cur_win) {
+		/* Danger Will Robinson!
+		 * Don't update rcv_wup/rcv_wnd here or else
+		 * we will not be able to advertise a zero
+		 * window in time.  --DaveM
+		 *
+		 * Relax Will Robinson.
+		 */
+		new_win = cur_win;
+	}
+	tp->rcv_wnd = new_win;
+	tp->rcv_wup = tp->rcv_nxt;
+
+	/* Make sure we do not exceed the maximum possible
+	 * scaled window.
+	 */
+	if (!tp->rx_opt.rcv_wscale)
+		new_win = min(new_win, MAX_TCP_WINDOW);
+	else
+		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
+
+	/* RFC1323 scaling applied */
+	new_win >>= tp->rx_opt.rcv_wscale;
+
+	/* If we advertise zero window, disable fast path. */
+	if (new_win == 0)
+		tp->pred_flags = 0;
+
+	return new_win;
+}
+
+
+/* This routine actually transmits TCP packets queued in by
+ * tcp_do_sendmsg().  This is used by both the initial
+ * transmission and possible later retransmissions.
+ * All SKB's seen here are completely headerless.  It is our
+ * job to build the TCP header, and pass the packet down to
+ * IP so it can do the same plus pass the packet off to the
+ * device.
+ *
+ * We are working here with either a clone of the original
+ * SKB, or a fresh unique copy made by the retransmit engine.
+ */
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb != NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+		int tcp_header_size = tp->tcp_header_len;
+		struct tcphdr *th;
+		int sysctl_flags;
+		int err;
+
+		BUG_ON(!tcp_skb_pcount(skb));
+
+#define SYSCTL_FLAG_TSTAMPS	0x1
+#define SYSCTL_FLAG_WSCALE	0x2
+#define SYSCTL_FLAG_SACK	0x4
+
+		sysctl_flags = 0;
+		if (tcb->flags & TCPCB_FLAG_SYN) {
+			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+			if(sysctl_tcp_timestamps) {
+				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
+			}
+			if(sysctl_tcp_window_scaling) {
+				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+				sysctl_flags |= SYSCTL_FLAG_WSCALE;
+			}
+			if(sysctl_tcp_sack) {
+				sysctl_flags |= SYSCTL_FLAG_SACK;
+				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+			}
+		} else if (tp->rx_opt.eff_sacks) {
+			/* A SACK is 2 pad bytes, a 2 byte header, plus
+			 * 2 32-bit sequence numbers for each SACK block.
+			 */
+			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+		}
+		
+		/*
+		 * If the connection is idle and we are restarting,
+		 * then we don't want to do any Vegas calculations
+		 * until we get fresh RTT samples.  So when we
+		 * restart, we reset our Vegas state to a clean
+		 * slate. After we get acks for this flight of
+		 * packets, _then_ we can make Vegas calculations
+		 * again.
+		 */
+		if (tcp_is_vegas(tp) && tcp_packets_in_flight(tp) == 0)
+			tcp_vegas_enable(tp);
+
+		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+		skb->h.th = th;
+		skb_set_owner_w(skb, sk);
+
+		/* Build TCP header and checksum it. */
+		th->source		= inet->sport;
+		th->dest		= inet->dport;
+		th->seq			= htonl(tcb->seq);
+		th->ack_seq		= htonl(tp->rcv_nxt);
+		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
+		if (tcb->flags & TCPCB_FLAG_SYN) {
+			/* RFC1323: The window in SYN & SYN/ACK segments
+			 * is never scaled.
+			 */
+			th->window	= htons(tp->rcv_wnd);
+		} else {
+			th->window	= htons(tcp_select_window(sk));
+		}
+		th->check		= 0;
+		th->urg_ptr		= 0;
+
+		if (tp->urg_mode &&
+		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
+			th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+			th->urg			= 1;
+		}
+
+		if (tcb->flags & TCPCB_FLAG_SYN) {
+			tcp_syn_build_options((__u32 *)(th + 1),
+					      tcp_advertise_mss(sk),
+					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+					      (sysctl_flags & SYSCTL_FLAG_SACK),
+					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+					      tp->rx_opt.rcv_wscale,
+					      tcb->when,
+		      			      tp->rx_opt.ts_recent);
+		} else {
+			tcp_build_and_update_options((__u32 *)(th + 1),
+						     tp, tcb->when);
+
+			TCP_ECN_send(sk, tp, skb, tcp_header_size);
+		}
+		tp->af_specific->send_check(sk, th, skb->len, skb);
+
+		if (tcb->flags & TCPCB_FLAG_ACK)
+			tcp_event_ack_sent(sk);
+
+		if (skb->len != tcp_header_size)
+			tcp_event_data_sent(tp, skb, sk);
+
+		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+
+		err = tp->af_specific->queue_xmit(skb, 0);
+		if (err <= 0)
+			return err;
+
+		tcp_enter_cwr(tp);
+
+		/* NET_XMIT_CN is special. It does not guarantee,
+		 * that this packet is lost. It tells that device
+		 * is about to start to drop packets or already
+		 * drops some packets of the same priority and
+		 * invokes us to send less aggressively.
+		 */
+		return err == NET_XMIT_CN ? 0 : err;
+	}
+	return -ENOBUFS;
+#undef SYSCTL_FLAG_TSTAMPS
+#undef SYSCTL_FLAG_WSCALE
+#undef SYSCTL_FLAG_SACK
+}
+
+
+/* This routine just queue's the buffer 
+ *
+ * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
+ * otherwise socket can stall.
+ */
+static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Advance write_seq and place onto the write_queue. */
+	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+	skb_header_release(skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);
+	sk_charge_skb(sk, skb);
+
+	/* Queue it, remembering where we must start sending. */
+	if (sk->sk_send_head == NULL)
+		sk->sk_send_head = skb;
+}
+
+static inline void tcp_tso_set_push(struct sk_buff *skb)
+{
+	/* Force push to be on for any TSO frames to workaround
+	 * problems with busted implementations like Mac OS-X that
+	 * hold off socket receive wakeups until push is seen.
+	 */
+	if (tcp_skb_pcount(skb) > 1)
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+}
+
+/* Send _single_ skb sitting at the send head. This function requires
+ * true push pending frames to setup probe timer etc.
+ */
+void tcp_push_one(struct sock *sk, unsigned cur_mss)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = sk->sk_send_head;
+
+	if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
+		/* Send it out now. */
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		tcp_tso_set_push(skb);
+		if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
+			sk->sk_send_head = NULL;
+			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+			tcp_packets_out_inc(sk, tp, skb);
+			return;
+		}
+	}
+}
+
+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_std)
+{
+	if (skb->len <= mss_std) {
+		/* Avoid the costly divide in the normal
+		 * non-TSO case.
+		 */
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
+	} else {
+		unsigned int factor;
+
+		factor = skb->len + (mss_std - 1);
+		factor /= mss_std;
+		skb_shinfo(skb)->tso_segs = factor;
+		skb_shinfo(skb)->tso_size = mss_std;
+	}
+}
+
+/* Function to create two new TCP segments.  Shrinks the given segment
+ * to the specified size and appends a new segment with the rest of the
+ * packet to the list.  This won't be called frequently, I hope. 
+ * Remember, these are still headerless SKBs at this point.
+ */
+static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *buff;
+	int nsize;
+	u16 flags;
+
+	nsize = skb_headlen(skb) - len;
+	if (nsize < 0)
+		nsize = 0;
+
+	if (skb_cloned(skb) &&
+	    skb_is_nonlinear(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Get a new skb... force flag on. */
+	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+	if (buff == NULL)
+		return -ENOMEM; /* We'll just try again later. */
+	sk_charge_skb(sk, buff);
+
+	/* Correct the sequence numbers. */
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
+	/* PSH and FIN should only be set in the second packet. */
+	flags = TCP_SKB_CB(skb)->flags;
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(buff)->flags = flags;
+	TCP_SKB_CB(buff)->sacked =
+		(TCP_SKB_CB(skb)->sacked &
+		 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
+	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
+
+	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+		/* Copy and checksum data tail into the new buffer. */
+		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
+						       nsize, 0);
+
+		skb_trim(skb, len);
+
+		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
+	} else {
+		skb->ip_summed = CHECKSUM_HW;
+		skb_split(skb, buff, len);
+	}
+
+	buff->ip_summed = skb->ip_summed;
+
+	/* Looks stupid, but our code really uses when of
+	 * skbs, which it never sent before. --ANK
+	 */
+	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
+
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+		tp->lost_out -= tcp_skb_pcount(skb);
+		tp->left_out -= tcp_skb_pcount(skb);
+	}
+
+	/* Fix up tso_factor for both original and new SKB.  */
+	tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+	tcp_set_skb_tso_segs(buff, tp->mss_cache_std);
+
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+		tp->lost_out += tcp_skb_pcount(skb);
+		tp->left_out += tcp_skb_pcount(skb);
+	}
+
+	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
+		tp->lost_out += tcp_skb_pcount(buff);
+		tp->left_out += tcp_skb_pcount(buff);
+	}
+
+	/* Link BUFF into the send queue. */
+	__skb_append(skb, buff);
+
+	return 0;
+}
+
+/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c
+ * eventually). The difference is that pulled data not copied, but
+ * immediately discarded.
+ */
+static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len)
+{
+	int i, k, eat;
+
+	eat = len;
+	k = 0;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail = skb->data;
+	skb->data_len -= len;
+	skb->len = skb->data_len;
+	return skb->tail;
+}
+
+int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
+{
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (len <= skb_headlen(skb)) {
+		__skb_pull(skb, len);
+	} else {
+		if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL)
+			return -ENOMEM;
+	}
+
+	TCP_SKB_CB(skb)->seq += len;
+	skb->ip_summed = CHECKSUM_HW;
+
+	skb->truesize	     -= len;
+	sk->sk_wmem_queued   -= len;
+	sk->sk_forward_alloc += len;
+	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+
+	/* Any change of skb->len requires recalculation of tso
+	 * factor and mss.
+	 */
+	if (tcp_skb_pcount(skb) > 1)
+		tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
+
+	return 0;
+}
+
+/* This function synchronize snd mss to current pmtu/exthdr set.
+
+   tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
+   for TCP options, but includes only bare TCP header.
+
+   tp->rx_opt.mss_clamp is mss negotiated at connection setup.
+   It is minumum of user_mss and mss received with SYN.
+   It also does not include TCP options.
+
+   tp->pmtu_cookie is last pmtu, seen by this function.
+
+   tp->mss_cache is current effective sending mss, including
+   all tcp options except for SACKs. It is evaluated,
+   taking into account current pmtu, but never exceeds
+   tp->rx_opt.mss_clamp.
+
+   NOTE1. rfc1122 clearly states that advertised MSS
+   DOES NOT include either tcp or ip options.
+
+   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
+   this function.			--ANK (980731)
+ */
+
+unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int mss_now;
+
+	/* Calculate base mss without TCP options:
+	   It is MMS_S - sizeof(tcphdr) of rfc1122
+	 */
+	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+
+	/* Clamp it (mss_clamp does not include tcp options) */
+	if (mss_now > tp->rx_opt.mss_clamp)
+		mss_now = tp->rx_opt.mss_clamp;
+
+	/* Now subtract optional transport overhead */
+	mss_now -= tp->ext_header_len;
+
+	/* Then reserve room for full set of TCP options and 8 bytes of data */
+	if (mss_now < 48)
+		mss_now = 48;
+
+	/* Now subtract TCP options size, not including SACKs */
+	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
+
+	/* Bound mss with half of window */
+	if (tp->max_window && mss_now > (tp->max_window>>1))
+		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
+
+	/* And store cached results */
+	tp->pmtu_cookie = pmtu;
+	tp->mss_cache = tp->mss_cache_std = mss_now;
+
+	return mss_now;
+}
+
+/* Compute the current effective MSS, taking SACKs and IP options,
+ * and even PMTU discovery events into account.
+ *
+ * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
+ * cannot be large. However, taking into account rare use of URG, this
+ * is not a big flaw.
+ */
+
+unsigned int tcp_current_mss(struct sock *sk, int large)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+	unsigned int do_large, mss_now;
+
+	mss_now = tp->mss_cache_std;
+	if (dst) {
+		u32 mtu = dst_mtu(dst);
+		if (mtu != tp->pmtu_cookie)
+			mss_now = tcp_sync_mss(sk, mtu);
+	}
+
+	do_large = (large &&
+		    (sk->sk_route_caps & NETIF_F_TSO) &&
+		    !tp->urg_mode);
+
+	if (do_large) {
+		unsigned int large_mss, factor, limit;
+
+		large_mss = 65535 - tp->af_specific->net_header_len -
+			tp->ext_header_len - tp->tcp_header_len;
+
+		if (tp->max_window && large_mss > (tp->max_window>>1))
+			large_mss = max((tp->max_window>>1),
+					68U - tp->tcp_header_len);
+
+		factor = large_mss / mss_now;
+
+		/* Always keep large mss multiple of real mss, but
+		 * do not exceed 1/tso_win_divisor of the congestion window
+		 * so we can keep the ACK clock ticking and minimize
+		 * bursting.
+		 */
+		limit = tp->snd_cwnd;
+		if (sysctl_tcp_tso_win_divisor)
+			limit /= sysctl_tcp_tso_win_divisor;
+		limit = max(1U, limit);
+		if (factor > limit)
+			factor = limit;
+
+		tp->mss_cache = mss_now * factor;
+
+		mss_now = tp->mss_cache;
+	}
+
+	if (tp->rx_opt.eff_sacks)
+		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	return mss_now;
+}
+
+/* This routine writes packets to the network.  It advances the
+ * send_head.  This happens as incoming acks open up the remote
+ * window for us.
+ *
+ * Returns 1, if no segments are in flight and we have queued segments, but
+ * cannot send anything now because of SWS or another problem.
+ */
+int tcp_write_xmit(struct sock *sk, int nonagle)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int mss_now;
+
+	/* If we are closed, the bytes will have to remain here.
+	 * In time closedown will finish, we empty the write queue and all
+	 * will be happy.
+	 */
+	if (sk->sk_state != TCP_CLOSE) {
+		struct sk_buff *skb;
+		int sent_pkts = 0;
+
+		/* Account for SACKS, we may need to fragment due to this.
+		 * It is just like the real MSS changing on us midstream.
+		 * We also handle things correctly when the user adds some
+		 * IP options mid-stream.  Silly to do, but cover it.
+		 */
+		mss_now = tcp_current_mss(sk, 1);
+
+		while ((skb = sk->sk_send_head) &&
+		       tcp_snd_test(tp, skb, mss_now,
+			       	    tcp_skb_is_last(sk, skb) ? nonagle :
+				    			       TCP_NAGLE_PUSH)) {
+			if (skb->len > mss_now) {
+				if (tcp_fragment(sk, skb, mss_now))
+					break;
+			}
+
+			TCP_SKB_CB(skb)->when = tcp_time_stamp;
+			tcp_tso_set_push(skb);
+			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
+				break;
+
+			/* Advance the send_head.  This one is sent out.
+			 * This call will increment packets_out.
+			 */
+			update_send_head(sk, tp, skb);
+
+			tcp_minshall_update(tp, mss_now, skb);
+			sent_pkts = 1;
+		}
+
+		if (sent_pkts) {
+			tcp_cwnd_validate(sk, tp);
+			return 0;
+		}
+
+		return !tp->packets_out && sk->sk_send_head;
+	}
+	return 0;
+}
+
+/* This function returns the amount that we can raise the
+ * usable window based on the following constraints
+ *  
+ * 1. The window can never be shrunk once it is offered (RFC 793)
+ * 2. We limit memory per socket
+ *
+ * RFC 1122:
+ * "the suggested [SWS] avoidance algorithm for the receiver is to keep
+ *  RECV.NEXT + RCV.WIN fixed until:
+ *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+ *
+ * i.e. don't raise the right edge of the window until you can raise
+ * it at least MSS bytes.
+ *
+ * Unfortunately, the recommended algorithm breaks header prediction,
+ * since header prediction assumes th->window stays fixed.
+ *
+ * Strictly speaking, keeping th->window fixed violates the receiver
+ * side SWS prevention criteria. The problem is that under this rule
+ * a stream of single byte packets will cause the right side of the
+ * window to always advance by a single byte.
+ * 
+ * Of course, if the sender implements sender side SWS prevention
+ * then this will not be a problem.
+ * 
+ * BSD seems to make the following compromise:
+ * 
+ *	If the free space is less than the 1/4 of the maximum
+ *	space available and the free space is less than 1/2 mss,
+ *	then set the window to 0.
+ *	[ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
+ *	Otherwise, just prevent the window from shrinking
+ *	and from being larger than the largest representable value.
+ *
+ * This prevents incremental opening of the window in the regime
+ * where TCP is limited by the speed of the reader side taking
+ * data out of the TCP receive queue. It does nothing about
+ * those cases where the window is constrained on the sender side
+ * because the pipeline is full.
+ *
+ * BSD also seems to "accidentally" limit itself to windows that are a
+ * multiple of MSS, at least until the free space gets quite small.
+ * This would appear to be a side effect of the mbuf implementation.
+ * Combining these two algorithms results in the observed behavior
+ * of having a fixed window size at almost all times.
+ *
+ * Below we obtain similar behavior by forcing the offered window to
+ * a multiple of the mss when it is feasible to do so.
+ *
+ * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
+ * Regular options like TIMESTAMP are taken into account.
+ */
+u32 __tcp_select_window(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	/* MSS for the peer's data.  Previous verions used mss_clamp
+	 * here.  I don't know if the value based on our guesses
+	 * of peer's MSS is better for the performance.  It's more correct
+	 * but may be worse for the performance because of rcv_mss
+	 * fluctuations.  --SAW  1998/11/1
+	 */
+	int mss = tp->ack.rcv_mss;
+	int free_space = tcp_space(sk);
+	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
+	int window;
+
+	if (mss > full_space)
+		mss = full_space; 
+
+	if (free_space < full_space/2) {
+		tp->ack.quick = 0;
+
+		if (tcp_memory_pressure)
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+
+		if (free_space < mss)
+			return 0;
+	}
+
+	if (free_space > tp->rcv_ssthresh)
+		free_space = tp->rcv_ssthresh;
+
+	/* Don't do rounding if we are using window scaling, since the
+	 * scaled window will not line up with the MSS boundary anyway.
+	 */
+	window = tp->rcv_wnd;
+	if (tp->rx_opt.rcv_wscale) {
+		window = free_space;
+
+		/* Advertise enough space so that it won't get scaled away.
+		 * Import case: prevent zero window announcement if
+		 * 1<<rcv_wscale > mss.
+		 */
+		if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
+			window = (((window >> tp->rx_opt.rcv_wscale) + 1)
+				  << tp->rx_opt.rcv_wscale);
+	} else {
+		/* Get the largest window that is a nice multiple of mss.
+		 * Window clamp already applied above.
+		 * If our current window offering is within 1 mss of the
+		 * free space we just keep it. This prevents the divide
+		 * and multiply from happening most of the time.
+		 * We also don't do any window rounding when the free space
+		 * is too small.
+		 */
+		if (window <= free_space - mss || window > free_space)
+			window = (free_space/mss)*mss;
+	}
+
+	return window;
+}
+
+/* Attempt to collapse two adjacent SKB's during retransmission. */
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *next_skb = skb->next;
+
+	/* The first test we must make is that neither of these two
+	 * SKB's are still referenced by someone else.
+	 */
+	if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
+		int skb_size = skb->len, next_skb_size = next_skb->len;
+		u16 flags = TCP_SKB_CB(skb)->flags;
+
+		/* Also punt if next skb has been SACK'd. */
+		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+			return;
+
+		/* Next skb is out of window. */
+		if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
+			return;
+
+		/* Punt if not enough space exists in the first SKB for
+		 * the data in the second, or the total combined payload
+		 * would exceed the MSS.
+		 */
+		if ((next_skb_size > skb_tailroom(skb)) ||
+		    ((skb_size + next_skb_size) > mss_now))
+			return;
+
+		BUG_ON(tcp_skb_pcount(skb) != 1 ||
+		       tcp_skb_pcount(next_skb) != 1);
+
+		/* Ok.  We will be able to collapse the packet. */
+		__skb_unlink(next_skb, next_skb->list);
+
+		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
+
+		if (next_skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_HW;
+
+		if (skb->ip_summed != CHECKSUM_HW)
+			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
+
+		/* Update sequence range on original skb. */
+		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+
+		/* Merge over control information. */
+		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
+		TCP_SKB_CB(skb)->flags = flags;
+
+		/* All done, get rid of second SKB and account for it so
+		 * packet counting does not break.
+		 */
+		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
+		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
+			tp->retrans_out -= tcp_skb_pcount(next_skb);
+		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
+			tp->lost_out -= tcp_skb_pcount(next_skb);
+			tp->left_out -= tcp_skb_pcount(next_skb);
+		}
+		/* Reno case is special. Sigh... */
+		if (!tp->rx_opt.sack_ok && tp->sacked_out) {
+			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+			tp->left_out -= tcp_skb_pcount(next_skb);
+		}
+
+		/* Not quite right: it can be > snd.fack, but
+		 * it is better to underestimate fackets.
+		 */
+		tcp_dec_pcount_approx(&tp->fackets_out, next_skb);
+		tcp_packets_out_dec(tp, next_skb);
+		sk_stream_free_skb(sk, next_skb);
+	}
+}
+
+/* Do a simple retransmit without using the backoff mechanisms in
+ * tcp_timer. This is used for path mtu discovery. 
+ * The socket is already locked here.
+ */ 
+void tcp_simple_retransmit(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	unsigned int mss = tcp_current_mss(sk, 0);
+	int lost = 0;
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		if (skb->len > mss && 
+		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
+			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+			}
+			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
+				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+				tp->lost_out += tcp_skb_pcount(skb);
+				lost = 1;
+			}
+		}
+	}
+
+	if (!lost)
+		return;
+
+	tcp_sync_left_out(tp);
+
+ 	/* Don't muck with the congestion window here.
+	 * Reason is that we do not increase amount of _data_
+	 * in network, but units changed and effective
+	 * cwnd/ssthresh really reduced now.
+	 */
+	if (tp->ca_state != TCP_CA_Loss) {
+		tp->high_seq = tp->snd_nxt;
+		tp->snd_ssthresh = tcp_current_ssthresh(tp);
+		tp->prior_ssthresh = 0;
+		tp->undo_marker = 0;
+		tcp_set_ca_state(tp, TCP_CA_Loss);
+	}
+	tcp_xmit_retransmit_queue(sk);
+}
+
+/* This retransmits one SKB.  Policy decisions and retransmit queue
+ * state updates are done by the caller.  Returns non-zero if an
+ * error occurred which prevented the send.
+ */
+int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+ 	unsigned int cur_mss = tcp_current_mss(sk, 0);
+	int err;
+
+	/* Do not sent more than we queued. 1/4 is reserved for possible
+	 * copying overhead: frgagmentation, tunneling, mangling etc.
+	 */
+	if (atomic_read(&sk->sk_wmem_alloc) >
+	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+		return -EAGAIN;
+
+	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+			BUG();
+
+		if (sk->sk_route_caps & NETIF_F_TSO) {
+			sk->sk_route_caps &= ~NETIF_F_TSO;
+			sock_set_flag(sk, SOCK_NO_LARGESEND);
+			tp->mss_cache = tp->mss_cache_std;
+		}
+
+		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
+			return -ENOMEM;
+	}
+
+	/* If receiver has shrunk his window, and skb is out of
+	 * new window, do not retransmit it. The exception is the
+	 * case, when window is shrunk to zero. In this case
+	 * our retransmit serves as a zero window probe.
+	 */
+	if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
+	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
+		return -EAGAIN;
+
+	if (skb->len > cur_mss) {
+		int old_factor = tcp_skb_pcount(skb);
+		int new_factor;
+
+		if (tcp_fragment(sk, skb, cur_mss))
+			return -ENOMEM; /* We'll try again later. */
+
+		/* New SKB created, account for it. */
+		new_factor = tcp_skb_pcount(skb);
+		tp->packets_out -= old_factor - new_factor;
+		tp->packets_out += tcp_skb_pcount(skb->next);
+	}
+
+	/* Collapse two adjacent packets if worthwhile and we can. */
+	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+	   (skb->len < (cur_mss >> 1)) &&
+	   (skb->next != sk->sk_send_head) &&
+	   (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
+	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
+	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
+	   (sysctl_tcp_retrans_collapse != 0))
+		tcp_retrans_try_collapse(sk, skb, cur_mss);
+
+	if(tp->af_specific->rebuild_header(sk))
+		return -EHOSTUNREACH; /* Routing failure or similar. */
+
+	/* Some Solaris stacks overoptimize and ignore the FIN on a
+	 * retransmit when old data is attached.  So strip it off
+	 * since it is cheap to do so and saves bytes on the network.
+	 */
+	if(skb->len > 0 &&
+	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+		if (!pskb_trim(skb, 0)) {
+			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
+			skb_shinfo(skb)->tso_segs = 1;
+			skb_shinfo(skb)->tso_size = 0;
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->csum = 0;
+		}
+	}
+
+	/* Make a copy, if the first transmission SKB clone we made
+	 * is still in somebody's hands, else make a clone.
+	 */
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	tcp_tso_set_push(skb);
+
+	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
+				    pskb_copy(skb, GFP_ATOMIC):
+				    skb_clone(skb, GFP_ATOMIC)));
+
+	if (err == 0) {
+		/* Update global TCP statistics. */
+		TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
+
+		tp->total_retrans++;
+
+#if FASTRETRANS_DEBUG > 0
+		if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+			if (net_ratelimit())
+				printk(KERN_DEBUG "retrans_out leaked.\n");
+		}
+#endif
+		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
+		tp->retrans_out += tcp_skb_pcount(skb);
+
+		/* Save stamp of the first retransmit. */
+		if (!tp->retrans_stamp)
+			tp->retrans_stamp = TCP_SKB_CB(skb)->when;
+
+		tp->undo_retrans++;
+
+		/* snd_nxt is stored to detect loss of retransmitted segment,
+		 * see tcp_input.c tcp_sacktag_write_queue().
+		 */
+		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
+	}
+	return err;
+}
+
+/* This gets called after a retransmit timeout, and the initially
+ * retransmitted data is acknowledged.  It tries to continue
+ * resending the rest of the retransmit queue, until either
+ * we've sent it all or the congestion window limit is reached.
+ * If doing SACK, the first ACK which comes back for a timeout
+ * based retransmit packet might feed us FACK information again.
+ * If so, we use it to avoid unnecessarily retransmissions.
+ */
+void tcp_xmit_retransmit_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int packet_cnt = tp->lost_out;
+
+	/* First pass: retransmit lost packets. */
+	if (packet_cnt) {
+		sk_stream_for_retrans_queue(skb, sk) {
+			__u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+			/* Assume this retransmit will generate
+			 * only one packet for congestion window
+			 * calculation purposes.  This works because
+			 * tcp_retransmit_skb() will chop up the
+			 * packet to be MSS sized and all the
+			 * packet counting works out.
+			 */
+			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+				return;
+
+			if (sacked&TCPCB_LOST) {
+				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+					if (tcp_retransmit_skb(sk, skb))
+						return;
+					if (tp->ca_state != TCP_CA_Loss)
+						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
+					else
+						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
+
+					if (skb ==
+					    skb_peek(&sk->sk_write_queue))
+						tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+				}
+
+				packet_cnt -= tcp_skb_pcount(skb);
+				if (packet_cnt <= 0)
+					break;
+			}
+		}
+	}
+
+	/* OK, demanded retransmission is finished. */
+
+	/* Forward retransmissions are possible only during Recovery. */
+	if (tp->ca_state != TCP_CA_Recovery)
+		return;
+
+	/* No forward retransmissions in Reno are possible. */
+	if (!tp->rx_opt.sack_ok)
+		return;
+
+	/* Yeah, we have to make difficult choice between forward transmission
+	 * and retransmission... Both ways have their merits...
+	 *
+	 * For now we do not retransmit anything, while we have some new
+	 * segments to send.
+	 */
+
+	if (tcp_may_send_now(sk, tp))
+		return;
+
+	packet_cnt = 0;
+
+	sk_stream_for_retrans_queue(skb, sk) {
+		/* Similar to the retransmit loop above we
+		 * can pretend that the retransmitted SKB
+		 * we send out here will be composed of one
+		 * real MSS sized packet because tcp_retransmit_skb()
+		 * will fragment it if necessary.
+		 */
+		if (++packet_cnt > tp->fackets_out)
+			break;
+
+		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+			break;
+
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+			continue;
+
+		/* Ok, retransmit it. */
+		if (tcp_retransmit_skb(sk, skb))
+			break;
+
+		if (skb == skb_peek(&sk->sk_write_queue))
+			tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+
+		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
+	}
+}
+
+
+/* Send a fin.  The caller locks the socket for us.  This cannot be
+ * allowed to fail queueing a FIN frame under any circumstances.
+ */
+void tcp_send_fin(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);	
+	struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+	int mss_now;
+	
+	/* Optimization, tack on the FIN if we have a queue of
+	 * unsent frames.  But be careful about outgoing SACKS
+	 * and IP options.
+	 */
+	mss_now = tcp_current_mss(sk, 1);
+
+	if (sk->sk_send_head != NULL) {
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
+		TCP_SKB_CB(skb)->end_seq++;
+		tp->write_seq++;
+	} else {
+		/* Socket is locked, keep trying until memory is available. */
+		for (;;) {
+			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+			if (skb)
+				break;
+			yield();
+		}
+
+		/* Reserve space for headers and prepare control bits. */
+		skb_reserve(skb, MAX_TCP_HEADER);
+		skb->csum = 0;
+		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
+		TCP_SKB_CB(skb)->sacked = 0;
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
+
+		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
+		TCP_SKB_CB(skb)->seq = tp->write_seq;
+		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+		tcp_queue_skb(sk, skb);
+	}
+	__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+}
+
+/* We get here when a process closes a file descriptor (either due to
+ * an explicit close() or as a byproduct of exit()'ing) and there
+ * was unread data in the receive queue.  This behavior is recommended
+ * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
+ */
+void tcp_send_active_reset(struct sock *sk, int priority)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	/* NOTE: No TCP options attached and we never retransmit this. */
+	skb = alloc_skb(MAX_TCP_HEADER, priority);
+	if (!skb) {
+		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+		return;
+	}
+
+	/* Reserve space for headers and prepare control bits. */
+	skb_reserve(skb, MAX_TCP_HEADER);
+	skb->csum = 0;
+	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
+	TCP_SKB_CB(skb)->sacked = 0;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
+
+	/* Send it off. */
+	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	if (tcp_transmit_skb(sk, skb))
+		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+}
+
+/* WARNING: This routine must only be called when we have already sent
+ * a SYN packet that crossed the incoming SYN that caused this routine
+ * to get called. If this assumption fails then the initial rcv_wnd
+ * and rcv_wscale values will not be correct.
+ */
+int tcp_send_synack(struct sock *sk)
+{
+	struct sk_buff* skb;
+
+	skb = skb_peek(&sk->sk_write_queue);
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
+		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
+		return -EFAULT;
+	}
+	if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
+		if (skb_cloned(skb)) {
+			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+			if (nskb == NULL)
+				return -ENOMEM;
+			__skb_unlink(skb, &sk->sk_write_queue);
+			skb_header_release(nskb);
+			__skb_queue_head(&sk->sk_write_queue, nskb);
+			sk_stream_free_skb(sk, skb);
+			sk_charge_skb(sk, nskb);
+			skb = nskb;
+		}
+
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
+		TCP_ECN_send_synack(tcp_sk(sk), skb);
+	}
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+}
+
+/*
+ * Prepare a SYN-ACK.
+ */
+struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+				 struct open_request *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcphdr *th;
+	int tcp_header_size;
+	struct sk_buff *skb;
+
+	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+	if (skb == NULL)
+		return NULL;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, MAX_TCP_HEADER);
+
+	skb->dst = dst_clone(dst);
+
+	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
+			   (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
+			   (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
+			   /* SACK_PERM is in the place of NOP NOP of TS */
+			   ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+
+	memset(th, 0, sizeof(struct tcphdr));
+	th->syn = 1;
+	th->ack = 1;
+	if (dst->dev->features&NETIF_F_TSO)
+		req->ecn_ok = 0;
+	TCP_ECN_make_synack(req, th);
+	th->source = inet_sk(sk)->sport;
+	th->dest = req->rmt_port;
+	TCP_SKB_CB(skb)->seq = req->snt_isn;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+	TCP_SKB_CB(skb)->sacked = 0;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
+	th->seq = htonl(TCP_SKB_CB(skb)->seq);
+	th->ack_seq = htonl(req->rcv_isn + 1);
+	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+		__u8 rcv_wscale; 
+		/* Set this up on the first call only */
+		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+		/* tcp_full_space because it is guaranteed to be the first packet */
+		tcp_select_initial_window(tcp_full_space(sk), 
+			dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			&req->rcv_wnd,
+			&req->window_clamp,
+			req->wscale_ok,
+			&rcv_wscale);
+		req->rcv_wscale = rcv_wscale; 
+	}
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
+	th->window = htons(req->rcv_wnd);
+
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
+			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
+			      TCP_SKB_CB(skb)->when,
+			      req->ts_recent);
+
+	skb->csum = 0;
+	th->doff = (tcp_header_size >> 2);
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	return skb;
+}
+
+/* 
+ * Do all connect socket setups that can be done AF independent.
+ */ 
+static inline void tcp_connect_init(struct sock *sk)
+{
+	struct dst_entry *dst = __sk_dst_get(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+
+	/* We'll fix this up when we get a response from the other end.
+	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
+	 */
+	tp->tcp_header_len = sizeof(struct tcphdr) +
+		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+
+	/* If user gave his TCP_MAXSEG, record it to clamp */
+	if (tp->rx_opt.user_mss)
+		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
+	tp->max_window = 0;
+	tcp_sync_mss(sk, dst_mtu(dst));
+
+	if (!tp->window_clamp)
+		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tcp_initialize_rcv_mss(sk);
+	tcp_ca_init(tp);
+
+	tcp_select_initial_window(tcp_full_space(sk),
+				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+				  &tp->rcv_wnd,
+				  &tp->window_clamp,
+				  sysctl_tcp_window_scaling,
+				  &rcv_wscale);
+
+	tp->rx_opt.rcv_wscale = rcv_wscale;
+	tp->rcv_ssthresh = tp->rcv_wnd;
+
+	sk->sk_err = 0;
+	sock_reset_flag(sk, SOCK_DONE);
+	tp->snd_wnd = 0;
+	tcp_init_wl(tp, tp->write_seq, 0);
+	tp->snd_una = tp->write_seq;
+	tp->snd_sml = tp->write_seq;
+	tp->rcv_nxt = 0;
+	tp->rcv_wup = 0;
+	tp->copied_seq = 0;
+
+	tp->rto = TCP_TIMEOUT_INIT;
+	tp->retransmits = 0;
+	tcp_clear_retrans(tp);
+}
+
+/*
+ * Build a SYN and send it off.
+ */ 
+int tcp_connect(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *buff;
+
+	tcp_connect_init(sk);
+
+	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+	if (unlikely(buff == NULL))
+		return -ENOBUFS;
+
+	/* Reserve space for headers. */
+	skb_reserve(buff, MAX_TCP_HEADER);
+
+	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
+	TCP_ECN_send_syn(sk, tp, buff);
+	TCP_SKB_CB(buff)->sacked = 0;
+	skb_shinfo(buff)->tso_segs = 1;
+	skb_shinfo(buff)->tso_size = 0;
+	buff->csum = 0;
+	TCP_SKB_CB(buff)->seq = tp->write_seq++;
+	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tp->snd_nxt = tp->write_seq;
+	tp->pushed_seq = tp->write_seq;
+	tcp_ca_init(tp);
+
+	/* Send it off. */
+	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
+	skb_header_release(buff);
+	__skb_queue_tail(&sk->sk_write_queue, buff);
+	sk_charge_skb(sk, buff);
+	tp->packets_out += tcp_skb_pcount(buff);
+	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
+
+	/* Timer for repeating the SYN until an answer. */
+	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	return 0;
+}
+
+/* Send out a delayed ack, the caller does the policy checking
+ * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
+ * for details.
+ */
+void tcp_send_delayed_ack(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int ato = tp->ack.ato;
+	unsigned long timeout;
+
+	if (ato > TCP_DELACK_MIN) {
+		int max_ato = HZ/2;
+
+		if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
+			max_ato = TCP_DELACK_MAX;
+
+		/* Slow path, intersegment interval is "high". */
+
+		/* If some rtt estimate is known, use it to bound delayed ack.
+		 * Do not use tp->rto here, use results of rtt measurements
+		 * directly.
+		 */
+		if (tp->srtt) {
+			int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
+
+			if (rtt < max_ato)
+				max_ato = rtt;
+		}
+
+		ato = min(ato, max_ato);
+	}
+
+	/* Stay within the limit we were given */
+	timeout = jiffies + ato;
+
+	/* Use new timeout only if there wasn't a older one earlier. */
+	if (tp->ack.pending&TCP_ACK_TIMER) {
+		/* If delack timer was blocked or is about to expire,
+		 * send ACK now.
+		 */
+		if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
+			tcp_send_ack(sk);
+			return;
+		}
+
+		if (!time_before(timeout, tp->ack.timeout))
+			timeout = tp->ack.timeout;
+	}
+	tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
+	tp->ack.timeout = timeout;
+	sk_reset_timer(sk, &tp->delack_timer, timeout);
+}
+
+/* This routine sends an ack and also updates the window. */
+void tcp_send_ack(struct sock *sk)
+{
+	/* If we have been reset, we may not send again. */
+	if (sk->sk_state != TCP_CLOSE) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct sk_buff *buff;
+
+		/* We are not putting this on the write queue, so
+		 * tcp_transmit_skb() will set the ownership to this
+		 * sock.
+		 */
+		buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+		if (buff == NULL) {
+			tcp_schedule_ack(tp);
+			tp->ack.ato = TCP_ATO_MIN;
+			tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+			return;
+		}
+
+		/* Reserve space for headers and prepare control bits. */
+		skb_reserve(buff, MAX_TCP_HEADER);
+		buff->csum = 0;
+		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
+		TCP_SKB_CB(buff)->sacked = 0;
+		skb_shinfo(buff)->tso_segs = 1;
+		skb_shinfo(buff)->tso_size = 0;
+
+		/* Send it off, this clears delayed acks for us. */
+		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+		TCP_SKB_CB(buff)->when = tcp_time_stamp;
+		tcp_transmit_skb(sk, buff);
+	}
+}
+
+/* This routine sends a packet with an out of date sequence
+ * number. It assumes the other end will try to ack it.
+ *
+ * Question: what should we make while urgent mode?
+ * 4.4BSD forces sending single byte of data. We cannot send
+ * out of window data, because we have SND.NXT==SND.MAX...
+ *
+ * Current solution: to send TWO zero-length segments in urgent mode:
+ * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
+ * out-of-date with SND.UNA-1 to probe window.
+ */
+static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	/* We don't queue it, tcp_transmit_skb() sets ownership. */
+	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+	if (skb == NULL) 
+		return -1;
+
+	/* Reserve space for headers and set control bits. */
+	skb_reserve(skb, MAX_TCP_HEADER);
+	skb->csum = 0;
+	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(skb)->sacked = urgent;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
+
+	/* Use a previous sequence.  This should cause the other
+	 * end to send an ack.  Don't queue or clone SKB, just
+	 * send it.
+	 */
+	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	return tcp_transmit_skb(sk, skb);
+}
+
+int tcp_write_wakeup(struct sock *sk)
+{
+	if (sk->sk_state != TCP_CLOSE) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct sk_buff *skb;
+
+		if ((skb = sk->sk_send_head) != NULL &&
+		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
+			int err;
+			unsigned int mss = tcp_current_mss(sk, 0);
+			unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+
+			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
+				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
+
+			/* We are probing the opening of a window
+			 * but the window size is != 0
+			 * must have been a result SWS avoidance ( sender )
+			 */
+			if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
+			    skb->len > mss) {
+				seg_size = min(seg_size, mss);
+				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+				if (tcp_fragment(sk, skb, seg_size))
+					return -1;
+				/* SWS override triggered forced fragmentation.
+				 * Disable TSO, the connection is too sick. */
+				if (sk->sk_route_caps & NETIF_F_TSO) {
+					sock_set_flag(sk, SOCK_NO_LARGESEND);
+					sk->sk_route_caps &= ~NETIF_F_TSO;
+					tp->mss_cache = tp->mss_cache_std;
+				}
+			} else if (!tcp_skb_pcount(skb))
+				tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+
+			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->when = tcp_time_stamp;
+			tcp_tso_set_push(skb);
+			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+			if (!err) {
+				update_send_head(sk, tp, skb);
+			}
+			return err;
+		} else {
+			if (tp->urg_mode &&
+			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
+				tcp_xmit_probe_skb(sk, TCPCB_URG);
+			return tcp_xmit_probe_skb(sk, 0);
+		}
+	}
+	return -1;
+}
+
+/* A window probe timeout has occurred.  If window is not closed send
+ * a partial packet else a zero probe.
+ */
+void tcp_send_probe0(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int err;
+
+	err = tcp_write_wakeup(sk);
+
+	if (tp->packets_out || !sk->sk_send_head) {
+		/* Cancel probe timer, if it is not required. */
+		tp->probes_out = 0;
+		tp->backoff = 0;
+		return;
+	}
+
+	if (err <= 0) {
+		if (tp->backoff < sysctl_tcp_retries2)
+			tp->backoff++;
+		tp->probes_out++;
+		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, 
+				      min(tp->rto << tp->backoff, TCP_RTO_MAX));
+	} else {
+		/* If packet was not sent due to local congestion,
+		 * do not backoff and do not remember probes_out.
+		 * Let local senders to fight for local resources.
+		 *
+		 * Use accumulated backoff yet.
+		 */
+		if (!tp->probes_out)
+			tp->probes_out=1;
+		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, 
+				      min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
+	}
+}
+
+EXPORT_SYMBOL(tcp_connect);
+EXPORT_SYMBOL(tcp_make_synack);
+EXPORT_SYMBOL(tcp_simple_retransmit);
+EXPORT_SYMBOL(tcp_sync_mss);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
new file mode 100644
index 00000000000..85b279f1e93
--- /dev/null
+++ b/net/ipv4/tcp_timer.c
@@ -0,0 +1,656 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *		Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *		Florian La Roche, <flla@stud.uni-sb.de>
+ *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *		Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *		Matthew Dillon, <dillon@apollo.west.oic.com>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <linux/module.h>
+#include <net/tcp.h>
+
+int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
+int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
+int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 = TCP_RETR1;
+int sysctl_tcp_retries2 = TCP_RETR2;
+int sysctl_tcp_orphan_retries;
+
+static void tcp_write_timer(unsigned long);
+static void tcp_delack_timer(unsigned long);
+static void tcp_keepalive_timer (unsigned long data);
+
+#ifdef TCP_DEBUG
+const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
+EXPORT_SYMBOL(tcp_timer_bug_msg);
+#endif
+
+/*
+ * Using different timers for retransmit, delayed acks and probes
+ * We may wish use just one timer maintaining a list of expire jiffies 
+ * to optimize.
+ */
+
+void tcp_init_xmit_timers(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	init_timer(&tp->retransmit_timer);
+	tp->retransmit_timer.function=&tcp_write_timer;
+	tp->retransmit_timer.data = (unsigned long) sk;
+	tp->pending = 0;
+
+	init_timer(&tp->delack_timer);
+	tp->delack_timer.function=&tcp_delack_timer;
+	tp->delack_timer.data = (unsigned long) sk;
+	tp->ack.pending = 0;
+
+	init_timer(&sk->sk_timer);
+	sk->sk_timer.function	= &tcp_keepalive_timer;
+	sk->sk_timer.data	= (unsigned long)sk;
+}
+
+void tcp_clear_xmit_timers(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->pending = 0;
+	sk_stop_timer(sk, &tp->retransmit_timer);
+
+	tp->ack.pending = 0;
+	tp->ack.blocked = 0;
+	sk_stop_timer(sk, &tp->delack_timer);
+
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static void tcp_write_err(struct sock *sk)
+{
+	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
+	sk->sk_error_report(sk);
+
+	tcp_done(sk);
+	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
+}
+
+/* Do not allow orphaned sockets to eat all our resources.
+ * This is direct violation of TCP specs, but it is required
+ * to prevent DoS attacks. It is called when a retransmission timeout
+ * or zero probe timeout occurs on orphaned socket.
+ *
+ * Criterium is still not confirmed experimentally and may change.
+ * We kill the socket, if:
+ * 1. If number of orphaned sockets exceeds an administratively configured
+ *    limit.
+ * 2. If we have strong memory pressure.
+ */
+static int tcp_out_of_resources(struct sock *sk, int do_reset)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int orphans = atomic_read(&tcp_orphan_count);
+
+	/* If peer does not open window for long time, or did not transmit 
+	 * anything for long time, penalize it. */
+	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+		orphans <<= 1;
+
+	/* If some dubious ICMP arrived, penalize even more. */
+	if (sk->sk_err_soft)
+		orphans <<= 1;
+
+	if (orphans >= sysctl_tcp_max_orphans ||
+	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+		if (net_ratelimit())
+			printk(KERN_INFO "Out of socket memory\n");
+
+		/* Catch exceptional cases, when connection requires reset.
+		 *      1. Last segment was sent recently. */
+		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
+		    /*  2. Window is closed. */
+		    (!tp->snd_wnd && !tp->packets_out))
+			do_reset = 1;
+		if (do_reset)
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+		tcp_done(sk);
+		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+		return 1;
+	}
+	return 0;
+}
+
+/* Calculate maximal number or retries on an orphaned socket. */
+static int tcp_orphan_retries(struct sock *sk, int alive)
+{
+	int retries = sysctl_tcp_orphan_retries; /* May be zero. */
+
+	/* We know from an ICMP that something is wrong. */
+	if (sk->sk_err_soft && !alive)
+		retries = 0;
+
+	/* However, if socket sent something recently, select some safe
+	 * number of retries. 8 corresponds to >100 seconds with minimal
+	 * RTO of 200msec. */
+	if (retries == 0 && alive)
+		retries = 8;
+	return retries;
+}
+
+/* A write timeout has occurred. Process the after effects. */
+static int tcp_write_timeout(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int retry_until;
+
+	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+		if (tp->retransmits)
+			dst_negative_advice(&sk->sk_dst_cache);
+		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
+	} else {
+		if (tp->retransmits >= sysctl_tcp_retries1) {
+			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
+			   hole detection. :-(
+
+			   It is place to make it. It is not made. I do not want
+			   to make it. It is disguisting. It does not work in any
+			   case. Let me to cite the same draft, which requires for
+			   us to implement this:
+
+   "The one security concern raised by this memo is that ICMP black holes
+   are often caused by over-zealous security administrators who block
+   all ICMP messages.  It is vitally important that those who design and
+   deploy security systems understand the impact of strict filtering on
+   upper-layer protocols.  The safest web site in the world is worthless
+   if most TCP implementations cannot transfer data from it.  It would
+   be far nicer to have all of the black holes fixed rather than fixing
+   all of the TCP implementations."
+
+                           Golden words :-).
+		   */
+
+			dst_negative_advice(&sk->sk_dst_cache);
+		}
+
+		retry_until = sysctl_tcp_retries2;
+		if (sock_flag(sk, SOCK_DEAD)) {
+			int alive = (tp->rto < TCP_RTO_MAX);
+ 
+			retry_until = tcp_orphan_retries(sk, alive);
+
+			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
+				return 1;
+		}
+	}
+
+	if (tp->retransmits >= retry_until) {
+		/* Has it gone just too far? */
+		tcp_write_err(sk);
+		return 1;
+	}
+	return 0;
+}
+
+static void tcp_delack_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock*)data;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Try again later. */
+		tp->ack.blocked = 1;
+		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+		sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
+		goto out_unlock;
+	}
+
+	sk_stream_mem_reclaim(sk);
+
+	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
+		goto out;
+
+	if (time_after(tp->ack.timeout, jiffies)) {
+		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
+		goto out;
+	}
+	tp->ack.pending &= ~TCP_ACK_TIMER;
+
+	if (skb_queue_len(&tp->ucopy.prequeue)) {
+		struct sk_buff *skb;
+
+		NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED, 
+				 skb_queue_len(&tp->ucopy.prequeue));
+
+		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
+			sk->sk_backlog_rcv(sk, skb);
+
+		tp->ucopy.memory = 0;
+	}
+
+	if (tcp_ack_scheduled(tp)) {
+		if (!tp->ack.pingpong) {
+			/* Delayed ACK missed: inflate ATO. */
+			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
+		} else {
+			/* Delayed ACK missed: leave pingpong mode and
+			 * deflate ATO.
+			 */
+			tp->ack.pingpong = 0;
+			tp->ack.ato = TCP_ATO_MIN;
+		}
+		tcp_send_ack(sk);
+		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+	}
+	TCP_CHECK_TIMER(sk);
+
+out:
+	if (tcp_memory_pressure)
+		sk_stream_mem_reclaim(sk);
+out_unlock:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+static void tcp_probe_timer(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int max_probes;
+
+	if (tp->packets_out || !sk->sk_send_head) {
+		tp->probes_out = 0;
+		return;
+	}
+
+	/* *WARNING* RFC 1122 forbids this
+	 *
+	 * It doesn't AFAIK, because we kill the retransmit timer -AK
+	 *
+	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
+	 * this behaviour in Solaris down as a bug fix. [AC]
+	 *
+	 * Let me to explain. probes_out is zeroed by incoming ACKs
+	 * even if they advertise zero window. Hence, connection is killed only
+	 * if we received no ACKs for normal connection timeout. It is not killed
+	 * only because window stays zero for some time, window may be zero
+	 * until armageddon and even later. We are in full accordance
+	 * with RFCs, only probe timer combines both retransmission timeout
+	 * and probe timeout in one bottle.				--ANK
+	 */
+	max_probes = sysctl_tcp_retries2;
+
+	if (sock_flag(sk, SOCK_DEAD)) {
+		int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
+ 
+		max_probes = tcp_orphan_retries(sk, alive);
+
+		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
+			return;
+	}
+
+	if (tp->probes_out > max_probes) {
+		tcp_write_err(sk);
+	} else {
+		/* Only send another probe if we didn't close things up. */
+		tcp_send_probe0(sk);
+	}
+}
+
+/*
+ *	The TCP retransmit timer.
+ */
+
+static void tcp_retransmit_timer(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tp->packets_out)
+		goto out;
+
+	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+
+	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
+	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
+		/* Receiver dastardly shrinks window. Our retransmits
+		 * become zero probes, but we should not timeout this
+		 * connection. If the socket is an orphan, time it out,
+		 * we cannot allow such beasts to hang infinitely.
+		 */
+#ifdef TCP_DEBUG
+		if (net_ratelimit()) {
+			struct inet_sock *inet = inet_sk(sk);
+			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
+			       NIPQUAD(inet->daddr), htons(inet->dport),
+			       inet->num, tp->snd_una, tp->snd_nxt);
+		}
+#endif
+		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
+			tcp_write_err(sk);
+			goto out;
+		}
+		tcp_enter_loss(sk, 0);
+		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		__sk_dst_reset(sk);
+		goto out_reset_timer;
+	}
+
+	if (tcp_write_timeout(sk))
+		goto out;
+
+	if (tp->retransmits == 0) {
+		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
+			if (tp->rx_opt.sack_ok) {
+				if (tp->ca_state == TCP_CA_Recovery)
+					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
+				else
+					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
+			} else {
+				if (tp->ca_state == TCP_CA_Recovery)
+					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
+				else
+					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
+			}
+		} else if (tp->ca_state == TCP_CA_Loss) {
+			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
+		} else {
+			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
+		}
+	}
+
+	if (tcp_use_frto(sk)) {
+		tcp_enter_frto(sk);
+	} else {
+		tcp_enter_loss(sk, 0);
+	}
+
+	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+		/* Retransmission failed because of local congestion,
+		 * do not backoff.
+		 */
+		if (!tp->retransmits)
+			tp->retransmits=1;
+		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
+				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
+		goto out;
+	}
+
+	/* Increase the timeout each time we retransmit.  Note that
+	 * we do not increase the rtt estimate.  rto is initialized
+	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
+	 * that doubling rto each time is the least we can get away with.
+	 * In KA9Q, Karn uses this for the first few times, and then
+	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
+	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
+	 * defined in the protocol as the maximum possible RTT.  I guess
+	 * we'll have to use something other than TCP to talk to the
+	 * University of Mars.
+	 *
+	 * PAWS allows us longer timeouts and large windows, so once
+	 * implemented ftp to mars will work nicely. We will have to fix
+	 * the 120 second clamps though!
+	 */
+	tp->backoff++;
+	tp->retransmits++;
+
+out_reset_timer:
+	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
+	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	if (tp->retransmits > sysctl_tcp_retries1)
+		__sk_dst_reset(sk);
+
+out:;
+}
+
+static void tcp_write_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock*)data;
+	struct tcp_sock *tp = tcp_sk(sk);
+	int event;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Try again later */
+		sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
+		goto out_unlock;
+	}
+
+	if (sk->sk_state == TCP_CLOSE || !tp->pending)
+		goto out;
+
+	if (time_after(tp->timeout, jiffies)) {
+		sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
+		goto out;
+	}
+
+	event = tp->pending;
+	tp->pending = 0;
+
+	switch (event) {
+	case TCP_TIME_RETRANS:
+		tcp_retransmit_timer(sk);
+		break;
+	case TCP_TIME_PROBE0:
+		tcp_probe_timer(sk);
+		break;
+	}
+	TCP_CHECK_TIMER(sk);
+
+out:
+	sk_stream_mem_reclaim(sk);
+out_unlock:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ *	Timer for listening sockets
+ */
+
+static void tcp_synack_timer(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
+	unsigned long now = jiffies;
+	struct open_request **reqp, *req;
+	int i, budget;
+
+	if (lopt == NULL || lopt->qlen == 0)
+		return;
+
+	/* Normally all the openreqs are young and become mature
+	 * (i.e. converted to established socket) for first timeout.
+	 * If synack was not acknowledged for 3 seconds, it means
+	 * one of the following things: synack was lost, ack was lost,
+	 * rtt is high or nobody planned to ack (i.e. synflood).
+	 * When server is a bit loaded, queue is populated with old
+	 * open requests, reducing effective size of queue.
+	 * When server is well loaded, queue size reduces to zero
+	 * after several minutes of work. It is not synflood,
+	 * it is normal operation. The solution is pruning
+	 * too old entries overriding normal timeout, when
+	 * situation becomes dangerous.
+	 *
+	 * Essentially, we reserve half of room for young
+	 * embrions; and abort old ones without pity, if old
+	 * ones are about to clog our table.
+	 */
+	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+		int young = (lopt->qlen_young<<1);
+
+		while (thresh > 2) {
+			if (lopt->qlen < young)
+				break;
+			thresh--;
+			young <<= 1;
+		}
+	}
+
+	if (tp->defer_accept)
+		max_retries = tp->defer_accept;
+
+	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
+	i = lopt->clock_hand;
+
+	do {
+		reqp=&lopt->syn_table[i];
+		while ((req = *reqp) != NULL) {
+			if (time_after_eq(now, req->expires)) {
+				if ((req->retrans < thresh ||
+				     (req->acked && req->retrans < max_retries))
+				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
+					unsigned long timeo;
+
+					if (req->retrans++ == 0)
+						lopt->qlen_young--;
+					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
+						    TCP_RTO_MAX);
+					req->expires = now + timeo;
+					reqp = &req->dl_next;
+					continue;
+				}
+
+				/* Drop this request */
+				write_lock(&tp->syn_wait_lock);
+				*reqp = req->dl_next;
+				write_unlock(&tp->syn_wait_lock);
+				lopt->qlen--;
+				if (req->retrans == 0)
+					lopt->qlen_young--;
+				tcp_openreq_free(req);
+				continue;
+			}
+			reqp = &req->dl_next;
+		}
+
+		i = (i+1)&(TCP_SYNQ_HSIZE-1);
+
+	} while (--budget > 0);
+
+	lopt->clock_hand = i;
+
+	if (lopt->qlen)
+		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
+}
+
+void tcp_delete_keepalive_timer (struct sock *sk)
+{
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
+{
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+}
+
+void tcp_set_keepalive(struct sock *sk, int val)
+{
+	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+		return;
+
+	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
+		tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
+	else if (!val)
+		tcp_delete_keepalive_timer(sk);
+}
+
+
+static void tcp_keepalive_timer (unsigned long data)
+{
+	struct sock *sk = (struct sock *) data;
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u32 elapsed;
+
+	/* Only process if socket is not in use. */
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Try again later. */ 
+		tcp_reset_keepalive_timer (sk, HZ/20);
+		goto out;
+	}
+
+	if (sk->sk_state == TCP_LISTEN) {
+		tcp_synack_timer(sk);
+		goto out;
+	}
+
+	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
+		if (tp->linger2 >= 0) {
+			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
+
+			if (tmo > 0) {
+				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+				goto out;
+			}
+		}
+		tcp_send_active_reset(sk, GFP_ATOMIC);
+		goto death;
+	}
+
+	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	elapsed = keepalive_time_when(tp);
+
+	/* It is alive without keepalive 8) */
+	if (tp->packets_out || sk->sk_send_head)
+		goto resched;
+
+	elapsed = tcp_time_stamp - tp->rcv_tstamp;
+
+	if (elapsed >= keepalive_time_when(tp)) {
+		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
+		     (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+			tcp_write_err(sk);
+			goto out;
+		}
+		if (tcp_write_wakeup(sk) <= 0) {
+			tp->probes_out++;
+			elapsed = keepalive_intvl_when(tp);
+		} else {
+			/* If keepalive was lost due to local congestion,
+			 * try harder.
+			 */
+			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
+		}
+	} else {
+		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
+		elapsed = keepalive_time_when(tp) - elapsed;
+	}
+
+	TCP_CHECK_TIMER(sk);
+	sk_stream_mem_reclaim(sk);
+
+resched:
+	tcp_reset_keepalive_timer (sk, elapsed);
+	goto out;
+
+death:	
+	tcp_done(sk);
+
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(tcp_clear_xmit_timers);
+EXPORT_SYMBOL(tcp_delete_keepalive_timer);
+EXPORT_SYMBOL(tcp_init_xmit_timers);
+EXPORT_SYMBOL(tcp_reset_keepalive_timer);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
new file mode 100644
index 00000000000..6baddfbedca
--- /dev/null
+++ b/net/ipv4/udp.c
@@ -0,0 +1,1575 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		The User Datagram Protocol (UDP).
+ *
+ * Version:	$Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Alan Cox, <Alan.Cox@linux.org>
+ *		Hirokazu Takahashi, <taka@valinux.co.jp>
+ *
+ * Fixes:
+ *		Alan Cox	:	verify_area() calls
+ *		Alan Cox	: 	stopped close while in use off icmp
+ *					messages. Not a fix but a botch that
+ *					for udp at least is 'valid'.
+ *		Alan Cox	:	Fixed icmp handling properly
+ *		Alan Cox	: 	Correct error for oversized datagrams
+ *		Alan Cox	:	Tidied select() semantics. 
+ *		Alan Cox	:	udp_err() fixed properly, also now 
+ *					select and read wake correctly on errors
+ *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
+ *		Alan Cox	:	UDP can count its memory
+ *		Alan Cox	:	send to an unknown connection causes
+ *					an ECONNREFUSED off the icmp, but
+ *					does NOT close.
+ *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
+ *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
+ *					bug no longer crashes it.
+ *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
+ *		Alan Cox	:	Uses skb_free_datagram
+ *		Alan Cox	:	Added get/set sockopt support.
+ *		Alan Cox	:	Broadcasting without option set returns EACCES.
+ *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
+ *		Alan Cox	:	Use ip_tos and ip_ttl
+ *		Alan Cox	:	SNMP Mibs
+ *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
+ *		Matt Dillon	:	UDP length checks.
+ *		Alan Cox	:	Smarter af_inet used properly.
+ *		Alan Cox	:	Use new kernel side addressing.
+ *		Alan Cox	:	Incorrect return on truncated datagram receive.
+ *	Arnt Gulbrandsen 	:	New udp_send and stuff
+ *		Alan Cox	:	Cache last socket
+ *		Alan Cox	:	Route cache
+ *		Jon Peatfield	:	Minor efficiency fix to sendto().
+ *		Mike Shaver	:	RFC1122 checks.
+ *		Alan Cox	:	Nonblocking error fix.
+ *	Willy Konynenberg	:	Transparent proxying support.
+ *		Mike McLagan	:	Routing by source
+ *		David S. Miller	:	New socket lookup architecture.
+ *					Last socket cache retained as it
+ *					does have a high hit rate.
+ *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
+ *		Andi Kleen	:	Some cleanups, cache destination entry
+ *					for connect. 
+ *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
+ *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
+ *					return ENOTCONN for unconnected sockets (POSIX)
+ *		Janos Farkas	:	don't deliver multi/broadcasts to a different
+ *					bound-to-device socket
+ *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
+ *					datagrams.
+ *	Hirokazu Takahashi	:	sendfile() on UDP works now.
+ *		Arnaldo C. Melo :	convert /proc/net/udp to seq_file
+ *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+ *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
+ *					a single port at the same time.
+ *	Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+ 
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/config.h>
+#include <linux/inet.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <net/snmp.h>
+#include <net/tcp.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/inet_common.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+/*
+ *	Snmp MIB for the UDP layer
+ */
+
+DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
+
+struct hlist_head udp_hash[UDP_HTABLE_SIZE];
+DEFINE_RWLOCK(udp_hash_lock);
+
+/* Shared by v4/v6 udp. */
+int udp_port_rover;
+
+static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	struct hlist_node *node;
+	struct sock *sk2;
+	struct inet_sock *inet = inet_sk(sk);
+
+	write_lock_bh(&udp_hash_lock);
+	if (snum == 0) {
+		int best_size_so_far, best, result, i;
+
+		if (udp_port_rover > sysctl_local_port_range[1] ||
+		    udp_port_rover < sysctl_local_port_range[0])
+			udp_port_rover = sysctl_local_port_range[0];
+		best_size_so_far = 32767;
+		best = result = udp_port_rover;
+		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+			struct hlist_head *list;
+			int size;
+
+			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			if (hlist_empty(list)) {
+				if (result > sysctl_local_port_range[1])
+					result = sysctl_local_port_range[0] +
+						((result - sysctl_local_port_range[0]) &
+						 (UDP_HTABLE_SIZE - 1));
+				goto gotit;
+			}
+			size = 0;
+			sk_for_each(sk2, node, list)
+				if (++size >= best_size_so_far)
+					goto next;
+			best_size_so_far = size;
+			best = result;
+		next:;
+		}
+		result = best;
+		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
+			if (result > sysctl_local_port_range[1])
+				result = sysctl_local_port_range[0]
+					+ ((result - sysctl_local_port_range[0]) &
+					   (UDP_HTABLE_SIZE - 1));
+			if (!udp_lport_inuse(result))
+				break;
+		}
+		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+			goto fail;
+gotit:
+		udp_port_rover = snum = result;
+	} else {
+		sk_for_each(sk2, node,
+			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			struct inet_sock *inet2 = inet_sk(sk2);
+
+			if (inet2->num == snum &&
+			    sk2 != sk &&
+			    !ipv6_only_sock(sk2) &&
+			    (!sk2->sk_bound_dev_if ||
+			     !sk->sk_bound_dev_if ||
+			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (!inet2->rcv_saddr ||
+			     !inet->rcv_saddr ||
+			     inet2->rcv_saddr == inet->rcv_saddr) &&
+			    (!sk2->sk_reuse || !sk->sk_reuse))
+				goto fail;
+		}
+	}
+	inet->num = snum;
+	if (sk_unhashed(sk)) {
+		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+
+		sk_add_node(sk, h);
+		sock_prot_inc_use(sk->sk_prot);
+	}
+	write_unlock_bh(&udp_hash_lock);
+	return 0;
+
+fail:
+	write_unlock_bh(&udp_hash_lock);
+	return 1;
+}
+
+static void udp_v4_hash(struct sock *sk)
+{
+	BUG();
+}
+
+static void udp_v4_unhash(struct sock *sk)
+{
+	write_lock_bh(&udp_hash_lock);
+	if (sk_del_node_init(sk)) {
+		inet_sk(sk)->num = 0;
+		sock_prot_dec_use(sk->sk_prot);
+	}
+	write_unlock_bh(&udp_hash_lock);
+}
+
+/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
+ * harder than this. -DaveM
+ */
+static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport,
+					  u32 daddr, u16 dport, int dif)
+{
+	struct sock *sk, *result = NULL;
+	struct hlist_node *node;
+	unsigned short hnum = ntohs(dport);
+	int badness = -1;
+
+	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (inet->num == hnum && !ipv6_only_sock(sk)) {
+			int score = (sk->sk_family == PF_INET ? 1 : 0);
+			if (inet->rcv_saddr) {
+				if (inet->rcv_saddr != daddr)
+					continue;
+				score+=2;
+			}
+			if (inet->daddr) {
+				if (inet->daddr != saddr)
+					continue;
+				score+=2;
+			}
+			if (inet->dport) {
+				if (inet->dport != sport)
+					continue;
+				score+=2;
+			}
+			if (sk->sk_bound_dev_if) {
+				if (sk->sk_bound_dev_if != dif)
+					continue;
+				score+=2;
+			}
+			if(score == 9) {
+				result = sk;
+				break;
+			} else if(score > badness) {
+				result = sk;
+				badness = score;
+			}
+		}
+	}
+	return result;
+}
+
+static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport,
+					     u32 daddr, u16 dport, int dif)
+{
+	struct sock *sk;
+
+	read_lock(&udp_hash_lock);
+	sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
+	if (sk)
+		sock_hold(sk);
+	read_unlock(&udp_hash_lock);
+	return sk;
+}
+
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+					     u16 loc_port, u32 loc_addr,
+					     u16 rmt_port, u32 rmt_addr,
+					     int dif)
+{
+	struct hlist_node *node;
+	struct sock *s = sk;
+	unsigned short hnum = ntohs(loc_port);
+
+	sk_for_each_from(s, node) {
+		struct inet_sock *inet = inet_sk(s);
+
+		if (inet->num != hnum					||
+		    (inet->daddr && inet->daddr != rmt_addr)		||
+		    (inet->dport != rmt_port && inet->dport)		||
+		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)	||
+		    ipv6_only_sock(s)					||
+		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
+			continue;
+		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
+			continue;
+		goto found;
+  	}
+	s = NULL;
+found:
+  	return s;
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.  
+ * Header points to the ip header of the error packet. We move
+ * on past this. Then (as it used to claim before adjustment)
+ * header points to the first 8 bytes of the udp header.  We need
+ * to find the appropriate port.
+ */
+
+void udp_err(struct sk_buff *skb, u32 info)
+{
+	struct inet_sock *inet;
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct sock *sk;
+	int harderr;
+	int err;
+
+	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
+	if (sk == NULL) {
+		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+    	  	return;	/* No socket for error */
+	}
+
+	err = 0;
+	harderr = 0;
+	inet = inet_sk(sk);
+
+	switch (type) {
+	default:
+	case ICMP_TIME_EXCEEDED:
+		err = EHOSTUNREACH;
+		break;
+	case ICMP_SOURCE_QUENCH:
+		goto out;
+	case ICMP_PARAMETERPROB:
+		err = EPROTO;
+		harderr = 1;
+		break;
+	case ICMP_DEST_UNREACH:
+		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
+			if (inet->pmtudisc != IP_PMTUDISC_DONT) {
+				err = EMSGSIZE;
+				harderr = 1;
+				break;
+			}
+			goto out;
+		}
+		err = EHOSTUNREACH;
+		if (code <= NR_ICMP_UNREACH) {
+			harderr = icmp_err_convert[code].fatal;
+			err = icmp_err_convert[code].errno;
+		}
+		break;
+	}
+
+	/*
+	 *      RFC1122: OK.  Passes ICMP errors back to application, as per 
+	 *	4.1.3.3.
+	 */
+	if (!inet->recverr) {
+		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
+			goto out;
+	} else {
+		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
+	}
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+out:
+	sock_put(sk);
+}
+
+/*
+ * Throw away all pending data and cancel the corking. Socket is locked.
+ */
+static void udp_flush_pending_frames(struct sock *sk)
+{
+	struct udp_sock *up = udp_sk(sk);
+
+	if (up->pending) {
+		up->len = 0;
+		up->pending = 0;
+		ip_flush_pending_frames(sk);
+	}
+}
+
+/*
+ * Push out all pending data as one UDP datagram. Socket is locked.
+ */
+static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi *fl = &inet->cork.fl;
+	struct sk_buff *skb;
+	struct udphdr *uh;
+	int err = 0;
+
+	/* Grab the skbuff where UDP header space exists. */
+	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+		goto out;
+
+	/*
+	 * Create a UDP header
+	 */
+	uh = skb->h.uh;
+	uh->source = fl->fl_ip_sport;
+	uh->dest = fl->fl_ip_dport;
+	uh->len = htons(up->len);
+	uh->check = 0;
+
+	if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto send;
+	}
+
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		/*
+		 * Only one fragment on the socket.
+		 */
+		if (skb->ip_summed == CHECKSUM_HW) {
+			skb->csum = offsetof(struct udphdr, check);
+			uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
+					up->len, IPPROTO_UDP, 0);
+		} else {
+			skb->csum = csum_partial((char *)uh,
+					sizeof(struct udphdr), skb->csum);
+			uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
+					up->len, IPPROTO_UDP, skb->csum);
+			if (uh->check == 0)
+				uh->check = -1;
+		}
+	} else {
+		unsigned int csum = 0;
+		/*
+		 * HW-checksum won't work as there are two or more 
+		 * fragments on the socket so that all csums of sk_buffs
+		 * should be together.
+		 */
+		if (skb->ip_summed == CHECKSUM_HW) {
+			int offset = (unsigned char *)uh - skb->data;
+			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+			skb->ip_summed = CHECKSUM_NONE;
+		} else {
+			skb->csum = csum_partial((char *)uh,
+					sizeof(struct udphdr), skb->csum);
+		}
+
+		skb_queue_walk(&sk->sk_write_queue, skb) {
+			csum = csum_add(csum, skb->csum);
+		}
+		uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
+				up->len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = -1;
+	}
+send:
+	err = ip_push_pending_frames(sk);
+out:
+	up->len = 0;
+	up->pending = 0;
+	return err;
+}
+
+
+static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
+{
+	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
+}
+
+int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		size_t len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct udp_sock *up = udp_sk(sk);
+	int ulen = len;
+	struct ipcm_cookie ipc;
+	struct rtable *rt = NULL;
+	int free = 0;
+	int connected = 0;
+	u32 daddr, faddr, saddr;
+	u16 dport;
+	u8  tos;
+	int err;
+	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+
+	if (len > 0xFFFF)
+		return -EMSGSIZE;
+
+	/* 
+	 *	Check the flags.
+	 */
+
+	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
+		return -EOPNOTSUPP;
+
+	ipc.opt = NULL;
+
+	if (up->pending) {
+		/*
+		 * There are pending frames.
+	 	 * The socket lock must be held while it's corked.
+		 */
+		lock_sock(sk);
+		if (likely(up->pending)) {
+			if (unlikely(up->pending != AF_INET)) {
+				release_sock(sk);
+				return -EINVAL;
+			}
+ 			goto do_append_data;
+		}
+		release_sock(sk);
+	}
+	ulen += sizeof(struct udphdr);
+
+	/*
+	 *	Get and verify the address. 
+	 */
+	if (msg->msg_name) {
+		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
+		if (msg->msg_namelen < sizeof(*usin))
+			return -EINVAL;
+		if (usin->sin_family != AF_INET) {
+			if (usin->sin_family != AF_UNSPEC)
+				return -EAFNOSUPPORT;
+		}
+
+		daddr = usin->sin_addr.s_addr;
+		dport = usin->sin_port;
+		if (dport == 0)
+			return -EINVAL;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+		daddr = inet->daddr;
+		dport = inet->dport;
+		/* Open fast path for connected socket.
+		   Route will not be used, if at least one option is set.
+		 */
+		connected = 1;
+  	}
+	ipc.addr = inet->saddr;
+
+	ipc.oif = sk->sk_bound_dev_if;
+	if (msg->msg_controllen) {
+		err = ip_cmsg_send(msg, &ipc);
+		if (err)
+			return err;
+		if (ipc.opt)
+			free = 1;
+		connected = 0;
+	}
+	if (!ipc.opt)
+		ipc.opt = inet->opt;
+
+	saddr = ipc.addr;
+	ipc.addr = faddr = daddr;
+
+	if (ipc.opt && ipc.opt->srr) {
+		if (!daddr)
+			return -EINVAL;
+		faddr = ipc.opt->faddr;
+		connected = 0;
+	}
+	tos = RT_TOS(inet->tos);
+	if (sock_flag(sk, SOCK_LOCALROUTE) ||
+	    (msg->msg_flags & MSG_DONTROUTE) || 
+	    (ipc.opt && ipc.opt->is_strictroute)) {
+		tos |= RTO_ONLINK;
+		connected = 0;
+	}
+
+	if (MULTICAST(daddr)) {
+		if (!ipc.oif)
+			ipc.oif = inet->mc_index;
+		if (!saddr)
+			saddr = inet->mc_addr;
+		connected = 0;
+	}
+
+	if (connected)
+		rt = (struct rtable*)sk_dst_check(sk, 0);
+
+	if (rt == NULL) {
+		struct flowi fl = { .oif = ipc.oif,
+				    .nl_u = { .ip4_u =
+					      { .daddr = faddr,
+						.saddr = saddr,
+						.tos = tos } },
+				    .proto = IPPROTO_UDP,
+				    .uli_u = { .ports =
+					       { .sport = inet->sport,
+						 .dport = dport } } };
+		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+		if (err)
+			goto out;
+
+		err = -EACCES;
+		if ((rt->rt_flags & RTCF_BROADCAST) &&
+		    !sock_flag(sk, SOCK_BROADCAST))
+			goto out;
+		if (connected)
+			sk_dst_set(sk, dst_clone(&rt->u.dst));
+	}
+
+	if (msg->msg_flags&MSG_CONFIRM)
+		goto do_confirm;
+back_from_confirm:
+
+	saddr = rt->rt_src;
+	if (!ipc.addr)
+		daddr = ipc.addr = rt->rt_dst;
+
+	lock_sock(sk);
+	if (unlikely(up->pending)) {
+		/* The socket is already corked while preparing it. */
+		/* ... which is an evident application bug. --ANK */
+		release_sock(sk);
+
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+		err = -EINVAL;
+		goto out;
+	}
+	/*
+	 *	Now cork the socket to pend data.
+	 */
+	inet->cork.fl.fl4_dst = daddr;
+	inet->cork.fl.fl_ip_dport = dport;
+	inet->cork.fl.fl4_src = saddr;
+	inet->cork.fl.fl_ip_sport = inet->sport;
+	up->pending = AF_INET;
+
+do_append_data:
+	up->len += ulen;
+	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 
+			sizeof(struct udphdr), &ipc, rt, 
+			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+	if (err)
+		udp_flush_pending_frames(sk);
+	else if (!corkreq)
+		err = udp_push_pending_frames(sk, up);
+	release_sock(sk);
+
+out:
+	ip_rt_put(rt);
+	if (free)
+		kfree(ipc.opt);
+	if (!err) {
+		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
+		return len;
+	}
+	return err;
+
+do_confirm:
+	dst_confirm(&rt->u.dst);
+	if (!(msg->msg_flags&MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto out;
+}
+
+static int udp_sendpage(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int ret;
+
+	if (!up->pending) {
+		struct msghdr msg = {	.msg_flags = flags|MSG_MORE };
+
+		/* Call udp_sendmsg to specify destination address which
+		 * sendpage interface can't pass.
+		 * This will succeed only when the socket is connected.
+		 */
+		ret = udp_sendmsg(NULL, sk, &msg, 0);
+		if (ret < 0)
+			return ret;
+	}
+
+	lock_sock(sk);
+
+	if (unlikely(!up->pending)) {
+		release_sock(sk);
+
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
+		return -EINVAL;
+	}
+
+	ret = ip_append_page(sk, page, offset, size, flags);
+	if (ret == -EOPNOTSUPP) {
+		release_sock(sk);
+		return sock_no_sendpage(sk->sk_socket, page, offset,
+					size, flags);
+	}
+	if (ret < 0) {
+		udp_flush_pending_frames(sk);
+		goto out;
+	}
+
+	up->len += size;
+	if (!(up->corkflag || (flags&MSG_MORE)))
+		ret = udp_push_pending_frames(sk, up);
+	if (!ret)
+		ret = size;
+out:
+	release_sock(sk);
+	return ret;
+}
+
+/*
+ *	IOCTL requests applicable to the UDP protocol
+ */
+ 
+int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	switch(cmd) 
+	{
+		case SIOCOUTQ:
+		{
+			int amount = atomic_read(&sk->sk_wmem_alloc);
+			return put_user(amount, (int __user *)arg);
+		}
+
+		case SIOCINQ:
+		{
+			struct sk_buff *skb;
+			unsigned long amount;
+
+			amount = 0;
+			spin_lock_irq(&sk->sk_receive_queue.lock);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb != NULL) {
+				/*
+				 * We will only return the amount
+				 * of this packet since that is all
+				 * that will be read.
+				 */
+				amount = skb->len - sizeof(struct udphdr);
+			}
+			spin_unlock_irq(&sk->sk_receive_queue.lock);
+			return put_user(amount, (int __user *)arg);
+		}
+
+		default:
+			return -ENOIOCTLCMD;
+	}
+	return(0);
+}
+
+static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
+{
+	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+}
+
+static __inline__ int udp_checksum_complete(struct sk_buff *skb)
+{
+	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		__udp_checksum_complete(skb);
+}
+
+/*
+ * 	This should be easy, if there is something there we
+ * 	return it, otherwise we block.
+ */
+
+static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len, int noblock, int flags, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+  	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+  	struct sk_buff *skb;
+  	int copied, err;
+
+	/*
+	 *	Check any passed addresses
+	 */
+	if (addr_len)
+		*addr_len=sizeof(*sin);
+
+	if (flags & MSG_ERRQUEUE)
+		return ip_recv_error(sk, msg, len);
+
+try_again:
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+  
+  	copied = skb->len - sizeof(struct udphdr);
+	if (copied > len) {
+		copied = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+					      copied);
+	} else if (msg->msg_flags&MSG_TRUNC) {
+		if (__udp_checksum_complete(skb))
+			goto csum_copy_err;
+		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+					      copied);
+	} else {
+		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
+
+		if (err == -EINVAL)
+			goto csum_copy_err;
+	}
+
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	/* Copy the address. */
+	if (sin)
+	{
+		sin->sin_family = AF_INET;
+		sin->sin_port = skb->h.uh->source;
+		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+  	}
+	if (inet->cmsg_flags)
+		ip_cmsg_recv(msg, skb);
+
+	err = copied;
+	if (flags & MSG_TRUNC)
+		err = skb->len - sizeof(struct udphdr);
+  
+out_free:
+  	skb_free_datagram(sk, skb);
+out:
+  	return err;
+
+csum_copy_err:
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+
+	/* Clear queue. */
+	if (flags&MSG_PEEK) {
+		int clear = 0;
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			clear = 1;
+		}
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		if (clear)
+			kfree_skb(skb);
+	}
+
+	skb_free_datagram(sk, skb);
+
+	if (noblock)
+		return -EAGAIN;	
+	goto try_again;
+}
+
+
+int udp_disconnect(struct sock *sk, int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	/*
+	 *	1003.1g - break association.
+	 */
+	 
+	sk->sk_state = TCP_CLOSE;
+	inet->daddr = 0;
+	inet->dport = 0;
+	sk->sk_bound_dev_if = 0;
+	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+		inet_reset_saddr(sk);
+
+	if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
+		sk->sk_prot->unhash(sk);
+		inet->sport = 0;
+	}
+	sk_dst_reset(sk);
+	return 0;
+}
+
+static void udp_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+/* return:
+ * 	1  if the the UDP system should process it
+ *	0  if we should drop this packet
+ * 	-1 if it should get processed by xfrm4_rcv_encap
+ */
+static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
+{
+#ifndef CONFIG_XFRM
+	return 1; 
+#else
+	struct udp_sock *up = udp_sk(sk);
+  	struct udphdr *uh = skb->h.uh;
+	struct iphdr *iph;
+	int iphlen, len;
+  
+	__u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
+	__u32 *udpdata32 = (__u32 *)udpdata;
+	__u16 encap_type = up->encap_type;
+
+	/* if we're overly short, let UDP handle it */
+	if (udpdata > skb->tail)
+		return 1;
+
+	/* if this is not encapsulated socket, then just return now */
+	if (!encap_type)
+		return 1;
+
+	len = skb->tail - udpdata;
+
+	switch (encap_type) {
+	default:
+	case UDP_ENCAP_ESPINUDP:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			return 0;
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+			/* ESP Packet without Non-ESP header */
+			len = sizeof(struct udphdr);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	case UDP_ENCAP_ESPINUDP_NON_IKE:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			return 0;
+		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
+			   udpdata32[0] == 0 && udpdata32[1] == 0) {
+			
+			/* ESP Packet with Non-IKE marker */
+			len = sizeof(struct udphdr) + 2 * sizeof(u32);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	}
+
+	/* At this point we are sure that this is an ESPinUDP packet,
+	 * so we need to remove 'len' bytes from the packet (the UDP
+	 * header and optional ESP marker bytes) and then modify the
+	 * protocol to ESP, and then call into the transform receiver.
+	 */
+
+	/* Now we can update and verify the packet length... */
+	iph = skb->nh.iph;
+	iphlen = iph->ihl << 2;
+	iph->tot_len = htons(ntohs(iph->tot_len) - len);
+	if (skb->len < iphlen + len) {
+		/* packet is too small!?! */
+		return 0;
+	}
+
+	/* pull the data buffer up to the ESP header and set the
+	 * transport header to point to ESP.  Keep UDP on the stack
+	 * for later.
+	 */
+	skb->h.raw = skb_pull(skb, len);
+
+	/* modify the protocol (it's ESP!) */
+	iph->protocol = IPPROTO_ESP;
+
+	/* and let the caller know to send this into the ESP processor... */
+	return -1;
+#endif
+}
+
+/* returns:
+ *  -1: error
+ *   0: success
+ *  >0: "udp encap" protocol resubmission
+ *
+ * Note that in the success and error cases, the skb is assumed to
+ * have either been requeued or freed.
+ */
+static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+{
+	struct udp_sock *up = udp_sk(sk);
+
+	/*
+	 *	Charge it to the socket, dropping if the queue is full.
+	 */
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (up->encap_type) {
+		/*
+		 * This is an encapsulation socket, so let's see if this is
+		 * an encapsulated packet.
+		 * If it's a keepalive packet, then just eat it.
+		 * If it's an encapsulateed packet, then pass it to the
+		 * IPsec xfrm input and return the response
+		 * appropriately.  Otherwise, just fall through and
+		 * pass this up the UDP socket.
+		 */
+		int ret;
+
+		ret = udp_encap_rcv(sk, skb);
+		if (ret == 0) {
+			/* Eat the packet .. */
+			kfree_skb(skb);
+			return 0;
+		}
+		if (ret < 0) {
+			/* process the ESP packet */
+			ret = xfrm4_rcv_encap(skb, up->encap_type);
+			UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+			return -ret;
+		}
+		/* FALLTHROUGH -- it's a UDP Packet */
+	}
+
+	if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
+		if (__udp_checksum_complete(skb)) {
+			UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (sock_queue_rcv_skb(sk,skb)<0) {
+		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+	UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+	return 0;
+}
+
+/*
+ *	Multicasts and broadcasts go to each listener.
+ *
+ *	Note: called only from the BH handler context,
+ *	so we don't need to lock the hashes.
+ */
+static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
+				 u32 saddr, u32 daddr)
+{
+	struct sock *sk;
+	int dif;
+
+	read_lock(&udp_hash_lock);
+	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	dif = skb->dev->ifindex;
+	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+	if (sk) {
+		struct sock *sknext = NULL;
+
+		do {
+			struct sk_buff *skb1 = skb;
+
+			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
+						   uh->source, saddr, dif);
+			if(sknext)
+				skb1 = skb_clone(skb, GFP_ATOMIC);
+
+			if(skb1) {
+				int ret = udp_queue_rcv_skb(sk, skb1);
+				if (ret > 0)
+					/* we should probably re-process instead
+					 * of dropping packets here. */
+					kfree_skb(skb1);
+			}
+			sk = sknext;
+		} while(sknext);
+	} else
+		kfree_skb(skb);
+	read_unlock(&udp_hash_lock);
+	return 0;
+}
+
+/* Initialize UDP checksum. If exited with zero value (success),
+ * CHECKSUM_UNNECESSARY means, that no more checks are required.
+ * Otherwise, csum completion requires chacksumming packet body,
+ * including udp header and folding it to skb->csum.
+ */
+static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+			     unsigned short ulen, u32 saddr, u32 daddr)
+{
+	if (uh->check == 0) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else if (skb->ip_summed == CHECKSUM_HW) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
+			return 0;
+		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
+		skb->ip_summed = CHECKSUM_NONE;
+	}
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+	/* Probably, we should checksum udp header (it should be in cache
+	 * in any case) and data in tiny packets (< rx copybreak).
+	 */
+	return 0;
+}
+
+/*
+ *	All we need to do is get the socket, and then do a checksum. 
+ */
+ 
+int udp_rcv(struct sk_buff *skb)
+{
+  	struct sock *sk;
+  	struct udphdr *uh;
+	unsigned short ulen;
+	struct rtable *rt = (struct rtable*)skb->dst;
+	u32 saddr = skb->nh.iph->saddr;
+	u32 daddr = skb->nh.iph->daddr;
+	int len = skb->len;
+
+	/*
+	 *	Validate the packet and the UDP length.
+	 */
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+		goto no_header;
+
+	uh = skb->h.uh;
+
+	ulen = ntohs(uh->len);
+
+	if (ulen > len || ulen < sizeof(*uh))
+		goto short_packet;
+
+	if (pskb_trim(skb, ulen))
+		goto short_packet;
+
+	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
+		goto csum_error;
+
+	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
+
+	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
+
+	if (sk != NULL) {
+		int ret = udp_queue_rcv_skb(sk, skb);
+		sock_put(sk);
+
+		/* a return value > 0 means to resubmit the input, but
+		 * it it wants the return to be -protocol, or 0
+		 */
+		if (ret > 0)
+			return -ret;
+		return 0;
+	}
+
+	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto drop;
+
+	/* No socket. Drop packet silently, if checksum is wrong */
+	if (udp_checksum_complete(skb))
+		goto csum_error;
+
+	UDP_INC_STATS_BH(UDP_MIB_NOPORTS);
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+	/*
+	 * Hmm.  We got an UDP packet to a port to which we
+	 * don't wanna listen.  Ignore it.
+	 */
+	kfree_skb(skb);
+	return(0);
+
+short_packet:
+	NETDEBUG(if (net_ratelimit())
+		printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+			NIPQUAD(saddr),
+			ntohs(uh->source),
+			ulen,
+			len,
+			NIPQUAD(daddr),
+			ntohs(uh->dest)));
+no_header:
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+	kfree_skb(skb);
+	return(0);
+
+csum_error:
+	/* 
+	 * RFC1122: OK.  Discards the bad packet silently (as far as 
+	 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 
+	 */
+	NETDEBUG(if (net_ratelimit())
+		 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+			NIPQUAD(saddr),
+			ntohs(uh->source),
+			NIPQUAD(daddr),
+			ntohs(uh->dest),
+			ulen));
+drop:
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+	kfree_skb(skb);
+	return(0);
+}
+
+static int udp_destroy_sock(struct sock *sk)
+{
+	lock_sock(sk);
+	udp_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ *	Socket option code for UDP
+ */
+static int udp_setsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int optlen)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int val;
+	int err = 0;
+
+	if (level != SOL_UDP)
+		return ip_setsockopt(sk, level, optname, optval, optlen);
+
+	if(optlen<sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	switch(optname) {
+	case UDP_CORK:
+		if (val != 0) {
+			up->corkflag = 1;
+		} else {
+			up->corkflag = 0;
+			lock_sock(sk);
+			udp_push_pending_frames(sk, up);
+			release_sock(sk);
+		}
+		break;
+		
+	case UDP_ENCAP:
+		switch (val) {
+		case 0:
+		case UDP_ENCAP_ESPINUDP:
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			up->encap_type = val;
+			break;
+		default:
+			err = -ENOPROTOOPT;
+			break;
+		}
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+
+	return err;
+}
+
+static int udp_getsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int __user *optlen)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int val, len;
+
+	if (level != SOL_UDP)
+		return ip_getsockopt(sk, level, optname, optval, optlen);
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(int));
+	
+	if(len < 0)
+		return -EINVAL;
+
+	switch(optname) {
+	case UDP_CORK:
+		val = up->corkflag;
+		break;
+
+	case UDP_ENCAP:
+		val = up->encap_type;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	};
+
+  	if(put_user(len, optlen))
+  		return -EFAULT;
+	if(copy_to_user(optval, &val,len))
+		return -EFAULT;
+  	return 0;
+}
+
+/**
+ * 	udp_poll - wait for a UDP event.
+ *	@file - file struct
+ *	@sock - socket
+ *	@wait - poll table
+ *
+ *	This is same as datagram poll, except for the special case of 
+ *	blocking sockets. If application is using a blocking fd
+ *	and a packet with checksum error is in the queue;
+ *	then it could get return from select indicating data available
+ *	but then block when reading it. Add special case code
+ *	to work around these arguably broken applications.
+ */
+unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	unsigned int mask = datagram_poll(file, sock, wait);
+	struct sock *sk = sock->sk;
+	
+	/* Check for false positives due to checksum errors */
+	if ( (mask & POLLRDNORM) &&
+	     !(file->f_flags & O_NONBLOCK) &&
+	     !(sk->sk_shutdown & RCV_SHUTDOWN)){
+		struct sk_buff_head *rcvq = &sk->sk_receive_queue;
+		struct sk_buff *skb;
+
+		spin_lock_irq(&rcvq->lock);
+		while ((skb = skb_peek(rcvq)) != NULL) {
+			if (udp_checksum_complete(skb)) {
+				UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+				__skb_unlink(skb, rcvq);
+				kfree_skb(skb);
+			} else {
+				skb->ip_summed = CHECKSUM_UNNECESSARY;
+				break;
+			}
+		}
+		spin_unlock_irq(&rcvq->lock);
+
+		/* nothing to see, move along */
+		if (skb == NULL)
+			mask &= ~(POLLIN | POLLRDNORM);
+	}
+
+	return mask;
+	
+}
+
+struct proto udp_prot = {
+ 	.name =		"UDP",
+	.owner =	THIS_MODULE,
+	.close =	udp_close,
+	.connect =	ip4_datagram_connect,
+	.disconnect =	udp_disconnect,
+	.ioctl =	udp_ioctl,
+	.destroy =	udp_destroy_sock,
+	.setsockopt =	udp_setsockopt,
+	.getsockopt =	udp_getsockopt,
+	.sendmsg =	udp_sendmsg,
+	.recvmsg =	udp_recvmsg,
+	.sendpage =	udp_sendpage,
+	.backlog_rcv =	udp_queue_rcv_skb,
+	.hash =		udp_v4_hash,
+	.unhash =	udp_v4_unhash,
+	.get_port =	udp_v4_get_port,
+	.obj_size =	sizeof(struct udp_sock),
+};
+
+/* ------------------------------------------------------------------------ */
+#ifdef CONFIG_PROC_FS
+
+static struct sock *udp_get_first(struct seq_file *seq)
+{
+	struct sock *sk;
+	struct udp_iter_state *state = seq->private;
+
+	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+		struct hlist_node *node;
+		sk_for_each(sk, node, &udp_hash[state->bucket]) {
+			if (sk->sk_family == state->family)
+				goto found;
+		}
+	}
+	sk = NULL;
+found:
+	return sk;
+}
+
+static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
+{
+	struct udp_iter_state *state = seq->private;
+
+	do {
+		sk = sk_next(sk);
+try_again:
+		;
+	} while (sk && sk->sk_family != state->family);
+
+	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+		sk = sk_head(&udp_hash[state->bucket]);
+		goto try_again;
+	}
+	return sk;
+}
+
+static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct sock *sk = udp_get_first(seq);
+
+	if (sk)
+		while(pos && (sk = udp_get_next(seq, sk)) != NULL)
+			--pos;
+	return pos ? NULL : sk;
+}
+
+static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&udp_hash_lock);
+	return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
+}
+
+static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *sk;
+
+	if (v == (void *)1)
+		sk = udp_get_idx(seq, 0);
+	else
+		sk = udp_get_next(seq, v);
+
+	++*pos;
+	return sk;
+}
+
+static void udp_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&udp_hash_lock);
+}
+
+static int udp_seq_open(struct inode *inode, struct file *file)
+{
+	struct udp_seq_afinfo *afinfo = PDE(inode)->data;
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	memset(s, 0, sizeof(*s));
+	s->family		= afinfo->family;
+	s->seq_ops.start	= udp_seq_start;
+	s->seq_ops.next		= udp_seq_next;
+	s->seq_ops.show		= afinfo->seq_show;
+	s->seq_ops.stop		= udp_seq_stop;
+
+	rc = seq_open(file, &s->seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+/* ------------------------------------------------------------------------ */
+int udp_proc_register(struct udp_seq_afinfo *afinfo)
+{
+	struct proc_dir_entry *p;
+	int rc = 0;
+
+	if (!afinfo)
+		return -EINVAL;
+	afinfo->seq_fops->owner		= afinfo->owner;
+	afinfo->seq_fops->open		= udp_seq_open;
+	afinfo->seq_fops->read		= seq_read;
+	afinfo->seq_fops->llseek	= seq_lseek;
+	afinfo->seq_fops->release	= seq_release_private;
+
+	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+	if (p)
+		p->data = afinfo;
+	else
+		rc = -ENOMEM;
+	return rc;
+}
+
+void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
+{
+	if (!afinfo)
+		return;
+	proc_net_remove(afinfo->name);
+	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+}
+
+/* ------------------------------------------------------------------------ */
+static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
+{
+	struct inet_sock *inet = inet_sk(sp);
+	unsigned int dest = inet->daddr;
+	unsigned int src  = inet->rcv_saddr;
+	__u16 destp	  = ntohs(inet->dport);
+	__u16 srcp	  = ntohs(inet->sport);
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
+		bucket, src, srcp, dest, destp, sp->sk_state, 
+		atomic_read(&sp->sk_wmem_alloc),
+		atomic_read(&sp->sk_rmem_alloc),
+		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
+		atomic_read(&sp->sk_refcnt), sp);
+}
+
+static int udp4_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "%-127s\n",
+			   "  sl  local_address rem_address   st tx_queue "
+			   "rx_queue tr tm->when retrnsmt   uid  timeout "
+			   "inode");
+	else {
+		char tmpbuf[129];
+		struct udp_iter_state *state = seq->private;
+
+		udp4_format_sock(v, tmpbuf, state->bucket);
+		seq_printf(seq, "%-127s\n", tmpbuf);
+	}
+	return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+static struct file_operations udp4_seq_fops;
+static struct udp_seq_afinfo udp4_seq_afinfo = {
+	.owner		= THIS_MODULE,
+	.name		= "udp",
+	.family		= AF_INET,
+	.seq_show	= udp4_seq_show,
+	.seq_fops	= &udp4_seq_fops,
+};
+
+int __init udp4_proc_init(void)
+{
+	return udp_proc_register(&udp4_seq_afinfo);
+}
+
+void udp4_proc_exit(void)
+{
+	udp_proc_unregister(&udp4_seq_afinfo);
+}
+#endif /* CONFIG_PROC_FS */
+
+EXPORT_SYMBOL(udp_disconnect);
+EXPORT_SYMBOL(udp_hash);
+EXPORT_SYMBOL(udp_hash_lock);
+EXPORT_SYMBOL(udp_ioctl);
+EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_prot);
+EXPORT_SYMBOL(udp_sendmsg);
+EXPORT_SYMBOL(udp_poll);
+
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(udp_proc_register);
+EXPORT_SYMBOL(udp_proc_unregister);
+#endif
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
new file mode 100644
index 00000000000..6aecd7a4353
--- /dev/null
+++ b/net/ipv4/utils.c
@@ -0,0 +1,59 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Various kernel-resident INET utility functions; mainly
+ *		for format conversion and debugging output.
+ *
+ * Version:	$Id: utils.c,v 1.8 2000/10/03 07:29:01 anton Exp $
+ *
+ * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Fixes:
+ *		Alan Cox	:	verify_area check.
+ *		Alan Cox	:	removed old debugging.
+ *		Andi Kleen	:	add net_ratelimit()  
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/*
+ *	Convert an ASCII string to binary IP. 
+ */
+ 
+__u32 in_aton(const char *str)
+{
+	unsigned long l;
+	unsigned int val;
+	int i;
+
+	l = 0;
+	for (i = 0; i < 4; i++) 
+	{
+		l <<= 8;
+		if (*str != '\0') 
+		{
+			val = 0;
+			while (*str != '\0' && *str != '.') 
+			{
+				val *= 10;
+				val += *str - '0';
+				str++;
+			}
+			l |= val;
+			if (*str != '\0') 
+				str++;
+		}
+	}
+	return(htonl(l));
+}
+
+EXPORT_SYMBOL(in_aton);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
new file mode 100644
index 00000000000..2d3849c38a0
--- /dev/null
+++ b/net/ipv4/xfrm4_input.c
@@ -0,0 +1,160 @@
+/*
+ * xfrm4_input.c
+ *
+ * Changes:
+ *	YOSHIFUJI Hideaki @USAGI
+ *		Split up af-specific portion
+ *	Derek Atkins <derek@ihtfp.com>
+ *		Add Encapsulation support
+ * 	
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+int xfrm4_rcv(struct sk_buff *skb)
+{
+	return xfrm4_rcv_encap(skb, 0);
+}
+
+EXPORT_SYMBOL(xfrm4_rcv);
+
+static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct iphdr *outer_iph = skb->nh.iph;
+	struct iphdr *inner_iph = skb->h.ipiph;
+
+	if (INET_ECN_is_ce(outer_iph->tos))
+		IP_ECN_set_ce(inner_iph);
+}
+
+static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{
+	switch (nexthdr) {
+	case IPPROTO_IPIP:
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			return -EINVAL;
+		*spi = skb->nh.iph->saddr;
+		*seq = 0;
+		return 0;
+	}
+
+	return xfrm_parse_spi(skb, nexthdr, spi, seq);
+}
+
+int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
+{
+	int err;
+	u32 spi, seq;
+	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
+	struct xfrm_state *x;
+	int xfrm_nr = 0;
+	int decaps = 0;
+
+	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+		goto drop;
+
+	do {
+		struct iphdr *iph = skb->nh.iph;
+
+		if (xfrm_nr == XFRM_MAX_DEPTH)
+			goto drop;
+
+		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, iph->protocol, AF_INET);
+		if (x == NULL)
+			goto drop;
+
+		spin_lock(&x->lock);
+		if (unlikely(x->km.state != XFRM_STATE_VALID))
+			goto drop_unlock;
+
+		if (x->props.replay_window && xfrm_replay_check(x, seq))
+			goto drop_unlock;
+
+		if (xfrm_state_check_expire(x))
+			goto drop_unlock;
+
+		xfrm_vec[xfrm_nr].decap.decap_type = encap_type;
+		if (x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb))
+			goto drop_unlock;
+
+		/* only the first xfrm gets the encap type */
+		encap_type = 0;
+
+		if (x->props.replay_window)
+			xfrm_replay_advance(x, seq);
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec[xfrm_nr++].xvec = x;
+
+		iph = skb->nh.iph;
+
+		if (x->props.mode) {
+			if (iph->protocol != IPPROTO_IPIP)
+				goto drop;
+			if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+				goto drop;
+			if (skb_cloned(skb) &&
+			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+				goto drop;
+			if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+				ipv4_copy_dscp(iph, skb->h.ipiph);
+			if (!(x->props.flags & XFRM_STATE_NOECN))
+				ipip_ecn_decapsulate(skb);
+			skb->mac.raw = memmove(skb->data - skb->mac_len,
+					       skb->mac.raw, skb->mac_len);
+			skb->nh.raw = skb->data;
+			memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+			decaps = 1;
+			break;
+		}
+
+		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+			goto drop;
+	} while (!err);
+
+	/* Allocate new secpath or COW existing one. */
+
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop;
+
+	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
+	skb->sp->len += xfrm_nr;
+
+	if (decaps) {
+		if (!(skb->dev->flags&IFF_LOOPBACK)) {
+			dst_release(skb->dst);
+			skb->dst = NULL;
+		}
+		netif_rx(skb);
+		return 0;
+	} else {
+		return -skb->nh.iph->protocol;
+	}
+
+drop_unlock:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop:
+	while (--xfrm_nr >= 0)
+		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
+
+	kfree_skb(skb);
+	return 0;
+}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
new file mode 100644
index 00000000000..af2392ae576
--- /dev/null
+++ b/net/ipv4/xfrm4_output.c
@@ -0,0 +1,141 @@
+/*
+ * xfrm4_output.c - Common IPsec encapsulation code for IPv4.
+ * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/icmp.h>
+
+/* Add encapsulation header.
+ *
+ * In transport mode, the IP header will be moved forward to make space
+ * for the encapsulation header.
+ *
+ * In tunnel mode, the top IP header will be constructed per RFC 2401.
+ * The following fields in it shall be filled in by x->type->output:
+ *	tot_len
+ *	check
+ *
+ * On exit, skb->h will be set to the start of the payload to be processed
+ * by x->type->output and skb->nh will be set to the top IP header.
+ */
+static void xfrm4_encap(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct iphdr *iph, *top_iph;
+
+	iph = skb->nh.iph;
+	skb->h.ipiph = iph;
+
+	skb->nh.raw = skb_push(skb, x->props.header_len);
+	top_iph = skb->nh.iph;
+
+	if (!x->props.mode) {
+		skb->h.raw += iph->ihl*4;
+		memmove(top_iph, iph, iph->ihl*4);
+		return;
+	}
+
+	top_iph->ihl = 5;
+	top_iph->version = 4;
+
+	/* DS disclosed */
+	top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
+	if (x->props.flags & XFRM_STATE_NOECN)
+		IP_ECN_clear(top_iph);
+
+	top_iph->frag_off = iph->frag_off & htons(IP_DF);
+	if (!top_iph->frag_off)
+		__ip_select_ident(top_iph, dst, 0);
+
+	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
+
+	top_iph->saddr = x->props.saddr.a4;
+	top_iph->daddr = x->id.daddr.a4;
+	top_iph->protocol = IPPROTO_IPIP;
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+}
+
+static int xfrm4_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu, ret = 0;
+	struct dst_entry *dst;
+	struct iphdr *iph = skb->nh.iph;
+
+	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
+		goto out;
+
+	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	
+	if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
+		goto out;
+
+	dst = skb->dst;
+	mtu = dst_mtu(dst);
+	if (skb->len > mtu) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		ret = -EMSGSIZE;
+	}
+out:
+	return ret;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	int err;
+	
+	if (skb->ip_summed == CHECKSUM_HW) {
+		err = skb_checksum_help(skb, 0);
+		if (err)
+			goto error_nolock;
+	}
+
+	if (x->props.mode) {
+		err = xfrm4_tunnel_check_size(skb);
+		if (err)
+			goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);
+	err = xfrm_state_check(x, skb);
+	if (err)
+		goto error;
+
+	xfrm4_encap(skb);
+
+	err = x->type->output(x, skb);
+	if (err)
+		goto error;
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+
+	spin_unlock_bh(&x->lock);
+	
+	if (!(skb->dst = dst_pop(dst))) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	err = NET_XMIT_BYPASS;
+
+out_exit:
+	return err;
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	goto out_exit;
+}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
new file mode 100644
index 00000000000..7fe2afd2e66
--- /dev/null
+++ b/net/ipv4/xfrm4_policy.c
@@ -0,0 +1,281 @@
+/* 
+ * xfrm4_policy.c
+ *
+ * Changes:
+ *	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki @USAGI
+ *		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/config.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+static struct dst_ops xfrm4_dst_ops;
+static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
+
+static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED };
+
+static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+{
+	return __ip_route_output_key((struct rtable**)dst, fl);
+}
+
+static struct dst_entry *
+__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	read_lock_bh(&policy->lock);
+	for (dst = policy->bundles; dst; dst = dst->next) {
+		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
+		if (xdst->u.rt.fl.oif == fl->oif &&	/*XXX*/
+		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
+	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
+		    xfrm_bundle_ok(xdst, fl, AF_INET)) {
+			dst_clone(dst);
+			break;
+		}
+	}
+	read_unlock_bh(&policy->lock);
+	return dst;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		      struct flowi *fl, struct dst_entry **dst_p)
+{
+	struct dst_entry *dst, *dst_prev;
+	struct rtable *rt0 = (struct rtable*)(*dst_p);
+	struct rtable *rt = rt0;
+	u32 remote = fl->fl4_dst;
+	u32 local  = fl->fl4_src;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip4_u = {
+				.saddr = local,
+				.daddr = remote
+			}
+		}
+	};
+	int i;
+	int err;
+	int header_len = 0;
+	int trailer_len = 0;
+
+	dst = dst_prev = NULL;
+	dst_hold(&rt->u.dst);
+
+	for (i = 0; i < nx; i++) {
+		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
+		struct xfrm_dst *xdst;
+		int tunnel = 0;
+
+		if (unlikely(dst1 == NULL)) {
+			err = -ENOBUFS;
+			dst_release(&rt->u.dst);
+			goto error;
+		}
+
+		if (!dst)
+			dst = dst1;
+		else {
+			dst_prev->child = dst1;
+			dst1->flags |= DST_NOHASH;
+			dst_clone(dst1);
+		}
+
+		xdst = (struct xfrm_dst *)dst1;
+		xdst->route = &rt->u.dst;
+
+		dst1->next = dst_prev;
+		dst_prev = dst1;
+		if (xfrm[i]->props.mode) {
+			remote = xfrm[i]->id.daddr.a4;
+			local  = xfrm[i]->props.saddr.a4;
+			tunnel = 1;
+		}
+		header_len += xfrm[i]->props.header_len;
+		trailer_len += xfrm[i]->props.trailer_len;
+
+		if (tunnel) {
+			fl_tunnel.fl4_src = local;
+			fl_tunnel.fl4_dst = remote;
+			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
+					      &fl_tunnel, AF_INET);
+			if (err)
+				goto error;
+		} else
+			dst_hold(&rt->u.dst);
+	}
+
+	dst_prev->child = &rt->u.dst;
+	dst->path = &rt->u.dst;
+
+	*dst_p = dst;
+	dst = dst_prev;
+
+	dst_prev = *dst_p;
+	i = 0;
+	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
+		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
+		x->u.rt.fl = *fl;
+
+		dst_prev->xfrm = xfrm[i++];
+		dst_prev->dev = rt->u.dst.dev;
+		if (rt->u.dst.dev)
+			dev_hold(rt->u.dst.dev);
+		dst_prev->obsolete	= -1;
+		dst_prev->flags	       |= DST_HOST;
+		dst_prev->lastuse	= jiffies;
+		dst_prev->header_len	= header_len;
+		dst_prev->trailer_len	= trailer_len;
+		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
+
+		/* Copy neighbout for reachability confirmation */
+		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
+		dst_prev->input		= rt->u.dst.input;
+		dst_prev->output	= xfrm4_output;
+		if (rt->peer)
+			atomic_inc(&rt->peer->refcnt);
+		x->u.rt.peer = rt->peer;
+		/* Sheit... I remember I did this right. Apparently,
+		 * it was magically lost, so this code needs audit */
+		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
+		x->u.rt.rt_type = rt->rt_type;
+		x->u.rt.rt_src = rt0->rt_src;
+		x->u.rt.rt_dst = rt0->rt_dst;
+		x->u.rt.rt_gateway = rt->rt_gateway;
+		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
+		header_len -= x->u.dst.xfrm->props.header_len;
+		trailer_len -= x->u.dst.xfrm->props.trailer_len;
+	}
+
+	xfrm_init_pmtu(dst);
+	return 0;
+
+error:
+	if (dst)
+		dst_free(dst);
+	return err;
+}
+
+static void
+_decode_session4(struct sk_buff *skb, struct flowi *fl)
+{
+	struct iphdr *iph = skb->nh.iph;
+	u8 *xprth = skb->nh.raw + iph->ihl*4;
+
+	memset(fl, 0, sizeof(struct flowi));
+	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
+		switch (iph->protocol) {
+		case IPPROTO_UDP:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u16 *ports = (u16 *)xprth;
+
+				fl->fl_ip_sport = ports[0];
+				fl->fl_ip_dport = ports[1];
+			}
+			break;
+
+		case IPPROTO_ICMP:
+			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
+				u8 *icmp = xprth;
+
+				fl->fl_icmp_type = icmp[0];
+				fl->fl_icmp_code = icmp[1];
+			}
+			break;
+
+		case IPPROTO_ESP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u32 *ehdr = (u32 *)xprth;
+
+				fl->fl_ipsec_spi = ehdr[0];
+			}
+			break;
+
+		case IPPROTO_AH:
+			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
+				u32 *ah_hdr = (u32*)xprth;
+
+				fl->fl_ipsec_spi = ah_hdr[1];
+			}
+			break;
+
+		case IPPROTO_COMP:
+			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
+				u16 *ipcomp_hdr = (u16 *)xprth;
+
+				fl->fl_ipsec_spi = ntohl(ntohs(ipcomp_hdr[1]));
+			}
+			break;
+		default:
+			fl->fl_ipsec_spi = 0;
+			break;
+		};
+	}
+	fl->proto = iph->protocol;
+	fl->fl4_dst = iph->daddr;
+	fl->fl4_src = iph->saddr;
+}
+
+static inline int xfrm4_garbage_collect(void)
+{
+	read_lock(&xfrm4_policy_afinfo.lock);
+	xfrm4_policy_afinfo.garbage_collect();
+	read_unlock(&xfrm4_policy_afinfo.lock);
+	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
+}
+
+static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	struct dst_entry *path = xdst->route;
+
+	path->ops->update_pmtu(path, mtu);
+}
+
+static struct dst_ops xfrm4_dst_ops = {
+	.family =		AF_INET,
+	.protocol =		__constant_htons(ETH_P_IP),
+	.gc =			xfrm4_garbage_collect,
+	.update_pmtu =		xfrm4_update_pmtu,
+	.gc_thresh =		1024,
+	.entry_size =		sizeof(struct xfrm_dst),
+};
+
+static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+	.family = 		AF_INET,
+	.lock = 		RW_LOCK_UNLOCKED,
+	.type_map = 		&xfrm4_type_map,
+	.dst_ops =		&xfrm4_dst_ops,
+	.dst_lookup =		xfrm4_dst_lookup,
+	.find_bundle = 		__xfrm4_find_bundle,
+	.bundle_create =	__xfrm4_bundle_create,
+	.decode_session =	_decode_session4,
+};
+
+static void __init xfrm4_policy_init(void)
+{
+	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
+}
+
+static void __exit xfrm4_policy_fini(void)
+{
+	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
+}
+
+void __init xfrm4_init(void)
+{
+	xfrm4_state_init();
+	xfrm4_policy_init();
+}
+
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
new file mode 100644
index 00000000000..223a2e83853
--- /dev/null
+++ b/net/ipv4/xfrm4_state.c
@@ -0,0 +1,126 @@
+/*
+ * xfrm4_state.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ *
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+
+static struct xfrm_state_afinfo xfrm4_state_afinfo;
+
+static void
+__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		     struct xfrm_tmpl *tmpl,
+		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	x->sel.daddr.a4 = fl->fl4_dst;
+	x->sel.saddr.a4 = fl->fl4_src;
+	x->sel.dport = xfrm_flowi_dport(fl);
+	x->sel.dport_mask = ~0;
+	x->sel.sport = xfrm_flowi_sport(fl);
+	x->sel.sport_mask = ~0;
+	x->sel.prefixlen_d = 32;
+	x->sel.prefixlen_s = 32;
+	x->sel.proto = fl->proto;
+	x->sel.ifindex = fl->oif;
+	x->id = tmpl->id;
+	if (x->id.daddr.a4 == 0)
+		x->id.daddr.a4 = daddr->a4;
+	x->props.saddr = tmpl->saddr;
+	if (x->props.saddr.a4 == 0)
+		x->props.saddr.a4 = saddr->a4;
+	x->props.mode = tmpl->mode;
+	x->props.reqid = tmpl->reqid;
+	x->props.family = AF_INET;
+}
+
+static struct xfrm_state *
+__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+{
+	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
+		if (x->props.family == AF_INET &&
+		    spi == x->id.spi &&
+		    daddr->a4 == x->id.daddr.a4 &&
+		    proto == x->id.proto) {
+			xfrm_state_hold(x);
+			return x;
+		}
+	}
+	return NULL;
+}
+
+static struct xfrm_state *
+__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
+		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		 int create)
+{
+	struct xfrm_state *x, *x0;
+	unsigned h = __xfrm4_dst_hash(daddr);
+
+	x0 = NULL;
+
+	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
+		if (x->props.family == AF_INET &&
+		    daddr->a4 == x->id.daddr.a4 &&
+		    mode == x->props.mode &&
+		    proto == x->id.proto &&
+		    saddr->a4 == x->props.saddr.a4 &&
+		    reqid == x->props.reqid &&
+		    x->km.state == XFRM_STATE_ACQ &&
+		    !x->id.spi) {
+			    x0 = x;
+			    break;
+		    }
+	}
+	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
+		x0->sel.daddr.a4 = daddr->a4;
+		x0->sel.saddr.a4 = saddr->a4;
+		x0->sel.prefixlen_d = 32;
+		x0->sel.prefixlen_s = 32;
+		x0->props.saddr.a4 = saddr->a4;
+		x0->km.state = XFRM_STATE_ACQ;
+		x0->id.daddr.a4 = daddr->a4;
+		x0->id.proto = proto;
+		x0->props.family = AF_INET;
+		x0->props.mode = mode;
+		x0->props.reqid = reqid;
+		x0->props.family = AF_INET;
+		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x0);
+		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+		add_timer(&x0->timer);
+		xfrm_state_hold(x0);
+		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
+		wake_up(&km_waitq);
+	}
+	if (x0)
+		xfrm_state_hold(x0);
+	return x0;
+}
+
+static struct xfrm_state_afinfo xfrm4_state_afinfo = {
+	.family			= AF_INET,
+	.lock			= RW_LOCK_UNLOCKED,
+	.init_tempsel		= __xfrm4_init_tempsel,
+	.state_lookup		= __xfrm4_state_lookup,
+	.find_acq		= __xfrm4_find_acq,
+};
+
+void __init xfrm4_state_init(void)
+{
+	xfrm_state_register_afinfo(&xfrm4_state_afinfo);
+}
+
+void __exit xfrm4_state_fini(void)
+{
+	xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
+}
+
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
new file mode 100644
index 00000000000..413191f585f
--- /dev/null
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -0,0 +1,144 @@
+/* xfrm4_tunnel.c: Generic IP tunnel transformer.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+
+static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	
+	iph = skb->nh.iph;
+	iph->tot_len = htons(skb->len);
+	ip_send_check(iph);
+
+	return 0;
+}
+
+static int ipip_xfrm_rcv(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm_tunnel *ipip_handler;
+static DECLARE_MUTEX(xfrm4_tunnel_sem);
+
+int xfrm4_tunnel_register(struct xfrm_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm4_tunnel_sem);
+	ret = 0;
+	if (ipip_handler != NULL)
+		ret = -EINVAL;
+	if (!ret)
+		ipip_handler = handler;
+	up(&xfrm4_tunnel_sem);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm4_tunnel_register);
+
+int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm4_tunnel_sem);
+	ret = 0;
+	if (ipip_handler != handler)
+		ret = -EINVAL;
+	if (!ret)
+		ipip_handler = NULL;
+	up(&xfrm4_tunnel_sem);
+
+	synchronize_net();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm4_tunnel_deregister);
+
+static int ipip_rcv(struct sk_buff *skb)
+{
+	struct xfrm_tunnel *handler = ipip_handler;
+
+	/* Tunnel devices take precedence.  */
+	if (handler && handler->handler(skb) == 0)
+		return 0;
+
+	return xfrm4_rcv(skb);
+}
+
+static void ipip_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm_tunnel *handler = ipip_handler;
+	u32 arg = info;
+
+	if (handler)
+		handler->err_handler(skb, &arg);
+}
+
+static int ipip_init_state(struct xfrm_state *x, void *args)
+{
+	if (!x->props.mode)
+		return -EINVAL;
+
+	if (x->encap)
+		return -EINVAL;
+
+	x->props.header_len = sizeof(struct iphdr);
+
+	return 0;
+}
+
+static void ipip_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type ipip_type = {
+	.description	= "IPIP",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_IPIP,
+	.init_state	= ipip_init_state,
+	.destructor	= ipip_destroy,
+	.input		= ipip_xfrm_rcv,
+	.output		= ipip_output
+};
+
+static struct net_protocol ipip_protocol = {
+	.handler	=	ipip_rcv,
+	.err_handler	=	ipip_err,
+	.no_policy	=	1,
+};
+
+static int __init ipip_init(void)
+{
+	if (xfrm_register_type(&ipip_type, AF_INET) < 0) {
+		printk(KERN_INFO "ipip init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet_add_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) {
+		printk(KERN_INFO "ipip init: can't add protocol\n");
+		xfrm_unregister_type(&ipip_type, AF_INET);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ipip_fini(void)
+{
+	if (inet_del_protocol(&ipip_protocol, IPPROTO_IPIP) < 0)
+		printk(KERN_INFO "ipip close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
+		printk(KERN_INFO "ipip close: can't remove xfrm type\n");
+}
+
+module_init(ipip_init);
+module_exit(ipip_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
new file mode 100644
index 00000000000..e66ca9381cf
--- /dev/null
+++ b/net/ipv6/Kconfig
@@ -0,0 +1,79 @@
+#
+# IPv6 configuration
+# 
+config IPV6_PRIVACY
+	bool "IPv6: Privacy Extensions (RFC 3041) support"
+	depends on IPV6
+	---help---
+	  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
+	  support.  With this option, additional periodically-alter 
+	  pseudo-random global-scope unicast address(es) will assigned to
+	  your interface(s).
+	
+	  By default, kernel do not generate temporary addresses.
+	  To use temporary addresses, do
+	
+	        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
+
+	  See <file:Documentation/networking/ip-sysctl.txt> for details.
+
+config INET6_AH
+	tristate "IPv6: AH transformation"
+	depends on IPV6
+	select XFRM
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	---help---
+	  Support for IPsec AH.
+
+	  If unsure, say Y.
+
+config INET6_ESP
+	tristate "IPv6: ESP transformation"
+	depends on IPV6
+	select XFRM
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_DES
+	---help---
+	  Support for IPsec ESP.
+
+	  If unsure, say Y.
+
+config INET6_IPCOMP
+	tristate "IPv6: IPComp transformation"
+	depends on IPV6
+	select XFRM
+	select INET6_TUNNEL
+	select CRYPTO
+	select CRYPTO_DEFLATE
+	---help---
+	  Support for IP Payload Compression Protocol (IPComp) (RFC3173),
+	  typically needed for IPsec.
+
+	  If unsure, say Y.
+
+config INET6_TUNNEL
+	tristate "IPv6: tunnel transformation"
+	depends on IPV6
+	select XFRM
+	---help---
+	  Support for generic IPv6-in-IPv6 tunnel transformation, which is
+	  required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
+	  IPComp.
+	  
+	  If unsure, say Y.
+
+config IPV6_TUNNEL
+	tristate "IPv6: IPv6-in-IPv6 tunnel"
+	depends on IPV6
+	select INET6_TUNNEL
+	---help---
+	  Support for IPv6-in-IPv6 tunnels described in RFC 2473.
+
+	  If unsure, say N.
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
new file mode 100644
index 00000000000..b39e0494059
--- /dev/null
+++ b/net/ipv6/Makefile
@@ -0,0 +1,25 @@
+#
+# Makefile for the Linux TCP/IP (INET6) layer.
+#
+
+obj-$(CONFIG_IPV6) += ipv6.o
+
+ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
+		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
+		protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
+		ip6_flowlabel.o ipv6_syms.o
+
+ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
+	xfrm6_output.o
+ipv6-objs += $(ipv6-y)
+
+obj-$(CONFIG_INET6_AH) += ah6.o
+obj-$(CONFIG_INET6_ESP) += esp6.o
+obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
+obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o 
+obj-$(CONFIG_NETFILTER)	+= netfilter/
+
+obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+
+obj-y += exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
new file mode 100644
index 00000000000..5ffde14ddc0
--- /dev/null
+++ b/net/ipv6/addrconf.c
@@ -0,0 +1,3615 @@
+/*
+ *	IPv6 Address [auto]configuration
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	$Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	Changes:
+ *
+ *	Janos Farkas			:	delete timer on ifdown
+ *	<chexum@bankinf.banki.hu>
+ *	Andi Kleen			:	kill double kfree on module
+ *						unload.
+ *	Maciej W. Rozycki		:	FDDI support
+ *	sekiya@USAGI			:	Don't send too many RS
+ *						packets.
+ *	yoshfuji@USAGI			:       Fixed interval between DAD
+ *						packets.
+ *	YOSHIFUJI Hideaki @USAGI	:	improved accuracy of
+ *						address validation timer.
+ *	YOSHIFUJI Hideaki @USAGI	:	Privacy Extensions (RFC3041)
+ *						support.
+ *	Yuji SEKIYA @USAGI		:	Don't assign a same IPv6
+ *						address on a same interface.
+ *	YOSHIFUJI Hideaki @USAGI	:	ARCnet support
+ *	YOSHIFUJI Hideaki @USAGI	:	convert /proc/net/if_inet6 to
+ *						seq_file.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_arcnet.h>
+#include <linux/if_infiniband.h>
+#include <linux/route.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/delay.h>
+#include <linux/notifier.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/tcp.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/rtnetlink.h>
+
+#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <asm/scatterlist.h>
+#endif
+
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/* Set to 3 to get tracing... */
+#define ACONF_DEBUG 2
+
+#if ACONF_DEBUG >= 3
+#define ADBG(x) printk x
+#else
+#define ADBG(x)
+#endif
+
+#define	INFINITY_LIFE_TIME	0xFFFFFFFF
+#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+
+#ifdef CONFIG_SYSCTL
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+#endif
+
+#ifdef CONFIG_IPV6_PRIVACY
+static int __ipv6_regen_rndid(struct inet6_dev *idev);
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 
+static void ipv6_regen_rndid(unsigned long data);
+
+static int desync_factor = MAX_DESYNC_FACTOR * HZ;
+static struct crypto_tfm *md5_tfm;
+static DEFINE_SPINLOCK(md5_tfm_lock);
+#endif
+
+static int ipv6_count_addresses(struct inet6_dev *idev);
+
+/*
+ *	Configured unicast address hash table
+ */
+static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
+static DEFINE_RWLOCK(addrconf_hash_lock);
+
+/* Protects inet6 devices */
+DEFINE_RWLOCK(addrconf_lock);
+
+static void addrconf_verify(unsigned long);
+
+static struct timer_list addr_chk_timer =
+			TIMER_INITIALIZER(addrconf_verify, 0, 0);
+static DEFINE_SPINLOCK(addrconf_verify_lock);
+
+static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
+static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
+
+static int addrconf_ifdown(struct net_device *dev, int how);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags);
+static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_rs_timer(unsigned long data);
+static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+
+static void inet6_prefix_notify(int event, struct inet6_dev *idev, 
+				struct prefix_info *pinfo);
+static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
+
+static struct notifier_block *inet6addr_chain;
+
+struct ipv6_devconf ipv6_devconf = {
+	.forwarding		= 0,
+	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
+	.mtu6			= IPV6_MIN_MTU,
+	.accept_ra		= 1,
+	.accept_redirects	= 1,
+	.autoconf		= 1,
+	.force_mld_version	= 0,
+	.dad_transmits		= 1,
+	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
+	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+	.use_tempaddr 		= 0,
+	.temp_valid_lft		= TEMP_VALID_LIFETIME,
+	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
+	.regen_max_retry	= REGEN_MAX_RETRY,
+	.max_desync_factor	= MAX_DESYNC_FACTOR,
+#endif
+	.max_addresses		= IPV6_MAX_ADDRESSES,
+};
+
+static struct ipv6_devconf ipv6_devconf_dflt = {
+	.forwarding		= 0,
+	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
+	.mtu6			= IPV6_MIN_MTU,
+	.accept_ra		= 1,
+	.accept_redirects	= 1,
+	.autoconf		= 1,
+	.dad_transmits		= 1,
+	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
+	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+	.use_tempaddr		= 0,
+	.temp_valid_lft		= TEMP_VALID_LIFETIME,
+	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
+	.regen_max_retry	= REGEN_MAX_RETRY,
+	.max_desync_factor	= MAX_DESYNC_FACTOR,
+#endif
+	.max_addresses		= IPV6_MAX_ADDRESSES,
+};
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+#if 0
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+#endif
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+
+int ipv6_addr_type(const struct in6_addr *addr)
+{
+	int type;
+	u32 st;
+
+	st = addr->s6_addr32[0];
+
+	if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
+		type = IPV6_ADDR_MULTICAST;
+
+		switch((st & htonl(0x00FF0000))) {
+			case __constant_htonl(0x00010000):
+				type |= IPV6_ADDR_LOOPBACK;
+				break;
+
+			case __constant_htonl(0x00020000):
+				type |= IPV6_ADDR_LINKLOCAL;
+				break;
+
+			case __constant_htonl(0x00050000):
+				type |= IPV6_ADDR_SITELOCAL;
+				break;
+		};
+		return type;
+	}
+
+	type = IPV6_ADDR_UNICAST;
+
+	/* Consider all addresses with the first three bits different of
+	   000 and 111 as finished.
+	 */
+	if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
+	    (st & htonl(0xE0000000)) != htonl(0xE0000000))
+		return type;
+	
+	if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
+		return (IPV6_ADDR_LINKLOCAL | type);
+
+	if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
+		return (IPV6_ADDR_SITELOCAL | type);
+
+	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->s6_addr32[3] == 0)
+				return IPV6_ADDR_ANY;
+
+			if (addr->s6_addr32[3] == htonl(0x00000001))
+				return (IPV6_ADDR_LOOPBACK | type);
+
+			return (IPV6_ADDR_COMPATv4 | type);
+		}
+
+		if (addr->s6_addr32[2] == htonl(0x0000ffff))
+			return IPV6_ADDR_MAPPED;
+	}
+
+	st &= htonl(0xFF000000);
+	if (st == 0)
+		return IPV6_ADDR_RESERVED;
+	st &= htonl(0xFE000000);
+	if (st == htonl(0x02000000))
+		return IPV6_ADDR_RESERVED;	/* for NSAP */
+	if (st == htonl(0x04000000))
+		return IPV6_ADDR_RESERVED;	/* for IPX */
+	return type;
+}
+
+static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+{
+	if (del_timer(&ifp->timer))
+		__in6_ifa_put(ifp);
+}
+
+enum addrconf_timer_t
+{
+	AC_NONE,
+	AC_DAD,
+	AC_RS,
+};
+
+static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
+			       enum addrconf_timer_t what,
+			       unsigned long when)
+{
+	if (!del_timer(&ifp->timer))
+		in6_ifa_hold(ifp);
+
+	switch (what) {
+	case AC_DAD:
+		ifp->timer.function = addrconf_dad_timer;
+		break;
+	case AC_RS:
+		ifp->timer.function = addrconf_rs_timer;
+		break;
+	default:;
+	}
+	ifp->timer.expires = jiffies + when;
+	add_timer(&ifp->timer);
+}
+
+/* Nobody refers to this device, we may destroy it. */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+	struct net_device *dev = idev->dev;
+	BUG_TRAP(idev->addr_list==NULL);
+	BUG_TRAP(idev->mc_list==NULL);
+#ifdef NET_REFCNT_DEBUG
+	printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
+#endif
+	dev_put(dev);
+	if (!idev->dead) {
+		printk("Freeing alive inet6 device %p\n", idev);
+		return;
+	}
+	snmp6_free_dev(idev);
+	kfree(idev);
+}
+
+static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+{
+	struct inet6_dev *ndev;
+
+	ASSERT_RTNL();
+
+	if (dev->mtu < IPV6_MIN_MTU)
+		return NULL;
+
+	ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+
+	if (ndev) {
+		memset(ndev, 0, sizeof(struct inet6_dev));
+
+		rwlock_init(&ndev->lock);
+		ndev->dev = dev;
+		memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+		ndev->cnf.mtu6 = dev->mtu;
+		ndev->cnf.sysctl = NULL;
+		ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+		if (ndev->nd_parms == NULL) {
+			kfree(ndev);
+			return NULL;
+		}
+		/* We refer to the device */
+		dev_hold(dev);
+
+		if (snmp6_alloc_dev(ndev) < 0) {
+			ADBG((KERN_WARNING
+				"%s(): cannot allocate memory for statistics; dev=%s.\n",
+				__FUNCTION__, dev->name));
+			neigh_parms_release(&nd_tbl, ndev->nd_parms);
+			ndev->dead = 1;
+			in6_dev_finish_destroy(ndev);
+			return NULL;
+		}
+
+		if (snmp6_register_dev(ndev) < 0) {
+			ADBG((KERN_WARNING
+				"%s(): cannot create /proc/net/dev_snmp6/%s\n",
+				__FUNCTION__, dev->name));
+			neigh_parms_release(&nd_tbl, ndev->nd_parms);
+			ndev->dead = 1;
+			in6_dev_finish_destroy(ndev);
+			return NULL;
+		}
+
+		/* One reference from device.  We must do this before
+		 * we invoke __ipv6_regen_rndid().
+		 */
+		in6_dev_hold(ndev);
+
+#ifdef CONFIG_IPV6_PRIVACY
+		get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
+		get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
+		init_timer(&ndev->regen_timer);
+		ndev->regen_timer.function = ipv6_regen_rndid;
+		ndev->regen_timer.data = (unsigned long) ndev;
+		if ((dev->flags&IFF_LOOPBACK) ||
+		    dev->type == ARPHRD_TUNNEL ||
+		    dev->type == ARPHRD_SIT) {
+			printk(KERN_INFO
+				"Disabled Privacy Extensions on device %p(%s)\n",
+				dev, dev->name);
+			ndev->cnf.use_tempaddr = -1;
+		} else {
+			in6_dev_hold(ndev);
+			ipv6_regen_rndid((unsigned long) ndev);
+		}
+#endif
+
+		write_lock_bh(&addrconf_lock);
+		dev->ip6_ptr = ndev;
+		write_unlock_bh(&addrconf_lock);
+
+		ipv6_mc_init_dev(ndev);
+		ndev->tstamp = jiffies;
+#ifdef CONFIG_SYSCTL
+		neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, 
+				      NET_IPV6_NEIGH, "ipv6",
+				      &ndisc_ifinfo_sysctl_change,
+				      NULL);
+		addrconf_sysctl_register(ndev, &ndev->cnf);
+#endif
+	}
+	return ndev;
+}
+
+static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	if ((idev = __in6_dev_get(dev)) == NULL) {
+		if ((idev = ipv6_add_dev(dev)) == NULL)
+			return NULL;
+	}
+	if (dev->flags&IFF_UP)
+		ipv6_mc_up(idev);
+	return idev;
+}
+
+#ifdef CONFIG_SYSCTL
+static void dev_forward_change(struct inet6_dev *idev)
+{
+	struct net_device *dev;
+	struct inet6_ifaddr *ifa;
+	struct in6_addr addr;
+
+	if (!idev)
+		return;
+	dev = idev->dev;
+	if (dev && (dev->flags & IFF_MULTICAST)) {
+		ipv6_addr_all_routers(&addr);
+	
+		if (idev->cnf.forwarding)
+			ipv6_dev_mc_inc(dev, &addr);
+		else
+			ipv6_dev_mc_dec(dev, &addr);
+	}
+	for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+		if (idev->cnf.forwarding)
+			addrconf_join_anycast(ifa);
+		else
+			addrconf_leave_anycast(ifa);
+	}
+}
+
+
+static void addrconf_forward_change(void)
+{
+	struct net_device *dev;
+	struct inet6_dev *idev;
+
+	read_lock(&dev_base_lock);
+	for (dev=dev_base; dev; dev=dev->next) {
+		read_lock(&addrconf_lock);
+		idev = __in6_dev_get(dev);
+		if (idev) {
+			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
+			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			if (changed)
+				dev_forward_change(idev);
+		}
+		read_unlock(&addrconf_lock);
+	}
+	read_unlock(&dev_base_lock);
+}
+#endif
+
+/* Nobody refers to this ifaddr, destroy it */
+
+void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
+{
+	BUG_TRAP(ifp->if_next==NULL);
+	BUG_TRAP(ifp->lst_next==NULL);
+#ifdef NET_REFCNT_DEBUG
+	printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+#endif
+
+	in6_dev_put(ifp->idev);
+
+	if (del_timer(&ifp->timer))
+		printk("Timer is still running, when freeing ifa=%p\n", ifp);
+
+	if (!ifp->dead) {
+		printk("Freeing alive inet6 address %p\n", ifp);
+		return;
+	}
+	dst_release(&ifp->rt->u.dst);
+
+	kfree(ifp);
+}
+
+/* On success it returns ifp with increased reference count */
+
+static struct inet6_ifaddr *
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
+	      int scope, unsigned flags)
+{
+	struct inet6_ifaddr *ifa = NULL;
+	struct rt6_info *rt;
+	int hash;
+	int err = 0;
+
+	read_lock_bh(&addrconf_lock);
+	if (idev->dead) {
+		err = -ENODEV;			/*XXX*/
+		goto out2;
+	}
+
+	write_lock(&addrconf_hash_lock);
+
+	/* Ignore adding duplicate addresses on an interface */
+	if (ipv6_chk_same_addr(addr, idev->dev)) {
+		ADBG(("ipv6_add_addr: already assigned\n"));
+		err = -EEXIST;
+		goto out;
+	}
+
+	ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+
+	if (ifa == NULL) {
+		ADBG(("ipv6_add_addr: malloc failed\n"));
+		err = -ENOBUFS;
+		goto out;
+	}
+
+	rt = addrconf_dst_alloc(idev, addr, 0);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto out;
+	}
+
+	memset(ifa, 0, sizeof(struct inet6_ifaddr));
+	ipv6_addr_copy(&ifa->addr, addr);
+
+	spin_lock_init(&ifa->lock);
+	init_timer(&ifa->timer);
+	ifa->timer.data = (unsigned long) ifa;
+	ifa->scope = scope;
+	ifa->prefix_len = pfxlen;
+	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->cstamp = ifa->tstamp = jiffies;
+
+	ifa->idev = idev;
+	in6_dev_hold(idev);
+	/* For caller */
+	in6_ifa_hold(ifa);
+
+	/* Add to big hash table */
+	hash = ipv6_addr_hash(addr);
+
+	ifa->lst_next = inet6_addr_lst[hash];
+	inet6_addr_lst[hash] = ifa;
+	in6_ifa_hold(ifa);
+	write_unlock(&addrconf_hash_lock);
+
+	write_lock(&idev->lock);
+	/* Add to inet6_dev unicast addr list. */
+	ifa->if_next = idev->addr_list;
+	idev->addr_list = ifa;
+
+#ifdef CONFIG_IPV6_PRIVACY
+	if (ifa->flags&IFA_F_TEMPORARY) {
+		ifa->tmp_next = idev->tempaddr_list;
+		idev->tempaddr_list = ifa;
+		in6_ifa_hold(ifa);
+	}
+#endif
+
+	ifa->rt = rt;
+
+	in6_ifa_hold(ifa);
+	write_unlock(&idev->lock);
+out2:
+	read_unlock_bh(&addrconf_lock);
+
+	if (unlikely(err == 0))
+		notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+	else {
+		kfree(ifa);
+		ifa = ERR_PTR(err);
+	}
+
+	return ifa;
+out:
+	write_unlock(&addrconf_hash_lock);
+	goto out2;
+}
+
+/* This function wants to get referenced ifp and releases it before return */
+
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+	struct inet6_ifaddr *ifa, **ifap;
+	struct inet6_dev *idev = ifp->idev;
+	int hash;
+	int deleted = 0, onlink = 0;
+	unsigned long expires = jiffies;
+
+	hash = ipv6_addr_hash(&ifp->addr);
+
+	ifp->dead = 1;
+
+	write_lock_bh(&addrconf_hash_lock);
+	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
+	     ifap = &ifa->lst_next) {
+		if (ifa == ifp) {
+			*ifap = ifa->lst_next;
+			__in6_ifa_put(ifp);
+			ifa->lst_next = NULL;
+			break;
+		}
+	}
+	write_unlock_bh(&addrconf_hash_lock);
+
+	write_lock_bh(&idev->lock);
+#ifdef CONFIG_IPV6_PRIVACY
+	if (ifp->flags&IFA_F_TEMPORARY) {
+		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
+		     ifap = &ifa->tmp_next) {
+			if (ifa == ifp) {
+				*ifap = ifa->tmp_next;
+				if (ifp->ifpub) {
+					in6_ifa_put(ifp->ifpub);
+					ifp->ifpub = NULL;
+				}
+				__in6_ifa_put(ifp);
+				ifa->tmp_next = NULL;
+				break;
+			}
+		}
+	}
+#endif
+
+	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
+	     ifap = &ifa->if_next) {
+		if (ifa == ifp) {
+			*ifap = ifa->if_next;
+			__in6_ifa_put(ifp);
+			ifa->if_next = NULL;
+			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
+				break;
+			deleted = 1;
+		} else if (ifp->flags & IFA_F_PERMANENT) {
+			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+					      ifp->prefix_len)) {
+				if (ifa->flags & IFA_F_PERMANENT) {
+					onlink = 1;
+					if (deleted)
+						break;
+				} else {
+					unsigned long lifetime;
+
+					if (!onlink)
+						onlink = -1;
+
+					spin_lock(&ifa->lock);
+					lifetime = min_t(unsigned long,
+							 ifa->valid_lft, 0x7fffffffUL/HZ);
+					if (time_before(expires,
+							ifa->tstamp + lifetime * HZ))
+						expires = ifa->tstamp + lifetime * HZ;
+					spin_unlock(&ifa->lock);
+				}
+			}
+		}
+	}
+	write_unlock_bh(&idev->lock);
+
+	ipv6_ifa_notify(RTM_DELADDR, ifp);
+
+	notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp);
+
+	addrconf_del_timer(ifp);
+
+	/*
+	 * Purge or update corresponding prefix
+	 *
+	 * 1) we don't purge prefix here if address was not permanent.
+	 *    prefix is managed by its own lifetime.
+	 * 2) if there're no addresses, delete prefix.
+	 * 3) if there're still other permanent address(es),
+	 *    corresponding prefix is still permanent.
+	 * 4) otherwise, update prefix lifetime to the
+	 *    longest valid lifetime among the corresponding
+	 *    addresses on the device.
+	 *    Note: subsequent RA will update lifetime.
+	 *
+	 * --yoshfuji
+	 */
+	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
+		struct in6_addr prefix;
+		struct rt6_info *rt;
+
+		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+		rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
+
+		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+			if (onlink == 0) {
+				ip6_del_rt(rt, NULL, NULL);
+				rt = NULL;
+			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+				rt->rt6i_expires = expires;
+				rt->rt6i_flags |= RTF_EXPIRES;
+			}
+		}
+		dst_release(&rt->u.dst);
+	}
+
+	in6_ifa_put(ifp);
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+{
+	struct inet6_dev *idev = ifp->idev;
+	struct in6_addr addr, *tmpaddr;
+	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
+	int tmp_plen;
+	int ret = 0;
+	int max_addresses;
+
+	write_lock(&idev->lock);
+	if (ift) {
+		spin_lock_bh(&ift->lock);
+		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
+		spin_unlock_bh(&ift->lock);
+		tmpaddr = &addr;
+	} else {
+		tmpaddr = NULL;
+	}
+retry:
+	in6_dev_hold(idev);
+	if (idev->cnf.use_tempaddr <= 0) {
+		write_unlock(&idev->lock);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+		in6_dev_put(idev);
+		ret = -1;
+		goto out;
+	}
+	spin_lock_bh(&ifp->lock);
+	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
+		idev->cnf.use_tempaddr = -1;	/*XXX*/
+		spin_unlock_bh(&ifp->lock);
+		write_unlock(&idev->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+		in6_dev_put(idev);
+		ret = -1;
+		goto out;
+	}
+	in6_ifa_hold(ifp);
+	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
+	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
+		spin_unlock_bh(&ifp->lock);
+		write_unlock(&idev->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
+		in6_ifa_put(ifp);
+		in6_dev_put(idev);
+		ret = -1;
+		goto out;
+	}
+	memcpy(&addr.s6_addr[8], idev->rndid, 8);
+	tmp_valid_lft = min_t(__u32,
+			      ifp->valid_lft,
+			      idev->cnf.temp_valid_lft);
+	tmp_prefered_lft = min_t(__u32, 
+				 ifp->prefered_lft, 
+				 idev->cnf.temp_prefered_lft - desync_factor / HZ);
+	tmp_plen = ifp->prefix_len;
+	max_addresses = idev->cnf.max_addresses;
+	tmp_cstamp = ifp->cstamp;
+	tmp_tstamp = ifp->tstamp;
+	spin_unlock_bh(&ifp->lock);
+
+	write_unlock(&idev->lock);
+	ift = !max_addresses ||
+	      ipv6_count_addresses(idev) < max_addresses ? 
+		ipv6_add_addr(idev, &addr, tmp_plen,
+			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
+	if (!ift || IS_ERR(ift)) {
+		in6_ifa_put(ifp);
+		in6_dev_put(idev);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+		tmpaddr = &addr;
+		write_lock(&idev->lock);
+		goto retry;
+	}
+
+	spin_lock_bh(&ift->lock);
+	ift->ifpub = ifp;
+	ift->valid_lft = tmp_valid_lft;
+	ift->prefered_lft = tmp_prefered_lft;
+	ift->cstamp = tmp_cstamp;
+	ift->tstamp = tmp_tstamp;
+	spin_unlock_bh(&ift->lock);
+
+	addrconf_dad_start(ift, 0);
+	in6_ifa_put(ift);
+	in6_dev_put(idev);
+out:
+	return ret;
+}
+#endif
+
+/*
+ *	Choose an appropriate source address
+ *	should do:
+ *	i)	get an address with an appropriate scope
+ *	ii)	see if there is a specific route for the destination and use
+ *		an address of the attached interface 
+ *	iii)	don't use deprecated addresses
+ */
+static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
+{
+	int pref;
+	pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2;
+#ifdef CONFIG_IPV6_PRIVACY
+	pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1;
+#endif
+	return pref;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+#define IPV6_GET_SADDR_MAXSCORE(score)	((score) == 3)
+#else
+#define IPV6_GET_SADDR_MAXSCORE(score)	(score)
+#endif
+
+int ipv6_dev_get_saddr(struct net_device *dev,
+		       struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	struct inet6_ifaddr *ifp = NULL;
+	struct inet6_ifaddr *match = NULL;
+	struct inet6_dev *idev;
+	int scope;
+	int err;
+	int hiscore = -1, score;
+
+	scope = ipv6_addr_scope(daddr);
+
+	/*
+	 *	known dev
+	 *	search dev and walk through dev addresses
+	 */
+
+	if (dev) {
+		if (dev->flags & IFF_LOOPBACK)
+			scope = IFA_HOST;
+
+		read_lock(&addrconf_lock);
+		idev = __in6_dev_get(dev);
+		if (idev) {
+			read_lock_bh(&idev->lock);
+			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+				if (ifp->scope == scope) {
+					if (ifp->flags&IFA_F_TENTATIVE)
+						continue;
+#ifdef CONFIG_IPV6_PRIVACY
+					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+					score = ipv6_saddr_pref(ifp, 0);
+#endif
+					if (score <= hiscore)
+						continue;
+
+					if (match)
+						in6_ifa_put(match);
+					match = ifp;
+					hiscore = score;
+					in6_ifa_hold(ifp);
+
+					if (IPV6_GET_SADDR_MAXSCORE(score)) {
+						read_unlock_bh(&idev->lock);
+						read_unlock(&addrconf_lock);
+						goto out;
+					}
+				}
+			}
+			read_unlock_bh(&idev->lock);
+		}
+		read_unlock(&addrconf_lock);
+	}
+
+	if (scope == IFA_LINK)
+		goto out;
+
+	/*
+	 *	dev == NULL or search failed for specified dev
+	 */
+
+	read_lock(&dev_base_lock);
+	read_lock(&addrconf_lock);
+	for (dev = dev_base; dev; dev=dev->next) {
+		idev = __in6_dev_get(dev);
+		if (idev) {
+			read_lock_bh(&idev->lock);
+			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+				if (ifp->scope == scope) {
+					if (ifp->flags&IFA_F_TENTATIVE)
+						continue;
+#ifdef CONFIG_IPV6_PRIVACY
+					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+					score = ipv6_saddr_pref(ifp, 0);
+#endif
+					if (score <= hiscore)
+						continue;
+
+					if (match)
+						in6_ifa_put(match);
+					match = ifp;
+					hiscore = score;
+					in6_ifa_hold(ifp);
+
+					if (IPV6_GET_SADDR_MAXSCORE(score)) {
+						read_unlock_bh(&idev->lock);
+						goto out_unlock_base;
+					}
+				}
+			}
+			read_unlock_bh(&idev->lock);
+		}
+	}
+
+out_unlock_base:
+	read_unlock(&addrconf_lock);
+	read_unlock(&dev_base_lock);
+
+out:
+	err = -EADDRNOTAVAIL;
+	if (match) {
+		ipv6_addr_copy(saddr, &match->addr);
+		err = 0;
+		in6_ifa_put(match);
+	}
+
+	return err;
+}
+
+
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	return ipv6_dev_get_saddr(dst ? ((struct rt6_info *)dst)->rt6i_idev->dev : NULL, daddr, saddr);
+}
+
+
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev;
+	int err = -EADDRNOTAVAIL;
+
+	read_lock(&addrconf_lock);
+	if ((idev = __in6_dev_get(dev)) != NULL) {
+		struct inet6_ifaddr *ifp;
+
+		read_lock_bh(&idev->lock);
+		for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+			if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+				ipv6_addr_copy(addr, &ifp->addr);
+				err = 0;
+				break;
+			}
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	read_unlock(&addrconf_lock);
+	return err;
+}
+
+static int ipv6_count_addresses(struct inet6_dev *idev)
+{
+	int cnt = 0;
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+		cnt++;
+	read_unlock_bh(&idev->lock);
+	return cnt;
+}
+
+int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
+{
+	struct inet6_ifaddr * ifp;
+	u8 hash = ipv6_addr_hash(addr);
+
+	read_lock_bh(&addrconf_hash_lock);
+	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ipv6_addr_equal(&ifp->addr, addr) &&
+		    !(ifp->flags&IFA_F_TENTATIVE)) {
+			if (dev == NULL || ifp->idev->dev == dev ||
+			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
+				break;
+		}
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+	return ifp != NULL;
+}
+
+static
+int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
+{
+	struct inet6_ifaddr * ifp;
+	u8 hash = ipv6_addr_hash(addr);
+
+	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ipv6_addr_equal(&ifp->addr, addr)) {
+			if (dev == NULL || ifp->idev->dev == dev)
+				break;
+		}
+	}
+	return ifp != NULL;
+}
+
+struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
+{
+	struct inet6_ifaddr * ifp;
+	u8 hash = ipv6_addr_hash(addr);
+
+	read_lock_bh(&addrconf_hash_lock);
+	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+		if (ipv6_addr_equal(&ifp->addr, addr)) {
+			if (dev == NULL || ifp->idev->dev == dev ||
+			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+				in6_ifa_hold(ifp);
+				break;
+			}
+		}
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+
+	return ifp;
+}
+
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+{
+	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
+	const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
+	u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+	u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
+	int sk_ipv6only = ipv6_only_sock(sk);
+	int sk2_ipv6only = tcp_v6_ipv6only(sk2);
+	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+	if (!sk2_rcv_saddr && !sk_ipv6only)
+		return 1;
+
+	if (addr_type2 == IPV6_ADDR_ANY &&
+	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+		return 1;
+
+	if (addr_type == IPV6_ADDR_ANY &&
+	    !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+		return 1;
+
+	if (sk2_rcv_saddr6 &&
+	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+		return 1;
+
+	if (addr_type == IPV6_ADDR_MAPPED &&
+	    !sk2_ipv6only &&
+	    (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
+		return 1;
+
+	return 0;
+}
+
+/* Gets referenced address, destroys ifaddr */
+
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+	if (net_ratelimit())
+		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+	if (ifp->flags&IFA_F_PERMANENT) {
+		spin_lock_bh(&ifp->lock);
+		addrconf_del_timer(ifp);
+		ifp->flags |= IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		in6_ifa_put(ifp);
+#ifdef CONFIG_IPV6_PRIVACY
+	} else if (ifp->flags&IFA_F_TEMPORARY) {
+		struct inet6_ifaddr *ifpub;
+		spin_lock_bh(&ifp->lock);
+		ifpub = ifp->ifpub;
+		if (ifpub) {
+			in6_ifa_hold(ifpub);
+			spin_unlock_bh(&ifp->lock);
+			ipv6_create_tempaddr(ifpub, ifp);
+			in6_ifa_put(ifpub);
+		} else {
+			spin_unlock_bh(&ifp->lock);
+		}
+		ipv6_del_addr(ifp);
+#endif
+	} else
+		ipv6_del_addr(ifp);
+}
+
+
+/* Join to solicited addr multicast group. */
+
+void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+{
+	struct in6_addr maddr;
+
+	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+		return;
+
+	addrconf_addr_solict_mult(addr, &maddr);
+	ipv6_dev_mc_inc(dev, &maddr);
+}
+
+void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct in6_addr maddr;
+
+	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+		return;
+
+	addrconf_addr_solict_mult(addr, &maddr);
+	__ipv6_dev_mc_dec(idev, &maddr);
+}
+
+void addrconf_join_anycast(struct inet6_ifaddr *ifp)
+{
+	struct in6_addr addr;
+	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+	if (ipv6_addr_any(&addr))
+		return;
+	ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+}
+
+void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
+{
+	struct in6_addr addr;
+	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+	if (ipv6_addr_any(&addr))
+		return;
+	__ipv6_dev_ac_dec(ifp->idev, &addr);
+}
+
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
+{
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+	case ARPHRD_FDDI:
+	case ARPHRD_IEEE802_TR:
+		if (dev->addr_len != ETH_ALEN)
+			return -1;
+		memcpy(eui, dev->dev_addr, 3);
+		memcpy(eui + 5, dev->dev_addr + 3, 3);
+
+		/*
+		 * The zSeries OSA network cards can be shared among various
+		 * OS instances, but the OSA cards have only one MAC address.
+		 * This leads to duplicate address conflicts in conjunction
+		 * with IPv6 if more than one instance uses the same card.
+		 * 
+		 * The driver for these cards can deliver a unique 16-bit
+		 * identifier for each instance sharing the same card.  It is
+		 * placed instead of 0xFFFE in the interface identifier.  The
+		 * "u" bit of the interface identifier is not inverted in this
+		 * case.  Hence the resulting interface identifier has local
+		 * scope according to RFC2373.
+		 */
+		if (dev->dev_id) {
+			eui[3] = (dev->dev_id >> 8) & 0xFF;
+			eui[4] = dev->dev_id & 0xFF;
+		} else {
+			eui[3] = 0xFF;
+			eui[4] = 0xFE;
+			eui[0] ^= 2;
+		}
+		return 0;
+	case ARPHRD_ARCNET:
+		/* XXX: inherit EUI-64 from other interface -- yoshfuji */
+		if (dev->addr_len != ARCNET_ALEN)
+			return -1;
+		memset(eui, 0, 7);
+		eui[7] = *(u8*)dev->dev_addr;
+		return 0;
+	case ARPHRD_INFINIBAND:
+		if (dev->addr_len != INFINIBAND_ALEN)
+			return -1;
+		memcpy(eui, dev->dev_addr + 12, 8);
+		eui[0] |= 2;
+		return 0;
+	}
+	return -1;
+}
+
+static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
+{
+	int err = -1;
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+		if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+			memcpy(eui, ifp->addr.s6_addr+8, 8);
+			err = 0;
+			break;
+		}
+	}
+	read_unlock_bh(&idev->lock);
+	return err;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
+static int __ipv6_regen_rndid(struct inet6_dev *idev)
+{
+	struct net_device *dev;
+	struct scatterlist sg[2];
+
+	sg[0].page = virt_to_page(idev->entropy);
+	sg[0].offset = offset_in_page(idev->entropy);
+	sg[0].length = 8;
+	sg[1].page = virt_to_page(idev->work_eui64);
+	sg[1].offset = offset_in_page(idev->work_eui64);
+	sg[1].length = 8;
+
+	dev = idev->dev;
+
+	if (ipv6_generate_eui64(idev->work_eui64, dev)) {
+		printk(KERN_INFO
+			"__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
+			idev);
+		get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
+	}
+regen:
+	spin_lock(&md5_tfm_lock);
+	if (unlikely(md5_tfm == NULL)) {
+		spin_unlock(&md5_tfm_lock);
+		return -1;
+	}
+	crypto_digest_init(md5_tfm);
+	crypto_digest_update(md5_tfm, sg, 2);
+	crypto_digest_final(md5_tfm, idev->work_digest);
+	spin_unlock(&md5_tfm_lock);
+
+	memcpy(idev->rndid, &idev->work_digest[0], 8);
+	idev->rndid[0] &= ~0x02;
+	memcpy(idev->entropy, &idev->work_digest[8], 8);
+
+	/*
+	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
+	 * check if generated address is not inappropriate
+	 *
+	 *  - Reserved subnet anycast (RFC 2526)
+	 *	11111101 11....11 1xxxxxxx
+	 *  - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
+	 *	00-00-5E-FE-xx-xx-xx-xx
+	 *  - value 0
+	 *  - XXX: already assigned to an address on the device
+	 */
+	if (idev->rndid[0] == 0xfd && 
+	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
+	    (idev->rndid[7]&0x80))
+		goto regen;
+	if ((idev->rndid[0]|idev->rndid[1]) == 0) {
+		if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
+			goto regen;
+		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
+			goto regen;
+	}
+
+	return 0;
+}
+
+static void ipv6_regen_rndid(unsigned long data)
+{
+	struct inet6_dev *idev = (struct inet6_dev *) data;
+	unsigned long expires;
+
+	read_lock_bh(&addrconf_lock);
+	write_lock_bh(&idev->lock);
+
+	if (idev->dead)
+		goto out;
+
+	if (__ipv6_regen_rndid(idev) < 0)
+		goto out;
+	
+	expires = jiffies +
+		idev->cnf.temp_prefered_lft * HZ - 
+		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+	if (time_before(expires, jiffies)) {
+		printk(KERN_WARNING
+			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
+			idev->dev->name);
+		goto out;
+	}
+
+	if (!mod_timer(&idev->regen_timer, expires))
+		in6_dev_hold(idev);
+
+out:
+	write_unlock_bh(&idev->lock);
+	read_unlock_bh(&addrconf_lock);
+	in6_dev_put(idev);
+}
+
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
+	int ret = 0;
+
+	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
+		ret = __ipv6_regen_rndid(idev);
+	return ret;
+}
+#endif
+
+/*
+ *	Add prefix route.
+ */
+
+static void
+addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
+		      unsigned long expires, unsigned flags)
+{
+	struct in6_rtmsg rtmsg;
+
+	memset(&rtmsg, 0, sizeof(rtmsg));
+	ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
+	rtmsg.rtmsg_dst_len = plen;
+	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+	rtmsg.rtmsg_ifindex = dev->ifindex;
+	rtmsg.rtmsg_info = expires;
+	rtmsg.rtmsg_flags = RTF_UP|flags;
+	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+
+	/* Prevent useless cloning on PtP SIT.
+	   This thing is done here expecting that the whole
+	   class of non-broadcast devices need not cloning.
+	 */
+	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
+		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+
+	ip6_route_add(&rtmsg, NULL, NULL);
+}
+
+/* Create "default" multicast route to the interface */
+
+static void addrconf_add_mroute(struct net_device *dev)
+{
+	struct in6_rtmsg rtmsg;
+
+	memset(&rtmsg, 0, sizeof(rtmsg));
+	ipv6_addr_set(&rtmsg.rtmsg_dst,
+		      htonl(0xFF000000), 0, 0, 0);
+	rtmsg.rtmsg_dst_len = 8;
+	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+	rtmsg.rtmsg_ifindex = dev->ifindex;
+	rtmsg.rtmsg_flags = RTF_UP;
+	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+	ip6_route_add(&rtmsg, NULL, NULL);
+}
+
+static void sit_route_add(struct net_device *dev)
+{
+	struct in6_rtmsg rtmsg;
+
+	memset(&rtmsg, 0, sizeof(rtmsg));
+
+	rtmsg.rtmsg_type	= RTMSG_NEWROUTE;
+	rtmsg.rtmsg_metric	= IP6_RT_PRIO_ADDRCONF;
+
+	/* prefix length - 96 bits "::d.d.d.d" */
+	rtmsg.rtmsg_dst_len	= 96;
+	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
+	rtmsg.rtmsg_ifindex	= dev->ifindex;
+
+	ip6_route_add(&rtmsg, NULL, NULL);
+}
+
+static void addrconf_add_lroute(struct net_device *dev)
+{
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+	addrconf_prefix_route(&addr, 64, dev, 0, 0);
+}
+
+static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	if ((idev = ipv6_find_idev(dev)) == NULL)
+		return NULL;
+
+	/* Add default multicast route */
+	addrconf_add_mroute(dev);
+
+	/* Add link local route */
+	addrconf_add_lroute(dev);
+	return idev;
+}
+
+void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+{
+	struct prefix_info *pinfo;
+	__u32 valid_lft;
+	__u32 prefered_lft;
+	int addr_type;
+	unsigned long rt_expires;
+	struct inet6_dev *in6_dev;
+
+	pinfo = (struct prefix_info *) opt;
+	
+	if (len < sizeof(struct prefix_info)) {
+		ADBG(("addrconf: prefix option too short\n"));
+		return;
+	}
+	
+	/*
+	 *	Validation checks ([ADDRCONF], page 19)
+	 */
+
+	addr_type = ipv6_addr_type(&pinfo->prefix);
+
+	if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
+		return;
+
+	valid_lft = ntohl(pinfo->valid);
+	prefered_lft = ntohl(pinfo->prefered);
+
+	if (prefered_lft > valid_lft) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+		return;
+	}
+
+	in6_dev = in6_dev_get(dev);
+
+	if (in6_dev == NULL) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+		return;
+	}
+
+	/*
+	 *	Two things going on here:
+	 *	1) Add routes for on-link prefixes
+	 *	2) Configure prefixes with the auto flag set
+	 */
+
+	/* Avoid arithmetic overflow. Really, we could
+	   save rt_expires in seconds, likely valid_lft,
+	   but it would require division in fib gc, that it
+	   not good.
+	 */
+	if (valid_lft >= 0x7FFFFFFF/HZ)
+		rt_expires = 0;
+	else
+		rt_expires = jiffies + valid_lft * HZ;
+
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+
+		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+			if (rt->rt6i_flags&RTF_EXPIRES) {
+				if (valid_lft == 0) {
+					ip6_del_rt(rt, NULL, NULL);
+					rt = NULL;
+				} else {
+					rt->rt6i_expires = rt_expires;
+				}
+			}
+		} else if (valid_lft) {
+			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+		}
+		if (rt)
+			dst_release(&rt->u.dst);
+	}
+
+	/* Try to figure out our local address for this prefix */
+
+	if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+		struct inet6_ifaddr * ifp;
+		struct in6_addr addr;
+		int create = 0, update_lft = 0;
+
+		if (pinfo->prefix_len == 64) {
+			memcpy(&addr, &pinfo->prefix, 8);
+			if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+			    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+				in6_dev_put(in6_dev);
+				return;
+			}
+			goto ok;
+		}
+		if (net_ratelimit())
+			printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
+			       pinfo->prefix_len);
+		in6_dev_put(in6_dev);
+		return;
+
+ok:
+
+		ifp = ipv6_get_ifaddr(&addr, dev, 1);
+
+		if (ifp == NULL && valid_lft) {
+			int max_addresses = in6_dev->cnf.max_addresses;
+
+			/* Do not allow to create too much of autoconfigured
+			 * addresses; this would be too easy way to crash kernel.
+			 */
+			if (!max_addresses ||
+			    ipv6_count_addresses(in6_dev) < max_addresses)
+				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+
+			if (!ifp || IS_ERR(ifp)) {
+				in6_dev_put(in6_dev);
+				return;
+			}
+
+			update_lft = create = 1;
+			ifp->cstamp = jiffies;
+			addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+		}
+
+		if (ifp) {
+			int flags;
+			unsigned long now;
+#ifdef CONFIG_IPV6_PRIVACY
+			struct inet6_ifaddr *ift;
+#endif
+			u32 stored_lft;
+
+			/* update lifetime (RFC2462 5.5.3 e) */
+			spin_lock(&ifp->lock);
+			now = jiffies;
+			if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+			else
+				stored_lft = 0;
+			if (!update_lft && stored_lft) {
+				if (valid_lft > MIN_VALID_LIFETIME ||
+				    valid_lft > stored_lft)
+					update_lft = 1;
+				else if (stored_lft <= MIN_VALID_LIFETIME) {
+					/* valid_lft <= stored_lft is always true */
+					/* XXX: IPsec */
+					update_lft = 0;
+				} else {
+					valid_lft = MIN_VALID_LIFETIME;
+					if (valid_lft < prefered_lft)
+						prefered_lft = valid_lft;
+					update_lft = 1;
+				}
+			}
+
+			if (update_lft) {
+				ifp->valid_lft = valid_lft;
+				ifp->prefered_lft = prefered_lft;
+				ifp->tstamp = now;
+				flags = ifp->flags;
+				ifp->flags &= ~IFA_F_DEPRECATED;
+				spin_unlock(&ifp->lock);
+
+				if (!(flags&IFA_F_TENTATIVE))
+					ipv6_ifa_notify(0, ifp);
+			} else
+				spin_unlock(&ifp->lock);
+
+#ifdef CONFIG_IPV6_PRIVACY
+			read_lock_bh(&in6_dev->lock);
+			/* update all temporary addresses in the list */
+			for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+				/*
+				 * When adjusting the lifetimes of an existing
+				 * temporary address, only lower the lifetimes.
+				 * Implementations must not increase the
+				 * lifetimes of an existing temporary address
+				 * when processing a Prefix Information Option.
+				 */
+				spin_lock(&ift->lock);
+				flags = ift->flags;
+				if (ift->valid_lft > valid_lft &&
+				    ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
+					ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
+				if (ift->prefered_lft > prefered_lft &&
+				    ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
+					ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
+				spin_unlock(&ift->lock);
+				if (!(flags&IFA_F_TENTATIVE))
+					ipv6_ifa_notify(0, ift);
+			}
+
+			if (create && in6_dev->cnf.use_tempaddr > 0) {
+				/*
+				 * When a new public address is created as described in [ADDRCONF],
+				 * also create a new temporary address.
+				 */
+				read_unlock_bh(&in6_dev->lock); 
+				ipv6_create_tempaddr(ifp, NULL);
+			} else {
+				read_unlock_bh(&in6_dev->lock);
+			}
+#endif
+			in6_ifa_put(ifp);
+			addrconf_verify(0);
+		}
+	}
+	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+	in6_dev_put(in6_dev);
+}
+
+/*
+ *	Set destination address.
+ *	Special case for SIT interfaces where we create a new "virtual"
+ *	device.
+ */
+int addrconf_set_dstaddr(void __user *arg)
+{
+	struct in6_ifreq ireq;
+	struct net_device *dev;
+	int err = -EINVAL;
+
+	rtnl_lock();
+
+	err = -EFAULT;
+	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+		goto err_exit;
+
+	dev = __dev_get_by_index(ireq.ifr6_ifindex);
+
+	err = -ENODEV;
+	if (dev == NULL)
+		goto err_exit;
+
+	if (dev->type == ARPHRD_SIT) {
+		struct ifreq ifr;
+		mm_segment_t	oldfs;
+		struct ip_tunnel_parm p;
+
+		err = -EADDRNOTAVAIL;
+		if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
+			goto err_exit;
+
+		memset(&p, 0, sizeof(p));
+		p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
+		p.iph.saddr = 0;
+		p.iph.version = 4;
+		p.iph.ihl = 5;
+		p.iph.protocol = IPPROTO_IPV6;
+		p.iph.ttl = 64;
+		ifr.ifr_ifru.ifru_data = (void __user *)&p;
+
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+		set_fs(oldfs);
+
+		if (err == 0) {
+			err = -ENOBUFS;
+			if ((dev = __dev_get_by_name(p.name)) == NULL)
+				goto err_exit;
+			err = dev_open(dev);
+		}
+	}
+
+err_exit:
+	rtnl_unlock();
+	return err;
+}
+
+/*
+ *	Manual configuration of address on an interface
+ */
+static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+{
+	struct inet6_ifaddr *ifp;
+	struct inet6_dev *idev;
+	struct net_device *dev;
+	int scope;
+
+	ASSERT_RTNL();
+	
+	if ((dev = __dev_get_by_index(ifindex)) == NULL)
+		return -ENODEV;
+	
+	if (!(dev->flags&IFF_UP))
+		return -ENETDOWN;
+
+	if ((idev = addrconf_add_dev(dev)) == NULL)
+		return -ENOBUFS;
+
+	scope = ipv6_addr_scope(pfx);
+
+	ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT);
+	if (!IS_ERR(ifp)) {
+		addrconf_dad_start(ifp, 0);
+		in6_ifa_put(ifp);
+		return 0;
+	}
+
+	return PTR_ERR(ifp);
+}
+
+static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
+{
+	struct inet6_ifaddr *ifp;
+	struct inet6_dev *idev;
+	struct net_device *dev;
+	
+	if ((dev = __dev_get_by_index(ifindex)) == NULL)
+		return -ENODEV;
+
+	if ((idev = __in6_dev_get(dev)) == NULL)
+		return -ENXIO;
+
+	read_lock_bh(&idev->lock);
+	for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+		if (ifp->prefix_len == plen &&
+		    ipv6_addr_equal(pfx, &ifp->addr)) {
+			in6_ifa_hold(ifp);
+			read_unlock_bh(&idev->lock);
+			
+			ipv6_del_addr(ifp);
+
+			/* If the last address is deleted administratively,
+			   disable IPv6 on this interface.
+			 */
+			if (idev->addr_list == NULL)
+				addrconf_ifdown(idev->dev, 1);
+			return 0;
+		}
+	}
+	read_unlock_bh(&idev->lock);
+	return -EADDRNOTAVAIL;
+}
+
+
+int addrconf_add_ifaddr(void __user *arg)
+{
+	struct in6_ifreq ireq;
+	int err;
+	
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	
+	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+		return -EFAULT;
+
+	rtnl_lock();
+	err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+	rtnl_unlock();
+	return err;
+}
+
+int addrconf_del_ifaddr(void __user *arg)
+{
+	struct in6_ifreq ireq;
+	int err;
+	
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+		return -EFAULT;
+
+	rtnl_lock();
+	err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+	rtnl_unlock();
+	return err;
+}
+
+static void sit_add_v4_addrs(struct inet6_dev *idev)
+{
+	struct inet6_ifaddr * ifp;
+	struct in6_addr addr;
+	struct net_device *dev;
+	int scope;
+
+	ASSERT_RTNL();
+
+	memset(&addr, 0, sizeof(struct in6_addr));
+	memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+
+	if (idev->dev->flags&IFF_POINTOPOINT) {
+		addr.s6_addr32[0] = htonl(0xfe800000);
+		scope = IFA_LINK;
+	} else {
+		scope = IPV6_ADDR_COMPATv4;
+	}
+
+	if (addr.s6_addr32[3]) {
+		ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
+		if (!IS_ERR(ifp)) {
+			spin_lock_bh(&ifp->lock);
+			ifp->flags &= ~IFA_F_TENTATIVE;
+			spin_unlock_bh(&ifp->lock);
+			ipv6_ifa_notify(RTM_NEWADDR, ifp);
+			in6_ifa_put(ifp);
+		}
+		return;
+	}
+
+        for (dev = dev_base; dev != NULL; dev = dev->next) {
+		struct in_device * in_dev = __in_dev_get(dev);
+		if (in_dev && (dev->flags & IFF_UP)) {
+			struct in_ifaddr * ifa;
+
+			int flag = scope;
+
+			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+				int plen;
+
+				addr.s6_addr32[3] = ifa->ifa_local;
+
+				if (ifa->ifa_scope == RT_SCOPE_LINK)
+					continue;
+				if (ifa->ifa_scope >= RT_SCOPE_HOST) {
+					if (idev->dev->flags&IFF_POINTOPOINT)
+						continue;
+					flag |= IFA_HOST;
+				}
+				if (idev->dev->flags&IFF_POINTOPOINT)
+					plen = 64;
+				else
+					plen = 96;
+
+				ifp = ipv6_add_addr(idev, &addr, plen, flag,
+						    IFA_F_PERMANENT);
+				if (!IS_ERR(ifp)) {
+					spin_lock_bh(&ifp->lock);
+					ifp->flags &= ~IFA_F_TENTATIVE;
+					spin_unlock_bh(&ifp->lock);
+					ipv6_ifa_notify(RTM_NEWADDR, ifp);
+					in6_ifa_put(ifp);
+				}
+			}
+		}
+        }
+}
+
+static void init_loopback(struct net_device *dev)
+{
+	struct inet6_dev  *idev;
+	struct inet6_ifaddr * ifp;
+
+	/* ::1 */
+
+	ASSERT_RTNL();
+
+	if ((idev = ipv6_find_idev(dev)) == NULL) {
+		printk(KERN_DEBUG "init loopback: add_dev failed\n");
+		return;
+	}
+
+	ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
+	if (!IS_ERR(ifp)) {
+		spin_lock_bh(&ifp->lock);
+		ifp->flags &= ~IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		ipv6_ifa_notify(RTM_NEWADDR, ifp);
+		in6_ifa_put(ifp);
+	}
+}
+
+static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct inet6_ifaddr * ifp;
+
+	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+	if (!IS_ERR(ifp)) {
+		addrconf_dad_start(ifp, 0);
+		in6_ifa_put(ifp);
+	}
+}
+
+static void addrconf_dev_config(struct net_device *dev)
+{
+	struct in6_addr addr;
+	struct inet6_dev    * idev;
+
+	ASSERT_RTNL();
+
+	if ((dev->type != ARPHRD_ETHER) && 
+	    (dev->type != ARPHRD_FDDI) &&
+	    (dev->type != ARPHRD_IEEE802_TR) &&
+	    (dev->type != ARPHRD_ARCNET) &&
+	    (dev->type != ARPHRD_INFINIBAND)) {
+		/* Alas, we support only Ethernet autoconfiguration. */
+		return;
+	}
+
+	idev = addrconf_add_dev(dev);
+	if (idev == NULL)
+		return;
+
+	memset(&addr, 0, sizeof(struct in6_addr));
+	addr.s6_addr32[0] = htonl(0xFE800000);
+
+	if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
+		addrconf_add_linklocal(idev, &addr);
+}
+
+static void addrconf_sit_config(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	/* 
+	 * Configure the tunnel with one of our IPv4 
+	 * addresses... we should configure all of 
+	 * our v4 addrs in the tunnel
+	 */
+
+	if ((idev = ipv6_find_idev(dev)) == NULL) {
+		printk(KERN_DEBUG "init sit: add_dev failed\n");
+		return;
+	}
+
+	sit_add_v4_addrs(idev);
+
+	if (dev->flags&IFF_POINTOPOINT) {
+		addrconf_add_mroute(dev);
+		addrconf_add_lroute(dev);
+	} else
+		sit_route_add(dev);
+}
+
+static inline int
+ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
+{
+	struct in6_addr lladdr;
+
+	if (!ipv6_get_lladdr(link_dev, &lladdr)) {
+		addrconf_add_linklocal(idev, &lladdr);
+		return 0;
+	}
+	return -1;
+}
+
+static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
+{
+	struct net_device *link_dev;
+
+	/* first try to inherit the link-local address from the link device */
+	if (idev->dev->iflink &&
+	    (link_dev = __dev_get_by_index(idev->dev->iflink))) {
+		if (!ipv6_inherit_linklocal(idev, link_dev))
+			return;
+	}
+	/* then try to inherit it from any device */
+	for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
+		if (!ipv6_inherit_linklocal(idev, link_dev))
+			return;
+	}
+	printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
+}
+
+/*
+ * Autoconfigure tunnel with a link-local address so routing protocols,
+ * DHCPv6, MLD etc. can be run over the virtual link
+ */
+
+static void addrconf_ip6_tnl_config(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	if ((idev = addrconf_add_dev(dev)) == NULL) {
+		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+		return;
+	}
+	ip6_tnl_add_linklocal(idev);
+	addrconf_add_mroute(dev);
+}
+
+static int addrconf_notify(struct notifier_block *this, unsigned long event, 
+			   void * data)
+{
+	struct net_device *dev = (struct net_device *) data;
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
+	switch(event) {
+	case NETDEV_UP:
+		switch(dev->type) {
+		case ARPHRD_SIT:
+			addrconf_sit_config(dev);
+			break;
+		case ARPHRD_TUNNEL6:
+			addrconf_ip6_tnl_config(dev);
+			break;
+		case ARPHRD_LOOPBACK:
+			init_loopback(dev);
+			break;
+
+		default:
+			addrconf_dev_config(dev);
+			break;
+		};
+		if (idev) {
+			/* If the MTU changed during the interface down, when the
+			   interface up, the changed MTU must be reflected in the
+			   idev as well as routers.
+			 */
+			if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+				rt6_mtu_change(dev, dev->mtu);
+				idev->cnf.mtu6 = dev->mtu;
+			}
+			idev->tstamp = jiffies;
+			inet6_ifinfo_notify(RTM_NEWLINK, idev);
+			/* If the changed mtu during down is lower than IPV6_MIN_MTU
+			   stop IPv6 on this interface.
+			 */
+			if (dev->mtu < IPV6_MIN_MTU)
+				addrconf_ifdown(dev, event != NETDEV_DOWN);
+		}
+		break;
+
+	case NETDEV_CHANGEMTU:
+		if ( idev && dev->mtu >= IPV6_MIN_MTU) {
+			rt6_mtu_change(dev, dev->mtu);
+			idev->cnf.mtu6 = dev->mtu;
+			break;
+		}
+
+		/* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
+
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		/*
+		 *	Remove all addresses from this interface.
+		 */
+		addrconf_ifdown(dev, event != NETDEV_DOWN);
+		break;
+	case NETDEV_CHANGE:
+		break;
+	case NETDEV_CHANGENAME:
+#ifdef CONFIG_SYSCTL
+		if (idev) {
+			addrconf_sysctl_unregister(&idev->cnf);
+			neigh_sysctl_unregister(idev->nd_parms);
+			neigh_sysctl_register(dev, idev->nd_parms,
+					      NET_IPV6, NET_IPV6_NEIGH, "ipv6",
+					      &ndisc_ifinfo_sysctl_change,
+					      NULL);
+			addrconf_sysctl_register(idev, &idev->cnf);
+		}
+#endif
+		break;
+	};
+
+	return NOTIFY_OK;
+}
+
+/*
+ *	addrconf module should be notified of a device going up
+ */
+static struct notifier_block ipv6_dev_notf = {
+	.notifier_call = addrconf_notify,
+	.priority = 0
+};
+
+static int addrconf_ifdown(struct net_device *dev, int how)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa, **bifa;
+	int i;
+
+	ASSERT_RTNL();
+
+	if (dev == &loopback_dev && how == 1)
+		how = 0;
+
+	rt6_ifdown(dev);
+	neigh_ifdown(&nd_tbl, dev);
+
+	idev = __in6_dev_get(dev);
+	if (idev == NULL)
+		return -ENODEV;
+
+	/* Step 1: remove reference to ipv6 device from parent device.
+	           Do not dev_put!
+	 */
+	if (how == 1) {
+		write_lock_bh(&addrconf_lock);
+		dev->ip6_ptr = NULL;
+		idev->dead = 1;
+		write_unlock_bh(&addrconf_lock);
+
+		/* Step 1.5: remove snmp6 entry */
+		snmp6_unregister_dev(idev);
+
+	}
+
+	/* Step 2: clear hash table */
+	for (i=0; i<IN6_ADDR_HSIZE; i++) {
+		bifa = &inet6_addr_lst[i];
+
+		write_lock_bh(&addrconf_hash_lock);
+		while ((ifa = *bifa) != NULL) {
+			if (ifa->idev == idev) {
+				*bifa = ifa->lst_next;
+				ifa->lst_next = NULL;
+				addrconf_del_timer(ifa);
+				in6_ifa_put(ifa);
+				continue;
+			}
+			bifa = &ifa->lst_next;
+		}
+		write_unlock_bh(&addrconf_hash_lock);
+	}
+
+	write_lock_bh(&idev->lock);
+
+	/* Step 3: clear flags for stateless addrconf */
+	if (how != 1)
+		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+
+	/* Step 4: clear address list */
+#ifdef CONFIG_IPV6_PRIVACY
+	if (how == 1 && del_timer(&idev->regen_timer))
+		in6_dev_put(idev);
+
+	/* clear tempaddr list */
+	while ((ifa = idev->tempaddr_list) != NULL) {
+		idev->tempaddr_list = ifa->tmp_next;
+		ifa->tmp_next = NULL;
+		ifa->dead = 1;
+		write_unlock_bh(&idev->lock);
+		spin_lock_bh(&ifa->lock);
+
+		if (ifa->ifpub) {
+			in6_ifa_put(ifa->ifpub);
+			ifa->ifpub = NULL;
+		}
+		spin_unlock_bh(&ifa->lock);
+		in6_ifa_put(ifa);
+		write_lock_bh(&idev->lock);
+	}
+#endif
+	while ((ifa = idev->addr_list) != NULL) {
+		idev->addr_list = ifa->if_next;
+		ifa->if_next = NULL;
+		ifa->dead = 1;
+		addrconf_del_timer(ifa);
+		write_unlock_bh(&idev->lock);
+
+		__ipv6_ifa_notify(RTM_DELADDR, ifa);
+		in6_ifa_put(ifa);
+
+		write_lock_bh(&idev->lock);
+	}
+	write_unlock_bh(&idev->lock);
+
+	/* Step 5: Discard multicast list */
+
+	if (how == 1)
+		ipv6_mc_destroy_dev(idev);
+	else
+		ipv6_mc_down(idev);
+
+	/* Step 5: netlink notification of this interface */
+	idev->tstamp = jiffies;
+	inet6_ifinfo_notify(RTM_NEWLINK, idev);
+	
+	/* Shot the device (if unregistered) */
+
+	if (how == 1) {
+#ifdef CONFIG_SYSCTL
+		addrconf_sysctl_unregister(&idev->cnf);
+		neigh_sysctl_unregister(idev->nd_parms);
+#endif
+		neigh_parms_release(&nd_tbl, idev->nd_parms);
+		neigh_ifdown(&nd_tbl, dev);
+		in6_dev_put(idev);
+	}
+	return 0;
+}
+
+static void addrconf_rs_timer(unsigned long data)
+{
+	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+
+	if (ifp->idev->cnf.forwarding)
+		goto out;
+
+	if (ifp->idev->if_flags & IF_RA_RCVD) {
+		/*
+		 *	Announcement received after solicitation
+		 *	was sent
+		 */
+		goto out;
+	}
+
+	spin_lock(&ifp->lock);
+	if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+		struct in6_addr all_routers;
+
+		/* The wait after the last probe can be shorter */
+		addrconf_mod_timer(ifp, AC_RS,
+				   (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
+				   ifp->idev->cnf.rtr_solicit_delay :
+				   ifp->idev->cnf.rtr_solicit_interval);
+		spin_unlock(&ifp->lock);
+
+		ipv6_addr_all_routers(&all_routers);
+
+		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+	} else {
+		spin_unlock(&ifp->lock);
+		/*
+		 * Note: we do not support deprecated "all on-link"
+		 * assumption any longer.
+		 */
+		printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+		       ifp->idev->dev->name);
+	}
+
+out:
+	in6_ifa_put(ifp);
+}
+
+/*
+ *	Duplicate Address Detection
+ */
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags)
+{
+	struct inet6_dev *idev = ifp->idev;
+	struct net_device *dev = idev->dev;
+	unsigned long rand_num;
+
+	addrconf_join_solict(dev, &ifp->addr);
+
+	if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
+					flags);
+
+	net_srandom(ifp->addr.s6_addr32[3]);
+	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
+	read_lock_bh(&idev->lock);
+	if (ifp->dead)
+		goto out;
+	spin_lock_bh(&ifp->lock);
+
+	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+	    !(ifp->flags&IFA_F_TENTATIVE)) {
+		ifp->flags &= ~IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+
+		addrconf_dad_completed(ifp);
+		return;
+	}
+
+	ifp->probes = idev->cnf.dad_transmits;
+	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+
+	spin_unlock_bh(&ifp->lock);
+out:
+	read_unlock_bh(&idev->lock);
+}
+
+static void addrconf_dad_timer(unsigned long data)
+{
+	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_dev *idev = ifp->idev;
+	struct in6_addr unspec;
+	struct in6_addr mcaddr;
+
+	read_lock_bh(&idev->lock);
+	if (idev->dead) {
+		read_unlock_bh(&idev->lock);
+		goto out;
+	}
+	spin_lock_bh(&ifp->lock);
+	if (ifp->probes == 0) {
+		/*
+		 * DAD was successful
+		 */
+
+		ifp->flags &= ~IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+
+		addrconf_dad_completed(ifp);
+
+		goto out;
+	}
+
+	ifp->probes--;
+	addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+	spin_unlock_bh(&ifp->lock);
+	read_unlock_bh(&idev->lock);
+
+	/* send a neighbour solicitation for our addr */
+	memset(&unspec, 0, sizeof(unspec));
+	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
+	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+out:
+	in6_ifa_put(ifp);
+}
+
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+{
+	struct net_device *	dev = ifp->idev->dev;
+
+	/*
+	 *	Configure the address for reception. Now it is valid.
+	 */
+
+	ipv6_ifa_notify(RTM_NEWADDR, ifp);
+
+	/* If added prefix is link local and forwarding is off,
+	   start sending router solicitations.
+	 */
+
+	if (ifp->idev->cnf.forwarding == 0 &&
+	    ifp->idev->cnf.rtr_solicits > 0 &&
+	    (dev->flags&IFF_LOOPBACK) == 0 &&
+	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+		struct in6_addr all_routers;
+
+		ipv6_addr_all_routers(&all_routers);
+
+		/*
+		 *	If a host as already performed a random delay
+		 *	[...] as part of DAD [...] there is no need
+		 *	to delay again before sending the first RS
+		 */
+		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+
+		spin_lock_bh(&ifp->lock);
+		ifp->probes = 1;
+		ifp->idev->if_flags |= IF_RS_SENT;
+		addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
+		spin_unlock_bh(&ifp->lock);
+	}
+}
+
+#ifdef CONFIG_PROC_FS
+struct if6_iter_state {
+	int bucket;
+};
+
+static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
+{
+	struct inet6_ifaddr *ifa = NULL;
+	struct if6_iter_state *state = seq->private;
+
+	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
+		ifa = inet6_addr_lst[state->bucket];
+		if (ifa)
+			break;
+	}
+	return ifa;
+}
+
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+{
+	struct if6_iter_state *state = seq->private;
+
+	ifa = ifa->lst_next;
+try_again:
+	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
+		ifa = inet6_addr_lst[state->bucket];
+		goto try_again;
+	}
+	return ifa;
+}
+
+static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct inet6_ifaddr *ifa = if6_get_first(seq);
+
+	if (ifa)
+		while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
+			--pos;
+	return pos ? NULL : ifa;
+}
+
+static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock_bh(&addrconf_hash_lock);
+	return if6_get_idx(seq, *pos);
+}
+
+static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct inet6_ifaddr *ifa;
+
+	ifa = if6_get_next(seq, v);
+	++*pos;
+	return ifa;
+}
+
+static void if6_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&addrconf_hash_lock);
+}
+
+static int if6_seq_show(struct seq_file *seq, void *v)
+{
+	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
+	seq_printf(seq,
+		   "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n",
+		   NIP6(ifp->addr),
+		   ifp->idev->dev->ifindex,
+		   ifp->prefix_len,
+		   ifp->scope,
+		   ifp->flags,
+		   ifp->idev->dev->name);
+	return 0;
+}
+
+static struct seq_operations if6_seq_ops = {
+	.start	= if6_seq_start,
+	.next	= if6_seq_next,
+	.show	= if6_seq_show,
+	.stop	= if6_seq_stop,
+};
+
+static int if6_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct if6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+	memset(s, 0, sizeof(*s));
+
+	rc = seq_open(file, &if6_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations if6_fops = {
+	.owner		= THIS_MODULE,
+	.open		= if6_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+int __init if6_proc_init(void)
+{
+	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+void if6_proc_exit(void)
+{
+	proc_net_remove("if_inet6");
+}
+#endif	/* CONFIG_PROC_FS */
+
+/*
+ *	Periodic address status verification
+ */
+
+static void addrconf_verify(unsigned long foo)
+{
+	struct inet6_ifaddr *ifp;
+	unsigned long now, next;
+	int i;
+
+	spin_lock_bh(&addrconf_verify_lock);
+	now = jiffies;
+	next = now + ADDR_CHECK_FREQUENCY;
+
+	del_timer(&addr_chk_timer);
+
+	for (i=0; i < IN6_ADDR_HSIZE; i++) {
+
+restart:
+		write_lock(&addrconf_hash_lock);
+		for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+			unsigned long age;
+#ifdef CONFIG_IPV6_PRIVACY
+			unsigned long regen_advance;
+#endif
+
+			if (ifp->flags & IFA_F_PERMANENT)
+				continue;
+
+			spin_lock(&ifp->lock);
+			age = (now - ifp->tstamp) / HZ;
+
+#ifdef CONFIG_IPV6_PRIVACY
+			regen_advance = ifp->idev->cnf.regen_max_retry * 
+					ifp->idev->cnf.dad_transmits * 
+					ifp->idev->nd_parms->retrans_time / HZ;
+#endif
+
+			if (age >= ifp->valid_lft) {
+				spin_unlock(&ifp->lock);
+				in6_ifa_hold(ifp);
+				write_unlock(&addrconf_hash_lock);
+				ipv6_del_addr(ifp);
+				goto restart;
+			} else if (age >= ifp->prefered_lft) {
+				/* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
+				int deprecate = 0;
+
+				if (!(ifp->flags&IFA_F_DEPRECATED)) {
+					deprecate = 1;
+					ifp->flags |= IFA_F_DEPRECATED;
+				}
+
+				if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+					next = ifp->tstamp + ifp->valid_lft * HZ;
+
+				spin_unlock(&ifp->lock);
+
+				if (deprecate) {
+					in6_ifa_hold(ifp);
+					write_unlock(&addrconf_hash_lock);
+
+					ipv6_ifa_notify(0, ifp);
+					in6_ifa_put(ifp);
+					goto restart;
+				}
+#ifdef CONFIG_IPV6_PRIVACY
+			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
+				   !(ifp->flags&IFA_F_TENTATIVE)) {
+				if (age >= ifp->prefered_lft - regen_advance) {
+					struct inet6_ifaddr *ifpub = ifp->ifpub;
+					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+						next = ifp->tstamp + ifp->prefered_lft * HZ;
+					if (!ifp->regen_count && ifpub) {
+						ifp->regen_count++;
+						in6_ifa_hold(ifp);
+						in6_ifa_hold(ifpub);
+						spin_unlock(&ifp->lock);
+						write_unlock(&addrconf_hash_lock);
+						ipv6_create_tempaddr(ifpub, ifp);
+						in6_ifa_put(ifpub);
+						in6_ifa_put(ifp);
+						goto restart;
+					}
+				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+				spin_unlock(&ifp->lock);
+#endif
+			} else {
+				/* ifp->prefered_lft <= ifp->valid_lft */
+				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+					next = ifp->tstamp + ifp->prefered_lft * HZ;
+				spin_unlock(&ifp->lock);
+			}
+		}
+		write_unlock(&addrconf_hash_lock);
+	}
+
+	addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
+	add_timer(&addr_chk_timer);
+	spin_unlock_bh(&addrconf_verify_lock);
+}
+
+static int
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct in6_addr *pfx;
+
+	pfx = NULL;
+	if (rta[IFA_ADDRESS-1]) {
+		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+			return -EINVAL;
+		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+	}
+	if (rta[IFA_LOCAL-1]) {
+		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+			return -EINVAL;
+		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+	}
+	if (pfx == NULL)
+		return -EINVAL;
+
+	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+}
+
+static int
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct rtattr  **rta = arg;
+	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct in6_addr *pfx;
+
+	pfx = NULL;
+	if (rta[IFA_ADDRESS-1]) {
+		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+			return -EINVAL;
+		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+	}
+	if (rta[IFA_LOCAL-1]) {
+		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+			return -EINVAL;
+		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+	}
+	if (pfx == NULL)
+		return -EINVAL;
+
+	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+}
+
+static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+			     u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr  *nlh;
+	struct ifa_cacheinfo ci;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	ifm = NLMSG_DATA(nlh);
+	ifm->ifa_family = AF_INET6;
+	ifm->ifa_prefixlen = ifa->prefix_len;
+	ifm->ifa_flags = ifa->flags;
+	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+	if (ifa->scope&IFA_HOST)
+		ifm->ifa_scope = RT_SCOPE_HOST;
+	else if (ifa->scope&IFA_LINK)
+		ifm->ifa_scope = RT_SCOPE_LINK;
+	else if (ifa->scope&IFA_SITE)
+		ifm->ifa_scope = RT_SCOPE_SITE;
+	ifm->ifa_index = ifa->idev->dev->ifindex;
+	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
+	if (!(ifa->flags&IFA_F_PERMANENT)) {
+		ci.ifa_prefered = ifa->prefered_lft;
+		ci.ifa_valid = ifa->valid_lft;
+		if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+			long tval = (jiffies - ifa->tstamp)/HZ;
+			ci.ifa_prefered -= tval;
+			if (ci.ifa_valid != INFINITY_LIFE_TIME)
+				ci.ifa_valid -= tval;
+		}
+	} else {
+		ci.ifa_prefered = INFINITY_LIFE_TIME;
+		ci.ifa_valid = INFINITY_LIFE_TIME;
+	}
+	ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
+		    + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
+		    + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+				u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr  *nlh;
+	struct ifa_cacheinfo ci;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	ifm = NLMSG_DATA(nlh);
+	ifm->ifa_family = AF_INET6;	
+	ifm->ifa_prefixlen = 128;
+	ifm->ifa_flags = IFA_F_PERMANENT;
+	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+	if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
+		ifm->ifa_scope = RT_SCOPE_SITE;
+	ifm->ifa_index = ifmca->idev->dev->ifindex;
+	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
+	ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
+		    * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
+		    * 100 / HZ);
+	ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
+		    * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
+		    * 100 / HZ);
+	ci.ifa_prefered = INFINITY_LIFE_TIME;
+	ci.ifa_valid = INFINITY_LIFE_TIME;
+	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+				u32 pid, u32 seq, int event)
+{
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr  *nlh;
+	struct ifa_cacheinfo ci;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	ifm = NLMSG_DATA(nlh);
+	ifm->ifa_family = AF_INET6;	
+	ifm->ifa_prefixlen = 128;
+	ifm->ifa_flags = IFA_F_PERMANENT;
+	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+	if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
+		ifm->ifa_scope = RT_SCOPE_SITE;
+	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
+	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
+	ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
+		    * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
+		    * 100 / HZ);
+	ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
+		    * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
+		    * 100 / HZ);
+	ci.ifa_prefered = INFINITY_LIFE_TIME;
+	ci.ifa_valid = INFINITY_LIFE_TIME;
+	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+enum addr_type_t
+{
+	UNICAST_ADDR,
+	MULTICAST_ADDR,
+	ANYCAST_ADDR,
+};
+
+static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
+			   enum addr_type_t type)
+{
+	int idx, ip_idx;
+	int s_idx, s_ip_idx;
+	int err = 1;
+	struct net_device *dev;
+	struct inet6_dev *idev = NULL;
+	struct inet6_ifaddr *ifa;
+	struct ifmcaddr6 *ifmca;
+	struct ifacaddr6 *ifaca;
+
+	s_idx = cb->args[0];
+	s_ip_idx = ip_idx = cb->args[1];
+	read_lock(&dev_base_lock);
+	
+	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (idx > s_idx)
+			s_ip_idx = 0;
+		ip_idx = 0;
+		if ((idev = in6_dev_get(dev)) == NULL)
+			continue;
+		read_lock_bh(&idev->lock);
+		switch (type) {
+		case UNICAST_ADDR:
+			/* unicast address */
+			for (ifa = idev->addr_list; ifa;
+			     ifa = ifa->if_next, ip_idx++) {
+				if (ip_idx < s_ip_idx)
+					continue;
+				if ((err = inet6_fill_ifaddr(skb, ifa, 
+				    NETLINK_CB(cb->skb).pid, 
+				    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+					goto done;
+			}
+			/* temp addr */
+#ifdef CONFIG_IPV6_PRIVACY
+			for (ifa = idev->tempaddr_list; ifa; 
+			     ifa = ifa->tmp_next, ip_idx++) {
+				if (ip_idx < s_ip_idx)
+					continue;
+				if ((err = inet6_fill_ifaddr(skb, ifa, 
+				    NETLINK_CB(cb->skb).pid, 
+				    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 
+					goto done;
+			}
+#endif
+			break;
+		case MULTICAST_ADDR:
+			/* multicast address */
+			for (ifmca = idev->mc_list; ifmca; 
+			     ifmca = ifmca->next, ip_idx++) {
+				if (ip_idx < s_ip_idx)
+					continue;
+				if ((err = inet6_fill_ifmcaddr(skb, ifmca, 
+				    NETLINK_CB(cb->skb).pid, 
+				    cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0)
+					goto done;
+			}
+			break;
+		case ANYCAST_ADDR:
+			/* anycast address */
+			for (ifaca = idev->ac_list; ifaca;
+			     ifaca = ifaca->aca_next, ip_idx++) {
+				if (ip_idx < s_ip_idx)
+					continue;
+				if ((err = inet6_fill_ifacaddr(skb, ifaca, 
+				    NETLINK_CB(cb->skb).pid, 
+				    cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) 
+					goto done;
+			}
+			break;
+		default:
+			break;
+		}
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+	}
+done:
+	if (err <= 0) {
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+	}
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+	cb->args[1] = ip_idx;
+	return skb->len;
+}
+
+static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	enum addr_type_t type = UNICAST_ADDR;
+	return inet6_dump_addr(skb, cb, type);
+}
+
+static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	enum addr_type_t type = MULTICAST_ADDR;
+	return inet6_dump_addr(skb, cb, type);
+}
+
+
+static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	enum addr_type_t type = ANYCAST_ADDR;
+	return inet6_dump_addr(skb, cb, type);
+}
+
+static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb) {
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
+		return;
+	}
+	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC);
+}
+
+static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
+				__s32 *array, int bytes)
+{
+	memset(array, 0, bytes);
+	array[DEVCONF_FORWARDING] = cnf->forwarding;
+	array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
+	array[DEVCONF_MTU6] = cnf->mtu6;
+	array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
+	array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
+	array[DEVCONF_AUTOCONF] = cnf->autoconf;
+	array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
+	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+	array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
+	array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+#ifdef CONFIG_IPV6_PRIVACY
+	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
+	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
+	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
+	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
+	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
+#endif
+	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
+}
+
+static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 
+			     u32 pid, u32 seq, int event)
+{
+	struct net_device	*dev = idev->dev;
+	__s32			*array = NULL;
+	struct ifinfomsg	*r;
+	struct nlmsghdr 	*nlh;
+	unsigned char		*b = skb->tail;
+	struct rtattr		*subattr;
+	__u32			mtu = dev->mtu;
+	struct ifla_cacheinfo	ci;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	r = NLMSG_DATA(nlh);
+	r->ifi_family = AF_INET6;
+	r->ifi_type = dev->type;
+	r->ifi_index = dev->ifindex;
+	r->ifi_flags = dev_get_flags(dev);
+	r->ifi_change = 0;
+
+	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+
+	if (dev->addr_len)
+		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+
+	RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+	if (dev->ifindex != dev->iflink)
+		RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
+			
+	subattr = (struct rtattr*)skb->tail;
+
+	RTA_PUT(skb, IFLA_PROTINFO, 0, NULL);
+
+	/* return the device flags */
+	RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags);
+
+	/* return interface cacheinfo */
+	ci.max_reasm_len = IPV6_MAXPLEN;
+	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
+		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.reachable_time = idev->nd_parms->reachable_time;
+	ci.retrans_time = idev->nd_parms->retrans_time;
+	RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+	
+	/* return the device sysctl params */
+	if ((array = kmalloc(DEVCONF_MAX * sizeof(*array), GFP_ATOMIC)) == NULL)
+		goto rtattr_failure;
+	ipv6_store_devconf(&idev->cnf, array, DEVCONF_MAX * sizeof(*array));
+	RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*array), array);
+
+	/* XXX - Statistics/MC not implemented */
+	subattr->rta_len = skb->tail - (u8*)subattr;
+
+	nlh->nlmsg_len = skb->tail - b;
+	kfree(array);
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	if (array)
+		kfree(array);
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, err;
+	int s_idx = cb->args[0];
+	struct net_device *dev;
+	struct inet6_dev *idev;
+
+	read_lock(&dev_base_lock);
+	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if ((idev = in6_dev_get(dev)) == NULL)
+			continue;
+		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 
+				cb->nlh->nlmsg_seq, RTM_NEWLINK);
+		in6_dev_put(idev);
+		if (err <= 0)
+			break;
+	}
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+	/* 128 bytes ?? */
+	int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128);
+	
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb) {
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
+		return;
+	}
+	if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC);
+}
+
+static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
+			struct prefix_info *pinfo, u32 pid, u32 seq, int event)
+{
+	struct prefixmsg	*pmsg;
+	struct nlmsghdr 	*nlh;
+	unsigned char		*b = skb->tail;
+	struct prefix_cacheinfo	ci;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg));
+	
+	if (pid) 
+		nlh->nlmsg_flags |= NLM_F_MULTI;
+	
+	pmsg = NLMSG_DATA(nlh);
+	pmsg->prefix_family = AF_INET6;
+	pmsg->prefix_ifindex = idev->dev->ifindex;
+	pmsg->prefix_len = pinfo->prefix_len;
+	pmsg->prefix_type = pinfo->type;
+	
+	pmsg->prefix_flags = 0;
+	if (pinfo->onlink)
+		pmsg->prefix_flags |= IF_PREFIX_ONLINK;
+	if (pinfo->autoconf)
+		pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
+
+	RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
+
+	ci.preferred_time = ntohl(pinfo->prefered);
+	ci.valid_time = ntohl(pinfo->valid);
+	RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static void inet6_prefix_notify(int event, struct inet6_dev *idev, 
+			 struct prefix_info *pinfo)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb) {
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
+		return;
+	}
+	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
+}
+
+static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
+	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
+	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
+	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },
+	[RTM_GETADDR - RTM_BASE] = { .dumpit	= inet6_dump_ifaddr, },
+	[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
+	[RTM_GETANYCAST - RTM_BASE] = { .dumpit	= inet6_dump_ifacaddr, },
+	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet6_rtm_newroute, },
+	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
+	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
+				      .dumpit	= inet6_dump_fib, },
+};
+
+static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+{
+	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
+
+	switch (event) {
+	case RTM_NEWADDR:
+		dst_hold(&ifp->rt->u.dst);
+		if (ip6_ins_rt(ifp->rt, NULL, NULL))
+			dst_release(&ifp->rt->u.dst);
+		if (ifp->idev->cnf.forwarding)
+			addrconf_join_anycast(ifp);
+		break;
+	case RTM_DELADDR:
+		if (ifp->idev->cnf.forwarding)
+			addrconf_leave_anycast(ifp);
+		addrconf_leave_solict(ifp->idev, &ifp->addr);
+		dst_hold(&ifp->rt->u.dst);
+		if (ip6_del_rt(ifp->rt, NULL, NULL))
+			dst_free(&ifp->rt->u.dst);
+		else
+			dst_release(&ifp->rt->u.dst);
+		break;
+	}
+}
+
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+{
+	read_lock_bh(&addrconf_lock);
+	if (likely(ifp->idev->dead == 0))
+		__ipv6_ifa_notify(event, ifp);
+	read_unlock_bh(&addrconf_lock);
+}
+
+#ifdef CONFIG_SYSCTL
+
+static
+int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write && valp != &ipv6_devconf_dflt.forwarding) {
+		if (valp != &ipv6_devconf.forwarding) {
+			if ((!*valp) ^ (!val)) {
+				struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
+				if (idev == NULL)
+					return ret;
+				dev_forward_change(idev);
+			}
+		} else {
+			ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+			addrconf_forward_change();
+		}
+		if (*valp)
+			rt6_purge_dflt_routers();
+	}
+
+        return ret;
+}
+
+static int addrconf_sysctl_forward_strategy(ctl_table *table, 
+					    int __user *name, int nlen,
+					    void __user *oldval,
+					    size_t __user *oldlenp,
+					    void __user *newval, size_t newlen,
+					    void **context)
+{
+	int *valp = table->data;
+	int new;
+
+	if (!newval || !newlen)
+		return 0;
+	if (newlen != sizeof(int))
+		return -EINVAL;
+	if (get_user(new, (int __user *)newval))
+		return -EFAULT;
+	if (new == *valp)
+		return 0;
+	if (oldval && oldlenp) {
+		size_t len;
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	if (valp != &ipv6_devconf_dflt.forwarding) {
+		if (valp != &ipv6_devconf.forwarding) {
+			struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
+			int changed;
+			if (unlikely(idev == NULL))
+				return -ENODEV;
+			changed = (!*valp) ^ (!new);
+			*valp = new;
+			if (changed)
+				dev_forward_change(idev);
+		} else {
+			*valp = new;
+			addrconf_forward_change();
+		}
+
+		if (*valp)
+			rt6_purge_dflt_routers();
+	} else
+		*valp = new;
+
+	return 1;
+}
+
+static struct addrconf_sysctl_table
+{
+	struct ctl_table_header *sysctl_header;
+	ctl_table addrconf_vars[__NET_IPV6_MAX];
+	ctl_table addrconf_dev[2];
+	ctl_table addrconf_conf_dir[2];
+	ctl_table addrconf_proto_dir[2];
+	ctl_table addrconf_root_dir[2];
+} addrconf_sysctl = {
+	.sysctl_header = NULL,
+	.addrconf_vars = {
+        	{
+			.ctl_name	=	NET_IPV6_FORWARDING,
+			.procname	=	"forwarding",
+         		.data		=	&ipv6_devconf.forwarding,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&addrconf_sysctl_forward,
+			.strategy	=	&addrconf_sysctl_forward_strategy,
+		},
+		{
+			.ctl_name	=	NET_IPV6_HOP_LIMIT,
+			.procname	=	"hop_limit",
+         		.data		=	&ipv6_devconf.hop_limit,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_MTU,
+			.procname	=	"mtu",
+			.data		=	&ipv6_devconf.mtu6,
+         		.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_ACCEPT_RA,
+			.procname	=	"accept_ra",
+         		.data		=	&ipv6_devconf.accept_ra,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_ACCEPT_REDIRECTS,
+			.procname	=	"accept_redirects",
+         		.data		=	&ipv6_devconf.accept_redirects,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_AUTOCONF,
+			.procname	=	"autoconf",
+         		.data		=	&ipv6_devconf.autoconf,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_DAD_TRANSMITS,
+			.procname	=	"dad_transmits",
+         		.data		=	&ipv6_devconf.dad_transmits,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_RTR_SOLICITS,
+			.procname	=	"router_solicitations",
+         		.data		=	&ipv6_devconf.rtr_solicits,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_RTR_SOLICIT_INTERVAL,
+			.procname	=	"router_solicitation_interval",
+         		.data		=	&ipv6_devconf.rtr_solicit_interval,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec_jiffies,
+			.strategy	=	&sysctl_jiffies,
+		},
+		{
+			.ctl_name	=	NET_IPV6_RTR_SOLICIT_DELAY,
+			.procname	=	"router_solicitation_delay",
+         		.data		=	&ipv6_devconf.rtr_solicit_delay,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec_jiffies,
+			.strategy	=	&sysctl_jiffies,
+		},
+		{
+			.ctl_name	=	NET_IPV6_FORCE_MLD_VERSION,
+			.procname	=	"force_mld_version",
+         		.data		=	&ipv6_devconf.force_mld_version,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+         		.proc_handler	=	&proc_dointvec,
+		},
+#ifdef CONFIG_IPV6_PRIVACY
+		{
+			.ctl_name	=	NET_IPV6_USE_TEMPADDR,
+			.procname	=	"use_tempaddr",
+	 		.data		=	&ipv6_devconf.use_tempaddr,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+	 		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_TEMP_VALID_LFT,
+			.procname	=	"temp_valid_lft",
+	 		.data		=	&ipv6_devconf.temp_valid_lft,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+	 		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_TEMP_PREFERED_LFT,
+			.procname	=	"temp_prefered_lft",
+	 		.data		=	&ipv6_devconf.temp_prefered_lft,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+	 		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_REGEN_MAX_RETRY,
+			.procname	=	"regen_max_retry",
+	 		.data		=	&ipv6_devconf.regen_max_retry,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+	 		.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	NET_IPV6_MAX_DESYNC_FACTOR,
+			.procname	=	"max_desync_factor",
+	 		.data		=	&ipv6_devconf.max_desync_factor,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+	 		.proc_handler	=	&proc_dointvec,
+		},
+#endif
+		{
+			.ctl_name	=	NET_IPV6_MAX_ADDRESSES,
+			.procname	=	"max_addresses",
+			.data		=	&ipv6_devconf.max_addresses,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
+		{
+			.ctl_name	=	0,	/* sentinel */
+		}
+	},
+	.addrconf_dev = {
+		{
+			.ctl_name	=	NET_PROTO_CONF_ALL,
+			.procname	=	"all",
+			.mode		=	0555,
+			.child		=	addrconf_sysctl.addrconf_vars,
+		},
+		{
+			.ctl_name	=	0,	/* sentinel */
+		}
+	},
+	.addrconf_conf_dir = {
+		{
+			.ctl_name	=	NET_IPV6_CONF,
+			.procname	=	"conf",
+			.mode		=	0555,
+			.child		=	addrconf_sysctl.addrconf_dev,
+		},
+		{
+			.ctl_name	=	0,	/* sentinel */
+		}
+	},
+	.addrconf_proto_dir = {
+		{
+			.ctl_name	=	NET_IPV6,
+			.procname	=	"ipv6",
+			.mode		=	0555,
+			.child		=	addrconf_sysctl.addrconf_conf_dir,
+		},
+		{
+			.ctl_name	=	0,	/* sentinel */
+		}
+	},
+	.addrconf_root_dir = {
+		{
+			.ctl_name	=	CTL_NET,
+			.procname	=	"net",
+			.mode		=	0555,
+			.child		=	addrconf_sysctl.addrconf_proto_dir,
+		},
+		{
+			.ctl_name	=	0,	/* sentinel */
+		}
+	},
+};
+
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+	int i;
+	struct net_device *dev = idev ? idev->dev : NULL;
+	struct addrconf_sysctl_table *t;
+	char *dev_name = NULL;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (t == NULL)
+		return;
+	memcpy(t, &addrconf_sysctl, sizeof(*t));
+	for (i=0; t->addrconf_vars[i].data; i++) {
+		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+		t->addrconf_vars[i].de = NULL;
+		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+	}
+	if (dev) {
+		dev_name = dev->name; 
+		t->addrconf_dev[0].ctl_name = dev->ifindex;
+	} else {
+		dev_name = "default";
+		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+	}
+
+	/* 
+	 * Make a copy of dev_name, because '.procname' is regarded as const 
+	 * by sysctl and we wouldn't want anyone to change it under our feet
+	 * (see SIOCSIFNAME).
+	 */	
+	dev_name = net_sysctl_strdup(dev_name);
+	if (!dev_name)
+	    goto free;
+
+	t->addrconf_dev[0].procname = dev_name;
+
+	t->addrconf_dev[0].child = t->addrconf_vars;
+	t->addrconf_dev[0].de = NULL;
+	t->addrconf_conf_dir[0].child = t->addrconf_dev;
+	t->addrconf_conf_dir[0].de = NULL;
+	t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
+	t->addrconf_proto_dir[0].de = NULL;
+	t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+	t->addrconf_root_dir[0].de = NULL;
+
+	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
+	if (t->sysctl_header == NULL)
+		goto free_procname;
+	else
+		p->sysctl = t;
+	return;
+
+	/* error path */
+ free_procname:
+	kfree(dev_name);
+ free:
+	kfree(t);
+
+	return;
+}
+
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+	if (p->sysctl) {
+		struct addrconf_sysctl_table *t = p->sysctl;
+		p->sysctl = NULL;
+		unregister_sysctl_table(t->sysctl_header);
+		kfree(t->addrconf_dev[0].procname);
+		kfree(t);
+	}
+}
+
+
+#endif
+
+/*
+ *      Device notifier
+ */
+
+int register_inet6addr_notifier(struct notifier_block *nb)
+{
+        return notifier_chain_register(&inet6addr_chain, nb);
+}
+
+int unregister_inet6addr_notifier(struct notifier_block *nb)
+{
+        return notifier_chain_unregister(&inet6addr_chain,nb);
+}
+
+/*
+ *	Init / cleanup code
+ */
+
+int __init addrconf_init(void)
+{
+	int err = 0;
+
+	/* The addrconf netdev notifier requires that loopback_dev
+	 * has it's ipv6 private information allocated and setup
+	 * before it can bring up and give link-local addresses
+	 * to other devices which are up.
+	 *
+	 * Unfortunately, loopback_dev is not necessarily the first
+	 * entry in the global dev_base list of net devices.  In fact,
+	 * it is likely to be the very last entry on that list.
+	 * So this causes the notifier registry below to try and
+	 * give link-local addresses to all devices besides loopback_dev
+	 * first, then loopback_dev, which cases all the non-loopback_dev
+	 * devices to fail to get a link-local address.
+	 *
+	 * So, as a temporary fix, allocate the ipv6 structure for
+	 * loopback_dev first by hand.
+	 * Longer term, all of the dependencies ipv6 has upon the loopback
+	 * device and it being up should be removed.
+	 */
+	rtnl_lock();
+	if (!ipv6_add_dev(&loopback_dev))
+		err = -ENOMEM;
+	rtnl_unlock();
+	if (err)
+		return err;
+
+	register_netdevice_notifier(&ipv6_dev_notf);
+
+#ifdef CONFIG_IPV6_PRIVACY
+	md5_tfm = crypto_alloc_tfm("md5", 0);
+	if (unlikely(md5_tfm == NULL))
+		printk(KERN_WARNING
+			"failed to load transform for md5\n");
+#endif
+
+	addrconf_verify(0);
+	rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+#ifdef CONFIG_SYSCTL
+	addrconf_sysctl.sysctl_header =
+		register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
+	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+#endif
+
+	return 0;
+}
+
+void __exit addrconf_cleanup(void)
+{
+ 	struct net_device *dev;
+ 	struct inet6_dev *idev;
+ 	struct inet6_ifaddr *ifa;
+	int i;
+
+	unregister_netdevice_notifier(&ipv6_dev_notf);
+
+	rtnetlink_links[PF_INET6] = NULL;
+#ifdef CONFIG_SYSCTL
+	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
+	addrconf_sysctl_unregister(&ipv6_devconf);
+#endif
+
+	rtnl_lock();
+
+	/*
+	 *	clean dev list.
+	 */
+
+	for (dev=dev_base; dev; dev=dev->next) {
+		if ((idev = __in6_dev_get(dev)) == NULL)
+			continue;
+		addrconf_ifdown(dev, 1);
+	}
+	addrconf_ifdown(&loopback_dev, 2);
+
+	/*
+	 *	Check hash table.
+	 */
+
+	write_lock_bh(&addrconf_hash_lock);
+	for (i=0; i < IN6_ADDR_HSIZE; i++) {
+		for (ifa=inet6_addr_lst[i]; ifa; ) {
+			struct inet6_ifaddr *bifa;
+
+			bifa = ifa;
+			ifa = ifa->lst_next;
+			printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
+			/* Do not free it; something is wrong.
+			   Now we can investigate it with debugger.
+			 */
+		}
+	}
+	write_unlock_bh(&addrconf_hash_lock);
+
+	del_timer(&addr_chk_timer);
+
+	rtnl_unlock();
+
+#ifdef CONFIG_IPV6_PRIVACY
+	if (likely(md5_tfm != NULL)) {
+		crypto_free_tfm(md5_tfm);
+		md5_tfm = NULL;
+	}
+#endif
+
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("if_inet6");
+#endif
+}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
new file mode 100644
index 00000000000..768b11703da
--- /dev/null
+++ b/net/ipv6/af_inet6.c
@@ -0,0 +1,867 @@
+/*
+ *	PF_INET6 socket protocol family
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	Adapted from linux/net/ipv4/af_inet.c
+ *
+ *	$Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
+ *
+ * 	Fixes:
+ *	piggy, Karl Knutson	:	Socket protocol table
+ * 	Hideaki YOSHIFUJI	:	sin6_scope_id support
+ * 	Arnaldo Melo		: 	check proc_net_create return, cleanups
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+#include <linux/smp_lock.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/ipip.h>
+#include <net/protocol.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#ifdef CONFIG_IPV6_TUNNEL
+#include <net/ip6_tunnel.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+MODULE_AUTHOR("Cast of dozens");
+MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
+MODULE_LICENSE("GPL");
+
+/* IPv6 procfs goodies... */
+
+#ifdef CONFIG_PROC_FS
+extern int raw6_proc_init(void);
+extern void raw6_proc_exit(void);
+extern int tcp6_proc_init(void);
+extern void tcp6_proc_exit(void);
+extern int udp6_proc_init(void);
+extern void udp6_proc_exit(void);
+extern int ipv6_misc_proc_init(void);
+extern void ipv6_misc_proc_exit(void);
+extern int ac6_proc_init(void);
+extern void ac6_proc_exit(void);
+extern int if6_proc_init(void);
+extern void if6_proc_exit(void);
+#endif
+
+int sysctl_ipv6_bindv6only;
+
+#ifdef INET_REFCNT_DEBUG
+atomic_t inet6_sock_nr;
+#endif
+
+/* The inetsw table contains everything that inet_create needs to
+ * build a new socket.
+ */
+static struct list_head inetsw6[SOCK_MAX];
+static DEFINE_SPINLOCK(inetsw6_lock);
+
+static void inet6_sock_destruct(struct sock *sk)
+{
+	inet_sock_destruct(sk);
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_dec(&inet6_sock_nr);
+#endif
+}
+
+static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+{
+	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+
+	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
+static int inet6_create(struct socket *sock, int protocol)
+{
+	struct inet_sock *inet;
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	struct list_head *p;
+	struct inet_protosw *answer;
+	struct proto *answer_prot;
+	unsigned char answer_flags;
+	char answer_no_check;
+	int rc;
+
+	/* Look for the requested type/protocol pair. */
+	answer = NULL;
+	rcu_read_lock();
+	list_for_each_rcu(p, &inetsw6[sock->type]) {
+		answer = list_entry(p, struct inet_protosw, list);
+
+		/* Check the non-wild match. */
+		if (protocol == answer->protocol) {
+			if (protocol != IPPROTO_IP)
+				break;
+		} else {
+			/* Check for the two wild cases. */
+			if (IPPROTO_IP == protocol) {
+				protocol = answer->protocol;
+				break;
+			}
+			if (IPPROTO_IP == answer->protocol)
+				break;
+		}
+		answer = NULL;
+	}
+
+	rc = -ESOCKTNOSUPPORT;
+	if (!answer)
+		goto out_rcu_unlock;
+	rc = -EPERM;
+	if (answer->capability > 0 && !capable(answer->capability))
+		goto out_rcu_unlock;
+	rc = -EPROTONOSUPPORT;
+	if (!protocol)
+		goto out_rcu_unlock;
+
+	sock->ops = answer->ops;
+
+	answer_prot = answer->prot;
+	answer_no_check = answer->no_check;
+	answer_flags = answer->flags;
+	rcu_read_unlock();
+
+	BUG_TRAP(answer_prot->slab != NULL);
+
+	rc = -ENOBUFS;
+	sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
+	if (sk == NULL)
+		goto out;
+
+	sock_init_data(sock, sk);
+
+	rc = 0;
+	sk->sk_no_check = answer_no_check;
+	if (INET_PROTOSW_REUSE & answer_flags)
+		sk->sk_reuse = 1;
+
+	inet = inet_sk(sk);
+
+	if (SOCK_RAW == sock->type) {
+		inet->num = protocol;
+		if (IPPROTO_RAW == protocol)
+			inet->hdrincl = 1;
+	}
+
+	sk->sk_destruct		= inet6_sock_destruct;
+	sk->sk_family		= PF_INET6;
+	sk->sk_protocol		= protocol;
+
+	sk->sk_backlog_rcv	= answer->prot->backlog_rcv;
+
+	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
+	np->hop_limit	= -1;
+	np->mcast_hops	= -1;
+	np->mc_loop	= 1;
+	np->pmtudisc	= IPV6_PMTUDISC_WANT;
+	np->ipv6only	= sysctl_ipv6_bindv6only;
+	
+	/* Init the ipv4 part of the socket since we can have sockets
+	 * using v6 API for ipv4.
+	 */
+	inet->uc_ttl	= -1;
+
+	inet->mc_loop	= 1;
+	inet->mc_ttl	= 1;
+	inet->mc_index	= 0;
+	inet->mc_list	= NULL;
+
+	if (ipv4_config.no_pmtu_disc)
+		inet->pmtudisc = IP_PMTUDISC_DONT;
+	else
+		inet->pmtudisc = IP_PMTUDISC_WANT;
+
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet6_sock_nr);
+	atomic_inc(&inet_sock_nr);
+#endif
+	if (inet->num) {
+		/* It assumes that any protocol which allows
+		 * the user to assign a number at socket
+		 * creation time automatically shares.
+		 */
+		inet->sport = ntohs(inet->num);
+		sk->sk_prot->hash(sk);
+	}
+	if (sk->sk_prot->init) {
+		rc = sk->sk_prot->init(sk);
+		if (rc) {
+			sk_common_release(sk);
+			goto out;
+		}
+	}
+out:
+	return rc;
+out_rcu_unlock:
+	rcu_read_unlock();
+	goto out;
+}
+
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	__u32 v4addr = 0;
+	unsigned short snum;
+	int addr_type = 0;
+	int err = 0;
+
+	/* If the socket has its own bind function then use it. */
+	if (sk->sk_prot->bind)
+		return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+	addr_type = ipv6_addr_type(&addr->sin6_addr);
+	if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+		return -EINVAL;
+
+	snum = ntohs(addr->sin6_port);
+	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	lock_sock(sk);
+
+	/* Check these errors (active socket, double bind). */
+	if (sk->sk_state != TCP_CLOSE || inet->num) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Check if the address belongs to the host. */
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		v4addr = addr->sin6_addr.s6_addr32[3];
+		if (inet_addr_type(v4addr) != RTN_LOCAL) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
+	} else {
+		if (addr_type != IPV6_ADDR_ANY) {
+			struct net_device *dev = NULL;
+
+			if (addr_type & IPV6_ADDR_LINKLOCAL) {
+				if (addr_len >= sizeof(struct sockaddr_in6) &&
+				    addr->sin6_scope_id) {
+					/* Override any existing binding, if another one
+					 * is supplied by user.
+					 */
+					sk->sk_bound_dev_if = addr->sin6_scope_id;
+				}
+				
+				/* Binding to link-local address requires an interface */
+				if (!sk->sk_bound_dev_if) {
+					err = -EINVAL;
+					goto out;
+				}
+				dev = dev_get_by_index(sk->sk_bound_dev_if);
+				if (!dev) {
+					err = -ENODEV;
+					goto out;
+				}
+			}
+
+			/* ipv4 addr of the socket is invalid.  Only the
+			 * unspecified and mapped address have a v4 equivalent.
+			 */
+			v4addr = LOOPBACK4_IPV6;
+			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
+				if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+					if (dev)
+						dev_put(dev);
+					err = -EADDRNOTAVAIL;
+					goto out;
+				}
+			}
+			if (dev)
+				dev_put(dev);
+		}
+	}
+
+	inet->rcv_saddr = v4addr;
+	inet->saddr = v4addr;
+
+	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+		
+	if (!(addr_type & IPV6_ADDR_MULTICAST))
+		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+
+	/* Make sure we are allowed to bind here. */
+	if (sk->sk_prot->get_port(sk, snum)) {
+		inet_reset_saddr(sk);
+		err = -EADDRINUSE;
+		goto out;
+	}
+
+	if (addr_type != IPV6_ADDR_ANY)
+		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+	if (snum)
+		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+	inet->sport = ntohs(inet->num);
+	inet->dport = 0;
+	inet->daddr = 0;
+out:
+	release_sock(sk);
+	return err;
+}
+
+int inet6_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk == NULL)
+		return -EINVAL;
+
+	/* Free mc lists */
+	ipv6_sock_mc_close(sk);
+
+	/* Free ac lists */
+	ipv6_sock_ac_close(sk);
+
+	return inet_release(sock);
+}
+
+int inet6_destroy_sock(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct ipv6_txoptions *opt;
+
+	/*
+	 *	Release destination entry
+	 */
+
+	sk_dst_reset(sk);
+
+	/* Release rx options */
+
+	if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
+		kfree_skb(skb);
+
+	/* Free flowlabels */
+	fl6_free_socklist(sk);
+
+	/* Free tx options */
+
+	if ((opt = xchg(&np->opt, NULL)) != NULL)
+		sock_kfree_s(sk, opt, opt->tot_len);
+
+	return 0;
+}
+
+/*
+ *	This does both peername and sockname.
+ */
+ 
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+		 int *uaddr_len, int peer)
+{
+	struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+  
+	sin->sin6_family = AF_INET6;
+	sin->sin6_flowinfo = 0;
+	sin->sin6_scope_id = 0;
+	if (peer) {
+		if (!inet->dport)
+			return -ENOTCONN;
+		if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+		    peer == 1)
+			return -ENOTCONN;
+		sin->sin6_port = inet->dport;
+		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+		if (np->sndflow)
+			sin->sin6_flowinfo = np->flow_label;
+	} else {
+		if (ipv6_addr_any(&np->rcv_saddr))
+			ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+		else
+			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+
+		sin->sin6_port = inet->sport;
+	}
+	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin->sin6_scope_id = sk->sk_bound_dev_if;
+	*uaddr_len = sizeof(*sin);
+	return(0);
+}
+
+int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err = -EINVAL;
+
+	switch(cmd) 
+	{
+	case SIOCGSTAMP:
+		return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+	case SIOCADDRT:
+	case SIOCDELRT:
+	  
+		return(ipv6_route_ioctl(cmd,(void __user *)arg));
+
+	case SIOCSIFADDR:
+		return addrconf_add_ifaddr((void __user *) arg);
+	case SIOCDIFADDR:
+		return addrconf_del_ifaddr((void __user *) arg);
+	case SIOCSIFDSTADDR:
+		return addrconf_set_dstaddr((void __user *) arg);
+	default:
+		if (!sk->sk_prot->ioctl ||
+		    (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
+			return(dev_ioctl(cmd,(void __user *) arg));		
+		return err;
+	}
+	/*NOTREACHED*/
+	return(0);
+}
+
+struct proto_ops inet6_stream_ops = {
+	.family =	PF_INET6,
+	.owner =	THIS_MODULE,
+	.release =	inet6_release,
+	.bind =		inet6_bind,
+	.connect =	inet_stream_connect,		/* ok		*/
+	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
+	.accept =	inet_accept,			/* ok		*/
+	.getname =	inet6_getname, 
+	.poll =		tcp_poll,			/* ok		*/
+	.ioctl =	inet6_ioctl,			/* must change  */
+	.listen =	inet_listen,			/* ok		*/
+	.shutdown =	inet_shutdown,			/* ok		*/
+	.setsockopt =	sock_common_setsockopt,		/* ok		*/
+	.getsockopt =	sock_common_getsockopt,		/* ok		*/
+	.sendmsg =	inet_sendmsg,			/* ok		*/
+	.recvmsg =	sock_common_recvmsg,		/* ok		*/
+	.mmap =		sock_no_mmap,
+	.sendpage =	tcp_sendpage
+};
+
+struct proto_ops inet6_dgram_ops = {
+	.family =	PF_INET6,
+	.owner =	THIS_MODULE,
+	.release =	inet6_release,
+	.bind =		inet6_bind,
+	.connect =	inet_dgram_connect,		/* ok		*/
+	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
+	.accept =	sock_no_accept,			/* a do nothing	*/
+	.getname =	inet6_getname, 
+	.poll =		udp_poll,			/* ok		*/
+	.ioctl =	inet6_ioctl,			/* must change  */
+	.listen =	sock_no_listen,			/* ok		*/
+	.shutdown =	inet_shutdown,			/* ok		*/
+	.setsockopt =	sock_common_setsockopt,		/* ok		*/
+	.getsockopt =	sock_common_getsockopt,		/* ok		*/
+	.sendmsg =	inet_sendmsg,			/* ok		*/
+	.recvmsg =	sock_common_recvmsg,		/* ok		*/
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct net_proto_family inet6_family_ops = {
+	.family = PF_INET6,
+	.create = inet6_create,
+	.owner	= THIS_MODULE,
+};
+
+#ifdef CONFIG_SYSCTL
+extern void ipv6_sysctl_register(void);
+extern void ipv6_sysctl_unregister(void);
+#endif
+
+/* Same as inet6_dgram_ops, sans udp_poll.  */
+static struct proto_ops inet6_sockraw_ops = {
+	.family =	PF_INET6,
+	.owner =	THIS_MODULE,
+	.release =	inet6_release,
+	.bind =		inet6_bind,
+	.connect =	inet_dgram_connect,		/* ok		*/
+	.socketpair =	sock_no_socketpair,		/* a do nothing	*/
+	.accept =	sock_no_accept,			/* a do nothing	*/
+	.getname =	inet6_getname, 
+	.poll =		datagram_poll,			/* ok		*/
+	.ioctl =	inet6_ioctl,			/* must change  */
+	.listen =	sock_no_listen,			/* ok		*/
+	.shutdown =	inet_shutdown,			/* ok		*/
+	.setsockopt =	sock_common_setsockopt,		/* ok		*/
+	.getsockopt =	sock_common_getsockopt,		/* ok		*/
+	.sendmsg =	inet_sendmsg,			/* ok		*/
+	.recvmsg =	sock_common_recvmsg,		/* ok		*/
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct inet_protosw rawv6_protosw = {
+	.type		= SOCK_RAW,
+	.protocol	= IPPROTO_IP,	/* wild card */
+	.prot		= &rawv6_prot,
+	.ops		= &inet6_sockraw_ops,
+	.capability	= CAP_NET_RAW,
+	.no_check	= UDP_CSUM_DEFAULT,
+	.flags		= INET_PROTOSW_REUSE,
+};
+
+void
+inet6_register_protosw(struct inet_protosw *p)
+{
+	struct list_head *lh;
+	struct inet_protosw *answer;
+	int protocol = p->protocol;
+	struct list_head *last_perm;
+
+	spin_lock_bh(&inetsw6_lock);
+
+	if (p->type >= SOCK_MAX)
+		goto out_illegal;
+
+	/* If we are trying to override a permanent protocol, bail. */
+	answer = NULL;
+	last_perm = &inetsw6[p->type];
+	list_for_each(lh, &inetsw6[p->type]) {
+		answer = list_entry(lh, struct inet_protosw, list);
+
+		/* Check only the non-wild match. */
+		if (INET_PROTOSW_PERMANENT & answer->flags) {
+			if (protocol == answer->protocol)
+				break;
+			last_perm = lh;
+		}
+
+		answer = NULL;
+	}
+	if (answer)
+		goto out_permanent;
+
+	/* Add the new entry after the last permanent entry if any, so that
+	 * the new entry does not override a permanent entry when matched with
+	 * a wild-card protocol. But it is allowed to override any existing
+	 * non-permanent entry.  This means that when we remove this entry, the 
+	 * system automatically returns to the old behavior.
+	 */
+	list_add_rcu(&p->list, last_perm);
+out:
+	spin_unlock_bh(&inetsw6_lock);
+	return;
+
+out_permanent:
+	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
+	       protocol);
+	goto out;
+
+out_illegal:
+	printk(KERN_ERR
+	       "Ignoring attempt to register invalid socket type %d.\n",
+	       p->type);
+	goto out;
+}
+
+void
+inet6_unregister_protosw(struct inet_protosw *p)
+{
+	if (INET_PROTOSW_PERMANENT & p->flags) {
+		printk(KERN_ERR
+		       "Attempt to unregister permanent protocol %d.\n",
+		       p->protocol);
+	} else {
+		spin_lock_bh(&inetsw6_lock);
+		list_del_rcu(&p->list);
+		spin_unlock_bh(&inetsw6_lock);
+
+		synchronize_net();
+	}
+}
+
+int
+snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	if (ptr == NULL)
+		return -EINVAL;
+
+	ptr[0] = __alloc_percpu(mibsize, mibalign);
+	if (!ptr[0])
+		goto err0;
+
+	ptr[1] = __alloc_percpu(mibsize, mibalign);
+	if (!ptr[1])
+		goto err1;
+
+	return 0;
+
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+
+void
+snmp6_mib_free(void *ptr[2])
+{
+	if (ptr == NULL)
+		return;
+	if (ptr[0])
+		free_percpu(ptr[0]);
+	if (ptr[1])
+		free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
+
+static int __init init_ipv6_mibs(void)
+{
+	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+			   __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+			   __alignof__(struct icmpv6_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+			   __alignof__(struct udp_mib)) < 0)
+		goto err_udp_mib;
+	return 0;
+
+err_udp_mib:
+	snmp6_mib_free((void **)icmpv6_statistics);
+err_icmp_mib:
+	snmp6_mib_free((void **)ipv6_statistics);
+err_ip_mib:
+	return -ENOMEM;
+	
+}
+
+static void cleanup_ipv6_mibs(void)
+{
+	snmp6_mib_free((void **)ipv6_statistics);
+	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp6_mib_free((void **)udp_stats_in6);
+}
+
+extern int ipv6_misc_proc_init(void);
+
+static int __init inet6_init(void)
+{
+	struct sk_buff *dummy_skb;
+        struct list_head *r;
+	int err;
+
+#ifdef MODULE
+#if 0 /* FIXME --RR */
+	if (!mod_member_present(&__this_module, can_unload))
+	  return -EINVAL;
+
+	__this_module.can_unload = &ipv6_unload;
+#endif
+#endif
+
+	if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
+		printk(KERN_CRIT "inet6_proto_init: size fault\n");
+		return -EINVAL;
+	}
+
+	err = proto_register(&tcpv6_prot, 1);
+	if (err)
+		goto out;
+
+	err = proto_register(&udpv6_prot, 1);
+	if (err)
+		goto out_unregister_tcp_proto;
+
+	err = proto_register(&rawv6_prot, 1);
+	if (err)
+		goto out_unregister_udp_proto;
+
+
+	/* Register the socket-side information for inet6_create.  */
+	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+	/* We MUST register RAW sockets before we create the ICMP6,
+	 * IGMP6, or NDISC control sockets.
+	 */
+	inet6_register_protosw(&rawv6_protosw);
+
+	/* Register the family here so that the init calls below will
+	 * be able to create sockets. (?? is this dangerous ??)
+	 */
+	(void) sock_register(&inet6_family_ops);
+
+	/* Initialise ipv6 mibs */
+	err = init_ipv6_mibs();
+	if (err)
+		goto out_unregister_raw_proto;
+	
+	/*
+	 *	ipngwg API draft makes clear that the correct semantics
+	 *	for TCP and UDP is to consider one TCP and UDP instance
+	 *	in a host availiable by both INET and INET6 APIs and
+	 *	able to communicate via both network protocols.
+	 */
+
+#ifdef CONFIG_SYSCTL
+	ipv6_sysctl_register();
+#endif
+	err = icmpv6_init(&inet6_family_ops);
+	if (err)
+		goto icmp_fail;
+	err = ndisc_init(&inet6_family_ops);
+	if (err)
+		goto ndisc_fail;
+	err = igmp6_init(&inet6_family_ops);
+	if (err)
+		goto igmp_fail;
+	/* Create /proc/foo6 entries. */
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (raw6_proc_init())
+		goto proc_raw6_fail;
+	if (tcp6_proc_init())
+		goto proc_tcp6_fail;
+	if (udp6_proc_init())
+		goto proc_udp6_fail;
+	if (ipv6_misc_proc_init())
+		goto proc_misc6_fail;
+
+	if (ac6_proc_init())
+		goto proc_anycast6_fail;
+	if (if6_proc_init())
+		goto proc_if6_fail;
+#endif
+	ipv6_packet_init();
+	ip6_route_init();
+	ip6_flowlabel_init();
+	err = addrconf_init();
+	if (err)
+		goto addrconf_fail;
+	sit_init();
+
+	/* Init v6 extension headers. */
+	ipv6_rthdr_init();
+	ipv6_frag_init();
+	ipv6_nodata_init();
+	ipv6_destopt_init();
+
+	/* Init v6 transport protocols. */
+	udpv6_init();
+	tcpv6_init();
+	err = 0;
+out:
+	return err;
+
+addrconf_fail:
+	ip6_flowlabel_cleanup();
+	ip6_route_cleanup();
+	ipv6_packet_cleanup();
+#ifdef CONFIG_PROC_FS
+	if6_proc_exit();
+proc_if6_fail:
+	ac6_proc_exit();
+proc_anycast6_fail:
+	ipv6_misc_proc_exit();
+proc_misc6_fail:
+	udp6_proc_exit();
+proc_udp6_fail:
+	tcp6_proc_exit();
+proc_tcp6_fail:
+	raw6_proc_exit();
+proc_raw6_fail:
+#endif
+	igmp6_cleanup();
+igmp_fail:
+	ndisc_cleanup();
+ndisc_fail:
+	icmpv6_cleanup();
+icmp_fail:
+#ifdef CONFIG_SYSCTL
+	ipv6_sysctl_unregister();
+#endif
+	cleanup_ipv6_mibs();
+out_unregister_raw_proto:
+	proto_unregister(&rawv6_prot);
+out_unregister_udp_proto:
+	proto_unregister(&udpv6_prot);
+out_unregister_tcp_proto:
+	proto_unregister(&tcpv6_prot);
+	goto out;
+}
+module_init(inet6_init);
+
+static void __exit inet6_exit(void)
+{
+	/* First of all disallow new sockets creation. */
+	sock_unregister(PF_INET6);
+#ifdef CONFIG_PROC_FS
+	if6_proc_exit();
+	ac6_proc_exit();
+ 	ipv6_misc_proc_exit();
+ 	udp6_proc_exit();
+ 	tcp6_proc_exit();
+ 	raw6_proc_exit();
+#endif
+	/* Cleanup code parts. */
+	sit_cleanup();
+	ip6_flowlabel_cleanup();
+	addrconf_cleanup();
+	ip6_route_cleanup();
+	ipv6_packet_cleanup();
+	igmp6_cleanup();
+	ndisc_cleanup();
+	icmpv6_cleanup();
+#ifdef CONFIG_SYSCTL
+	ipv6_sysctl_unregister();	
+#endif
+	cleanup_ipv6_mibs();
+	proto_unregister(&rawv6_prot);
+	proto_unregister(&udpv6_prot);
+	proto_unregister(&tcpv6_prot);
+}
+module_exit(inet6_exit);
+
+MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
new file mode 100644
index 00000000000..e3ecf626cbf
--- /dev/null
+++ b/net/ipv6/ah6.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Authors
+ *
+ *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ * 	Kazunori MIYAZAWA @USAGI   :
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 	
+ * 	This file is derived from net/ipv4/ah.c.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/string.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+#include <asm/scatterlist.h>
+
+static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+{
+	u8 *opt = (u8 *)opthdr;
+	int len = ipv6_optlen(opthdr);
+	int off = 0;
+	int optlen = 0;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+
+		switch (opt[off]) {
+
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+		default:
+			if (len < 2) 
+				goto bad;
+			optlen = opt[off+1]+2;
+			if (len < optlen)
+				goto bad;
+			if (opt[off] & 0x20)
+				memset(&opt[off+2], 0, opt[off+1]);
+			break;
+		}
+
+		off += optlen;
+		len -= optlen;
+	}
+	if (len == 0)
+		return 1;
+
+bad:
+	return 0;
+}
+
+/**
+ *	ipv6_rearrange_rthdr - rearrange IPv6 routing header
+ *	@iph: IPv6 header
+ *	@rthdr: routing header
+ *
+ *	Rearrange the destination address in @iph and the addresses in @rthdr
+ *	so that they appear in the order they will at the final destination.
+ *	See Appendix A2 of RFC 2402 for details.
+ */
+static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
+{
+	int segments, segments_left;
+	struct in6_addr *addrs;
+	struct in6_addr final_addr;
+
+	segments_left = rthdr->segments_left;
+	if (segments_left == 0)
+		return;
+	rthdr->segments_left = 0; 
+
+	/* The value of rthdr->hdrlen has been verified either by the system
+	 * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
+	 * packets.  So we can assume that it is even and that segments is
+	 * greater than or equal to segments_left.
+	 *
+	 * For the same reason we can assume that this option is of type 0.
+	 */
+	segments = rthdr->hdrlen >> 1;
+
+	addrs = ((struct rt0_hdr *)rthdr)->addr;
+	ipv6_addr_copy(&final_addr, addrs + segments - 1);
+
+	addrs += segments - segments_left;
+	memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
+
+	ipv6_addr_copy(addrs, &iph->daddr);
+	ipv6_addr_copy(&iph->daddr, &final_addr);
+}
+
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+{
+	union {
+		struct ipv6hdr *iph;
+		struct ipv6_opt_hdr *opth;
+		struct ipv6_rt_hdr *rth;
+		char *raw;
+	} exthdr = { .iph = iph };
+	char *end = exthdr.raw + len;
+	int nexthdr = iph->nexthdr;
+
+	exthdr.iph++;
+
+	while (exthdr.raw < end) {
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+		case NEXTHDR_DEST:
+			if (!zero_out_mutable_opts(exthdr.opth)) {
+				LIMIT_NETDEBUG(printk(
+					KERN_WARNING "overrun %sopts\n",
+					nexthdr == NEXTHDR_HOP ?
+						"hop" : "dest"));
+				return -EINVAL;
+			}
+			break;
+
+		case NEXTHDR_ROUTING:
+			ipv6_rearrange_rthdr(iph, exthdr.rth);
+			break;
+
+		default :
+			return 0;
+		}
+
+		nexthdr = exthdr.opth->nexthdr;
+		exthdr.raw += ipv6_optlen(exthdr.opth);
+	}
+
+	return 0;
+}
+
+static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	int extlen;
+	struct ipv6hdr *top_iph;
+	struct ip_auth_hdr *ah;
+	struct ah_data *ahp;
+	u8 nexthdr;
+	char tmp_base[8];
+	struct {
+		struct in6_addr daddr;
+		char hdrs[0];
+	} *tmp_ext;
+
+	top_iph = (struct ipv6hdr *)skb->data;
+	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
+
+	nexthdr = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_AH;
+
+	/* When there are no extension headers, we only need to save the first
+	 * 8 bytes of the base IP header.
+	 */
+	memcpy(tmp_base, top_iph, sizeof(tmp_base));
+
+	tmp_ext = NULL;
+	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+	if (extlen) {
+		extlen += sizeof(*tmp_ext);
+		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
+		if (!tmp_ext) {
+			err = -ENOMEM;
+			goto error;
+		}
+		memcpy(tmp_ext, &top_iph->daddr, extlen);
+		err = ipv6_clear_mutable_options(top_iph,
+						 extlen - sizeof(*tmp_ext) +
+						 sizeof(*top_iph));
+		if (err)
+			goto error_free_iph;
+	}
+
+	ah = (struct ip_auth_hdr *)skb->h.raw;
+	ah->nexthdr = nexthdr;
+
+	top_iph->priority    = 0;
+	top_iph->flow_lbl[0] = 0;
+	top_iph->flow_lbl[1] = 0;
+	top_iph->flow_lbl[2] = 0;
+	top_iph->hop_limit   = 0;
+
+	ahp = x->data;
+	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + 
+				   ahp->icv_trunc_len) >> 2) - 2;
+
+	ah->reserved = 0;
+	ah->spi = x->id.spi;
+	ah->seq_no = htonl(++x->replay.oseq);
+	ahp->icv(ahp, skb, ah->auth_data);
+
+	err = 0;
+
+	memcpy(top_iph, tmp_base, sizeof(tmp_base));
+	if (tmp_ext) {
+		memcpy(&top_iph->daddr, tmp_ext, extlen);
+error_free_iph:
+		kfree(tmp_ext);
+	}
+
+error:
+	return err;
+}
+
+static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	/*
+	 * Before process AH
+	 * [IPv6][Ext1][Ext2][AH][Dest][Payload]
+	 * |<-------------->| hdr_len
+	 *
+	 * To erase AH:
+	 * Keeping copy of cleared headers. After AH processing,
+	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
+	 * header length. Then copy back the copy as long as hdr_len
+	 * If destination header following AH exists, copy it into after [Ext2].
+	 * 
+	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
+	 * There is offset of AH before IPv6 header after the process.
+	 */
+
+	struct ipv6_auth_hdr *ah;
+	struct ah_data *ahp;
+	unsigned char *tmp_hdr = NULL;
+	u16 hdr_len;
+	u16 ah_hlen;
+	int nexthdr;
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+		goto out;
+
+	/* We are going to _remove_ AH header to keep sockets happy,
+	 * so... Later this can change. */
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto out;
+
+	hdr_len = skb->data - skb->nh.raw;
+	ah = (struct ipv6_auth_hdr*)skb->data;
+	ahp = x->data;
+	nexthdr = ah->nexthdr;
+	ah_hlen = (ah->hdrlen + 2) << 2;
+
+        if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
+            ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
+                goto out;
+
+	if (!pskb_may_pull(skb, ah_hlen))
+		goto out;
+
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr)
+		goto out;
+	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
+	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
+		goto out;
+	skb->nh.ipv6h->priority    = 0;
+	skb->nh.ipv6h->flow_lbl[0] = 0;
+	skb->nh.ipv6h->flow_lbl[1] = 0;
+	skb->nh.ipv6h->flow_lbl[2] = 0;
+	skb->nh.ipv6h->hop_limit   = 0;
+
+        {
+		u8 auth_data[MAX_AH_AUTH_LEN];
+
+		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+		memset(ah->auth_data, 0, ahp->icv_trunc_len);
+		skb_push(skb, skb->data - skb->nh.raw);
+		ahp->icv(ahp, skb, ah->auth_data);
+		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+			LIMIT_NETDEBUG(
+				printk(KERN_WARNING "ipsec ah authentication error\n"));
+			x->stats.integrity_failed++;
+			goto free_out;
+		}
+	}
+
+	skb->nh.raw = skb_pull(skb, ah_hlen);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_pull(skb, hdr_len);
+	skb->h.raw = skb->data;
+
+
+	kfree(tmp_hdr);
+
+	return nexthdr;
+
+free_out:
+	kfree(tmp_hdr);
+out:
+	return -EINVAL;
+}
+
+static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 
+                    int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+
+	if (type != ICMPV6_DEST_UNREACH &&
+	    type != ICMPV6_PKT_TOOBIG)
+		return;
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+	if (!x)
+		return;
+
+	NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	       ntohl(ah->spi), NIP6(iph->daddr)));
+
+	xfrm_state_put(x);
+}
+
+static int ah6_init_state(struct xfrm_state *x, void *args)
+{
+	struct ah_data *ahp = NULL;
+	struct xfrm_algo_desc *aalg_desc;
+
+	if (!x->aalg)
+		goto error;
+
+	/* null auth can use a zero length key */
+	if (x->aalg->alg_key_len > 512)
+		goto error;
+
+	if (x->encap)
+		goto error;
+
+	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+	if (ahp == NULL)
+		return -ENOMEM;
+
+	memset(ahp, 0, sizeof(*ahp));
+
+	ahp->key = x->aalg->alg_key;
+	ahp->key_len = (x->aalg->alg_key_len+7)/8;
+	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+	if (!ahp->tfm)
+		goto error;
+	ahp->icv = ah_hmac_digest;
+	
+	/*
+	 * Lookup the algorithm description maintained by xfrm_algo,
+	 * verify crypto transform properties, and store information
+	 * we need for AH processing.  This lookup cannot fail here
+	 * after a successful crypto_alloc_tfm().
+	 */
+	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+	BUG_ON(!aalg_desc);
+
+	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       aalg_desc->uinfo.auth.icv_fullbits/8);
+		goto error;
+	}
+	
+	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+	
+	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
+	
+	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
+	if (!ahp->work_icv)
+		goto error;
+	
+	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+	x->data = ahp;
+
+	return 0;
+
+error:
+	if (ahp) {
+		if (ahp->work_icv)
+			kfree(ahp->work_icv);
+		if (ahp->tfm)
+			crypto_free_tfm(ahp->tfm);
+		kfree(ahp);
+	}
+	return -EINVAL;
+}
+
+static void ah6_destroy(struct xfrm_state *x)
+{
+	struct ah_data *ahp = x->data;
+
+	if (!ahp)
+		return;
+
+	if (ahp->work_icv) {
+		kfree(ahp->work_icv);
+		ahp->work_icv = NULL;
+	}
+	if (ahp->tfm) {
+		crypto_free_tfm(ahp->tfm);
+		ahp->tfm = NULL;
+	}
+	kfree(ahp);
+}
+
+static struct xfrm_type ah6_type =
+{
+	.description	= "AH6",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_AH,
+	.init_state	= ah6_init_state,
+	.destructor	= ah6_destroy,
+	.input		= ah6_input,
+	.output		= ah6_output
+};
+
+static struct inet6_protocol ah6_protocol = {
+	.handler	=	xfrm6_rcv,
+	.err_handler	=	ah6_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+static int __init ah6_init(void)
+{
+	if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+
+	if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+		printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+		xfrm_unregister_type(&ah6_type, AF_INET6);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static void __exit ah6_fini(void)
+{
+	if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+		printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+
+	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+
+}
+
+module_init(ah6_init);
+module_exit(ah6_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
new file mode 100644
index 00000000000..5d22ca3cca2
--- /dev/null
+++ b/net/ipv6/anycast.c
@@ -0,0 +1,594 @@
+/*
+ *	Anycast support for IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	David L Stevens (dlstevens@us.ibm.com)
+ *
+ *	based heavily on net/ipv6/mcast.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/checksum.h>
+
+static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
+
+/* Big ac list lock for all the sockets */
+static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+
+static int
+ip6_onlink(struct in6_addr *addr, struct net_device *dev)
+{
+	struct inet6_dev	*idev;
+	struct inet6_ifaddr	*ifa;
+	int	onlink;
+
+	onlink = 0;
+	read_lock(&addrconf_lock);
+	idev = __in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+			onlink = ipv6_prefix_equal(addr, &ifa->addr,
+						   ifa->prefix_len);
+			if (onlink)
+				break;
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	read_unlock(&addrconf_lock);
+	return onlink;
+}
+
+/*
+ *	socket join an anycast group
+ */
+
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net_device *dev = NULL;
+	struct inet6_dev *idev;
+	struct ipv6_ac_socklist *pac;
+	int	ishost = !ipv6_devconf.forwarding;
+	int	err = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (ipv6_addr_is_multicast(addr))
+		return -EINVAL;
+	if (ipv6_chk_addr(addr, NULL, 0))
+		return -EINVAL;
+
+	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+	if (pac == NULL)
+		return -ENOMEM;
+	pac->acl_next = NULL;
+	ipv6_addr_copy(&pac->acl_addr, addr);
+
+	if (ifindex == 0) {
+		struct rt6_info *rt;
+
+		rt = rt6_lookup(addr, NULL, 0, 0);
+		if (rt) {
+			dev = rt->rt6i_dev;
+			dev_hold(dev);
+			dst_release(&rt->u.dst);
+		} else if (ishost) {
+			err = -EADDRNOTAVAIL;
+			goto out_free_pac;
+		} else {
+			/* router, no matching interface: just pick one */
+
+			dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
+		}
+	} else
+		dev = dev_get_by_index(ifindex);
+
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto out_free_pac;
+	}
+
+	idev = in6_dev_get(dev);
+	if (!idev) {
+		if (ifindex)
+			err = -ENODEV;
+		else
+			err = -EADDRNOTAVAIL;
+		goto out_dev_put;
+	}
+	/* reset ishost, now that we have a specific device */
+	ishost = !idev->cnf.forwarding;
+	in6_dev_put(idev);
+
+	pac->acl_ifindex = dev->ifindex;
+
+	/* XXX
+	 * For hosts, allow link-local or matching prefix anycasts.
+	 * This obviates the need for propagating anycast routes while
+	 * still allowing some non-router anycast participation.
+	 */
+	if (!ip6_onlink(addr, dev)) {
+		if (ishost)
+			err = -EADDRNOTAVAIL;
+		if (err)
+			goto out_dev_put;
+	}
+
+	err = ipv6_dev_ac_inc(dev, addr);
+	if (err)
+		goto out_dev_put;
+
+	write_lock_bh(&ipv6_sk_ac_lock);
+	pac->acl_next = np->ipv6_ac_list;
+	np->ipv6_ac_list = pac;
+	write_unlock_bh(&ipv6_sk_ac_lock);
+
+	dev_put(dev);
+
+	return 0;
+
+out_dev_put:
+	dev_put(dev);
+out_free_pac:
+	sock_kfree_s(sk, pac, sizeof(*pac));
+	return err;
+}
+
+/*
+ *	socket leave an anycast group
+ */
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net_device *dev;
+	struct ipv6_ac_socklist *pac, *prev_pac;
+
+	write_lock_bh(&ipv6_sk_ac_lock);
+	prev_pac = NULL;
+	for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
+		if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
+		     ipv6_addr_equal(&pac->acl_addr, addr))
+			break;
+		prev_pac = pac;
+	}
+	if (!pac) {
+		write_unlock_bh(&ipv6_sk_ac_lock);
+		return -ENOENT;
+	}
+	if (prev_pac)
+		prev_pac->acl_next = pac->acl_next;
+	else
+		np->ipv6_ac_list = pac->acl_next;
+
+	write_unlock_bh(&ipv6_sk_ac_lock);
+
+	dev = dev_get_by_index(pac->acl_ifindex);
+	if (dev) {
+		ipv6_dev_ac_dec(dev, &pac->acl_addr);
+		dev_put(dev);
+	}
+	sock_kfree_s(sk, pac, sizeof(*pac));
+	return 0;
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net_device *dev = NULL;
+	struct ipv6_ac_socklist *pac;
+	int	prev_index;
+
+	write_lock_bh(&ipv6_sk_ac_lock);
+	pac = np->ipv6_ac_list;
+	np->ipv6_ac_list = NULL;
+	write_unlock_bh(&ipv6_sk_ac_lock);
+
+	prev_index = 0;
+	while (pac) {
+		struct ipv6_ac_socklist *next = pac->acl_next;
+
+		if (pac->acl_ifindex != prev_index) {
+			if (dev)
+				dev_put(dev);
+			dev = dev_get_by_index(pac->acl_ifindex);
+			prev_index = pac->acl_ifindex;
+		}
+		if (dev)
+			ipv6_dev_ac_dec(dev, &pac->acl_addr);
+		sock_kfree_s(sk, pac, sizeof(*pac));
+		pac = next;
+	}
+	if (dev)
+		dev_put(dev);
+}
+
+#if 0
+/* The function is not used, which is funny. Apparently, author
+ * supposed to use it to filter out datagrams inside udp/raw but forgot.
+ *
+ * It is OK, anycasts are not special comparing to delivery to unicasts.
+ */
+
+int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
+{
+	struct ipv6_ac_socklist *pac;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int	found;
+
+	found = 0;
+	read_lock(&ipv6_sk_ac_lock);
+	for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
+		if (ifindex && pac->acl_ifindex != ifindex)
+			continue;
+		found = ipv6_addr_equal(&pac->acl_addr, addr);
+		if (found)
+			break;
+	}
+	read_unlock(&ipv6_sk_ac_lock);
+
+	return found;
+}
+
+#endif
+
+static void aca_put(struct ifacaddr6 *ac)
+{
+	if (atomic_dec_and_test(&ac->aca_refcnt)) {
+		in6_dev_put(ac->aca_idev);
+		dst_release(&ac->aca_rt->u.dst);
+		kfree(ac);
+	}
+}
+
+/*
+ *	device anycast group inc (add if not found)
+ */
+int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
+{
+	struct ifacaddr6 *aca;
+	struct inet6_dev *idev;
+	struct rt6_info *rt;
+	int err;
+
+	idev = in6_dev_get(dev);
+
+	if (idev == NULL)
+		return -EINVAL;
+
+	write_lock_bh(&idev->lock);
+	if (idev->dead) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+		if (ipv6_addr_equal(&aca->aca_addr, addr)) {
+			aca->aca_users++;
+			err = 0;
+			goto out;
+		}
+	}
+
+	/*
+	 *	not found: create a new one.
+	 */
+
+	aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
+
+	if (aca == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rt = addrconf_dst_alloc(idev, addr, 1);
+	if (IS_ERR(rt)) {
+		kfree(aca);
+		err = PTR_ERR(rt);
+		goto out;
+	}
+
+	memset(aca, 0, sizeof(struct ifacaddr6));
+
+	ipv6_addr_copy(&aca->aca_addr, addr);
+	aca->aca_idev = idev;
+	aca->aca_rt = rt;
+	aca->aca_users = 1;
+	/* aca_tstamp should be updated upon changes */
+	aca->aca_cstamp = aca->aca_tstamp = jiffies;
+	atomic_set(&aca->aca_refcnt, 2);
+	spin_lock_init(&aca->aca_lock);
+
+	aca->aca_next = idev->ac_list;
+	idev->ac_list = aca;
+	write_unlock_bh(&idev->lock);
+
+	dst_hold(&rt->u.dst);
+	if (ip6_ins_rt(rt, NULL, NULL))
+		dst_release(&rt->u.dst);
+
+	addrconf_join_solict(dev, &aca->aca_addr);
+
+	aca_put(aca);
+	return 0;
+out:
+	write_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	return err;
+}
+
+/*
+ *	device anycast group decrement
+ */
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct ifacaddr6 *aca, *prev_aca;
+
+	write_lock_bh(&idev->lock);
+	prev_aca = NULL;
+	for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+		if (ipv6_addr_equal(&aca->aca_addr, addr))
+			break;
+		prev_aca = aca;
+	}
+	if (!aca) {
+		write_unlock_bh(&idev->lock);
+		return -ENOENT;
+	}
+	if (--aca->aca_users > 0) {
+		write_unlock_bh(&idev->lock);
+		return 0;
+	}
+	if (prev_aca)
+		prev_aca->aca_next = aca->aca_next;
+	else
+		idev->ac_list = aca->aca_next;
+	write_unlock_bh(&idev->lock);
+	addrconf_leave_solict(idev, &aca->aca_addr);
+
+	dst_hold(&aca->aca_rt->u.dst);
+	if (ip6_del_rt(aca->aca_rt, NULL, NULL))
+		dst_free(&aca->aca_rt->u.dst);
+	else
+		dst_release(&aca->aca_rt->u.dst);
+
+	aca_put(aca);
+	return 0;
+}
+
+static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
+{
+	int ret;
+	struct inet6_dev *idev = in6_dev_get(dev);
+	if (idev == NULL)
+		return -ENODEV;
+	ret = __ipv6_dev_ac_dec(idev, addr);
+	in6_dev_put(idev);
+	return ret;
+}
+	
+/*
+ *	check if the interface has this anycast address
+ */
+static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev;
+	struct ifacaddr6 *aca;
+
+	idev = in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		for (aca = idev->ac_list; aca; aca = aca->aca_next)
+			if (ipv6_addr_equal(&aca->aca_addr, addr))
+				break;
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+		return aca != 0;
+	}
+	return 0;
+}
+
+/*
+ *	check if given interface (or any, if dev==0) has this anycast address
+ */
+int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
+{
+	if (dev)
+		return ipv6_chk_acast_dev(dev, addr);
+	read_lock(&dev_base_lock);
+	for (dev=dev_base; dev; dev=dev->next)
+		if (ipv6_chk_acast_dev(dev, addr))
+			break;
+	read_unlock(&dev_base_lock);
+	return dev != 0;
+}
+
+
+#ifdef CONFIG_PROC_FS
+struct ac6_iter_state {
+	struct net_device *dev;
+	struct inet6_dev *idev;
+};
+
+#define ac6_seq_private(seq)	((struct ac6_iter_state *)(seq)->private)
+
+static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
+{
+	struct ifacaddr6 *im = NULL;
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+
+	for (state->dev = dev_base, state->idev = NULL;
+	     state->dev;
+	     state->dev = state->dev->next) {
+		struct inet6_dev *idev;
+		idev = in6_dev_get(state->dev);
+		if (!idev)
+			continue;
+		read_lock_bh(&idev->lock);
+		im = idev->ac_list;
+		if (im) {
+			state->idev = idev;
+			break;
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	return im;
+}
+
+static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
+{
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+
+	im = im->aca_next;
+	while (!im) {
+		if (likely(state->idev != NULL)) {
+			read_unlock_bh(&state->idev->lock);
+			in6_dev_put(state->idev);
+		}
+		state->dev = state->dev->next;
+		if (!state->dev) {
+			state->idev = NULL;
+			break;
+		}
+		state->idev = in6_dev_get(state->dev);
+		if (!state->idev)
+			continue;
+		read_lock_bh(&state->idev->lock);
+		im = state->idev->ac_list;
+	}
+	return im;
+}
+
+static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ifacaddr6 *im = ac6_get_first(seq);
+	if (im)
+		while (pos && (im = ac6_get_next(seq, im)) != NULL)
+			--pos;
+	return pos ? NULL : im;
+}
+
+static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return ac6_get_idx(seq, *pos);
+}
+
+static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ifacaddr6 *im;
+	im = ac6_get_next(seq, v);
+	++*pos;
+	return im;
+}
+
+static void ac6_seq_stop(struct seq_file *seq, void *v)
+{
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+	if (likely(state->idev != NULL)) {
+		read_unlock_bh(&state->idev->lock);
+		in6_dev_put(state->idev);
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static int ac6_seq_show(struct seq_file *seq, void *v)
+{
+	struct ifacaddr6 *im = (struct ifacaddr6 *)v;
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+
+	seq_printf(seq,
+		   "%-4d %-15s "
+		   "%04x%04x%04x%04x%04x%04x%04x%04x "
+		   "%5d\n",
+		   state->dev->ifindex, state->dev->name,
+		   NIP6(im->aca_addr),
+		   im->aca_users);
+	return 0;
+}
+
+static struct seq_operations ac6_seq_ops = {
+	.start	=	ac6_seq_start,
+	.next	=	ac6_seq_next,
+	.stop	=	ac6_seq_stop,
+	.show	=	ac6_seq_show,
+};
+
+static int ac6_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct ac6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &ac6_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations ac6_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	ac6_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+
+int __init ac6_proc_init(void)
+{
+	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ac6_proc_exit(void)
+{
+	proc_net_remove("anycast6");
+}
+#endif
+
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
new file mode 100644
index 00000000000..65b9375df57
--- /dev/null
+++ b/net/ipv6/datagram.c
@@ -0,0 +1,600 @@
+/*
+ *	common UDP/RAW code
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+
+#include <linux/errqueue.h>
+#include <asm/uaccess.h>
+
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
+	struct inet_sock      	*inet = inet_sk(sk);
+	struct ipv6_pinfo      	*np = inet6_sk(sk);
+	struct in6_addr		*daddr, *final_p = NULL, final;
+	struct dst_entry	*dst;
+	struct flowi		fl;
+	struct ip6_flowlabel	*flowlabel = NULL;
+	int			addr_type;
+	int			err;
+
+	if (usin->sin6_family == AF_INET) {
+		if (__ipv6_only_sock(sk))
+			return -EAFNOSUPPORT;
+		err = ip4_datagram_connect(sk, uaddr, addr_len);
+		goto ipv4_connected;
+	}
+
+	if (addr_len < SIN6_LEN_RFC2133)
+	  	return -EINVAL;
+
+	if (usin->sin6_family != AF_INET6) 
+	  	return -EAFNOSUPPORT;
+
+	memset(&fl, 0, sizeof(fl));
+	if (np->sndflow) {
+		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+		}
+	}
+
+	addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+	if (addr_type == IPV6_ADDR_ANY) {
+		/*
+		 *	connect to self
+		 */
+		usin->sin6_addr.s6_addr[15] = 0x01;
+	}
+
+	daddr = &usin->sin6_addr;
+
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		struct sockaddr_in sin;
+
+		if (__ipv6_only_sock(sk)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = daddr->s6_addr32[3];
+		sin.sin_port = usin->sin6_port;
+
+		err = ip4_datagram_connect(sk, 
+					   (struct sockaddr*) &sin, 
+					   sizeof(sin));
+
+ipv4_connected:
+		if (err)
+			goto out;
+		
+		ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
+
+		if (ipv6_addr_any(&np->saddr)) {
+			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
+				      inet->saddr);
+		}
+
+		if (ipv6_addr_any(&np->rcv_saddr)) {
+			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
+				      inet->rcv_saddr);
+		}
+		goto out;
+	}
+
+	if (addr_type&IPV6_ADDR_LINKLOCAL) {
+		if (addr_len >= sizeof(struct sockaddr_in6) &&
+		    usin->sin6_scope_id) {
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != usin->sin6_scope_id) {
+				err = -EINVAL;
+				goto out;
+			}
+			sk->sk_bound_dev_if = usin->sin6_scope_id;
+			if (!sk->sk_bound_dev_if &&
+			    (addr_type & IPV6_ADDR_MULTICAST))
+				fl.oif = np->mcast_oif;
+		}
+
+		/* Connect to link-local address requires an interface */
+		if (!sk->sk_bound_dev_if) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	ipv6_addr_copy(&np->daddr, daddr);
+	np->flow_label = fl.fl6_flowlabel;
+
+	inet->dport = usin->sin6_port;
+
+	/*
+	 *	Check for a route to destination an obtain the
+	 *	destination cache for it.
+	 */
+
+	fl.proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_dport = inet->dport;
+	fl.fl_ip_sport = inet->sport;
+
+	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
+		fl.oif = np->mcast_oif;
+
+	if (flowlabel) {
+		if (flowlabel->opt && flowlabel->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+	} else if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+		dst_release(dst);
+		goto out;
+	}
+
+	/* source address lookup done in ip6_dst_lookup */
+
+	if (ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+
+	if (ipv6_addr_any(&np->rcv_saddr)) {
+		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+		inet->rcv_saddr = LOOPBACK4_IPV6;
+	}
+
+	ip6_dst_store(sk, dst,
+		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
+		      &np->daddr : NULL);
+
+	sk->sk_state = TCP_ESTABLISHED;
+out:
+	fl6_sock_release(flowlabel);
+	return err;
+}
+
+void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 
+		     u16 port, u32 info, u8 *payload)
+{
+	struct ipv6_pinfo *np  = inet6_sk(sk);
+	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct sock_exterr_skb *serr;
+
+	if (!np->recverr)
+		return;
+
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	serr = SKB_EXT_ERR(skb);
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
+	serr->ee.ee_type = icmph->icmp6_type; 
+	serr->ee.ee_code = icmph->icmp6_code;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_info = info;
+	serr->ee.ee_data = 0;
+	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->port = port;
+
+	skb->h.raw = payload;
+	__skb_pull(skb, payload - skb->data);
+
+	if (sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sock_exterr_skb *serr;
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+
+	if (!np->recverr)
+		return;
+
+	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
+	skb->nh.ipv6h = iph;
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+
+	serr = SKB_EXT_ERR(skb);
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+	serr->ee.ee_type = 0; 
+	serr->ee.ee_code = 0;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_info = info;
+	serr->ee.ee_data = 0;
+	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->port = fl->fl_ip_dport;
+
+	skb->h.raw = skb->tail;
+	__skb_pull(skb, skb->tail - skb->data);
+
+	if (sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+/* 
+ *	Handle MSG_ERRQUEUE
+ */
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	struct sockaddr_in6 *sin;
+	struct {
+		struct sock_extended_err ee;
+		struct sockaddr_in6	 offender;
+	} errhdr;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+
+	sin = (struct sockaddr_in6 *)msg->msg_name;
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = serr->port; 
+		sin->sin6_scope_id = 0;
+		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
+			ipv6_addr_copy(&sin->sin6_addr,
+			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+			if (np->sndflow)
+				sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+				sin->sin6_scope_id = IP6CB(skb)->iif;
+		} else {
+			ipv6_addr_set(&sin->sin6_addr, 0, 0,
+				      htonl(0xffff),
+				      *(u32*)(skb->nh.raw + serr->addr_offset));
+		}
+	}
+
+	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
+	sin = &errhdr.offender;
+	sin->sin6_family = AF_UNSPEC;
+	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_scope_id = 0;
+		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
+			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+			if (np->rxopt.all)
+				datagram_recv_ctl(sk, msg, skb);
+			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+				sin->sin6_scope_id = IP6CB(skb)->iif;
+		} else {
+			struct inet_sock *inet = inet_sk(sk);
+
+			ipv6_addr_set(&sin->sin6_addr, 0, 0,
+				      htonl(0xffff),
+				      skb->nh.iph->saddr);
+			if (inet->cmsg_flags)
+				ip_cmsg_recv(msg, skb);
+		}
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
+
+	/* Now we could try to dump offended packet options */
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_irq(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else {
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+	}
+
+out_free_skb:	
+	kfree_skb(skb);
+out:
+	return err;
+}
+
+
+
+int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (np->rxopt.bits.rxinfo) {
+		struct in6_pktinfo src_info;
+
+		src_info.ipi6_ifindex = opt->iif;
+		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+	}
+
+	if (np->rxopt.bits.rxhlim) {
+		int hlim = skb->nh.ipv6h->hop_limit;
+		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+	}
+
+	if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
+		u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+	}
+	if (np->rxopt.bits.hopopts && opt->hop) {
+		u8 *ptr = skb->nh.raw + opt->hop;
+		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
+	}
+	if (np->rxopt.bits.dstopts && opt->dst0) {
+		u8 *ptr = skb->nh.raw + opt->dst0;
+		put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
+	}
+	if (np->rxopt.bits.srcrt && opt->srcrt) {
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
+	}
+	if (np->rxopt.bits.dstopts && opt->dst1) {
+		u8 *ptr = skb->nh.raw + opt->dst1;
+		put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
+	}
+	return 0;
+}
+
+int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+		      struct ipv6_txoptions *opt,
+		      int *hlimit)
+{
+	struct in6_pktinfo *src_info;
+	struct cmsghdr *cmsg;
+	struct ipv6_rt_hdr *rthdr;
+	struct ipv6_opt_hdr *hdr;
+	int len;
+	int err = 0;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		int addr_type;
+		struct net_device *dev = NULL;
+
+		if (!CMSG_OK(msg, cmsg)) {
+			err = -EINVAL;
+			goto exit_f;
+		}
+
+		if (cmsg->cmsg_level != SOL_IPV6)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+ 		case IPV6_PKTINFO:
+ 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+			
+			if (src_info->ipi6_ifindex) {
+				if (fl->oif && src_info->ipi6_ifindex != fl->oif)
+					return -EINVAL;
+				fl->oif = src_info->ipi6_ifindex;
+			}
+
+			addr_type = ipv6_addr_type(&src_info->ipi6_addr);
+
+			if (addr_type == IPV6_ADDR_ANY)
+				break;
+			
+			if (addr_type & IPV6_ADDR_LINKLOCAL) {
+				if (!src_info->ipi6_ifindex)
+					return -EINVAL;
+				else {
+					dev = dev_get_by_index(src_info->ipi6_ifindex);
+					if (!dev)
+						return -ENODEV;
+				}
+			}
+			if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
+				if (dev)
+					dev_put(dev);
+				err = -EINVAL;
+				goto exit_f;
+			}
+			if (dev)
+				dev_put(dev);
+
+			ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+			break;
+
+		case IPV6_FLOWINFO:
+                        if (cmsg->cmsg_len < CMSG_LEN(4)) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
+				if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+					err = -EINVAL;
+					goto exit_f;
+				}
+			}
+			fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
+			break;
+
+		case IPV6_HOPOPTS:
+                        if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+			len = ((hdr->hdrlen + 1) << 3);
+			if (cmsg->cmsg_len < CMSG_LEN(len)) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+			if (!capable(CAP_NET_RAW)) {
+				err = -EPERM;
+				goto exit_f;
+			}
+			opt->opt_nflen += len;
+			opt->hopopt = hdr;
+			break;
+
+		case IPV6_DSTOPTS:
+                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+			len = ((hdr->hdrlen + 1) << 3);
+			if (cmsg->cmsg_len < CMSG_LEN(len)) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+			if (!capable(CAP_NET_RAW)) {
+				err = -EPERM;
+				goto exit_f;
+			}
+			if (opt->dst1opt) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+			opt->opt_flen += len;
+			opt->dst1opt = hdr;
+			break;
+
+		case IPV6_RTHDR:
+                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
+
+			/*
+			 *	TYPE 0
+			 */
+			if (rthdr->type) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			len = ((rthdr->hdrlen + 1) << 3);
+
+                        if (cmsg->cmsg_len < CMSG_LEN(len)) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			/* segments left must also match */
+			if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			opt->opt_nflen += len;
+			opt->srcrt = rthdr;
+
+			if (opt->dst1opt) {
+				int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
+
+				opt->opt_nflen += dsthdrlen;
+				opt->dst0opt = opt->dst1opt;
+				opt->dst1opt = NULL;
+				opt->opt_flen -= dsthdrlen;
+			}
+
+			break;
+
+		case IPV6_HOPLIMIT:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
+			*hlimit = *(int *)CMSG_DATA(cmsg);
+			break;
+
+		default:
+			LIMIT_NETDEBUG(
+				printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type));
+			err = -EINVAL;
+			break;
+		};
+	}
+
+exit_f:
+	return err;
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
new file mode 100644
index 00000000000..be7095d6bab
--- /dev/null
+++ b/net/ipv6/esp6.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Authors
+ *
+ *	Mitsuru KANDA @USAGI       : IPv6 Support 
+ * 	Kazunori MIYAZAWA @USAGI   :
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 	
+ * 	This file is derived from net/ipv4/esp.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/icmpv6.h>
+
+static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	int hdr_len;
+	struct ipv6hdr *top_iph;
+	struct ipv6_esp_hdr *esph;
+	struct crypto_tfm *tfm;
+	struct esp_data *esp;
+	struct sk_buff *trailer;
+	int blksize;
+	int clen;
+	int alen;
+	int nfrags;
+
+	esp = x->data;
+	hdr_len = skb->h.raw - skb->data +
+		  sizeof(*esph) + esp->conf.ivlen;
+
+	/* Strip IP+ESP header. */
+	__skb_pull(skb, hdr_len);
+
+	/* Now skb is pure payload to encrypt */
+	err = -ENOMEM;
+
+	/* Round to block size */
+	clen = skb->len;
+
+	alen = esp->auth.icv_trunc_len;
+	tfm = esp->conf.tfm;
+	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
+	clen = (clen + 2 + blksize-1)&~(blksize-1);
+	if (esp->conf.padlen)
+		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
+		goto error;
+	}
+
+	/* Fill padding... */
+	do {
+		int i;
+		for (i=0; i<clen-skb->len - 2; i++)
+			*(u8*)(trailer->tail + i) = i+1;
+	} while (0);
+	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	pskb_put(skb, trailer, clen - skb->len);
+
+	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
+	esph = (struct ipv6_esp_hdr *)skb->h.raw;
+	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
+	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_ESP;
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(++x->replay.oseq);
+
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+
+	do {
+		struct scatterlist *sg = &esp->sgbuf[0];
+
+		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg)
+				goto error;
+		}
+		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
+		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		if (unlikely(sg != &esp->sgbuf[0]))
+			kfree(sg);
+	} while (0);
+
+	if (esp->conf.ivlen) {
+		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	}
+
+	if (esp->auth.icv_full_len) {
+		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
+			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
+		pskb_put(skb, trailer, alen);
+	}
+
+	err = 0;
+
+error:
+	return err;
+}
+
+static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_esp_hdr *esph;
+	struct esp_data *esp = x->data;
+	struct sk_buff *trailer;
+	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+	int alen = esp->auth.icv_trunc_len;
+	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
+
+	int hdr_len = skb->h.raw - skb->nh.raw;
+	int nfrags;
+	unsigned char *tmp_hdr = NULL;
+	int ret = 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
+		ret = -EINVAL;
+		goto out_nofree;
+	}
+
+	if (elen <= 0 || (elen & (blksize-1))) {
+		ret = -EINVAL;
+		goto out_nofree;
+	}
+
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr) {
+		ret = -ENOMEM;
+		goto out_nofree;
+	}
+	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
+
+	/* If integrity check is required, do this. */
+        if (esp->auth.icv_full_len) {
+		u8 sum[esp->auth.icv_full_len];
+		u8 sum1[alen];
+
+		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+
+		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+			BUG();
+
+		if (unlikely(memcmp(sum, sum1, alen))) {
+			x->stats.integrity_failed++;
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	esph = (struct ipv6_esp_hdr*)skb->data;
+	iph = skb->nh.ipv6h;
+
+	/* Get ivec. This can be wrong, check against another impls. */
+	if (esp->conf.ivlen)
+		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+
+        {
+		u8 nexthdr[2];
+		struct scatterlist *sg = &esp->sgbuf[0];
+		u8 padlen;
+
+		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+			if (!sg) {
+				ret = -ENOMEM;
+				goto out;
+			}
+		}
+		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
+		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+		if (unlikely(sg != &esp->sgbuf[0]))
+			kfree(sg);
+
+		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
+			BUG();
+
+		padlen = nexthdr[0];
+		if (padlen+2 >= elen) {
+			LIMIT_NETDEBUG(
+				printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen));
+			ret = -EINVAL;
+			goto out;
+		}
+		/* ... check padding bits here. Silly. :-) */ 
+
+		pskb_trim(skb, skb->len - alen - padlen - 2);
+		skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
+		skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
+		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+		ret = nexthdr[1];
+	}
+
+out:
+	kfree(tmp_hdr);
+out_nofree:
+	return ret;
+}
+
+static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+{
+	struct esp_data *esp = x->data;
+	u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+
+	if (x->props.mode) {
+		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+	} else {
+		/* The worst case. */
+		mtu += 2 + blksize;
+	}
+	if (esp->conf.padlen)
+		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	return mtu + x->props.header_len + esp->auth.icv_full_len;
+}
+
+static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+                     int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+
+	if (type != ICMPV6_DEST_UNREACH && 
+	    type != ICMPV6_PKT_TOOBIG)
+		return;
+
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
+	if (!x)
+		return;
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
+			ntohl(esph->spi), NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+static void esp6_destroy(struct xfrm_state *x)
+{
+	struct esp_data *esp = x->data;
+
+	if (!esp)
+		return;
+
+	if (esp->conf.tfm) {
+		crypto_free_tfm(esp->conf.tfm);
+		esp->conf.tfm = NULL;
+	}
+	if (esp->conf.ivec) {
+		kfree(esp->conf.ivec);
+		esp->conf.ivec = NULL;
+	}
+	if (esp->auth.tfm) {
+		crypto_free_tfm(esp->auth.tfm);
+		esp->auth.tfm = NULL;
+	}
+	if (esp->auth.work_icv) {
+		kfree(esp->auth.work_icv);
+		esp->auth.work_icv = NULL;
+	}
+	kfree(esp);
+}
+
+static int esp6_init_state(struct xfrm_state *x, void *args)
+{
+	struct esp_data *esp = NULL;
+
+	/* null auth and encryption can have zero length keys */
+	if (x->aalg) {
+		if (x->aalg->alg_key_len > 512)
+			goto error;
+	}
+	if (x->ealg == NULL)
+		goto error;
+
+	if (x->encap)
+		goto error;
+
+	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+	if (esp == NULL)
+		return -ENOMEM;
+
+	memset(esp, 0, sizeof(*esp));
+
+	if (x->aalg) {
+		struct xfrm_algo_desc *aalg_desc;
+
+		esp->auth.key = x->aalg->alg_key;
+		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
+		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+		if (esp->auth.tfm == NULL)
+			goto error;
+		esp->auth.icv = esp_hmac_digest;
+ 
+		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+		BUG_ON(!aalg_desc);
+ 
+		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+			crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+				printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+					x->aalg->alg_name,
+					crypto_tfm_alg_digestsize(esp->auth.tfm),
+					aalg_desc->uinfo.auth.icv_fullbits/8);
+				goto error;
+		}
+ 
+		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+ 
+		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
+		if (!esp->auth.work_icv)
+			goto error;
+	}
+	esp->conf.key = x->ealg->alg_key;
+	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
+	if (x->props.ealgo == SADB_EALG_NULL)
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+	else
+		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
+	if (esp->conf.tfm == NULL)
+		goto error;
+	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.padlen = 0;
+	if (esp->conf.ivlen) {
+		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
+		if (unlikely(esp->conf.ivec == NULL))
+			goto error;
+		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+	}
+	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+		goto error;
+	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+	x->data = esp;
+	return 0;
+
+error:
+	x->data = esp;
+	esp6_destroy(x);
+	x->data = NULL;
+	return -EINVAL;
+}
+
+static struct xfrm_type esp6_type =
+{
+	.description	= "ESP6",
+	.owner	     	= THIS_MODULE,
+	.proto	     	= IPPROTO_ESP,
+	.init_state	= esp6_init_state,
+	.destructor	= esp6_destroy,
+	.get_max_size	= esp6_get_max_size,
+	.input		= esp6_input,
+	.output		= esp6_output
+};
+
+static struct inet6_protocol esp6_protocol = {
+	.handler 	=	xfrm6_rcv,
+	.err_handler	=	esp6_err,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+static int __init esp6_init(void)
+{
+	if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+		printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+		xfrm_unregister_type(&esp6_type, AF_INET6);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static void __exit esp6_fini(void)
+{
+	if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+}
+
+module_init(esp6_init);
+module_exit(esp6_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
new file mode 100644
index 00000000000..e0839eafc3a
--- /dev/null
+++ b/net/ipv6/exthdrs.c
@@ -0,0 +1,575 @@
+/*
+ *	Extension Header handling for IPv6
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Andi Kleen		<ak@muc.de>
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	$Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ *	yoshfuji		: ensure not to overrun while parsing 
+ *				  tlv options.
+ *	Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
+ *	YOSHIFUJI Hideaki @USAGI  Register inbound extension header
+ *				  handlers as inet6_protocol{}.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+
+#include <asm/uaccess.h>
+
+/*
+ *	Parsing tlv encoded headers.
+ *
+ *	Parsing function "func" returns 1, if parsing succeed
+ *	and 0, if it failed.
+ *	It MUST NOT touch skb->h.
+ */
+
+struct tlvtype_proc {
+	int	type;
+	int	(*func)(struct sk_buff *skb, int offset);
+};
+
+/*********************
+  Generic functions
+ *********************/
+
+/* An unknown option is detected, decide what to do */
+
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+{
+	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	case 0: /* ignore */
+		return 1;
+
+	case 1: /* drop packet */
+		break;
+
+	case 3: /* Send ICMP if not a multicast address and drop packet */
+		/* Actually, it is redundant check. icmp_send
+		   will recheck in any case.
+		 */
+		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+			break;
+	case 2: /* send ICMP PARM PROB regardless and drop packet */
+		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+		return 0;
+	};
+
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Parse tlv encoded option header (hop-by-hop or destination) */
+
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+{
+	struct tlvtype_proc *curr;
+	int off = skb->h.raw - skb->nh.raw;
+	int len = ((skb->h.raw[1]+1)<<3);
+
+	if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+		goto bad;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int optlen = skb->nh.raw[off+1]+2;
+
+		switch (skb->nh.raw[off]) {
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+
+		case IPV6_TLV_PADN:
+			break;
+
+		default: /* Other TLV code so scan list */
+			if (optlen > len)
+				goto bad;
+			for (curr=procs; curr->type >= 0; curr++) {
+				if (curr->type == skb->nh.raw[off]) {
+					/* type specific length/alignment 
+					   checks will be performed in the 
+					   func(). */
+					if (curr->func(skb, off) == 0)
+						return 0;
+					break;
+				}
+			}
+			if (curr->type < 0) {
+				if (ip6_tlvopt_unknown(skb, off) == 0)
+					return 0;
+			}
+			break;
+		}
+		off += optlen;
+		len -= optlen;
+	}
+	if (len == 0)
+		return 1;
+bad:
+	kfree_skb(skb);
+	return 0;
+}
+
+/*****************************
+  Destination options header.
+ *****************************/
+
+static struct tlvtype_proc tlvprocdestopt_lst[] = {
+	/* No destination options are defined now */
+	{-1,			NULL}
+};
+
+static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *skbp;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
+	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	opt->dst1 = skb->h.raw - skb->nh.raw;
+
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		*nhoffp = opt->dst1;
+		return 1;
+	}
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	return -1;
+}
+
+static struct inet6_protocol destopt_protocol = {
+	.handler	=	ipv6_destopt_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_destopt_init(void)
+{
+	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
+		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
+}
+
+/********************************
+  NONE header. No data in packet.
+ ********************************/
+
+static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *skbp;
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct inet6_protocol nodata_protocol = {
+	.handler	=	ipv6_nodata_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_nodata_init(void)
+{
+	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
+		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
+}
+
+/********************************
+  Routing header.
+ ********************************/
+
+static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *skbp;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct in6_addr *addr;
+	struct in6_addr daddr;
+	int n, i;
+
+	struct ipv6_rt_hdr *hdr;
+	struct rt0_hdr *rthdr;
+
+	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
+	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+
+	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+	    skb->pkt_type != PACKET_HOST) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+
+looped_back:
+	if (hdr->segments_left == 0) {
+		opt->srcrt = skb->h.raw - skb->nh.raw;
+		skb->h.raw += (hdr->hdrlen + 1) << 3;
+		opt->dst0 = opt->dst1;
+		opt->dst1 = 0;
+		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+		return 1;
+	}
+
+	if (hdr->type != IPV6_SRCRT_TYPE_0) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+		return -1;
+	}
+	
+	if (hdr->hdrlen & 0x01) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+		return -1;
+	}
+
+	/*
+	 *	This is the routing header forwarding algorithm from
+	 *	RFC 2460, page 16.
+	 */
+
+	n = hdr->hdrlen >> 1;
+
+	if (hdr->segments_left > n) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		return -1;
+	}
+
+	/* We are about to mangle packet header. Be careful!
+	   Do not damage packets queued somewhere.
+	 */
+	if (skb_cloned(skb)) {
+		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+		kfree_skb(skb);
+		/* the copy is a forwarded packet */
+		if (skb2 == NULL) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS);	
+			return -1;
+		}
+		*skbp = skb = skb2;
+		opt = IP6CB(skb2);
+		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+	}
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	i = n - --hdr->segments_left;
+
+	rthdr = (struct rt0_hdr *) hdr;
+	addr = rthdr->addr;
+	addr += i - 1;
+
+	if (ipv6_addr_is_multicast(addr)) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	ipv6_addr_copy(&daddr, addr);
+	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+
+	dst_release(xchg(&skb->dst, NULL));
+	ip6_route_input(skb);
+	if (skb->dst->error) {
+		skb_push(skb, skb->data - skb->nh.raw);
+		dst_input(skb);
+		return -1;
+	}
+
+	if (skb->dst->dev->flags&IFF_LOOPBACK) {
+		if (skb->nh.ipv6h->hop_limit <= 1) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+				    0, skb->dev);
+			kfree_skb(skb);
+			return -1;
+		}
+		skb->nh.ipv6h->hop_limit--;
+		goto looped_back;
+	}
+
+	skb_push(skb, skb->data - skb->nh.raw);
+	dst_input(skb);
+	return -1;
+}
+
+static struct inet6_protocol rthdr_protocol = {
+	.handler	=	ipv6_rthdr_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_rthdr_init(void)
+{
+	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
+		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+};
+
+/*
+   This function inverts received rthdr.
+   NOTE: specs allow to make it automatically only if
+   packet authenticated.
+
+   I will not discuss it here (though, I am really pissed off at
+   this stupid requirement making rthdr idea useless)
+
+   Actually, it creates severe problems  for us.
+   Embryonic requests has no associated sockets,
+   so that user have no control over it and
+   cannot not only to set reply options, but
+   even to know, that someone wants to connect
+   without success. :-(
+
+   For now we need to test the engine, so that I created
+   temporary (or permanent) backdoor.
+   If listening socket set IPV6_RTHDR to 2, then we invert header.
+                                                   --ANK (980729)
+ */
+
+struct ipv6_txoptions *
+ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
+{
+	/* Received rthdr:
+
+	   [ H1 -> H2 -> ... H_prev ]  daddr=ME
+
+	   Inverted result:
+	   [ H_prev -> ... -> H1 ] daddr =sender
+
+	   Note, that IP output engine will rewrite this rthdr
+	   by rotating it left by one addr.
+	 */
+
+	int n, i;
+	struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
+	struct rt0_hdr *irthdr;
+	struct ipv6_txoptions *opt;
+	int hdrlen = ipv6_optlen(hdr);
+
+	if (hdr->segments_left ||
+	    hdr->type != IPV6_SRCRT_TYPE_0 ||
+	    hdr->hdrlen & 0x01)
+		return NULL;
+
+	n = hdr->hdrlen >> 1;
+	opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
+	if (opt == NULL)
+		return NULL;
+	memset(opt, 0, sizeof(*opt));
+	opt->tot_len = sizeof(*opt) + hdrlen;
+	opt->srcrt = (void*)(opt+1);
+	opt->opt_nflen = hdrlen;
+
+	memcpy(opt->srcrt, hdr, sizeof(*hdr));
+	irthdr = (struct rt0_hdr*)opt->srcrt;
+	/* Obsolete field, MBZ, when originated by us */
+	irthdr->bitmap = 0;
+	opt->srcrt->segments_left = n;
+	for (i=0; i<n; i++)
+		memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
+	return opt;
+}
+
+/**********************************
+  Hop-by-hop options.
+ **********************************/
+
+/* Router Alert as of RFC 2711 */
+
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+{
+	if (skb->nh.raw[optoff+1] == 2) {
+		IP6CB(skb)->ra = optoff;
+		return 1;
+	}
+	LIMIT_NETDEBUG(
+		 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]));
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Jumbo payload */
+
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+{
+	u32 pkt_len;
+
+	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+		LIMIT_NETDEBUG(
+			 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]));
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		goto drop;
+	}
+
+	pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
+	if (pkt_len <= IPV6_MAXPLEN) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+		return 0;
+	}
+	if (skb->nh.ipv6h->payload_len) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+		return 0;
+	}
+
+	if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	}
+	if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
+		__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
+		if (skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+	return 1;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct tlvtype_proc tlvprochopopt_lst[] = {
+	{
+		.type	= IPV6_TLV_ROUTERALERT,
+		.func	= ipv6_hop_ra,
+	},
+	{
+		.type	= IPV6_TLV_JUMBO,
+		.func	= ipv6_hop_jumbo,
+	},
+	{ -1, }
+};
+
+int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
+{
+	IP6CB(skb)->hop = sizeof(struct ipv6hdr);
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb))
+		return sizeof(struct ipv6hdr);
+	return -1;
+}
+
+/*
+ *	Creating outbound headers.
+ *
+ *	"build" functions work when skb is filled from head to tail (datagram)
+ *	"push"	functions work when headers are added from tail to head (tcp)
+ *
+ *	In both cases we assume, that caller reserved enough room
+ *	for headers.
+ */
+
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+			    struct ipv6_rt_hdr *opt,
+			    struct in6_addr **addr_p)
+{
+	struct rt0_hdr *phdr, *ihdr;
+	int hops;
+
+	ihdr = (struct rt0_hdr *) opt;
+	
+	phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
+	memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
+
+	hops = ihdr->rt_hdr.hdrlen >> 1;
+
+	if (hops > 1)
+		memcpy(phdr->addr, ihdr->addr + 1,
+		       (hops - 1) * sizeof(struct in6_addr));
+
+	ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+	*addr_p = ihdr->addr;
+
+	phdr->rt_hdr.nexthdr = *proto;
+	*proto = NEXTHDR_ROUTING;
+}
+
+static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
+{
+	struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
+
+	memcpy(h, opt, ipv6_optlen(opt));
+	h->nexthdr = *proto;
+	*proto = type;
+}
+
+void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+			  u8 *proto,
+			  struct in6_addr **daddr)
+{
+	if (opt->srcrt)
+		ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+	if (opt->dst0opt)
+		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+	if (opt->hopopt)
+		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+}
+
+void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
+{
+	if (opt->dst1opt)
+		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+}
+
+struct ipv6_txoptions *
+ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
+{
+	struct ipv6_txoptions *opt2;
+
+	opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
+	if (opt2) {
+		long dif = (char*)opt2 - (char*)opt;
+		memcpy(opt2, opt, opt->tot_len);
+		if (opt2->hopopt)
+			*((char**)&opt2->hopopt) += dif;
+		if (opt2->dst0opt)
+			*((char**)&opt2->dst0opt) += dif;
+		if (opt2->dst1opt)
+			*((char**)&opt2->dst1opt) += dif;
+		if (opt2->srcrt)
+			*((char**)&opt2->srcrt) += dif;
+	}
+	return opt2;
+}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
new file mode 100644
index 00000000000..6dda815c013
--- /dev/null
+++ b/net/ipv6/exthdrs_core.c
@@ -0,0 +1,109 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.
+ */
+#include <net/ipv6.h>
+
+/* 
+ * find out if nexthdr is a well-known extension header or a protocol
+ */
+
+int ipv6_ext_hdr(u8 nexthdr)
+{
+	/* 
+	 * find out if nexthdr is an extension header or a protocol
+	 */
+	return ( (nexthdr == NEXTHDR_HOP)	||
+		 (nexthdr == NEXTHDR_ROUTING)	||
+		 (nexthdr == NEXTHDR_FRAGMENT)	||
+		 (nexthdr == NEXTHDR_AUTH)	||
+		 (nexthdr == NEXTHDR_NONE)	||
+		 (nexthdr == NEXTHDR_DEST) );
+}
+
+/*
+ * Skip any extension headers. This is used by the ICMP module.
+ *
+ * Note that strictly speaking this conflicts with RFC 2460 4.0:
+ * ...The contents and semantics of each extension header determine whether 
+ * or not to proceed to the next header.  Therefore, extension headers must
+ * be processed strictly in the order they appear in the packet; a
+ * receiver must not, for example, scan through a packet looking for a
+ * particular kind of extension header and process that header prior to
+ * processing all preceding ones.
+ * 
+ * We do exactly this. This is a protocol bug. We can't decide after a
+ * seeing an unknown discard-with-error flavour TLV option if it's a 
+ * ICMP error message or not (errors should never be send in reply to
+ * ICMP error messages).
+ * 
+ * But I see no other way to do this. This might need to be reexamined
+ * when Linux implements ESP (and maybe AUTH) headers.
+ * --AK
+ *
+ * This function parses (probably truncated) exthdr set "hdr"
+ * of length "len". "nexthdrp" initially points to some place,
+ * where type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns pointer to the start
+ * of unparsable area i.e. the first header with unknown type.
+ * If it is not NULL *nexthdr is updated by type/protocol of this header.
+ *
+ * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
+ *        - it may return pointer pointing beyond end of packet,
+ *	    if the last recognized header is truncated in the middle.
+ *        - if packet is truncated, so that all parsed headers are skipped,
+ *	    it returns NULL.
+ *	  - First fragment header is skipped, not-first ones
+ *	    are considered as unparsable.
+ *	  - ESP is unparsable for now and considered like
+ *	    normal payload protocol.
+ *	  - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ *
+ * --ANK (980726)
+ */
+
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
+{
+	u8 nexthdr = *nexthdrp;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		if (len < (int)sizeof(struct ipv6_opt_hdr))
+			return -1;
+		if (nexthdr == NEXTHDR_NONE)
+			return -1;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			BUG();
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off, *fp;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -1;
+
+			if (ntohs(*fp) & ~0x7)
+				break;
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen+2)<<2; 
+		else
+			hdrlen = ipv6_optlen(hp); 
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*nexthdrp = nexthdr;
+	return start;
+}
+
+EXPORT_SYMBOL(ipv6_ext_hdr);
+EXPORT_SYMBOL(ipv6_skip_exthdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
new file mode 100644
index 00000000000..87b9082ceab
--- /dev/null
+++ b/net/ipv6/icmp.c
@@ -0,0 +1,822 @@
+/*
+ *	Internet Control Message Protocol (ICMPv6)
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *
+ *	$Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
+ *
+ *	Based on net/ipv4/icmp.c
+ *
+ *	RFC 1885
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	Changes:
+ *
+ *	Andi Kleen		:	exception handling
+ *	Andi Kleen			add rate limits. never reply to a icmp.
+ *					add more length checks and other fixes.
+ *	yoshfuji		:	ensure to sent parameter problem for
+ *					fragments.
+ *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
+ *	Randy Dunlap and
+ *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
+ *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/protocol.h>
+#include <net/raw.h>
+#include <net/rawv6.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+
+/*
+ *	The ICMP socket(s). This is the most convenient way to flow control
+ *	our ICMP output as well as maintain a clean interface throughout
+ *	all layers. All Socketless IP sends will soon be gone.
+ *
+ *	On SMP we have one ICMP socket per-cpu.
+ */
+static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
+#define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
+
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+
+static struct inet6_protocol icmpv6_protocol = {
+	.handler	=	icmpv6_rcv,
+	.flags		=	INET6_PROTO_FINAL,
+};
+
+static __inline__ int icmpv6_xmit_lock(void)
+{
+	local_bh_disable();
+
+	if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
+		/* This can happen if the output path (f.e. SIT or
+		 * ip6ip6 tunnel) signals dst_link_failure() for an
+		 * outgoing ICMP6 packet.
+		 */
+		local_bh_enable();
+		return 1;
+	}
+	return 0;
+}
+
+static __inline__ void icmpv6_xmit_unlock(void)
+{
+	spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
+}
+
+/* 
+ * Slightly more convenient version of icmpv6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
+{
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+	kfree_skb(skb);
+}
+
+/*
+ * Figure out, may we reply to this packet with icmp error.
+ *
+ * We do not reply, if:
+ *	- it was icmp error message.
+ *	- it is truncated, so that it is known, that protocol is ICMPV6
+ *	  (i.e. in the middle of some exthdr)
+ *
+ *	--ANK (980726)
+ */
+
+static int is_ineligible(struct sk_buff *skb)
+{
+	int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+	int len = skb->len - ptr;
+	__u8 nexthdr = skb->nh.ipv6h->nexthdr;
+
+	if (len < 0)
+		return 1;
+
+	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, len);
+	if (ptr < 0)
+		return 0;
+	if (nexthdr == IPPROTO_ICMPV6) {
+		u8 _type, *tp;
+		tp = skb_header_pointer(skb,
+			ptr+offsetof(struct icmp6hdr, icmp6_type),
+			sizeof(_type), &_type);
+		if (tp == NULL ||
+		    !(*tp & ICMPV6_INFOMSG_MASK))
+			return 1;
+	}
+	return 0;
+}
+
+static int sysctl_icmpv6_time = 1*HZ; 
+
+/* 
+ * Check the ICMP output rate limit 
+ */
+static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
+				     struct flowi *fl)
+{
+	struct dst_entry *dst;
+	int res = 0;
+
+	/* Informational messages are not limited. */
+	if (type & ICMPV6_INFOMSG_MASK)
+		return 1;
+
+	/* Do not limit pmtu discovery, it would break it. */
+	if (type == ICMPV6_PKT_TOOBIG)
+		return 1;
+
+	/* 
+	 * Look up the output route.
+	 * XXX: perhaps the expire for routing entries cloned by
+	 * this lookup should be more aggressive (not longer than timeout).
+	 */
+	dst = ip6_route_output(sk, fl);
+	if (dst->error) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
+		res = 1;
+	} else {
+		struct rt6_info *rt = (struct rt6_info *)dst;
+		int tmo = sysctl_icmpv6_time;
+
+		/* Give more bandwidth to wider prefixes. */
+		if (rt->rt6i_dst.plen < 128)
+			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
+
+		res = xrlim_allow(dst, tmo);
+	}
+	dst_release(dst);
+	return res;
+}
+
+/*
+ *	an inline helper for the "simple" if statement below
+ *	checks if parameter problem report is caused by an
+ *	unrecognized IPv6 option that has the Option Type 
+ *	highest-order two bits set to 10
+ */
+
+static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+{
+	u8 _optval, *op;
+
+	offset += skb->nh.raw - skb->data;
+	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
+	if (op == NULL)
+		return 1;
+	return (*op & 0xC0) == 0x80;
+}
+
+static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+{
+	struct sk_buff *skb;
+	struct icmp6hdr *icmp6h;
+	int err = 0;
+
+	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+		goto out;
+
+	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
+	icmp6h->icmp6_cksum = 0;
+
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		skb->csum = csum_partial((char *)icmp6h,
+					sizeof(struct icmp6hdr), skb->csum);
+		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
+						      &fl->fl6_dst,
+						      len, fl->proto,
+						      skb->csum);
+	} else {
+		u32 tmp_csum = 0;
+
+		skb_queue_walk(&sk->sk_write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+
+		tmp_csum = csum_partial((char *)icmp6h,
+					sizeof(struct icmp6hdr), tmp_csum);
+		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+					   &fl->fl6_dst,
+					   len, fl->proto, tmp_csum);
+		icmp6h->icmp6_cksum = tmp_csum;
+	}
+	if (icmp6h->icmp6_cksum == 0)
+		icmp6h->icmp6_cksum = -1;
+	ip6_push_pending_frames(sk);
+out:
+	return err;
+}
+
+struct icmpv6_msg {
+	struct sk_buff	*skb;
+	int		offset;
+};
+
+static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
+	struct sk_buff *org_skb = msg->skb;
+	__u32 csum = 0;
+
+	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
+				      to, len, csum);
+	skb->csum = csum_block_add(skb->csum, csum, odd);
+	return 0;
+}
+
+/*
+ *	Send an ICMP message in response to a packet in error
+ */
+void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 
+		 struct net_device *dev)
+{
+	struct inet6_dev *idev = NULL;
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct sock *sk = icmpv6_socket->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *saddr = NULL;
+	struct dst_entry *dst;
+	struct icmp6hdr tmp_hdr;
+	struct flowi fl;
+	struct icmpv6_msg msg;
+	int iif = 0;
+	int addr_type = 0;
+	int len;
+	int hlimit;
+	int err = 0;
+
+	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+		return;
+
+	/*
+	 *	Make sure we respect the rules 
+	 *	i.e. RFC 1885 2.4(e)
+	 *	Rule (e.1) is enforced by not using icmpv6_send
+	 *	in any code that processes icmp errors.
+	 */
+	addr_type = ipv6_addr_type(&hdr->daddr);
+
+	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
+		saddr = &hdr->daddr;
+
+	/*
+	 *	Dest addr check
+	 */
+
+	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
+		if (type != ICMPV6_PKT_TOOBIG &&
+		    !(type == ICMPV6_PARAMPROB && 
+		      code == ICMPV6_UNK_OPTION && 
+		      (opt_unrec(skb, info))))
+			return;
+
+		saddr = NULL;
+	}
+
+	addr_type = ipv6_addr_type(&hdr->saddr);
+
+	/*
+	 *	Source addr check
+	 */
+
+	if (addr_type & IPV6_ADDR_LINKLOCAL)
+		iif = skb->dev->ifindex;
+
+	/*
+	 *	Must not send if we know that source is Anycast also.
+	 *	for now we don't know that.
+	 */
+	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
+		return;
+	}
+
+	/* 
+	 *	Never answer to a ICMP packet.
+	 */
+	if (is_ineligible(skb)) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); 
+		return;
+	}
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_ICMPV6;
+	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+	if (saddr)
+		ipv6_addr_copy(&fl.fl6_src, saddr);
+	fl.oif = iif;
+	fl.fl_icmp_type = type;
+	fl.fl_icmp_code = code;
+
+	if (icmpv6_xmit_lock())
+		return;
+
+	if (!icmpv6_xrlim_allow(sk, type, &fl))
+		goto out;
+
+	tmp_hdr.icmp6_type = type;
+	tmp_hdr.icmp6_code = code;
+	tmp_hdr.icmp6_cksum = 0;
+	tmp_hdr.icmp6_pointer = htonl(info);
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		goto out_dst_release;
+
+	if (ipv6_addr_is_multicast(&fl.fl6_dst))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	if (hlimit < 0)
+		hlimit = ipv6_get_hoplimit(dst->dev);
+
+	msg.skb = skb;
+	msg.offset = skb->nh.raw - skb->data;
+
+	len = skb->len - msg.offset;
+	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
+	if (len < 0) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmp: len problem\n"));
+		goto out_dst_release;
+	}
+
+	idev = in6_dev_get(skb->dev);
+
+	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
+			      len + sizeof(struct icmp6hdr),
+			      sizeof(struct icmp6hdr),
+			      hlimit, NULL, &fl, (struct rt6_info*)dst,
+			      MSG_DONTWAIT);
+	if (err) {
+		ip6_flush_pending_frames(sk);
+		goto out_put;
+	}
+	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
+
+	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
+	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+
+out_put:
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+out_dst_release:
+	dst_release(dst);
+out:
+	icmpv6_xmit_unlock();
+}
+
+static void icmpv6_echo_reply(struct sk_buff *skb)
+{
+	struct sock *sk = icmpv6_socket->sk;
+	struct inet6_dev *idev;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *saddr = NULL;
+	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+	struct icmp6hdr tmp_hdr;
+	struct flowi fl;
+	struct icmpv6_msg msg;
+	struct dst_entry *dst;
+	int err = 0;
+	int hlimit;
+
+	saddr = &skb->nh.ipv6h->daddr;
+
+	if (!ipv6_unicast_destination(skb))
+		saddr = NULL;
+
+	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
+	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_ICMPV6;
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	if (saddr)
+		ipv6_addr_copy(&fl.fl6_src, saddr);
+	fl.oif = skb->dev->ifindex;
+	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+
+	if (icmpv6_xmit_lock())
+		return;
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		goto out_dst_release;
+
+	if (ipv6_addr_is_multicast(&fl.fl6_dst))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	if (hlimit < 0)
+		hlimit = ipv6_get_hoplimit(dst->dev);
+
+	idev = in6_dev_get(skb->dev);
+
+	msg.skb = skb;
+	msg.offset = 0;
+
+	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
+				sizeof(struct icmp6hdr), hlimit, NULL, &fl,
+				(struct rt6_info*)dst, MSG_DONTWAIT);
+
+	if (err) {
+		ip6_flush_pending_frames(sk);
+		goto out_put;
+	}
+	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
+
+        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
+        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+
+out_put: 
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+out_dst_release:
+	dst_release(dst);
+out: 
+	icmpv6_xmit_unlock();
+}
+
+static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
+{
+	struct in6_addr *saddr, *daddr;
+	struct inet6_protocol *ipprot;
+	struct sock *sk;
+	int inner_offset;
+	int hash;
+	u8 nexthdr;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		return;
+
+	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
+	if (ipv6_ext_hdr(nexthdr)) {
+		/* now skip over extension headers */
+		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, skb->len - sizeof(struct ipv6hdr));
+		if (inner_offset<0)
+			return;
+	} else {
+		inner_offset = sizeof(struct ipv6hdr);
+	}
+
+	/* Checkin header including 8 bytes of inner protocol header. */
+	if (!pskb_may_pull(skb, inner_offset+8))
+		return;
+
+	saddr = &skb->nh.ipv6h->saddr;
+	daddr = &skb->nh.ipv6h->daddr;
+
+	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
+	   Without this we will not able f.e. to make source routed
+	   pmtu discovery.
+	   Corresponding argument (opt) to notifiers is already added.
+	   --ANK (980726)
+	 */
+
+	hash = nexthdr & (MAX_INET_PROTOS - 1);
+
+	rcu_read_lock();
+	ipprot = rcu_dereference(inet6_protos[hash]);
+	if (ipprot && ipprot->err_handler)
+		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+	rcu_read_unlock();
+
+	read_lock(&raw_v6_lock);
+	if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
+		while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
+			rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
+			sk = sk_next(sk);
+		}
+	}
+	read_unlock(&raw_v6_lock);
+}
+  
+/*
+ *	Handle icmp messages
+ */
+
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct net_device *dev = skb->dev;
+	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct in6_addr *saddr, *daddr;
+	struct ipv6hdr *orig_hdr;
+	struct icmp6hdr *hdr;
+	int type;
+
+	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
+
+	saddr = &skb->nh.ipv6h->saddr;
+	daddr = &skb->nh.ipv6h->daddr;
+
+	/* Perform checksum. */
+	if (skb->ip_summed == CHECKSUM_HW) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+				    skb->csum)) {
+			LIMIT_NETDEBUG(
+				printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
+			skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+				    skb_checksum(skb, 0, skb->len, 0))) {
+			LIMIT_NETDEBUG(
+				printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+				       NIP6(*saddr), NIP6(*daddr)));
+			goto discard_it;
+		}
+	}
+
+	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
+		goto discard_it;
+
+	hdr = (struct icmp6hdr *) skb->h.raw;
+
+	type = hdr->icmp6_type;
+
+	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
+	else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
+		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
+
+	switch (type) {
+	case ICMPV6_ECHO_REQUEST:
+		icmpv6_echo_reply(skb);
+		break;
+
+	case ICMPV6_ECHO_REPLY:
+		/* we couldn't care less */
+		break;
+
+	case ICMPV6_PKT_TOOBIG:
+		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
+		   standard destination cache. Seems, only "advanced"
+		   destination cache will allow to solve this problem
+		   --ANK (980726)
+		 */
+		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+			goto discard_it;
+		hdr = (struct icmp6hdr *) skb->h.raw;
+		orig_hdr = (struct ipv6hdr *) (hdr + 1);
+		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
+				   ntohl(hdr->icmp6_mtu));
+
+		/*
+		 *	Drop through to notify
+		 */
+
+	case ICMPV6_DEST_UNREACH:
+	case ICMPV6_TIME_EXCEED:
+	case ICMPV6_PARAMPROB:
+		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+		break;
+
+	case NDISC_ROUTER_SOLICITATION:
+	case NDISC_ROUTER_ADVERTISEMENT:
+	case NDISC_NEIGHBOUR_SOLICITATION:
+	case NDISC_NEIGHBOUR_ADVERTISEMENT:
+	case NDISC_REDIRECT:
+		ndisc_rcv(skb);
+		break;
+
+	case ICMPV6_MGM_QUERY:
+		igmp6_event_query(skb);
+		break;
+
+	case ICMPV6_MGM_REPORT:
+		igmp6_event_report(skb);
+		break;
+
+	case ICMPV6_MGM_REDUCTION:
+	case ICMPV6_NI_QUERY:
+	case ICMPV6_NI_REPLY:
+	case ICMPV6_MLD2_REPORT:
+	case ICMPV6_DHAAD_REQUEST:
+	case ICMPV6_DHAAD_REPLY:
+	case ICMPV6_MOBILE_PREFIX_SOL:
+	case ICMPV6_MOBILE_PREFIX_ADV:
+		break;
+
+	default:
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
+
+		/* informational */
+		if (type & ICMPV6_INFOMSG_MASK)
+			break;
+
+		/* 
+		 * error of unknown type. 
+		 * must pass to upper level 
+		 */
+
+		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+	};
+	kfree_skb(skb);
+	return 0;
+
+discard_it:
+	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+	kfree_skb(skb);
+	return 0;
+}
+
+int __init icmpv6_init(struct net_proto_family *ops)
+{
+	struct sock *sk;
+	int err, i, j;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+
+		err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
+				       &per_cpu(__icmpv6_socket, i));
+		if (err < 0) {
+			printk(KERN_ERR
+			       "Failed to initialize the ICMP6 control socket "
+			       "(err %d).\n",
+			       err);
+			goto fail;
+		}
+
+		sk = per_cpu(__icmpv6_socket, i)->sk;
+		sk->sk_allocation = GFP_ATOMIC;
+
+		/* Enough space for 2 64K ICMP packets, including
+		 * sk_buff struct overhead.
+		 */
+		sk->sk_sndbuf =
+			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+
+		sk->sk_prot->unhash(sk);
+	}
+
+
+	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
+		printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	for (j = 0; j < i; j++) {
+		if (!cpu_possible(j))
+			continue;
+		sock_release(per_cpu(__icmpv6_socket, j));
+	}
+
+	return err;
+}
+
+void icmpv6_cleanup(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		sock_release(per_cpu(__icmpv6_socket, i));
+	}
+	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
+}
+
+static struct icmp6_err {
+	int err;
+	int fatal;
+} tab_unreach[] = {
+	{	/* NOROUTE */
+		.err	= ENETUNREACH,
+		.fatal	= 0,
+	},
+	{	/* ADM_PROHIBITED */
+		.err	= EACCES,
+		.fatal	= 1,
+	},
+	{	/* Was NOT_NEIGHBOUR, now reserved */
+		.err	= EHOSTUNREACH,
+		.fatal	= 0,
+	},
+	{	/* ADDR_UNREACH	*/
+		.err	= EHOSTUNREACH,
+		.fatal	= 0,
+	},
+	{	/* PORT_UNREACH	*/
+		.err	= ECONNREFUSED,
+		.fatal	= 1,
+	},
+};
+
+int icmpv6_err_convert(int type, int code, int *err)
+{
+	int fatal = 0;
+
+	*err = EPROTO;
+
+	switch (type) {
+	case ICMPV6_DEST_UNREACH:
+		fatal = 1;
+		if (code <= ICMPV6_PORT_UNREACH) {
+			*err  = tab_unreach[code].err;
+			fatal = tab_unreach[code].fatal;
+		}
+		break;
+
+	case ICMPV6_PKT_TOOBIG:
+		*err = EMSGSIZE;
+		break;
+		
+	case ICMPV6_PARAMPROB:
+		*err = EPROTO;
+		fatal = 1;
+		break;
+
+	case ICMPV6_TIME_EXCEED:
+		*err = EHOSTUNREACH;
+		break;
+	};
+
+	return fatal;
+}
+
+#ifdef CONFIG_SYSCTL
+ctl_table ipv6_icmp_table[] = {
+	{
+		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
+		.procname	= "ratelimit",
+		.data		= &sysctl_icmpv6_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 },
+};
+#endif
+
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
new file mode 100644
index 00000000000..405740b75ab
--- /dev/null
+++ b/net/ipv6/ip6_fib.c
@@ -0,0 +1,1225 @@
+/*
+ *	Linux INET6 implementation 
+ *	Forwarding Information Database
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * 	Changes:
+ * 	Yuji SEKIYA @USAGI:	Support default route on router node;
+ * 				remove ip6_null_entry from the top of
+ * 				routing table.
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+
+#ifdef 	CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#endif
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
+struct rt6_statistics	rt6_stats;
+
+static kmem_cache_t * fib6_node_kmem;
+
+enum fib_walk_state_t
+{
+#ifdef CONFIG_IPV6_SUBTREES
+	FWS_S,
+#endif
+	FWS_L,
+	FWS_R,
+	FWS_C,
+	FWS_U
+};
+
+struct fib6_cleaner_t
+{
+	struct fib6_walker_t w;
+	int (*func)(struct rt6_info *, void *arg);
+	void *arg;
+};
+
+DEFINE_RWLOCK(fib6_walker_lock);
+
+
+#ifdef CONFIG_IPV6_SUBTREES
+#define FWS_INIT FWS_S
+#define SUBTREE(fn) ((fn)->subtree)
+#else
+#define FWS_INIT FWS_L
+#define SUBTREE(fn) NULL
+#endif
+
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+
+/*
+ *	A routing update causes an increase of the serial number on the
+ *	affected subtree. This allows for cached routes to be asynchronously
+ *	tested when modifications are made to the destination cache as a
+ *	result of redirects, path MTU changes, etc.
+ */
+
+static __u32 rt_sernum;
+
+static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0);
+
+struct fib6_walker_t fib6_walker_list = {
+	.prev	= &fib6_walker_list,
+	.next	= &fib6_walker_list, 
+};
+
+#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
+
+static __inline__ u32 fib6_new_sernum(void)
+{
+	u32 n = ++rt_sernum;
+	if ((__s32)n <= 0)
+		rt_sernum = n = 1;
+	return n;
+}
+
+/*
+ *	Auxiliary address test functions for the radix tree.
+ *
+ *	These assume a 32bit processor (although it will work on 
+ *	64bit processors)
+ */
+
+/*
+ *	test bit
+ */
+
+static __inline__ int addr_bit_set(void *token, int fn_bit)
+{
+	__u32 *addr = token;
+
+	return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
+}
+
+/*
+ *	find the first different bit between two addresses
+ *	length of address must be a multiple of 32bits
+ */
+
+static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
+{
+	__u32 *a1 = token1;
+	__u32 *a2 = token2;
+	int i;
+
+	addrlen >>= 2;
+
+	for (i = 0; i < addrlen; i++) {
+		__u32 xb;
+
+		xb = a1[i] ^ a2[i];
+
+		if (xb) {
+			int j = 31;
+
+			xb = ntohl(xb);
+
+			while ((xb & (1 << j)) == 0)
+				j--;
+
+			return (i * 32 + 31 - j);
+		}
+	}
+
+	/*
+	 *	we should *never* get to this point since that 
+	 *	would mean the addrs are equal
+	 *
+	 *	However, we do get to it 8) And exacly, when
+	 *	addresses are equal 8)
+	 *
+	 *	ip route add 1111::/128 via ...
+	 *	ip route add 1111::/64 via ...
+	 *	and we are here.
+	 *
+	 *	Ideally, this function should stop comparison
+	 *	at prefix length. It does not, but it is still OK,
+	 *	if returned value is greater than prefix length.
+	 *					--ANK (980803)
+	 */
+
+	return addrlen<<5;
+}
+
+static __inline__ struct fib6_node * node_alloc(void)
+{
+	struct fib6_node *fn;
+
+	if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
+		memset(fn, 0, sizeof(struct fib6_node));
+
+	return fn;
+}
+
+static __inline__ void node_free(struct fib6_node * fn)
+{
+	kmem_cache_free(fib6_node_kmem, fn);
+}
+
+static __inline__ void rt6_release(struct rt6_info *rt)
+{
+	if (atomic_dec_and_test(&rt->rt6i_ref))
+		dst_free(&rt->u.dst);
+}
+
+
+/*
+ *	Routing Table
+ *
+ *	return the appropriate node for a routing tree "add" operation
+ *	by either creating and inserting or by returning an existing
+ *	node.
+ */
+
+static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
+				     int addrlen, int plen,
+				     int offset)
+{
+	struct fib6_node *fn, *in, *ln;
+	struct fib6_node *pn = NULL;
+	struct rt6key *key;
+	int	bit;
+       	int	dir = 0;
+	__u32	sernum = fib6_new_sernum();
+
+	RT6_TRACE("fib6_add_1\n");
+
+	/* insert node in tree */
+
+	fn = root;
+
+	do {
+		key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+		/*
+		 *	Prefix match
+		 */
+		if (plen < fn->fn_bit ||
+		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+			goto insert_above;
+		
+		/*
+		 *	Exact match ?
+		 */
+			 
+		if (plen == fn->fn_bit) {
+			/* clean up an intermediate node */
+			if ((fn->fn_flags & RTN_RTINFO) == 0) {
+				rt6_release(fn->leaf);
+				fn->leaf = NULL;
+			}
+			
+			fn->fn_sernum = sernum;
+				
+			return fn;
+		}
+
+		/*
+		 *	We have more bits to go
+		 */
+			 
+		/* Try to walk down on tree. */
+		fn->fn_sernum = sernum;
+		dir = addr_bit_set(addr, fn->fn_bit);
+		pn = fn;
+		fn = dir ? fn->right: fn->left;
+	} while (fn);
+
+	/*
+	 *	We walked to the bottom of tree.
+	 *	Create new leaf node without children.
+	 */
+
+	ln = node_alloc();
+
+	if (ln == NULL)
+		return NULL;
+	ln->fn_bit = plen;
+			
+	ln->parent = pn;
+	ln->fn_sernum = sernum;
+
+	if (dir)
+		pn->right = ln;
+	else
+		pn->left  = ln;
+
+	return ln;
+
+
+insert_above:
+	/*
+	 * split since we don't have a common prefix anymore or 
+	 * we have a less significant route.
+	 * we've to insert an intermediate node on the list
+	 * this new node will point to the one we need to create
+	 * and the current
+	 */
+
+	pn = fn->parent;
+
+	/* find 1st bit in difference between the 2 addrs.
+
+	   See comment in addr_diff: bit may be an invalid value,
+	   but if it is >= plen, the value is ignored in any case.
+	 */
+	
+	bit = addr_diff(addr, &key->addr, addrlen);
+
+	/* 
+	 *		(intermediate)[in]	
+	 *	          /	   \
+	 *	(new leaf node)[ln] (old node)[fn]
+	 */
+	if (plen > bit) {
+		in = node_alloc();
+		ln = node_alloc();
+		
+		if (in == NULL || ln == NULL) {
+			if (in)
+				node_free(in);
+			if (ln)
+				node_free(ln);
+			return NULL;
+		}
+
+		/* 
+		 * new intermediate node. 
+		 * RTN_RTINFO will
+		 * be off since that an address that chooses one of
+		 * the branches would not match less specific routes
+		 * in the other branch
+		 */
+
+		in->fn_bit = bit;
+
+		in->parent = pn;
+		in->leaf = fn->leaf;
+		atomic_inc(&in->leaf->rt6i_ref);
+
+		in->fn_sernum = sernum;
+
+		/* update parent pointer */
+		if (dir)
+			pn->right = in;
+		else
+			pn->left  = in;
+
+		ln->fn_bit = plen;
+
+		ln->parent = in;
+		fn->parent = in;
+
+		ln->fn_sernum = sernum;
+
+		if (addr_bit_set(addr, bit)) {
+			in->right = ln;
+			in->left  = fn;
+		} else {
+			in->left  = ln;
+			in->right = fn;
+		}
+	} else { /* plen <= bit */
+
+		/* 
+		 *		(new leaf node)[ln]
+		 *	          /	   \
+		 *	     (old node)[fn] NULL
+		 */
+
+		ln = node_alloc();
+
+		if (ln == NULL)
+			return NULL;
+
+		ln->fn_bit = plen;
+
+		ln->parent = pn;
+
+		ln->fn_sernum = sernum;
+		
+		if (dir)
+			pn->right = ln;
+		else
+			pn->left  = ln;
+
+		if (addr_bit_set(&key->addr, plen))
+			ln->right = fn;
+		else
+			ln->left  = fn;
+
+		fn->parent = ln;
+	}
+	return ln;
+}
+
+/*
+ *	Insert routing information in a node.
+ */
+
+static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
+    struct nlmsghdr *nlh)
+{
+	struct rt6_info *iter = NULL;
+	struct rt6_info **ins;
+
+	ins = &fn->leaf;
+
+	if (fn->fn_flags&RTN_TL_ROOT &&
+	    fn->leaf == &ip6_null_entry &&
+	    !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
+		fn->leaf = rt;
+		rt->u.next = NULL;
+		goto out;
+	}
+
+	for (iter = fn->leaf; iter; iter=iter->u.next) {
+		/*
+		 *	Search for duplicates
+		 */
+
+		if (iter->rt6i_metric == rt->rt6i_metric) {
+			/*
+			 *	Same priority level
+			 */
+
+			if (iter->rt6i_dev == rt->rt6i_dev &&
+			    iter->rt6i_idev == rt->rt6i_idev &&
+			    ipv6_addr_equal(&iter->rt6i_gateway,
+					    &rt->rt6i_gateway)) {
+				if (!(iter->rt6i_flags&RTF_EXPIRES))
+					return -EEXIST;
+				iter->rt6i_expires = rt->rt6i_expires;
+				if (!(rt->rt6i_flags&RTF_EXPIRES)) {
+					iter->rt6i_flags &= ~RTF_EXPIRES;
+					iter->rt6i_expires = 0;
+				}
+				return -EEXIST;
+			}
+		}
+
+		if (iter->rt6i_metric > rt->rt6i_metric)
+			break;
+
+		ins = &iter->u.next;
+	}
+
+	/*
+	 *	insert node
+	 */
+
+out:
+	rt->u.next = iter;
+	*ins = rt;
+	rt->rt6i_node = fn;
+	atomic_inc(&rt->rt6i_ref);
+	inet6_rt_notify(RTM_NEWROUTE, rt, nlh);
+	rt6_stats.fib_rt_entries++;
+
+	if ((fn->fn_flags & RTN_RTINFO) == 0) {
+		rt6_stats.fib_route_nodes++;
+		fn->fn_flags |= RTN_RTINFO;
+	}
+
+	return 0;
+}
+
+static __inline__ void fib6_start_gc(struct rt6_info *rt)
+{
+	if (ip6_fib_timer.expires == 0 &&
+	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
+		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+}
+
+void fib6_force_start_gc(void)
+{
+	if (ip6_fib_timer.expires == 0)
+		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+}
+
+/*
+ *	Add routing information to the routing tree.
+ *	<destination addr>/<source addr>
+ *	with source addr info in sub-trees
+ */
+
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+	struct fib6_node *fn;
+	int err = -ENOMEM;
+
+	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
+			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
+
+	if (fn == NULL)
+		goto out;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (rt->rt6i_src.plen) {
+		struct fib6_node *sn;
+
+		if (fn->subtree == NULL) {
+			struct fib6_node *sfn;
+
+			/*
+			 * Create subtree.
+			 *
+			 *		fn[main tree]
+			 *		|
+			 *		sfn[subtree root]
+			 *		   \
+			 *		    sn[new leaf node]
+			 */
+
+			/* Create subtree root node */
+			sfn = node_alloc();
+			if (sfn == NULL)
+				goto st_failure;
+
+			sfn->leaf = &ip6_null_entry;
+			atomic_inc(&ip6_null_entry.rt6i_ref);
+			sfn->fn_flags = RTN_ROOT;
+			sfn->fn_sernum = fib6_new_sernum();
+
+			/* Now add the first leaf node to new subtree */
+
+			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+					sizeof(struct in6_addr), rt->rt6i_src.plen,
+					offsetof(struct rt6_info, rt6i_src));
+
+			if (sn == NULL) {
+				/* If it is failed, discard just allocated
+				   root, and then (in st_failure) stale node
+				   in main tree.
+				 */
+				node_free(sfn);
+				goto st_failure;
+			}
+
+			/* Now link new subtree to main tree */
+			sfn->parent = fn;
+			fn->subtree = sfn;
+			if (fn->leaf == NULL) {
+				fn->leaf = rt;
+				atomic_inc(&rt->rt6i_ref);
+			}
+		} else {
+			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+					sizeof(struct in6_addr), rt->rt6i_src.plen,
+					offsetof(struct rt6_info, rt6i_src));
+
+			if (sn == NULL)
+				goto st_failure;
+		}
+
+		fn = sn;
+	}
+#endif
+
+	err = fib6_add_rt2node(fn, rt, nlh);
+
+	if (err == 0) {
+		fib6_start_gc(rt);
+		if (!(rt->rt6i_flags&RTF_CACHE))
+			fib6_prune_clones(fn, rt);
+	}
+
+out:
+	if (err)
+		dst_free(&rt->u.dst);
+	return err;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	/* Subtree creation failed, probably main tree node
+	   is orphan. If it is, shoot it.
+	 */
+st_failure:
+	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+		fib6_repair_tree(fn);
+	dst_free(&rt->u.dst);
+	return err;
+#endif
+}
+
+/*
+ *	Routing tree lookup
+ *
+ */
+
+struct lookup_args {
+	int		offset;		/* key offset on rt6_info	*/
+	struct in6_addr	*addr;		/* search key			*/
+};
+
+static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
+					struct lookup_args *args)
+{
+	struct fib6_node *fn;
+	int dir;
+
+	/*
+	 *	Descend on a tree
+	 */
+
+	fn = root;
+
+	for (;;) {
+		struct fib6_node *next;
+
+		dir = addr_bit_set(args->addr, fn->fn_bit);
+
+		next = dir ? fn->right : fn->left;
+
+		if (next) {
+			fn = next;
+			continue;
+		}
+
+		break;
+	}
+
+	while ((fn->fn_flags & RTN_ROOT) == 0) {
+#ifdef CONFIG_IPV6_SUBTREES
+		if (fn->subtree) {
+			struct fib6_node *st;
+			struct lookup_args *narg;
+
+			narg = args + 1;
+
+			if (narg->addr) {
+				st = fib6_lookup_1(fn->subtree, narg);
+
+				if (st && !(st->fn_flags & RTN_ROOT))
+					return st;
+			}
+		}
+#endif
+
+		if (fn->fn_flags & RTN_RTINFO) {
+			struct rt6key *key;
+
+			key = (struct rt6key *) ((u8 *) fn->leaf +
+						 args->offset);
+
+			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
+				return fn;
+		}
+
+		fn = fn->parent;
+	}
+
+	return NULL;
+}
+
+struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
+			       struct in6_addr *saddr)
+{
+	struct lookup_args args[2];
+	struct fib6_node *fn;
+
+	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
+	args[0].addr = daddr;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	args[1].offset = offsetof(struct rt6_info, rt6i_src);
+	args[1].addr = saddr;
+#endif
+
+	fn = fib6_lookup_1(root, args);
+
+	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
+		fn = root;
+
+	return fn;
+}
+
+/*
+ *	Get node with specified destination prefix (and source prefix,
+ *	if subtrees are used)
+ */
+
+
+static struct fib6_node * fib6_locate_1(struct fib6_node *root,
+					struct in6_addr *addr,
+					int plen, int offset)
+{
+	struct fib6_node *fn;
+
+	for (fn = root; fn ; ) {
+		struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+		/*
+		 *	Prefix match
+		 */
+		if (plen < fn->fn_bit ||
+		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+			return NULL;
+
+		if (plen == fn->fn_bit)
+			return fn;
+
+		/*
+		 *	We have more bits to go
+		 */
+		if (addr_bit_set(addr, fn->fn_bit))
+			fn = fn->right;
+		else
+			fn = fn->left;
+	}
+	return NULL;
+}
+
+struct fib6_node * fib6_locate(struct fib6_node *root,
+			       struct in6_addr *daddr, int dst_len,
+			       struct in6_addr *saddr, int src_len)
+{
+	struct fib6_node *fn;
+
+	fn = fib6_locate_1(root, daddr, dst_len,
+			   offsetof(struct rt6_info, rt6i_dst));
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (src_len) {
+		BUG_TRAP(saddr!=NULL);
+		if (fn == NULL)
+			fn = fn->subtree;
+		if (fn)
+			fn = fib6_locate_1(fn, saddr, src_len,
+					   offsetof(struct rt6_info, rt6i_src));
+	}
+#endif
+
+	if (fn && fn->fn_flags&RTN_RTINFO)
+		return fn;
+
+	return NULL;
+}
+
+
+/*
+ *	Deletion
+ *
+ */
+
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
+{
+	if (fn->fn_flags&RTN_ROOT)
+		return &ip6_null_entry;
+
+	while(fn) {
+		if(fn->left)
+			return fn->left->leaf;
+
+		if(fn->right)
+			return fn->right->leaf;
+
+		fn = SUBTREE(fn);
+	}
+	return NULL;
+}
+
+/*
+ *	Called to trim the tree of intermediate nodes when possible. "fn"
+ *	is the node we want to try and remove.
+ */
+
+static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
+{
+	int children;
+	int nstate;
+	struct fib6_node *child, *pn;
+	struct fib6_walker_t *w;
+	int iter = 0;
+
+	for (;;) {
+		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+		iter++;
+
+		BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
+		BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
+		BUG_TRAP(fn->leaf==NULL);
+
+		children = 0;
+		child = NULL;
+		if (fn->right) child = fn->right, children |= 1;
+		if (fn->left) child = fn->left, children |= 2;
+
+		if (children == 3 || SUBTREE(fn) 
+#ifdef CONFIG_IPV6_SUBTREES
+		    /* Subtree root (i.e. fn) may have one child */
+		    || (children && fn->fn_flags&RTN_ROOT)
+#endif
+		    ) {
+			fn->leaf = fib6_find_prefix(fn);
+#if RT6_DEBUG >= 2
+			if (fn->leaf==NULL) {
+				BUG_TRAP(fn->leaf);
+				fn->leaf = &ip6_null_entry;
+			}
+#endif
+			atomic_inc(&fn->leaf->rt6i_ref);
+			return fn->parent;
+		}
+
+		pn = fn->parent;
+#ifdef CONFIG_IPV6_SUBTREES
+		if (SUBTREE(pn) == fn) {
+			BUG_TRAP(fn->fn_flags&RTN_ROOT);
+			SUBTREE(pn) = NULL;
+			nstate = FWS_L;
+		} else {
+			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
+#endif
+			if (pn->right == fn) pn->right = child;
+			else if (pn->left == fn) pn->left = child;
+#if RT6_DEBUG >= 2
+			else BUG_TRAP(0);
+#endif
+			if (child)
+				child->parent = pn;
+			nstate = FWS_R;
+#ifdef CONFIG_IPV6_SUBTREES
+		}
+#endif
+
+		read_lock(&fib6_walker_lock);
+		FOR_WALKERS(w) {
+			if (child == NULL) {
+				if (w->root == fn) {
+					w->root = w->node = NULL;
+					RT6_TRACE("W %p adjusted by delroot 1\n", w);
+				} else if (w->node == fn) {
+					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+					w->node = pn;
+					w->state = nstate;
+				}
+			} else {
+				if (w->root == fn) {
+					w->root = child;
+					RT6_TRACE("W %p adjusted by delroot 2\n", w);
+				}
+				if (w->node == fn) {
+					w->node = child;
+					if (children&2) {
+						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+					} else {
+						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+					}
+				}
+			}
+		}
+		read_unlock(&fib6_walker_lock);
+
+		node_free(fn);
+		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+			return pn;
+
+		rt6_release(pn->leaf);
+		pn->leaf = NULL;
+		fn = pn;
+	}
+}
+
+static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
+    struct nlmsghdr *nlh, void *_rtattr)
+{
+	struct fib6_walker_t *w;
+	struct rt6_info *rt = *rtp;
+
+	RT6_TRACE("fib6_del_route\n");
+
+	/* Unlink it */
+	*rtp = rt->u.next;
+	rt->rt6i_node = NULL;
+	rt6_stats.fib_rt_entries--;
+	rt6_stats.fib_discarded_routes++;
+
+	/* Adjust walkers */
+	read_lock(&fib6_walker_lock);
+	FOR_WALKERS(w) {
+		if (w->state == FWS_C && w->leaf == rt) {
+			RT6_TRACE("walker %p adjusted by delroute\n", w);
+			w->leaf = rt->u.next;
+			if (w->leaf == NULL)
+				w->state = FWS_U;
+		}
+	}
+	read_unlock(&fib6_walker_lock);
+
+	rt->u.next = NULL;
+
+	if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
+		fn->leaf = &ip6_null_entry;
+
+	/* If it was last route, expunge its radix tree node */
+	if (fn->leaf == NULL) {
+		fn->fn_flags &= ~RTN_RTINFO;
+		rt6_stats.fib_route_nodes--;
+		fn = fib6_repair_tree(fn);
+	}
+
+	if (atomic_read(&rt->rt6i_ref) != 1) {
+		/* This route is used as dummy address holder in some split
+		 * nodes. It is not leaked, but it still holds other resources,
+		 * which must be released in time. So, scan ascendant nodes
+		 * and replace dummy references to this route with references
+		 * to still alive ones.
+		 */
+		while (fn) {
+			if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
+				fn->leaf = fib6_find_prefix(fn);
+				atomic_inc(&fn->leaf->rt6i_ref);
+				rt6_release(rt);
+			}
+			fn = fn->parent;
+		}
+		/* No more references are possible at this point. */
+		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
+	}
+
+	inet6_rt_notify(RTM_DELROUTE, rt, nlh);
+	rt6_release(rt);
+}
+
+int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+	struct fib6_node *fn = rt->rt6i_node;
+	struct rt6_info **rtp;
+
+#if RT6_DEBUG >= 2
+	if (rt->u.dst.obsolete>0) {
+		BUG_TRAP(fn==NULL);
+		return -ENOENT;
+	}
+#endif
+	if (fn == NULL || rt == &ip6_null_entry)
+		return -ENOENT;
+
+	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
+
+	if (!(rt->rt6i_flags&RTF_CACHE))
+		fib6_prune_clones(fn, rt);
+
+	/*
+	 *	Walk the leaf entries looking for ourself
+	 */
+
+	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
+		if (*rtp == rt) {
+			fib6_del_route(fn, rtp, nlh, _rtattr);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+/*
+ *	Tree traversal function.
+ *
+ *	Certainly, it is not interrupt safe.
+ *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
+ *	It means, that we can modify tree during walking
+ *	and use this function for garbage collection, clone pruning,
+ *	cleaning tree when a device goes down etc. etc.	
+ *
+ *	It guarantees that every node will be traversed,
+ *	and that it will be traversed only once.
+ *
+ *	Callback function w->func may return:
+ *	0 -> continue walking.
+ *	positive value -> walking is suspended (used by tree dumps,
+ *	and probably by gc, if it will be split to several slices)
+ *	negative value -> terminate walking.
+ *
+ *	The function itself returns:
+ *	0   -> walk is complete.
+ *	>0  -> walk is incomplete (i.e. suspended)
+ *	<0  -> walk is terminated by an error.
+ */
+
+int fib6_walk_continue(struct fib6_walker_t *w)
+{
+	struct fib6_node *fn, *pn;
+
+	for (;;) {
+		fn = w->node;
+		if (fn == NULL)
+			return 0;
+
+		if (w->prune && fn != w->root &&
+		    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
+			w->state = FWS_C;
+			w->leaf = fn->leaf;
+		}
+		switch (w->state) {
+#ifdef CONFIG_IPV6_SUBTREES
+		case FWS_S:
+			if (SUBTREE(fn)) {
+				w->node = SUBTREE(fn);
+				continue;
+			}
+			w->state = FWS_L;
+#endif	
+		case FWS_L:
+			if (fn->left) {
+				w->node = fn->left;
+				w->state = FWS_INIT;
+				continue;
+			}
+			w->state = FWS_R;
+		case FWS_R:
+			if (fn->right) {
+				w->node = fn->right;
+				w->state = FWS_INIT;
+				continue;
+			}
+			w->state = FWS_C;
+			w->leaf = fn->leaf;
+		case FWS_C:
+			if (w->leaf && fn->fn_flags&RTN_RTINFO) {
+				int err = w->func(w);
+				if (err)
+					return err;
+				continue;
+			}
+			w->state = FWS_U;
+		case FWS_U:
+			if (fn == w->root)
+				return 0;
+			pn = fn->parent;
+			w->node = pn;
+#ifdef CONFIG_IPV6_SUBTREES
+			if (SUBTREE(pn) == fn) {
+				BUG_TRAP(fn->fn_flags&RTN_ROOT);
+				w->state = FWS_L;
+				continue;
+			}
+#endif
+			if (pn->left == fn) {
+				w->state = FWS_R;
+				continue;
+			}
+			if (pn->right == fn) {
+				w->state = FWS_C;
+				w->leaf = w->node->leaf;
+				continue;
+			}
+#if RT6_DEBUG >= 2
+			BUG_TRAP(0);
+#endif
+		}
+	}
+}
+
+int fib6_walk(struct fib6_walker_t *w)
+{
+	int res;
+
+	w->state = FWS_INIT;
+	w->node = w->root;
+
+	fib6_walker_link(w);
+	res = fib6_walk_continue(w);
+	if (res <= 0)
+		fib6_walker_unlink(w);
+	return res;
+}
+
+static int fib6_clean_node(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+	struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+
+	for (rt = w->leaf; rt; rt = rt->u.next) {
+		res = c->func(rt, c->arg);
+		if (res < 0) {
+			w->leaf = rt;
+			res = fib6_del(rt, NULL, NULL);
+			if (res) {
+#if RT6_DEBUG >= 2
+				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+#endif
+				continue;
+			}
+			return 0;
+		}
+		BUG_TRAP(res==0);
+	}
+	w->leaf = rt;
+	return 0;
+}
+
+/*
+ *	Convenient frontend to tree walker.
+ *	
+ *	func is called on each route.
+ *		It may return -1 -> delete this route.
+ *		              0  -> continue walking
+ *
+ *	prune==1 -> only immediate children of node (certainly,
+ *	ignoring pure split nodes) will be scanned.
+ */
+
+void fib6_clean_tree(struct fib6_node *root,
+		     int (*func)(struct rt6_info *, void *arg),
+		     int prune, void *arg)
+{
+	struct fib6_cleaner_t c;
+
+	c.w.root = root;
+	c.w.func = fib6_clean_node;
+	c.w.prune = prune;
+	c.func = func;
+	c.arg = arg;
+
+	fib6_walk(&c.w);
+}
+
+static int fib6_prune_clone(struct rt6_info *rt, void *arg)
+{
+	if (rt->rt6i_flags & RTF_CACHE) {
+		RT6_TRACE("pruning clone %p\n", rt);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
+{
+	fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
+}
+
+/*
+ *	Garbage collection
+ */
+
+static struct fib6_gc_args
+{
+	int			timeout;
+	int			more;
+} gc_args;
+
+static int fib6_age(struct rt6_info *rt, void *arg)
+{
+	unsigned long now = jiffies;
+
+	/*
+	 *	check addrconf expiration here.
+	 *	Routes are expired even if they are in use.
+	 *
+	 *	Also age clones. Note, that clones are aged out
+	 *	only if they are not in use now.
+	 */
+
+	if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
+		if (time_after(now, rt->rt6i_expires)) {
+			RT6_TRACE("expiring %p\n", rt);
+			rt6_reset_dflt_pointer(rt);
+			return -1;
+		}
+		gc_args.more++;
+	} else if (rt->rt6i_flags & RTF_CACHE) {
+		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
+		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+			RT6_TRACE("aging clone %p\n", rt);
+			return -1;
+		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
+			   (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
+			RT6_TRACE("purging route %p via non-router but gateway\n",
+				  rt);
+			return -1;
+		}
+		gc_args.more++;
+	}
+
+	return 0;
+}
+
+static DEFINE_SPINLOCK(fib6_gc_lock);
+
+void fib6_run_gc(unsigned long dummy)
+{
+	if (dummy != ~0UL) {
+		spin_lock_bh(&fib6_gc_lock);
+		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
+	} else {
+		local_bh_disable();
+		if (!spin_trylock(&fib6_gc_lock)) {
+			mod_timer(&ip6_fib_timer, jiffies + HZ);
+			local_bh_enable();
+			return;
+		}
+		gc_args.timeout = ip6_rt_gc_interval;
+	}
+	gc_args.more = 0;
+
+
+	write_lock_bh(&rt6_lock);
+	ndisc_dst_gc(&gc_args.more);
+	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
+	write_unlock_bh(&rt6_lock);
+
+	if (gc_args.more)
+		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+	else {
+		del_timer(&ip6_fib_timer);
+		ip6_fib_timer.expires = 0;
+	}
+	spin_unlock_bh(&fib6_gc_lock);
+}
+
+void __init fib6_init(void)
+{
+	fib6_node_kmem = kmem_cache_create("fib6_nodes",
+					   sizeof(struct fib6_node),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!fib6_node_kmem)
+		panic("cannot create fib6_nodes cache");
+}
+
+void fib6_gc_cleanup(void)
+{
+	del_timer(&ip6_fib_timer);
+	kmem_cache_destroy(fib6_node_kmem);
+}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
new file mode 100644
index 00000000000..a93f6dc5197
--- /dev/null
+++ b/net/ipv6/ip6_flowlabel.c
@@ -0,0 +1,706 @@
+/*
+ *	ip6_flowlabel.c		IPv6 flowlabel manager.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/transp_v6.h>
+
+#include <asm/uaccess.h>
+
+#define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified
+				   in old IPv6 RFC. Well, it was reasonable value.
+				 */
+#define FL_MAX_LINGER	60	/* Maximal linger timeout */
+
+/* FL hash table */
+
+#define FL_MAX_PER_SOCK	32
+#define FL_MAX_SIZE	4096
+#define FL_HASH_MASK	255
+#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
+
+static atomic_t fl_size = ATOMIC_INIT(0);
+static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+
+static void ip6_fl_gc(unsigned long dummy);
+static struct timer_list ip6_fl_gc_timer = TIMER_INITIALIZER(ip6_fl_gc, 0, 0);
+
+/* FL hash table lock: it protects only of GC */
+
+static DEFINE_RWLOCK(ip6_fl_lock);
+
+/* Big socket sock */
+
+static DEFINE_RWLOCK(ip6_sk_fl_lock);
+
+
+static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
+{
+	struct ip6_flowlabel *fl;
+
+	for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
+		if (fl->label == label)
+			return fl;
+	}
+	return NULL;
+}
+
+static struct ip6_flowlabel * fl_lookup(u32 label)
+{
+	struct ip6_flowlabel *fl;
+
+	read_lock_bh(&ip6_fl_lock);
+	fl = __fl_lookup(label);
+	if (fl)
+		atomic_inc(&fl->users);
+	read_unlock_bh(&ip6_fl_lock);
+	return fl;
+}
+
+
+static void fl_free(struct ip6_flowlabel *fl)
+{
+	if (fl)
+		kfree(fl->opt);
+	kfree(fl);
+}
+
+static void fl_release(struct ip6_flowlabel *fl)
+{
+	write_lock_bh(&ip6_fl_lock);
+
+	fl->lastuse = jiffies;
+	if (atomic_dec_and_test(&fl->users)) {
+		unsigned long ttd = fl->lastuse + fl->linger;
+		if (time_after(ttd, fl->expires))
+			fl->expires = ttd;
+		ttd = fl->expires;
+		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
+			struct ipv6_txoptions *opt = fl->opt;
+			fl->opt = NULL;
+			kfree(opt);
+		}
+		if (!timer_pending(&ip6_fl_gc_timer) ||
+		    time_after(ip6_fl_gc_timer.expires, ttd))
+			mod_timer(&ip6_fl_gc_timer, ttd);
+	}
+
+	write_unlock_bh(&ip6_fl_lock);
+}
+
+static void ip6_fl_gc(unsigned long dummy)
+{
+	int i;
+	unsigned long now = jiffies;
+	unsigned long sched = 0;
+
+	write_lock(&ip6_fl_lock);
+
+	for (i=0; i<=FL_HASH_MASK; i++) {
+		struct ip6_flowlabel *fl, **flp;
+		flp = &fl_ht[i];
+		while ((fl=*flp) != NULL) {
+			if (atomic_read(&fl->users) == 0) {
+				unsigned long ttd = fl->lastuse + fl->linger;
+				if (time_after(ttd, fl->expires))
+					fl->expires = ttd;
+				ttd = fl->expires;
+				if (time_after_eq(now, ttd)) {
+					*flp = fl->next;
+					fl_free(fl);
+					atomic_dec(&fl_size);
+					continue;
+				}
+				if (!sched || time_before(ttd, sched))
+					sched = ttd;
+			}
+			flp = &fl->next;
+		}
+	}
+	if (!sched && atomic_read(&fl_size))
+		sched = now + FL_MAX_LINGER;
+	if (sched) {
+		ip6_fl_gc_timer.expires = sched;
+		add_timer(&ip6_fl_gc_timer);
+	}
+	write_unlock(&ip6_fl_lock);
+}
+
+static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
+{
+	fl->label = label & IPV6_FLOWLABEL_MASK;
+
+	write_lock_bh(&ip6_fl_lock);
+	if (label == 0) {
+		for (;;) {
+			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+			if (fl->label) {
+				struct ip6_flowlabel *lfl;
+				lfl = __fl_lookup(fl->label);
+				if (lfl == NULL)
+					break;
+			}
+		}
+	}
+
+	fl->lastuse = jiffies;
+	fl->next = fl_ht[FL_HASH(fl->label)];
+	fl_ht[FL_HASH(fl->label)] = fl;
+	atomic_inc(&fl_size);
+	write_unlock_bh(&ip6_fl_lock);
+	return 0;
+}
+
+
+
+/* Socket flowlabel lists */
+
+struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
+{
+	struct ipv6_fl_socklist *sfl;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	label &= IPV6_FLOWLABEL_MASK;
+
+	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+		struct ip6_flowlabel *fl = sfl->fl;
+		if (fl->label == label) {
+			fl->lastuse = jiffies;
+			atomic_inc(&fl->users);
+			return fl;
+		}
+	}
+	return NULL;
+}
+
+void fl6_free_socklist(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *sfl;
+
+	while ((sfl = np->ipv6_fl_list) != NULL) {
+		np->ipv6_fl_list = sfl->next;
+		fl_release(sfl->fl);
+		kfree(sfl);
+	}
+}
+
+/* Service routines */
+
+
+/*
+   It is the only difficult place. flowlabel enforces equal headers
+   before and including routing header, however user may supply options
+   following rthdr.
+ */
+
+struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
+					 struct ip6_flowlabel * fl,
+					 struct ipv6_txoptions * fopt)
+{
+	struct ipv6_txoptions * fl_opt = fl->opt;
+
+	if (fopt == NULL || fopt->opt_flen == 0)
+		return fl_opt;
+
+	if (fl_opt != NULL) {
+		opt_space->hopopt = fl_opt->hopopt;
+		opt_space->dst0opt = fl_opt->dst0opt;
+		opt_space->srcrt = fl_opt->srcrt;
+		opt_space->opt_nflen = fl_opt->opt_nflen;
+	} else {
+		if (fopt->opt_nflen == 0)
+			return fopt;
+		opt_space->hopopt = NULL;
+		opt_space->dst0opt = NULL;
+		opt_space->srcrt = NULL;
+		opt_space->opt_nflen = 0;
+	}
+	opt_space->dst1opt = fopt->dst1opt;
+	opt_space->auth = fopt->auth;
+	opt_space->opt_flen = fopt->opt_flen;
+	return opt_space;
+}
+
+static unsigned long check_linger(unsigned long ttl)
+{
+	if (ttl < FL_MIN_LINGER)
+		return FL_MIN_LINGER*HZ;
+	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
+		return 0;
+	return ttl*HZ;
+}
+
+static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
+{
+	linger = check_linger(linger);
+	if (!linger)
+		return -EPERM;
+	expires = check_linger(expires);
+	if (!expires)
+		return -EPERM;
+	fl->lastuse = jiffies;
+	if (time_before(fl->linger, linger))
+		fl->linger = linger;
+	if (time_before(expires, fl->linger))
+		expires = fl->linger;
+	if (time_before(fl->expires, fl->lastuse + expires))
+		fl->expires = fl->lastuse + expires;
+	return 0;
+}
+
+static struct ip6_flowlabel *
+fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
+{
+	struct ip6_flowlabel *fl;
+	int olen;
+	int addr_type;
+	int err;
+
+	err = -ENOMEM;
+	fl = kmalloc(sizeof(*fl), GFP_KERNEL);
+	if (fl == NULL)
+		goto done;
+	memset(fl, 0, sizeof(*fl));
+
+	olen = optlen - CMSG_ALIGN(sizeof(*freq));
+	if (olen > 0) {
+		struct msghdr msg;
+		struct flowi flowi;
+		int junk;
+
+		err = -ENOMEM;
+		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
+		if (fl->opt == NULL)
+			goto done;
+
+		memset(fl->opt, 0, sizeof(*fl->opt));
+		fl->opt->tot_len = sizeof(*fl->opt) + olen;
+		err = -EFAULT;
+		if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
+			goto done;
+
+		msg.msg_controllen = olen;
+		msg.msg_control = (void*)(fl->opt+1);
+		flowi.oif = 0;
+
+		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk);
+		if (err)
+			goto done;
+		err = -EINVAL;
+		if (fl->opt->opt_flen)
+			goto done;
+		if (fl->opt->opt_nflen == 0) {
+			kfree(fl->opt);
+			fl->opt = NULL;
+		}
+	}
+
+	fl->expires = jiffies;
+	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
+	if (err)
+		goto done;
+	fl->share = freq->flr_share;
+	addr_type = ipv6_addr_type(&freq->flr_dst);
+	if ((addr_type&IPV6_ADDR_MAPPED)
+	    || addr_type == IPV6_ADDR_ANY)
+		goto done;
+	ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+	atomic_set(&fl->users, 1);
+	switch (fl->share) {
+	case IPV6_FL_S_EXCL:
+	case IPV6_FL_S_ANY:
+		break;
+	case IPV6_FL_S_PROCESS:
+		fl->owner = current->pid;
+		break;
+	case IPV6_FL_S_USER:
+		fl->owner = current->euid;
+		break;
+	default:
+		err = -EINVAL;
+		goto done;
+	}
+	return fl;
+
+done:
+	fl_free(fl);
+	*err_p = err;
+	return NULL;
+}
+
+static int mem_check(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *sfl;
+	int room = FL_MAX_SIZE - atomic_read(&fl_size);
+	int count = 0;
+
+	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
+		return 0;
+
+	for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+		count++;
+
+	if (room <= 0 ||
+	    ((count >= FL_MAX_PER_SOCK ||
+	     (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
+	     && !capable(CAP_NET_ADMIN)))
+		return -ENOBUFS;
+
+	return 0;
+}
+
+static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
+{
+	if (h1 == h2)
+		return 0;
+	if (h1 == NULL || h2 == NULL)
+		return 1;
+	if (h1->hdrlen != h2->hdrlen)
+		return 1;
+	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
+}
+
+static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
+{
+	if (o1 == o2)
+		return 0;
+	if (o1 == NULL || o2 == NULL)
+		return 1;
+	if (o1->opt_nflen != o2->opt_nflen)
+		return 1;
+	if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
+		return 1;
+	if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
+		return 1;
+	if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
+		return 1;
+	return 0;
+}
+
+int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
+{
+	int err;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_flowlabel_req freq;
+	struct ipv6_fl_socklist *sfl1=NULL;
+	struct ipv6_fl_socklist *sfl, **sflp;
+	struct ip6_flowlabel *fl;
+
+	if (optlen < sizeof(freq))
+		return -EINVAL;
+
+	if (copy_from_user(&freq, optval, sizeof(freq)))
+		return -EFAULT;
+
+	switch (freq.flr_action) {
+	case IPV6_FL_A_PUT:
+		write_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+			if (sfl->fl->label == freq.flr_label) {
+				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
+					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+				*sflp = sfl->next;
+				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_release(sfl->fl);
+				kfree(sfl);
+				return 0;
+			}
+		}
+		write_unlock_bh(&ip6_sk_fl_lock);
+		return -ESRCH;
+
+	case IPV6_FL_A_RENEW:
+		read_lock_bh(&ip6_sk_fl_lock);
+		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			if (sfl->fl->label == freq.flr_label) {
+				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
+				read_unlock_bh(&ip6_sk_fl_lock);
+				return err;
+			}
+		}
+		read_unlock_bh(&ip6_sk_fl_lock);
+
+		if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+			fl = fl_lookup(freq.flr_label);
+			if (fl) {
+				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
+				fl_release(fl);
+				return err;
+			}
+		}
+		return -ESRCH;
+
+	case IPV6_FL_A_GET:
+		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
+			return -EINVAL;
+
+		fl = fl_create(&freq, optval, optlen, &err);
+		if (fl == NULL)
+			return err;
+		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+
+		if (freq.flr_label) {
+			struct ip6_flowlabel *fl1 = NULL;
+
+			err = -EEXIST;
+			read_lock_bh(&ip6_sk_fl_lock);
+			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+				if (sfl->fl->label == freq.flr_label) {
+					if (freq.flr_flags&IPV6_FL_F_EXCL) {
+						read_unlock_bh(&ip6_sk_fl_lock);
+						goto done;
+					}
+					fl1 = sfl->fl;
+					atomic_inc(&fl->users);
+					break;
+				}
+			}
+			read_unlock_bh(&ip6_sk_fl_lock);
+
+			if (fl1 == NULL)
+				fl1 = fl_lookup(freq.flr_label);
+			if (fl1) {
+				err = -EEXIST;
+				if (freq.flr_flags&IPV6_FL_F_EXCL)
+					goto release;
+				err = -EPERM;
+				if (fl1->share == IPV6_FL_S_EXCL ||
+				    fl1->share != fl->share ||
+				    fl1->owner != fl->owner)
+					goto release;
+
+				err = -EINVAL;
+				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
+				    ipv6_opt_cmp(fl1->opt, fl->opt))
+					goto release;
+
+				err = -ENOMEM;
+				if (sfl1 == NULL)
+					goto release;
+				if (fl->linger > fl1->linger)
+					fl1->linger = fl->linger;
+				if ((long)(fl->expires - fl1->expires) > 0)
+					fl1->expires = fl->expires;
+				write_lock_bh(&ip6_sk_fl_lock);
+				sfl1->fl = fl1;
+				sfl1->next = np->ipv6_fl_list;
+				np->ipv6_fl_list = sfl1;
+				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_free(fl);
+				return 0;
+
+release:
+				fl_release(fl1);
+				goto done;
+			}
+		}
+		err = -ENOENT;
+		if (!(freq.flr_flags&IPV6_FL_F_CREATE))
+			goto done;
+
+		err = -ENOMEM;
+		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
+			goto done;
+
+		err = fl_intern(fl, freq.flr_label);
+		if (err)
+			goto done;
+
+		/* Do not check for fault */
+		if (!freq.flr_label)
+			copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
+				     &fl->label, sizeof(fl->label));
+
+		sfl1->fl = fl;
+		sfl1->next = np->ipv6_fl_list;
+		np->ipv6_fl_list = sfl1;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+
+done:
+	fl_free(fl);
+	kfree(sfl1);
+	return err;
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct ip6fl_iter_state {
+	int bucket;
+};
+
+#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)
+
+static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
+{
+	struct ip6_flowlabel *fl = NULL;
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
+		if (fl_ht[state->bucket]) {
+			fl = fl_ht[state->bucket];
+			break;
+		}
+	}
+	return fl;
+}
+
+static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
+{
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+	fl = fl->next;
+	while (!fl) {
+		if (++state->bucket <= FL_HASH_MASK)
+			fl = fl_ht[state->bucket];
+	}
+	return fl;
+}
+
+static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ip6_flowlabel *fl = ip6fl_get_first(seq);
+	if (fl)
+		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
+			--pos;
+	return pos ? NULL : fl;
+}
+
+static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock_bh(&ip6_fl_lock);
+	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip6_flowlabel *fl;
+
+	if (v == SEQ_START_TOKEN)
+		fl = ip6fl_get_first(seq);
+	else
+		fl = ip6fl_get_next(seq, v);
+	++*pos;
+	return fl;
+}
+
+static void ip6fl_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&ip6_fl_lock);
+}
+
+static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
+{
+	while(fl) {
+		seq_printf(seq,
+			   "%05X %-1d %-6d %-6d %-6ld %-8ld "
+			   "%02x%02x%02x%02x%02x%02x%02x%02x "
+			   "%-4d\n",
+			   (unsigned)ntohl(fl->label),
+			   fl->share,
+			   (unsigned)fl->owner,
+			   atomic_read(&fl->users),
+			   fl->linger/HZ,
+			   (long)(fl->expires - jiffies)/HZ,
+			   NIP6(fl->dst),
+			   fl->opt ? fl->opt->opt_nflen : 0);
+		fl = fl->next;
+	}
+}
+
+static int ip6fl_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Label S Owner  Users  Linger Expires  "
+			      "Dst                              Opt\n");
+	else
+		ip6fl_fl_seq_show(seq, v);
+	return 0;
+}
+
+static struct seq_operations ip6fl_seq_ops = {
+	.start	=	ip6fl_seq_start,
+	.next	=	ip6fl_seq_next,
+	.stop	=	ip6fl_seq_stop,
+	.show	=	ip6fl_seq_show,
+};
+
+static int ip6fl_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct ip6fl_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &ip6fl_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations ip6fl_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	ip6fl_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+#endif
+
+
+void ip6_flowlabel_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+#endif
+}
+
+void ip6_flowlabel_cleanup(void)
+{
+	del_timer(&ip6_fl_gc_timer);
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("ip6_flowlabel");
+#endif
+}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
new file mode 100644
index 00000000000..866f10726c5
--- /dev/null
+++ b/net/ipv6/ip6_input.c
@@ -0,0 +1,269 @@
+/*
+ *	IPv6 input
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
+ *
+ *	$Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
+ *
+ *	Based in linux/net/ipv4/ip_input.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+/* Changes
+ *
+ * 	Mitsuru KANDA @USAGI and
+ * 	YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+
+
+static inline int ip6_rcv_finish( struct sk_buff *skb) 
+{
+	if (skb->dst == NULL)
+		ip6_route_input(skb);
+
+	return dst_input(skb);
+}
+
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	struct ipv6hdr *hdr;
+	u32 		pkt_len;
+
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto drop;
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+		goto out;
+	}
+
+	/*
+	 * Store incoming device index. When the packet will
+	 * be queued, we cannot refer to skb->dev anymore.
+	 *
+	 * BTW, when we send a packet for our own local address on a
+	 * non-loopback interface (e.g. ethX), it is being delivered
+	 * via the loopback interface (lo) here; skb->dev = &loopback_dev.
+	 * It, however, should be considered as if it is being
+	 * arrived via the sending interface (ethX), because of the
+	 * nature of scoping architecture. --yoshfuji
+	 */
+	IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
+
+	if (skb->len < sizeof(struct ipv6hdr))
+		goto err;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		goto drop;
+	}
+
+	hdr = skb->nh.ipv6h;
+
+	if (hdr->version != 6)
+		goto err;
+
+	pkt_len = ntohs(hdr->payload_len);
+
+	/* pkt_len may be zero if Jumbo payload option is present */
+	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+			goto truncated;
+		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			goto drop;
+		}
+		hdr = skb->nh.ipv6h;
+	}
+
+	if (hdr->nexthdr == NEXTHDR_HOP) {
+		skb->h.raw = (u8*)(hdr+1);
+		if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			return 0;
+		}
+		hdr = skb->nh.ipv6h;
+	}
+
+	return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
+truncated:
+	IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+err:
+	IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+/*
+ *	Deliver the packet to the host
+ */
+
+
+static inline int ip6_input_finish(struct sk_buff *skb)
+{
+	struct inet6_protocol *ipprot;
+	struct sock *raw_sk;
+	unsigned int nhoff;
+	int nexthdr;
+	u8 hash;
+
+	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+
+	/*
+	 *	Parse extension headers
+	 */
+
+	nexthdr = skb->nh.ipv6h->nexthdr;
+	nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	/* Skip hop-by-hop options, they are already parsed. */
+	if (nexthdr == NEXTHDR_HOP) {
+		nhoff = sizeof(struct ipv6hdr);
+		nexthdr = skb->h.raw[0];
+		skb->h.raw += (skb->h.raw[1]+1)<<3;
+	}
+
+	rcu_read_lock();
+resubmit:
+	if (!pskb_pull(skb, skb->h.raw - skb->data))
+		goto discard;
+	nexthdr = skb->nh.raw[nhoff];
+
+	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
+	if (raw_sk)
+		ipv6_raw_deliver(skb, nexthdr);
+
+	hash = nexthdr & (MAX_INET_PROTOS - 1);
+	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+		int ret;
+		
+		if (ipprot->flags & INET6_PROTO_FINAL) {
+			struct ipv6hdr *hdr;	
+
+			skb_postpull_rcsum(skb, skb->nh.raw,
+					   skb->h.raw - skb->nh.raw);
+			hdr = skb->nh.ipv6h;
+			if (ipv6_addr_is_multicast(&hdr->daddr) &&
+			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
+			    &hdr->saddr) &&
+			    !ipv6_is_mld(skb, nexthdr))
+				goto discard;
+		}
+		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
+		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 
+			goto discard;
+		
+		ret = ipprot->handler(&skb, &nhoff);
+		if (ret > 0)
+			goto resubmit;
+		else if (ret == 0)
+			IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+	} else {
+		if (!raw_sk) {
+			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+				IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+				icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
+			}
+		} else {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+			kfree_skb(skb);
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+
+discard:
+	IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return 0;
+}
+
+
+int ip6_input(struct sk_buff *skb)
+{
+	return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish);
+}
+
+int ip6_mc_input(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr;
+	int deliver;
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+
+	hdr = skb->nh.ipv6h;
+	deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
+	    ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+
+	/*
+	 *	IPv6 multicast router mode isnt currently supported.
+	 */
+#if 0
+	if (ipv6_config.multicast_route) {
+		int addr_type;
+
+		addr_type = ipv6_addr_type(&hdr->daddr);
+
+		if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) {
+			struct sk_buff *skb2;
+			struct dst_entry *dst;
+
+			dst = skb->dst;
+			
+			if (deliver) {
+				skb2 = skb_clone(skb, GFP_ATOMIC);
+				dst_output(skb2);
+			} else {
+				dst_output(skb);
+				return 0;
+			}
+		}
+	}
+#endif
+
+	if (likely(deliver)) {
+		ip6_input(skb);
+		return 0;
+	}
+	/* discard */
+	kfree_skb(skb);
+
+	return 0;
+}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
new file mode 100644
index 00000000000..49208ba7509
--- /dev/null
+++ b/net/ipv6/ip6_output.c
@@ -0,0 +1,1197 @@
+/*
+ *	IPv6 output functions
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
+ *
+ *	Based on linux/net/ipv4/ip_output.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Changes:
+ *	A.N.Kuznetsov	:	airthmetics in fragmentation.
+ *				extension headers are implemented.
+ *				route changes now work.
+ *				ip6_forward does not confuse sniffers.
+ *				etc.
+ *
+ *      H. von Brand    :       Added missing #include <linux/string.h>
+ *	Imran Patel	: 	frag id should be in NBO
+ *      Kazunori MIYAZAWA @USAGI
+ *			:       add ip6_append_data and related functions
+ *				for datagram xmit
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/tcp.h>
+#include <linux/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/checksum.h>
+
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
+{
+	static u32 ipv6_fragmentation_id = 1;
+	static DEFINE_SPINLOCK(ip6_id_lock);
+
+	spin_lock_bh(&ip6_id_lock);
+	fhdr->identification = htonl(ipv6_fragmentation_id);
+	if (++ipv6_fragmentation_id == 0)
+		ipv6_fragmentation_id = 1;
+	spin_unlock_bh(&ip6_id_lock);
+}
+
+static inline int ip6_output_finish(struct sk_buff *skb)
+{
+
+	struct dst_entry *dst = skb->dst;
+	struct hh_cache *hh = dst->hh;
+
+	if (hh) {
+		int hh_alen;
+
+		read_lock_bh(&hh->hh_lock);
+		hh_alen = HH_DATA_ALIGN(hh->hh_len);
+		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+		read_unlock_bh(&hh->hh_lock);
+	        skb_push(skb, hh->hh_len);
+		return hh->hh_output(skb);
+	} else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
+
+}
+
+/* dev_loopback_xmit for use with netfilter. */
+static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
+{
+	newskb->mac.raw = newskb->data;
+	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	newskb->pkt_type = PACKET_LOOPBACK;
+	newskb->ip_summed = CHECKSUM_UNNECESSARY;
+	BUG_TRAP(newskb->dst);
+
+	netif_rx(newskb);
+	return 0;
+}
+
+
+static int ip6_output2(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct net_device *dev = dst->dev;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
+		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
+				&skb->nh.ipv6h->saddr)) {
+			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+
+			/* Do not check for IFF_ALLMULTI; multicast routing
+			   is not supported in any case.
+			 */
+			if (newskb)
+				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
+					newskb->dev,
+					ip6_dev_loopback_xmit);
+
+			if (skb->nh.ipv6h->hop_limit == 0) {
+				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+				kfree_skb(skb);
+				return 0;
+			}
+		}
+
+		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	}
+
+	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
+}
+
+int ip6_output(struct sk_buff *skb)
+{
+	if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
+		return ip6_fragment(skb, ip6_output2);
+	else
+		return ip6_output2(skb);
+}
+
+#ifdef CONFIG_NETFILTER
+int ip6_route_me_harder(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct dst_entry *dst;
+	struct flowi fl = {
+		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+		.nl_u =
+		{ .ip6_u =
+		  { .daddr = iph->daddr,
+		    .saddr = iph->saddr, } },
+		.proto = iph->nexthdr,
+	};
+
+	dst = ip6_route_output(skb->sk, &fl);
+
+	if (dst->error) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
+		dst_release(dst);
+		return -EINVAL;
+	}
+
+	/* Drop old route. */
+	dst_release(skb->dst);
+
+	skb->dst = dst;
+	return 0;
+}
+#endif
+
+static inline int ip6_maybe_reroute(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER
+	if (skb->nfcache & NFC_ALTERED){
+		if (ip6_route_me_harder(skb) != 0){
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+	}
+#endif /* CONFIG_NETFILTER */
+	return dst_output(skb);
+}
+
+/*
+ *	xmit an sk_buff (used by TCP)
+ */
+
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+	     struct ipv6_txoptions *opt, int ipfragok)
+{
+	struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
+	struct in6_addr *first_hop = &fl->fl6_dst;
+	struct dst_entry *dst = skb->dst;
+	struct ipv6hdr *hdr;
+	u8  proto = fl->proto;
+	int seg_len = skb->len;
+	int hlimit;
+	u32 mtu;
+
+	if (opt) {
+		int head_room;
+
+		/* First: exthdrs may take lots of space (~8K for now)
+		   MAX_HEADER is not enough.
+		 */
+		head_room = opt->opt_nflen + opt->opt_flen;
+		seg_len += head_room;
+		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+
+		if (skb_headroom(skb) < head_room) {
+			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+			kfree_skb(skb);
+			skb = skb2;
+			if (skb == NULL) {	
+				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+				return -ENOBUFS;
+			}
+			if (sk)
+				skb_set_owner_w(skb, sk);
+		}
+		if (opt->opt_flen)
+			ipv6_push_frag_opts(skb, opt, &proto);
+		if (opt->opt_nflen)
+			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+	}
+
+	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+
+	/*
+	 *	Fill in the IPv6 header
+	 */
+
+	*(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
+	hlimit = -1;
+	if (np)
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	if (hlimit < 0)
+		hlimit = ipv6_get_hoplimit(dst->dev);
+
+	hdr->payload_len = htons(seg_len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = hlimit;
+
+	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->daddr, first_hop);
+
+	mtu = dst_mtu(dst);
+	if ((skb->len <= mtu) || ipfragok) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+	}
+
+	if (net_ratelimit())
+		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+	skb->dev = dst->dev;
+	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	kfree_skb(skb);
+	return -EMSGSIZE;
+}
+
+/*
+ *	To avoid extra problems ND packets are send through this
+ *	routine. It's code duplication but I really want to avoid
+ *	extra checks since ipv6_build_header is used by TCP (which
+ *	is for us performance critical)
+ */
+
+int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
+	       struct in6_addr *saddr, struct in6_addr *daddr,
+	       int proto, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *hdr;
+	int totlen;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	totlen = len + sizeof(struct ipv6hdr);
+
+	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+	skb->nh.ipv6h = hdr;
+
+	*(u32*)hdr = htonl(0x60000000);
+
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = np->hop_limit;
+
+	ipv6_addr_copy(&hdr->saddr, saddr);
+	ipv6_addr_copy(&hdr->daddr, daddr);
+
+	return 0;
+}
+
+static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
+{
+	struct ip6_ra_chain *ra;
+	struct sock *last = NULL;
+
+	read_lock(&ip6_ra_lock);
+	for (ra = ip6_ra_chain; ra; ra = ra->next) {
+		struct sock *sk = ra->sk;
+		if (sk && ra->sel == sel) {
+			if (last) {
+				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+				if (skb2)
+					rawv6_rcv(last, skb2);
+			}
+			last = sk;
+		}
+	}
+
+	if (last) {
+		rawv6_rcv(last, skb);
+		read_unlock(&ip6_ra_lock);
+		return 1;
+	}
+	read_unlock(&ip6_ra_lock);
+	return 0;
+}
+
+static inline int ip6_forward_finish(struct sk_buff *skb)
+{
+	return dst_output(skb);
+}
+
+int ip6_forward(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	
+	if (ipv6_devconf.forwarding == 0)
+		goto error;
+
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+		goto drop;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/*
+	 *	We DO NOT make any processing on
+	 *	RA packets, pushing them to user level AS IS
+	 *	without ane WARRANTY that application will be able
+	 *	to interpret them. The reason is that we
+	 *	cannot make anything clever here.
+	 *
+	 *	We are not end-node, so that if packet contains
+	 *	AH/ESP, we cannot make anything.
+	 *	Defragmentation also would be mistake, RA packets
+	 *	cannot be fragmented, because there is no warranty
+	 *	that different fragments will go along one path. --ANK
+	 */
+	if (opt->ra) {
+		u8 *ptr = skb->nh.raw + opt->ra;
+		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
+			return 0;
+	}
+
+	/*
+	 *	check and decrement ttl
+	 */
+	if (hdr->hop_limit <= 1) {
+		/* Force OUTPUT device used as source address */
+		skb->dev = dst->dev;
+		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+			    0, skb->dev);
+
+		kfree_skb(skb);
+		return -ETIMEDOUT;
+	}
+
+	if (!xfrm6_route_forward(skb)) {
+		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+		goto drop;
+	}
+	dst = skb->dst;
+
+	/* IPv6 specs say nothing about it, but it is clear that we cannot
+	   send redirects to source routed frames.
+	 */
+	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
+		struct in6_addr *target = NULL;
+		struct rt6_info *rt;
+		struct neighbour *n = dst->neighbour;
+
+		/*
+		 *	incoming and outgoing devices are the same
+		 *	send a redirect.
+		 */
+
+		rt = (struct rt6_info *) dst;
+		if ((rt->rt6i_flags & RTF_GATEWAY))
+			target = (struct in6_addr*)&n->primary_key;
+		else
+			target = &hdr->daddr;
+
+		/* Limit redirects both by destination (here)
+		   and by source (inside ndisc_send_redirect)
+		 */
+		if (xrlim_allow(dst, 1*HZ))
+			ndisc_send_redirect(skb, n, target);
+	} else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
+						|IPV6_ADDR_LINKLOCAL)) {
+		/* This check is security critical. */
+		goto error;
+	}
+
+	if (skb->len > dst_mtu(dst)) {
+		/* Again, force OUTPUT device used as source address */
+		skb->dev = dst->dev;
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
+		IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
+		IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	if (skb_cow(skb, dst->dev->hard_header_len)) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		goto drop;
+	}
+
+	hdr = skb->nh.ipv6h;
+
+	/* Mangling hops number delayed to point after skb COW */
+ 
+	hdr->hop_limit--;
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+	return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
+
+error:
+	IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+	to->pkt_type = from->pkt_type;
+	to->priority = from->priority;
+	to->protocol = from->protocol;
+	to->security = from->security;
+	dst_release(to->dst);
+	to->dst = dst_clone(from->dst);
+	to->dev = from->dev;
+
+#ifdef CONFIG_NET_SCHED
+	to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+	to->nfmark = from->nfmark;
+	/* Connection association is same as pre-frag packet */
+	to->nfct = from->nfct;
+	nf_conntrack_get(to->nfct);
+	to->nfctinfo = from->nfctinfo;
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(to->nf_bridge);
+	to->nf_bridge = from->nf_bridge;
+	nf_bridge_get(to->nf_bridge);
+#endif
+#ifdef CONFIG_NETFILTER_DEBUG
+	to->nf_debug = from->nf_debug;
+#endif
+#endif
+}
+
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+
+		case NEXTHDR_HOP:
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_DEST:
+			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
+			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
+			offset += ipv6_optlen(exthdr);
+			*nexthdr = &exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+		default :
+			return offset;
+		}
+	}
+
+	return offset;
+}
+
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+{
+	struct net_device *dev;
+	struct sk_buff *frag;
+	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct ipv6hdr *tmp_hdr;
+	struct frag_hdr *fh;
+	unsigned int mtu, hlen, left, len;
+	u32 frag_id = 0;
+	int ptr, offset = 0, err=0;
+	u8 *prevhdr, nexthdr = 0;
+
+	dev = rt->u.dst.dev;
+	hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	nexthdr = *prevhdr;
+
+	mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+
+	if (skb_shinfo(skb)->frag_list) {
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/* Correct geometry. */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen)
+			    goto slow_path;
+
+			/* Correct socket ownership. */
+			if (frag->sk == NULL)
+				goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		err = 0;
+		offset = 0;
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+		/* BUILD HEADER */
+
+		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+		if (!tmp_hdr) {
+			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+			return -ENOMEM;
+		}
+
+		*prevhdr = NEXTHDR_FRAGMENT;
+		memcpy(tmp_hdr, skb->nh.raw, hlen);
+		__skb_pull(skb, hlen);
+		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+		skb->nh.raw = __skb_push(skb, hlen);
+		memcpy(skb->nh.raw, tmp_hdr, hlen);
+
+		ipv6_select_ident(skb, fh);
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		fh->frag_off = htons(IP6_MF);
+		frag_id = fh->identification;
+
+		first_len = skb_pagelen(skb);
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+ 
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->ip_summed = CHECKSUM_NONE;
+				frag->h.raw = frag->data;
+				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				offset += skb->len - hlen - sizeof(struct frag_hdr);
+				fh->nexthdr = nexthdr;
+				fh->reserved = 0;
+				fh->frag_off = htons(offset);
+				if (frag->next != NULL)
+					fh->frag_off |= htons(IP6_MF);
+				fh->identification = frag_id;
+				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ip6_copy_metadata(frag, skb);
+			}
+			
+			err = output(skb);
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		if (tmp_hdr)
+			kfree(tmp_hdr);
+
+		if (err == 0) {
+			IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+			return 0;
+		}
+
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+
+		IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		return err;
+	}
+
+slow_path:
+	left = skb->len - hlen;		/* Space per frame */
+	ptr = hlen;			/* Where to start from */
+
+	/*
+	 *	Fragment the datagram.
+	 */
+
+	*prevhdr = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Keep copying data until we run out.
+	 */
+	while(left > 0)	{
+		len = left;
+		/* IF: it doesn't fit, use 'mtu' - the data space left */
+		if (len > mtu)
+			len = mtu;
+		/* IF: we are not sending upto and including the packet end
+		   then align the next start on an eight byte boundary */
+		if (len < left)	{
+			len &= ~7;
+		}
+		/*
+		 *	Allocate buffer.
+		 */
+
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
+			IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		/*
+		 *	Set up data on packet
+		 */
+
+		ip6_copy_metadata(frag, skb);
+		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
+		frag->nh.raw = frag->data;
+		fh = (struct frag_hdr*)(frag->data + hlen);
+		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+
+		/*
+		 *	Charge the memory for the fragment to any owner
+		 *	it might possess
+		 */
+		if (skb->sk)
+			skb_set_owner_w(frag, skb->sk);
+
+		/*
+		 *	Copy the packet header into the new buffer.
+		 */
+		memcpy(frag->nh.raw, skb->data, hlen);
+
+		/*
+		 *	Build fragment header.
+		 */
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		if (frag_id) {
+			ipv6_select_ident(skb, fh);
+			frag_id = fh->identification;
+		} else
+			fh->identification = frag_id;
+
+		/*
+		 *	Copy a block of the IP datagram.
+		 */
+		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+			BUG();
+		left -= len;
+
+		fh->frag_off = htons(offset);
+		if (left > 0)
+			fh->frag_off |= htons(IP6_MF);
+		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+		ptr += len;
+		offset += len;
+
+		/*
+		 *	Put this fragment into the sending queue.
+		 */
+
+		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+
+		err = output(frag);
+		if (err)
+			goto fail;
+	}
+	kfree_skb(skb);
+	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+	return err;
+
+fail:
+	kfree_skb(skb); 
+	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	return err;
+}
+
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+{
+	int err = 0;
+
+	*dst = NULL;
+	if (sk) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+	
+		*dst = sk_dst_check(sk, np->dst_cookie);
+		if (*dst) {
+			struct rt6_info *rt = (struct rt6_info*)*dst;
+	
+				/* Yes, checking route validity in not connected
+				   case is not very simple. Take into account,
+				   that we do not support routing by source, TOS,
+				   and MSG_DONTROUTE 		--ANK (980726)
+	
+				   1. If route was host route, check that
+				      cached destination is current.
+				      If it is network route, we still may
+				      check its validity using saved pointer
+				      to the last used address: daddr_cache.
+				      We do not want to save whole address now,
+				      (because main consumer of this service
+				       is tcp, which has not this problem),
+				      so that the last trick works only on connected
+				      sockets.
+				   2. oif also should be the same.
+				 */
+	
+			if (((rt->rt6i_dst.plen != 128 ||
+			      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
+			     && (np->daddr_cache == NULL ||
+				 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
+			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
+				dst_release(*dst);
+				*dst = NULL;
+			}
+		}
+	}
+
+	if (*dst == NULL)
+		*dst = ip6_route_output(sk, fl);
+
+	if ((err = (*dst)->error))
+		goto out_err_release;
+
+	if (ipv6_addr_any(&fl->fl6_src)) {
+		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
+
+		if (err) {
+#if IP6_DEBUG >= 2
+			printk(KERN_DEBUG "ip6_dst_lookup: "
+			       "no available source address\n");
+#endif
+			goto out_err_release;
+		}
+	}
+
+	return 0;
+
+out_err_release:
+	dst_release(*dst);
+	*dst = NULL;
+	return err;
+}
+
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+		    void *from, int length, int transhdrlen,
+		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
+		    unsigned int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	unsigned int maxfraglen, fragheaderlen;
+	int exthdrlen;
+	int hh_len;
+	int mtu;
+	int copy;
+	int err;
+	int offset = 0;
+	int csummode = CHECKSUM_NONE;
+
+	if (flags&MSG_PROBE)
+		return 0;
+	if (skb_queue_empty(&sk->sk_write_queue)) {
+		/*
+		 * setup for corking
+		 */
+		if (opt) {
+			if (np->cork.opt == NULL) {
+				np->cork.opt = kmalloc(opt->tot_len,
+						       sk->sk_allocation);
+				if (unlikely(np->cork.opt == NULL))
+					return -ENOBUFS;
+			} else if (np->cork.opt->tot_len < opt->tot_len) {
+				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
+				return -EINVAL;
+			}
+			memcpy(np->cork.opt, opt, opt->tot_len);
+			inet->cork.flags |= IPCORK_OPT;
+			/* need source address above miyazawa*/
+		}
+		dst_hold(&rt->u.dst);
+		np->cork.rt = rt;
+		inet->cork.fl = *fl;
+		np->cork.hop_limit = hlimit;
+		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+		if (dst_allfrag(rt->u.dst.path))
+			inet->cork.flags |= IPCORK_ALLFRAG;
+		inet->cork.length = 0;
+		sk->sk_sndmsg_page = NULL;
+		sk->sk_sndmsg_off = 0;
+		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
+		length += exthdrlen;
+		transhdrlen += exthdrlen;
+	} else {
+		rt = np->cork.rt;
+		fl = &inet->cork.fl;
+		if (inet->cork.flags & IPCORK_OPT)
+			opt = np->cork.opt;
+		transhdrlen = 0;
+		exthdrlen = 0;
+		mtu = inet->cork.fragsize;
+	}
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
+
+	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
+			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
+	}
+
+	/*
+	 * Let's try using as much space as possible.
+	 * Use MTU if total length of the message fits into the MTU.
+	 * Otherwise, we need to reserve fragment header and
+	 * fragment alignment (= 8-15 octects, in total).
+	 *
+	 * Note that we may need to "move" the data from the tail of
+	 * of the buffer to the new fragment when we split 
+	 * the message.
+	 *
+	 * FIXME: It may be fragmented into multiple chunks 
+	 *        at once if non-fragmentable extension headers
+	 *        are too large.
+	 * --yoshfuji 
+	 */
+
+	inet->cork.length += length;
+
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+		goto alloc_new_skb;
+
+	while (length > 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+
+		if (copy <= 0) {
+			char *data;
+			unsigned int datalen;
+			unsigned int fraglen;
+			unsigned int fraggap;
+			unsigned int alloclen;
+			struct sk_buff *skb_prev;
+alloc_new_skb:
+			skb_prev = skb;
+
+			/* There's no room in the current skb */
+			if (skb_prev)
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
+
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
+
+			fraglen = datalen + fragheaderlen;
+			if ((flags & MSG_MORE) &&
+			    !(rt->u.dst.dev->features&NETIF_F_SG))
+				alloclen = mtu;
+			else
+				alloclen = datalen + fragheaderlen;
+
+			/*
+			 * The last fragment gets additional space at tail.
+			 * Note: we overallocate on fragments with MSG_MODE
+			 * because we have no idea if we're the last one.
+			 */
+			if (datalen == length + fraggap)
+				alloclen += rt->u.dst.trailer_len;
+
+			/*
+			 * We just reserve space for fragment header.
+			 * Note: this may be overallocation if the message 
+			 * (without MSG_MORE) fits into the MTU.
+			 */
+			alloclen += sizeof(struct frag_hdr);
+
+			if (transhdrlen) {
+				skb = sock_alloc_send_skb(sk,
+						alloclen + hh_len,
+						(flags & MSG_DONTWAIT), &err);
+			} else {
+				skb = NULL;
+				if (atomic_read(&sk->sk_wmem_alloc) <=
+				    2 * sk->sk_sndbuf)
+					skb = sock_wmalloc(sk,
+							   alloclen + hh_len, 1,
+							   sk->sk_allocation);
+				if (unlikely(skb == NULL))
+					err = -ENOBUFS;
+			}
+			if (skb == NULL)
+				goto error;
+			/*
+			 *	Fill in the control structures
+			 */
+			skb->ip_summed = csummode;
+			skb->csum = 0;
+			/* reserve for fragmentation */
+			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+
+			/*
+			 *	Find where to start putting bytes
+			 */
+			data = skb_put(skb, fraglen);
+			skb->nh.raw = data + exthdrlen;
+			data += fragheaderlen;
+			skb->h.raw = data + exthdrlen;
+
+			if (fraggap) {
+				skb->csum = skb_copy_and_csum_bits(
+					skb_prev, maxfraglen,
+					data + transhdrlen, fraggap, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
+				data += fraggap;
+				skb_trim(skb_prev, maxfraglen);
+			}
+			copy = datalen - transhdrlen - fraggap;
+			if (copy < 0) {
+				err = -EINVAL;
+				kfree_skb(skb);
+				goto error;
+			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+				err = -EFAULT;
+				kfree_skb(skb);
+				goto error;
+			}
+
+			offset += copy;
+			length -= datalen - fraggap;
+			transhdrlen = 0;
+			exthdrlen = 0;
+			csummode = CHECKSUM_NONE;
+
+			/*
+			 * Put the packet on the pending queue
+			 */
+			__skb_queue_tail(&sk->sk_write_queue, skb);
+			continue;
+		}
+
+		if (copy > length)
+			copy = length;
+
+		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+			unsigned int off;
+
+			off = skb->len;
+			if (getfrag(from, skb_put(skb, copy),
+						offset, copy, off, skb) < 0) {
+				__skb_trim(skb, off);
+				err = -EFAULT;
+				goto error;
+			}
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			struct page *page = sk->sk_sndmsg_page;
+			int off = sk->sk_sndmsg_off;
+			unsigned int left;
+
+			if (page && (left = PAGE_SIZE - off) > 0) {
+				if (copy >= left)
+					copy = left;
+				if (page != frag->page) {
+					if (i == MAX_SKB_FRAGS) {
+						err = -EMSGSIZE;
+						goto error;
+					}
+					get_page(page);
+					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+					frag = &skb_shinfo(skb)->frags[i];
+				}
+			} else if(i < MAX_SKB_FRAGS) {
+				if (copy > PAGE_SIZE)
+					copy = PAGE_SIZE;
+				page = alloc_pages(sk->sk_allocation, 0);
+				if (page == NULL) {
+					err = -ENOMEM;
+					goto error;
+				}
+				sk->sk_sndmsg_page = page;
+				sk->sk_sndmsg_off = 0;
+
+				skb_fill_page_desc(skb, i, page, 0, 0);
+				frag = &skb_shinfo(skb)->frags[i];
+				skb->truesize += PAGE_SIZE;
+				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+			} else {
+				err = -EMSGSIZE;
+				goto error;
+			}
+			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+				err = -EFAULT;
+				goto error;
+			}
+			sk->sk_sndmsg_off += copy;
+			frag->size += copy;
+			skb->len += copy;
+			skb->data_len += copy;
+		}
+		offset += copy;
+		length -= copy;
+	}
+	return 0;
+error:
+	inet->cork.length -= length;
+	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	return err;
+}
+
+int ip6_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *tmp_skb;
+	struct sk_buff **tail_skb;
+	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *hdr;
+	struct ipv6_txoptions *opt = np->cork.opt;
+	struct rt6_info *rt = np->cork.rt;
+	struct flowi *fl = &inet->cork.fl;
+	unsigned char proto = fl->proto;
+	int err = 0;
+
+	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
+		goto out;
+	tail_skb = &(skb_shinfo(skb)->frag_list);
+
+	/* move skb->data to ip header from ext header */
+	if (skb->data < skb->nh.raw)
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		*tail_skb = tmp_skb;
+		tail_skb = &(tmp_skb->next);
+		skb->len += tmp_skb->len;
+		skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
+		skb->truesize += tmp_skb->truesize;
+		__sock_put(tmp_skb->sk);
+		tmp_skb->destructor = NULL;
+		tmp_skb->sk = NULL;
+#endif
+	}
+
+	ipv6_addr_copy(final_dst, &fl->fl6_dst);
+	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	if (opt && opt->opt_flen)
+		ipv6_push_frag_opts(skb, opt, &proto);
+	if (opt && opt->opt_nflen)
+		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+
+	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+	
+	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
+
+	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
+		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	else
+		hdr->payload_len = 0;
+	hdr->hop_limit = np->cork.hop_limit;
+	hdr->nexthdr = proto;
+	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&hdr->daddr, final_dst);
+
+	skb->dst = dst_clone(&rt->u.dst);
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);	
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+	if (err) {
+		if (err > 0)
+			err = inet->recverr ? net_xmit_errno(err) : 0;
+		if (err)
+			goto error;
+	}
+
+out:
+	inet->cork.flags &= ~IPCORK_OPT;
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt) {
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+		inet->cork.flags &= ~IPCORK_ALLFRAG;
+	}
+	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+	return err;
+error:
+	goto out;
+}
+
+void ip6_flush_pending_frames(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		kfree_skb(skb);
+	}
+
+	inet->cork.flags &= ~IPCORK_OPT;
+
+	if (np->cork.opt) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+	if (np->cork.rt) {
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+		inet->cork.flags &= ~IPCORK_ALLFRAG;
+	}
+	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
new file mode 100644
index 00000000000..3b1c9fa184a
--- /dev/null
+++ b/net/ipv6/ip6_tunnel.c
@@ -0,0 +1,1163 @@
+/*
+ *	IPv6 over IPv6 tunnel device
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>	
+ *
+ *	$Id$
+ *
+ *      Based on:
+ *      linux/net/ipv6/sit.c
+ *
+ *      RFC 2473
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+
+MODULE_AUTHOR("Ville Nuorvala");
+MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_LICENSE("GPL");
+
+#define IPV6_TLV_TEL_DST_SIZE 8
+
+#ifdef IP6_TNL_DEBUG
+#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
+#else
+#define IP6_TNL_TRACE(x...) do {;} while(0)
+#endif
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+
+#define HASH_SIZE  32
+
+#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
+	             (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
+                    (HASH_SIZE - 1))
+
+static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6ip6_tnl_dev_init(struct net_device *dev);
+static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+
+/* the IPv6 tunnel fallback device */
+static struct net_device *ip6ip6_fb_tnl_dev;
+
+
+/* lists for storing tunnels in use */
+static struct ip6_tnl *tnls_r_l[HASH_SIZE];
+static struct ip6_tnl *tnls_wc[1];
+static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
+
+/* lock for the tunnel lists */
+static DEFINE_RWLOCK(ip6ip6_lock);
+
+static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+{
+	struct dst_entry *dst = t->dst_cache;
+
+	if (dst && dst->obsolete && 
+	    dst->ops->check(dst, t->dst_cookie) == NULL) {
+		t->dst_cache = NULL;
+		dst_release(dst);
+		return NULL;
+	}
+
+	return dst;
+}
+
+static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+{
+	dst_release(t->dst_cache);
+	t->dst_cache = NULL;
+}
+
+static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+	dst_release(t->dst_cache);
+	t->dst_cache = dst;
+}
+
+/**
+ * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ *   @remote: the address of the tunnel exit-point 
+ *   @local: the address of the tunnel entry-point 
+ *
+ * Return:  
+ *   tunnel matching given end-points if found,
+ *   else fallback tunnel if its device is up, 
+ *   else %NULL
+ **/
+
+static struct ip6_tnl *
+ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(local);
+	struct ip6_tnl *t;
+
+	for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr) &&
+		    (t->dev->flags & IFF_UP))
+			return t;
+	}
+	if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
+		return t;
+
+	return NULL;
+}
+
+/**
+ * ip6ip6_bucket - get head of list matching given tunnel parameters
+ *   @p: parameters containing tunnel end-points 
+ *
+ * Description:
+ *   ip6ip6_bucket() returns the head of the list matching the 
+ *   &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list 
+ **/
+
+static struct ip6_tnl **
+ip6ip6_bucket(struct ip6_tnl_parm *p)
+{
+	struct in6_addr *remote = &p->raddr;
+	struct in6_addr *local = &p->laddr;
+	unsigned h = 0;
+	int prio = 0;
+
+	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+		prio = 1;
+		h = HASH(remote) ^ HASH(local);
+	}
+	return &tnls[prio][h];
+}
+
+/**
+ * ip6ip6_tnl_link - add tunnel to hash table
+ *   @t: tunnel to be added
+ **/
+
+static void
+ip6ip6_tnl_link(struct ip6_tnl *t)
+{
+	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+
+	t->next = *tp;
+	write_lock_bh(&ip6ip6_lock);
+	*tp = t;
+	write_unlock_bh(&ip6ip6_lock);
+}
+
+/**
+ * ip6ip6_tnl_unlink - remove tunnel from hash table
+ *   @t: tunnel to be removed
+ **/
+
+static void
+ip6ip6_tnl_unlink(struct ip6_tnl *t)
+{
+	struct ip6_tnl **tp;
+
+	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+		if (t == *tp) {
+			write_lock_bh(&ip6ip6_lock);
+			*tp = t->next;
+			write_unlock_bh(&ip6ip6_lock);
+			break;
+		}
+	}
+}
+
+/**
+ * ip6_tnl_create() - create a new tunnel
+ *   @p: tunnel parameters
+ *   @pt: pointer to new tunnel
+ *
+ * Description:
+ *   Create tunnel matching given parameters.
+ * 
+ * Return: 
+ *   0 on success
+ **/
+
+static int
+ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
+{
+	struct net_device *dev;
+	struct ip6_tnl *t;
+	char name[IFNAMSIZ];
+	int err;
+
+	if (p->name[0]) {
+		strlcpy(name, p->name, IFNAMSIZ);
+	} else {
+		int i;
+		for (i = 1; i < IP6_TNL_MAX; i++) {
+			sprintf(name, "ip6tnl%d", i);
+			if (__dev_get_by_name(name) == NULL)
+				break;
+		}
+		if (i == IP6_TNL_MAX) 
+			return -ENOBUFS;
+	}
+	dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+	if (dev == NULL)
+		return -ENOMEM;
+
+	t = dev->priv;
+	dev->init = ip6ip6_tnl_dev_init;
+	t->parms = *p;
+
+	if ((err = register_netdevice(dev)) < 0) {
+		free_netdev(dev);
+		return err;
+	}
+	dev_hold(dev);
+
+	ip6ip6_tnl_link(t);
+	*pt = t;
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ *   @p: tunnel parameters 
+ *   @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ *   based on @parms. If this is unsuccessful, but @create is set a new
+ *   tunnel device is created and registered for use.
+ *
+ * Return:
+ *   0 if tunnel located or created,
+ *   -EINVAL if parameters incorrect,
+ *   -ENODEV if no matching tunnel available
+ **/
+
+static int
+ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
+{
+	struct in6_addr *remote = &p->raddr;
+	struct in6_addr *local = &p->laddr;
+	struct ip6_tnl *t;
+
+	if (p->proto != IPPROTO_IPV6)
+		return -EINVAL;
+
+	for (t = *ip6ip6_bucket(p); t; t = t->next) {
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr)) {
+			*pt = t;
+			return (create ? -EEXIST : 0);
+		}
+	}
+	if (!create)
+		return -ENODEV;
+	
+	return ip6_tnl_create(p, pt);
+}
+
+/**
+ * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ *   @dev: the device to be destroyed
+ *   
+ * Description:
+ *   ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ **/
+
+static void
+ip6ip6_tnl_dev_uninit(struct net_device *dev)
+{
+	struct ip6_tnl *t = dev->priv;
+
+	if (dev == ip6ip6_fb_tnl_dev) {
+		write_lock_bh(&ip6ip6_lock);
+		tnls_wc[0] = NULL;
+		write_unlock_bh(&ip6ip6_lock);
+	} else {
+		ip6ip6_tnl_unlink(t);
+	}
+	ip6_tnl_dst_reset(t);
+	dev_put(dev);
+}
+
+/**
+ * parse_tvl_tnl_enc_lim - handle encapsulation limit option
+ *   @skb: received socket buffer
+ *
+ * Return: 
+ *   0 if none was found, 
+ *   else index to encapsulation limit
+ **/
+
+static __u16
+parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+{
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
+	__u8 nexthdr = ipv6h->nexthdr;
+	__u16 off = sizeof (*ipv6h);
+
+	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
+		__u16 optlen = 0;
+		struct ipv6_opt_hdr *hdr;
+		if (raw + off + sizeof (*hdr) > skb->data &&
+		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+			break;
+
+		hdr = (struct ipv6_opt_hdr *) (raw + off);
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
+			if (frag_hdr->frag_off)
+				break;
+			optlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH) {
+			optlen = (hdr->hdrlen + 2) << 2;
+		} else {
+			optlen = ipv6_optlen(hdr);
+		}
+		if (nexthdr == NEXTHDR_DEST) {
+			__u16 i = off + 2;
+			while (1) {
+				struct ipv6_tlv_tnl_enc_lim *tel;
+
+				/* No more room for encapsulation limit */
+				if (i + sizeof (*tel) > off + optlen)
+					break;
+
+				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+				/* return index of option if found and valid */
+				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
+				    tel->length == 1)
+					return i;
+				/* else jump to next option */
+				if (tel->type)
+					i += tel->length + 2;
+				else
+					i++;
+			}
+		}
+		nexthdr = hdr->nexthdr;
+		off += optlen;
+	}
+	return 0;
+}
+
+/**
+ * ip6ip6_err - tunnel error handler
+ *
+ * Description:
+ *   ip6ip6_err() should handle errors in the tunnel according
+ *   to the specifications in RFC 2473.
+ **/
+
+static void 
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
+	struct ip6_tnl *t;
+	int rel_msg = 0;
+	int rel_type = ICMPV6_DEST_UNREACH;
+	int rel_code = ICMPV6_ADDR_UNREACH;
+	__u32 rel_info = 0;
+	__u16 len;
+
+	/* If the packet doesn't contain the original IPv6 header we are 
+	   in trouble since we might need the source address for further 
+	   processing of the error. */
+
+	read_lock(&ip6ip6_lock);
+	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+		goto out;
+
+	switch (type) {
+		__u32 teli;
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		__u32 mtu;
+	case ICMPV6_DEST_UNREACH:
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: Path to destination invalid "
+			       "or inactive!\n", t->parms.name);
+		rel_msg = 1;
+		break;
+	case ICMPV6_TIME_EXCEED:
+		if (code == ICMPV6_EXC_HOPLIMIT) {
+			if (net_ratelimit())
+				printk(KERN_WARNING
+				       "%s: Too small hop limit or "
+				       "routing loop in tunnel!\n", 
+				       t->parms.name);
+			rel_msg = 1;
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		/* ignore if parameter problem not caused by a tunnel
+		   encapsulation limit sub-option */
+		if (code != ICMPV6_HDR_FIELD) {
+			break;
+		}
+		teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+
+		if (teli && teli == ntohl(info) - 2) {
+			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+			if (tel->encap_limit == 0) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+					       "%s: Too small encapsulation "
+					       "limit or routing loop in "
+					       "tunnel!\n", t->parms.name);
+				rel_msg = 1;
+			}
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		mtu = ntohl(info) - offset;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+		t->dev->mtu = mtu;
+
+		if ((len = sizeof (*ipv6h) + ipv6h->payload_len) > mtu) {
+			rel_type = ICMPV6_PKT_TOOBIG;
+			rel_code = 0;
+			rel_info = mtu;
+			rel_msg = 1;
+		}
+		break;
+	}
+	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+		struct rt6_info *rt;
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb2)
+			goto out;
+
+		dst_release(skb2->dst);
+		skb2->dst = NULL;
+		skb_pull(skb2, offset);
+		skb2->nh.raw = skb2->data;
+
+		/* Try to guess incoming interface */
+		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+
+		if (rt && rt->rt6i_dev)
+			skb2->dev = rt->rt6i_dev;
+
+		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+
+		if (rt)
+			dst_release(&rt->u.dst);
+
+		kfree_skb(skb2);
+	}
+out:
+	read_unlock(&ip6ip6_lock);
+}
+
+static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
+					  struct sk_buff *skb)
+{
+	struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+		IP6_ECN_set_ce(inner_iph);
+}
+
+/**
+ * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ *   @skb: received socket buffer
+ *
+ * Return: 0
+ **/
+
+static int 
+ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct ipv6hdr *ipv6h;
+	struct ip6_tnl *t;
+
+	if (!pskb_may_pull(skb, sizeof (*ipv6h)))
+		goto discard;
+
+	ipv6h = skb->nh.ipv6h;
+
+	read_lock(&ip6ip6_lock);
+
+	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
+			t->stat.rx_dropped++;
+			read_unlock(&ip6ip6_lock);
+			goto discard;
+		}
+		secpath_reset(skb);
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = skb->data;
+		skb->protocol = htons(ETH_P_IPV6);
+		skb->pkt_type = PACKET_HOST;
+		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+		skb->dev = t->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+			ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
+		ip6ip6_ecn_decapsulate(ipv6h, skb);
+		t->stat.rx_packets++;
+		t->stat.rx_bytes += skb->len;
+		netif_rx(skb);
+		read_unlock(&ip6ip6_lock);
+		return 0;
+	}
+	read_unlock(&ip6ip6_lock);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+discard:
+	return 1;
+}
+
+static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
+{
+	struct ipv6_tlv_tnl_enc_lim *tel;
+	struct ipv6_txoptions *opt;
+	__u8 *raw;
+
+	int opt_len = sizeof(*opt) + 8;
+
+	if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
+		return NULL;
+	}
+	memset(opt, 0, opt_len);
+	opt->tot_len = opt_len;
+	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
+	opt->opt_nflen = 8;
+
+	tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
+	tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
+	tel->length = 1;
+	tel->encap_limit = encap_limit;
+
+	raw = (__u8 *) opt->dst0opt;
+	raw[5] = IPV6_TLV_PADN;
+	raw[6] = 1;
+
+	return opt;
+}
+
+/**
+ * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ *   @t: the outgoing tunnel device
+ *   @hdr: IPv6 header from the incoming packet 
+ *
+ * Description:
+ *   Avoid trivial tunneling loop by checking that tunnel exit-point 
+ *   doesn't match source of incoming packet.
+ *
+ * Return: 
+ *   1 if conflict,
+ *   0 else
+ **/
+
+static inline int
+ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+{
+	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+/**
+ * ip6ip6_tnl_xmit - encapsulate packet and send 
+ *   @skb: the outgoing socket buffer
+ *   @dev: the outgoing tunnel device 
+ *
+ * Description:
+ *   Build new header and do some sanity checks on the packet before sending
+ *   it.
+ *
+ * Return: 
+ *   0
+ **/
+
+static int 
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	struct net_device_stats *stats = &t->stat;
+	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	struct ipv6_txoptions *opt = NULL;
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct net_device *tdev;
+	int mtu;
+	int max_headroom = sizeof(struct ipv6hdr);
+	u8 proto;
+	int err;
+	int pkt_len;
+	int dsfield;
+
+	if (t->recursion++) {
+		stats->collisions++;
+		goto tx_err;
+	}
+	if (skb->protocol != htons(ETH_P_IPV6) ||
+	    !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
+	    ip6ip6_tnl_addr_conflict(t, ipv6h)) {
+		goto tx_err;
+	}
+	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+			goto tx_err;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+		encap_limit = t->parms.encap_limit;
+	}
+	memcpy(&fl, &t->fl, sizeof (fl));
+	proto = fl.proto;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+	if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
+		goto tx_err;
+
+	if ((dst = ip6_tnl_dst_check(t)) != NULL)
+		dst_hold(dst);
+	else
+		dst = ip6_route_output(NULL, &fl);
+
+	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+		goto tx_err_link_failure;
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		if (net_ratelimit())
+			printk(KERN_WARNING 
+			       "%s: Local routing loop detected!\n",
+			       t->parms.name);
+		goto tx_err_dst_release;
+	}
+	mtu = dst_mtu(dst) - sizeof (*ipv6h);
+	if (opt) {
+		max_headroom += 8;
+		mtu -= 8;
+	}
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+	if (skb->dst && mtu < dst_mtu(skb->dst)) {
+		struct rt6_info *rt = (struct rt6_info *) skb->dst;
+		rt->rt6i_flags |= RTF_MODIFIED;
+		rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+	}
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		goto tx_err_dst_release;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom += LL_RESERVED_SPACE(tdev);
+	
+	if (skb_headroom(skb) < max_headroom || 
+	    skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb;
+		
+		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
+			goto tx_err_dst_release;
+
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		kfree_skb(skb);
+		skb = new_skb;
+	}
+	dst_release(skb->dst);
+	skb->dst = dst_clone(dst);
+
+	skb->h.raw = skb->nh.raw;
+
+	if (opt)
+		ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
+
+	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
+	ipv6h = skb->nh.ipv6h;
+	*(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+	dsfield = INET_ECN_encapsulate(0, dsfield);
+	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	ipv6h->hop_limit = t->parms.hop_limit;
+	ipv6h->nexthdr = proto;
+	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
+	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+	nf_reset(skb);
+	pkt_len = skb->len;
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 
+		      skb->dst->dev, dst_output);
+
+	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
+		stats->tx_bytes += pkt_len;
+		stats->tx_packets++;
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+	ip6_tnl_dst_store(t, dst);
+
+	if (opt)
+		kfree(opt);
+
+	t->recursion--;
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(dst);
+	if (opt)
+		kfree(opt);
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	t->recursion--;
+	return 0;
+}
+
+static void ip6_tnl_set_cap(struct ip6_tnl *t)
+{
+	struct ip6_tnl_parm *p = &t->parms;
+	struct in6_addr *laddr = &p->laddr;
+	struct in6_addr *raddr = &p->raddr;
+	int ltype = ipv6_addr_type(laddr);
+	int rtype = ipv6_addr_type(raddr);
+
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
+
+	if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY &&
+	    ((ltype|rtype) &
+	     (IPV6_ADDR_UNICAST|
+	      IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL|
+	      IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) {
+		struct net_device *ldev = NULL;
+		int l_ok = 1;
+		int r_ok = 1;
+
+		if (p->link)
+			ldev = dev_get_by_index(p->link);
+		
+		if (ltype&IPV6_ADDR_UNICAST && !ipv6_chk_addr(laddr, ldev, 0))
+			l_ok = 0;
+		
+		if (rtype&IPV6_ADDR_UNICAST && ipv6_chk_addr(raddr, NULL, 0))
+			r_ok = 0;
+		
+		if (l_ok && r_ok) {
+			if (ltype&IPV6_ADDR_UNICAST)
+				p->flags |= IP6_TNL_F_CAP_XMIT;
+			if (rtype&IPV6_ADDR_UNICAST)
+				p->flags |= IP6_TNL_F_CAP_RCV;
+		}
+		if (ldev)
+			dev_put(ldev);
+	}
+}
+
+static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+{
+	struct net_device *dev = t->dev;
+	struct ip6_tnl_parm *p = &t->parms;
+	struct flowi *fl = &t->fl;
+
+	memcpy(&dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+	memcpy(&dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+	/* Set up flowi template */
+	ipv6_addr_copy(&fl->fl6_src, &p->laddr);
+	ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
+	fl->oif = p->link;
+	fl->fl6_flowlabel = 0;
+
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+		fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+	ip6_tnl_set_cap(t);
+
+	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
+		dev->flags |= IFF_POINTOPOINT;
+	else
+		dev->flags &= ~IFF_POINTOPOINT;
+
+	dev->iflink = p->link;
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
+						 p->link, 0);
+
+		if (rt == NULL)
+			return;
+
+		if (rt->rt6i_dev) {
+			dev->hard_header_len = rt->rt6i_dev->hard_header_len +
+				sizeof (struct ipv6hdr);
+
+			dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+
+			if (dev->mtu < IPV6_MIN_MTU)
+				dev->mtu = IPV6_MIN_MTU;
+		}
+		dst_release(&rt->u.dst);
+	}
+}
+
+/**
+ * ip6ip6_tnl_change - update the tunnel parameters
+ *   @t: tunnel to be changed
+ *   @p: tunnel configuration parameters
+ *   @active: != 0 if tunnel is ready for use
+ *
+ * Description:
+ *   ip6ip6_tnl_change() updates the tunnel parameters
+ **/
+
+static int
+ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+{
+	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
+	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+	t->parms.flags = p->flags;
+	t->parms.hop_limit = p->hop_limit;
+	t->parms.encap_limit = p->encap_limit;
+	t->parms.flowinfo = p->flowinfo;
+	ip6ip6_tnl_link_config(t);
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace 
+ *   @dev: virtual device associated with tunnel
+ *   @ifr: parameters passed from userspace
+ *   @cmd: command to be performed
+ *
+ * Description:
+ *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels 
+ *   from userspace. 
+ *
+ *   The possible commands are the following:
+ *     %SIOCGETTUNNEL: get tunnel parameters for device
+ *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ *     %SIOCCHGTUNNEL: change tunnel parameters to those given
+ *     %SIOCDELTUNNEL: delete tunnel
+ *
+ *   The fallback device "ip6tnl0", created during module 
+ *   initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ *   0 on success,
+ *   %-EFAULT if unable to copy data to or from userspace,
+ *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ *   %-EINVAL if passed tunnel parameters are invalid,
+ *   %-EEXIST if changing a tunnel's parameters would cause a conflict
+ *   %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+
+static int
+ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	int create;
+	struct ip6_tnl_parm p;
+	struct ip6_tnl *t = NULL;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		if (dev == ip6ip6_fb_tnl_dev) {
+			if (copy_from_user(&p,
+					   ifr->ifr_ifru.ifru_data,
+					   sizeof (p))) {
+				err = -EFAULT;
+				break;
+			}
+			if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
+				t = (struct ip6_tnl *) dev->priv;
+			else if (err)
+				break;
+		} else
+			t = (struct ip6_tnl *) dev->priv;
+
+		memcpy(&p, &t->parms, sizeof (p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
+			err = -EFAULT;
+		}
+		break;
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		create = (cmd == SIOCADDTUNNEL);
+		if (!capable(CAP_NET_ADMIN))
+			break;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
+			err = -EFAULT;
+			break;
+		}
+		if (!create && dev != ip6ip6_fb_tnl_dev) {
+			t = (struct ip6_tnl *) dev->priv;
+		}
+		if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
+			break;
+		}
+		if (cmd == SIOCCHGTUNNEL) {
+			if (t->dev != dev) {
+				err = -EEXIST;
+				break;
+			}
+			ip6ip6_tnl_unlink(t);
+			err = ip6ip6_tnl_change(t, &p);
+			ip6ip6_tnl_link(t);
+			netdev_state_change(dev);
+		}
+		if (copy_to_user(ifr->ifr_ifru.ifru_data,
+				 &t->parms, sizeof (p))) {
+			err = -EFAULT;
+		} else {
+			err = 0;
+		}
+		break;
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			break;
+
+		if (dev == ip6ip6_fb_tnl_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+					   sizeof (p))) {
+				err = -EFAULT;
+				break;
+			}
+			err = ip6ip6_tnl_locate(&p, &t, 0);
+			if (err)
+				break;
+			if (t == ip6ip6_fb_tnl_dev->priv) {
+				err = -EPERM;
+				break;
+			}
+		} else {
+			t = (struct ip6_tnl *) dev->priv;
+		}
+		err = unregister_netdevice(t->dev);
+		break;
+	default:
+		err = -EINVAL;
+	}
+	return err;
+}
+
+/**
+ * ip6ip6_tnl_get_stats - return the stats for tunnel device 
+ *   @dev: virtual device associated with tunnel
+ *
+ * Return: stats for device
+ **/
+
+static struct net_device_stats *
+ip6ip6_tnl_get_stats(struct net_device *dev)
+{
+	return &(((struct ip6_tnl *) dev->priv)->stat);
+}
+
+/**
+ * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ *   @dev: virtual device associated with tunnel
+ *   @new_mtu: the new mtu
+ *
+ * Return:
+ *   0 on success,
+ *   %-EINVAL if mtu too small
+ **/
+
+static int
+ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < IPV6_MIN_MTU) {
+		return -EINVAL;
+	}
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ *   @dev: virtual device associated with tunnel
+ *
+ * Description:
+ *   Initialize function pointers and device parameters
+ **/
+
+static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->uninit = ip6ip6_tnl_dev_uninit;
+	dev->destructor = free_netdev;
+	dev->hard_start_xmit = ip6ip6_tnl_xmit;
+	dev->get_stats = ip6ip6_tnl_get_stats;
+	dev->do_ioctl = ip6ip6_tnl_ioctl;
+	dev->change_mtu = ip6ip6_tnl_change_mtu;
+
+	dev->type = ARPHRD_TUNNEL6;
+	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
+	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+	dev->flags |= IFF_NOARP;
+	dev->addr_len = sizeof(struct in6_addr);
+}
+
+
+/**
+ * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ *   @dev: virtual device associated with tunnel
+ **/
+
+static inline void
+ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	t->fl.proto = IPPROTO_IPV6;
+	t->dev = dev;
+	strcpy(t->parms.name, dev->name);
+}
+
+/**
+ * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ *   @dev: virtual device associated with tunnel
+ **/
+
+static int
+ip6ip6_tnl_dev_init(struct net_device *dev)
+{
+	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+	ip6ip6_tnl_dev_init_gen(dev);
+	ip6ip6_tnl_link_config(t);
+	return 0;
+}
+
+/**
+ * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ *   @dev: fallback device
+ *
+ * Return: 0
+ **/
+
+static int 
+ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+{
+	struct ip6_tnl *t = dev->priv;
+	ip6ip6_tnl_dev_init_gen(dev);
+	dev_hold(dev);
+	tnls_wc[0] = t;
+	return 0;
+}
+
+static struct xfrm6_tunnel ip6ip6_handler = {
+	.handler = ip6ip6_rcv,
+	.err_handler = ip6ip6_err,
+};
+
+/**
+ * ip6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+
+static int __init ip6_tunnel_init(void)
+{
+	int  err;
+
+	if (xfrm6_tunnel_register(&ip6ip6_handler) < 0) {
+		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
+		return -EAGAIN;
+	}
+	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+					 ip6ip6_tnl_dev_setup);
+
+	if (!ip6ip6_fb_tnl_dev) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+
+	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
+		free_netdev(ip6ip6_fb_tnl_dev);
+		goto fail;
+	}
+	return 0;
+fail:
+	xfrm6_tunnel_deregister(&ip6ip6_handler);
+	return err;
+}
+
+/**
+ * ip6_tunnel_cleanup - free resources and unregister protocol
+ **/
+
+static void __exit ip6_tunnel_cleanup(void)
+{
+	if (xfrm6_tunnel_deregister(&ip6ip6_handler) < 0)
+		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+
+	unregister_netdev(ip6ip6_fb_tnl_dev);
+}
+
+module_init(ip6_tunnel_init);
+module_exit(ip6_tunnel_cleanup);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
new file mode 100644
index 00000000000..6cde5310cd7
--- /dev/null
+++ b/net/ipv6/ipcomp6.c
@@ -0,0 +1,524 @@
+/*
+ * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Author	Mitsuru KANDA  <mk@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/* 
+ * [Memo]
+ *
+ * Outbound:
+ *  The compression of IP datagram MUST be done before AH/ESP processing, 
+ *  fragmentation, and the addition of Hop-by-Hop/Routing header. 
+ *
+ * Inbound:
+ *  The decompression of IP datagram MUST be done after the reassembly, 
+ *  AH/ESP processing.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipcomp.h>
+#include <asm/scatterlist.h>
+#include <asm/semaphore.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+struct ipcomp6_tfms {
+	struct list_head list;
+	struct crypto_tfm **tfms;
+	int users;
+};
+
+static DECLARE_MUTEX(ipcomp6_resource_sem);
+static void **ipcomp6_scratches;
+static int ipcomp6_scratch_users;
+static LIST_HEAD(ipcomp6_tfms_list);
+
+static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	int err = 0;
+	u8 nexthdr = 0;
+	int hdr_len = skb->h.raw - skb->nh.raw;
+	unsigned char *tmp_hdr = NULL;
+	struct ipv6hdr *iph;
+	int plen, dlen;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+		skb_linearize(skb, GFP_ATOMIC) != 0) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */
+	iph = skb->nh.ipv6h;
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr)
+		goto out;
+	memcpy(tmp_hdr, iph, hdr_len);
+	nexthdr = *(u8 *)skb->data;
+	skb_pull(skb, sizeof(struct ipv6_comp_hdr)); 
+	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
+	skb->h.raw = skb->data;
+
+	/* decompression */
+	plen = skb->len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data;
+
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	if (err) {
+		err = -EINVAL;
+		goto out_put_cpu;
+	}
+
+	if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
+		err = -EINVAL;
+		goto out_put_cpu;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err) {
+		goto out_put_cpu;
+	}
+
+	skb_put(skb, dlen - plen);
+	memcpy(skb->data, scratch, dlen);
+
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(skb->len);
+	
+out_put_cpu:
+	put_cpu();
+out:
+	if (tmp_hdr)
+		kfree(tmp_hdr);
+	if (err)
+		goto error_out;
+	return nexthdr;
+error_out:
+	return err;
+}
+
+static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct ipv6hdr *top_iph;
+	int hdr_len;
+	struct ipv6_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+	int plen, dlen;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+
+	hdr_len = skb->h.raw - skb->data;
+
+	/* check whether datagram len is larger than threshold */
+	if ((skb->len - hdr_len) < ipcd->threshold) {
+		goto out_ok;
+	}
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+		skb_linearize(skb, GFP_ATOMIC) != 0) {
+		goto out_ok;
+	}
+
+	/* compression */
+	plen = skb->len - hdr_len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->h.raw;
+
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
+		put_cpu();
+		goto out_ok;
+	}
+	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+	put_cpu();
+	pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
+
+	/* insert ipcomp header and replace datagram */
+	top_iph = (struct ipv6hdr *)skb->data;
+
+	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+	ipch = (struct ipv6_comp_hdr *)start;
+	ipch->nexthdr = *skb->nh.raw;
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	*skb->nh.raw = IPPROTO_COMP;
+
+out_ok:
+	return 0;
+}
+
+static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		                int type, int code, int offset, __u32 info)
+{
+	u32 spi;
+	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+	struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
+	struct xfrm_state *x;
+
+	if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
+		return;
+
+	spi = ntohl(ntohs(ipcomph->cpi));
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+	if (!x)
+		return;
+
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
+			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			spi, NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
+{
+	struct xfrm_state *t = NULL;
+
+	t = xfrm_state_alloc();
+	if (!t)
+		goto out;
+
+	t->id.proto = IPPROTO_IPV6;
+	t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
+	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
+	memcpy(&t->sel, &x->sel, sizeof(t->sel));
+	t->props.family = AF_INET6;
+	t->props.mode = 1;
+	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
+
+	t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family);
+	if (t->type == NULL)
+		goto error;
+
+	if (t->type->init_state(t, NULL))
+		goto error;
+
+	t->km.state = XFRM_STATE_VALID;
+	atomic_set(&t->tunnel_users, 1);
+
+out:
+	return t;
+
+error:
+	xfrm_state_put(t);
+	goto out;
+}
+
+static int ipcomp6_tunnel_attach(struct xfrm_state *x)
+{
+	int err = 0;
+	struct xfrm_state *t = NULL;
+	u32 spi;
+
+	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
+	if (spi)
+		t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
+					      spi, IPPROTO_IPV6, AF_INET6);
+	if (!t) {
+		t = ipcomp6_tunnel_create(x);
+		if (!t) {
+			err = -EINVAL;
+			goto out;
+		}
+		xfrm_state_insert(t);
+		xfrm_state_hold(t);
+	}
+	x->tunnel = t;
+	atomic_inc(&t->tunnel_users);
+
+out:
+	return err;
+}
+
+static void ipcomp6_free_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (--ipcomp6_scratch_users)
+		return;
+
+	scratches = ipcomp6_scratches;
+	if (!scratches)
+		return;
+
+	for_each_cpu(i) {
+		void *scratch = *per_cpu_ptr(scratches, i);
+		if (scratch)
+			vfree(scratch);
+	}
+
+	free_percpu(scratches);
+}
+
+static void **ipcomp6_alloc_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (ipcomp6_scratch_users++)
+		return ipcomp6_scratches;
+
+	scratches = alloc_percpu(void *);
+	if (!scratches)
+		return NULL;
+
+	ipcomp6_scratches = scratches;
+
+	for_each_cpu(i) {
+		void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+		if (!scratch)
+			return NULL;
+		*per_cpu_ptr(scratches, i) = scratch;
+	}
+
+	return scratches;
+}
+
+static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
+{
+	struct ipcomp6_tfms *pos;
+	int cpu;
+
+	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+		if (pos->tfms == tfms)
+			break;
+	}
+
+	BUG_TRAP(pos);
+
+	if (--pos->users)
+		return;
+
+	list_del(&pos->list);
+	kfree(pos);
+
+	if (!tfms)
+		return;
+
+	for_each_cpu(cpu) {
+		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
+		if (tfm)
+			crypto_free_tfm(tfm);
+	}
+	free_percpu(tfms);
+}
+
+static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
+{
+	struct ipcomp6_tfms *pos;
+	struct crypto_tfm **tfms;
+	int cpu;
+
+	/* This can be any valid CPU ID so we don't need locking. */
+	cpu = smp_processor_id();
+
+	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+		struct crypto_tfm *tfm;
+
+		tfms = pos->tfms;
+		tfm = *per_cpu_ptr(tfms, cpu);
+
+		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+			pos->users++;
+			return tfms;
+		}
+	}
+
+	pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+	if (!pos)
+		return NULL;
+
+	pos->users = 1;
+	INIT_LIST_HEAD(&pos->list);
+	list_add(&pos->list, &ipcomp6_tfms_list);
+
+	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	if (!tfms)
+		goto error;
+
+	for_each_cpu(cpu) {
+		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+		if (!tfm)
+			goto error;
+		*per_cpu_ptr(tfms, cpu) = tfm;
+	}
+
+	return tfms;
+
+error:
+	ipcomp6_free_tfms(tfms);
+	return NULL;
+}
+
+static void ipcomp6_free_data(struct ipcomp_data *ipcd)
+{
+	if (ipcd->tfms)
+		ipcomp6_free_tfms(ipcd->tfms);
+	ipcomp6_free_scratches();
+}
+
+static void ipcomp6_destroy(struct xfrm_state *x)
+{
+	struct ipcomp_data *ipcd = x->data;
+	if (!ipcd)
+		return;
+	xfrm_state_delete_tunnel(x);
+	down(&ipcomp6_resource_sem);
+	ipcomp6_free_data(ipcd);
+	up(&ipcomp6_resource_sem);
+	kfree(ipcd);
+
+	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
+}
+
+static int ipcomp6_init_state(struct xfrm_state *x, void *args)
+{
+	int err;
+	struct ipcomp_data *ipcd;
+	struct xfrm_algo_desc *calg_desc;
+
+	err = -EINVAL;
+	if (!x->calg)
+		goto out;
+
+	if (x->encap)
+		goto out;
+
+	err = -ENOMEM;
+	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+	if (!ipcd)
+		goto out;
+
+	memset(ipcd, 0, sizeof(*ipcd));
+	x->props.header_len = 0;
+	if (x->props.mode)
+		x->props.header_len += sizeof(struct ipv6hdr);
+	
+	down(&ipcomp6_resource_sem);
+	if (!ipcomp6_alloc_scratches())
+		goto error;
+
+	ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
+	if (!ipcd->tfms)
+		goto error;
+	up(&ipcomp6_resource_sem);
+
+	if (x->props.mode) {
+		err = ipcomp6_tunnel_attach(x);
+		if (err)
+			goto error_tunnel;
+	}
+
+	calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
+	BUG_ON(!calg_desc);
+	ipcd->threshold = calg_desc->uinfo.comp.threshold;
+	x->data = ipcd;
+	err = 0;
+out:
+	return err;
+error_tunnel:
+	down(&ipcomp6_resource_sem);
+error:
+	ipcomp6_free_data(ipcd);
+	up(&ipcomp6_resource_sem);
+	kfree(ipcd);
+
+	goto out;
+}
+
+static struct xfrm_type ipcomp6_type = 
+{
+	.description	= "IPCOMP6",
+	.owner		= THIS_MODULE,
+	.proto		= IPPROTO_COMP,
+	.init_state	= ipcomp6_init_state,
+	.destructor	= ipcomp6_destroy,
+	.input		= ipcomp6_input,
+	.output		= ipcomp6_output,
+};
+
+static struct inet6_protocol ipcomp6_protocol = 
+{
+	.handler	= xfrm6_rcv,
+	.err_handler	= ipcomp6_err,
+	.flags		= INET6_PROTO_NOPOLICY,
+};
+
+static int __init ipcomp6_init(void)
+{
+	if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
+		printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+		printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+		xfrm_unregister_type(&ipcomp6_type, AF_INET6);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit ipcomp6_fini(void)
+{
+	if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) 
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp6_init);
+module_exit(ipcomp6_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+
+
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
new file mode 100644
index 00000000000..279ab86be66
--- /dev/null
+++ b/net/ipv6/ipv6_sockglue.c
@@ -0,0 +1,704 @@
+/*
+ *	IPv6 BSD socket options interface
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	Based on linux/net/ipv4/ip_sockglue.c
+ *
+ *	$Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	FIXME: Make the setsockopt code POSIX compliant: That is
+ *
+ *	o	Return -EINVAL for setsockopt of short lengths
+ *	o	Truncate getsockopt returns
+ *	o	Return an optlen of the truncated length if need be
+ *
+ *	Changes:
+ *	David L Stevens <dlstevens@us.ibm.com>:
+ *		- added multicast source filtering API for MLDv2
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+
+DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+
+static struct packet_type ipv6_packet_type = {
+	.type = __constant_htons(ETH_P_IPV6), 
+	.func = ipv6_rcv,
+};
+
+struct ip6_ra_chain *ip6_ra_chain;
+DEFINE_RWLOCK(ip6_ra_lock);
+
+int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+{
+	struct ip6_ra_chain *ra, *new_ra, **rap;
+
+	/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
+	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
+		return -EINVAL;
+
+	new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+
+	write_lock_bh(&ip6_ra_lock);
+	for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+		if (ra->sk == sk) {
+			if (sel>=0) {
+				write_unlock_bh(&ip6_ra_lock);
+				if (new_ra)
+					kfree(new_ra);
+				return -EADDRINUSE;
+			}
+
+			*rap = ra->next;
+			write_unlock_bh(&ip6_ra_lock);
+
+			if (ra->destructor)
+				ra->destructor(sk);
+			sock_put(sk);
+			kfree(ra);
+			return 0;
+		}
+	}
+	if (new_ra == NULL) {
+		write_unlock_bh(&ip6_ra_lock);
+		return -ENOBUFS;
+	}
+	new_ra->sk = sk;
+	new_ra->sel = sel;
+	new_ra->destructor = destructor;
+	new_ra->next = ra;
+	*rap = new_ra;
+	sock_hold(sk);
+	write_unlock_bh(&ip6_ra_lock);
+	return 0;
+}
+
+extern int ip6_mc_source(int add, int omode, struct sock *sk,
+	struct group_source_req *pgsr);
+extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+	struct group_filter __user *optval, int __user *optlen);
+
+
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int val, valbool;
+	int retv = -ENOPROTOOPT;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if(level!=SOL_IPV6)
+		goto out;
+
+	if (optval == NULL)
+		val=0;
+	else if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	valbool = (val!=0);
+
+	lock_sock(sk);
+
+	switch (optname) {
+
+	case IPV6_ADDRFORM:
+		if (val == PF_INET) {
+			struct ipv6_txoptions *opt;
+			struct sk_buff *pktopt;
+
+			if (sk->sk_protocol != IPPROTO_UDP &&
+			    sk->sk_protocol != IPPROTO_TCP)
+				break;
+
+			if (sk->sk_state != TCP_ESTABLISHED) {
+				retv = -ENOTCONN;
+				break;
+			}
+
+			if (ipv6_only_sock(sk) ||
+			    !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
+				retv = -EADDRNOTAVAIL;
+				break;
+			}
+
+			fl6_free_socklist(sk);
+			ipv6_sock_mc_close(sk);
+
+			if (sk->sk_protocol == IPPROTO_TCP) {
+				struct tcp_sock *tp = tcp_sk(sk);
+
+				local_bh_disable();
+				sock_prot_dec_use(sk->sk_prot);
+				sock_prot_inc_use(&tcp_prot);
+				local_bh_enable();
+				sk->sk_prot = &tcp_prot;
+				tp->af_specific = &ipv4_specific;
+				sk->sk_socket->ops = &inet_stream_ops;
+				sk->sk_family = PF_INET;
+				tcp_sync_mss(sk, tp->pmtu_cookie);
+			} else {
+				local_bh_disable();
+				sock_prot_dec_use(sk->sk_prot);
+				sock_prot_inc_use(&udp_prot);
+				local_bh_enable();
+				sk->sk_prot = &udp_prot;
+				sk->sk_socket->ops = &inet_dgram_ops;
+				sk->sk_family = PF_INET;
+			}
+			opt = xchg(&np->opt, NULL);
+			if (opt)
+				sock_kfree_s(sk, opt, opt->tot_len);
+			pktopt = xchg(&np->pktoptions, NULL);
+			if (pktopt)
+				kfree_skb(pktopt);
+
+			sk->sk_destruct = inet_sock_destruct;
+#ifdef INET_REFCNT_DEBUG
+			atomic_dec(&inet6_sock_nr);
+#endif
+			module_put(THIS_MODULE);
+			retv = 0;
+			break;
+		}
+		goto e_inval;
+
+	case IPV6_V6ONLY:
+		if (inet_sk(sk)->num)
+			goto e_inval;
+		np->ipv6only = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_PKTINFO:
+		np->rxopt.bits.rxinfo = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_HOPLIMIT:
+		np->rxopt.bits.rxhlim = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_RTHDR:
+		if (val < 0 || val > 2)
+			goto e_inval;
+		np->rxopt.bits.srcrt = val;
+		retv = 0;
+		break;
+
+	case IPV6_HOPOPTS:
+		np->rxopt.bits.hopopts = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_DSTOPTS:
+		np->rxopt.bits.dstopts = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_FLOWINFO:
+		np->rxopt.bits.rxflow = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_PKTOPTIONS:
+	{
+		struct ipv6_txoptions *opt = NULL;
+		struct msghdr msg;
+		struct flowi fl;
+		int junk;
+
+		fl.fl6_flowlabel = 0;
+		fl.oif = sk->sk_bound_dev_if;
+
+		if (optlen == 0)
+			goto update;
+
+		/* 1K is probably excessive
+		 * 1K is surely not enough, 2K per standard header is 16K.
+		 */
+		retv = -EINVAL;
+		if (optlen > 64*1024)
+			break;
+
+		opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL);
+		retv = -ENOBUFS;
+		if (opt == NULL)
+			break;
+
+		memset(opt, 0, sizeof(*opt));
+		opt->tot_len = sizeof(*opt) + optlen;
+		retv = -EFAULT;
+		if (copy_from_user(opt+1, optval, optlen))
+			goto done;
+
+		msg.msg_controllen = optlen;
+		msg.msg_control = (void*)(opt+1);
+
+		retv = datagram_send_ctl(&msg, &fl, opt, &junk);
+		if (retv)
+			goto done;
+update:
+		retv = 0;
+		if (sk->sk_type == SOCK_STREAM) {
+			if (opt) {
+				struct tcp_sock *tp = tcp_sk(sk);
+				if (!((1 << sk->sk_state) &
+				      (TCPF_LISTEN | TCPF_CLOSE))
+				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
+					tcp_sync_mss(sk, tp->pmtu_cookie);
+				}
+			}
+			opt = xchg(&np->opt, opt);
+			sk_dst_reset(sk);
+		} else {
+			write_lock(&sk->sk_dst_lock);
+			opt = xchg(&np->opt, opt);
+			write_unlock(&sk->sk_dst_lock);
+			sk_dst_reset(sk);
+		}
+
+done:
+		if (opt)
+			sock_kfree_s(sk, opt, opt->tot_len);
+		break;
+	}
+	case IPV6_UNICAST_HOPS:
+		if (val > 255 || val < -1)
+			goto e_inval;
+		np->hop_limit = val;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_HOPS:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (val > 255 || val < -1)
+			goto e_inval;
+		np->mcast_hops = val;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_LOOP:
+		np->mc_loop = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_IF:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+			goto e_inval;
+
+		if (__dev_get_by_index(val) == NULL) {
+			retv = -ENODEV;
+			break;
+		}
+		np->mcast_oif = val;
+		retv = 0;
+		break;
+	case IPV6_ADD_MEMBERSHIP:
+	case IPV6_DROP_MEMBERSHIP:
+	{
+		struct ipv6_mreq mreq;
+
+		retv = -EFAULT;
+		if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+			break;
+
+		if (optname == IPV6_ADD_MEMBERSHIP)
+			retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+		else
+			retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+		break;
+	}
+	case IPV6_JOIN_ANYCAST:
+	case IPV6_LEAVE_ANYCAST:
+	{
+		struct ipv6_mreq mreq;
+
+		if (optlen != sizeof(struct ipv6_mreq))
+			goto e_inval;
+
+		retv = -EFAULT;
+		if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+			break;
+
+		if (optname == IPV6_JOIN_ANYCAST)
+			retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+		else
+			retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+		break;
+	}
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	{
+		struct group_req greq;
+		struct sockaddr_in6 *psin6;
+
+		retv = -EFAULT;
+		if (copy_from_user(&greq, optval, sizeof(struct group_req)))
+			break;
+		if (greq.gr_group.ss_family != AF_INET6) {
+			retv = -EADDRNOTAVAIL;
+			break;
+		}
+		psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+		if (optname == MCAST_JOIN_GROUP)
+			retv = ipv6_sock_mc_join(sk, greq.gr_interface,
+				&psin6->sin6_addr);
+		else
+			retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
+				&psin6->sin6_addr);
+		break;
+	}
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	{
+		struct group_source_req greqs;
+		int omode, add;
+
+		if (optlen != sizeof(struct group_source_req))
+			goto e_inval;
+		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
+			retv = -EFAULT;
+			break;
+		}
+		if (greqs.gsr_group.ss_family != AF_INET6 ||
+		    greqs.gsr_source.ss_family != AF_INET6) {
+			retv = -EADDRNOTAVAIL;
+			break;
+		}
+		if (optname == MCAST_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == MCAST_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+			struct sockaddr_in6 *psin6;
+
+			psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
+			retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
+				&psin6->sin6_addr);
+			if (retv)
+				break;
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /*IP_DROP_SOURCE_MEMBERSHIP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		retv = ip6_mc_source(add, omode, sk, &greqs);
+		break;
+	}
+	case MCAST_MSFILTER:
+	{
+		extern int sysctl_optmem_max;
+		extern int sysctl_mld_max_msf;
+		struct group_filter *gsf;
+
+		if (optlen < GROUP_FILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			retv = -ENOBUFS;
+			break;
+		}
+		gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+		if (gsf == 0) {
+			retv = -ENOBUFS;
+			break;
+		}
+		retv = -EFAULT;
+		if (copy_from_user(gsf, optval, optlen)) {
+			kfree(gsf);
+			break;
+		}
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (gsf->gf_numsrc >= 0x1ffffffU ||
+		    gsf->gf_numsrc > sysctl_mld_max_msf) {
+			kfree(gsf);
+			retv = -ENOBUFS;
+			break;
+		}
+		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+			kfree(gsf);
+			retv = -EINVAL;
+			break;
+		}
+		retv = ip6_mc_msfilter(sk, gsf);
+		kfree(gsf);
+
+		break;
+	}
+	case IPV6_ROUTER_ALERT:
+		retv = ip6_ra_control(sk, val, NULL);
+		break;
+	case IPV6_MTU_DISCOVER:
+		if (val<0 || val>2)
+			goto e_inval;
+		np->pmtudisc = val;
+		retv = 0;
+		break;
+	case IPV6_MTU:
+		if (val && val < IPV6_MIN_MTU)
+			goto e_inval;
+		np->frag_size = val;
+		retv = 0;
+		break;
+	case IPV6_RECVERR:
+		np->recverr = valbool;
+		if (!val)
+			skb_queue_purge(&sk->sk_error_queue);
+		retv = 0;
+		break;
+	case IPV6_FLOWINFO_SEND:
+		np->sndflow = valbool;
+		retv = 0;
+		break;
+	case IPV6_FLOWLABEL_MGR:
+		retv = ipv6_flowlabel_opt(sk, optval, optlen);
+		break;
+	case IPV6_IPSEC_POLICY:
+	case IPV6_XFRM_POLICY:
+		retv = xfrm_user_policy(sk, optname, optval, optlen);
+		break;
+
+#ifdef CONFIG_NETFILTER
+	default:
+		retv = nf_setsockopt(sk, PF_INET6, optname, optval, 
+					    optlen);
+		break;
+#endif
+
+	}
+	release_sock(sk);
+
+out:
+	return retv;
+
+e_inval:
+	release_sock(sk);
+	return -EINVAL;
+}
+
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int __user *optlen)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int len;
+	int val;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+	if(level!=SOL_IPV6)
+		return -ENOPROTOOPT;
+	if (get_user(len, optlen))
+		return -EFAULT;
+	switch (optname) {
+	case IPV6_ADDRFORM:
+		if (sk->sk_protocol != IPPROTO_UDP &&
+		    sk->sk_protocol != IPPROTO_TCP)
+			return -EINVAL;
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+		val = sk->sk_family;
+		break;
+	case MCAST_MSFILTER:
+	{
+		struct group_filter gsf;
+		int err;
+
+		if (len < GROUP_FILTER_SIZE(0))
+			return -EINVAL;
+		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
+			return -EFAULT;
+		lock_sock(sk);
+		err = ip6_mc_msfget(sk, &gsf,
+			(struct group_filter __user *)optval, optlen);
+		release_sock(sk);
+		return err;
+	}
+
+	case IPV6_PKTOPTIONS:
+	{
+		struct msghdr msg;
+		struct sk_buff *skb;
+
+		if (sk->sk_type != SOCK_STREAM)
+			return -ENOPROTOOPT;
+
+		msg.msg_control = optval;
+		msg.msg_controllen = len;
+		msg.msg_flags = 0;
+
+		lock_sock(sk);
+		skb = np->pktoptions;
+		if (skb)
+			atomic_inc(&skb->users);
+		release_sock(sk);
+
+		if (skb) {
+			int err = datagram_recv_ctl(sk, &msg, skb);
+			kfree_skb(skb);
+			if (err)
+				return err;
+		} else {
+			if (np->rxopt.bits.rxinfo) {
+				struct in6_pktinfo src_info;
+				src_info.ipi6_ifindex = np->mcast_oif;
+				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+			}
+			if (np->rxopt.bits.rxhlim) {
+				int hlim = np->mcast_hops;
+				put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+			}
+		}
+		len -= msg.msg_controllen;
+		return put_user(len, optlen);
+	}
+	case IPV6_MTU:
+	{
+		struct dst_entry *dst;
+		val = 0;	
+		lock_sock(sk);
+		dst = sk_dst_get(sk);
+		if (dst) {
+			val = dst_mtu(dst);
+			dst_release(dst);
+		}
+		release_sock(sk);
+		if (!val)
+			return -ENOTCONN;
+		break;
+	}
+
+	case IPV6_V6ONLY:
+		val = np->ipv6only;
+		break;
+
+	case IPV6_PKTINFO:
+		val = np->rxopt.bits.rxinfo;
+		break;
+
+	case IPV6_HOPLIMIT:
+		val = np->rxopt.bits.rxhlim;
+		break;
+
+	case IPV6_RTHDR:
+		val = np->rxopt.bits.srcrt;
+		break;
+
+	case IPV6_HOPOPTS:
+		val = np->rxopt.bits.hopopts;
+		break;
+
+	case IPV6_DSTOPTS:
+		val = np->rxopt.bits.dstopts;
+		break;
+
+	case IPV6_FLOWINFO:
+		val = np->rxopt.bits.rxflow;
+		break;
+
+	case IPV6_UNICAST_HOPS:
+		val = np->hop_limit;
+		break;
+
+	case IPV6_MULTICAST_HOPS:
+		val = np->mcast_hops;
+		break;
+
+	case IPV6_MULTICAST_LOOP:
+		val = np->mc_loop;
+		break;
+
+	case IPV6_MULTICAST_IF:
+		val = np->mcast_oif;
+		break;
+
+	case IPV6_MTU_DISCOVER:
+		val = np->pmtudisc;
+		break;
+
+	case IPV6_RECVERR:
+		val = np->recverr;
+		break;
+
+	case IPV6_FLOWINFO_SEND:
+		val = np->sndflow;
+		break;
+
+	default:
+#ifdef CONFIG_NETFILTER
+		lock_sock(sk);
+		val = nf_getsockopt(sk, PF_INET6, optname, optval, 
+				    &len);
+		release_sock(sk);
+		if (val >= 0)
+			val = put_user(len, optlen);
+		return val;
+#else
+		return -EINVAL;
+#endif
+	}
+	len = min_t(unsigned int, sizeof(int), len);
+	if(put_user(len, optlen))
+		return -EFAULT;
+	if(copy_to_user(optval,&val,len))
+		return -EFAULT;
+	return 0;
+}
+
+void __init ipv6_packet_init(void)
+{
+	dev_add_pack(&ipv6_packet_type);
+}
+
+void ipv6_packet_cleanup(void)
+{
+	dev_remove_pack(&ipv6_packet_type);
+}
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
new file mode 100644
index 00000000000..2f4c91ddc9a
--- /dev/null
+++ b/net/ipv6/ipv6_syms.c
@@ -0,0 +1,41 @@
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+
+EXPORT_SYMBOL(ipv6_addr_type);
+EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(icmpv6_statistics);
+EXPORT_SYMBOL(icmpv6_err_convert);
+EXPORT_SYMBOL(ndisc_mc_map);
+EXPORT_SYMBOL(register_inet6addr_notifier);
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+EXPORT_SYMBOL(ip6_route_output);
+#ifdef CONFIG_NETFILTER
+EXPORT_SYMBOL(ip6_route_me_harder);
+#endif
+EXPORT_SYMBOL(addrconf_lock);
+EXPORT_SYMBOL(ipv6_setsockopt);
+EXPORT_SYMBOL(ipv6_getsockopt);
+EXPORT_SYMBOL(inet6_register_protosw);
+EXPORT_SYMBOL(inet6_unregister_protosw);
+EXPORT_SYMBOL(inet6_add_protocol);
+EXPORT_SYMBOL(inet6_del_protocol);
+EXPORT_SYMBOL(ip6_xmit);
+EXPORT_SYMBOL(inet6_release);
+EXPORT_SYMBOL(inet6_bind);
+EXPORT_SYMBOL(inet6_getname);
+EXPORT_SYMBOL(inet6_ioctl);
+EXPORT_SYMBOL(ipv6_get_saddr);
+EXPORT_SYMBOL(ipv6_chk_addr);
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+#ifdef CONFIG_XFRM
+EXPORT_SYMBOL(xfrm6_rcv);
+#endif
+EXPORT_SYMBOL(rt6_lookup);
+EXPORT_SYMBOL(fl6_sock_lookup);
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
new file mode 100644
index 00000000000..393b6e6f50a
--- /dev/null
+++ b/net/ipv6/mcast.c
@@ -0,0 +1,2499 @@
+/*
+ *	Multicast support for IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
+ *
+ *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c 
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ *
+ *	yoshfuji	: fix format of router-alert option
+ *	YOSHIFUJI Hideaki @USAGI:
+ *		Fixed source address for MLD message based on
+ *		<draft-ietf-magma-mld-source-05.txt>.
+ *	YOSHIFUJI Hideaki @USAGI:
+ *		- Ignore Queries for invalid addresses.
+ *		- MLD for link-local addresses.
+ *	David L Stevens <dlstevens@us.ibm.com>:
+ *		- MLDv2 support
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/times.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/ip6_checksum.h>
+
+/* Set to 3 to get tracing... */
+#define MCAST_DEBUG 2
+
+#if MCAST_DEBUG >= 3
+#define MDBG(x) printk x
+#else
+#define MDBG(x)
+#endif
+
+/*
+ *  These header formats should be in a separate include file, but icmpv6.h
+ *  doesn't have in6_addr defined in all cases, there is no __u128, and no
+ *  other files reference these.
+ *
+ *  			+-DLS 4/14/03
+ */
+
+/* Multicast Listener Discovery version 2 headers */
+
+struct mld2_grec {
+	__u8		grec_type;
+	__u8		grec_auxwords;
+	__u16		grec_nsrcs;
+	struct in6_addr	grec_mca;
+	struct in6_addr	grec_src[0];
+};
+
+struct mld2_report {
+	__u8	type;
+	__u8	resv1;
+	__u16	csum;
+	__u16	resv2;
+	__u16	ngrec;
+	struct mld2_grec grec[0];
+};
+
+struct mld2_query {
+	__u8 type;
+	__u8 code;
+	__u16 csum;
+	__u16 mrc;
+	__u16 resv1;
+	struct in6_addr mca;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 qrv:3,
+	     suppress:1,
+	     resv2:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8 resv2:4,
+	     suppress:1,
+	     qrv:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+	__u8 qqic;
+	__u16 nsrcs;
+	struct in6_addr srcs[0];
+};
+
+static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
+
+/* Big mc list lock for all the sockets */
+static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+
+static struct socket *igmp6_socket;
+
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr);
+
+static void igmp6_join_group(struct ifmcaddr6 *ma);
+static void igmp6_leave_group(struct ifmcaddr6 *ma);
+static void igmp6_timer_handler(unsigned long data);
+
+static void mld_gq_timer_expire(unsigned long data);
+static void mld_ifc_timer_expire(unsigned long data);
+static void mld_ifc_event(struct inet6_dev *idev);
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
+static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr);
+static void mld_clear_delrec(struct inet6_dev *idev);
+static int sf_setstate(struct ifmcaddr6 *pmc);
+static void sf_markstate(struct ifmcaddr6 *pmc);
+static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
+static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
+			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+			  int delta);
+static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
+			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+			  int delta);
+static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+			    struct inet6_dev *idev);
+
+
+#define IGMP6_UNSOLICITED_IVAL	(10*HZ)
+#define MLD_QRV_DEFAULT		2
+
+#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+		(idev)->cnf.force_mld_version == 1 || \
+		((idev)->mc_v1_seen && \
+		time_before(jiffies, (idev)->mc_v1_seen)))
+
+#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
+#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
+	((value) < (thresh) ? (value) : \
+	((MLDV2_MASK(value, nbmant) | (1<<(nbmant+nbexp))) << \
+	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
+
+#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
+#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
+
+#define IPV6_MLD_MAX_MSF	10
+
+int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+
+/*
+ *	socket join on multicast group
+ */
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct net_device *dev = NULL;
+	struct ipv6_mc_socklist *mc_lst;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int err;
+
+	if (!ipv6_addr_is_multicast(addr))
+		return -EINVAL;
+
+	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
+
+	if (mc_lst == NULL)
+		return -ENOMEM;
+
+	mc_lst->next = NULL;
+	ipv6_addr_copy(&mc_lst->addr, addr);
+
+	if (ifindex == 0) {
+		struct rt6_info *rt;
+		rt = rt6_lookup(addr, NULL, 0, 0);
+		if (rt) {
+			dev = rt->rt6i_dev;
+			dev_hold(dev);
+			dst_release(&rt->u.dst);
+		}
+	} else
+		dev = dev_get_by_index(ifindex);
+
+	if (dev == NULL) {
+		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+		return -ENODEV;
+	}
+
+	mc_lst->ifindex = dev->ifindex;
+	mc_lst->sfmode = MCAST_EXCLUDE;
+	mc_lst->sflist = NULL;
+
+	/*
+	 *	now add/increase the group membership on the device
+	 */
+
+	err = ipv6_dev_mc_inc(dev, addr);
+
+	if (err) {
+		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+		dev_put(dev);
+		return err;
+	}
+
+	write_lock_bh(&ipv6_sk_mc_lock);
+	mc_lst->next = np->ipv6_mc_list;
+	np->ipv6_mc_list = mc_lst;
+	write_unlock_bh(&ipv6_sk_mc_lock);
+
+	dev_put(dev);
+
+	return 0;
+}
+
+/*
+ *	socket leave on multicast group
+ */
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_mc_socklist *mc_lst, **lnk;
+
+	write_lock_bh(&ipv6_sk_mc_lock);
+	for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
+		    ipv6_addr_equal(&mc_lst->addr, addr)) {
+			struct net_device *dev;
+
+			*lnk = mc_lst->next;
+			write_unlock_bh(&ipv6_sk_mc_lock);
+
+			if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
+				struct inet6_dev *idev = in6_dev_get(dev);
+
+				if (idev) {
+					(void) ip6_mc_leave_src(sk,mc_lst,idev);
+					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
+					in6_dev_put(idev);
+				}
+				dev_put(dev);
+			}
+			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+			return 0;
+		}
+	}
+	write_unlock_bh(&ipv6_sk_mc_lock);
+
+	return -ENOENT;
+}
+
+static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
+{
+	struct net_device *dev = NULL;
+	struct inet6_dev *idev = NULL;
+
+	if (ifindex == 0) {
+		struct rt6_info *rt;
+
+		rt = rt6_lookup(group, NULL, 0, 0);
+		if (rt) {
+			dev = rt->rt6i_dev;
+			dev_hold(dev);
+			dst_release(&rt->u.dst);
+		}
+	} else
+		dev = dev_get_by_index(ifindex);
+
+	if (!dev)
+		return NULL;
+	idev = in6_dev_get(dev);
+	if (!idev) {
+		dev_put(dev);
+		return NULL;
+	}
+	read_lock_bh(&idev->lock);
+	if (idev->dead) {
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+		dev_put(dev);
+		return NULL;
+	}
+	return idev;
+}
+
+void ipv6_sock_mc_close(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_mc_socklist *mc_lst;
+
+	write_lock_bh(&ipv6_sk_mc_lock);
+	while ((mc_lst = np->ipv6_mc_list) != NULL) {
+		struct net_device *dev;
+
+		np->ipv6_mc_list = mc_lst->next;
+		write_unlock_bh(&ipv6_sk_mc_lock);
+
+		dev = dev_get_by_index(mc_lst->ifindex);
+		if (dev) {
+			struct inet6_dev *idev = in6_dev_get(dev);
+
+			if (idev) {
+				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
+				in6_dev_put(idev);
+			}
+			dev_put(dev);
+		}
+
+		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+
+		write_lock_bh(&ipv6_sk_mc_lock);
+	}
+	write_unlock_bh(&ipv6_sk_mc_lock);
+}
+
+int ip6_mc_source(int add, int omode, struct sock *sk,
+	struct group_source_req *pgsr)
+{
+	struct in6_addr *source, *group;
+	struct ipv6_mc_socklist *pmc;
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	struct ip6_sf_socklist *psl;
+	int i, j, rv;
+	int err;
+
+	if (pgsr->gsr_group.ss_family != AF_INET6 ||
+	    pgsr->gsr_source.ss_family != AF_INET6)
+		return -EINVAL;
+
+	source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
+	group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
+
+	if (!ipv6_addr_is_multicast(group))
+		return -EINVAL;
+
+	idev = ip6_mc_find_dev(group, pgsr->gsr_interface);
+	if (!idev)
+		return -ENODEV;
+	dev = idev->dev;
+
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
+			continue;
+		if (ipv6_addr_equal(&pmc->addr, group))
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	/* if a source filter was set, must be the same mode as before */
+	if (pmc->sflist) {
+		if (pmc->sfmode != omode)
+			goto done;
+	} else if (pmc->sfmode != omode) {
+		/* allow mode switches for empty-set filters */
+		ip6_mc_add_src(idev, group, omode, 0, NULL, 0);
+		ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+		pmc->sfmode = omode;
+	}
+
+	psl = pmc->sflist;
+	if (!add) {
+		if (!psl)
+			goto done;
+		rv = !0;
+		for (i=0; i<psl->sl_count; i++) {
+			rv = memcmp(&psl->sl_addr[i], source,
+				sizeof(struct in6_addr));
+			if (rv == 0)
+				break;
+		}
+		if (rv)		/* source not found */
+			goto done;
+
+		/* update the interface filter */
+		ip6_mc_del_src(idev, group, omode, 1, source, 1);
+
+		for (j=i+1; j<psl->sl_count; j++)
+			psl->sl_addr[j-1] = psl->sl_addr[j];
+		psl->sl_count--;
+		err = 0;
+		goto done;
+	}
+	/* else, add a new source to the filter */
+
+	if (psl && psl->sl_count >= sysctl_mld_max_msf) {
+		err = -ENOBUFS;
+		goto done;
+	}
+	if (!psl || psl->sl_count == psl->sl_max) {
+		struct ip6_sf_socklist *newpsl;
+		int count = IP6_SFBLOCK;
+
+		if (psl)
+			count += psl->sl_max;
+		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
+			IP6_SFLSIZE(count), GFP_ATOMIC);
+		if (!newpsl) {
+			err = -ENOBUFS;
+			goto done;
+		}
+		newpsl->sl_max = count;
+		newpsl->sl_count = count - IP6_SFBLOCK;
+		if (psl) {
+			for (i=0; i<psl->sl_count; i++)
+				newpsl->sl_addr[i] = psl->sl_addr[i];
+			sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+		}
+		pmc->sflist = psl = newpsl;
+	}
+	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
+	for (i=0; i<psl->sl_count; i++) {
+		rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
+		if (rv == 0)
+			break;
+	}
+	if (rv == 0)		/* address already there is an error */
+		goto done;
+	for (j=psl->sl_count-1; j>=i; j--)
+		psl->sl_addr[j+1] = psl->sl_addr[j];
+	psl->sl_addr[i] = *source;
+	psl->sl_count++;
+	err = 0;
+	/* update the interface list */
+	ip6_mc_add_src(idev, group, omode, 1, source, 1);
+done:
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	dev_put(dev);
+	return err;
+}
+
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+{
+	struct in6_addr *group;
+	struct ipv6_mc_socklist *pmc;
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	struct ip6_sf_socklist *newpsl, *psl;
+	int i, err;
+
+	group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+	if (!ipv6_addr_is_multicast(group))
+		return -EINVAL;
+	if (gsf->gf_fmode != MCAST_INCLUDE &&
+	    gsf->gf_fmode != MCAST_EXCLUDE)
+		return -EINVAL;
+
+	idev = ip6_mc_find_dev(group, gsf->gf_interface);
+
+	if (!idev)
+		return -ENODEV;
+	dev = idev->dev;
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+		if (pmc->ifindex != gsf->gf_interface)
+			continue;
+		if (ipv6_addr_equal(&pmc->addr, group))
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	if (gsf->gf_numsrc) {
+		newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
+				IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
+		if (!newpsl) {
+			err = -ENOBUFS;
+			goto done;
+		}
+		newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
+		for (i=0; i<newpsl->sl_count; ++i) {
+			struct sockaddr_in6 *psin6;
+
+			psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+			newpsl->sl_addr[i] = psin6->sin6_addr;
+		}
+		err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
+			newpsl->sl_count, newpsl->sl_addr, 0);
+		if (err) {
+			sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+			goto done;
+		}
+	} else
+		newpsl = NULL;
+	psl = pmc->sflist;
+	if (psl) {
+		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
+			psl->sl_count, psl->sl_addr, 0);
+		sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+	} else
+		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+	pmc->sflist = newpsl;
+	pmc->sfmode = gsf->gf_fmode;
+done:
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	dev_put(dev);
+	return err;
+}
+
+int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+	struct group_filter __user *optval, int __user *optlen)
+{
+	int err, i, count, copycount;
+	struct in6_addr *group;
+	struct ipv6_mc_socklist *pmc;
+	struct inet6_dev *idev;
+	struct net_device *dev;
+	struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	struct ip6_sf_socklist *psl;
+
+	group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+	if (!ipv6_addr_is_multicast(group))
+		return -EINVAL;
+
+	idev = ip6_mc_find_dev(group, gsf->gf_interface);
+
+	if (!idev)
+		return -ENODEV;
+
+	dev = idev->dev;
+
+	err = -EADDRNOTAVAIL;
+
+	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+		if (pmc->ifindex != gsf->gf_interface)
+			continue;
+		if (ipv6_addr_equal(group, &pmc->addr))
+			break;
+	}
+	if (!pmc)		/* must have a prior join */
+		goto done;
+	gsf->gf_fmode = pmc->sfmode;
+	psl = pmc->sflist;
+	count = psl ? psl->sl_count : 0;
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	dev_put(dev);
+
+	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+	gsf->gf_numsrc = count;
+	if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
+	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
+		return -EFAULT;
+	}
+	for (i=0; i<copycount; i++) {
+		struct sockaddr_in6 *psin6;
+		struct sockaddr_storage ss;
+
+		psin6 = (struct sockaddr_in6 *)&ss;
+		memset(&ss, 0, sizeof(ss));
+		psin6->sin6_family = AF_INET6;
+		psin6->sin6_addr = psl->sl_addr[i];
+	    	if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+			return -EFAULT;
+	}
+	return 0;
+done:
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	dev_put(dev);
+	return err;
+}
+
+int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
+	struct in6_addr *src_addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_mc_socklist *mc;
+	struct ip6_sf_socklist *psl;
+	int rv = 1;
+
+	read_lock(&ipv6_sk_mc_lock);
+	for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+		if (ipv6_addr_equal(&mc->addr, mc_addr))
+			break;
+	}
+	if (!mc) {
+		read_unlock(&ipv6_sk_mc_lock);
+		return 1;
+	}
+	psl = mc->sflist;
+	if (!psl) {
+		rv = mc->sfmode == MCAST_EXCLUDE;
+	} else {
+		int i;
+
+		for (i=0; i<psl->sl_count; i++) {
+			if (ipv6_addr_equal(&psl->sl_addr[i], src_addr))
+				break;
+		}
+		if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
+			rv = 0;
+		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
+			rv = 0;
+	}
+	read_unlock(&ipv6_sk_mc_lock);
+
+	return rv;
+}
+
+static void ma_put(struct ifmcaddr6 *mc)
+{
+	if (atomic_dec_and_test(&mc->mca_refcnt)) {
+		in6_dev_put(mc->idev);
+		kfree(mc);
+	}
+}
+
+static void igmp6_group_added(struct ifmcaddr6 *mc)
+{
+	struct net_device *dev = mc->idev->dev;
+	char buf[MAX_ADDR_LEN];
+
+	spin_lock_bh(&mc->mca_lock);
+	if (!(mc->mca_flags&MAF_LOADED)) {
+		mc->mca_flags |= MAF_LOADED;
+		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+			dev_mc_add(dev, buf, dev->addr_len, 0);
+	}
+	spin_unlock_bh(&mc->mca_lock);
+
+	if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
+		return;
+
+	if (MLD_V1_SEEN(mc->idev)) {
+		igmp6_join_group(mc);
+		return;
+	}
+	/* else v2 */
+
+	mc->mca_crcount = mc->idev->mc_qrv;
+	mld_ifc_event(mc->idev);
+}
+
+static void igmp6_group_dropped(struct ifmcaddr6 *mc)
+{
+	struct net_device *dev = mc->idev->dev;
+	char buf[MAX_ADDR_LEN];
+
+	spin_lock_bh(&mc->mca_lock);
+	if (mc->mca_flags&MAF_LOADED) {
+		mc->mca_flags &= ~MAF_LOADED;
+		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+			dev_mc_delete(dev, buf, dev->addr_len, 0);
+	}
+
+	if (mc->mca_flags & MAF_NOREPORT)
+		goto done;
+	spin_unlock_bh(&mc->mca_lock);
+
+	if (!mc->idev->dead)
+		igmp6_leave_group(mc);
+
+	spin_lock_bh(&mc->mca_lock);
+	if (del_timer(&mc->mca_timer))
+		atomic_dec(&mc->mca_refcnt);
+done:
+	ip6_mc_clear_src(mc);
+	spin_unlock_bh(&mc->mca_lock);
+}
+
+/*
+ * deleted ifmcaddr6 manipulation
+ */
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
+{
+	struct ifmcaddr6 *pmc;
+
+	/* this is an "ifmcaddr6" for convenience; only the fields below
+	 * are actually used. In particular, the refcnt and users are not
+	 * used for management of the delete list. Using the same structure
+	 * for deleted items allows change reports to use common code with
+	 * non-deleted or query-response MCA's.
+	 */
+	pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC);
+	if (!pmc)
+		return;
+	memset(pmc, 0, sizeof(*pmc));
+	spin_lock_bh(&im->mca_lock);
+	spin_lock_init(&pmc->mca_lock);
+	pmc->idev = im->idev;
+	in6_dev_hold(idev);
+	pmc->mca_addr = im->mca_addr;
+	pmc->mca_crcount = idev->mc_qrv;
+	pmc->mca_sfmode = im->mca_sfmode;
+	if (pmc->mca_sfmode == MCAST_INCLUDE) {
+		struct ip6_sf_list *psf;
+
+		pmc->mca_tomb = im->mca_tomb;
+		pmc->mca_sources = im->mca_sources;
+		im->mca_tomb = im->mca_sources = NULL;
+		for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+			psf->sf_crcount = pmc->mca_crcount;
+	}
+	spin_unlock_bh(&im->mca_lock);
+
+	write_lock_bh(&idev->mc_lock);
+	pmc->next = idev->mc_tomb;
+	idev->mc_tomb = pmc;
+	write_unlock_bh(&idev->mc_lock);
+}
+
+static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
+{
+	struct ifmcaddr6 *pmc, *pmc_prev;
+	struct ip6_sf_list *psf, *psf_next;
+
+	write_lock_bh(&idev->mc_lock);
+	pmc_prev = NULL;
+	for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
+		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
+			break;
+		pmc_prev = pmc;
+	}
+	if (pmc) {
+		if (pmc_prev)
+			pmc_prev->next = pmc->next;
+		else
+			idev->mc_tomb = pmc->next;
+	}
+	write_unlock_bh(&idev->mc_lock);
+	if (pmc) {
+		for (psf=pmc->mca_tomb; psf; psf=psf_next) {
+			psf_next = psf->sf_next;
+			kfree(psf);
+		}
+		in6_dev_put(pmc->idev);
+		kfree(pmc);
+	}
+}
+
+static void mld_clear_delrec(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *pmc, *nextpmc;
+
+	write_lock_bh(&idev->mc_lock);
+	pmc = idev->mc_tomb;
+	idev->mc_tomb = NULL;
+	write_unlock_bh(&idev->mc_lock);
+
+	for (; pmc; pmc = nextpmc) {
+		nextpmc = pmc->next;
+		ip6_mc_clear_src(pmc);
+		in6_dev_put(pmc->idev);
+		kfree(pmc);
+	}
+
+	/* clear dead sources, too */
+	read_lock_bh(&idev->lock);
+	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+		struct ip6_sf_list *psf, *psf_next;
+
+		spin_lock_bh(&pmc->mca_lock);
+		psf = pmc->mca_tomb;
+		pmc->mca_tomb = NULL;
+		spin_unlock_bh(&pmc->mca_lock);
+		for (; psf; psf=psf_next) {
+			psf_next = psf->sf_next;
+			kfree(psf);
+		}
+	}
+	read_unlock_bh(&idev->lock);
+}
+
+
+/*
+ *	device multicast group inc (add if not found)
+ */
+int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
+{
+	struct ifmcaddr6 *mc;
+	struct inet6_dev *idev;
+
+	idev = in6_dev_get(dev);
+
+	if (idev == NULL)
+		return -EINVAL;
+
+	write_lock_bh(&idev->lock);
+	if (idev->dead) {
+		write_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+		return -ENODEV;
+	}
+
+	for (mc = idev->mc_list; mc; mc = mc->next) {
+		if (ipv6_addr_equal(&mc->mca_addr, addr)) {
+			mc->mca_users++;
+			write_unlock_bh(&idev->lock);
+			ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
+				NULL, 0);
+			in6_dev_put(idev);
+			return 0;
+		}
+	}
+
+	/*
+	 *	not found: create a new one.
+	 */
+
+	mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
+
+	if (mc == NULL) {
+		write_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+		return -ENOMEM;
+	}
+
+	memset(mc, 0, sizeof(struct ifmcaddr6));
+	init_timer(&mc->mca_timer);
+	mc->mca_timer.function = igmp6_timer_handler;
+	mc->mca_timer.data = (unsigned long) mc;
+
+	ipv6_addr_copy(&mc->mca_addr, addr);
+	mc->idev = idev;
+	mc->mca_users = 1;
+	/* mca_stamp should be updated upon changes */
+	mc->mca_cstamp = mc->mca_tstamp = jiffies;
+	atomic_set(&mc->mca_refcnt, 2);
+	spin_lock_init(&mc->mca_lock);
+
+	/* initial mode is (EX, empty) */
+	mc->mca_sfmode = MCAST_EXCLUDE;
+	mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+
+	if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
+	    IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+		mc->mca_flags |= MAF_NOREPORT;
+
+	mc->next = idev->mc_list;
+	idev->mc_list = mc;
+	write_unlock_bh(&idev->lock);
+
+	mld_del_delrec(idev, &mc->mca_addr);
+	igmp6_group_added(mc);
+	ma_put(mc);
+	return 0;
+}
+
+/*
+ *	device multicast group del
+ */
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct ifmcaddr6 *ma, **map;
+
+	write_lock_bh(&idev->lock);
+	for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
+		if (ipv6_addr_equal(&ma->mca_addr, addr)) {
+			if (--ma->mca_users == 0) {
+				*map = ma->next;
+				write_unlock_bh(&idev->lock);
+
+				igmp6_group_dropped(ma);
+
+				ma_put(ma);
+				return 0;
+			}
+			write_unlock_bh(&idev->lock);
+			return 0;
+		}
+	}
+	write_unlock_bh(&idev->lock);
+
+	return -ENOENT;
+}
+
+int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev = in6_dev_get(dev);
+	int err;
+
+	if (!idev)
+		return -ENODEV;
+
+	err = __ipv6_dev_mc_dec(idev, addr);
+
+	in6_dev_put(idev);
+
+	return err;
+}
+
+/*
+ * identify MLD packets for MLD filter exceptions
+ */
+int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
+{
+	struct icmp6hdr *pic;
+
+	if (nexthdr != IPPROTO_ICMPV6)
+		return 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
+		return 0;
+
+	pic = (struct icmp6hdr *)skb->h.raw;
+
+	switch (pic->icmp6_type) {
+	case ICMPV6_MGM_QUERY:
+	case ICMPV6_MGM_REPORT:
+	case ICMPV6_MGM_REDUCTION:
+	case ICMPV6_MLD2_REPORT:
+		return 1;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ *	check if the interface/address pair is valid
+ */
+int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
+	struct in6_addr *src_addr)
+{
+	struct inet6_dev *idev;
+	struct ifmcaddr6 *mc;
+	int rv = 0;
+
+	idev = in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		for (mc = idev->mc_list; mc; mc=mc->next) {
+			if (ipv6_addr_equal(&mc->mca_addr, group))
+				break;
+		}
+		if (mc) {
+			if (src_addr && !ipv6_addr_any(src_addr)) {
+				struct ip6_sf_list *psf;
+
+				spin_lock_bh(&mc->mca_lock);
+				for (psf=mc->mca_sources;psf;psf=psf->sf_next) {
+					if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+						break;
+				}
+				if (psf)
+					rv = psf->sf_count[MCAST_INCLUDE] ||
+						psf->sf_count[MCAST_EXCLUDE] !=
+						mc->mca_sfcount[MCAST_EXCLUDE];
+				else
+					rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
+				spin_unlock_bh(&mc->mca_lock);
+			} else
+				rv = 1; /* don't filter unspecified source */
+		}
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+	}
+	return rv;
+}
+
+static void mld_gq_start_timer(struct inet6_dev *idev)
+{
+	int tv = net_random() % idev->mc_maxdelay;
+
+	idev->mc_gq_running = 1;
+	if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
+		in6_dev_hold(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+{
+	int tv = net_random() % delay;
+
+	if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
+		in6_dev_hold(idev);
+}
+
+/*
+ *	IGMP handling (alias multicast ICMPv6 messages)
+ */
+
+static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
+{
+	unsigned long delay = resptime;
+
+	/* Do not start timer for these addresses */
+	if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
+	    IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+		return;
+
+	if (del_timer(&ma->mca_timer)) {
+		atomic_dec(&ma->mca_refcnt);
+		delay = ma->mca_timer.expires - jiffies;
+	}
+
+	if (delay >= resptime) {
+		if (resptime)
+			delay = net_random() % resptime;
+		else
+			delay = 1;
+	}
+	ma->mca_timer.expires = jiffies + delay;
+	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+		atomic_inc(&ma->mca_refcnt);
+	ma->mca_flags |= MAF_TIMER_RUNNING;
+}
+
+static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+	struct in6_addr *srcs)
+{
+	struct ip6_sf_list *psf;
+	int i, scount;
+
+	scount = 0;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (scount == nsrcs)
+			break;
+		for (i=0; i<nsrcs; i++)
+			if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+				psf->sf_gsresp = 1;
+				scount++;
+				break;
+			}
+	}
+}
+
+int igmp6_event_query(struct sk_buff *skb)
+{
+	struct mld2_query *mlh2 = (struct mld2_query *) skb->h.raw;
+	struct ifmcaddr6 *ma;
+	struct in6_addr *group;
+	unsigned long max_delay;
+	struct inet6_dev *idev;
+	struct icmp6hdr *hdr;
+	int group_type;
+	int mark = 0;
+	int len;
+
+	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+		return -EINVAL;
+
+	/* compute payload length excluding extension headers */
+	len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+	len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h; 
+
+	/* Drop queries with not link local source */
+	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+		return -EINVAL;
+
+	idev = in6_dev_get(skb->dev);
+
+	if (idev == NULL)
+		return 0;
+
+	hdr = (struct icmp6hdr *) skb->h.raw;
+	group = (struct in6_addr *) (hdr + 1);
+	group_type = ipv6_addr_type(group);
+
+	if (group_type != IPV6_ADDR_ANY &&
+	    !(group_type&IPV6_ADDR_MULTICAST)) {
+		in6_dev_put(idev);
+		return -EINVAL;
+	}
+
+	if (len == 24) {
+		int switchback;
+		/* MLDv1 router present */
+
+		/* Translate milliseconds to jiffies */
+		max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000;
+
+		switchback = (idev->mc_qrv + 1) * max_delay;
+		idev->mc_v1_seen = jiffies + switchback;
+
+		/* cancel the interface change timer */
+		idev->mc_ifc_count = 0;
+		if (del_timer(&idev->mc_ifc_timer))
+			__in6_dev_put(idev);
+		/* clear deleted report items */
+		mld_clear_delrec(idev);
+	} else if (len >= 28) {
+		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
+		if (!max_delay)
+			max_delay = 1;
+		idev->mc_maxdelay = max_delay;
+		if (mlh2->qrv)
+			idev->mc_qrv = mlh2->qrv;
+		if (group_type == IPV6_ADDR_ANY) { /* general query */
+			if (mlh2->nsrcs) {
+				in6_dev_put(idev);
+				return -EINVAL; /* no sources allowed */
+			}
+			mld_gq_start_timer(idev);
+			in6_dev_put(idev);
+			return 0;
+		}
+		/* mark sources to include, if group & source-specific */
+		mark = mlh2->nsrcs != 0;
+	} else {
+		in6_dev_put(idev);
+		return -EINVAL;
+	}
+
+	read_lock_bh(&idev->lock);
+	if (group_type == IPV6_ADDR_ANY) {
+		for (ma = idev->mc_list; ma; ma=ma->next) {
+			spin_lock_bh(&ma->mca_lock);
+			igmp6_group_queried(ma, max_delay);
+			spin_unlock_bh(&ma->mca_lock);
+		}
+	} else {
+		for (ma = idev->mc_list; ma; ma=ma->next) {
+			if (group_type != IPV6_ADDR_ANY &&
+			    !ipv6_addr_equal(group, &ma->mca_addr))
+				continue;
+			spin_lock_bh(&ma->mca_lock);
+			if (ma->mca_flags & MAF_TIMER_RUNNING) {
+				/* gsquery <- gsquery && mark */
+				if (!mark)
+					ma->mca_flags &= ~MAF_GSQUERY;
+			} else {
+				/* gsquery <- mark */
+				if (mark)
+					ma->mca_flags |= MAF_GSQUERY;
+				else
+					ma->mca_flags &= ~MAF_GSQUERY;
+			}
+			if (ma->mca_flags & MAF_GSQUERY)
+				mld_marksources(ma, ntohs(mlh2->nsrcs),
+					mlh2->srcs);
+			igmp6_group_queried(ma, max_delay);
+			spin_unlock_bh(&ma->mca_lock);
+			if (group_type != IPV6_ADDR_ANY)
+				break;
+		}
+	}
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+
+	return 0;
+}
+
+
+int igmp6_event_report(struct sk_buff *skb)
+{
+	struct ifmcaddr6 *ma;
+	struct in6_addr *addrp;
+	struct inet6_dev *idev;
+	struct icmp6hdr *hdr;
+	int addr_type;
+
+	/* Our own report looped back. Ignore it. */
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+		return -EINVAL;
+
+	hdr = (struct icmp6hdr*) skb->h.raw;
+
+	/* Drop reports with not link local source */
+	addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+	if (addr_type != IPV6_ADDR_ANY && 
+	    !(addr_type&IPV6_ADDR_LINKLOCAL))
+		return -EINVAL;
+
+	addrp = (struct in6_addr *) (hdr + 1);
+
+	idev = in6_dev_get(skb->dev);
+	if (idev == NULL)
+		return -ENODEV;
+
+	/*
+	 *	Cancel the timer for this group
+	 */
+
+	read_lock_bh(&idev->lock);
+	for (ma = idev->mc_list; ma; ma=ma->next) {
+		if (ipv6_addr_equal(&ma->mca_addr, addrp)) {
+			spin_lock(&ma->mca_lock);
+			if (del_timer(&ma->mca_timer))
+				atomic_dec(&ma->mca_refcnt);
+			ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
+			spin_unlock(&ma->mca_lock);
+			break;
+		}
+	}
+	read_unlock_bh(&idev->lock);
+	in6_dev_put(idev);
+	return 0;
+}
+
+static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+	int gdeleted, int sdeleted)
+{
+	switch (type) {
+	case MLD2_MODE_IS_INCLUDE:
+	case MLD2_MODE_IS_EXCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+	case MLD2_CHANGE_TO_INCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		return psf->sf_count[MCAST_INCLUDE] != 0;
+	case MLD2_CHANGE_TO_EXCLUDE:
+		if (gdeleted || sdeleted)
+			return 0;
+		if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
+		    psf->sf_count[MCAST_INCLUDE])
+			return 0;
+		return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+			psf->sf_count[MCAST_EXCLUDE];
+	case MLD2_ALLOW_NEW_SOURCES:
+		if (gdeleted || !psf->sf_crcount)
+			return 0;
+		return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
+	case MLD2_BLOCK_OLD_SOURCES:
+		if (pmc->mca_sfmode == MCAST_INCLUDE)
+			return gdeleted || (psf->sf_crcount && sdeleted);
+		return psf->sf_crcount && !gdeleted && !sdeleted;
+	}
+	return 0;
+}
+
+static int
+mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
+{
+	struct ip6_sf_list *psf;
+	int scount = 0;
+
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
+			continue;
+		scount++;
+	}
+	return scount;
+}
+
+static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+{
+	struct sock *sk = igmp6_socket->sk;
+	struct sk_buff *skb;
+	struct mld2_report *pmr;
+	struct in6_addr addr_buf;
+	int err;
+	u8 ra[8] = { IPPROTO_ICMPV6, 0,
+		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
+		     IPV6_TLV_PADN, 0 };
+
+	/* we assume size > sizeof(ra) here */
+	skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+
+	if (skb == 0)
+		return NULL;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	if (dev->hard_header) {
+		unsigned char ha[MAX_ADDR_LEN];
+
+		ndisc_mc_map(&mld2_all_mcr, ha, dev, 1);
+		if (dev->hard_header(skb, dev, ETH_P_IPV6,ha,NULL,size) < 0) {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+
+	if (ipv6_get_lladdr(dev, &addr_buf)) {
+		/* <draft-ietf-magma-mld-source-05.txt>:
+		 * use unspecified address as the source address 
+		 * when a valid link-local address is not available.
+		 */
+		memset(&addr_buf, 0, sizeof(addr_buf));
+	}
+
+	ip6_nd_hdr(sk, skb, dev, &addr_buf, &mld2_all_mcr, NEXTHDR_HOP, 0);
+
+	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+	pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
+	skb->h.raw = (unsigned char *)pmr;
+	pmr->type = ICMPV6_MLD2_REPORT;
+	pmr->resv1 = 0;
+	pmr->csum = 0;
+	pmr->resv2 = 0;
+	pmr->ngrec = 0;
+	return skb;
+}
+
+static void mld_sendpack(struct sk_buff *skb)
+{
+	struct ipv6hdr *pip6 = skb->nh.ipv6h;
+	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+	int payload_len, mldlen;
+	struct inet6_dev *idev = in6_dev_get(skb->dev);
+	int err;
+
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
+		sizeof(struct ipv6hdr);
+	mldlen = skb->tail - skb->h.raw;
+	pip6->payload_len = htons(payload_len);
+
+	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
+		IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+		dev_queue_xmit);
+	if (!err) {
+		ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
+		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	} else
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}
+
+static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
+{
+	return sizeof(struct mld2_grec) + 4*mld_scount(pmc,type,gdel,sdel);
+}
+
+static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+	int type, struct mld2_grec **ppgr)
+{
+	struct net_device *dev = pmc->idev->dev;
+	struct mld2_report *pmr;
+	struct mld2_grec *pgr;
+
+	if (!skb)
+		skb = mld_newpack(dev, dev->mtu);
+	if (!skb)
+		return NULL;
+	pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
+	pgr->grec_type = type;
+	pgr->grec_auxwords = 0;
+	pgr->grec_nsrcs = 0;
+	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
+	pmr = (struct mld2_report *)skb->h.raw;
+	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
+	*ppgr = pgr;
+	return skb;
+}
+
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+	skb_tailroom(skb)) : 0)
+
+static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+	int type, int gdeleted, int sdeleted)
+{
+	struct net_device *dev = pmc->idev->dev;
+	struct mld2_report *pmr;
+	struct mld2_grec *pgr = NULL;
+	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+	int scount, first, isquery, truncate;
+
+	if (pmc->mca_flags & MAF_NOREPORT)
+		return skb;
+
+	isquery = type == MLD2_MODE_IS_INCLUDE ||
+		  type == MLD2_MODE_IS_EXCLUDE;
+	truncate = type == MLD2_MODE_IS_EXCLUDE ||
+		    type == MLD2_CHANGE_TO_EXCLUDE;
+
+	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
+
+	if (!*psf_list) {
+		if (type == MLD2_ALLOW_NEW_SOURCES ||
+		    type == MLD2_BLOCK_OLD_SOURCES)
+			return skb;
+		if (pmc->mca_crcount || isquery) {
+			/* make sure we have room for group header and at
+			 * least one source.
+			 */
+			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
+			    sizeof(struct in6_addr)) {
+				mld_sendpack(skb);
+				skb = NULL; /* add_grhead will get a new one */
+			}
+			skb = add_grhead(skb, pmc, type, &pgr);
+		}
+		return skb;
+	}
+	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+
+	/* EX and TO_EX get a fresh packet, if needed */
+	if (truncate) {
+		if (pmr && pmr->ngrec &&
+		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+			if (skb)
+				mld_sendpack(skb);
+			skb = mld_newpack(dev, dev->mtu);
+		}
+	}
+	first = 1;
+	scount = 0;
+	psf_prev = NULL;
+	for (psf=*psf_list; psf; psf=psf_next) {
+		struct in6_addr *psrc;
+
+		psf_next = psf->sf_next;
+
+		if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+			psf_prev = psf;
+			continue;
+		}
+
+		/* clear marks on query responses */
+		if (isquery)
+			psf->sf_gsresp = 0;
+
+		if (AVAILABLE(skb) < sizeof(*psrc) +
+		    first*sizeof(struct mld2_grec)) {
+			if (truncate && !first)
+				break;	 /* truncate these */
+			if (pgr)
+				pgr->grec_nsrcs = htons(scount);
+			if (skb)
+				mld_sendpack(skb);
+			skb = mld_newpack(dev, dev->mtu);
+			first = 1;
+			scount = 0;
+		}
+		if (first) {
+			skb = add_grhead(skb, pmc, type, &pgr);
+			first = 0;
+		}
+		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
+		*psrc = psf->sf_addr;
+		scount++;
+		if ((type == MLD2_ALLOW_NEW_SOURCES ||
+		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+			psf->sf_crcount--;
+			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
+				if (psf_prev)
+					psf_prev->sf_next = psf->sf_next;
+				else
+					*psf_list = psf->sf_next;
+				kfree(psf);
+				continue;
+			}
+		}
+		psf_prev = psf;
+	}
+	if (pgr)
+		pgr->grec_nsrcs = htons(scount);
+
+	if (isquery)
+		pmc->mca_flags &= ~MAF_GSQUERY;	/* clear query state */
+	return skb;
+}
+
+static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
+{
+	struct sk_buff *skb = NULL;
+	int type;
+
+	if (!pmc) {
+		read_lock_bh(&idev->lock);
+		for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+			if (pmc->mca_flags & MAF_NOREPORT)
+				continue;
+			spin_lock_bh(&pmc->mca_lock);
+			if (pmc->mca_sfcount[MCAST_EXCLUDE])
+				type = MLD2_MODE_IS_EXCLUDE;
+			else
+				type = MLD2_MODE_IS_INCLUDE;
+			skb = add_grec(skb, pmc, type, 0, 0);
+			spin_unlock_bh(&pmc->mca_lock);
+		}
+		read_unlock_bh(&idev->lock);
+	} else {
+		spin_lock_bh(&pmc->mca_lock);
+		if (pmc->mca_sfcount[MCAST_EXCLUDE])
+			type = MLD2_MODE_IS_EXCLUDE;
+		else
+			type = MLD2_MODE_IS_INCLUDE;
+		skb = add_grec(skb, pmc, type, 0, 0);
+		spin_unlock_bh(&pmc->mca_lock);
+	}
+	if (skb)
+		mld_sendpack(skb);
+}
+
+/*
+ * remove zero-count source records from a source filter list
+ */
+static void mld_clear_zeros(struct ip6_sf_list **ppsf)
+{
+	struct ip6_sf_list *psf_prev, *psf_next, *psf;
+
+	psf_prev = NULL;
+	for (psf=*ppsf; psf; psf = psf_next) {
+		psf_next = psf->sf_next;
+		if (psf->sf_crcount == 0) {
+			if (psf_prev)
+				psf_prev->sf_next = psf->sf_next;
+			else
+				*ppsf = psf->sf_next;
+			kfree(psf);
+		} else
+			psf_prev = psf;
+	}
+}
+
+static void mld_send_cr(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
+	struct sk_buff *skb = NULL;
+	int type, dtype;
+
+	read_lock_bh(&idev->lock);
+	write_lock_bh(&idev->mc_lock);
+
+	/* deleted MCA's */
+	pmc_prev = NULL;
+	for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) {
+		pmc_next = pmc->next;
+		if (pmc->mca_sfmode == MCAST_INCLUDE) {
+			type = MLD2_BLOCK_OLD_SOURCES;
+			dtype = MLD2_BLOCK_OLD_SOURCES;
+			skb = add_grec(skb, pmc, type, 1, 0);
+			skb = add_grec(skb, pmc, dtype, 1, 1);
+		}
+		if (pmc->mca_crcount) {
+			pmc->mca_crcount--;
+			if (pmc->mca_sfmode == MCAST_EXCLUDE) {
+				type = MLD2_CHANGE_TO_INCLUDE;
+				skb = add_grec(skb, pmc, type, 1, 0);
+			}
+			if (pmc->mca_crcount == 0) {
+				mld_clear_zeros(&pmc->mca_tomb);
+				mld_clear_zeros(&pmc->mca_sources);
+			}
+		}
+		if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
+		    !pmc->mca_sources) {
+			if (pmc_prev)
+				pmc_prev->next = pmc_next;
+			else
+				idev->mc_tomb = pmc_next;
+			in6_dev_put(pmc->idev);
+			kfree(pmc);
+		} else
+			pmc_prev = pmc;
+	}
+	write_unlock_bh(&idev->mc_lock);
+
+	/* change recs */
+	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+		spin_lock_bh(&pmc->mca_lock);
+		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+			type = MLD2_BLOCK_OLD_SOURCES;
+			dtype = MLD2_ALLOW_NEW_SOURCES;
+		} else {
+			type = MLD2_ALLOW_NEW_SOURCES;
+			dtype = MLD2_BLOCK_OLD_SOURCES;
+		}
+		skb = add_grec(skb, pmc, type, 0, 0);
+		skb = add_grec(skb, pmc, dtype, 0, 1);	/* deleted sources */
+
+		/* filter mode changes */
+		if (pmc->mca_crcount) {
+			pmc->mca_crcount--;
+			if (pmc->mca_sfmode == MCAST_EXCLUDE)
+				type = MLD2_CHANGE_TO_EXCLUDE;
+			else
+				type = MLD2_CHANGE_TO_INCLUDE;
+			skb = add_grec(skb, pmc, type, 0, 0);
+		}
+		spin_unlock_bh(&pmc->mca_lock);
+	}
+	read_unlock_bh(&idev->lock);
+	if (!skb)
+		return;
+	(void) mld_sendpack(skb);
+}
+
+static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
+{
+	struct sock *sk = igmp6_socket->sk;
+	struct inet6_dev *idev;
+        struct sk_buff *skb;
+        struct icmp6hdr *hdr;
+	struct in6_addr *snd_addr;
+	struct in6_addr *addrp;
+	struct in6_addr addr_buf;
+	struct in6_addr all_routers;
+	int err, len, payload_len, full_len;
+	u8 ra[8] = { IPPROTO_ICMPV6, 0,
+		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
+		     IPV6_TLV_PADN, 0 };
+
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	snd_addr = addr;
+	if (type == ICMPV6_MGM_REDUCTION) {
+		snd_addr = &all_routers;
+		ipv6_addr_all_routers(&all_routers);
+	}
+
+	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+	payload_len = len + sizeof(ra);
+	full_len = sizeof(struct ipv6hdr) + payload_len;
+
+	skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+
+	if (skb == NULL) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		return;
+	}
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	if (dev->hard_header) {
+		unsigned char ha[MAX_ADDR_LEN];
+		ndisc_mc_map(snd_addr, ha, dev, 1);
+		if (dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len) < 0)
+			goto out;
+	}
+
+	if (ipv6_get_lladdr(dev, &addr_buf)) {
+		/* <draft-ietf-magma-mld-source-05.txt>:
+		 * use unspecified address as the source address 
+		 * when a valid link-local address is not available.
+		 */
+		memset(&addr_buf, 0, sizeof(addr_buf));
+	}
+
+	ip6_nd_hdr(sk, skb, dev, &addr_buf, snd_addr, NEXTHDR_HOP, payload_len);
+
+	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+	hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
+	memset(hdr, 0, sizeof(struct icmp6hdr));
+	hdr->icmp6_type = type;
+
+	addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
+	ipv6_addr_copy(addrp, addr);
+
+	hdr->icmp6_cksum = csum_ipv6_magic(&addr_buf, snd_addr, len,
+					   IPPROTO_ICMPV6,
+					   csum_partial((__u8 *) hdr, len, 0));
+
+	idev = in6_dev_get(skb->dev);
+
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+		dev_queue_xmit);
+	if (!err) {
+		if (type == ICMPV6_MGM_REDUCTION)
+			ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
+		else
+			ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES);
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	} else
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+	return;
+
+out:
+	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	kfree_skb(skb);
+}
+
+static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
+	struct in6_addr *psfsrc)
+{
+	struct ip6_sf_list *psf, *psf_prev;
+	int rv = 0;
+
+	psf_prev = NULL;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+			break;
+		psf_prev = psf;
+	}
+	if (!psf || psf->sf_count[sfmode] == 0) {
+		/* source filter not found, or count wrong =>  bug */
+		return -ESRCH;
+	}
+	psf->sf_count[sfmode]--;
+	if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
+		struct inet6_dev *idev = pmc->idev;
+
+		/* no more filters for this source */
+		if (psf_prev)
+			psf_prev->sf_next = psf->sf_next;
+		else
+			pmc->mca_sources = psf->sf_next;
+		if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
+		    !MLD_V1_SEEN(idev)) {
+			psf->sf_crcount = idev->mc_qrv;
+			psf->sf_next = pmc->mca_tomb;
+			pmc->mca_tomb = psf;
+			rv = 1;
+		} else
+			kfree(psf);
+	}
+	return rv;
+}
+
+static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
+			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+			  int delta)
+{
+	struct ifmcaddr6 *pmc;
+	int	changerec = 0;
+	int	i, err;
+
+	if (!idev)
+		return -ENODEV;
+	read_lock_bh(&idev->lock);
+	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+			break;
+	}
+	if (!pmc) {
+		/* MCA not found?? bug */
+		read_unlock_bh(&idev->lock);
+		return -ESRCH;
+	}
+	spin_lock_bh(&pmc->mca_lock);
+	sf_markstate(pmc);
+	if (!delta) {
+		if (!pmc->mca_sfcount[sfmode]) {
+			spin_unlock_bh(&pmc->mca_lock);
+			read_unlock_bh(&idev->lock);
+			return -EINVAL;
+		}
+		pmc->mca_sfcount[sfmode]--;
+	}
+	err = 0;
+	for (i=0; i<sfcount; i++) {
+		int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+
+		changerec |= rv > 0;
+		if (!err && rv < 0)
+			err = rv;
+	}
+	if (pmc->mca_sfmode == MCAST_EXCLUDE &&
+	    pmc->mca_sfcount[MCAST_EXCLUDE] == 0 &&
+	    pmc->mca_sfcount[MCAST_INCLUDE]) {
+		struct ip6_sf_list *psf;
+
+		/* filter mode change */
+		pmc->mca_sfmode = MCAST_INCLUDE;
+		pmc->mca_crcount = idev->mc_qrv;
+		idev->mc_ifc_count = pmc->mca_crcount;
+		for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+			psf->sf_crcount = 0;
+		mld_ifc_event(pmc->idev);
+	} else if (sf_setstate(pmc) || changerec)
+		mld_ifc_event(pmc->idev);
+	spin_unlock_bh(&pmc->mca_lock);
+	read_unlock_bh(&idev->lock);
+	return err;
+}
+
+/*
+ * Add multicast single-source filter to the interface list
+ */
+static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
+	struct in6_addr *psfsrc, int delta)
+{
+	struct ip6_sf_list *psf, *psf_prev;
+
+	psf_prev = NULL;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+			break;
+		psf_prev = psf;
+	}
+	if (!psf) {
+		psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+		if (!psf)
+			return -ENOBUFS;
+		memset(psf, 0, sizeof(*psf));
+		psf->sf_addr = *psfsrc;
+		if (psf_prev) {
+			psf_prev->sf_next = psf;
+		} else
+			pmc->mca_sources = psf;
+	}
+	psf->sf_count[sfmode]++;
+	return 0;
+}
+
+static void sf_markstate(struct ifmcaddr6 *pmc)
+{
+	struct ip6_sf_list *psf;
+	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+			psf->sf_oldin = mca_xcount ==
+				psf->sf_count[MCAST_EXCLUDE] &&
+				!psf->sf_count[MCAST_INCLUDE];
+		} else
+			psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
+}
+
+static int sf_setstate(struct ifmcaddr6 *pmc)
+{
+	struct ip6_sf_list *psf;
+	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+	int qrv = pmc->idev->mc_qrv;
+	int new_in, rv;
+
+	rv = 0;
+	for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
+				!psf->sf_count[MCAST_INCLUDE];
+		} else
+			new_in = psf->sf_count[MCAST_INCLUDE] != 0;
+		if (new_in != psf->sf_oldin) {
+			psf->sf_crcount = qrv;
+			rv++;
+		}
+	}
+	return rv;
+}
+
+/*
+ * Add multicast source filter list to the interface list
+ */
+static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
+			  int sfmode, int sfcount, struct in6_addr *psfsrc,
+			  int delta)
+{
+	struct ifmcaddr6 *pmc;
+	int	isexclude;
+	int	i, err;
+
+	if (!idev)
+		return -ENODEV;
+	read_lock_bh(&idev->lock);
+	for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+			break;
+	}
+	if (!pmc) {
+		/* MCA not found?? bug */
+		read_unlock_bh(&idev->lock);
+		return -ESRCH;
+	}
+	spin_lock_bh(&pmc->mca_lock);
+
+	sf_markstate(pmc);
+	isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
+	if (!delta)
+		pmc->mca_sfcount[sfmode]++;
+	err = 0;
+	for (i=0; i<sfcount; i++) {
+		err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+		if (err)
+			break;
+	}
+	if (err) {
+		int j;
+
+		if (!delta)
+			pmc->mca_sfcount[sfmode]--;
+		for (j=0; j<i; j++)
+			(void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+	} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
+		struct inet6_dev *idev = pmc->idev;
+		struct ip6_sf_list *psf;
+
+		/* filter mode change */
+		if (pmc->mca_sfcount[MCAST_EXCLUDE])
+			pmc->mca_sfmode = MCAST_EXCLUDE;
+		else if (pmc->mca_sfcount[MCAST_INCLUDE])
+			pmc->mca_sfmode = MCAST_INCLUDE;
+		/* else no filters; keep old mode for reports */
+
+		pmc->mca_crcount = idev->mc_qrv;
+		idev->mc_ifc_count = pmc->mca_crcount;
+		for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+			psf->sf_crcount = 0;
+		mld_ifc_event(idev);
+	} else if (sf_setstate(pmc))
+		mld_ifc_event(idev);
+	spin_unlock_bh(&pmc->mca_lock);
+	read_unlock_bh(&idev->lock);
+	return err;
+}
+
+static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
+{
+	struct ip6_sf_list *psf, *nextpsf;
+
+	for (psf=pmc->mca_tomb; psf; psf=nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
+	pmc->mca_tomb = NULL;
+	for (psf=pmc->mca_sources; psf; psf=nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
+	pmc->mca_sources = NULL;
+	pmc->mca_sfmode = MCAST_EXCLUDE;
+	pmc->mca_sfcount[MCAST_EXCLUDE] = 0;
+	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+}
+
+
+static void igmp6_join_group(struct ifmcaddr6 *ma)
+{
+	unsigned long delay;
+
+	if (ma->mca_flags & MAF_NOREPORT)
+		return;
+
+	igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+
+	delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+
+	spin_lock_bh(&ma->mca_lock);
+	if (del_timer(&ma->mca_timer)) {
+		atomic_dec(&ma->mca_refcnt);
+		delay = ma->mca_timer.expires - jiffies;
+	}
+
+	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+		atomic_inc(&ma->mca_refcnt);
+	ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
+	spin_unlock_bh(&ma->mca_lock);
+}
+
+static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+			    struct inet6_dev *idev)
+{
+	int err;
+
+	if (iml->sflist == 0) {
+		/* any-source empty exclude case */
+		return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+	}
+	err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+		iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+	sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+	iml->sflist = NULL;
+	return err;
+}
+
+static void igmp6_leave_group(struct ifmcaddr6 *ma)
+{
+	if (MLD_V1_SEEN(ma->idev)) {
+		if (ma->mca_flags & MAF_LAST_REPORTER)
+			igmp6_send(&ma->mca_addr, ma->idev->dev,
+				ICMPV6_MGM_REDUCTION);
+	} else {
+		mld_add_delrec(ma->idev, ma);
+		mld_ifc_event(ma->idev);
+	}
+}
+
+static void mld_gq_timer_expire(unsigned long data)
+{
+	struct inet6_dev *idev = (struct inet6_dev *)data;
+
+	idev->mc_gq_running = 0;
+	mld_send_report(idev, NULL);
+	__in6_dev_put(idev);
+}
+
+static void mld_ifc_timer_expire(unsigned long data)
+{
+	struct inet6_dev *idev = (struct inet6_dev *)data;
+
+	mld_send_cr(idev);
+	if (idev->mc_ifc_count) {
+		idev->mc_ifc_count--;
+		if (idev->mc_ifc_count)
+			mld_ifc_start_timer(idev, idev->mc_maxdelay);
+	}
+	__in6_dev_put(idev);
+}
+
+static void mld_ifc_event(struct inet6_dev *idev)
+{
+	if (MLD_V1_SEEN(idev))
+		return;
+	idev->mc_ifc_count = idev->mc_qrv;
+	mld_ifc_start_timer(idev, 1);
+}
+
+
+static void igmp6_timer_handler(unsigned long data)
+{
+	struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+
+	if (MLD_V1_SEEN(ma->idev))
+		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+	else
+		mld_send_report(ma->idev, ma);
+
+	spin_lock(&ma->mca_lock);
+	ma->mca_flags |=  MAF_LAST_REPORTER;
+	ma->mca_flags &= ~MAF_TIMER_RUNNING;
+	spin_unlock(&ma->mca_lock);
+	ma_put(ma);
+}
+
+/* Device going down */
+
+void ipv6_mc_down(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *i;
+
+	/* Withdraw multicast list */
+
+	read_lock_bh(&idev->lock);
+	idev->mc_ifc_count = 0;
+	if (del_timer(&idev->mc_ifc_timer))
+		__in6_dev_put(idev);
+	idev->mc_gq_running = 0;
+	if (del_timer(&idev->mc_gq_timer))
+		__in6_dev_put(idev);
+
+	for (i = idev->mc_list; i; i=i->next)
+		igmp6_group_dropped(i);
+	read_unlock_bh(&idev->lock);
+
+	mld_clear_delrec(idev);
+}
+
+
+/* Device going up */
+
+void ipv6_mc_up(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *i;
+
+	/* Install multicast list, except for all-nodes (already installed) */
+
+	read_lock_bh(&idev->lock);
+	for (i = idev->mc_list; i; i=i->next)
+		igmp6_group_added(i);
+	read_unlock_bh(&idev->lock);
+}
+
+/* IPv6 device initialization. */
+
+void ipv6_mc_init_dev(struct inet6_dev *idev)
+{
+	struct in6_addr maddr;
+
+	write_lock_bh(&idev->lock);
+	rwlock_init(&idev->mc_lock);
+	idev->mc_gq_running = 0;
+	init_timer(&idev->mc_gq_timer);
+	idev->mc_gq_timer.data = (unsigned long) idev;
+	idev->mc_gq_timer.function = &mld_gq_timer_expire;
+	idev->mc_tomb = NULL;
+	idev->mc_ifc_count = 0;
+	init_timer(&idev->mc_ifc_timer);
+	idev->mc_ifc_timer.data = (unsigned long) idev;
+	idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
+	idev->mc_qrv = MLD_QRV_DEFAULT;
+	idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+	idev->mc_v1_seen = 0;
+	write_unlock_bh(&idev->lock);
+
+	/* Add all-nodes address. */
+	ipv6_addr_all_nodes(&maddr);
+	ipv6_dev_mc_inc(idev->dev, &maddr);
+}
+
+/*
+ *	Device is about to be destroyed: clean up.
+ */
+
+void ipv6_mc_destroy_dev(struct inet6_dev *idev)
+{
+	struct ifmcaddr6 *i;
+	struct in6_addr maddr;
+
+	/* Deactivate timers */
+	ipv6_mc_down(idev);
+
+	/* Delete all-nodes address. */
+	ipv6_addr_all_nodes(&maddr);
+
+	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
+	 * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
+	 * fail.
+	 */
+	__ipv6_dev_mc_dec(idev, &maddr);
+
+	if (idev->cnf.forwarding) {
+		ipv6_addr_all_routers(&maddr);
+		__ipv6_dev_mc_dec(idev, &maddr);
+	}
+
+	write_lock_bh(&idev->lock);
+	while ((i = idev->mc_list) != NULL) {
+		idev->mc_list = i->next;
+		write_unlock_bh(&idev->lock);
+
+		igmp6_group_dropped(i);
+		ma_put(i);
+
+		write_lock_bh(&idev->lock);
+	}
+	write_unlock_bh(&idev->lock);
+}
+
+#ifdef CONFIG_PROC_FS
+struct igmp6_mc_iter_state {
+	struct net_device *dev;
+	struct inet6_dev *idev;
+};
+
+#define igmp6_mc_seq_private(seq)	((struct igmp6_mc_iter_state *)(seq)->private)
+
+static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
+{
+	struct ifmcaddr6 *im = NULL;
+	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+	for (state->dev = dev_base, state->idev = NULL;
+	     state->dev; 
+	     state->dev = state->dev->next) {
+		struct inet6_dev *idev;
+		idev = in6_dev_get(state->dev);
+		if (!idev)
+			continue;
+		read_lock_bh(&idev->lock);
+		im = idev->mc_list;
+		if (im) {
+			state->idev = idev;
+			break;
+		}
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+	}
+	return im;
+}
+
+static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr6 *im)
+{
+	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+	im = im->next;
+	while (!im) {
+		if (likely(state->idev != NULL)) {
+			read_unlock_bh(&state->idev->lock);
+			in6_dev_put(state->idev);
+		}
+		state->dev = state->dev->next;
+		if (!state->dev) {
+			state->idev = NULL;
+			break;
+		}
+		state->idev = in6_dev_get(state->dev);
+		if (!state->idev)
+			continue;
+		read_lock_bh(&state->idev->lock);
+		im = state->idev->mc_list;
+	}
+	return im;
+}
+
+static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ifmcaddr6 *im = igmp6_mc_get_first(seq);
+	if (im)
+		while (pos && (im = igmp6_mc_get_next(seq, im)) != NULL)
+			--pos;
+	return pos ? NULL : im;
+}
+
+static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return igmp6_mc_get_idx(seq, *pos);
+}
+
+static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ifmcaddr6 *im;
+	im = igmp6_mc_get_next(seq, v);
+	++*pos;
+	return im;
+}
+
+static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
+{
+	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+	if (likely(state->idev != NULL)) {
+		read_unlock_bh(&state->idev->lock);
+		in6_dev_put(state->idev);
+		state->idev = NULL;
+	}
+	state->dev = NULL;
+	read_unlock(&dev_base_lock);
+}
+
+static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct ifmcaddr6 *im = (struct ifmcaddr6 *)v;
+	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+	seq_printf(seq,
+		   "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n", 
+		   state->dev->ifindex, state->dev->name,
+		   NIP6(im->mca_addr),
+		   im->mca_users, im->mca_flags,
+		   (im->mca_flags&MAF_TIMER_RUNNING) ?
+		   jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
+	return 0;
+}
+
+static struct seq_operations igmp6_mc_seq_ops = {
+	.start	=	igmp6_mc_seq_start,
+	.next	=	igmp6_mc_seq_next,
+	.stop	=	igmp6_mc_seq_stop,
+	.show	=	igmp6_mc_seq_show,
+};
+
+static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct igmp6_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &igmp6_mc_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations igmp6_mc_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	igmp6_mc_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+
+struct igmp6_mcf_iter_state {
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct ifmcaddr6 *im;
+};
+
+#define igmp6_mcf_seq_private(seq)	((struct igmp6_mcf_iter_state *)(seq)->private)
+
+static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
+{
+	struct ip6_sf_list *psf = NULL;
+	struct ifmcaddr6 *im = NULL;
+	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+	for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
+	     state->dev; 
+	     state->dev = state->dev->next) {
+		struct inet6_dev *idev;
+		idev = in6_dev_get(state->dev);
+		if (unlikely(idev == NULL))
+			continue;
+		read_lock_bh(&idev->lock);
+		im = idev->mc_list;
+		if (likely(im != NULL)) {
+			spin_lock_bh(&im->mca_lock);
+			psf = im->mca_sources;
+			if (likely(psf != NULL)) {
+				state->im = im;
+				state->idev = idev;
+				break;
+			}
+			spin_unlock_bh(&im->mca_lock);
+		}
+		read_unlock_bh(&idev->lock);
+		in6_dev_put(idev);
+	}
+	return psf;
+}
+
+static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_sf_list *psf)
+{
+	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+	psf = psf->sf_next;
+	while (!psf) {
+		spin_unlock_bh(&state->im->mca_lock);
+		state->im = state->im->next;
+		while (!state->im) {
+			if (likely(state->idev != NULL)) {
+				read_unlock_bh(&state->idev->lock);
+				in6_dev_put(state->idev);
+			}
+			state->dev = state->dev->next;
+			if (!state->dev) {
+				state->idev = NULL;
+				goto out;
+			}
+			state->idev = in6_dev_get(state->dev);
+			if (!state->idev)
+				continue;
+			read_lock_bh(&state->idev->lock);
+			state->im = state->idev->mc_list;
+		}
+		if (!state->im)
+			break;
+		spin_lock_bh(&state->im->mca_lock);
+		psf = state->im->mca_sources;
+	}
+out:
+	return psf;
+}
+
+static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ip6_sf_list *psf = igmp6_mcf_get_first(seq);
+	if (psf)
+		while (pos && (psf = igmp6_mcf_get_next(seq, psf)) != NULL)
+			--pos;
+	return pos ? NULL : psf;
+}
+
+static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&dev_base_lock);
+	return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip6_sf_list *psf;
+	if (v == SEQ_START_TOKEN)
+		psf = igmp6_mcf_get_first(seq);
+	else
+		psf = igmp6_mcf_get_next(seq, v);
+	++*pos;
+	return psf;
+}
+
+static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
+{
+	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+	if (likely(state->im != NULL)) {
+		spin_unlock_bh(&state->im->mca_lock);
+		state->im = NULL;
+	}
+	if (likely(state->idev != NULL)) {
+		read_unlock_bh(&state->idev->lock);
+		in6_dev_put(state->idev);
+		state->idev = NULL;
+	}
+	state->dev = NULL;
+	read_unlock(&dev_base_lock);
+}
+
+static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
+{
+	struct ip6_sf_list *psf = (struct ip6_sf_list *)v;
+	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, 
+			   "%3s %6s "
+			   "%32s %32s %6s %6s\n", "Idx",
+			   "Device", "Multicast Address",
+			   "Source Address", "INC", "EXC");
+	} else {
+		seq_printf(seq,
+			   "%3d %6.6s "
+			   "%04x%04x%04x%04x%04x%04x%04x%04x "
+			   "%04x%04x%04x%04x%04x%04x%04x%04x "
+			   "%6lu %6lu\n",
+			   state->dev->ifindex, state->dev->name,
+			   NIP6(state->im->mca_addr),
+			   NIP6(psf->sf_addr),
+			   psf->sf_count[MCAST_INCLUDE],
+			   psf->sf_count[MCAST_EXCLUDE]);
+	}
+	return 0;
+}
+
+static struct seq_operations igmp6_mcf_seq_ops = {
+	.start	=	igmp6_mcf_seq_start,
+	.next	=	igmp6_mcf_seq_next,
+	.stop	=	igmp6_mcf_seq_stop,
+	.show	=	igmp6_mcf_seq_show,
+};
+
+static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct igmp6_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &igmp6_mcf_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations igmp6_mcf_seq_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	igmp6_mcf_seq_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	seq_release_private,
+};
+#endif
+
+int __init igmp6_init(struct net_proto_family *ops)
+{
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	int err;
+
+	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket);
+	if (err < 0) {
+		printk(KERN_ERR
+		       "Failed to initialize the IGMP6 control socket (err %d).\n",
+		       err);
+		igmp6_socket = NULL; /* For safety. */
+		return err;
+	}
+
+	sk = igmp6_socket->sk;
+	sk->sk_allocation = GFP_ATOMIC;
+	sk->sk_prot->unhash(sk);
+
+	np = inet6_sk(sk);
+	np->hop_limit = 1;
+
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+#endif
+
+	return 0;
+}
+
+void igmp6_cleanup(void)
+{
+	sock_release(igmp6_socket);
+	igmp6_socket = NULL; /* for safety */
+
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("mcfilter6");
+	proc_net_remove("igmp6");
+#endif
+}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
new file mode 100644
index 00000000000..7c291f4e9ed
--- /dev/null
+++ b/net/ipv6/ndisc.c
@@ -0,0 +1,1690 @@
+/*
+ *	Neighbour Discovery for IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Mike Shaver		<shaver@ingenia.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *	Changes:
+ *
+ *	Lars Fenneberg			:	fixed MTU setting on receipt
+ *						of an RA.
+ *
+ *	Janos Farkas			:	kmalloc failure checks
+ *	Alexey Kuznetsov		:	state machine reworked
+ *						and moved to net/core.
+ *	Pekka Savola			:	RFC2461 validation
+ *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
+ */
+
+/* Set to 3 to get tracing... */
+#define ND_DEBUG 1
+
+#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
+#define ND_NOPRINTK(x...) do { ; } while(0)
+#define ND_PRINTK0 ND_PRINTK
+#define ND_PRINTK1 ND_NOPRINTK
+#define ND_PRINTK2 ND_NOPRINTK
+#define ND_PRINTK3 ND_NOPRINTK
+#if ND_DEBUG >= 1
+#undef ND_PRINTK1
+#define ND_PRINTK1 ND_PRINTK
+#endif
+#if ND_DEBUG >= 2
+#undef ND_PRINTK2
+#define ND_PRINTK2 ND_PRINTK
+#endif
+#if ND_DEBUG >= 3
+#undef ND_PRINTK3
+#define ND_PRINTK3 ND_PRINTK
+#endif
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+
+#include <net/flow.h>
+#include <net/ip6_checksum.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+static struct socket *ndisc_socket;
+
+static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
+static int ndisc_constructor(struct neighbour *neigh);
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
+static int pndisc_constructor(struct pneigh_entry *n);
+static void pndisc_destructor(struct pneigh_entry *n);
+static void pndisc_redo(struct sk_buff *skb);
+
+static struct neigh_ops ndisc_generic_ops = {
+	.family =		AF_INET6,
+	.solicit =		ndisc_solicit,
+	.error_report =		ndisc_error_report,
+	.output =		neigh_resolve_output,
+	.connected_output =	neigh_connected_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+static struct neigh_ops ndisc_hh_ops = {
+	.family =		AF_INET6,
+	.solicit =		ndisc_solicit,
+	.error_report =		ndisc_error_report,
+	.output =		neigh_resolve_output,
+	.connected_output =	neigh_resolve_output,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+
+static struct neigh_ops ndisc_direct_ops = {
+	.family =		AF_INET6,
+	.output =		dev_queue_xmit,
+	.connected_output =	dev_queue_xmit,
+	.hh_output =		dev_queue_xmit,
+	.queue_xmit =		dev_queue_xmit,
+};
+
+struct neigh_table nd_tbl = {
+	.family =	AF_INET6,
+	.entry_size =	sizeof(struct neighbour) + sizeof(struct in6_addr),
+	.key_len =	sizeof(struct in6_addr),
+	.hash =		ndisc_hash,
+	.constructor =	ndisc_constructor,
+	.pconstructor =	pndisc_constructor,
+	.pdestructor =	pndisc_destructor,
+	.proxy_redo =	pndisc_redo,
+	.id =		"ndisc_cache",
+	.parms = {
+		.tbl =			&nd_tbl,
+		.base_reachable_time =	30 * HZ,
+		.retrans_time =	 1 * HZ,
+		.gc_staletime =	60 * HZ,
+		.reachable_time =		30 * HZ,
+		.delay_probe_time =	 5 * HZ,
+		.queue_len =		 3,
+		.ucast_probes =	 3,
+		.mcast_probes =	 3,
+		.anycast_delay =	 1 * HZ,
+		.proxy_delay =		(8 * HZ) / 10,
+		.proxy_qlen =		64,
+	},
+	.gc_interval =	  30 * HZ,
+	.gc_thresh1 =	 128,
+	.gc_thresh2 =	 512,
+	.gc_thresh3 =	1024,
+};
+
+/* ND options */
+struct ndisc_options {
+	struct nd_opt_hdr *nd_opt_array[__ND_OPT_MAX];
+};
+
+#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
+#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
+#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
+#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
+
+#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
+
+/*
+ * Return the padding between the option length and the start of the
+ * link addr.  Currently only IP-over-InfiniBand needs this, although
+ * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
+ * also need a pad of 2.
+ */
+static int ndisc_addr_option_pad(unsigned short type)
+{
+	switch (type) {
+	case ARPHRD_INFINIBAND: return 2;
+	default:                return 0;
+	}
+}
+
+static inline int ndisc_opt_addr_space(struct net_device *dev)
+{
+	return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
+}
+
+static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
+				  unsigned short addr_type)
+{
+	int space = NDISC_OPT_SPACE(data_len);
+	int pad   = ndisc_addr_option_pad(addr_type);
+
+	opt[0] = type;
+	opt[1] = space>>3;
+
+	memset(opt + 2, 0, pad);
+	opt   += pad;
+	space -= pad;
+
+	memcpy(opt+2, data, data_len);
+	data_len += 2;
+	opt += data_len;
+	if ((space -= data_len) > 0)
+		memset(opt, 0, space);
+	return opt + space;
+}
+
+static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+					    struct nd_opt_hdr *end)
+{
+	int type;
+	if (!cur || !end || cur >= end)
+		return NULL;
+	type = cur->nd_opt_type;
+	do {
+		cur = ((void *)cur) + (cur->nd_opt_len << 3);
+	} while(cur < end && cur->nd_opt_type != type);
+	return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
+}
+
+static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+						 struct ndisc_options *ndopts)
+{
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
+
+	if (!nd_opt || opt_len < 0 || !ndopts)
+		return NULL;
+	memset(ndopts, 0, sizeof(*ndopts));
+	while (opt_len) {
+		int l;
+		if (opt_len < sizeof(struct nd_opt_hdr))
+			return NULL;
+		l = nd_opt->nd_opt_len << 3;
+		if (opt_len < l || l == 0)
+			return NULL;
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+		case ND_OPT_MTU:
+		case ND_OPT_REDIRECT_HDR:
+			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
+				ND_PRINTK2(KERN_WARNING
+					   "%s(): duplicated ND6 option found: type=%d\n",
+					   __FUNCTION__,
+					   nd_opt->nd_opt_type);
+			} else {
+				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
+			}
+			break;
+		case ND_OPT_PREFIX_INFO:
+			ndopts->nd_opts_pi_end = nd_opt;
+			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0)
+				ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
+			break;
+		default:
+			/*
+			 * Unknown options must be silently ignored,
+			 * to accommodate future extension to the protocol.
+			 */
+			ND_PRINTK2(KERN_NOTICE
+				   "%s(): ignored unsupported option; type=%d, len=%d\n",
+				   __FUNCTION__,
+				   nd_opt->nd_opt_type, nd_opt->nd_opt_len);
+		}
+		opt_len -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+	return ndopts;
+}
+
+static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
+				      struct net_device *dev)
+{
+	u8 *lladdr = (u8 *)(p + 1);
+	int lladdrlen = p->nd_opt_len << 3;
+	int prepad = ndisc_addr_option_pad(dev->type);
+	if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
+		return NULL;
+	return (lladdr + prepad);
+}
+
+int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
+{
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
+	case ARPHRD_FDDI:
+		ipv6_eth_mc_map(addr, buf);
+		return 0;
+	case ARPHRD_IEEE802_TR:
+		ipv6_tr_mc_map(addr,buf);
+		return 0;
+	case ARPHRD_ARCNET:
+		ipv6_arcnet_mc_map(addr, buf);
+		return 0;
+	case ARPHRD_INFINIBAND:
+		ipv6_ib_mc_map(addr, buf);
+		return 0;
+	default:
+		if (dir) {
+			memcpy(buf, dev->broadcast, dev->addr_len);
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
+{
+	const u32 *p32 = pkey;
+	u32 addr_hash, i;
+
+	addr_hash = 0;
+	for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
+		addr_hash ^= *p32++;
+
+	return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
+}
+
+static int ndisc_constructor(struct neighbour *neigh)
+{
+	struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
+	struct net_device *dev = neigh->dev;
+	struct inet6_dev *in6_dev;
+	struct neigh_parms *parms;
+	int is_multicast = ipv6_addr_is_multicast(addr);
+
+	rcu_read_lock();
+	in6_dev = in6_dev_get(dev);
+	if (in6_dev == NULL) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	parms = in6_dev->nd_parms;
+	__neigh_parms_put(neigh->parms);
+	neigh->parms = neigh_parms_clone(parms);
+	rcu_read_unlock();
+
+	neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
+	if (dev->hard_header == NULL) {
+		neigh->nud_state = NUD_NOARP;
+		neigh->ops = &ndisc_direct_ops;
+		neigh->output = neigh->ops->queue_xmit;
+	} else {
+		if (is_multicast) {
+			neigh->nud_state = NUD_NOARP;
+			ndisc_mc_map(addr, neigh->ha, dev, 1);
+		} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+			neigh->nud_state = NUD_NOARP;
+			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+			if (dev->flags&IFF_LOOPBACK)
+				neigh->type = RTN_LOCAL;
+		} else if (dev->flags&IFF_POINTOPOINT) {
+			neigh->nud_state = NUD_NOARP;
+			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+		}
+		if (dev->hard_header_cache)
+			neigh->ops = &ndisc_hh_ops;
+		else
+			neigh->ops = &ndisc_generic_ops;
+		if (neigh->nud_state&NUD_VALID)
+			neigh->output = neigh->ops->connected_output;
+		else
+			neigh->output = neigh->ops->output;
+	}
+	in6_dev_put(in6_dev);
+	return 0;
+}
+
+static int pndisc_constructor(struct pneigh_entry *n)
+{
+	struct in6_addr *addr = (struct in6_addr*)&n->key;
+	struct in6_addr maddr;
+	struct net_device *dev = n->dev;
+
+	if (dev == NULL || __in6_dev_get(dev) == NULL)
+		return -EINVAL;
+	addrconf_addr_solict_mult(addr, &maddr);
+	ipv6_dev_mc_inc(dev, &maddr);
+	return 0;
+}
+
+static void pndisc_destructor(struct pneigh_entry *n)
+{
+	struct in6_addr *addr = (struct in6_addr*)&n->key;
+	struct in6_addr maddr;
+	struct net_device *dev = n->dev;
+
+	if (dev == NULL || __in6_dev_get(dev) == NULL)
+		return;
+	addrconf_addr_solict_mult(addr, &maddr);
+	ipv6_dev_mc_dec(dev, &maddr);
+}
+
+/*
+ *	Send a Neighbour Advertisement
+ */
+
+static inline void ndisc_flow_init(struct flowi *fl, u8 type,
+			    struct in6_addr *saddr, struct in6_addr *daddr)
+{
+	memset(fl, 0, sizeof(*fl));
+	ipv6_addr_copy(&fl->fl6_src, saddr);
+	ipv6_addr_copy(&fl->fl6_dst, daddr);
+	fl->proto	 	= IPPROTO_ICMPV6;
+	fl->fl_icmp_type	= type;
+	fl->fl_icmp_code	= 0;
+}
+
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
+	 	   int router, int solicited, int override, int inc_opt) 
+{
+	struct in6_addr tmpaddr;
+	struct inet6_ifaddr *ifp;
+	struct inet6_dev *idev;
+	struct flowi fl;
+	struct dst_entry* dst;
+        struct sock *sk = ndisc_socket->sk;
+	struct in6_addr *src_addr;
+        struct nd_msg *msg;
+        int len;
+        struct sk_buff *skb;
+	int err;
+
+	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+
+	/* for anycast or proxy, solicited_addr != src_addr */
+	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+ 	if (ifp) {
+		src_addr = solicited_addr;
+		in6_ifa_put(ifp);
+	} else {
+		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+			return;
+		src_addr = &tmpaddr;
+	}
+
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+
+	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
+	if (inc_opt) {
+		if (dev->addr_len)
+			len += ndisc_opt_addr_space(dev);
+		else
+			inc_opt = 0;
+	}
+
+	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+				  1, &err);
+
+	if (skb == NULL) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 NA: %s() failed to allocate an skb.\n", 
+			   __FUNCTION__);
+		dst_release(dst);
+		return;
+	}
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
+
+	msg = (struct nd_msg *)skb_put(skb, len);
+	skb->h.raw = (unsigned char*)msg;
+
+        msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+        msg->icmph.icmp6_code = 0;
+        msg->icmph.icmp6_cksum = 0;
+
+        msg->icmph.icmp6_unused = 0;
+        msg->icmph.icmp6_router    = router;
+        msg->icmph.icmp6_solicited = solicited;
+        msg->icmph.icmp6_override  = !!override;
+
+        /* Set the target address. */
+	ipv6_addr_copy(&msg->target, solicited_addr);
+
+	if (inc_opt)
+		ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+				       dev->addr_len, dev->type);
+
+	/* checksum */
+	msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len, 
+						 IPPROTO_ICMPV6,
+						 csum_partial((__u8 *) msg, 
+							      len, 0));
+
+	skb->dst = dst;
+	idev = in6_dev_get(dst->dev);
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+	if (!err) {
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}        
+
+void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
+		   struct in6_addr *solicit,
+		   struct in6_addr *daddr, struct in6_addr *saddr) 
+{
+	struct flowi fl;
+	struct dst_entry* dst;
+	struct inet6_dev *idev;
+        struct sock *sk = ndisc_socket->sk;
+        struct sk_buff *skb;
+        struct nd_msg *msg;
+	struct in6_addr addr_buf;
+        int len;
+	int err;
+	int send_llinfo;
+
+	if (saddr == NULL) {
+		if (ipv6_get_lladdr(dev, &addr_buf))
+			return;
+		saddr = &addr_buf;
+	}
+
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+
+	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
+	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+	send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
+	if (send_llinfo)
+		len += ndisc_opt_addr_space(dev);
+
+	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+				  1, &err);
+	if (skb == NULL) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 NA: %s() failed to allocate an skb.\n", 
+			   __FUNCTION__);
+		dst_release(dst);
+		return;
+	}
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+
+	msg = (struct nd_msg *)skb_put(skb, len);
+	skb->h.raw = (unsigned char*)msg;
+	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
+	msg->icmph.icmp6_code = 0;
+	msg->icmph.icmp6_cksum = 0;
+	msg->icmph.icmp6_unused = 0;
+
+	/* Set the target address. */
+	ipv6_addr_copy(&msg->target, solicit);
+
+	if (send_llinfo)
+		ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
+				       dev->addr_len, dev->type);
+
+	/* checksum */
+	msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+						 daddr, len, 
+						 IPPROTO_ICMPV6,
+						 csum_partial((__u8 *) msg, 
+							      len, 0));
+	/* send it! */
+	skb->dst = dst;
+	idev = in6_dev_get(dst->dev);
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+	if (!err) {
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}
+
+void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
+		   struct in6_addr *daddr)
+{
+	struct flowi fl;
+	struct dst_entry* dst;
+	struct inet6_dev *idev;
+	struct sock *sk = ndisc_socket->sk;
+        struct sk_buff *skb;
+        struct icmp6hdr *hdr;
+	__u8 * opt;
+        int len;
+	int err;
+
+	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+
+	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
+	if (!dst)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		dst_release(dst);
+		return;
+	}
+
+	len = sizeof(struct icmp6hdr);
+	if (dev->addr_len)
+		len += ndisc_opt_addr_space(dev);
+
+        skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+				  1, &err);
+	if (skb == NULL) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 RS: %s() failed to allocate an skb.\n", 
+			   __FUNCTION__);
+		dst_release(dst);
+		return;
+	}
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+
+        hdr = (struct icmp6hdr *)skb_put(skb, len);
+        skb->h.raw = (unsigned char*)hdr;
+        hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
+        hdr->icmp6_code = 0;
+        hdr->icmp6_cksum = 0;
+        hdr->icmp6_unused = 0;
+
+	opt = (u8*) (hdr + 1);
+
+	if (dev->addr_len)
+		ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
+				       dev->addr_len, dev->type);
+
+	/* checksum */
+	hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
+					   IPPROTO_ICMPV6,
+					   csum_partial((__u8 *) hdr, len, 0));
+
+	/* send it! */
+	skb->dst = dst;
+	idev = in6_dev_get(dst->dev);
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);	
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+	if (!err) {
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}
+		   
+
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
+{
+	/*
+	 *	"The sender MUST return an ICMP
+	 *	 destination unreachable"
+	 */
+	dst_link_failure(skb);
+	kfree_skb(skb);
+}
+
+/* Called with locked neigh: either read or both */
+
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
+{
+	struct in6_addr *saddr = NULL;
+	struct in6_addr mcaddr;
+	struct net_device *dev = neigh->dev;
+	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
+	int probes = atomic_read(&neigh->probes);
+
+	if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
+		saddr = &skb->nh.ipv6h->saddr;
+
+	if ((probes -= neigh->parms->ucast_probes) < 0) {
+		if (!(neigh->nud_state & NUD_VALID)) {
+			ND_PRINTK1(KERN_DEBUG
+				   "%s(): trying to ucast probe in NUD_INVALID: "
+				   "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+				   __FUNCTION__,
+				   NIP6(*target));
+		}
+		ndisc_send_ns(dev, neigh, target, target, saddr);
+	} else if ((probes -= neigh->parms->app_probes) < 0) {
+#ifdef CONFIG_ARPD
+		neigh_app_ns(neigh);
+#endif
+	} else {
+		addrconf_addr_solict_mult(target, &mcaddr);
+		ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+	}
+}
+
+static void ndisc_recv_ns(struct sk_buff *skb)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb->tail - msg->opt;
+	struct ndisc_options ndopts;
+	struct net_device *dev = skb->dev;
+	struct inet6_ifaddr *ifp;
+	struct inet6_dev *idev = NULL;
+	struct neighbour *neigh;
+	int dad = ipv6_addr_any(saddr);
+	int inc;
+
+	if (ipv6_addr_is_multicast(&msg->target)) {
+		ND_PRINTK2(KERN_WARNING 
+			   "ICMPv6 NS: multicast target address");
+		return;
+	}
+
+	/*
+	 * RFC2461 7.1.1:
+	 * DAD has to be destined for solicited node multicast address.
+	 */
+	if (dad &&
+	    !(daddr->s6_addr32[0] == htonl(0xff020000) &&
+	      daddr->s6_addr32[1] == htonl(0x00000000) &&
+	      daddr->s6_addr32[2] == htonl(0x00000001) &&
+	      daddr->s6_addr [12] == 0xff )) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NS: bad DAD packet (wrong destination)\n");
+		return;
+	}
+
+	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+		ND_PRINTK2(KERN_WARNING 
+			   "ICMPv6 NS: invalid ND options\n");
+		return;
+	}
+
+	if (ndopts.nd_opts_src_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
+		if (!lladdr) {
+			ND_PRINTK2(KERN_WARNING
+				   "ICMPv6 NS: invalid link-layer address length\n");
+			return;
+		}
+
+		/* RFC2461 7.1.1:
+	 	 *	If the IP source address is the unspecified address, 
+		 *	there MUST NOT be source link-layer address option 
+		 *	in the message.
+		 */
+		if (dad) {
+			ND_PRINTK2(KERN_WARNING 
+				   "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
+			return;
+		}
+	}
+
+	inc = ipv6_addr_is_multicast(daddr);
+
+	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
+		if (ifp->flags & IFA_F_TENTATIVE) {
+			/* Address is tentative. If the source
+			   is unspecified address, it is someone
+			   does DAD, otherwise we ignore solicitations
+			   until DAD timer expires.
+			 */
+			if (!dad)
+				goto out;
+			if (dev->type == ARPHRD_IEEE802_TR) {
+				unsigned char *sadr = skb->mac.raw;
+				if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+				    sadr[9] == dev->dev_addr[1] &&
+				    sadr[10] == dev->dev_addr[2] &&
+				    sadr[11] == dev->dev_addr[3] &&
+				    sadr[12] == dev->dev_addr[4] &&
+				    sadr[13] == dev->dev_addr[5]) {
+					/* looped-back to us */
+					goto out;
+				}
+			}
+			addrconf_dad_failure(ifp); 
+			return;
+		}
+
+		idev = ifp->idev;
+	} else {
+		idev = in6_dev_get(dev);
+		if (!idev) {
+			/* XXX: count this drop? */
+			return;
+		}
+
+		if (ipv6_chk_acast_addr(dev, &msg->target) ||
+		    (idev->cnf.forwarding && 
+		     pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+			if (skb->stamp.tv_sec != LOCALLY_ENQUEUED &&
+			    skb->pkt_type != PACKET_HOST &&
+			    inc != 0 &&
+			    idev->nd_parms->proxy_delay != 0) {
+				/*
+				 * for anycast or proxy,
+				 * sender should delay its response 
+				 * by a random time between 0 and 
+				 * MAX_ANYCAST_DELAY_TIME seconds.
+				 * (RFC2461) -- yoshfuji
+				 */
+				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
+				if (n)
+					pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
+				goto out;
+			}
+		} else
+			goto out;
+	}
+
+	if (dad) {
+		struct in6_addr maddr;
+
+		ipv6_addr_all_nodes(&maddr);
+		ndisc_send_na(dev, NULL, &maddr, &msg->target,
+			      idev->cnf.forwarding, 0, (ifp != NULL), 1);
+		goto out;
+	}
+
+	if (inc)
+		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
+	else
+		NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
+
+	/* 
+	 *	update / create cache entry
+	 *	for the source address
+	 */
+	neigh = __neigh_lookup(&nd_tbl, saddr, dev,
+			       !inc || lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE, 
+			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+			     NEIGH_UPDATE_F_OVERRIDE);
+	if (neigh || !dev->hard_header) {
+		ndisc_send_na(dev, neigh, saddr, &msg->target,
+			      idev->cnf.forwarding, 
+			      1, (ifp != NULL && inc), inc);
+		if (neigh)
+			neigh_release(neigh);
+	}
+
+out:
+	if (ifp)
+		in6_ifa_put(ifp);
+	else
+		in6_dev_put(idev);
+
+	return;
+}
+
+static void ndisc_recv_na(struct sk_buff *skb)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb->tail - msg->opt;
+	struct ndisc_options ndopts;
+	struct net_device *dev = skb->dev;
+	struct inet6_ifaddr *ifp;
+	struct neighbour *neigh;
+
+	if (skb->len < sizeof(struct nd_msg)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NA: packet too short\n");
+		return;
+	}
+
+	if (ipv6_addr_is_multicast(&msg->target)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NA: target address is multicast.\n");
+		return;
+	}
+
+	if (ipv6_addr_is_multicast(daddr) &&
+	    msg->icmph.icmp6_solicited) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NA: solicited NA is multicasted.\n");
+		return;
+	}
+		
+	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NS: invalid ND option\n");
+		return;
+	}
+	if (ndopts.nd_opts_tgt_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
+		if (!lladdr) {
+			ND_PRINTK2(KERN_WARNING
+				   "ICMPv6 NA: invalid link-layer address length\n");
+			return;
+		}
+	}
+	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) {
+		if (ifp->flags & IFA_F_TENTATIVE) {
+			addrconf_dad_failure(ifp);
+			return;
+		}
+		/* What should we make now? The advertisement
+		   is invalid, but ndisc specs say nothing
+		   about it. It could be misconfiguration, or
+		   an smart proxy agent tries to help us :-)
+		 */
+		ND_PRINTK1(KERN_WARNING
+			   "ICMPv6 NA: someone advertises our address on %s!\n",
+			   ifp->idev->dev->name);
+		in6_ifa_put(ifp);
+		return;
+	}
+	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+
+	if (neigh) {
+		u8 old_flags = neigh->flags;
+
+		if (neigh->nud_state & NUD_FAILED)
+			goto out;
+
+		neigh_update(neigh, lladdr,
+			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
+			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+
+		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
+			/*
+			 * Change: router to host
+			 */
+			struct rt6_info *rt;
+			rt = rt6_get_dflt_router(saddr, dev);
+			if (rt)
+				ip6_del_rt(rt, NULL, NULL);
+		}
+
+out:
+		neigh_release(neigh);
+	}
+}
+
+static void ndisc_recv_rs(struct sk_buff *skb)
+{
+	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
+	struct neighbour *neigh;
+	struct inet6_dev *idev;
+	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	struct ndisc_options ndopts;
+	u8 *lladdr = NULL;
+
+	if (skb->len < sizeof(*rs_msg))
+		return;
+
+	idev = in6_dev_get(skb->dev);
+	if (!idev) {
+		if (net_ratelimit())
+			ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
+		return;
+	}
+
+	/* Don't accept RS if we're not in router mode */
+	if (!idev->cnf.forwarding)
+		goto out;
+
+	/*
+	 * Don't update NCE if src = ::;
+	 * this implies that the source node has no ip address assigned yet.
+	 */
+	if (ipv6_addr_any(saddr))
+		goto out;
+
+	/* Parse ND options */
+	if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+		if (net_ratelimit())
+			ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
+		goto out;
+	}
+
+	if (ndopts.nd_opts_src_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+					     skb->dev);
+		if (!lladdr)
+			goto out;
+	}
+
+	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
+	if (neigh) {
+		neigh_update(neigh, lladdr, NUD_STALE,
+			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+			     NEIGH_UPDATE_F_OVERRIDE|
+			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+		neigh_release(neigh);
+	}
+out:
+	in6_dev_put(idev);
+}
+
+static void ndisc_router_discovery(struct sk_buff *skb)
+{
+        struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+	struct neighbour *neigh = NULL;
+	struct inet6_dev *in6_dev;
+	struct rt6_info *rt;
+	int lifetime;
+	struct ndisc_options ndopts;
+	int optlen;
+
+	__u8 * opt = (__u8 *)(ra_msg + 1);
+
+	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+
+	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 RA: source address is not link-local.\n");
+		return;
+	}
+	if (optlen < 0) {
+		ND_PRINTK2(KERN_WARNING 
+			   "ICMPv6 RA: packet too short\n");
+		return;
+	}
+
+	/*
+	 *	set the RA_RECV flag in the interface
+	 */
+
+	in6_dev = in6_dev_get(skb->dev);
+	if (in6_dev == NULL) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 RA: can't find inet6 device for %s.\n",
+			   skb->dev->name);
+		return;
+	}
+	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
+		in6_dev_put(in6_dev);
+		return;
+	}
+
+	if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+		in6_dev_put(in6_dev);
+		ND_PRINTK2(KERN_WARNING
+			   "ICMP6 RA: invalid ND options\n");
+		return;
+	}
+
+	if (in6_dev->if_flags & IF_RS_SENT) {
+		/*
+		 *	flag that an RA was received after an RS was sent
+		 *	out on this interface.
+		 */
+		in6_dev->if_flags |= IF_RA_RCVD;
+	}
+
+	/*
+	 * Remember the managed/otherconf flags from most recently
+	 * received RA message (RFC 2462) -- yoshfuji
+	 */
+	in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
+				IF_RA_OTHERCONF)) |
+				(ra_msg->icmph.icmp6_addrconf_managed ?
+					IF_RA_MANAGED : 0) |
+				(ra_msg->icmph.icmp6_addrconf_other ?
+					IF_RA_OTHERCONF : 0);
+
+	lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+
+	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+
+	if (rt)
+		neigh = rt->rt6i_nexthop;
+
+	if (rt && lifetime == 0) {
+		neigh_clone(neigh);
+		ip6_del_rt(rt, NULL, NULL);
+		rt = NULL;
+	}
+
+	if (rt == NULL && lifetime) {
+		ND_PRINTK3(KERN_DEBUG
+			   "ICMPv6 RA: adding default router.\n");
+
+		rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+		if (rt == NULL) {
+			ND_PRINTK0(KERN_ERR
+				   "ICMPv6 RA: %s() failed to add default route.\n",
+				   __FUNCTION__);
+			in6_dev_put(in6_dev);
+			return;
+		}
+
+		neigh = rt->rt6i_nexthop;
+		if (neigh == NULL) {
+			ND_PRINTK0(KERN_ERR
+				   "ICMPv6 RA: %s() got default router without neighbour.\n",
+				   __FUNCTION__);
+			dst_release(&rt->u.dst);
+			in6_dev_put(in6_dev);
+			return;
+		}
+		neigh->flags |= NTF_ROUTER;
+	}
+
+	if (rt)
+		rt->rt6i_expires = jiffies + (HZ * lifetime);
+
+	if (ra_msg->icmph.icmp6_hop_limit) {
+		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+		if (rt)
+			rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+	}
+
+	/*
+	 *	Update Reachable Time and Retrans Timer
+	 */
+
+	if (in6_dev->nd_parms) {
+		unsigned long rtime = ntohl(ra_msg->retrans_timer);
+
+		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
+			rtime = (rtime*HZ)/1000;
+			if (rtime < HZ/10)
+				rtime = HZ/10;
+			in6_dev->nd_parms->retrans_time = rtime;
+			in6_dev->tstamp = jiffies;
+			inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+		}
+
+		rtime = ntohl(ra_msg->reachable_time);
+		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
+			rtime = (rtime*HZ)/1000;
+
+			if (rtime < HZ/10)
+				rtime = HZ/10;
+
+			if (rtime != in6_dev->nd_parms->base_reachable_time) {
+				in6_dev->nd_parms->base_reachable_time = rtime;
+				in6_dev->nd_parms->gc_staletime = 3 * rtime;
+				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
+				in6_dev->tstamp = jiffies;
+				inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+			}
+		}
+	}
+
+	/*
+	 *	Process options.
+	 */
+
+	if (!neigh)
+		neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+				       skb->dev, 1);
+	if (neigh) {
+		u8 *lladdr = NULL;
+		if (ndopts.nd_opts_src_lladdr) {
+			lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+						     skb->dev);
+			if (!lladdr) {
+				ND_PRINTK2(KERN_WARNING
+					   "ICMPv6 RA: invalid link-layer address length\n");
+				goto out;
+			}
+		}
+		neigh_update(neigh, lladdr, NUD_STALE,
+			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+			     NEIGH_UPDATE_F_OVERRIDE|
+			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+			     NEIGH_UPDATE_F_ISROUTER);
+	}
+
+	if (ndopts.nd_opts_pi) {
+		struct nd_opt_hdr *p;
+		for (p = ndopts.nd_opts_pi;
+		     p;
+		     p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
+			addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
+		}
+	}
+
+	if (ndopts.nd_opts_mtu) {
+		u32 mtu;
+
+		memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
+		mtu = ntohl(mtu);
+
+		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
+			ND_PRINTK2(KERN_WARNING
+				   "ICMPv6 RA: invalid mtu: %d\n",
+				   mtu);
+		} else if (in6_dev->cnf.mtu6 != mtu) {
+			in6_dev->cnf.mtu6 = mtu;
+
+			if (rt)
+				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+
+			rt6_mtu_change(skb->dev, mtu);
+		}
+	}
+			
+	if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 RA: invalid RA options");
+	}
+out:
+	if (rt)
+		dst_release(&rt->u.dst);
+	else if (neigh)
+		neigh_release(neigh);
+	in6_dev_put(in6_dev);
+}
+
+static void ndisc_redirect_rcv(struct sk_buff *skb)
+{
+	struct inet6_dev *in6_dev;
+	struct icmp6hdr *icmph;
+	struct in6_addr *dest;
+	struct in6_addr *target;	/* new first hop to destination */
+	struct neighbour *neigh;
+	int on_link = 0;
+	struct ndisc_options ndopts;
+	int optlen;
+	u8 *lladdr = NULL;
+
+	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: source address is not link-local.\n");
+		return;
+	}
+
+	optlen = skb->tail - skb->h.raw;
+	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+
+	if (optlen < 0) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: packet too short\n");
+		return;
+	}
+
+	icmph = (struct icmp6hdr *) skb->h.raw;
+	target = (struct in6_addr *) (icmph + 1);
+	dest = target + 1;
+
+	if (ipv6_addr_is_multicast(dest)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: destination address is multicast.\n");
+		return;
+	}
+
+	if (ipv6_addr_equal(dest, target)) {
+		on_link = 1;
+	} else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
+		ND_PRINTK2(KERN_WARNING 
+			   "ICMPv6 Redirect: target address is not link-local.\n");
+		return;
+	}
+
+	in6_dev = in6_dev_get(skb->dev);
+	if (!in6_dev)
+		return;
+	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
+		in6_dev_put(in6_dev);
+		return;
+	}
+
+	/* RFC2461 8.1: 
+	 *	The IP source address of the Redirect MUST be the same as the current
+	 *	first-hop router for the specified ICMP Destination Address.
+	 */
+		
+	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: invalid ND options\n");
+		in6_dev_put(in6_dev);
+		return;
+	}
+	if (ndopts.nd_opts_tgt_lladdr) {
+		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+					     skb->dev);
+		if (!lladdr) {
+			ND_PRINTK2(KERN_WARNING
+				   "ICMPv6 Redirect: invalid link-layer address length\n");
+			in6_dev_put(in6_dev);
+			return;
+		}
+	}
+
+	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+	if (neigh) {
+		rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, 
+			     on_link);
+		neigh_release(neigh);
+	}
+	in6_dev_put(in6_dev);
+}
+
+void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
+			 struct in6_addr *target)
+{
+	struct sock *sk = ndisc_socket->sk;
+	int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+	struct sk_buff *buff;
+	struct icmp6hdr *icmph;
+	struct in6_addr saddr_buf;
+	struct in6_addr *addrp;
+	struct net_device *dev;
+	struct rt6_info *rt;
+	struct dst_entry *dst;
+	struct inet6_dev *idev;
+	struct flowi fl;
+	u8 *opt;
+	int rd_len;
+	int err;
+	int hlen;
+	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+
+	dev = skb->dev;
+
+	if (ipv6_get_lladdr(dev, &saddr_buf)) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: no link-local address on %s\n",
+			   dev->name);
+ 		return;
+ 	}
+
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+
+	dst = ip6_route_output(NULL, &fl);
+	if (dst == NULL)
+		return;
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err) {
+		dst_release(dst);
+		return;
+	}
+
+	rt = (struct rt6_info *) dst;
+
+	if (rt->rt6i_flags & RTF_GATEWAY) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 Redirect: destination is not a neighbour.\n");
+		dst_release(dst);
+		return;
+	}
+	if (!xrlim_allow(dst, 1*HZ)) {
+		dst_release(dst);
+		return;
+	}
+
+	if (dev->addr_len) {
+		read_lock_bh(&neigh->lock);
+		if (neigh->nud_state & NUD_VALID) {
+			memcpy(ha_buf, neigh->ha, dev->addr_len);
+			read_unlock_bh(&neigh->lock);
+			ha = ha_buf;
+			len += ndisc_opt_addr_space(dev);
+		} else
+			read_unlock_bh(&neigh->lock);
+	}
+
+	rd_len = min_t(unsigned int,
+		     IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+	rd_len &= ~0x7;
+	len += rd_len;
+
+	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+				   1, &err);
+	if (buff == NULL) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
+			   __FUNCTION__);
+		dst_release(dst);
+		return;
+	}
+
+	hlen = 0;
+
+	skb_reserve(buff, LL_RESERVED_SPACE(dev));
+	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+		   IPPROTO_ICMPV6, len);
+
+	icmph = (struct icmp6hdr *)skb_put(buff, len);
+	buff->h.raw = (unsigned char*)icmph;
+
+	memset(icmph, 0, sizeof(struct icmp6hdr));
+	icmph->icmp6_type = NDISC_REDIRECT;
+
+	/*
+	 *	copy target and destination addresses
+	 */
+
+	addrp = (struct in6_addr *)(icmph + 1);
+	ipv6_addr_copy(addrp, target);
+	addrp++;
+	ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+
+	opt = (u8*) (addrp + 1);
+
+	/*
+	 *	include target_address option
+	 */
+
+	if (ha)
+		opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
+					     dev->addr_len, dev->type);
+
+	/*
+	 *	build redirect option and copy skb over to the new packet.
+	 */
+
+	memset(opt, 0, 8);	
+	*(opt++) = ND_OPT_REDIRECT_HDR;
+	*(opt++) = (rd_len >> 3);
+	opt += 6;
+
+	memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+
+	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+					     len, IPPROTO_ICMPV6,
+					     csum_partial((u8 *) icmph, len, 0));
+
+	buff->dst = dst;
+	idev = in6_dev_get(dst->dev);
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
+	if (!err) {
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS);
+		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+	}
+
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}
+
+static void pndisc_redo(struct sk_buff *skb)
+{
+	ndisc_rcv(skb);
+	kfree_skb(skb);
+}
+
+int ndisc_rcv(struct sk_buff *skb)
+{
+	struct nd_msg *msg;
+
+	if (!pskb_may_pull(skb, skb->len))
+		return 0;
+
+	msg = (struct nd_msg *) skb->h.raw;
+
+	__skb_push(skb, skb->data-skb->h.raw);
+
+	if (skb->nh.ipv6h->hop_limit != 255) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NDISC: invalid hop-limit: %d\n",
+			   skb->nh.ipv6h->hop_limit);
+		return 0;
+	}
+
+	if (msg->icmph.icmp6_code != 0) {
+		ND_PRINTK2(KERN_WARNING 
+			   "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
+			   msg->icmph.icmp6_code);
+		return 0;
+	}
+
+	switch (msg->icmph.icmp6_type) {
+	case NDISC_NEIGHBOUR_SOLICITATION:
+		ndisc_recv_ns(skb);
+		break;
+
+	case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		ndisc_recv_na(skb);
+		break;
+
+	case NDISC_ROUTER_SOLICITATION:
+		ndisc_recv_rs(skb);
+		break;
+
+	case NDISC_ROUTER_ADVERTISEMENT:
+		ndisc_router_discovery(skb);
+		break;
+
+	case NDISC_REDIRECT:
+		ndisc_redirect_rcv(skb);
+		break;
+	};
+
+	return 0;
+}
+
+static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	switch (event) {
+	case NETDEV_CHANGEADDR:
+		neigh_changeaddr(&nd_tbl, dev);
+		fib6_run_gc(~0UL);
+		break;
+	case NETDEV_DOWN:
+		neigh_ifdown(&nd_tbl, dev);
+		fib6_run_gc(~0UL);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ndisc_netdev_notifier = {
+	.notifier_call = ndisc_netdev_event,
+};
+
+#ifdef CONFIG_SYSCTL
+static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+					 const char *func, const char *dev_name)
+{
+	static char warncomm[TASK_COMM_LEN];
+	static int warned;
+	if (strcmp(warncomm, current->comm) && warned < 5) {
+		strcpy(warncomm, current->comm);
+		printk(KERN_WARNING
+			"process `%s' is using deprecated sysctl (%s) "
+			"net.ipv6.neigh.%s.%s; "
+			"Use net.ipv6.neigh.%s.%s_ms "
+			"instead.\n",
+			warncomm, func,
+			dev_name, ctl->procname,
+			dev_name, ctl->procname);
+		warned++;
+	}
+}
+
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net_device *dev = ctl->extra1;
+	struct inet6_dev *idev;
+	int ret;
+
+	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
+	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
+		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
+
+	switch (ctl->ctl_name) {
+	case NET_NEIGH_RETRANS_TIME:
+		ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+		break;
+	case NET_NEIGH_REACHABLE_TIME:
+		ret = proc_dointvec_jiffies(ctl, write,
+					    filp, buffer, lenp, ppos);
+		break;
+	case NET_NEIGH_RETRANS_TIME_MS:
+	case NET_NEIGH_REACHABLE_TIME_MS:
+		ret = proc_dointvec_ms_jiffies(ctl, write,
+					       filp, buffer, lenp, ppos);
+		break;
+	default:
+		ret = -1;
+	}
+
+	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
+		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
+		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
+			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+		idev->tstamp = jiffies;
+		inet6_ifinfo_notify(RTM_NEWLINK, idev);
+		in6_dev_put(idev);
+	}
+	return ret;
+}
+
+static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+					int nlen, void __user *oldval,
+					size_t __user *oldlenp,
+					void __user *newval, size_t newlen,
+					void **context)
+{
+	struct net_device *dev = ctl->extra1;
+	struct inet6_dev *idev;
+	int ret;
+
+	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
+	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
+		ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
+
+	switch (ctl->ctl_name) {
+	case NET_NEIGH_REACHABLE_TIME:
+		ret = sysctl_jiffies(ctl, name, nlen,
+				     oldval, oldlenp, newval, newlen,
+				     context);
+		break;
+	case NET_NEIGH_RETRANS_TIME_MS:
+	case NET_NEIGH_REACHABLE_TIME_MS:
+		 ret = sysctl_ms_jiffies(ctl, name, nlen,
+					 oldval, oldlenp, newval, newlen,
+					 context);
+		 break;
+	default:
+		ret = 0;
+	}
+
+	if (newval && newlen && ret > 0 &&
+	    dev && (idev = in6_dev_get(dev)) != NULL) {
+		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
+		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
+			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+		idev->tstamp = jiffies;
+		inet6_ifinfo_notify(RTM_NEWLINK, idev);
+		in6_dev_put(idev);
+	}
+
+	return ret;
+}
+
+#endif
+
+int __init ndisc_init(struct net_proto_family *ops)
+{
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+        int err;
+
+	err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
+	if (err < 0) {
+		ND_PRINTK0(KERN_ERR
+			   "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n", 
+			   err);
+		ndisc_socket = NULL; /* For safety. */
+		return err;
+	}
+
+	sk = ndisc_socket->sk;
+	np = inet6_sk(sk);
+	sk->sk_allocation = GFP_ATOMIC;
+	np->hop_limit = 255;
+	/* Do not loopback ndisc messages */
+	np->mc_loop = 0;
+	sk->sk_prot->unhash(sk);
+
+        /*
+         * Initialize the neighbour table
+         */
+	
+	neigh_table_init(&nd_tbl);
+
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, 
+			      "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      &ndisc_ifinfo_sysctl_strategy);
+#endif
+
+	register_netdevice_notifier(&ndisc_netdev_notifier);
+	return 0;
+}
+
+void ndisc_cleanup(void)
+{
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&nd_tbl.parms);
+#endif
+	neigh_table_clear(&nd_tbl);
+	sock_release(ndisc_socket);
+	ndisc_socket = NULL; /* For safety. */
+}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
new file mode 100644
index 00000000000..77ec704c9ee
--- /dev/null
+++ b/net/ipv6/netfilter/Kconfig
@@ -0,0 +1,242 @@
+#
+# IP netfilter configuration
+#
+
+menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
+	depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
+
+#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK
+#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
+#  dep_tristate '  FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
+#fi
+config IP6_NF_QUEUE
+	tristate "Userspace queueing via NETLINK"
+	---help---
+
+	  This option adds a queue handler to the kernel for IPv6
+	  packets which lets us to receive the filtered packets
+	  with QUEUE target using libiptc as we can do with
+	  the IPv4 now.
+
+	  (C) Fernando Anton 2001
+	  IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+	  Universidad Carlos III de Madrid
+	  Universidad Politecnica de Alcala de Henares
+	  email: <fanton@it.uc3m.es>.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_IPTABLES
+	tristate "IP6 tables support (required for filtering/masq/NAT)"
+	help
+	  ip6tables is a general, extensible packet identification framework.
+	  Currently only the packet filtering and packet mangling subsystem
+	  for IPv6 use this, but connection tracking is going to follow.
+	  Say 'Y' or 'M' here if you want to use either of those.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+# The simple matches.
+config IP6_NF_MATCH_LIMIT
+	tristate "limit match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  limit matching allows you to control the rate at which a rule can be
+	  matched: mainly useful in combination with the LOG target ("LOG
+	  target support", below) and to avoid some Denial of Service attacks.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_MAC
+	tristate "MAC address match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  mac matching allows you to match packets based on the source
+	  Ethernet address of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_RT
+	tristate "Routing header match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  rt matching allows you to match packets based on the routing
+	  header of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_OPTS
+	tristate "Hop-by-hop and Dst opts header match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  This allows one to match packets based on the hop-by-hop
+	  and destination options headers of a packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_FRAG
+	tristate "Fragmentation header match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  frag matching allows you to match packets based on the fragmentation
+	  header of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_HL
+	tristate "HL match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  HL matching allows you to match packets based on the hop
+	  limit of the packet.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_MULTIPORT
+	tristate "Multiple port match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  Multiport matching allows you to match TCP or UDP packets based on
+	  a series of source or destination ports: normally a rule can only
+	  match a single range of ports.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_OWNER
+	tristate "Owner match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  Packet owner matching allows you to match locally-generated packets
+	  based on who created them: the user, group, process or session.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+#  dep_tristate '  MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
+config IP6_NF_MATCH_MARK
+	tristate "netfilter MARK match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  Netfilter mark matching allows you to match packets based on the
+	  `nfmark' value in the packet.  This can be set by the MARK target
+	  (see below).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_IPV6HEADER
+	tristate "IPv6 Extension Headers Match"
+	depends on IP6_NF_IPTABLES
+	help
+	  This module allows one to match packets based upon
+	  the ipv6 extension headers.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_AHESP
+	tristate "AH/ESP match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  This module allows one to match AH and ESP packets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_LENGTH
+	tristate "Packet Length match support"
+	depends on IP6_NF_IPTABLES
+	help
+	  This option allows you to match the length of a packet against a
+	  specific value or range of values.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_EUI64
+	tristate "EUI64 address check"
+	depends on IP6_NF_IPTABLES
+	help
+	  This module performs checking on the IPv6 source address
+	  Compares the last 64 bits with the EUI64 (delivered
+	  from the MAC address) address
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_MATCH_PHYSDEV
+	tristate "Physdev match support"
+	depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER
+	help
+	  Physdev packet matching matches against the physical bridge ports
+	  the IP packet arrived on or will leave by.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+#  dep_tristate '  Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
+#  dep_tristate '  TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES
+#  if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
+#    dep_tristate '  Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES 
+#  fi
+#  if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+#    dep_tristate '  Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES
+#    dep_tristate '  Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES
+#  fi
+# The targets
+config IP6_NF_FILTER
+	tristate "Packet filtering"
+	depends on IP6_NF_IPTABLES
+	help
+	  Packet filtering defines a table `filter', which has a series of
+	  rules for simple packet filtering at local input, forwarding and
+	  local output.  See the man page for iptables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP6_NF_TARGET_LOG
+	tristate "LOG target support"
+	depends on IP6_NF_FILTER
+	help
+	  This option adds a `LOG' target, which allows you to create rules in
+	  any iptables table which records the packet header to the syslog.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+#  if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
+#    dep_tristate '    REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
+#    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+#      dep_tristate '    MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER
+#    fi
+#  fi
+config IP6_NF_MANGLE
+	tristate "Packet mangling"
+	depends on IP6_NF_IPTABLES
+	help
+	  This option adds a `mangle' table to iptables: see the man page for
+	  iptables(8).  This table is used for various packet alterations
+	  which can effect how the packet is routed.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+#    dep_tristate '    TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
+config IP6_NF_TARGET_MARK
+	tristate "MARK target support"
+	depends on IP6_NF_MANGLE
+	help
+	  This option adds a `MARK' target, which allows you to create rules
+	  in the `mangle' table which alter the netfilter mark (nfmark) field
+	  associated with the packet packet prior to routing. This can change
+	  the routing method (see `Use netfilter MARK value as routing
+	  key') and can also be used by other subsystems to change their
+	  behavior.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+#dep_tristate '  LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
+config IP6_NF_RAW
+	tristate  'raw table support (required for TRACE)'
+	depends on IP6_NF_IPTABLES
+	help
+	  This option adds a `raw' table to ip6tables. This table is the very
+	  first in the netfilter framework and hooks in at the PREROUTING
+	  and OUTPUT chains.
+	
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/modules.txt>.  If unsure, say `N'.
+
+endmenu
+
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
new file mode 100644
index 00000000000..2e51714953b
--- /dev/null
+++ b/net/ipv6/netfilter/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for the netfilter modules on top of IPv6.
+#
+
+# Link order matters here.
+obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
+obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
+obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
+obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
+obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
+obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
+obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
+obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
+obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
+obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
+obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
+obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
+obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
+obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
+obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
+obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
+obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 00000000000..c54830b8959
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,741 @@
+/*
+ * This is a module which is used for queueing IPv6 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2001 Fernando Anton, this code is GPL.
+ *     IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+ *     Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
+ *     Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
+ *     email: fanton@it.uc3m.es
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
+ *             to adapt it to IPv6
+ *             HEAVILY based in ipqueue.c by James Morris. It's just
+ *             a little modified version of it, so he's nearly the
+ *             real coder of this.
+ *             Few changes needed, mainly the hard_routing code and
+ *             the netlink socket protocol (we're NETLINK_IP6_FW).
+ * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
+ * 2005-02-04: Added /proc counter for dropped packets; fixed so
+ *             packets aren't delivered to user space if they're going
+ *             to be dropped.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip6_queue"
+#define NET_IPQ_QMAX 2088
+#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
+
+struct ipq_rt_info {
+	struct in6_addr daddr;
+	struct in6_addr saddr;
+};
+
+struct ipq_queue_entry {
+	struct list_head list;
+	struct nf_info *info;
+	struct sk_buff *skb;
+	struct ipq_rt_info rt_info;
+};
+
+typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+
+static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static DEFINE_RWLOCK(queue_lock);
+static int peer_pid;
+static unsigned int copy_range;
+static unsigned int queue_total;
+static unsigned int queue_dropped = 0;
+static unsigned int queue_user_dropped = 0;
+static struct sock *ipqnl;
+static LIST_HEAD(queue_list);
+static DECLARE_MUTEX(ipqnl_sem);
+
+static void
+ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+{
+	nf_reinject(entry->skb, entry->info, verdict);
+	kfree(entry);
+}
+
+static inline void
+__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+{
+       list_add(&entry->list, &queue_list);
+       queue_total++;
+}
+
+/*
+ * Find and return a queued entry matched by cmpfn, or return the last
+ * entry if cmpfn is NULL.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct list_head *p;
+
+	list_for_each_prev(p, &queue_list) {
+		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+		
+		if (!cmpfn || cmpfn(entry, data))
+			return entry;
+	}
+	return NULL;
+}
+
+static inline void
+__ipq_dequeue_entry(struct ipq_queue_entry *entry)
+{
+	list_del(&entry->list);
+	queue_total--;
+}
+
+static inline struct ipq_queue_entry *
+__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry;
+
+	entry = __ipq_find_entry(cmpfn, data);
+	if (entry == NULL)
+		return NULL;
+
+	__ipq_dequeue_entry(entry);
+	return entry;
+}
+
+
+static inline void
+__ipq_flush(int verdict)
+{
+	struct ipq_queue_entry *entry;
+	
+	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
+		ipq_issue_verdict(entry, verdict);
+}
+
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int status = 0;
+	
+	switch(mode) {
+	case IPQ_COPY_NONE:
+	case IPQ_COPY_META:
+		copy_mode = mode;
+		copy_range = 0;
+		break;
+		
+	case IPQ_COPY_PACKET:
+		copy_mode = mode;
+		copy_range = range;
+		if (copy_range > 0xFFFF)
+			copy_range = 0xFFFF;
+		break;
+		
+	default:
+		status = -EINVAL;
+
+	}
+	return status;
+}
+
+static inline void
+__ipq_reset(void)
+{
+	peer_pid = 0;
+	net_disable_timestamp();
+	__ipq_set_mode(IPQ_COPY_NONE, 0);
+	__ipq_flush(NF_DROP);
+}
+
+static struct ipq_queue_entry *
+ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry;
+	
+	write_lock_bh(&queue_lock);
+	entry = __ipq_find_dequeue_entry(cmpfn, data);
+	write_unlock_bh(&queue_lock);
+	return entry;
+}
+
+static void
+ipq_flush(int verdict)
+{
+	write_lock_bh(&queue_lock);
+	__ipq_flush(verdict);
+	write_unlock_bh(&queue_lock);
+}
+
+static struct sk_buff *
+ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+{
+	unsigned char *old_tail;
+	size_t size = 0;
+	size_t data_len = 0;
+	struct sk_buff *skb;
+	struct ipq_packet_msg *pmsg;
+	struct nlmsghdr *nlh;
+
+	read_lock_bh(&queue_lock);
+	
+	switch (copy_mode) {
+	case IPQ_COPY_META:
+	case IPQ_COPY_NONE:
+		size = NLMSG_SPACE(sizeof(*pmsg));
+		data_len = 0;
+		break;
+	
+	case IPQ_COPY_PACKET:
+		if (copy_range == 0 || copy_range > entry->skb->len)
+			data_len = entry->skb->len;
+		else
+			data_len = copy_range;
+		
+		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+		break;
+	
+	default:
+		*errp = -EINVAL;
+		read_unlock_bh(&queue_lock);
+		return NULL;
+	}
+
+	read_unlock_bh(&queue_lock);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+		
+	old_tail= skb->tail;
+	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+	pmsg = NLMSG_DATA(nlh);
+	memset(pmsg, 0, sizeof(*pmsg));
+
+	pmsg->packet_id       = (unsigned long )entry;
+	pmsg->data_len        = data_len;
+	pmsg->timestamp_sec   = entry->skb->stamp.tv_sec;
+	pmsg->timestamp_usec  = entry->skb->stamp.tv_usec;
+	pmsg->mark            = entry->skb->nfmark;
+	pmsg->hook            = entry->info->hook;
+	pmsg->hw_protocol     = entry->skb->protocol;
+	
+	if (entry->info->indev)
+		strcpy(pmsg->indev_name, entry->info->indev->name);
+	else
+		pmsg->indev_name[0] = '\0';
+	
+	if (entry->info->outdev)
+		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	else
+		pmsg->outdev_name[0] = '\0';
+	
+	if (entry->info->indev && entry->skb->dev) {
+		pmsg->hw_type = entry->skb->dev->type;
+		if (entry->skb->dev->hard_header_parse)
+			pmsg->hw_addrlen =
+				entry->skb->dev->hard_header_parse(entry->skb,
+				                                   pmsg->hw_addr);
+	}
+	
+	if (data_len)
+		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+			BUG();
+		
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+	if (skb)
+		kfree_skb(skb);
+	*errp = -EINVAL;
+	printk(KERN_ERR "ip6_queue: error creating packet message\n");
+	return NULL;
+}
+
+static int
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+{
+	int status = -EINVAL;
+	struct sk_buff *nskb;
+	struct ipq_queue_entry *entry;
+
+	if (copy_mode == IPQ_COPY_NONE)
+		return -EAGAIN;
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL) {
+		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
+		return -ENOMEM;
+	}
+
+	entry->info = info;
+	entry->skb = skb;
+
+	if (entry->info->hook == NF_IP_LOCAL_OUT) {
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+
+		entry->rt_info.daddr = iph->daddr;
+		entry->rt_info.saddr = iph->saddr;
+	}
+
+	nskb = ipq_build_packet_message(entry, &status);
+	if (nskb == NULL)
+		goto err_out_free;
+		
+	write_lock_bh(&queue_lock);
+	
+	if (!peer_pid)
+		goto err_out_free_nskb; 
+
+	if (queue_total >= queue_maxlen) {
+                queue_dropped++;
+		status = -ENOSPC;
+		if (net_ratelimit())
+		        printk (KERN_WARNING "ip6_queue: fill at %d entries, "
+				"dropping packet(s).  Dropped: %d\n", queue_total,
+				queue_dropped);
+		goto err_out_free_nskb;
+	}
+
+ 	/* netlink_unicast will either free the nskb or attach it to a socket */ 
+	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+	if (status < 0) {
+ 	        queue_user_dropped++;
+		goto err_out_unlock;
+	}
+	
+	__ipq_enqueue_entry(entry);
+
+	write_unlock_bh(&queue_lock);
+	return status;
+	
+err_out_free_nskb:
+	kfree_skb(nskb); 
+	
+err_out_unlock:
+	write_unlock_bh(&queue_lock);
+
+err_out_free:
+	kfree(entry);
+	return status;
+}
+
+static int
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+{
+	int diff;
+	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
+
+	if (v->data_len < sizeof(*user_iph))
+		return 0;
+	diff = v->data_len - e->skb->len;
+	if (diff < 0)
+		skb_trim(e->skb, v->data_len);
+	else if (diff > 0) {
+		if (v->data_len > 0xFFFF)
+			return -EINVAL;
+		if (diff > skb_tailroom(e->skb)) {
+			struct sk_buff *newskb;
+			
+			newskb = skb_copy_expand(e->skb,
+			                         skb_headroom(e->skb),
+			                         diff,
+			                         GFP_ATOMIC);
+			if (newskb == NULL) {
+				printk(KERN_WARNING "ip6_queue: OOM "
+				      "in mangle, dropping packet\n");
+				return -ENOMEM;
+			}
+			if (e->skb->sk)
+				skb_set_owner_w(newskb, e->skb->sk);
+			kfree_skb(e->skb);
+			e->skb = newskb;
+		}
+		skb_put(e->skb, diff);
+	}
+	if (!skb_ip_make_writable(&e->skb, v->data_len))
+		return -ENOMEM;
+	memcpy(e->skb->data, v->payload, v->data_len);
+	e->skb->nfcache |= NFC_ALTERED;
+
+	/*
+	 * Extra routing may needed on local out, as the QUEUE target never
+	 * returns control to the table.
+         * Not a nice way to cmp, but works
+	 */
+	if (e->info->hook == NF_IP_LOCAL_OUT) {
+		struct ipv6hdr *iph = e->skb->nh.ipv6h;
+		if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) ||
+		    !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr))
+			return ip6_route_me_harder(e->skb);
+	}
+	return 0;
+}
+
+static inline int
+id_cmp(struct ipq_queue_entry *e, unsigned long id)
+{
+	return (id == (unsigned long )e);
+}
+
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+	struct ipq_queue_entry *entry;
+
+	if (vmsg->value > NF_MAX_VERDICT)
+		return -EINVAL;
+
+	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	if (entry == NULL)
+		return -ENOENT;
+	else {
+		int verdict = vmsg->value;
+		
+		if (vmsg->data_len && vmsg->data_len == len)
+			if (ipq_mangle_ipv6(vmsg, entry) < 0)
+				verdict = NF_DROP;
+		
+		ipq_issue_verdict(entry, verdict);
+		return 0;
+	}
+}
+
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int status;
+
+	write_lock_bh(&queue_lock);
+	status = __ipq_set_mode(mode, range);
+	write_unlock_bh(&queue_lock);
+	return status;
+}
+
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+                 unsigned char type, unsigned int len)
+{
+	int status = 0;
+
+	if (len < sizeof(*pmsg))
+		return -EINVAL;
+
+	switch (type) {
+	case IPQM_MODE:
+		status = ipq_set_mode(pmsg->msg.mode.value,
+		                      pmsg->msg.mode.range);
+		break;
+		
+	case IPQM_VERDICT:
+		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+			status = -EINVAL;
+		else
+			status = ipq_set_verdict(&pmsg->msg.verdict,
+			                         len - sizeof(*pmsg));
+			break;
+	default:
+		status = -EINVAL;
+	}
+	return status;
+}
+
+static int
+dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+{
+	if (entry->info->indev)
+		if (entry->info->indev->ifindex == ifindex)
+			return 1;
+			
+	if (entry->info->outdev)
+		if (entry->info->outdev->ifindex == ifindex)
+			return 1;
+
+	return 0;
+}
+
+static void
+ipq_dev_drop(int ifindex)
+{
+	struct ipq_queue_entry *entry;
+	
+	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
+		ipq_issue_verdict(entry, NF_DROP);
+}
+
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+static inline void
+ipq_rcv_skb(struct sk_buff *skb)
+{
+	int status, type, pid, flags, nlmsglen, skblen;
+	struct nlmsghdr *nlh;
+
+	skblen = skb->len;
+	if (skblen < sizeof(*nlh))
+		return;
+
+	nlh = (struct nlmsghdr *)skb->data;
+	nlmsglen = nlh->nlmsg_len;
+	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+		return;
+
+	pid = nlh->nlmsg_pid;
+	flags = nlh->nlmsg_flags;
+	
+	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
+		RCV_SKB_FAIL(-EINVAL);
+		
+	if (flags & MSG_TRUNC)
+		RCV_SKB_FAIL(-ECOMM);
+		
+	type = nlh->nlmsg_type;
+	if (type < NLMSG_NOOP || type >= IPQM_MAX)
+		RCV_SKB_FAIL(-EINVAL);
+		
+	if (type <= IPQM_BASE)
+		return;
+	
+	if (security_netlink_recv(skb))
+		RCV_SKB_FAIL(-EPERM);	
+
+	write_lock_bh(&queue_lock);
+	
+	if (peer_pid) {
+		if (peer_pid != pid) {
+			write_unlock_bh(&queue_lock);
+			RCV_SKB_FAIL(-EBUSY);
+		}
+	} else {
+		net_enable_timestamp();
+		peer_pid = pid;
+	}
+		
+	write_unlock_bh(&queue_lock);
+	
+	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+	                          skblen - NLMSG_LENGTH(0));
+	if (status < 0)
+		RCV_SKB_FAIL(status);
+		
+	if (flags & NLM_F_ACK)
+		netlink_ack(skb, nlh, 0);
+        return;
+}
+
+static void
+ipq_rcv_sk(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		if (down_trylock(&ipqnl_sem))
+			return;
+			
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			ipq_rcv_skb(skb);
+			kfree_skb(skb);
+		}
+		
+		up(&ipqnl_sem);
+
+	} while (ipqnl && ipqnl->sk_receive_queue.qlen);
+}
+
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+                  unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	/* Drop any packets associated with the downed device */
+	if (event == NETDEV_DOWN)
+		ipq_dev_drop(dev->ifindex);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_dev_notifier = {
+	.notifier_call	= ipq_rcv_dev_event,
+};
+
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+                 unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (event == NETLINK_URELEASE &&
+	    n->protocol == NETLINK_IP6_FW && n->pid) {
+		write_lock_bh(&queue_lock);
+		if (n->pid == peer_pid)
+			__ipq_reset();
+		write_unlock_bh(&queue_lock);
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipq_nl_notifier = {
+	.notifier_call	= ipq_rcv_nl_event,
+};
+
+static struct ctl_table_header *ipq_sysctl_header;
+
+static ctl_table ipq_table[] = {
+	{
+		.ctl_name	= NET_IPQ_QMAX,
+		.procname	= NET_IPQ_QMAX_NAME,
+		.data		= &queue_maxlen,
+		.maxlen		= sizeof(queue_maxlen),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+ 	{ .ctl_name = 0 }
+};
+
+static ctl_table ipq_dir_table[] = {
+	{
+		.ctl_name	= NET_IPV6,
+		.procname	= "ipv6",
+		.mode		= 0555,
+		.child		= ipq_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table ipq_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ipq_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static int
+ipq_get_info(char *buffer, char **start, off_t offset, int length)
+{
+	int len;
+
+	read_lock_bh(&queue_lock);
+	
+	len = sprintf(buffer,
+	              "Peer PID          : %d\n"
+	              "Copy mode         : %hu\n"
+	              "Copy range        : %u\n"
+	              "Queue length      : %u\n"
+	              "Queue max. length : %u\n"
+		      "Queue dropped     : %u\n"
+		      "Netfilter dropped : %u\n",
+	              peer_pid,
+	              copy_mode,
+	              copy_range,
+	              queue_total,
+	              queue_maxlen,
+		      queue_dropped,
+		      queue_user_dropped);
+
+	read_unlock_bh(&queue_lock);
+	
+	*start = buffer + offset;
+	len -= offset;
+	if (len > length)
+		len = length;
+	else if (len < 0)
+		len = 0;
+	return len;
+}
+
+static int
+init_or_cleanup(int init)
+{
+	int status = -ENOMEM;
+	struct proc_dir_entry *proc;
+	
+	if (!init)
+		goto cleanup;
+
+	netlink_register_notifier(&ipq_nl_notifier);
+	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
+	if (ipqnl == NULL) {
+		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+		goto cleanup_netlink_notifier;
+	}
+
+	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
+	if (proc)
+		proc->owner = THIS_MODULE;
+	else {
+		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
+		goto cleanup_ipqnl;
+	}
+	
+	register_netdevice_notifier(&ipq_dev_notifier);
+	ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
+	
+	status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
+	if (status < 0) {
+		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
+		goto cleanup_sysctl;
+	}
+	return status;
+
+cleanup:
+	nf_unregister_queue_handler(PF_INET6);
+	synchronize_net();
+	ipq_flush(NF_DROP);
+	
+cleanup_sysctl:
+	unregister_sysctl_table(ipq_sysctl_header);
+	unregister_netdevice_notifier(&ipq_dev_notifier);
+	proc_net_remove(IPQ_PROC_FS_NAME);
+	
+cleanup_ipqnl:
+	sock_release(ipqnl->sk_socket);
+	down(&ipqnl_sem);
+	up(&ipqnl_sem);
+	
+cleanup_netlink_notifier:
+	netlink_unregister_notifier(&ipq_nl_notifier);
+	return status;
+}
+
+static int __init init(void)
+{
+	
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+MODULE_DESCRIPTION("IPv6 packet queue handler");
+MODULE_LICENSE("GPL");
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
new file mode 100644
index 00000000000..c735276fdd5
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -0,0 +1,1970 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
+ * 	- increase module usage count as soon as we have rules inside
+ * 	  a table
+ * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *      - new extension header parser code
+ */
+#include <linux/config.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv6 packet filter");
+
+#define IPV6_HDR_LEN	(sizeof(struct ipv6hdr))
+#define IPV6_OPTHDR_LEN	(sizeof(struct ipv6_opt_hdr))
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...)  printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x)						\
+do {								\
+	if (!(x))						\
+		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
+		       __FUNCTION__, __FILE__, __LINE__);	\
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(ip6t_mutex);
+
+/* Must have mutex */
+#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/* Locking is simple: we assume at worst case there will be one packet
+   in user context and one from bottom halves (or soft irq if Alexey's
+   softnet patch was applied).
+
+   We keep a set of rules for each CPU, so we can avoid write-locking
+   them; doing a readlock_bh() stops packets coming through if we're
+   in user context.
+
+   To be cache friendly on SMP, we arrange them like so:
+   [ n-entries ]
+   ... cache-align padding ...
+   [ n-entries ]
+
+   Hence the start of any table is given by get_table() below.  */
+
+/* The table itself */
+struct ip6t_table_info
+{
+	/* Size per table */
+	unsigned int size;
+	/* Number of entries: FIXME. --RR */
+	unsigned int number;
+	/* Initial number of entries. Needed for module usage count */
+	unsigned int initial_entries;
+
+	/* Entry points and underflows */
+	unsigned int hook_entry[NF_IP6_NUMHOOKS];
+	unsigned int underflow[NF_IP6_NUMHOOKS];
+
+	/* ip6t_entry tables: one per CPU */
+	char entries[0] ____cacheline_aligned;
+};
+
+static LIST_HEAD(ip6t_target);
+static LIST_HEAD(ip6t_match);
+static LIST_HEAD(ip6t_tables);
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+#if 0
+#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
+#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
+#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+#endif
+
+static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
+			      struct in6_addr addr2)
+{
+	int i;
+	for( i = 0; i < 16; i++){
+		if((addr1.s6_addr[i] & mask.s6_addr[i]) != 
+		   (addr2.s6_addr[i] & mask.s6_addr[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/* Check for an extension */
+int 
+ip6t_ext_hdr(u8 nexthdr)
+{
+        return ( (nexthdr == IPPROTO_HOPOPTS)   ||
+                 (nexthdr == IPPROTO_ROUTING)   ||
+                 (nexthdr == IPPROTO_FRAGMENT)  ||
+                 (nexthdr == IPPROTO_ESP)       ||
+                 (nexthdr == IPPROTO_AH)        ||
+                 (nexthdr == IPPROTO_NONE)      ||
+                 (nexthdr == IPPROTO_DSTOPTS) );
+}
+
+/* Returns whether matches rule or not. */
+static inline int
+ip6_packet_match(const struct sk_buff *skb,
+		 const char *indev,
+		 const char *outdev,
+		 const struct ip6t_ip6 *ip6info,
+		 unsigned int *protoff,
+		 int *fragoff)
+{
+	size_t i;
+	unsigned long ret;
+	const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+
+#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
+
+	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
+		  IP6T_INV_SRCIP)
+	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
+		     IP6T_INV_DSTIP)) {
+		dprintf("Source or dest mismatch.\n");
+/*
+		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
+			ipinfo->smsk.s_addr, ipinfo->src.s_addr,
+			ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
+		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
+			ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
+			ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+		return 0;
+	}
+
+	/* Look for ifname matches; this should unroll nicely. */
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)indev)[i]
+			^ ((const unsigned long *)ip6info->iniface)[i])
+			& ((const unsigned long *)ip6info->iniface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
+		dprintf("VIA in mismatch (%s vs %s).%s\n",
+			indev, ip6info->iniface,
+			ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+		return 0;
+	}
+
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)outdev)[i]
+			^ ((const unsigned long *)ip6info->outiface)[i])
+			& ((const unsigned long *)ip6info->outiface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
+		dprintf("VIA out mismatch (%s vs %s).%s\n",
+			outdev, ip6info->outiface,
+			ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+		return 0;
+	}
+
+/* ... might want to do something with class and flowlabel here ... */
+
+	/* look for the desired protocol header */
+	if((ip6info->flags & IP6T_F_PROTO)) {
+		u_int8_t currenthdr = ipv6->nexthdr;
+		struct ipv6_opt_hdr _hdr, *hp;
+		u_int16_t ptr;		/* Header offset in skb */
+		u_int16_t hdrlen;	/* Header */
+		u_int16_t _fragoff = 0, *fp = NULL;
+
+		ptr = IPV6_HDR_LEN;
+
+		while (ip6t_ext_hdr(currenthdr)) {
+	                /* Is there enough space for the next ext header? */
+	                if (skb->len - ptr < IPV6_OPTHDR_LEN)
+	                        return 0;
+
+			/* NONE or ESP: there isn't protocol part */
+			/* If we want to count these packets in '-p all',
+			 * we will change the return 0 to 1*/
+			if ((currenthdr == IPPROTO_NONE) || 
+				(currenthdr == IPPROTO_ESP))
+				break;
+
+			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+			BUG_ON(hp == NULL);
+
+			/* Size calculation */
+	                if (currenthdr == IPPROTO_FRAGMENT) {
+				fp = skb_header_pointer(skb,
+						   ptr+offsetof(struct frag_hdr,
+								frag_off),
+						   sizeof(_fragoff),
+						   &_fragoff);
+				if (fp == NULL)
+					return 0;
+
+				_fragoff = ntohs(*fp) & ~0x7;
+	                        hdrlen = 8;
+	                } else if (currenthdr == IPPROTO_AH)
+	                        hdrlen = (hp->hdrlen+2)<<2;
+	                else
+	                        hdrlen = ipv6_optlen(hp);
+
+			currenthdr = hp->nexthdr;
+	                ptr += hdrlen;
+			/* ptr is too large */
+	                if ( ptr > skb->len ) 
+				return 0;
+			if (_fragoff) {
+				if (ip6t_ext_hdr(currenthdr))
+					return 0;
+				break;
+			}
+		}
+
+		*protoff = ptr;
+		*fragoff = _fragoff;
+
+		/* currenthdr contains the protocol header */
+
+		dprintf("Packet protocol %hi ?= %s%hi.\n",
+				currenthdr, 
+				ip6info->invflags & IP6T_INV_PROTO ? "!":"",
+				ip6info->proto);
+
+		if (ip6info->proto == currenthdr) {
+			if(ip6info->invflags & IP6T_INV_PROTO) {
+				return 0;
+			}
+			return 1;
+		}
+
+		/* We need match for the '-p all', too! */
+		if ((ip6info->proto != 0) &&
+			!(ip6info->invflags & IP6T_INV_PROTO))
+			return 0;
+	}
+	return 1;
+}
+
+/* should be ip6 safe */
+static inline int 
+ip6_checkentry(const struct ip6t_ip6 *ipv6)
+{
+	if (ipv6->flags & ~IP6T_F_MASK) {
+		duprintf("Unknown flag bits set: %08X\n",
+			 ipv6->flags & ~IP6T_F_MASK);
+		return 0;
+	}
+	if (ipv6->invflags & ~IP6T_INV_MASK) {
+		duprintf("Unknown invflag bits set: %08X\n",
+			 ipv6->invflags & ~IP6T_INV_MASK);
+		return 0;
+	}
+	return 1;
+}
+
+static unsigned int
+ip6t_error(struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const void *targinfo,
+	  void *userinfo)
+{
+	if (net_ratelimit())
+		printk("ip6_tables: error: `%s'\n", (char *)targinfo);
+
+	return NF_DROP;
+}
+
+static inline
+int do_match(struct ip6t_entry_match *m,
+	     const struct sk_buff *skb,
+	     const struct net_device *in,
+	     const struct net_device *out,
+	     int offset,
+	     unsigned int protoff,
+	     int *hotdrop)
+{
+	/* Stop iteration if it doesn't match */
+	if (!m->u.kernel.match->match(skb, in, out, m->data,
+				      offset, protoff, hotdrop))
+		return 1;
+	else
+		return 0;
+}
+
+static inline struct ip6t_entry *
+get_entry(void *base, unsigned int offset)
+{
+	return (struct ip6t_entry *)(base + offset);
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ip6t_do_table(struct sk_buff **pskb,
+	      unsigned int hook,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      struct ip6t_table *table,
+	      void *userdata)
+{
+	static const char nulldevname[IFNAMSIZ];
+	int offset = 0;
+	unsigned int protoff = 0;
+	int hotdrop = 0;
+	/* Initializing verdict to NF_DROP keeps gcc happy. */
+	unsigned int verdict = NF_DROP;
+	const char *indev, *outdev;
+	void *table_base;
+	struct ip6t_entry *e, *back;
+
+	/* Initialization */
+	indev = in ? in->name : nulldevname;
+	outdev = out ? out->name : nulldevname;
+
+	/* We handle fragments by dealing with the first fragment as
+	 * if it was a normal packet.  All other fragments are treated
+	 * normally, except that they will NEVER match rules that ask
+	 * things we don't know, ie. tcp syn flag or ports).  If the
+	 * rule is also a fragment-specific rule, non-fragments won't
+	 * match it. */
+
+	read_lock_bh(&table->lock);
+	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	table_base = (void *)table->private->entries
+		+ TABLE_OFFSET(table->private, smp_processor_id());
+	e = get_entry(table_base, table->private->hook_entry[hook]);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	/* Check noone else using our table */
+	if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
+	    && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
+		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+		       smp_processor_id(),
+		       table->name,
+		       &((struct ip6t_entry *)table_base)->comefrom,
+		       ((struct ip6t_entry *)table_base)->comefrom);
+	}
+	((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
+#endif
+
+	/* For return from builtin chain */
+	back = get_entry(table_base, table->private->underflow[hook]);
+
+	do {
+		IP_NF_ASSERT(e);
+		IP_NF_ASSERT(back);
+		(*pskb)->nfcache |= e->nfcache;
+		if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+			&protoff, &offset)) {
+			struct ip6t_entry_target *t;
+
+			if (IP6T_MATCH_ITERATE(e, do_match,
+					       *pskb, in, out,
+					       offset, protoff, &hotdrop) != 0)
+				goto no_match;
+
+			ADD_COUNTER(e->counters,
+				    ntohs((*pskb)->nh.ipv6h->payload_len)
+				    + IPV6_HDR_LEN,
+				    1);
+
+			t = ip6t_get_target(e);
+			IP_NF_ASSERT(t->u.kernel.target);
+			/* Standard target? */
+			if (!t->u.kernel.target->target) {
+				int v;
+
+				v = ((struct ip6t_standard_target *)t)->verdict;
+				if (v < 0) {
+					/* Pop from stack? */
+					if (v != IP6T_RETURN) {
+						verdict = (unsigned)(-v) - 1;
+						break;
+					}
+					e = back;
+					back = get_entry(table_base,
+							 back->comefrom);
+					continue;
+				}
+				if (table_base + v
+				    != (void *)e + e->next_offset) {
+					/* Save old back ptr in next entry */
+					struct ip6t_entry *next
+						= (void *)e + e->next_offset;
+					next->comefrom
+						= (void *)back - table_base;
+					/* set back pointer to next entry */
+					back = next;
+				}
+
+				e = get_entry(table_base, v);
+			} else {
+				/* Targets which reenter must return
+                                   abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+				((struct ip6t_entry *)table_base)->comefrom
+					= 0xeeeeeeec;
+#endif
+				verdict = t->u.kernel.target->target(pskb,
+								     in, out,
+								     hook,
+								     t->data,
+								     userdata);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+				if (((struct ip6t_entry *)table_base)->comefrom
+				    != 0xeeeeeeec
+				    && verdict == IP6T_CONTINUE) {
+					printk("Target %s reentered!\n",
+					       t->u.kernel.target->name);
+					verdict = NF_DROP;
+				}
+				((struct ip6t_entry *)table_base)->comefrom
+					= 0x57acc001;
+#endif
+				if (verdict == IP6T_CONTINUE)
+					e = (void *)e + e->next_offset;
+				else
+					/* Verdict */
+					break;
+			}
+		} else {
+
+		no_match:
+			e = (void *)e + e->next_offset;
+		}
+	} while (!hotdrop);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
+#endif
+	read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+	return NF_ACCEPT;
+#else
+	if (hotdrop)
+		return NF_DROP;
+	else return verdict;
+#endif
+}
+
+/* If it succeeds, returns element and locks mutex */
+static inline void *
+find_inlist_lock_noload(struct list_head *head,
+			const char *name,
+			int *error,
+			struct semaphore *mutex)
+{
+	void *ret;
+
+#if 1
+	duprintf("find_inlist: searching for `%s' in %s.\n",
+		 name, head == &ip6t_target ? "ip6t_target"
+		 : head == &ip6t_match ? "ip6t_match"
+		 : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
+#endif
+
+	*error = down_interruptible(mutex);
+	if (*error != 0)
+		return NULL;
+
+	ret = list_named_find(head, name);
+	if (!ret) {
+		*error = -ENOENT;
+		up(mutex);
+	}
+	return ret;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+static void *
+find_inlist_lock(struct list_head *head,
+		 const char *name,
+		 const char *prefix,
+		 int *error,
+		 struct semaphore *mutex)
+{
+	void *ret;
+
+	ret = find_inlist_lock_noload(head, name, error, mutex);
+	if (!ret) {
+		duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
+		request_module("%s%s", prefix, name);
+		ret = find_inlist_lock_noload(head, name, error, mutex);
+	}
+
+	return ret;
+}
+#endif
+
+static inline struct ip6t_table *
+ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
+}
+
+static inline struct ip6t_match *
+find_match_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
+}
+
+static struct ip6t_target *
+ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
+}
+
+/* All zeroes == unconditional rule. */
+static inline int
+unconditional(const struct ip6t_ip6 *ipv6)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(*ipv6); i++)
+		if (((char *)ipv6)[i])
+			break;
+
+	return (i == sizeof(*ipv6));
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+   there are loops.  Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+{
+	unsigned int hook;
+
+	/* No recursion; use packet counter to save back ptrs (reset
+	   to 0 as we leave), and comefrom to save source hook bitmask */
+	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
+		unsigned int pos = newinfo->hook_entry[hook];
+		struct ip6t_entry *e
+			= (struct ip6t_entry *)(newinfo->entries + pos);
+
+		if (!(valid_hooks & (1 << hook)))
+			continue;
+
+		/* Set initial back pointer. */
+		e->counters.pcnt = pos;
+
+		for (;;) {
+			struct ip6t_standard_target *t
+				= (void *)ip6t_get_target(e);
+
+			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
+				printk("iptables: loop hook %u pos %u %08X.\n",
+				       hook, pos, e->comefrom);
+				return 0;
+			}
+			e->comefrom
+				|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
+
+			/* Unconditional return/END. */
+			if (e->target_offset == sizeof(struct ip6t_entry)
+			    && (strcmp(t->target.u.user.name,
+				       IP6T_STANDARD_TARGET) == 0)
+			    && t->verdict < 0
+			    && unconditional(&e->ipv6)) {
+				unsigned int oldpos, size;
+
+				/* Return: backtrack through the last
+				   big jump. */
+				do {
+					e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+					if (e->comefrom
+					    & (1 << NF_IP6_NUMHOOKS)) {
+						duprintf("Back unset "
+							 "on hook %u "
+							 "rule %u\n",
+							 hook, pos);
+					}
+#endif
+					oldpos = pos;
+					pos = e->counters.pcnt;
+					e->counters.pcnt = 0;
+
+					/* We're at the start. */
+					if (pos == oldpos)
+						goto next;
+
+					e = (struct ip6t_entry *)
+						(newinfo->entries + pos);
+				} while (oldpos == pos + e->next_offset);
+
+				/* Move along one */
+				size = e->next_offset;
+				e = (struct ip6t_entry *)
+					(newinfo->entries + pos + size);
+				e->counters.pcnt = pos;
+				pos += size;
+			} else {
+				int newpos = t->verdict;
+
+				if (strcmp(t->target.u.user.name,
+					   IP6T_STANDARD_TARGET) == 0
+				    && newpos >= 0) {
+					/* This a jump; chase it. */
+					duprintf("Jump rule %u -> %u\n",
+						 pos, newpos);
+				} else {
+					/* ... this is a fallthru */
+					newpos = pos + e->next_offset;
+				}
+				e = (struct ip6t_entry *)
+					(newinfo->entries + newpos);
+				e->counters.pcnt = pos;
+				pos = newpos;
+			}
+		}
+		next:
+		duprintf("Finished chain %u\n", hook);
+	}
+	return 1;
+}
+
+static inline int
+cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	if (m->u.kernel.match->destroy)
+		m->u.kernel.match->destroy(m->data,
+					   m->u.match_size - sizeof(*m));
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+standard_check(const struct ip6t_entry_target *t,
+	       unsigned int max_offset)
+{
+	struct ip6t_standard_target *targ = (void *)t;
+
+	/* Check standard info. */
+	if (t->u.target_size
+	    != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
+		duprintf("standard_check: target size %u != %u\n",
+			 t->u.target_size,
+			 IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
+		return 0;
+	}
+
+	if (targ->verdict >= 0
+	    && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
+		duprintf("ip6t_standard_check: bad verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+
+	if (targ->verdict < -NF_MAX_VERDICT - 1) {
+		duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
+			 targ->verdict);
+		return 0;
+	}
+	return 1;
+}
+
+static inline int
+check_match(struct ip6t_entry_match *m,
+	    const char *name,
+	    const struct ip6t_ip6 *ipv6,
+	    unsigned int hookmask,
+	    unsigned int *i)
+{
+	int ret;
+	struct ip6t_match *match;
+
+	match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
+	if (!match) {
+	  //		duprintf("check_match: `%s' not found\n", m->u.name);
+		return ret;
+	}
+	if (!try_module_get(match->me)) {
+		up(&ip6t_mutex);
+		return -ENOENT;
+	}
+	m->u.kernel.match = match;
+	up(&ip6t_mutex);
+
+	if (m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ipv6, m->data,
+					      m->u.match_size - sizeof(*m),
+					      hookmask)) {
+		module_put(m->u.kernel.match->me);
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		return -EINVAL;
+	}
+
+	(*i)++;
+	return 0;
+}
+
+static struct ip6t_target ip6t_standard_target;
+
+static inline int
+check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
+	    unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+	int ret;
+	unsigned int j;
+
+	if (!ip6_checkentry(&e->ipv6)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	j = 0;
+	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
+	if (ret != 0)
+		goto cleanup_matches;
+
+	t = ip6t_get_target(e);
+	target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
+	if (!target) {
+		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		goto cleanup_matches;
+	}
+	if (!try_module_get(target->me)) {
+		up(&ip6t_mutex);
+		ret = -ENOENT;
+		goto cleanup_matches;
+	}
+	t->u.kernel.target = target;
+	up(&ip6t_mutex);
+	if (!t->u.kernel.target) {
+		ret = -EBUSY;
+		goto cleanup_matches;
+	}
+	if (t->u.kernel.target == &ip6t_standard_target) {
+		if (!standard_check(t, size)) {
+			ret = -EINVAL;
+			goto cleanup_matches;
+		}
+	} else if (t->u.kernel.target->checkentry
+		   && !t->u.kernel.target->checkentry(name, e, t->data,
+						      t->u.target_size
+						      - sizeof(*t),
+						      e->comefrom)) {
+		module_put(t->u.kernel.target->me);
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
+		goto cleanup_matches;
+	}
+
+	(*i)++;
+	return 0;
+
+ cleanup_matches:
+	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
+}
+
+static inline int
+check_entry_size_and_hooks(struct ip6t_entry *e,
+			   struct ip6t_table_info *newinfo,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   const unsigned int *hook_entries,
+			   const unsigned int *underflows,
+			   unsigned int *i)
+{
+	unsigned int h;
+
+	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
+	    || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	if (e->next_offset
+	    < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+           < 0 (not IP6T_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct ip6t_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+static inline int
+cleanup_entry(struct ip6t_entry *e, unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
+	t = ip6t_get_target(e);
+	if (t->u.kernel.target->destroy)
+		t->u.kernel.target->destroy(t->data,
+					    t->u.target_size - sizeof(*t));
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+   newinfo) */
+static int
+translate_table(const char *name,
+		unsigned int valid_hooks,
+		struct ip6t_table_info *newinfo,
+		unsigned int size,
+		unsigned int number,
+		const unsigned int *hook_entries,
+		const unsigned int *underflows)
+{
+	unsigned int i;
+	int ret;
+
+	newinfo->size = size;
+	newinfo->number = number;
+
+	/* Init all hooks to impossible value. */
+	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+		newinfo->hook_entry[i] = 0xFFFFFFFF;
+		newinfo->underflow[i] = 0xFFFFFFFF;
+	}
+
+	duprintf("translate_table: size %u\n", newinfo->size);
+	i = 0;
+	/* Walk through entries, checking offsets. */
+	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				check_entry_size_and_hooks,
+				newinfo,
+				newinfo->entries,
+				newinfo->entries + size,
+				hook_entries, underflows, &i);
+	if (ret != 0)
+		return ret;
+
+	if (i != number) {
+		duprintf("translate_table: %u not %u entries\n",
+			 i, number);
+		return -EINVAL;
+	}
+
+	/* Check hooks all assigned */
+	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+		/* Only hooks which are valid */
+		if (!(valid_hooks & (1 << i)))
+			continue;
+		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+			duprintf("Invalid hook entry %u %u\n",
+				 i, hook_entries[i]);
+			return -EINVAL;
+		}
+		if (newinfo->underflow[i] == 0xFFFFFFFF) {
+			duprintf("Invalid underflow %u %u\n",
+				 i, underflows[i]);
+			return -EINVAL;
+		}
+	}
+
+	if (!mark_source_chains(newinfo, valid_hooks))
+		return -ELOOP;
+
+	/* Finally, each sanity check must pass */
+	i = 0;
+	ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				check_entry, name, size, &i);
+
+	if (ret != 0) {
+		IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+				  cleanup_entry, &i);
+		return ret;
+	}
+
+	/* And one copy for every other CPU */
+	for (i = 1; i < num_possible_cpus(); i++) {
+		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+		       newinfo->entries,
+		       SMP_ALIGN(newinfo->size));
+	}
+
+	return ret;
+}
+
+static struct ip6t_table_info *
+replace_table(struct ip6t_table *table,
+	      unsigned int num_counters,
+	      struct ip6t_table_info *newinfo,
+	      int *error)
+{
+	struct ip6t_table_info *oldinfo;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+	{
+		struct ip6t_entry *table_base;
+		unsigned int i;
+
+		for (i = 0; i < num_possible_cpus(); i++) {
+			table_base =
+				(void *)newinfo->entries
+				+ TABLE_OFFSET(newinfo, i);
+
+			table_base->comefrom = 0xdead57ac;
+		}
+	}
+#endif
+
+	/* Do the substitution. */
+	write_lock_bh(&table->lock);
+	/* Check inside lock: is the old number correct? */
+	if (num_counters != table->private->number) {
+		duprintf("num_counters != table->private->number (%u/%u)\n",
+			 num_counters, table->private->number);
+		write_unlock_bh(&table->lock);
+		*error = -EAGAIN;
+		return NULL;
+	}
+	oldinfo = table->private;
+	table->private = newinfo;
+	newinfo->initial_entries = oldinfo->initial_entries;
+	write_unlock_bh(&table->lock);
+
+	return oldinfo;
+}
+
+/* Gets counters. */
+static inline int
+add_entry_to_counter(const struct ip6t_entry *e,
+		     struct ip6t_counters total[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static void
+get_counters(const struct ip6t_table_info *t,
+	     struct ip6t_counters counters[])
+{
+	unsigned int cpu;
+	unsigned int i;
+
+	for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+		i = 0;
+		IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+				  t->size,
+				  add_entry_to_counter,
+				  counters,
+				  &i);
+	}
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct ip6t_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num, countersize;
+	struct ip6t_entry *e;
+	struct ip6t_counters *counters;
+	int ret = 0;
+
+	/* We need atomic snapshot of counters: rest doesn't change
+	   (other than comefrom, which userspace doesn't care
+	   about). */
+	countersize = sizeof(struct ip6t_counters) * table->private->number;
+	counters = vmalloc(countersize);
+
+	if (counters == NULL)
+		return -ENOMEM;
+
+	/* First, sum counters... */
+	memset(counters, 0, countersize);
+	write_lock_bh(&table->lock);
+	get_counters(table->private, counters);
+	write_unlock_bh(&table->lock);
+
+	/* ... then copy entire thing from CPU 0... */
+	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+		ret = -EFAULT;
+		goto free_counters;
+	}
+
+	/* FIXME: use iterator macros --RR */
+	/* ... then go back and fix counters and names */
+	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+		unsigned int i;
+		struct ip6t_entry_match *m;
+		struct ip6t_entry_target *t;
+
+		e = (struct ip6t_entry *)(table->private->entries + off);
+		if (copy_to_user(userptr + off
+				 + offsetof(struct ip6t_entry, counters),
+				 &counters[num],
+				 sizeof(counters[num])) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+
+		for (i = sizeof(struct ip6t_entry);
+		     i < e->target_offset;
+		     i += m->u.match_size) {
+			m = (void *)e + i;
+
+			if (copy_to_user(userptr + off + i
+					 + offsetof(struct ip6t_entry_match,
+						    u.user.name),
+					 m->u.kernel.match->name,
+					 strlen(m->u.kernel.match->name)+1)
+			    != 0) {
+				ret = -EFAULT;
+				goto free_counters;
+			}
+		}
+
+		t = ip6t_get_target(e);
+		if (copy_to_user(userptr + off + e->target_offset
+				 + offsetof(struct ip6t_entry_target,
+					    u.user.name),
+				 t->u.kernel.target->name,
+				 strlen(t->u.kernel.target->name)+1) != 0) {
+			ret = -EFAULT;
+			goto free_counters;
+		}
+	}
+
+ free_counters:
+	vfree(counters);
+	return ret;
+}
+
+static int
+get_entries(const struct ip6t_get_entries *entries,
+	    struct ip6t_get_entries __user *uptr)
+{
+	int ret;
+	struct ip6t_table *t;
+
+	t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
+	if (t) {
+		duprintf("t->private->number = %u\n",
+			 t->private->number);
+		if (entries->size == t->private->size)
+			ret = copy_entries_to_user(t->private->size,
+						   t, uptr->entrytable);
+		else {
+			duprintf("get_entries: I've got %u not %u!\n",
+				 t->private->size,
+				 entries->size);
+			ret = -EINVAL;
+		}
+		up(&ip6t_mutex);
+	} else
+		duprintf("get_entries: Can't find %s!\n",
+			 entries->name);
+
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ip6t_replace tmp;
+	struct ip6t_table *t;
+	struct ip6t_table_info *newinfo, *oldinfo;
+	struct ip6t_counters *counters;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return -ENOMEM;
+
+	newinfo = vmalloc(sizeof(struct ip6t_table_info)
+			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto free_newinfo;
+	}
+	memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo_counters;
+
+	duprintf("ip_tables: Translated table\n");
+
+	t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
+	if (!t)
+		goto free_newinfo_counters_untrans;
+
+	/* You lied! */
+	if (tmp.valid_hooks != t->valid_hooks) {
+		duprintf("Valid hook crap: %08X vs %08X\n",
+			 tmp.valid_hooks, t->valid_hooks);
+		ret = -EINVAL;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	/* Get a reference in advance, we're not allowed fail later */
+	if (!try_module_get(t->me)) {
+		ret = -EBUSY;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) || 
+	    (newinfo->number <= oldinfo->initial_entries)) 
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+	vfree(oldinfo);
+	/* Silent error: too late now. */
+	if (copy_to_user(tmp.counters, counters,
+			 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
+		ret = -EFAULT;
+	vfree(counters);
+	up(&ip6t_mutex);
+	return ret;
+
+ put_module:
+	module_put(t->me);
+ free_newinfo_counters_untrans_unlock:
+	up(&ip6t_mutex);
+ free_newinfo_counters_untrans:
+	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ free_newinfo_counters:
+	vfree(counters);
+ free_newinfo:
+	vfree(newinfo);
+	return ret;
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static inline int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct ip6t_counters addme[],
+		     unsigned int *i)
+{
+#if 0
+	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+		 *i,
+		 (long unsigned int)e->counters.pcnt,
+		 (long unsigned int)e->counters.bcnt,
+		 (long unsigned int)addme[*i].pcnt,
+		 (long unsigned int)addme[*i].bcnt);
+#endif
+
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+static int
+do_add_counters(void __user *user, unsigned int len)
+{
+	unsigned int i;
+	struct ip6t_counters_info tmp, *paddc;
+	struct ip6t_table *t;
+	int ret;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
+		return -EINVAL;
+
+	paddc = vmalloc(len);
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user, len) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
+	if (!t)
+		goto free;
+
+	write_lock_bh(&t->lock);
+	if (t->private->number != paddc->num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->private->entries,
+			  t->private->size,
+			  add_counter_to_entry,
+			  paddc->counters,
+			  &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	up(&ip6t_mutex);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+static int
+do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		ret = do_replace(user, len);
+		break;
+
+	case IP6T_SO_SET_ADD_COUNTERS:
+		ret = do_add_counters(user, len);
+		break;
+
+	default:
+		duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int
+do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_GET_INFO: {
+		char name[IP6T_TABLE_MAXNAMELEN];
+		struct ip6t_table *t;
+
+		if (*len != sizeof(struct ip6t_getinfo)) {
+			duprintf("length %u != %u\n", *len,
+				 sizeof(struct ip6t_getinfo));
+			ret = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(name, user, sizeof(name)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+		t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
+		if (t) {
+			struct ip6t_getinfo info;
+
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, t->private->hook_entry,
+			       sizeof(info.hook_entry));
+			memcpy(info.underflow, t->private->underflow,
+			       sizeof(info.underflow));
+			info.num_entries = t->private->number;
+			info.size = t->private->size;
+			memcpy(info.name, name, sizeof(info.name));
+
+			if (copy_to_user(user, &info, *len) != 0)
+				ret = -EFAULT;
+			else
+				ret = 0;
+
+			up(&ip6t_mutex);
+		}
+	}
+	break;
+
+	case IP6T_SO_GET_ENTRIES: {
+		struct ip6t_get_entries get;
+
+		if (*len < sizeof(get)) {
+			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+			ret = -EFAULT;
+		} else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+			duprintf("get_entries: %u != %u\n", *len,
+				 sizeof(struct ip6t_get_entries) + get.size);
+			ret = -EINVAL;
+		} else
+			ret = get_entries(&get, user);
+		break;
+	}
+
+	default:
+		duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Registration hooks for targets. */
+int
+ip6t_register_target(struct ip6t_target *target)
+{
+	int ret;
+
+	ret = down_interruptible(&ip6t_mutex);
+	if (ret != 0)
+		return ret;
+
+	if (!list_named_insert(&ip6t_target, target)) {
+		duprintf("ip6t_register_target: `%s' already in list!\n",
+			 target->name);
+		ret = -EINVAL;
+	}
+	up(&ip6t_mutex);
+	return ret;
+}
+
+void
+ip6t_unregister_target(struct ip6t_target *target)
+{
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_target, target);
+	up(&ip6t_mutex);
+}
+
+int
+ip6t_register_match(struct ip6t_match *match)
+{
+	int ret;
+
+	ret = down_interruptible(&ip6t_mutex);
+	if (ret != 0)
+		return ret;
+
+	if (!list_named_insert(&ip6t_match, match)) {
+		duprintf("ip6t_register_match: `%s' already in list!\n",
+			 match->name);
+		ret = -EINVAL;
+	}
+	up(&ip6t_mutex);
+
+	return ret;
+}
+
+void
+ip6t_unregister_match(struct ip6t_match *match)
+{
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_match, match);
+	up(&ip6t_mutex);
+}
+
+int ip6t_register_table(struct ip6t_table *table,
+			const struct ip6t_replace *repl)
+{
+	int ret;
+	struct ip6t_table_info *newinfo;
+	static struct ip6t_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+
+	newinfo = vmalloc(sizeof(struct ip6t_table_info)
+			  + SMP_ALIGN(repl->size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	ret = translate_table(table->name, table->valid_hooks,
+			      newinfo, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	ret = down_interruptible(&ip6t_mutex);
+	if (ret != 0) {
+		vfree(newinfo);
+		return ret;
+	}
+
+	/* Don't autoload: we'd eat our tail... */
+	if (list_named_find(&ip6t_tables, table->name)) {
+		ret = -EEXIST;
+		goto free_unlock;
+	}
+
+	/* Simplifies replace_table code. */
+	table->private = &bootstrap;
+	if (!replace_table(table, 0, newinfo, &ret))
+		goto free_unlock;
+
+	duprintf("table->private->number = %u\n",
+		 table->private->number);
+
+	/* save number of initial entries */
+	table->private->initial_entries = table->private->number;
+
+	rwlock_init(&table->lock);
+	list_prepend(&ip6t_tables, table);
+
+ unlock:
+	up(&ip6t_mutex);
+	return ret;
+
+ free_unlock:
+	vfree(newinfo);
+	goto unlock;
+}
+
+void ip6t_unregister_table(struct ip6t_table *table)
+{
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_tables, table);
+	up(&ip6t_mutex);
+
+	/* Decrease module usage counts and free resources */
+	IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
+			  cleanup_entry, NULL);
+	vfree(table->private);
+}
+
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+	int ret;
+
+	ret = (port >= min && port <= max) ^ invert;
+	return ret;
+}
+
+static int
+tcp_find_option(u_int8_t option,
+		const struct sk_buff *skb,
+		unsigned int tcpoff,
+		unsigned int optlen,
+		int invert,
+		int *hotdrop)
+{
+	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+	unsigned int i;
+
+	duprintf("tcp_match: finding option\n");
+	if (!optlen)
+		return invert;
+	/* If we don't have the whole header, drop packet. */
+	op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
+				_opt);
+	if (op == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == option) return !invert;
+		if (op[i] < 2) i++;
+		else i += op[i+1]?:1;
+	}
+
+	return invert;
+}
+
+static int
+tcp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  unsigned int protoff,
+	  int *hotdrop)
+{
+	struct tcphdr _tcph, *th;
+	const struct ip6t_tcp *tcpinfo = matchinfo;
+
+	if (offset) {
+		/* To quote Alan:
+
+		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
+		   causes this. Its a cracker trying to break in by doing a
+		   flag overwrite to pass the direction checks.
+		*/
+		if (offset == 1) {
+			duprintf("Dropping evil TCP offset=1 frag.\n");
+			*hotdrop = 1;
+		}
+		/* Must not be a fragment. */
+		return 0;
+	}
+
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+			ntohs(th->source),
+			!!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
+		return 0;
+	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+			ntohs(th->dest),
+			!!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
+		return 0;
+	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
+		      == tcpinfo->flg_cmp,
+		      IP6T_TCP_INV_FLAGS))
+		return 0;
+	if (tcpinfo->option) {
+		if (th->doff * 4 < sizeof(_tcph)) {
+			*hotdrop = 1;
+			return 0;
+		}
+		if (!tcp_find_option(tcpinfo->option, skb, protoff,
+				     th->doff*4 - sizeof(*th),
+				     tcpinfo->invflags & IP6T_TCP_INV_OPTION,
+				     hotdrop))
+			return 0;
+	}
+	return 1;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+tcp_checkentry(const char *tablename,
+	       const struct ip6t_ip6 *ipv6,
+	       void *matchinfo,
+	       unsigned int matchsize,
+	       unsigned int hook_mask)
+{
+	const struct ip6t_tcp *tcpinfo = matchinfo;
+
+	/* Must specify proto == TCP, and no unknown invflags */
+	return ipv6->proto == IPPROTO_TCP
+		&& !(ipv6->invflags & IP6T_INV_PROTO)
+		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
+		&& !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
+}
+
+static int
+udp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  unsigned int protoff,
+	  int *hotdrop)
+{
+	struct udphdr _udph, *uh;
+	const struct ip6t_udp *udpinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+	if (uh == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil UDP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return port_match(udpinfo->spts[0], udpinfo->spts[1],
+			  ntohs(uh->source),
+			  !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
+		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
+			      ntohs(uh->dest),
+			      !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+udp_checkentry(const char *tablename,
+	       const struct ip6t_ip6 *ipv6,
+	       void *matchinfo,
+	       unsigned int matchinfosize,
+	       unsigned int hook_mask)
+{
+	const struct ip6t_udp *udpinfo = matchinfo;
+
+	/* Must specify proto == UDP, and no unknown invflags */
+	if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
+		duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
+			 IPPROTO_UDP);
+		return 0;
+	}
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
+		duprintf("ip6t_udp: matchsize %u != %u\n",
+			 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
+		return 0;
+	}
+	if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
+		duprintf("ip6t_udp: unknown flags %X\n",
+			 udpinfo->invflags);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+static inline int
+icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+		     u_int8_t type, u_int8_t code,
+		     int invert)
+{
+	return (type == test_type && code >= min_code && code <= max_code)
+		^ invert;
+}
+
+static int
+icmp6_match(const struct sk_buff *skb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   const void *matchinfo,
+	   int offset,
+	   unsigned int protoff,
+	   int *hotdrop)
+{
+	struct icmp6hdr _icmp, *ic;
+	const struct ip6t_icmp *icmpinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
+	if (ic == NULL) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil ICMP tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return icmp6_type_code_match(icmpinfo->type,
+				     icmpinfo->code[0],
+				     icmpinfo->code[1],
+				     ic->icmp6_type, ic->icmp6_code,
+				     !!(icmpinfo->invflags&IP6T_ICMP_INV));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+icmp6_checkentry(const char *tablename,
+	   const struct ip6t_ip6 *ipv6,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ip6t_icmp *icmpinfo = matchinfo;
+
+	/* Must specify proto == ICMP, and no unknown invflags */
+	return ipv6->proto == IPPROTO_ICMPV6
+		&& !(ipv6->invflags & IP6T_INV_PROTO)
+		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
+		&& !(icmpinfo->invflags & ~IP6T_ICMP_INV);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct ip6t_target ip6t_standard_target = {
+	.name		= IP6T_STANDARD_TARGET,
+};
+
+static struct ip6t_target ip6t_error_target = {
+	.name		= IP6T_ERROR_TARGET,
+	.target		= ip6t_error,
+};
+
+static struct nf_sockopt_ops ip6t_sockopts = {
+	.pf		= PF_INET6,
+	.set_optmin	= IP6T_BASE_CTL,
+	.set_optmax	= IP6T_SO_SET_MAX+1,
+	.set		= do_ip6t_set_ctl,
+	.get_optmin	= IP6T_BASE_CTL,
+	.get_optmax	= IP6T_SO_GET_MAX+1,
+	.get		= do_ip6t_get_ctl,
+};
+
+static struct ip6t_match tcp_matchstruct = {
+	.name		= "tcp",
+	.match		= &tcp_match,
+	.checkentry	= &tcp_checkentry,
+};
+
+static struct ip6t_match udp_matchstruct = {
+	.name		= "udp",
+	.match		= &udp_match,
+	.checkentry	= &udp_checkentry,
+};
+
+static struct ip6t_match icmp6_matchstruct = {
+	.name		= "icmp6",
+	.match		= &icmp6_match,
+	.checkentry	= &icmp6_checkentry,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const char *i,
+			     off_t start_offset, char *buffer, int length,
+			     off_t *pos, unsigned int *count)
+{
+	if ((*count)++ >= start_offset) {
+		unsigned int namelen;
+
+		namelen = sprintf(buffer + *pos, "%s\n",
+				  i + sizeof(struct list_head));
+		if (*pos + namelen > length) {
+			/* Stop iterating */
+			return 1;
+		}
+		*pos += namelen;
+	}
+	return 0;
+}
+
+static inline int print_target(const struct ip6t_target *t,
+                               off_t start_offset, char *buffer, int length,
+                               off_t *pos, unsigned int *count)
+{
+	if (t == &ip6t_standard_target || t == &ip6t_error_target)
+		return 0;
+	return print_name((char *)t, start_offset, buffer, length, pos, count);
+}
+
+static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ip6t_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ip6t_tables, print_name, char *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ip6t_mutex);
+
+	/* `start' hack - see fs/proc/generic.c line ~105 */
+	*start=(char *)((unsigned long)count-offset);
+	return pos;
+}
+
+static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ip6t_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ip6t_mutex);
+
+	*start = (char *)((unsigned long)count - offset);
+	return pos;
+}
+
+static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ip6t_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ip6t_match, print_name, char *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ip6t_mutex);
+
+	*start = (char *)((unsigned long)count - offset);
+	return pos;
+}
+
+static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
+{ { "ip6_tables_names", ip6t_get_tables },
+  { "ip6_tables_targets", ip6t_get_targets },
+  { "ip6_tables_matches", ip6t_get_matches },
+  { NULL, NULL} };
+#endif /*CONFIG_PROC_FS*/
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Noone else will be downing sem now, so we won't sleep */
+	down(&ip6t_mutex);
+	list_append(&ip6t_target, &ip6t_standard_target);
+	list_append(&ip6t_target, &ip6t_error_target);
+	list_append(&ip6t_match, &tcp_matchstruct);
+	list_append(&ip6t_match, &udp_matchstruct);
+	list_append(&ip6t_match, &icmp6_matchstruct);
+	up(&ip6t_mutex);
+
+	/* Register setsockopt */
+	ret = nf_register_sockopt(&ip6t_sockopts);
+	if (ret < 0) {
+		duprintf("Unable to register sockopts.\n");
+		return ret;
+	}
+
+#ifdef CONFIG_PROC_FS
+	{
+		struct proc_dir_entry *proc;
+		int i;
+
+		for (i = 0; ip6t_proc_entry[i].name; i++) {
+			proc = proc_net_create(ip6t_proc_entry[i].name, 0,
+					       ip6t_proc_entry[i].get_info);
+			if (!proc) {
+				while (--i >= 0)
+				       proc_net_remove(ip6t_proc_entry[i].name);
+				nf_unregister_sockopt(&ip6t_sockopts);
+				return -ENOMEM;
+			}
+			proc->owner = THIS_MODULE;
+		}
+	}
+#endif
+
+	printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	nf_unregister_sockopt(&ip6t_sockopts);
+#ifdef CONFIG_PROC_FS
+	{
+		int i;
+		for (i = 0; ip6t_proc_entry[i].name; i++)
+			proc_net_remove(ip6t_proc_entry[i].name);
+	}
+#endif
+}
+
+EXPORT_SYMBOL(ip6t_register_table);
+EXPORT_SYMBOL(ip6t_unregister_table);
+EXPORT_SYMBOL(ip6t_do_table);
+EXPORT_SYMBOL(ip6t_register_match);
+EXPORT_SYMBOL(ip6t_unregister_match);
+EXPORT_SYMBOL(ip6t_register_target);
+EXPORT_SYMBOL(ip6t_unregister_target);
+EXPORT_SYMBOL(ip6t_ext_hdr);
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
new file mode 100644
index 00000000000..bfc3d0185d1
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -0,0 +1,509 @@
+/*
+ * This is a module which is used for logging packets.
+ */
+
+/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/spinlock.h>
+#include <linux/icmpv6.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
+MODULE_DESCRIPTION("IP6 tables LOG target module");
+MODULE_LICENSE("GPL");
+
+static unsigned int nflog = 1;
+module_param(nflog, int, 0400);
+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
+ 
+struct in_device;
+#include <net/route.h>
+#include <linux/netfilter_ipv6/ip6t_LOG.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Use lock to serialize, so printks don't overlap */
+static DEFINE_SPINLOCK(log_lock);
+
+/* One level of recursion won't kill us */
+static void dump_packet(const struct ip6t_log_info *info,
+			const struct sk_buff *skb, unsigned int ip6hoff,
+			int recurse)
+{
+	u_int8_t currenthdr;
+	int fragment;
+	struct ipv6hdr _ip6h, *ih;
+	unsigned int ptr;
+	unsigned int hdrlen = 0;
+
+	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+	if (ih == NULL) {
+		printk("TRUNCATED");
+		return;
+	}
+
+	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000" */
+	printk("SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->saddr));
+	printk("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ih->daddr));
+
+	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+	       ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+	       (ntohl(*(u_int32_t *)ih) & 0x0ff00000) >> 20,
+	       ih->hop_limit,
+	       (ntohl(*(u_int32_t *)ih) & 0x000fffff));
+
+	fragment = 0;
+	ptr = ip6hoff + sizeof(struct ipv6hdr);
+	currenthdr = ih->nexthdr;
+	while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+
+		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+		if (hp == NULL) {
+			printk("TRUNCATED");
+			return;
+		}
+
+		/* Max length: 48 "OPT (...) " */
+		if (info->logflags & IP6T_LOG_IPOPT)
+			printk("OPT ( ");
+
+		switch (currenthdr) {
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr _fhdr, *fh;
+
+			printk("FRAG:");
+			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+						&_fhdr);
+			if (fh == NULL) {
+				printk("TRUNCATED ");
+				return;
+			}
+
+			/* Max length: 6 "65535 " */
+			printk("%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+			/* Max length: 11 "INCOMPLETE " */
+			if (fh->frag_off & htons(0x0001))
+				printk("INCOMPLETE ");
+
+			printk("ID:%08x ", ntohl(fh->identification));
+
+			if (ntohs(fh->frag_off) & 0xFFF8)
+				fragment = 1;
+
+			hdrlen = 8;
+
+			break;
+		}
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_ROUTING:
+		case IPPROTO_HOPOPTS:
+			if (fragment) {
+				if (info->logflags & IP6T_LOG_IPOPT)
+					printk(")");
+				return;
+			}
+			hdrlen = ipv6_optlen(hp);
+			break;
+		/* Max Length */
+		case IPPROTO_AH:
+			if (info->logflags & IP6T_LOG_IPOPT) {
+				struct ip_auth_hdr _ahdr, *ah;
+
+				/* Max length: 3 "AH " */
+				printk("AH ");
+
+				if (fragment) {
+					printk(")");
+					return;
+				}
+
+				ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+							&_ahdr);
+				if (ah == NULL) {
+					/*
+					 * Max length: 26 "INCOMPLETE [65535 	
+					 *  bytes] )"
+					 */
+					printk("INCOMPLETE [%u bytes] )",
+					       skb->len - ptr);
+					return;
+				}
+
+				/* Length: 15 "SPI=0xF1234567 */
+				printk("SPI=0x%x ", ntohl(ah->spi));
+
+			}
+
+			hdrlen = (hp->hdrlen+2)<<2;
+			break;
+		case IPPROTO_ESP:
+			if (info->logflags & IP6T_LOG_IPOPT) {
+				struct ip_esp_hdr _esph, *eh;
+
+				/* Max length: 4 "ESP " */
+				printk("ESP ");
+
+				if (fragment) {
+					printk(")");
+					return;
+				}
+
+				/*
+				 * Max length: 26 "INCOMPLETE [65535 bytes] )"
+				 */
+				eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+							&_esph);
+				if (eh == NULL) {
+					printk("INCOMPLETE [%u bytes] )",
+					       skb->len - ptr);
+					return;
+				}
+
+				/* Length: 16 "SPI=0xF1234567 )" */
+				printk("SPI=0x%x )", ntohl(eh->spi) );
+
+			}
+			return;
+		default:
+			/* Max length: 20 "Unknown Ext Hdr 255" */
+			printk("Unknown Ext Hdr %u", currenthdr);
+			return;
+		}
+		if (info->logflags & IP6T_LOG_IPOPT)
+			printk(") ");
+
+		currenthdr = hp->nexthdr;
+		ptr += hdrlen;
+	}
+
+	switch (currenthdr) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph, *th;
+
+		/* Max length: 10 "PROTO=TCP " */
+		printk("PROTO=TCP ");
+
+		if (fragment)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
+		if (th == NULL) {
+			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			return;
+		}
+
+		/* Max length: 20 "SPT=65535 DPT=65535 " */
+		printk("SPT=%u DPT=%u ",
+		       ntohs(th->source), ntohs(th->dest));
+		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+		if (info->logflags & IP6T_LOG_TCPSEQ)
+			printk("SEQ=%u ACK=%u ",
+			       ntohl(th->seq), ntohl(th->ack_seq));
+		/* Max length: 13 "WINDOW=65535 " */
+		printk("WINDOW=%u ", ntohs(th->window));
+		/* Max length: 9 "RES=0x3C " */
+		printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+		if (th->cwr)
+			printk("CWR ");
+		if (th->ece)
+			printk("ECE ");
+		if (th->urg)
+			printk("URG ");
+		if (th->ack)
+			printk("ACK ");
+		if (th->psh)
+			printk("PSH ");
+		if (th->rst)
+			printk("RST ");
+		if (th->syn)
+			printk("SYN ");
+		if (th->fin)
+			printk("FIN ");
+		/* Max length: 11 "URGP=65535 " */
+		printk("URGP=%u ", ntohs(th->urg_ptr));
+
+		if ((info->logflags & IP6T_LOG_TCPOPT)
+		    && th->doff * 4 > sizeof(struct tcphdr)) {
+			u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+			unsigned int i;
+			unsigned int optsize = th->doff * 4
+					       - sizeof(struct tcphdr);
+
+			op = skb_header_pointer(skb,
+						ptr + sizeof(struct tcphdr),
+						optsize, _opt);
+			if (op == NULL) {
+				printk("OPT (TRUNCATED)");
+				return;
+			}
+
+			/* Max length: 127 "OPT (" 15*4*2chars ") " */
+			printk("OPT (");
+			for (i =0; i < optsize; i++)
+				printk("%02X", op[i]);
+			printk(") ");
+		}
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph, *uh;
+
+		/* Max length: 10 "PROTO=UDP " */
+		printk("PROTO=UDP ");
+
+		if (fragment)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
+		if (uh == NULL) {
+			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			return;
+		}
+
+		/* Max length: 20 "SPT=65535 DPT=65535 " */
+		printk("SPT=%u DPT=%u LEN=%u ",
+		       ntohs(uh->source), ntohs(uh->dest),
+		       ntohs(uh->len));
+		break;
+	}
+	case IPPROTO_ICMPV6: {
+		struct icmp6hdr _icmp6h, *ic;
+
+		/* Max length: 13 "PROTO=ICMPv6 " */
+		printk("PROTO=ICMPv6 ");
+
+		if (fragment)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+		if (ic == NULL) {
+			printk("INCOMPLETE [%u bytes] ", skb->len - ptr);
+			return;
+		}
+
+		/* Max length: 18 "TYPE=255 CODE=255 " */
+		printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
+
+		switch (ic->icmp6_type) {
+		case ICMPV6_ECHO_REQUEST:
+		case ICMPV6_ECHO_REPLY:
+			/* Max length: 19 "ID=65535 SEQ=65535 " */
+			printk("ID=%u SEQ=%u ",
+				ntohs(ic->icmp6_identifier),
+				ntohs(ic->icmp6_sequence));
+			break;
+		case ICMPV6_MGM_QUERY:
+		case ICMPV6_MGM_REPORT:
+		case ICMPV6_MGM_REDUCTION:
+			break;
+
+		case ICMPV6_PARAMPROB:
+			/* Max length: 17 "POINTER=ffffffff " */
+			printk("POINTER=%08x ", ntohl(ic->icmp6_pointer));
+			/* Fall through */
+		case ICMPV6_DEST_UNREACH:
+		case ICMPV6_PKT_TOOBIG:
+		case ICMPV6_TIME_EXCEED:
+			/* Max length: 3+maxlen */
+			if (recurse) {
+				printk("[");
+				dump_packet(info, skb, ptr + sizeof(_icmp6h),
+					    0);
+				printk("] ");
+			}
+
+			/* Max length: 10 "MTU=65535 " */
+			if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
+				printk("MTU=%u ", ntohl(ic->icmp6_mtu));
+		}
+		break;
+	}
+	/* Max length: 10 "PROTO=255 " */
+	default:
+		printk("PROTO=%u ", currenthdr);
+	}
+
+	/* Max length: 15 "UID=4294967295 " */
+	if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) {
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
+			printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+	}
+}
+
+static void
+ip6t_log_packet(unsigned int hooknum,
+		const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const struct ip6t_log_info *loginfo,
+		const char *level_string,
+		const char *prefix)
+{
+	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+
+	spin_lock_bh(&log_lock);
+	printk(level_string);
+	printk("%sIN=%s OUT=%s ",
+		prefix == NULL ? loginfo->prefix : prefix,
+		in ? in->name : "",
+		out ? out->name : "");
+	if (in && !out) {
+		/* MAC logging for input chain only. */
+		printk("MAC=");
+		if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) {
+			if (skb->dev->type != ARPHRD_SIT){
+			  int i;
+			  unsigned char *p = skb->mac.raw;
+			  for (i = 0; i < skb->dev->hard_header_len; i++,p++)
+				printk("%02x%c", *p,
+			       		i==skb->dev->hard_header_len - 1
+			       		? ' ':':');
+			} else {
+			  int i;
+			  unsigned char *p = skb->mac.raw;
+			  if ( p - (ETH_ALEN*2+2) > skb->head ){
+			    p -= (ETH_ALEN+2);
+			    for (i = 0; i < (ETH_ALEN); i++,p++)
+				printk("%02x%s", *p,
+					i == ETH_ALEN-1 ? "->" : ":");
+			    p -= (ETH_ALEN*2);
+			    for (i = 0; i < (ETH_ALEN); i++,p++)
+				printk("%02x%c", *p,
+					i == ETH_ALEN-1 ? ' ' : ':');
+			  }
+			  
+			  if ((skb->dev->addr_len == 4) &&
+			      skb->dev->hard_header_len > 20){
+			    printk("TUNNEL=");
+			    p = skb->mac.raw + 12;
+			    for (i = 0; i < 4; i++,p++)
+				printk("%3d%s", *p,
+					i == 3 ? "->" : ".");
+			    for (i = 0; i < 4; i++,p++)
+				printk("%3d%c", *p,
+					i == 3 ? ' ' : '.');
+			  }
+			}
+		} else
+			printk(" ");
+	}
+
+	dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
+	printk("\n");
+	spin_unlock_bh(&log_lock);
+}
+
+static unsigned int
+ip6t_log_target(struct sk_buff **pskb,
+		const struct net_device *in,
+		const struct net_device *out,
+		unsigned int hooknum,
+		const void *targinfo,
+		void *userinfo)
+{
+	const struct ip6t_log_info *loginfo = targinfo;
+	char level_string[4] = "< >";
+
+	level_string[1] = '0' + (loginfo->level % 8);
+	ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
+
+	return IP6T_CONTINUE;
+}
+
+static void
+ip6t_logfn(unsigned int hooknum,
+	   const struct sk_buff *skb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   const char *prefix)
+{
+	struct ip6t_log_info loginfo = {
+		.level = 0,
+		.logflags = IP6T_LOG_MASK,
+		.prefix = ""
+	};
+
+	ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
+}
+
+static int ip6t_log_checkentry(const char *tablename,
+			       const struct ip6t_entry *e,
+			       void *targinfo,
+			       unsigned int targinfosize,
+			       unsigned int hook_mask)
+{
+	const struct ip6t_log_info *loginfo = targinfo;
+
+	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_log_info))) {
+		DEBUGP("LOG: targinfosize %u != %u\n",
+		       targinfosize, IP6T_ALIGN(sizeof(struct ip6t_log_info)));
+		return 0;
+	}
+
+	if (loginfo->level >= 8) {
+		DEBUGP("LOG: level %u >= 8\n", loginfo->level);
+		return 0;
+	}
+
+	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
+		DEBUGP("LOG: prefix term %i\n",
+		       loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ip6t_target ip6t_log_reg = {
+	.name 		= "LOG",
+	.target 	= ip6t_log_target, 
+	.checkentry	= ip6t_log_checkentry, 
+	.me 		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	if (ip6t_register_target(&ip6t_log_reg))
+		return -EINVAL;
+	if (nflog)
+		nf_log_register(PF_INET6, &ip6t_logfn);
+
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	if (nflog)
+		nf_log_unregister(PF_INET6, &ip6t_logfn);
+	ip6t_unregister_target(&ip6t_log_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
new file mode 100644
index 00000000000..d09ceb05013
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -0,0 +1,78 @@
+/* This is a module which is used for setting the NFMARK field of an skb. */
+
+/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_MARK.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+
+static unsigned int
+target(struct sk_buff **pskb,
+       const struct net_device *in,
+       const struct net_device *out,
+       unsigned int hooknum,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ip6t_mark_target_info *markinfo = targinfo;
+
+	if((*pskb)->nfmark != markinfo->mark) {
+		(*pskb)->nfmark = markinfo->mark;
+		(*pskb)->nfcache |= NFC_ALTERED;
+	}
+	return IP6T_CONTINUE;
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ip6t_entry *e,
+           void *targinfo,
+           unsigned int targinfosize,
+           unsigned int hook_mask)
+{
+	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_mark_target_info))) {
+		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IP6T_ALIGN(sizeof(struct ip6t_mark_target_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct ip6t_target ip6t_mark_reg
+= { { NULL, NULL }, "MARK", target, checkentry, NULL, THIS_MODULE };
+
+static int __init init(void)
+{
+	printk(KERN_DEBUG "registering ipv6 mark target\n");
+	if (ip6t_register_target(&ip6t_mark_reg))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_target(&ip6t_mark_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
new file mode 100644
index 00000000000..d5b94f142bb
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -0,0 +1,208 @@
+/* Kernel module to match AH parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_ah.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 AH match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+	int r=0;
+	DEBUGP("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+	       min,spi,max);
+	r = (spi >= min && spi <= max) ^ invert;
+	DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
+	return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	struct ip_auth_hdr *ah = NULL, _ah;
+	const struct ip6t_ah *ahinfo = matchinfo;
+	unsigned int temp;
+	int len;
+	u8 nexthdr;
+	unsigned int ptr;
+	unsigned int hdrlen = 0;
+
+	/*DEBUGP("IPv6 AH entered\n");*/
+	/* if (opt->auth == 0) return 0;
+	* It does not filled on output */
+
+	/* type of the 1st exthdr */
+	nexthdr = skb->nh.ipv6h->nexthdr;
+	/* pointer to the 1st exthdr */
+	ptr = sizeof(struct ipv6hdr);
+	/* available length */
+	len = skb->len - ptr;
+	temp = 0;
+
+	while (ip6t_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+
+		DEBUGP("ipv6_ah header iteration \n");
+
+		/* Is there enough space for the next ext header? */
+		if (len < sizeof(struct ipv6_opt_hdr))
+			return 0;
+		/* No more exthdr -> evaluate */
+		if (nexthdr == NEXTHDR_NONE)
+			break;
+		/* ESP -> evaluate */
+		if (nexthdr == NEXTHDR_ESP)
+			break;
+
+		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+		BUG_ON(hp == NULL);
+
+		/* Calculate the header length */
+		if (nexthdr == NEXTHDR_FRAGMENT)
+			hdrlen = 8;
+		else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		/* AH -> evaluate */
+		if (nexthdr == NEXTHDR_AUTH) {
+			temp |= MASK_AH;
+			break;
+		}
+
+		
+		/* set the flag */
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_FRAGMENT:
+		case NEXTHDR_AUTH:
+		case NEXTHDR_DEST:
+			break;
+		default:
+			DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr);
+			return 0;
+		}
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		ptr += hdrlen;
+		if (ptr > skb->len) {
+			DEBUGP("ipv6_ah: new pointer too large! \n");
+			break;
+		}
+	}
+
+	/* AH header not found */
+	if (temp != MASK_AH)
+		return 0;
+
+	if (len < sizeof(struct ip_auth_hdr)){
+		*hotdrop = 1;
+		return 0;
+	}
+
+	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
+	BUG_ON(ah == NULL);
+
+	DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
+	DEBUGP("RES %04X ", ah->reserved);
+	DEBUGP("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi));
+
+	DEBUGP("IPv6 AH spi %02X ",
+	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
+	                  ntohl(ah->spi),
+	                  !!(ahinfo->invflags & IP6T_AH_INV_SPI))));
+	DEBUGP("len %02X %04X %02X ",
+	       ahinfo->hdrlen, hdrlen,
+	       (!ahinfo->hdrlen ||
+	        (ahinfo->hdrlen == hdrlen) ^
+	        !!(ahinfo->invflags & IP6T_AH_INV_LEN)));
+	DEBUGP("res %02X %04X %02X\n",
+	       ahinfo->hdrres, ah->reserved,
+	       !(ahinfo->hdrres && ah->reserved));
+
+	return (ah != NULL)
+	       &&
+	       (spi_match(ahinfo->spis[0], ahinfo->spis[1],
+	                  ntohl(ah->spi),
+	                  !!(ahinfo->invflags & IP6T_AH_INV_SPI)))
+	       &&
+	       (!ahinfo->hdrlen ||
+	        (ahinfo->hdrlen == hdrlen) ^
+	        !!(ahinfo->invflags & IP6T_AH_INV_LEN))
+	       &&
+	       !(ahinfo->hdrres && ah->reserved);
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+          const struct ip6t_ip6 *ip,
+          void *matchinfo,
+          unsigned int matchinfosize,
+          unsigned int hook_mask)
+{
+	const struct ip6t_ah *ahinfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_ah))) {
+		DEBUGP("ip6t_ah: matchsize %u != %u\n",
+		       matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_ah)));
+		return 0;
+	}
+	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
+		DEBUGP("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
+		return 0;
+	}
+	return 1;
+}
+
+static struct ip6t_match ah_match = {
+	.name		= "ah",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&ah_match);
+}
+
+static void __exit cleanup(void)
+{
+	ip6t_unregister_match(&ah_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
new file mode 100644
index 00000000000..540925e4a7a
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -0,0 +1,298 @@
+/* Kernel module to match Hop-by-Hop and Destination parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <asm/byteorder.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_opts.h>
+
+#define HOPBYHOP	0
+
+MODULE_LICENSE("GPL");
+#if HOPBYHOP
+MODULE_DESCRIPTION("IPv6 HbH match");
+#else
+MODULE_DESCRIPTION("IPv6 DST match");
+#endif
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * (Type & 0xC0) >> 6
+ * 	0	-> ignorable
+ * 	1	-> must drop the packet
+ * 	2	-> send ICMP PARM PROB regardless and drop packet
+ * 	3	-> Send ICMP if not a multicast address and drop packet
+ *  (Type & 0x20) >> 5
+ *  	0	-> invariant
+ *  	1	-> can change the routing
+ *  (Type & 0x1F) Type
+ *      0	-> Pad1 (only 1 byte!)
+ *      1	-> PadN LENGTH info (total length = length + 2)
+ *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
+ *      5	-> RTALERT 2 x x
+ */
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+       struct ipv6_opt_hdr _optsh, *oh;
+       const struct ip6t_opts *optinfo = matchinfo;
+       unsigned int temp;
+       unsigned int len;
+       u8 nexthdr;
+       unsigned int ptr;
+       unsigned int hdrlen = 0;
+       unsigned int ret = 0;
+       u8 _opttype, *tp = NULL;
+       u8 _optlen, *lp = NULL;
+       unsigned int optlen;
+       
+       /* type of the 1st exthdr */
+       nexthdr = skb->nh.ipv6h->nexthdr;
+       /* pointer to the 1st exthdr */
+       ptr = sizeof(struct ipv6hdr);
+       /* available length */
+       len = skb->len - ptr;
+       temp = 0;
+
+        while (ip6t_ext_hdr(nexthdr)) {
+               struct ipv6_opt_hdr _hdr, *hp;
+
+              DEBUGP("ipv6_opts header iteration \n");
+
+              /* Is there enough space for the next ext header? */
+                if (len < (int)sizeof(struct ipv6_opt_hdr))
+                        return 0;
+              /* No more exthdr -> evaluate */
+                if (nexthdr == NEXTHDR_NONE) {
+                     break;
+              }
+              /* ESP -> evaluate */
+                if (nexthdr == NEXTHDR_ESP) {
+                     break;
+              }
+
+	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+	      BUG_ON(hp == NULL);
+
+              /* Calculate the header length */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                        hdrlen = 8;
+                } else if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hp->hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(hp);
+
+              /* OPTS -> evaluate */
+#if HOPBYHOP
+                if (nexthdr == NEXTHDR_HOP) {
+                     temp |= MASK_HOPOPTS;
+#else
+                if (nexthdr == NEXTHDR_DEST) {
+                     temp |= MASK_DSTOPTS;
+#endif
+                     break;
+              }
+
+
+              /* set the flag */
+              switch (nexthdr){
+                     case NEXTHDR_HOP:
+                     case NEXTHDR_ROUTING:
+                     case NEXTHDR_FRAGMENT:
+                     case NEXTHDR_AUTH:
+                     case NEXTHDR_DEST:
+                            break;
+                     default:
+                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
+                            return 0;
+                            break;
+              }
+
+                nexthdr = hp->nexthdr;
+                len -= hdrlen;
+                ptr += hdrlen;
+		if ( ptr > skb->len ) {
+			DEBUGP("ipv6_opts: new pointer is too large! \n");
+			break;
+		}
+        }
+
+       /* OPTIONS header not found */
+#if HOPBYHOP
+       if ( temp != MASK_HOPOPTS ) return 0;
+#else
+       if ( temp != MASK_DSTOPTS ) return 0;
+#endif
+
+       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+	       *hotdrop = 1;
+       		return 0;
+       }
+
+       if (len < hdrlen){
+	       /* Packet smaller than it's length field */
+       		return 0;
+       }
+
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       BUG_ON(oh == NULL);
+
+       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+       DEBUGP("len %02X %04X %02X ",
+       		optinfo->hdrlen, hdrlen,
+       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
+                           ((optinfo->hdrlen == hdrlen) ^
+                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+       ret = (oh != NULL)
+       		&&
+	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
+                           ((optinfo->hdrlen == hdrlen) ^
+                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+       ptr += 2;
+       hdrlen -= 2;
+       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
+	       return ret;
+	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
+		DEBUGP("Not strict - not implemented");
+	} else {
+		DEBUGP("Strict ");
+		DEBUGP("#%d ",optinfo->optsnr);
+		for(temp=0; temp<optinfo->optsnr; temp++){
+			/* type field exists ? */
+			if (hdrlen < 1)
+				break;
+			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
+						&_opttype);
+			if (tp == NULL)
+				break;
+
+			/* Type check */
+			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
+				DEBUGP("Tbad %02X %02X\n",
+				       *tp,
+				       (optinfo->opts[temp] & 0xFF00)>>8);
+				return 0;
+			} else {
+				DEBUGP("Tok ");
+			}
+			/* Length check */
+			if (*tp) {
+				u16 spec_len;
+
+				/* length field exists ? */
+				if (hdrlen < 2)
+					break;
+				lp = skb_header_pointer(skb, ptr + 1,
+							sizeof(_optlen),
+							&_optlen);
+				if (lp == NULL)
+					break;
+				spec_len = optinfo->opts[temp] & 0x00FF;
+
+				if (spec_len != 0x00FF && spec_len != *lp) {
+					DEBUGP("Lbad %02X %04X\n", *lp,
+					       spec_len);
+					return 0;
+				}
+				DEBUGP("Lok ");
+				optlen = *lp + 2;
+			} else {
+				DEBUGP("Pad1\n");
+				optlen = 1;
+			}
+
+			/* Step to the next */
+			DEBUGP("len%04X \n", optlen);
+
+			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
+			    (temp < optinfo->optsnr - 1)) {
+				DEBUGP("new pointer is too large! \n");
+				break;
+			}
+			ptr += optlen;
+			hdrlen -= optlen;
+		}
+		if (temp == optinfo->optsnr)
+			return ret;
+		else return 0;
+	}
+
+	return 0;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+          const struct ip6t_ip6 *ip,
+          void *matchinfo,
+          unsigned int matchinfosize,
+          unsigned int hook_mask)
+{
+       const struct ip6t_opts *optsinfo = matchinfo;
+
+       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
+              DEBUGP("ip6t_opts: matchsize %u != %u\n",
+                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
+              return 0;
+       }
+       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+              DEBUGP("ip6t_opts: unknown flags %X\n",
+                      optsinfo->invflags);
+              return 0;
+       }
+
+       return 1;
+}
+
+static struct ip6t_match opts_match = {
+#if HOPBYHOP
+	.name		= "hbh",
+#else
+	.name		= "dst",
+#endif
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ip6t_register_match(&opts_match);
+}
+
+static void __exit cleanup(void)
+{
+       ip6t_unregister_match(&opts_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
new file mode 100644
index 00000000000..e39dd236fd8
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -0,0 +1,181 @@
+/* Kernel module to match ESP parameters. */
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_esp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 ESP match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline int
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+{
+	int r=0;
+	DEBUGP("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+	       min,spi,max);
+	r=(spi >= min && spi <= max) ^ invert;
+	DEBUGP(" result %s\n",r? "PASS\n" : "FAILED\n");
+	return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	struct ip_esp_hdr _esp, *eh = NULL;
+	const struct ip6t_esp *espinfo = matchinfo;
+	unsigned int temp;
+	int len;
+	u8 nexthdr;
+	unsigned int ptr;
+
+	/* Make sure this isn't an evil packet */
+	/*DEBUGP("ipv6_esp entered \n");*/
+
+	/* type of the 1st exthdr */
+	nexthdr = skb->nh.ipv6h->nexthdr;
+	/* pointer to the 1st exthdr */
+	ptr = sizeof(struct ipv6hdr);
+	/* available length */
+	len = skb->len - ptr;
+	temp = 0;
+
+	while (ip6t_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		DEBUGP("ipv6_esp header iteration \n");
+
+		/* Is there enough space for the next ext header? */
+		if (len < sizeof(struct ipv6_opt_hdr))
+			return 0;
+		/* No more exthdr -> evaluate */
+		if (nexthdr == NEXTHDR_NONE)
+			break;
+		/* ESP -> evaluate */
+		if (nexthdr == NEXTHDR_ESP) {
+			temp |= MASK_ESP;
+			break;
+		}
+
+		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+		BUG_ON(hp == NULL);
+
+		/* Calculate the header length */
+		if (nexthdr == NEXTHDR_FRAGMENT)
+			hdrlen = 8;
+		else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		/* set the flag */
+		switch (nexthdr) {
+		case NEXTHDR_HOP:
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_FRAGMENT:
+		case NEXTHDR_AUTH:
+		case NEXTHDR_DEST:
+			break;
+		default:
+			DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr);
+			return 0;
+		}
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		ptr += hdrlen;
+		if (ptr > skb->len) {
+			DEBUGP("ipv6_esp: new pointer too large! \n");
+			break;
+		}
+	}
+
+	/* ESP header not found */
+	if (temp != MASK_ESP)
+		return 0;
+
+	if (len < sizeof(struct ip_esp_hdr)) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
+	BUG_ON(eh == NULL);
+
+	DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
+
+	return (eh != NULL)
+		&& spi_match(espinfo->spis[0], espinfo->spis[1],
+			      ntohl(eh->spi),
+			      !!(espinfo->invflags & IP6T_ESP_INV_SPI));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+	   const struct ip6t_ip6 *ip,
+	   void *matchinfo,
+	   unsigned int matchinfosize,
+	   unsigned int hook_mask)
+{
+	const struct ip6t_esp *espinfo = matchinfo;
+
+	if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_esp))) {
+		DEBUGP("ip6t_esp: matchsize %u != %u\n",
+			 matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_esp)));
+		return 0;
+	}
+	if (espinfo->invflags & ~IP6T_ESP_INV_MASK) {
+		DEBUGP("ip6t_esp: unknown flags %X\n",
+			 espinfo->invflags);
+		return 0;
+	}
+	return 1;
+}
+
+static struct ip6t_match esp_match = {
+	.name		= "esp",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&esp_match);
+}
+
+static void __exit cleanup(void)
+{
+	ip6t_unregister_match(&esp_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
new file mode 100644
index 00000000000..616c2cbcd54
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -0,0 +1,101 @@
+/* Kernel module to match EUI64 address parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_DESCRIPTION("IPv6 EUI64 address checking match");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+
+    unsigned char eui64[8];
+    int i=0;
+
+     if ( !(skb->mac.raw >= skb->head
+                && (skb->mac.raw + ETH_HLEN) <= skb->data)
+                && offset != 0) {
+                        *hotdrop = 1;
+                        return 0;
+                }
+    
+    memset(eui64, 0, sizeof(eui64));
+
+    if (eth_hdr(skb)->h_proto == ntohs(ETH_P_IPV6)) {
+      if (skb->nh.ipv6h->version == 0x6) { 
+         memcpy(eui64, eth_hdr(skb)->h_source, 3);
+         memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
+	 eui64[3]=0xff;
+	 eui64[4]=0xfe;
+	 eui64[0] |= 0x02;
+
+	 i=0;
+	 while ((skb->nh.ipv6h->saddr.s6_addr[8+i] ==
+			 eui64[i]) && (i<8)) i++;
+
+	 if ( i == 8 )
+	 	return 1;
+      }
+    }
+
+    return 0;
+}
+
+static int
+ip6t_eui64_checkentry(const char *tablename,
+		   const struct ip6t_ip6 *ip,
+		   void *matchinfo,
+		   unsigned int matchsize,
+		   unsigned int hook_mask)
+{
+	if (hook_mask
+	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) |
+		(1 << NF_IP6_FORWARD))) {
+		printk("ip6t_eui64: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
+		return 0;
+	}
+
+	if (matchsize != IP6T_ALIGN(sizeof(int)))
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match eui64_match = {
+	.name		= "eui64",
+	.match		= &match,
+	.checkentry	= &ip6t_eui64_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&eui64_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&eui64_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
new file mode 100644
index 00000000000..4bfa30a9bc8
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -0,0 +1,229 @@
+/* Kernel module to match FRAG parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_frag.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 FRAG match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Returns 1 if the id is matched by the range, 0 otherwise */
+static inline int
+id_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+{
+       int r=0;
+       DEBUGP("frag id_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+              min,id,max);
+       r=(id >= min && id <= max) ^ invert;
+       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
+       return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+       struct frag_hdr _frag, *fh = NULL;
+       const struct ip6t_frag *fraginfo = matchinfo;
+       unsigned int temp;
+       int len;
+       u8 nexthdr;
+       unsigned int ptr;
+       unsigned int hdrlen = 0;
+
+       /* type of the 1st exthdr */
+       nexthdr = skb->nh.ipv6h->nexthdr;
+       /* pointer to the 1st exthdr */
+       ptr = sizeof(struct ipv6hdr);
+       /* available length */
+       len = skb->len - ptr;
+       temp = 0;
+
+        while (ip6t_ext_hdr(nexthdr)) {
+               struct ipv6_opt_hdr _hdr, *hp;
+
+              DEBUGP("ipv6_frag header iteration \n");
+
+              /* Is there enough space for the next ext header? */
+                if (len < (int)sizeof(struct ipv6_opt_hdr))
+                        return 0;
+              /* No more exthdr -> evaluate */
+                if (nexthdr == NEXTHDR_NONE) {
+                     break;
+              }
+              /* ESP -> evaluate */
+                if (nexthdr == NEXTHDR_ESP) {
+                     break;
+              }
+
+	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+	      BUG_ON(hp == NULL);
+
+              /* Calculate the header length */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                        hdrlen = 8;
+                } else if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hp->hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(hp);
+
+              /* FRAG -> evaluate */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                     temp |= MASK_FRAGMENT;
+                     break;
+              }
+
+
+              /* set the flag */
+              switch (nexthdr){
+                     case NEXTHDR_HOP:
+                     case NEXTHDR_ROUTING:
+                     case NEXTHDR_FRAGMENT:
+                     case NEXTHDR_AUTH:
+                     case NEXTHDR_DEST:
+                            break;
+                     default:
+                            DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr);
+                            return 0;
+                            break;
+              }
+
+                nexthdr = hp->nexthdr;
+                len -= hdrlen;
+                ptr += hdrlen;
+		if ( ptr > skb->len ) {
+			DEBUGP("ipv6_frag: new pointer too large! \n");
+			break;
+		}
+        }
+
+       /* FRAG header not found */
+       if ( temp != MASK_FRAGMENT ) return 0;
+
+       if (len < sizeof(struct frag_hdr)){
+	       *hotdrop = 1;
+       		return 0;
+       }
+
+       fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
+       BUG_ON(fh == NULL);
+
+       DEBUGP("INFO %04X ", fh->frag_off);
+       DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
+       DEBUGP("RES %02X %04X", fh->reserved, ntohs(fh->frag_off) & 0x6);
+       DEBUGP("MF %04X ", fh->frag_off & htons(IP6_MF));
+       DEBUGP("ID %u %08X\n", ntohl(fh->identification),
+	      ntohl(fh->identification));
+
+       DEBUGP("IPv6 FRAG id %02X ",
+       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
+                           ntohl(fh->identification),
+                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS))));
+       DEBUGP("res %02X %02X%04X %02X ", 
+       		(fraginfo->flags & IP6T_FRAG_RES), fh->reserved,
+		ntohs(fh->frag_off) & 0x6,
+       		!((fraginfo->flags & IP6T_FRAG_RES)
+			&& (fh->reserved || (ntohs(fh->frag_off) & 0x06))));
+       DEBUGP("first %02X %02X %02X ", 
+       		(fraginfo->flags & IP6T_FRAG_FST),
+		ntohs(fh->frag_off) & ~0x7,
+       		!((fraginfo->flags & IP6T_FRAG_FST)
+			&& (ntohs(fh->frag_off) & ~0x7)));
+       DEBUGP("mf %02X %02X %02X ", 
+       		(fraginfo->flags & IP6T_FRAG_MF),
+		ntohs(fh->frag_off) & IP6_MF,
+       		!((fraginfo->flags & IP6T_FRAG_MF)
+			&& !((ntohs(fh->frag_off) & IP6_MF))));
+       DEBUGP("last %02X %02X %02X\n", 
+       		(fraginfo->flags & IP6T_FRAG_NMF),
+		ntohs(fh->frag_off) & IP6_MF,
+       		!((fraginfo->flags & IP6T_FRAG_NMF)
+			&& (ntohs(fh->frag_off) & IP6_MF)));
+
+       return (fh != NULL)
+       		&&
+       		(id_match(fraginfo->ids[0], fraginfo->ids[1],
+			  ntohl(fh->identification),
+                           !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)))
+		&&
+		!((fraginfo->flags & IP6T_FRAG_RES)
+			&& (fh->reserved || (ntohs(fh->frag_off) & 0x6)))
+		&&
+		!((fraginfo->flags & IP6T_FRAG_FST)
+			&& (ntohs(fh->frag_off) & ~0x7))
+		&&
+		!((fraginfo->flags & IP6T_FRAG_MF)
+			&& !(ntohs(fh->frag_off) & IP6_MF))
+		&&
+		!((fraginfo->flags & IP6T_FRAG_NMF)
+			&& (ntohs(fh->frag_off) & IP6_MF));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+          const struct ip6t_ip6 *ip,
+          void *matchinfo,
+          unsigned int matchinfosize,
+          unsigned int hook_mask)
+{
+       const struct ip6t_frag *fraginfo = matchinfo;
+
+       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_frag))) {
+              DEBUGP("ip6t_frag: matchsize %u != %u\n",
+                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_frag)));
+              return 0;
+       }
+       if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
+              DEBUGP("ip6t_frag: unknown flags %X\n",
+                      fraginfo->invflags);
+              return 0;
+       }
+
+       return 1;
+}
+
+static struct ip6t_match frag_match = {
+	.name		= "frag",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ip6t_register_match(&frag_match);
+}
+
+static void __exit cleanup(void)
+{
+       ip6t_unregister_match(&frag_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
new file mode 100644
index 00000000000..27f3650d127
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -0,0 +1,298 @@
+/* Kernel module to match Hop-by-Hop and Destination parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <asm/byteorder.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_opts.h>
+
+#define HOPBYHOP	1
+
+MODULE_LICENSE("GPL");
+#if HOPBYHOP
+MODULE_DESCRIPTION("IPv6 HbH match");
+#else
+MODULE_DESCRIPTION("IPv6 DST match");
+#endif
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * (Type & 0xC0) >> 6
+ * 	0	-> ignorable
+ * 	1	-> must drop the packet
+ * 	2	-> send ICMP PARM PROB regardless and drop packet
+ * 	3	-> Send ICMP if not a multicast address and drop packet
+ *  (Type & 0x20) >> 5
+ *  	0	-> invariant
+ *  	1	-> can change the routing
+ *  (Type & 0x1F) Type
+ *      0	-> Pad1 (only 1 byte!)
+ *      1	-> PadN LENGTH info (total length = length + 2)
+ *      C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
+ *      5	-> RTALERT 2 x x
+ */
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+       struct ipv6_opt_hdr _optsh, *oh;
+       const struct ip6t_opts *optinfo = matchinfo;
+       unsigned int temp;
+       unsigned int len;
+       u8 nexthdr;
+       unsigned int ptr;
+       unsigned int hdrlen = 0;
+       unsigned int ret = 0;
+       u8 _opttype, *tp = NULL;
+       u8 _optlen, *lp = NULL;
+       unsigned int optlen;
+       
+       /* type of the 1st exthdr */
+       nexthdr = skb->nh.ipv6h->nexthdr;
+       /* pointer to the 1st exthdr */
+       ptr = sizeof(struct ipv6hdr);
+       /* available length */
+       len = skb->len - ptr;
+       temp = 0;
+
+        while (ip6t_ext_hdr(nexthdr)) {
+               struct ipv6_opt_hdr _hdr, *hp;
+
+              DEBUGP("ipv6_opts header iteration \n");
+
+              /* Is there enough space for the next ext header? */
+                if (len < (int)sizeof(struct ipv6_opt_hdr))
+                        return 0;
+              /* No more exthdr -> evaluate */
+                if (nexthdr == NEXTHDR_NONE) {
+                     break;
+              }
+              /* ESP -> evaluate */
+                if (nexthdr == NEXTHDR_ESP) {
+                     break;
+              }
+
+	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+	      BUG_ON(hp == NULL);
+
+              /* Calculate the header length */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                        hdrlen = 8;
+                } else if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hp->hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(hp);
+
+              /* OPTS -> evaluate */
+#if HOPBYHOP
+                if (nexthdr == NEXTHDR_HOP) {
+                     temp |= MASK_HOPOPTS;
+#else
+                if (nexthdr == NEXTHDR_DEST) {
+                     temp |= MASK_DSTOPTS;
+#endif
+                     break;
+              }
+
+
+              /* set the flag */
+              switch (nexthdr){
+                     case NEXTHDR_HOP:
+                     case NEXTHDR_ROUTING:
+                     case NEXTHDR_FRAGMENT:
+                     case NEXTHDR_AUTH:
+                     case NEXTHDR_DEST:
+                            break;
+                     default:
+                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
+                            return 0;
+                            break;
+              }
+
+                nexthdr = hp->nexthdr;
+                len -= hdrlen;
+                ptr += hdrlen;
+		if ( ptr > skb->len ) {
+			DEBUGP("ipv6_opts: new pointer is too large! \n");
+			break;
+		}
+        }
+
+       /* OPTIONS header not found */
+#if HOPBYHOP
+       if ( temp != MASK_HOPOPTS ) return 0;
+#else
+       if ( temp != MASK_DSTOPTS ) return 0;
+#endif
+
+       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+	       *hotdrop = 1;
+       		return 0;
+       }
+
+       if (len < hdrlen){
+	       /* Packet smaller than it's length field */
+       		return 0;
+       }
+
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       BUG_ON(oh == NULL);
+
+       DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
+
+       DEBUGP("len %02X %04X %02X ",
+       		optinfo->hdrlen, hdrlen,
+       		(!(optinfo->flags & IP6T_OPTS_LEN) ||
+                           ((optinfo->hdrlen == hdrlen) ^
+                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
+
+       ret = (oh != NULL)
+       		&&
+	      	(!(optinfo->flags & IP6T_OPTS_LEN) ||
+                           ((optinfo->hdrlen == hdrlen) ^
+                           !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
+
+       ptr += 2;
+       hdrlen -= 2;
+       if ( !(optinfo->flags & IP6T_OPTS_OPTS) ){
+	       return ret;
+	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
+		DEBUGP("Not strict - not implemented");
+	} else {
+		DEBUGP("Strict ");
+		DEBUGP("#%d ",optinfo->optsnr);
+		for(temp=0; temp<optinfo->optsnr; temp++){
+			/* type field exists ? */
+			if (hdrlen < 1)
+				break;
+			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
+						&_opttype);
+			if (tp == NULL)
+				break;
+
+			/* Type check */
+			if (*tp != (optinfo->opts[temp] & 0xFF00)>>8){
+				DEBUGP("Tbad %02X %02X\n",
+				       *tp,
+				       (optinfo->opts[temp] & 0xFF00)>>8);
+				return 0;
+			} else {
+				DEBUGP("Tok ");
+			}
+			/* Length check */
+			if (*tp) {
+				u16 spec_len;
+
+				/* length field exists ? */
+				if (hdrlen < 2)
+					break;
+				lp = skb_header_pointer(skb, ptr + 1,
+							sizeof(_optlen),
+							&_optlen);
+				if (lp == NULL)
+					break;
+				spec_len = optinfo->opts[temp] & 0x00FF;
+
+				if (spec_len != 0x00FF && spec_len != *lp) {
+					DEBUGP("Lbad %02X %04X\n", *lp,
+					       spec_len);
+					return 0;
+				}
+				DEBUGP("Lok ");
+				optlen = *lp + 2;
+			} else {
+				DEBUGP("Pad1\n");
+				optlen = 1;
+			}
+
+			/* Step to the next */
+			DEBUGP("len%04X \n", optlen);
+
+			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
+			    (temp < optinfo->optsnr - 1)) {
+				DEBUGP("new pointer is too large! \n");
+				break;
+			}
+			ptr += optlen;
+			hdrlen -= optlen;
+		}
+		if (temp == optinfo->optsnr)
+			return ret;
+		else return 0;
+	}
+
+	return 0;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+          const struct ip6t_ip6 *ip,
+          void *matchinfo,
+          unsigned int matchinfosize,
+          unsigned int hook_mask)
+{
+       const struct ip6t_opts *optsinfo = matchinfo;
+
+       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_opts))) {
+              DEBUGP("ip6t_opts: matchsize %u != %u\n",
+                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_opts)));
+              return 0;
+       }
+       if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
+              DEBUGP("ip6t_opts: unknown flags %X\n",
+                      optsinfo->invflags);
+              return 0;
+       }
+
+       return 1;
+}
+
+static struct ip6t_match opts_match = {
+#if HOPBYHOP
+	.name		= "hbh",
+#else
+	.name		= "dst",
+#endif
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ip6t_register_match(&opts_match);
+}
+
+static void __exit cleanup(void)
+{
+       ip6t_unregister_match(&opts_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
new file mode 100644
index 00000000000..0beaff5471d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -0,0 +1,80 @@
+/* Hop Limit matching module */
+
+/* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ * Based on HW's ttl module
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv6/ip6t_hl.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
+MODULE_DESCRIPTION("IP tables Hop Limit matching module");
+MODULE_LICENSE("GPL");
+
+static int match(const struct sk_buff *skb, const struct net_device *in,
+		 const struct net_device *out, const void *matchinfo,
+		 int offset, unsigned int protoff,
+		 int *hotdrop)
+{
+	const struct ip6t_hl_info *info = matchinfo;
+	const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+
+	switch (info->mode) {
+		case IP6T_HL_EQ:
+			return (ip6h->hop_limit == info->hop_limit);
+			break;
+		case IP6T_HL_NE:
+			return (!(ip6h->hop_limit == info->hop_limit));
+			break;
+		case IP6T_HL_LT:
+			return (ip6h->hop_limit < info->hop_limit);
+			break;
+		case IP6T_HL_GT:
+			return (ip6h->hop_limit > info->hop_limit);
+			break;
+		default:
+			printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", 
+				info->mode);
+			return 0;
+	}
+
+	return 0;
+}
+
+static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
+		      void *matchinfo, unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_hl_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match hl_match = {
+	.name		= "hl",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&hl_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&hl_match);
+
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
new file mode 100644
index 00000000000..32e67f05845
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -0,0 +1,167 @@
+/* ipv6header match - matches IPv6 packets based
+   on whether they contain certain headers */
+
+/* Original idea: Brad Chapman 
+ * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 headers match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+static int
+ipv6header_match(const struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 const void *matchinfo,
+		 int offset,
+		 unsigned int protoff,
+		 int *hotdrop)
+{
+	const struct ip6t_ipv6header_info *info = matchinfo;
+	unsigned int temp;
+	int len;
+	u8 nexthdr;
+	unsigned int ptr;
+
+	/* Make sure this isn't an evil packet */
+
+	/* type of the 1st exthdr */
+	nexthdr = skb->nh.ipv6h->nexthdr;
+	/* pointer to the 1st exthdr */
+	ptr = sizeof(struct ipv6hdr);
+	/* available length */
+	len = skb->len - ptr;
+	temp = 0;
+
+        while (ip6t_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+        	int hdrlen;
+
+		/* Is there enough space for the next ext header? */
+                if (len < (int)sizeof(struct ipv6_opt_hdr))
+                        return 0;
+		/* No more exthdr -> evaluate */
+                if (nexthdr == NEXTHDR_NONE) {
+			temp |= MASK_NONE;
+			break;
+		}
+		/* ESP -> evaluate */
+                if (nexthdr == NEXTHDR_ESP) {
+			temp |= MASK_ESP;
+			break;
+		}
+
+		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+		BUG_ON(hp == NULL);
+
+		/* Calculate the header length */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                        hdrlen = 8;
+                } else if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hp->hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(hp);
+
+		/* set the flag */
+		switch (nexthdr){
+			case NEXTHDR_HOP:
+				temp |= MASK_HOPOPTS;
+				break;
+			case NEXTHDR_ROUTING:
+				temp |= MASK_ROUTING;
+				break;
+			case NEXTHDR_FRAGMENT:
+				temp |= MASK_FRAGMENT;
+				break;
+			case NEXTHDR_AUTH:
+				temp |= MASK_AH;
+				break;
+			case NEXTHDR_DEST:
+				temp |= MASK_DSTOPTS;
+				break;
+			default:
+				return 0;
+				break;
+		}
+
+                nexthdr = hp->nexthdr;
+                len -= hdrlen;
+                ptr += hdrlen;
+		if (ptr > skb->len)
+			break;
+        }
+
+	if ( (nexthdr != NEXTHDR_NONE ) && (nexthdr != NEXTHDR_ESP) )
+		temp |= MASK_PROTO;
+
+	if (info->modeflag)
+		return !((temp ^ info->matchflags ^ info->invflags)
+			 & info->matchflags);
+	else {
+		if (info->invflags)
+			return temp != info->matchflags;
+		else
+			return temp == info->matchflags;
+	}
+}
+
+static int
+ipv6header_checkentry(const char *tablename,
+		      const struct ip6t_ip6 *ip,
+		      void *matchinfo,
+		      unsigned int matchsize,
+		      unsigned int hook_mask)
+{
+	const struct ip6t_ipv6header_info *info = matchinfo;
+
+	/* Check for obvious errors */
+	/* This match is valid in all hooks! */
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_ipv6header_info)))
+		return 0;
+
+	/* invflags is 0 or 0xff in hard mode */
+	if ((!info->modeflag) && info->invflags != 0x00
+			      && info->invflags != 0xFF)
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match ip6t_ipv6header_match = {
+	.name		= "ipv6header",
+	.match		= &ipv6header_match,
+	.checkentry	= &ipv6header_checkentry,
+	.destroy	= NULL,
+	.me		= THIS_MODULE,
+};
+
+static int  __init ipv6header_init(void)
+{
+	return ip6t_register_match(&ip6t_ipv6header_match);
+}
+
+static void __exit ipv6header_exit(void)
+{
+	ip6t_unregister_match(&ip6t_ipv6header_match);
+}
+
+module_init(ipv6header_init);
+module_exit(ipv6header_exit);
+
diff --git a/net/ipv6/netfilter/ip6t_length.c b/net/ipv6/netfilter/ip6t_length.c
new file mode 100644
index 00000000000..e0537d3811d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_length.c
@@ -0,0 +1,66 @@
+/* Length Match - IPv6 Port */
+
+/* (C) 1999-2001 James Morris <jmorros@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv6/ip6t_length.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("IPv6 packet length match");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	const struct ip6t_length_info *info = matchinfo;
+	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+	
+	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ip6t_ip6 *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_length_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match length_match = {
+	.name		= "length",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&length_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&length_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_limit.c b/net/ipv6/netfilter/ip6t_limit.c
new file mode 100644
index 00000000000..fb782f610be
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_limit.c
@@ -0,0 +1,147 @@
+/* Kernel module to control the rate
+ *
+ * 2 September 1999: Changed from the target RATE to the match
+ *                   `limit', removed logging.  Did I mention that
+ *                   Alexey is a fucking genius?
+ *                   Rusty Russell (rusty@rustcorp.com.au).  */
+
+/* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
+ * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_limit.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
+MODULE_DESCRIPTION("rate limiting within ip6tables");
+
+/* The algorithm used is the Simple Token Bucket Filter (TBF)
+ * see net/sched/sch_tbf.c in the linux source tree
+ */
+
+static DEFINE_SPINLOCK(limit_lock);
+
+/* Rusty: This is my (non-mathematically-inclined) understanding of
+   this algorithm.  The `average rate' in jiffies becomes your initial
+   amount of credit `credit' and the most credit you can ever have
+   `credit_cap'.  The `peak rate' becomes the cost of passing the
+   test, `cost'.
+
+   `prev' tracks the last packet hit: you gain one credit per jiffy.
+   If you get credit balance more than this, the extra credit is
+   discarded.  Every time the match passes, you lose `cost' credits;
+   if you don't have that many, the test fails.
+
+   See Alexey's formal explanation in net/sched/sch_tbf.c.
+
+   To avoid underflow, we multiply by 128 (ie. you get 128 credits per
+   jiffy).  Hence a cost of 2^32-1, means one pass per 32768 seconds
+   at 1024HZ (or one every 9 hours).  A cost of 1 means 12800 passes
+   per second at 100HZ.  */
+
+#define CREDITS_PER_JIFFY 128
+
+static int
+ip6t_limit_match(const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const void *matchinfo,
+		int offset,
+		unsigned int protoff,
+		int *hotdrop)
+{
+	struct ip6t_rateinfo *r = ((struct ip6t_rateinfo *)matchinfo)->master;
+	unsigned long now = jiffies;
+
+	spin_lock_bh(&limit_lock);
+	r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
+	if (r->credit > r->credit_cap)
+		r->credit = r->credit_cap;
+
+	if (r->credit >= r->cost) {
+		/* We're not limited. */
+		r->credit -= r->cost;
+		spin_unlock_bh(&limit_lock);
+		return 1;
+	}
+
+       	spin_unlock_bh(&limit_lock);
+	return 0;
+}
+
+/* Precision saver. */
+static u_int32_t
+user2credits(u_int32_t user)
+{
+	/* If multiplying would overflow... */
+	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+		/* Divide first. */
+		return (user / IP6T_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+
+	return (user * HZ * CREDITS_PER_JIFFY) / IP6T_LIMIT_SCALE;
+}
+
+static int
+ip6t_limit_checkentry(const char *tablename,
+		     const struct ip6t_ip6 *ip,
+		     void *matchinfo,
+		     unsigned int matchsize,
+		     unsigned int hook_mask)
+{
+	struct ip6t_rateinfo *r = matchinfo;
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_rateinfo)))
+		return 0;
+
+	/* Check for overflow. */
+	if (r->burst == 0
+	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
+		printk("Call rusty: overflow in ip6t_limit: %u/%u\n",
+		       r->avg, r->burst);
+		return 0;
+	}
+
+	/* User avg in seconds * IP6T_LIMIT_SCALE: convert to jiffies *
+	   128. */
+	r->prev = jiffies;
+	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
+	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
+	r->cost = user2credits(r->avg);
+
+	/* For SMP, we only want to use one set of counters. */
+	r->master = r;
+
+	return 1;
+}
+
+static struct ip6t_match ip6t_limit_reg = {
+	.name		= "limit",
+	.match		= ip6t_limit_match,
+	.checkentry	= ip6t_limit_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	if (ip6t_register_match(&ip6t_limit_reg))
+		return -EINVAL;
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&ip6t_limit_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
new file mode 100644
index 00000000000..526d43e3723
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mac.c
@@ -0,0 +1,80 @@
+/* Kernel module to match MAC address parameters. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv6/ip6t_mac.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MAC address matching module for IPv6");
+MODULE_AUTHOR("Netfilter Core Teaam <coreteam@netfilter.org>");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+    const struct ip6t_mac_info *info = matchinfo;
+
+    /* Is mac pointer valid? */
+    return (skb->mac.raw >= skb->head
+	    && (skb->mac.raw + ETH_HLEN) <= skb->data
+	    /* If so, compare... */
+	    && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
+		== 0) ^ info->invert));
+}
+
+static int
+ip6t_mac_checkentry(const char *tablename,
+		   const struct ip6t_ip6 *ip,
+		   void *matchinfo,
+		   unsigned int matchsize,
+		   unsigned int hook_mask)
+{
+	if (hook_mask
+	    & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN)
+		| (1 << NF_IP6_FORWARD))) {
+		printk("ip6t_mac: only valid for PRE_ROUTING, LOCAL_IN or"
+		       " FORWARD\n");
+		return 0;
+	}
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match mac_match = {
+	.name		= "mac",
+	.match		= &match,
+	.checkentry	= &ip6t_mac_checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&mac_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&mac_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_mark.c b/net/ipv6/netfilter/ip6t_mark.c
new file mode 100644
index 00000000000..affc3de364f
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mark.c
@@ -0,0 +1,66 @@
+/* Kernel module to match NFMARK values. */
+
+/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv6/ip6t_mark.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables mark match");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	const struct ip6t_mark_info *info = matchinfo;
+
+	return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ip6t_ip6 *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mark_info)))
+		return 0;
+
+	return 1;
+}
+
+static struct ip6t_match mark_match = {
+	.name		= "mark",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&mark_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&mark_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
new file mode 100644
index 00000000000..6e3246153fa
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_multiport.c
@@ -0,0 +1,125 @@
+/* Kernel module to match one of a list of TCP/UDP ports: ports are in
+   the same place so we can treat them as equal. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+
+#include <linux/netfilter_ipv6/ip6t_multiport.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables match for multiple ports");
+
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if the port is matched by the test, 0 otherwise. */
+static inline int
+ports_match(const u_int16_t *portlist, enum ip6t_multiport_flags flags,
+	    u_int8_t count, u_int16_t src, u_int16_t dst)
+{
+	unsigned int i;
+	for (i=0; i<count; i++) {
+		if (flags != IP6T_MULTIPORT_DESTINATION
+		    && portlist[i] == src)
+			return 1;
+
+		if (flags != IP6T_MULTIPORT_SOURCE
+		    && portlist[i] == dst)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	u16 _ports[2], *pptr;
+	const struct ip6t_multiport *multiinfo = matchinfo;
+
+	/* Must not be a fragment. */
+	if (offset)
+		return 0;
+
+	/* Must be big enough to read ports (both UDP and TCP have
+	   them at the start). */
+	pptr = skb_header_pointer(skb, protoff, sizeof(_ports), &_ports[0]);
+	if (pptr == NULL) {
+		/* We've been asked to examine this packet, and we
+		 * can't.  Hence, no choice but to drop.
+		 */
+		duprintf("ip6t_multiport:"
+			 " Dropping evil offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return ports_match(multiinfo->ports,
+			   multiinfo->flags, multiinfo->count,
+			   ntohs(pptr[0]), ntohs(pptr[1]));
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+	   const struct ip6t_ip6 *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ip6t_multiport *multiinfo = matchinfo;
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport)))
+		return 0;
+
+	/* Must specify proto == TCP/UDP, no unknown flags or bad count */
+	return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP)
+		&& !(ip->invflags & IP6T_INV_PROTO)
+		&& matchsize == IP6T_ALIGN(sizeof(struct ip6t_multiport))
+		&& (multiinfo->flags == IP6T_MULTIPORT_SOURCE
+		    || multiinfo->flags == IP6T_MULTIPORT_DESTINATION
+		    || multiinfo->flags == IP6T_MULTIPORT_EITHER)
+		&& multiinfo->count <= IP6T_MULTI_PORTS;
+}
+
+static struct ip6t_match multiport_match = {
+	.name		= "multiport",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&multiport_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&multiport_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
new file mode 100644
index 00000000000..ab0e32d3de4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -0,0 +1,174 @@
+/* Kernel module to match various things tied to sockets associated with
+   locally generated outgoing packets. */
+
+/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+
+#include <linux/netfilter_ipv6/ip6t_owner.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
+MODULE_DESCRIPTION("IP6 tables owner matching module");
+MODULE_LICENSE("GPL");
+
+static int
+match_pid(const struct sk_buff *skb, pid_t pid)
+{
+	struct task_struct *p;
+	struct files_struct *files;
+	int i;
+
+	read_lock(&tasklist_lock);
+	p = find_task_by_pid(pid);
+	if (!p)
+		goto out;
+	task_lock(p);
+	files = p->files;
+	if(files) {
+		spin_lock(&files->file_lock);
+		for (i=0; i < files->max_fds; i++) {
+			if (fcheck_files(files, i) == skb->sk->sk_socket->file) {
+				spin_unlock(&files->file_lock);
+				task_unlock(p);
+				read_unlock(&tasklist_lock);
+				return 1;
+			}
+		}
+		spin_unlock(&files->file_lock);
+	}
+	task_unlock(p);
+out:
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+static int
+match_sid(const struct sk_buff *skb, pid_t sid)
+{
+	struct task_struct *g, *p;
+	struct file *file = skb->sk->sk_socket->file;
+	int i, found=0;
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		struct files_struct *files;
+		if (p->signal->session != sid)
+			continue;
+
+		task_lock(p);
+		files = p->files;
+		if (files) {
+			spin_lock(&files->file_lock);
+			for (i=0; i < files->max_fds; i++) {
+				if (fcheck_files(files, i) == file) {
+					found = 1;
+					break;
+				}
+			}
+			spin_unlock(&files->file_lock);
+		}
+		task_unlock(p);
+		if (found)
+			goto out;
+	} while_each_thread(g, p);
+out:
+	read_unlock(&tasklist_lock);
+
+	return found;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	const struct ip6t_owner_info *info = matchinfo;
+
+	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
+		return 0;
+
+	if(info->match & IP6T_OWNER_UID) {
+		if((skb->sk->sk_socket->file->f_uid != info->uid) ^
+		    !!(info->invert & IP6T_OWNER_UID))
+			return 0;
+	}
+
+	if(info->match & IP6T_OWNER_GID) {
+		if((skb->sk->sk_socket->file->f_gid != info->gid) ^
+		    !!(info->invert & IP6T_OWNER_GID))
+			return 0;
+	}
+
+	if(info->match & IP6T_OWNER_PID) {
+		if (!match_pid(skb, info->pid) ^
+		    !!(info->invert & IP6T_OWNER_PID))
+			return 0;
+	}
+
+	if(info->match & IP6T_OWNER_SID) {
+		if (!match_sid(skb, info->sid) ^
+		    !!(info->invert & IP6T_OWNER_SID))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int
+checkentry(const char *tablename,
+           const struct ip6t_ip6 *ip,
+           void *matchinfo,
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+        if (hook_mask
+            & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
+                printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
+                return 0;
+        }
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
+		return 0;
+#ifdef CONFIG_SMP
+	/* files->file_lock can not be used in a BH */
+	if (((struct ip6t_owner_info *)matchinfo)->match
+	    & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
+		printk("ip6t_owner: pid and sid matching is broken on SMP.\n");
+		return 0;
+	}
+#endif
+	return 1;
+}
+
+static struct ip6t_match owner_match = {
+	.name		= "owner",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&owner_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&owner_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_physdev.c b/net/ipv6/netfilter/ip6t_physdev.c
new file mode 100644
index 00000000000..71515c86ece
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_physdev.c
@@ -0,0 +1,135 @@
+/* Kernel module to match the bridge port in and
+ * out device for IP packets coming into contact with a bridge. */
+
+/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv6/ip6t_physdev.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_bridge.h>
+#define MATCH   1
+#define NOMATCH 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("iptables bridge physical device match module");
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+	int i;
+	static const char nulldevname[IFNAMSIZ];
+	const struct ip6t_physdev_info *info = matchinfo;
+	unsigned int ret;
+	const char *indev, *outdev;
+	struct nf_bridge_info *nf_bridge;
+
+	/* Not a bridged IP packet or no info available yet:
+	 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
+	 * the destination device will be a bridge. */
+	if (!(nf_bridge = skb->nf_bridge)) {
+		/* Return MATCH if the invert flags of the used options are on */
+		if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
+		    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED))
+			return NOMATCH;
+		if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN) &&
+		    !(info->invert & IP6T_PHYSDEV_OP_ISIN))
+			return NOMATCH;
+		if ((info->bitmask & IP6T_PHYSDEV_OP_ISOUT) &&
+		    !(info->invert & IP6T_PHYSDEV_OP_ISOUT))
+			return NOMATCH;
+		if ((info->bitmask & IP6T_PHYSDEV_OP_IN) &&
+		    !(info->invert & IP6T_PHYSDEV_OP_IN))
+			return NOMATCH;
+		if ((info->bitmask & IP6T_PHYSDEV_OP_OUT) &&
+		    !(info->invert & IP6T_PHYSDEV_OP_OUT))
+			return NOMATCH;
+		return MATCH;
+	}
+
+	/* This only makes sense in the FORWARD and POSTROUTING chains */
+	if ((info->bitmask & IP6T_PHYSDEV_OP_BRIDGED) &&
+	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
+	    !(info->invert & IP6T_PHYSDEV_OP_BRIDGED)))
+		return NOMATCH;
+
+	if ((info->bitmask & IP6T_PHYSDEV_OP_ISIN &&
+	    (!nf_bridge->physindev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISIN))) ||
+	    (info->bitmask & IP6T_PHYSDEV_OP_ISOUT &&
+	    (!nf_bridge->physoutdev ^ !!(info->invert & IP6T_PHYSDEV_OP_ISOUT))))
+		return NOMATCH;
+
+	if (!(info->bitmask & IP6T_PHYSDEV_OP_IN))
+		goto match_outdev;
+	indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)indev)[i]
+			^ ((const unsigned int *)info->physindev)[i])
+			& ((const unsigned int *)info->in_mask)[i];
+	}
+
+	if ((ret == 0) ^ !(info->invert & IP6T_PHYSDEV_OP_IN))
+		return NOMATCH;
+
+match_outdev:
+	if (!(info->bitmask & IP6T_PHYSDEV_OP_OUT))
+		return MATCH;
+	outdev = nf_bridge->physoutdev ?
+		 nf_bridge->physoutdev->name : nulldevname;
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
+		ret |= (((const unsigned int *)outdev)[i]
+			^ ((const unsigned int *)info->physoutdev)[i])
+			& ((const unsigned int *)info->out_mask)[i];
+	}
+
+	return (ret != 0) ^ !(info->invert & IP6T_PHYSDEV_OP_OUT);
+}
+
+static int
+checkentry(const char *tablename,
+		       const struct ip6t_ip6 *ip,
+		       void *matchinfo,
+		       unsigned int matchsize,
+		       unsigned int hook_mask)
+{
+	const struct ip6t_physdev_info *info = matchinfo;
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_physdev_info)))
+		return 0;
+	if (!(info->bitmask & IP6T_PHYSDEV_OP_MASK) ||
+	    info->bitmask & ~IP6T_PHYSDEV_OP_MASK)
+		return 0;
+	return 1;
+}
+
+static struct ip6t_match physdev_match = {
+	.name		= "physdev",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+	return ip6t_register_match(&physdev_match);
+}
+
+static void __exit fini(void)
+{
+	ip6t_unregister_match(&physdev_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
new file mode 100644
index 00000000000..a9526b773d2
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -0,0 +1,301 @@
+/* Kernel module to match ROUTING parameters. */
+
+/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ipv6.h>
+
+#include <asm/byteorder.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_rt.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 RT match");
+MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Returns 1 if the id is matched by the range, 0 otherwise */
+static inline int
+segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, int invert)
+{
+       int r=0;
+       DEBUGP("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
+              min,id,max);
+       r=(id >= min && id <= max) ^ invert;
+       DEBUGP(" result %s\n",r? "PASS" : "FAILED");
+       return r;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
+{
+       struct ipv6_rt_hdr _route, *rh = NULL;
+       const struct ip6t_rt *rtinfo = matchinfo;
+       unsigned int temp;
+       unsigned int len;
+       u8 nexthdr;
+       unsigned int ptr;
+       unsigned int hdrlen = 0;
+       unsigned int ret = 0;
+       struct in6_addr *ap, _addr;
+
+       /* type of the 1st exthdr */
+       nexthdr = skb->nh.ipv6h->nexthdr;
+       /* pointer to the 1st exthdr */
+       ptr = sizeof(struct ipv6hdr);
+       /* available length */
+       len = skb->len - ptr;
+       temp = 0;
+
+        while (ip6t_ext_hdr(nexthdr)) {
+               struct ipv6_opt_hdr _hdr, *hp;
+
+              DEBUGP("ipv6_rt header iteration \n");
+
+              /* Is there enough space for the next ext header? */
+                if (len < (int)sizeof(struct ipv6_opt_hdr))
+                        return 0;
+              /* No more exthdr -> evaluate */
+                if (nexthdr == NEXTHDR_NONE) {
+                     break;
+              }
+              /* ESP -> evaluate */
+                if (nexthdr == NEXTHDR_ESP) {
+                     break;
+              }
+
+	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+	      BUG_ON(hp == NULL);
+
+              /* Calculate the header length */
+                if (nexthdr == NEXTHDR_FRAGMENT) {
+                        hdrlen = 8;
+                } else if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hp->hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(hp);
+
+              /* ROUTING -> evaluate */
+                if (nexthdr == NEXTHDR_ROUTING) {
+                     temp |= MASK_ROUTING;
+                     break;
+              }
+
+
+              /* set the flag */
+              switch (nexthdr){
+                     case NEXTHDR_HOP:
+                     case NEXTHDR_ROUTING:
+                     case NEXTHDR_FRAGMENT:
+                     case NEXTHDR_AUTH:
+                     case NEXTHDR_DEST:
+                            break;
+                     default:
+                            DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr);
+                            return 0;
+                            break;
+              }
+
+                nexthdr = hp->nexthdr;
+                len -= hdrlen;
+                ptr += hdrlen;
+		if ( ptr > skb->len ) {
+			DEBUGP("ipv6_rt: new pointer is too large! \n");
+			break;
+		}
+        }
+
+       /* ROUTING header not found */
+       if ( temp != MASK_ROUTING ) return 0;
+
+       if (len < (int)sizeof(struct ipv6_rt_hdr)){
+	       *hotdrop = 1;
+       		return 0;
+       }
+
+       if (len < hdrlen){
+	       /* Pcket smaller than its length field */
+       		return 0;
+       }
+
+       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+       BUG_ON(rh == NULL);
+
+       DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
+       DEBUGP("TYPE %04X ", rh->type);
+       DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
+
+       DEBUGP("IPv6 RT segsleft %02X ",
+       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+                           rh->segments_left,
+                           !!(rtinfo->invflags & IP6T_RT_INV_SGS))));
+       DEBUGP("type %02X %02X %02X ",
+       		rtinfo->rt_type, rh->type, 
+       		(!(rtinfo->flags & IP6T_RT_TYP) ||
+                           ((rtinfo->rt_type == rh->type) ^
+                           !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
+       DEBUGP("len %02X %04X %02X ",
+       		rtinfo->hdrlen, hdrlen,
+       		(!(rtinfo->flags & IP6T_RT_LEN) ||
+                           ((rtinfo->hdrlen == hdrlen) ^
+                           !!(rtinfo->invflags & IP6T_RT_INV_LEN))));
+       DEBUGP("res %02X %02X %02X ", 
+       		(rtinfo->flags & IP6T_RT_RES), ((struct rt0_hdr *)rh)->bitmap,
+       		!((rtinfo->flags & IP6T_RT_RES) && (((struct rt0_hdr *)rh)->bitmap)));
+
+       ret = (rh != NULL)
+       		&&
+       		(segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+                           rh->segments_left,
+                           !!(rtinfo->invflags & IP6T_RT_INV_SGS)))
+		&&
+	      	(!(rtinfo->flags & IP6T_RT_LEN) ||
+                           ((rtinfo->hdrlen == hdrlen) ^
+                           !!(rtinfo->invflags & IP6T_RT_INV_LEN)))
+		&&
+       		(!(rtinfo->flags & IP6T_RT_TYP) ||
+                           ((rtinfo->rt_type == rh->type) ^
+                           !!(rtinfo->invflags & IP6T_RT_INV_TYP)));
+
+	if (ret && (rtinfo->flags & IP6T_RT_RES)) {
+		u_int32_t *bp, _bitmap;
+		bp = skb_header_pointer(skb,
+					ptr + offsetof(struct rt0_hdr, bitmap),
+					sizeof(_bitmap), &_bitmap);
+
+		ret = (*bp == 0);
+	}
+
+	DEBUGP("#%d ",rtinfo->addrnr);
+       if ( !(rtinfo->flags & IP6T_RT_FST) ){
+	       return ret;
+	} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
+		DEBUGP("Not strict ");
+		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
+			DEBUGP("There isn't enough space\n");
+			return 0;
+		} else {
+			unsigned int i = 0;
+
+			DEBUGP("#%d ",rtinfo->addrnr);
+			for(temp=0; temp<(unsigned int)((hdrlen-8)/16); temp++){
+				ap = skb_header_pointer(skb,
+							ptr
+							+ sizeof(struct rt0_hdr)
+							+ temp * sizeof(_addr),
+							sizeof(_addr),
+							&_addr);
+
+				BUG_ON(ap == NULL);
+
+				if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
+					DEBUGP("i=%d temp=%d;\n",i,temp);
+					i++;
+				}
+				if (i==rtinfo->addrnr) break;
+			}
+			DEBUGP("i=%d #%d\n", i, rtinfo->addrnr);
+			if (i == rtinfo->addrnr)
+				return ret;
+			else return 0;
+		}
+	} else {
+		DEBUGP("Strict ");
+		if ( rtinfo->addrnr > (unsigned int)((hdrlen-8)/16) ){
+			DEBUGP("There isn't enough space\n");
+			return 0;
+		} else {
+			DEBUGP("#%d ",rtinfo->addrnr);
+			for(temp=0; temp<rtinfo->addrnr; temp++){
+				ap = skb_header_pointer(skb,
+							ptr
+							+ sizeof(struct rt0_hdr)
+							+ temp * sizeof(_addr),
+							sizeof(_addr),
+							&_addr);
+				BUG_ON(ap == NULL);
+
+				if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
+					break;
+			}
+			DEBUGP("temp=%d #%d\n", temp, rtinfo->addrnr);
+			if ((temp == rtinfo->addrnr) && (temp == (unsigned int)((hdrlen-8)/16)))
+				return ret;
+			else return 0;
+		}
+	}
+
+	return 0;
+}
+
+/* Called when user tries to insert an entry of this type. */
+static int
+checkentry(const char *tablename,
+          const struct ip6t_ip6 *ip,
+          void *matchinfo,
+          unsigned int matchinfosize,
+          unsigned int hook_mask)
+{
+       const struct ip6t_rt *rtinfo = matchinfo;
+
+       if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_rt))) {
+              DEBUGP("ip6t_rt: matchsize %u != %u\n",
+                      matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_rt)));
+              return 0;
+       }
+       if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
+              DEBUGP("ip6t_rt: unknown flags %X\n",
+                      rtinfo->invflags);
+              return 0;
+       }
+       if ( (rtinfo->flags & (IP6T_RT_RES|IP6T_RT_FST_MASK)) && 
+		       (!(rtinfo->flags & IP6T_RT_TYP) || 
+		       (rtinfo->rt_type != 0) || 
+		       (rtinfo->invflags & IP6T_RT_INV_TYP)) ) {
+	      DEBUGP("`--rt-type 0' required before `--rt-0-*'");
+              return 0;
+       }
+
+       return 1;
+}
+
+static struct ip6t_match rt_match = {
+	.name		= "rt",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+static int __init init(void)
+{
+       return ip6t_register_match(&rt_match);
+}
+
+static void __exit cleanup(void)
+{
+       ip6t_unregister_match(&rt_match);
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
new file mode 100644
index 00000000000..4c0028671c2
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -0,0 +1,214 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT))
+
+/* Standard entry. */
+struct ip6t_standard
+{
+	struct ip6t_entry entry;
+	struct ip6t_standard_target target;
+};
+
+struct ip6t_error_target
+{
+	struct ip6t_entry_target target;
+	char errorname[IP6T_FUNCTION_MAXNAMELEN];
+};
+
+struct ip6t_error
+{
+	struct ip6t_entry entry;
+	struct ip6t_error_target target;
+};
+
+static struct
+{
+	struct ip6t_replace repl;
+	struct ip6t_standard entries[3];
+	struct ip6t_error term;
+} initial_table __initdata
+= { { "filter", FILTER_VALID_HOOKS, 4,
+      sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
+      { [NF_IP6_LOCAL_IN] = 0,
+	[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
+	[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
+      { [NF_IP6_LOCAL_IN] = 0,
+	[NF_IP6_FORWARD] = sizeof(struct ip6t_standard),
+	[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 },
+      0, NULL, { } },
+    {
+	    /* LOCAL_IN */
+	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* FORWARD */
+	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_OUT */
+	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } }
+    },
+    /* ERROR */
+    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+	0,
+	sizeof(struct ip6t_entry),
+	sizeof(struct ip6t_error),
+	0, { 0, 0 }, { } },
+      { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
+	  { } },
+	"ERROR"
+      }
+    }
+};
+
+static struct ip6t_table packet_filter = {
+	.name		= "filter",
+	.valid_hooks	= FILTER_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.me		= THIS_MODULE,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6t_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static unsigned int
+ip6t_local_out_hook(unsigned int hook,
+		   struct sk_buff **pskb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+#if 0
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ip6t_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+#endif
+
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+}
+
+static struct nf_hook_ops ip6t_ops[] = {
+	{
+		.hook		= ip6t_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_LOCAL_IN,
+		.priority	= NF_IP6_PRI_FILTER,
+	},
+	{
+		.hook		= ip6t_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_FORWARD,
+		.priority	= NF_IP6_PRI_FILTER,
+	},
+	{
+		.hook		= ip6t_local_out_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_FILTER,
+	},
+};
+
+/* Default to forward because I got too much mail already. */
+static int forward = NF_ACCEPT;
+module_param(forward, bool, 0000);
+
+static int __init init(void)
+{
+	int ret;
+
+	if (forward < 0 || forward > NF_MAX_VERDICT) {
+		printk("iptables forward must be 0 or 1\n");
+		return -EINVAL;
+	}
+
+	/* Entry 1 is the FORWARD hook */
+	initial_table.entries[1].target.verdict = -forward - 1;
+
+	/* Register table */
+	ret = ip6t_register_table(&packet_filter, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ip6t_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ip6t_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	ret = nf_register_hook(&ip6t_ops[2]);
+	if (ret < 0)
+		goto cleanup_hook1;
+
+	return ret;
+
+ cleanup_hook1:
+	nf_unregister_hook(&ip6t_ops[1]);
+ cleanup_hook0:
+	nf_unregister_hook(&ip6t_ops[0]);
+ cleanup_table:
+	ip6t_unregister_table(&packet_filter);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ip6t_ops[i]);
+
+	ip6t_unregister_table(&packet_filter);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
new file mode 100644
index 00000000000..85c1e6eada1
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -0,0 +1,287 @@
+/*
+ * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6
+ *
+ * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("ip6tables mangle table");
+
+#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \
+			    (1 << NF_IP6_LOCAL_IN) | \
+			    (1 << NF_IP6_FORWARD) | \
+			    (1 << NF_IP6_LOCAL_OUT) | \
+			    (1 << NF_IP6_POST_ROUTING))
+
+#if 0
+#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* Standard entry. */
+struct ip6t_standard
+{
+	struct ip6t_entry entry;
+	struct ip6t_standard_target target;
+};
+
+struct ip6t_error_target
+{
+	struct ip6t_entry_target target;
+	char errorname[IP6T_FUNCTION_MAXNAMELEN];
+};
+
+struct ip6t_error
+{
+	struct ip6t_entry entry;
+	struct ip6t_error_target target;
+};
+
+static struct
+{
+	struct ip6t_replace repl;
+	struct ip6t_standard entries[5];
+	struct ip6t_error term;
+} initial_table __initdata
+= { { "mangle", MANGLE_VALID_HOOKS, 6,
+      sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
+      { [NF_IP6_PRE_ROUTING] 	= 0,
+	[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
+	[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
+	[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
+	[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4},
+      { [NF_IP6_PRE_ROUTING] 	= 0,
+	[NF_IP6_LOCAL_IN]	= sizeof(struct ip6t_standard),
+	[NF_IP6_FORWARD]	= sizeof(struct ip6t_standard) * 2,
+	[NF_IP6_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
+	[NF_IP6_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4},
+      0, NULL, { } },
+    {
+	    /* PRE_ROUTING */
+            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_IN */
+            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* FORWARD */
+            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* LOCAL_OUT */
+            { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } },
+	    /* POST_ROUTING */
+	    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ip6t_entry),
+		sizeof(struct ip6t_standard),
+		0, { 0, 0 }, { } },
+	      { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } },
+		-NF_ACCEPT - 1 } }
+    },
+    /* ERROR */
+    { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+	0,
+	sizeof(struct ip6t_entry),
+	sizeof(struct ip6t_error),
+	0, { 0, 0 }, { } },
+      { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } },
+	  { } },
+	"ERROR"
+      }
+    }
+};
+
+static struct ip6t_table packet_mangler = {
+	.name		= "mangle",
+	.valid_hooks	= MANGLE_VALID_HOOKS,
+	.lock		= RW_LOCK_UNLOCKED,
+	.me		= THIS_MODULE,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6t_route_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+}
+
+static unsigned int
+ip6t_local_hook(unsigned int hook,
+		   struct sk_buff **pskb,
+		   const struct net_device *in,
+		   const struct net_device *out,
+		   int (*okfn)(struct sk_buff *))
+{
+
+	unsigned long nfmark;
+	unsigned int ret;
+	struct in6_addr saddr, daddr;
+	u_int8_t hop_limit;
+	u_int32_t flowlabel;
+
+#if 0
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct iphdr)
+	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+		if (net_ratelimit())
+			printk("ip6t_hook: happy cracking.\n");
+		return NF_ACCEPT;
+	}
+#endif
+
+	/* save source/dest address, nfmark, hoplimit, flowlabel, priority,  */
+	memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
+	memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
+	nfmark = (*pskb)->nfmark;
+	hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either */
+	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
+
+	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+
+	if (ret != NF_DROP && ret != NF_STOLEN 
+		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
+		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
+		    || (*pskb)->nfmark != nfmark
+		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
+
+		/* something which could affect routing has changed */
+
+		DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
+	}
+
+	return ret;
+}
+
+static struct nf_hook_ops ip6t_ops[] = {
+	{
+		.hook		= ip6t_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_MANGLE,
+	},
+	{
+		.hook		= ip6t_local_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_LOCAL_IN,
+		.priority	= NF_IP6_PRI_MANGLE,
+	},
+	{
+		.hook		= ip6t_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_FORWARD,
+		.priority	= NF_IP6_PRI_MANGLE,
+	},
+	{
+		.hook		= ip6t_local_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_MANGLE,
+	},
+	{
+		.hook		= ip6t_route_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_IP6_POST_ROUTING,
+		.priority	= NF_IP6_PRI_MANGLE,
+	},
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Register table */
+	ret = ip6t_register_table(&packet_mangler, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ip6t_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ip6t_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	ret = nf_register_hook(&ip6t_ops[2]);
+	if (ret < 0)
+		goto cleanup_hook1;
+
+	ret = nf_register_hook(&ip6t_ops[3]);
+	if (ret < 0)
+		goto cleanup_hook2;
+
+	ret = nf_register_hook(&ip6t_ops[4]);
+	if (ret < 0)
+		goto cleanup_hook3;
+
+	return ret;
+
+ cleanup_hook3:
+        nf_unregister_hook(&ip6t_ops[3]);
+ cleanup_hook2:
+	nf_unregister_hook(&ip6t_ops[2]);
+ cleanup_hook1:
+	nf_unregister_hook(&ip6t_ops[1]);
+ cleanup_hook0:
+	nf_unregister_hook(&ip6t_ops[0]);
+ cleanup_table:
+	ip6t_unregister_table(&packet_mangler);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ip6t_ops[i]);
+
+	ip6t_unregister_table(&packet_mangler);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
new file mode 100644
index 00000000000..71407beaf79
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -0,0 +1,182 @@
+/*
+ * IPv6 raw table, a port of the IPv4 raw table to IPv6
+ *
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))
+
+#if 0
+#define DEBUGP(x, args...)	printk(KERN_DEBUG x, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* Standard entry. */
+struct ip6t_standard
+{
+	struct ip6t_entry entry;
+	struct ip6t_standard_target target;
+};
+
+struct ip6t_error_target
+{
+	struct ip6t_entry_target target;
+	char errorname[IP6T_FUNCTION_MAXNAMELEN];
+};
+
+struct ip6t_error
+{
+	struct ip6t_entry entry;
+	struct ip6t_error_target target;
+};
+
+static struct
+{
+	struct ip6t_replace repl;
+	struct ip6t_standard entries[2];
+	struct ip6t_error term;
+} initial_table __initdata = {
+	.repl = {
+		.name = "raw",
+		.valid_hooks = RAW_VALID_HOOKS,
+		.num_entries = 3,
+		.size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
+		.hook_entry = {
+			[NF_IP6_PRE_ROUTING] = 0,
+			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+		},
+		.underflow = {
+			[NF_IP6_PRE_ROUTING] = 0,
+			[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+		},
+	},
+	.entries = {
+		/* PRE_ROUTING */
+		{
+			.entry = {
+				.target_offset = sizeof(struct ip6t_entry),
+				.next_offset = sizeof(struct ip6t_standard),
+			},
+			.target = {
+				.target = {
+					.u = {
+						.target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
+					},
+				},
+				.verdict = -NF_ACCEPT - 1,
+			},
+		},
+
+		/* LOCAL_OUT */
+		{
+			.entry = {
+				.target_offset = sizeof(struct ip6t_entry),
+				.next_offset = sizeof(struct ip6t_standard),
+			},
+			.target = {
+				.target = {
+					.u = {
+						.target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)),
+					},
+				},
+				.verdict = -NF_ACCEPT - 1,
+			},
+		},
+	},
+	/* ERROR */
+	.term = {
+		.entry = {
+			.target_offset = sizeof(struct ip6t_entry),
+			.next_offset = sizeof(struct ip6t_error),
+		},
+		.target = {
+			.target = {
+				.u = {
+					.user = {
+						.target_size = IP6T_ALIGN(sizeof(struct ip6t_error_target)),
+						.name = IP6T_ERROR_TARGET,
+					},
+				},
+			},
+			.errorname = "ERROR",
+		},
+	}
+};
+
+static struct ip6t_table packet_raw = { 
+	.name = "raw", 
+	.valid_hooks = RAW_VALID_HOOKS, 
+	.lock = RW_LOCK_UNLOCKED, 
+	.me = THIS_MODULE
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+ip6t_hook(unsigned int hook,
+	 struct sk_buff **pskb,
+	 const struct net_device *in,
+	 const struct net_device *out,
+	 int (*okfn)(struct sk_buff *))
+{
+	return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+}
+
+static struct nf_hook_ops ip6t_ops[] = { 
+	{
+	  .hook = ip6t_hook, 
+	  .pf = PF_INET6,
+	  .hooknum = NF_IP6_PRE_ROUTING,
+	  .priority = NF_IP6_PRI_FIRST
+	},
+	{
+	  .hook = ip6t_hook, 
+	  .pf = PF_INET6, 
+	  .hooknum = NF_IP6_LOCAL_OUT,
+	  .priority = NF_IP6_PRI_FIRST
+	},
+};
+
+static int __init init(void)
+{
+	int ret;
+
+	/* Register table */
+	ret = ip6t_register_table(&packet_raw, &initial_table.repl);
+	if (ret < 0)
+		return ret;
+
+	/* Register hooks */
+	ret = nf_register_hook(&ip6t_ops[0]);
+	if (ret < 0)
+		goto cleanup_table;
+
+	ret = nf_register_hook(&ip6t_ops[1]);
+	if (ret < 0)
+		goto cleanup_hook0;
+
+	return ret;
+
+ cleanup_hook0:
+	nf_unregister_hook(&ip6t_ops[0]);
+ cleanup_table:
+	ip6t_unregister_table(&packet_raw);
+
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++)
+		nf_unregister_hook(&ip6t_ops[i]);
+
+	ip6t_unregister_table(&packet_raw);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
new file mode 100644
index 00000000000..334a5967831
--- /dev/null
+++ b/net/ipv6/proc.c
@@ -0,0 +1,303 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		This file implements the various access functions for the
+ *		PROC file system.  This is very similar to the IPv4 version,
+ *		except it reports the sockets in the INET6 address family.
+ *
+ * Version:	$Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $
+ *
+ * Authors:	David S. Miller (davem@caip.rutgers.edu)
+ * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/transp_v6.h>
+#include <net/ipv6.h>
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_net_devsnmp6;
+
+static int fold_prot_inuse(struct proto *proto)
+{
+	int res = 0;
+	int cpu;
+
+	for (cpu=0; cpu<NR_CPUS; cpu++)
+		res += proto->stats[cpu].inuse;
+
+	return res;
+}
+
+static int sockstat6_seq_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "TCP6: inuse %d\n",
+		       fold_prot_inuse(&tcpv6_prot));
+	seq_printf(seq, "UDP6: inuse %d\n",
+		       fold_prot_inuse(&udpv6_prot));
+	seq_printf(seq, "RAW6: inuse %d\n",
+		       fold_prot_inuse(&rawv6_prot));
+	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
+		       ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+	return 0;
+}
+
+static struct snmp_mib snmp6_ipstats_list[] = {
+/* ipv6 mib according to RFC 2465 */
+	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES),
+	SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
+	SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
+	SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
+	SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS),
+	SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
+	SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
+	SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
+	SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
+	SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
+	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
+	SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
+	SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
+	SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
+	SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS),
+	SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS),
+	SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS),
+	SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS),
+	SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS),
+	SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
+	SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
+	SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp6_icmp6_list[] = {
+/* icmpv6 mib according to RFC 2466
+
+   Exceptions:  {In|Out}AdminProhibs are removed, because I see
+                no good reasons to account them separately
+		of another dest.unreachs.
+		OutErrs is zero identically.
+		OutEchos too.
+		OutRouterAdvertisements too.
+		OutGroupMembQueries too.
+ */
+	SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
+	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
+	SNMP_MIB_ITEM("Icmp6InDestUnreachs", ICMP6_MIB_INDESTUNREACHS),
+	SNMP_MIB_ITEM("Icmp6InPktTooBigs", ICMP6_MIB_INPKTTOOBIGS),
+	SNMP_MIB_ITEM("Icmp6InTimeExcds", ICMP6_MIB_INTIMEEXCDS),
+	SNMP_MIB_ITEM("Icmp6InParmProblems", ICMP6_MIB_INPARMPROBLEMS),
+	SNMP_MIB_ITEM("Icmp6InEchos", ICMP6_MIB_INECHOS),
+	SNMP_MIB_ITEM("Icmp6InEchoReplies", ICMP6_MIB_INECHOREPLIES),
+	SNMP_MIB_ITEM("Icmp6InGroupMembQueries", ICMP6_MIB_INGROUPMEMBQUERIES),
+	SNMP_MIB_ITEM("Icmp6InGroupMembResponses", ICMP6_MIB_INGROUPMEMBRESPONSES),
+	SNMP_MIB_ITEM("Icmp6InGroupMembReductions", ICMP6_MIB_INGROUPMEMBREDUCTIONS),
+	SNMP_MIB_ITEM("Icmp6InRouterSolicits", ICMP6_MIB_INROUTERSOLICITS),
+	SNMP_MIB_ITEM("Icmp6InRouterAdvertisements", ICMP6_MIB_INROUTERADVERTISEMENTS),
+	SNMP_MIB_ITEM("Icmp6InNeighborSolicits", ICMP6_MIB_INNEIGHBORSOLICITS),
+	SNMP_MIB_ITEM("Icmp6InNeighborAdvertisements", ICMP6_MIB_INNEIGHBORADVERTISEMENTS),
+	SNMP_MIB_ITEM("Icmp6InRedirects", ICMP6_MIB_INREDIRECTS),
+	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
+	SNMP_MIB_ITEM("Icmp6OutDestUnreachs", ICMP6_MIB_OUTDESTUNREACHS),
+	SNMP_MIB_ITEM("Icmp6OutPktTooBigs", ICMP6_MIB_OUTPKTTOOBIGS),
+	SNMP_MIB_ITEM("Icmp6OutTimeExcds", ICMP6_MIB_OUTTIMEEXCDS),
+	SNMP_MIB_ITEM("Icmp6OutParmProblems", ICMP6_MIB_OUTPARMPROBLEMS),
+	SNMP_MIB_ITEM("Icmp6OutEchoReplies", ICMP6_MIB_OUTECHOREPLIES),
+	SNMP_MIB_ITEM("Icmp6OutRouterSolicits", ICMP6_MIB_OUTROUTERSOLICITS),
+	SNMP_MIB_ITEM("Icmp6OutNeighborSolicits", ICMP6_MIB_OUTNEIGHBORSOLICITS),
+	SNMP_MIB_ITEM("Icmp6OutNeighborAdvertisements", ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS),
+	SNMP_MIB_ITEM("Icmp6OutRedirects", ICMP6_MIB_OUTREDIRECTS),
+	SNMP_MIB_ITEM("Icmp6OutGroupMembResponses", ICMP6_MIB_OUTGROUPMEMBRESPONSES),
+	SNMP_MIB_ITEM("Icmp6OutGroupMembReductions", ICMP6_MIB_OUTGROUPMEMBREDUCTIONS),
+	SNMP_MIB_SENTINEL
+};
+
+static struct snmp_mib snmp6_udp6_list[] = {
+	SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
+	SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
+	SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
+	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_SENTINEL
+};
+
+static unsigned long
+fold_field(void *mib[], int offt)
+{
+        unsigned long res = 0;
+        int i;
+ 
+        for (i = 0; i < NR_CPUS; i++) {
+                if (!cpu_possible(i))
+                        continue;
+                res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
+                res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
+        }
+        return res;
+}
+
+static inline void
+snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
+{
+	int i;
+	for (i=0; itemlist[i].name; i++)
+		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 
+				fold_field(mib, itemlist[i].entry));
+}
+
+static int snmp6_seq_show(struct seq_file *seq, void *v)
+{
+	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
+
+	if (idev) {
+		seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
+		snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list);
+	} else {
+		snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list);
+		snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
+		snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
+	}
+	return 0;
+}
+
+static int sockstat6_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sockstat6_seq_show, NULL);
+}
+
+static struct file_operations sockstat6_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = sockstat6_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+static int snmp6_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, snmp6_seq_show, PDE(inode)->data);
+}
+
+static struct file_operations snmp6_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = snmp6_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+int snmp6_register_dev(struct inet6_dev *idev)
+{
+	struct proc_dir_entry *p;
+
+	if (!idev || !idev->dev)
+		return -EINVAL;
+
+	if (!proc_net_devsnmp6)
+		return -ENOENT;
+
+	p = create_proc_entry(idev->dev->name, S_IRUGO, proc_net_devsnmp6);
+	if (!p)
+		return -ENOMEM;
+
+	p->data = idev;
+	p->proc_fops = &snmp6_seq_fops;
+
+	idev->stats.proc_dir_entry = p;
+	return 0;
+}
+
+int snmp6_unregister_dev(struct inet6_dev *idev)
+{
+	if (!proc_net_devsnmp6)
+		return -ENOENT;
+	if (!idev || !idev->stats.proc_dir_entry)
+		return -EINVAL;
+	remove_proc_entry(idev->stats.proc_dir_entry->name,
+			  proc_net_devsnmp6);
+	return 0;
+}
+
+int __init ipv6_misc_proc_init(void)
+{
+	int rc = 0;
+
+	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
+		goto proc_snmp6_fail;
+
+	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
+	if (!proc_net_devsnmp6)
+		goto proc_dev_snmp6_fail;
+
+	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
+		goto proc_sockstat6_fail;
+out:
+	return rc;
+
+proc_sockstat6_fail:
+	proc_net_remove("dev_snmp6");
+proc_dev_snmp6_fail:
+	proc_net_remove("snmp6");
+proc_snmp6_fail:
+	rc = -ENOMEM;
+	goto out;
+}
+
+void ipv6_misc_proc_exit(void)
+{
+	proc_net_remove("sockstat6");
+	proc_net_remove("dev_snmp6");
+	proc_net_remove("snmp6");
+}
+
+#else	/* CONFIG_PROC_FS */
+
+
+int snmp6_register_dev(struct inet6_dev *idev)
+{
+	return 0;
+}
+
+int snmp6_unregister_dev(struct inet6_dev *idev)
+{
+	return 0;
+}
+#endif	/* CONFIG_PROC_FS */
+
+int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+	int err = -ENOMEM;
+
+	if (!idev || !idev->dev)
+		return -EINVAL;
+
+	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
+			   __alignof__(struct icmpv6_mib)) < 0)
+		goto err_icmp;
+
+	return 0;
+
+err_icmp:
+	return err;
+}
+
+int snmp6_free_dev(struct inet6_dev *idev)
+{
+	snmp6_mib_free((void **)idev->stats.icmpv6);
+	return 0;
+}
+
+
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
new file mode 100644
index 00000000000..52c1d58b6ca
--- /dev/null
+++ b/net/ipv6/protocol.c
@@ -0,0 +1,86 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		PF_INET6 protocol dispatch tables.
+ *
+ * Version:	$Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $
+ *
+ * Authors:	Pedro Roque	<roque@di.fc.ul.pt>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *      Changes:
+ *
+ *      Vince Laviano (vince@cs.stanford.edu)       16 May 2001
+ *      - Removed unused variable 'inet6_protocol_base'
+ *      - Modified inet6_del_protocol() to correctly maintain copy bit.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
+static DEFINE_SPINLOCK(inet6_proto_lock);
+
+
+int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
+{
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+
+	spin_lock_bh(&inet6_proto_lock);
+
+	if (inet6_protos[hash]) {
+		ret = -1;
+	} else {
+		inet6_protos[hash] = prot;
+		ret = 0;
+	}
+
+	spin_unlock_bh(&inet6_proto_lock);
+
+	return ret;
+}
+
+/*
+ *	Remove a protocol from the hash tables.
+ */
+ 
+int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
+{
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+
+	spin_lock_bh(&inet6_proto_lock);
+
+	if (inet6_protos[hash] != prot) {
+		ret = -1;
+	} else {
+		inet6_protos[hash] = NULL;
+		ret = 0;
+	}
+
+	spin_unlock_bh(&inet6_proto_lock);
+
+	synchronize_net();
+
+	return ret;
+}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
new file mode 100644
index 00000000000..5488ad0de4f
--- /dev/null
+++ b/net/ipv6/raw.c
@@ -0,0 +1,1157 @@
+/*
+ *	RAW sockets for IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	Adapted from linux/net/ipv4/raw.c
+ *
+ *	$Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
+ *
+ *	Fixes:
+ *	Hideaki YOSHIFUJI	:	sin6_scope_id support
+ *	YOSHIFUJI,H.@USAGI	:	raw checksum (RFC2292(bis) compliance) 
+ *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+
+#include <net/rawv6.h>
+#include <net/xfrm.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
+DEFINE_RWLOCK(raw_v6_lock);
+
+static void raw_v6_hash(struct sock *sk)
+{
+	struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
+						 (RAWV6_HTABLE_SIZE - 1)];
+
+	write_lock_bh(&raw_v6_lock);
+	sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+ 	write_unlock_bh(&raw_v6_lock);
+}
+
+static void raw_v6_unhash(struct sock *sk)
+{
+ 	write_lock_bh(&raw_v6_lock);
+	if (sk_del_node_init(sk))
+		sock_prot_dec_use(sk->sk_prot);
+	write_unlock_bh(&raw_v6_lock);
+}
+
+
+/* Grumble... icmp and ip_input want to get at this... */
+struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
+			     struct in6_addr *loc_addr, struct in6_addr *rmt_addr)
+{
+	struct hlist_node *node;
+	int is_multicast = ipv6_addr_is_multicast(loc_addr);
+
+	sk_for_each_from(sk, node)
+		if (inet_sk(sk)->num == num) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			if (!ipv6_addr_any(&np->daddr) &&
+			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+				continue;
+
+			if (!ipv6_addr_any(&np->rcv_saddr)) {
+				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+					goto found;
+				if (is_multicast &&
+				    inet6_mc_check(sk, loc_addr, rmt_addr))
+					goto found;
+				continue;
+			}
+			goto found;
+		}
+	sk = NULL;
+found:
+	return sk;
+}
+
+/*
+ *	0 - deliver
+ *	1 - block
+ */
+static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+{
+	struct icmp6hdr *icmph;
+	struct raw6_sock *rp = raw6_sk(sk);
+
+	if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
+		__u32 *data = &rp->filter.data[0];
+		int bit_nr;
+
+		icmph = (struct icmp6hdr *) skb->data;
+		bit_nr = icmph->icmp6_type;
+
+		return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
+	}
+	return 0;
+}
+
+/*
+ *	demultiplex raw sockets.
+ *	(should consider queueing the skb in the sock receive_queue
+ *	without calling rawv6.c)
+ *
+ *	Caller owns SKB so we must make clones.
+ */
+void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+{
+	struct in6_addr *saddr;
+	struct in6_addr *daddr;
+	struct sock *sk;
+	__u8 hash;
+
+	saddr = &skb->nh.ipv6h->saddr;
+	daddr = saddr + 1;
+
+	hash = nexthdr & (MAX_INET_PROTOS - 1);
+
+	read_lock(&raw_v6_lock);
+	sk = sk_head(&raw_v6_htable[hash]);
+
+	/*
+	 *	The first socket found will be delivered after
+	 *	delivery to transport protocols.
+	 */
+
+	if (sk == NULL)
+		goto out;
+
+	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
+
+	while (sk) {
+		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+
+			/* Not releasing hash table! */
+			if (clone)
+				rawv6_rcv(sk, clone);
+		}
+		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr);
+	}
+out:
+	read_unlock(&raw_v6_lock);
+}
+
+/* This cleans up af_inet6 a bit. -DaveM */
+static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+	__u32 v4addr = 0;
+	int addr_type;
+	int err;
+
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+	addr_type = ipv6_addr_type(&addr->sin6_addr);
+
+	/* Raw sockets are IPv6 only */
+	if (addr_type == IPV6_ADDR_MAPPED)
+		return(-EADDRNOTAVAIL);
+
+	lock_sock(sk);
+
+	err = -EINVAL;
+	if (sk->sk_state != TCP_CLOSE)
+		goto out;
+
+	/* Check if the address belongs to the host. */
+	if (addr_type != IPV6_ADDR_ANY) {
+		struct net_device *dev = NULL;
+
+		if (addr_type & IPV6_ADDR_LINKLOCAL) {
+			if (addr_len >= sizeof(struct sockaddr_in6) &&
+			    addr->sin6_scope_id) {
+				/* Override any existing binding, if another
+				 * one is supplied by user.
+				 */
+				sk->sk_bound_dev_if = addr->sin6_scope_id;
+			}
+			
+			/* Binding to link-local address requires an interface */
+			if (!sk->sk_bound_dev_if)
+				goto out;
+
+			dev = dev_get_by_index(sk->sk_bound_dev_if);
+			if (!dev) {
+				err = -ENODEV;
+				goto out;
+			}
+		}
+		
+		/* ipv4 addr of the socket is invalid.  Only the
+		 * unspecified and mapped address have a v4 equivalent.
+		 */
+		v4addr = LOOPBACK4_IPV6;
+		if (!(addr_type & IPV6_ADDR_MULTICAST))	{
+			err = -EADDRNOTAVAIL;
+			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+				if (dev)
+					dev_put(dev);
+				goto out;
+			}
+		}
+		if (dev)
+			dev_put(dev);
+	}
+
+	inet->rcv_saddr = inet->saddr = v4addr;
+	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	if (!(addr_type & IPV6_ADDR_MULTICAST))
+		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+	err = 0;
+out:
+	release_sock(sk);
+	return err;
+}
+
+void rawv6_err(struct sock *sk, struct sk_buff *skb,
+	       struct inet6_skb_parm *opt,
+	       int type, int code, int offset, u32 info)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int err;
+	int harderr;
+
+	/* Report error on raw socket, if:
+	   1. User requested recverr.
+	   2. Socket is connected (otherwise the error indication
+	      is useless without recverr and error is hard.
+	 */
+	if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+		return;
+
+	harderr = icmpv6_err_convert(type, code, &err);
+	if (type == ICMPV6_PKT_TOOBIG)
+		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+
+	if (np->recverr) {
+		u8 *payload = skb->data;
+		if (!inet->hdrincl)
+			payload += offset;
+		ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
+	}
+
+	if (np->recverr || harderr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	}
+}
+
+static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
+{
+	if ((raw6_sk(sk)->checksum || sk->sk_filter) && 
+	    skb->ip_summed != CHECKSUM_UNNECESSARY) {
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
+			/* FIXME: increment a raw6 drops counter here */
+			kfree_skb(skb);
+			return 0;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	/* Charge it to the socket. */
+	if (sock_queue_rcv_skb(sk,skb)<0) {
+		/* FIXME: increment a raw6 drops counter here */
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return 0;
+}
+
+/*
+ *	This is next to useless... 
+ *	if we demultiplex in network layer we don't need the extra call
+ *	just to queue the skb... 
+ *	maybe we could have the network decide upon a hint if it 
+ *	should call raw_rcv for demultiplexing
+ */
+int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct raw6_sock *rp = raw6_sk(sk);
+
+        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+                kfree_skb(skb);
+                return NET_RX_DROP;
+        }
+
+	if (!rp->checksum)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
+		if (skb->ip_summed == CHECKSUM_HW) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+					    &skb->nh.ipv6h->daddr,
+					    skb->len, inet->num, skb->csum)) {
+				LIMIT_NETDEBUG(
+			        printk(KERN_DEBUG "raw v6 hw csum failure.\n"));
+				skb->ip_summed = CHECKSUM_NONE;
+			}
+		}
+		if (skb->ip_summed == CHECKSUM_NONE)
+			skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+						     &skb->nh.ipv6h->daddr,
+						     skb->len, inet->num, 0);
+	}
+
+	if (inet->hdrincl) {
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
+			/* FIXME: increment a raw6 drops counter here */
+			kfree_skb(skb);
+			return 0;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	rawv6_rcv_skb(sk, skb);
+	return 0;
+}
+
+
+/*
+ *	This should be easy, if there is something there
+ *	we return it, otherwise we block.
+ */
+
+static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
+		  struct msghdr *msg, size_t len,
+		  int noblock, int flags, int *addr_len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
+	struct sk_buff *skb;
+	size_t copied;
+	int err;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+		
+	if (addr_len) 
+		*addr_len=sizeof(*sin6);
+
+	if (flags & MSG_ERRQUEUE)
+		return ipv6_recv_error(sk, msg, len);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+  	if (copied > len) {
+  		copied = len;
+  		msg->msg_flags |= MSG_TRUNC;
+  	}
+
+	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	} else if (msg->msg_flags&MSG_TRUNC) {
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+			goto csum_copy_err;
+		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	} else {
+		err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
+		if (err == -EINVAL)
+			goto csum_copy_err;
+	}
+	if (err)
+		goto out_free;
+
+	/* Copy the address. */
+	if (sin6) {
+		sin6->sin6_family = AF_INET6;
+		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		sin6->sin6_flowinfo = 0;
+		sin6->sin6_scope_id = 0;
+		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+			sin6->sin6_scope_id = IP6CB(skb)->iif;
+	}
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (np->rxopt.all)
+		datagram_recv_ctl(sk, msg, skb);
+
+	err = copied;
+	if (flags & MSG_TRUNC)
+		err = skb->len;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+
+csum_copy_err:
+	/* Clear queue. */
+	if (flags&MSG_PEEK) {
+		int clear = 0;
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			clear = 1;
+		}
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		if (clear)
+			kfree_skb(skb);
+	}
+
+	/* Error for blocking case is chosen to masquerade
+	   as some normal condition.
+	 */
+	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
+	/* FIXME: increment a raw6 drops counter here */
+	goto out_free;
+}
+
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
+				     struct raw6_sock *rp, int len)
+{
+	struct sk_buff *skb;
+	int err = 0;
+	u16 *csum;
+	u32 tmp_csum;
+
+	if (!rp->checksum)
+		goto send;
+
+	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+		goto out;
+
+	if (rp->offset + 1 < len)
+		csum = (u16 *)(skb->h.raw + rp->offset);
+	else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* should be check HW csum miyazawa */
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		/*
+		 * Only one fragment on the socket.
+		 */
+		tmp_csum = skb->csum;
+	} else {
+		tmp_csum = 0;
+
+		skb_queue_walk(&sk->sk_write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+	}
+
+	/* in case cksum was not initialized */
+	if (unlikely(*csum))
+		tmp_csum = csum_sub(tmp_csum, *csum);
+
+	*csum = csum_ipv6_magic(&fl->fl6_src,
+				&fl->fl6_dst,
+				len, fl->proto, tmp_csum);
+
+	if (*csum == 0)
+		*csum = -1;
+send:
+	err = ip6_push_pending_frames(sk);
+out:
+	return err;
+}
+
+static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
+			struct flowi *fl, struct rt6_info *rt, 
+			unsigned int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	unsigned int hh_len;
+	int err;
+
+	if (length > rt->u.dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+		return -EMSGSIZE;
+	}
+	if (flags&MSG_PROBE)
+		goto out;
+
+	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+	skb = sock_alloc_send_skb(sk, length+hh_len+15,
+				  flags&MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto error; 
+	skb_reserve(skb, hh_len);
+
+	skb->priority = sk->sk_priority;
+	skb->dst = dst_clone(&rt->u.dst);
+
+	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb->h.raw = skb->nh.raw;
+	err = memcpy_fromiovecend((void *)iph, from, 0, length);
+	if (err)
+		goto error_fault;
+
+	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);		
+	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+		      dst_output);
+	if (err > 0)
+		err = inet->recverr ? net_xmit_errno(err) : 0;
+	if (err)
+		goto error;
+out:
+	return 0;
+
+error_fault:
+	err = -EFAULT;
+	kfree_skb(skb);
+error:
+	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+	return err; 
+}
+
+static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
+{
+	struct iovec *iov;
+	u8 __user *type = NULL;
+	u8 __user *code = NULL;
+	int probed = 0;
+	int i;
+
+	if (!msg->msg_iov)
+		return;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		iov = &msg->msg_iov[i];
+		if (!iov)
+			continue;
+
+		switch (fl->proto) {
+		case IPPROTO_ICMPV6:
+			/* check if one-byte field is readable or not. */
+			if (iov->iov_base && iov->iov_len < 1)
+				break;
+
+			if (!type) {
+				type = iov->iov_base;
+				/* check if code field is readable or not. */
+				if (iov->iov_len > 1)
+					code = type + 1;
+			} else if (!code)
+				code = iov->iov_base;
+
+			if (type && code) {
+				get_user(fl->fl_icmp_type, type);
+				__get_user(fl->fl_icmp_code, code);
+				probed = 1;
+			}
+			break;
+		default:
+			probed = 1;
+			break;
+		}
+		if (probed)
+			break;
+	}
+}
+
+static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
+		   struct msghdr *msg, size_t len)
+{
+	struct ipv6_txoptions opt_space;
+	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+	struct in6_addr *daddr, *final_p = NULL, final;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct raw6_sock *rp = raw6_sk(sk);
+	struct ipv6_txoptions *opt = NULL;
+	struct ip6_flowlabel *flowlabel = NULL;
+	struct dst_entry *dst = NULL;
+	struct flowi fl;
+	int addr_len = msg->msg_namelen;
+	int hlimit = -1;
+	u16 proto;
+	int err;
+
+	/* Rough check on arithmetic overflow,
+	   better check is made in ip6_build_xmit
+	 */
+	if (len < 0)
+		return -EMSGSIZE;
+
+	/* Mirror BSD error message compatibility */
+	if (msg->msg_flags & MSG_OOB)		
+		return -EOPNOTSUPP;
+
+	/*
+	 *	Get and verify the address. 
+	 */
+	memset(&fl, 0, sizeof(fl));
+
+	if (sin6) {
+		if (addr_len < SIN6_LEN_RFC2133) 
+			return -EINVAL;
+
+		if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 
+			return(-EAFNOSUPPORT);
+
+		/* port is the proto value [0..255] carried in nexthdr */
+		proto = ntohs(sin6->sin6_port);
+
+		if (!proto)
+			proto = inet->num;
+		else if (proto != inet->num)
+			return(-EINVAL);
+
+		if (proto > 255)
+			return(-EINVAL);
+
+		daddr = &sin6->sin6_addr;
+		if (np->sndflow) {
+			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+				if (flowlabel == NULL)
+					return -EINVAL;
+				daddr = &flowlabel->dst;
+			}
+		}
+
+		/*
+		 * Otherwise it will be difficult to maintain
+		 * sk->sk_dst_cache.
+		 */
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    ipv6_addr_equal(daddr, &np->daddr))
+			daddr = &np->daddr;
+
+		if (addr_len >= sizeof(struct sockaddr_in6) &&
+		    sin6->sin6_scope_id &&
+		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+			fl.oif = sin6->sin6_scope_id;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED) 
+			return -EDESTADDRREQ;
+		
+		proto = inet->num;
+		daddr = &np->daddr;
+		fl.fl6_flowlabel = np->flow_label;
+	}
+
+	if (ipv6_addr_any(daddr)) {
+		/* 
+		 * unspecified destination address 
+		 * treated as error... is this correct ?
+		 */
+		fl6_sock_release(flowlabel);
+		return(-EINVAL);
+	}
+
+	if (fl.oif == 0)
+		fl.oif = sk->sk_bound_dev_if;
+
+	if (msg->msg_controllen) {
+		opt = &opt_space;
+		memset(opt, 0, sizeof(struct ipv6_txoptions));
+		opt->tot_len = sizeof(struct ipv6_txoptions);
+
+		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
+		if (err < 0) {
+			fl6_sock_release(flowlabel);
+			return err;
+		}
+		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+		}
+		if (!(opt->opt_nflen|opt->opt_flen))
+			opt = NULL;
+	}
+	if (opt == NULL)
+		opt = np->opt;
+	if (flowlabel)
+		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+
+	fl.proto = proto;
+	rawv6_probe_proto_opt(&fl, msg);
+ 
+	ipv6_addr_copy(&fl.fl6_dst, daddr);
+	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+
+	/* merge ip6_build_xmit from ip6_output */
+	if (opt && opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+		dst_release(dst);
+		goto out;
+	}
+
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl.fl6_dst))
+			hlimit = np->mcast_hops;
+		else
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+		if (hlimit < 0)
+			hlimit = ipv6_get_hoplimit(dst->dev);
+	}
+
+	if (msg->msg_flags&MSG_CONFIRM)
+		goto do_confirm;
+
+back_from_confirm:
+	if (inet->hdrincl) {
+		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
+	} else {
+		lock_sock(sk);
+		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
+					hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
+
+		if (err)
+			ip6_flush_pending_frames(sk);
+		else if (!(msg->msg_flags & MSG_MORE))
+			err = rawv6_push_pending_frames(sk, &fl, rp, len);
+	}
+done:
+	ip6_dst_store(sk, dst,
+		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
+		      &np->daddr : NULL);
+	if (err > 0)
+		err = np->recverr ? net_xmit_errno(err) : 0;
+
+	release_sock(sk);
+out:	
+	fl6_sock_release(flowlabel);
+	return err<0?err:len;
+do_confirm:
+	dst_confirm(dst);
+	if (!(msg->msg_flags & MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto done;
+}
+
+static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 
+			       char __user *optval, int optlen)
+{
+	switch (optname) {
+	case ICMPV6_FILTER:
+		if (optlen > sizeof(struct icmp6_filter))
+			optlen = sizeof(struct icmp6_filter);
+		if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
+			return -EFAULT;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	};
+
+	return 0;
+}
+
+static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, 
+			       char __user *optval, int __user *optlen)
+{
+	int len;
+
+	switch (optname) {
+	case ICMPV6_FILTER:
+		if (get_user(len, optlen))
+			return -EFAULT;
+		if (len < 0)
+			return -EINVAL;
+		if (len > sizeof(struct icmp6_filter))
+			len = sizeof(struct icmp6_filter);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &raw6_sk(sk)->filter, len))
+			return -EFAULT;
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	};
+
+	return 0;
+}
+
+
+static int rawv6_setsockopt(struct sock *sk, int level, int optname, 
+			    char __user *optval, int optlen)
+{
+	struct raw6_sock *rp = raw6_sk(sk);
+	int val;
+
+	switch(level) {
+		case SOL_RAW:
+			break;
+
+		case SOL_ICMPV6:
+			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+				return -EOPNOTSUPP;
+			return rawv6_seticmpfilter(sk, level, optname, optval,
+						   optlen);
+		case SOL_IPV6:
+			if (optname == IPV6_CHECKSUM)
+				break;
+		default:
+			return ipv6_setsockopt(sk, level, optname, optval,
+					       optlen);
+	};
+
+  	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	switch (optname) {
+		case IPV6_CHECKSUM:
+			/* You may get strange result with a positive odd offset;
+			   RFC2292bis agrees with me. */
+			if (val > 0 && (val&1))
+				return(-EINVAL);
+			if (val < 0) {
+				rp->checksum = 0;
+			} else {
+				rp->checksum = 1;
+				rp->offset = val;
+			}
+
+			return 0;
+			break;
+
+		default:
+			return(-ENOPROTOOPT);
+	}
+}
+
+static int rawv6_getsockopt(struct sock *sk, int level, int optname, 
+			    char __user *optval, int __user *optlen)
+{
+	struct raw6_sock *rp = raw6_sk(sk);
+	int val, len;
+
+	switch(level) {
+		case SOL_RAW:
+			break;
+
+		case SOL_ICMPV6:
+			if (inet_sk(sk)->num != IPPROTO_ICMPV6)
+				return -EOPNOTSUPP;
+			return rawv6_geticmpfilter(sk, level, optname, optval,
+						   optlen);
+		case SOL_IPV6:
+			if (optname == IPV6_CHECKSUM)
+				break;
+		default:
+			return ipv6_getsockopt(sk, level, optname, optval,
+					       optlen);
+	};
+
+	if (get_user(len,optlen))
+		return -EFAULT;
+
+	switch (optname) {
+	case IPV6_CHECKSUM:
+		if (rp->checksum == 0)
+			val = -1;
+		else
+			val = rp->offset;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	len = min_t(unsigned int, sizeof(int), len);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval,&val,len))
+		return -EFAULT;
+	return 0;
+}
+
+static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	switch(cmd) {
+		case SIOCOUTQ:
+		{
+			int amount = atomic_read(&sk->sk_wmem_alloc);
+			return put_user(amount, (int __user *)arg);
+		}
+		case SIOCINQ:
+		{
+			struct sk_buff *skb;
+			int amount = 0;
+
+			spin_lock_irq(&sk->sk_receive_queue.lock);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb != NULL)
+				amount = skb->tail - skb->h.raw;
+			spin_unlock_irq(&sk->sk_receive_queue.lock);
+			return put_user(amount, (int __user *)arg);
+		}
+
+		default:
+			return -ENOIOCTLCMD;
+	}
+}
+
+static void rawv6_close(struct sock *sk, long timeout)
+{
+	if (inet_sk(sk)->num == IPPROTO_RAW)
+		ip6_ra_control(sk, -1, NULL);
+
+	sk_common_release(sk);
+}
+
+static int rawv6_init_sk(struct sock *sk)
+{
+	if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
+		struct raw6_sock *rp = raw6_sk(sk);
+		rp->checksum = 1;
+		rp->offset   = 2;
+	}
+	return(0);
+}
+
+struct proto rawv6_prot = {
+	.name =		"RAWv6",
+	.owner =	THIS_MODULE,
+	.close =	rawv6_close,
+	.connect =	ip6_datagram_connect,
+	.disconnect =	udp_disconnect,
+	.ioctl =	rawv6_ioctl,
+	.init =		rawv6_init_sk,
+	.destroy =	inet6_destroy_sock,
+	.setsockopt =	rawv6_setsockopt,
+	.getsockopt =	rawv6_getsockopt,
+	.sendmsg =	rawv6_sendmsg,
+	.recvmsg =	rawv6_recvmsg,
+	.bind =		rawv6_bind,
+	.backlog_rcv =	rawv6_rcv_skb,
+	.hash =		raw_v6_hash,
+	.unhash =	raw_v6_unhash,
+	.obj_size =	sizeof(struct raw6_sock),
+};
+
+#ifdef CONFIG_PROC_FS
+struct raw6_iter_state {
+	int bucket;
+};
+
+#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
+
+static struct sock *raw6_get_first(struct seq_file *seq)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	struct raw6_iter_state* state = raw6_seq_private(seq);
+
+	for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
+		sk_for_each(sk, node, &raw_v6_htable[state->bucket])
+			if (sk->sk_family == PF_INET6)
+				goto out;
+	sk = NULL;
+out:
+	return sk;
+}
+
+static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
+{
+	struct raw6_iter_state* state = raw6_seq_private(seq);
+
+	do {
+		sk = sk_next(sk);
+try_again:
+		;
+	} while (sk && sk->sk_family != PF_INET6);
+
+	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
+		sk = sk_head(&raw_v6_htable[state->bucket]);
+		goto try_again;
+	}
+	return sk;
+}
+
+static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct sock *sk = raw6_get_first(seq);
+	if (sk)
+		while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
+			--pos;
+	return pos ? NULL : sk;
+}
+
+static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&raw_v6_lock);
+	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *sk;
+
+	if (v == SEQ_START_TOKEN)
+		sk = raw6_get_first(seq);
+	else
+		sk = raw6_get_next(seq, v);
+	++*pos;
+	return sk;
+}
+
+static void raw6_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&raw_v6_lock);
+}
+
+static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
+{
+	struct ipv6_pinfo *np = inet6_sk(sp);
+	struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+
+	dest  = &np->daddr;
+	src   = &np->rcv_saddr;
+	destp = 0;
+	srcp  = inet_sk(sp)->num;
+	seq_printf(seq,
+		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
+		   i,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3], srcp,
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
+		   sp->sk_state, 
+		   atomic_read(&sp->sk_wmem_alloc),
+		   atomic_read(&sp->sk_rmem_alloc),
+		   0, 0L, 0,
+		   sock_i_uid(sp), 0,
+		   sock_i_ino(sp),
+		   atomic_read(&sp->sk_refcnt), sp);
+}
+
+static int raw6_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq,
+			   "  sl  "
+			   "local_address                         "
+			   "remote_address                        "
+			   "st tx_queue rx_queue tr tm->when retrnsmt"
+			   "   uid  timeout inode\n");
+	else
+		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
+	return 0;
+}
+
+static struct seq_operations raw6_seq_ops = {
+	.start =	raw6_seq_start,
+	.next =		raw6_seq_next,
+	.stop =		raw6_seq_stop,
+	.show =		raw6_seq_show,
+};
+
+static int raw6_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct raw6_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		goto out;
+	rc = seq_open(file, &raw6_seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations raw6_seq_fops = {
+	.owner =	THIS_MODULE,
+	.open =		raw6_seq_open,
+	.read =		seq_read,
+	.llseek =	seq_lseek,
+	.release =	seq_release_private,
+};
+
+int __init raw6_proc_init(void)
+{
+	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+void raw6_proc_exit(void)
+{
+	proc_net_remove("raw6");
+}
+#endif	/* CONFIG_PROC_FS */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
new file mode 100644
index 00000000000..59e7c631787
--- /dev/null
+++ b/net/ipv6/reassembly.c
@@ -0,0 +1,771 @@
+/*
+ *	IPv6 fragment reassembly
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
+ *
+ *	Based on: net/ipv4/ip_fragment.c
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/* 
+ *	Fixes:	
+ *	Andi Kleen	Make it work with multiple hosts.
+ *			More RFC compliance.
+ *
+ *      Horst von Brand Add missing #include <linux/string.h>
+ *	Alexey Kuznetsov	SMP races, threading, cleanup.
+ *	Patrick McHardy		LRU queue of frag heads for evictor.
+ *	Mitsuru KANDA @USAGI	Register inet6_protocol{}.
+ *	David Stevens and
+ *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
+ *				calculate ICV correctly.
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+
+int sysctl_ip6frag_high_thresh = 256*1024;
+int sysctl_ip6frag_low_thresh = 192*1024;
+
+int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+
+struct ip6frag_skb_cb
+{
+	struct inet6_skb_parm	h;
+	int			offset;
+};
+
+#define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb))
+
+
+/*
+ *	Equivalent of ipv4 struct ipq
+ */
+
+struct frag_queue
+{
+	struct frag_queue	*next;
+	struct list_head lru_list;		/* lru list member	*/
+
+	__u32			id;		/* fragment id		*/
+	struct in6_addr		saddr;
+	struct in6_addr		daddr;
+
+	spinlock_t		lock;
+	atomic_t		refcnt;
+	struct timer_list	timer;		/* expire timer		*/
+	struct sk_buff		*fragments;
+	int			len;
+	int			meat;
+	int			iif;
+	struct timeval		stamp;
+	unsigned int		csum;
+	__u8			last_in;	/* has first/last segment arrived? */
+#define COMPLETE		4
+#define FIRST_IN		2
+#define LAST_IN			1
+	__u16			nhoffset;
+	struct frag_queue	**pprev;
+};
+
+/* Hash table. */
+
+#define IP6Q_HASHSZ	64
+
+static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
+static DEFINE_RWLOCK(ip6_frag_lock);
+static u32 ip6_frag_hash_rnd;
+static LIST_HEAD(ip6_frag_lru_list);
+int ip6_frag_nqueues = 0;
+
+static __inline__ void __fq_unlink(struct frag_queue *fq)
+{
+	if(fq->next)
+		fq->next->pprev = fq->pprev;
+	*fq->pprev = fq->next;
+	list_del(&fq->lru_list);
+	ip6_frag_nqueues--;
+}
+
+static __inline__ void fq_unlink(struct frag_queue *fq)
+{
+	write_lock(&ip6_frag_lock);
+	__fq_unlink(fq);
+	write_unlock(&ip6_frag_lock);
+}
+
+static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+			       struct in6_addr *daddr)
+{
+	u32 a, b, c;
+
+	a = saddr->s6_addr32[0];
+	b = saddr->s6_addr32[1];
+	c = saddr->s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += ip6_frag_hash_rnd;
+	__jhash_mix(a, b, c);
+
+	a += saddr->s6_addr32[3];
+	b += daddr->s6_addr32[0];
+	c += daddr->s6_addr32[1];
+	__jhash_mix(a, b, c);
+
+	a += daddr->s6_addr32[2];
+	b += daddr->s6_addr32[3];
+	c += id;
+	__jhash_mix(a, b, c);
+
+	return c & (IP6Q_HASHSZ - 1);
+}
+
+static struct timer_list ip6_frag_secret_timer;
+int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+
+static void ip6_frag_secret_rebuild(unsigned long dummy)
+{
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&ip6_frag_lock);
+	get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
+	for (i = 0; i < IP6Q_HASHSZ; i++) {
+		struct frag_queue *q;
+
+		q = ip6_frag_hash[i];
+		while (q) {
+			struct frag_queue *next = q->next;
+			unsigned int hval = ip6qhashfn(q->id,
+						       &q->saddr,
+						       &q->daddr);
+
+			if (hval != i) {
+				/* Unlink. */
+				if (q->next)
+					q->next->pprev = q->pprev;
+				*q->pprev = q->next;
+
+				/* Relink to new hash chain. */
+				if ((q->next = ip6_frag_hash[hval]) != NULL)
+					q->next->pprev = &q->next;
+				ip6_frag_hash[hval] = q;
+				q->pprev = &ip6_frag_hash[hval];
+			}
+
+			q = next;
+		}
+	}
+	write_unlock(&ip6_frag_lock);
+
+	mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
+}
+
+atomic_t ip6_frag_mem = ATOMIC_INIT(0);
+
+/* Memory Tracking Functions. */
+static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
+{
+	if (work)
+		*work -= skb->truesize;
+	atomic_sub(skb->truesize, &ip6_frag_mem);
+	kfree_skb(skb);
+}
+
+static inline void frag_free_queue(struct frag_queue *fq, int *work)
+{
+	if (work)
+		*work -= sizeof(struct frag_queue);
+	atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
+	kfree(fq);
+}
+
+static inline struct frag_queue *frag_alloc_queue(void)
+{
+	struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC);
+
+	if(!fq)
+		return NULL;
+	atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+	return fq;
+}
+
+/* Destruction primitives. */
+
+/* Complete destruction of fq. */
+static void ip6_frag_destroy(struct frag_queue *fq, int *work)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(fq->last_in&COMPLETE);
+	BUG_TRAP(del_timer(&fq->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = fq->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(fp, work);
+		fp = xp;
+	}
+
+	frag_free_queue(fq, work);
+}
+
+static __inline__ void fq_put(struct frag_queue *fq, int *work)
+{
+	if (atomic_dec_and_test(&fq->refcnt))
+		ip6_frag_destroy(fq, work);
+}
+
+/* Kill fq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static __inline__ void fq_kill(struct frag_queue *fq)
+{
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	if (!(fq->last_in & COMPLETE)) {
+		fq_unlink(fq);
+		atomic_dec(&fq->refcnt);
+		fq->last_in |= COMPLETE;
+	}
+}
+
+static void ip6_evictor(void)
+{
+	struct frag_queue *fq;
+	struct list_head *tmp;
+	int work;
+
+	work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
+	if (work <= 0)
+		return;
+
+	while(work > 0) {
+		read_lock(&ip6_frag_lock);
+		if (list_empty(&ip6_frag_lru_list)) {
+			read_unlock(&ip6_frag_lock);
+			return;
+		}
+		tmp = ip6_frag_lru_list.next;
+		fq = list_entry(tmp, struct frag_queue, lru_list);
+		atomic_inc(&fq->refcnt);
+		read_unlock(&ip6_frag_lock);
+
+		spin_lock(&fq->lock);
+		if (!(fq->last_in&COMPLETE))
+			fq_kill(fq);
+		spin_unlock(&fq->lock);
+
+		fq_put(fq, &work);
+		IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	}
+}
+
+static void ip6_frag_expire(unsigned long data)
+{
+	struct frag_queue *fq = (struct frag_queue *) data;
+
+	spin_lock(&fq->lock);
+
+	if (fq->last_in & COMPLETE)
+		goto out;
+
+	fq_kill(fq);
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+
+	/* Send error only if the first segment arrived. */
+	if (fq->last_in&FIRST_IN && fq->fragments) {
+		struct net_device *dev = dev_get_by_index(fq->iif);
+
+		/*
+		   But use as source device on which LAST ARRIVED
+		   segment was received. And do not use fq->dev
+		   pointer directly, device might already disappeared.
+		 */
+		if (dev) {
+			fq->fragments->dev = dev;
+			icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
+				    dev);
+			dev_put(dev);
+		}
+	}
+out:
+	spin_unlock(&fq->lock);
+	fq_put(fq, NULL);
+}
+
+/* Creation primitives. */
+
+
+static struct frag_queue *ip6_frag_intern(unsigned int hash,
+					  struct frag_queue *fq_in)
+{
+	struct frag_queue *fq;
+
+	write_lock(&ip6_frag_lock);
+#ifdef CONFIG_SMP
+	for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
+		if (fq->id == fq_in->id && 
+		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
+		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
+			atomic_inc(&fq->refcnt);
+			write_unlock(&ip6_frag_lock);
+			fq_in->last_in |= COMPLETE;
+			fq_put(fq_in, NULL);
+			return fq;
+		}
+	}
+#endif
+	fq = fq_in;
+
+	if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
+		atomic_inc(&fq->refcnt);
+
+	atomic_inc(&fq->refcnt);
+	if((fq->next = ip6_frag_hash[hash]) != NULL)
+		fq->next->pprev = &fq->next;
+	ip6_frag_hash[hash] = fq;
+	fq->pprev = &ip6_frag_hash[hash];
+	INIT_LIST_HEAD(&fq->lru_list);
+	list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
+	ip6_frag_nqueues++;
+	write_unlock(&ip6_frag_lock);
+	return fq;
+}
+
+
+static struct frag_queue *
+ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+	struct frag_queue *fq;
+
+	if ((fq = frag_alloc_queue()) == NULL)
+		goto oom;
+
+	memset(fq, 0, sizeof(struct frag_queue));
+
+	fq->id = id;
+	ipv6_addr_copy(&fq->saddr, src);
+	ipv6_addr_copy(&fq->daddr, dst);
+
+	init_timer(&fq->timer);
+	fq->timer.function = ip6_frag_expire;
+	fq->timer.data = (long) fq;
+	spin_lock_init(&fq->lock);
+	atomic_set(&fq->refcnt, 1);
+
+	return ip6_frag_intern(hash, fq);
+
+oom:
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	return NULL;
+}
+
+static __inline__ struct frag_queue *
+fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+	struct frag_queue *fq;
+	unsigned int hash = ip6qhashfn(id, src, dst);
+
+	read_lock(&ip6_frag_lock);
+	for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
+		if (fq->id == id && 
+		    ipv6_addr_equal(src, &fq->saddr) &&
+		    ipv6_addr_equal(dst, &fq->daddr)) {
+			atomic_inc(&fq->refcnt);
+			read_unlock(&ip6_frag_lock);
+			return fq;
+		}
+	}
+	read_unlock(&ip6_frag_lock);
+
+	return ip6_frag_create(hash, id, src, dst);
+}
+
+
+static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, 
+			   struct frag_hdr *fhdr, int nhoff)
+{
+	struct sk_buff *prev, *next;
+	int offset, end;
+
+	if (fq->last_in & COMPLETE)
+		goto err;
+
+	offset = ntohs(fhdr->frag_off) & ~0x7;
+	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
+			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+
+	if ((unsigned int)end > IPV6_MAXPLEN) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ 		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+ 		return;
+	}
+
+ 	if (skb->ip_summed == CHECKSUM_HW)
+ 		skb->csum = csum_sub(skb->csum,
+ 				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+
+	/* Is this the final fragment? */
+	if (!(fhdr->frag_off & htons(IP6_MF))) {
+		/* If we already have some bits beyond end
+		 * or have different end, the segment is corrupted.
+		 */
+		if (end < fq->len ||
+		    ((fq->last_in & LAST_IN) && end != fq->len))
+			goto err;
+		fq->last_in |= LAST_IN;
+		fq->len = end;
+	} else {
+		/* Check if the fragment is rounded to 8 bytes.
+		 * Required by the RFC.
+		 */
+		if (end & 0x7) {
+			/* RFC2460 says always send parameter problem in
+			 * this case. -DaveM
+			 */
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 
+					  offsetof(struct ipv6hdr, payload_len));
+			return;
+		}
+		if (end > fq->len) {
+			/* Some bits beyond end -> corruption. */
+			if (fq->last_in & LAST_IN)
+				goto err;
+			fq->len = end;
+		}
+	}
+
+	if (end == offset)
+		goto err;
+
+	/* Point into the IP datagram 'data' part. */
+	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
+		goto err;
+	if (end-offset < skb->len) {
+		if (pskb_trim(skb, end - offset))
+			goto err;
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+
+	/* Find out which fragments are in front and at the back of us
+	 * in the chain of fragments so far.  We must know where to put
+	 * this fragment, right?
+	 */
+	prev = NULL;
+	for(next = fq->fragments; next != NULL; next = next->next) {
+		if (FRAG6_CB(next)->offset >= offset)
+			break;	/* bingo! */
+		prev = next;
+	}
+
+	/* We found where to put this one.  Check for overlap with
+	 * preceding fragment, and, if needed, align things so that
+	 * any overlaps are eliminated.
+	 */
+	if (prev) {
+		int i = (FRAG6_CB(prev)->offset + prev->len) - offset;
+
+		if (i > 0) {
+			offset += i;
+			if (end <= offset)
+				goto err;
+			if (!pskb_pull(skb, i))
+				goto err;
+			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+
+	/* Look for overlap with succeeding segments.
+	 * If we can merge fragments, do it.
+	 */
+	while (next && FRAG6_CB(next)->offset < end) {
+		int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */
+
+		if (i < next->len) {
+			/* Eat head of the next overlapped fragment
+			 * and leave the loop. The next ones cannot overlap.
+			 */
+			if (!pskb_pull(next, i))
+				goto err;
+			FRAG6_CB(next)->offset += i;	/* next fragment */
+			fq->meat -= i;
+			if (next->ip_summed != CHECKSUM_UNNECESSARY)
+				next->ip_summed = CHECKSUM_NONE;
+			break;
+		} else {
+			struct sk_buff *free_it = next;
+
+			/* Old fragment is completely overridden with
+			 * new one drop it.
+			 */
+			next = next->next;
+
+			if (prev)
+				prev->next = next;
+			else
+				fq->fragments = next;
+
+			fq->meat -= free_it->len;
+			frag_kfree_skb(free_it, NULL);
+		}
+	}
+
+	FRAG6_CB(skb)->offset = offset;
+
+	/* Insert this fragment in the chain of fragments. */
+	skb->next = next;
+	if (prev)
+		prev->next = skb;
+	else
+		fq->fragments = skb;
+
+	if (skb->dev)
+		fq->iif = skb->dev->ifindex;
+	skb->dev = NULL;
+	fq->stamp = skb->stamp;
+	fq->meat += skb->len;
+	atomic_add(skb->truesize, &ip6_frag_mem);
+
+	/* The first fragment.
+	 * nhoffset is obtained from the first fragment, of course.
+	 */
+	if (offset == 0) {
+		fq->nhoffset = nhoff;
+		fq->last_in |= FIRST_IN;
+	}
+	write_lock(&ip6_frag_lock);
+	list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
+	write_unlock(&ip6_frag_lock);
+	return;
+
+err:
+	IP6_INC_STATS(IPSTATS_MIB_REASMFAILS);
+	kfree_skb(skb);
+}
+
+/*
+ *	Check if this packet is complete.
+ *	Returns NULL on failure by any reason, and pointer
+ *	to current nexthdr field in reassembled frame.
+ *
+ *	It is called with locked fq, and caller must check that
+ *	queue is eligible for reassembly i.e. it is not COMPLETE,
+ *	the last and the first frames arrived and all the bits are here.
+ */
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+			  unsigned int *nhoffp,
+			  struct net_device *dev)
+{
+	struct sk_buff *fp, *head = fq->fragments;
+	int    payload_len;
+	unsigned int nhoff;
+
+	fq_kill(fq);
+
+	BUG_TRAP(head != NULL);
+	BUG_TRAP(FRAG6_CB(head)->offset == 0);
+
+	/* Unfragmented part is taken from the first segment. */
+	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	if (payload_len > IPV6_MAXPLEN)
+		goto out_oversize;
+
+	/* Head of list must not be cloned. */
+	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+		goto out_oom;
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments. */
+	if (skb_shinfo(head)->frag_list) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
+			goto out_oom;
+		clone->next = head->next;
+		head->next = clone;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_shinfo(head)->frag_list = NULL;
+		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
+			plen += skb_shinfo(head)->frags[i].size;
+		clone->len = clone->data_len = head->data_len - plen;
+		head->data_len -= clone->len;
+		head->len -= clone->len;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+		atomic_add(clone->truesize, &ip6_frag_mem);
+	}
+
+	/* We have to remove fragment header from datagram and to relocate
+	 * header in order to calculate ICV correctly. */
+	nhoff = fq->nhoffset;
+	head->nh.raw[nhoff] = head->h.raw[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head, 
+		(head->data - head->head) - sizeof(struct frag_hdr));
+	head->mac.raw += sizeof(struct frag_hdr);
+	head->nh.raw += sizeof(struct frag_hdr);
+
+	skb_shinfo(head)->frag_list = head->next;
+	head->h.raw = head->data;
+	skb_push(head, head->data - head->nh.raw);
+	atomic_sub(head->truesize, &ip6_frag_mem);
+
+	for (fp=head->next; fp; fp = fp->next) {
+		head->data_len += fp->len;
+		head->len += fp->len;
+		if (head->ip_summed != fp->ip_summed)
+			head->ip_summed = CHECKSUM_NONE;
+		else if (head->ip_summed == CHECKSUM_HW)
+			head->csum = csum_add(head->csum, fp->csum);
+		head->truesize += fp->truesize;
+		atomic_sub(fp->truesize, &ip6_frag_mem);
+	}
+
+	head->next = NULL;
+	head->dev = dev;
+	head->stamp = fq->stamp;
+	head->nh.ipv6h->payload_len = htons(payload_len);
+
+	*skb_in = head;
+
+	/* Yes, and fold redundant checksum back. 8) */
+	if (head->ip_summed == CHECKSUM_HW)
+		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+	fq->fragments = NULL;
+	*nhoffp = nhoff;
+	return 1;
+
+out_oversize:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
+	goto out_fail;
+out_oom:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
+out_fail:
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	return -1;
+}
+
+static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *skbp; 
+	struct net_device *dev = skb->dev;
+	struct frag_hdr *fhdr;
+	struct frag_queue *fq;
+	struct ipv6hdr *hdr;
+
+	hdr = skb->nh.ipv6h;
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+
+	/* Jumbo payload inhibits frag. header */
+	if (hdr->payload_len==0) {
+		IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		return -1;
+	}
+	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
+		IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		return -1;
+	}
+
+	hdr = skb->nh.ipv6h;
+	fhdr = (struct frag_hdr *)skb->h.raw;
+
+	if (!(fhdr->frag_off & htons(0xFFF9))) {
+		/* It is not a fragmented frame */
+		skb->h.raw += sizeof(struct frag_hdr);
+		IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+
+		*nhoffp = (u8*)fhdr - skb->nh.raw;
+		return 1;
+	}
+
+	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+		ip6_evictor();
+
+	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) {
+		int ret = -1;
+
+		spin_lock(&fq->lock);
+
+		ip6_frag_queue(fq, skb, fhdr, *nhoffp);
+
+		if (fq->last_in == (FIRST_IN|LAST_IN) &&
+		    fq->meat == fq->len)
+			ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
+
+		spin_unlock(&fq->lock);
+		fq_put(fq, NULL);
+		return ret;
+	}
+
+	IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	kfree_skb(skb);
+	return -1;
+}
+
+static struct inet6_protocol frag_protocol =
+{
+	.handler	=	ipv6_frag_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+void __init ipv6_frag_init(void)
+{
+	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
+		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
+
+	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				   (jiffies ^ (jiffies >> 6)));
+
+	init_timer(&ip6_frag_secret_timer);
+	ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
+	ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
+	add_timer(&ip6_frag_secret_timer);
+}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
new file mode 100644
index 00000000000..183802902c0
--- /dev/null
+++ b/net/ipv6/route.c
@@ -0,0 +1,2131 @@
+/*
+ *	Linux INET6 implementation
+ *	FIB front-end.
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*	Changes:
+ *
+ *	YOSHIFUJI Hideaki @USAGI
+ *		reworked default router selection.
+ *		- respect outgoing interface
+ *		- select from (probably) reachable routers (i.e.
+ *		routers in REACHABLE, STALE, DELAY or PROBE states).
+ *		- always select the same router if it is (probably)
+ *		reachable.  otherwise, round-robin the list.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/times.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/if_arp.h>
+
+#ifdef 	CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif
+
+#include <net/snmp.h>
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/tcp.h>
+#include <linux/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+/* Set to 3 to get tracing. */
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RDBG(x) printk x
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#else
+#define RDBG(x)
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
+
+static int ip6_rt_max_size = 4096;
+static int ip6_rt_gc_min_interval = HZ / 2;
+static int ip6_rt_gc_timeout = 60*HZ;
+int ip6_rt_gc_interval = 30*HZ;
+static int ip6_rt_gc_elasticity = 9;
+static int ip6_rt_mtu_expires = 10*60*HZ;
+static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+
+static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
+static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void		ip6_dst_destroy(struct dst_entry *);
+static void		ip6_dst_ifdown(struct dst_entry *,
+				       struct net_device *dev, int how);
+static int		 ip6_dst_gc(void);
+
+static int		ip6_pkt_discard(struct sk_buff *skb);
+static int		ip6_pkt_discard_out(struct sk_buff *skb);
+static void		ip6_link_failure(struct sk_buff *skb);
+static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+
+static struct dst_ops ip6_dst_ops = {
+	.family			=	AF_INET6,
+	.protocol		=	__constant_htons(ETH_P_IPV6),
+	.gc			=	ip6_dst_gc,
+	.gc_thresh		=	1024,
+	.check			=	ip6_dst_check,
+	.destroy		=	ip6_dst_destroy,
+	.ifdown			=	ip6_dst_ifdown,
+	.negative_advice	=	ip6_negative_advice,
+	.link_failure		=	ip6_link_failure,
+	.update_pmtu		=	ip6_rt_update_pmtu,
+	.entry_size		=	sizeof(struct rt6_info),
+};
+
+struct rt6_info ip6_null_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -ENETUNREACH,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_null_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
+
+struct fib6_node ip6_routing_table = {
+	.leaf		= &ip6_null_entry,
+	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+};
+
+/* Protects all the ip6 fib */
+
+DEFINE_RWLOCK(rt6_lock);
+
+
+/* allocate dst with ip6_dst_ops */
+static __inline__ struct rt6_info *ip6_dst_alloc(void)
+{
+	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
+}
+
+static void ip6_dst_destroy(struct dst_entry *dst)
+{
+	struct rt6_info *rt = (struct rt6_info *)dst;
+	struct inet6_dev *idev = rt->rt6i_idev;
+
+	if (idev != NULL) {
+		rt->rt6i_idev = NULL;
+		in6_dev_put(idev);
+	}	
+}
+
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			   int how)
+{
+	struct rt6_info *rt = (struct rt6_info *)dst;
+	struct inet6_dev *idev = rt->rt6i_idev;
+
+	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
+		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
+		if (loopback_idev != NULL) {
+			rt->rt6i_idev = loopback_idev;
+			in6_dev_put(idev);
+		}
+	}
+}
+
+static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+{
+	return (rt->rt6i_flags & RTF_EXPIRES &&
+		time_after(jiffies, rt->rt6i_expires));
+}
+
+/*
+ *	Route lookup. Any rt6_lock is implied.
+ */
+
+static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
+						    int oif,
+						    int strict)
+{
+	struct rt6_info *local = NULL;
+	struct rt6_info *sprt;
+
+	if (oif) {
+		for (sprt = rt; sprt; sprt = sprt->u.next) {
+			struct net_device *dev = sprt->rt6i_dev;
+			if (dev->ifindex == oif)
+				return sprt;
+			if (dev->flags & IFF_LOOPBACK) {
+				if (sprt->rt6i_idev == NULL ||
+				    sprt->rt6i_idev->dev->ifindex != oif) {
+					if (strict && oif)
+						continue;
+					if (local && (!oif || 
+						      local->rt6i_idev->dev->ifindex == oif))
+						continue;
+				}
+				local = sprt;
+			}
+		}
+
+		if (local)
+			return local;
+
+		if (strict)
+			return &ip6_null_entry;
+	}
+	return rt;
+}
+
+/*
+ *	pointer to the last default router chosen. BH is disabled locally.
+ */
+static struct rt6_info *rt6_dflt_pointer;
+static DEFINE_SPINLOCK(rt6_dflt_lock);
+
+void rt6_reset_dflt_pointer(struct rt6_info *rt)
+{
+	spin_lock_bh(&rt6_dflt_lock);
+	if (rt == NULL || rt == rt6_dflt_pointer) {
+		RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
+		rt6_dflt_pointer = NULL;
+	}
+	spin_unlock_bh(&rt6_dflt_lock);
+}
+
+/* Default Router Selection (RFC 2461 6.3.6) */
+static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
+{
+	struct rt6_info *match = NULL;
+	struct rt6_info *sprt;
+	int mpri = 0;
+
+	for (sprt = rt; sprt; sprt = sprt->u.next) {
+		struct neighbour *neigh;
+		int m = 0;
+
+		if (!oif ||
+		    (sprt->rt6i_dev &&
+		     sprt->rt6i_dev->ifindex == oif))
+			m += 8;
+
+		if (rt6_check_expired(sprt))
+			continue;
+
+		if (sprt == rt6_dflt_pointer)
+			m += 4;
+
+		if ((neigh = sprt->rt6i_nexthop) != NULL) {
+			read_lock_bh(&neigh->lock);
+			switch (neigh->nud_state) {
+			case NUD_REACHABLE:
+				m += 3;
+				break;
+
+			case NUD_STALE:
+			case NUD_DELAY:
+			case NUD_PROBE:
+				m += 2;
+				break;
+
+			case NUD_NOARP:
+			case NUD_PERMANENT:
+				m += 1;
+				break;
+
+			case NUD_INCOMPLETE:
+			default:
+				read_unlock_bh(&neigh->lock);
+				continue;
+			}
+			read_unlock_bh(&neigh->lock);
+		} else {
+			continue;
+		}
+
+		if (m > mpri || m >= 12) {
+			match = sprt;
+			mpri = m;
+			if (m >= 12) {
+				/* we choose the last default router if it
+				 * is in (probably) reachable state.
+				 * If route changed, we should do pmtu
+				 * discovery. --yoshfuji
+				 */
+				break;
+			}
+		}
+	}
+
+	spin_lock(&rt6_dflt_lock);
+	if (!match) {
+		/*
+		 *	No default routers are known to be reachable.
+		 *	SHOULD round robin
+		 */
+		if (rt6_dflt_pointer) {
+			for (sprt = rt6_dflt_pointer->u.next;
+			     sprt; sprt = sprt->u.next) {
+				if (sprt->u.dst.obsolete <= 0 &&
+				    sprt->u.dst.error == 0 &&
+				    !rt6_check_expired(sprt)) {
+					match = sprt;
+					break;
+				}
+			}
+			for (sprt = rt;
+			     !match && sprt;
+			     sprt = sprt->u.next) {
+				if (sprt->u.dst.obsolete <= 0 &&
+				    sprt->u.dst.error == 0 &&
+				    !rt6_check_expired(sprt)) {
+					match = sprt;
+					break;
+				}
+				if (sprt == rt6_dflt_pointer)
+					break;
+			}
+		}
+	}
+
+	if (match) {
+		if (rt6_dflt_pointer != match)
+			RT6_TRACE("changed default router: %p->%p\n",
+				  rt6_dflt_pointer, match);
+		rt6_dflt_pointer = match;
+	}
+	spin_unlock(&rt6_dflt_lock);
+
+	if (!match) {
+		/*
+		 * Last Resort: if no default routers found, 
+		 * use addrconf default route.
+		 * We don't record this route.
+		 */
+		for (sprt = ip6_routing_table.leaf;
+		     sprt; sprt = sprt->u.next) {
+			if (!rt6_check_expired(sprt) &&
+			    (sprt->rt6i_flags & RTF_DEFAULT) &&
+			    (!oif ||
+			     (sprt->rt6i_dev &&
+			      sprt->rt6i_dev->ifindex == oif))) {
+				match = sprt;
+				break;
+			}
+		}
+		if (!match) {
+			/* no default route.  give up. */
+			match = &ip6_null_entry;
+		}
+	}
+
+	return match;
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
+			    int oif, int strict)
+{
+	struct fib6_node *fn;
+	struct rt6_info *rt;
+
+	read_lock_bh(&rt6_lock);
+	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
+	rt = rt6_device_match(fn->leaf, oif, strict);
+	dst_hold(&rt->u.dst);
+	rt->u.dst.__use++;
+	read_unlock_bh(&rt6_lock);
+
+	rt->u.dst.lastuse = jiffies;
+	if (rt->u.dst.error == 0)
+		return rt;
+	dst_release(&rt->u.dst);
+	return NULL;
+}
+
+/* ip6_ins_rt is called with FREE rt6_lock.
+   It takes new route entry, the addition fails by any reason the
+   route is freed. In any case, if caller does not hold it, it may
+   be destroyed.
+ */
+
+int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+	int err;
+
+	write_lock_bh(&rt6_lock);
+	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
+	write_unlock_bh(&rt6_lock);
+
+	return err;
+}
+
+/* No rt6_lock! If COW failed, the function returns dead route entry
+   with dst->error set to errno value.
+ */
+
+static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
+				struct in6_addr *saddr)
+{
+	int err;
+	struct rt6_info *rt;
+
+	/*
+	 *	Clone the route.
+	 */
+
+	rt = ip6_rt_copy(ort);
+
+	if (rt) {
+		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
+
+		if (!(rt->rt6i_flags&RTF_GATEWAY))
+			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+
+		rt->rt6i_dst.plen = 128;
+		rt->rt6i_flags |= RTF_CACHE;
+		rt->u.dst.flags |= DST_HOST;
+
+#ifdef CONFIG_IPV6_SUBTREES
+		if (rt->rt6i_src.plen && saddr) {
+			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+			rt->rt6i_src.plen = 128;
+		}
+#endif
+
+		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+
+		dst_hold(&rt->u.dst);
+
+		err = ip6_ins_rt(rt, NULL, NULL);
+		if (err == 0)
+			return rt;
+
+		rt->u.dst.error = err;
+
+		return rt;
+	}
+	dst_hold(&ip6_null_entry.u.dst);
+	return &ip6_null_entry;
+}
+
+#define BACKTRACK() \
+if (rt == &ip6_null_entry && strict) { \
+       while ((fn = fn->parent) != NULL) { \
+		if (fn->fn_flags & RTN_ROOT) { \
+			dst_hold(&rt->u.dst); \
+			goto out; \
+		} \
+		if (fn->fn_flags & RTN_RTINFO) \
+			goto restart; \
+	} \
+}
+
+
+void ip6_route_input(struct sk_buff *skb)
+{
+	struct fib6_node *fn;
+	struct rt6_info *rt;
+	int strict;
+	int attempts = 3;
+
+	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+
+relookup:
+	read_lock_bh(&rt6_lock);
+
+	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
+			 &skb->nh.ipv6h->saddr);
+
+restart:
+	rt = fn->leaf;
+
+	if ((rt->rt6i_flags & RTF_CACHE)) {
+		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
+		BACKTRACK();
+		dst_hold(&rt->u.dst);
+		goto out;
+	}
+
+	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
+	BACKTRACK();
+
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+		struct rt6_info *nrt;
+		dst_hold(&rt->u.dst);
+		read_unlock_bh(&rt6_lock);
+
+		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
+			      &skb->nh.ipv6h->saddr);
+
+		dst_release(&rt->u.dst);
+		rt = nrt;
+
+		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+			goto out2;
+
+		/* Race condition! In the gap, when rt6_lock was
+		   released someone could insert this route.  Relookup.
+		*/
+		dst_release(&rt->u.dst);
+		goto relookup;
+	}
+	dst_hold(&rt->u.dst);
+
+out:
+	read_unlock_bh(&rt6_lock);
+out2:
+	rt->u.dst.lastuse = jiffies;
+	rt->u.dst.__use++;
+	skb->dst = (struct dst_entry *) rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+	struct fib6_node *fn;
+	struct rt6_info *rt;
+	int strict;
+	int attempts = 3;
+
+	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+
+relookup:
+	read_lock_bh(&rt6_lock);
+
+	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+
+restart:
+	rt = fn->leaf;
+
+	if ((rt->rt6i_flags & RTF_CACHE)) {
+		rt = rt6_device_match(rt, fl->oif, strict);
+		BACKTRACK();
+		dst_hold(&rt->u.dst);
+		goto out;
+	}
+	if (rt->rt6i_flags & RTF_DEFAULT) {
+		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
+			rt = rt6_best_dflt(rt, fl->oif);
+	} else {
+		rt = rt6_device_match(rt, fl->oif, strict);
+		BACKTRACK();
+	}
+
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+		struct rt6_info *nrt;
+		dst_hold(&rt->u.dst);
+		read_unlock_bh(&rt6_lock);
+
+		nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+
+		dst_release(&rt->u.dst);
+		rt = nrt;
+
+		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+			goto out2;
+
+		/* Race condition! In the gap, when rt6_lock was
+		   released someone could insert this route.  Relookup.
+		*/
+		dst_release(&rt->u.dst);
+		goto relookup;
+	}
+	dst_hold(&rt->u.dst);
+
+out:
+	read_unlock_bh(&rt6_lock);
+out2:
+	rt->u.dst.lastuse = jiffies;
+	rt->u.dst.__use++;
+	return &rt->u.dst;
+}
+
+
+/*
+ *	Destination cache support functions
+ */
+
+static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	struct rt6_info *rt;
+
+	rt = (struct rt6_info *) dst;
+
+	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+		return dst;
+
+	return NULL;
+}
+
+static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+
+	if (rt) {
+		if (rt->rt6i_flags & RTF_CACHE)
+			ip6_del_rt(rt, NULL, NULL);
+		else
+			dst_release(dst);
+	}
+	return NULL;
+}
+
+static void ip6_link_failure(struct sk_buff *skb)
+{
+	struct rt6_info *rt;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+
+	rt = (struct rt6_info *) skb->dst;
+	if (rt) {
+		if (rt->rt6i_flags&RTF_CACHE) {
+			dst_set_expires(&rt->u.dst, 0);
+			rt->rt6i_flags |= RTF_EXPIRES;
+		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+			rt->rt6i_node->fn_sernum = -1;
+	}
+}
+
+static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct rt6_info *rt6 = (struct rt6_info*)dst;
+
+	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+		rt6->rt6i_flags |= RTF_MODIFIED;
+		if (mtu < IPV6_MIN_MTU) {
+			mtu = IPV6_MIN_MTU;
+			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		}
+		dst->metrics[RTAX_MTU-1] = mtu;
+	}
+}
+
+/* Protected by rt6_lock.  */
+static struct dst_entry *ndisc_dst_gc_list;
+static int ipv6_get_mtu(struct net_device *dev);
+
+static inline unsigned int ipv6_advmss(unsigned int mtu)
+{
+	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+
+	if (mtu < ip6_rt_min_advmss)
+		mtu = ip6_rt_min_advmss;
+
+	/*
+	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
+	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
+	 * IPV6_MAXPLEN is also valid and means: "any MSS, 
+	 * rely only on pmtu discovery"
+	 */
+	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
+		mtu = IPV6_MAXPLEN;
+	return mtu;
+}
+
+struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
+				  struct neighbour *neigh,
+				  struct in6_addr *addr,
+				  int (*output)(struct sk_buff *))
+{
+	struct rt6_info *rt;
+	struct inet6_dev *idev = in6_dev_get(dev);
+
+	if (unlikely(idev == NULL))
+		return NULL;
+
+	rt = ip6_dst_alloc();
+	if (unlikely(rt == NULL)) {
+		in6_dev_put(idev);
+		goto out;
+	}
+
+	dev_hold(dev);
+	if (neigh)
+		neigh_hold(neigh);
+	else
+		neigh = ndisc_get_neigh(dev, addr);
+
+	rt->rt6i_dev	  = dev;
+	rt->rt6i_idev     = idev;
+	rt->rt6i_nexthop  = neigh;
+	atomic_set(&rt->u.dst.__refcnt, 1);
+	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+	rt->u.dst.output  = output;
+
+#if 0	/* there's no chance to use these for ndisc */
+	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
+				? DST_HOST 
+				: 0;
+	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.plen = 128;
+#endif
+
+	write_lock_bh(&rt6_lock);
+	rt->u.dst.next = ndisc_dst_gc_list;
+	ndisc_dst_gc_list = &rt->u.dst;
+	write_unlock_bh(&rt6_lock);
+
+	fib6_force_start_gc();
+
+out:
+	return (struct dst_entry *)rt;
+}
+
+int ndisc_dst_gc(int *more)
+{
+	struct dst_entry *dst, *next, **pprev;
+	int freed;
+
+	next = NULL;
+	pprev = &ndisc_dst_gc_list;
+	freed = 0;
+	while ((dst = *pprev) != NULL) {
+		if (!atomic_read(&dst->__refcnt)) {
+			*pprev = dst->next;
+			dst_free(dst);
+			freed++;
+		} else {
+			pprev = &dst->next;
+			(*more)++;
+		}
+	}
+
+	return freed;
+}
+
+static int ip6_dst_gc(void)
+{
+	static unsigned expire = 30*HZ;
+	static unsigned long last_gc;
+	unsigned long now = jiffies;
+
+	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
+	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
+		goto out;
+
+	expire++;
+	fib6_run_gc(expire);
+	last_gc = now;
+	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
+		expire = ip6_rt_gc_timeout>>1;
+
+out:
+	expire -= expire>>ip6_rt_gc_elasticity;
+	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
+}
+
+/* Clean host part of a prefix. Not necessary in radix tree,
+   but results in cleaner routing tables.
+
+   Remove it only when all the things will work!
+ */
+
+static int ipv6_get_mtu(struct net_device *dev)
+{
+	int mtu = IPV6_MIN_MTU;
+	struct inet6_dev *idev;
+
+	idev = in6_dev_get(dev);
+	if (idev) {
+		mtu = idev->cnf.mtu6;
+		in6_dev_put(idev);
+	}
+	return mtu;
+}
+
+int ipv6_get_hoplimit(struct net_device *dev)
+{
+	int hoplimit = ipv6_devconf.hop_limit;
+	struct inet6_dev *idev;
+
+	idev = in6_dev_get(dev);
+	if (idev) {
+		hoplimit = idev->cnf.hop_limit;
+		in6_dev_put(idev);
+	}
+	return hoplimit;
+}
+
+/*
+ *
+ */
+
+int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+{
+	int err;
+	struct rtmsg *r;
+	struct rtattr **rta;
+	struct rt6_info *rt = NULL;
+	struct net_device *dev = NULL;
+	struct inet6_dev *idev = NULL;
+	int addr_type;
+
+	rta = (struct rtattr **) _rtattr;
+
+	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+		return -EINVAL;
+#ifndef CONFIG_IPV6_SUBTREES
+	if (rtmsg->rtmsg_src_len)
+		return -EINVAL;
+#endif
+	if (rtmsg->rtmsg_ifindex) {
+		err = -ENODEV;
+		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
+		if (!dev)
+			goto out;
+		idev = in6_dev_get(dev);
+		if (!idev)
+			goto out;
+	}
+
+	if (rtmsg->rtmsg_metric == 0)
+		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+
+	rt = ip6_dst_alloc();
+
+	if (rt == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rt->u.dst.obsolete = -1;
+	rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
+	if (nlh && (r = NLMSG_DATA(nlh))) {
+		rt->rt6i_protocol = r->rtm_protocol;
+	} else {
+		rt->rt6i_protocol = RTPROT_BOOT;
+	}
+
+	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+
+	if (addr_type & IPV6_ADDR_MULTICAST)
+		rt->u.dst.input = ip6_mc_input;
+	else
+		rt->u.dst.input = ip6_forward;
+
+	rt->u.dst.output = ip6_output;
+
+	ipv6_addr_prefix(&rt->rt6i_dst.addr, 
+			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
+	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
+	if (rt->rt6i_dst.plen == 128)
+	       rt->u.dst.flags = DST_HOST;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	ipv6_addr_prefix(&rt->rt6i_src.addr, 
+			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+#endif
+
+	rt->rt6i_metric = rtmsg->rtmsg_metric;
+
+	/* We cannot add true routes via loopback here,
+	   they would result in kernel looping; promote them to reject routes
+	 */
+	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
+	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
+		/* hold loopback dev/idev if we haven't done so. */
+		if (dev != &loopback_dev) {
+			if (dev) {
+				dev_put(dev);
+				in6_dev_put(idev);
+			}
+			dev = &loopback_dev;
+			dev_hold(dev);
+			idev = in6_dev_get(dev);
+			if (!idev) {
+				err = -ENODEV;
+				goto out;
+			}
+		}
+		rt->u.dst.output = ip6_pkt_discard_out;
+		rt->u.dst.input = ip6_pkt_discard;
+		rt->u.dst.error = -ENETUNREACH;
+		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+		goto install_route;
+	}
+
+	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
+		struct in6_addr *gw_addr;
+		int gwa_type;
+
+		gw_addr = &rtmsg->rtmsg_gateway;
+		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+		gwa_type = ipv6_addr_type(gw_addr);
+
+		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
+			struct rt6_info *grt;
+
+			/* IPv6 strictly inhibits using not link-local
+			   addresses as nexthop address.
+			   Otherwise, router will not able to send redirects.
+			   It is very good, but in some (rare!) circumstances
+			   (SIT, PtP, NBMA NOARP links) it is handy to allow
+			   some exceptions. --ANK
+			 */
+			err = -EINVAL;
+			if (!(gwa_type&IPV6_ADDR_UNICAST))
+				goto out;
+
+			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+
+			err = -EHOSTUNREACH;
+			if (grt == NULL)
+				goto out;
+			if (dev) {
+				if (dev != grt->rt6i_dev) {
+					dst_release(&grt->u.dst);
+					goto out;
+				}
+			} else {
+				dev = grt->rt6i_dev;
+				idev = grt->rt6i_idev;
+				dev_hold(dev);
+				in6_dev_hold(grt->rt6i_idev);
+			}
+			if (!(grt->rt6i_flags&RTF_GATEWAY))
+				err = 0;
+			dst_release(&grt->u.dst);
+
+			if (err)
+				goto out;
+		}
+		err = -EINVAL;
+		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
+			goto out;
+	}
+
+	err = -ENODEV;
+	if (dev == NULL)
+		goto out;
+
+	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
+		if (IS_ERR(rt->rt6i_nexthop)) {
+			err = PTR_ERR(rt->rt6i_nexthop);
+			rt->rt6i_nexthop = NULL;
+			goto out;
+		}
+	}
+
+	rt->rt6i_flags = rtmsg->rtmsg_flags;
+
+install_route:
+	if (rta && rta[RTA_METRICS-1]) {
+		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
+		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > RTAX_MAX) {
+					err = -EINVAL;
+					goto out;
+				}
+				rt->u.dst.metrics[flavor-1] =
+					*(u32 *)RTA_DATA(attr);
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	if (!rt->u.dst.metrics[RTAX_MTU-1])
+		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
+		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+	rt->u.dst.dev = dev;
+	rt->rt6i_idev = idev;
+	return ip6_ins_rt(rt, nlh, _rtattr);
+
+out:
+	if (dev)
+		dev_put(dev);
+	if (idev)
+		in6_dev_put(idev);
+	if (rt)
+		dst_free((struct dst_entry *) rt);
+	return err;
+}
+
+int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+	int err;
+
+	write_lock_bh(&rt6_lock);
+
+	rt6_reset_dflt_pointer(NULL);
+
+	err = fib6_del(rt, nlh, _rtattr);
+	dst_release(&rt->u.dst);
+
+	write_unlock_bh(&rt6_lock);
+
+	return err;
+}
+
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+{
+	struct fib6_node *fn;
+	struct rt6_info *rt;
+	int err = -ESRCH;
+
+	read_lock_bh(&rt6_lock);
+
+	fn = fib6_locate(&ip6_routing_table,
+			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
+			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+	
+	if (fn) {
+		for (rt = fn->leaf; rt; rt = rt->u.next) {
+			if (rtmsg->rtmsg_ifindex &&
+			    (rt->rt6i_dev == NULL ||
+			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+				continue;
+			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
+			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+				continue;
+			if (rtmsg->rtmsg_metric &&
+			    rtmsg->rtmsg_metric != rt->rt6i_metric)
+				continue;
+			dst_hold(&rt->u.dst);
+			read_unlock_bh(&rt6_lock);
+
+			return ip6_del_rt(rt, nlh, _rtattr);
+		}
+	}
+	read_unlock_bh(&rt6_lock);
+
+	return err;
+}
+
+/*
+ *	Handle redirects
+ */
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
+		  struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+	struct rt6_info *rt, *nrt;
+
+	/* Locate old route to this destination. */
+	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
+
+	if (rt == NULL)
+		return;
+
+	if (neigh->dev != rt->rt6i_dev)
+		goto out;
+
+	/*
+	 * Current route is on-link; redirect is always invalid.
+	 * 
+	 * Seems, previous statement is not true. It could
+	 * be node, which looks for us as on-link (f.e. proxy ndisc)
+	 * But then router serving it might decide, that we should
+	 * know truth 8)8) --ANK (980726).
+	 */
+	if (!(rt->rt6i_flags&RTF_GATEWAY))
+		goto out;
+
+	/*
+	 *	RFC 2461 specifies that redirects should only be
+	 *	accepted if they come from the nexthop to the target.
+	 *	Due to the way default routers are chosen, this notion
+	 *	is a bit fuzzy and one might need to check all default
+	 *	routers.
+	 */
+	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
+		if (rt->rt6i_flags & RTF_DEFAULT) {
+			struct rt6_info *rt1;
+
+			read_lock(&rt6_lock);
+			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
+				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
+					dst_hold(&rt1->u.dst);
+					dst_release(&rt->u.dst);
+					read_unlock(&rt6_lock);
+					rt = rt1;
+					goto source_ok;
+				}
+			}
+			read_unlock(&rt6_lock);
+		}
+		if (net_ratelimit())
+			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
+			       "for redirect target\n");
+		goto out;
+	}
+
+source_ok:
+
+	/*
+	 *	We have finally decided to accept it.
+	 */
+
+	neigh_update(neigh, lladdr, NUD_STALE, 
+		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+		     NEIGH_UPDATE_F_OVERRIDE|
+		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+				     NEIGH_UPDATE_F_ISROUTER))
+		     );
+
+	/*
+	 * Redirect received -> path was valid.
+	 * Look, redirects are sent only in response to data packets,
+	 * so that this nexthop apparently is reachable. --ANK
+	 */
+	dst_confirm(&rt->u.dst);
+
+	/* Duplicate redirect: silently ignore. */
+	if (neigh == rt->u.dst.neighbour)
+		goto out;
+
+	nrt = ip6_rt_copy(rt);
+	if (nrt == NULL)
+		goto out;
+
+	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
+	if (on_link)
+		nrt->rt6i_flags &= ~RTF_GATEWAY;
+
+	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
+	nrt->rt6i_dst.plen = 128;
+	nrt->u.dst.flags |= DST_HOST;
+
+	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+	nrt->rt6i_nexthop = neigh_clone(neigh);
+	/* Reset pmtu, it may be better */
+	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
+
+	if (ip6_ins_rt(nrt, NULL, NULL))
+		goto out;
+
+	if (rt->rt6i_flags&RTF_CACHE) {
+		ip6_del_rt(rt, NULL, NULL);
+		return;
+	}
+
+out:
+        dst_release(&rt->u.dst);
+	return;
+}
+
+/*
+ *	Handle ICMP "packet too big" messages
+ *	i.e. Path MTU discovery
+ */
+
+void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
+			struct net_device *dev, u32 pmtu)
+{
+	struct rt6_info *rt, *nrt;
+	int allfrag = 0;
+
+	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
+	if (rt == NULL)
+		return;
+
+	if (pmtu >= dst_mtu(&rt->u.dst))
+		goto out;
+
+	if (pmtu < IPV6_MIN_MTU) {
+		/*
+		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
+		 * MTU (1280) and a fragment header should always be included
+		 * after a node receiving Too Big message reporting PMTU is
+		 * less than the IPv6 Minimum Link MTU.
+		 */
+		pmtu = IPV6_MIN_MTU;
+		allfrag = 1;
+	}
+
+	/* New mtu received -> path was valid.
+	   They are sent only in response to data packets,
+	   so that this nexthop apparently is reachable. --ANK
+	 */
+	dst_confirm(&rt->u.dst);
+
+	/* Host route. If it is static, it would be better
+	   not to override it, but add new one, so that
+	   when cache entry will expire old pmtu
+	   would return automatically.
+	 */
+	if (rt->rt6i_flags & RTF_CACHE) {
+		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		if (allfrag)
+			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
+		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
+		goto out;
+	}
+
+	/* Network route.
+	   Two cases are possible:
+	   1. It is connected route. Action: COW
+	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
+	 */
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+		nrt = rt6_cow(rt, daddr, saddr);
+		if (!nrt->u.dst.error) {
+			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+			if (allfrag)
+				nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			/* According to RFC 1981, detecting PMTU increase shouldn't be
+			   happened within 5 mins, the recommended timer is 10 mins.
+			   Here this route expiration time is set to ip6_rt_mtu_expires
+			   which is 10 mins. After 10 mins the decreased pmtu is expired
+			   and detecting PMTU increase will be automatically happened.
+			 */
+			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
+		}
+		dst_release(&nrt->u.dst);
+	} else {
+		nrt = ip6_rt_copy(rt);
+		if (nrt == NULL)
+			goto out;
+		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
+		nrt->rt6i_dst.plen = 128;
+		nrt->u.dst.flags |= DST_HOST;
+		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
+		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
+		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		if (allfrag)
+			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		ip6_ins_rt(nrt, NULL, NULL);
+	}
+
+out:
+	dst_release(&rt->u.dst);
+}
+
+/*
+ *	Misc support functions
+ */
+
+static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
+{
+	struct rt6_info *rt = ip6_dst_alloc();
+
+	if (rt) {
+		rt->u.dst.input = ort->u.dst.input;
+		rt->u.dst.output = ort->u.dst.output;
+
+		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+		rt->u.dst.dev = ort->u.dst.dev;
+		if (rt->u.dst.dev)
+			dev_hold(rt->u.dst.dev);
+		rt->rt6i_idev = ort->rt6i_idev;
+		if (rt->rt6i_idev)
+			in6_dev_hold(rt->rt6i_idev);
+		rt->u.dst.lastuse = jiffies;
+		rt->rt6i_expires = 0;
+
+		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
+		rt->rt6i_metric = 0;
+
+		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
+#ifdef CONFIG_IPV6_SUBTREES
+		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+#endif
+	}
+	return rt;
+}
+
+struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
+{	
+	struct rt6_info *rt;
+	struct fib6_node *fn;
+
+	fn = &ip6_routing_table;
+
+	write_lock_bh(&rt6_lock);
+	for (rt = fn->leaf; rt; rt=rt->u.next) {
+		if (dev == rt->rt6i_dev &&
+		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
+			break;
+	}
+	if (rt)
+		dst_hold(&rt->u.dst);
+	write_unlock_bh(&rt6_lock);
+	return rt;
+}
+
+struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
+				     struct net_device *dev)
+{
+	struct in6_rtmsg rtmsg;
+
+	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
+	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
+	rtmsg.rtmsg_metric = 1024;
+	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
+
+	rtmsg.rtmsg_ifindex = dev->ifindex;
+
+	ip6_route_add(&rtmsg, NULL, NULL);
+	return rt6_get_dflt_router(gwaddr, dev);
+}
+
+void rt6_purge_dflt_routers(void)
+{
+	struct rt6_info *rt;
+
+restart:
+	read_lock_bh(&rt6_lock);
+	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
+			dst_hold(&rt->u.dst);
+
+			rt6_reset_dflt_pointer(NULL);
+
+			read_unlock_bh(&rt6_lock);
+
+			ip6_del_rt(rt, NULL, NULL);
+
+			goto restart;
+		}
+	}
+	read_unlock_bh(&rt6_lock);
+}
+
+int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct in6_rtmsg rtmsg;
+	int err;
+
+	switch(cmd) {
+	case SIOCADDRT:		/* Add a route */
+	case SIOCDELRT:		/* Delete a route */
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = copy_from_user(&rtmsg, arg,
+				     sizeof(struct in6_rtmsg));
+		if (err)
+			return -EFAULT;
+			
+		rtnl_lock();
+		switch (cmd) {
+		case SIOCADDRT:
+			err = ip6_route_add(&rtmsg, NULL, NULL);
+			break;
+		case SIOCDELRT:
+			err = ip6_route_del(&rtmsg, NULL, NULL);
+			break;
+		default:
+			err = -EINVAL;
+		}
+		rtnl_unlock();
+
+		return err;
+	};
+
+	return -EINVAL;
+}
+
+/*
+ *	Drop the packet on the floor
+ */
+
+int ip6_pkt_discard(struct sk_buff *skb)
+{
+	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
+	kfree_skb(skb);
+	return 0;
+}
+
+int ip6_pkt_discard_out(struct sk_buff *skb)
+{
+	skb->dev = skb->dst->dev;
+	return ip6_pkt_discard(skb);
+}
+
+/*
+ *	Allocate a dst for local (unicast / anycast) address.
+ */
+
+struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+				    const struct in6_addr *addr,
+				    int anycast)
+{
+	struct rt6_info *rt = ip6_dst_alloc();
+
+	if (rt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	dev_hold(&loopback_dev);
+	in6_dev_hold(idev);
+
+	rt->u.dst.flags = DST_HOST;
+	rt->u.dst.input = ip6_input;
+	rt->u.dst.output = ip6_output;
+	rt->rt6i_dev = &loopback_dev;
+	rt->rt6i_idev = idev;
+	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	rt->u.dst.obsolete = -1;
+
+	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
+	if (!anycast)
+		rt->rt6i_flags |= RTF_LOCAL;
+	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+	if (rt->rt6i_nexthop == NULL) {
+		dst_free((struct dst_entry *) rt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.plen = 128;
+
+	atomic_set(&rt->u.dst.__refcnt, 1);
+
+	return rt;
+}
+
+static int fib6_ifdown(struct rt6_info *rt, void *arg)
+{
+	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
+	    rt != &ip6_null_entry) {
+		RT6_TRACE("deleted by ifdown %p\n", rt);
+		return -1;
+	}
+	return 0;
+}
+
+void rt6_ifdown(struct net_device *dev)
+{
+	write_lock_bh(&rt6_lock);
+	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
+	write_unlock_bh(&rt6_lock);
+}
+
+struct rt6_mtu_change_arg
+{
+	struct net_device *dev;
+	unsigned mtu;
+};
+
+static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
+	struct inet6_dev *idev;
+
+	/* In IPv6 pmtu discovery is not optional,
+	   so that RTAX_MTU lock cannot disable it.
+	   We still use this lock to block changes
+	   caused by addrconf/ndisc.
+	*/
+
+	idev = __in6_dev_get(arg->dev);
+	if (idev == NULL)
+		return 0;
+
+	/* For administrative MTU increase, there is no way to discover
+	   IPv6 PMTU increase, so PMTU increase should be updated here.
+	   Since RFC 1981 doesn't include administrative MTU increase
+	   update PMTU increase is a MUST. (i.e. jumbo frame)
+	 */
+	/*
+	   If new MTU is less than route PMTU, this new MTU will be the
+	   lowest MTU in the path, update the route PMTU to reflect PMTU
+	   decreases; if new MTU is greater than route PMTU, and the
+	   old MTU is the lowest MTU in the path, update the route PMTU
+	   to reflect the increase. In this case if the other nodes' MTU
+	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
+	   PMTU discouvery.
+	 */
+	if (rt->rt6i_dev == arg->dev &&
+	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+            (dst_mtu(&rt->u.dst) > arg->mtu ||
+             (dst_mtu(&rt->u.dst) < arg->mtu &&
+	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
+		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
+	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+	return 0;
+}
+
+void rt6_mtu_change(struct net_device *dev, unsigned mtu)
+{
+	struct rt6_mtu_change_arg arg;
+
+	arg.dev = dev;
+	arg.mtu = mtu;
+	read_lock_bh(&rt6_lock);
+	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
+	read_unlock_bh(&rt6_lock);
+}
+
+static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
+			      struct in6_rtmsg *rtmsg)
+{
+	memset(rtmsg, 0, sizeof(*rtmsg));
+
+	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
+	rtmsg->rtmsg_src_len = r->rtm_src_len;
+	rtmsg->rtmsg_flags = RTF_UP;
+	if (r->rtm_type == RTN_UNREACHABLE)
+		rtmsg->rtmsg_flags |= RTF_REJECT;
+
+	if (rta[RTA_GATEWAY-1]) {
+		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
+		rtmsg->rtmsg_flags |= RTF_GATEWAY;
+	}
+	if (rta[RTA_DST-1]) {
+		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+	}
+	if (rta[RTA_SRC-1]) {
+		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+	}
+	if (rta[RTA_OIF-1]) {
+		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+	}
+	if (rta[RTA_PRIORITY-1]) {
+		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
+			return -EINVAL;
+		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+	}
+	return 0;
+}
+
+int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct in6_rtmsg rtmsg;
+
+	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+		return -EINVAL;
+	return ip6_route_del(&rtmsg, nlh, arg);
+}
+
+int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct in6_rtmsg rtmsg;
+
+	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+		return -EINVAL;
+	return ip6_route_add(&rtmsg, nlh, arg);
+}
+
+struct rt6_rtnl_dump_arg
+{
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+			 struct in6_addr *dst,
+			 struct in6_addr *src,
+			 int iif,
+			 int type, u32 pid, u32 seq,
+			 struct nlmsghdr *in_nlh, int prefix)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+	struct rta_cacheinfo ci;
+
+	if (prefix) {	/* user wants prefix routes only */
+		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
+			/* success since this is not a prefix route */
+			return 1;
+		}
+	}
+
+	if (!pid && in_nlh) {
+		pid = in_nlh->nlmsg_pid;
+	}
+
+	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+	rtm = NLMSG_DATA(nlh);
+	rtm->rtm_family = AF_INET6;
+	rtm->rtm_dst_len = rt->rt6i_dst.plen;
+	rtm->rtm_src_len = rt->rt6i_src.plen;
+	rtm->rtm_tos = 0;
+	rtm->rtm_table = RT_TABLE_MAIN;
+	if (rt->rt6i_flags&RTF_REJECT)
+		rtm->rtm_type = RTN_UNREACHABLE;
+	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+		rtm->rtm_type = RTN_LOCAL;
+	else
+		rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_protocol = rt->rt6i_protocol;
+	if (rt->rt6i_flags&RTF_DYNAMIC)
+		rtm->rtm_protocol = RTPROT_REDIRECT;
+	else if (rt->rt6i_flags & RTF_ADDRCONF)
+		rtm->rtm_protocol = RTPROT_KERNEL;
+	else if (rt->rt6i_flags&RTF_DEFAULT)
+		rtm->rtm_protocol = RTPROT_RA;
+
+	if (rt->rt6i_flags&RTF_CACHE)
+		rtm->rtm_flags |= RTM_F_CLONED;
+
+	if (dst) {
+		RTA_PUT(skb, RTA_DST, 16, dst);
+	        rtm->rtm_dst_len = 128;
+	} else if (rtm->rtm_dst_len)
+		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
+	if (src) {
+		RTA_PUT(skb, RTA_SRC, 16, src);
+	        rtm->rtm_src_len = 128;
+	} else if (rtm->rtm_src_len)
+		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
+	if (iif)
+		RTA_PUT(skb, RTA_IIF, 4, &iif);
+	else if (dst) {
+		struct in6_addr saddr_buf;
+		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
+			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+	}
+	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+		goto rtattr_failure;
+	if (rt->u.dst.neighbour)
+		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+	if (rt->u.dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
+	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
+	if (rt->rt6i_expires)
+		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
+	else
+		ci.rta_expires = 0;
+	ci.rta_used = rt->u.dst.__use;
+	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
+	ci.rta_error = rt->u.dst.error;
+	ci.rta_id = 0;
+	ci.rta_ts = 0;
+	ci.rta_tsage = 0;
+	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+	int prefix;
+
+	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
+		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
+	} else
+		prefix = 0;
+
+	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+		     NULL, prefix);
+}
+
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->u.next) {
+		res = rt6_dump_route(rt, w->args);
+		if (res < 0) {
+			/* Frame is full, suspend walking */
+			w->leaf = rt;
+			return 1;
+		}
+		BUG_TRAP(res!=0);
+	}
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w = (void*)cb->args[0];
+
+	if (w) {
+		cb->args[0] = 0;
+		fib6_walker_unlink(w);
+		kfree(w);
+	}
+	if (cb->args[1]) {
+		cb->done = (void*)cb->args[1];
+		cb->args[1] = 0;
+	}
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+	fib6_dump_end(cb);
+	return cb->done(cb);
+}
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct rt6_rtnl_dump_arg arg;
+	struct fib6_walker_t *w;
+	int res;
+
+	arg.skb = skb;
+	arg.cb = cb;
+
+	w = (void*)cb->args[0];
+	if (w == NULL) {
+		/* New dump:
+		 * 
+		 * 1. hook callback destructor.
+		 */
+		cb->args[1] = (long)cb->done;
+		cb->done = fib6_dump_done;
+
+		/*
+		 * 2. allocate and initialize walker.
+		 */
+		w = kmalloc(sizeof(*w), GFP_ATOMIC);
+		if (w == NULL)
+			return -ENOMEM;
+		RT6_TRACE("dump<%p", w);
+		memset(w, 0, sizeof(*w));
+		w->root = &ip6_routing_table;
+		w->func = fib6_dump_node;
+		w->args = &arg;
+		cb->args[0] = (long)w;
+		read_lock_bh(&rt6_lock);
+		res = fib6_walk(w);
+		read_unlock_bh(&rt6_lock);
+	} else {
+		w->args = &arg;
+		read_lock_bh(&rt6_lock);
+		res = fib6_walk_continue(w);
+		read_unlock_bh(&rt6_lock);
+	}
+#if RT6_DEBUG >= 3
+	if (res <= 0 && skb->len == 0)
+		RT6_TRACE("%p>dump end\n", w);
+#endif
+	res = res < 0 ? res : skb->len;
+	/* res < 0 is an error. (really, impossible)
+	   res == 0 means that dump is complete, but skb still can contain data.
+	   res > 0 dump is not complete, but frame is full.
+	 */
+	/* Destroy walker, if dump of this table is complete. */
+	if (res <= 0)
+		fib6_dump_end(cb);
+	return res;
+}
+
+int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct rtattr **rta = arg;
+	int iif = 0;
+	int err = -ENOBUFS;
+	struct sk_buff *skb;
+	struct flowi fl;
+	struct rt6_info *rt;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto out;
+
+	/* Reserve room for dummy headers, this skb can pass
+	   through good chunk of routing engine.
+	 */
+	skb->mac.raw = skb->data;
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+
+	memset(&fl, 0, sizeof(fl));
+	if (rta[RTA_SRC-1])
+		ipv6_addr_copy(&fl.fl6_src,
+			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
+	if (rta[RTA_DST-1])
+		ipv6_addr_copy(&fl.fl6_dst,
+			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
+
+	if (rta[RTA_IIF-1])
+		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+
+	if (iif) {
+		struct net_device *dev;
+		dev = __dev_get_by_index(iif);
+		if (!dev) {
+			err = -ENODEV;
+			goto out_free;
+		}
+	}
+
+	fl.oif = 0;
+	if (rta[RTA_OIF-1])
+		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+
+	skb->dst = &rt->u.dst;
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	err = rt6_fill_node(skb, rt, 
+			    &fl.fl6_dst, &fl.fl6_src,
+			    iif,
+			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+			    nlh->nlmsg_seq, nlh, 0);
+	if (err < 0) {
+		err = -EMSGSIZE;
+		goto out_free;
+	}
+
+	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+out:
+	return err;
+out_free:
+	kfree_skb(skb);
+	goto out;	
+}
+
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+
+	skb = alloc_skb(size, gfp_any());
+	if (!skb) {
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
+		return;
+	}
+	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
+		kfree_skb(skb);
+		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
+		return;
+	}
+	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
+	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
+}
+
+/*
+ *	/proc
+ */
+
+#ifdef CONFIG_PROC_FS
+
+#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
+
+struct rt6_proc_arg
+{
+	char *buffer;
+	int offset;
+	int length;
+	int skip;
+	int len;
+};
+
+static int rt6_info_route(struct rt6_info *rt, void *p_arg)
+{
+	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
+	int i;
+
+	if (arg->skip < arg->offset / RT6_INFO_LEN) {
+		arg->skip++;
+		return 0;
+	}
+
+	if (arg->len >= arg->length)
+		return 0;
+
+	for (i=0; i<16; i++) {
+		sprintf(arg->buffer + arg->len, "%02x",
+			rt->rt6i_dst.addr.s6_addr[i]);
+		arg->len += 2;
+	}
+	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+			    rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	for (i=0; i<16; i++) {
+		sprintf(arg->buffer + arg->len, "%02x",
+			rt->rt6i_src.addr.s6_addr[i]);
+		arg->len += 2;
+	}
+	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+			    rt->rt6i_src.plen);
+#else
+	sprintf(arg->buffer + arg->len,
+		"00000000000000000000000000000000 00 ");
+	arg->len += 36;
+#endif
+
+	if (rt->rt6i_nexthop) {
+		for (i=0; i<16; i++) {
+			sprintf(arg->buffer + arg->len, "%02x",
+				rt->rt6i_nexthop->primary_key[i]);
+			arg->len += 2;
+		}
+	} else {
+		sprintf(arg->buffer + arg->len,
+			"00000000000000000000000000000000");
+		arg->len += 32;
+	}
+	arg->len += sprintf(arg->buffer + arg->len,
+			    " %08x %08x %08x %08x %8s\n",
+			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
+			    rt->u.dst.__use, rt->rt6i_flags, 
+			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
+	return 0;
+}
+
+static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
+{
+	struct rt6_proc_arg arg;
+	arg.buffer = buffer;
+	arg.offset = offset;
+	arg.length = length;
+	arg.skip = 0;
+	arg.len = 0;
+
+	read_lock_bh(&rt6_lock);
+	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
+	read_unlock_bh(&rt6_lock);
+
+	*start = buffer;
+	if (offset)
+		*start += offset % RT6_INFO_LEN;
+
+	arg.len -= offset % RT6_INFO_LEN;
+
+	if (arg.len > length)
+		arg.len = length;
+	if (arg.len < 0)
+		arg.len = 0;
+
+	return arg.len;
+}
+
+extern struct rt6_statistics rt6_stats;
+
+static int rt6_stats_seq_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
+		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
+		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
+		      rt6_stats.fib_rt_cache,
+		      atomic_read(&ip6_dst_ops.entries),
+		      rt6_stats.fib_discarded_routes);
+
+	return 0;
+}
+
+static int rt6_stats_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rt6_stats_seq_show, NULL);
+}
+
+static struct file_operations rt6_stats_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = rt6_stats_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+#endif	/* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SYSCTL
+
+static int flush_delay;
+
+static
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+			      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write) {
+		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
+		return 0;
+	} else
+		return -EINVAL;
+}
+
+ctl_table ipv6_route_table[] = {
+        {
+		.ctl_name	=	NET_IPV6_ROUTE_FLUSH, 
+		.procname	=	"flush",
+         	.data		=	&flush_delay,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&ipv6_sysctl_rtcache_flush
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
+		.procname	=	"gc_thresh",
+         	.data		=	&ip6_dst_ops.gc_thresh,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
+		.procname	=	"max_size",
+         	.data		=	&ip6_rt_max_size,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
+		.procname	=	"gc_min_interval",
+         	.data		=	&ip6_rt_gc_min_interval,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
+		.procname	=	"gc_timeout",
+         	.data		=	&ip6_rt_gc_timeout,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
+		.procname	=	"gc_interval",
+         	.data		=	&ip6_rt_gc_interval,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
+		.procname	=	"gc_elasticity",
+         	.data		=	&ip6_rt_gc_elasticity,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
+		.procname	=	"mtu_expires",
+         	.data		=	&ip6_rt_mtu_expires,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
+		.procname	=	"min_adv_mss",
+         	.data		=	&ip6_rt_min_advmss,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_jiffies,
+		.strategy	=	&sysctl_jiffies,
+	},
+	{
+		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
+		.procname	=	"gc_min_interval_ms",
+         	.data		=	&ip6_rt_gc_min_interval,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+         	.proc_handler	=	&proc_dointvec_ms_jiffies,
+		.strategy	=	&sysctl_ms_jiffies,
+	},
+	{ .ctl_name = 0 }
+};
+
+#endif
+
+void __init ip6_route_init(void)
+{
+	struct proc_dir_entry *p;
+
+	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
+						     sizeof(struct rt6_info),
+						     0, SLAB_HWCACHE_ALIGN,
+						     NULL, NULL);
+	if (!ip6_dst_ops.kmem_cachep)
+		panic("cannot create ip6_dst_cache");
+
+	fib6_init();
+#ifdef 	CONFIG_PROC_FS
+	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
+	if (p)
+		p->owner = THIS_MODULE;
+
+	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+#endif
+#ifdef CONFIG_XFRM
+	xfrm6_init();
+#endif
+}
+
+void ip6_route_cleanup(void)
+{
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("ipv6_route");
+	proc_net_remove("rt6_stats");
+#endif
+#ifdef CONFIG_XFRM
+	xfrm6_fini();
+#endif
+	rt6_ifdown(NULL);
+	fib6_gc_cleanup();
+	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
new file mode 100644
index 00000000000..b788f55e139
--- /dev/null
+++ b/net/ipv6/sit.c
@@ -0,0 +1,833 @@
+/*
+ *	IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
+ *	Linux INET6 implementation
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	$Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Changes:
+ * Roger Venning <r.venning@telstra.com>:	6to4 support
+ * Nate Thompson <nate@thebog.net>:		6to4 support
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/netfilter_ipv4.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/ipip.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/dsfield.h>
+
+/*
+   This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
+
+   For comments look at net/ipv4/ip_gre.c --ANK
+ */
+
+#define HASH_SIZE  16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
+
+static int ipip6_fb_tunnel_init(struct net_device *dev);
+static int ipip6_tunnel_init(struct net_device *dev);
+static void ipip6_tunnel_setup(struct net_device *dev);
+
+static struct net_device *ipip6_fb_tunnel_dev;
+
+static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_r[HASH_SIZE];
+static struct ip_tunnel *tunnels_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_wc[1];
+static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+
+static DEFINE_RWLOCK(ipip6_lock);
+
+static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(local);
+	struct ip_tunnel *t;
+
+	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	for (t = tunnels_r[h0]; t; t = t->next) {
+		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	for (t = tunnels_l[h1]; t; t = t->next) {
+		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+			return t;
+	}
+	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+		return t;
+	return NULL;
+}
+
+static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+{
+	u32 remote = t->parms.iph.daddr;
+	u32 local = t->parms.iph.saddr;
+	unsigned h = 0;
+	int prio = 0;
+
+	if (remote) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	if (local) {
+		prio |= 1;
+		h ^= HASH(local);
+	}
+	return &tunnels[prio][h];
+}
+
+static void ipip6_tunnel_unlink(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp;
+
+	for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) {
+		if (t == *tp) {
+			write_lock_bh(&ipip6_lock);
+			*tp = t->next;
+			write_unlock_bh(&ipip6_lock);
+			break;
+		}
+	}
+}
+
+static void ipip6_tunnel_link(struct ip_tunnel *t)
+{
+	struct ip_tunnel **tp = ipip6_bucket(t);
+
+	t->next = *tp;
+	write_lock_bh(&ipip6_lock);
+	*tp = t;
+	write_unlock_bh(&ipip6_lock);
+}
+
+static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+	u32 remote = parms->iph.daddr;
+	u32 local = parms->iph.saddr;
+	struct ip_tunnel *t, **tp, *nt;
+	struct net_device *dev;
+	unsigned h = 0;
+	int prio = 0;
+	char name[IFNAMSIZ];
+
+	if (remote) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	if (local) {
+		prio |= 1;
+		h ^= HASH(local);
+	}
+	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+			return t;
+	}
+	if (!create)
+		goto failed;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else {
+		int i;
+		for (i=1; i<100; i++) {
+			sprintf(name, "sit%d", i);
+			if (__dev_get_by_name(name) == NULL)
+				break;
+		}
+		if (i==100)
+			goto failed;
+	}
+
+	dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
+	if (dev == NULL)
+		return NULL;
+
+	nt = dev->priv;
+	dev->init = ipip6_tunnel_init;
+	nt->parms = *parms;
+
+	if (register_netdevice(dev) < 0) {
+		free_netdev(dev);
+		goto failed;
+	}
+
+	dev_hold(dev);
+
+	ipip6_tunnel_link(nt);
+	/* Do not decrement MOD_USE_COUNT here. */
+	return nt;
+
+failed:
+	return NULL;
+}
+
+static void ipip6_tunnel_uninit(struct net_device *dev)
+{
+	if (dev == ipip6_fb_tunnel_dev) {
+		write_lock_bh(&ipip6_lock);
+		tunnels_wc[0] = NULL;
+		write_unlock_bh(&ipip6_lock);
+		dev_put(dev);
+	} else {
+		ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv);
+		dev_put(dev);
+	}
+}
+
+
+static void ipip6_err(struct sk_buff *skb, u32 info)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+   8 bytes of packet payload. It means, that precise relaying of
+   ICMP in the real Internet is absolutely infeasible.
+ */
+	struct iphdr *iph = (struct iphdr*)skb->data;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct ip_tunnel *t;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* Soft state for pmtu is maintained by IP core. */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		break;
+	}
+
+	read_lock(&ipip6_lock);
+	t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
+	if (t == NULL || t->parms.iph.daddr == 0)
+		goto out;
+	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+		goto out;
+
+	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+out:
+	read_unlock(&ipip6_lock);
+	return;
+#else
+	struct iphdr *iph = (struct iphdr*)dp;
+	int hlen = iph->ihl<<2;
+	struct ipv6hdr *iph6;
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	int rel_type = 0;
+	int rel_code = 0;
+	int rel_info = 0;
+	struct sk_buff *skb2;
+	struct rt6_info *rt6i;
+
+	if (len < hlen + sizeof(struct ipv6hdr))
+		return;
+	iph6 = (struct ipv6hdr*)(dp + hlen);
+
+	switch (type) {
+	default:
+		return;
+	case ICMP_PARAMETERPROB:
+		if (skb->h.icmph->un.gateway < hlen)
+			return;
+
+		/* So... This guy found something strange INSIDE encapsulated
+		   packet. Well, he is fool, but what can we do ?
+		 */
+		rel_type = ICMPV6_PARAMPROB;
+		rel_info = skb->h.icmph->un.gateway - hlen;
+		break;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return;
+		case ICMP_FRAG_NEEDED:
+			/* Too complicated case ... */
+			return;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			   rfc2003 contains "deep thoughts" about NET_UNREACH,
+			   I believe, it is just ether pollution. --ANK
+			 */
+			rel_type = ICMPV6_DEST_UNREACH;
+			rel_code = ICMPV6_ADDR_UNREACH;
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return;
+		rel_type = ICMPV6_TIME_EXCEED;
+		rel_code = ICMPV6_EXC_HOPLIMIT;
+		break;
+	}
+
+	/* Prepare fake skb to feed it to icmpv6_send */
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		return;
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	skb_pull(skb2, skb->data - (u8*)iph6);
+	skb2->nh.raw = skb2->data;
+
+	/* Try to guess incoming interface */
+	rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
+	if (rt6i && rt6i->rt6i_dev) {
+		skb2->dev = rt6i->rt6i_dev;
+
+		rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
+
+		if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
+			struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv;
+			if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
+				rel_type = ICMPV6_DEST_UNREACH;
+				rel_code = ICMPV6_ADDR_UNREACH;
+			}
+			icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+		}
+	}
+	kfree_skb(skb2);
+	return;
+#endif
+}
+
+static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+{
+	if (INET_ECN_is_ce(iph->tos))
+		IP6_ECN_set_ce(skb->nh.ipv6h);
+}
+
+static int ipip6_rcv(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	struct ip_tunnel *tunnel;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto out;
+
+	iph = skb->nh.iph;
+
+	read_lock(&ipip6_lock);
+	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+		secpath_reset(skb);
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = skb->data;
+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+		skb->protocol = htons(ETH_P_IPV6);
+		skb->pkt_type = PACKET_HOST;
+		tunnel->stat.rx_packets++;
+		tunnel->stat.rx_bytes += skb->len;
+		skb->dev = tunnel->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		nf_reset(skb);
+		ipip6_ecn_decapsulate(iph, skb);
+		netif_rx(skb);
+		read_unlock(&ipip6_lock);
+		return 0;
+	}
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+	kfree_skb(skb);
+	read_unlock(&ipip6_lock);
+out:
+	return 0;
+}
+
+/* Returns the embedded IPv4 address if the IPv6 address
+   comes from 6to4 (RFC 3056) addr space */
+
+static inline u32 try_6to4(struct in6_addr *v6dst)
+{
+	u32 dst = 0;
+
+	if (v6dst->s6_addr16[0] == htons(0x2002)) {
+	        /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
+		memcpy(&dst, &v6dst->s6_addr16[1], 4);
+	}
+	return dst;
+}
+
+/*
+ *	This function assumes it is being called from dev_queue_xmit()
+ *	and that skb is filled properly by that function.
+ */
+
+static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+	struct net_device_stats *stats = &tunnel->stat;
+	struct iphdr  *tiph = &tunnel->parms.iph;
+	struct ipv6hdr *iph6 = skb->nh.ipv6h;
+	u8     tos = tunnel->parms.iph.tos;
+	struct rtable *rt;     			/* Route to the other host */
+	struct net_device *tdev;			/* Device to other host */
+	struct iphdr  *iph;			/* Our new IP header */
+	int    max_headroom;			/* The extra header space needed */
+	u32    dst = tiph->daddr;
+	int    mtu;
+	struct in6_addr *addr6;	
+	int addr_type;
+
+	if (tunnel->recursion++) {
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	if (skb->protocol != htons(ETH_P_IPV6))
+		goto tx_error;
+
+	if (!dst)
+		dst = try_6to4(&iph6->daddr);
+
+	if (!dst) {
+		struct neighbour *neigh = NULL;
+
+		if (skb->dst)
+			neigh = skb->dst->neighbour;
+
+		if (neigh == NULL) {
+			if (net_ratelimit())
+				printk(KERN_DEBUG "sit: nexthop == NULL\n");
+			goto tx_error;
+		}
+
+		addr6 = (struct in6_addr*)&neigh->primary_key;
+		addr_type = ipv6_addr_type(addr6);
+
+		if (addr_type == IPV6_ADDR_ANY) {
+			addr6 = &skb->nh.ipv6h->daddr;
+			addr_type = ipv6_addr_type(addr6);
+		}
+
+		if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+			goto tx_error_icmp;
+
+		dst = addr6->s6_addr32[3];
+	}
+
+	{
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = dst,
+						.saddr = tiph->saddr,
+						.tos = RT_TOS(tos) } },
+				    .oif = tunnel->parms.link,
+				    .proto = IPPROTO_IPV6 };
+		if (ip_route_output_key(&rt, &fl)) {
+			tunnel->stat.tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
+	}
+	if (rt->rt_type != RTN_UNICAST) {
+		ip_rt_put(rt);
+		tunnel->stat.tx_carrier_errors++;
+		goto tx_error_icmp;
+	}
+	tdev = rt->u.dst.dev;
+
+	if (tdev == dev) {
+		ip_rt_put(rt);
+		tunnel->stat.collisions++;
+		goto tx_error;
+	}
+
+	if (tiph->frag_off)
+		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	else
+		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+
+	if (mtu < 68) {
+		tunnel->stat.collisions++;
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+	if (tunnel->parms.iph.daddr && skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+
+	if (tunnel->err_count > 0) {
+		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+			tunnel->err_count--;
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			ip_rt_put(rt);
+  			stats->tx_dropped++;
+			dev_kfree_skb(skb);
+			tunnel->recursion--;
+			return 0;
+		}
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		dev_kfree_skb(skb);
+		skb = new_skb;
+		iph6 = skb->nh.ipv6h;
+	}
+
+	skb->h.raw = skb->nh.raw;
+	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+
+	iph 			=	skb->nh.iph;
+	iph->version		=	4;
+	iph->ihl		=	sizeof(struct iphdr)>>2;
+	if (mtu > IPV6_MIN_MTU)
+		iph->frag_off	=	htons(IP_DF);
+	else
+		iph->frag_off	=	0;
+
+	iph->protocol		=	IPPROTO_IPV6;
+	iph->tos		=	INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
+	iph->daddr		=	rt->rt_dst;
+	iph->saddr		=	rt->rt_src;
+
+	if ((iph->ttl = tiph->ttl) == 0)
+		iph->ttl	=	iph6->hop_limit;
+
+	nf_reset(skb);
+
+	IPTUNNEL_XMIT();
+	tunnel->recursion--;
+	return 0;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	stats->tx_errors++;
+	dev_kfree_skb(skb);
+	tunnel->recursion--;
+	return 0;
+}
+
+static int
+ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip_tunnel_parm p;
+	struct ip_tunnel *t;
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == ipip6_fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			t = ipip6_tunnel_locate(&p, 0);
+		}
+		if (t == NULL)
+			t = (struct ip_tunnel*)dev->priv;
+		memcpy(&p, &t->parms, sizeof(p));
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+			goto done;
+		if (p.iph.ttl)
+			p.iph.frag_off |= htons(IP_DF);
+
+		t = ipip6_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+		if (dev != ipip6_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
+				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
+					err = -EINVAL;
+					break;
+				}
+				t = (struct ip_tunnel*)dev->priv;
+				ipip6_tunnel_unlink(t);
+				t->parms.iph.saddr = p.iph.saddr;
+				t->parms.iph.daddr = p.iph.daddr;
+				memcpy(dev->dev_addr, &p.iph.saddr, 4);
+				memcpy(dev->broadcast, &p.iph.daddr, 4);
+				ipip6_tunnel_link(t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+			if (cmd == SIOCCHGTUNNEL) {
+				t->parms.iph.ttl = p.iph.ttl;
+				t->parms.iph.tos = p.iph.tos;
+			}
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ipip6_fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
+				goto done;
+			err = -EPERM;
+			if (t == ipip6_fb_tunnel_dev->priv)
+				goto done;
+			dev = t->dev;
+		}
+		err = unregister_netdevice(dev);
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
+{
+	return &(((struct ip_tunnel*)dev->priv)->stat);
+}
+
+static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static void ipip6_tunnel_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->uninit		= ipip6_tunnel_uninit;
+	dev->destructor 	= free_netdev;
+	dev->hard_start_xmit	= ipip6_tunnel_xmit;
+	dev->get_stats		= ipip6_tunnel_get_stats;
+	dev->do_ioctl		= ipip6_tunnel_ioctl;
+	dev->change_mtu		= ipip6_tunnel_change_mtu;
+
+	dev->type		= ARPHRD_SIT;
+	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
+	dev->mtu		= 1500 - sizeof(struct iphdr);
+	dev->flags		= IFF_NOARP;
+	dev->iflink		= 0;
+	dev->addr_len		= 4;
+}
+
+static int ipip6_tunnel_init(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+
+	tunnel = (struct ip_tunnel*)dev->priv;
+	iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+	if (iph->daddr) {
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .oif = tunnel->parms.link,
+				    .proto = IPPROTO_IPV6 };
+		struct rtable *rt;
+		if (!ip_route_output_key(&rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu - sizeof(struct iphdr);
+		if (dev->mtu < IPV6_MIN_MTU)
+			dev->mtu = IPV6_MIN_MTU;
+	}
+	dev->iflink = tunnel->parms.link;
+
+	return 0;
+}
+
+int __init ipip6_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = dev->priv;
+	struct iphdr *iph = &tunnel->parms.iph;
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	iph->version		= 4;
+	iph->protocol		= IPPROTO_IPV6;
+	iph->ihl		= 5;
+	iph->ttl		= 64;
+
+	dev_hold(dev);
+	tunnels_wc[0]		= tunnel;
+	return 0;
+}
+
+static struct net_protocol sit_protocol = {
+	.handler	=	ipip6_rcv,
+	.err_handler	=	ipip6_err,
+};
+
+void __exit sit_cleanup(void)
+{
+	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
+	unregister_netdev(ipip6_fb_tunnel_dev);
+}
+
+int __init sit_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+
+	if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
+		printk(KERN_INFO "sit init: Can't add protocol\n");
+		return -EAGAIN;
+	}
+
+	ipip6_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 
+					   ipip6_tunnel_setup);
+	if (!ipip6_fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	ipip6_fb_tunnel_dev->init = ipip6_fb_tunnel_init;
+
+	if ((err =  register_netdev(ipip6_fb_tunnel_dev)))
+		goto err2;
+
+ out:
+	return err;
+ err2:
+	free_netdev(ipip6_fb_tunnel_dev);
+ err1:
+	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
+	goto out;
+}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
new file mode 100644
index 00000000000..3a18e0e6ffe
--- /dev/null
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -0,0 +1,125 @@
+/*
+ * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
+ *
+ * Changes:
+ * YOSHIFUJI Hideaki @USAGI:	added icmp sysctl table.
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/config.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+
+extern ctl_table ipv6_route_table[];
+extern ctl_table ipv6_icmp_table[];
+
+#ifdef CONFIG_SYSCTL
+
+static ctl_table ipv6_table[] = {
+	{
+		.ctl_name	= NET_IPV6_ROUTE,
+		.procname	= "route",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= ipv6_route_table
+	},
+	{
+		.ctl_name	= NET_IPV6_ICMP,
+		.procname	= "icmp",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= ipv6_icmp_table
+	},
+	{
+		.ctl_name	= NET_IPV6_BINDV6ONLY,
+		.procname	= "bindv6only",
+		.data		= &sysctl_ipv6_bindv6only,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
+		.procname	= "ip6frag_high_thresh",
+		.data		= &sysctl_ip6frag_high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
+		.procname	= "ip6frag_low_thresh",
+		.data		= &sysctl_ip6frag_low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
+		.procname	= "ip6frag_time",
+		.data		= &sysctl_ip6frag_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
+		.procname	= "ip6frag_secret_interval",
+		.data		= &sysctl_ip6frag_secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV6_MLD_MAX_MSF,
+		.procname	= "mld_max_msf",
+		.data		= &sysctl_mld_max_msf,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header *ipv6_sysctl_header;
+
+static ctl_table ipv6_net_table[] = {
+	{
+		.ctl_name	= NET_IPV6,
+		.procname	= "ipv6",
+		.mode		= 0555,
+		.child		= ipv6_table
+	},
+        { .ctl_name = 0 }
+};
+
+static ctl_table ipv6_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ipv6_net_table
+	},
+        { .ctl_name = 0 }
+};
+
+void ipv6_sysctl_register(void)
+{
+	ipv6_sysctl_header = register_sysctl_table(ipv6_root_table, 0);
+}
+
+void ipv6_sysctl_unregister(void)
+{
+	unregister_sysctl_table(ipv6_sysctl_header);
+}
+
+#endif /* CONFIG_SYSCTL */
+
+
+
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
new file mode 100644
index 00000000000..4760c85e19d
--- /dev/null
+++ b/net/ipv6/tcp_ipv6.c
@@ -0,0 +1,2265 @@
+/*
+ *	TCP over IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
+ *
+ *	Based on: 
+ *	linux/net/ipv4/tcp.c
+ *	linux/net/ipv4/tcp_input.c
+ *	linux/net/ipv4/tcp_output.c
+ *
+ *	Fixes:
+ *	Hideaki YOSHIFUJI	:	sin6_scope_id support
+ *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+ *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
+ *					a single port at the same time.
+ *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/jiffies.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/jhash.h>
+#include <linux/ipsec.h>
+#include <linux/times.h>
+
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+
+#include <net/tcp.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip6_checksum.h>
+#include <net/inet_ecn.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/addrconf.h>
+#include <net/snmp.h>
+#include <net/dsfield.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static void	tcp_v6_send_reset(struct sk_buff *skb);
+static void	tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
+static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+				  struct sk_buff *skb);
+
+static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
+
+static struct tcp_func ipv6_mapped;
+static struct tcp_func ipv6_specific;
+
+/* I have no idea if this is a good hash for v6 or not. -DaveM */
+static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
+				    struct in6_addr *faddr, u16 fport)
+{
+	int hashent = (lport ^ fport);
+
+	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
+	hashent ^= hashent>>16;
+	hashent ^= hashent>>8;
+	return (hashent & (tcp_ehash_size - 1));
+}
+
+static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *laddr = &np->rcv_saddr;
+	struct in6_addr *faddr = &np->daddr;
+	__u16 lport = inet->num;
+	__u16 fport = inet->dport;
+	return tcp_v6_hashfn(laddr, lport, faddr, fport);
+}
+
+static inline int tcp_v6_bind_conflict(struct sock *sk,
+				       struct tcp_bind_bucket *tb)
+{
+	struct sock *sk2;
+	struct hlist_node *node;
+
+	/* We must walk the whole port owner list in this case. -DaveM */
+	sk_for_each_bound(sk2, node, &tb->owners) {
+		if (sk != sk2 &&
+		    (!sk->sk_bound_dev_if ||
+		     !sk2->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+		    (!sk->sk_reuse || !sk2->sk_reuse ||
+		     sk2->sk_state == TCP_LISTEN) &&
+		     ipv6_rcv_saddr_equal(sk, sk2))
+			break;
+	}
+
+	return node != NULL;
+}
+
+/* Grrr, addr_type already calculated by caller, but I don't want
+ * to add some silly "cookie" argument to this method just for that.
+ * But it doesn't matter, the recalculation is in the rarest path
+ * this function ever takes.
+ */
+static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+	struct tcp_bind_hashbucket *head;
+	struct tcp_bind_bucket *tb;
+	struct hlist_node *node;
+	int ret;
+
+	local_bh_disable();
+	if (snum == 0) {
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int remaining = (high - low) + 1;
+		int rover;
+
+		spin_lock(&tcp_portalloc_lock);
+		rover = tcp_port_rover;
+		do {	rover++;
+			if ((rover < low) || (rover > high))
+				rover = low;
+			head = &tcp_bhash[tcp_bhashfn(rover)];
+			spin_lock(&head->lock);
+			tb_for_each(tb, node, &head->chain)
+				if (tb->port == rover)
+					goto next;
+			break;
+		next:
+			spin_unlock(&head->lock);
+		} while (--remaining > 0);
+		tcp_port_rover = rover;
+		spin_unlock(&tcp_portalloc_lock);
+
+		/* Exhausted local port range during search? */
+		ret = 1;
+		if (remaining <= 0)
+			goto fail;
+
+		/* OK, here is the one we will use. */
+		snum = rover;
+	} else {
+		head = &tcp_bhash[tcp_bhashfn(snum)];
+		spin_lock(&head->lock);
+		tb_for_each(tb, node, &head->chain)
+			if (tb->port == snum)
+				goto tb_found;
+	}
+	tb = NULL;
+	goto tb_not_found;
+tb_found:
+	if (tb && !hlist_empty(&tb->owners)) {
+		if (tb->fastreuse > 0 && sk->sk_reuse &&
+		    sk->sk_state != TCP_LISTEN) {
+			goto success;
+		} else {
+			ret = 1;
+			if (tcp_v6_bind_conflict(sk, tb))
+				goto fail_unlock;
+		}
+	}
+tb_not_found:
+	ret = 1;
+	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
+		goto fail_unlock;
+	if (hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+			tb->fastreuse = 1;
+		else
+			tb->fastreuse = 0;
+	} else if (tb->fastreuse &&
+		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
+		tb->fastreuse = 0;
+
+success:
+	if (!tcp_sk(sk)->bind_hash)
+		tcp_bind_hash(sk, tb, snum);
+	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
+	ret = 0;
+
+fail_unlock:
+	spin_unlock(&head->lock);
+fail:
+	local_bh_enable();
+	return ret;
+}
+
+static __inline__ void __tcp_v6_hash(struct sock *sk)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	BUG_TRAP(sk_unhashed(sk));
+
+	if (sk->sk_state == TCP_LISTEN) {
+		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+		lock = &tcp_lhash_lock;
+		tcp_listen_wlock();
+	} else {
+		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
+		list = &tcp_ehash[sk->sk_hashent].chain;
+		lock = &tcp_ehash[sk->sk_hashent].lock;
+		write_lock(lock);
+	}
+
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+}
+
+
+static void tcp_v6_hash(struct sock *sk)
+{
+	if (sk->sk_state != TCP_CLOSE) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if (tp->af_specific == &ipv6_mapped) {
+			tcp_prot.hash(sk);
+			return;
+		}
+		local_bh_disable();
+		__tcp_v6_hash(sk);
+		local_bh_enable();
+	}
+}
+
+static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	struct sock *result = NULL;
+	int score, hiscore;
+
+	hiscore=0;
+	read_lock(&tcp_lhash_lock);
+	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
+		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+			
+			score = 1;
+			if (!ipv6_addr_any(&np->rcv_saddr)) {
+				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+					continue;
+				score++;
+			}
+			if (sk->sk_bound_dev_if) {
+				if (sk->sk_bound_dev_if != dif)
+					continue;
+				score++;
+			}
+			if (score == 3) {
+				result = sk;
+				break;
+			}
+			if (score > hiscore) {
+				hiscore = score;
+				result = sk;
+			}
+		}
+	}
+	if (result)
+		sock_hold(result);
+	read_unlock(&tcp_lhash_lock);
+	return result;
+}
+
+/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
+ * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
+ *
+ * The sockhash lock must be held as a reader here.
+ */
+
+static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
+						       struct in6_addr *daddr, u16 hnum,
+						       int dif)
+{
+	struct tcp_ehash_bucket *head;
+	struct sock *sk;
+	struct hlist_node *node;
+	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
+	int hash;
+
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
+	head = &tcp_ehash[hash];
+	read_lock(&head->lock);
+	sk_for_each(sk, node, &head->chain) {
+		/* For IPV6 do the cheaper port and family tests first. */
+		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
+			goto hit; /* You sunk my battleship! */
+	}
+	/* Must check for a TIME_WAIT'er before going to listener hash. */
+	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
+		/* FIXME: acme: check this... */
+		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+
+		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
+		   sk->sk_family		== PF_INET6) {
+			if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr)	&&
+			   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr)	&&
+			   (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
+				goto hit;
+		}
+	}
+	read_unlock(&head->lock);
+	return NULL;
+
+hit:
+	sock_hold(sk);
+	read_unlock(&head->lock);
+	return sk;
+}
+
+
+static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+					   struct in6_addr *daddr, u16 hnum,
+					   int dif)
+{
+	struct sock *sk;
+
+	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
+
+	if (sk)
+		return sk;
+
+	return tcp_v6_lookup_listener(daddr, hnum, dif);
+}
+
+inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+				  struct in6_addr *daddr, u16 dport,
+				  int dif)
+{
+	struct sock *sk;
+
+	local_bh_disable();
+	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
+	local_bh_enable();
+
+	return sk;
+}
+
+EXPORT_SYMBOL_GPL(tcp_v6_lookup);
+
+
+/*
+ * Open request hash tables.
+ */
+
+static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
+{
+	u32 a, b, c;
+
+	a = raddr->s6_addr32[0];
+	b = raddr->s6_addr32[1];
+	c = raddr->s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += rnd;
+	__jhash_mix(a, b, c);
+
+	a += raddr->s6_addr32[3];
+	b += (u32) rport;
+	__jhash_mix(a, b, c);
+
+	return c & (TCP_SYNQ_HSIZE - 1);
+}
+
+static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
+					      struct open_request ***prevp,
+					      __u16 rport,
+					      struct in6_addr *raddr,
+					      struct in6_addr *laddr,
+					      int iif)
+{
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	struct open_request *req, **prev;  
+
+	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
+	     (req = *prev) != NULL;
+	     prev = &req->dl_next) {
+		if (req->rmt_port == rport &&
+		    req->class->family == AF_INET6 &&
+		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
+		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
+		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
+			BUG_TRAP(req->sk == NULL);
+			*prevp = prev;
+			return req;
+		}
+	}
+
+	return NULL;
+}
+
+static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
+				   struct in6_addr *saddr, 
+				   struct in6_addr *daddr, 
+				   unsigned long base)
+{
+	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
+}
+
+static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
+						    skb->nh.ipv6h->saddr.s6_addr32,
+						    skb->h.th->dest,
+						    skb->h.th->source);
+	} else {
+		return secure_tcp_sequence_number(skb->nh.iph->daddr,
+						  skb->nh.iph->saddr,
+						  skb->h.th->dest,
+						  skb->h.th->source);
+	}
+}
+
+static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
+				      struct tcp_tw_bucket **twp)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *daddr = &np->rcv_saddr;
+	struct in6_addr *saddr = &np->daddr;
+	int dif = sk->sk_bound_dev_if;
+	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
+	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
+	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+	struct sock *sk2;
+	struct hlist_node *node;
+	struct tcp_tw_bucket *tw;
+
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
+		tw = (struct tcp_tw_bucket*)sk2;
+
+		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
+		   sk2->sk_family		== PF_INET6	&&
+		   ipv6_addr_equal(&tw->tw_v6_daddr, saddr)	&&
+		   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr)	&&
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			if (tw->tw_ts_recent_stamp &&
+			    (!twp || (sysctl_tcp_tw_reuse &&
+				      xtime.tv_sec - 
+				      tw->tw_ts_recent_stamp > 1))) {
+				/* See comment in tcp_ipv4.c */
+				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
+				if (!tp->write_seq)
+					tp->write_seq = 1;
+				tp->rx_opt.ts_recent = tw->tw_ts_recent;
+				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+				sock_hold(sk2);
+				goto unique;
+			} else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sk->sk_hashent = hash;
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		tcp_tw_deschedule(tw);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		tcp_tw_put(tw);
+	}
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 tcpv6_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+
+	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+					   np->daddr.s6_addr32,
+					   inet->dport);
+}
+
+static int tcp_v6_hash_connect(struct sock *sk)
+{
+	unsigned short snum = inet_sk(sk)->num;
+ 	struct tcp_bind_hashbucket *head;
+ 	struct tcp_bind_bucket *tb;
+	int ret;
+
+ 	if (!snum) {
+ 		int low = sysctl_local_port_range[0];
+ 		int high = sysctl_local_port_range[1];
+		int range = high - low;
+ 		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + tcpv6_port_offset(sk);
+		struct hlist_node *node;
+ 		struct tcp_tw_bucket *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &tcp_bhash[tcp_bhashfn(port)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			tb_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__tcp_v6_check_established(sk,
+									port,
+									&tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+
+ 			tb = tcp_bucket_create(head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		tcp_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__tcp_v6_hash(sk);
+ 		}
+ 		spin_unlock(&head->lock);
+
+ 		if (tw) {
+ 			tcp_tw_deschedule(tw);
+ 			tcp_tw_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
+
+ 	head  = &tcp_bhash[tcp_bhashfn(snum)];
+ 	tb  = tcp_sk(sk)->bind_hash;
+	spin_lock_bh(&head->lock);
+
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__tcp_v6_hash(sk);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __tcp_v6_check_established(sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+static __inline__ int tcp_v6_iif(struct sk_buff *skb)
+{
+	return IP6CB(skb)->iif;
+}
+
+static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
+			  int addr_len)
+{
+	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct flowi fl;
+	struct dst_entry *dst;
+	int addr_type;
+	int err;
+
+	if (addr_len < SIN6_LEN_RFC2133) 
+		return -EINVAL;
+
+	if (usin->sin6_family != AF_INET6) 
+		return(-EAFNOSUPPORT);
+
+	memset(&fl, 0, sizeof(fl));
+
+	if (np->sndflow) {
+		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+		IP6_ECN_flow_init(fl.fl6_flowlabel);
+		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+			struct ip6_flowlabel *flowlabel;
+			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			fl6_sock_release(flowlabel);
+		}
+	}
+
+	/*
+  	 *	connect() to INADDR_ANY means loopback (BSD'ism).
+  	 */
+  	
+  	if(ipv6_addr_any(&usin->sin6_addr))
+		usin->sin6_addr.s6_addr[15] = 0x1; 
+
+	addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+	if(addr_type & IPV6_ADDR_MULTICAST)
+		return -ENETUNREACH;
+
+	if (addr_type&IPV6_ADDR_LINKLOCAL) {
+		if (addr_len >= sizeof(struct sockaddr_in6) &&
+		    usin->sin6_scope_id) {
+			/* If interface is set while binding, indices
+			 * must coincide.
+			 */
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != usin->sin6_scope_id)
+				return -EINVAL;
+
+			sk->sk_bound_dev_if = usin->sin6_scope_id;
+		}
+
+		/* Connect to link-local address requires an interface */
+		if (!sk->sk_bound_dev_if)
+			return -EINVAL;
+	}
+
+	if (tp->rx_opt.ts_recent_stamp &&
+	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+		tp->rx_opt.ts_recent = 0;
+		tp->rx_opt.ts_recent_stamp = 0;
+		tp->write_seq = 0;
+	}
+
+	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->flow_label = fl.fl6_flowlabel;
+
+	/*
+	 *	TCP over IPv4
+	 */
+
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		u32 exthdrlen = tp->ext_header_len;
+		struct sockaddr_in sin;
+
+		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+
+		if (__ipv6_only_sock(sk))
+			return -ENETUNREACH;
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = usin->sin6_port;
+		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+		tp->af_specific = &ipv6_mapped;
+		sk->sk_backlog_rcv = tcp_v4_do_rcv;
+
+		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+
+		if (err) {
+			tp->ext_header_len = exthdrlen;
+			tp->af_specific = &ipv6_specific;
+			sk->sk_backlog_rcv = tcp_v6_do_rcv;
+			goto failure;
+		} else {
+			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
+				      inet->saddr);
+			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
+				      inet->rcv_saddr);
+		}
+
+		return err;
+	}
+
+	if (!ipv6_addr_any(&np->rcv_saddr))
+		saddr = &np->rcv_saddr;
+
+	fl.proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src,
+		       (saddr ? saddr : &np->saddr));
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_dport = usin->sin6_port;
+	fl.fl_ip_sport = inet->sport;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto failure;
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+		dst_release(dst);
+		goto failure;
+	}
+
+	if (saddr == NULL) {
+		saddr = &fl.fl6_src;
+		ipv6_addr_copy(&np->rcv_saddr, saddr);
+	}
+
+	/* set the source address */
+	ipv6_addr_copy(&np->saddr, saddr);
+	inet->rcv_saddr = LOOPBACK4_IPV6;
+
+	ip6_dst_store(sk, dst, NULL);
+	sk->sk_route_caps = dst->dev->features &
+		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+
+	tp->ext_header_len = 0;
+	if (np->opt)
+		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+
+	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+
+	inet->dport = usin->sin6_port;
+
+	tcp_set_state(sk, TCP_SYN_SENT);
+	err = tcp_v6_hash_connect(sk);
+	if (err)
+		goto late_failure;
+
+	if (!tp->write_seq)
+		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
+							     np->daddr.s6_addr32,
+							     inet->sport,
+							     inet->dport);
+
+	err = tcp_connect(sk);
+	if (err)
+		goto late_failure;
+
+	return 0;
+
+late_failure:
+	tcp_set_state(sk, TCP_CLOSE);
+	__sk_dst_reset(sk);
+failure:
+	inet->dport = 0;
+	sk->sk_route_caps = 0;
+	return err;
+}
+
+static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+	struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	int err;
+	struct tcp_sock *tp; 
+	__u32 seq;
+
+	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
+
+	if (sk == NULL) {
+		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		return;
+	}
+
+	if (sk->sk_state == TCP_TIME_WAIT) {
+		tcp_tw_put((struct tcp_tw_bucket*)sk);
+		return;
+	}
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+	if (sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	tp = tcp_sk(sk);
+	seq = ntohl(th->seq); 
+	if (sk->sk_state != TCP_LISTEN &&
+	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+		goto out;
+	}
+
+	np = inet6_sk(sk);
+
+	if (type == ICMPV6_PKT_TOOBIG) {
+		struct dst_entry *dst = NULL;
+
+		if (sock_owned_by_user(sk))
+			goto out;
+		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+			goto out;
+
+		/* icmp should have updated the destination cache entry */
+		dst = __sk_dst_check(sk, np->dst_cookie);
+
+		if (dst == NULL) {
+			struct inet_sock *inet = inet_sk(sk);
+			struct flowi fl;
+
+			/* BUGGG_FUTURE: Again, it is not clear how
+			   to handle rthdr case. Ignore this complexity
+			   for now.
+			 */
+			memset(&fl, 0, sizeof(fl));
+			fl.proto = IPPROTO_TCP;
+			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+			fl.oif = sk->sk_bound_dev_if;
+			fl.fl_ip_dport = inet->dport;
+			fl.fl_ip_sport = inet->sport;
+
+			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
+				sk->sk_err_soft = -err;
+				goto out;
+			}
+
+			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+				sk->sk_err_soft = -err;
+				goto out;
+			}
+
+		} else
+			dst_hold(dst);
+
+		if (tp->pmtu_cookie > dst_mtu(dst)) {
+			tcp_sync_mss(sk, dst_mtu(dst));
+			tcp_simple_retransmit(sk);
+		} /* else let the usual retransmit timer handle it */
+		dst_release(dst);
+		goto out;
+	}
+
+	icmpv6_err_convert(type, code, &err);
+
+	/* Might be for an open_request */
+	switch (sk->sk_state) {
+		struct open_request *req, **prev;
+	case TCP_LISTEN:
+		if (sock_owned_by_user(sk))
+			goto out;
+
+		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
+					&hdr->saddr, tcp_v6_iif(skb));
+		if (!req)
+			goto out;
+
+		/* ICMPs are not backlogged, hence we cannot get
+		 * an established socket here.
+		 */
+		BUG_TRAP(req->sk == NULL);
+
+		if (seq != req->snt_isn) {
+			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+			goto out;
+		}
+
+		tcp_synq_drop(sk, req, prev);
+		goto out;
+
+	case TCP_SYN_SENT:
+	case TCP_SYN_RECV:  /* Cannot happen.
+			       It can, it SYNs are crossed. --ANK */ 
+		if (!sock_owned_by_user(sk)) {
+			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+			sk->sk_err = err;
+			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
+
+			tcp_done(sk);
+		} else
+			sk->sk_err_soft = err;
+		goto out;
+	}
+
+	if (!sock_owned_by_user(sk) && np->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else
+		sk->sk_err_soft = err;
+
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+
+static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
+			      struct dst_entry *dst)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff * skb;
+	struct ipv6_txoptions *opt = NULL;
+	struct in6_addr * final_p = NULL, final;
+	struct flowi fl;
+	int err = -1;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+	fl.fl6_flowlabel = 0;
+	fl.oif = req->af.v6_req.iif;
+	fl.fl_ip_dport = req->rmt_port;
+	fl.fl_ip_sport = inet_sk(sk)->sport;
+
+	if (dst == NULL) {
+		opt = np->opt;
+		if (opt == NULL &&
+		    np->rxopt.bits.srcrt == 2 &&
+		    req->af.v6_req.pktopts) {
+			struct sk_buff *pktopts = req->af.v6_req.pktopts;
+			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
+			if (rxopt->srcrt)
+				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+		}
+
+		if (opt && opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err)
+			goto done;
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+			goto done;
+	}
+
+	skb = tcp_make_synack(sk, dst, req);
+	if (skb) {
+		struct tcphdr *th = skb->h.th;
+
+		th->check = tcp_v6_check(th, skb->len,
+					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
+					 csum_partial((char *)th, skb->len, skb->csum));
+
+		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		err = ip6_xmit(sk, skb, &fl, opt, 0);
+		if (err == NET_XMIT_CN)
+			err = 0;
+	}
+
+done:
+	dst_release(dst);
+        if (opt && opt != np->opt)
+		sock_kfree_s(sk, opt, opt->tot_len);
+	return err;
+}
+
+static void tcp_v6_or_free(struct open_request *req)
+{
+	if (req->af.v6_req.pktopts)
+		kfree_skb(req->af.v6_req.pktopts);
+}
+
+static struct or_calltable or_ipv6 = {
+	.family		=	AF_INET6,
+	.rtx_syn_ack	=	tcp_v6_send_synack,
+	.send_ack	=	tcp_v6_or_send_ack,
+	.destructor	=	tcp_v6_or_free,
+	.send_reset	=	tcp_v6_send_reset
+};
+
+static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *opt = IP6CB(skb);
+
+	if (np->rxopt.all) {
+		if ((opt->hop && np->rxopt.bits.hopopts) ||
+		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
+		     np->rxopt.bits.rxflow) ||
+		    (opt->srcrt && np->rxopt.bits.srcrt) ||
+		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
+			return 1;
+	}
+	return 0;
+}
+
+
+static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+			      struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
+		skb->csum = offsetof(struct tcphdr, check);
+	} else {
+		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
+					    csum_partial((char *)th, th->doff<<2, 
+							 skb->csum));
+	}
+}
+
+
+static void tcp_v6_send_reset(struct sk_buff *skb)
+{
+	struct tcphdr *th = skb->h.th, *t1; 
+	struct sk_buff *buff;
+	struct flowi fl;
+
+	if (th->rst)
+		return;
+
+	if (!ipv6_unicast_destination(skb))
+		return; 
+
+	/*
+	 * We need to grab some memory, and put together an RST,
+	 * and then put it into the queue to be sent.
+	 */
+
+	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
+			 GFP_ATOMIC);
+	if (buff == NULL) 
+	  	return;
+
+	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
+
+	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
+
+	/* Swap the send and the receive. */
+	memset(t1, 0, sizeof(*t1));
+	t1->dest = th->source;
+	t1->source = th->dest;
+	t1->doff = sizeof(*t1)/4;
+	t1->rst = 1;
+  
+	if(th->ack) {
+	  	t1->seq = th->ack_seq;
+	} else {
+		t1->ack = 1;
+		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
+				    + skb->len - (th->doff<<2));
+	}
+
+	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+
+	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+				    sizeof(*t1), IPPROTO_TCP,
+				    buff->csum);
+
+	fl.proto = IPPROTO_TCP;
+	fl.oif = tcp_v6_iif(skb);
+	fl.fl_ip_dport = t1->dest;
+	fl.fl_ip_sport = t1->source;
+
+	/* sk = NULL, but it is safe for now. RST socket required. */
+	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+
+		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
+			dst_release(buff->dst);
+			return;
+		}
+
+		ip6_xmit(NULL, buff, &fl, NULL, 0);
+		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+		return;
+	}
+
+	kfree_skb(buff);
+}
+
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+{
+	struct tcphdr *th = skb->h.th, *t1;
+	struct sk_buff *buff;
+	struct flowi fl;
+	int tot_len = sizeof(struct tcphdr);
+
+	if (ts)
+		tot_len += 3*4;
+
+	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
+			 GFP_ATOMIC);
+	if (buff == NULL)
+		return;
+
+	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
+
+	t1 = (struct tcphdr *) skb_push(buff,tot_len);
+
+	/* Swap the send and the receive. */
+	memset(t1, 0, sizeof(*t1));
+	t1->dest = th->source;
+	t1->source = th->dest;
+	t1->doff = tot_len/4;
+	t1->seq = htonl(seq);
+	t1->ack_seq = htonl(ack);
+	t1->ack = 1;
+	t1->window = htons(win);
+	
+	if (ts) {
+		u32 *ptr = (u32*)(t1 + 1);
+		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
+		*ptr++ = htonl(tcp_time_stamp);
+		*ptr = htonl(ts);
+	}
+
+	buff->csum = csum_partial((char *)t1, tot_len, 0);
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+
+	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+				    tot_len, IPPROTO_TCP,
+				    buff->csum);
+
+	fl.proto = IPPROTO_TCP;
+	fl.oif = tcp_v6_iif(skb);
+	fl.fl_ip_dport = t1->dest;
+	fl.fl_ip_sport = t1->source;
+
+	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
+			dst_release(buff->dst);
+			return;
+		}
+		ip6_xmit(NULL, buff, &fl, NULL, 0);
+		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+		return;
+	}
+
+	kfree_skb(buff);
+}
+
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+
+	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
+			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+
+	tcp_tw_put(tw);
+}
+
+static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
+{
+	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+}
+
+
+static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
+{
+	struct open_request *req, **prev;
+	struct tcphdr *th = skb->h.th;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sock *nsk;
+
+	/* Find possible connection requests. */
+	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
+				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
+	if (req)
+		return tcp_check_req(sk, skb, req, prev);
+
+	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
+					  th->source,
+					  &skb->nh.ipv6h->daddr,
+					  ntohs(th->dest),
+					  tcp_v6_iif(skb));
+
+	if (nsk) {
+		if (nsk->sk_state != TCP_TIME_WAIT) {
+			bh_lock_sock(nsk);
+			return nsk;
+		}
+		tcp_tw_put((struct tcp_tw_bucket*)nsk);
+		return NULL;
+	}
+
+#if 0 /*def CONFIG_SYN_COOKIES*/
+	if (!th->rst && !th->syn && th->ack)
+		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
+#endif
+	return sk;
+}
+
+static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_listen_opt *lopt = tp->listen_opt;
+	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
+
+	req->sk = NULL;
+	req->expires = jiffies + TCP_TIMEOUT_INIT;
+	req->retrans = 0;
+	req->dl_next = lopt->syn_table[h];
+
+	write_lock(&tp->syn_wait_lock);
+	lopt->syn_table[h] = req;
+	write_unlock(&tp->syn_wait_lock);
+
+	tcp_synq_added(sk);
+}
+
+
+/* FIXME: this is substantially similar to the ipv4 code.
+ * Can some kind of merge be done? -- erics
+ */
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_options_received tmp_opt;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct open_request *req = NULL;
+	__u32 isn = TCP_SKB_CB(skb)->when;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return tcp_v4_conn_request(sk, skb);
+
+	if (!ipv6_unicast_destination(skb))
+		goto drop; 
+
+	/*
+	 *	There are no SYN attacks on IPv6, yet...	
+	 */
+	if (tcp_synq_is_full(sk) && !isn) {
+		if (net_ratelimit())
+			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
+		goto drop;		
+	}
+
+	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+		goto drop;
+
+	req = tcp_openreq_alloc();
+	if (req == NULL)
+		goto drop;
+
+	tcp_clear_options(&tmp_opt);
+	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+	tmp_opt.user_mss = tp->rx_opt.user_mss;
+
+	tcp_parse_options(skb, &tmp_opt, 0);
+
+	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+	tcp_openreq_init(req, &tmp_opt, skb);
+
+	req->class = &or_ipv6;
+	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+	TCP_ECN_create_request(req, skb->h.th);
+	req->af.v6_req.pktopts = NULL;
+	if (ipv6_opt_accepted(sk, skb) ||
+	    np->rxopt.bits.rxinfo ||
+	    np->rxopt.bits.rxhlim) {
+		atomic_inc(&skb->users);
+		req->af.v6_req.pktopts = skb;
+	}
+	req->af.v6_req.iif = sk->sk_bound_dev_if;
+
+	/* So that link locals have meaning */
+	if (!sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		req->af.v6_req.iif = tcp_v6_iif(skb);
+
+	if (isn == 0) 
+		isn = tcp_v6_init_sequence(sk,skb);
+
+	req->snt_isn = isn;
+
+	if (tcp_v6_send_synack(sk, req, NULL))
+		goto drop;
+
+	tcp_v6_synq_add(sk, req);
+
+	return 0;
+
+drop:
+	if (req)
+		tcp_openreq_free(req);
+
+	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+	return 0; /* don't send reset */
+}
+
+static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+					  struct open_request *req,
+					  struct dst_entry *dst)
+{
+	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+	struct tcp6_sock *newtcp6sk;
+	struct inet_sock *newinet;
+	struct tcp_sock *newtp;
+	struct sock *newsk;
+	struct ipv6_txoptions *opt;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		/*
+		 *	v6 mapped
+		 */
+
+		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+
+		if (newsk == NULL) 
+			return NULL;
+
+		newtcp6sk = (struct tcp6_sock *)newsk;
+		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+
+		newinet = inet_sk(newsk);
+		newnp = inet6_sk(newsk);
+		newtp = tcp_sk(newsk);
+
+		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
+			      newinet->daddr);
+
+		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
+			      newinet->saddr);
+
+		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+
+		newtp->af_specific = &ipv6_mapped;
+		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
+		newnp->pktoptions  = NULL;
+		newnp->opt	   = NULL;
+		newnp->mcast_oif   = tcp_v6_iif(skb);
+		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+
+		/* Charge newly allocated IPv6 socket. Though it is mapped,
+		 * it is IPv6 yet.
+		 */
+#ifdef INET_REFCNT_DEBUG
+		atomic_inc(&inet6_sock_nr);
+#endif
+
+		/* It is tricky place. Until this moment IPv4 tcp
+		   worked with IPv6 af_tcp.af_specific.
+		   Sync it now.
+		 */
+		tcp_sync_mss(newsk, newtp->pmtu_cookie);
+
+		return newsk;
+	}
+
+	opt = np->opt;
+
+	if (sk_acceptq_is_full(sk))
+		goto out_overflow;
+
+	if (np->rxopt.bits.srcrt == 2 &&
+	    opt == NULL && req->af.v6_req.pktopts) {
+		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
+		if (rxopt->srcrt)
+			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+	}
+
+	if (dst == NULL) {
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = IPPROTO_TCP;
+		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		if (opt && opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = req->rmt_port;
+		fl.fl_ip_sport = inet_sk(sk)->sport;
+
+		if (ip6_dst_lookup(sk, &dst, &fl))
+			goto out;
+
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+			goto out;
+	} 
+
+	newsk = tcp_create_openreq_child(sk, req, skb);
+	if (newsk == NULL)
+		goto out;
+
+	/* Charge newly allocated IPv6 socket */
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet6_sock_nr);
+#endif
+
+	ip6_dst_store(newsk, dst, NULL);
+	newsk->sk_route_caps = dst->dev->features &
+		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+
+	newtcp6sk = (struct tcp6_sock *)newsk;
+	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+
+	newtp = tcp_sk(newsk);
+	newinet = inet_sk(newsk);
+	newnp = inet6_sk(newsk);
+
+	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
+	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
+	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
+	newsk->sk_bound_dev_if = req->af.v6_req.iif;
+
+	/* Now IPv6 options... 
+
+	   First: no IPv4 options.
+	 */
+	newinet->opt = NULL;
+
+	/* Clone RX bits */
+	newnp->rxopt.all = np->rxopt.all;
+
+	/* Clone pktoptions received with SYN */
+	newnp->pktoptions = NULL;
+	if (req->af.v6_req.pktopts) {
+		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
+					      GFP_ATOMIC);
+		kfree_skb(req->af.v6_req.pktopts);
+		req->af.v6_req.pktopts = NULL;
+		if (newnp->pktoptions)
+			skb_set_owner_r(newnp->pktoptions, newsk);
+	}
+	newnp->opt	  = NULL;
+	newnp->mcast_oif  = tcp_v6_iif(skb);
+	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+
+	/* Clone native IPv6 options from listening socket (if any)
+
+	   Yes, keeping reference count would be much more clever,
+	   but we make one more one thing there: reattach optmem
+	   to newsk.
+	 */
+	if (opt) {
+		newnp->opt = ipv6_dup_options(newsk, opt);
+		if (opt != np->opt)
+			sock_kfree_s(sk, opt, opt->tot_len);
+	}
+
+	newtp->ext_header_len = 0;
+	if (newnp->opt)
+		newtp->ext_header_len = newnp->opt->opt_nflen +
+					newnp->opt->opt_flen;
+
+	tcp_sync_mss(newsk, dst_mtu(dst));
+	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tcp_initialize_rcv_mss(newsk);
+
+	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
+
+	__tcp_v6_hash(newsk);
+	tcp_inherit_port(sk, newsk);
+
+	return newsk;
+
+out_overflow:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+out:
+	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+	if (opt && opt != np->opt)
+		sock_kfree_s(sk, opt, opt->tot_len);
+	dst_release(dst);
+	return NULL;
+}
+
+static int tcp_v6_checksum_init(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_HW) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
+				  &skb->nh.ipv6h->daddr,skb->csum))
+			return 0;
+		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
+	}
+	if (skb->len <= 76) {
+		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
+				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
+			return -1;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else {
+		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
+					  &skb->nh.ipv6h->daddr,0);
+	}
+	return 0;
+}
+
+/* The socket must have it's spinlock held when we get
+ * here.
+ *
+ * We have a potential double-lock case here, so even when
+ * doing backlog processing we use the BH locking scheme.
+ * This is because we cannot sleep with the original spinlock
+ * held.
+ */
+static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcp_sock *tp;
+	struct sk_buff *opt_skb = NULL;
+
+	/* Imagine: socket is IPv6. IPv4 packet arrives,
+	   goes to IPv4 receive handler and backlogged.
+	   From backlog it always goes here. Kerboom...
+	   Fortunately, tcp_rcv_established and rcv_established
+	   handle them correctly, but it is not case with
+	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
+	 */
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return tcp_v4_do_rcv(sk, skb);
+
+	if (sk_filter(sk, skb, 0))
+		goto discard;
+
+	/*
+	 *	socket locking is here for SMP purposes as backlog rcv
+	 *	is currently called with bh processing disabled.
+	 */
+
+	/* Do Stevens' IPV6_PKTOPTIONS.
+
+	   Yes, guys, it is the only place in our code, where we
+	   may make it not affecting IPv4.
+	   The rest of code is protocol independent,
+	   and I do not like idea to uglify IPv4.
+
+	   Actually, all the idea behind IPV6_PKTOPTIONS
+	   looks not very well thought. For now we latch
+	   options, received in the last packet, enqueued
+	   by tcp. Feel free to propose better solution.
+	                                       --ANK (980728)
+	 */
+	if (np->rxopt.all)
+		opt_skb = skb_clone(skb, GFP_ATOMIC);
+
+	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		TCP_CHECK_TIMER(sk);
+		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+			goto reset;
+		TCP_CHECK_TIMER(sk);
+		if (opt_skb)
+			goto ipv6_pktoptions;
+		return 0;
+	}
+
+	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+		goto csum_err;
+
+	if (sk->sk_state == TCP_LISTEN) { 
+		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
+		if (!nsk)
+			goto discard;
+
+		/*
+		 * Queue it on the new socket if the new socket is active,
+		 * otherwise we just shortcircuit this and continue with
+		 * the new socket..
+		 */
+ 		if(nsk != sk) {
+			if (tcp_child_process(sk, nsk, skb))
+				goto reset;
+			if (opt_skb)
+				__kfree_skb(opt_skb);
+			return 0;
+		}
+	}
+
+	TCP_CHECK_TIMER(sk);
+	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+		goto reset;
+	TCP_CHECK_TIMER(sk);
+	if (opt_skb)
+		goto ipv6_pktoptions;
+	return 0;
+
+reset:
+	tcp_v6_send_reset(skb);
+discard:
+	if (opt_skb)
+		__kfree_skb(opt_skb);
+	kfree_skb(skb);
+	return 0;
+csum_err:
+	TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	goto discard;
+
+
+ipv6_pktoptions:
+	/* Do you ask, what is it?
+
+	   1. skb was enqueued by tcp.
+	   2. skb is added to tail of read queue, rather than out of order.
+	   3. socket is not in passive state.
+	   4. Finally, it really contains options, which user wants to receive.
+	 */
+	tp = tcp_sk(sk);
+	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
+	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+		if (np->rxopt.bits.rxinfo)
+			np->mcast_oif = tcp_v6_iif(opt_skb);
+		if (np->rxopt.bits.rxhlim)
+			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+		if (ipv6_opt_accepted(sk, opt_skb)) {
+			skb_set_owner_r(opt_skb, sk);
+			opt_skb = xchg(&np->pktoptions, opt_skb);
+		} else {
+			__kfree_skb(opt_skb);
+			opt_skb = xchg(&np->pktoptions, NULL);
+		}
+	}
+
+	if (opt_skb)
+		kfree_skb(opt_skb);
+	return 0;
+}
+
+static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct tcphdr *th;	
+	struct sock *sk;
+	int ret;
+
+	if (skb->pkt_type != PACKET_HOST)
+		goto discard_it;
+
+	/*
+	 *	Count it even if it's bad.
+	 */
+	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+
+	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+		goto discard_it;
+
+	th = skb->h.th;
+
+	if (th->doff < sizeof(struct tcphdr)/4)
+		goto bad_packet;
+	if (!pskb_may_pull(skb, th->doff*4))
+		goto discard_it;
+
+	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	     tcp_v6_checksum_init(skb) < 0))
+		goto bad_packet;
+
+	th = skb->h.th;
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff*4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->when = 0;
+	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+	TCP_SKB_CB(skb)->sacked = 0;
+
+	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
+			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+
+	if (!sk)
+		goto no_tcp_socket;
+
+process:
+	if (sk->sk_state == TCP_TIME_WAIT)
+		goto do_time_wait;
+
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_and_relse;
+
+	if (sk_filter(sk, skb, 0))
+		goto discard_and_relse;
+
+	skb->dev = NULL;
+
+	bh_lock_sock(sk);
+	ret = 0;
+	if (!sock_owned_by_user(sk)) {
+		if (!tcp_prequeue(sk, skb))
+			ret = tcp_v6_do_rcv(sk, skb);
+	} else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	sock_put(sk);
+	return ret ? -1 : 0;
+
+no_tcp_socket:
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+		goto discard_it;
+
+	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+bad_packet:
+		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+	} else {
+		tcp_v6_send_reset(skb);
+	}
+
+discard_it:
+
+	/*
+	 *	Discard frame
+	 */
+
+	kfree_skb(skb);
+	return 0;
+
+discard_and_relse:
+	sock_put(sk);
+	goto discard_it;
+
+do_time_wait:
+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		tcp_tw_put((struct tcp_tw_bucket *) sk);
+		goto discard_it;
+	}
+
+	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+		TCP_INC_STATS_BH(TCP_MIB_INERRS);
+		tcp_tw_put((struct tcp_tw_bucket *) sk);
+		goto discard_it;
+	}
+
+	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
+					  skb, th, skb->len)) {
+	case TCP_TW_SYN:
+	{
+		struct sock *sk2;
+
+		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
+		if (sk2 != NULL) {
+			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
+			tcp_tw_put((struct tcp_tw_bucket *)sk);
+			sk = sk2;
+			goto process;
+		}
+		/* Fall through to ACK */
+	}
+	case TCP_TW_ACK:
+		tcp_v6_timewait_ack(sk, skb);
+		break;
+	case TCP_TW_RST:
+		goto no_tcp_socket;
+	case TCP_TW_SUCCESS:;
+	}
+	goto discard_it;
+}
+
+static int tcp_v6_rebuild_header(struct sock *sk)
+{
+	int err;
+	struct dst_entry *dst;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = IPPROTO_TCP;
+		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+		fl.fl6_flowlabel = np->flow_label;
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_sport = inet->sport;
+
+		if (np->opt && np->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_err_soft = -err;
+			dst_release(dst);
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	return 0;
+}
+
+static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct in6_addr *final_p = NULL, final;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.fl6_flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->dport;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		int err = ip6_dst_lookup(sk, &dst, &fl);
+
+		if (err) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_route_caps = 0;
+			dst_release(dst);
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	skb->dst = dst_clone(dst);
+
+	/* Restore final destination back after routing done */
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+
+	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+}
+
+static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+	sin6->sin6_family = AF_INET6;
+	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+	sin6->sin6_port	= inet_sk(sk)->dport;
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = 0;
+	if (sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin6->sin6_scope_id = sk->sk_bound_dev_if;
+}
+
+static int tcp_v6_remember_stamp(struct sock *sk)
+{
+	/* Alas, not yet... */
+	return 0;
+}
+
+static struct tcp_func ipv6_specific = {
+	.queue_xmit	=	tcp_v6_xmit,
+	.send_check	=	tcp_v6_send_check,
+	.rebuild_header	=	tcp_v6_rebuild_header,
+	.conn_request	=	tcp_v6_conn_request,
+	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
+	.remember_stamp	=	tcp_v6_remember_stamp,
+	.net_header_len	=	sizeof(struct ipv6hdr),
+
+	.setsockopt	=	ipv6_setsockopt,
+	.getsockopt	=	ipv6_getsockopt,
+	.addr2sockaddr	=	v6_addr2sockaddr,
+	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+};
+
+/*
+ *	TCP over IPv4 via INET6 API
+ */
+
+static struct tcp_func ipv6_mapped = {
+	.queue_xmit	=	ip_queue_xmit,
+	.send_check	=	tcp_v4_send_check,
+	.rebuild_header	=	tcp_v4_rebuild_header,
+	.conn_request	=	tcp_v6_conn_request,
+	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
+	.remember_stamp	=	tcp_v4_remember_stamp,
+	.net_header_len	=	sizeof(struct iphdr),
+
+	.setsockopt	=	ipv6_setsockopt,
+	.getsockopt	=	ipv6_getsockopt,
+	.addr2sockaddr	=	v6_addr2sockaddr,
+	.sockaddr_len	=	sizeof(struct sockaddr_in6)
+};
+
+
+
+/* NOTE: A lot of things set to zero explicitly by call to
+ *       sk_alloc() so need not be done here.
+ */
+static int tcp_v6_init_sock(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	skb_queue_head_init(&tp->out_of_order_queue);
+	tcp_init_xmit_timers(sk);
+	tcp_prequeue_init(tp);
+
+	tp->rto  = TCP_TIMEOUT_INIT;
+	tp->mdev = TCP_TIMEOUT_INIT;
+
+	/* So many TCP implementations out there (incorrectly) count the
+	 * initial SYN frame in their delayed-ACK and congestion control
+	 * algorithms that we must have the following bandaid to talk
+	 * efficiently to them.  -DaveM
+	 */
+	tp->snd_cwnd = 2;
+
+	/* See draft-stevens-tcpca-spec-01 for discussion of the
+	 * initialization of these values.
+	 */
+	tp->snd_ssthresh = 0x7fffffff;
+	tp->snd_cwnd_clamp = ~0;
+	tp->mss_cache_std = tp->mss_cache = 536;
+
+	tp->reordering = sysctl_tcp_reordering;
+
+	sk->sk_state = TCP_CLOSE;
+
+	tp->af_specific = &ipv6_specific;
+
+	sk->sk_write_space = sk_stream_write_space;
+	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+	sk->sk_sndbuf = sysctl_tcp_wmem[1];
+	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+	atomic_inc(&tcp_sockets_allocated);
+
+	return 0;
+}
+
+static int tcp_v6_destroy_sock(struct sock *sk)
+{
+	extern int tcp_v4_destroy_sock(struct sock *sk);
+
+	tcp_v4_destroy_sock(sk);
+	return inet6_destroy_sock(sk);
+}
+
+/* Proc filesystem TCPv6 sock list dumping. */
+static void get_openreq6(struct seq_file *seq, 
+			 struct sock *sk, struct open_request *req, int i, int uid)
+{
+	struct in6_addr *dest, *src;
+	int ttd = req->expires - jiffies;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	src = &req->af.v6_req.loc_addr;
+	dest = &req->af.v6_req.rmt_addr;
+	seq_printf(seq,
+		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+		   i,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3],
+		   ntohs(inet_sk(sk)->sport),
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3],
+		   ntohs(req->rmt_port),
+		   TCP_SYN_RECV,
+		   0,0, /* could print option size, but that is af dependent. */
+		   1,   /* timers active (only the expire timer) */  
+		   jiffies_to_clock_t(ttd), 
+		   req->retrans,
+		   uid,
+		   0,  /* non standard timer */  
+		   0, /* open_requests have no inode */
+		   0, req);
+}
+
+static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+{
+	struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+	int timer_active;
+	unsigned long timer_expires;
+	struct inet_sock *inet = inet_sk(sp);
+	struct tcp_sock *tp = tcp_sk(sp);
+	struct ipv6_pinfo *np = inet6_sk(sp);
+
+	dest  = &np->daddr;
+	src   = &np->rcv_saddr;
+	destp = ntohs(inet->dport);
+	srcp  = ntohs(inet->sport);
+	if (tp->pending == TCP_TIME_RETRANS) {
+		timer_active	= 1;
+		timer_expires	= tp->timeout;
+	} else if (tp->pending == TCP_TIME_PROBE0) {
+		timer_active	= 4;
+		timer_expires	= tp->timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active	= 2;
+		timer_expires	= sp->sk_timer.expires;
+	} else {
+		timer_active	= 0;
+		timer_expires = jiffies;
+	}
+
+	seq_printf(seq,
+		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
+		   i,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3], srcp,
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
+		   sp->sk_state, 
+		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+		   timer_active,
+		   jiffies_to_clock_t(timer_expires - jiffies),
+		   tp->retransmits,
+		   sock_i_uid(sp),
+		   tp->probes_out,
+		   sock_i_ino(sp),
+		   atomic_read(&sp->sk_refcnt), sp,
+		   tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
+		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
+		   );
+}
+
+static void get_timewait6_sock(struct seq_file *seq, 
+			       struct tcp_tw_bucket *tw, int i)
+{
+	struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+	int ttd = tw->tw_ttd - jiffies;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	dest  = &tw->tw_v6_daddr;
+	src   = &tw->tw_v6_rcv_saddr;
+	destp = ntohs(tw->tw_dport);
+	srcp  = ntohs(tw->tw_sport);
+
+	seq_printf(seq,
+		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
+		   i,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3], srcp,
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
+		   tw->tw_substate, 0, 0,
+		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+		   atomic_read(&tw->tw_refcnt), tw);
+}
+
+#ifdef CONFIG_PROC_FS
+static int tcp6_seq_show(struct seq_file *seq, void *v)
+{
+	struct tcp_iter_state *st;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "  sl  "
+			 "local_address                         "
+			 "remote_address                        "
+			 "st tx_queue rx_queue tr tm->when retrnsmt"
+			 "   uid  timeout inode\n");
+		goto out;
+	}
+	st = seq->private;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_LISTENING:
+	case TCP_SEQ_STATE_ESTABLISHED:
+		get_tcp6_sock(seq, v, st->num);
+		break;
+	case TCP_SEQ_STATE_OPENREQ:
+		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
+		break;
+	case TCP_SEQ_STATE_TIME_WAIT:
+		get_timewait6_sock(seq, v, st->num);
+		break;
+	}
+out:
+	return 0;
+}
+
+static struct file_operations tcp6_seq_fops;
+static struct tcp_seq_afinfo tcp6_seq_afinfo = {
+	.owner		= THIS_MODULE,
+	.name		= "tcp6",
+	.family		= AF_INET6,
+	.seq_show	= tcp6_seq_show,
+	.seq_fops	= &tcp6_seq_fops,
+};
+
+int __init tcp6_proc_init(void)
+{
+	return tcp_proc_register(&tcp6_seq_afinfo);
+}
+
+void tcp6_proc_exit(void)
+{
+	tcp_proc_unregister(&tcp6_seq_afinfo);
+}
+#endif
+
+struct proto tcpv6_prot = {
+	.name			= "TCPv6",
+	.owner			= THIS_MODULE,
+	.close			= tcp_close,
+	.connect		= tcp_v6_connect,
+	.disconnect		= tcp_disconnect,
+	.accept			= tcp_accept,
+	.ioctl			= tcp_ioctl,
+	.init			= tcp_v6_init_sock,
+	.destroy		= tcp_v6_destroy_sock,
+	.shutdown		= tcp_shutdown,
+	.setsockopt		= tcp_setsockopt,
+	.getsockopt		= tcp_getsockopt,
+	.sendmsg		= tcp_sendmsg,
+	.recvmsg		= tcp_recvmsg,
+	.backlog_rcv		= tcp_v6_do_rcv,
+	.hash			= tcp_v6_hash,
+	.unhash			= tcp_unhash,
+	.get_port		= tcp_v6_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.sockets_allocated	= &tcp_sockets_allocated,
+	.memory_allocated	= &tcp_memory_allocated,
+	.memory_pressure	= &tcp_memory_pressure,
+	.sysctl_mem		= sysctl_tcp_mem,
+	.sysctl_wmem		= sysctl_tcp_wmem,
+	.sysctl_rmem		= sysctl_tcp_rmem,
+	.max_header		= MAX_TCP_HEADER,
+	.obj_size		= sizeof(struct tcp6_sock),
+};
+
+static struct inet6_protocol tcpv6_protocol = {
+	.handler	=	tcp_v6_rcv,
+	.err_handler	=	tcp_v6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+extern struct proto_ops inet6_stream_ops;
+
+static struct inet_protosw tcpv6_protosw = {
+	.type		=	SOCK_STREAM,
+	.protocol	=	IPPROTO_TCP,
+	.prot		=	&tcpv6_prot,
+	.ops		=	&inet6_stream_ops,
+	.capability	=	-1,
+	.no_check	=	0,
+	.flags		=	INET_PROTOSW_PERMANENT,
+};
+
+void __init tcpv6_init(void)
+{
+	/* register inet6 protocol */
+	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
+		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
+	inet6_register_protosw(&tcpv6_protosw);
+}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
new file mode 100644
index 00000000000..e251d0ba4f3
--- /dev/null
+++ b/net/ipv6/udp.c
@@ -0,0 +1,1075 @@
+/*
+ *	UDP over IPv6
+ *	Linux INET6 implementation 
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>	
+ *
+ *	Based on linux/ipv4/udp.c
+ *
+ *	$Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $
+ *
+ *	Fixes:
+ *	Hideaki YOSHIFUJI	:	sin6_scope_id support
+ *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+ *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
+ *					a single port at the same time.
+ *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
+ *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <asm/uaccess.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/raw.h>
+#include <net/inet_common.h>
+
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
+
+/* Grrr, addr_type already calculated by caller, but I don't want
+ * to add some silly "cookie" argument to this method just for that.
+ */
+static int udp_v6_get_port(struct sock *sk, unsigned short snum)
+{
+	struct sock *sk2;
+	struct hlist_node *node;
+
+	write_lock_bh(&udp_hash_lock);
+	if (snum == 0) {
+		int best_size_so_far, best, result, i;
+
+		if (udp_port_rover > sysctl_local_port_range[1] ||
+		    udp_port_rover < sysctl_local_port_range[0])
+			udp_port_rover = sysctl_local_port_range[0];
+		best_size_so_far = 32767;
+		best = result = udp_port_rover;
+		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+			int size;
+			struct hlist_head *list;
+
+			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			if (hlist_empty(list)) {
+				if (result > sysctl_local_port_range[1])
+					result = sysctl_local_port_range[0] +
+						((result - sysctl_local_port_range[0]) &
+						 (UDP_HTABLE_SIZE - 1));
+				goto gotit;
+			}
+			size = 0;
+			sk_for_each(sk2, node, list)
+				if (++size >= best_size_so_far)
+					goto next;
+			best_size_so_far = size;
+			best = result;
+		next:;
+		}
+		result = best;
+		for(;; result += UDP_HTABLE_SIZE) {
+			if (result > sysctl_local_port_range[1])
+				result = sysctl_local_port_range[0]
+					+ ((result - sysctl_local_port_range[0]) &
+					   (UDP_HTABLE_SIZE - 1));
+			if (!udp_lport_inuse(result))
+				break;
+		}
+gotit:
+		udp_port_rover = snum = result;
+	} else {
+		sk_for_each(sk2, node,
+			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
+			if (inet_sk(sk2)->num == snum &&
+			    sk2 != sk &&
+			    (!sk2->sk_bound_dev_if ||
+			     !sk->sk_bound_dev_if ||
+			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (!sk2->sk_reuse || !sk->sk_reuse) &&
+			    ipv6_rcv_saddr_equal(sk, sk2))
+				goto fail;
+		}
+	}
+
+	inet_sk(sk)->num = snum;
+	if (sk_unhashed(sk)) {
+		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
+		sock_prot_inc_use(sk->sk_prot);
+	}
+	write_unlock_bh(&udp_hash_lock);
+	return 0;
+
+fail:
+	write_unlock_bh(&udp_hash_lock);
+	return 1;
+}
+
+static void udp_v6_hash(struct sock *sk)
+{
+	BUG();
+}
+
+static void udp_v6_unhash(struct sock *sk)
+{
+ 	write_lock_bh(&udp_hash_lock);
+	if (sk_del_node_init(sk)) {
+		inet_sk(sk)->num = 0;
+		sock_prot_dec_use(sk->sk_prot);
+	}
+	write_unlock_bh(&udp_hash_lock);
+}
+
+static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
+				  struct in6_addr *daddr, u16 dport, int dif)
+{
+	struct sock *sk, *result = NULL;
+	struct hlist_node *node;
+	unsigned short hnum = ntohs(dport);
+	int badness = -1;
+
+ 	read_lock(&udp_hash_lock);
+	sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (inet->num == hnum && sk->sk_family == PF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+			int score = 0;
+			if (inet->dport) {
+				if (inet->dport != sport)
+					continue;
+				score++;
+			}
+			if (!ipv6_addr_any(&np->rcv_saddr)) {
+				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+					continue;
+				score++;
+			}
+			if (!ipv6_addr_any(&np->daddr)) {
+				if (!ipv6_addr_equal(&np->daddr, saddr))
+					continue;
+				score++;
+			}
+			if (sk->sk_bound_dev_if) {
+				if (sk->sk_bound_dev_if != dif)
+					continue;
+				score++;
+			}
+			if(score == 4) {
+				result = sk;
+				break;
+			} else if(score > badness) {
+				result = sk;
+				badness = score;
+			}
+		}
+	}
+	if (result)
+		sock_hold(result);
+ 	read_unlock(&udp_hash_lock);
+	return result;
+}
+
+/*
+ *
+ */
+
+static void udpv6_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+/*
+ * 	This should be easy, if there is something there we
+ * 	return it, otherwise we block.
+ */
+
+static int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, 
+		  struct msghdr *msg, size_t len,
+		  int noblock, int flags, int *addr_len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+  	struct sk_buff *skb;
+	size_t copied;
+  	int err;
+
+  	if (addr_len)
+  		*addr_len=sizeof(struct sockaddr_in6);
+  
+	if (flags & MSG_ERRQUEUE)
+		return ipv6_recv_error(sk, msg, len);
+
+try_again:
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+ 	copied = skb->len - sizeof(struct udphdr);
+  	if (copied > len) {
+  		copied = len;
+  		msg->msg_flags |= MSG_TRUNC;
+  	}
+
+	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+					      copied);
+	} else if (msg->msg_flags&MSG_TRUNC) {
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+			goto csum_copy_err;
+		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+					      copied);
+	} else {
+		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
+		if (err == -EINVAL)
+			goto csum_copy_err;
+	}
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	/* Copy the address. */
+	if (msg->msg_name) {
+		struct sockaddr_in6 *sin6;
+	  
+		sin6 = (struct sockaddr_in6 *) msg->msg_name;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = skb->h.uh->source;
+		sin6->sin6_flowinfo = 0;
+		sin6->sin6_scope_id = 0;
+
+		if (skb->protocol == htons(ETH_P_IP))
+			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
+				      htonl(0xffff), skb->nh.iph->saddr);
+		else {
+			ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+				sin6->sin6_scope_id = IP6CB(skb)->iif;
+		}
+
+	}
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (inet->cmsg_flags)
+			ip_cmsg_recv(msg, skb);
+	} else {
+		if (np->rxopt.all)
+			datagram_recv_ctl(sk, msg, skb);
+  	}
+
+	err = copied;
+	if (flags & MSG_TRUNC)
+		err = skb->len - sizeof(struct udphdr);
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+
+csum_copy_err:
+	/* Clear queue. */
+	if (flags&MSG_PEEK) {
+		int clear = 0;
+		spin_lock_irq(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			clear = 1;
+		}
+		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		if (clear)
+			kfree_skb(skb);
+	}
+
+	skb_free_datagram(sk, skb);
+
+	if (flags & MSG_DONTWAIT) {
+		UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
+		return -EAGAIN;
+	}
+	goto try_again;
+}
+
+static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	       int type, int code, int offset, __u32 info)
+{
+	struct ipv6_pinfo *np;
+	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+	struct net_device *dev = skb->dev;
+	struct in6_addr *saddr = &hdr->saddr;
+	struct in6_addr *daddr = &hdr->daddr;
+	struct udphdr *uh = (struct udphdr*)(skb->data+offset);
+	struct sock *sk;
+	int err;
+
+	sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex);
+   
+	if (sk == NULL)
+		return;
+
+	np = inet6_sk(sk);
+
+	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
+		goto out;
+
+	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
+		goto out;
+
+	if (np->recverr)
+		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+out:
+	sock_put(sk);
+}
+
+static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+{
+	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
+		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
+			UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
+			kfree_skb(skb);
+			return 0;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (sock_queue_rcv_skb(sk,skb)<0) {
+		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
+		kfree_skb(skb);
+		return 0;
+	}
+	UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
+	return 0;
+}
+
+static struct sock *udp_v6_mcast_next(struct sock *sk,
+				      u16 loc_port, struct in6_addr *loc_addr,
+				      u16 rmt_port, struct in6_addr *rmt_addr,
+				      int dif)
+{
+	struct hlist_node *node;
+	struct sock *s = sk;
+	unsigned short num = ntohs(loc_port);
+
+	sk_for_each_from(s, node) {
+		struct inet_sock *inet = inet_sk(s);
+
+		if (inet->num == num && s->sk_family == PF_INET6) {
+			struct ipv6_pinfo *np = inet6_sk(s);
+			if (inet->dport) {
+				if (inet->dport != rmt_port)
+					continue;
+			}
+			if (!ipv6_addr_any(&np->daddr) &&
+			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+				continue;
+
+			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+				continue;
+
+			if (!ipv6_addr_any(&np->rcv_saddr)) {
+				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+					return s;
+				continue;
+			}
+			if(!inet6_mc_check(s, loc_addr, rmt_addr))
+				continue;
+			return s;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Note: called only from the BH handler context,
+ * so we don't need to lock the hashes.
+ */
+static void udpv6_mcast_deliver(struct udphdr *uh,
+				struct in6_addr *saddr, struct in6_addr *daddr,
+				struct sk_buff *skb)
+{
+	struct sock *sk, *sk2;
+	int dif;
+
+	read_lock(&udp_hash_lock);
+	sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	dif = skb->dev->ifindex;
+	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+	if (!sk) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	sk2 = sk;
+	while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
+					uh->source, saddr, dif))) {
+		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
+		if (buff)
+			udpv6_queue_rcv_skb(sk2, buff);
+	}
+	udpv6_queue_rcv_skb(sk, skb);
+out:
+	read_unlock(&udp_hash_lock);
+}
+
+static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct sock *sk;
+  	struct udphdr *uh;
+	struct net_device *dev = skb->dev;
+	struct in6_addr *saddr, *daddr;
+	u32 ulen = 0;
+
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+		goto short_packet;
+
+	saddr = &skb->nh.ipv6h->saddr;
+	daddr = &skb->nh.ipv6h->daddr;
+	uh = skb->h.uh;
+
+	ulen = ntohs(uh->len);
+
+	/* Check for jumbo payload */
+	if (ulen == 0)
+		ulen = skb->len;
+
+	if (ulen > skb->len || ulen < sizeof(*uh))
+		goto short_packet;
+
+	if (uh->check == 0) {
+		/* RFC 2460 section 8.1 says that we SHOULD log
+		   this error. Well, it is reasonable.
+		 */
+		LIMIT_NETDEBUG(
+			printk(KERN_INFO "IPv6: udp checksum is 0\n"));
+		goto discard;
+	}
+
+	if (ulen < skb->len) {
+		if (__pskb_trim(skb, ulen))
+			goto discard;
+		saddr = &skb->nh.ipv6h->saddr;
+		daddr = &skb->nh.ipv6h->daddr;
+		uh = skb->h.uh;
+	}
+
+	if (skb->ip_summed==CHECKSUM_HW) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
+			LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n"));
+			skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+		skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+	/* 
+	 *	Multicast receive code 
+	 */
+	if (ipv6_addr_is_multicast(daddr)) {
+		udpv6_mcast_deliver(uh, saddr, daddr, skb);
+		return 0;
+	}
+
+	/* Unicast */
+
+	/* 
+	 * check socket cache ... must talk to Alan about his plans
+	 * for sock caches... i'll skip this for now.
+	 */
+	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+
+	if (sk == NULL) {
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto discard;
+
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+			goto discard;
+		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS);
+
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
+
+		kfree_skb(skb);
+		return(0);
+	}
+	
+	/* deliver */
+	
+	udpv6_queue_rcv_skb(sk, skb);
+	sock_put(sk);
+	return(0);
+
+short_packet:	
+	if (net_ratelimit())
+		printk(KERN_DEBUG "UDP: short packet: %d/%u\n", ulen, skb->len);
+
+discard:
+	UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
+	kfree_skb(skb);
+	return(0);	
+}
+/*
+ * Throw away all pending data and cancel the corking. Socket is locked.
+ */
+static void udp_v6_flush_pending_frames(struct sock *sk)
+{
+	struct udp_sock *up = udp_sk(sk);
+
+	if (up->pending) {
+		up->len = 0;
+		up->pending = 0;
+		ip6_flush_pending_frames(sk);
+        }
+}
+
+/*
+ *	Sending
+ */
+
+static int udp_v6_push_pending_frames(struct sock *sk, struct udp_sock *up)
+{
+	struct sk_buff *skb;
+	struct udphdr *uh;
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi *fl = &inet->cork.fl;
+	int err = 0;
+
+	/* Grab the skbuff where UDP header space exists. */
+	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+		goto out;
+
+	/*
+	 * Create a UDP header
+	 */
+	uh = skb->h.uh;
+	uh->source = fl->fl_ip_sport;
+	uh->dest = fl->fl_ip_dport;
+	uh->len = htons(up->len);
+	uh->check = 0;
+
+	if (sk->sk_no_check == UDP_CSUM_NOXMIT) {
+		skb->ip_summed = CHECKSUM_NONE;
+		goto send;
+	}
+
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		skb->csum = csum_partial((char *)uh,
+				sizeof(struct udphdr), skb->csum);
+		uh->check = csum_ipv6_magic(&fl->fl6_src,
+					    &fl->fl6_dst,
+					    up->len, fl->proto, skb->csum);
+	} else {
+		u32 tmp_csum = 0;
+
+		skb_queue_walk(&sk->sk_write_queue, skb) {
+			tmp_csum = csum_add(tmp_csum, skb->csum);
+		}
+		tmp_csum = csum_partial((char *)uh,
+				sizeof(struct udphdr), tmp_csum);
+                tmp_csum = csum_ipv6_magic(&fl->fl6_src,
+					   &fl->fl6_dst,
+					   up->len, fl->proto, tmp_csum);
+                uh->check = tmp_csum;
+
+	}
+	if (uh->check == 0)
+		uh->check = -1;
+
+send:
+	err = ip6_push_pending_frames(sk);
+out:
+	up->len = 0;
+	up->pending = 0;
+	return err;
+}
+
+static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, 
+		  struct msghdr *msg, size_t len)
+{
+	struct ipv6_txoptions opt_space;
+	struct udp_sock *up = udp_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+	struct in6_addr *daddr, *final_p = NULL, final;
+	struct ipv6_txoptions *opt = NULL;
+	struct ip6_flowlabel *flowlabel = NULL;
+	struct flowi *fl = &inet->cork.fl;
+	struct dst_entry *dst;
+	int addr_len = msg->msg_namelen;
+	int ulen = len;
+	int hlimit = -1;
+	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+	int err;
+
+	/* destination address check */
+	if (sin6) {
+		if (addr_len < offsetof(struct sockaddr, sa_data))
+			return -EINVAL;
+
+		switch (sin6->sin6_family) {
+		case AF_INET6:
+			if (addr_len < SIN6_LEN_RFC2133)
+				return -EINVAL;
+			daddr = &sin6->sin6_addr;
+			break;
+		case AF_INET:
+			goto do_udp_sendmsg;
+		case AF_UNSPEC:
+			msg->msg_name = sin6 = NULL;
+			msg->msg_namelen = addr_len = 0;
+			daddr = NULL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else if (!up->pending) {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+		daddr = &np->daddr;
+	} else 
+		daddr = NULL;
+
+	if (daddr) {
+		if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) {
+			struct sockaddr_in sin;
+			sin.sin_family = AF_INET;
+			sin.sin_port = sin6 ? sin6->sin6_port : inet->dport;
+			sin.sin_addr.s_addr = daddr->s6_addr32[3];
+			msg->msg_name = &sin;
+			msg->msg_namelen = sizeof(sin);
+do_udp_sendmsg:
+			if (__ipv6_only_sock(sk))
+				return -ENETUNREACH;
+			return udp_sendmsg(iocb, sk, msg, len);
+		}
+	}
+
+	if (up->pending == AF_INET)
+		return udp_sendmsg(iocb, sk, msg, len);
+
+	/* Rough check on arithmetic overflow,
+	   better check is made in ip6_build_xmit
+	   */
+	if (len > INT_MAX - sizeof(struct udphdr))
+		return -EMSGSIZE;
+	
+	if (up->pending) {
+		/*
+		 * There are pending frames.
+		 * The socket lock must be held while it's corked.
+		 */
+		lock_sock(sk);
+		if (likely(up->pending)) {
+			if (unlikely(up->pending != AF_INET6)) {
+				release_sock(sk);
+				return -EAFNOSUPPORT;
+			}
+			dst = NULL;
+			goto do_append_data;
+		}
+		release_sock(sk);
+	}
+	ulen += sizeof(struct udphdr);
+
+	memset(fl, 0, sizeof(*fl));
+
+	if (sin6) {
+		if (sin6->sin6_port == 0)
+			return -EINVAL;
+
+		fl->fl_ip_dport = sin6->sin6_port;
+		daddr = &sin6->sin6_addr;
+
+		if (np->sndflow) {
+			fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+			if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+				flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+				if (flowlabel == NULL)
+					return -EINVAL;
+				daddr = &flowlabel->dst;
+			}
+		}
+
+		/*
+		 * Otherwise it will be difficult to maintain
+		 * sk->sk_dst_cache.
+		 */
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    ipv6_addr_equal(daddr, &np->daddr))
+			daddr = &np->daddr;
+
+		if (addr_len >= sizeof(struct sockaddr_in6) &&
+		    sin6->sin6_scope_id &&
+		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+			fl->oif = sin6->sin6_scope_id;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+
+		fl->fl_ip_dport = inet->dport;
+		daddr = &np->daddr;
+		fl->fl6_flowlabel = np->flow_label;
+	}
+
+	if (!fl->oif)
+		fl->oif = sk->sk_bound_dev_if;
+
+	if (msg->msg_controllen) {
+		opt = &opt_space;
+		memset(opt, 0, sizeof(struct ipv6_txoptions));
+		opt->tot_len = sizeof(*opt);
+
+		err = datagram_send_ctl(msg, fl, opt, &hlimit);
+		if (err < 0) {
+			fl6_sock_release(flowlabel);
+			return err;
+		}
+		if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+			flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+			if (flowlabel == NULL)
+				return -EINVAL;
+		}
+		if (!(opt->opt_nflen|opt->opt_flen))
+			opt = NULL;
+	}
+	if (opt == NULL)
+		opt = np->opt;
+	if (flowlabel)
+		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+
+	fl->proto = IPPROTO_UDP;
+	ipv6_addr_copy(&fl->fl6_dst, daddr);
+	if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
+		ipv6_addr_copy(&fl->fl6_src, &np->saddr);
+	fl->fl_ip_sport = inet->sport;
+	
+	/* merge ip6_build_xmit from ip6_output */
+	if (opt && opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+		ipv6_addr_copy(&final, &fl->fl6_dst);
+		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
+		fl->oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, fl);
+	if (err)
+		goto out;
+	if (final_p)
+		ipv6_addr_copy(&fl->fl6_dst, final_p);
+
+	if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) {
+		dst_release(dst);
+		goto out;
+	}
+
+	if (hlimit < 0) {
+		if (ipv6_addr_is_multicast(&fl->fl6_dst))
+			hlimit = np->mcast_hops;
+		else
+			hlimit = np->hop_limit;
+		if (hlimit < 0)
+			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+		if (hlimit < 0)
+			hlimit = ipv6_get_hoplimit(dst->dev);
+	}
+
+	if (msg->msg_flags&MSG_CONFIRM)
+		goto do_confirm;
+back_from_confirm:
+
+	lock_sock(sk);
+	if (unlikely(up->pending)) {
+		/* The socket is already corked while preparing it. */
+		/* ... which is an evident application bug. --ANK */
+		release_sock(sk);
+
+		LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n"));
+		err = -EINVAL;
+		goto out;
+	}
+
+	up->pending = AF_INET6;
+
+do_append_data:
+	up->len += ulen;
+	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
+			      hlimit, opt, fl, (struct rt6_info*)dst,
+			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+	if (err)
+		udp_v6_flush_pending_frames(sk);
+	else if (!corkreq)
+		err = udp_v6_push_pending_frames(sk, up);
+
+	if (dst)
+		ip6_dst_store(sk, dst,
+			      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
+			      &np->daddr : NULL);
+	if (err > 0)
+		err = np->recverr ? net_xmit_errno(err) : 0;
+	release_sock(sk);
+out:
+	fl6_sock_release(flowlabel);
+	if (!err) {
+		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
+		return len;
+	}
+	return err;
+
+do_confirm:
+	dst_confirm(dst);
+	if (!(msg->msg_flags&MSG_PROBE) || len)
+		goto back_from_confirm;
+	err = 0;
+	goto out;
+}
+
+static int udpv6_destroy_sock(struct sock *sk)
+{
+	lock_sock(sk);
+	udp_v6_flush_pending_frames(sk);
+	release_sock(sk);
+
+	inet6_destroy_sock(sk);
+
+	return 0;
+}
+
+/*
+ *	Socket option code for UDP
+ */
+static int udpv6_setsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int optlen)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int val;
+	int err = 0;
+
+	if (level != SOL_UDP)
+		return ipv6_setsockopt(sk, level, optname, optval, optlen);
+
+	if(optlen<sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	switch(optname) {
+	case UDP_CORK:
+		if (val != 0) {
+			up->corkflag = 1;
+		} else {
+			up->corkflag = 0;
+			lock_sock(sk);
+			udp_v6_push_pending_frames(sk, up);
+			release_sock(sk);
+		}
+		break;
+		
+	case UDP_ENCAP:
+		switch (val) {
+		case 0:
+			up->encap_type = val;
+			break;
+		default:
+			err = -ENOPROTOOPT;
+			break;
+		}
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	};
+
+	return err;
+}
+
+static int udpv6_getsockopt(struct sock *sk, int level, int optname, 
+			  char __user *optval, int __user *optlen)
+{
+	struct udp_sock *up = udp_sk(sk);
+	int val, len;
+
+	if (level != SOL_UDP)
+		return ipv6_getsockopt(sk, level, optname, optval, optlen);
+
+	if(get_user(len,optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(int));
+	
+	if(len < 0)
+		return -EINVAL;
+
+	switch(optname) {
+	case UDP_CORK:
+		val = up->corkflag;
+		break;
+
+	case UDP_ENCAP:
+		val = up->encap_type;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	};
+
+  	if(put_user(len, optlen))
+  		return -EFAULT;
+	if(copy_to_user(optval, &val,len))
+		return -EFAULT;
+  	return 0;
+}
+
+static struct inet6_protocol udpv6_protocol = {
+	.handler	=	udpv6_rcv,
+	.err_handler	=	udpv6_err,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+/* ------------------------------------------------------------------------ */
+#ifdef CONFIG_PROC_FS
+
+static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
+{
+	struct inet_sock *inet = inet_sk(sp);
+	struct ipv6_pinfo *np = inet6_sk(sp);
+	struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+
+	dest  = &np->daddr;
+	src   = &np->rcv_saddr;
+	destp = ntohs(inet->dport);
+	srcp  = ntohs(inet->sport);
+	seq_printf(seq,
+		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
+		   bucket,
+		   src->s6_addr32[0], src->s6_addr32[1],
+		   src->s6_addr32[2], src->s6_addr32[3], srcp,
+		   dest->s6_addr32[0], dest->s6_addr32[1],
+		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
+		   sp->sk_state, 
+		   atomic_read(&sp->sk_wmem_alloc),
+		   atomic_read(&sp->sk_rmem_alloc),
+		   0, 0L, 0,
+		   sock_i_uid(sp), 0,
+		   sock_i_ino(sp),
+		   atomic_read(&sp->sk_refcnt), sp);
+}
+
+static int udp6_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq,
+			   "  sl  "
+			   "local_address                         "
+			   "remote_address                        "
+			   "st tx_queue rx_queue tr tm->when retrnsmt"
+			   "   uid  timeout inode\n");
+	else
+		udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+	return 0;
+}
+
+static struct file_operations udp6_seq_fops;
+static struct udp_seq_afinfo udp6_seq_afinfo = {
+	.owner		= THIS_MODULE,
+	.name		= "udp6",
+	.family		= AF_INET6,
+	.seq_show	= udp6_seq_show,
+	.seq_fops	= &udp6_seq_fops,
+};
+
+int __init udp6_proc_init(void)
+{
+	return udp_proc_register(&udp6_seq_afinfo);
+}
+
+void udp6_proc_exit(void) {
+	udp_proc_unregister(&udp6_seq_afinfo);
+}
+#endif /* CONFIG_PROC_FS */
+
+/* ------------------------------------------------------------------------ */
+
+struct proto udpv6_prot = {
+	.name =		"UDPv6",
+	.owner =	THIS_MODULE,
+	.close =	udpv6_close,
+	.connect =	ip6_datagram_connect,
+	.disconnect =	udp_disconnect,
+	.ioctl =	udp_ioctl,
+	.destroy =	udpv6_destroy_sock,
+	.setsockopt =	udpv6_setsockopt,
+	.getsockopt =	udpv6_getsockopt,
+	.sendmsg =	udpv6_sendmsg,
+	.recvmsg =	udpv6_recvmsg,
+	.backlog_rcv =	udpv6_queue_rcv_skb,
+	.hash =		udp_v6_hash,
+	.unhash =	udp_v6_unhash,
+	.get_port =	udp_v6_get_port,
+	.obj_size =	sizeof(struct udp6_sock),
+};
+
+extern struct proto_ops inet6_dgram_ops;
+
+static struct inet_protosw udpv6_protosw = {
+	.type =      SOCK_DGRAM,
+	.protocol =  IPPROTO_UDP,
+	.prot =      &udpv6_prot,
+	.ops =       &inet6_dgram_ops,
+	.capability =-1,
+	.no_check =  UDP_CSUM_DEFAULT,
+	.flags =     INET_PROTOSW_PERMANENT,
+};
+
+
+void __init udpv6_init(void)
+{
+	if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
+		printk(KERN_ERR "udpv6_init: Could not register protocol\n");
+	inet6_register_protosw(&udpv6_protosw);
+}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
new file mode 100644
index 00000000000..28c29d78338
--- /dev/null
+++ b/net/ipv6/xfrm6_input.c
@@ -0,0 +1,150 @@
+/*
+ * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *	YOSHIFUJI Hideaki @USAGI
+ *		IPv6 support
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
+	struct ipv6hdr *inner_iph = skb->h.ipv6h;
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+		IP6_ECN_set_ce(inner_iph);
+}
+
+int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
+{
+	struct sk_buff *skb = *pskb;
+	int err;
+	u32 seq;
+	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
+	struct xfrm_state *x;
+	int xfrm_nr = 0;
+	int decaps = 0;
+	int nexthdr;
+	unsigned int nhoff;
+
+	nhoff = *nhoffp;
+	nexthdr = skb->nh.raw[nhoff];
+
+	seq = 0;
+	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
+		goto drop;
+	
+	do {
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+
+		if (xfrm_nr == XFRM_MAX_DEPTH)
+			goto drop;
+
+		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, nexthdr, AF_INET6);
+		if (x == NULL)
+			goto drop;
+		spin_lock(&x->lock);
+		if (unlikely(x->km.state != XFRM_STATE_VALID))
+			goto drop_unlock;
+
+		if (x->props.replay_window && xfrm_replay_check(x, seq))
+			goto drop_unlock;
+
+		if (xfrm_state_check_expire(x))
+			goto drop_unlock;
+
+		nexthdr = x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb);
+		if (nexthdr <= 0)
+			goto drop_unlock;
+
+		skb->nh.raw[nhoff] = nexthdr;
+
+		if (x->props.replay_window)
+			xfrm_replay_advance(x, seq);
+
+		x->curlft.bytes += skb->len;
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec[xfrm_nr++].xvec = x;
+
+		if (x->props.mode) { /* XXX */
+			if (nexthdr != IPPROTO_IPV6)
+				goto drop;
+			if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+				goto drop;
+			if (skb_cloned(skb) &&
+			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+				goto drop;
+			if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+				ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+			if (!(x->props.flags & XFRM_STATE_NOECN))
+				ipip6_ecn_decapsulate(skb);
+			skb->mac.raw = memmove(skb->data - skb->mac_len,
+					       skb->mac.raw, skb->mac_len);
+			skb->nh.raw = skb->data;
+			decaps = 1;
+			break;
+		}
+
+		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
+			goto drop;
+	} while (!err);
+
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
+	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
+		goto drop;
+
+	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
+	skb->sp->len += xfrm_nr;
+	skb->ip_summed = CHECKSUM_NONE;
+
+	if (decaps) {
+		if (!(skb->dev->flags&IFF_LOOPBACK)) {
+			dst_release(skb->dst);
+			skb->dst = NULL;
+		}
+		netif_rx(skb);
+		return -1;
+	} else {
+		return 1;
+	}
+
+drop_unlock:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+drop:
+	while (--xfrm_nr >= 0)
+		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
+	kfree_skb(skb);
+	return -1;
+}
+
+EXPORT_SYMBOL(xfrm6_rcv_spi);
+
+int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	return xfrm6_rcv_spi(pskb, nhoffp, 0);
+}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
new file mode 100644
index 00000000000..601a148f60f
--- /dev/null
+++ b/net/ipv6/xfrm6_output.c
@@ -0,0 +1,143 @@
+/*
+ * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/icmpv6.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * In transport mode, the IP header and mutable extension headers will be moved
+ * forward to make space for the encapsulation header.
+ *
+ * In tunnel mode, the top IP header will be constructed per RFC 2401.
+ * The following fields in it shall be filled in by x->type->output:
+ *	payload_len
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static void xfrm6_encap(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct ipv6hdr *iph, *top_iph;
+	int dsfield;
+
+	skb_push(skb, x->props.header_len);
+	iph = skb->nh.ipv6h;
+
+	if (!x->props.mode) {
+		u8 *prevhdr;
+		int hdr_len;
+
+		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+		skb->nh.raw = prevhdr - x->props.header_len;
+		skb->h.raw = skb->data + hdr_len;
+		memmove(skb->data, iph, hdr_len);
+		return;
+	}
+
+	skb->nh.raw = skb->data;
+	top_iph = skb->nh.ipv6h;
+	skb->nh.raw = &top_iph->nexthdr;
+	skb->h.ipv6h = top_iph + 1;
+
+	top_iph->version = 6;
+	top_iph->priority = iph->priority;
+	top_iph->flow_lbl[0] = iph->flow_lbl[0];
+	top_iph->flow_lbl[1] = iph->flow_lbl[1];
+	top_iph->flow_lbl[2] = iph->flow_lbl[2];
+	dsfield = ipv6_get_dsfield(top_iph);
+	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.flags & XFRM_STATE_NOECN)
+		dsfield &= ~INET_ECN_MASK;
+	ipv6_change_dsfield(top_iph, 0, dsfield);
+	top_iph->nexthdr = IPPROTO_IPV6; 
+	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
+	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
+	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+}
+
+static int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu, ret = 0;
+	struct dst_entry *dst = skb->dst;
+
+	mtu = dst_mtu(dst);
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+
+	if (skb->len > mtu) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		ret = -EMSGSIZE;
+	}
+
+	return ret;
+}
+
+int xfrm6_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	int err;
+	
+	if (skb->ip_summed == CHECKSUM_HW) {
+		err = skb_checksum_help(skb, 0);
+		if (err)
+			goto error_nolock;
+	}
+
+	if (x->props.mode) {
+		err = xfrm6_tunnel_check_size(skb);
+		if (err)
+			goto error_nolock;
+	}
+
+	spin_lock_bh(&x->lock);
+	err = xfrm_state_check(x, skb);
+	if (err)
+		goto error;
+
+	xfrm6_encap(skb);
+
+	err = x->type->output(x, skb);
+	if (err)
+		goto error;
+
+	x->curlft.bytes += skb->len;
+	x->curlft.packets++;
+
+	spin_unlock_bh(&x->lock);
+
+	skb->nh.raw = skb->data;
+	
+	if (!(skb->dst = dst_pop(dst))) {
+		err = -EHOSTUNREACH;
+		goto error_nolock;
+	}
+	err = NET_XMIT_BYPASS;
+
+out_exit:
+	return err;
+error:
+	spin_unlock_bh(&x->lock);
+error_nolock:
+	kfree_skb(skb);
+	goto out_exit;
+}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
new file mode 100644
index 00000000000..8a4f37de4d2
--- /dev/null
+++ b/net/ipv6/xfrm6_policy.c
@@ -0,0 +1,295 @@
+/*
+ * xfrm6_policy.c: based on xfrm4_policy.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ * 
+ */
+
+#include <linux/config.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+
+static struct dst_ops xfrm6_dst_ops;
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
+
+static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
+
+static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
+{
+	int err = 0;
+	*dst = (struct xfrm_dst*)ip6_route_output(NULL, fl);
+	if (!*dst)
+		err = -ENETUNREACH;
+	return err;
+}
+
+static struct dst_entry *
+__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	/* Still not clear if we should set fl->fl6_{src,dst}... */
+	read_lock_bh(&policy->lock);
+	for (dst = policy->bundles; dst; dst = dst->next) {
+		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
+		struct in6_addr fl_dst_prefix, fl_src_prefix;
+
+		ipv6_addr_prefix(&fl_dst_prefix,
+				 &fl->fl6_dst,
+				 xdst->u.rt6.rt6i_dst.plen);
+		ipv6_addr_prefix(&fl_src_prefix,
+				 &fl->fl6_src,
+				 xdst->u.rt6.rt6i_src.plen);
+		if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
+		    ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
+		    xfrm_bundle_ok(xdst, fl, AF_INET6)) {
+			dst_clone(dst);
+			break;
+		}
+	}
+	read_unlock_bh(&policy->lock);
+	return dst;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		      struct flowi *fl, struct dst_entry **dst_p)
+{
+	struct dst_entry *dst, *dst_prev;
+	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
+	struct rt6_info *rt  = rt0;
+	struct in6_addr *remote = &fl->fl6_dst;
+	struct in6_addr *local  = &fl->fl6_src;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip6_u = {
+				.saddr = *local,
+				.daddr = *remote
+			}
+		}
+	};
+	int i;
+	int err = 0;
+	int header_len = 0;
+	int trailer_len = 0;
+
+	dst = dst_prev = NULL;
+	dst_hold(&rt->u.dst);
+
+	for (i = 0; i < nx; i++) {
+		struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
+		struct xfrm_dst *xdst;
+		int tunnel = 0;
+
+		if (unlikely(dst1 == NULL)) {
+			err = -ENOBUFS;
+			dst_release(&rt->u.dst);
+			goto error;
+		}
+
+		if (!dst)
+			dst = dst1;
+		else {
+			dst_prev->child = dst1;
+			dst1->flags |= DST_NOHASH;
+			dst_clone(dst1);
+		}
+
+		xdst = (struct xfrm_dst *)dst1;
+		xdst->route = &rt->u.dst;
+
+		dst1->next = dst_prev;
+		dst_prev = dst1;
+		if (xfrm[i]->props.mode) {
+			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
+			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
+			tunnel = 1;
+		}
+		header_len += xfrm[i]->props.header_len;
+		trailer_len += xfrm[i]->props.trailer_len;
+
+		if (tunnel) {
+			ipv6_addr_copy(&fl_tunnel.fl6_dst, remote);
+			ipv6_addr_copy(&fl_tunnel.fl6_src, local);
+			err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
+					      &fl_tunnel, AF_INET6);
+			if (err)
+				goto error;
+		} else
+			dst_hold(&rt->u.dst);
+	}
+
+	dst_prev->child = &rt->u.dst;
+	dst->path = &rt->u.dst;
+
+	*dst_p = dst;
+	dst = dst_prev;
+
+	dst_prev = *dst_p;
+	i = 0;
+	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
+		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
+
+		dst_prev->xfrm = xfrm[i++];
+		dst_prev->dev = rt->u.dst.dev;
+		if (rt->u.dst.dev)
+			dev_hold(rt->u.dst.dev);
+		dst_prev->obsolete	= -1;
+		dst_prev->flags	       |= DST_HOST;
+		dst_prev->lastuse	= jiffies;
+		dst_prev->header_len	= header_len;
+		dst_prev->trailer_len	= trailer_len;
+		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
+
+		/* Copy neighbour for reachability confirmation */
+		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
+		dst_prev->input		= rt->u.dst.input;
+		dst_prev->output	= xfrm6_output;
+		/* Sheit... I remember I did this right. Apparently,
+		 * it was magically lost, so this code needs audit */
+		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
+		x->u.rt6.rt6i_metric   = rt0->rt6i_metric;
+		x->u.rt6.rt6i_node     = rt0->rt6i_node;
+		x->u.rt6.rt6i_gateway  = rt0->rt6i_gateway;
+		memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); 
+		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
+		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
+		header_len -= x->u.dst.xfrm->props.header_len;
+		trailer_len -= x->u.dst.xfrm->props.trailer_len;
+	}
+
+	xfrm_init_pmtu(dst);
+	return 0;
+
+error:
+	if (dst)
+		dst_free(dst);
+	return err;
+}
+
+static inline void
+_decode_session6(struct sk_buff *skb, struct flowi *fl)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+
+	memset(fl, 0, sizeof(struct flowi));
+	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
+	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+
+	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
+		switch (nexthdr) {
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_HOP:
+		case NEXTHDR_DEST:
+			offset += ipv6_optlen(exthdr);
+			nexthdr = exthdr->nexthdr;
+			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			break;
+
+		case IPPROTO_UDP:
+		case IPPROTO_TCP:
+		case IPPROTO_SCTP:
+			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+				u16 *ports = (u16 *)exthdr;
+
+				fl->fl_ip_sport = ports[0];
+				fl->fl_ip_dport = ports[1];
+			}
+			fl->proto = nexthdr;
+			return;
+
+		case IPPROTO_ICMPV6:
+			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+				u8 *icmp = (u8 *)exthdr;
+
+				fl->fl_icmp_type = icmp[0];
+				fl->fl_icmp_code = icmp[1];
+			}
+			fl->proto = nexthdr;
+			return;
+
+		/* XXX Why are there these headers? */
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_COMP:
+		default:
+			fl->fl_ipsec_spi = 0;
+			fl->proto = nexthdr;
+			return;
+		};
+	}
+}
+
+static inline int xfrm6_garbage_collect(void)
+{
+	read_lock(&xfrm6_policy_afinfo.lock);
+	xfrm6_policy_afinfo.garbage_collect();
+	read_unlock(&xfrm6_policy_afinfo.lock);
+	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
+}
+
+static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	struct dst_entry *path = xdst->route;
+
+	path->ops->update_pmtu(path, mtu);
+}
+
+static struct dst_ops xfrm6_dst_ops = {
+	.family =		AF_INET6,
+	.protocol =		__constant_htons(ETH_P_IPV6),
+	.gc =			xfrm6_garbage_collect,
+	.update_pmtu =		xfrm6_update_pmtu,
+	.gc_thresh =		1024,
+	.entry_size =		sizeof(struct xfrm_dst),
+};
+
+static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+	.family =		AF_INET6,
+	.lock = 		RW_LOCK_UNLOCKED,
+	.type_map = 		&xfrm6_type_map,
+	.dst_ops =		&xfrm6_dst_ops,
+	.dst_lookup =		xfrm6_dst_lookup,
+	.find_bundle =		__xfrm6_find_bundle,
+	.bundle_create =	__xfrm6_bundle_create,
+	.decode_session =	_decode_session6,
+};
+
+static void __init xfrm6_policy_init(void)
+{
+	xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
+}
+
+static void xfrm6_policy_fini(void)
+{
+	xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
+}
+
+void __init xfrm6_init(void)
+{
+	xfrm6_policy_init();
+	xfrm6_state_init();
+}
+
+void xfrm6_fini(void)
+{
+	//xfrm6_input_fini();
+	xfrm6_policy_fini();
+	xfrm6_state_fini();
+}
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
new file mode 100644
index 00000000000..bf0d0abc387
--- /dev/null
+++ b/net/ipv6/xfrm6_state.c
@@ -0,0 +1,136 @@
+/*
+ * xfrm6_state.c: based on xfrm4_state.c
+ *
+ * Authors:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <net/ipv6.h>
+
+static struct xfrm_state_afinfo xfrm6_state_afinfo;
+
+static void
+__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		     struct xfrm_tmpl *tmpl,
+		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	/* Initialize temporary selector matching only
+	 * to current session. */
+	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
+	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
+	x->sel.dport = xfrm_flowi_dport(fl);
+	x->sel.dport_mask = ~0;
+	x->sel.sport = xfrm_flowi_sport(fl);
+	x->sel.sport_mask = ~0;
+	x->sel.prefixlen_d = 128;
+	x->sel.prefixlen_s = 128;
+	x->sel.proto = fl->proto;
+	x->sel.ifindex = fl->oif;
+	x->id = tmpl->id;
+	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
+		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
+	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
+		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+	x->props.mode = tmpl->mode;
+	x->props.reqid = tmpl->reqid;
+	x->props.family = AF_INET6;
+}
+
+static struct xfrm_state *
+__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+{
+	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
+	struct xfrm_state *x;
+
+	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
+		if (x->props.family == AF_INET6 &&
+		    spi == x->id.spi &&
+		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
+		    proto == x->id.proto) {
+			xfrm_state_hold(x);
+			return x;
+		}
+	}
+	return NULL;
+}
+
+static struct xfrm_state *
+__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
+		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		 int create)
+{
+	struct xfrm_state *x, *x0;
+	unsigned h = __xfrm6_dst_hash(daddr);
+
+	x0 = NULL;
+
+	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
+		if (x->props.family == AF_INET6 &&
+		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
+		    mode == x->props.mode &&
+		    proto == x->id.proto &&
+		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
+		    reqid == x->props.reqid &&
+		    x->km.state == XFRM_STATE_ACQ &&
+		    !x->id.spi) {
+			    x0 = x;
+			    break;
+		    }
+	}
+	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
+		ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
+			       (struct in6_addr *)daddr);
+		ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
+			       (struct in6_addr *)saddr);
+		x0->sel.prefixlen_d = 128;
+		x0->sel.prefixlen_s = 128;
+		ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
+			       (struct in6_addr *)saddr);
+		x0->km.state = XFRM_STATE_ACQ;
+		ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
+			       (struct in6_addr *)daddr);
+		x0->id.proto = proto;
+		x0->props.family = AF_INET6;
+		x0->props.mode = mode;
+		x0->props.reqid = reqid;
+		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x0);
+		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+		add_timer(&x0->timer);
+		xfrm_state_hold(x0);
+		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
+		wake_up(&km_waitq);
+	}
+	if (x0)
+		xfrm_state_hold(x0);
+	return x0;
+}
+
+static struct xfrm_state_afinfo xfrm6_state_afinfo = {
+	.family			= AF_INET6,
+	.lock			= RW_LOCK_UNLOCKED,
+	.init_tempsel		= __xfrm6_init_tempsel,
+	.state_lookup		= __xfrm6_state_lookup,
+	.find_acq		= __xfrm6_find_acq,
+};
+
+void __init xfrm6_state_init(void)
+{
+	xfrm_state_register_afinfo(&xfrm6_state_afinfo);
+}
+
+void xfrm6_state_fini(void)
+{
+	xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
+}
+
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
new file mode 100644
index 00000000000..ffcadd68b95
--- /dev/null
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>
+ * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ *
+ * Based on net/ipv4/xfrm4_tunnel.c
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/xfrm.h>
+#include <linux/list.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
+# define X6TDEBUG	3
+#else
+# define X6TDEBUG	1
+#endif
+
+#define X6TPRINTK(fmt, args...)		printk(fmt, ## args)
+#define X6TNOPRINTK(fmt, args...)	do { ; } while(0)
+
+#if X6TDEBUG >= 1
+# define X6TPRINTK1	X6TPRINTK
+#else
+# define X6TPRINTK1	X6TNOPRINTK
+#endif
+
+#if X6TDEBUG >= 3
+# define X6TPRINTK3	X6TPRINTK
+#else
+# define X6TPRINTK3	X6TNOPRINTK
+#endif
+
+/*
+ * xfrm_tunnel_spi things are for allocating unique id ("spi") 
+ * per xfrm_address_t.
+ */
+struct xfrm6_tunnel_spi {
+	struct hlist_node list_byaddr;
+	struct hlist_node list_byspi;
+	xfrm_address_t addr;
+	u32 spi;
+	atomic_t refcnt;
+#ifdef XFRM6_TUNNEL_SPI_MAGIC
+	u32 magic;
+#endif
+};
+
+#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
+# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef
+#endif
+
+static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock);
+
+static u32 xfrm6_tunnel_spi;
+
+#define XFRM6_TUNNEL_SPI_MIN	1
+#define XFRM6_TUNNEL_SPI_MAX	0xffffffff
+
+static kmem_cache_t *xfrm6_tunnel_spi_kmem;
+
+#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
+#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
+
+static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
+static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
+
+#ifdef XFRM6_TUNNEL_SPI_MAGIC
+static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
+			     const char *name)
+{
+	if (unlikely(x6spi->magic != XFRM6_TUNNEL_SPI_MAGIC)) {
+		X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
+				      "at %p has corrupted magic %08x "
+				      "(should be %08x)\n",
+			   name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC);
+		return -1;
+	}
+	return 0;
+}
+#else
+static int inline x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
+				    const char *name)
+{
+	return 0;
+}
+#endif
+
+#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__)
+
+
+static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
+{
+	unsigned h;
+
+	X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr);
+
+	h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
+	h ^= h >> 16;
+	h ^= h >> 8;
+	h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
+
+	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h);
+
+	return h;
+}
+
+static unsigned inline xfrm6_tunnel_spi_hash_byspi(u32 spi)
+{
+	return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
+}
+
+
+static int xfrm6_tunnel_spi_init(void)
+{
+	int i;
+
+	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
+
+	xfrm6_tunnel_spi = 0;
+	xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
+						  sizeof(struct xfrm6_tunnel_spi),
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+	if (!xfrm6_tunnel_spi_kmem) {
+		X6TPRINTK1(KERN_ERR
+			   "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n",
+		           __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]);
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]);
+	return 0;
+}
+
+static void xfrm6_tunnel_spi_fini(void)
+{
+	int i;
+
+	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
+
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) {
+		if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i]))
+			goto err;
+	}
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) {
+		if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
+			goto err;
+	}
+	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
+	xfrm6_tunnel_spi_kmem = NULL;
+	return;
+err:
+	X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__);
+	return;
+}
+
+static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_spi *x6spi;
+	struct hlist_node *pos;
+
+	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+
+	hlist_for_each_entry(x6spi, pos,
+			     &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+			     list_byaddr) {
+		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+			X6SPI_CHECK_MAGIC(x6spi);
+			X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi);
+			return x6spi;
+		}
+	}
+
+	X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__);
+	return NULL;
+}
+
+u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_spi *x6spi;
+	u32 spi;
+
+	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+
+	read_lock_bh(&xfrm6_tunnel_spi_lock);
+	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
+	spi = x6spi ? x6spi->spi : 0;
+	read_unlock_bh(&xfrm6_tunnel_spi_lock);
+	return spi;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
+
+static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
+{
+	u32 spi;
+	struct xfrm6_tunnel_spi *x6spi;
+	struct hlist_node *pos;
+	unsigned index;
+
+	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+
+	if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
+	    xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
+		xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN;
+	else
+		xfrm6_tunnel_spi++;
+
+	for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
+		index = xfrm6_tunnel_spi_hash_byspi(spi);
+		hlist_for_each_entry(x6spi, pos, 
+				     &xfrm6_tunnel_spi_byspi[index], 
+				     list_byspi) {
+			if (x6spi->spi == spi)
+				goto try_next_1;
+		}
+		xfrm6_tunnel_spi = spi;
+		goto alloc_spi;
+try_next_1:;
+	}
+	for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) {
+		index = xfrm6_tunnel_spi_hash_byspi(spi);
+		hlist_for_each_entry(x6spi, pos, 
+				     &xfrm6_tunnel_spi_byspi[index], 
+				     list_byspi) {
+			if (x6spi->spi == spi)
+				goto try_next_2;
+		}
+		xfrm6_tunnel_spi = spi;
+		goto alloc_spi;
+try_next_2:;
+	}
+	spi = 0;
+	goto out;
+alloc_spi:
+	X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for "
+			      "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
+			      __FUNCTION__, 
+			      NIP6(*(struct in6_addr *)saddr));
+	x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
+	if (!x6spi) {
+		X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n", 
+			   __FUNCTION__);
+		goto out;
+	}
+#ifdef XFRM6_TUNNEL_SPI_MAGIC
+	x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC;
+#endif
+	memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
+	x6spi->spi = spi;
+	atomic_set(&x6spi->refcnt, 1);
+
+	hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
+
+	index = xfrm6_tunnel_spi_hash_byaddr(saddr);
+	hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
+	X6SPI_CHECK_MAGIC(x6spi);
+out:
+	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
+	return spi;
+}
+
+u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_spi *x6spi;
+	u32 spi;
+
+	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+
+	write_lock_bh(&xfrm6_tunnel_spi_lock);
+	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
+	if (x6spi) {
+		atomic_inc(&x6spi->refcnt);
+		spi = x6spi->spi;
+	} else
+		spi = __xfrm6_tunnel_alloc_spi(saddr);
+	write_unlock_bh(&xfrm6_tunnel_spi_lock);
+
+	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
+
+	return spi;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
+
+void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
+{
+	struct xfrm6_tunnel_spi *x6spi;
+	struct hlist_node *pos, *n;
+
+	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
+
+	write_lock_bh(&xfrm6_tunnel_spi_lock);
+
+	hlist_for_each_entry_safe(x6spi, pos, n, 
+				  &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
+				  list_byaddr)
+	{
+		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
+			X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
+					      "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+					      "found at %p\n",
+				   __FUNCTION__, 
+				   NIP6(*(struct in6_addr *)saddr),
+				   x6spi);
+			X6SPI_CHECK_MAGIC(x6spi);
+			if (atomic_dec_and_test(&x6spi->refcnt)) {
+				hlist_del(&x6spi->list_byaddr);
+				hlist_del(&x6spi->list_byspi);
+				kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
+				break;
+			}
+		}
+	}
+	write_unlock_bh(&xfrm6_tunnel_spi_lock);
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
+
+static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *top_iph;
+
+	top_iph = (struct ipv6hdr *)skb->data;
+	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+	return 0;
+}
+
+static int xfrm6_tunnel_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm6_tunnel *xfrm6_tunnel_handler;
+static DECLARE_MUTEX(xfrm6_tunnel_sem);
+
+int xfrm6_tunnel_register(struct xfrm6_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm6_tunnel_sem);
+	ret = 0;
+	if (xfrm6_tunnel_handler != NULL)
+		ret = -EINVAL;
+	if (!ret)
+		xfrm6_tunnel_handler = handler;
+	up(&xfrm6_tunnel_sem);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_register);
+
+int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
+{
+	int ret;
+
+	down(&xfrm6_tunnel_sem);
+	ret = 0;
+	if (xfrm6_tunnel_handler != handler)
+		ret = -EINVAL;
+	if (!ret)
+		xfrm6_tunnel_handler = NULL;
+	up(&xfrm6_tunnel_sem);
+
+	synchronize_net();
+
+	return ret;
+}
+
+EXPORT_SYMBOL(xfrm6_tunnel_deregister);
+
+static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	u32 spi;
+
+	/* device-like_ip6ip6_handler() */
+	if (handler && handler->handler(pskb, nhoffp) == 0)
+		return 0;
+
+	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
+	return xfrm6_rcv_spi(pskb, nhoffp, spi);
+}
+
+static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			     int type, int code, int offset, __u32 info)
+{
+	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
+
+	/* call here first for device-like ip6ip6 err handling */
+	if (handler) {
+		handler->err_handler(skb, opt, type, code, offset, info);
+		return;
+	}
+
+	/* xfrm6_tunnel native err handling */
+	switch (type) {
+	case ICMPV6_DEST_UNREACH: 
+		switch (code) {
+		case ICMPV6_NOROUTE: 
+		case ICMPV6_ADM_PROHIBITED:
+		case ICMPV6_NOT_NEIGHBOUR:
+		case ICMPV6_ADDR_UNREACH:
+		case ICMPV6_PORT_UNREACH:
+		default:
+			X6TPRINTK3(KERN_DEBUG
+				   "xfrm6_tunnel: Destination Unreach.\n");
+			break;
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+			X6TPRINTK3(KERN_DEBUG 
+				   "xfrm6_tunnel: Packet Too Big.\n");
+		break;
+	case ICMPV6_TIME_EXCEED:
+		switch (code) {
+		case ICMPV6_EXC_HOPLIMIT:
+			X6TPRINTK3(KERN_DEBUG
+				   "xfrm6_tunnel: Too small Hoplimit.\n");
+			break;
+		case ICMPV6_EXC_FRAGTIME:
+		default: 
+			break;
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		switch (code) {
+		case ICMPV6_HDR_FIELD: break;
+		case ICMPV6_UNK_NEXTHDR: break;
+		case ICMPV6_UNK_OPTION: break;
+		}
+		break;
+	default:
+		break;
+	}
+	return;
+}
+
+static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args)
+{
+	if (!x->props.mode)
+		return -EINVAL;
+
+	if (x->encap)
+		return -EINVAL;
+
+	x->props.header_len = sizeof(struct ipv6hdr);
+
+	return 0;
+}
+
+static void xfrm6_tunnel_destroy(struct xfrm_state *x)
+{
+	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
+}
+
+static struct xfrm_type xfrm6_tunnel_type = {
+	.description	= "IP6IP6",
+	.owner          = THIS_MODULE,
+	.proto		= IPPROTO_IPV6,
+	.init_state	= xfrm6_tunnel_init_state,
+	.destructor	= xfrm6_tunnel_destroy,
+	.input		= xfrm6_tunnel_input,
+	.output		= xfrm6_tunnel_output,
+};
+
+static struct inet6_protocol xfrm6_tunnel_protocol = {
+	.handler	= xfrm6_tunnel_rcv,
+	.err_handler	= xfrm6_tunnel_err, 
+	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static int __init xfrm6_tunnel_init(void)
+{
+	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
+
+	if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
+		X6TPRINTK1(KERN_ERR
+			   "xfrm6_tunnel init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+	if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) {
+		X6TPRINTK1(KERN_ERR
+			   "xfrm6_tunnel init(): can't add protocol\n");
+		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+		return -EAGAIN;
+	}
+	if (xfrm6_tunnel_spi_init() < 0) {
+		X6TPRINTK1(KERN_ERR
+			   "xfrm6_tunnel init: failed to initialize spi\n");
+		inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
+		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void __exit xfrm6_tunnel_fini(void)
+{
+	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
+
+	xfrm6_tunnel_spi_fini();
+	if (inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0)
+		X6TPRINTK1(KERN_ERR 
+			   "xfrm6_tunnel close: can't remove protocol\n");
+	if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0)
+		X6TPRINTK1(KERN_ERR
+			   "xfrm6_tunnel close: can't remove xfrm type\n");
+}
+
+module_init(xfrm6_tunnel_init);
+module_exit(xfrm6_tunnel_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipx/ChangeLog b/net/ipx/ChangeLog
new file mode 100644
index 00000000000..3b29763751a
--- /dev/null
+++ b/net/ipx/ChangeLog
@@ -0,0 +1,101 @@
+ Revision 0.21:	Uses the new generic socket option code.
+
+ Revision 0.22:	Gcc clean ups and drop out device registration. Use the
+ 		new multi-protocol edition of hard_header
+
+ Revision 0.23: IPX /proc by Mark Evans. Adding a route will
+ 		will overwrite any existing route to the same network.
+
+ Revision 0.24:	Supports new /proc with no 4K limit
+
+ Revision 0.25:	Add ephemeral sockets, passive local network
+ 		identification, support for local net 0 and
+ 		multiple datalinks <Greg Page>
+
+ Revision 0.26: Device drop kills IPX routes via it. (needed for module)
+
+ Revision 0.27: Autobind <Mark Evans>
+
+ Revision 0.28: Small fix for multiple local networks <Thomas Winder>
+
+ Revision 0.29: Assorted major errors removed <Mark Evans>
+ 		Small correction to promisc mode error fix <Alan Cox>
+ 		Asynchronous I/O support. Changed to use notifiers
+ 		and the newer packet_type stuff. Assorted major
+ 		fixes <Alejandro Liu>
+
+ Revision 0.30:	Moved to net/ipx/...	<Alan Cox>
+ 		Don't set address length on recvfrom that errors.
+ 		Incorrect verify_area.
+
+ Revision 0.31:	New sk_buffs. This still needs a lot of 
+ 		testing. <Alan Cox>
+
+ Revision 0.32: Using sock_alloc_send_skb, firewall hooks. <Alan Cox>
+ 		Supports sendmsg/recvmsg
+
+ Revision 0.33:	Internal network support, routing changes, uses a
+ 		protocol private area for ipx data.
+
+ Revision 0.34:	Module support. <Jim Freeman>
+
+ Revision 0.35: Checksum support. <Neil Turton>, hooked in by <Alan Cox>
+ 		Handles WIN95 discovery packets <Volker Lendecke>
+
+ Revision 0.36:	Internal bump up for 2.1
+
+ Revision 0.37:	Began adding POSIXisms.
+
+ Revision 0.38: Asynchronous socket stuff made current.
+
+ Revision 0.39: SPX interfaces
+
+ Revision 0.40: Tiny SIOCGSTAMP fix (chris@cybernet.co.nz)
+
+ Revision 0.41: 802.2TR removed (p.norton@computer.org)
+ 		Fixed connecting to primary net,
+ 		Automatic binding on send & receive,
+ 		Martijn van Oosterhout <kleptogimp@geocities.com>
+
+ Revision 042:  Multithreading - use spinlocks and refcounting to
+ 		protect some structures: ipx_interface sock list, list
+ 		of ipx interfaces, etc. 
+ 		Bugfixes - do refcounting on net_devices, check function
+ 		results, etc. Thanks to davem and freitag for
+ 		suggestions and guidance.
+ 		Arnaldo Carvalho de Melo <acme@conectiva.com.br>,
+ 		November, 2000
+
+ Revision 043:	Shared SKBs, don't mangle packets, some cleanups
+ 		Arnaldo Carvalho de Melo <acme@conectiva.com.br>,
+ 		December, 2000
+
+ Revision 044:	Call ipxitf_hold on NETDEV_UP - acme
+
+ Revision 045:	fix PPROP routing bug - acme
+
+ Revision 046:	Further fixes to PPROP, ipxitf_create_internal was
+ 		doing an unneeded MOD_INC_USE_COUNT, implement
+ 		sysctl for ipx_pprop_broacasting, fix the ipx sysctl
+ 		handling, making it dynamic, some cleanups, thanks to
+ 		Petr Vandrovec for review and good suggestions. (acme)
+
+ Revision 047:	Cleanups, CodingStyle changes, move the ncp connection
+ 		hack out of line - acme
+
+ Revision 048:	Use sk->protinfo to store the pointer to IPX private
+ 		area, remove af_ipx from sk->protinfo and move ipx_opt
+ 		to include/net/ipx.h, use IPX_SK like DecNET, etc - acme
+
+ Revision 049:	SPX support dropped, see comment in ipx_create - acme
+
+ Revision 050:	Use seq_file for proc stuff, moving it to ipx_proc.c - acme
+
+Other fixes:
+ 
+ Protect the module by a MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT pair. Also, now
+ usage count is managed this way:
+ -Count one if the auto_interface mode is on
+ -Count one per configured interface
+ 
+ Jacques Gelinas (jacques@solucorp.qc.ca)
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
new file mode 100644
index 00000000000..a16237c0e78
--- /dev/null
+++ b/net/ipx/Kconfig
@@ -0,0 +1,31 @@
+#
+# IPX configuration
+#
+config IPX_INTERN
+	bool "IPX: Full internal IPX network"
+	depends on IPX
+	---help---
+	  Every IPX network has an address that identifies it. Sometimes it is
+	  useful to give an IPX "network" address to your Linux box as well
+	  (for example if your box is acting as a file server for different
+	  IPX networks: it will then be accessible from everywhere using the
+	  same address). The way this is done is to create a virtual internal
+	  "network" inside your box and to assign an IPX address to this
+	  network. Say Y here if you want to do this; read the IPX-HOWTO at
+	  <http://www.tldp.org/docs.html#howto> for details.
+
+	  The full internal IPX network enables you to allocate sockets on
+	  different virtual nodes of the internal network. This is done by
+	  evaluating the field sipx_node of the socket address given to the
+	  bind call. So applications should always initialize the node field
+	  to 0 when binding a socket on the primary network. In this case the
+	  socket is assigned the default node that has been given to the
+	  kernel when the internal network was created. By enabling the full
+	  internal IPX network the cross-forwarding of packets targeted at
+	  'special' sockets to sockets listening on the primary network is
+	  disabled. This might break existing applications, especially RIP/SAP
+	  daemons. A RIP/SAP daemon that works well with the full internal net
+	  can be found on <ftp://ftp.gwdg.de/pub/linux/misc/ncpfs/>.
+
+	  If you don't know what you are doing, say N.
+
diff --git a/net/ipx/Makefile b/net/ipx/Makefile
new file mode 100644
index 00000000000..4b95e3ea0f8
--- /dev/null
+++ b/net/ipx/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux IPX layer.
+#
+
+obj-$(CONFIG_IPX) += ipx.o
+
+ipx-y			:= af_ipx.o ipx_route.o ipx_proc.o
+ipx-$(CONFIG_SYSCTL)	+= sysctl_net_ipx.o
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
new file mode 100644
index 00000000000..5a27e5df588
--- /dev/null
+++ b/net/ipx/af_ipx.c
@@ -0,0 +1,2024 @@
+/*
+ *	Implements an IPX socket layer.
+ *
+ *	This code is derived from work by
+ *		Ross Biro	: 	Writing the original IP stack
+ *		Fred Van Kempen :	Tidying up the TCP/IP
+ *
+ *	Many thanks go to Keith Baker, Institute For Industrial Information
+ *	Technology Ltd, Swansea University for allowing me to work on this
+ *	in my own time even though it was in some ways related to commercial
+ *	work I am currently employed to do there.
+ *
+ *	All the material in this file is subject to the Gnu license version 2.
+ *	Neither Alan Cox nor the Swansea University Computer Society admit 
+ *	liability nor provide warranty for any of this software. This material
+ *	is provided as is and at no charge.
+ *
+ *	Portions Copyright (c) 2000-2003 Conectiva, Inc. <acme@conectiva.com.br>
+ *	Neither Arnaldo Carvalho de Melo nor Conectiva, Inc. admit liability nor
+ *	provide warranty for any of this software. This material is provided
+ *	"AS-IS" and at no charge.
+ *
+ * 	Portions Copyright (c) 1995 Caldera, Inc. <greg@caldera.com>
+ *	Neither Greg Page nor Caldera, Inc. admit liability nor provide
+ *	warranty for any of this software. This material is provided
+ *	"AS-IS" and at no charge.
+ *
+ *	See net/ipx/ChangeLog.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/ipx.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/uio.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/string.h>
+#include <linux/tcp.h>
+#include <linux/types.h>
+#include <linux/termios.h>
+
+#include <net/ipx.h>
+#include <net/p8022.h>
+#include <net/psnap.h>
+#include <net/sock.h>
+
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_SYSCTL
+extern void ipx_register_sysctl(void);
+extern void ipx_unregister_sysctl(void);
+#else
+#define ipx_register_sysctl()
+#define ipx_unregister_sysctl()
+#endif
+
+/* Configuration Variables */
+static unsigned char ipxcfg_max_hops = 16;
+static char ipxcfg_auto_select_primary;
+static char ipxcfg_auto_create_interfaces;
+int sysctl_ipx_pprop_broadcasting = 1;
+
+/* Global Variables */
+static struct datalink_proto *p8022_datalink;
+static struct datalink_proto *pEII_datalink;
+static struct datalink_proto *p8023_datalink;
+static struct datalink_proto *pSNAP_datalink;
+
+static struct proto_ops ipx_dgram_ops;
+
+LIST_HEAD(ipx_interfaces);
+DEFINE_SPINLOCK(ipx_interfaces_lock);
+
+struct ipx_interface *ipx_primary_net;
+struct ipx_interface *ipx_internal_net;
+
+extern int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc,
+			    unsigned char *node);
+extern void ipxrtr_del_routes(struct ipx_interface *intrfc);
+extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
+			       struct iovec *iov, int len, int noblock);
+extern int ipxrtr_route_skb(struct sk_buff *skb);
+extern struct ipx_route *ipxrtr_lookup(__u32 net);
+extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
+
+#undef IPX_REFCNT_DEBUG
+#ifdef IPX_REFCNT_DEBUG
+atomic_t ipx_sock_nr;
+#endif
+
+struct ipx_interface *ipx_interfaces_head(void)
+{
+	struct ipx_interface *rc = NULL;
+
+	if (!list_empty(&ipx_interfaces))
+		rc = list_entry(ipx_interfaces.next,
+				struct ipx_interface, node);
+	return rc;
+}
+
+static void ipxcfg_set_auto_select(char val)
+{
+	ipxcfg_auto_select_primary = val;
+	if (val && !ipx_primary_net)
+		ipx_primary_net = ipx_interfaces_head();
+}
+
+static int ipxcfg_get_config_data(struct ipx_config_data __user *arg)
+{
+	struct ipx_config_data vals;
+
+	vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces;
+	vals.ipxcfg_auto_select_primary	   = ipxcfg_auto_select_primary;
+
+	return copy_to_user(arg, &vals, sizeof(vals)) ? -EFAULT : 0;
+}
+
+/*
+ * Note: Sockets may not be removed _during_ an interrupt or inet_bh
+ * handler using this technique. They can be added although we do not
+ * use this facility.
+ */
+
+static void ipx_remove_socket(struct sock *sk)
+{
+	/* Determine interface with which socket is associated */
+	struct ipx_interface *intrfc = ipx_sk(sk)->intrfc;
+
+	if (!intrfc)
+		goto out;
+
+	ipxitf_hold(intrfc);
+	spin_lock_bh(&intrfc->if_sklist_lock);
+	sk_del_node_init(sk);
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+	ipxitf_put(intrfc);
+out:
+	return;
+}
+
+static void ipx_destroy_socket(struct sock *sk)
+{
+	ipx_remove_socket(sk);
+	skb_queue_purge(&sk->sk_receive_queue);
+#ifdef IPX_REFCNT_DEBUG
+        atomic_dec(&ipx_sock_nr);
+        printk(KERN_DEBUG "IPX socket %p released, %d are still alive\n", sk,
+			atomic_read(&ipx_sock_nr));
+	if (atomic_read(&sk->sk_refcnt) != 1)
+		printk(KERN_DEBUG "Destruction sock ipx %p delayed, cnt=%d\n",
+				sk, atomic_read(&sk->sk_refcnt));
+#endif
+	sock_put(sk);
+}
+
+/* 
+ * The following code is used to support IPX Interfaces (IPXITF).  An
+ * IPX interface is defined by a physical device and a frame type.
+ */
+
+/* ipxitf_clear_primary_net has to be called with ipx_interfaces_lock held */
+
+static void ipxitf_clear_primary_net(void)
+{
+	ipx_primary_net = NULL;
+	if (ipxcfg_auto_select_primary)
+		ipx_primary_net = ipx_interfaces_head();
+}
+
+static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev,
+						      unsigned short datalink)
+{
+	struct ipx_interface *i;
+
+	list_for_each_entry(i, &ipx_interfaces, node)
+		if (i->if_dev == dev && i->if_dlink_type == datalink)
+			goto out;
+	i = NULL;
+out:
+	return i;
+}
+
+static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev,
+						    unsigned short datalink)
+{
+	struct ipx_interface *i;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	i = __ipxitf_find_using_phys(dev, datalink);
+	if (i)
+		ipxitf_hold(i);
+	spin_unlock_bh(&ipx_interfaces_lock);
+	return i;
+}
+
+struct ipx_interface *ipxitf_find_using_net(__u32 net)
+{
+	struct ipx_interface *i;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	if (net) {
+		list_for_each_entry(i, &ipx_interfaces, node)
+			if (i->if_netnum == net)
+				goto hold;
+		i = NULL;
+		goto unlock;
+	}
+
+	i = ipx_primary_net;
+	if (i)
+hold:
+		ipxitf_hold(i);
+unlock:
+	spin_unlock_bh(&ipx_interfaces_lock);
+	return i;
+}
+
+/* Sockets are bound to a particular IPX interface. */
+static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk)
+{
+	ipxitf_hold(intrfc);
+	spin_lock_bh(&intrfc->if_sklist_lock);
+	ipx_sk(sk)->intrfc = intrfc;
+	sk_add_node(sk, &intrfc->if_sklist);
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+	ipxitf_put(intrfc);
+}
+
+/* caller must hold intrfc->if_sklist_lock */
+static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc,
+					 unsigned short port)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &intrfc->if_sklist)
+		if (ipx_sk(s)->port == port)
+			goto found;
+	s = NULL;
+found:
+	return s;
+}
+
+/* caller must hold a reference to intrfc */
+static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc,
+					unsigned short port)
+{
+	struct sock *s;
+
+	spin_lock_bh(&intrfc->if_sklist_lock);
+	s = __ipxitf_find_socket(intrfc, port);
+	if (s)
+		sock_hold(s);
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+
+	return s;
+}
+
+#ifdef CONFIG_IPX_INTERN
+static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
+						unsigned char *ipx_node,
+						unsigned short port)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	ipxitf_hold(intrfc);
+	spin_lock_bh(&intrfc->if_sklist_lock);
+
+	sk_for_each(s, node, &intrfc->if_sklist) {
+		struct ipx_sock *ipxs = ipx_sk(s);
+
+		if (ipxs->port == port &&
+		    !memcmp(ipx_node, ipxs->node, IPX_NODE_LEN))
+			goto found;
+	}
+	s = NULL;
+found:
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+	ipxitf_put(intrfc);
+	return s;
+}
+#endif
+
+static void __ipxitf_down(struct ipx_interface *intrfc)
+{
+	struct sock *s;
+	struct hlist_node *node, *t;
+
+	/* Delete all routes associated with this interface */
+	ipxrtr_del_routes(intrfc);
+
+	spin_lock_bh(&intrfc->if_sklist_lock);
+	/* error sockets */
+	sk_for_each_safe(s, node, t, &intrfc->if_sklist) {
+		struct ipx_sock *ipxs = ipx_sk(s);
+
+		s->sk_err = ENOLINK;
+		s->sk_error_report(s);
+		ipxs->intrfc = NULL;
+		ipxs->port   = 0;
+		sock_set_flag(s, SOCK_ZAPPED); /* Indicates it is no longer bound */
+		sk_del_node_init(s);
+	}
+	INIT_HLIST_HEAD(&intrfc->if_sklist);
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+
+	/* remove this interface from list */
+	list_del(&intrfc->node);
+
+	/* remove this interface from *special* networks */
+	if (intrfc == ipx_primary_net)
+		ipxitf_clear_primary_net();
+	if (intrfc == ipx_internal_net)
+		ipx_internal_net = NULL;
+
+	if (intrfc->if_dev)
+		dev_put(intrfc->if_dev);
+	kfree(intrfc);
+}
+
+void ipxitf_down(struct ipx_interface *intrfc)
+{
+	spin_lock_bh(&ipx_interfaces_lock);
+	__ipxitf_down(intrfc);
+	spin_unlock_bh(&ipx_interfaces_lock);
+}
+
+static __inline__ void __ipxitf_put(struct ipx_interface *intrfc)
+{
+	if (atomic_dec_and_test(&intrfc->refcnt))
+		__ipxitf_down(intrfc);
+}
+
+static int ipxitf_device_event(struct notifier_block *notifier,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct ipx_interface *i, *tmp;
+
+	if (event != NETDEV_DOWN && event != NETDEV_UP)
+		goto out;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	list_for_each_entry_safe(i, tmp, &ipx_interfaces, node)
+		if (i->if_dev == dev) {
+			if (event == NETDEV_UP)
+				ipxitf_hold(i);
+			else
+				__ipxitf_put(i);
+		}
+	spin_unlock_bh(&ipx_interfaces_lock);
+out:
+	return NOTIFY_DONE;
+}
+
+
+static __exit void ipxitf_cleanup(void)
+{
+	struct ipx_interface *i, *tmp;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	list_for_each_entry_safe(i, tmp, &ipx_interfaces, node) 
+		__ipxitf_put(i);
+	spin_unlock_bh(&ipx_interfaces_lock);
+}
+
+static void ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb)
+{
+	if (sock_queue_rcv_skb(sock, skb) < 0)
+		kfree_skb(skb);
+}
+
+/*
+ * On input skb->sk is NULL. Nobody is charged for the memory.
+ */
+
+/* caller must hold a reference to intrfc */
+
+#ifdef CONFIG_IPX_INTERN
+static int ipxitf_demux_socket(struct ipx_interface *intrfc,
+			       struct sk_buff *skb, int copy)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node,
+				   IPX_NODE_LEN);
+	struct sock *s;
+	struct hlist_node *node;
+	int rc;
+
+	spin_lock_bh(&intrfc->if_sklist_lock);
+
+	sk_for_each(s, node, &intrfc->if_sklist) {
+		struct ipx_sock *ipxs = ipx_sk(s);
+
+		if (ipxs->port == ipx->ipx_dest.sock &&
+		    (is_broadcast || !memcmp(ipx->ipx_dest.node,
+					     ipxs->node, IPX_NODE_LEN))) {
+			/* We found a socket to which to send */
+			struct sk_buff *skb1;
+
+			if (copy) {
+				skb1 = skb_clone(skb, GFP_ATOMIC);
+				rc = -ENOMEM;
+				if (!skb1)
+					goto out;
+			} else {
+				skb1 = skb;
+				copy = 1; /* skb may only be used once */
+			}
+			ipxitf_def_skb_handler(s, skb1);
+
+			/* On an external interface, one socket can listen */
+			if (intrfc != ipx_internal_net)
+				break;
+		}
+	}
+
+	/* skb was solely for us, and we did not make a copy, so free it. */
+	if (!copy)
+		kfree_skb(skb);
+
+	rc = 0;
+out:
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+	return rc;
+}
+#else
+static struct sock *ncp_connection_hack(struct ipx_interface *intrfc,
+					struct ipxhdr *ipx)
+{
+	/* The packet's target is a NCP connection handler. We want to hand it
+	 * to the correct socket directly within the kernel, so that the
+	 * mars_nwe packet distribution process does not have to do it. Here we
+	 * only care about NCP and BURST packets.
+	 *
+	 * You might call this a hack, but believe me, you do not want a
+	 * complete NCP layer in the kernel, and this is VERY fast as well. */
+	struct sock *sk = NULL;
+ 	int connection = 0;
+	u8 *ncphdr = (u8 *)(ipx + 1);
+
+ 	if (*ncphdr == 0x22 && *(ncphdr + 1) == 0x22) /* NCP request */
+		connection = (((int) *(ncphdr + 5)) << 8) | (int) *(ncphdr + 3);
+	else if (*ncphdr == 0x77 && *(ncphdr + 1) == 0x77) /* BURST packet */
+		connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8);
+
+	if (connection) {
+		struct hlist_node *node;
+		/* Now we have to look for a special NCP connection handling
+		 * socket. Only these sockets have ipx_ncp_conn != 0, set by
+		 * SIOCIPXNCPCONN. */
+		spin_lock_bh(&intrfc->if_sklist_lock);
+		sk_for_each(sk, node, &intrfc->if_sklist)
+			if (ipx_sk(sk)->ipx_ncp_conn == connection) {
+				sock_hold(sk);
+				goto found;
+			}
+		sk = NULL;
+	found:
+		spin_unlock_bh(&intrfc->if_sklist_lock);
+	}
+	return sk;
+}
+
+static int ipxitf_demux_socket(struct ipx_interface *intrfc,
+			       struct sk_buff *skb, int copy)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	struct sock *sock1 = NULL, *sock2 = NULL;
+	struct sk_buff *skb1 = NULL, *skb2 = NULL;
+	int rc;
+
+	if (intrfc == ipx_primary_net && ntohs(ipx->ipx_dest.sock) == 0x451)
+		sock1 = ncp_connection_hack(intrfc, ipx);
+        if (!sock1)
+		/* No special socket found, forward the packet the normal way */
+		sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock);
+
+	/*
+	 * We need to check if there is a primary net and if
+	 * this is addressed to one of the *SPECIAL* sockets because
+	 * these need to be propagated to the primary net.
+	 * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and
+	 * 0x456(Diagnostic).
+	 */
+
+	if (ipx_primary_net && intrfc != ipx_primary_net) {
+		const int dsock = ntohs(ipx->ipx_dest.sock);
+
+		if (dsock == 0x452 || dsock == 0x453 || dsock == 0x456)
+			/* The appropriate thing to do here is to dup the
+			 * packet and route to the primary net interface via
+			 * ipxitf_send; however, we'll cheat and just demux it
+			 * here. */
+			sock2 = ipxitf_find_socket(ipx_primary_net,
+							ipx->ipx_dest.sock);
+	}
+
+	/*
+	 * If there is nothing to do return. The kfree will cancel any charging.
+	 */
+	rc = 0;
+	if (!sock1 && !sock2) {
+		if (!copy)
+			kfree_skb(skb);
+		goto out;
+	}
+
+	/*
+	 * This next segment of code is a little awkward, but it sets it up
+	 * so that the appropriate number of copies of the SKB are made and
+	 * that skb1 and skb2 point to it (them) so that it (they) can be
+	 * demuxed to sock1 and/or sock2.  If we are unable to make enough
+	 * copies, we do as much as is possible.
+	 */
+
+	if (copy)
+		skb1 = skb_clone(skb, GFP_ATOMIC);
+	else
+		skb1 = skb;
+
+	rc = -ENOMEM;
+	if (!skb1)
+		goto out_put;
+
+	/* Do we need 2 SKBs? */
+	if (sock1 && sock2)
+		skb2 = skb_clone(skb1, GFP_ATOMIC);
+	else
+		skb2 = skb1;
+
+	if (sock1)
+		ipxitf_def_skb_handler(sock1, skb1);
+
+	if (!skb2)
+		goto out_put;
+
+	if (sock2)
+		ipxitf_def_skb_handler(sock2, skb2);
+
+	rc = 0;
+out_put:
+	if (sock1)
+		sock_put(sock1);
+	if (sock2)
+		sock_put(sock2);
+out:
+	return rc;
+}
+#endif	/* CONFIG_IPX_INTERN */
+
+static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
+					    struct sk_buff *skb)
+{
+	struct sk_buff *skb2;
+	int in_offset = (unsigned char *)ipx_hdr(skb) - skb->head;
+	int out_offset = intrfc->if_ipx_offset;
+	int len;
+
+	/* Hopefully, most cases */
+	if (in_offset >= out_offset)
+		return skb;
+
+	/* Need new SKB */
+	len  = skb->len + out_offset;
+	skb2 = alloc_skb(len, GFP_ATOMIC);
+	if (skb2) {
+		skb_reserve(skb2, out_offset);
+		skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len);
+		memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
+		memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
+	}
+	kfree_skb(skb);
+	return skb2;
+}
+
+/* caller must hold a reference to intrfc and the skb has to be unshared */
+int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	struct net_device *dev = intrfc->if_dev;
+	struct datalink_proto *dl = intrfc->if_dlink;
+	char dest_node[IPX_NODE_LEN];
+	int send_to_wire = 1;
+	int addr_len;
+
+	ipx->ipx_tctrl = IPX_SKB_CB(skb)->ipx_tctrl;
+	ipx->ipx_dest.net = IPX_SKB_CB(skb)->ipx_dest_net;
+	ipx->ipx_source.net = IPX_SKB_CB(skb)->ipx_source_net;
+
+	/* see if we need to include the netnum in the route list */
+	if (IPX_SKB_CB(skb)->last_hop.index >= 0) {
+		u32 *last_hop = (u32 *)(((u8 *) skb->data) +
+				sizeof(struct ipxhdr) +
+				IPX_SKB_CB(skb)->last_hop.index *
+				sizeof(u32));
+		*last_hop = IPX_SKB_CB(skb)->last_hop.netnum;
+		IPX_SKB_CB(skb)->last_hop.index = -1;
+	}
+	
+	/* 
+	 * We need to know how many skbuffs it will take to send out this
+	 * packet to avoid unnecessary copies.
+	 */
+	 
+	if (!dl || !dev || dev->flags & IFF_LOOPBACK) 
+		send_to_wire = 0;	/* No non looped */
+
+	/*
+	 * See if this should be demuxed to sockets on this interface 
+	 *
+	 * We want to ensure the original was eaten or that we only use
+	 * up clones.
+	 */
+	 
+	if (ipx->ipx_dest.net == intrfc->if_netnum) {
+		/*
+		 * To our own node, loop and free the original.
+		 * The internal net will receive on all node address.
+		 */
+		if (intrfc == ipx_internal_net ||
+		    !memcmp(intrfc->if_node, node, IPX_NODE_LEN)) {
+			/* Don't charge sender */
+			skb_orphan(skb);
+
+			/* Will charge receiver */
+			return ipxitf_demux_socket(intrfc, skb, 0);
+		}
+
+		/* Broadcast, loop and possibly keep to send on. */
+		if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN)) {
+			if (!send_to_wire)
+				skb_orphan(skb);
+			ipxitf_demux_socket(intrfc, skb, send_to_wire);
+			if (!send_to_wire)
+				goto out;
+		}
+	}
+
+	/*
+	 * If the originating net is not equal to our net; this is routed
+	 * We are still charging the sender. Which is right - the driver
+	 * free will handle this fairly.
+	 */
+	if (ipx->ipx_source.net != intrfc->if_netnum) {
+		/*
+		 * Unshare the buffer before modifying the count in
+		 * case it's a flood or tcpdump
+		 */
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (!skb)
+			goto out;
+		if (++ipx->ipx_tctrl > ipxcfg_max_hops)
+			send_to_wire = 0;
+	}
+
+	if (!send_to_wire) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	/* Determine the appropriate hardware address */
+	addr_len = dev->addr_len;
+	if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN))
+		memcpy(dest_node, dev->broadcast, addr_len);
+	else
+		memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len);
+
+	/* Make any compensation for differing physical/data link size */
+	skb = ipxitf_adjust_skbuff(intrfc, skb);
+	if (!skb)
+		goto out;
+
+	/* set up data link and physical headers */
+	skb->dev	= dev;
+	skb->protocol	= htons(ETH_P_IPX);
+
+	/* Send it out */
+	dl->request(dl, skb, dest_node);
+out:
+	return 0;
+}
+
+static int ipxitf_add_local_route(struct ipx_interface *intrfc)
+{
+	return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL);
+}
+
+static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
+				   struct sk_buff *skb);
+static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb);
+
+static int ipxitf_rcv(struct ipx_interface *intrfc, struct sk_buff *skb)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	int rc = 0;
+
+	ipxitf_hold(intrfc);
+
+	/* See if we should update our network number */
+	if (!intrfc->if_netnum) /* net number of intrfc not known yet */
+ 		ipxitf_discover_netnum(intrfc, skb);
+	
+	IPX_SKB_CB(skb)->last_hop.index = -1;
+	if (ipx->ipx_type == IPX_TYPE_PPROP) {
+		rc = ipxitf_pprop(intrfc, skb);
+		if (rc)
+			goto out_free_skb;
+	}
+
+	/* local processing follows */
+	if (!IPX_SKB_CB(skb)->ipx_dest_net)
+		IPX_SKB_CB(skb)->ipx_dest_net = intrfc->if_netnum;
+	if (!IPX_SKB_CB(skb)->ipx_source_net)
+		IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
+
+	/* it doesn't make sense to route a pprop packet, there's no meaning
+	 * in the ipx_dest_net for such packets */
+	if (ipx->ipx_type != IPX_TYPE_PPROP &&
+	    intrfc->if_netnum != IPX_SKB_CB(skb)->ipx_dest_net) {
+		/* We only route point-to-point packets. */
+		if (skb->pkt_type == PACKET_HOST) {
+			skb = skb_unshare(skb, GFP_ATOMIC);
+			if (skb)
+				rc = ipxrtr_route_skb(skb);
+			goto out_intrfc;
+		}
+
+		goto out_free_skb;
+	}
+
+	/* see if we should keep it */
+	if (!memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) ||
+	    !memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN)) {
+		rc = ipxitf_demux_socket(intrfc, skb, 0);
+		goto out_intrfc;
+	}
+
+	/* we couldn't pawn it off so unload it */
+out_free_skb:
+	kfree_skb(skb);
+out_intrfc:
+	ipxitf_put(intrfc);
+	return rc;
+}
+
+static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
+				   struct sk_buff *skb)
+{ 
+	const struct ipx_cb *cb = IPX_SKB_CB(skb);
+
+	/* see if this is an intra packet: source_net == dest_net */
+	if (cb->ipx_source_net == cb->ipx_dest_net && cb->ipx_source_net) {
+		struct ipx_interface *i =
+				ipxitf_find_using_net(cb->ipx_source_net);
+		/* NB: NetWare servers lie about their hop count so we
+		 * dropped the test based on it. This is the best way
+		 * to determine this is a 0 hop count packet. */
+		if (!i) {
+			intrfc->if_netnum = cb->ipx_source_net;
+			ipxitf_add_local_route(intrfc);
+		} else {
+			printk(KERN_WARNING "IPX: Network number collision "
+				"%lx\n        %s %s and %s %s\n",
+				(unsigned long) htonl(cb->ipx_source_net),
+				ipx_device_name(i),
+				ipx_frame_name(i->if_dlink_type),
+				ipx_device_name(intrfc),
+				ipx_frame_name(intrfc->if_dlink_type));
+			ipxitf_put(i);
+		}
+	}
+}
+
+/**
+ * ipxitf_pprop - Process packet propagation IPX packet type 0x14, used for
+ * 		  NetBIOS broadcasts
+ * @intrfc: IPX interface receiving this packet
+ * @skb: Received packet
+ *
+ * Checks if packet is valid: if its more than %IPX_MAX_PPROP_HOPS hops or if it
+ * is smaller than a IPX header + the room for %IPX_MAX_PPROP_HOPS hops we drop
+ * it, not even processing it locally, if it has exact %IPX_MAX_PPROP_HOPS we
+ * don't broadcast it, but process it locally. See chapter 5 of Novell's "IPX
+ * RIP and SAP Router Specification", Part Number 107-000029-001.
+ * 
+ * If it is valid, check if we have pprop broadcasting enabled by the user,
+ * if not, just return zero for local processing.
+ *
+ * If it is enabled check the packet and don't broadcast it if we have already
+ * seen this packet.
+ *
+ * Broadcast: send it to the interfaces that aren't on the packet visited nets
+ * array, just after the IPX header.
+ *
+ * Returns -EINVAL for invalid packets, so that the calling function drops
+ * the packet without local processing. 0 if packet is to be locally processed.
+ */
+static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	int i, rc = -EINVAL;
+	struct ipx_interface *ifcs;
+	char *c;
+	u32 *l;
+
+	/* Illegal packet - too many hops or too short */
+	/* We decide to throw it away: no broadcasting, no local processing.
+	 * NetBIOS unaware implementations route them as normal packets -
+	 * tctrl <= 15, any data payload... */
+	if (IPX_SKB_CB(skb)->ipx_tctrl > IPX_MAX_PPROP_HOPS ||
+	    ntohs(ipx->ipx_pktsize) < sizeof(struct ipxhdr) +
+	    				IPX_MAX_PPROP_HOPS * sizeof(u32))
+		goto out;
+	/* are we broadcasting this damn thing? */
+	rc = 0;
+	if (!sysctl_ipx_pprop_broadcasting)
+		goto out;
+	/* We do broadcast packet on the IPX_MAX_PPROP_HOPS hop, but we
+	 * process it locally. All previous hops broadcasted it, and process it
+	 * locally. */
+	if (IPX_SKB_CB(skb)->ipx_tctrl == IPX_MAX_PPROP_HOPS)
+		goto out;
+	
+	c = ((u8 *) ipx) + sizeof(struct ipxhdr);
+	l = (u32 *) c;
+
+	/* Don't broadcast packet if already seen this net */
+	for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
+		if (*l++ == intrfc->if_netnum)
+			goto out;
+
+	/* < IPX_MAX_PPROP_HOPS hops && input interface not in list. Save the
+	 * position where we will insert recvd netnum into list, later on,
+	 * in ipxitf_send */
+	IPX_SKB_CB(skb)->last_hop.index = i;
+	IPX_SKB_CB(skb)->last_hop.netnum = intrfc->if_netnum;
+	/* xmit on all other interfaces... */
+	spin_lock_bh(&ipx_interfaces_lock);
+	list_for_each_entry(ifcs, &ipx_interfaces, node) {
+		/* Except unconfigured interfaces */
+		if (!ifcs->if_netnum)
+			continue;
+					
+		/* That aren't in the list */
+		if (ifcs == intrfc)
+			continue;
+		l = (__u32 *) c;
+		/* don't consider the last entry in the packet list,
+		 * it is our netnum, and it is not there yet */
+		for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
+			if (ifcs->if_netnum == *l++)
+				break;
+		if (i == IPX_SKB_CB(skb)->ipx_tctrl) {
+			struct sk_buff *s = skb_copy(skb, GFP_ATOMIC);
+
+			if (s) {
+				IPX_SKB_CB(s)->ipx_dest_net = ifcs->if_netnum;
+				ipxrtr_route_skb(s);
+			}
+		}
+	}
+	spin_unlock_bh(&ipx_interfaces_lock);
+out:
+	return rc;
+}
+
+static void ipxitf_insert(struct ipx_interface *intrfc)
+{
+	spin_lock_bh(&ipx_interfaces_lock);
+	list_add_tail(&intrfc->node, &ipx_interfaces);
+	spin_unlock_bh(&ipx_interfaces_lock);
+
+	if (ipxcfg_auto_select_primary && !ipx_primary_net)
+		ipx_primary_net = intrfc;
+}
+
+static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __u32 netnum,
+					  unsigned short dlink_type,
+					  struct datalink_proto *dlink,
+					  unsigned char internal,
+					  int ipx_offset)
+{
+	struct ipx_interface *intrfc = kmalloc(sizeof(*intrfc), GFP_ATOMIC);
+
+	if (intrfc) {
+		intrfc->if_dev		= dev;
+		intrfc->if_netnum	= netnum;
+		intrfc->if_dlink_type 	= dlink_type;
+		intrfc->if_dlink 	= dlink;
+		intrfc->if_internal 	= internal;
+		intrfc->if_ipx_offset 	= ipx_offset;
+		intrfc->if_sknum 	= IPX_MIN_EPHEMERAL_SOCKET;
+		INIT_HLIST_HEAD(&intrfc->if_sklist);
+		atomic_set(&intrfc->refcnt, 1);
+		spin_lock_init(&intrfc->if_sklist_lock);
+	}
+
+	return intrfc;
+}
+
+static int ipxitf_create_internal(struct ipx_interface_definition *idef)
+{
+	struct ipx_interface *intrfc;
+	int rc = -EEXIST;
+
+	/* Only one primary network allowed */
+	if (ipx_primary_net)
+		goto out;
+
+	/* Must have a valid network number */
+	rc = -EADDRNOTAVAIL;
+	if (!idef->ipx_network)
+		goto out;
+	intrfc = ipxitf_find_using_net(idef->ipx_network);
+	rc = -EADDRINUSE;
+	if (intrfc) {
+		ipxitf_put(intrfc);
+		goto out;
+	}
+	intrfc = ipxitf_alloc(NULL, idef->ipx_network, 0, NULL, 1, 0);
+	rc = -EAGAIN;
+	if (!intrfc)
+		goto out;
+	memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN);
+	ipx_internal_net = ipx_primary_net = intrfc;
+	ipxitf_hold(intrfc);
+	ipxitf_insert(intrfc);
+
+	rc = ipxitf_add_local_route(intrfc);
+	ipxitf_put(intrfc);
+out:
+	return rc;
+}
+
+static int ipx_map_frame_type(unsigned char type)
+{
+	int rc = 0;
+
+	switch (type) {
+	case IPX_FRAME_ETHERII:	rc = htons(ETH_P_IPX);		break;
+	case IPX_FRAME_8022:	rc = htons(ETH_P_802_2);	break;
+	case IPX_FRAME_SNAP:	rc = htons(ETH_P_SNAP);		break;
+	case IPX_FRAME_8023:	rc = htons(ETH_P_802_3);	break;
+	}
+
+	return rc;
+}
+
+static int ipxitf_create(struct ipx_interface_definition *idef)
+{
+	struct net_device *dev;
+	unsigned short dlink_type = 0;
+	struct datalink_proto *datalink = NULL;
+	struct ipx_interface *intrfc;
+	int rc;
+
+	if (idef->ipx_special == IPX_INTERNAL) {
+		rc = ipxitf_create_internal(idef);
+		goto out;
+	}
+
+	rc = -EEXIST;
+	if (idef->ipx_special == IPX_PRIMARY && ipx_primary_net)
+		goto out;
+
+	intrfc = ipxitf_find_using_net(idef->ipx_network);
+	rc = -EADDRINUSE;
+	if (idef->ipx_network && intrfc) {
+		ipxitf_put(intrfc);
+		goto out;
+	}
+
+	if (intrfc)
+		ipxitf_put(intrfc);
+
+	dev = dev_get_by_name(idef->ipx_device);
+	rc = -ENODEV;
+	if (!dev)
+		goto out;
+
+	switch (idef->ipx_dlink_type) {
+	case IPX_FRAME_TR_8022:
+		printk(KERN_WARNING "IPX frame type 802.2TR is "
+			"obsolete Use 802.2 instead.\n");
+		/* fall through */
+	case IPX_FRAME_8022:
+		dlink_type 	= htons(ETH_P_802_2);
+		datalink 	= p8022_datalink;
+		break;
+	case IPX_FRAME_ETHERII:
+		if (dev->type != ARPHRD_IEEE802) {
+			dlink_type 	= htons(ETH_P_IPX);
+			datalink 	= pEII_datalink;
+			break;
+		} else 
+			printk(KERN_WARNING "IPX frame type EtherII over "
+					"token-ring is obsolete. Use SNAP "
+					"instead.\n");
+		/* fall through */
+	case IPX_FRAME_SNAP:
+		dlink_type 	= htons(ETH_P_SNAP);
+		datalink 	= pSNAP_datalink;
+		break;
+	case IPX_FRAME_8023:
+		dlink_type 	= htons(ETH_P_802_3);
+		datalink 	= p8023_datalink;
+		break;
+	case IPX_FRAME_NONE:
+	default:
+		rc = -EPROTONOSUPPORT;
+		goto out_dev;
+	}
+
+	rc = -ENETDOWN;
+	if (!(dev->flags & IFF_UP))
+		goto out_dev;
+
+	/* Check addresses are suitable */
+	rc = -EINVAL;
+	if (dev->addr_len > IPX_NODE_LEN)
+		goto out_dev;
+
+	intrfc = ipxitf_find_using_phys(dev, dlink_type);
+	if (!intrfc) {
+		/* Ok now create */
+		intrfc = ipxitf_alloc(dev, idef->ipx_network, dlink_type,
+				      datalink, 0, dev->hard_header_len +
+					datalink->header_length);
+		rc = -EAGAIN;
+		if (!intrfc)
+			goto out_dev;
+		/* Setup primary if necessary */
+		if (idef->ipx_special == IPX_PRIMARY)
+			ipx_primary_net = intrfc;
+		if (!memcmp(idef->ipx_node, "\000\000\000\000\000\000",
+			    IPX_NODE_LEN)) {
+			memset(intrfc->if_node, 0, IPX_NODE_LEN);
+			memcpy(intrfc->if_node + IPX_NODE_LEN - dev->addr_len,
+				dev->dev_addr, dev->addr_len);
+		} else
+			memcpy(intrfc->if_node, idef->ipx_node, IPX_NODE_LEN);
+		ipxitf_hold(intrfc);
+		ipxitf_insert(intrfc);
+	}
+
+
+	/* If the network number is known, add a route */
+	rc = 0;
+	if (!intrfc->if_netnum)
+		goto out_intrfc;
+
+	rc = ipxitf_add_local_route(intrfc);
+out_intrfc:
+	ipxitf_put(intrfc);
+	goto out;
+out_dev:
+	dev_put(dev);
+out:
+	return rc;
+}
+
+static int ipxitf_delete(struct ipx_interface_definition *idef)
+{
+	struct net_device *dev = NULL;
+	unsigned short dlink_type = 0;
+	struct ipx_interface *intrfc;
+	int rc = 0;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	if (idef->ipx_special == IPX_INTERNAL) {
+		if (ipx_internal_net) {
+			__ipxitf_put(ipx_internal_net);
+			goto out;
+		}
+		rc = -ENOENT;
+		goto out;
+	}
+
+	dlink_type = ipx_map_frame_type(idef->ipx_dlink_type);
+	rc = -EPROTONOSUPPORT;
+	if (!dlink_type)
+		goto out;
+
+	dev = __dev_get_by_name(idef->ipx_device);
+	rc = -ENODEV;
+	if (!dev)
+		goto out;
+
+	intrfc = __ipxitf_find_using_phys(dev, dlink_type);
+	rc = -EINVAL;
+	if (!intrfc)
+		goto out;
+	__ipxitf_put(intrfc);
+
+	rc = 0;
+out:
+	spin_unlock_bh(&ipx_interfaces_lock);
+	return rc;
+}
+
+static struct ipx_interface *ipxitf_auto_create(struct net_device *dev,
+						unsigned short dlink_type)
+{
+	struct ipx_interface *intrfc = NULL;
+	struct datalink_proto *datalink;
+
+	if (!dev)
+		goto out;
+
+	/* Check addresses are suitable */
+	if (dev->addr_len > IPX_NODE_LEN)
+		goto out;
+
+	switch (htons(dlink_type)) {
+	case ETH_P_IPX:		datalink = pEII_datalink;	break;
+	case ETH_P_802_2:	datalink = p8022_datalink;	break;
+	case ETH_P_SNAP:	datalink = pSNAP_datalink;	break;
+	case ETH_P_802_3:	datalink = p8023_datalink;	break;
+	default:		goto out;
+	}
+
+	intrfc = ipxitf_alloc(dev, 0, dlink_type, datalink, 0,
+				dev->hard_header_len + datalink->header_length);
+
+	if (intrfc) {
+		memset(intrfc->if_node, 0, IPX_NODE_LEN);
+		memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]),
+			dev->dev_addr, dev->addr_len);
+		spin_lock_init(&intrfc->if_sklist_lock);
+		atomic_set(&intrfc->refcnt, 1);
+		ipxitf_insert(intrfc);
+		dev_hold(dev);
+	}
+
+out:
+	return intrfc;
+}
+
+static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
+{
+	int rc = -EINVAL;
+	struct ifreq ifr;
+	int val;
+
+	switch (cmd) {
+	case SIOCSIFADDR: {
+		struct sockaddr_ipx *sipx;
+		struct ipx_interface_definition f;
+
+		rc = -EFAULT;
+		if (copy_from_user(&ifr, arg, sizeof(ifr)))
+			break;
+		sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
+		rc = -EINVAL;
+		if (sipx->sipx_family != AF_IPX)
+			break;
+		f.ipx_network = sipx->sipx_network;
+		memcpy(f.ipx_device, ifr.ifr_name,
+			sizeof(f.ipx_device));
+		memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN);
+		f.ipx_dlink_type = sipx->sipx_type;
+		f.ipx_special = sipx->sipx_special;
+
+		if (sipx->sipx_action == IPX_DLTITF)
+			rc = ipxitf_delete(&f);
+		else
+			rc = ipxitf_create(&f);
+		break;
+	}
+	case SIOCGIFADDR: {
+		struct sockaddr_ipx *sipx;
+		struct ipx_interface *ipxif;
+		struct net_device *dev;
+
+		rc = -EFAULT;
+		if (copy_from_user(&ifr, arg, sizeof(ifr)))
+			break;
+		sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
+		dev  = __dev_get_by_name(ifr.ifr_name);
+		rc   = -ENODEV;
+		if (!dev)
+			break;
+		ipxif = ipxitf_find_using_phys(dev,
+					   ipx_map_frame_type(sipx->sipx_type));
+		rc = -EADDRNOTAVAIL;
+		if (!ipxif)
+			break;
+
+		sipx->sipx_family	= AF_IPX;
+		sipx->sipx_network	= ipxif->if_netnum;
+		memcpy(sipx->sipx_node, ipxif->if_node,
+			sizeof(sipx->sipx_node));
+		rc = -EFAULT;
+		if (copy_to_user(arg, &ifr, sizeof(ifr)))
+			break;
+		ipxitf_put(ipxif);
+		rc = 0;
+		break;
+	}
+	case SIOCAIPXITFCRT: 
+		rc = -EFAULT;
+		if (get_user(val, (unsigned char __user *) arg))
+			break;
+		rc = 0;
+		ipxcfg_auto_create_interfaces = val;
+		break;
+	case SIOCAIPXPRISLT: 
+		rc = -EFAULT;
+		if (get_user(val, (unsigned char __user *) arg))
+			break;
+		rc = 0;
+		ipxcfg_set_auto_select(val);
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ *	Checksum routine for IPX
+ */
+ 
+/* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */
+/* This functions should *not* mess with packet contents */
+
+__u16 ipx_cksum(struct ipxhdr *packet, int length) 
+{
+	/* 
+	 *	NOTE: sum is a net byte order quantity, which optimizes the 
+	 *	loop. This only works on big and little endian machines. (I
+	 *	don't know of a machine that isn't.)
+	 */
+	/* start at ipx_dest - We skip the checksum field and start with
+	 * ipx_type before the loop, not considering ipx_tctrl in the calc */
+	__u16 *p = (__u16 *)&packet->ipx_dest;
+	__u32 i = (length >> 1) - 1; /* Number of complete words */
+	__u32 sum = packet->ipx_type << sizeof(packet->ipx_tctrl); 
+
+	/* Loop through all complete words except the checksum field,
+	 * ipx_type (accounted above) and ipx_tctrl (not used in the cksum) */
+	while (--i)
+		sum += *p++;
+
+	/* Add on the last part word if it exists */
+	if (packet->ipx_pktsize & htons(1))
+		sum += ntohs(0xff00) & *p;
+
+	/* Do final fixup */
+	sum = (sum & 0xffff) + (sum >> 16);
+
+	/* It's a pity there's no concept of carry in C */
+	if (sum >= 0x10000)
+		sum++;
+
+	return ~sum;
+}
+
+const char *ipx_frame_name(unsigned short frame)
+{
+	char* rc = "None";
+
+	switch (ntohs(frame)) {
+	case ETH_P_IPX:		rc = "EtherII";	break;
+	case ETH_P_802_2:	rc = "802.2";	break;
+	case ETH_P_SNAP:	rc = "SNAP";	break;
+	case ETH_P_802_3:	rc = "802.3";	break;
+	case ETH_P_TR_802_2:	rc = "802.2TR";	break;
+	}
+
+	return rc;
+}
+
+const char *ipx_device_name(struct ipx_interface *intrfc)
+{
+	return intrfc->if_internal ? "Internal" :
+		intrfc->if_dev ? intrfc->if_dev->name : "Unknown";
+}
+
+/* Handling for system calls applied via the various interfaces to an IPX
+ * socket object. */
+
+static int ipx_setsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int opt;
+	int rc = -EINVAL;
+
+	if (optlen != sizeof(int))
+		goto out;
+
+	rc = -EFAULT;
+	if (get_user(opt, (unsigned int __user *)optval))
+		goto out;
+
+	rc = -ENOPROTOOPT;
+	if (!(level == SOL_IPX && optname == IPX_TYPE))
+		goto out;
+
+	ipx_sk(sk)->type = opt;
+	rc = 0;
+out:
+	return rc;
+}
+
+static int ipx_getsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	int val = 0;
+	int len;
+	int rc = -ENOPROTOOPT;
+
+	if (!(level == SOL_IPX && optname == IPX_TYPE))
+		goto out;
+
+	val = ipx_sk(sk)->type;
+
+	rc = -EFAULT;
+	if (get_user(len, optlen))
+		goto out;
+
+	len = min_t(unsigned int, len, sizeof(int));
+	rc = -EINVAL;
+	if(len < 0)
+		goto out;
+		
+	rc = -EFAULT;
+	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
+		goto out;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static struct proto ipx_proto = {
+	.name	  = "IPX",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct ipx_sock),
+};
+
+static int ipx_create(struct socket *sock, int protocol)
+{
+	int rc = -ESOCKTNOSUPPORT;
+	struct sock *sk;
+
+	/*
+	 * SPX support is not anymore in the kernel sources. If you want to
+	 * ressurrect it, completing it and making it understand shared skbs,
+	 * be fully multithreaded, etc, grab the sources in an early 2.5 kernel
+	 * tree.
+	 */
+	if (sock->type != SOCK_DGRAM)
+		goto out;
+
+       	rc = -ENOMEM;
+	sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1);
+	if (!sk)
+		goto out;
+#ifdef IPX_REFCNT_DEBUG
+        atomic_inc(&ipx_sock_nr);
+        printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk,
+			atomic_read(&ipx_sock_nr));
+#endif
+	sock_init_data(sock, sk);
+	sk->sk_no_check = 1;		/* Checksum off by default */
+	sock->ops = &ipx_dgram_ops;
+	rc = 0;
+out:
+	return rc;
+}
+
+static int ipx_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		goto out;
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_state_change(sk);
+
+	sock_set_flag(sk, SOCK_DEAD);
+	sock->sk = NULL;
+	ipx_destroy_socket(sk);
+out:
+	return 0;
+}
+
+/* caller must hold a reference to intrfc */
+
+static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc)
+{
+	unsigned short socketNum = intrfc->if_sknum;
+
+	spin_lock_bh(&intrfc->if_sklist_lock);
+
+	if (socketNum < IPX_MIN_EPHEMERAL_SOCKET)
+		socketNum = IPX_MIN_EPHEMERAL_SOCKET;
+
+	while (__ipxitf_find_socket(intrfc, ntohs(socketNum)))
+		if (socketNum > IPX_MAX_EPHEMERAL_SOCKET)
+			socketNum = IPX_MIN_EPHEMERAL_SOCKET;
+		else
+			socketNum++;
+
+	spin_unlock_bh(&intrfc->if_sklist_lock);
+	intrfc->if_sknum = socketNum;
+
+	return ntohs(socketNum);
+}
+
+static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	struct ipx_interface *intrfc;
+	struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr;
+	int rc = -EINVAL;
+
+	if (!sock_flag(sk, SOCK_ZAPPED) || addr_len != sizeof(struct sockaddr_ipx))
+		goto out;
+
+	intrfc = ipxitf_find_using_net(addr->sipx_network);
+	rc = -EADDRNOTAVAIL;
+	if (!intrfc)
+		goto out;
+
+	if (!addr->sipx_port) {
+		addr->sipx_port = ipx_first_free_socketnum(intrfc);
+		rc = -EINVAL;
+		if (!addr->sipx_port)
+			goto out_put;
+	}
+
+	/* protect IPX system stuff like routing/sap */
+	rc = -EACCES;
+	if (ntohs(addr->sipx_port) < IPX_MIN_EPHEMERAL_SOCKET &&
+	    !capable(CAP_NET_ADMIN))
+		goto out_put;
+
+	ipxs->port = addr->sipx_port;
+
+#ifdef CONFIG_IPX_INTERN
+	if (intrfc == ipx_internal_net) {
+		/* The source address is to be set explicitly if the
+		 * socket is to be bound on the internal network. If a
+		 * node number 0 was specified, the default is used.
+		 */
+
+		rc = -EINVAL;
+		if (!memcmp(addr->sipx_node, ipx_broadcast_node, IPX_NODE_LEN))
+			goto out_put;
+		if (!memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN))
+			memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
+		else
+			memcpy(ipxs->node, addr->sipx_node, IPX_NODE_LEN);
+
+		rc = -EADDRINUSE;
+		if (ipxitf_find_internal_socket(intrfc, ipxs->node,
+						ipxs->port)) {
+			SOCK_DEBUG(sk,
+				"IPX: bind failed because port %X in use.\n",
+				ntohs((int)addr->sipx_port));
+			goto out_put;
+		}
+	} else {
+		/* Source addresses are easy. It must be our
+		 * network:node pair for an interface routed to IPX
+		 * with the ipx routing ioctl()
+		 */
+
+		memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
+
+		rc = -EADDRINUSE;
+		if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
+			SOCK_DEBUG(sk,
+				"IPX: bind failed because port %X in use.\n",
+				ntohs((int)addr->sipx_port));
+			goto out_put;
+		}
+	}
+
+#else	/* !def CONFIG_IPX_INTERN */
+
+	/* Source addresses are easy. It must be our network:node pair for
+	   an interface routed to IPX with the ipx routing ioctl() */
+
+	rc = -EADDRINUSE;
+	if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
+		SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n",
+				ntohs((int)addr->sipx_port));
+		goto out_put;
+	}
+
+#endif	/* CONFIG_IPX_INTERN */
+
+	ipxitf_insert_socket(intrfc, sk);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	rc = 0;
+out_put:
+	ipxitf_put(intrfc);
+out:
+	return rc;
+}
+
+static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
+	int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	struct sockaddr_ipx *addr;
+	int rc = -EINVAL;
+	struct ipx_route *rt;
+
+	sk->sk_state	= TCP_CLOSE;
+	sock->state 	= SS_UNCONNECTED;
+
+	if (addr_len != sizeof(*addr))
+		goto out;
+	addr = (struct sockaddr_ipx *)uaddr;
+
+	/* put the autobinding in */
+	if (!ipxs->port) {
+		struct sockaddr_ipx uaddr;
+
+		uaddr.sipx_port		= 0;
+		uaddr.sipx_network 	= 0;
+
+#ifdef CONFIG_IPX_INTERN
+		rc = -ENETDOWN;
+		if (!ipxs->intrfc)
+			goto out; /* Someone zonked the iface */
+		memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
+			IPX_NODE_LEN);
+#endif	/* CONFIG_IPX_INTERN */
+
+		rc = ipx_bind(sock, (struct sockaddr *)&uaddr,
+			      sizeof(struct sockaddr_ipx));
+		if (rc)
+			goto out;
+	}
+
+        /* We can either connect to primary network or somewhere
+	 * we can route to */
+	rt = ipxrtr_lookup(addr->sipx_network);
+	rc = -ENETUNREACH;
+	if (!rt && !(!addr->sipx_network && ipx_primary_net))
+		goto out;
+
+	ipxs->dest_addr.net  = addr->sipx_network;
+	ipxs->dest_addr.sock = addr->sipx_port;
+	memcpy(ipxs->dest_addr.node, addr->sipx_node, IPX_NODE_LEN);
+	ipxs->type = addr->sipx_type;
+
+	if (sock->type == SOCK_DGRAM) {
+		sock->state 	= SS_CONNECTED;
+		sk->sk_state 	= TCP_ESTABLISHED;
+	}
+
+	if (rt)
+		ipxrtr_put(rt);
+	rc = 0;
+out:
+	return rc;
+}
+
+
+static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
+			int *uaddr_len, int peer)
+{
+	struct ipx_address *addr;
+	struct sockaddr_ipx sipx;
+	struct sock *sk = sock->sk;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	int rc;
+
+	*uaddr_len = sizeof(struct sockaddr_ipx);
+
+	if (peer) {
+		rc = -ENOTCONN;
+		if (sk->sk_state != TCP_ESTABLISHED)
+			goto out;
+
+		addr = &ipxs->dest_addr;
+		sipx.sipx_network	= addr->net;
+		sipx.sipx_port		= addr->sock;
+		memcpy(sipx.sipx_node, addr->node, IPX_NODE_LEN);
+	} else {
+		if (ipxs->intrfc) {
+			sipx.sipx_network = ipxs->intrfc->if_netnum;
+#ifdef CONFIG_IPX_INTERN
+			memcpy(sipx.sipx_node, ipxs->node, IPX_NODE_LEN);
+#else
+			memcpy(sipx.sipx_node, ipxs->intrfc->if_node,
+				IPX_NODE_LEN);
+#endif	/* CONFIG_IPX_INTERN */
+
+		} else {
+			sipx.sipx_network = 0;
+			memset(sipx.sipx_node, '\0', IPX_NODE_LEN);
+		}
+
+		sipx.sipx_port = ipxs->port;
+	}
+
+	sipx.sipx_family = AF_IPX;
+	sipx.sipx_type	 = ipxs->type;
+	sipx.sipx_zero	 = 0;
+	memcpy(uaddr, &sipx, sizeof(sipx));
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+	/* NULL here for pt means the packet was looped back */
+	struct ipx_interface *intrfc;
+	struct ipxhdr *ipx;
+	u16 ipx_pktsize;
+	int rc = 0;
+		
+	/* Not ours */	
+        if (skb->pkt_type == PACKET_OTHERHOST)
+        	goto drop;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		goto out;
+
+	ipx		= ipx_hdr(skb);
+	ipx_pktsize	= ntohs(ipx->ipx_pktsize);
+	
+	/* Too small or invalid header? */
+	if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len)
+		goto drop;
+                        
+	if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
+	   ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
+		goto drop;
+
+	IPX_SKB_CB(skb)->ipx_tctrl	= ipx->ipx_tctrl;
+	IPX_SKB_CB(skb)->ipx_dest_net	= ipx->ipx_dest.net;
+	IPX_SKB_CB(skb)->ipx_source_net = ipx->ipx_source.net;
+
+	/* Determine what local ipx endpoint this is */
+	intrfc = ipxitf_find_using_phys(dev, pt->type);
+	if (!intrfc) {
+		if (ipxcfg_auto_create_interfaces &&
+		   ntohl(IPX_SKB_CB(skb)->ipx_dest_net)) {
+			intrfc = ipxitf_auto_create(dev, pt->type);
+			if (intrfc)
+				ipxitf_hold(intrfc);
+		}
+
+		if (!intrfc)	/* Not one of ours */
+				/* or invalid packet for auto creation */
+			goto drop;
+	}
+
+	rc = ipxitf_rcv(intrfc, skb);
+	ipxitf_put(intrfc);
+	goto out;
+drop:
+	kfree_skb(skb);
+out:
+	return rc;
+}
+
+static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
+	struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	struct sockaddr_ipx *usipx = (struct sockaddr_ipx *)msg->msg_name;
+	struct sockaddr_ipx local_sipx;
+	int rc = -EINVAL;
+	int flags = msg->msg_flags;
+
+	/* Socket gets bound below anyway */
+/*	if (sk->sk_zapped)
+		return -EIO; */	/* Socket not bound */
+	if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
+		goto out;
+
+	/* Max possible packet size limited by 16 bit pktsize in header */
+	if (len >= 65535 - sizeof(struct ipxhdr))
+		goto out;
+
+	if (usipx) {
+		if (!ipxs->port) {
+			struct sockaddr_ipx uaddr;
+
+			uaddr.sipx_port		= 0;
+			uaddr.sipx_network	= 0;
+#ifdef CONFIG_IPX_INTERN
+			rc = -ENETDOWN;
+			if (!ipxs->intrfc)
+				goto out; /* Someone zonked the iface */
+			memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
+				IPX_NODE_LEN);
+#endif
+			rc = ipx_bind(sock, (struct sockaddr *)&uaddr,
+					sizeof(struct sockaddr_ipx));
+			if (rc)
+				goto out;
+		}
+
+		rc = -EINVAL;
+		if (msg->msg_namelen < sizeof(*usipx) ||
+		    usipx->sipx_family != AF_IPX)
+			goto out;
+	} else {
+		rc = -ENOTCONN;
+		if (sk->sk_state != TCP_ESTABLISHED)
+			goto out;
+
+		usipx = &local_sipx;
+		usipx->sipx_family 	= AF_IPX;
+		usipx->sipx_type 	= ipxs->type;
+		usipx->sipx_port 	= ipxs->dest_addr.sock;
+		usipx->sipx_network 	= ipxs->dest_addr.net;
+		memcpy(usipx->sipx_node, ipxs->dest_addr.node, IPX_NODE_LEN);
+	}
+
+	rc = ipxrtr_route_packet(sk, usipx, msg->msg_iov, len,
+				 flags & MSG_DONTWAIT);
+	if (rc >= 0)
+		rc = len;
+out:
+	return rc;
+}
+
+
+static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
+		struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)msg->msg_name;
+	struct ipxhdr *ipx = NULL;
+	struct sk_buff *skb;
+	int copied, rc;
+
+	/* put the autobinding in */
+	if (!ipxs->port) {
+		struct sockaddr_ipx uaddr;
+
+		uaddr.sipx_port		= 0;
+		uaddr.sipx_network 	= 0;
+
+#ifdef CONFIG_IPX_INTERN
+		rc = -ENETDOWN;
+		if (!ipxs->intrfc)
+			goto out; /* Someone zonked the iface */
+		memcpy(uaddr.sipx_node, ipxs->intrfc->if_node, IPX_NODE_LEN);
+#endif	/* CONFIG_IPX_INTERN */
+
+		rc = ipx_bind(sock, (struct sockaddr *)&uaddr,
+			      sizeof(struct sockaddr_ipx));
+		if (rc)
+			goto out;
+	}
+	
+	rc = -ENOTCONN;
+	if (sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &rc);
+	if (!skb)
+		goto out;
+
+	ipx 	= ipx_hdr(skb);
+	copied 	= ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	rc = skb_copy_datagram_iovec(skb, sizeof(struct ipxhdr), msg->msg_iov,
+				     copied);
+	if (rc)
+		goto out_free;
+	if (skb->stamp.tv_sec)
+		sk->sk_stamp = skb->stamp;
+
+	msg->msg_namelen = sizeof(*sipx);
+
+	if (sipx) {
+		sipx->sipx_family	= AF_IPX;
+		sipx->sipx_port		= ipx->ipx_source.sock;
+		memcpy(sipx->sipx_node, ipx->ipx_source.node, IPX_NODE_LEN);
+		sipx->sipx_network	= IPX_SKB_CB(skb)->ipx_source_net;
+		sipx->sipx_type 	= ipx->ipx_type;
+		sipx->sipx_zero		= 0;
+	}
+	rc = copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return rc;
+}
+
+
+static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	int rc = 0;
+	long amount = 0;
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case TIOCOUTQ:
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		rc = put_user(amount, (int __user *)argp);
+		break;
+	case TIOCINQ: {
+		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+		/* These two are safe on a single CPU system as only
+		 * user tasks fiddle here */
+		if (skb)
+			amount = skb->len - sizeof(struct ipxhdr);
+		rc = put_user(amount, (int __user *)argp);
+		break;
+	}
+	case SIOCADDRT:
+	case SIOCDELRT:
+		rc = -EPERM;
+		if (capable(CAP_NET_ADMIN))
+			rc = ipxrtr_ioctl(cmd, argp);
+		break;
+	case SIOCSIFADDR:
+	case SIOCAIPXITFCRT:
+	case SIOCAIPXPRISLT:
+		rc = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			break;
+	case SIOCGIFADDR:
+		rc = ipxitf_ioctl(cmd, argp);
+		break;
+	case SIOCIPXCFGDATA:
+		rc = ipxcfg_get_config_data(argp);
+		break;
+	case SIOCIPXNCPCONN:
+		/*
+		 * This socket wants to take care of the NCP connection
+		 * handed to us in arg.
+		 */
+        	rc = -EPERM;
+        	if (!capable(CAP_NET_ADMIN))
+			break;
+		rc = get_user(ipx_sk(sk)->ipx_ncp_conn,
+			      (const unsigned short __user *)argp);
+		break;
+	case SIOCGSTAMP:
+		rc = -EINVAL;
+		if (sk) 
+			rc = sock_get_timestamp(sk, argp);
+		break;
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+		rc = -EINVAL;
+		break;
+	default:
+		rc = dev_ioctl(cmd, argp);
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Socket family declarations
+ */
+
+static struct net_proto_family ipx_family_ops = {
+	.family		= PF_IPX,
+	.create		= ipx_create,
+	.owner		= THIS_MODULE,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
+	.family		= PF_IPX,
+	.owner		= THIS_MODULE,
+	.release	= ipx_release,
+	.bind		= ipx_bind,
+	.connect	= ipx_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= ipx_getname,
+	.poll		= datagram_poll,
+	.ioctl		= ipx_ioctl,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown, /* FIXME: support shutdown */
+	.setsockopt	= ipx_setsockopt,
+	.getsockopt	= ipx_getsockopt,
+	.sendmsg	= ipx_sendmsg,
+	.recvmsg	= ipx_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+#include <linux/smp_lock.h>
+SOCKOPS_WRAP(ipx_dgram, PF_IPX);
+
+static struct packet_type ipx_8023_packet_type = {
+	.type		= __constant_htons(ETH_P_802_3),
+	.func		= ipx_rcv,
+};
+
+static struct packet_type ipx_dix_packet_type = {
+	.type		= __constant_htons(ETH_P_IPX),
+	.func		= ipx_rcv,
+};
+
+static struct notifier_block ipx_dev_notifier = {
+	.notifier_call	= ipxitf_device_event,
+};
+
+extern struct datalink_proto *make_EII_client(void);
+extern struct datalink_proto *make_8023_client(void);
+extern void destroy_EII_client(struct datalink_proto *);
+extern void destroy_8023_client(struct datalink_proto *);
+
+static unsigned char ipx_8022_type = 0xE0;
+static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
+static char ipx_EII_err_msg[] __initdata =
+	KERN_CRIT "IPX: Unable to register with Ethernet II\n";
+static char ipx_8023_err_msg[] __initdata =
+	KERN_CRIT "IPX: Unable to register with 802.3\n";
+static char ipx_llc_err_msg[] __initdata =
+	KERN_CRIT "IPX: Unable to register with 802.2\n";
+static char ipx_snap_err_msg[] __initdata =
+	KERN_CRIT "IPX: Unable to register with SNAP\n";
+
+static int __init ipx_init(void)
+{
+	int rc = proto_register(&ipx_proto, 1);
+
+	if (rc != 0)
+		goto out;
+
+	sock_register(&ipx_family_ops);
+
+	pEII_datalink = make_EII_client();
+	if (pEII_datalink)
+		dev_add_pack(&ipx_dix_packet_type);
+	else
+		printk(ipx_EII_err_msg);
+
+	p8023_datalink = make_8023_client();
+	if (p8023_datalink)
+		dev_add_pack(&ipx_8023_packet_type);
+	else
+		printk(ipx_8023_err_msg);
+
+	p8022_datalink = register_8022_client(ipx_8022_type, ipx_rcv);
+	if (!p8022_datalink)
+		printk(ipx_llc_err_msg);
+
+	pSNAP_datalink = register_snap_client(ipx_snap_id, ipx_rcv);
+	if (!pSNAP_datalink)
+		printk(ipx_snap_err_msg);
+
+	register_netdevice_notifier(&ipx_dev_notifier);
+	ipx_register_sysctl();
+	ipx_proc_init();
+out:
+	return rc;
+}
+
+static void __exit ipx_proto_finito(void)
+{
+	ipx_proc_exit();
+	ipx_unregister_sysctl();
+
+	unregister_netdevice_notifier(&ipx_dev_notifier);
+
+	ipxitf_cleanup();
+
+	unregister_snap_client(pSNAP_datalink);
+	pSNAP_datalink = NULL;
+
+	unregister_8022_client(p8022_datalink);
+	p8022_datalink = NULL;
+
+	dev_remove_pack(&ipx_8023_packet_type);
+	destroy_8023_client(p8023_datalink);
+	p8023_datalink = NULL;
+
+	dev_remove_pack(&ipx_dix_packet_type);
+	destroy_EII_client(pEII_datalink);
+	pEII_datalink = NULL;
+
+	proto_unregister(&ipx_proto);
+	sock_unregister(ipx_family_ops.family);
+}
+
+module_init(ipx_init);
+module_exit(ipx_proto_finito);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_IPX);
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
new file mode 100644
index 00000000000..b6761913445
--- /dev/null
+++ b/net/ipx/ipx_proc.c
@@ -0,0 +1,408 @@
+/*
+ *	IPX proc routines
+ *
+ * 	Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/tcp.h>
+#include <net/ipx.h>
+
+static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos)
+{
+	struct ipx_interface *i;
+
+	list_for_each_entry(i, &ipx_interfaces, node)
+		if (!pos--)
+			goto out;
+	i = NULL;
+out:
+	return i;
+}
+
+static struct ipx_interface *ipx_interfaces_next(struct ipx_interface *i)
+{
+	struct ipx_interface *rc = NULL;
+
+	if (i->node.next != &ipx_interfaces)
+		rc = list_entry(i->node.next, struct ipx_interface, node);
+	return rc;
+}
+
+static void *ipx_seq_interface_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	return l ? ipx_get_interface_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *ipx_seq_interface_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ipx_interface *i;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		i = ipx_interfaces_head();
+	else
+		i = ipx_interfaces_next(v);
+	return i;
+}
+
+static void ipx_seq_interface_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&ipx_interfaces_lock);
+}
+
+static int ipx_seq_interface_show(struct seq_file *seq, void *v)
+{
+	struct ipx_interface *i;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Network    Node_Address   Primary  Device     "
+			      "Frame_Type");
+#ifdef IPX_REFCNT_DEBUG
+		seq_puts(seq, "  refcnt");
+#endif
+		seq_puts(seq, "\n");
+		goto out;
+	}
+
+	i = v;
+	seq_printf(seq, "%08lX   ", (unsigned long int)ntohl(i->if_netnum));
+	seq_printf(seq, "%02X%02X%02X%02X%02X%02X   ",
+			i->if_node[0], i->if_node[1], i->if_node[2],
+			i->if_node[3], i->if_node[4], i->if_node[5]);
+	seq_printf(seq, "%-9s", i == ipx_primary_net ? "Yes" : "No");
+	seq_printf(seq, "%-11s", ipx_device_name(i));
+	seq_printf(seq, "%-9s", ipx_frame_name(i->if_dlink_type));
+#ifdef IPX_REFCNT_DEBUG
+	seq_printf(seq, "%6d", atomic_read(&i->refcnt));
+#endif
+	seq_puts(seq, "\n");
+out:
+	return 0;
+}
+
+static struct ipx_route *ipx_routes_head(void)
+{
+	struct ipx_route *rc = NULL;
+
+	if (!list_empty(&ipx_routes))
+		rc = list_entry(ipx_routes.next, struct ipx_route, node);
+	return rc;
+}
+
+static struct ipx_route *ipx_routes_next(struct ipx_route *r)
+{
+	struct ipx_route *rc = NULL;
+
+	if (r->node.next != &ipx_routes)
+		rc = list_entry(r->node.next, struct ipx_route, node);
+	return rc;
+}
+
+static __inline__ struct ipx_route *ipx_get_route_idx(loff_t pos)
+{
+	struct ipx_route *r;
+
+	list_for_each_entry(r, &ipx_routes, node)
+		if (!pos--)
+			goto out;
+	r = NULL;
+out:
+	return r;
+}
+
+static void *ipx_seq_route_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+	read_lock_bh(&ipx_routes_lock);
+	return l ? ipx_get_route_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *ipx_seq_route_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ipx_route *r;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		r = ipx_routes_head();
+	else
+		r = ipx_routes_next(v);
+	return r;
+}
+
+static void ipx_seq_route_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&ipx_routes_lock);
+}
+
+static int ipx_seq_route_show(struct seq_file *seq, void *v)
+{
+	struct ipx_route *rt;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Network    Router_Net   Router_Node\n");
+		goto out;
+	}
+	rt = v;
+	seq_printf(seq, "%08lX   ", (unsigned long int)ntohl(rt->ir_net));
+	if (rt->ir_routed)
+		seq_printf(seq, "%08lX     %02X%02X%02X%02X%02X%02X\n",
+			   (long unsigned int)ntohl(rt->ir_intrfc->if_netnum),
+			   rt->ir_router_node[0], rt->ir_router_node[1],
+			   rt->ir_router_node[2], rt->ir_router_node[3],
+			   rt->ir_router_node[4], rt->ir_router_node[5]);
+	else
+		seq_puts(seq, "Directly     Connected\n");
+out:
+	return 0;
+}
+
+static __inline__ struct sock *ipx_get_socket_idx(loff_t pos)
+{
+	struct sock *s = NULL;
+	struct hlist_node *node;
+	struct ipx_interface *i;
+
+	list_for_each_entry(i, &ipx_interfaces, node) {
+		spin_lock_bh(&i->if_sklist_lock);
+		sk_for_each(s, node, &i->if_sklist) {
+			if (!pos)
+				break;
+			--pos;
+		}
+		spin_unlock_bh(&i->if_sklist_lock);
+		if (!pos) {
+			if (node)
+				goto found;
+			break;
+		}
+	}
+	s = NULL;
+found:
+	return s;
+}
+
+static void *ipx_seq_socket_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	spin_lock_bh(&ipx_interfaces_lock);
+	return l ? ipx_get_socket_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock* sk, *next;
+	struct ipx_interface *i;
+	struct ipx_sock *ipxs;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		sk = NULL;
+		i = ipx_interfaces_head();
+		if (!i)
+			goto out;
+		sk = sk_head(&i->if_sklist);
+		if (sk)
+			spin_lock_bh(&i->if_sklist_lock);
+		goto out;
+	}
+	sk = v;
+	next = sk_next(sk);
+	if (next) {
+		sk = next;
+		goto out;
+	}
+	ipxs = ipx_sk(sk);
+	i = ipxs->intrfc;
+	spin_unlock_bh(&i->if_sklist_lock);
+	sk = NULL;
+	for (;;) {
+		i = ipx_interfaces_next(i);
+		if (!i)
+			break;
+		spin_lock_bh(&i->if_sklist_lock);
+		if (!hlist_empty(&i->if_sklist)) {
+			sk = sk_head(&i->if_sklist);
+			break;
+		}
+		spin_unlock_bh(&i->if_sklist_lock);
+	}
+out:
+	return sk;
+}
+
+static int ipx_seq_socket_show(struct seq_file *seq, void *v)
+{
+	struct sock *s;
+	struct ipx_sock *ipxs;
+
+	if (v == SEQ_START_TOKEN) {
+#ifdef CONFIG_IPX_INTERN
+		seq_puts(seq, "Local_Address               "
+			      "Remote_Address              Tx_Queue  "
+			      "Rx_Queue  State  Uid\n");
+#else
+		seq_puts(seq, "Local_Address  Remote_Address              "
+			      "Tx_Queue  Rx_Queue  State  Uid\n");
+#endif
+		goto out;
+	}
+
+	s = v;
+	ipxs = ipx_sk(s);
+#ifdef CONFIG_IPX_INTERN
+	seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X  ",
+		   (unsigned long)htonl(ipxs->intrfc->if_netnum),
+		   ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3],
+		   ipxs->node[4], ipxs->node[5], htons(ipxs->port));
+#else
+	seq_printf(seq, "%08lX:%04X  ", (unsigned long) htonl(ipxs->intrfc->if_netnum),
+		   htons(ipxs->port));
+#endif	/* CONFIG_IPX_INTERN */
+	if (s->sk_state != TCP_ESTABLISHED)
+		seq_printf(seq, "%-28s", "Not_Connected");
+	else {
+		seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X  ",
+			   (unsigned long)htonl(ipxs->dest_addr.net),
+			   ipxs->dest_addr.node[0], ipxs->dest_addr.node[1],
+			   ipxs->dest_addr.node[2], ipxs->dest_addr.node[3],
+			   ipxs->dest_addr.node[4], ipxs->dest_addr.node[5],
+			   htons(ipxs->dest_addr.sock));
+	}
+
+	seq_printf(seq, "%08X  %08X  %02X     %03d\n",
+		   atomic_read(&s->sk_wmem_alloc),
+		   atomic_read(&s->sk_rmem_alloc),
+		   s->sk_state, SOCK_INODE(s->sk_socket)->i_uid);
+out:
+	return 0;
+}
+
+static struct seq_operations ipx_seq_interface_ops = {
+	.start  = ipx_seq_interface_start,
+	.next   = ipx_seq_interface_next,
+	.stop   = ipx_seq_interface_stop,
+	.show   = ipx_seq_interface_show,
+};
+
+static struct seq_operations ipx_seq_route_ops = {
+	.start  = ipx_seq_route_start,
+	.next   = ipx_seq_route_next,
+	.stop   = ipx_seq_route_stop,
+	.show   = ipx_seq_route_show,
+};
+
+static struct seq_operations ipx_seq_socket_ops = {
+	.start  = ipx_seq_socket_start,
+	.next   = ipx_seq_socket_next,
+	.stop   = ipx_seq_interface_stop,
+	.show   = ipx_seq_socket_show,
+};
+
+static int ipx_seq_route_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ipx_seq_route_ops);
+}
+
+static int ipx_seq_interface_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ipx_seq_interface_ops);
+}
+
+static int ipx_seq_socket_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ipx_seq_socket_ops);
+}
+
+static struct file_operations ipx_seq_interface_fops = {
+	.owner		= THIS_MODULE,
+	.open           = ipx_seq_interface_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static struct file_operations ipx_seq_route_fops = {
+	.owner		= THIS_MODULE,
+	.open           = ipx_seq_route_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static struct file_operations ipx_seq_socket_fops = {
+	.owner		= THIS_MODULE,
+	.open           = ipx_seq_socket_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static struct proc_dir_entry *ipx_proc_dir;
+
+int __init ipx_proc_init(void)
+{
+	struct proc_dir_entry *p;
+	int rc = -ENOMEM;
+       
+	ipx_proc_dir = proc_mkdir("ipx", proc_net);
+
+	if (!ipx_proc_dir)
+		goto out;
+	p = create_proc_entry("interface", S_IRUGO, ipx_proc_dir);
+	if (!p)
+		goto out_interface;
+
+	p->proc_fops = &ipx_seq_interface_fops;
+	p = create_proc_entry("route", S_IRUGO, ipx_proc_dir);
+	if (!p)
+		goto out_route;
+
+	p->proc_fops = &ipx_seq_route_fops;
+	p = create_proc_entry("socket", S_IRUGO, ipx_proc_dir);
+	if (!p)
+		goto out_socket;
+
+	p->proc_fops = &ipx_seq_socket_fops;
+
+	rc = 0;
+out:
+	return rc;
+out_socket:
+	remove_proc_entry("route", ipx_proc_dir);
+out_route:
+	remove_proc_entry("interface", ipx_proc_dir);
+out_interface:
+	remove_proc_entry("ipx", proc_net);
+	goto out;
+}
+
+void __exit ipx_proc_exit(void)
+{
+	remove_proc_entry("interface", ipx_proc_dir);
+	remove_proc_entry("route", ipx_proc_dir);
+	remove_proc_entry("socket", ipx_proc_dir);
+	remove_proc_entry("ipx", proc_net);
+}
+
+#else /* CONFIG_PROC_FS */
+
+int __init ipx_proc_init(void)
+{
+	return 0;
+}
+
+void __exit ipx_proc_exit(void)
+{
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
new file mode 100644
index 00000000000..67774448efd
--- /dev/null
+++ b/net/ipx/ipx_route.c
@@ -0,0 +1,293 @@
+/*
+ *	Implements the IPX routing routines.
+ *	Code moved from af_ipx.c.
+ *
+ *	Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2003
+ *
+ *	See net/ipx/ChangeLog.
+ */
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/route.h>
+#include <linux/spinlock.h>
+
+#include <net/ipx.h>
+#include <net/sock.h>
+
+LIST_HEAD(ipx_routes);
+DEFINE_RWLOCK(ipx_routes_lock);
+
+extern struct ipx_interface *ipx_internal_net;
+
+extern __u16 ipx_cksum(struct ipxhdr *packet, int length);
+extern struct ipx_interface *ipxitf_find_using_net(__u32 net);
+extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
+			       struct sk_buff *skb, int copy);
+extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
+			       struct sk_buff *skb, int copy);
+extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb,
+		       char *node);
+extern struct ipx_interface *ipxitf_find_using_net(__u32 net);
+
+struct ipx_route *ipxrtr_lookup(__u32 net)
+{
+	struct ipx_route *r;
+
+	read_lock_bh(&ipx_routes_lock);
+	list_for_each_entry(r, &ipx_routes, node)
+		if (r->ir_net == net) {
+			ipxrtr_hold(r);
+			goto unlock;
+		}
+	r = NULL;
+unlock:
+	read_unlock_bh(&ipx_routes_lock);
+	return r;
+}
+
+/*
+ * Caller must hold a reference to intrfc
+ */
+int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc,
+		     unsigned char *node)
+{
+	struct ipx_route *rt;
+	int rc;
+
+	/* Get a route structure; either existing or create */
+	rt = ipxrtr_lookup(network);
+	if (!rt) {
+		rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
+		rc = -EAGAIN;
+		if (!rt)
+			goto out;
+
+		atomic_set(&rt->refcnt, 1);
+		ipxrtr_hold(rt);
+		write_lock_bh(&ipx_routes_lock);
+		list_add(&rt->node, &ipx_routes);
+		write_unlock_bh(&ipx_routes_lock);
+	} else {
+		rc = -EEXIST;
+		if (intrfc == ipx_internal_net)
+			goto out_put;
+	}
+
+	rt->ir_net 	= network;
+	rt->ir_intrfc 	= intrfc;
+	if (!node) {
+		memset(rt->ir_router_node, '\0', IPX_NODE_LEN);
+		rt->ir_routed = 0;
+	} else {
+		memcpy(rt->ir_router_node, node, IPX_NODE_LEN);
+		rt->ir_routed = 1;
+	}
+
+	rc = 0;
+out_put:
+	ipxrtr_put(rt);
+out:
+	return rc;
+}
+
+void ipxrtr_del_routes(struct ipx_interface *intrfc)
+{
+	struct ipx_route *r, *tmp;
+
+	write_lock_bh(&ipx_routes_lock);
+	list_for_each_entry_safe(r, tmp, &ipx_routes, node)
+		if (r->ir_intrfc == intrfc) {
+			list_del(&r->node);
+			ipxrtr_put(r);
+		}
+	write_unlock_bh(&ipx_routes_lock);
+}
+
+static int ipxrtr_create(struct ipx_route_definition *rd)
+{
+	struct ipx_interface *intrfc;
+	int rc = -ENETUNREACH;
+
+	/* Find the appropriate interface */
+	intrfc = ipxitf_find_using_net(rd->ipx_router_network);
+	if (!intrfc)
+		goto out;
+	rc = ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node);
+	ipxitf_put(intrfc);
+out:
+	return rc;
+}
+
+static int ipxrtr_delete(long net)
+{
+	struct ipx_route *r, *tmp;
+	int rc;
+
+	write_lock_bh(&ipx_routes_lock);
+	list_for_each_entry_safe(r, tmp, &ipx_routes, node)
+		if (r->ir_net == net) {
+			/* Directly connected; can't lose route */
+			rc = -EPERM;
+			if (!r->ir_routed)
+				goto out;
+			list_del(&r->node);
+			ipxrtr_put(r);
+			rc = 0;
+			goto out;
+		}
+	rc = -ENOENT;
+out:
+	write_unlock_bh(&ipx_routes_lock);
+	return rc;
+}
+
+/*
+ * The skb has to be unshared, we'll end up calling ipxitf_send, that'll
+ * modify the packet
+ */
+int ipxrtr_route_skb(struct sk_buff *skb)
+{
+	struct ipxhdr *ipx = ipx_hdr(skb);
+	struct ipx_route *r = ipxrtr_lookup(IPX_SKB_CB(skb)->ipx_dest_net);
+
+	if (!r) {	/* no known route */
+		kfree_skb(skb);
+		return 0;
+	}
+
+	ipxitf_hold(r->ir_intrfc);
+	ipxitf_send(r->ir_intrfc, skb, r->ir_routed ?
+			r->ir_router_node : ipx->ipx_dest.node);
+	ipxitf_put(r->ir_intrfc);
+	ipxrtr_put(r);
+
+	return 0;
+}
+
+/*
+ * Route an outgoing frame from a socket.
+ */
+int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
+			struct iovec *iov, size_t len, int noblock)
+{
+	struct sk_buff *skb;
+	struct ipx_sock *ipxs = ipx_sk(sk);
+	struct ipx_interface *intrfc;
+	struct ipxhdr *ipx;
+	size_t size;
+	int ipx_offset;
+	struct ipx_route *rt = NULL;
+	int rc;
+
+	/* Find the appropriate interface on which to send packet */
+	if (!usipx->sipx_network && ipx_primary_net) {
+		usipx->sipx_network = ipx_primary_net->if_netnum;
+		intrfc = ipx_primary_net;
+	} else {
+		rt = ipxrtr_lookup(usipx->sipx_network);
+		rc = -ENETUNREACH;
+		if (!rt)
+			goto out;
+		intrfc = rt->ir_intrfc;
+	}
+
+	ipxitf_hold(intrfc);
+	ipx_offset = intrfc->if_ipx_offset;
+	size = sizeof(struct ipxhdr) + len + ipx_offset;
+
+	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+	if (!skb)
+		goto out_put;
+
+	skb_reserve(skb, ipx_offset);
+	skb->sk = sk;
+
+	/* Fill in IPX header */
+	skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr));
+	ipx = ipx_hdr(skb);
+	ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
+	IPX_SKB_CB(skb)->ipx_tctrl = 0;
+	ipx->ipx_type 	 = usipx->sipx_type;
+
+	IPX_SKB_CB(skb)->last_hop.index = -1;
+#ifdef CONFIG_IPX_INTERN
+	IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
+	memcpy(ipx->ipx_source.node, ipxs->node, IPX_NODE_LEN);
+#else
+	rc = ntohs(ipxs->port);
+	if (rc == 0x453 || rc == 0x452) {
+		/* RIP/SAP special handling for mars_nwe */
+		IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
+		memcpy(ipx->ipx_source.node, intrfc->if_node, IPX_NODE_LEN);
+	} else {
+		IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
+		memcpy(ipx->ipx_source.node, ipxs->intrfc->if_node,
+			IPX_NODE_LEN);
+	}
+#endif	/* CONFIG_IPX_INTERN */
+	ipx->ipx_source.sock		= ipxs->port;
+	IPX_SKB_CB(skb)->ipx_dest_net	= usipx->sipx_network;
+	memcpy(ipx->ipx_dest.node, usipx->sipx_node, IPX_NODE_LEN);
+	ipx->ipx_dest.sock		= usipx->sipx_port;
+
+	rc = memcpy_fromiovec(skb_put(skb, len), iov, len);
+	if (rc) {
+		kfree_skb(skb);
+		goto out_put;
+	}	
+
+	/* Apply checksum. Not allowed on 802.3 links. */
+	if (sk->sk_no_check || intrfc->if_dlink_type == IPX_FRAME_8023)
+		ipx->ipx_checksum = 0xFFFF;
+	else
+		ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
+
+	rc = ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? 
+			 rt->ir_router_node : ipx->ipx_dest.node);
+out_put:
+	ipxitf_put(intrfc);
+	if (rt)
+		ipxrtr_put(rt);
+out:
+	return rc;
+}
+
+/*
+ * We use a normal struct rtentry for route handling
+ */
+int ipxrtr_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct rtentry rt;	/* Use these to behave like 'other' stacks */
+	struct sockaddr_ipx *sg, *st;
+	int rc = -EFAULT;
+
+	if (copy_from_user(&rt, arg, sizeof(rt)))
+		goto out;
+
+	sg = (struct sockaddr_ipx *)&rt.rt_gateway;
+	st = (struct sockaddr_ipx *)&rt.rt_dst;
+
+	rc = -EINVAL;
+	if (!(rt.rt_flags & RTF_GATEWAY) || /* Direct routes are fixed */
+	    sg->sipx_family != AF_IPX ||
+	    st->sipx_family != AF_IPX)
+		goto out;
+
+	switch (cmd) {
+	case SIOCDELRT:
+		rc = ipxrtr_delete(st->sipx_network);
+		break;
+	case SIOCADDRT: {
+		struct ipx_route_definition f;
+		f.ipx_network		= st->sipx_network;
+		f.ipx_router_network	= sg->sipx_network;
+		memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN);
+		rc = ipxrtr_create(&f);
+		break;
+	}
+	}
+
+out:
+	return rc;
+}
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
new file mode 100644
index 00000000000..510eda96d10
--- /dev/null
+++ b/net/ipx/sysctl_net_ipx.c
@@ -0,0 +1,62 @@
+/* -*- linux-c -*-
+ * sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/ipx directory entry (empty =) ). [MS]
+ * Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+
+#ifndef CONFIG_SYSCTL
+#error This file should not be compiled without CONFIG_SYSCTL defined
+#endif
+
+/* From af_ipx.c */
+extern int sysctl_ipx_pprop_broadcasting;
+
+static struct ctl_table ipx_table[] = {
+	{
+		.ctl_name	= NET_IPX_PPROP_BROADCASTING,
+		.procname	= "ipx_pprop_broadcasting",
+		.data		= &sysctl_ipx_pprop_broadcasting,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ 0 },
+};
+
+static struct ctl_table ipx_dir_table[] = {
+	{
+		.ctl_name	= NET_IPX,
+		.procname	= "ipx",
+		.mode		= 0555,
+		.child		= ipx_table,
+       	},
+	{ 0 },
+};
+
+static struct ctl_table ipx_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= ipx_dir_table,
+	},
+	{ 0 },
+};
+
+static struct ctl_table_header *ipx_table_header;
+
+void ipx_register_sysctl(void)
+{
+	ipx_table_header = register_sysctl_table(ipx_root_table, 1);
+}
+
+void ipx_unregister_sysctl(void)
+{
+	unregister_sysctl_table(ipx_table_header);
+}
diff --git a/net/irda/Kconfig b/net/irda/Kconfig
new file mode 100644
index 00000000000..9efb17ba48a
--- /dev/null
+++ b/net/irda/Kconfig
@@ -0,0 +1,96 @@
+#
+# IrDA protocol configuration
+#
+
+menuconfig IRDA
+	depends on NET
+	tristate "IrDA (infrared) subsystem support"
+	select CRC_CCITT
+	---help---
+	  Say Y here if you want to build support for the IrDA (TM) protocols.
+	  The Infrared Data Associations (tm) specifies standards for wireless
+	  infrared communication and is supported by most laptops and PDA's.
+
+	  To use Linux support for the IrDA (tm) protocols, you will also need
+	  some user-space utilities like irattach.  For more information, see
+	  the file <file:Documentation/networking/irda.txt>.  You also want to
+	  read the IR-HOWTO, available at
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  If you want to exchange bits of data (vCal, vCard) with a PDA, you
+	  will need to install some OBEX application, such as OpenObex :
+	  <http://sourceforge.net/projects/openobex/>
+
+	  To compile this support as a module, choose M here: the module will
+	  be called irda.
+
+comment "IrDA protocols"
+	depends on IRDA
+
+source "net/irda/irlan/Kconfig"
+
+source "net/irda/irnet/Kconfig"
+
+source "net/irda/ircomm/Kconfig"
+
+config IRDA_ULTRA
+	bool "Ultra (connectionless) protocol"
+	depends on IRDA
+	help
+	  Say Y here to support the connectionless Ultra IRDA protocol.
+	  Ultra allows to exchange data over IrDA with really simple devices
+	  (watch, beacon) without the overhead of the IrDA protocol (no handshaking,
+	  no management frames, simple fixed header).
+	  Ultra is available as a special socket : socket(AF_IRDA, SOCK_DGRAM, 1);
+
+comment "IrDA options"
+	depends on IRDA
+
+config IRDA_CACHE_LAST_LSAP
+	bool "Cache last LSAP"
+	depends on IRDA
+	help
+	  Say Y here if you want IrLMP to cache the last LSAP used.  This
+	  makes sense since most frames will be sent/received on the same
+	  connection.  Enabling this option will save a hash-lookup per frame.
+
+	  If unsure, say Y.
+
+config IRDA_FAST_RR
+	bool "Fast RRs (low latency)"
+	depends on IRDA
+	---help---
+	  Say Y here is you want IrLAP to send fast RR (Receive Ready) frames
+	  when acting as a primary station.
+	  Disabling this option will make latency over IrDA very bad. Enabling
+	  this option will make the IrDA stack send more packet than strictly
+	  necessary, thus reduce your battery life (but not that much).
+
+	  Fast RR will make IrLAP send out a RR frame immediately when
+	  receiving a frame if its own transmit queue is currently empty. This
+	  will give a lot of speed improvement when receiving much data since
+	  the secondary station will not have to wait the max. turn around
+	  time (usually 500ms) before it is allowed to transmit the next time.
+	  If the transmit queue of the secondary is also empty, the primary will
+	  start backing-off before sending another RR frame, waiting longer
+	  each time until the back-off reaches the max. turn around time.
+	  This back-off increase in controlled via
+	  /proc/sys/net/irda/fast_poll_increase
+
+	  If unsure, say Y.
+
+config IRDA_DEBUG
+	bool "Debug information"
+	depends on IRDA
+	help
+	  Say Y here if you want the IrDA subsystem to write debug information
+	  to your syslog. You can change the debug level in
+	  /proc/sys/net/irda/debug .
+	  When this option is enabled, the IrDA also perform many extra internal
+	  verifications which will usually prevent the kernel to crash in case of
+	  bugs.
+
+	  If unsure, say Y (since it makes it easier to find the bugs).
+
+source "drivers/net/irda/Kconfig"
+
diff --git a/net/irda/Makefile b/net/irda/Makefile
new file mode 100644
index 00000000000..d1366c2a39c
--- /dev/null
+++ b/net/irda/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux IrDA protocol layer.
+#
+
+obj-$(CONFIG_IRDA) += irda.o
+obj-$(CONFIG_IRLAN) += irlan/
+obj-$(CONFIG_IRNET) += irnet/
+obj-$(CONFIG_IRCOMM) += ircomm/
+
+irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \
+          irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \
+          irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \
+	  discovery.o parameters.o irmod.o
+irda-$(CONFIG_PROC_FS) += irproc.o
+irda-$(CONFIG_SYSCTL) += irsysctl.o
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
new file mode 100644
index 00000000000..92c6e8d4e73
--- /dev/null
+++ b/net/irda/af_irda.c
@@ -0,0 +1,2586 @@
+/*********************************************************************
+ *
+ * Filename:      af_irda.c
+ * Version:       0.9
+ * Description:   IrDA sockets implementation
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun May 31 10:12:43 1998
+ * Modified at:   Sat Dec 25 21:10:23 1999
+ * Modified by:   Dag Brattli <dag@brattli.net>
+ * Sources:       af_netroom.c, af_ax25.c, af_rose.c, af_x25.c etc.
+ *
+ *     Copyright (c) 1999 Dag Brattli <dagb@cs.uit.no>
+ *     Copyright (c) 1999-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     All Rights Reserved.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with this program; if not, write to the Free Software
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *     MA 02111-1307 USA
+ *
+ *     Linux-IrDA now supports four different types of IrDA sockets:
+ *
+ *     o SOCK_STREAM:    TinyTP connections with SAR disabled. The
+ *                       max SDU size is 0 for conn. of this type
+ *     o SOCK_SEQPACKET: TinyTP connections with SAR enabled. TTP may
+ *                       fragment the messages, but will preserve
+ *                       the message boundaries
+ *     o SOCK_DGRAM:     IRDAPROTO_UNITDATA: TinyTP connections with Unitdata
+ *                       (unreliable) transfers
+ *                       IRDAPROTO_ULTRA: Connectionless and unreliable data
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/init.h>
+#include <linux/net.h>
+#include <linux/irda.h>
+#include <linux/poll.h>
+
+#include <asm/ioctls.h>		/* TIOCOUTQ, TIOCINQ */
+#include <asm/uaccess.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#include <net/irda/af_irda.h>
+
+static int irda_create(struct socket *sock, int protocol);
+
+static struct proto_ops irda_stream_ops;
+static struct proto_ops irda_seqpacket_ops;
+static struct proto_ops irda_dgram_ops;
+
+#ifdef CONFIG_IRDA_ULTRA
+static struct proto_ops irda_ultra_ops;
+#define ULTRA_MAX_DATA 382
+#endif /* CONFIG_IRDA_ULTRA */
+
+#define IRDA_MAX_HEADER (TTP_MAX_HEADER)
+
+/*
+ * Function irda_data_indication (instance, sap, skb)
+ *
+ *    Received some data from TinyTP. Just queue it on the receive queue
+ *
+ */
+static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
+{
+	struct irda_sock *self;
+	struct sock *sk;
+	int err;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	self = instance;
+	sk = instance;
+	IRDA_ASSERT(sk != NULL, return -1;);
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err) {
+		IRDA_DEBUG(1, "%s(), error: no more mem!\n", __FUNCTION__);
+		self->rx_flow = FLOW_STOP;
+
+		/* When we return error, TTP will need to requeue the skb */
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Function irda_disconnect_indication (instance, sap, reason, skb)
+ *
+ *    Connection has been closed. Check reason to find out why
+ *
+ */
+static void irda_disconnect_indication(void *instance, void *sap,
+				       LM_REASON reason, struct sk_buff *skb)
+{
+	struct irda_sock *self;
+	struct sock *sk;
+
+	self = instance;
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	/* Don't care about it, but let's not leak it */
+	if(skb)
+		dev_kfree_skb(skb);
+
+	sk = instance;
+	if (sk == NULL) {
+		IRDA_DEBUG(0, "%s(%p) : BUG : sk is NULL\n",
+			   __FUNCTION__, self);
+		return;
+	}
+
+	/* Prevent race conditions with irda_release() and irda_shutdown() */
+	if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
+		sk->sk_state     = TCP_CLOSE;
+		sk->sk_err       = ECONNRESET;
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+
+		sk->sk_state_change(sk);
+		/* Uh-oh... Should use sock_orphan ? */
+                sock_set_flag(sk, SOCK_DEAD);
+
+		/* Close our TSAP.
+		 * If we leave it open, IrLMP put it back into the list of
+		 * unconnected LSAPs. The problem is that any incoming request
+		 * can then be matched to this socket (and it will be, because
+		 * it is at the head of the list). This would prevent any
+		 * listening socket waiting on the same TSAP to get those
+		 * requests. Some apps forget to close sockets, or hang to it
+		 * a bit too long, so we may stay in this dead state long
+		 * enough to be noticed...
+		 * Note : all socket function do check sk->sk_state, so we are
+		 * safe...
+		 * Jean II
+		 */
+		if (self->tsap) {
+			irttp_close_tsap(self->tsap);
+			self->tsap = NULL;
+		}
+        }
+
+	/* Note : once we are there, there is not much you want to do
+	 * with the socket anymore, apart from closing it.
+	 * For example, bind() and connect() won't reset sk->sk_err,
+	 * sk->sk_shutdown and sk->sk_flags to valid values...
+	 * Jean II
+	 */
+}
+
+/*
+ * Function irda_connect_confirm (instance, sap, qos, max_sdu_size, skb)
+ *
+ *    Connections has been confirmed by the remote device
+ *
+ */
+static void irda_connect_confirm(void *instance, void *sap,
+				 struct qos_info *qos,
+				 __u32 max_sdu_size, __u8 max_header_size,
+				 struct sk_buff *skb)
+{
+	struct irda_sock *self;
+	struct sock *sk;
+
+	self = instance;
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	sk = instance;
+	if (sk == NULL) {
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	dev_kfree_skb(skb);
+	// Should be ??? skb_queue_tail(&sk->sk_receive_queue, skb);
+
+	/* How much header space do we need to reserve */
+	self->max_header_size = max_header_size;
+
+	/* IrTTP max SDU size in transmit direction */
+	self->max_sdu_size_tx = max_sdu_size;
+
+	/* Find out what the largest chunk of data that we can transmit is */
+	switch (sk->sk_type) {
+	case SOCK_STREAM:
+		if (max_sdu_size != 0) {
+			IRDA_ERROR("%s: max_sdu_size must be 0\n",
+				   __FUNCTION__);
+			return;
+		}
+		self->max_data_size = irttp_get_max_seg_size(self->tsap);
+		break;
+	case SOCK_SEQPACKET:
+		if (max_sdu_size == 0) {
+			IRDA_ERROR("%s: max_sdu_size cannot be 0\n",
+				   __FUNCTION__);
+			return;
+		}
+		self->max_data_size = max_sdu_size;
+		break;
+	default:
+		self->max_data_size = irttp_get_max_seg_size(self->tsap);
+	};
+
+	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
+		   self->max_data_size);
+
+	memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
+
+	/* We are now connected! */
+	sk->sk_state = TCP_ESTABLISHED;
+	sk->sk_state_change(sk);
+}
+
+/*
+ * Function irda_connect_indication(instance, sap, qos, max_sdu_size, userdata)
+ *
+ *    Incoming connection
+ *
+ */
+static void irda_connect_indication(void *instance, void *sap,
+				    struct qos_info *qos, __u32 max_sdu_size,
+				    __u8 max_header_size, struct sk_buff *skb)
+{
+	struct irda_sock *self;
+	struct sock *sk;
+
+	self = instance;
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	sk = instance;
+	if (sk == NULL) {
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	/* How much header space do we need to reserve */
+	self->max_header_size = max_header_size;
+
+	/* IrTTP max SDU size in transmit direction */
+	self->max_sdu_size_tx = max_sdu_size;
+
+	/* Find out what the largest chunk of data that we can transmit is */
+	switch (sk->sk_type) {
+	case SOCK_STREAM:
+		if (max_sdu_size != 0) {
+			IRDA_ERROR("%s: max_sdu_size must be 0\n",
+				   __FUNCTION__);
+			kfree_skb(skb);
+			return;
+		}
+		self->max_data_size = irttp_get_max_seg_size(self->tsap);
+		break;
+	case SOCK_SEQPACKET:
+		if (max_sdu_size == 0) {
+			IRDA_ERROR("%s: max_sdu_size cannot be 0\n",
+				   __FUNCTION__);
+			kfree_skb(skb);
+			return;
+		}
+		self->max_data_size = max_sdu_size;
+		break;
+	default:
+		self->max_data_size = irttp_get_max_seg_size(self->tsap);
+	};
+
+	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
+		   self->max_data_size);
+
+	memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
+
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_state_change(sk);
+}
+
+/*
+ * Function irda_connect_response (handle)
+ *
+ *    Accept incoming connection
+ *
+ */
+static void irda_connect_response(struct irda_sock *self)
+{
+	struct sk_buff *skb;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+
+	skb = dev_alloc_skb(64);
+	if (skb == NULL) {
+		IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	/* Reserve space for MUX_CONTROL and LAP header */
+	skb_reserve(skb, IRDA_MAX_HEADER);
+
+	irttp_connect_response(self->tsap, self->max_sdu_size_rx, skb);
+}
+
+/*
+ * Function irda_flow_indication (instance, sap, flow)
+ *
+ *    Used by TinyTP to tell us if it can accept more data or not
+ *
+ */
+static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
+{
+	struct irda_sock *self;
+	struct sock *sk;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	self = instance;
+	sk = instance;
+	IRDA_ASSERT(sk != NULL, return;);
+
+	switch (flow) {
+	case FLOW_STOP:
+		IRDA_DEBUG(1, "%s(), IrTTP wants us to slow down\n",
+			   __FUNCTION__);
+		self->tx_flow = flow;
+		break;
+	case FLOW_START:
+		self->tx_flow = flow;
+		IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n",
+			   __FUNCTION__);
+		wake_up_interruptible(sk->sk_sleep);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __FUNCTION__);
+		/* Unknown flow command, better stop */
+		self->tx_flow = flow;
+		break;
+	}
+}
+
+/*
+ * Function irda_getvalue_confirm (obj_id, value, priv)
+ *
+ *    Got answer from remote LM-IAS, just pass object to requester...
+ *
+ * Note : duplicate from above, but we need our own version that
+ * doesn't touch the dtsap_sel and save the full value structure...
+ */
+static void irda_getvalue_confirm(int result, __u16 obj_id,
+				  struct ias_value *value, void *priv)
+{
+	struct irda_sock *self;
+
+	self = (struct irda_sock *) priv;
+	if (!self) {
+		IRDA_WARNING("%s: lost myself!\n", __FUNCTION__);
+		return;
+	}
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	/* We probably don't need to make any more queries */
+	iriap_close(self->iriap);
+	self->iriap = NULL;
+
+	/* Check if request succeeded */
+	if (result != IAS_SUCCESS) {
+		IRDA_DEBUG(1, "%s(), IAS query failed! (%d)\n", __FUNCTION__,
+			   result);
+
+		self->errno = result;	/* We really need it later */
+
+		/* Wake up any processes waiting for result */
+		wake_up_interruptible(&self->query_wait);
+
+		return;
+	}
+
+	/* Pass the object to the caller (so the caller must delete it) */
+	self->ias_result = value;
+	self->errno = 0;
+
+	/* Wake up any processes waiting for result */
+	wake_up_interruptible(&self->query_wait);
+}
+
+/*
+ * Function irda_selective_discovery_indication (discovery)
+ *
+ *    Got a selective discovery indication from IrLMP.
+ *
+ * IrLMP is telling us that this node is new and matching our hint bit
+ * filter. Wake up any process waiting for answer...
+ */
+static void irda_selective_discovery_indication(discinfo_t *discovery,
+						DISCOVERY_MODE mode,
+						void *priv)
+{
+	struct irda_sock *self;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	self = (struct irda_sock *) priv;
+	if (!self) {
+		IRDA_WARNING("%s: lost myself!\n", __FUNCTION__);
+		return;
+	}
+
+	/* Pass parameter to the caller */
+	self->cachedaddr = discovery->daddr;
+
+	/* Wake up process if its waiting for device to be discovered */
+	wake_up_interruptible(&self->query_wait);
+}
+
+/*
+ * Function irda_discovery_timeout (priv)
+ *
+ *    Timeout in the selective discovery process
+ *
+ * We were waiting for a node to be discovered, but nothing has come up
+ * so far. Wake up the user and tell him that we failed...
+ */
+static void irda_discovery_timeout(u_long priv)
+{
+	struct irda_sock *self;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	self = (struct irda_sock *) priv;
+	IRDA_ASSERT(self != NULL, return;);
+
+	/* Nothing for the caller */
+	self->cachelog = NULL;
+	self->cachedaddr = 0;
+	self->errno = -ETIME;
+
+	/* Wake up process if its still waiting... */
+	wake_up_interruptible(&self->query_wait);
+}
+
+/*
+ * Function irda_open_tsap (self)
+ *
+ *    Open local Transport Service Access Point (TSAP)
+ *
+ */
+static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name)
+{
+	notify_t notify;
+
+	if (self->tsap) {
+		IRDA_WARNING("%s: busy!\n", __FUNCTION__);
+		return -EBUSY;
+	}
+
+	/* Initialize callbacks to be used by the IrDA stack */
+	irda_notify_init(&notify);
+	notify.connect_confirm       = irda_connect_confirm;
+	notify.connect_indication    = irda_connect_indication;
+	notify.disconnect_indication = irda_disconnect_indication;
+	notify.data_indication       = irda_data_indication;
+	notify.udata_indication	     = irda_data_indication;
+	notify.flow_indication       = irda_flow_indication;
+	notify.instance = self;
+	strncpy(notify.name, name, NOTIFY_MAX_NAME);
+
+	self->tsap = irttp_open_tsap(tsap_sel, DEFAULT_INITIAL_CREDIT,
+				     &notify);
+	if (self->tsap == NULL) {
+		IRDA_DEBUG(0, "%s(), Unable to allocate TSAP!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
+	/* Remember which TSAP selector we actually got */
+	self->stsap_sel = self->tsap->stsap_sel;
+
+	return 0;
+}
+
+/*
+ * Function irda_open_lsap (self)
+ *
+ *    Open local Link Service Access Point (LSAP). Used for opening Ultra
+ *    sockets
+ */
+#ifdef CONFIG_IRDA_ULTRA
+static int irda_open_lsap(struct irda_sock *self, int pid)
+{
+	notify_t notify;
+
+	if (self->lsap) {
+		IRDA_WARNING("%s(), busy!\n", __FUNCTION__);
+		return -EBUSY;
+	}
+
+	/* Initialize callbacks to be used by the IrDA stack */
+	irda_notify_init(&notify);
+	notify.udata_indication	= irda_data_indication;
+	notify.instance = self;
+	strncpy(notify.name, "Ultra", NOTIFY_MAX_NAME);
+
+	self->lsap = irlmp_open_lsap(LSAP_CONNLESS, &notify, pid);
+	if (self->lsap == NULL) {
+		IRDA_DEBUG( 0, "%s(), Unable to allocate LSAP!\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irda_find_lsap_sel (self, name)
+ *
+ *    Try to lookup LSAP selector in remote LM-IAS
+ *
+ * Basically, we start a IAP query, and then go to sleep. When the query
+ * return, irda_getvalue_confirm will wake us up, and we can examine the
+ * result of the query...
+ * Note that in some case, the query fail even before we go to sleep,
+ * creating some races...
+ */
+static int irda_find_lsap_sel(struct irda_sock *self, char *name)
+{
+	IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	if (self->iriap) {
+		IRDA_WARNING("%s(): busy with a previous query\n",
+			     __FUNCTION__);
+		return -EBUSY;
+	}
+
+	self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+				 irda_getvalue_confirm);
+	if(self->iriap == NULL)
+		return -ENOMEM;
+
+	/* Treat unexpected wakeup as disconnect */
+	self->errno = -EHOSTUNREACH;
+
+	/* Query remote LM-IAS */
+	iriap_getvaluebyclass_request(self->iriap, self->saddr, self->daddr,
+				      name, "IrDA:TinyTP:LsapSel");
+
+	/* Wait for answer, if not yet finished (or failed) */
+	if (wait_event_interruptible(self->query_wait, (self->iriap==NULL)))
+		/* Treat signals as disconnect */
+		return -EHOSTUNREACH;
+
+	/* Check what happened */
+	if (self->errno)
+	{
+		/* Requested object/attribute doesn't exist */
+		if((self->errno == IAS_CLASS_UNKNOWN) ||
+		   (self->errno == IAS_ATTRIB_UNKNOWN))
+			return (-EADDRNOTAVAIL);
+		else
+			return (-EHOSTUNREACH);
+	}
+
+	/* Get the remote TSAP selector */
+	switch (self->ias_result->type) {
+	case IAS_INTEGER:
+		IRDA_DEBUG(4, "%s() int=%d\n",
+			   __FUNCTION__, self->ias_result->t.integer);
+
+		if (self->ias_result->t.integer != -1)
+			self->dtsap_sel = self->ias_result->t.integer;
+		else
+			self->dtsap_sel = 0;
+		break;
+	default:
+		self->dtsap_sel = 0;
+		IRDA_DEBUG(0, "%s(), bad type!\n", __FUNCTION__);
+		break;
+	}
+	if (self->ias_result)
+		irias_delete_value(self->ias_result);
+
+	if (self->dtsap_sel)
+		return 0;
+
+	return -EADDRNOTAVAIL;
+}
+
+/*
+ * Function irda_discover_daddr_and_lsap_sel (self, name)
+ *
+ *    This try to find a device with the requested service.
+ *
+ * It basically look into the discovery log. For each address in the list,
+ * it queries the LM-IAS of the device to find if this device offer
+ * the requested service.
+ * If there is more than one node supporting the service, we complain
+ * to the user (it should move devices around).
+ * The, we set both the destination address and the lsap selector to point
+ * on the service on the unique device we have found.
+ *
+ * Note : this function fails if there is more than one device in range,
+ * because IrLMP doesn't disconnect the LAP when the last LSAP is closed.
+ * Moreover, we would need to wait the LAP disconnection...
+ */
+static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
+{
+	discinfo_t *discoveries;	/* Copy of the discovery log */
+	int	number;			/* Number of nodes in the log */
+	int	i;
+	int	err = -ENETUNREACH;
+	__u32	daddr = DEV_ADDR_ANY;	/* Address we found the service on */
+	__u8	dtsap_sel = 0x0;	/* TSAP associated with it */
+
+	IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	/* Ask lmp for the current discovery log
+	 * Note : we have to use irlmp_get_discoveries(), as opposed
+	 * to play with the cachelog directly, because while we are
+	 * making our ias query, le log might change... */
+	discoveries = irlmp_get_discoveries(&number, self->mask.word,
+					    self->nslots);
+	/* Check if the we got some results */
+	if (discoveries == NULL)
+		return -ENETUNREACH;	/* No nodes discovered */
+
+	/*
+	 * Now, check all discovered devices (if any), and connect
+	 * client only about the services that the client is
+	 * interested in...
+	 */
+	for(i = 0; i < number; i++) {
+		/* Try the address in the log */
+		self->daddr = discoveries[i].daddr;
+		self->saddr = 0x0;
+		IRDA_DEBUG(1, "%s(), trying daddr = %08x\n",
+			   __FUNCTION__, self->daddr);
+
+		/* Query remote LM-IAS for this service */
+		err = irda_find_lsap_sel(self, name);
+		switch (err) {
+		case 0:
+			/* We found the requested service */
+			if(daddr != DEV_ADDR_ANY) {
+				IRDA_DEBUG(1, "%s(), discovered service ''%s'' in two different devices !!!\n",
+					   __FUNCTION__, name);
+				self->daddr = DEV_ADDR_ANY;
+				kfree(discoveries);
+				return(-ENOTUNIQ);
+			}
+			/* First time we found that one, save it ! */
+			daddr = self->daddr;
+			dtsap_sel = self->dtsap_sel;
+			break;
+		case -EADDRNOTAVAIL:
+			/* Requested service simply doesn't exist on this node */
+			break;
+		default:
+			/* Something bad did happen :-( */
+			IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __FUNCTION__);
+			self->daddr = DEV_ADDR_ANY;
+			kfree(discoveries);
+			return(-EHOSTUNREACH);
+			break;
+		}
+	}
+	/* Cleanup our copy of the discovery log */
+	kfree(discoveries);
+
+	/* Check out what we found */
+	if(daddr == DEV_ADDR_ANY) {
+		IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n",
+			   __FUNCTION__, name);
+		self->daddr = DEV_ADDR_ANY;
+		return(-EADDRNOTAVAIL);
+	}
+
+	/* Revert back to discovered device & service */
+	self->daddr = daddr;
+	self->saddr = 0x0;
+	self->dtsap_sel = dtsap_sel;
+
+	IRDA_DEBUG(1, "%s(), discovered requested service ''%s'' at address %08x\n",
+		   __FUNCTION__, name, self->daddr);
+
+	return 0;
+}
+
+/*
+ * Function irda_getname (sock, uaddr, uaddr_len, peer)
+ *
+ *    Return the our own, or peers socket address (sockaddr_irda)
+ *
+ */
+static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
+			int *uaddr_len, int peer)
+{
+	struct sockaddr_irda saddr;
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+
+	if (peer) {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+
+		saddr.sir_family = AF_IRDA;
+		saddr.sir_lsap_sel = self->dtsap_sel;
+		saddr.sir_addr = self->daddr;
+	} else {
+		saddr.sir_family = AF_IRDA;
+		saddr.sir_lsap_sel = self->stsap_sel;
+		saddr.sir_addr = self->saddr;
+	}
+
+	IRDA_DEBUG(1, "%s(), tsap_sel = %#x\n", __FUNCTION__, saddr.sir_lsap_sel);
+	IRDA_DEBUG(1, "%s(), addr = %08x\n", __FUNCTION__, saddr.sir_addr);
+
+	/* uaddr_len come to us uninitialised */
+	*uaddr_len = sizeof (struct sockaddr_irda);
+	memcpy(uaddr, &saddr, *uaddr_len);
+
+	return 0;
+}
+
+/*
+ * Function irda_listen (sock, backlog)
+ *
+ *    Just move to the listen state
+ *
+ */
+static int irda_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
+	    (sk->sk_type != SOCK_DGRAM))
+		return -EOPNOTSUPP;
+
+	if (sk->sk_state != TCP_LISTEN) {
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = TCP_LISTEN;
+
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Function irda_bind (sock, uaddr, addr_len)
+ *
+ *    Used by servers to register their well known TSAP
+ *
+ */
+static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
+	struct irda_sock *self = irda_sk(sk);
+	int err;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	if (addr_len != sizeof(struct sockaddr_irda))
+		return -EINVAL;
+
+#ifdef CONFIG_IRDA_ULTRA
+	/* Special care for Ultra sockets */
+	if ((sk->sk_type == SOCK_DGRAM) &&
+	    (sk->sk_protocol == IRDAPROTO_ULTRA)) {
+		self->pid = addr->sir_lsap_sel;
+		if (self->pid & 0x80) {
+			IRDA_DEBUG(0, "%s(), extension in PID not supp!\n", __FUNCTION__);
+			return -EOPNOTSUPP;
+		}
+		err = irda_open_lsap(self, self->pid);
+		if (err < 0)
+			return err;
+
+		/* Pretend we are connected */
+		sock->state = SS_CONNECTED;
+		sk->sk_state   = TCP_ESTABLISHED;
+
+		return 0;
+	}
+#endif /* CONFIG_IRDA_ULTRA */
+
+	err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
+	if (err < 0)
+		return err;
+
+	/*  Register with LM-IAS */
+	self->ias_obj = irias_new_object(addr->sir_name, jiffies);
+	irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel",
+				 self->stsap_sel, IAS_KERNEL_ATTR);
+	irias_insert_object(self->ias_obj);
+
+	return 0;
+}
+
+/*
+ * Function irda_accept (sock, newsock, flags)
+ *
+ *    Wait for incoming connection
+ *
+ */
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *new, *self = irda_sk(sk);
+	struct sock *newsk;
+	struct sk_buff *skb;
+	int err;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	err = irda_create(newsock, sk->sk_protocol);
+	if (err)
+		return err;
+
+	if (sock->state != SS_UNCONNECTED)
+		return -EINVAL;
+
+	if ((sk = sock->sk) == NULL)
+		return -EINVAL;
+
+	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
+	    (sk->sk_type != SOCK_DGRAM))
+		return -EOPNOTSUPP;
+
+	if (sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	/*
+	 *	The read queue this time is holding sockets ready to use
+	 *	hooked into the SABM we saved
+	 */
+
+	/*
+	 * We can perform the accept only if there is incoming data
+	 * on the listening socket.
+	 * So, we will block the caller until we receive any data.
+	 * If the caller was waiting on select() or poll() before
+	 * calling us, the data is waiting for us ;-)
+	 * Jean II
+	 */
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	if (skb == NULL) {
+		int ret = 0;
+		DECLARE_WAITQUEUE(waitq, current);
+
+		/* Non blocking operation */
+		if (flags & O_NONBLOCK)
+			return -EWOULDBLOCK;
+
+		/* The following code is a cut'n'paste of the
+		 * wait_event_interruptible() macro.
+		 * We don't us the macro because the condition has
+		 * side effects : we want to make sure that only one
+		 * skb get dequeued - Jean II */
+		add_wait_queue(sk->sk_sleep, &waitq);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			skb = skb_dequeue(&sk->sk_receive_queue);
+			if (skb != NULL)
+				break;
+			if (!signal_pending(current)) {
+				schedule();
+				continue;
+			}
+			ret = -ERESTARTSYS;
+			break;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &waitq);
+		if(ret)
+			return -ERESTARTSYS;
+	}
+
+	newsk = newsock->sk;
+	newsk->sk_state = TCP_ESTABLISHED;
+
+	new = irda_sk(newsk);
+	IRDA_ASSERT(new != NULL, return -1;);
+
+	/* Now attach up the new socket */
+	new->tsap = irttp_dup(self->tsap, new);
+	if (!new->tsap) {
+		IRDA_DEBUG(0, "%s(), dup failed!\n", __FUNCTION__);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	new->stsap_sel = new->tsap->stsap_sel;
+	new->dtsap_sel = new->tsap->dtsap_sel;
+	new->saddr = irttp_get_saddr(new->tsap);
+	new->daddr = irttp_get_daddr(new->tsap);
+
+	new->max_sdu_size_tx = self->max_sdu_size_tx;
+	new->max_sdu_size_rx = self->max_sdu_size_rx;
+	new->max_data_size   = self->max_data_size;
+	new->max_header_size = self->max_header_size;
+
+	memcpy(&new->qos_tx, &self->qos_tx, sizeof(struct qos_info));
+
+	/* Clean up the original one to keep it in listen state */
+	irttp_listen(self->tsap);
+
+	/* Wow ! What is that ? Jean II */
+	skb->sk = NULL;
+	skb->destructor = NULL;
+	kfree_skb(skb);
+	sk->sk_ack_backlog--;
+
+	newsock->state = SS_CONNECTED;
+
+	irda_connect_response(new);
+
+	return 0;
+}
+
+/*
+ * Function irda_connect (sock, uaddr, addr_len, flags)
+ *
+ *    Connect to a IrDA device
+ *
+ * The main difference with a "standard" connect is that with IrDA we need
+ * to resolve the service name into a TSAP selector (in TCP, port number
+ * doesn't have to be resolved).
+ * Because of this service name resoltion, we can offer "auto-connect",
+ * where we connect to a service without specifying a destination address.
+ *
+ * Note : by consulting "errno", the user space caller may learn the cause
+ * of the failure. Most of them are visible in the function, others may come
+ * from subroutines called and are listed here :
+ *	o EBUSY : already processing a connect
+ *	o EHOSTUNREACH : bad addr->sir_addr argument
+ *	o EADDRNOTAVAIL : bad addr->sir_name argument
+ *	o ENOTUNIQ : more than one node has addr->sir_name (auto-connect)
+ *	o ENETUNREACH : no node found on the network (auto-connect)
+ */
+static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
+			int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
+	struct irda_sock *self = irda_sk(sk);
+	int err;
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	/* Don't allow connect for Ultra sockets */
+	if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA))
+		return -ESOCKTNOSUPPORT;
+
+	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
+		sock->state = SS_CONNECTED;
+		return 0;   /* Connect completed during a ERESTARTSYS event */
+	}
+
+	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
+		sock->state = SS_UNCONNECTED;
+		return -ECONNREFUSED;
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED)
+		return -EISCONN;      /* No reconnect on a seqpacket socket */
+
+	sk->sk_state   = TCP_CLOSE;
+	sock->state = SS_UNCONNECTED;
+
+	if (addr_len != sizeof(struct sockaddr_irda))
+		return -EINVAL;
+
+	/* Check if user supplied any destination device address */
+	if ((!addr->sir_addr) || (addr->sir_addr == DEV_ADDR_ANY)) {
+		/* Try to find one suitable */
+		err = irda_discover_daddr_and_lsap_sel(self, addr->sir_name);
+		if (err) {
+			IRDA_DEBUG(0, "%s(), auto-connect failed!\n", __FUNCTION__);
+			return err;
+		}
+	} else {
+		/* Use the one provided by the user */
+		self->daddr = addr->sir_addr;
+		IRDA_DEBUG(1, "%s(), daddr = %08x\n", __FUNCTION__, self->daddr);
+
+		/* If we don't have a valid service name, we assume the
+		 * user want to connect on a specific LSAP. Prevent
+		 * the use of invalid LSAPs (IrLMP 1.1 p10). Jean II */
+		if((addr->sir_name[0] != '\0') ||
+		   (addr->sir_lsap_sel >= 0x70)) {
+			/* Query remote LM-IAS using service name */
+			err = irda_find_lsap_sel(self, addr->sir_name);
+			if (err) {
+				IRDA_DEBUG(0, "%s(), connect failed!\n", __FUNCTION__);
+				return err;
+			}
+		} else {
+			/* Directly connect to the remote LSAP
+			 * specified by the sir_lsap field.
+			 * Please use with caution, in IrDA LSAPs are
+			 * dynamic and there is no "well-known" LSAP. */
+			self->dtsap_sel = addr->sir_lsap_sel;
+		}
+	}
+
+	/* Check if we have opened a local TSAP */
+	if (!self->tsap)
+		irda_open_tsap(self, LSAP_ANY, addr->sir_name);
+
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state = SS_CONNECTING;
+	sk->sk_state   = TCP_SYN_SENT;
+
+	/* Connect to remote device */
+	err = irttp_connect_request(self->tsap, self->dtsap_sel,
+				    self->saddr, self->daddr, NULL,
+				    self->max_sdu_size_rx, NULL);
+	if (err) {
+		IRDA_DEBUG(0, "%s(), connect failed!\n", __FUNCTION__);
+		return err;
+	}
+
+	/* Now the loop */
+	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
+		return -EINPROGRESS;
+
+	if (wait_event_interruptible(*(sk->sk_sleep),
+				     (sk->sk_state != TCP_SYN_SENT)))
+		return -ERESTARTSYS;
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		sock->state = SS_UNCONNECTED;
+		return sock_error(sk);	/* Always set at this point */
+	}
+
+	sock->state = SS_CONNECTED;
+
+	/* At this point, IrLMP has assigned our source address */
+	self->saddr = irttp_get_saddr(self->tsap);
+
+	return 0;
+}
+
+static struct proto irda_proto = {
+	.name	  = "IRDA",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct irda_sock),
+};
+
+/*
+ * Function irda_create (sock, protocol)
+ *
+ *    Create IrDA socket
+ *
+ */
+static int irda_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct irda_sock *self;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	/* Check for valid socket type */
+	switch (sock->type) {
+	case SOCK_STREAM:     /* For TTP connections with SAR disabled */
+	case SOCK_SEQPACKET:  /* For TTP connections with SAR enabled */
+	case SOCK_DGRAM:      /* For TTP Unitdata or LMP Ultra transfers */
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	/* Allocate networking socket */
+	sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	self = irda_sk(sk);
+	IRDA_DEBUG(2, "%s() : self is %p\n", __FUNCTION__, self);
+
+	init_waitqueue_head(&self->query_wait);
+
+	/* Initialise networking socket struct */
+	sock_init_data(sock, sk);	/* Note : set sk->sk_refcnt to 1 */
+	sk->sk_family = PF_IRDA;
+	sk->sk_protocol = protocol;
+
+	switch (sock->type) {
+	case SOCK_STREAM:
+		sock->ops = &irda_stream_ops;
+		self->max_sdu_size_rx = TTP_SAR_DISABLE;
+		break;
+	case SOCK_SEQPACKET:
+		sock->ops = &irda_seqpacket_ops;
+		self->max_sdu_size_rx = TTP_SAR_UNBOUND;
+		break;
+	case SOCK_DGRAM:
+		switch (protocol) {
+#ifdef CONFIG_IRDA_ULTRA
+		case IRDAPROTO_ULTRA:
+			sock->ops = &irda_ultra_ops;
+			/* Initialise now, because we may send on unbound
+			 * sockets. Jean II */
+			self->max_data_size = ULTRA_MAX_DATA - LMP_PID_HEADER;
+			self->max_header_size = IRDA_MAX_HEADER + LMP_PID_HEADER;
+			break;
+#endif /* CONFIG_IRDA_ULTRA */
+		case IRDAPROTO_UNITDATA:
+			sock->ops = &irda_dgram_ops;
+			/* We let Unitdata conn. be like seqpack conn. */
+			self->max_sdu_size_rx = TTP_SAR_UNBOUND;
+			break;
+		default:
+			IRDA_ERROR("%s: protocol not supported!\n",
+				   __FUNCTION__);
+			return -ESOCKTNOSUPPORT;
+		}
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	/* Register as a client with IrLMP */
+	self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
+	self->mask.word = 0xffff;
+	self->rx_flow = self->tx_flow = FLOW_START;
+	self->nslots = DISCOVERY_DEFAULT_SLOTS;
+	self->daddr = DEV_ADDR_ANY;	/* Until we get connected */
+	self->saddr = 0x0;		/* so IrLMP assign us any link */
+	return 0;
+}
+
+/*
+ * Function irda_destroy_socket (self)
+ *
+ *    Destroy socket
+ *
+ */
+static void irda_destroy_socket(struct irda_sock *self)
+{
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	IRDA_ASSERT(self != NULL, return;);
+
+	/* Unregister with IrLMP */
+	irlmp_unregister_client(self->ckey);
+	irlmp_unregister_service(self->skey);
+
+	/* Unregister with LM-IAS */
+	if (self->ias_obj) {
+		irias_delete_object(self->ias_obj);
+		self->ias_obj = NULL;
+	}
+
+	if (self->iriap) {
+		iriap_close(self->iriap);
+		self->iriap = NULL;
+	}
+
+	if (self->tsap) {
+		irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
+		irttp_close_tsap(self->tsap);
+		self->tsap = NULL;
+	}
+#ifdef CONFIG_IRDA_ULTRA
+	if (self->lsap) {
+		irlmp_close_lsap(self->lsap);
+		self->lsap = NULL;
+	}
+#endif /* CONFIG_IRDA_ULTRA */
+}
+
+/*
+ * Function irda_release (sock)
+ */
+static int irda_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+        if (sk == NULL)
+		return 0;
+
+	sk->sk_state       = TCP_CLOSE;
+	sk->sk_shutdown   |= SEND_SHUTDOWN;
+	sk->sk_state_change(sk);
+
+	/* Destroy IrDA socket */
+	irda_destroy_socket(irda_sk(sk));
+
+	sock_orphan(sk);
+	sock->sk   = NULL;
+
+	/* Purge queues (see sock_init_data()) */
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Destroy networking socket if we are the last reference on it,
+	 * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */
+	sock_put(sk);
+
+	/* Notes on socket locking and deallocation... - Jean II
+	 * In theory we should put pairs of sock_hold() / sock_put() to
+	 * prevent the socket to be destroyed whenever there is an
+	 * outstanding request or outstanding incoming packet or event.
+	 *
+	 * 1) This may include IAS request, both in connect and getsockopt.
+	 * Unfortunately, the situation is a bit more messy than it looks,
+	 * because we close iriap and kfree(self) above.
+	 *
+	 * 2) This may include selective discovery in getsockopt.
+	 * Same stuff as above, irlmp registration and self are gone.
+	 *
+	 * Probably 1 and 2 may not matter, because it's all triggered
+	 * by a process and the socket layer already prevent the
+	 * socket to go away while a process is holding it, through
+	 * sockfd_put() and fput()...
+	 *
+	 * 3) This may include deferred TSAP closure. In particular,
+	 * we may receive a late irda_disconnect_indication()
+	 * Fortunately, (tsap_cb *)->close_pend should protect us
+	 * from that.
+	 *
+	 * I did some testing on SMP, and it looks solid. And the socket
+	 * memory leak is now gone... - Jean II
+	 */
+
+        return 0;
+}
+
+/*
+ * Function irda_sendmsg (iocb, sock, msg, len)
+ *
+ *    Send message down to TinyTP. This function is used for both STREAM and
+ *    SEQPACK services. This is possible since it forces the client to
+ *    fragment the message if necessary
+ */
+static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int err;
+
+	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
+
+	/* Note : socket.c set MSG_EOR on SEQPACKET sockets */
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		return -EPIPE;
+	}
+
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	self = irda_sk(sk);
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	/* Check if IrTTP is wants us to slow down */
+
+	if (wait_event_interruptible(*(sk->sk_sleep),
+	    (self->tx_flow != FLOW_STOP  ||  sk->sk_state != TCP_ESTABLISHED)))
+		return -ERESTARTSYS;
+
+	/* Check if we are still connected */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	/* Check that we don't send out to big frames */
+	if (len > self->max_data_size) {
+		IRDA_DEBUG(2, "%s(), Chopping frame from %zd to %d bytes!\n",
+			   __FUNCTION__, len, self->max_data_size);
+		len = self->max_data_size;
+	}
+
+	skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16, 
+				  msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return -ENOBUFS;
+
+	skb_reserve(skb, self->max_header_size + 16);
+
+	asmptr = skb->h.raw = skb_put(skb, len);
+	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	/*
+	 * Just send the message to TinyTP, and let it deal with possible
+	 * errors. No need to duplicate all that here
+	 */
+	err = irttp_data_request(self->tsap, skb);
+	if (err) {
+		IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err);
+		return err;
+	}
+	/* Tell client how much data we actually sent */
+	return len;
+}
+
+/*
+ * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags)
+ *
+ *    Try to receive message and copy it to user. The frame is discarded
+ *    after being read, regardless of how much the user actually read
+ */
+static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+	struct sk_buff *skb;
+	size_t copied;
+	int err;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return err;
+
+	skb->h.raw = skb->data;
+	copied     = skb->len;
+
+	if (copied > size) {
+		IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
+			   __FUNCTION__, copied, size);
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	skb_free_datagram(sk, skb);
+
+	/*
+	 *  Check if we have previously stopped IrTTP and we know
+	 *  have more free space in our rx_queue. If so tell IrTTP
+	 *  to start delivering frames again before our rx_queue gets
+	 *  empty
+	 */
+	if (self->rx_flow == FLOW_STOP) {
+		if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
+			IRDA_DEBUG(2, "%s(), Starting IrTTP\n", __FUNCTION__);
+			self->rx_flow = FLOW_START;
+			irttp_flow_request(self->tsap, FLOW_START);
+		}
+	}
+
+	return copied;
+}
+
+/*
+ * Function irda_recvmsg_stream (iocb, sock, msg, size, flags)
+ */
+static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
+			       struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+	int noblock = flags & MSG_DONTWAIT;
+	size_t copied = 0;
+	int target = 1;
+	DECLARE_WAITQUEUE(waitq, current);
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	if (sock->flags & __SO_ACCEPTCON)
+		return(-EINVAL);
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (flags & MSG_WAITALL)
+		target = size;
+
+	msg->msg_namelen = 0;
+
+	do {
+		int chunk;
+		struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
+
+		if (skb==NULL) {
+			int ret = 0;
+
+			if (copied >= target)
+				break;
+
+			/* The following code is a cut'n'paste of the
+			 * wait_event_interruptible() macro.
+			 * We don't us the macro because the test condition
+			 * is messy. - Jean II */
+			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			add_wait_queue(sk->sk_sleep, &waitq);
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			/*
+			 *	POSIX 1003.1g mandates this order.
+			 */
+			if (sk->sk_err)
+				ret = sock_error(sk);
+			else if (sk->sk_shutdown & RCV_SHUTDOWN)
+				;
+			else if (noblock)
+				ret = -EAGAIN;
+			else if (signal_pending(current))
+				ret = -ERESTARTSYS;
+			else if (skb_peek(&sk->sk_receive_queue) == NULL)
+				/* Wait process until data arrives */
+				schedule();
+
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &waitq);
+			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+			if(ret)
+				return(ret);
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			continue;
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (copied == 0)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
+
+		/* Mark read part of skb as used */
+		if (!(flags & MSG_PEEK)) {
+			skb_pull(skb, chunk);
+
+			/* put the skb back if we didn't use it up.. */
+			if (skb->len) {
+				IRDA_DEBUG(1, "%s(), back on q!\n",
+					   __FUNCTION__);
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+
+			kfree_skb(skb);
+		} else {
+			IRDA_DEBUG(0, "%s() questionable!?\n", __FUNCTION__);
+
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+
+	/*
+	 *  Check if we have previously stopped IrTTP and we know
+	 *  have more free space in our rx_queue. If so tell IrTTP
+	 *  to start delivering frames again before our rx_queue gets
+	 *  empty
+	 */
+	if (self->rx_flow == FLOW_STOP) {
+		if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
+			IRDA_DEBUG(2, "%s(), Starting IrTTP\n", __FUNCTION__);
+			self->rx_flow = FLOW_START;
+			irttp_flow_request(self->tsap, FLOW_START);
+		}
+	}
+
+	return copied;
+}
+
+/*
+ * Function irda_sendmsg_dgram (iocb, sock, msg, len)
+ *
+ *    Send message down to TinyTP for the unreliable sequenced
+ *    packet service...
+ *
+ */
+static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int err;
+
+	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		return -EPIPE;
+	}
+
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	self = irda_sk(sk);
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	/*
+	 * Check that we don't send out to big frames. This is an unreliable
+	 * service, so we have no fragmentation and no coalescence
+	 */
+	if (len > self->max_data_size) {
+		IRDA_DEBUG(0, "%s(), Warning to much data! "
+			   "Chopping frame from %zd to %d bytes!\n",
+			   __FUNCTION__, len, self->max_data_size);
+		len = self->max_data_size;
+	}
+
+	skb = sock_alloc_send_skb(sk, len + self->max_header_size,
+				  msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return -ENOBUFS;
+
+	skb_reserve(skb, self->max_header_size);
+
+	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
+	asmptr = skb->h.raw = skb_put(skb, len);
+	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	/*
+	 * Just send the message to TinyTP, and let it deal with possible
+	 * errors. No need to duplicate all that here
+	 */
+	err = irttp_udata_request(self->tsap, skb);
+	if (err) {
+		IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err);
+		return err;
+	}
+	return len;
+}
+
+/*
+ * Function irda_sendmsg_ultra (iocb, sock, msg, len)
+ *
+ *    Send message down to IrLMP for the unreliable Ultra
+ *    packet service...
+ */
+#ifdef CONFIG_IRDA_ULTRA
+static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self;
+	__u8 pid = 0;
+	int bound = 0;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int err;
+
+	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		return -EPIPE;
+	}
+
+	self = irda_sk(sk);
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	/* Check if an address was specified with sendto. Jean II */
+	if (msg->msg_name) {
+		struct sockaddr_irda *addr = (struct sockaddr_irda *) msg->msg_name;
+		/* Check address, extract pid. Jean II */
+		if (msg->msg_namelen < sizeof(*addr))
+			return -EINVAL;
+		if (addr->sir_family != AF_IRDA)
+			return -EINVAL;
+
+		pid = addr->sir_lsap_sel;
+		if (pid & 0x80) {
+			IRDA_DEBUG(0, "%s(), extension in PID not supp!\n", __FUNCTION__);
+			return -EOPNOTSUPP;
+		}
+	} else {
+		/* Check that the socket is properly bound to an Ultra
+		 * port. Jean II */
+		if ((self->lsap == NULL) ||
+		    (sk->sk_state != TCP_ESTABLISHED)) {
+			IRDA_DEBUG(0, "%s(), socket not bound to Ultra PID.\n",
+				   __FUNCTION__);
+			return -ENOTCONN;
+		}
+		/* Use PID from socket */
+		bound = 1;
+	}
+
+	/*
+	 * Check that we don't send out to big frames. This is an unreliable
+	 * service, so we have no fragmentation and no coalescence
+	 */
+	if (len > self->max_data_size) {
+		IRDA_DEBUG(0, "%s(), Warning to much data! "
+			   "Chopping frame from %zd to %d bytes!\n",
+			   __FUNCTION__, len, self->max_data_size);
+		len = self->max_data_size;
+	}
+
+	skb = sock_alloc_send_skb(sk, len + self->max_header_size,
+				  msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return -ENOBUFS;
+
+	skb_reserve(skb, self->max_header_size);
+
+	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
+	asmptr = skb->h.raw = skb_put(skb, len);
+	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	err = irlmp_connless_data_request((bound ? self->lsap : NULL),
+					  skb, pid);
+	if (err) {
+		IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err);
+		return err;
+	}
+	return len;
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irda_shutdown (sk, how)
+ */
+static int irda_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
+
+	sk->sk_state       = TCP_CLOSE;
+	sk->sk_shutdown   |= SEND_SHUTDOWN;
+	sk->sk_state_change(sk);
+
+	if (self->iriap) {
+		iriap_close(self->iriap);
+		self->iriap = NULL;
+	}
+
+	if (self->tsap) {
+		irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
+		irttp_close_tsap(self->tsap);
+		self->tsap = NULL;
+	}
+
+	/* A few cleanup so the socket look as good as new... */
+	self->rx_flow = self->tx_flow = FLOW_START;	/* needed ??? */
+	self->daddr = DEV_ADDR_ANY;	/* Until we get re-connected */
+	self->saddr = 0x0;		/* so IrLMP assign us any link */
+
+        return 0;
+}
+
+/*
+ * Function irda_poll (file, sock, wait)
+ */
+static unsigned int irda_poll(struct file * file, struct socket *sock,
+			      poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+	unsigned int mask;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* Exceptional events? */
+	if (sk->sk_err)
+		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		IRDA_DEBUG(0, "%s(), POLLHUP\n", __FUNCTION__);
+		mask |= POLLHUP;
+	}
+
+	/* Readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue)) {
+		IRDA_DEBUG(4, "Socket is readable\n");
+		mask |= POLLIN | POLLRDNORM;
+	}
+
+	/* Connection-based need to check for termination and startup */
+	switch (sk->sk_type) {
+	case SOCK_STREAM:
+		if (sk->sk_state == TCP_CLOSE) {
+			IRDA_DEBUG(0, "%s(), POLLHUP\n", __FUNCTION__);
+			mask |= POLLHUP;
+		}
+
+		if (sk->sk_state == TCP_ESTABLISHED) {
+			if ((self->tx_flow == FLOW_START) &&
+			    sock_writeable(sk))
+			{
+				mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+			}
+		}
+		break;
+	case SOCK_SEQPACKET:
+		if ((self->tx_flow == FLOW_START) &&
+		    sock_writeable(sk))
+		{
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		}
+		break;
+	case SOCK_DGRAM:
+		if (sock_writeable(sk))
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		break;
+	default:
+		break;
+	}
+	return mask;
+}
+
+/*
+ * Function irda_ioctl (sock, cmd, arg)
+ */
+static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	IRDA_DEBUG(4, "%s(), cmd=%#x\n", __FUNCTION__, cmd);
+
+	switch (cmd) {
+	case TIOCOUTQ: {
+		long amount;
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		if (put_user(amount, (unsigned int __user *)arg))
+			return -EFAULT;
+		return 0;
+	}
+
+	case TIOCINQ: {
+		struct sk_buff *skb;
+		long amount = 0L;
+		/* These two are safe on a single CPU system as only user tasks fiddle here */
+		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+			amount = skb->len;
+		if (put_user(amount, (unsigned int __user *)arg))
+			return -EFAULT;
+		return 0;
+	}
+
+	case SIOCGSTAMP:
+		if (sk != NULL)
+			return sock_get_timestamp(sk, (struct timeval __user *)arg);
+		return -EINVAL;
+
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+	case SIOCGIFMETRIC:
+	case SIOCSIFMETRIC:
+		return -EINVAL;
+	default:
+		IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__);
+		return dev_ioctl(cmd, (void __user *) arg);
+	}
+
+	/*NOTREACHED*/
+	return 0;
+}
+
+/*
+ * Function irda_setsockopt (sock, level, optname, optval, optlen)
+ *
+ *    Set some options for the socket
+ *
+ */
+static int irda_setsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+	struct irda_ias_set    *ias_opt;
+	struct ias_object      *ias_obj;
+	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
+	int opt;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	if (level != SOL_IRLMP)
+		return -ENOPROTOOPT;
+
+	switch (optname) {
+	case IRLMP_IAS_SET:
+		/* The user want to add an attribute to an existing IAS object
+		 * (in the IAS database) or to create a new object with this
+		 * attribute.
+		 * We first query IAS to know if the object exist, and then
+		 * create the right attribute...
+		 */
+
+		if (optlen != sizeof(struct irda_ias_set))
+			return -EINVAL;
+
+		ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
+		if (ias_opt == NULL)
+			return -ENOMEM;
+
+		/* Copy query to the driver. */
+		if (copy_from_user(ias_opt, optval, optlen)) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+
+		/* Find the object we target.
+		 * If the user gives us an empty string, we use the object
+		 * associated with this socket. This will workaround
+		 * duplicated class name - Jean II */
+		if(ias_opt->irda_class_name[0] == '\0') {
+			if(self->ias_obj == NULL) {
+				kfree(ias_opt);
+				return -EINVAL;
+			}
+			ias_obj = self->ias_obj;
+		} else
+			ias_obj = irias_find_object(ias_opt->irda_class_name);
+
+		/* Only ROOT can mess with the global IAS database.
+		 * Users can only add attributes to the object associated
+		 * with the socket they own - Jean II */
+		if((!capable(CAP_NET_ADMIN)) &&
+		   ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
+			kfree(ias_opt);
+			return -EPERM;
+		}
+
+		/* If the object doesn't exist, create it */
+		if(ias_obj == (struct ias_object *) NULL) {
+			/* Create a new object */
+			ias_obj = irias_new_object(ias_opt->irda_class_name,
+						   jiffies);
+		}
+
+		/* Do we have the attribute already ? */
+		if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) {
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+
+		/* Look at the type */
+		switch(ias_opt->irda_attrib_type) {
+		case IAS_INTEGER:
+			/* Add an integer attribute */
+			irias_add_integer_attrib(
+				ias_obj,
+				ias_opt->irda_attrib_name,
+				ias_opt->attribute.irda_attrib_int,
+				IAS_USER_ATTR);
+			break;
+		case IAS_OCT_SEQ:
+			/* Check length */
+			if(ias_opt->attribute.irda_attrib_octet_seq.len >
+			   IAS_MAX_OCTET_STRING) {
+				kfree(ias_opt);
+				return -EINVAL;
+			}
+			/* Add an octet sequence attribute */
+			irias_add_octseq_attrib(
+			      ias_obj,
+			      ias_opt->irda_attrib_name,
+			      ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
+			      ias_opt->attribute.irda_attrib_octet_seq.len,
+			      IAS_USER_ATTR);
+			break;
+		case IAS_STRING:
+			/* Should check charset & co */
+			/* Check length */
+			/* The length is encoded in a __u8, and
+			 * IAS_MAX_STRING == 256, so there is no way
+			 * userspace can pass us a string too large.
+			 * Jean II */
+			/* NULL terminate the string (avoid troubles) */
+			ias_opt->attribute.irda_attrib_string.string[ias_opt->attribute.irda_attrib_string.len] = '\0';
+			/* Add a string attribute */
+			irias_add_string_attrib(
+				ias_obj,
+				ias_opt->irda_attrib_name,
+				ias_opt->attribute.irda_attrib_string.string,
+				IAS_USER_ATTR);
+			break;
+		default :
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+		irias_insert_object(ias_obj);
+		kfree(ias_opt);
+		break;
+	case IRLMP_IAS_DEL:
+		/* The user want to delete an object from our local IAS
+		 * database. We just need to query the IAS, check is the
+		 * object is not owned by the kernel and delete it.
+		 */
+
+		if (optlen != sizeof(struct irda_ias_set))
+			return -EINVAL;
+
+		ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
+		if (ias_opt == NULL)
+			return -ENOMEM;
+
+		/* Copy query to the driver. */
+		if (copy_from_user(ias_opt, optval, optlen)) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+
+		/* Find the object we target.
+		 * If the user gives us an empty string, we use the object
+		 * associated with this socket. This will workaround
+		 * duplicated class name - Jean II */
+		if(ias_opt->irda_class_name[0] == '\0')
+			ias_obj = self->ias_obj;
+		else
+			ias_obj = irias_find_object(ias_opt->irda_class_name);
+		if(ias_obj == (struct ias_object *) NULL) {
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+
+		/* Only ROOT can mess with the global IAS database.
+		 * Users can only del attributes from the object associated
+		 * with the socket they own - Jean II */
+		if((!capable(CAP_NET_ADMIN)) &&
+		   ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
+			kfree(ias_opt);
+			return -EPERM;
+		}
+
+		/* Find the attribute (in the object) we target */
+		ias_attr = irias_find_attrib(ias_obj,
+					     ias_opt->irda_attrib_name);
+		if(ias_attr == (struct ias_attrib *) NULL) {
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+
+		/* Check is the user space own the object */
+		if(ias_attr->value->owner != IAS_USER_ATTR) {
+			IRDA_DEBUG(1, "%s(), attempting to delete a kernel attribute\n", __FUNCTION__);
+			kfree(ias_opt);
+			return -EPERM;
+		}
+
+		/* Remove the attribute (and maybe the object) */
+		irias_delete_attrib(ias_obj, ias_attr, 1);
+		kfree(ias_opt);
+		break;
+	case IRLMP_MAX_SDU_SIZE:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+
+		if (get_user(opt, (int __user *)optval))
+			return -EFAULT;
+
+		/* Only possible for a seqpacket service (TTP with SAR) */
+		if (sk->sk_type != SOCK_SEQPACKET) {
+			IRDA_DEBUG(2, "%s(), setting max_sdu_size = %d\n",
+				   __FUNCTION__, opt);
+			self->max_sdu_size_rx = opt;
+		} else {
+			IRDA_WARNING("%s: not allowed to set MAXSDUSIZE for this socket type!\n",
+				     __FUNCTION__);
+			return -ENOPROTOOPT;
+		}
+		break;
+	case IRLMP_HINTS_SET:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+
+		/* The input is really a (__u8 hints[2]), easier as an int */
+		if (get_user(opt, (int __user *)optval))
+			return -EFAULT;
+
+		/* Unregister any old registration */
+		if (self->skey)
+			irlmp_unregister_service(self->skey);
+
+		self->skey = irlmp_register_service((__u16) opt);
+		break;
+	case IRLMP_HINT_MASK_SET:
+		/* As opposed to the previous case which set the hint bits
+		 * that we advertise, this one set the filter we use when
+		 * making a discovery (nodes which don't match any hint
+		 * bit in the mask are not reported).
+		 */
+		if (optlen < sizeof(int))
+			return -EINVAL;
+
+		/* The input is really a (__u8 hints[2]), easier as an int */
+		if (get_user(opt, (int __user *)optval))
+			return -EFAULT;
+
+		/* Set the new hint mask */
+		self->mask.word = (__u16) opt;
+		/* Mask out extension bits */
+		self->mask.word &= 0x7f7f;
+		/* Check if no bits */
+		if(!self->mask.word)
+			self->mask.word = 0xFFFF;
+
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+	return 0;
+}
+
+/*
+ * Function irda_extract_ias_value(ias_opt, ias_value)
+ *
+ *    Translate internal IAS value structure to the user space representation
+ *
+ * The external representation of IAS values, as we exchange them with
+ * user space program is quite different from the internal representation,
+ * as stored in the IAS database (because we need a flat structure for
+ * crossing kernel boundary).
+ * This function transform the former in the latter. We also check
+ * that the value type is valid.
+ */
+static int irda_extract_ias_value(struct irda_ias_set *ias_opt,
+				  struct ias_value *ias_value)
+{
+	/* Look at the type */
+	switch (ias_value->type) {
+	case IAS_INTEGER:
+		/* Copy the integer */
+		ias_opt->attribute.irda_attrib_int = ias_value->t.integer;
+		break;
+	case IAS_OCT_SEQ:
+		/* Set length */
+		ias_opt->attribute.irda_attrib_octet_seq.len = ias_value->len;
+		/* Copy over */
+		memcpy(ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
+		       ias_value->t.oct_seq, ias_value->len);
+		break;
+	case IAS_STRING:
+		/* Set length */
+		ias_opt->attribute.irda_attrib_string.len = ias_value->len;
+		ias_opt->attribute.irda_attrib_string.charset = ias_value->charset;
+		/* Copy over */
+		memcpy(ias_opt->attribute.irda_attrib_string.string,
+		       ias_value->t.string, ias_value->len);
+		/* NULL terminate the string (avoid troubles) */
+		ias_opt->attribute.irda_attrib_string.string[ias_value->len] = '\0';
+		break;
+	case IAS_MISSING:
+	default :
+		return -EINVAL;
+	}
+
+	/* Copy type over */
+	ias_opt->irda_attrib_type = ias_value->type;
+
+	return 0;
+}
+
+/*
+ * Function irda_getsockopt (sock, level, optname, optval, optlen)
+ */
+static int irda_getsockopt(struct socket *sock, int level, int optname,
+			   char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct irda_sock *self = irda_sk(sk);
+	struct irda_device_list list;
+	struct irda_device_info *discoveries;
+	struct irda_ias_set *	ias_opt;	/* IAS get/query params */
+	struct ias_object *	ias_obj;	/* Object in IAS */
+	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
+	int daddr = DEV_ADDR_ANY;	/* Dest address for IAS queries */
+	int val = 0;
+	int len = 0;
+	int err;
+	int offset, total;
+
+	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
+
+	if (level != SOL_IRLMP)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if(len < 0)
+		return -EINVAL;
+
+	switch (optname) {
+	case IRLMP_ENUMDEVICES:
+		/* Ask lmp for the current discovery log */
+		discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
+						    self->nslots);
+		/* Check if the we got some results */
+		if (discoveries == NULL)
+			return -EAGAIN;		/* Didn't find any devices */
+		err = 0;
+
+		/* Write total list length back to client */
+		if (copy_to_user(optval, &list,
+				 sizeof(struct irda_device_list) -
+				 sizeof(struct irda_device_info)))
+			err = -EFAULT;
+
+		/* Offset to first device entry */
+		offset = sizeof(struct irda_device_list) -
+			sizeof(struct irda_device_info);
+
+		/* Copy the list itself - watch for overflow */
+		if(list.len > 2048)
+		{
+			err = -EINVAL;
+			goto bed;
+		}
+		total = offset + (list.len * sizeof(struct irda_device_info));
+		if (total > len)
+			total = len;
+		if (copy_to_user(optval+offset, discoveries, total - offset))
+			err = -EFAULT;
+
+		/* Write total number of bytes used back to client */
+		if (put_user(total, optlen))
+			err = -EFAULT;
+bed:
+		/* Free up our buffer */
+		kfree(discoveries);
+		if (err)
+			return err;
+		break;
+	case IRLMP_MAX_SDU_SIZE:
+		val = self->max_data_size;
+		len = sizeof(int);
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		if (copy_to_user(optval, &val, len))
+			return -EFAULT;
+		break;
+	case IRLMP_IAS_GET:
+		/* The user want an object from our local IAS database.
+		 * We just need to query the IAS and return the value
+		 * that we found */
+
+		/* Check that the user has allocated the right space for us */
+		if (len != sizeof(struct irda_ias_set))
+			return -EINVAL;
+
+		ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
+		if (ias_opt == NULL)
+			return -ENOMEM;
+
+		/* Copy query to the driver. */
+		if (copy_from_user(ias_opt, optval, len)) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+
+		/* Find the object we target.
+		 * If the user gives us an empty string, we use the object
+		 * associated with this socket. This will workaround
+		 * duplicated class name - Jean II */
+		if(ias_opt->irda_class_name[0] == '\0')
+			ias_obj = self->ias_obj;
+		else
+			ias_obj = irias_find_object(ias_opt->irda_class_name);
+		if(ias_obj == (struct ias_object *) NULL) {
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+
+		/* Find the attribute (in the object) we target */
+		ias_attr = irias_find_attrib(ias_obj,
+					     ias_opt->irda_attrib_name);
+		if(ias_attr == (struct ias_attrib *) NULL) {
+			kfree(ias_opt);
+			return -EINVAL;
+		}
+
+		/* Translate from internal to user structure */
+		err = irda_extract_ias_value(ias_opt, ias_attr->value);
+		if(err) {
+			kfree(ias_opt);
+			return err;
+		}
+
+		/* Copy reply to the user */
+		if (copy_to_user(optval, ias_opt,
+				 sizeof(struct irda_ias_set))) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+		/* Note : don't need to put optlen, we checked it */
+		kfree(ias_opt);
+		break;
+	case IRLMP_IAS_QUERY:
+		/* The user want an object from a remote IAS database.
+		 * We need to use IAP to query the remote database and
+		 * then wait for the answer to come back. */
+
+		/* Check that the user has allocated the right space for us */
+		if (len != sizeof(struct irda_ias_set))
+			return -EINVAL;
+
+		ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
+		if (ias_opt == NULL)
+			return -ENOMEM;
+
+		/* Copy query to the driver. */
+		if (copy_from_user(ias_opt, optval, len)) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+
+		/* At this point, there are two cases...
+		 * 1) the socket is connected - that's the easy case, we
+		 *	just query the device we are connected to...
+		 * 2) the socket is not connected - the user doesn't want
+		 *	to connect and/or may not have a valid service name
+		 *	(so can't create a fake connection). In this case,
+		 *	we assume that the user pass us a valid destination
+		 *	address in the requesting structure...
+		 */
+		if(self->daddr != DEV_ADDR_ANY) {
+			/* We are connected - reuse known daddr */
+			daddr = self->daddr;
+		} else {
+			/* We are not connected, we must specify a valid
+			 * destination address */
+			daddr = ias_opt->daddr;
+			if((!daddr) || (daddr == DEV_ADDR_ANY)) {
+				kfree(ias_opt);
+				return -EINVAL;
+			}
+		}
+
+		/* Check that we can proceed with IAP */
+		if (self->iriap) {
+			IRDA_WARNING("%s: busy with a previous query\n",
+				     __FUNCTION__);
+			kfree(ias_opt);
+			return -EBUSY;
+		}
+
+		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+					 irda_getvalue_confirm);
+
+		if (self->iriap == NULL) {
+			kfree(ias_opt);
+			return -ENOMEM;
+		}
+
+		/* Treat unexpected wakeup as disconnect */
+		self->errno = -EHOSTUNREACH;
+
+		/* Query remote LM-IAS */
+		iriap_getvaluebyclass_request(self->iriap,
+					      self->saddr, daddr,
+					      ias_opt->irda_class_name,
+					      ias_opt->irda_attrib_name);
+
+		/* Wait for answer, if not yet finished (or failed) */
+		if (wait_event_interruptible(self->query_wait,
+					     (self->iriap == NULL))) {
+			/* pending request uses copy of ias_opt-content
+			 * we can free it regardless! */
+			kfree(ias_opt);
+			/* Treat signals as disconnect */
+			return -EHOSTUNREACH;
+		}
+
+		/* Check what happened */
+		if (self->errno)
+		{
+			kfree(ias_opt);
+			/* Requested object/attribute doesn't exist */
+			if((self->errno == IAS_CLASS_UNKNOWN) ||
+			   (self->errno == IAS_ATTRIB_UNKNOWN))
+				return (-EADDRNOTAVAIL);
+			else
+				return (-EHOSTUNREACH);
+		}
+
+		/* Translate from internal to user structure */
+		err = irda_extract_ias_value(ias_opt, self->ias_result);
+		if (self->ias_result)
+			irias_delete_value(self->ias_result);
+		if (err) {
+			kfree(ias_opt);
+			return err;
+		}
+
+		/* Copy reply to the user */
+		if (copy_to_user(optval, ias_opt,
+				 sizeof(struct irda_ias_set))) {
+			kfree(ias_opt);
+			return -EFAULT;
+		}
+		/* Note : don't need to put optlen, we checked it */
+		kfree(ias_opt);
+		break;
+	case IRLMP_WAITDEVICE:
+		/* This function is just another way of seeing life ;-)
+		 * IRLMP_ENUMDEVICES assumes that you have a static network,
+		 * and that you just want to pick one of the devices present.
+		 * On the other hand, in here we assume that no device is
+		 * present and that at some point in the future a device will
+		 * come into range. When this device arrive, we just wake
+		 * up the caller, so that he has time to connect to it before
+		 * the device goes away...
+		 * Note : once the node has been discovered for more than a
+		 * few second, it won't trigger this function, unless it
+		 * goes away and come back changes its hint bits (so we
+		 * might call it IRLMP_WAITNEWDEVICE).
+		 */
+
+		/* Check that the user is passing us an int */
+		if (len != sizeof(int))
+			return -EINVAL;
+		/* Get timeout in ms (max time we block the caller) */
+		if (get_user(val, (int __user *)optval))
+			return -EFAULT;
+
+		/* Tell IrLMP we want to be notified */
+		irlmp_update_client(self->ckey, self->mask.word,
+				    irda_selective_discovery_indication,
+				    NULL, (void *) self);
+
+		/* Do some discovery (and also return cached results) */
+		irlmp_discovery_request(self->nslots);
+
+		/* Wait until a node is discovered */
+		if (!self->cachedaddr) {
+			int ret = 0;
+
+			IRDA_DEBUG(1, "%s(), nothing discovered yet, going to sleep...\n", __FUNCTION__);
+
+			/* Set watchdog timer to expire in <val> ms. */
+			self->errno = 0;
+			init_timer(&self->watchdog);
+			self->watchdog.function = irda_discovery_timeout;
+			self->watchdog.data = (unsigned long) self;
+			self->watchdog.expires = jiffies + (val * HZ/1000);
+			add_timer(&(self->watchdog));
+
+			/* Wait for IR-LMP to call us back */
+			__wait_event_interruptible(self->query_wait,
+			      (self->cachedaddr != 0 || self->errno == -ETIME),
+						   ret);
+
+			/* If watchdog is still activated, kill it! */
+			if(timer_pending(&(self->watchdog)))
+				del_timer(&(self->watchdog));
+
+			IRDA_DEBUG(1, "%s(), ...waking up !\n", __FUNCTION__);
+
+			if (ret != 0)
+				return ret;
+		}
+		else
+			IRDA_DEBUG(1, "%s(), found immediately !\n",
+				   __FUNCTION__);
+
+		/* Tell IrLMP that we have been notified */
+		irlmp_update_client(self->ckey, self->mask.word,
+				    NULL, NULL, NULL);
+
+		/* Check if the we got some results */
+		if (!self->cachedaddr)
+			return -EAGAIN;		/* Didn't find any devices */
+		daddr = self->cachedaddr;
+		/* Cleanup */
+		self->cachedaddr = 0;
+
+		/* We return the daddr of the device that trigger the
+		 * wakeup. As irlmp pass us only the new devices, we
+		 * are sure that it's not an old device.
+		 * If the user want more details, he should query
+		 * the whole discovery log and pick one device...
+		 */
+		if (put_user(daddr, (int __user *)optval))
+			return -EFAULT;
+
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	return 0;
+}
+
+static struct net_proto_family irda_family_ops = {
+	.family = PF_IRDA,
+	.create = irda_create,
+	.owner	= THIS_MODULE,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
+	.family =	PF_IRDA,
+	.owner =	THIS_MODULE,
+	.release =	irda_release,
+	.bind =		irda_bind,
+	.connect =	irda_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	irda_accept,
+	.getname =	irda_getname,
+	.poll =		irda_poll,
+	.ioctl =	irda_ioctl,
+	.listen =	irda_listen,
+	.shutdown =	irda_shutdown,
+	.setsockopt =	irda_setsockopt,
+	.getsockopt =	irda_getsockopt,
+	.sendmsg =	irda_sendmsg,
+	.recvmsg =	irda_recvmsg_stream,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
+	.family =	PF_IRDA,
+	.owner =	THIS_MODULE,
+	.release =	irda_release,
+	.bind =		irda_bind,
+	.connect =	irda_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	irda_accept,
+	.getname =	irda_getname,
+	.poll =		datagram_poll,
+	.ioctl =	irda_ioctl,
+	.listen =	irda_listen,
+	.shutdown =	irda_shutdown,
+	.setsockopt =	irda_setsockopt,
+	.getsockopt =	irda_getsockopt,
+	.sendmsg =	irda_sendmsg,
+	.recvmsg =	irda_recvmsg_dgram,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
+	.family =	PF_IRDA,
+	.owner =	THIS_MODULE,
+	.release =	irda_release,
+	.bind =		irda_bind,
+	.connect =	irda_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	irda_accept,
+	.getname =	irda_getname,
+	.poll =		datagram_poll,
+	.ioctl =	irda_ioctl,
+	.listen =	irda_listen,
+	.shutdown =	irda_shutdown,
+	.setsockopt =	irda_setsockopt,
+	.getsockopt =	irda_getsockopt,
+	.sendmsg =	irda_sendmsg_dgram,
+	.recvmsg =	irda_recvmsg_dgram,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+#ifdef CONFIG_IRDA_ULTRA
+static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
+	.family =	PF_IRDA,
+	.owner =	THIS_MODULE,
+	.release =	irda_release,
+	.bind =		irda_bind,
+	.connect =	sock_no_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	irda_getname,
+	.poll =		datagram_poll,
+	.ioctl =	irda_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	irda_shutdown,
+	.setsockopt =	irda_setsockopt,
+	.getsockopt =	irda_getsockopt,
+	.sendmsg =	irda_sendmsg_ultra,
+	.recvmsg =	irda_recvmsg_dgram,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+#endif /* CONFIG_IRDA_ULTRA */
+
+#include <linux/smp_lock.h>
+SOCKOPS_WRAP(irda_stream, PF_IRDA);
+SOCKOPS_WRAP(irda_seqpacket, PF_IRDA);
+SOCKOPS_WRAP(irda_dgram, PF_IRDA);
+#ifdef CONFIG_IRDA_ULTRA
+SOCKOPS_WRAP(irda_ultra, PF_IRDA);
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irsock_init (pro)
+ *
+ *    Initialize IrDA protocol
+ *
+ */
+int __init irsock_init(void)
+{
+	int rc = proto_register(&irda_proto, 0);
+
+	if (rc == 0)
+		rc = sock_register(&irda_family_ops);
+
+	return rc;
+}
+
+/*
+ * Function irsock_cleanup (void)
+ *
+ *    Remove IrDA protocol
+ *
+ */
+void __exit irsock_cleanup(void)
+{
+	sock_unregister(PF_IRDA);
+	proto_unregister(&irda_proto);
+}
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
new file mode 100644
index 00000000000..c4ba5fa1446
--- /dev/null
+++ b/net/irda/discovery.c
@@ -0,0 +1,419 @@
+/*********************************************************************
+ *                
+ * Filename:      discovery.c
+ * Version:       0.1
+ * Description:   Routines for handling discoveries at the IrLMP layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Apr  6 15:33:50 1999
+ * Modified at:   Sat Oct  9 17:11:31 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Modified at:   Fri May 28  3:11 CST 1999
+ * Modified by:   Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/seq_file.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+
+#include <net/irda/discovery.h>
+
+/*
+ * Function irlmp_add_discovery (cachelog, discovery)
+ *
+ *    Add a new discovery to the cachelog, and remove any old discoveries
+ *    from the same device
+ *
+ * Note : we try to preserve the time this device was *first* discovered
+ * (as opposed to the time of last discovery used for cleanup). This is
+ * used by clients waiting for discovery events to tell if the device
+ * discovered is "new" or just the same old one. They can't rely there
+ * on a binary flag (new/old), because not all discovery events are
+ * propagated to them, and they might not always listen, so they would
+ * miss some new devices popping up...
+ * Jean II
+ */
+void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
+{
+	discovery_t *discovery, *node;
+	unsigned long flags;
+
+	/* Set time of first discovery if node is new (see below) */
+	new->firststamp = new->timestamp;
+
+	spin_lock_irqsave(&cachelog->hb_spinlock, flags);
+
+	/* 
+	 * Remove all discoveries of devices that has previously been 
+	 * discovered on the same link with the same name (info), or the 
+	 * same daddr. We do this since some devices (mostly PDAs) change
+	 * their device address between every discovery.
+	 */
+	discovery = (discovery_t *) hashbin_get_first(cachelog);
+	while (discovery != NULL ) {
+		node = discovery;
+
+		/* Be sure to stay one item ahead */
+		discovery = (discovery_t *) hashbin_get_next(cachelog);
+
+		if ((node->data.saddr == new->data.saddr) &&
+		    ((node->data.daddr == new->data.daddr) || 
+		     (strcmp(node->data.info, new->data.info) == 0)))
+		{
+			/* This discovery is a previous discovery 
+			 * from the same device, so just remove it
+			 */
+			hashbin_remove_this(cachelog, (irda_queue_t *) node);
+			/* Check if hints bits are unchanged */
+			if(u16ho(node->data.hints) == u16ho(new->data.hints))
+				/* Set time of first discovery for this node */
+				new->firststamp = node->firststamp;
+			kfree(node);
+		}
+	}
+
+	/* Insert the new and updated version */
+	hashbin_insert(cachelog, (irda_queue_t *) new, new->data.daddr, NULL);
+
+	spin_unlock_irqrestore(&cachelog->hb_spinlock, flags);
+}
+
+/*
+ * Function irlmp_add_discovery_log (cachelog, log)
+ *
+ *    Merge a disovery log into the cachelog.
+ *
+ */
+void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log)
+{
+	discovery_t *discovery;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/*
+	 *  If log is missing this means that IrLAP was unable to perform the
+	 *  discovery, so restart discovery again with just the half timeout
+	 *  of the normal one.
+	 */
+	/* Well... It means that there was nobody out there - Jean II */
+	if (log == NULL) {
+		/* irlmp_start_discovery_timer(irlmp, 150); */
+		return;
+	}
+
+	/*
+	 * Locking : we are the only owner of this discovery log, so
+	 * no need to lock it.
+	 * We just need to lock the global log in irlmp_add_discovery().
+	 */
+	discovery = (discovery_t *) hashbin_remove_first(log);
+	while (discovery != NULL) {
+		irlmp_add_discovery(cachelog, discovery);
+
+		discovery = (discovery_t *) hashbin_remove_first(log);
+	}
+	
+	/* Delete the now empty log */
+	hashbin_delete(log, (FREE_FUNC) kfree);
+}
+
+/*
+ * Function irlmp_expire_discoveries (log, saddr, force)
+ *
+ *    Go through all discoveries and expire all that has stayed too long
+ *
+ * Note : this assume that IrLAP won't change its saddr, which
+ * currently is a valid assumption...
+ */
+void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force)
+{
+	discovery_t *		discovery;
+	discovery_t *		curr;
+	unsigned long		flags;
+	discinfo_t *		buffer = NULL;
+	int			n;		/* Size of the full log */
+	int			i = 0;		/* How many we expired */
+
+	IRDA_ASSERT(log != NULL, return;);
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	spin_lock_irqsave(&log->hb_spinlock, flags);
+
+	discovery = (discovery_t *) hashbin_get_first(log);
+	while (discovery != NULL) {
+		/* Be sure to be one item ahead */
+		curr = discovery;
+		discovery = (discovery_t *) hashbin_get_next(log);
+
+		/* Test if it's time to expire this discovery */
+		if ((curr->data.saddr == saddr) &&
+		    (force ||
+		     ((jiffies - curr->timestamp) > DISCOVERY_EXPIRE_TIMEOUT)))
+		{
+			/* Create buffer as needed.
+			 * As this function get called a lot and most time
+			 * we don't have anything to put in the log (we are
+			 * quite picky), we can save a lot of overhead
+			 * by not calling kmalloc. Jean II */
+			if(buffer == NULL) {
+				/* Create the client specific buffer */
+				n = HASHBIN_GET_SIZE(log);
+				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
+				if (buffer == NULL) {
+					spin_unlock_irqrestore(&log->hb_spinlock, flags);
+					return;
+				}
+
+			}
+
+			/* Copy discovery information */
+			memcpy(&(buffer[i]), &(curr->data),
+			       sizeof(discinfo_t));
+			i++;
+
+			/* Remove it from the log */
+			curr = hashbin_remove_this(log, (irda_queue_t *) curr);
+			if (curr)
+				kfree(curr);
+		}
+	}
+
+	/* Drop the spinlock before calling the higher layers, as
+	 * we can't guarantee they won't call us back and create a
+	 * deadlock. We will work on our own private data, so we
+	 * don't care to be interupted. - Jean II */
+	spin_unlock_irqrestore(&log->hb_spinlock, flags);
+
+	if(buffer == NULL)
+		return;
+
+	/* Tell IrLMP and registered clients about it */
+	irlmp_discovery_expiry(buffer, i);
+
+	/* Free up our buffer */
+	kfree(buffer);
+}
+
+#if 0
+/*
+ * Function irlmp_dump_discoveries (log)
+ *
+ *    Print out all discoveries in log
+ *
+ */
+void irlmp_dump_discoveries(hashbin_t *log)
+{
+	discovery_t *discovery;
+
+	IRDA_ASSERT(log != NULL, return;);
+
+	discovery = (discovery_t *) hashbin_get_first(log);
+	while (discovery != NULL) {
+		IRDA_DEBUG(0, "Discovery:\n");
+		IRDA_DEBUG(0, "  daddr=%08x\n", discovery->data.daddr);
+		IRDA_DEBUG(0, "  saddr=%08x\n", discovery->data.saddr); 
+		IRDA_DEBUG(0, "  nickname=%s\n", discovery->data.info);
+
+		discovery = (discovery_t *) hashbin_get_next(log);
+	}
+}
+#endif
+
+/*
+ * Function irlmp_copy_discoveries (log, pn, mask)
+ *
+ *    Copy all discoveries in a buffer
+ *
+ * This function implement a safe way for lmp clients to access the
+ * discovery log. The basic problem is that we don't want the log
+ * to change (add/remove) while the client is reading it. If the
+ * lmp client manipulate directly the hashbin, he is sure to get
+ * into troubles...
+ * The idea is that we copy all the current discovery log in a buffer
+ * which is specific to the client and pass this copy to him. As we
+ * do this operation with the spinlock grabbed, we are safe...
+ * Note : we don't want those clients to grab the spinlock, because
+ * we have no control on how long they will hold it...
+ * Note : we choose to copy the log in "struct irda_device_info" to
+ * save space...
+ * Note : the client must kfree himself() the log...
+ * Jean II
+ */
+struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
+						__u16 mask, int old_entries)
+{
+	discovery_t *		discovery;
+	unsigned long		flags;
+	discinfo_t *		buffer = NULL;
+	int			j_timeout = (sysctl_discovery_timeout * HZ);
+	int			n;		/* Size of the full log */
+	int			i = 0;		/* How many we picked */
+
+	IRDA_ASSERT(pn != NULL, return NULL;);
+	IRDA_ASSERT(log != NULL, return NULL;);
+
+	/* Save spin lock */
+	spin_lock_irqsave(&log->hb_spinlock, flags);
+
+	discovery = (discovery_t *) hashbin_get_first(log);
+	while (discovery != NULL) {
+		/* Mask out the ones we don't want :
+		 * We want to match the discovery mask, and to get only
+		 * the most recent one (unless we want old ones) */
+		if ((u16ho(discovery->data.hints) & mask) &&
+		    ((old_entries) ||
+		     ((jiffies - discovery->firststamp) < j_timeout)) ) {
+			/* Create buffer as needed.
+			 * As this function get called a lot and most time
+			 * we don't have anything to put in the log (we are
+			 * quite picky), we can save a lot of overhead
+			 * by not calling kmalloc. Jean II */
+			if(buffer == NULL) {
+				/* Create the client specific buffer */
+				n = HASHBIN_GET_SIZE(log);
+				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
+				if (buffer == NULL) {
+					spin_unlock_irqrestore(&log->hb_spinlock, flags);
+					return NULL;
+				}
+
+			}
+
+			/* Copy discovery information */
+			memcpy(&(buffer[i]), &(discovery->data),
+			       sizeof(discinfo_t));
+			i++;
+		}
+		discovery = (discovery_t *) hashbin_get_next(log);
+	}
+
+	spin_unlock_irqrestore(&log->hb_spinlock, flags);
+
+	/* Get the actual number of device in the buffer and return */
+	*pn = i;
+	return(buffer);
+}
+
+#ifdef CONFIG_PROC_FS
+static inline discovery_t *discovery_seq_idx(loff_t pos)
+
+{
+	discovery_t *discovery;
+
+	for (discovery = (discovery_t *) hashbin_get_first(irlmp->cachelog); 
+	     discovery != NULL;
+	     discovery = (discovery_t *) hashbin_get_next(irlmp->cachelog)) {
+		if (pos-- == 0)
+			break;
+	}
+		
+	return discovery;
+}
+
+static void *discovery_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	spin_lock_irq(&irlmp->cachelog->hb_spinlock);
+        return *pos ? discovery_seq_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *discovery_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return (v == SEQ_START_TOKEN) 
+		? (void *) hashbin_get_first(irlmp->cachelog)
+		: (void *) hashbin_get_next(irlmp->cachelog);
+}
+
+static void discovery_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_irq(&irlmp->cachelog->hb_spinlock);
+}
+
+static int discovery_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "IrLMP: Discovery log:\n\n");
+	else {
+		const discovery_t *discovery = v;
+
+		seq_printf(seq, "nickname: %s, hint: 0x%02x%02x", 
+			   discovery->data.info,
+			   discovery->data.hints[0], 
+			   discovery->data.hints[1]);
+#if 0
+		if ( discovery->data.hints[0] & HINT_PNP)
+			seq_puts(seq, "PnP Compatible ");
+		if ( discovery->data.hints[0] & HINT_PDA)
+			seq_puts(seq, "PDA/Palmtop ");
+		if ( discovery->data.hints[0] & HINT_COMPUTER)
+			seq_puts(seq, "Computer ");
+		if ( discovery->data.hints[0] & HINT_PRINTER)
+			seq_puts(seq, "Printer ");
+		if ( discovery->data.hints[0] & HINT_MODEM)
+			seq_puts(seq, "Modem ");
+		if ( discovery->data.hints[0] & HINT_FAX)
+			seq_puts(seq, "Fax ");
+		if ( discovery->data.hints[0] & HINT_LAN)
+			seq_puts(seq, "LAN Access ");
+		
+		if ( discovery->data.hints[1] & HINT_TELEPHONY)
+			seq_puts(seq, "Telephony ");
+		if ( discovery->data.hints[1] & HINT_FILE_SERVER)
+			seq_puts(seq, "File Server ");       
+		if ( discovery->data.hints[1] & HINT_COMM)
+			seq_puts(seq, "IrCOMM ");
+		if ( discovery->data.hints[1] & HINT_OBEX)
+			seq_puts(seq, "IrOBEX ");
+#endif		
+		seq_printf(seq,", saddr: 0x%08x, daddr: 0x%08x\n\n",
+			       discovery->data.saddr,
+			       discovery->data.daddr);
+		
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+static struct seq_operations discovery_seq_ops = {
+	.start  = discovery_seq_start,
+	.next   = discovery_seq_next,
+	.stop   = discovery_seq_stop,
+	.show   = discovery_seq_show,
+};
+
+static int discovery_seq_open(struct inode *inode, struct file *file)
+{
+	IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
+
+	return seq_open(file, &discovery_seq_ops);
+}
+
+struct file_operations discovery_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = discovery_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release,
+};
+#endif
diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig
new file mode 100644
index 00000000000..2d4c6b4a78d
--- /dev/null
+++ b/net/irda/ircomm/Kconfig
@@ -0,0 +1,12 @@
+config IRCOMM
+	tristate "IrCOMM protocol"
+	depends on IRDA
+	help
+	  Say Y here if you want to build support for the IrCOMM protocol.
+	  To compile it as modules, choose M here: the modules will be
+	  called ircomm and ircomm_tty.
+	  IrCOMM implements serial port emulation, and makes it possible to
+	  use all existing applications that understands TTY's with an
+	  infrared link.  Thus you should be able to use application like PPP,
+	  minicom and others.
+
diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
new file mode 100644
index 00000000000..48689458c08
--- /dev/null
+++ b/net/irda/ircomm/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux IrDA IrCOMM protocol layer.
+#
+
+obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
+
+ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
+ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
new file mode 100644
index 00000000000..28688197885
--- /dev/null
+++ b/net/irda/ircomm/ircomm_core.c
@@ -0,0 +1,587 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_core.c
+ * Version:       1.0
+ * Description:   IrCOMM service interface
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 20:37:34 1999
+ * Modified at:   Tue Dec 21 13:26:41 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irmod.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irias_object.h>
+
+#include <net/irda/ircomm_event.h>
+#include <net/irda/ircomm_lmp.h>
+#include <net/irda/ircomm_ttp.h>
+#include <net/irda/ircomm_param.h>
+#include <net/irda/ircomm_core.h>
+
+static int __ircomm_close(struct ircomm_cb *self);
+static void ircomm_control_indication(struct ircomm_cb *self, 
+				      struct sk_buff *skb, int clen);
+
+#ifdef CONFIG_PROC_FS
+extern struct proc_dir_entry *proc_irda;
+static int ircomm_seq_open(struct inode *, struct file *);
+
+static struct file_operations ircomm_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open           = ircomm_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release,
+};
+#endif /* CONFIG_PROC_FS */
+
+hashbin_t *ircomm = NULL;
+
+static int __init ircomm_init(void)
+{
+	ircomm = hashbin_new(HB_LOCK); 
+	if (ircomm == NULL) {
+		IRDA_ERROR("%s(), can't allocate hashbin!\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	
+#ifdef CONFIG_PROC_FS
+	{ struct proc_dir_entry *ent;
+	ent = create_proc_entry("ircomm", 0, proc_irda);
+	if (ent) 
+		ent->proc_fops = &ircomm_proc_fops;
+	}
+#endif /* CONFIG_PROC_FS */
+	
+	IRDA_MESSAGE("IrCOMM protocol (Dag Brattli)\n");
+		
+	return 0;
+}
+
+static void __exit ircomm_cleanup(void)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	hashbin_delete(ircomm, (FREE_FUNC) __ircomm_close);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("ircomm", proc_irda);
+#endif /* CONFIG_PROC_FS */
+}
+
+/*
+ * Function ircomm_open (client_notify)
+ *
+ *    Start a new IrCOMM instance
+ *
+ */
+struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
+{
+	struct ircomm_cb *self = NULL;
+	int ret;
+
+	IRDA_DEBUG(2, "%s(), service_type=0x%02x\n", __FUNCTION__ ,
+		   service_type);
+
+	IRDA_ASSERT(ircomm != NULL, return NULL;);
+
+	self = kmalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+	if (self == NULL)
+		return NULL;
+
+	memset(self, 0, sizeof(struct ircomm_cb));
+
+	self->notify = *notify;
+	self->magic = IRCOMM_MAGIC;
+
+	/* Check if we should use IrLMP or IrTTP */
+	if (service_type & IRCOMM_3_WIRE_RAW) {
+		self->flow_status = FLOW_START;
+		ret = ircomm_open_lsap(self);
+	} else
+		ret = ircomm_open_tsap(self);
+
+	if (ret < 0) {
+		kfree(self);
+		return NULL;
+	}
+
+	self->service_type = service_type;
+	self->line = line;
+
+	hashbin_insert(ircomm, (irda_queue_t *) self, line, NULL);
+
+	ircomm_next_state(self, IRCOMM_IDLE);	
+
+	return self;
+}
+
+EXPORT_SYMBOL(ircomm_open);
+
+/*
+ * Function ircomm_close_instance (self)
+ *
+ *    Remove IrCOMM instance
+ *
+ */
+static int __ircomm_close(struct ircomm_cb *self)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Disconnect link if any */
+	ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, NULL, NULL);
+
+	/* Remove TSAP */
+	if (self->tsap) {
+		irttp_close_tsap(self->tsap);
+		self->tsap = NULL;
+	}
+
+	/* Remove LSAP */
+	if (self->lsap) {
+		irlmp_close_lsap(self->lsap);
+		self->lsap = NULL;
+	}
+	self->magic = 0;
+
+	kfree(self);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_close (self)
+ *
+ *    Closes and removes the specified IrCOMM instance
+ *
+ */
+int ircomm_close(struct ircomm_cb *self)
+{
+	struct ircomm_cb *entry;
+
+	IRDA_ASSERT(self != NULL, return -EIO;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EIO;);
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	entry = hashbin_remove(ircomm, self->line, NULL);
+
+	IRDA_ASSERT(entry == self, return -1;);
+	
+        return __ircomm_close(self);
+}
+
+EXPORT_SYMBOL(ircomm_close);
+
+/*
+ * Function ircomm_connect_request (self, service_type)
+ *
+ *    Impl. of this function is differ from one of the reference. This
+ *    function does discovery as well as sending connect request
+ * 
+ */
+int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, 
+			   __u32 saddr, __u32 daddr, struct sk_buff *skb,
+			   __u8 service_type)
+{
+	struct ircomm_info info;
+	int ret;
+
+	IRDA_DEBUG(2 , "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
+
+	self->service_type= service_type;
+
+	info.dlsap_sel = dlsap_sel;
+	info.saddr = saddr;
+	info.daddr = daddr;
+
+	ret = ircomm_do_event(self, IRCOMM_CONNECT_REQUEST, skb, &info);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(ircomm_connect_request);
+
+/*
+ * Function ircomm_connect_indication (self, qos, skb)
+ *
+ *    Notify user layer about the incoming connection
+ *
+ */
+void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
+			       struct ircomm_info *info)
+{
+	int clen = 0;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Check if the packet contains data on the control channel */
+	if (skb->len > 0)
+		clen = skb->data[0];
+	
+	/* 
+	 * If there are any data hiding in the control channel, we must 
+	 * deliver it first. The side effect is that the control channel 
+	 * will be removed from the skb
+	 */
+	if (self->notify.connect_indication)
+		self->notify.connect_indication(self->notify.instance, self, 
+						info->qos, info->max_data_size,
+						info->max_header_size, skb);
+	else {
+		IRDA_DEBUG(0, "%s(), missing handler\n", __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_connect_response (self, userdata, max_sdu_size)
+ *
+ *    User accepts connection
+ *
+ */
+int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata)
+{
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	ret = ircomm_do_event(self, IRCOMM_CONNECT_RESPONSE, userdata, NULL);
+
+	return ret;
+}	
+
+EXPORT_SYMBOL(ircomm_connect_response);
+
+/*
+ * Function connect_confirm (self, skb)
+ *
+ *    Notify user layer that the link is now connected
+ *
+ */
+void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb,
+			    struct ircomm_info *info)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	if (self->notify.connect_confirm )
+		self->notify.connect_confirm(self->notify.instance,
+					     self, info->qos, 
+					     info->max_data_size,
+					     info->max_header_size, skb);
+	else {
+		IRDA_DEBUG(0, "%s(), missing handler\n", __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_data_request (self, userdata)
+ *
+ *    Send IrCOMM data to peer device
+ *
+ */
+int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb)
+{
+	int ret;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -EFAULT;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
+	IRDA_ASSERT(skb != NULL, return -EFAULT;);
+	
+	ret = ircomm_do_event(self, IRCOMM_DATA_REQUEST, skb, NULL);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(ircomm_data_request);
+
+/*
+ * Function ircomm_data_indication (self, skb)
+ *
+ *    Data arrived, so deliver it to user
+ *
+ */
+void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb)
+{	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(skb->len > 0, return;);
+
+	if (self->notify.data_indication)
+		self->notify.data_indication(self->notify.instance, self, skb);
+	else {
+		IRDA_DEBUG(0, "%s(), missing handler\n", __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_process_data (self, skb)
+ *
+ *    Data arrived which may contain control channel data
+ *
+ */
+void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
+{
+	int clen;
+
+	IRDA_ASSERT(skb->len > 0, return;);
+
+	clen = skb->data[0];
+
+	/* 
+	 * If there are any data hiding in the control channel, we must 
+	 * deliver it first. The side effect is that the control channel 
+	 * will be removed from the skb
+	 */
+	if (clen > 0)
+		ircomm_control_indication(self, skb, clen);
+
+	/* Remove control channel from data channel */
+	skb_pull(skb, clen+1);
+
+	if (skb->len)
+		ircomm_data_indication(self, skb);		
+	else {
+		IRDA_DEBUG(4, "%s(), data was control info only!\n",
+			   __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_control_request (self, params)
+ *
+ *    Send control data to peer device
+ *
+ */
+int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb)
+{
+	int ret;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -EFAULT;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
+	IRDA_ASSERT(skb != NULL, return -EFAULT;);
+	
+	ret = ircomm_do_event(self, IRCOMM_CONTROL_REQUEST, skb, NULL);
+
+	return ret;
+}
+
+EXPORT_SYMBOL(ircomm_control_request);
+
+/*
+ * Function ircomm_control_indication (self, skb)
+ *
+ *    Data has arrived on the control channel
+ *
+ */
+static void ircomm_control_indication(struct ircomm_cb *self, 
+				      struct sk_buff *skb, int clen)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );	
+
+	/* Use udata for delivering data on the control channel */
+	if (self->notify.udata_indication) {
+		struct sk_buff *ctrl_skb;
+
+		/* We don't own the skb, so clone it */
+		ctrl_skb = skb_clone(skb, GFP_ATOMIC);
+		if (!ctrl_skb)
+			return;
+
+		/* Remove data channel from control channel */
+		skb_trim(ctrl_skb, clen+1);
+	
+		self->notify.udata_indication(self->notify.instance, self, 
+					      ctrl_skb);
+
+		/* Drop reference count -
+		 * see ircomm_tty_control_indication(). */
+		dev_kfree_skb(ctrl_skb);
+	} else {
+		IRDA_DEBUG(0, "%s(), missing handler\n", __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_disconnect_request (self, userdata, priority)
+ *
+ *    User layer wants to disconnect the IrCOMM connection
+ *
+ */
+int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata)
+{
+	struct ircomm_info info;
+	int ret;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
+
+	ret = ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, userdata, 
+			      &info);
+	return ret;
+}
+
+EXPORT_SYMBOL(ircomm_disconnect_request);
+
+/*
+ * Function disconnect_indication (self, skb)
+ *
+ *    Tell user that the link has been disconnected
+ *
+ */
+void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb,
+				  struct ircomm_info *info)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+       
+	IRDA_ASSERT(info != NULL, return;);
+
+	if (self->notify.disconnect_indication) {
+		self->notify.disconnect_indication(self->notify.instance, self,
+						   info->reason, skb);
+	} else {
+		IRDA_DEBUG(0, "%s(), missing handler\n", __FUNCTION__ );
+	}
+}
+
+/*
+ * Function ircomm_flow_request (self, flow)
+ *
+ *    
+ *
+ */
+void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+
+	if (self->service_type == IRCOMM_3_WIRE_RAW)
+		return;
+
+	irttp_flow_request(self->tsap, flow);
+}
+
+EXPORT_SYMBOL(ircomm_flow_request);
+
+#ifdef CONFIG_PROC_FS
+static void *ircomm_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ircomm_cb *self;
+	loff_t off = 0;
+
+	spin_lock_irq(&ircomm->hb_spinlock);
+
+	for (self = (struct ircomm_cb *) hashbin_get_first(ircomm);
+	     self != NULL;
+	     self = (struct ircomm_cb *) hashbin_get_next(ircomm)) {
+		if (off++ == *pos)
+			break;
+		
+	}
+	return self;
+}
+
+static void *ircomm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return (void *) hashbin_get_next(ircomm);
+}
+
+static void ircomm_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_irq(&ircomm->hb_spinlock);
+}
+
+static int ircomm_seq_show(struct seq_file *seq, void *v)
+{ 	
+	const struct ircomm_cb *self = v;
+
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EINVAL; );
+
+	if(self->line < 0x10)
+		seq_printf(seq, "ircomm%d", self->line);
+	else
+		seq_printf(seq, "irlpt%d", self->line - 0x10);
+
+	seq_printf(seq,
+		   " state: %s, slsap_sel: %#02x, dlsap_sel: %#02x, mode:",
+		   ircomm_state[ self->state],
+		   self->slsap_sel, self->dlsap_sel); 
+
+	if(self->service_type & IRCOMM_3_WIRE_RAW)
+		seq_printf(seq, " 3-wire-raw");
+	if(self->service_type & IRCOMM_3_WIRE)
+		seq_printf(seq, " 3-wire");
+	if(self->service_type & IRCOMM_9_WIRE)
+		seq_printf(seq, " 9-wire");
+	if(self->service_type & IRCOMM_CENTRONICS)
+		seq_printf(seq, " Centronics");
+	seq_putc(seq, '\n');
+
+	return 0;
+}
+
+static struct seq_operations ircomm_seq_ops = {
+	.start  = ircomm_seq_start,
+	.next   = ircomm_seq_next,
+	.stop   = ircomm_seq_stop,
+	.show   = ircomm_seq_show,
+};
+
+static int ircomm_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ircomm_seq_ops);
+}
+#endif /* CONFIG_PROC_FS */
+
+MODULE_AUTHOR("Dag Brattli <dag@brattli.net>");
+MODULE_DESCRIPTION("IrCOMM protocol");
+MODULE_LICENSE("GPL");
+
+module_init(ircomm_init);
+module_exit(ircomm_cleanup);
diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c
new file mode 100644
index 00000000000..01f4e801a1b
--- /dev/null
+++ b/net/irda/ircomm/ircomm_event.c
@@ -0,0 +1,251 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_event.c
+ * Version:       1.0
+ * Description:   IrCOMM layer state machine
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 20:33:11 1999
+ * Modified at:   Sun Dec 12 13:44:32 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irias_object.h>
+
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_event.h>
+
+static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			     struct sk_buff *skb, struct ircomm_info *info);
+static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			      struct sk_buff *skb, struct ircomm_info *info);
+static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			      struct sk_buff *skb, struct ircomm_info *info);
+static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			     struct sk_buff *skb, struct ircomm_info *info);
+
+char *ircomm_state[] = {
+	"IRCOMM_IDLE",
+	"IRCOMM_WAITI",
+	"IRCOMM_WAITR",
+	"IRCOMM_CONN",
+};
+
+#ifdef CONFIG_IRDA_DEBUG
+static char *ircomm_event[] = {
+	"IRCOMM_CONNECT_REQUEST",
+        "IRCOMM_CONNECT_RESPONSE",
+        "IRCOMM_TTP_CONNECT_INDICATION",
+	"IRCOMM_LMP_CONNECT_INDICATION",
+        "IRCOMM_TTP_CONNECT_CONFIRM",
+	"IRCOMM_LMP_CONNECT_CONFIRM",
+
+        "IRCOMM_LMP_DISCONNECT_INDICATION",
+	"IRCOMM_TTP_DISCONNECT_INDICATION",
+        "IRCOMM_DISCONNECT_REQUEST",
+
+        "IRCOMM_TTP_DATA_INDICATION",
+	"IRCOMM_LMP_DATA_INDICATION",
+        "IRCOMM_DATA_REQUEST",
+        "IRCOMM_CONTROL_REQUEST",
+        "IRCOMM_CONTROL_INDICATION",
+};
+#endif /* CONFIG_IRDA_DEBUG */
+
+static int (*state[])(struct ircomm_cb *self, IRCOMM_EVENT event,
+		      struct sk_buff *skb, struct ircomm_info *info) = 
+{
+	ircomm_state_idle,
+	ircomm_state_waiti,
+	ircomm_state_waitr,
+	ircomm_state_conn,
+};
+
+/*
+ * Function ircomm_state_idle (self, event, skb)
+ *
+ *    IrCOMM is currently idle
+ *
+ */
+static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			     struct sk_buff *skb, struct ircomm_info *info)
+{
+	int ret = 0;
+
+	switch (event) {
+	case IRCOMM_CONNECT_REQUEST:
+		ircomm_next_state(self, IRCOMM_WAITI);		
+		ret = self->issue.connect_request(self, skb, info);
+		break;
+	case IRCOMM_TTP_CONNECT_INDICATION:
+	case IRCOMM_LMP_CONNECT_INDICATION:
+		ircomm_next_state(self, IRCOMM_WAITR);
+		ircomm_connect_indication(self, skb, info);
+		break;
+	default:
+		IRDA_DEBUG(4, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_state_waiti (self, event, skb)
+ *
+ *    The IrCOMM user has requested an IrCOMM connection to the remote 
+ *    device and is awaiting confirmation
+ */
+static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			      struct sk_buff *skb, struct ircomm_info *info)
+{
+	int ret = 0;
+
+	switch (event) {
+	case IRCOMM_TTP_CONNECT_CONFIRM:
+	case IRCOMM_LMP_CONNECT_CONFIRM:
+		ircomm_next_state(self, IRCOMM_CONN);
+		ircomm_connect_confirm(self, skb, info);
+		break;
+	case IRCOMM_TTP_DISCONNECT_INDICATION:
+	case IRCOMM_LMP_DISCONNECT_INDICATION:
+		ircomm_next_state(self, IRCOMM_IDLE);
+		ircomm_disconnect_indication(self, skb, info);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_state_waitr (self, event, skb)
+ *
+ *    IrCOMM has received an incoming connection request and is awaiting
+ *    response from the user
+ */
+static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			      struct sk_buff *skb, struct ircomm_info *info) 
+{
+	int ret = 0;
+
+	switch (event) {
+	case IRCOMM_CONNECT_RESPONSE:
+		ircomm_next_state(self, IRCOMM_CONN);
+		ret = self->issue.connect_response(self, skb);
+		break;
+	case IRCOMM_DISCONNECT_REQUEST:
+		ircomm_next_state(self, IRCOMM_IDLE);
+		ret = self->issue.disconnect_request(self, skb, info);
+		break;
+	case IRCOMM_TTP_DISCONNECT_INDICATION:
+	case IRCOMM_LMP_DISCONNECT_INDICATION:
+		ircomm_next_state(self, IRCOMM_IDLE);
+		ircomm_disconnect_indication(self, skb, info);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event = %s\n", __FUNCTION__ ,
+			   ircomm_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_state_conn (self, event, skb)
+ *
+ *    IrCOMM is connected to the peer IrCOMM device
+ *
+ */
+static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, 
+			     struct sk_buff *skb, struct ircomm_info *info)
+{
+	int ret = 0;
+
+	switch (event) {
+	case IRCOMM_DATA_REQUEST:
+		ret = self->issue.data_request(self, skb, 0);
+		break;
+	case IRCOMM_TTP_DATA_INDICATION:
+		ircomm_process_data(self, skb);
+		break;
+	case IRCOMM_LMP_DATA_INDICATION:
+		ircomm_data_indication(self, skb);
+		break;
+	case IRCOMM_CONTROL_REQUEST:
+		/* Just send a separate frame for now */
+		ret = self->issue.data_request(self, skb, skb->len);
+		break;
+	case IRCOMM_TTP_DISCONNECT_INDICATION:
+	case IRCOMM_LMP_DISCONNECT_INDICATION:
+		ircomm_next_state(self, IRCOMM_IDLE);
+		ircomm_disconnect_indication(self, skb, info);
+		break;
+	case IRCOMM_DISCONNECT_REQUEST:
+		ircomm_next_state(self, IRCOMM_IDLE);
+		ret = self->issue.disconnect_request(self, skb, info);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event = %s\n", __FUNCTION__ ,
+			   ircomm_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_do_event (self, event, skb)
+ *
+ *    Process event
+ *
+ */
+int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event,
+		    struct sk_buff *skb, struct ircomm_info *info) 
+{
+	IRDA_DEBUG(4, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_state[self->state], ircomm_event[event]);
+
+	return (*state[self->state])(self, event, skb, info);
+}
+
+/*
+ * Function ircomm_next_state (self, state)
+ *
+ *    Switch state
+ *
+ */
+void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state)
+{
+	self->state = state;
+	
+	IRDA_DEBUG(4, "%s: next state=%s, service type=%d\n", __FUNCTION__ , 
+		   ircomm_state[self->state], self->service_type);
+}
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
new file mode 100644
index 00000000000..d9097207aed
--- /dev/null
+++ b/net/irda/ircomm/ircomm_lmp.c
@@ -0,0 +1,372 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_lmp.c
+ * Version:       1.0
+ * Description:   Interface between IrCOMM and IrLMP
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 20:48:27 1999
+ * Modified at:   Sun Dec 12 13:44:17 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Sources:       Previous IrLPT work by Thomas Davis
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irda_device.h>	/* struct irda_skb_cb */
+
+#include <net/irda/ircomm_event.h>
+#include <net/irda/ircomm_lmp.h>
+
+
+/*
+ * Function ircomm_lmp_connect_request (self, userdata)
+ *
+ *    
+ *
+ */
+static int ircomm_lmp_connect_request(struct ircomm_cb *self, 
+				      struct sk_buff *userdata, 
+				      struct ircomm_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	/* Don't forget to refcount it - should be NULL anyway */
+	if(userdata)
+		skb_get(userdata);
+
+	ret = irlmp_connect_request(self->lsap, info->dlsap_sel,
+				    info->saddr, info->daddr, NULL, userdata); 
+	return ret;
+}	
+
+/*
+ * Function ircomm_lmp_connect_response (self, skb)
+ *
+ *    
+ *
+ */
+static int ircomm_lmp_connect_response(struct ircomm_cb *self,
+				       struct sk_buff *userdata)
+{
+	struct sk_buff *tx_skb;
+	int ret;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+	
+	/* Any userdata supplied? */
+	if (userdata == NULL) {
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+
+		/* Reserve space for MUX and LAP header */
+		skb_reserve(tx_skb, LMP_MAX_HEADER);
+	} else {
+		/*  
+		 *  Check that the client has reserved enough space for 
+		 *  headers
+		 */
+		IRDA_ASSERT(skb_headroom(userdata) >= LMP_MAX_HEADER,
+			    return -1;);
+
+		/* Don't forget to refcount it - should be NULL anyway */
+		skb_get(userdata);
+		tx_skb = userdata;
+	}
+
+	ret = irlmp_connect_response(self->lsap, tx_skb);
+
+	return 0;
+}
+
+static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, 
+					 struct sk_buff *userdata, 
+					 struct ircomm_info *info)
+{
+        struct sk_buff *tx_skb;
+	int ret;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+        if (!userdata) {
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+		
+		/*  Reserve space for MUX and LAP header */
+		skb_reserve(tx_skb, LMP_MAX_HEADER);		
+		userdata = tx_skb;
+	} else {
+		/* Don't forget to refcount it - should be NULL anyway */
+		skb_get(userdata);
+	}
+
+	ret = irlmp_disconnect_request(self->lsap, userdata);
+
+	return ret;
+}
+
+/*
+ * Function ircomm_lmp_flow_control (skb)
+ *
+ *    This function is called when a data frame we have sent to IrLAP has
+ *    been deallocated. We do this to make sure we don't flood IrLAP with 
+ *    frames, since we are not using the IrTTP flow control mechanism
+ */
+static void ircomm_lmp_flow_control(struct sk_buff *skb)
+{
+	struct irda_skb_cb *cb;
+	struct ircomm_cb *self;
+	int line;
+
+	IRDA_ASSERT(skb != NULL, return;);
+
+	cb = (struct irda_skb_cb *) skb->cb;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+ 
+        line = cb->line;
+
+	self = (struct ircomm_cb *) hashbin_lock_find(ircomm, line, NULL);
+        if (!self) {
+		IRDA_DEBUG(2, "%s(), didn't find myself\n", __FUNCTION__ );
+                return;
+	}
+
+        IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+
+	self->pkt_count--;
+
+        if ((self->pkt_count < 2) && (self->flow_status == FLOW_STOP)) {
+                IRDA_DEBUG(2, "%s(), asking TTY to start again!\n", __FUNCTION__ );
+                self->flow_status = FLOW_START;
+                if (self->notify.flow_indication)
+                        self->notify.flow_indication(self->notify.instance, 
+						     self, FLOW_START);
+        }
+}
+    
+/*
+ * Function ircomm_lmp_data_request (self, userdata)
+ *
+ *    Send data frame to peer device
+ *
+ */
+static int ircomm_lmp_data_request(struct ircomm_cb *self,
+				   struct sk_buff *skb, 
+				   int not_used)
+{
+	struct irda_skb_cb *cb;
+	int ret;
+
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	cb = (struct irda_skb_cb *) skb->cb;
+	
+        cb->line = self->line;
+
+	IRDA_DEBUG(4, "%s(), sending frame\n", __FUNCTION__ );
+
+	/* Don't forget to refcount it - see ircomm_tty_do_softint() */
+	skb_get(skb);
+
+	skb->destructor = ircomm_lmp_flow_control;
+
+        if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
+		IRDA_DEBUG(2, "%s(), asking TTY to slow down!\n", __FUNCTION__ );
+	        self->flow_status = FLOW_STOP;
+                if (self->notify.flow_indication)
+             	        self->notify.flow_indication(self->notify.instance, 
+				                     self, FLOW_STOP);
+        }
+	ret = irlmp_data_request(self->lsap, skb);
+	if (ret) {
+		IRDA_ERROR("%s(), failed\n", __FUNCTION__);
+		/* irlmp_data_request already free the packet */
+	}
+
+	return ret;
+}
+
+/*
+ * Function ircomm_lmp_data_indication (instance, sap, skb)
+ *
+ *    Incoming data which we must deliver to the state machine, to check
+ *    we are still connected.
+ */
+static int ircomm_lmp_data_indication(void *instance, void *sap,
+				      struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+	
+	ircomm_do_event(self, IRCOMM_LMP_DATA_INDICATION, skb, NULL);
+
+	/* Drop reference count - see ircomm_tty_data_indication(). */
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_lmp_connect_confirm (instance, sap, qos, max_sdu_size, 
+ *                                       max_header_size, skb)
+ *
+ *    Connection has been confirmed by peer device
+ *
+ */
+static void ircomm_lmp_connect_confirm(void *instance, void *sap,
+				       struct qos_info *qos, 
+				       __u32 max_seg_size, 
+				       __u8 max_header_size,
+				       struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(qos != NULL, return;);
+
+	info.max_data_size = max_seg_size;
+	info.max_header_size = max_header_size;
+	info.qos = qos;
+
+	ircomm_do_event(self, IRCOMM_LMP_CONNECT_CONFIRM, skb, &info);
+
+	/* Drop reference count - see ircomm_tty_connect_confirm(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function ircomm_lmp_connect_indication (instance, sap, qos, max_sdu_size,
+ *                                         max_header_size, skb)
+ *
+ *    Peer device wants to make a connection with us
+ *
+ */
+static void ircomm_lmp_connect_indication(void *instance, void *sap,
+					  struct qos_info *qos,
+					  __u32 max_seg_size,
+					  __u8 max_header_size,
+					  struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *)instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(qos != NULL, return;);
+
+	info.max_data_size = max_seg_size;
+	info.max_header_size = max_header_size;
+	info.qos = qos;
+
+	ircomm_do_event(self, IRCOMM_LMP_CONNECT_INDICATION, skb, &info);
+
+	/* Drop reference count - see ircomm_tty_connect_indication(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function ircomm_lmp_disconnect_indication (instance, sap, reason, skb)
+ *
+ *    Peer device has closed the connection, or the link went down for some
+ *    other reason
+ */
+static void ircomm_lmp_disconnect_indication(void *instance, void *sap, 
+					     LM_REASON reason,
+					     struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+
+	info.reason = reason;
+
+	ircomm_do_event(self, IRCOMM_LMP_DISCONNECT_INDICATION, skb, &info);
+
+	/* Drop reference count - see ircomm_tty_disconnect_indication(). */
+	if(skb)
+		dev_kfree_skb(skb);
+}
+/*
+ * Function ircomm_open_lsap (self)
+ *
+ *    Open LSAP. This function will only be used when using "raw" services
+ *
+ */
+int ircomm_open_lsap(struct ircomm_cb *self)
+{
+	notify_t notify;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	/* Register callbacks */
+	irda_notify_init(&notify);
+	notify.data_indication       = ircomm_lmp_data_indication;
+	notify.connect_confirm       = ircomm_lmp_connect_confirm;
+	notify.connect_indication    = ircomm_lmp_connect_indication;
+	notify.disconnect_indication = ircomm_lmp_disconnect_indication;
+	notify.instance = self;
+	strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
+
+	self->lsap = irlmp_open_lsap(LSAP_ANY, &notify, 0);
+	if (!self->lsap) {
+		IRDA_DEBUG(0,"%sfailed to allocate tsap\n", __FUNCTION__ );
+		return -1;
+	}
+	self->slsap_sel = self->lsap->slsap_sel;
+
+	/*
+	 *  Initialize the call-table for issuing commands
+	 */
+	self->issue.data_request       = ircomm_lmp_data_request;
+	self->issue.connect_request    = ircomm_lmp_connect_request;
+	self->issue.connect_response   = ircomm_lmp_connect_response;
+	self->issue.disconnect_request = ircomm_lmp_disconnect_request;
+
+	return 0;
+}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
new file mode 100644
index 00000000000..6009bab0509
--- /dev/null
+++ b/net/irda/ircomm/ircomm_param.c
@@ -0,0 +1,511 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_param.c
+ * Version:       1.0
+ * Description:   Parameter handling for the IrCOMM protocol
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Jun  7 10:25:11 1999
+ * Modified at:   Sun Jan 30 14:32:03 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/parameters.h>
+
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_tty_attach.h>
+#include <net/irda/ircomm_tty.h>
+
+#include <net/irda/ircomm_param.h>
+
+static int ircomm_param_service_type(void *instance, irda_param_t *param, 
+				     int get);
+static int ircomm_param_port_type(void *instance, irda_param_t *param, 
+				  int get);
+static int ircomm_param_port_name(void *instance, irda_param_t *param, 
+				  int get);
+static int ircomm_param_service_type(void *instance, irda_param_t *param, 
+				     int get);
+static int ircomm_param_data_rate(void *instance, irda_param_t *param, 
+				  int get);
+static int ircomm_param_data_format(void *instance, irda_param_t *param, 
+				    int get);
+static int ircomm_param_flow_control(void *instance, irda_param_t *param, 
+				     int get);
+static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get);
+static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get);
+static int ircomm_param_line_status(void *instance, irda_param_t *param, 
+				    int get);
+static int ircomm_param_dte(void *instance, irda_param_t *param, int get);
+static int ircomm_param_dce(void *instance, irda_param_t *param, int get);
+static int ircomm_param_poll(void *instance, irda_param_t *param, int get);
+
+static pi_minor_info_t pi_minor_call_table_common[] = {
+	{ ircomm_param_service_type, PV_INT_8_BITS },
+	{ ircomm_param_port_type,    PV_INT_8_BITS },
+	{ ircomm_param_port_name,    PV_STRING }
+};
+static pi_minor_info_t pi_minor_call_table_non_raw[] = {
+	{ ircomm_param_data_rate,    PV_INT_32_BITS | PV_BIG_ENDIAN },
+	{ ircomm_param_data_format,  PV_INT_8_BITS },
+	{ ircomm_param_flow_control, PV_INT_8_BITS },
+	{ ircomm_param_xon_xoff,     PV_INT_16_BITS },
+	{ ircomm_param_enq_ack,      PV_INT_16_BITS },
+	{ ircomm_param_line_status,  PV_INT_8_BITS }
+};
+static pi_minor_info_t pi_minor_call_table_9_wire[] = {
+	{ ircomm_param_dte,          PV_INT_8_BITS },
+	{ ircomm_param_dce,          PV_INT_8_BITS },
+	{ ircomm_param_poll,         PV_NO_VALUE },
+};
+
+static pi_major_info_t pi_major_call_table[] = {
+	{ pi_minor_call_table_common,  3 },
+	{ pi_minor_call_table_non_raw, 6 },
+ 	{ pi_minor_call_table_9_wire,  3 }
+/* 	{ pi_minor_call_table_centronics }  */
+};
+
+pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 };
+
+/*
+ * Function ircomm_param_request (self, pi, flush)
+ *
+ *    Queue a parameter for the control channel
+ *
+ */
+int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
+{
+	struct tty_struct *tty;
+	unsigned long flags;
+	struct sk_buff *skb;
+	int count;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	tty = self->tty;
+	if (!tty)
+		return 0;
+
+	/* Make sure we don't send parameters for raw mode */
+	if (self->service_type == IRCOMM_3_WIRE_RAW)
+		return 0;
+
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	skb = self->ctrl_skb;	
+	if (!skb) {
+		skb = dev_alloc_skb(256);
+		if (!skb) {
+			spin_unlock_irqrestore(&self->spinlock, flags);
+			return -ENOMEM;
+		}
+		
+		skb_reserve(skb, self->max_header_size);
+		self->ctrl_skb = skb;
+	}
+	/* 
+	 * Inserting is a little bit tricky since we don't know how much
+	 * room we will need. But this should hopefully work OK 
+	 */
+	count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb),
+				  &ircomm_param_info);
+	if (count < 0) {
+		IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
+		spin_unlock_irqrestore(&self->spinlock, flags);
+		return -1;
+	}
+	skb_put(skb, count);
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	IRDA_DEBUG(2, "%s(), skb->len=%d\n", __FUNCTION__ , skb->len);
+
+	if (flush) {
+		/* ircomm_tty_do_softint will take care of the rest */
+		schedule_work(&self->tqueue);
+	}
+
+	return count;
+}
+
+/*
+ * Function ircomm_param_service_type (self, buf, len)
+ *
+ *    Handle service type, this function will both be called after the LM-IAS
+ *    query and then the remote device sends its initial parameters
+ *
+ */
+static int ircomm_param_service_type(void *instance, irda_param_t *param, 
+				     int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	__u8 service_type = (__u8) param->pv.i;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (get) {
+		param->pv.i = self->settings.service_type;
+		return 0;
+	}
+
+	/* Find all common service types */
+	service_type &= self->service_type;
+	if (!service_type) {
+		IRDA_DEBUG(2, 
+			   "%s(), No common service type to use!\n", __FUNCTION__ );
+		return -1;
+	}
+	IRDA_DEBUG(0, "%s(), services in common=%02x\n", __FUNCTION__ ,
+		   service_type);
+
+	/*
+	 * Now choose a preferred service type of those available
+	 */
+	if (service_type & IRCOMM_CENTRONICS)
+		self->settings.service_type = IRCOMM_CENTRONICS;
+	else if (service_type & IRCOMM_9_WIRE)
+		self->settings.service_type = IRCOMM_9_WIRE;
+	else if (service_type & IRCOMM_3_WIRE)
+		self->settings.service_type = IRCOMM_3_WIRE;
+	else if (service_type & IRCOMM_3_WIRE_RAW)
+		self->settings.service_type = IRCOMM_3_WIRE_RAW;
+
+	IRDA_DEBUG(0, "%s(), resulting service type=0x%02x\n", __FUNCTION__ , 
+		   self->settings.service_type);
+
+	/* 
+	 * Now the line is ready for some communication. Check if we are a
+         * server, and send over some initial parameters.
+	 * Client do it in ircomm_tty_state_setup().
+	 * Note : we may get called from ircomm_tty_getvalue_confirm(),
+	 * therefore before we even have open any socket. And self->client
+	 * is initialised to TRUE only later. So, we check if the link is
+	 * really initialised. - Jean II
+	 */
+	if ((self->max_header_size != IRCOMM_TTY_HDR_UNINITIALISED) &&
+	    (!self->client) &&
+	    (self->settings.service_type != IRCOMM_3_WIRE_RAW))
+	{
+		/* Init connection */
+		ircomm_tty_send_initial_parameters(self);
+		ircomm_tty_link_established(self);
+	}
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_port_type (self, param)
+ *
+ *    The port type parameter tells if the devices are serial or parallel.
+ *    Since we only advertise serial service, this parameter should only
+ *    be equal to IRCOMM_SERIAL.
+ */
+static int ircomm_param_port_type(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = IRCOMM_SERIAL;
+	else {
+		self->settings.port_type = (__u8) param->pv.i;
+
+		IRDA_DEBUG(0, "%s(), port type=%d\n", __FUNCTION__ , 
+			   self->settings.port_type);
+	}
+	return 0;
+}
+
+/*
+ * Function ircomm_param_port_name (self, param)
+ *
+ *    Exchange port name
+ *
+ */
+static int ircomm_param_port_name(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (get) {
+		IRDA_DEBUG(0, "%s(), not imp!\n", __FUNCTION__ );
+	} else {
+		IRDA_DEBUG(0, "%s(), port-name=%s\n", __FUNCTION__ , param->pv.c);
+		strncpy(self->settings.port_name, param->pv.c, 32);
+	}
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_data_rate (self, param)
+ *
+ *    Exchange data rate to be used in this settings
+ *
+ */
+static int ircomm_param_data_rate(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (get)
+		param->pv.i = self->settings.data_rate;
+	else
+		self->settings.data_rate = param->pv.i;
+	
+	IRDA_DEBUG(2, "%s(), data rate = %d\n", __FUNCTION__ , param->pv.i);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_data_format (self, param)
+ *
+ *    Exchange data format to be used in this settings
+ *
+ */
+static int ircomm_param_data_format(void *instance, irda_param_t *param, 
+				    int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (get)
+		param->pv.i = self->settings.data_format;
+	else
+		self->settings.data_format = (__u8) param->pv.i;
+	
+	return 0;
+}
+
+/*
+ * Function ircomm_param_flow_control (self, param)
+ *
+ *    Exchange flow control settings to be used in this settings
+ *
+ */
+static int ircomm_param_flow_control(void *instance, irda_param_t *param, 
+				     int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->settings.flow_control;
+	else
+		self->settings.flow_control = (__u8) param->pv.i;
+
+	IRDA_DEBUG(1, "%s(), flow control = 0x%02x\n", __FUNCTION__ , (__u8) param->pv.i);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_xon_xoff (self, param)
+ *
+ *    Exchange XON/XOFF characters
+ *
+ */
+static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	
+	if (get) {
+		param->pv.i = self->settings.xonxoff[0];
+		param->pv.i |= self->settings.xonxoff[1] << 8;
+	} else {
+		self->settings.xonxoff[0] = (__u16) param->pv.i & 0xff;
+		self->settings.xonxoff[1] = (__u16) param->pv.i >> 8;
+	}
+
+	IRDA_DEBUG(0, "%s(), XON/XOFF = 0x%02x,0x%02x\n", __FUNCTION__ , 
+		   param->pv.i & 0xff, param->pv.i >> 8);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_enq_ack (self, param)
+ *
+ *    Exchange ENQ/ACK characters
+ *
+ */
+static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	
+	if (get) {
+		param->pv.i = self->settings.enqack[0];
+		param->pv.i |= self->settings.enqack[1] << 8;
+	} else {
+		self->settings.enqack[0] = (__u16) param->pv.i & 0xff;
+		self->settings.enqack[1] = (__u16) param->pv.i >> 8;
+	}
+
+	IRDA_DEBUG(0, "%s(), ENQ/ACK = 0x%02x,0x%02x\n", __FUNCTION__ ,
+		   param->pv.i & 0xff, param->pv.i >> 8);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_line_status (self, param)
+ *
+ *    
+ *
+ */
+static int ircomm_param_line_status(void *instance, irda_param_t *param, 
+				    int get)
+{
+	IRDA_DEBUG(2, "%s(), not impl.\n", __FUNCTION__ );
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_dte (instance, param)
+ *
+ *    If we get here, there must be some sort of null-modem connection, and
+ *    we are probably working in server mode as well.
+ */
+static int ircomm_param_dte(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	__u8 dte;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (get)
+		param->pv.i = self->settings.dte;
+	else {
+		dte = (__u8) param->pv.i;
+
+		self->settings.dce = 0;
+				
+		if (dte & IRCOMM_DELTA_DTR)
+			self->settings.dce |= (IRCOMM_DELTA_DSR|
+					      IRCOMM_DELTA_RI |
+					      IRCOMM_DELTA_CD);
+		if (dte & IRCOMM_DTR)
+			self->settings.dce |= (IRCOMM_DSR|
+					      IRCOMM_RI |
+					      IRCOMM_CD);
+		
+		if (dte & IRCOMM_DELTA_RTS)
+			self->settings.dce |= IRCOMM_DELTA_CTS;
+		if (dte & IRCOMM_RTS)
+			self->settings.dce |= IRCOMM_CTS;
+
+		/* Take appropriate actions */
+		ircomm_tty_check_modem_status(self);
+
+		/* Null modem cable emulator */
+		self->settings.null_modem = TRUE;
+	}
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_dce (instance, param)
+ *
+ *    
+ *
+ */
+static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	__u8 dce;
+
+	IRDA_DEBUG(1, "%s(), dce = 0x%02x\n", __FUNCTION__ , (__u8) param->pv.i);
+
+	dce = (__u8) param->pv.i;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	self->settings.dce = dce;
+
+	/* Check if any of the settings have changed */
+	if (dce & 0x0f) {
+		if (dce & IRCOMM_DELTA_CTS) {
+			IRDA_DEBUG(2, "%s(), CTS \n", __FUNCTION__ );
+		}
+	}
+
+	ircomm_tty_check_modem_status(self);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_param_poll (instance, param)
+ *
+ *    Called when the peer device is polling for the line settings
+ *
+ */
+static int ircomm_param_poll(void *instance, irda_param_t *param, int get)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	/* Poll parameters are always of lenght 0 (just a signal) */
+	if (!get) {
+		/* Respond with DTE line settings */
+		ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	}
+	return 0;
+}
+
+
+
+
+
diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c
new file mode 100644
index 00000000000..d98bf3570d2
--- /dev/null
+++ b/net/irda/ircomm/ircomm_ttp.c
@@ -0,0 +1,369 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_ttp.c
+ * Version:       1.0
+ * Description:   Interface between IrCOMM and IrTTP
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 20:48:27 1999
+ * Modified at:   Mon Dec 13 11:35:13 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irttp.h>
+
+#include <net/irda/ircomm_event.h>
+#include <net/irda/ircomm_ttp.h>
+
+static int ircomm_ttp_data_indication(void *instance, void *sap,
+				      struct sk_buff *skb);
+static void ircomm_ttp_connect_confirm(void *instance, void *sap,
+				       struct qos_info *qos, 
+				       __u32 max_sdu_size, 
+				       __u8 max_header_size,
+				       struct sk_buff *skb);
+static void ircomm_ttp_connect_indication(void *instance, void *sap,
+					  struct qos_info *qos,
+					  __u32 max_sdu_size,
+					  __u8 max_header_size,
+					  struct sk_buff *skb);
+static void ircomm_ttp_flow_indication(void *instance, void *sap,
+				       LOCAL_FLOW cmd);
+static void ircomm_ttp_disconnect_indication(void *instance, void *sap, 
+					     LM_REASON reason,
+					     struct sk_buff *skb);
+static int ircomm_ttp_data_request(struct ircomm_cb *self,
+				   struct sk_buff *skb, 
+				   int clen);
+static int ircomm_ttp_connect_request(struct ircomm_cb *self, 
+				      struct sk_buff *userdata, 
+				      struct ircomm_info *info);
+static int ircomm_ttp_connect_response(struct ircomm_cb *self,
+				       struct sk_buff *userdata);
+static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, 
+					 struct sk_buff *userdata, 
+					 struct ircomm_info *info);
+
+/*
+ * Function ircomm_open_tsap (self)
+ *
+ *    
+ *
+ */
+int ircomm_open_tsap(struct ircomm_cb *self)
+{
+	notify_t notify;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	/* Register callbacks */
+	irda_notify_init(&notify);
+	notify.data_indication       = ircomm_ttp_data_indication;
+	notify.connect_confirm       = ircomm_ttp_connect_confirm;
+	notify.connect_indication    = ircomm_ttp_connect_indication;
+	notify.flow_indication       = ircomm_ttp_flow_indication;
+	notify.disconnect_indication = ircomm_ttp_disconnect_indication;
+	notify.instance = self;
+	strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
+
+	self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
+				     &notify);
+	if (!self->tsap) {
+		IRDA_DEBUG(0, "%sfailed to allocate tsap\n", __FUNCTION__ );
+		return -1;
+	}
+	self->slsap_sel = self->tsap->stsap_sel;
+
+	/*
+	 *  Initialize the call-table for issuing commands
+	 */
+	self->issue.data_request       = ircomm_ttp_data_request;
+	self->issue.connect_request    = ircomm_ttp_connect_request;
+	self->issue.connect_response   = ircomm_ttp_connect_response;
+	self->issue.disconnect_request = ircomm_ttp_disconnect_request;
+
+	return 0;
+}
+
+/*
+ * Function ircomm_ttp_connect_request (self, userdata)
+ *
+ *    
+ *
+ */
+static int ircomm_ttp_connect_request(struct ircomm_cb *self, 
+				      struct sk_buff *userdata, 
+				      struct ircomm_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	/* Don't forget to refcount it - should be NULL anyway */
+	if(userdata)
+		skb_get(userdata);
+
+	ret = irttp_connect_request(self->tsap, info->dlsap_sel,
+				    info->saddr, info->daddr, NULL, 
+				    TTP_SAR_DISABLE, userdata); 
+
+	return ret;
+}	
+
+/*
+ * Function ircomm_ttp_connect_response (self, skb)
+ *
+ *    
+ *
+ */
+static int ircomm_ttp_connect_response(struct ircomm_cb *self,
+				       struct sk_buff *userdata)
+{
+	int ret;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	/* Don't forget to refcount it - should be NULL anyway */
+	if(userdata)
+		skb_get(userdata);
+
+	ret = irttp_connect_response(self->tsap, TTP_SAR_DISABLE, userdata);
+
+	return ret;
+}
+
+/*
+ * Function ircomm_ttp_data_request (self, userdata)
+ *
+ *    Send IrCOMM data to IrTTP layer. Currently we do not try to combine 
+ *    control data with pure data, so they will be sent as separate frames. 
+ *    Should not be a big problem though, since control frames are rare. But
+ *    some of them are sent after connection establishment, so this can 
+ *    increase the latency a bit.
+ */
+static int ircomm_ttp_data_request(struct ircomm_cb *self,
+				   struct sk_buff *skb, 
+				   int clen)
+{
+	int ret;
+
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	IRDA_DEBUG(2, "%s(), clen=%d\n", __FUNCTION__ , clen);
+
+	/* 
+	 * Insert clen field, currently we either send data only, or control
+	 * only frames, to make things easier and avoid queueing
+	 */
+	IRDA_ASSERT(skb_headroom(skb) >= IRCOMM_HEADER_SIZE, return -1;);
+
+	/* Don't forget to refcount it - see ircomm_tty_do_softint() */
+	skb_get(skb);
+
+	skb_push(skb, IRCOMM_HEADER_SIZE);
+
+	skb->data[0] = clen;
+
+	ret = irttp_data_request(self->tsap, skb);
+	if (ret) {
+		IRDA_ERROR("%s(), failed\n", __FUNCTION__);
+		/* irttp_data_request already free the packet */
+	}
+
+	return ret;
+}
+
+/*
+ * Function ircomm_ttp_data_indication (instance, sap, skb)
+ *
+ *    Incoming data
+ *
+ */
+static int ircomm_ttp_data_indication(void *instance, void *sap,
+				      struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	ircomm_do_event(self, IRCOMM_TTP_DATA_INDICATION, skb, NULL);
+
+	/* Drop reference count - see ircomm_tty_data_indication(). */
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
+static void ircomm_ttp_connect_confirm(void *instance, void *sap,
+				       struct qos_info *qos, 
+				       __u32 max_sdu_size, 
+				       __u8 max_header_size,
+				       struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(qos != NULL, goto out;);
+
+	if (max_sdu_size != TTP_SAR_DISABLE) {
+		IRDA_ERROR("%s(), SAR not allowed for IrCOMM!\n",
+			   __FUNCTION__);
+		goto out;
+	}
+
+	info.max_data_size = irttp_get_max_seg_size(self->tsap)
+		- IRCOMM_HEADER_SIZE;
+	info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
+	info.qos = qos;
+
+	ircomm_do_event(self, IRCOMM_TTP_CONNECT_CONFIRM, skb, &info);
+
+out:
+	/* Drop reference count - see ircomm_tty_connect_confirm(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function ircomm_ttp_connect_indication (instance, sap, qos, max_sdu_size,
+ *                                         max_header_size, skb)
+ *
+ *    
+ *
+ */
+static void ircomm_ttp_connect_indication(void *instance, void *sap,
+					  struct qos_info *qos,
+					  __u32 max_sdu_size,
+					  __u8 max_header_size,
+					  struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *)instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(qos != NULL, goto out;);
+
+	if (max_sdu_size != TTP_SAR_DISABLE) {
+		IRDA_ERROR("%s(), SAR not allowed for IrCOMM!\n",
+			   __FUNCTION__);
+		goto out;
+	}
+
+	info.max_data_size = irttp_get_max_seg_size(self->tsap)
+		- IRCOMM_HEADER_SIZE;
+	info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
+	info.qos = qos;
+
+	ircomm_do_event(self, IRCOMM_TTP_CONNECT_INDICATION, skb, &info);
+
+out:
+	/* Drop reference count - see ircomm_tty_connect_indication(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function ircomm_ttp_disconnect_request (self, userdata, info)
+ *
+ *    
+ *
+ */
+static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, 
+					 struct sk_buff *userdata, 
+					 struct ircomm_info *info)
+{
+	int ret;
+
+	/* Don't forget to refcount it - should be NULL anyway */
+	if(userdata)
+		skb_get(userdata);
+
+	ret = irttp_disconnect_request(self->tsap, userdata, P_NORMAL);
+
+	return ret;
+}
+
+/*
+ * Function ircomm_ttp_disconnect_indication (instance, sap, reason, skb)
+ *
+ *    
+ *
+ */
+static void ircomm_ttp_disconnect_indication(void *instance, void *sap, 
+					     LM_REASON reason,
+					     struct sk_buff *skb)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+	struct ircomm_info info;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+
+	info.reason = reason;
+
+	ircomm_do_event(self, IRCOMM_TTP_DISCONNECT_INDICATION, skb, &info);
+
+	/* Drop reference count - see ircomm_tty_disconnect_indication(). */
+	if(skb)
+		dev_kfree_skb(skb);
+}
+
+/*
+ * Function ircomm_ttp_flow_indication (instance, sap, cmd)
+ *
+ *    Layer below is telling us to start or stop the flow of data
+ *
+ */
+static void ircomm_ttp_flow_indication(void *instance, void *sap,
+				       LOCAL_FLOW cmd)
+{
+	struct ircomm_cb *self = (struct ircomm_cb *) instance;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
+	
+	if (self->notify.flow_indication)
+		self->notify.flow_indication(self->notify.instance, self, cmd);
+}
+
+
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
new file mode 100644
index 00000000000..5d1e61168eb
--- /dev/null
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -0,0 +1,1405 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_tty.c
+ * Version:       1.0
+ * Description:   IrCOMM serial TTY driver
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 21:00:56 1999
+ * Modified at:   Wed Feb 23 00:09:02 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Sources:       serial.c and previous IrCOMM work by Takahide Higuchi
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>		/* for MODULE_ALIAS_CHARDEV_MAJOR */
+
+#include <asm/uaccess.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irmod.h>
+
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_param.h>
+#include <net/irda/ircomm_tty_attach.h>
+#include <net/irda/ircomm_tty.h>
+
+static int  ircomm_tty_open(struct tty_struct *tty, struct file *filp);
+static void ircomm_tty_close(struct tty_struct * tty, struct file *filp);
+static int  ircomm_tty_write(struct tty_struct * tty,
+			     const unsigned char *buf, int count);
+static int  ircomm_tty_write_room(struct tty_struct *tty);
+static void ircomm_tty_throttle(struct tty_struct *tty);
+static void ircomm_tty_unthrottle(struct tty_struct *tty);
+static int  ircomm_tty_chars_in_buffer(struct tty_struct *tty);
+static void ircomm_tty_flush_buffer(struct tty_struct *tty);
+static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch);
+static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout);
+static void ircomm_tty_hangup(struct tty_struct *tty);
+static void ircomm_tty_do_softint(void *private_);
+static void ircomm_tty_shutdown(struct ircomm_tty_cb *self);
+static void ircomm_tty_stop(struct tty_struct *tty);
+
+static int ircomm_tty_data_indication(void *instance, void *sap,
+				      struct sk_buff *skb);
+static int ircomm_tty_control_indication(void *instance, void *sap,
+					 struct sk_buff *skb);
+static void ircomm_tty_flow_indication(void *instance, void *sap, 
+				       LOCAL_FLOW cmd);
+#ifdef CONFIG_PROC_FS
+static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
+				int *eof, void *unused);
+#endif /* CONFIG_PROC_FS */
+static struct tty_driver *driver;
+
+hashbin_t *ircomm_tty = NULL;
+
+static struct tty_operations ops = {
+	.open            = ircomm_tty_open,
+	.close           = ircomm_tty_close,
+	.write           = ircomm_tty_write,
+	.write_room      = ircomm_tty_write_room,
+	.chars_in_buffer = ircomm_tty_chars_in_buffer,
+	.flush_buffer    = ircomm_tty_flush_buffer,
+	.ioctl           = ircomm_tty_ioctl,	/* ircomm_tty_ioctl.c */
+	.tiocmget        = ircomm_tty_tiocmget,	/* ircomm_tty_ioctl.c */
+	.tiocmset        = ircomm_tty_tiocmset,	/* ircomm_tty_ioctl.c */
+	.throttle        = ircomm_tty_throttle,
+	.unthrottle      = ircomm_tty_unthrottle,
+	.send_xchar      = ircomm_tty_send_xchar,
+	.set_termios     = ircomm_tty_set_termios,
+	.stop            = ircomm_tty_stop,
+	.start           = ircomm_tty_start,
+	.hangup          = ircomm_tty_hangup,
+	.wait_until_sent = ircomm_tty_wait_until_sent,
+#ifdef CONFIG_PROC_FS
+	.read_proc       = ircomm_tty_read_proc,
+#endif /* CONFIG_PROC_FS */
+};
+
+/*
+ * Function ircomm_tty_init()
+ *
+ *    Init IrCOMM TTY layer/driver
+ *
+ */
+static int __init ircomm_tty_init(void)
+{
+	driver = alloc_tty_driver(IRCOMM_TTY_PORTS);
+	if (!driver)
+		return -ENOMEM;
+	ircomm_tty = hashbin_new(HB_LOCK); 
+	if (ircomm_tty == NULL) {
+		IRDA_ERROR("%s(), can't allocate hashbin!\n", __FUNCTION__);
+		put_tty_driver(driver);
+		return -ENOMEM;
+	}
+
+	driver->owner		= THIS_MODULE;
+	driver->driver_name     = "ircomm";
+	driver->name            = "ircomm";
+	driver->devfs_name      = "ircomm";
+	driver->major           = IRCOMM_TTY_MAJOR;
+	driver->minor_start     = IRCOMM_TTY_MINOR;
+	driver->type            = TTY_DRIVER_TYPE_SERIAL;
+	driver->subtype         = SERIAL_TYPE_NORMAL;
+	driver->init_termios    = tty_std_termios;
+	driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	driver->flags           = TTY_DRIVER_REAL_RAW;
+	tty_set_operations(driver, &ops);
+	if (tty_register_driver(driver)) {
+		IRDA_ERROR("%s(): Couldn't register serial driver\n",
+			   __FUNCTION__);
+		put_tty_driver(driver);
+		return -1;
+	}
+	return 0;
+}
+
+static void __exit __ircomm_tty_cleanup(struct ircomm_tty_cb *self)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	ircomm_tty_shutdown(self);
+
+	self->magic = 0;
+	kfree(self);
+}
+
+/*
+ * Function ircomm_tty_cleanup ()
+ *
+ *    Remove IrCOMM TTY layer/driver
+ *
+ */
+static void __exit ircomm_tty_cleanup(void)
+{
+	int ret;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );	
+
+	ret = tty_unregister_driver(driver);
+        if (ret) {
+                IRDA_ERROR("%s(), failed to unregister driver\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	hashbin_delete(ircomm_tty, (FREE_FUNC) __ircomm_tty_cleanup);
+	put_tty_driver(driver);
+}
+
+/*
+ * Function ircomm_startup (self)
+ *
+ *    
+ *
+ */
+static int ircomm_tty_startup(struct ircomm_tty_cb *self)
+{
+	notify_t notify;
+	int ret = -ENODEV;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	/* Check if already open */
+	if (test_and_set_bit(ASYNC_B_INITIALIZED, &self->flags)) {
+		IRDA_DEBUG(2, "%s(), already open so break out!\n", __FUNCTION__ );
+		return 0;
+	}
+
+	/* Register with IrCOMM */
+	irda_notify_init(&notify);
+	/* These callbacks we must handle ourselves */
+	notify.data_indication       = ircomm_tty_data_indication;
+	notify.udata_indication      = ircomm_tty_control_indication;
+ 	notify.flow_indication       = ircomm_tty_flow_indication;
+
+	/* Use the ircomm_tty interface for these ones */
+ 	notify.disconnect_indication = ircomm_tty_disconnect_indication;
+	notify.connect_confirm       = ircomm_tty_connect_confirm;
+ 	notify.connect_indication    = ircomm_tty_connect_indication;
+	strlcpy(notify.name, "ircomm_tty", sizeof(notify.name));
+	notify.instance = self;
+
+	if (!self->ircomm) {
+		self->ircomm = ircomm_open(&notify, self->service_type, 
+					   self->line);
+	}
+	if (!self->ircomm)
+		goto err;
+
+	self->slsap_sel = self->ircomm->slsap_sel;
+
+	/* Connect IrCOMM link with remote device */
+	ret = ircomm_tty_attach_cable(self);
+	if (ret < 0) {
+		IRDA_ERROR("%s(), error attaching cable!\n", __FUNCTION__);
+		goto err;
+	}
+
+	return 0;
+err:
+	clear_bit(ASYNC_B_INITIALIZED, &self->flags);
+	return ret;
+}
+
+/*
+ * Function ircomm_block_til_ready (self, filp)
+ *
+ *    
+ *
+ */
+static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, 
+				      struct file *filp)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int		retval;
+	int		do_clocal = 0, extra_count = 0;
+	unsigned long	flags;
+	struct tty_struct *tty;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	tty = self->tty;
+
+	/*
+	 * If non-blocking mode is set, or the port is not enabled,
+	 * then make the check up front and then exit.
+	 */	
+	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
+		/* nonblock mode is set or port is not enabled */
+		self->flags |= ASYNC_NORMAL_ACTIVE;
+		IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __FUNCTION__ );
+		return 0;
+	}
+
+	if (tty->termios->c_cflag & CLOCAL) {
+		IRDA_DEBUG(1, "%s(), doing CLOCAL!\n", __FUNCTION__ );
+		do_clocal = 1;
+	}
+	
+	/* Wait for carrier detect and the line to become
+	 * free (i.e., not in use by the callout).  While we are in
+	 * this loop, self->open_count is dropped by one, so that
+	 * mgsl_close() knows when to free things.  We restore it upon
+	 * exit, either normal or abnormal.
+	 */
+	 
+	retval = 0;
+	add_wait_queue(&self->open_wait, &wait);
+	
+	IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n",
+	      __FILE__,__LINE__, tty->driver->name, self->open_count );
+
+	/* As far as I can see, we protect open_count - Jean II */
+	spin_lock_irqsave(&self->spinlock, flags);
+	if (!tty_hung_up_p(filp)) {
+		extra_count = 1;
+		self->open_count--;
+	}
+	spin_unlock_irqrestore(&self->spinlock, flags);
+	self->blocked_open++;
+	
+	while (1) {
+		if (tty->termios->c_cflag & CBAUD) {
+			/* Here, we use to lock those two guys, but
+			 * as ircomm_param_request() does it itself,
+			 * I don't see the point (and I see the deadlock).
+			 * Jean II */
+			self->settings.dte |= IRCOMM_RTS + IRCOMM_DTR;
+		 	
+			ircomm_param_request(self, IRCOMM_DTE, TRUE);
+		}
+		
+		current->state = TASK_INTERRUPTIBLE;
+		
+		if (tty_hung_up_p(filp) ||
+		    !test_bit(ASYNC_B_INITIALIZED, &self->flags)) {
+			retval = (self->flags & ASYNC_HUP_NOTIFY) ?
+					-EAGAIN : -ERESTARTSYS;
+			break;
+		}
+		
+		/*  
+		 * Check if link is ready now. Even if CLOCAL is
+		 * specified, we cannot return before the IrCOMM link is
+		 * ready 
+		 */
+ 		if (!test_bit(ASYNC_B_CLOSING, &self->flags) &&
+ 		    (do_clocal || (self->settings.dce & IRCOMM_CD)) &&
+		    self->state == IRCOMM_TTY_READY)
+		{
+ 			break;
+		}
+			
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			break;
+		}
+		
+		IRDA_DEBUG(1, "%s(%d):block_til_ready blocking on %s open_count=%d\n",
+		      __FILE__,__LINE__, tty->driver->name, self->open_count );
+		
+		schedule();
+	}
+	
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&self->open_wait, &wait);
+	
+	if (extra_count) {
+		/* ++ is not atomic, so this should be protected - Jean II */
+		spin_lock_irqsave(&self->spinlock, flags);
+		self->open_count++;
+		spin_unlock_irqrestore(&self->spinlock, flags);
+	}
+	self->blocked_open--;
+	
+	IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n",
+	      __FILE__,__LINE__, tty->driver->name, self->open_count);
+			 
+	if (!retval)
+		self->flags |= ASYNC_NORMAL_ACTIVE;
+		
+	return retval;	
+}
+
+/*
+ * Function ircomm_tty_open (tty, filp)
+ *
+ *    This routine is called when a particular tty device is opened. This
+ *    routine is mandatory; if this routine is not filled in, the attempted
+ *    open will fail with ENODEV.
+ */
+static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
+{
+	struct ircomm_tty_cb *self;
+	unsigned int line;
+	unsigned long	flags;
+	int ret;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	line = tty->index;
+	if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) {
+		return -ENODEV;
+	}
+
+	/* Check if instance already exists */
+	self = hashbin_lock_find(ircomm_tty, line, NULL);
+	if (!self) {
+		/* No, so make new instance */
+		self = kmalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
+		if (self == NULL) {
+			IRDA_ERROR("%s(), kmalloc failed!\n", __FUNCTION__);
+			return -ENOMEM;
+		}
+		memset(self, 0, sizeof(struct ircomm_tty_cb));
+		
+		self->magic = IRCOMM_TTY_MAGIC;
+		self->flow = FLOW_STOP;
+
+		self->line = line;
+		INIT_WORK(&self->tqueue, ircomm_tty_do_softint, self);
+		self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
+		self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
+		self->close_delay = 5*HZ/10;
+		self->closing_wait = 30*HZ;
+
+		/* Init some important stuff */
+		init_timer(&self->watchdog_timer);
+		init_waitqueue_head(&self->open_wait);
+ 		init_waitqueue_head(&self->close_wait);
+		spin_lock_init(&self->spinlock);
+
+		/* 
+		 * Force TTY into raw mode by default which is usually what
+		 * we want for IrCOMM and IrLPT. This way applications will
+		 * not have to twiddle with printcap etc.  
+		 */
+		tty->termios->c_iflag = 0;
+		tty->termios->c_oflag = 0;
+
+		/* Insert into hash */
+		hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
+	}
+	/* ++ is not atomic, so this should be protected - Jean II */
+	spin_lock_irqsave(&self->spinlock, flags);
+	self->open_count++;
+
+	tty->driver_data = self;
+	self->tty = tty;
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __FUNCTION__ , tty->driver->name, 
+		   self->line, self->open_count);
+
+	/* Not really used by us, but lets do it anyway */
+	self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+
+	/*
+	 * If the port is the middle of closing, bail out now
+	 */
+	if (tty_hung_up_p(filp) ||
+	    test_bit(ASYNC_B_CLOSING, &self->flags)) {
+
+		/* Hm, why are we blocking on ASYNC_CLOSING if we
+		 * do return -EAGAIN/-ERESTARTSYS below anyway?
+		 * IMHO it's either not needed in the first place
+		 * or for some reason we need to make sure the async
+		 * closing has been finished - if so, wouldn't we
+		 * probably better sleep uninterruptible?
+		 */
+
+		if (wait_event_interruptible(self->close_wait, !test_bit(ASYNC_B_CLOSING, &self->flags))) {
+			IRDA_WARNING("%s - got signal while blocking on ASYNC_CLOSING!\n",
+				     __FUNCTION__);
+			return -ERESTARTSYS;
+		}
+
+#ifdef SERIAL_DO_RESTART
+		return ((self->flags & ASYNC_HUP_NOTIFY) ?
+			-EAGAIN : -ERESTARTSYS);
+#else
+		return -EAGAIN;
+#endif
+	}
+
+	/* Check if this is a "normal" ircomm device, or an irlpt device */
+	if (line < 0x10) {
+		self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE;
+		self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */
+		/* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */
+		self->settings.dce = IRCOMM_CTS | IRCOMM_CD | IRCOMM_DSR | IRCOMM_RI; /* Default line settings */
+		IRDA_DEBUG(2, "%s(), IrCOMM device\n", __FUNCTION__ );
+	} else {
+		IRDA_DEBUG(2, "%s(), IrLPT device\n", __FUNCTION__ );
+		self->service_type = IRCOMM_3_WIRE_RAW;
+		self->settings.service_type = IRCOMM_3_WIRE_RAW; /* Default */
+	}
+
+	ret = ircomm_tty_startup(self);
+	if (ret)
+		return ret;
+
+	ret = ircomm_tty_block_til_ready(self, filp);
+	if (ret) {
+		IRDA_DEBUG(2, 
+		      "%s(), returning after block_til_ready with %d\n", __FUNCTION__ ,
+		      ret);
+
+		return ret;
+	}
+	return 0;
+}
+
+/*
+ * Function ircomm_tty_close (tty, filp)
+ *
+ *    This routine is called when a particular tty device is closed.
+ *
+ */
+static void ircomm_tty_close(struct tty_struct *tty, struct file *filp)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long flags;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	if (!tty)
+		return;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	if (tty_hung_up_p(filp)) {
+		spin_unlock_irqrestore(&self->spinlock, flags);
+
+		IRDA_DEBUG(0, "%s(), returning 1\n", __FUNCTION__ );
+		return;
+	}
+
+	if ((tty->count == 1) && (self->open_count != 1)) {
+		/*
+		 * Uh, oh.  tty->count is 1, which means that the tty
+		 * structure will be freed.  state->count should always
+		 * be one in these conditions.  If it's greater than
+		 * one, we've got real problems, since it means the
+		 * serial port won't be shutdown.
+		 */
+		IRDA_DEBUG(0, "%s(), bad serial port count; "
+			   "tty->count is 1, state->count is %d\n", __FUNCTION__ , 
+			   self->open_count);
+		self->open_count = 1;
+	}
+
+	if (--self->open_count < 0) {
+		IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n",
+			   __FUNCTION__, self->line, self->open_count);
+		self->open_count = 0;
+	}
+	if (self->open_count) {
+		spin_unlock_irqrestore(&self->spinlock, flags);
+
+		IRDA_DEBUG(0, "%s(), open count > 0\n", __FUNCTION__ );
+		return;
+	}
+
+	/* Hum... Should be test_and_set_bit ??? - Jean II */
+	set_bit(ASYNC_B_CLOSING, &self->flags);
+
+	/* We need to unlock here (we were unlocking at the end of this
+	 * function), because tty_wait_until_sent() may schedule.
+	 * I don't know if the rest should be protected somehow,
+	 * so someone should check. - Jean II */
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	/*
+	 * Now we wait for the transmit buffer to clear; and we notify 
+	 * the line discipline to only process XON/XOFF characters.
+	 */
+	tty->closing = 1;
+	if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+		tty_wait_until_sent(tty, self->closing_wait);
+
+	ircomm_tty_shutdown(self);
+
+	if (tty->driver->flush_buffer)
+		tty->driver->flush_buffer(tty);
+	if (tty->ldisc.flush_buffer)
+		tty->ldisc.flush_buffer(tty);
+
+	tty->closing = 0;
+	self->tty = NULL;
+
+	if (self->blocked_open) {
+		if (self->close_delay) {
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(self->close_delay);
+		}
+		wake_up_interruptible(&self->open_wait);
+	}
+
+	self->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+	wake_up_interruptible(&self->close_wait);
+}
+
+/*
+ * Function ircomm_tty_flush_buffer (tty)
+ *
+ *    
+ *
+ */
+static void ircomm_tty_flush_buffer(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	/* 
+	 * Let do_softint() do this to avoid race condition with 
+	 * do_softint() ;-) 
+	 */
+	schedule_work(&self->tqueue);
+}
+
+/*
+ * Function ircomm_tty_do_softint (private_)
+ *
+ *    We use this routine to give the write wakeup to the user at at a
+ *    safe time (as fast as possible after write have completed). This 
+ *    can be compared to the Tx interrupt.
+ */
+static void ircomm_tty_do_softint(void *private_)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) private_;
+	struct tty_struct *tty;
+	unsigned long flags;
+	struct sk_buff *skb, *ctrl_skb;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (!self || self->magic != IRCOMM_TTY_MAGIC)
+		return;
+
+	tty = self->tty;
+	if (!tty)
+		return;
+
+	/* Unlink control buffer */
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	ctrl_skb = self->ctrl_skb;
+	self->ctrl_skb = NULL;
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	/* Flush control buffer if any */
+	if(ctrl_skb) {
+		if(self->flow == FLOW_START)
+			ircomm_control_request(self->ircomm, ctrl_skb);
+		/* Drop reference count - see ircomm_ttp_data_request(). */
+		dev_kfree_skb(ctrl_skb);
+	}
+
+	if (tty->hw_stopped)
+		return;
+
+	/* Unlink transmit buffer */
+	spin_lock_irqsave(&self->spinlock, flags);
+	
+	skb = self->tx_skb;
+	self->tx_skb = NULL;
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	/* Flush transmit buffer if any */
+	if (skb) {
+		ircomm_tty_do_event(self, IRCOMM_TTY_DATA_REQUEST, skb, NULL);
+		/* Drop reference count - see ircomm_ttp_data_request(). */
+		dev_kfree_skb(skb);
+	}
+		
+	/* Check if user (still) wants to be waken up */
+	if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && 
+	    tty->ldisc.write_wakeup)
+	{
+		(tty->ldisc.write_wakeup)(tty);
+	}
+	wake_up_interruptible(&tty->write_wait);
+}
+
+/*
+ * Function ircomm_tty_write (tty, buf, count)
+ *
+ *    This routine is called by the kernel to write a series of characters
+ *    to the tty device. The characters may come from user space or kernel
+ *    space. This routine will return the number of characters actually
+ *    accepted for writing. This routine is mandatory.
+ */
+static int ircomm_tty_write(struct tty_struct *tty,
+			    const unsigned char *buf, int count)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long flags;
+	struct sk_buff *skb;
+	int tailroom = 0;
+	int len = 0;
+	int size;
+
+	IRDA_DEBUG(2, "%s(), count=%d, hw_stopped=%d\n", __FUNCTION__ , count,
+		   tty->hw_stopped);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	/* We may receive packets from the TTY even before we have finished
+	 * our setup. Not cool.
+	 * The problem is that we don't know the final header and data size
+	 * to create the proper skb, so any skb we would create would have
+	 * bogus header and data size, so need care.
+	 * We use a bogus header size to safely detect this condition.
+	 * Another problem is that hw_stopped was set to 0 way before it
+	 * should be, so we would drop this skb. It should now be fixed.
+	 * One option is to not accept data until we are properly setup.
+	 * But, I suspect that when it happens, the ppp line discipline
+	 * just "drops" the data, which might screw up connect scripts.
+	 * The second option is to create a "safe skb", with large header
+	 * and small size (see ircomm_tty_open() for values).
+	 * We just need to make sure that when the real values get filled,
+	 * we don't mess up the original "safe skb" (see tx_data_size).
+	 * Jean II */
+	if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED) {
+		IRDA_DEBUG(1, "%s() : not initialised\n", __FUNCTION__);
+#ifdef IRCOMM_NO_TX_BEFORE_INIT
+		/* We didn't consume anything, TTY will retry */
+		return 0;
+#endif
+	}
+
+	if (count < 1)
+		return 0;
+
+	/* Protect our manipulation of self->tx_skb and related */
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	/* Fetch current transmit buffer */
+	skb = self->tx_skb;
+
+	/*  
+	 * Send out all the data we get, possibly as multiple fragmented
+	 * frames, but this will only happen if the data is larger than the
+	 * max data size. The normal case however is just the opposite, and
+	 * this function may be called multiple times, and will then actually
+	 * defragment the data and send it out as one packet as soon as 
+	 * possible, but at a safer point in time
+	 */
+	while (count) {
+		size = count;
+
+		/* Adjust data size to the max data size */
+		if (size > self->max_data_size)
+			size = self->max_data_size;
+		
+		/* 
+		 * Do we already have a buffer ready for transmit, or do
+		 * we need to allocate a new frame 
+		 */
+		if (skb) {			
+			/* 
+			 * Any room for more data at the end of the current 
+			 * transmit buffer? Cannot use skb_tailroom, since
+			 * dev_alloc_skb gives us a larger skb than we 
+			 * requested
+			 * Note : use tx_data_size, because max_data_size
+			 * may have changed and we don't want to overwrite
+			 * the skb. - Jean II
+			 */
+			if ((tailroom = (self->tx_data_size - skb->len)) > 0) {
+				/* Adjust data to tailroom */
+				if (size > tailroom)
+					size = tailroom;
+			} else {
+				/* 
+				 * Current transmit frame is full, so break 
+				 * out, so we can send it as soon as possible
+				 */
+				break;
+			}
+		} else {
+			/* Prepare a full sized frame */
+			skb = dev_alloc_skb(self->max_data_size+
+					    self->max_header_size);
+			if (!skb) {
+				spin_unlock_irqrestore(&self->spinlock, flags);
+				return -ENOBUFS;
+			}
+			skb_reserve(skb, self->max_header_size);
+			self->tx_skb = skb;
+			/* Remember skb size because max_data_size may
+			 * change later on - Jean II */
+			self->tx_data_size = self->max_data_size;
+		}
+
+		/* Copy data */
+		memcpy(skb_put(skb,size), buf + len, size);
+
+		count -= size;
+		len += size;
+	}
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	/*     
+	 * Schedule a new thread which will transmit the frame as soon
+	 * as possible, but at a safe point in time. We do this so the
+	 * "user" can give us data multiple times, as PPP does (because of
+	 * its 256 byte tx buffer). We will then defragment and send out
+	 * all this data as one single packet.  
+	 */
+	schedule_work(&self->tqueue);
+	
+	return len;
+}
+
+/*
+ * Function ircomm_tty_write_room (tty)
+ *
+ *    This routine returns the numbers of characters the tty driver will
+ *    accept for queuing to be written. This number is subject to change as
+ *    output buffers get emptied, or if the output flow control is acted.
+ */
+static int ircomm_tty_write_room(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long flags;
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+#ifdef IRCOMM_NO_TX_BEFORE_INIT
+	/* max_header_size tells us if the channel is initialised or not. */
+	if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED)
+		/* Don't bother us yet */
+		return 0;
+#endif
+
+	/* Check if we are allowed to transmit any data.
+	 * hw_stopped is the regular flow control.
+	 * Jean II */
+	if (tty->hw_stopped)
+		ret = 0;
+	else {
+		spin_lock_irqsave(&self->spinlock, flags);
+		if (self->tx_skb)
+			ret = self->tx_data_size - self->tx_skb->len;
+		else
+			ret = self->max_data_size;
+		spin_unlock_irqrestore(&self->spinlock, flags);
+	}
+	IRDA_DEBUG(2, "%s(), ret=%d\n", __FUNCTION__ , ret);
+
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_wait_until_sent (tty, timeout)
+ *
+ *    This routine waits until the device has written out all of the
+ *    characters in its transmitter FIFO.
+ */
+static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long orig_jiffies, poll_time;
+	unsigned long flags;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	orig_jiffies = jiffies;
+
+	/* Set poll time to 200 ms */
+	poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200));
+
+	spin_lock_irqsave(&self->spinlock, flags);
+	while (self->tx_skb && self->tx_skb->len) {
+		spin_unlock_irqrestore(&self->spinlock, flags);
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(poll_time);
+		spin_lock_irqsave(&self->spinlock, flags);
+		if (signal_pending(current))
+			break;
+		if (timeout && time_after(jiffies, orig_jiffies + timeout))
+			break;
+	}
+	spin_unlock_irqrestore(&self->spinlock, flags);
+	current->state = TASK_RUNNING;
+}
+
+/*
+ * Function ircomm_tty_throttle (tty)
+ *
+ *    This routine notifies the tty driver that input buffers for the line
+ *    discipline are close to full, and it should somehow signal that no
+ *    more characters should be sent to the tty.  
+ */
+static void ircomm_tty_throttle(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	/* Software flow control? */
+	if (I_IXOFF(tty))
+		ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
+	
+	/* Hardware flow control? */
+	if (tty->termios->c_cflag & CRTSCTS) {
+		self->settings.dte &= ~IRCOMM_RTS;
+		self->settings.dte |= IRCOMM_DELTA_RTS;
+	
+		ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	}
+
+        ircomm_flow_request(self->ircomm, FLOW_STOP);
+}
+
+/*
+ * Function ircomm_tty_unthrottle (tty)
+ *
+ *    This routine notifies the tty drivers that it should signals that
+ *    characters can now be sent to the tty without fear of overrunning the
+ *    input buffers of the line disciplines.
+ */
+static void ircomm_tty_unthrottle(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	/* Using software flow control? */
+	if (I_IXOFF(tty)) {
+		ircomm_tty_send_xchar(tty, START_CHAR(tty));
+	}
+
+	/* Using hardware flow control? */
+	if (tty->termios->c_cflag & CRTSCTS) {
+		self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
+
+		ircomm_param_request(self, IRCOMM_DTE, TRUE);
+		IRDA_DEBUG(1, "%s(), FLOW_START\n", __FUNCTION__ );
+	}
+        ircomm_flow_request(self->ircomm, FLOW_START);
+}
+
+/*
+ * Function ircomm_tty_chars_in_buffer (tty)
+ *
+ *    Indicates if there are any data in the buffer
+ *
+ */
+static int ircomm_tty_chars_in_buffer(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long flags;
+	int len = 0;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	if (self->tx_skb)
+		len = self->tx_skb->len;
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	return len;
+}
+
+static void ircomm_tty_shutdown(struct ircomm_tty_cb *self)
+{
+	unsigned long flags;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	if (!test_and_clear_bit(ASYNC_B_INITIALIZED, &self->flags))
+		return;
+
+	ircomm_tty_detach_cable(self);
+
+	spin_lock_irqsave(&self->spinlock, flags);
+
+	del_timer(&self->watchdog_timer);
+	
+	/* Free parameter buffer */
+	if (self->ctrl_skb) {
+		dev_kfree_skb(self->ctrl_skb);
+		self->ctrl_skb = NULL;
+	}
+
+	/* Free transmit buffer */
+	if (self->tx_skb) {
+		dev_kfree_skb(self->tx_skb);
+		self->tx_skb = NULL;
+	}
+
+	if (self->ircomm) {
+		ircomm_close(self->ircomm);
+		self->ircomm = NULL;
+	}
+
+	spin_unlock_irqrestore(&self->spinlock, flags);
+}
+
+/*
+ * Function ircomm_tty_hangup (tty)
+ *
+ *    This routine notifies the tty driver that it should hangup the tty
+ *    device.
+ * 
+ */
+static void ircomm_tty_hangup(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned long	flags;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	if (!tty)
+		return;
+
+	/* ircomm_tty_flush_buffer(tty); */
+	ircomm_tty_shutdown(self);
+
+	/* I guess we need to lock here - Jean II */
+	spin_lock_irqsave(&self->spinlock, flags);
+	self->flags &= ~ASYNC_NORMAL_ACTIVE;
+	self->tty = NULL;
+	self->open_count = 0;
+	spin_unlock_irqrestore(&self->spinlock, flags);
+
+	wake_up_interruptible(&self->open_wait);
+}
+
+/*
+ * Function ircomm_tty_send_xchar (tty, ch)
+ *
+ *    This routine is used to send a high-priority XON/XOFF character to
+ *    the device.
+ */
+static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch)
+{
+	IRDA_DEBUG(0, "%s(), not impl\n", __FUNCTION__ );
+}
+
+/*
+ * Function ircomm_tty_start (tty)
+ *
+ *    This routine notifies the tty driver that it resume sending
+ *    characters to the tty device.  
+ */
+void ircomm_tty_start(struct tty_struct *tty)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	ircomm_flow_request(self->ircomm, FLOW_START);
+}
+
+/*
+ * Function ircomm_tty_stop (tty)
+ *
+ *     This routine notifies the tty driver that it should stop outputting
+ *     characters to the tty device. 
+ */
+static void ircomm_tty_stop(struct tty_struct *tty) 
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	ircomm_flow_request(self->ircomm, FLOW_STOP);
+}
+
+/*
+ * Function ircomm_check_modem_status (self)
+ *
+ *    Check for any changes in the DCE's line settings. This function should
+ *    be called whenever the dce parameter settings changes, to update the
+ *    flow control settings and other things
+ */
+void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
+{
+	struct tty_struct *tty;
+	int status;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	tty = self->tty;
+
+	status = self->settings.dce;
+
+	if (status & IRCOMM_DCE_DELTA_ANY) {
+		/*wake_up_interruptible(&self->delta_msr_wait);*/
+	}
+	if ((self->flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) {
+		IRDA_DEBUG(2, 
+			   "%s(), ircomm%d CD now %s...\n", __FUNCTION__ , self->line,
+			   (status & IRCOMM_CD) ? "on" : "off");
+
+		if (status & IRCOMM_CD) {
+			wake_up_interruptible(&self->open_wait);
+		} else {
+			IRDA_DEBUG(2, 
+				   "%s(), Doing serial hangup..\n", __FUNCTION__ );
+			if (tty)
+				tty_hangup(tty);
+
+			/* Hangup will remote the tty, so better break out */
+			return;
+		}
+	}
+	if (self->flags & ASYNC_CTS_FLOW) {
+		if (tty->hw_stopped) {
+			if (status & IRCOMM_CTS) {
+				IRDA_DEBUG(2, 
+					   "%s(), CTS tx start...\n", __FUNCTION__ );
+				tty->hw_stopped = 0;
+				
+				/* Wake up processes blocked on open */
+				wake_up_interruptible(&self->open_wait);
+
+				schedule_work(&self->tqueue);
+				return;
+			}
+		} else {
+			if (!(status & IRCOMM_CTS)) {
+				IRDA_DEBUG(2, 
+					   "%s(), CTS tx stop...\n", __FUNCTION__ );
+				tty->hw_stopped = 1;
+			}
+		}
+	}
+}
+
+/*
+ * Function ircomm_tty_data_indication (instance, sap, skb)
+ *
+ *    Handle incoming data, and deliver it to the line discipline
+ *
+ */
+static int ircomm_tty_data_indication(void *instance, void *sap,
+				      struct sk_buff *skb)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	if (!self->tty) {
+		IRDA_DEBUG(0, "%s(), no tty!\n", __FUNCTION__ );
+		return 0;
+	}
+
+	/* 
+	 * If we receive data when hardware is stopped then something is wrong.
+	 * We try to poll the peers line settings to check if we are up todate.
+	 * Devices like WinCE can do this, and since they don't send any 
+	 * params, we can just as well declare the hardware for running.
+	 */
+	if (self->tty->hw_stopped && (self->flow == FLOW_START)) {
+		IRDA_DEBUG(0, "%s(), polling for line settings!\n", __FUNCTION__ );
+		ircomm_param_request(self, IRCOMM_POLL, TRUE);
+
+		/* We can just as well declare the hardware for running */
+		ircomm_tty_send_initial_parameters(self);
+		ircomm_tty_link_established(self);
+	}
+
+	/* 
+	 * Just give it over to the line discipline. There is no need to
+	 * involve the flip buffers, since we are not running in an interrupt 
+	 * handler
+	 */
+	self->tty->ldisc.receive_buf(self->tty, skb->data, NULL, skb->len);
+
+	/* No need to kfree_skb - see ircomm_ttp_data_indication() */
+
+	return 0;
+}
+
+/*
+ * Function ircomm_tty_control_indication (instance, sap, skb)
+ *
+ *    Parse all incoming parameters (easy!)
+ *
+ */
+static int ircomm_tty_control_indication(void *instance, void *sap,
+					 struct sk_buff *skb)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	int clen;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	clen = skb->data[0];
+
+	irda_param_extract_all(self, skb->data+1, IRDA_MIN(skb->len-1, clen), 
+			       &ircomm_param_info);
+
+	/* No need to kfree_skb - see ircomm_control_indication() */
+
+	return 0;
+}
+
+/*
+ * Function ircomm_tty_flow_indication (instance, sap, cmd)
+ *
+ *    This function is called by IrTTP when it wants us to slow down the
+ *    transmission of data. We just mark the hardware as stopped, and wait
+ *    for IrTTP to notify us that things are OK again.
+ */
+static void ircomm_tty_flow_indication(void *instance, void *sap, 
+				       LOCAL_FLOW cmd)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	struct tty_struct *tty;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	tty = self->tty;
+
+	switch (cmd) {
+	case FLOW_START:
+		IRDA_DEBUG(2, "%s(), hw start!\n", __FUNCTION__ );
+		tty->hw_stopped = 0;
+
+		/* ircomm_tty_do_softint will take care of the rest */
+		schedule_work(&self->tqueue);
+		break;
+	default:  /* If we get here, something is very wrong, better stop */
+	case FLOW_STOP:
+		IRDA_DEBUG(2, "%s(), hw stopped!\n", __FUNCTION__ );
+		tty->hw_stopped = 1;
+		break;
+	}
+	self->flow = cmd;
+}
+
+static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf)
+{
+        int  ret=0;
+
+	ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]);
+
+	ret += sprintf(buf+ret, "Service type: ");
+	if (self->service_type & IRCOMM_9_WIRE)
+		ret += sprintf(buf+ret, "9_WIRE");
+	else if (self->service_type & IRCOMM_3_WIRE)
+		ret += sprintf(buf+ret, "3_WIRE");
+	else if (self->service_type & IRCOMM_3_WIRE_RAW)
+		ret += sprintf(buf+ret, "3_WIRE_RAW");
+	else
+		ret += sprintf(buf+ret, "No common service type!\n");
+        ret += sprintf(buf+ret, "\n");
+
+	ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name);
+
+	ret += sprintf(buf+ret, "DTE status: ");	
+        if (self->settings.dte & IRCOMM_RTS)
+                ret += sprintf(buf+ret, "RTS|");
+        if (self->settings.dte & IRCOMM_DTR)
+                ret += sprintf(buf+ret, "DTR|");
+	if (self->settings.dte)
+		ret--; /* remove the last | */
+        ret += sprintf(buf+ret, "\n");
+
+	ret += sprintf(buf+ret, "DCE status: ");
+        if (self->settings.dce & IRCOMM_CTS)
+                ret += sprintf(buf+ret, "CTS|");
+        if (self->settings.dce & IRCOMM_DSR)
+                ret += sprintf(buf+ret, "DSR|");
+        if (self->settings.dce & IRCOMM_CD)
+                ret += sprintf(buf+ret, "CD|");
+        if (self->settings.dce & IRCOMM_RI) 
+                ret += sprintf(buf+ret, "RI|");
+	if (self->settings.dce)
+		ret--; /* remove the last | */
+        ret += sprintf(buf+ret, "\n");
+
+	ret += sprintf(buf+ret, "Configuration: ");
+	if (!self->settings.null_modem)
+		ret += sprintf(buf+ret, "DTE <-> DCE\n");
+	else
+		ret += sprintf(buf+ret, 
+			       "DTE <-> DTE (null modem emulation)\n");
+
+	ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate);
+
+	ret += sprintf(buf+ret, "Flow control: ");
+	if (self->settings.flow_control & IRCOMM_XON_XOFF_IN)
+		ret += sprintf(buf+ret, "XON_XOFF_IN|");
+	if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT)
+		ret += sprintf(buf+ret, "XON_XOFF_OUT|");
+	if (self->settings.flow_control & IRCOMM_RTS_CTS_IN)
+		ret += sprintf(buf+ret, "RTS_CTS_IN|");
+	if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT)
+		ret += sprintf(buf+ret, "RTS_CTS_OUT|");
+	if (self->settings.flow_control & IRCOMM_DSR_DTR_IN)
+		ret += sprintf(buf+ret, "DSR_DTR_IN|");
+	if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT)
+		ret += sprintf(buf+ret, "DSR_DTR_OUT|");
+	if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN)
+		ret += sprintf(buf+ret, "ENQ_ACK_IN|");
+	if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT)
+		ret += sprintf(buf+ret, "ENQ_ACK_OUT|");
+	if (self->settings.flow_control)
+		ret--; /* remove the last | */
+        ret += sprintf(buf+ret, "\n");
+
+	ret += sprintf(buf+ret, "Flags: ");
+	if (self->flags & ASYNC_CTS_FLOW)
+		ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|");
+	if (self->flags & ASYNC_CHECK_CD)
+		ret += sprintf(buf+ret, "ASYNC_CHECK_CD|");
+	if (self->flags & ASYNC_INITIALIZED)
+		ret += sprintf(buf+ret, "ASYNC_INITIALIZED|");
+	if (self->flags & ASYNC_LOW_LATENCY)
+		ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|");
+	if (self->flags & ASYNC_CLOSING)
+		ret += sprintf(buf+ret, "ASYNC_CLOSING|");
+	if (self->flags & ASYNC_NORMAL_ACTIVE)
+		ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|");
+	if (self->flags)
+		ret--; /* remove the last | */
+	ret += sprintf(buf+ret, "\n");
+
+	ret += sprintf(buf+ret, "Role: %s\n", self->client ? 
+		       "client" : "server");
+	ret += sprintf(buf+ret, "Open count: %d\n", self->open_count);
+	ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size);
+	ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size);
+		
+	if (self->tty)
+		ret += sprintf(buf+ret, "Hardware: %s\n", 
+			       self->tty->hw_stopped ? "Stopped" : "Running");
+
+        ret += sprintf(buf+ret, "\n");
+        return ret;
+}
+
+
+/*
+ * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused)
+ *
+ *    
+ *
+ */
+#ifdef CONFIG_PROC_FS
+static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
+				int *eof, void *unused)
+{
+	struct ircomm_tty_cb *self;
+        int count = 0, l;
+        off_t begin = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
+
+	self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
+	while ((self != NULL) && (count < 4000)) {
+		if (self->magic != IRCOMM_TTY_MAGIC)
+			break;
+
+                l = ircomm_tty_line_info(self, buf + count);
+                count += l;
+                if (count+begin > offset+len)
+                        goto done;
+                if (count+begin < offset) {
+                        begin += count;
+                        count = 0;
+                }
+				
+		self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
+        }
+        *eof = 1;
+done:
+	spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
+
+        if (offset >= count+begin)
+                return 0;
+        *start = buf + (offset-begin);
+        return ((len < begin+count-offset) ? len : begin+count-offset);
+}
+#endif /* CONFIG_PROC_FS */
+
+MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
+MODULE_DESCRIPTION("IrCOMM serial TTY driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CHARDEV_MAJOR(IRCOMM_TTY_MAJOR);
+
+module_init(ircomm_tty_init);
+module_exit(ircomm_tty_cleanup);
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
new file mode 100644
index 00000000000..99f5eddbb4b
--- /dev/null
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -0,0 +1,1006 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_tty_attach.c
+ * Version:       
+ * Description:   Code for attaching the serial driver to IrCOMM
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sat Jun  5 17:42:00 1999
+ * Modified at:   Tue Jan  4 14:20:49 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/parameters.h>
+
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_param.h>
+#include <net/irda/ircomm_event.h>
+
+#include <net/irda/ircomm_tty.h>
+#include <net/irda/ircomm_tty_attach.h>
+
+static void ircomm_tty_ias_register(struct ircomm_tty_cb *self);
+static void ircomm_tty_discovery_indication(discinfo_t *discovery,
+					    DISCOVERY_MODE mode,
+					    void *priv);
+static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, 
+					struct ias_value *value, void *priv);
+static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
+					    int timeout);
+static void ircomm_tty_watchdog_timer_expired(void *data);
+
+static int ircomm_tty_state_idle(struct ircomm_tty_cb *self, 
+				 IRCOMM_TTY_EVENT event, 
+				 struct sk_buff *skb, 
+				 struct ircomm_tty_info *info);
+static int ircomm_tty_state_search(struct ircomm_tty_cb *self, 
+				   IRCOMM_TTY_EVENT event, 
+				   struct sk_buff *skb, 
+				   struct ircomm_tty_info *info);
+static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self, 
+					     IRCOMM_TTY_EVENT event, 
+					     struct sk_buff *skb, 
+					     struct ircomm_tty_info *info);
+static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self, 
+					   IRCOMM_TTY_EVENT event, 
+					   struct sk_buff *skb, 
+					   struct ircomm_tty_info *info);
+static int ircomm_tty_state_setup(struct ircomm_tty_cb *self, 
+				  IRCOMM_TTY_EVENT event, 
+				  struct sk_buff *skb, 
+				  struct ircomm_tty_info *info);
+static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, 
+				  IRCOMM_TTY_EVENT event, 
+				  struct sk_buff *skb, 
+				  struct ircomm_tty_info *info);
+
+char *ircomm_tty_state[] = {
+	"IRCOMM_TTY_IDLE",
+	"IRCOMM_TTY_SEARCH",
+	"IRCOMM_TTY_QUERY_PARAMETERS",
+	"IRCOMM_TTY_QUERY_LSAP_SEL",
+	"IRCOMM_TTY_SETUP",
+	"IRCOMM_TTY_READY",
+	"*** ERROR *** ",
+};
+
+#ifdef CONFIG_IRDA_DEBUG
+static char *ircomm_tty_event[] = {
+	"IRCOMM_TTY_ATTACH_CABLE",
+	"IRCOMM_TTY_DETACH_CABLE",
+	"IRCOMM_TTY_DATA_REQUEST",
+	"IRCOMM_TTY_DATA_INDICATION",
+	"IRCOMM_TTY_DISCOVERY_REQUEST",
+	"IRCOMM_TTY_DISCOVERY_INDICATION",
+	"IRCOMM_TTY_CONNECT_CONFIRM",
+	"IRCOMM_TTY_CONNECT_INDICATION",
+	"IRCOMM_TTY_DISCONNECT_REQUEST",
+	"IRCOMM_TTY_DISCONNECT_INDICATION",
+	"IRCOMM_TTY_WD_TIMER_EXPIRED",
+	"IRCOMM_TTY_GOT_PARAMETERS",
+	"IRCOMM_TTY_GOT_LSAPSEL",
+	"*** ERROR ****",
+};
+#endif /* CONFIG_IRDA_DEBUG */
+
+static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
+		      struct sk_buff *skb, struct ircomm_tty_info *info) = 
+{
+	ircomm_tty_state_idle,
+	ircomm_tty_state_search,
+	ircomm_tty_state_query_parameters,
+	ircomm_tty_state_query_lsap_sel,
+	ircomm_tty_state_setup,
+	ircomm_tty_state_ready,
+};
+
+/*
+ * Function ircomm_tty_attach_cable (driver)
+ *
+ *    Try to attach cable (IrCOMM link). This function will only return
+ *    when the link has been connected, or if an error condition occurs. 
+ *    If success, the return value is the resulting service type.
+ */
+int ircomm_tty_attach_cable(struct ircomm_tty_cb *self)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+       	/* Check if somebody has already connected to us */
+	if (ircomm_is_connected(self->ircomm)) {
+		IRDA_DEBUG(0, "%s(), already connected!\n", __FUNCTION__ );
+		return 0;
+	}
+
+	/* Make sure nobody tries to write before the link is up */
+	self->tty->hw_stopped = 1;
+
+	ircomm_tty_ias_register(self);
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_ATTACH_CABLE, NULL, NULL);
+
+	return 0;
+}
+
+/*
+ * Function ircomm_detach_cable (driver)
+ *
+ *    Detach cable, or cable has been detached by peer
+ *
+ */
+void ircomm_tty_detach_cable(struct ircomm_tty_cb *self)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	del_timer(&self->watchdog_timer);
+
+	/* Remove discovery handler */
+	if (self->ckey) {
+		irlmp_unregister_client(self->ckey);
+		self->ckey = NULL;
+	}
+	/* Remove IrCOMM hint bits */
+	if (self->skey) {
+		irlmp_unregister_service(self->skey);
+		self->skey = NULL;
+	}
+
+	if (self->iriap) { 
+		iriap_close(self->iriap);
+		self->iriap = NULL;
+	}
+
+	/* Remove LM-IAS object */
+	if (self->obj) {
+		irias_delete_object(self->obj);
+		self->obj = NULL;
+	}
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_DETACH_CABLE, NULL, NULL);
+
+	/* Reset some values */
+	self->daddr = self->saddr = 0;
+	self->dlsap_sel = self->slsap_sel = 0;
+
+	memset(&self->settings, 0, sizeof(struct ircomm_params));
+}
+
+/*
+ * Function ircomm_tty_ias_register (self)
+ *
+ *    Register with LM-IAS depending on which service type we are
+ *
+ */
+static void ircomm_tty_ias_register(struct ircomm_tty_cb *self)
+{
+	__u8 oct_seq[6];
+	__u16 hints;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+	
+	/* Compute hint bits based on service */
+	hints = irlmp_service_to_hint(S_COMM);
+	if (self->service_type & IRCOMM_3_WIRE_RAW)
+		hints |= irlmp_service_to_hint(S_PRINTER);
+
+	/* Advertise IrCOMM hint bit in discovery */
+	if (!self->skey)
+		self->skey = irlmp_register_service(hints);
+	/* Set up a discovery handler */
+	if (!self->ckey)
+		self->ckey = irlmp_register_client(hints,
+						   ircomm_tty_discovery_indication,
+						   NULL, (void *) self);
+
+	/* If already done, no need to do it again */
+	if (self->obj)
+		return;
+
+	if (self->service_type & IRCOMM_3_WIRE_RAW) {
+		/* Register IrLPT with LM-IAS */
+		self->obj = irias_new_object("IrLPT", IAS_IRLPT_ID);
+		irias_add_integer_attrib(self->obj, "IrDA:IrLMP:LsapSel", 
+					 self->slsap_sel, IAS_KERNEL_ATTR);
+	} else {
+		/* Register IrCOMM with LM-IAS */
+		self->obj = irias_new_object("IrDA:IrCOMM", IAS_IRCOMM_ID);
+		irias_add_integer_attrib(self->obj, "IrDA:TinyTP:LsapSel", 
+					 self->slsap_sel, IAS_KERNEL_ATTR);
+		
+		/* Code the parameters into the buffer */
+		irda_param_pack(oct_seq, "bbbbbb", 
+				IRCOMM_SERVICE_TYPE, 1, self->service_type,
+				IRCOMM_PORT_TYPE,    1, IRCOMM_SERIAL);
+		
+		/* Register parameters with LM-IAS */
+		irias_add_octseq_attrib(self->obj, "Parameters", oct_seq, 6,
+					IAS_KERNEL_ATTR);
+	}
+	irias_insert_object(self->obj);
+}
+
+/*
+ * Function ircomm_tty_ias_unregister (self)
+ *
+ *    Remove our IAS object and client hook while connected.
+ *
+ */
+static void ircomm_tty_ias_unregister(struct ircomm_tty_cb *self)
+{
+	/* Remove LM-IAS object now so it is not reused.
+	 * IrCOMM deals very poorly with multiple incoming connections.
+	 * It should looks a lot more like IrNET, and "dup" a server TSAP
+	 * to the application TSAP (based on various rules).
+	 * This is a cheap workaround allowing multiple clients to
+	 * connect to us. It will not always work.
+	 * Each IrCOMM socket has an IAS entry. Incoming connection will
+	 * pick the first one found. So, when we are fully connected,
+	 * we remove our IAS entries so that the next IAS entry is used.
+	 * We do that for *both* client and server, because a server
+	 * can also create client instances.
+	 * Jean II */
+	if (self->obj) {
+		irias_delete_object(self->obj);
+		self->obj = NULL;
+	}
+
+#if 0
+	/* Remove discovery handler.
+	 * While we are connected, we no longer need to receive
+	 * discovery events. This would be the case if there is
+	 * multiple IrLAP interfaces. Jean II */
+	if (self->ckey) {
+		irlmp_unregister_client(self->ckey);
+		self->ckey = NULL;
+	}
+#endif
+}
+
+/*
+ * Function ircomm_send_initial_parameters (self)
+ *
+ *    Send initial parameters to the remote IrCOMM device. These parameters
+ *    must be sent before any data.
+ */
+int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self)
+{
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (self->service_type & IRCOMM_3_WIRE_RAW) 
+		return 0;
+
+	/* 
+	 * Set default values, but only if the application for some reason 
+	 * haven't set them already
+	 */
+	IRDA_DEBUG(2, "%s(), data-rate = %d\n", __FUNCTION__ , 
+		   self->settings.data_rate);
+	if (!self->settings.data_rate)
+		self->settings.data_rate = 9600;
+	IRDA_DEBUG(2, "%s(), data-format = %d\n", __FUNCTION__ , 
+		   self->settings.data_format);
+	if (!self->settings.data_format)
+		self->settings.data_format = IRCOMM_WSIZE_8;  /* 8N1 */
+
+	IRDA_DEBUG(2, "%s(), flow-control = %d\n", __FUNCTION__ , 
+		   self->settings.flow_control);
+	/*self->settings.flow_control = IRCOMM_RTS_CTS_IN|IRCOMM_RTS_CTS_OUT;*/
+
+	/* Do not set delta values for the initial parameters */
+	self->settings.dte = IRCOMM_DTR | IRCOMM_RTS;
+
+	/* Only send service type parameter when we are the client */
+	if (self->client)
+		ircomm_param_request(self, IRCOMM_SERVICE_TYPE, FALSE);
+	ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
+	ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
+	
+	/* For a 3 wire service, we just flush the last parameter and return */
+	if (self->settings.service_type == IRCOMM_3_WIRE) {
+		ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
+		return 0;
+	}
+
+	/* Only 9-wire service types continue here */
+	ircomm_param_request(self, IRCOMM_FLOW_CONTROL, FALSE);
+#if 0
+	ircomm_param_request(self, IRCOMM_XON_XOFF, FALSE);
+	ircomm_param_request(self, IRCOMM_ENQ_ACK, FALSE);
+#endif	
+	/* Notify peer that we are ready to receive data */
+	ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	
+	return 0;
+}
+
+/*
+ * Function ircomm_tty_discovery_indication (discovery)
+ *
+ *    Remote device is discovered, try query the remote IAS to see which
+ *    device it is, and which services it has.
+ *
+ */
+static void ircomm_tty_discovery_indication(discinfo_t *discovery,
+					    DISCOVERY_MODE mode,
+					    void *priv)
+{
+	struct ircomm_tty_cb *self;
+	struct ircomm_tty_info info;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Important note :
+	 * We need to drop all passive discoveries.
+	 * The LSAP management of IrComm is deficient and doesn't deal
+	 * with the case of two instance connecting to each other
+	 * simultaneously (it will deadlock in LMP).
+	 * The proper fix would be to use the same technique as in IrNET,
+	 * to have one server socket and separate instances for the
+	 * connecting/connected socket.
+	 * The workaround is to drop passive discovery, which drastically
+	 * reduce the probability of this happening.
+	 * Jean II */
+	if(mode == DISCOVERY_PASSIVE)
+		return;
+
+	info.daddr = discovery->daddr;
+	info.saddr = discovery->saddr;
+
+	/* FIXME. We have a locking problem on the hashbin here.
+	 * We probably need to use hashbin_find_next(), but we first
+	 * need to ensure that "line" is unique. - Jean II */
+	self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
+	while (self != NULL) {
+		IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+		
+		ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, 
+				    NULL, &info);
+
+		self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
+	}
+}
+
+/*
+ * Function ircomm_tty_disconnect_indication (instance, sap, reason, skb)
+ *
+ *    Link disconnected
+ *
+ */
+void ircomm_tty_disconnect_indication(void *instance, void *sap, 
+				      LM_REASON reason,
+				      struct sk_buff *skb)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	if (!self->tty)
+		return;
+
+	/* This will stop control data transfers */
+	self->flow = FLOW_STOP;
+
+	/* Stop data transfers */
+	self->tty->hw_stopped = 1;
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL, 
+			    NULL);
+}
+
+/*
+ * Function ircomm_tty_getvalue_confirm (result, obj_id, value, priv)
+ *
+ *    Got result from the IAS query we make
+ *
+ */
+static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, 
+					struct ias_value *value, 
+					void *priv)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) priv;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	/* We probably don't need to make any more queries */
+	iriap_close(self->iriap);
+	self->iriap = NULL;
+
+	/* Check if request succeeded */
+	if (result != IAS_SUCCESS) {
+		IRDA_DEBUG(4, "%s(), got NULL value!\n", __FUNCTION__ );
+		return;
+	}
+
+	switch (value->type) {
+ 	case IAS_OCT_SEQ:
+		IRDA_DEBUG(2, "%s(), got octet sequence\n", __FUNCTION__ );
+
+		irda_param_extract_all(self, value->t.oct_seq, value->len,
+				       &ircomm_param_info);
+
+		ircomm_tty_do_event(self, IRCOMM_TTY_GOT_PARAMETERS, NULL, 
+				    NULL);
+		break;
+	case IAS_INTEGER:
+		/* Got LSAP selector */	
+		IRDA_DEBUG(2, "%s(), got lsapsel = %d\n", __FUNCTION__ , 
+			   value->t.integer);
+
+		if (value->t.integer == -1) {
+			IRDA_DEBUG(0, "%s(), invalid value!\n", __FUNCTION__ );
+		} else
+			self->dlsap_sel = value->t.integer;
+
+		ircomm_tty_do_event(self, IRCOMM_TTY_GOT_LSAPSEL, NULL, NULL);
+		break;
+	case IAS_MISSING:
+		IRDA_DEBUG(0, "%s(), got IAS_MISSING\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), got unknown type!\n", __FUNCTION__ );
+		break;
+	}
+	irias_delete_value(value);
+}
+
+/*
+ * Function ircomm_tty_connect_confirm (instance, sap, qos, max_sdu_size, skb)
+ *
+ *    Connection confirmed
+ *
+ */
+void ircomm_tty_connect_confirm(void *instance, void *sap, 
+				struct qos_info *qos, 
+				__u32 max_data_size, 
+				__u8 max_header_size, 
+				struct sk_buff *skb)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	self->client = TRUE;
+	self->max_data_size = max_data_size;
+	self->max_header_size = max_header_size;
+	self->flow = FLOW_START;
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_CONFIRM, NULL, NULL);
+
+	/* No need to kfree_skb - see ircomm_ttp_connect_confirm() */
+}
+
+/*
+ * Function ircomm_tty_connect_indication (instance, sap, qos, max_sdu_size, 
+ *                                         skb)
+ *
+ *    we are discovered and being requested to connect by remote device !
+ *
+ */
+void ircomm_tty_connect_indication(void *instance, void *sap, 
+				   struct qos_info *qos, 
+				   __u32 max_data_size,
+				   __u8 max_header_size, 
+				   struct sk_buff *skb)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+	int clen;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	self->client = FALSE;
+	self->max_data_size = max_data_size;
+	self->max_header_size = max_header_size;
+	self->flow = FLOW_START;
+
+	clen = skb->data[0];
+	if (clen)
+		irda_param_extract_all(self, skb->data+1, 
+				       IRDA_MIN(skb->len, clen), 
+				       &ircomm_param_info);
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_INDICATION, NULL, NULL);
+
+	/* No need to kfree_skb - see ircomm_ttp_connect_indication() */
+}
+
+/*
+ * Function ircomm_tty_link_established (self)
+ *
+ *    Called when the IrCOMM link is established
+ *
+ */
+void ircomm_tty_link_established(struct ircomm_tty_cb *self)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	if (!self->tty)
+		return;
+	
+	del_timer(&self->watchdog_timer);
+
+	/* 
+	 * IrCOMM link is now up, and if we are not using hardware
+	 * flow-control, then declare the hardware as running. Otherwise we
+	 * will have to wait for the peer device (DCE) to raise the CTS
+	 * line.  
+	 */
+	if ((self->flags & ASYNC_CTS_FLOW) && ((self->settings.dce & IRCOMM_CTS) == 0)) {
+		IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __FUNCTION__ );
+		return;
+	} else {
+		IRDA_DEBUG(1, "%s(), starting hardware!\n", __FUNCTION__ );
+
+		self->tty->hw_stopped = 0;
+	
+		/* Wake up processes blocked on open */
+		wake_up_interruptible(&self->open_wait);
+	}
+
+	schedule_work(&self->tqueue);
+}
+
+/*
+ * Function ircomm_tty_start_watchdog_timer (self, timeout)
+ *
+ *    Start the watchdog timer. This timer is used to make sure that any 
+ *    connection attempt is successful, and if not, we will retry after 
+ *    the timeout
+ */
+static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
+					    int timeout)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
+			 ircomm_tty_watchdog_timer_expired);
+}
+
+/*
+ * Function ircomm_tty_watchdog_timer_expired (data)
+ *
+ *    Called when the connect procedure have taken to much time.
+ *
+ */
+static void ircomm_tty_watchdog_timer_expired(void *data)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) data;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	ircomm_tty_do_event(self, IRCOMM_TTY_WD_TIMER_EXPIRED, NULL, NULL);
+}
+
+
+/*
+ * Function ircomm_tty_do_event (self, event, skb)
+ *
+ *    Process event
+ *
+ */
+int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
+			struct sk_buff *skb, struct ircomm_tty_info *info) 
+{
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+	
+	return (*state[self->state])(self, event, skb, info);
+}
+
+/*
+ * Function ircomm_tty_next_state (self, state)
+ *
+ *    Switch state
+ *
+ */
+static inline void ircomm_tty_next_state(struct ircomm_tty_cb *self, IRCOMM_TTY_STATE state)
+{
+	/*
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
+
+	IRDA_DEBUG(2, "%s: next state=%s, service type=%d\n", __FUNCTION__ , 
+		   ircomm_tty_state[self->state], self->service_type);
+	*/
+	self->state = state;
+}
+
+/*
+ * Function ircomm_tty_state_idle (self, event, skb, info)
+ *
+ *    Just hanging around
+ *
+ */
+static int ircomm_tty_state_idle(struct ircomm_tty_cb *self, 
+				 IRCOMM_TTY_EVENT event, 
+				 struct sk_buff *skb, 
+				 struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+	switch (event) {
+	case IRCOMM_TTY_ATTACH_CABLE:
+		/* Try to discover any remote devices */		
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
+		
+		irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
+		break;
+	case IRCOMM_TTY_DISCOVERY_INDICATION:
+		self->daddr = info->daddr;
+		self->saddr = info->saddr;
+
+		if (self->iriap) {
+			IRDA_WARNING("%s(), busy with a previous query\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+
+		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+					 ircomm_tty_getvalue_confirm);
+
+		iriap_getvaluebyclass_request(self->iriap,
+					      self->saddr, self->daddr,
+					      "IrDA:IrCOMM", "Parameters");
+		
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
+		break;
+	case IRCOMM_TTY_CONNECT_INDICATION:
+		del_timer(&self->watchdog_timer);
+
+		/* Accept connection */
+		ircomm_connect_response(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_WD_TIMER_EXPIRED:
+		/* Just stay idle */
+		break;
+	case IRCOMM_TTY_DETACH_CABLE:
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_state_search (self, event, skb, info)
+ *
+ *    Trying to discover an IrCOMM device
+ *
+ */
+static int ircomm_tty_state_search(struct ircomm_tty_cb *self, 
+				   IRCOMM_TTY_EVENT event, 
+				   struct sk_buff *skb, 
+				   struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+
+	switch (event) {
+	case IRCOMM_TTY_DISCOVERY_INDICATION:
+		self->daddr = info->daddr;
+		self->saddr = info->saddr;
+
+		if (self->iriap) {
+			IRDA_WARNING("%s(), busy with a previous query\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+		
+		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+					 ircomm_tty_getvalue_confirm);
+		
+		if (self->service_type == IRCOMM_3_WIRE_RAW) {
+			iriap_getvaluebyclass_request(self->iriap, self->saddr,
+						      self->daddr, "IrLPT", 
+						      "IrDA:IrLMP:LsapSel");
+			ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
+		} else {
+			iriap_getvaluebyclass_request(self->iriap, self->saddr,
+						      self->daddr, 
+						      "IrDA:IrCOMM", 
+						      "Parameters");
+
+			ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
+		}
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		break;
+	case IRCOMM_TTY_CONNECT_INDICATION:
+		del_timer(&self->watchdog_timer);
+		ircomm_tty_ias_unregister(self);
+
+		/* Accept connection */
+		ircomm_connect_response(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_WD_TIMER_EXPIRED:
+#if 1
+		/* Give up */
+#else
+		/* Try to discover any remote devices */		
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
+#endif
+		break;
+	case IRCOMM_TTY_DETACH_CABLE:
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_state_query (self, event, skb, info)
+ *
+ *    Querying the remote LM-IAS for IrCOMM parameters
+ *
+ */
+static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self, 
+					     IRCOMM_TTY_EVENT event, 
+					     struct sk_buff *skb, 
+					     struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+
+	switch (event) {
+	case IRCOMM_TTY_GOT_PARAMETERS:
+		if (self->iriap) {
+			IRDA_WARNING("%s(), busy with a previous query\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+		
+		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+					 ircomm_tty_getvalue_confirm);
+
+		iriap_getvaluebyclass_request(self->iriap, self->saddr, 
+					      self->daddr, "IrDA:IrCOMM", 
+					      "IrDA:TinyTP:LsapSel");
+
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
+		break;
+	case IRCOMM_TTY_WD_TIMER_EXPIRED:
+		/* Go back to search mode */
+		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
+		ircomm_tty_start_watchdog_timer(self, 3*HZ); 
+		break;
+	case IRCOMM_TTY_CONNECT_INDICATION:
+		del_timer(&self->watchdog_timer);
+		ircomm_tty_ias_unregister(self);
+
+		/* Accept connection */
+		ircomm_connect_response(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_DETACH_CABLE:
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_state_query_lsap_sel (self, event, skb, info)
+ *
+ *    Query remote LM-IAS for the LSAP selector which we can connect to
+ *
+ */
+static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self, 
+					   IRCOMM_TTY_EVENT event, 
+					   struct sk_buff *skb, 
+					   struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+
+	switch (event) {
+	case IRCOMM_TTY_GOT_LSAPSEL:
+		/* Connect to remote device */
+		ret = ircomm_connect_request(self->ircomm, self->dlsap_sel,
+					     self->saddr, self->daddr, 
+					     NULL, self->service_type);
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		ircomm_tty_next_state(self, IRCOMM_TTY_SETUP);
+		break;
+	case IRCOMM_TTY_WD_TIMER_EXPIRED:
+		/* Go back to search mode */
+		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		break;
+	case IRCOMM_TTY_CONNECT_INDICATION:
+		del_timer(&self->watchdog_timer);
+		ircomm_tty_ias_unregister(self);
+
+		/* Accept connection */
+		ircomm_connect_response(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_DETACH_CABLE:
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_state_setup (self, event, skb, info)
+ *
+ *    Trying to connect
+ *
+ */
+static int ircomm_tty_state_setup(struct ircomm_tty_cb *self, 
+				  IRCOMM_TTY_EVENT event, 
+				  struct sk_buff *skb, 
+				  struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s: state=%s, event=%s\n", __FUNCTION__ ,
+		   ircomm_tty_state[self->state], ircomm_tty_event[event]);
+
+	switch (event) {
+	case IRCOMM_TTY_CONNECT_CONFIRM:
+		del_timer(&self->watchdog_timer);
+		ircomm_tty_ias_unregister(self);
+		
+		/* 
+		 * Send initial parameters. This will also send out queued
+		 * parameters waiting for the connection to come up 
+		 */
+		ircomm_tty_send_initial_parameters(self);
+		ircomm_tty_link_established(self);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_CONNECT_INDICATION:
+		del_timer(&self->watchdog_timer);
+		ircomm_tty_ias_unregister(self);
+		
+		/* Accept connection */
+		ircomm_connect_response(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
+		break;
+	case IRCOMM_TTY_WD_TIMER_EXPIRED:
+		/* Go back to search mode */
+		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+		break;
+	case IRCOMM_TTY_DETACH_CABLE:
+		/* ircomm_disconnect_request(self->ircomm, NULL); */
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Function ircomm_tty_state_ready (self, event, skb, info)
+ *
+ *    IrCOMM is now connected
+ *
+ */
+static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, 
+				  IRCOMM_TTY_EVENT event, 
+				  struct sk_buff *skb, 
+				  struct ircomm_tty_info *info)
+{
+	int ret = 0;
+
+	switch (event) {
+	case IRCOMM_TTY_DATA_REQUEST:
+		ret = ircomm_data_request(self->ircomm, skb);
+		break;		
+	case IRCOMM_TTY_DETACH_CABLE:
+		ircomm_disconnect_request(self->ircomm, NULL);
+		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
+		break;
+	case IRCOMM_TTY_DISCONNECT_INDICATION:
+		ircomm_tty_ias_register(self);
+		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
+		ircomm_tty_start_watchdog_timer(self, 3*HZ);
+
+		if (self->flags & ASYNC_CHECK_CD) {
+			/* Drop carrier */
+			self->settings.dce = IRCOMM_DELTA_CD;
+			ircomm_tty_check_modem_status(self);
+		} else {
+			IRDA_DEBUG(0, "%s(), hanging up!\n", __FUNCTION__ );
+			if (self->tty)
+				tty_hangup(self->tty);
+		}
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown event: %s\n", __FUNCTION__ ,
+			   ircomm_tty_event[event]);
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
new file mode 100644
index 00000000000..197e3e7ed7e
--- /dev/null
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -0,0 +1,428 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_tty_ioctl.c
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Jun 10 14:39:09 1999
+ * Modified at:   Wed Jan  5 14:45:43 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/serial.h>
+
+#include <asm/uaccess.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irmod.h>
+
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_param.h>
+#include <net/irda/ircomm_tty_attach.h>
+#include <net/irda/ircomm_tty.h>
+
+#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
+/*
+ * Function ircomm_tty_change_speed (driver)
+ *
+ *    Change speed of the driver. If the remote device is a DCE, then this
+ *    should make it change the speed of its serial port
+ */
+static void ircomm_tty_change_speed(struct ircomm_tty_cb *self)
+{
+	unsigned cflag, cval;
+	int baud;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (!self->tty || !self->tty->termios || !self->ircomm)
+		return;
+
+	cflag = self->tty->termios->c_cflag;
+
+	/*  byte size and parity */
+	switch (cflag & CSIZE) {
+	case CS5: cval = IRCOMM_WSIZE_5; break;
+	case CS6: cval = IRCOMM_WSIZE_6; break;
+	case CS7: cval = IRCOMM_WSIZE_7; break;
+	case CS8: cval = IRCOMM_WSIZE_8; break;
+	default:  cval = IRCOMM_WSIZE_5; break;
+	}
+	if (cflag & CSTOPB)
+		cval |= IRCOMM_2_STOP_BIT;
+	
+	if (cflag & PARENB)
+		cval |= IRCOMM_PARITY_ENABLE;
+	if (!(cflag & PARODD))
+		cval |= IRCOMM_PARITY_EVEN;
+
+	/* Determine divisor based on baud rate */
+	baud = tty_get_baud_rate(self->tty);
+	if (!baud)
+		baud = 9600;	/* B0 transition handled in rs_set_termios */
+
+	self->settings.data_rate = baud;
+	ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
+	
+	/* CTS flow control flag and modem status interrupts */
+	if (cflag & CRTSCTS) {
+		self->flags |= ASYNC_CTS_FLOW;
+		self->settings.flow_control |= IRCOMM_RTS_CTS_IN;
+		/* This got me. Bummer. Jean II */
+		if (self->service_type == IRCOMM_3_WIRE_RAW)
+			IRDA_WARNING("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n", __FUNCTION__);
+	} else {
+		self->flags &= ~ASYNC_CTS_FLOW;
+		self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN;
+	}
+	if (cflag & CLOCAL)
+		self->flags &= ~ASYNC_CHECK_CD;
+	else
+		self->flags |= ASYNC_CHECK_CD;
+#if 0	
+	/*
+	 * Set up parity check flag
+	 */
+
+	if (I_INPCK(self->tty))
+		driver->read_status_mask |= LSR_FE | LSR_PE;
+	if (I_BRKINT(driver->tty) || I_PARMRK(driver->tty))
+		driver->read_status_mask |= LSR_BI;
+	
+	/*
+	 * Characters to ignore
+	 */
+	driver->ignore_status_mask = 0;
+	if (I_IGNPAR(driver->tty))
+		driver->ignore_status_mask |= LSR_PE | LSR_FE;
+
+	if (I_IGNBRK(self->tty)) {
+		self->ignore_status_mask |= LSR_BI;
+		/*
+		 * If we're ignore parity and break indicators, ignore 
+		 * overruns too. (For real raw support).
+		 */
+		if (I_IGNPAR(self->tty)) 
+			self->ignore_status_mask |= LSR_OE;
+	}
+#endif
+	self->settings.data_format = cval;
+
+	ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
+ 	ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
+}
+
+/*
+ * Function ircomm_tty_set_termios (tty, old_termios)
+ *
+ *    This routine allows the tty driver to be notified when device's
+ *    termios settings have changed.  Note that a well-designed tty driver
+ *    should be prepared to accept the case where old == NULL, and try to
+ *    do something rational.
+ */
+void ircomm_tty_set_termios(struct tty_struct *tty, 
+			    struct termios *old_termios)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned int cflag = tty->termios->c_cflag;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if ((cflag == old_termios->c_cflag) && 
+	    (RELEVANT_IFLAG(tty->termios->c_iflag) == 
+	     RELEVANT_IFLAG(old_termios->c_iflag)))
+	{
+		return;
+	}
+
+	ircomm_tty_change_speed(self);
+
+	/* Handle transition to B0 status */
+	if ((old_termios->c_cflag & CBAUD) &&
+	    !(cflag & CBAUD)) {
+		self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
+		ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	}
+	
+	/* Handle transition away from B0 status */
+	if (!(old_termios->c_cflag & CBAUD) &&
+	    (cflag & CBAUD)) {
+		self->settings.dte |= IRCOMM_DTR;
+		if (!(tty->termios->c_cflag & CRTSCTS) || 
+		    !test_bit(TTY_THROTTLED, &tty->flags)) {
+			self->settings.dte |= IRCOMM_RTS;
+		}
+		ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	}
+	
+	/* Handle turning off CRTSCTS */
+	if ((old_termios->c_cflag & CRTSCTS) &&
+	    !(tty->termios->c_cflag & CRTSCTS)) 
+	{
+		tty->hw_stopped = 0;
+		ircomm_tty_start(tty);
+	}
+}
+
+/*
+ * Function ircomm_tty_tiocmget (tty, file)
+ *
+ *    
+ *
+ */
+int ircomm_tty_tiocmget(struct tty_struct *tty, struct file *file)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	unsigned int result;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (tty->flags & (1 << TTY_IO_ERROR))
+		return -EIO;
+
+	result =  ((self->settings.dte & IRCOMM_RTS) ? TIOCM_RTS : 0)
+		| ((self->settings.dte & IRCOMM_DTR) ? TIOCM_DTR : 0)
+		| ((self->settings.dce & IRCOMM_CD)  ? TIOCM_CAR : 0)
+		| ((self->settings.dce & IRCOMM_RI)  ? TIOCM_RNG : 0)
+		| ((self->settings.dce & IRCOMM_DSR) ? TIOCM_DSR : 0)
+		| ((self->settings.dce & IRCOMM_CTS) ? TIOCM_CTS : 0);
+	return result;
+}
+
+/*
+ * Function ircomm_tty_tiocmset (tty, file, set, clear)
+ *
+ *    
+ *
+ */
+int ircomm_tty_tiocmset(struct tty_struct *tty, struct file *file,
+			unsigned int set, unsigned int clear)
+{ 
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (tty->flags & (1 << TTY_IO_ERROR))
+		return -EIO;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
+
+	if (set & TIOCM_RTS)
+		self->settings.dte |= IRCOMM_RTS;
+	if (set & TIOCM_DTR)
+		self->settings.dte |= IRCOMM_DTR;
+
+	if (clear & TIOCM_RTS)
+		self->settings.dte &= ~IRCOMM_RTS;
+	if (clear & TIOCM_DTR)
+		self->settings.dte &= ~IRCOMM_DTR;
+
+	if ((set|clear) & TIOCM_RTS)
+		self->settings.dte |= IRCOMM_DELTA_RTS;
+	if ((set|clear) & TIOCM_DTR)
+		self->settings.dte |= IRCOMM_DELTA_DTR;
+
+	ircomm_param_request(self, IRCOMM_DTE, TRUE);
+	
+	return 0;
+}
+
+/*
+ * Function get_serial_info (driver, retinfo)
+ *
+ *    
+ *
+ */
+static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self,
+				      struct serial_struct __user *retinfo)
+{
+	struct serial_struct info;
+   
+	if (!retinfo)
+		return -EFAULT;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	memset(&info, 0, sizeof(info));
+	info.line = self->line;
+	info.flags = self->flags;
+	info.baud_base = self->settings.data_rate;
+	info.close_delay = self->close_delay;
+	info.closing_wait = self->closing_wait;
+
+	/* For compatibility  */
+ 	info.type = PORT_16550A;
+ 	info.port = 0;
+ 	info.irq = 0;
+	info.xmit_fifo_size = 0;
+	info.hub6 = 0;   
+	info.custom_divisor = 0;
+
+	if (copy_to_user(retinfo, &info, sizeof(*retinfo)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Function set_serial_info (driver, new_info)
+ *
+ *    
+ *
+ */
+static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self,
+				      struct serial_struct __user *new_info)
+{
+#if 0
+	struct serial_struct new_serial;
+	struct ircomm_tty_cb old_state, *state;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+
+	if (copy_from_user(&new_serial,new_info,sizeof(new_serial)))
+		return -EFAULT;
+
+
+	state = self
+	old_state = *self;
+  
+	if (!capable(CAP_SYS_ADMIN)) {
+		if ((new_serial.baud_base != state->settings.data_rate) ||
+		    (new_serial.close_delay != state->close_delay) ||
+		    ((new_serial.flags & ~ASYNC_USR_MASK) !=
+		     (self->flags & ~ASYNC_USR_MASK)))
+			return -EPERM;
+		state->flags = ((state->flags & ~ASYNC_USR_MASK) |
+				 (new_serial.flags & ASYNC_USR_MASK));
+		self->flags = ((self->flags & ~ASYNC_USR_MASK) |
+			       (new_serial.flags & ASYNC_USR_MASK));
+		/* self->custom_divisor = new_serial.custom_divisor; */
+		goto check_and_exit;
+	}
+
+	/*
+	 * OK, past this point, all the error checking has been done.
+	 * At this point, we start making changes.....
+	 */
+
+	if (self->settings.data_rate != new_serial.baud_base) {
+		self->settings.data_rate = new_serial.baud_base;
+		ircomm_param_request(self, IRCOMM_DATA_RATE, TRUE);
+	}
+
+	self->close_delay = new_serial.close_delay * HZ/100;
+	self->closing_wait = new_serial.closing_wait * HZ/100;
+	/* self->custom_divisor = new_serial.custom_divisor; */
+
+	self->flags = ((self->flags & ~ASYNC_FLAGS) |
+		       (new_serial.flags & ASYNC_FLAGS));
+	self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+
+ check_and_exit:
+
+	if (self->flags & ASYNC_INITIALIZED) {
+		if (((old_state.flags & ASYNC_SPD_MASK) !=
+		     (self->flags & ASYNC_SPD_MASK)) ||
+		    (old_driver.custom_divisor != driver->custom_divisor)) {
+			if ((driver->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+				driver->tty->alt_speed = 57600;
+			if ((driver->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+				driver->tty->alt_speed = 115200;
+			if ((driver->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+				driver->tty->alt_speed = 230400;
+			if ((driver->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+				driver->tty->alt_speed = 460800;
+			ircomm_tty_change_speed(driver);
+		}
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Function ircomm_tty_ioctl (tty, file, cmd, arg)
+ *
+ *    
+ *
+ */
+int ircomm_tty_ioctl(struct tty_struct *tty, struct file *file, 
+		     unsigned int cmd, unsigned long arg)
+{
+	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+	int ret = 0;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
+	    (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
+	    (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
+		if (tty->flags & (1 << TTY_IO_ERROR))
+		    return -EIO;
+	}
+
+	switch (cmd) {
+	case TIOCGSERIAL:
+		ret = ircomm_tty_get_serial_info(self, (struct serial_struct __user *) arg);
+		break;
+	case TIOCSSERIAL:
+		ret = ircomm_tty_set_serial_info(self, (struct serial_struct __user *) arg);
+		break;
+	case TIOCMIWAIT:
+		IRDA_DEBUG(0, "(), TIOCMIWAIT, not impl!\n");
+		break;
+
+	case TIOCGICOUNT:
+		IRDA_DEBUG(0, "%s(), TIOCGICOUNT not impl!\n", __FUNCTION__ );
+#if 0
+		save_flags(flags); cli();
+		cnow = driver->icount;
+		restore_flags(flags);
+		p_cuser = (struct serial_icounter_struct __user *) arg;
+		if (put_user(cnow.cts, &p_cuser->cts) ||
+		    put_user(cnow.dsr, &p_cuser->dsr) ||
+		    put_user(cnow.rng, &p_cuser->rng) ||
+		    put_user(cnow.dcd, &p_cuser->dcd) ||
+		    put_user(cnow.rx, &p_cuser->rx) ||
+		    put_user(cnow.tx, &p_cuser->tx) ||
+		    put_user(cnow.frame, &p_cuser->frame) ||
+		    put_user(cnow.overrun, &p_cuser->overrun) ||
+		    put_user(cnow.parity, &p_cuser->parity) ||
+		    put_user(cnow.brk, &p_cuser->brk) ||
+		    put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
+			return -EFAULT;
+#endif		
+		return 0;
+	default:
+		ret = -ENOIOCTLCMD;  /* ioctls which we must ignore */
+	}
+	return ret;
+}
+
+
+
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
new file mode 100644
index 00000000000..fda299e300c
--- /dev/null
+++ b/net/irda/irda_device.c
@@ -0,0 +1,489 @@
+/*********************************************************************
+ *
+ * Filename:      irda_device.c
+ * Version:       0.9
+ * Description:   Utility functions used by the device drivers
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sat Oct  9 09:22:27 1999
+ * Modified at:   Sun Jan 23 17:41:24 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with this program; if not, write to the Free Software
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *     MA 02111-1307 USA
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/kmod.h>
+#include <linux/spinlock.h>
+
+#include <asm/ioctls.h>
+#include <asm/uaccess.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+
+#include <net/irda/irda_device.h>
+#include <net/irda/irlap.h>
+#include <net/irda/timer.h>
+#include <net/irda/wrapper.h>
+
+static void __irda_task_delete(struct irda_task *task);
+
+static hashbin_t *dongles = NULL;
+static hashbin_t *tasks = NULL;
+
+#ifdef CONFIG_IRDA_DEBUG
+static const char *task_state[] = {
+	"IRDA_TASK_INIT",
+	"IRDA_TASK_DONE",
+	"IRDA_TASK_WAIT",
+	"IRDA_TASK_WAIT1",
+	"IRDA_TASK_WAIT2",
+	"IRDA_TASK_WAIT3",
+	"IRDA_TASK_CHILD_INIT",
+	"IRDA_TASK_CHILD_WAIT",
+	"IRDA_TASK_CHILD_DONE",
+};
+#endif	/* CONFIG_IRDA_DEBUG */
+
+static void irda_task_timer_expired(void *data);
+
+int __init irda_device_init( void)
+{
+	dongles = hashbin_new(HB_NOLOCK);
+	if (dongles == NULL) {
+		IRDA_WARNING("IrDA: Can't allocate dongles hashbin!\n");
+		return -ENOMEM;
+	}
+	spin_lock_init(&dongles->hb_spinlock);
+
+	tasks = hashbin_new(HB_LOCK);
+	if (tasks == NULL) {
+		IRDA_WARNING("IrDA: Can't allocate tasks hashbin!\n");
+		hashbin_delete(dongles, NULL);
+		return -ENOMEM;
+	}
+
+	/* We no longer initialise the driver ourselves here, we let
+	 * the system do it for us... - Jean II */
+
+	return 0;
+}
+
+static void __exit leftover_dongle(void *arg)
+{
+	struct dongle_reg *reg = arg;
+	IRDA_WARNING("IrDA: Dongle type %x not unregistered\n",
+		     reg->type);
+}
+
+void __exit irda_device_cleanup(void)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	hashbin_delete(tasks, (FREE_FUNC) __irda_task_delete);
+
+	hashbin_delete(dongles, leftover_dongle);
+}
+
+/*
+ * Function irda_device_set_media_busy (self, status)
+ *
+ *    Called when we have detected that another station is transmitting
+ *    in contention mode.
+ */
+void irda_device_set_media_busy(struct net_device *dev, int status)
+{
+	struct irlap_cb *self;
+
+	IRDA_DEBUG(4, "%s(%s)\n", __FUNCTION__, status ? "TRUE" : "FALSE");
+
+	self = (struct irlap_cb *) dev->atalk_ptr;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	if (status) {
+		self->media_busy = TRUE;
+		if (status == SMALL)
+			irlap_start_mbusy_timer(self, SMALLBUSY_TIMEOUT);
+		else
+			irlap_start_mbusy_timer(self, MEDIABUSY_TIMEOUT);
+		IRDA_DEBUG( 4, "Media busy!\n");
+	} else {
+		self->media_busy = FALSE;
+		irlap_stop_mbusy_timer(self);
+	}
+}
+EXPORT_SYMBOL(irda_device_set_media_busy);
+
+
+/*
+ * Function irda_device_is_receiving (dev)
+ *
+ *    Check if the device driver is currently receiving data
+ *
+ */
+int irda_device_is_receiving(struct net_device *dev)
+{
+	struct if_irda_req req;
+	int ret;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	if (!dev->do_ioctl) {
+		IRDA_ERROR("%s: do_ioctl not impl. by device driver\n",
+			   __FUNCTION__);
+		return -1;
+	}
+
+	ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCGRECEIVING);
+	if (ret < 0)
+		return ret;
+
+	return req.ifr_receiving;
+}
+
+void irda_task_next_state(struct irda_task *task, IRDA_TASK_STATE state)
+{
+	IRDA_DEBUG(2, "%s(), state = %s\n", __FUNCTION__, task_state[state]);
+
+	task->state = state;
+}
+EXPORT_SYMBOL(irda_task_next_state);
+
+static void __irda_task_delete(struct irda_task *task)
+{
+	del_timer(&task->timer);
+
+	kfree(task);
+}
+
+void irda_task_delete(struct irda_task *task)
+{
+	/* Unregister task */
+	hashbin_remove(tasks, (long) task, NULL);
+
+	__irda_task_delete(task);
+}
+EXPORT_SYMBOL(irda_task_delete);
+
+/*
+ * Function irda_task_kick (task)
+ *
+ *    Tries to execute a task possible multiple times until the task is either
+ *    finished, or askes for a timeout. When a task is finished, we do post
+ *    processing, and notify the parent task, that is waiting for this task
+ *    to complete.
+ */
+static int irda_task_kick(struct irda_task *task)
+{
+	int finished = TRUE;
+	int count = 0;
+	int timeout;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(task != NULL, return -1;);
+	IRDA_ASSERT(task->magic == IRDA_TASK_MAGIC, return -1;);
+
+	/* Execute task until it's finished, or askes for a timeout */
+	do {
+		timeout = task->function(task);
+		if (count++ > 100) {
+			IRDA_ERROR("%s: error in task handler!\n",
+				   __FUNCTION__);
+			irda_task_delete(task);
+			return TRUE;
+		}
+	} while ((timeout == 0) && (task->state != IRDA_TASK_DONE));
+
+	if (timeout < 0) {
+		IRDA_ERROR("%s: Error executing task!\n", __FUNCTION__);
+		irda_task_delete(task);
+		return TRUE;
+	}
+
+	/* Check if we are finished */
+	if (task->state == IRDA_TASK_DONE) {
+		del_timer(&task->timer);
+
+		/* Do post processing */
+		if (task->finished)
+			task->finished(task);
+
+		/* Notify parent */
+		if (task->parent) {
+			/* Check if parent is waiting for us to complete */
+			if (task->parent->state == IRDA_TASK_CHILD_WAIT) {
+				task->parent->state = IRDA_TASK_CHILD_DONE;
+
+				/* Stop timer now that we are here */
+				del_timer(&task->parent->timer);
+
+				/* Kick parent task */
+				irda_task_kick(task->parent);
+			}
+		}
+		irda_task_delete(task);
+	} else if (timeout > 0) {
+		irda_start_timer(&task->timer, timeout, (void *) task,
+				 irda_task_timer_expired);
+		finished = FALSE;
+	} else {
+		IRDA_DEBUG(0, "%s(), not finished, and no timeout!\n",
+			   __FUNCTION__);
+		finished = FALSE;
+	}
+
+	return finished;
+}
+
+/*
+ * Function irda_task_execute (instance, function, finished)
+ *
+ *    This function registers and tries to execute tasks that may take some
+ *    time to complete. We do it this hairy way since we may have been
+ *    called from interrupt context, so it's not possible to use
+ *    schedule_timeout()
+ * Two important notes :
+ *	o Make sure you irda_task_delete(task); in case you delete the
+ *	  calling instance.
+ *	o No real need to lock when calling this function, but you may
+ *	  want to lock within the task handler.
+ * Jean II
+ */
+struct irda_task *irda_task_execute(void *instance,
+				    IRDA_TASK_CALLBACK function,
+				    IRDA_TASK_CALLBACK finished,
+				    struct irda_task *parent, void *param)
+{
+	struct irda_task *task;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	task = kmalloc(sizeof(struct irda_task), GFP_ATOMIC);
+	if (!task)
+		return NULL;
+
+	task->state    = IRDA_TASK_INIT;
+	task->instance = instance;
+	task->function = function;
+	task->finished = finished;
+	task->parent   = parent;
+	task->param    = param;
+	task->magic    = IRDA_TASK_MAGIC;
+
+	init_timer(&task->timer);
+
+	/* Register task */
+	hashbin_insert(tasks, (irda_queue_t *) task, (long) task, NULL);
+
+	/* No time to waste, so lets get going! */
+	return irda_task_kick(task) ? NULL : task;
+}
+EXPORT_SYMBOL(irda_task_execute);
+
+/*
+ * Function irda_task_timer_expired (data)
+ *
+ *    Task time has expired. We now try to execute task (again), and restart
+ *    the timer if the task has not finished yet
+ */
+static void irda_task_timer_expired(void *data)
+{
+	struct irda_task *task;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	task = (struct irda_task *) data;
+
+	irda_task_kick(task);
+}
+
+/*
+ * Function irda_device_setup (dev)
+ *
+ *    This function should be used by low level device drivers in a similar way
+ *    as ether_setup() is used by normal network device drivers
+ */
+static void irda_device_setup(struct net_device *dev)
+{
+        dev->hard_header_len = 0;
+        dev->addr_len        = 0;
+
+        dev->type            = ARPHRD_IRDA;
+        dev->tx_queue_len    = 8; /* Window size + 1 s-frame */
+
+	memset(dev->broadcast, 0xff, 4);
+
+	dev->mtu = 2048;
+	dev->flags = IFF_NOARP;
+}
+
+/*
+ * Funciton  alloc_irdadev 
+ * 	Allocates and sets up an IRDA device in a manner similar to
+ * 	alloc_etherdev.
+ */
+struct net_device *alloc_irdadev(int sizeof_priv)
+{
+	return alloc_netdev(sizeof_priv, "irda%d", irda_device_setup);
+}
+EXPORT_SYMBOL(alloc_irdadev);
+
+/*
+ * Function irda_device_init_dongle (self, type, qos)
+ *
+ *    Initialize attached dongle.
+ *
+ * Important : request_module require us to call this function with
+ * a process context and irq enabled. - Jean II
+ */
+dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
+{
+	struct dongle_reg *reg;
+	dongle_t *dongle = NULL;
+
+	might_sleep();
+
+	spin_lock(&dongles->hb_spinlock);
+	reg = hashbin_find(dongles, type, NULL);
+
+#ifdef CONFIG_KMOD
+	/* Try to load the module needed */
+	if (!reg && capable(CAP_SYS_MODULE)) {
+		spin_unlock(&dongles->hb_spinlock);
+	
+		request_module("irda-dongle-%d", type);
+		
+		spin_lock(&dongles->hb_spinlock);
+		reg = hashbin_find(dongles, type, NULL);
+	}
+#endif
+
+	if (!reg || !try_module_get(reg->owner) ) {
+		IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
+			   type);
+		goto out;
+	}
+
+	/* Allocate dongle info for this instance */
+	dongle = kmalloc(sizeof(dongle_t), GFP_KERNEL);
+	if (!dongle)
+		goto out;
+
+	memset(dongle, 0, sizeof(dongle_t));
+
+	/* Bind the registration info to this particular instance */
+	dongle->issue = reg;
+	dongle->dev = dev;
+
+ out:
+	spin_unlock(&dongles->hb_spinlock);
+	return dongle;
+}
+EXPORT_SYMBOL(irda_device_dongle_init);
+
+/*
+ * Function irda_device_dongle_cleanup (dongle)
+ */
+int irda_device_dongle_cleanup(dongle_t *dongle)
+{
+	IRDA_ASSERT(dongle != NULL, return -1;);
+
+	dongle->issue->close(dongle);
+	module_put(dongle->issue->owner);
+	kfree(dongle);
+
+	return 0;
+}
+EXPORT_SYMBOL(irda_device_dongle_cleanup);
+
+/*
+ * Function irda_device_register_dongle (dongle)
+ */
+int irda_device_register_dongle(struct dongle_reg *new)
+{
+	spin_lock(&dongles->hb_spinlock);
+	/* Check if this dongle has been registered before */
+	if (hashbin_find(dongles, new->type, NULL)) {
+		IRDA_MESSAGE("%s: Dongle type %x already registered\n", 
+			     __FUNCTION__, new->type);
+        } else {
+		/* Insert IrDA dongle into hashbin */
+		hashbin_insert(dongles, (irda_queue_t *) new, new->type, NULL);
+	}
+	spin_unlock(&dongles->hb_spinlock);
+
+        return 0;
+}
+EXPORT_SYMBOL(irda_device_register_dongle);
+
+/*
+ * Function irda_device_unregister_dongle (dongle)
+ *
+ *    Unregister dongle, and remove dongle from list of registered dongles
+ *
+ */
+void irda_device_unregister_dongle(struct dongle_reg *dongle)
+{
+	struct dongle *node;
+
+	spin_lock(&dongles->hb_spinlock);
+	node = hashbin_remove(dongles, dongle->type, NULL);
+	if (!node) 
+		IRDA_ERROR("%s: dongle not found!\n", __FUNCTION__);
+	spin_unlock(&dongles->hb_spinlock);
+}
+EXPORT_SYMBOL(irda_device_unregister_dongle);
+
+#ifdef CONFIG_ISA
+/*
+ * Function setup_dma (idev, buffer, count, mode)
+ *
+ *    Setup the DMA channel. Commonly used by ISA FIR drivers
+ *
+ */
+void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode)
+{
+	unsigned long flags;
+
+	flags = claim_dma_lock();
+
+	disable_dma(channel);
+	clear_dma_ff(channel);
+	set_dma_mode(channel, mode);
+	set_dma_addr(channel, buffer);
+	set_dma_count(channel, count);
+	enable_dma(channel);
+
+	release_dma_lock(flags);
+}
+EXPORT_SYMBOL(irda_setup_dma);
+#endif
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
new file mode 100644
index 00000000000..b8bb78af8b8
--- /dev/null
+++ b/net/irda/iriap.c
@@ -0,0 +1,1089 @@
+/*********************************************************************
+ *
+ * Filename:      iriap.c
+ * Version:       0.8
+ * Description:   Information Access Protocol (IAP)
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Aug 21 00:02:07 1997
+ * Modified at:   Sat Dec 25 16:42:42 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/iriap_event.h>
+#include <net/irda/iriap.h>
+
+#ifdef CONFIG_IRDA_DEBUG
+/* FIXME: This one should go in irlmp.c */
+static const char *ias_charset_types[] = {
+	"CS_ASCII",
+	"CS_ISO_8859_1",
+	"CS_ISO_8859_2",
+	"CS_ISO_8859_3",
+	"CS_ISO_8859_4",
+	"CS_ISO_8859_5",
+	"CS_ISO_8859_6",
+	"CS_ISO_8859_7",
+	"CS_ISO_8859_8",
+	"CS_ISO_8859_9",
+	"CS_UNICODE"
+};
+#endif	/* CONFIG_IRDA_DEBUG */
+
+static hashbin_t *iriap = NULL;
+static void *service_handle;
+
+static void __iriap_close(struct iriap_cb *self);
+static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode);
+static void iriap_disconnect_indication(void *instance, void *sap,
+					LM_REASON reason, struct sk_buff *skb);
+static void iriap_connect_indication(void *instance, void *sap,
+				     struct qos_info *qos, __u32 max_sdu_size,
+				     __u8 max_header_size,
+				     struct sk_buff *skb);
+static void iriap_connect_confirm(void *instance, void *sap,
+				  struct qos_info *qos,
+				  __u32 max_sdu_size, __u8 max_header_size,
+				  struct sk_buff *skb);
+static int iriap_data_indication(void *instance, void *sap,
+				 struct sk_buff *skb);
+
+static void iriap_watchdog_timer_expired(void *data);
+
+static inline void iriap_start_watchdog_timer(struct iriap_cb *self, 
+					      int timeout) 
+{
+	irda_start_timer(&self->watchdog_timer, timeout, self, 
+			 iriap_watchdog_timer_expired);
+}
+
+/*
+ * Function iriap_init (void)
+ *
+ *    Initializes the IrIAP layer, called by the module initialization code
+ *    in irmod.c
+ */
+int __init iriap_init(void)
+{
+	struct ias_object *obj;
+	struct iriap_cb *server;
+	__u8 oct_seq[6];
+	__u16 hints;
+
+	/* Allocate master array */
+	iriap = hashbin_new(HB_LOCK);
+	if (!iriap)
+		return -ENOMEM;
+
+	/* Object repository - defined in irias_object.c */
+	irias_objects = hashbin_new(HB_LOCK);
+	if (!irias_objects) {
+		IRDA_WARNING("%s: Can't allocate irias_objects hashbin!\n",
+			     __FUNCTION__);
+		hashbin_delete(iriap, NULL);
+		return -ENOMEM;
+	}
+
+	/*
+	 *  Register some default services for IrLMP
+	 */
+	hints  = irlmp_service_to_hint(S_COMPUTER);
+	service_handle = irlmp_register_service(hints);
+
+	/* Register the Device object with LM-IAS */
+	obj = irias_new_object("Device", IAS_DEVICE_ID);
+	irias_add_string_attrib(obj, "DeviceName", "Linux", IAS_KERNEL_ATTR);
+
+	oct_seq[0] = 0x01;  /* Version 1 */
+	oct_seq[1] = 0x00;  /* IAS support bits */
+	oct_seq[2] = 0x00;  /* LM-MUX support bits */
+#ifdef CONFIG_IRDA_ULTRA
+	oct_seq[2] |= 0x04; /* Connectionless Data support */
+#endif
+	irias_add_octseq_attrib(obj, "IrLMPSupport", oct_seq, 3,
+				IAS_KERNEL_ATTR);
+	irias_insert_object(obj);
+
+	/*
+	 *  Register server support with IrLMP so we can accept incoming
+	 *  connections
+	 */
+	server = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
+	if (!server) {
+		IRDA_DEBUG(0, "%s(), unable to open server\n", __FUNCTION__);
+		return -1;
+	}
+	iriap_register_lsap(server, LSAP_IAS, IAS_SERVER);
+
+	return 0;
+}
+
+/*
+ * Function iriap_cleanup (void)
+ *
+ *    Initializes the IrIAP layer, called by the module cleanup code in
+ *    irmod.c
+ */
+void __exit iriap_cleanup(void)
+{
+	irlmp_unregister_service(service_handle);
+
+	hashbin_delete(iriap, (FREE_FUNC) __iriap_close);
+	hashbin_delete(irias_objects, (FREE_FUNC) __irias_delete_object);
+}
+
+/*
+ * Function iriap_open (void)
+ *
+ *    Opens an instance of the IrIAP layer, and registers with IrLMP
+ */
+struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
+			    CONFIRM_CALLBACK callback)
+{
+	struct iriap_cb *self;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	self = kmalloc(sizeof(struct iriap_cb), GFP_ATOMIC);
+	if (!self) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/*
+	 *  Initialize instance
+	 */
+	memset(self, 0, sizeof(struct iriap_cb));
+
+	self->magic = IAS_MAGIC;
+	self->mode = mode;
+	if (mode == IAS_CLIENT)
+		iriap_register_lsap(self, slsap_sel, mode);
+
+	self->confirm = callback;
+	self->priv = priv;
+
+	/* iriap_getvaluebyclass_request() will construct packets before
+	 * we connect, so this must have a sane value... Jean II */
+	self->max_header_size = LMP_MAX_HEADER;
+
+	init_timer(&self->watchdog_timer);
+
+	hashbin_insert(iriap, (irda_queue_t *) self, (long) self, NULL);
+
+	/* Initialize state machines */
+	iriap_next_client_state(self, S_DISCONNECT);
+	iriap_next_call_state(self, S_MAKE_CALL);
+	iriap_next_server_state(self, R_DISCONNECT);
+	iriap_next_r_connect_state(self, R_WAITING);
+
+	return self;
+}
+EXPORT_SYMBOL(iriap_open);
+
+/*
+ * Function __iriap_close (self)
+ *
+ *    Removes (deallocates) the IrIAP instance
+ *
+ */
+static void __iriap_close(struct iriap_cb *self)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	del_timer(&self->watchdog_timer);
+
+	if (self->request_skb)
+		dev_kfree_skb(self->request_skb);
+
+	self->magic = 0;
+
+	kfree(self);
+}
+
+/*
+ * Function iriap_close (void)
+ *
+ *    Closes IrIAP and deregisters with IrLMP
+ */
+void iriap_close(struct iriap_cb *self)
+{
+	struct iriap_cb *entry;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	if (self->lsap) {
+		irlmp_close_lsap(self->lsap);
+		self->lsap = NULL;
+	}
+
+	entry = (struct iriap_cb *) hashbin_remove(iriap, (long) self, NULL);
+	IRDA_ASSERT(entry == self, return;);
+
+	__iriap_close(self);
+}
+EXPORT_SYMBOL(iriap_close);
+
+static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode)
+{
+	notify_t notify;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	irda_notify_init(&notify);
+	notify.connect_confirm       = iriap_connect_confirm;
+	notify.connect_indication    = iriap_connect_indication;
+	notify.disconnect_indication = iriap_disconnect_indication;
+	notify.data_indication       = iriap_data_indication;
+	notify.instance = self;
+	if (mode == IAS_CLIENT)
+		strcpy(notify.name, "IrIAS cli");
+	else
+		strcpy(notify.name, "IrIAS srv");
+
+	self->lsap = irlmp_open_lsap(slsap_sel, &notify, 0);
+	if (self->lsap == NULL) {
+		IRDA_ERROR("%s: Unable to allocated LSAP!\n", __FUNCTION__);
+		return -1;
+	}
+	self->slsap_sel = self->lsap->slsap_sel;
+
+	return 0;
+}
+
+/*
+ * Function iriap_disconnect_indication (handle, reason)
+ *
+ *    Got disconnect, so clean up everything associated with this connection
+ *
+ */
+static void iriap_disconnect_indication(void *instance, void *sap,
+					LM_REASON reason,
+					struct sk_buff *skb)
+{
+	struct iriap_cb *self;
+
+	IRDA_DEBUG(4, "%s(), reason=%s\n", __FUNCTION__, irlmp_reasons[reason]);
+
+	self = (struct iriap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	IRDA_ASSERT(iriap != NULL, return;);
+
+	del_timer(&self->watchdog_timer);
+
+	/* Not needed */
+	if (skb)
+		dev_kfree_skb(skb);
+
+	if (self->mode == IAS_CLIENT) {
+		IRDA_DEBUG(4, "%s(), disconnect as client\n", __FUNCTION__);
+
+
+		iriap_do_client_event(self, IAP_LM_DISCONNECT_INDICATION,
+				      NULL);
+		/*
+		 * Inform service user that the request failed by sending
+		 * it a NULL value. Warning, the client might close us, so
+		 * remember no to use self anymore after calling confirm
+		 */
+		if (self->confirm)
+			self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
+	} else {
+		IRDA_DEBUG(4, "%s(), disconnect as server\n", __FUNCTION__);
+		iriap_do_server_event(self, IAP_LM_DISCONNECT_INDICATION,
+				      NULL);
+		iriap_close(self);
+	}
+}
+
+/*
+ * Function iriap_disconnect_request (handle)
+ */
+static void iriap_disconnect_request(struct iriap_cb *self)
+{
+	struct sk_buff *tx_skb;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	tx_skb = dev_alloc_skb(64);
+	if (tx_skb == NULL) {
+		IRDA_DEBUG(0, "%s(), Could not allocate an sk_buff of length %d\n", 
+			__FUNCTION__, 64);
+		return;
+	}
+
+	/*
+	 *  Reserve space for MUX control and LAP header
+	 */
+	skb_reserve(tx_skb, LMP_MAX_HEADER);
+
+	irlmp_disconnect_request(self->lsap, tx_skb);
+}
+
+/*
+ * Function iriap_getvaluebyclass (addr, name, attr)
+ *
+ *    Retreive all values from attribute in all objects with given class
+ *    name
+ */
+int iriap_getvaluebyclass_request(struct iriap_cb *self,
+				  __u32 saddr, __u32 daddr,
+				  char *name, char *attr)
+{
+	struct sk_buff *tx_skb;
+	int name_len, attr_len, skb_len;
+	__u8 *frame;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return -1;);
+
+	/* Client must supply the destination device address */
+	if (!daddr)
+		return -1;
+
+	self->daddr = daddr;
+	self->saddr = saddr;
+
+	/*
+	 *  Save operation, so we know what the later indication is about
+	 */
+	self->operation = GET_VALUE_BY_CLASS;
+
+	/* Give ourselves 10 secs to finish this operation */
+	iriap_start_watchdog_timer(self, 10*HZ);
+
+	name_len = strlen(name);	/* Up to IAS_MAX_CLASSNAME = 60 */
+	attr_len = strlen(attr);	/* Up to IAS_MAX_ATTRIBNAME = 60 */
+
+	skb_len = self->max_header_size+2+name_len+1+attr_len+4;
+	tx_skb = dev_alloc_skb(skb_len);
+	if (!tx_skb)
+		return -ENOMEM;
+
+	/* Reserve space for MUX and LAP header */
+	skb_reserve(tx_skb, self->max_header_size);
+	skb_put(tx_skb, 3+name_len+attr_len);
+	frame = tx_skb->data;
+
+	/* Build frame */
+	frame[0] = IAP_LST | GET_VALUE_BY_CLASS;
+	frame[1] = name_len;                       /* Insert length of name */
+	memcpy(frame+2, name, name_len);           /* Insert name */
+	frame[2+name_len] = attr_len;              /* Insert length of attr */
+	memcpy(frame+3+name_len, attr, attr_len);  /* Insert attr */
+
+	iriap_do_client_event(self, IAP_CALL_REQUEST_GVBC, tx_skb);
+
+	/* Drop reference count - see state_s_disconnect(). */
+	dev_kfree_skb(tx_skb);
+
+	return 0;
+}
+EXPORT_SYMBOL(iriap_getvaluebyclass_request);
+
+/*
+ * Function iriap_getvaluebyclass_confirm (self, skb)
+ *
+ *    Got result from GetValueByClass command. Parse it and return result
+ *    to service user.
+ *
+ */
+static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
+					  struct sk_buff *skb)
+{
+	struct ias_value *value;
+	int charset;
+	__u32 value_len;
+	__u32 tmp_cpu32;
+	__u16 obj_id;
+	__u16 len;
+	__u8  type;
+	__u8 *fp;
+	int n;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Initialize variables */
+	fp = skb->data;
+	n = 2;
+
+	/* Get length, MSB first */
+	len = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2;
+
+	IRDA_DEBUG(4, "%s(), len=%d\n", __FUNCTION__, len);
+
+	/* Get object ID, MSB first */
+	obj_id = be16_to_cpu(get_unaligned((__u16 *)(fp+n))); n += 2;
+
+	type = fp[n++];
+	IRDA_DEBUG(4, "%s(), Value type = %d\n", __FUNCTION__, type);
+
+	switch (type) {
+	case IAS_INTEGER:
+		memcpy(&tmp_cpu32, fp+n, 4); n += 4;
+		be32_to_cpus(&tmp_cpu32);
+		value = irias_new_integer_value(tmp_cpu32);
+
+		/*  Legal values restricted to 0x01-0x6f, page 15 irttp */
+		IRDA_DEBUG(4, "%s(), lsap=%d\n", __FUNCTION__, value->t.integer);
+		break;
+	case IAS_STRING:
+		charset = fp[n++];
+
+		switch (charset) {
+		case CS_ASCII:
+			break;
+/*		case CS_ISO_8859_1: */
+/*		case CS_ISO_8859_2: */
+/*		case CS_ISO_8859_3: */
+/*		case CS_ISO_8859_4: */
+/*		case CS_ISO_8859_5: */
+/*		case CS_ISO_8859_6: */
+/*		case CS_ISO_8859_7: */
+/*		case CS_ISO_8859_8: */
+/*		case CS_ISO_8859_9: */
+/*		case CS_UNICODE: */
+		default:
+			IRDA_DEBUG(0, "%s(), charset %s, not supported\n",
+				   __FUNCTION__, ias_charset_types[charset]);
+
+			/* Aborting, close connection! */
+			iriap_disconnect_request(self);
+			return;
+			/* break; */
+		}
+		value_len = fp[n++];
+		IRDA_DEBUG(4, "%s(), strlen=%d\n", __FUNCTION__, value_len);
+
+		/* Make sure the string is null-terminated */
+		fp[n+value_len] = 0x00;
+		IRDA_DEBUG(4, "Got string %s\n", fp+n);
+
+		/* Will truncate to IAS_MAX_STRING bytes */
+		value = irias_new_string_value(fp+n);
+		break;
+	case IAS_OCT_SEQ:
+		value_len = be16_to_cpu(get_unaligned((__u16 *)(fp+n)));
+		n += 2;
+
+		/* Will truncate to IAS_MAX_OCTET_STRING bytes */
+		value = irias_new_octseq_value(fp+n, value_len);
+		break;
+	default:
+		value = irias_new_missing_value();
+		break;
+	}
+
+	/* Finished, close connection! */
+	iriap_disconnect_request(self);
+
+	/* Warning, the client might close us, so remember no to use self
+	 * anymore after calling confirm
+	 */
+	if (self->confirm)
+		self->confirm(IAS_SUCCESS, obj_id, value, self->priv);
+	else {
+		IRDA_DEBUG(0, "%s(), missing handler!\n", __FUNCTION__);
+		irias_delete_value(value);
+	}
+}
+
+/*
+ * Function iriap_getvaluebyclass_response ()
+ *
+ *    Send answer back to remote LM-IAS
+ *
+ */
+static void iriap_getvaluebyclass_response(struct iriap_cb *self,
+					   __u16 obj_id,
+					   __u8 ret_code,
+					   struct ias_value *value)
+{
+	struct sk_buff *tx_skb;
+	int n;
+	__u32 tmp_be32, tmp_be16;
+	__u8 *fp;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+	IRDA_ASSERT(value != NULL, return;);
+	IRDA_ASSERT(value->len <= 1024, return;);
+
+	/* Initialize variables */
+	n = 0;
+
+	/*
+	 *  We must adjust the size of the response after the length of the
+	 *  value. We add 32 bytes because of the 6 bytes for the frame and
+	 *  max 5 bytes for the value coding.
+	 */
+	tx_skb = dev_alloc_skb(value->len + self->max_header_size + 32);
+	if (!tx_skb)
+		return;
+
+	/* Reserve space for MUX and LAP header */
+	skb_reserve(tx_skb, self->max_header_size);
+	skb_put(tx_skb, 6);
+
+	fp = tx_skb->data;
+
+	/* Build frame */
+	fp[n++] = GET_VALUE_BY_CLASS | IAP_LST;
+	fp[n++] = ret_code;
+
+	/* Insert list length (MSB first) */
+	tmp_be16 = __constant_htons(0x0001);
+	memcpy(fp+n, &tmp_be16, 2);  n += 2;
+
+	/* Insert object identifier ( MSB first) */
+	tmp_be16 = cpu_to_be16(obj_id);
+	memcpy(fp+n, &tmp_be16, 2); n += 2;
+
+	switch (value->type) {
+	case IAS_STRING:
+		skb_put(tx_skb, 3 + value->len);
+		fp[n++] = value->type;
+		fp[n++] = 0; /* ASCII */
+		fp[n++] = (__u8) value->len;
+		memcpy(fp+n, value->t.string, value->len); n+=value->len;
+		break;
+	case IAS_INTEGER:
+		skb_put(tx_skb, 5);
+		fp[n++] = value->type;
+
+		tmp_be32 = cpu_to_be32(value->t.integer);
+		memcpy(fp+n, &tmp_be32, 4); n += 4;
+		break;
+	case IAS_OCT_SEQ:
+		skb_put(tx_skb, 3 + value->len);
+		fp[n++] = value->type;
+
+		tmp_be16 = cpu_to_be16(value->len);
+		memcpy(fp+n, &tmp_be16, 2); n += 2;
+		memcpy(fp+n, value->t.oct_seq, value->len); n+=value->len;
+		break;
+	case IAS_MISSING:
+		IRDA_DEBUG( 3, "%s: sending IAS_MISSING\n", __FUNCTION__);
+		skb_put(tx_skb, 1);
+		fp[n++] = value->type;
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), type not implemented!\n", __FUNCTION__);
+		break;
+	}
+	iriap_do_r_connect_event(self, IAP_CALL_RESPONSE, tx_skb);
+
+	/* Drop reference count - see state_r_execute(). */
+	dev_kfree_skb(tx_skb);
+}
+
+/*
+ * Function iriap_getvaluebyclass_indication (self, skb)
+ *
+ *    getvaluebyclass is requested from peer LM-IAS
+ *
+ */
+static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
+					     struct sk_buff *skb)
+{
+	struct ias_object *obj;
+	struct ias_attrib *attrib;
+	int name_len;
+	int attr_len;
+	char name[IAS_MAX_CLASSNAME + 1];	/* 60 bytes */
+	char attr[IAS_MAX_ATTRIBNAME + 1];	/* 60 bytes */
+	__u8 *fp;
+	int n;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	fp = skb->data;
+	n = 1;
+
+	name_len = fp[n++];
+	memcpy(name, fp+n, name_len); n+=name_len;
+	name[name_len] = '\0';
+
+	attr_len = fp[n++];
+	memcpy(attr, fp+n, attr_len); n+=attr_len;
+	attr[attr_len] = '\0';
+
+	IRDA_DEBUG(4, "LM-IAS: Looking up %s: %s\n", name, attr);
+	obj = irias_find_object(name);
+
+	if (obj == NULL) {
+		IRDA_DEBUG(2, "LM-IAS: Object %s not found\n", name);
+		iriap_getvaluebyclass_response(self, 0x1235, IAS_CLASS_UNKNOWN,
+					       &irias_missing);
+		return;
+	}
+	IRDA_DEBUG(4, "LM-IAS: found %s, id=%d\n", obj->name, obj->id);
+
+	attrib = irias_find_attrib(obj, attr);
+	if (attrib == NULL) {
+		IRDA_DEBUG(2, "LM-IAS: Attribute %s not found\n", attr);
+		iriap_getvaluebyclass_response(self, obj->id,
+					       IAS_ATTRIB_UNKNOWN, 
+					       &irias_missing);
+		return;
+	}
+
+	/* We have a match; send the value.  */
+	iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS,
+				       attrib->value);
+
+	return;
+}
+
+/*
+ * Function iriap_send_ack (void)
+ *
+ *    Currently not used
+ *
+ */
+void iriap_send_ack(struct iriap_cb *self)
+{
+	struct sk_buff *tx_skb;
+	__u8 *frame;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	tx_skb = dev_alloc_skb(64);
+	if (!tx_skb)
+		return;
+
+	/* Reserve space for MUX and LAP header */
+	skb_reserve(tx_skb, self->max_header_size);
+	skb_put(tx_skb, 1);
+	frame = tx_skb->data;
+
+	/* Build frame */
+	frame[0] = IAP_LST | IAP_ACK | self->operation;
+
+	irlmp_data_request(self->lsap, tx_skb);
+}
+
+void iriap_connect_request(struct iriap_cb *self)
+{
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	ret = irlmp_connect_request(self->lsap, LSAP_IAS,
+				    self->saddr, self->daddr,
+				    NULL, NULL);
+	if (ret < 0) {
+		IRDA_DEBUG(0, "%s(), connect failed!\n", __FUNCTION__);
+		self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
+	}
+}
+
+/*
+ * Function iriap_connect_confirm (handle, skb)
+ *
+ *    LSAP connection confirmed!
+ *
+ */
+static void iriap_connect_confirm(void *instance, void *sap,
+				  struct qos_info *qos, __u32 max_seg_size,
+				  __u8 max_header_size,
+				  struct sk_buff *skb)
+{
+	struct iriap_cb *self;
+
+	self = (struct iriap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	self->max_data_size = max_seg_size;
+	self->max_header_size = max_header_size;
+
+	del_timer(&self->watchdog_timer);
+
+	iriap_do_client_event(self, IAP_LM_CONNECT_CONFIRM, skb);
+
+	/* Drop reference count - see state_s_make_call(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function iriap_connect_indication ( handle, skb)
+ *
+ *    Remote LM-IAS is requesting connection
+ *
+ */
+static void iriap_connect_indication(void *instance, void *sap,
+				     struct qos_info *qos, __u32 max_seg_size,
+				     __u8 max_header_size,
+				     struct sk_buff *skb)
+{
+	struct iriap_cb *self, *new;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	self = (struct iriap_cb *) instance;
+
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(self != NULL, goto out;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
+
+	/* Start new server */
+	new = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
+	if (!new) {
+		IRDA_DEBUG(0, "%s(), open failed\n", __FUNCTION__);
+		goto out;
+	}
+
+	/* Now attach up the new "socket" */
+	new->lsap = irlmp_dup(self->lsap, new);
+	if (!new->lsap) {
+		IRDA_DEBUG(0, "%s(), dup failed!\n", __FUNCTION__);
+		goto out;
+	}
+
+	new->max_data_size = max_seg_size;
+	new->max_header_size = max_header_size;
+
+	/* Clean up the original one to keep it in listen state */
+	irlmp_listen(self->lsap);
+
+	iriap_do_server_event(new, IAP_LM_CONNECT_INDICATION, skb);
+
+out:
+	/* Drop reference count - see state_r_disconnect(). */
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Function iriap_data_indication (handle, skb)
+ *
+ *    Receives data from connection identified by handle from IrLMP
+ *
+ */
+static int iriap_data_indication(void *instance, void *sap,
+				 struct sk_buff *skb)
+{
+	struct iriap_cb *self;
+	__u8  *frame;
+	__u8  opcode;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	self = (struct iriap_cb *) instance;
+
+	IRDA_ASSERT(skb != NULL, return 0;);
+	IRDA_ASSERT(self != NULL, goto out;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
+
+	frame = skb->data;
+
+	if (self->mode == IAS_SERVER) {
+		/* Call server */
+		IRDA_DEBUG(4, "%s(), Calling server!\n", __FUNCTION__);
+		iriap_do_r_connect_event(self, IAP_RECV_F_LST, skb);
+		goto out;
+	}
+	opcode = frame[0];
+	if (~opcode & IAP_LST) {
+		IRDA_WARNING("%s:, IrIAS multiframe commands or "
+			     "results is not implemented yet!\n",
+			     __FUNCTION__);
+		goto out;
+	}
+
+	/* Check for ack frames since they don't contain any data */
+	if (opcode & IAP_ACK) {
+		IRDA_DEBUG(0, "%s() Got ack frame!\n", __FUNCTION__);
+		goto out;
+	}
+
+	opcode &= ~IAP_LST; /* Mask away LST bit */
+
+	switch (opcode) {
+	case GET_INFO_BASE:
+		IRDA_DEBUG(0, "IrLMP GetInfoBaseDetails not implemented!\n");
+		break;
+	case GET_VALUE_BY_CLASS:
+		iriap_do_call_event(self, IAP_RECV_F_LST, NULL);
+
+		switch (frame[1]) {
+		case IAS_SUCCESS:
+			iriap_getvaluebyclass_confirm(self, skb);
+			break;
+		case IAS_CLASS_UNKNOWN:
+			IRDA_DEBUG(1, "%s(), No such class!\n", __FUNCTION__);
+			/* Finished, close connection! */
+			iriap_disconnect_request(self);
+
+			/*
+			 * Warning, the client might close us, so remember
+			 * no to use self anymore after calling confirm
+			 */
+			if (self->confirm)
+				self->confirm(IAS_CLASS_UNKNOWN, 0, NULL,
+					      self->priv);
+			break;
+		case IAS_ATTRIB_UNKNOWN:
+			IRDA_DEBUG(1, "%s(), No such attribute!\n", __FUNCTION__);
+			/* Finished, close connection! */
+			iriap_disconnect_request(self);
+
+			/*
+			 * Warning, the client might close us, so remember
+			 * no to use self anymore after calling confirm
+			 */
+			if (self->confirm)
+				self->confirm(IAS_ATTRIB_UNKNOWN, 0, NULL,
+					      self->priv);
+			break;
+		}
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown op-code: %02x\n", __FUNCTION__,
+			   opcode);
+		break;
+	}
+
+out:
+	/* Cleanup - sub-calls will have done skb_get() as needed. */
+	dev_kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Function iriap_call_indication (self, skb)
+ *
+ *    Received call to server from peer LM-IAS
+ *
+ */
+void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb)
+{
+	__u8 *fp;
+	__u8 opcode;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	fp = skb->data;
+
+	opcode = fp[0];
+	if (~opcode & 0x80) {
+		IRDA_WARNING("%s: IrIAS multiframe commands or results"
+			     "is not implemented yet!\n", __FUNCTION__);
+		return;
+	}
+	opcode &= 0x7f; /* Mask away LST bit */
+
+	switch (opcode) {
+	case GET_INFO_BASE:
+		IRDA_WARNING("%s: GetInfoBaseDetails not implemented yet!\n",
+			     __FUNCTION__);
+		break;
+	case GET_VALUE_BY_CLASS:
+		iriap_getvaluebyclass_indication(self, skb);
+		break;
+	}
+	/* skb will be cleaned up in iriap_data_indication */
+}
+
+/*
+ * Function iriap_watchdog_timer_expired (data)
+ *
+ *    Query has taken too long time, so abort
+ *
+ */
+static void iriap_watchdog_timer_expired(void *data)
+{
+	struct iriap_cb *self = (struct iriap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	/* iriap_close(self); */
+}
+
+#ifdef CONFIG_PROC_FS
+
+static const char *ias_value_types[] = {
+	"IAS_MISSING",
+	"IAS_INTEGER",
+	"IAS_OCT_SEQ",
+	"IAS_STRING"
+};
+
+static inline struct ias_object *irias_seq_idx(loff_t pos) 
+{
+	struct ias_object *obj;
+
+	for (obj = (struct ias_object *) hashbin_get_first(irias_objects);
+	     obj; obj = (struct ias_object *) hashbin_get_next(irias_objects)) {
+		if (pos-- == 0)
+			break;
+	}
+		
+	return obj;
+}
+
+static void *irias_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	spin_lock_irq(&irias_objects->hb_spinlock);
+
+	return *pos ? irias_seq_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *irias_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return (v == SEQ_START_TOKEN) 
+		? (void *) hashbin_get_first(irias_objects)
+		: (void *) hashbin_get_next(irias_objects);
+}
+
+static void irias_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_irq(&irias_objects->hb_spinlock);
+}
+
+static int irias_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "LM-IAS Objects:\n");
+	else {
+		struct ias_object *obj = v;
+		struct ias_attrib *attrib;
+
+		IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -EINVAL;);
+
+		seq_printf(seq, "name: %s, id=%d\n",
+			   obj->name, obj->id);
+
+		/* Careful for priority inversions here !
+		 * All other uses of attrib spinlock are independent of
+		 * the object spinlock, so we are safe. Jean II */
+		spin_lock(&obj->attribs->hb_spinlock);
+
+		/* List all attributes for this object */
+		for (attrib = (struct ias_attrib *) hashbin_get_first(obj->attribs);
+		     attrib != NULL;
+		     attrib = (struct ias_attrib *) hashbin_get_next(obj->attribs)) {
+		     
+			IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC,
+				    goto outloop; );
+
+			seq_printf(seq, " - Attribute name: \"%s\", ",
+				   attrib->name);
+			seq_printf(seq, "value[%s]: ",
+				   ias_value_types[attrib->value->type]);
+
+			switch (attrib->value->type) {
+			case IAS_INTEGER:
+				seq_printf(seq, "%d\n",
+					   attrib->value->t.integer);
+				break;
+			case IAS_STRING:
+				seq_printf(seq, "\"%s\"\n",
+					   attrib->value->t.string);
+				break;
+			case IAS_OCT_SEQ:
+				seq_printf(seq, "octet sequence (%d bytes)\n", 
+					   attrib->value->len);
+				break;
+			case IAS_MISSING:
+				seq_puts(seq, "missing\n");
+				break;
+			default:
+				seq_printf(seq, "type %d?\n", 
+					   attrib->value->type);
+			}
+			seq_putc(seq, '\n');
+
+		}
+	IRDA_ASSERT_LABEL(outloop:)
+		spin_unlock(&obj->attribs->hb_spinlock);
+	}
+
+	return 0;
+}
+
+static struct seq_operations irias_seq_ops = {
+	.start  = irias_seq_start,
+	.next   = irias_seq_next,
+	.stop   = irias_seq_stop,
+	.show   = irias_seq_show,
+};
+
+static int irias_seq_open(struct inode *inode, struct file *file)
+{
+	IRDA_ASSERT( irias_objects != NULL, return -EINVAL;);
+
+	return seq_open(file, &irias_seq_ops);
+}
+
+struct file_operations irias_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = irias_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release,
+};
+
+#endif /* PROC_FS */
diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c
new file mode 100644
index 00000000000..a73607450de
--- /dev/null
+++ b/net/irda/iriap_event.c
@@ -0,0 +1,502 @@
+/*********************************************************************
+ *
+ * Filename:      iriap_event.c
+ * Version:       0.1
+ * Description:   IAP Finite State Machine
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Aug 21 00:02:07 1997
+ * Modified at:   Wed Mar  1 11:28:34 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1997, 1999-2000 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/iriap_event.h>
+
+static void state_s_disconnect   (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_connecting   (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_call         (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+
+static void state_s_make_call    (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_calling      (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_outstanding  (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_replying     (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_s_wait_active  (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+
+static void state_r_disconnect   (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_call         (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_waiting      (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_wait_active  (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_receiving    (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_execute      (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+static void state_r_returning    (struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb);
+
+static void (*iriap_state[])(struct iriap_cb *self, IRIAP_EVENT event,
+			     struct sk_buff *skb) = {
+	/* Client FSM */
+	state_s_disconnect,
+	state_s_connecting,
+	state_s_call,
+
+	/* S-Call FSM */
+	state_s_make_call,
+	state_s_calling,
+	state_s_outstanding,
+	state_s_replying,
+	state_s_wait_for_call,
+	state_s_wait_active,
+
+	/* Server FSM */
+	state_r_disconnect,
+	state_r_call,
+
+	/* R-Connect FSM */
+	state_r_waiting,
+	state_r_wait_active,
+	state_r_receiving,
+	state_r_execute,
+	state_r_returning,
+};
+
+void iriap_next_client_state(struct iriap_cb *self, IRIAP_STATE state)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	self->client_state = state;
+}
+
+void iriap_next_call_state(struct iriap_cb *self, IRIAP_STATE state)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	self->call_state = state;
+}
+
+void iriap_next_server_state(struct iriap_cb *self, IRIAP_STATE state)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	self->server_state = state;
+}
+
+void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	self->r_connect_state = state;
+}
+
+void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event,
+			   struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	(*iriap_state[ self->client_state]) (self, event, skb);
+}
+
+void iriap_do_call_event(struct iriap_cb *self, IRIAP_EVENT event,
+			 struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	(*iriap_state[ self->call_state]) (self, event, skb);
+}
+
+void iriap_do_server_event(struct iriap_cb *self, IRIAP_EVENT event,
+			   struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	(*iriap_state[ self->server_state]) (self, event, skb);
+}
+
+void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event,
+			      struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	(*iriap_state[ self->r_connect_state]) (self, event, skb);
+}
+
+
+/*
+ * Function state_s_disconnect (event, skb)
+ *
+ *    S-Disconnect, The device has no LSAP connection to a particular
+ *    remote device.
+ */
+static void state_s_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
+			       struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	switch (event) {
+	case IAP_CALL_REQUEST_GVBC:
+		iriap_next_client_state(self, S_CONNECTING);
+		IRDA_ASSERT(self->request_skb == NULL, return;);
+		/* Don't forget to refcount it -
+		 * see iriap_getvaluebyclass_request(). */
+		skb_get(skb);
+		self->request_skb = skb;
+		iriap_connect_request(self);
+		break;
+	case IAP_LM_DISCONNECT_INDICATION:
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %d\n", __FUNCTION__, event);
+		break;
+	}
+}
+
+/*
+ * Function state_s_connecting (self, event, skb)
+ *
+ *    S-Connecting
+ *
+ */
+static void state_s_connecting(struct iriap_cb *self, IRIAP_EVENT event,
+			       struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	switch (event) {
+	case IAP_LM_CONNECT_CONFIRM:
+		/*
+		 *  Jump to S-Call FSM
+		 */
+		iriap_do_call_event(self, IAP_CALL_REQUEST, skb);
+		/* iriap_call_request(self, 0,0,0); */
+		iriap_next_client_state(self, S_CALL);
+		break;
+	case IAP_LM_DISCONNECT_INDICATION:
+		/* Abort calls */
+		iriap_next_call_state(self, S_MAKE_CALL);
+		iriap_next_client_state(self, S_DISCONNECT);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %d\n", __FUNCTION__, event);
+		break;
+	}
+}
+
+/*
+ * Function state_s_call (self, event, skb)
+ *
+ *    S-Call, The device can process calls to a specific remote
+ *    device. Whenever the LSAP connection is disconnected, this state
+ *    catches that event and clears up
+ */
+static void state_s_call(struct iriap_cb *self, IRIAP_EVENT event,
+			 struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+
+	switch (event) {
+	case IAP_LM_DISCONNECT_INDICATION:
+		/* Abort calls */
+		iriap_next_call_state(self, S_MAKE_CALL);
+		iriap_next_client_state(self, S_DISCONNECT);
+		break;
+	default:
+		IRDA_DEBUG(0, "state_s_call: Unknown event %d\n", event);
+		break;
+	}
+}
+
+/*
+ * Function state_s_make_call (event, skb)
+ *
+ *    S-Make-Call
+ *
+ */
+static void state_s_make_call(struct iriap_cb *self, IRIAP_EVENT event,
+			      struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+
+	IRDA_ASSERT(self != NULL, return;);
+
+	switch (event) {
+	case IAP_CALL_REQUEST:
+		/* Already refcounted - see state_s_disconnect() */
+		tx_skb = self->request_skb;
+		self->request_skb = NULL;
+
+		irlmp_data_request(self->lsap, tx_skb);
+		iriap_next_call_state(self, S_OUTSTANDING);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %d\n", __FUNCTION__, event);
+		break;
+	}
+}
+
+/*
+ * Function state_s_calling (event, skb)
+ *
+ *    S-Calling
+ *
+ */
+static void state_s_calling(struct iriap_cb *self, IRIAP_EVENT event,
+			    struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Function state_s_outstanding (event, skb)
+ *
+ *    S-Outstanding, The device is waiting for a response to a command
+ *
+ */
+static void state_s_outstanding(struct iriap_cb *self, IRIAP_EVENT event,
+				struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+
+	switch (event) {
+	case IAP_RECV_F_LST:
+		/*iriap_send_ack(self);*/
+		/*LM_Idle_request(idle); */
+
+		iriap_next_call_state(self, S_WAIT_FOR_CALL);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %d\n", __FUNCTION__, event);
+		break;
+	}
+}
+
+/*
+ * Function state_s_replying (event, skb)
+ *
+ *    S-Replying, The device is collecting a multiple part response
+ */
+static void state_s_replying(struct iriap_cb *self, IRIAP_EVENT event,
+			     struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Function state_s_wait_for_call (event, skb)
+ *
+ *    S-Wait-for-Call
+ *
+ */
+static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
+				  struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+
+/*
+ * Function state_s_wait_active (event, skb)
+ *
+ *    S-Wait-Active
+ *
+ */
+static void state_s_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
+				struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+/**************************************************************************
+ *
+ *  Server FSM
+ *
+ **************************************************************************/
+
+/*
+ * Function state_r_disconnect (self, event, skb)
+ *
+ *    LM-IAS server is disconnected (not processing any requests!)
+ *
+ */
+static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
+			       struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+
+	switch (event) {
+	case IAP_LM_CONNECT_INDICATION:
+		tx_skb = dev_alloc_skb(64);
+		if (tx_skb == NULL) {
+			IRDA_WARNING("%s: unable to malloc!\n", __FUNCTION__);
+			return;
+		}
+
+		/* Reserve space for MUX_CONTROL and LAP header */
+		skb_reserve(tx_skb, LMP_MAX_HEADER);
+
+		irlmp_connect_response(self->lsap, tx_skb);
+		/*LM_Idle_request(idle); */
+
+		iriap_next_server_state(self, R_CALL);
+
+		/*
+		 *  Jump to R-Connect FSM, we skip R-Waiting since we do not
+		 *  care about LM_Idle_request()!
+		 */
+		iriap_next_r_connect_state(self, R_RECEIVING);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event %d\n", __FUNCTION__, event);
+		break;
+	}
+}
+
+/*
+ * Function state_r_call (self, event, skb)
+ */
+static void state_r_call(struct iriap_cb *self, IRIAP_EVENT event,
+			 struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	switch (event) {
+	case IAP_LM_DISCONNECT_INDICATION:
+		/* Abort call */
+		iriap_next_server_state(self, R_DISCONNECT);
+		iriap_next_r_connect_state(self, R_WAITING);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event!\n", __FUNCTION__);
+		break;
+	}
+}
+
+/*
+ *  R-Connect FSM
+ */
+
+/*
+ * Function state_r_waiting (self, event, skb)
+ */
+static void state_r_waiting(struct iriap_cb *self, IRIAP_EVENT event,
+			    struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+static void state_r_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
+				struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), Not implemented\n", __FUNCTION__);
+}
+
+/*
+ * Function state_r_receiving (self, event, skb)
+ *
+ *    We are receiving a command
+ *
+ */
+static void state_r_receiving(struct iriap_cb *self, IRIAP_EVENT event,
+			      struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	switch (event) {
+	case IAP_RECV_F_LST:
+		iriap_next_r_connect_state(self, R_EXECUTE);
+
+		iriap_call_indication(self, skb);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event!\n", __FUNCTION__);
+		break;
+	}
+}
+
+/*
+ * Function state_r_execute (self, event, skb)
+ *
+ *    The server is processing the request
+ *
+ */
+static void state_r_execute(struct iriap_cb *self, IRIAP_EVENT event,
+			    struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
+
+	switch (event) {
+	case IAP_CALL_RESPONSE:
+		/*
+		 *  Since we don't implement the Waiting state, we return
+		 *  to state Receiving instead, DB.
+		 */
+		iriap_next_r_connect_state(self, R_RECEIVING);
+
+		/* Don't forget to refcount it - see
+		 * iriap_getvaluebyclass_response(). */
+		skb_get(skb);
+
+		irlmp_data_request(self->lsap, skb);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), unknown event!\n", __FUNCTION__);
+		break;
+	}
+}
+
+static void state_r_returning(struct iriap_cb *self, IRIAP_EVENT event,
+			      struct sk_buff *skb)
+{
+	IRDA_DEBUG(0, "%s(), event=%d\n", __FUNCTION__, event);
+
+	switch (event) {
+	case IAP_RECV_F_LST:
+		break;
+	default:
+		break;
+	}
+}
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
new file mode 100644
index 00000000000..6fec428b451
--- /dev/null
+++ b/net/irda/irias_object.c
@@ -0,0 +1,580 @@
+/*********************************************************************
+ *
+ * Filename:      irias_object.c
+ * Version:       0.3
+ * Description:   IAS object database and functions
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Oct  1 22:50:04 1998
+ * Modified at:   Wed Dec 15 11:23:16 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irias_object.h>
+
+hashbin_t *irias_objects;
+
+/*
+ *  Used when a missing value needs to be returned
+ */
+struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}};
+
+/*
+ * Function strndup (str, max)
+ *
+ *    My own kernel version of strndup!
+ *
+ * Faster, check boundary... Jean II
+ */
+static char *strndup(char *str, int max)
+{
+	char *new_str;
+	int len;
+
+	/* Check string */
+	if (str == NULL)
+		return NULL;
+	/* Check length, truncate */
+	len = strlen(str);
+	if(len > max)
+		len = max;
+
+	/* Allocate new string */
+        new_str = kmalloc(len + 1, GFP_ATOMIC);
+        if (new_str == NULL) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/* Copy and truncate */
+	memcpy(new_str, str, len);
+	new_str[len] = '\0';
+
+	return new_str;
+}
+
+/*
+ * Function ias_new_object (name, id)
+ *
+ *    Create a new IAS object
+ *
+ */
+struct ias_object *irias_new_object( char *name, int id)
+{
+        struct ias_object *obj;
+
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	obj = (struct ias_object *) kmalloc(sizeof(struct ias_object),
+					    GFP_ATOMIC);
+	if (obj == NULL) {
+		IRDA_WARNING("%s(), Unable to allocate object!\n",
+			     __FUNCTION__);
+		return NULL;
+	}
+	memset(obj, 0, sizeof( struct ias_object));
+
+	obj->magic = IAS_OBJECT_MAGIC;
+	obj->name = strndup(name, IAS_MAX_CLASSNAME);
+	obj->id = id;
+
+	/* Locking notes : the attrib spinlock has lower precendence
+	 * than the objects spinlock. Never grap the objects spinlock
+	 * while holding any attrib spinlock (risk of deadlock). Jean II */
+	obj->attribs = hashbin_new(HB_LOCK);
+
+	if (obj->attribs == NULL) {
+		IRDA_WARNING("%s(), Unable to allocate attribs!\n",
+			     __FUNCTION__);
+		kfree(obj);
+		return NULL;
+	}
+
+	return obj;
+}
+EXPORT_SYMBOL(irias_new_object);
+
+/*
+ * Function irias_delete_attrib (attrib)
+ *
+ *    Delete given attribute and deallocate all its memory
+ *
+ */
+static void __irias_delete_attrib(struct ias_attrib *attrib)
+{
+	IRDA_ASSERT(attrib != NULL, return;);
+	IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
+
+	if (attrib->name)
+		kfree(attrib->name);
+
+	irias_delete_value(attrib->value);
+	attrib->magic = ~IAS_ATTRIB_MAGIC;
+
+	kfree(attrib);
+}
+
+void __irias_delete_object(struct ias_object *obj)
+{
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+
+	if (obj->name)
+		kfree(obj->name);
+
+	hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib);
+
+	obj->magic = ~IAS_OBJECT_MAGIC;
+
+	kfree(obj);
+}
+
+/*
+ * Function irias_delete_object (obj)
+ *
+ *    Remove object from hashbin and deallocate all attributes associated with
+ *    with this object and the object itself
+ *
+ */
+int irias_delete_object(struct ias_object *obj)
+{
+	struct ias_object *node;
+
+	IRDA_ASSERT(obj != NULL, return -1;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
+
+	/* Remove from list */
+	node = hashbin_remove_this(irias_objects, (irda_queue_t *) obj);
+	if (!node)
+		IRDA_DEBUG( 0, "%s(), object already removed!\n",
+			    __FUNCTION__);
+
+	/* Destroy */
+	__irias_delete_object(obj);
+
+	return 0;
+}
+EXPORT_SYMBOL(irias_delete_object);
+
+/*
+ * Function irias_delete_attrib (obj)
+ *
+ *    Remove attribute from hashbin and, if it was the last attribute of
+ *    the object, remove the object as well.
+ *
+ */
+int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib,
+			int cleanobject)
+{
+	struct ias_attrib *node;
+
+	IRDA_ASSERT(obj != NULL, return -1;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
+	IRDA_ASSERT(attrib != NULL, return -1;);
+
+	/* Remove attribute from object */
+	node = hashbin_remove_this(obj->attribs, (irda_queue_t *) attrib);
+	if (!node)
+		return 0; /* Already removed or non-existent */
+
+	/* Deallocate attribute */
+	__irias_delete_attrib(node);
+
+	/* Check if object has still some attributes, destroy it if none.
+	 * At first glance, this look dangerous, as the kernel reference
+	 * various IAS objects. However, we only use this function on
+	 * user attributes, not kernel attributes, so there is no risk
+	 * of deleting a kernel object this way. Jean II */
+	node = (struct ias_attrib *) hashbin_get_first(obj->attribs);
+	if (cleanobject && !node)
+		irias_delete_object(obj);
+
+	return 0;
+}
+
+/*
+ * Function irias_insert_object (obj)
+ *
+ *    Insert an object into the LM-IAS database
+ *
+ */
+void irias_insert_object(struct ias_object *obj)
+{
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+
+	hashbin_insert(irias_objects, (irda_queue_t *) obj, 0, obj->name);
+}
+EXPORT_SYMBOL(irias_insert_object);
+
+/*
+ * Function irias_find_object (name)
+ *
+ *    Find object with given name
+ *
+ */
+struct ias_object *irias_find_object(char *name)
+{
+	IRDA_ASSERT(name != NULL, return NULL;);
+
+	/* Unsafe (locking), object might change */
+	return hashbin_lock_find(irias_objects, 0, name);
+}
+EXPORT_SYMBOL(irias_find_object);
+
+/*
+ * Function irias_find_attrib (obj, name)
+ *
+ *    Find named attribute in object
+ *
+ */
+struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name)
+{
+	struct ias_attrib *attrib;
+
+	IRDA_ASSERT(obj != NULL, return NULL;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return NULL;);
+	IRDA_ASSERT(name != NULL, return NULL;);
+
+	attrib = hashbin_lock_find(obj->attribs, 0, name);
+	if (attrib == NULL)
+		return NULL;
+
+	/* Unsafe (locking), attrib might change */
+	return attrib;
+}
+EXPORT_SYMBOL(irias_find_attrib);
+
+/*
+ * Function irias_add_attribute (obj, attrib)
+ *
+ *    Add attribute to object
+ *
+ */
+static void irias_add_attrib(struct ias_object *obj, struct ias_attrib *attrib,
+			     int owner)
+{
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+
+	IRDA_ASSERT(attrib != NULL, return;);
+	IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
+
+	/* Set if attrib is owned by kernel or user space */
+	attrib->value->owner = owner;
+
+	hashbin_insert(obj->attribs, (irda_queue_t *) attrib, 0, attrib->name);
+}
+
+/*
+ * Function irias_object_change_attribute (obj_name, attrib_name, new_value)
+ *
+ *    Change the value of an objects attribute.
+ *
+ */
+int irias_object_change_attribute(char *obj_name, char *attrib_name,
+				  struct ias_value *new_value)
+{
+	struct ias_object *obj;
+	struct ias_attrib *attrib;
+	unsigned long flags;
+
+	/* Find object */
+	obj = hashbin_lock_find(irias_objects, 0, obj_name);
+	if (obj == NULL) {
+		IRDA_WARNING("%s: Unable to find object: %s\n", __FUNCTION__,
+			     obj_name);
+		return -1;
+	}
+
+	/* Slightly unsafe (obj might get removed under us) */
+	spin_lock_irqsave(&obj->attribs->hb_spinlock, flags);
+
+	/* Find attribute */
+	attrib = hashbin_find(obj->attribs, 0, attrib_name);
+	if (attrib == NULL) {
+		IRDA_WARNING("%s: Unable to find attribute: %s\n",
+			     __FUNCTION__, attrib_name);
+		spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
+		return -1;
+	}
+
+	if ( attrib->value->type != new_value->type) {
+		IRDA_DEBUG( 0, "%s(), changing value type not allowed!\n",
+			    __FUNCTION__);
+		spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
+		return -1;
+	}
+
+	/* Delete old value */
+	irias_delete_value(attrib->value);
+
+	/* Insert new value */
+	attrib->value = new_value;
+
+	/* Success */
+	spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(irias_object_change_attribute);
+
+/*
+ * Function irias_object_add_integer_attrib (obj, name, value)
+ *
+ *    Add an integer attribute to an LM-IAS object
+ *
+ */
+void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
+			      int owner)
+{
+	struct ias_attrib *attrib;
+
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+	IRDA_ASSERT(name != NULL, return;);
+
+	attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib),
+					       GFP_ATOMIC);
+	if (attrib == NULL) {
+		IRDA_WARNING("%s: Unable to allocate attribute!\n",
+			     __FUNCTION__);
+		return;
+	}
+	memset(attrib, 0, sizeof( struct ias_attrib));
+
+	attrib->magic = IAS_ATTRIB_MAGIC;
+	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
+
+	/* Insert value */
+	attrib->value = irias_new_integer_value(value);
+
+	irias_add_attrib(obj, attrib, owner);
+}
+EXPORT_SYMBOL(irias_add_integer_attrib);
+
+ /*
+ * Function irias_add_octseq_attrib (obj, name, octet_seq, len)
+ *
+ *    Add a octet sequence attribute to an LM-IAS object
+ *
+ */
+
+void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
+			     int len, int owner)
+{
+	struct ias_attrib *attrib;
+
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+
+	IRDA_ASSERT(name != NULL, return;);
+	IRDA_ASSERT(octets != NULL, return;);
+
+	attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib),
+					       GFP_ATOMIC);
+	if (attrib == NULL) {
+		IRDA_WARNING("%s: Unable to allocate attribute!\n",
+			     __FUNCTION__);
+		return;
+	}
+	memset(attrib, 0, sizeof( struct ias_attrib));
+
+	attrib->magic = IAS_ATTRIB_MAGIC;
+	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
+
+	attrib->value = irias_new_octseq_value( octets, len);
+
+	irias_add_attrib(obj, attrib, owner);
+}
+EXPORT_SYMBOL(irias_add_octseq_attrib);
+
+/*
+ * Function irias_object_add_string_attrib (obj, string)
+ *
+ *    Add a string attribute to an LM-IAS object
+ *
+ */
+void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
+			     int owner)
+{
+	struct ias_attrib *attrib;
+
+	IRDA_ASSERT(obj != NULL, return;);
+	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
+
+	IRDA_ASSERT(name != NULL, return;);
+	IRDA_ASSERT(value != NULL, return;);
+
+	attrib = (struct ias_attrib *) kmalloc(sizeof( struct ias_attrib),
+					       GFP_ATOMIC);
+	if (attrib == NULL) {
+		IRDA_WARNING("%s: Unable to allocate attribute!\n",
+			     __FUNCTION__);
+		return;
+	}
+	memset(attrib, 0, sizeof( struct ias_attrib));
+
+	attrib->magic = IAS_ATTRIB_MAGIC;
+	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
+
+	attrib->value = irias_new_string_value(value);
+
+	irias_add_attrib(obj, attrib, owner);
+}
+EXPORT_SYMBOL(irias_add_string_attrib);
+
+/*
+ * Function irias_new_integer_value (integer)
+ *
+ *    Create new IAS integer value
+ *
+ */
+struct ias_value *irias_new_integer_value(int integer)
+{
+	struct ias_value *value;
+
+	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	if (value == NULL) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	memset(value, 0, sizeof(struct ias_value));
+
+	value->type = IAS_INTEGER;
+	value->len = 4;
+	value->t.integer = integer;
+
+	return value;
+}
+EXPORT_SYMBOL(irias_new_integer_value);
+
+/*
+ * Function irias_new_string_value (string)
+ *
+ *    Create new IAS string value
+ *
+ * Per IrLMP 1.1, 4.3.3.2, strings are up to 256 chars - Jean II
+ */
+struct ias_value *irias_new_string_value(char *string)
+{
+	struct ias_value *value;
+
+	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	if (value == NULL) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	memset( value, 0, sizeof( struct ias_value));
+
+	value->type = IAS_STRING;
+	value->charset = CS_ASCII;
+	value->t.string = strndup(string, IAS_MAX_STRING);
+	value->len = strlen(value->t.string);
+
+	return value;
+}
+EXPORT_SYMBOL(irias_new_string_value);
+
+/*
+ * Function irias_new_octseq_value (octets, len)
+ *
+ *    Create new IAS octet-sequence value
+ *
+ * Per IrLMP 1.1, 4.3.3.2, octet-sequence are up to 1024 bytes - Jean II
+ */
+struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
+{
+	struct ias_value *value;
+
+	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	if (value == NULL) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	memset(value, 0, sizeof(struct ias_value));
+
+	value->type = IAS_OCT_SEQ;
+	/* Check length */
+	if(len > IAS_MAX_OCTET_STRING)
+		len = IAS_MAX_OCTET_STRING;
+	value->len = len;
+
+	value->t.oct_seq = kmalloc(len, GFP_ATOMIC);
+	if (value->t.oct_seq == NULL){
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		kfree(value);
+		return NULL;
+	}
+	memcpy(value->t.oct_seq, octseq , len);
+	return value;
+}
+EXPORT_SYMBOL(irias_new_octseq_value);
+
+struct ias_value *irias_new_missing_value(void)
+{
+	struct ias_value *value;
+
+	value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC);
+	if (value == NULL) {
+		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	memset(value, 0, sizeof(struct ias_value));
+
+	value->type = IAS_MISSING;
+	value->len = 0;
+
+	return value;
+}
+
+/*
+ * Function irias_delete_value (value)
+ *
+ *    Delete IAS value
+ *
+ */
+void irias_delete_value(struct ias_value *value)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(value != NULL, return;);
+
+	switch (value->type) {
+	case IAS_INTEGER: /* Fallthrough */
+	case IAS_MISSING:
+		/* No need to deallocate */
+		break;
+	case IAS_STRING:
+		/* If string, deallocate string */
+		if (value->t.string != NULL)
+			kfree(value->t.string);
+		break;
+	case IAS_OCT_SEQ:
+		/* If byte stream, deallocate byte stream */
+		 if (value->t.oct_seq != NULL)
+			 kfree(value->t.oct_seq);
+		 break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown value type!\n", __FUNCTION__);
+		break;
+	}
+	kfree(value);
+}
+EXPORT_SYMBOL(irias_delete_value);
diff --git a/net/irda/irlan/Kconfig b/net/irda/irlan/Kconfig
new file mode 100644
index 00000000000..951abc2e3a7
--- /dev/null
+++ b/net/irda/irlan/Kconfig
@@ -0,0 +1,14 @@
+config IRLAN
+	tristate "IrLAN protocol"
+	depends on IRDA
+	help
+	  Say Y here if you want to build support for the IrLAN protocol.
+	  To compile it as a module, choose M here: the module will be called
+	  irlan.  IrLAN emulates an Ethernet and makes it possible to put up
+	  a wireless LAN using infrared beams.
+
+	  The IrLAN protocol can be used to talk with infrared access points
+	  like the HP NetbeamIR, or the ESI JetEye NET.  You can also connect
+	  to another Linux machine running the IrLAN protocol for ad-hoc
+	  networking!
+
diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
new file mode 100644
index 00000000000..77549bc8641
--- /dev/null
+++ b/net/irda/irlan/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux IrDA IrLAN protocol layer.
+#
+
+obj-$(CONFIG_IRLAN) += irlan.o
+
+irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
new file mode 100644
index 00000000000..f8e6cb0db04
--- /dev/null
+++ b/net/irda/irlan/irlan_client.c
@@ -0,0 +1,576 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_client.c
+ * Version:       0.9
+ * Description:   IrDA LAN Access Protocol (IrLAN) Client
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Tue Dec 14 15:47:02 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
+ *                slip.c by Laurence Culhane, <loz@holmes.demon.co.uk>
+ *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/bitops.h>
+#include <net/arp.h>
+
+#include <asm/system.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/iriap.h>
+#include <net/irda/timer.h>
+
+#include <net/irda/irlan_common.h>
+#include <net/irda/irlan_event.h>
+#include <net/irda/irlan_eth.h>
+#include <net/irda/irlan_provider.h>
+#include <net/irda/irlan_client.h>
+
+#undef CONFIG_IRLAN_GRATUITOUS_ARP
+
+static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, 
+						    LM_REASON reason, 
+						    struct sk_buff *);
+static int irlan_client_ctrl_data_indication(void *instance, void *sap, 
+					     struct sk_buff *skb);
+static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, 
+					      struct qos_info *qos, 
+					      __u32 max_sdu_size,
+					      __u8 max_header_size,
+					      struct sk_buff *);
+static void irlan_check_response_param(struct irlan_cb *self, char *param, 
+				       char *value, int val_len);
+static void irlan_client_open_ctrl_tsap(struct irlan_cb *self);
+
+static void irlan_client_kick_timer_expired(void *data)
+{
+	struct irlan_cb *self = (struct irlan_cb *) data;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	/*  
+	 * If we are in peer mode, the client may not have got the discovery
+	 * indication it needs to make progress. If the client is still in 
+	 * IDLE state, we must kick it to, but only if the provider is not IDLE
+ 	 */
+	if ((self->provider.access_type == ACCESS_PEER) && 
+	    (self->client.state == IRLAN_IDLE) &&
+	    (self->provider.state != IRLAN_IDLE)) {
+		irlan_client_wakeup(self, self->saddr, self->daddr);
+	}
+}
+
+static void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	irda_start_timer(&self->client.kick_timer, timeout, (void *) self, 
+			 irlan_client_kick_timer_expired);
+}
+
+/*
+ * Function irlan_client_wakeup (self, saddr, daddr)
+ *
+ *    Wake up client
+ *
+ */
+void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* 
+	 * Check if we are already awake, or if we are a provider in direct
+	 * mode (in that case we must leave the client idle
+	 */
+	if ((self->client.state != IRLAN_IDLE) || 
+	    (self->provider.access_type == ACCESS_DIRECT))
+	{
+			IRDA_DEBUG(0, "%s(), already awake!\n", __FUNCTION__ );
+			return;
+	}
+
+	/* Addresses may have changed! */
+	self->saddr = saddr;
+	self->daddr = daddr;
+
+	if (self->disconnect_reason == LM_USER_REQUEST) {
+			IRDA_DEBUG(0, "%s(), still stopped by user\n", __FUNCTION__ );
+			return;
+	}
+
+	/* Open TSAPs */
+	irlan_client_open_ctrl_tsap(self);
+	irlan_open_data_tsap(self);
+
+	irlan_do_client_event(self, IRLAN_DISCOVERY_INDICATION, NULL);
+	
+	/* Start kick timer */
+	irlan_client_start_kick_timer(self, 2*HZ);
+}
+
+/*
+ * Function irlan_discovery_indication (daddr)
+ *
+ *    Remote device with IrLAN server support discovered
+ *
+ */
+void irlan_client_discovery_indication(discinfo_t *discovery,
+				       DISCOVERY_MODE mode,
+				       void *priv) 
+{
+	struct irlan_cb *self;
+	__u32 saddr, daddr;
+	
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(discovery != NULL, return;);
+
+	/*
+	 * I didn't check it, but I bet that IrLAN suffer from the same
+	 * deficiency as IrComm and doesn't handle two instances
+	 * simultaneously connecting to each other.
+	 * Same workaround, drop passive discoveries.
+	 * Jean II */
+	if(mode == DISCOVERY_PASSIVE)
+		return;
+
+	saddr = discovery->saddr;
+	daddr = discovery->daddr;
+
+	/* Find instance */
+	rcu_read_lock();
+	self = irlan_get_any();
+	if (self) {
+		IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+		IRDA_DEBUG(1, "%s(), Found instance (%08x)!\n", __FUNCTION__ ,
+		      daddr);
+		
+		irlan_client_wakeup(self, saddr, daddr);
+	}
+	rcu_read_unlock();
+}
+	
+/*
+ * Function irlan_client_data_indication (handle, skb)
+ *
+ *    This function gets the data that is received on the control channel
+ *
+ */
+static int irlan_client_ctrl_data_indication(void *instance, void *sap, 
+					     struct sk_buff *skb)
+{
+	struct irlan_cb *self;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	self = (struct irlan_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+	
+	irlan_do_client_event(self, IRLAN_DATA_INDICATION, skb); 
+
+	/* Ready for a new command */	
+	IRDA_DEBUG(2, "%s(), clearing tx_busy\n", __FUNCTION__ );
+	self->client.tx_busy = FALSE;
+
+	/* Check if we have some queued commands waiting to be sent */
+	irlan_run_ctrl_tx_queue(self);
+
+	return 0;
+}
+
+static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, 
+						    LM_REASON reason, 
+						    struct sk_buff *userdata) 
+{
+	struct irlan_cb *self;
+	struct tsap_cb *tsap;
+	struct sk_buff *skb;
+
+	IRDA_DEBUG(4, "%s(), reason=%d\n", __FUNCTION__ , reason);
+	
+	self = (struct irlan_cb *) instance;
+	tsap = (struct tsap_cb *) sap;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);	
+	IRDA_ASSERT(tsap != NULL, return;);
+	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
+	
+	IRDA_ASSERT(tsap == self->client.tsap_ctrl, return;);
+
+       	/* Remove frames queued on the control channel */
+	while ((skb = skb_dequeue(&self->client.txq)) != NULL) {
+		dev_kfree_skb(skb);
+	}
+	self->client.tx_busy = FALSE;
+
+	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
+}
+
+/*
+ * Function irlan_client_open_tsaps (self)
+ *
+ *    Initialize callbacks and open IrTTP TSAPs
+ *
+ */
+static void irlan_client_open_ctrl_tsap(struct irlan_cb *self)
+{
+	struct tsap_cb *tsap;
+	notify_t notify;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Check if already open */
+	if (self->client.tsap_ctrl)
+		return;
+
+	irda_notify_init(&notify);
+
+	/* Set up callbacks */
+	notify.data_indication       = irlan_client_ctrl_data_indication;
+	notify.connect_confirm       = irlan_client_ctrl_connect_confirm;
+	notify.disconnect_indication = irlan_client_ctrl_disconnect_indication;
+	notify.instance = self;
+	strlcpy(notify.name, "IrLAN ctrl (c)", sizeof(notify.name));
+	
+	tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
+	if (!tsap) {
+		IRDA_DEBUG(2, "%s(), Got no tsap!\n", __FUNCTION__ );
+		return;
+	}
+	self->client.tsap_ctrl = tsap;
+}
+
+/*
+ * Function irlan_client_connect_confirm (handle, skb)
+ *
+ *    Connection to peer IrLAN laye confirmed
+ *
+ */
+static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, 
+					      struct qos_info *qos, 
+					      __u32 max_sdu_size,
+					      __u8 max_header_size,
+					      struct sk_buff *skb) 
+{
+	struct irlan_cb *self;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	self = (struct irlan_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	self->client.max_sdu_size = max_sdu_size;
+	self->client.max_header_size = max_header_size;
+
+	/* TODO: we could set the MTU depending on the max_sdu_size */
+
+	irlan_do_client_event(self, IRLAN_CONNECT_COMPLETE, NULL);
+}
+
+/*
+ * Function print_ret_code (code)
+ *
+ *    Print return code of request to peer IrLAN layer.
+ *
+ */
+static void print_ret_code(__u8 code) 
+{
+	switch(code) {
+	case 0:
+		printk(KERN_INFO "Success\n");
+		break;
+	case 1:
+		IRDA_WARNING("IrLAN: Insufficient resources\n");
+		break;
+	case 2:
+		IRDA_WARNING("IrLAN: Invalid command format\n");
+		break;
+	case 3:
+		IRDA_WARNING("IrLAN: Command not supported\n");
+		break;
+	case 4:
+		IRDA_WARNING("IrLAN: Parameter not supported\n");
+		break;
+	case 5:
+		IRDA_WARNING("IrLAN: Value not supported\n");
+		break;
+	case 6:
+		IRDA_WARNING("IrLAN: Not open\n");
+		break;
+	case 7:
+		IRDA_WARNING("IrLAN: Authentication required\n");
+		break;
+	case 8:
+		IRDA_WARNING("IrLAN: Invalid password\n");
+		break;
+	case 9:
+		IRDA_WARNING("IrLAN: Protocol error\n");
+		break;
+	case 255:
+		IRDA_WARNING("IrLAN: Asynchronous status\n");
+		break;
+	}
+}
+
+/*
+ * Function irlan_client_parse_response (self, skb)
+ *
+ *    Extract all parameters from received buffer, then feed them to 
+ *    check_params for parsing
+ */
+void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb)
+{
+	__u8 *frame;
+	__u8 *ptr;
+	int count;
+	int ret;
+	__u16 val_len;
+	int i;
+        char *name;
+        char *value;
+
+	IRDA_ASSERT(skb != NULL, return;);	
+	
+	IRDA_DEBUG(4, "%s() skb->len=%d\n", __FUNCTION__ , (int) skb->len);
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	if (!skb) {
+		IRDA_ERROR("%s(), Got NULL skb!\n", __FUNCTION__);
+		return;
+	}
+	frame = skb->data;
+	
+	/* 
+	 *  Check return code and print it if not success 
+	 */
+	if (frame[0]) {
+		print_ret_code(frame[0]);
+		return;
+	}
+	
+	name = kmalloc(255, GFP_ATOMIC);
+	if (!name)
+		return;
+	value = kmalloc(1016, GFP_ATOMIC);
+	if (!value) {
+		kfree(name);
+		return;
+	}
+
+	/* How many parameters? */
+	count = frame[1];
+
+	IRDA_DEBUG(4, "%s(), got %d parameters\n", __FUNCTION__ , count);
+	
+	ptr = frame+2;
+
+	/* For all parameters */
+ 	for (i=0; i<count;i++) {
+		ret = irlan_extract_param(ptr, name, value, &val_len);
+		if (ret < 0) {
+			IRDA_DEBUG(2, "%s(), IrLAN, Error!\n", __FUNCTION__ );
+			break;
+		}
+		ptr += ret;
+		irlan_check_response_param(self, name, value, val_len);
+ 	}
+	/* Cleanup */
+	kfree(name);
+	kfree(value);
+}
+
+/*
+ * Function irlan_check_response_param (self, param, value, val_len)
+ *
+ *     Check which parameter is received and update local variables
+ *
+ */
+static void irlan_check_response_param(struct irlan_cb *self, char *param, 
+				       char *value, int val_len) 
+{
+	__u16 tmp_cpu; /* Temporary value in host order */
+	__u8 *bytes;
+	int i;
+
+	IRDA_DEBUG(4, "%s(), parm=%s\n", __FUNCTION__ , param);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Media type */
+	if (strcmp(param, "MEDIA") == 0) {
+		if (strcmp(value, "802.3") == 0)
+			self->media = MEDIA_802_3;
+		else
+			self->media = MEDIA_802_5;
+		return;
+	}
+	if (strcmp(param, "FILTER_TYPE") == 0) {
+		if (strcmp(value, "DIRECTED") == 0)
+			self->client.filter_type |= IRLAN_DIRECTED;
+		else if (strcmp(value, "FUNCTIONAL") == 0)
+			self->client.filter_type |= IRLAN_FUNCTIONAL;
+		else if (strcmp(value, "GROUP") == 0)
+			self->client.filter_type |= IRLAN_GROUP;
+		else if (strcmp(value, "MAC_FRAME") == 0)
+			self->client.filter_type |= IRLAN_MAC_FRAME;
+		else if (strcmp(value, "MULTICAST") == 0)
+			self->client.filter_type |= IRLAN_MULTICAST;
+		else if (strcmp(value, "BROADCAST") == 0)
+			self->client.filter_type |= IRLAN_BROADCAST;
+		else if (strcmp(value, "IPX_SOCKET") == 0)
+			self->client.filter_type |= IRLAN_IPX_SOCKET;
+		
+	}
+	if (strcmp(param, "ACCESS_TYPE") == 0) {
+		if (strcmp(value, "DIRECT") == 0)
+			self->client.access_type = ACCESS_DIRECT;
+		else if (strcmp(value, "PEER") == 0)
+			self->client.access_type = ACCESS_PEER;
+		else if (strcmp(value, "HOSTED") == 0)
+			self->client.access_type = ACCESS_HOSTED;
+		else {
+			IRDA_DEBUG(2, "%s(), unknown access type!\n", __FUNCTION__ );
+		}
+	}
+	/* IRLAN version */
+	if (strcmp(param, "IRLAN_VER") == 0) {
+		IRDA_DEBUG(4, "IrLAN version %d.%d\n", (__u8) value[0], 
+		      (__u8) value[1]);
+
+		self->version[0] = value[0];
+		self->version[1] = value[1];
+		return;
+	}
+	/* Which remote TSAP to use for data channel */
+	if (strcmp(param, "DATA_CHAN") == 0) {
+		self->dtsap_sel_data = value[0];
+		IRDA_DEBUG(4, "Data TSAP = %02x\n", self->dtsap_sel_data);
+		return;
+	}
+	if (strcmp(param, "CON_ARB") == 0) {
+		memcpy(&tmp_cpu, value, 2); /* Align value */
+		le16_to_cpus(&tmp_cpu);     /* Convert to host order */
+		self->client.recv_arb_val = tmp_cpu;
+		IRDA_DEBUG(2, "%s(), receive arb val=%d\n", __FUNCTION__ , 
+			   self->client.recv_arb_val);
+	}
+	if (strcmp(param, "MAX_FRAME") == 0) {
+		memcpy(&tmp_cpu, value, 2); /* Align value */
+		le16_to_cpus(&tmp_cpu);     /* Convert to host order */
+		self->client.max_frame = tmp_cpu;
+		IRDA_DEBUG(4, "%s(), max frame=%d\n", __FUNCTION__ , 
+			   self->client.max_frame);
+	}
+	 
+	/* RECONNECT_KEY, in case the link goes down! */
+	if (strcmp(param, "RECONNECT_KEY") == 0) {
+		IRDA_DEBUG(4, "Got reconnect key: ");
+		/* for (i = 0; i < val_len; i++) */
+/* 			printk("%02x", value[i]); */
+		memcpy(self->client.reconnect_key, value, val_len);
+		self->client.key_len = val_len;
+		IRDA_DEBUG(4, "\n");
+	}
+	/* FILTER_ENTRY, have we got an ethernet address? */
+	if (strcmp(param, "FILTER_ENTRY") == 0) {
+		bytes = value;
+		IRDA_DEBUG(4, "Ethernet address = %02x:%02x:%02x:%02x:%02x:%02x\n",
+		      bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], 
+		      bytes[5]);
+		for (i = 0; i < 6; i++) 
+			self->dev->dev_addr[i] = bytes[i];
+	}
+}
+
+/*
+ * Function irlan_client_get_value_confirm (obj_id, value)
+ *
+ *    Got results from remote LM-IAS
+ *
+ */
+void irlan_client_get_value_confirm(int result, __u16 obj_id, 
+				    struct ias_value *value, void *priv) 
+{
+	struct irlan_cb *self;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(priv != NULL, return;);
+
+	self = (struct irlan_cb *) priv;
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* We probably don't need to make any more queries */
+	iriap_close(self->client.iriap);
+	self->client.iriap = NULL;
+
+	/* Check if request succeeded */
+	if (result != IAS_SUCCESS) {
+		IRDA_DEBUG(2, "%s(), got NULL value!\n", __FUNCTION__ );
+		irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, 
+				      NULL);
+		return;
+	}
+
+	switch (value->type) {
+	case IAS_INTEGER:
+		self->dtsap_sel_ctrl = value->t.integer;
+
+		if (value->t.integer != -1) {
+			irlan_do_client_event(self, IRLAN_IAS_PROVIDER_AVAIL,
+					      NULL);
+			return;
+		}
+		irias_delete_value(value);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), unknown type!\n", __FUNCTION__ );
+		break;
+	}
+	irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, NULL);
+}
diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c
new file mode 100644
index 00000000000..ce943b69e99
--- /dev/null
+++ b/net/irda/irlan/irlan_client_event.c
@@ -0,0 +1,533 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_client_event.c
+ * Version:       0.9
+ * Description:   IrLAN client state machine
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sun Dec 26 21:52:24 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/skbuff.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/timer.h>
+#include <net/irda/irmod.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irttp.h>
+
+#include <net/irda/irlan_common.h>
+#include <net/irda/irlan_client.h>
+#include <net/irda/irlan_event.h>
+
+static int irlan_client_state_idle (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_conn (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_info (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_open (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_wait (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_arb  (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_data (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+static int irlan_client_state_sync (struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb);
+
+static int (*state[])(struct irlan_cb *, IRLAN_EVENT event, struct sk_buff *) =
+{ 
+	irlan_client_state_idle,
+	irlan_client_state_query,
+	irlan_client_state_conn,
+	irlan_client_state_info,
+	irlan_client_state_media,
+	irlan_client_state_open,
+	irlan_client_state_wait,
+	irlan_client_state_arb,
+	irlan_client_state_data,
+	irlan_client_state_close,
+	irlan_client_state_sync
+};
+
+void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, 
+			   struct sk_buff *skb) 
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	(*state[ self->client.state]) (self, event, skb);
+}
+
+/*
+ * Function irlan_client_state_idle (event, skb, info)
+ *
+ *    IDLE, We are waiting for an indication that there is a provider
+ *    available.
+ */
+static int irlan_client_state_idle(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+	
+	switch (event) {
+	case IRLAN_DISCOVERY_INDICATION:
+		if (self->client.iriap) {
+			IRDA_WARNING("%s(), busy with a previous query\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+		
+		self->client.iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+						irlan_client_get_value_confirm);
+		/* Get some values from peer IAS */
+		irlan_next_client_state(self, IRLAN_QUERY);
+		iriap_getvaluebyclass_request(self->client.iriap,
+					      self->saddr, self->daddr,
+					      "IrLAN", "IrDA:TinyTP:LsapSel");
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(4, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb) 
+		dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_query (event, skb, info)
+ *
+ *    QUERY, We have queryed the remote IAS and is ready to connect
+ *    to provider, just waiting for the confirm.
+ *
+ */
+static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+	
+	switch(event) {
+	case IRLAN_IAS_PROVIDER_AVAIL:
+		IRDA_ASSERT(self->dtsap_sel_ctrl != 0, return -1;);
+
+		self->client.open_retries = 0;
+		
+		irttp_connect_request(self->client.tsap_ctrl, 
+				      self->dtsap_sel_ctrl, 
+				      self->saddr, self->daddr, NULL, 
+				      IRLAN_MTU, NULL);
+		irlan_next_client_state(self, IRLAN_CONN);
+		break;
+	case IRLAN_IAS_PROVIDER_NOT_AVAIL:
+		IRDA_DEBUG(2, "%s(), IAS_PROVIDER_NOT_AVAIL\n", __FUNCTION__ );
+		irlan_next_client_state(self, IRLAN_IDLE);
+
+		/* Give the client a kick! */
+		if ((self->provider.access_type == ACCESS_PEER) && 
+		    (self->provider.state != IRLAN_IDLE))
+			irlan_client_wakeup(self, self->saddr, self->daddr);
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_conn (event, skb, info)
+ *
+ *    CONN, We have connected to a provider but has not issued any
+ *    commands yet.
+ *
+ */
+static int irlan_client_state_conn(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	
+	switch (event) {
+	case IRLAN_CONNECT_COMPLETE:
+		/* Send getinfo cmd */
+		irlan_get_provider_info(self);
+		irlan_next_client_state(self, IRLAN_INFO);
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_info (self, event, skb, info)
+ *
+ *    INFO, We have issued a GetInfo command and is awaiting a reply.
+ */
+static int irlan_client_state_info(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	
+	switch (event) {
+	case IRLAN_DATA_INDICATION:
+		IRDA_ASSERT(skb != NULL, return -1;);
+	
+		irlan_client_parse_response(self, skb);
+		
+		irlan_next_client_state(self, IRLAN_MEDIA);
+		
+		irlan_get_media_char(self);
+		break;
+		
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_media (self, event, skb, info)
+ *
+ *    MEDIA, The irlan_client has issued a GetMedia command and is awaiting a
+ *    reply.
+ *
+ */
+static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	switch(event) {
+	case IRLAN_DATA_INDICATION:
+		irlan_client_parse_response(self, skb);
+		irlan_open_data_channel(self);
+		irlan_next_client_state(self, IRLAN_OPEN);
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_open (self, event, skb, info)
+ *
+ *    OPEN, The irlan_client has issued a OpenData command and is awaiting a
+ *    reply
+ *
+ */
+static int irlan_client_state_open(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	struct qos_info qos;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	switch(event) {
+	case IRLAN_DATA_INDICATION:
+		irlan_client_parse_response(self, skb);
+		
+		/*
+		 *  Check if we have got the remote TSAP for data 
+		 *  communications
+		 */
+	  	IRDA_ASSERT(self->dtsap_sel_data != 0, return -1;);
+
+		/* Check which access type we are dealing with */
+		switch (self->client.access_type) {
+		case ACCESS_PEER:
+		    if (self->provider.state == IRLAN_OPEN) {
+			    
+			    irlan_next_client_state(self, IRLAN_ARB);
+			    irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, 
+						  NULL);
+		    } else {
+			
+			    irlan_next_client_state(self, IRLAN_WAIT);
+		    }
+		    break;
+		case ACCESS_DIRECT:
+		case ACCESS_HOSTED:
+			qos.link_disc_time.bits = 0x01; /* 3 secs */
+			
+			irttp_connect_request(self->tsap_data, 
+					      self->dtsap_sel_data, 
+					      self->saddr, self->daddr, &qos, 
+					      IRLAN_MTU, NULL);
+			
+			irlan_next_client_state(self, IRLAN_DATA);
+			break;
+		default:
+			IRDA_DEBUG(2, "%s(), unknown access type!\n", __FUNCTION__ );
+			break;
+		}
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	
+	if (skb)
+		dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_wait (self, event, skb, info)
+ *
+ *    WAIT, The irlan_client is waiting for the local provider to enter the
+ *    provider OPEN state.
+ *
+ */
+static int irlan_client_state_wait(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	
+	switch(event) {
+	case IRLAN_PROVIDER_SIGNAL:
+		irlan_next_client_state(self, IRLAN_ARB);
+		irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, NULL);
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+static int irlan_client_state_arb(struct irlan_cb *self, IRLAN_EVENT event, 
+				  struct sk_buff *skb) 
+{
+	struct qos_info qos;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	
+	switch(event) {
+	case IRLAN_CHECK_CON_ARB:
+		if (self->client.recv_arb_val == self->provider.send_arb_val) {
+			irlan_next_client_state(self, IRLAN_CLOSE);
+			irlan_close_data_channel(self);
+		} else if (self->client.recv_arb_val < 
+			   self->provider.send_arb_val) 
+		{
+			qos.link_disc_time.bits = 0x01; /* 3 secs */
+
+			irlan_next_client_state(self, IRLAN_DATA);
+			irttp_connect_request(self->tsap_data, 
+					      self->dtsap_sel_data, 
+					      self->saddr, self->daddr, &qos, 
+					      IRLAN_MTU, NULL);
+		} else if (self->client.recv_arb_val >
+			   self->provider.send_arb_val) 
+		{
+			IRDA_DEBUG(2, "%s(), lost the battle :-(\n", __FUNCTION__ );
+		}
+		break;
+	case IRLAN_DATA_CONNECT_INDICATION:
+		irlan_next_client_state(self, IRLAN_DATA);
+		break;
+	case IRLAN_LMP_DISCONNECT:
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	case IRLAN_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(2, "%s(), IRLAN_WATCHDOG_TIMEOUT\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_data (self, event, skb, info)
+ *
+ *    DATA, The data channel is connected, allowing data transfers between
+ *    the local and remote machines.
+ *
+ */
+static int irlan_client_state_data(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+
+	switch(event) {
+	case IRLAN_DATA_INDICATION:
+		irlan_client_parse_response(self, skb);
+		break;		
+	case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_client_state(self, IRLAN_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_close (self, event, skb, info)
+ *
+ *    
+ *
+ */
+static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event, 
+				    struct sk_buff *skb) 
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (skb)
+		dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irlan_client_state_sync (self, event, skb, info)
+ *
+ *    
+ *
+ */
+static int irlan_client_state_sync(struct irlan_cb *self, IRLAN_EVENT event, 
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
new file mode 100644
index 00000000000..657d1221057
--- /dev/null
+++ b/net/irda/irlan/irlan_common.c
@@ -0,0 +1,1200 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_common.c
+ * Version:       0.9
+ * Description:   IrDA LAN Access Protocol Implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sun Dec 26 21:53:10 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/random.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/moduleparam.h>
+#include <linux/bitops.h>
+
+#include <asm/system.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/iriap.h>
+#include <net/irda/timer.h>
+
+#include <net/irda/irlan_common.h>
+#include <net/irda/irlan_client.h>
+#include <net/irda/irlan_provider.h> 
+#include <net/irda/irlan_eth.h>
+#include <net/irda/irlan_filter.h>
+
+
+/* 
+ * Send gratuitous ARP when connected to a new AP or not. May be a clever
+ * thing to do, but for some reason the machine crashes if you use DHCP. So
+ * lets not use it by default.
+ */
+#undef CONFIG_IRLAN_SEND_GRATUITOUS_ARP
+
+/* extern char sysctl_devname[]; */
+
+/*
+ *  Master structure
+ */
+static LIST_HEAD(irlans);
+
+static void *ckey;
+static void *skey;
+
+/* Module parameters */
+static int eth;   /* Use "eth" or "irlan" name for devices */
+static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */
+
+#ifdef CONFIG_PROC_FS
+static const char *irlan_access[] = {
+	"UNKNOWN",
+	"DIRECT",
+	"PEER",
+	"HOSTED"
+};
+
+static const char *irlan_media[] = {
+	"UNKNOWN",
+	"802.3",
+	"802.5"
+};
+
+extern struct proc_dir_entry *proc_irda;
+
+static int irlan_seq_open(struct inode *inode, struct file *file);
+
+static struct file_operations irlan_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = irlan_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+extern struct proc_dir_entry *proc_irda;
+#endif /* CONFIG_PROC_FS */
+
+static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr);
+static void __irlan_close(struct irlan_cb *self);
+static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, 
+				__u8 value_byte, __u16 value_short, 
+				__u8 *value_array, __u16 value_len);
+static void irlan_open_unicast_addr(struct irlan_cb *self);
+static void irlan_get_unicast_addr(struct irlan_cb *self);
+void irlan_close_tsaps(struct irlan_cb *self);
+
+/*
+ * Function irlan_init (void)
+ *
+ *    Initialize IrLAN layer
+ *
+ */
+static int __init irlan_init(void)
+{
+	struct irlan_cb *new;
+	__u16 hints;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+#ifdef CONFIG_PROC_FS
+	{ struct proc_dir_entry *proc;
+	proc = create_proc_entry("irlan", 0, proc_irda);
+	if (!proc) {
+		printk(KERN_ERR "irlan_init: can't create /proc entry!\n");
+		return -ENODEV;
+	}
+
+	proc->proc_fops = &irlan_fops;
+	}
+#endif /* CONFIG_PROC_FS */
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	hints = irlmp_service_to_hint(S_LAN);
+
+	/* Register with IrLMP as a client */
+	ckey = irlmp_register_client(hints, &irlan_client_discovery_indication,
+				     NULL, NULL);
+	
+	/* Register with IrLMP as a service */
+ 	skey = irlmp_register_service(hints);
+
+	/* Start the master IrLAN instance (the only one for now) */
+ 	new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY);
+
+	/* The master will only open its (listen) control TSAP */
+	irlan_provider_open_ctrl_tsap(new);
+
+	/* Do some fast discovery! */
+	irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
+
+	return 0;
+}
+
+static void __exit irlan_cleanup(void) 
+{
+	struct irlan_cb *self, *next;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	irlmp_unregister_client(ckey);
+	irlmp_unregister_service(skey);
+
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("irlan", proc_irda);
+#endif /* CONFIG_PROC_FS */
+
+	/* Cleanup any leftover network devices */
+	rtnl_lock();
+	list_for_each_entry_safe(self, next, &irlans, dev_list) {
+		__irlan_close(self);
+	}
+	rtnl_unlock();
+}
+
+/*
+ * Function irlan_open (void)
+ *
+ *    Open new instance of a client/provider, we should only register the 
+ *    network device if this instance is ment for a particular client/provider
+ */
+static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr)
+{
+	struct net_device *dev;
+	struct irlan_cb *self;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Create network device with irlan */
+	dev = alloc_irlandev(eth ? "eth%d" : "irlan%d");
+	if (!dev)
+		return NULL;
+
+	self = dev->priv;
+	self->dev = dev;
+
+	/*
+	 *  Initialize local device structure
+	 */
+	self->magic = IRLAN_MAGIC;
+	self->saddr = saddr;
+	self->daddr = daddr;
+
+	/* Provider access can only be PEER, DIRECT, or HOSTED */
+	self->provider.access_type = access;
+	if (access == ACCESS_DIRECT) {
+		/*  
+		 * Since we are emulating an IrLAN sever we will have to
+		 * give ourself an ethernet address!  
+		 */
+		dev->dev_addr[0] = 0x40;
+		dev->dev_addr[1] = 0x00;
+		dev->dev_addr[2] = 0x00;
+		dev->dev_addr[3] = 0x00;
+		get_random_bytes(dev->dev_addr+4, 1);
+		get_random_bytes(dev->dev_addr+5, 1);
+	}
+
+	self->media = MEDIA_802_3;
+	self->disconnect_reason = LM_USER_REQUEST;
+	init_timer(&self->watchdog_timer);
+	init_timer(&self->client.kick_timer);
+	init_waitqueue_head(&self->open_wait);	
+	
+	skb_queue_head_init(&self->client.txq);
+	
+	irlan_next_client_state(self, IRLAN_IDLE);
+	irlan_next_provider_state(self, IRLAN_IDLE);
+
+	if (register_netdev(dev)) {
+		IRDA_DEBUG(2, "%s(), register_netdev() failed!\n", 
+			   __FUNCTION__ );
+		self = NULL;
+		free_netdev(dev);
+	} else {
+		rtnl_lock();
+		list_add_rcu(&self->dev_list, &irlans);
+		rtnl_unlock();
+	}
+
+	return self;
+}
+/*
+ * Function __irlan_close (self)
+ *
+ *    This function closes and deallocates the IrLAN client instances. Be 
+ *    aware that other functions which calls client_close() must
+ *    remove self from irlans list first.
+ */
+static void __irlan_close(struct irlan_cb *self)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	ASSERT_RTNL();
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	del_timer_sync(&self->watchdog_timer);
+	del_timer_sync(&self->client.kick_timer);
+
+	/* Close all open connections and remove TSAPs */
+	irlan_close_tsaps(self);
+	
+	if (self->client.iriap) 
+		iriap_close(self->client.iriap);
+
+	/* Remove frames queued on the control channel */
+	skb_queue_purge(&self->client.txq);
+
+	/* Unregister and free self via destructor */
+	unregister_netdevice(self->dev);
+}
+
+/* Find any instance of irlan, used for client discovery wakeup */
+struct irlan_cb *irlan_get_any(void)
+{
+	struct irlan_cb *self;
+
+	list_for_each_entry_rcu(self, &irlans, dev_list) {
+		return self;
+	}
+	return NULL;
+}
+
+/*
+ * Function irlan_connect_indication (instance, sap, qos, max_sdu_size, skb)
+ *
+ *    Here we receive the connect indication for the data channel
+ *
+ */
+static void irlan_connect_indication(void *instance, void *sap,
+				     struct qos_info *qos,
+				     __u32 max_sdu_size,
+				     __u8 max_header_size, 
+				     struct sk_buff *skb)
+{
+	struct irlan_cb *self;
+	struct tsap_cb *tsap;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	self = (struct irlan_cb *) instance;
+	tsap = (struct tsap_cb *) sap;
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	IRDA_ASSERT(tsap == self->tsap_data,return;);
+
+	self->max_sdu_size = max_sdu_size;
+	self->max_header_size = max_header_size;
+
+	IRDA_DEBUG(0, "%s: We are now connected!\n", __FUNCTION__);
+
+	del_timer(&self->watchdog_timer);
+
+	/* If you want to pass the skb to *both* state machines, you will
+	 * need to skb_clone() it, so that you don't free it twice.
+	 * As the state machines don't need it, git rid of it here...
+	 * Jean II */
+	if (skb)
+		dev_kfree_skb(skb);
+
+	irlan_do_provider_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
+	irlan_do_client_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
+
+	if (self->provider.access_type == ACCESS_PEER) {
+		/* 
+		 * Data channel is open, so we are now allowed to
+		 * configure the remote filter 
+		 */
+		irlan_get_unicast_addr(self);
+		irlan_open_unicast_addr(self);
+	}
+	/* Ready to transfer Ethernet frames (at last) */
+	netif_start_queue(self->dev); /* Clear reason */
+}
+
+static void irlan_connect_confirm(void *instance, void *sap,
+				  struct qos_info *qos, 
+				  __u32 max_sdu_size,
+				  __u8 max_header_size, 
+				  struct sk_buff *skb) 
+{
+	struct irlan_cb *self;
+
+	self = (struct irlan_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	self->max_sdu_size = max_sdu_size;
+	self->max_header_size = max_header_size;
+
+	/* TODO: we could set the MTU depending on the max_sdu_size */
+
+	IRDA_DEBUG(0, "%s: We are now connected!\n", __FUNCTION__);
+	del_timer(&self->watchdog_timer);
+
+	/* 
+	 * Data channel is open, so we are now allowed to configure the remote
+	 * filter 
+	 */
+	irlan_get_unicast_addr(self);
+	irlan_open_unicast_addr(self);
+	
+	/* Open broadcast and multicast filter by default */
+ 	irlan_set_broadcast_filter(self, TRUE);
+ 	irlan_set_multicast_filter(self, TRUE);
+
+	/* Ready to transfer Ethernet frames */
+	netif_start_queue(self->dev);
+	self->disconnect_reason = 0; /* Clear reason */
+#ifdef CONFIG_IRLAN_SEND_GRATUITOUS_ARP
+	irlan_eth_send_gratuitous_arp(&self->dev);
+#endif
+	wake_up_interruptible(&self->open_wait);
+}
+
+/*
+ * Function irlan_client_disconnect_indication (handle)
+ *
+ *    Callback function for the IrTTP layer. Indicates a disconnection of
+ *    the specified connection (handle)
+ */
+static void irlan_disconnect_indication(void *instance,
+					void *sap, LM_REASON reason, 
+					struct sk_buff *userdata) 
+{
+	struct irlan_cb *self;
+	struct tsap_cb *tsap;
+
+	IRDA_DEBUG(0, "%s(), reason=%d\n", __FUNCTION__ , reason);
+	
+	self = (struct irlan_cb *) instance;
+	tsap = (struct tsap_cb *) sap;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);	
+	IRDA_ASSERT(tsap != NULL, return;);
+	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
+	
+	IRDA_ASSERT(tsap == self->tsap_data, return;);
+
+	IRDA_DEBUG(2, "IrLAN, data channel disconnected by peer!\n");
+
+	/* Save reason so we know if we should try to reconnect or not */
+	self->disconnect_reason = reason;
+	
+	switch (reason) {
+	case LM_USER_REQUEST: /* User request */
+		IRDA_DEBUG(2, "%s(), User requested\n", __FUNCTION__ );
+		break;
+	case LM_LAP_DISCONNECT: /* Unexpected IrLAP disconnect */
+		IRDA_DEBUG(2, "%s(), Unexpected IrLAP disconnect\n", __FUNCTION__ );
+		break;
+	case LM_CONNECT_FAILURE: /* Failed to establish IrLAP connection */
+		IRDA_DEBUG(2, "%s(), IrLAP connect failed\n", __FUNCTION__ );
+		break;
+	case LM_LAP_RESET:  /* IrLAP reset */
+		IRDA_DEBUG(2, "%s(), IrLAP reset\n", __FUNCTION__ );
+		break;
+	case LM_INIT_DISCONNECT:
+		IRDA_DEBUG(2, "%s(), IrLMP connect failed\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_ERROR("%s(), Unknown disconnect reason\n", __FUNCTION__);
+		break;
+	}
+	
+	/* If you want to pass the skb to *both* state machines, you will
+	 * need to skb_clone() it, so that you don't free it twice.
+	 * As the state machines don't need it, git rid of it here...
+	 * Jean II */
+	if (userdata)
+		dev_kfree_skb(userdata);
+
+	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
+	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
+	
+	wake_up_interruptible(&self->open_wait);
+}
+
+void irlan_open_data_tsap(struct irlan_cb *self)
+{
+	struct tsap_cb *tsap;
+	notify_t notify;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Check if already open */
+	if (self->tsap_data)
+		return;
+
+	irda_notify_init(&notify);
+	
+	notify.data_indication       = irlan_eth_receive;
+	notify.udata_indication      = irlan_eth_receive;
+	notify.connect_indication    = irlan_connect_indication;
+	notify.connect_confirm       = irlan_connect_confirm;
+ 	notify.flow_indication       = irlan_eth_flow_indication;
+	notify.disconnect_indication = irlan_disconnect_indication;
+	notify.instance              = self;
+	strlcpy(notify.name, "IrLAN data", sizeof(notify.name));
+
+	tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
+	if (!tsap) {
+		IRDA_DEBUG(2, "%s(), Got no tsap!\n", __FUNCTION__ );
+		return;
+	}
+	self->tsap_data = tsap;
+
+	/* 
+	 *  This is the data TSAP selector which we will pass to the client
+	 *  when the client ask for it.
+	 */
+	self->stsap_sel_data = self->tsap_data->stsap_sel;
+}
+
+void irlan_close_tsaps(struct irlan_cb *self)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Disconnect and close all open TSAP connections */
+	if (self->tsap_data) {
+		irttp_disconnect_request(self->tsap_data, NULL, P_NORMAL);
+		irttp_close_tsap(self->tsap_data);
+		self->tsap_data = NULL;
+	}
+	if (self->client.tsap_ctrl) {
+		irttp_disconnect_request(self->client.tsap_ctrl, NULL, 
+					 P_NORMAL);
+		irttp_close_tsap(self->client.tsap_ctrl);
+		self->client.tsap_ctrl = NULL;
+	}
+	if (self->provider.tsap_ctrl) {
+		irttp_disconnect_request(self->provider.tsap_ctrl, NULL, 
+					 P_NORMAL);
+		irttp_close_tsap(self->provider.tsap_ctrl);
+		self->provider.tsap_ctrl = NULL;
+	}
+	self->disconnect_reason = LM_USER_REQUEST;
+}
+
+/*
+ * Function irlan_ias_register (self, tsap_sel)
+ *
+ *    Register with LM-IAS
+ *
+ */
+void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel)
+{
+	struct ias_object *obj;
+	struct ias_value *new_value;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	/* 
+	 * Check if object has already been registered by a previous provider.
+	 * If that is the case, we just change the value of the attribute
+	 */
+	if (!irias_find_object("IrLAN")) {
+		obj = irias_new_object("IrLAN", IAS_IRLAN_ID);
+		irias_add_integer_attrib(obj, "IrDA:TinyTP:LsapSel", tsap_sel,
+					 IAS_KERNEL_ATTR);
+		irias_insert_object(obj);
+	} else {
+		new_value = irias_new_integer_value(tsap_sel);
+		irias_object_change_attribute("IrLAN", "IrDA:TinyTP:LsapSel",
+					      new_value);
+	}
+	
+        /* Register PnP object only if not registered before */
+        if (!irias_find_object("PnP")) {
+		obj = irias_new_object("PnP", IAS_PNP_ID);
+#if 0
+		irias_add_string_attrib(obj, "Name", sysctl_devname,
+					IAS_KERNEL_ATTR);
+#else
+		irias_add_string_attrib(obj, "Name", "Linux", IAS_KERNEL_ATTR);
+#endif
+		irias_add_string_attrib(obj, "DeviceID", "HWP19F0",
+					IAS_KERNEL_ATTR);
+		irias_add_integer_attrib(obj, "CompCnt", 1, IAS_KERNEL_ATTR);
+		if (self->provider.access_type == ACCESS_PEER)
+			irias_add_string_attrib(obj, "Comp#01", "PNP8389",
+						IAS_KERNEL_ATTR);
+		else
+			irias_add_string_attrib(obj, "Comp#01", "PNP8294",
+						IAS_KERNEL_ATTR);
+
+		irias_add_string_attrib(obj, "Manufacturer",
+					"Linux-IrDA Project", IAS_KERNEL_ATTR);
+		irias_insert_object(obj);
+	}
+}
+
+/*
+ * Function irlan_run_ctrl_tx_queue (self)
+ *
+ *    Try to send the next command in the control transmit queue
+ *
+ */
+int irlan_run_ctrl_tx_queue(struct irlan_cb *self)
+{
+	struct sk_buff *skb;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	if (irda_lock(&self->client.tx_busy) == FALSE)
+		return -EBUSY;
+
+	skb = skb_dequeue(&self->client.txq);
+	if (!skb) {
+		self->client.tx_busy = FALSE;
+		return 0;
+	}
+	
+	/* Check that it's really possible to send commands */
+	if ((self->client.tsap_ctrl == NULL) || 
+	    (self->client.state == IRLAN_IDLE)) 
+	{
+		self->client.tx_busy = FALSE;
+		dev_kfree_skb(skb);
+		return -1;
+	}
+	IRDA_DEBUG(2, "%s(), sending ...\n", __FUNCTION__ );
+
+	return irttp_data_request(self->client.tsap_ctrl, skb);
+}
+
+/*
+ * Function irlan_ctrl_data_request (self, skb)
+ *
+ *    This function makes sure that commands on the control channel is being
+ *    sent in a command/response fashion
+ */
+static void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Queue command */
+	skb_queue_tail(&self->client.txq, skb);
+
+	/* Try to send command */
+	irlan_run_ctrl_tx_queue(self);
+}
+
+/*
+ * Function irlan_get_provider_info (self)
+ *
+ *    Send Get Provider Information command to peer IrLAN layer
+ *
+ */
+void irlan_get_provider_info(struct irlan_cb *self)
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	skb = dev_alloc_skb(64);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+ 	frame[0] = CMD_GET_PROVIDER_INFO;
+	frame[1] = 0x00;                 /* Zero parameters */
+	
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_open_data_channel (self)
+ *
+ *    Send an Open Data Command to provider
+ *
+ */
+void irlan_open_data_channel(struct irlan_cb *self) 
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	skb = dev_alloc_skb(64);
+	if (!skb)
+		return;
+
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+	/* Build frame */
+ 	frame[0] = CMD_OPEN_DATA_CHANNEL;
+	frame[1] = 0x02; /* Two parameters */
+
+	irlan_insert_string_param(skb, "MEDIA", "802.3");
+	irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
+	/* irlan_insert_string_param(skb, "MODE", "UNRELIABLE"); */
+
+/* 	self->use_udata = TRUE; */
+
+	irlan_ctrl_data_request(self, skb);
+}
+
+void irlan_close_data_channel(struct irlan_cb *self) 
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Check if the TSAP is still there */
+	if (self->client.tsap_ctrl == NULL)
+		return;
+
+	skb = dev_alloc_skb(64);
+	if (!skb)
+		return;
+
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+	/* Build frame */
+ 	frame[0] = CMD_CLOSE_DATA_CHAN;
+	frame[1] = 0x01; /* Two parameters */
+
+	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
+
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_open_unicast_addr (self)
+ *
+ *    Make IrLAN provider accept ethernet frames addressed to the unicast 
+ *    address.
+ *
+ */
+static void irlan_open_unicast_addr(struct irlan_cb *self)
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);	
+	
+	skb = dev_alloc_skb(128);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+ 	frame[0] = CMD_FILTER_OPERATION;
+	frame[1] = 0x03;                 /* Three parameters */
+ 	irlan_insert_byte_param(skb, "DATA_CHAN" , self->dtsap_sel_data);
+ 	irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
+ 	irlan_insert_string_param(skb, "FILTER_MODE", "FILTER"); 
+	
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_set_broadcast_filter (self, status)
+ *
+ *    Make IrLAN provider accept ethernet frames addressed to the broadcast
+ *    address. Be careful with the use of this one, since there may be a lot
+ *    of broadcast traffic out there. We can still function without this
+ *    one but then _we_ have to initiate all communication with other
+ *    hosts, since ARP request for this host will not be answered.
+ */
+void irlan_set_broadcast_filter(struct irlan_cb *self, int status) 
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+ 	skb = dev_alloc_skb(128);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+ 	frame[0] = CMD_FILTER_OPERATION;
+	frame[1] = 0x03;                 /* Three parameters */
+ 	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
+ 	irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
+	if (status)
+		irlan_insert_string_param(skb, "FILTER_MODE", "FILTER"); 
+	else
+		irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); 
+
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_set_multicast_filter (self, status)
+ *
+ *    Make IrLAN provider accept ethernet frames addressed to the multicast
+ *    address. 
+ *
+ */
+void irlan_set_multicast_filter(struct irlan_cb *self, int status) 
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+ 	skb = dev_alloc_skb(128);
+	if (!skb)
+		return;
+	
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+ 	frame[0] = CMD_FILTER_OPERATION;
+	frame[1] = 0x03;                 /* Three parameters */
+ 	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
+ 	irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
+	if (status)
+		irlan_insert_string_param(skb, "FILTER_MODE", "ALL"); 
+	else
+		irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); 
+
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_get_unicast_addr (self)
+ *
+ *    Retrieves the unicast address from the IrLAN provider. This address
+ *    will be inserted into the devices structure, so the ethernet layer
+ *    can construct its packets.
+ *
+ */
+static void irlan_get_unicast_addr(struct irlan_cb *self)
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+		
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	skb = dev_alloc_skb(128);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+ 	frame[0] = CMD_FILTER_OPERATION;
+	frame[1] = 0x03;                 /* Three parameters */
+ 	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
+ 	irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
+ 	irlan_insert_string_param(skb, "FILTER_OPERATION", "DYNAMIC"); 
+	
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function irlan_get_media_char (self)
+ *
+ *    
+ *
+ */
+void irlan_get_media_char(struct irlan_cb *self) 
+{
+	struct sk_buff *skb;
+	__u8 *frame;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	skb = dev_alloc_skb(64);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->client.max_header_size);
+	skb_put(skb, 2);
+	
+	frame = skb->data;
+	
+	/* Build frame */
+ 	frame[0] = CMD_GET_MEDIA_CHAR;
+	frame[1] = 0x01; /* One parameter */
+	
+	irlan_insert_string_param(skb, "MEDIA", "802.3");
+	irlan_ctrl_data_request(self, skb);
+}
+
+/*
+ * Function insert_byte_param (skb, param, value)
+ *
+ *    Insert byte parameter into frame
+ *
+ */
+int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value)
+{
+	return __irlan_insert_param(skb, param, IRLAN_BYTE, value, 0, NULL, 0);
+}
+
+int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value)
+{
+	return __irlan_insert_param(skb, param, IRLAN_SHORT, 0, value, NULL, 0);
+}
+
+/*
+ * Function insert_string (skb, param, value)
+ *
+ *    Insert string parameter into frame
+ *
+ */
+int irlan_insert_string_param(struct sk_buff *skb, char *param, char *string)
+{
+	int string_len = strlen(string);
+
+	return __irlan_insert_param(skb, param, IRLAN_ARRAY, 0, 0, string, 
+				    string_len);
+}
+
+/*
+ * Function insert_array_param(skb, param, value, len_value)
+ *
+ *    Insert array parameter into frame
+ *
+ */
+int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *array,
+			     __u16 array_len)
+{
+	return __irlan_insert_param(skb, name, IRLAN_ARRAY, 0, 0, array, 
+				    array_len);
+}
+
+/*
+ * Function insert_param (skb, param, value, byte)
+ *
+ *    Insert parameter at end of buffer, structure of a parameter is:
+ *
+ *    -----------------------------------------------------------------------
+ *    | Name Length[1] | Param Name[1..255] | Val Length[2] | Value[0..1016]|
+ *    -----------------------------------------------------------------------
+ */
+static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, 
+				__u8 value_byte, __u16 value_short, 
+				__u8 *value_array, __u16 value_len)
+{
+	__u8 *frame;
+	__u8 param_len;
+	__u16 tmp_le; /* Temporary value in little endian format */
+	int n=0;
+	
+	if (skb == NULL) {
+		IRDA_DEBUG(2, "%s(), Got NULL skb\n", __FUNCTION__ );
+		return 0;
+	}	
+
+	param_len = strlen(param);
+	switch (type) {
+	case IRLAN_BYTE:
+		value_len = 1;
+		break;
+	case IRLAN_SHORT:
+		value_len = 2;
+		break;
+	case IRLAN_ARRAY:
+		IRDA_ASSERT(value_array != NULL, return 0;);
+		IRDA_ASSERT(value_len > 0, return 0;);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown parameter type!\n", __FUNCTION__ );
+		return 0;
+		break;
+	}
+	
+	/* Insert at end of sk-buffer */
+	frame = skb->tail;
+
+	/* Make space for data */
+	if (skb_tailroom(skb) < (param_len+value_len+3)) {
+		IRDA_DEBUG(2, "%s(), No more space at end of skb\n", __FUNCTION__ );
+		return 0;
+	}	
+	skb_put(skb, param_len+value_len+3);
+	
+	/* Insert parameter length */
+	frame[n++] = param_len;
+	
+	/* Insert parameter */
+	memcpy(frame+n, param, param_len); n += param_len;
+	
+	/* Insert value length (2 byte little endian format, LSB first) */
+	tmp_le = cpu_to_le16(value_len);
+	memcpy(frame+n, &tmp_le, 2); n += 2; /* To avoid alignment problems */
+
+	/* Insert value */
+	switch (type) {
+	case IRLAN_BYTE:
+		frame[n++] = value_byte;
+		break;
+	case IRLAN_SHORT:
+		tmp_le = cpu_to_le16(value_short);
+		memcpy(frame+n, &tmp_le, 2); n += 2;
+		break;
+	case IRLAN_ARRAY:
+		memcpy(frame+n, value_array, value_len); n+=value_len;
+		break;
+	default:
+		break;
+	}
+	IRDA_ASSERT(n == (param_len+value_len+3), return 0;);
+
+	return param_len+value_len+3;
+}
+
+/*
+ * Function irlan_extract_param (buf, name, value, len)
+ *
+ *    Extracts a single parameter name/value pair from buffer and updates
+ *    the buffer pointer to point to the next name/value pair. 
+ */
+int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len)
+{
+	__u8 name_len;
+	__u16 val_len;
+	int n=0;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	/* get length of parameter name (1 byte) */
+	name_len = buf[n++];
+	
+	if (name_len > 254) {
+		IRDA_DEBUG(2, "%s(), name_len > 254\n", __FUNCTION__ );
+		return -RSP_INVALID_COMMAND_FORMAT;
+	}
+	
+	/* get parameter name */
+	memcpy(name, buf+n, name_len);
+	name[name_len] = '\0';
+	n+=name_len;
+	
+	/*  
+	 *  Get length of parameter value (2 bytes in little endian 
+	 *  format) 
+	 */
+	memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */
+	le16_to_cpus(&val_len); n+=2;
+	
+	if (val_len > 1016) {
+		IRDA_DEBUG(2, "%s(), parameter length to long\n", __FUNCTION__ );
+		return -RSP_INVALID_COMMAND_FORMAT;
+	}
+	*len = val_len;
+
+	/* get parameter value */
+	memcpy(value, buf+n, val_len);
+	value[val_len] = '\0';
+	n+=val_len;
+	
+	IRDA_DEBUG(4, "Parameter: %s ", name); 
+	IRDA_DEBUG(4, "Value: %s\n", value); 
+
+	return n;
+}
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Start of reading /proc entries.
+ * Return entry at pos, 
+ *	or start_token to indicate print header line
+ *	or NULL if end of file
+ */
+static void *irlan_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	int i = 1;
+	struct irlan_cb *self;
+
+	rcu_read_lock();
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	list_for_each_entry(self, &irlans, dev_list) {
+		if (*pos == i) 
+			return self;
+		++i;
+	}
+	return NULL;
+}
+
+/* Return entry after v, and increment pos */
+static void *irlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *nxt;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) 
+		nxt = irlans.next;
+	else
+		nxt = ((struct irlan_cb *)v)->dev_list.next;
+
+	return (nxt == &irlans) ? NULL 
+		: list_entry(nxt, struct irlan_cb, dev_list);
+}
+
+/* End of reading /proc file */
+static void irlan_seq_stop(struct seq_file *seq, void *v)
+{
+	rcu_read_unlock();
+}
+
+
+/*
+ * Show one entry in /proc file.
+ */
+static int irlan_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "IrLAN instances:\n");
+	else {
+		struct irlan_cb *self = v;
+		
+		IRDA_ASSERT(self != NULL, return -1;);
+		IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+
+		seq_printf(seq,"ifname: %s,\n",
+			       self->dev->name);
+		seq_printf(seq,"client state: %s, ",
+			       irlan_state[ self->client.state]);
+		seq_printf(seq,"provider state: %s,\n",
+			       irlan_state[ self->provider.state]);
+		seq_printf(seq,"saddr: %#08x, ",
+			       self->saddr);
+		seq_printf(seq,"daddr: %#08x\n",
+			       self->daddr);
+		seq_printf(seq,"version: %d.%d,\n",
+			       self->version[1], self->version[0]);
+		seq_printf(seq,"access type: %s\n", 
+			       irlan_access[self->client.access_type]);
+		seq_printf(seq,"media: %s\n", 
+			       irlan_media[self->media]);
+		
+		seq_printf(seq,"local filter:\n");
+		seq_printf(seq,"remote filter: ");
+		irlan_print_filter(seq, self->client.filter_type);
+		seq_printf(seq,"tx busy: %s\n", 
+			       netif_queue_stopped(self->dev) ? "TRUE" : "FALSE");
+			
+		seq_putc(seq,'\n');
+	}
+	return 0;
+}
+
+static struct seq_operations irlan_seq_ops = {
+	.start = irlan_seq_start,
+	.next  = irlan_seq_next,
+	.stop  = irlan_seq_stop,
+	.show  = irlan_seq_show,
+};
+
+static int irlan_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &irlan_seq_ops);
+}
+#endif
+
+MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
+MODULE_DESCRIPTION("The Linux IrDA LAN protocol"); 
+MODULE_LICENSE("GPL");
+
+module_param(eth, bool, 0);
+MODULE_PARM_DESC(eth, "Name devices ethX (0) or irlanX (1)");
+module_param(access, int, 0);
+MODULE_PARM_DESC(access, "Access type DIRECT=1, PEER=2, HOSTED=3");
+
+module_init(irlan_init);
+module_exit(irlan_cleanup);
+
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
new file mode 100644
index 00000000000..071cd2cefd8
--- /dev/null
+++ b/net/irda/irlan/irlan_eth.c
@@ -0,0 +1,387 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_eth.c
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Oct 15 08:37:58 1998
+ * Modified at:   Tue Mar 21 09:06:41 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
+ *                slip.c by Laurence Culhane,   <loz@holmes.demon.co.uk>
+ *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+#include <net/arp.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irmod.h>
+#include <net/irda/irlan_common.h>
+#include <net/irda/irlan_client.h>
+#include <net/irda/irlan_event.h>
+#include <net/irda/irlan_eth.h>
+
+static int  irlan_eth_open(struct net_device *dev);
+static int  irlan_eth_close(struct net_device *dev);
+static int  irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev);
+static void irlan_eth_set_multicast_list( struct net_device *dev);
+static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
+
+/*
+ * Function irlan_eth_setup (dev)
+ *
+ *    The network device initialization function.
+ *
+ */
+static void irlan_eth_setup(struct net_device *dev)
+{
+	dev->open               = irlan_eth_open;
+	dev->stop               = irlan_eth_close;
+	dev->hard_start_xmit    = irlan_eth_xmit; 
+	dev->get_stats	        = irlan_eth_get_stats;
+	dev->set_multicast_list = irlan_eth_set_multicast_list;
+	dev->destructor		= free_netdev;
+
+	SET_MODULE_OWNER(dev);
+
+	ether_setup(dev);
+	
+	/* 
+	 * Lets do all queueing in IrTTP instead of this device driver.
+	 * Queueing here as well can introduce some strange latency
+	 * problems, which we will avoid by setting the queue size to 0.
+	 */
+	/*
+	 * The bugs in IrTTP and IrLAN that created this latency issue
+	 * have now been fixed, and we can propagate flow control properly
+	 * to the network layer. However, this requires a minimal queue of
+	 * packets for the device.
+	 * Without flow control, the Tx Queue is 14 (ttp) + 0 (dev) = 14
+	 * With flow control, the Tx Queue is 7 (ttp) + 4 (dev) = 11
+	 * See irlan_eth_flow_indication()...
+	 * Note : this number was randomly selected and would need to
+	 * be adjusted.
+	 * Jean II */
+	dev->tx_queue_len = 4;
+}
+
+/*
+ * Function alloc_irlandev
+ *
+ *    Allocate network device and control block
+ *
+ */
+struct net_device *alloc_irlandev(const char *name)
+{
+	return alloc_netdev(sizeof(struct irlan_cb), name,
+			    irlan_eth_setup);
+}
+
+/*
+ * Function irlan_eth_open (dev)
+ *
+ *    Network device has been opened by user
+ *
+ */
+static int irlan_eth_open(struct net_device *dev)
+{
+	struct irlan_cb *self = netdev_priv(dev);
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Ready to play! */
+ 	netif_stop_queue(dev); /* Wait until data link is ready */
+
+	/* We are now open, so time to do some work */
+	self->disconnect_reason = 0;
+	irlan_client_wakeup(self, self->saddr, self->daddr);
+
+	/* Make sure we have a hardware address before we return, 
+	   so DHCP clients gets happy */
+	return wait_event_interruptible(self->open_wait,
+					!self->tsap_data->connected);
+}
+
+/*
+ * Function irlan_eth_close (dev)
+ *
+ *    Stop the ether network device, his function will usually be called by
+ *    ifconfig down. We should now disconnect the link, We start the 
+ *    close timer, so that the instance will be removed if we are unable
+ *    to discover the remote device after the disconnect.
+ */
+static int irlan_eth_close(struct net_device *dev)
+{
+	struct irlan_cb *self = netdev_priv(dev);
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+	
+	/* Stop device */
+	netif_stop_queue(dev);
+	
+	irlan_close_data_channel(self);
+	irlan_close_tsaps(self);
+
+	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
+	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);	
+	
+	/* Remove frames queued on the control channel */
+	skb_queue_purge(&self->client.txq);
+
+	self->client.tx_busy = 0;
+	
+	return 0;
+}
+
+/*
+ * Function irlan_eth_tx (skb)
+ *
+ *    Transmits ethernet frames over IrDA link.
+ *
+ */
+static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct irlan_cb *self = netdev_priv(dev);
+	int ret;
+
+	/* skb headroom large enough to contain all IrDA-headers? */
+	if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) {
+		struct sk_buff *new_skb = 
+			skb_realloc_headroom(skb, self->max_header_size);
+
+		/*  We have to free the original skb anyway */
+		dev_kfree_skb(skb);
+
+		/* Did the realloc succeed? */
+		if (new_skb == NULL)
+			return 0;
+
+		/* Use the new skb instead */
+		skb = new_skb;
+	} 
+
+	dev->trans_start = jiffies;
+
+	/* Now queue the packet in the transport layer */
+	if (self->use_udata)
+		ret = irttp_udata_request(self->tsap_data, skb);
+	else
+		ret = irttp_data_request(self->tsap_data, skb);
+
+	if (ret < 0) {
+		/*   
+		 * IrTTPs tx queue is full, so we just have to
+		 * drop the frame! You might think that we should
+		 * just return -1 and don't deallocate the frame,
+		 * but that is dangerous since it's possible that
+		 * we have replaced the original skb with a new
+		 * one with larger headroom, and that would really
+		 * confuse do_dev_queue_xmit() in dev.c! I have
+		 * tried :-) DB 
+		 */
+		/* irttp_data_request already free the packet */
+		self->stats.tx_dropped++;
+	} else {
+		self->stats.tx_packets++;
+		self->stats.tx_bytes += skb->len; 
+	}
+	
+	return 0;
+}
+
+/*
+ * Function irlan_eth_receive (handle, skb)
+ *
+ *    This function gets the data that is received on the data channel
+ *
+ */
+int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
+{
+	struct irlan_cb *self = instance;
+
+	if (skb == NULL) {
+		++self->stats.rx_dropped; 
+		return 0;
+	}
+	if (skb->len < ETH_HLEN) {
+		IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
+			   __FUNCTION__, skb->len);
+		++self->stats.rx_dropped; 
+		dev_kfree_skb(skb);
+		return 0;
+	}
+		
+	/* 
+	 * Adopt this frame! Important to set all these fields since they 
+	 * might have been previously set by the low level IrDA network
+	 * device driver 
+	 */
+	skb->dev = self->dev;
+	skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
+	
+	self->stats.rx_packets++;
+	self->stats.rx_bytes += skb->len; 
+
+	netif_rx(skb);   /* Eat it! */
+	
+	return 0;
+}
+
+/*
+ * Function irlan_eth_flow (status)
+ *
+ *    Do flow control between IP/Ethernet and IrLAN/IrTTP. This is done by 
+ *    controlling the queue stop/start.
+ *
+ * The IrDA link layer has the advantage to have flow control, and
+ * IrTTP now properly handles that. Flow controlling the higher layers
+ * prevent us to drop Tx packets in here (up to 15% for a TCP socket,
+ * more for UDP socket).
+ * Also, this allow us to reduce the overall transmit queue, which means
+ * less latency in case of mixed traffic.
+ * Jean II
+ */
+void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
+{
+	struct irlan_cb *self;
+	struct net_device *dev;
+
+	self = (struct irlan_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	dev = self->dev;
+
+	IRDA_ASSERT(dev != NULL, return;);
+	
+	IRDA_DEBUG(0, "%s() : flow %s ; running %d\n", __FUNCTION__,
+		   flow == FLOW_STOP ? "FLOW_STOP" : "FLOW_START",
+		   netif_running(dev));
+
+	switch (flow) {
+	case FLOW_STOP:
+		/* IrTTP is full, stop higher layers */
+		netif_stop_queue(dev);
+		break;
+	case FLOW_START:
+	default:
+		/* Tell upper layers that its time to transmit frames again */
+		/* Schedule network layer */
+		netif_wake_queue(dev);
+		break;
+	}
+}
+
+/*
+ * Function irlan_etc_send_gratuitous_arp (dev)
+ *
+ *    Send gratuitous ARP to announce that we have changed
+ *    hardware address, so that all peers updates their ARP tables
+ */
+void irlan_eth_send_gratuitous_arp(struct net_device *dev)
+{
+	struct in_device *in_dev;
+
+	/* 
+	 * When we get a new MAC address do a gratuitous ARP. This
+	 * is useful if we have changed access points on the same
+	 * subnet.  
+	 */
+#ifdef CONFIG_INET
+	IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n");
+	rcu_read_lock();
+	in_dev = __in_dev_get(dev);
+	if (in_dev == NULL)
+		goto out;
+	if (in_dev->ifa_list)
+		
+	arp_send(ARPOP_REQUEST, ETH_P_ARP, 
+		 in_dev->ifa_list->ifa_address,
+		 dev, 
+		 in_dev->ifa_list->ifa_address,
+		 NULL, dev->dev_addr, NULL);
+out:
+	rcu_read_unlock();
+#endif /* CONFIG_INET */
+}
+
+/*
+ * Function set_multicast_list (dev)
+ *
+ *    Configure the filtering of the device
+ *
+ */
+#define HW_MAX_ADDRS 4 /* Must query to get it! */
+static void irlan_eth_set_multicast_list(struct net_device *dev) 
+{
+ 	struct irlan_cb *self = netdev_priv(dev);
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__ );
+
+	/* Check if data channel has been connected yet */
+	if (self->client.state != IRLAN_DATA) {
+		IRDA_DEBUG(1, "%s(), delaying!\n", __FUNCTION__ );
+		return;
+	}
+
+	if (dev->flags & IFF_PROMISC) {
+		/* Enable promiscuous mode */
+		IRDA_WARNING("Promiscous mode not implemented by IrLAN!\n");
+	} 
+	else if ((dev->flags & IFF_ALLMULTI) || dev->mc_count > HW_MAX_ADDRS) {
+		/* Disable promiscuous mode, use normal mode. */
+		IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __FUNCTION__ );
+		/* hardware_set_filter(NULL); */
+
+		irlan_set_multicast_filter(self, TRUE);
+	}
+	else if (dev->mc_count) {
+		IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __FUNCTION__ );
+		/* Walk the address list, and load the filter */
+		/* hardware_set_filter(dev->mc_list); */
+
+		irlan_set_multicast_filter(self, TRUE);
+	}
+	else {
+		IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __FUNCTION__ );
+		irlan_set_multicast_filter(self, FALSE);
+	}
+
+	if (dev->flags & IFF_BROADCAST)
+		irlan_set_broadcast_filter(self, TRUE);
+	else
+		irlan_set_broadcast_filter(self, FALSE);
+}
+
+/*
+ * Function irlan_get_stats (dev)
+ *
+ *    Get the current statistics for this device
+ *
+ */
+static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) 
+{
+	struct irlan_cb *self = netdev_priv(dev);
+
+	return &self->stats;
+}
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
new file mode 100644
index 00000000000..2778d8c6aa3
--- /dev/null
+++ b/net/irda/irlan/irlan_event.c
@@ -0,0 +1,60 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_event.c
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Oct 20 09:10:16 1998
+ * Modified at:   Sat Oct 30 12:59:01 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#include <net/irda/irlan_event.h>
+
+char *irlan_state[] = {
+	"IRLAN_IDLE",
+	"IRLAN_QUERY",
+	"IRLAN_CONN",
+	"IRLAN_INFO",
+	"IRLAN_MEDIA",
+	"IRLAN_OPEN",
+	"IRLAN_WAIT",
+	"IRLAN_ARB",
+	"IRLAN_DATA",
+	"IRLAN_CLOSE",
+	"IRLAN_SYNC",
+};
+
+void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state) 
+{
+	IRDA_DEBUG(2, "%s(), %s\n", __FUNCTION__ , irlan_state[state]);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	self->client.state = state;
+}
+
+void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state) 
+{
+	IRDA_DEBUG(2, "%s(), %s\n", __FUNCTION__ , irlan_state[state]);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	self->provider.state = state;
+}
+
diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c
new file mode 100644
index 00000000000..343c5d4a1a1
--- /dev/null
+++ b/net/irda/irlan/irlan_filter.c
@@ -0,0 +1,246 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_filter.c
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Fri Jan 29 11:16:38 1999
+ * Modified at:   Sat Oct 30 12:58:45 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+#include <net/irda/irlan_common.h>
+
+/*
+ * Function irlan_filter_request (self, skb)
+ *
+ *    Handle filter request from client peer device
+ *
+ */
+void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	if ((self->provider.filter_type == IRLAN_DIRECTED) && 
+	    (self->provider.filter_operation == DYNAMIC))
+	{
+		IRDA_DEBUG(0, "Giving peer a dynamic Ethernet address\n");
+		self->provider.mac_address[0] = 0x40;
+		self->provider.mac_address[1] = 0x00;
+		self->provider.mac_address[2] = 0x00;
+		self->provider.mac_address[3] = 0x00;
+		
+		/* Use arbitration value to generate MAC address */
+		if (self->provider.access_type == ACCESS_PEER) {
+			self->provider.mac_address[4] = 
+				self->provider.send_arb_val & 0xff;
+			self->provider.mac_address[5] = 
+				(self->provider.send_arb_val >> 8) & 0xff;
+		} else {
+			/* Just generate something for now */
+			get_random_bytes(self->provider.mac_address+4, 1);
+			get_random_bytes(self->provider.mac_address+5, 1);
+		}
+
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x03;
+		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
+		irlan_insert_short_param(skb, "MAX_ENTRY", 0x0001);
+		irlan_insert_array_param(skb, "FILTER_ENTRY", 
+					 self->provider.mac_address, 6);
+		return;
+	}
+	
+	if ((self->provider.filter_type == IRLAN_DIRECTED) && 
+	    (self->provider.filter_mode == FILTER))
+	{
+		IRDA_DEBUG(0, "Directed filter on\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+	if ((self->provider.filter_type == IRLAN_DIRECTED) && 
+	    (self->provider.filter_mode == NONE))
+	{
+		IRDA_DEBUG(0, "Directed filter off\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+
+	if ((self->provider.filter_type == IRLAN_BROADCAST) && 
+	    (self->provider.filter_mode == FILTER))
+	{
+		IRDA_DEBUG(0, "Broadcast filter on\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+	if ((self->provider.filter_type == IRLAN_BROADCAST) && 
+	    (self->provider.filter_mode == NONE))
+	{
+		IRDA_DEBUG(0, "Broadcast filter off\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+	if ((self->provider.filter_type == IRLAN_MULTICAST) && 
+	    (self->provider.filter_mode == FILTER))
+	{
+		IRDA_DEBUG(0, "Multicast filter on\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+	if ((self->provider.filter_type == IRLAN_MULTICAST) && 
+	    (self->provider.filter_mode == NONE))
+	{
+		IRDA_DEBUG(0, "Multicast filter off\n");
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x00;
+		return;
+	}
+	if ((self->provider.filter_type == IRLAN_MULTICAST) && 
+	    (self->provider.filter_operation == GET))
+	{
+		IRDA_DEBUG(0, "Multicast filter get\n");
+		skb->data[0] = 0x00; /* Success? */
+		skb->data[1] = 0x02;
+		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
+		irlan_insert_short_param(skb, "MAX_ENTRY", 16);
+		return;
+	}
+	skb->data[0] = 0x00; /* Command not supported */
+	skb->data[1] = 0x00;
+
+	IRDA_DEBUG(0, "Not implemented!\n");
+}
+
+/*
+ * Function check_request_param (self, param, value)
+ *
+ *    Check parameters in request from peer device
+ *
+ */
+void irlan_check_command_param(struct irlan_cb *self, char *param, char *value)
+{
+	__u8 *bytes;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	bytes = value;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	IRDA_DEBUG(4, "%s, %s\n", param, value);
+
+	/*
+	 *  This is experimental!! DB.
+	 */
+	 if (strcmp(param, "MODE") == 0) {
+		IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+		self->use_udata = TRUE;
+		return;
+	}
+
+	/*
+	 *  FILTER_TYPE
+	 */
+	if (strcmp(param, "FILTER_TYPE") == 0) {
+		if (strcmp(value, "DIRECTED") == 0) {
+			self->provider.filter_type = IRLAN_DIRECTED;
+			return;
+		}
+		if (strcmp(value, "MULTICAST") == 0) {
+			self->provider.filter_type = IRLAN_MULTICAST;
+			return;
+		}
+		if (strcmp(value, "BROADCAST") == 0) {
+			self->provider.filter_type = IRLAN_BROADCAST;
+			return;
+		}
+	}
+	/*
+	 *  FILTER_MODE
+	 */
+	if (strcmp(param, "FILTER_MODE") == 0) {
+		if (strcmp(value, "ALL") == 0) {
+			self->provider.filter_mode = ALL;
+			return;
+		}
+		if (strcmp(value, "FILTER") == 0) {
+			self->provider.filter_mode = FILTER;
+			return;
+		}
+		if (strcmp(value, "NONE") == 0) {
+			self->provider.filter_mode = FILTER;
+			return;
+		}
+	}
+	/*
+	 *  FILTER_OPERATION
+	 */
+	if (strcmp(param, "FILTER_OPERATION") == 0) {
+		if (strcmp(value, "DYNAMIC") == 0) {
+			self->provider.filter_operation = DYNAMIC;
+			return;
+		}
+		if (strcmp(value, "GET") == 0) {
+			self->provider.filter_operation = GET;
+			return;
+		}
+	}
+}
+
+/*
+ * Function irlan_print_filter (filter_type, buf)
+ *
+ *    Print status of filter. Used by /proc file system
+ *
+ */
+#ifdef CONFIG_PROC_FS
+#define MASK2STR(m,s)	{ .mask = m, .str = s }
+
+void irlan_print_filter(struct seq_file *seq, int filter_type)
+{
+	static struct {
+		int mask;
+		const char *str;
+	} filter_mask2str[] = {
+		MASK2STR(IRLAN_DIRECTED,	"DIRECTED"),
+		MASK2STR(IRLAN_FUNCTIONAL,	"FUNCTIONAL"),
+		MASK2STR(IRLAN_GROUP,		"GROUP"),
+		MASK2STR(IRLAN_MAC_FRAME,	"MAC_FRAME"),
+		MASK2STR(IRLAN_MULTICAST,	"MULTICAST"),
+		MASK2STR(IRLAN_BROADCAST,	"BROADCAST"),
+		MASK2STR(IRLAN_IPX_SOCKET,	"IPX_SOCKET"),
+		MASK2STR(0,			NULL)
+	}, *p;
+
+	for (p = filter_mask2str; p->str; p++) {
+		if (filter_type & p->mask)
+			seq_printf(seq, "%s ", p->str);
+	}
+	seq_putc(seq, '\n');
+}
+#undef MASK2STR
+#endif
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
new file mode 100644
index 00000000000..39c202d1c37
--- /dev/null
+++ b/net/irda/irlan/irlan_provider.c
@@ -0,0 +1,413 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_provider.c
+ * Version:       0.9
+ * Description:   IrDA LAN Access Protocol Implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sat Oct 30 12:52:10 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
+ *                slip.c by Laurence Culhane,   <loz@holmes.demon.co.uk>
+ *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/bitops.h>
+
+#include <asm/system.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irttp.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/iriap.h>
+#include <net/irda/timer.h>
+
+#include <net/irda/irlan_common.h>
+#include <net/irda/irlan_eth.h>
+#include <net/irda/irlan_event.h>
+#include <net/irda/irlan_provider.h>
+#include <net/irda/irlan_filter.h>
+#include <net/irda/irlan_client.h>
+
+static void irlan_provider_connect_indication(void *instance, void *sap, 
+					      struct qos_info *qos, 
+					      __u32 max_sdu_size,
+					      __u8 max_header_size,
+					      struct sk_buff *skb);
+
+/*
+ * Function irlan_provider_control_data_indication (handle, skb)
+ *
+ *    This function gets the data that is received on the control channel
+ *
+ */
+static int irlan_provider_data_indication(void *instance, void *sap, 
+					  struct sk_buff *skb) 
+{
+	struct irlan_cb *self;
+	__u8 code;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	self = (struct irlan_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	code = skb->data[0];
+	switch(code) {
+	case CMD_GET_PROVIDER_INFO:
+		IRDA_DEBUG(4, "Got GET_PROVIDER_INFO command!\n");
+		irlan_do_provider_event(self, IRLAN_GET_INFO_CMD, skb); 
+		break;
+
+	case CMD_GET_MEDIA_CHAR:
+		IRDA_DEBUG(4, "Got GET_MEDIA_CHAR command!\n");
+		irlan_do_provider_event(self, IRLAN_GET_MEDIA_CMD, skb); 
+		break;
+	case CMD_OPEN_DATA_CHANNEL:
+		IRDA_DEBUG(4, "Got OPEN_DATA_CHANNEL command!\n");
+		irlan_do_provider_event(self, IRLAN_OPEN_DATA_CMD, skb); 
+		break;
+	case CMD_FILTER_OPERATION:
+		IRDA_DEBUG(4, "Got FILTER_OPERATION command!\n");
+		irlan_do_provider_event(self, IRLAN_FILTER_CONFIG_CMD, skb);
+		break;
+	case CMD_RECONNECT_DATA_CHAN:
+		IRDA_DEBUG(2, "%s(), Got RECONNECT_DATA_CHAN command\n", __FUNCTION__ );
+		IRDA_DEBUG(2, "%s(), NOT IMPLEMENTED\n", __FUNCTION__ );
+		break;
+	case CMD_CLOSE_DATA_CHAN:
+		IRDA_DEBUG(2, "Got CLOSE_DATA_CHAN command!\n");
+		IRDA_DEBUG(2, "%s(), NOT IMPLEMENTED\n", __FUNCTION__ );
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown command!\n", __FUNCTION__ );
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Function irlan_provider_connect_indication (handle, skb, priv)
+ *
+ *    Got connection from peer IrLAN client
+ *
+ */
+static void irlan_provider_connect_indication(void *instance, void *sap, 
+					      struct qos_info *qos,
+					      __u32 max_sdu_size, 
+					      __u8 max_header_size,
+					      struct sk_buff *skb)
+{
+	struct irlan_cb *self;
+	struct tsap_cb *tsap;
+	__u32 saddr, daddr;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
+	
+	self = (struct irlan_cb *) instance;
+	tsap = (struct tsap_cb *) sap;
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+	
+	IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;);
+	IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;);
+
+	daddr = irttp_get_daddr(tsap);
+	saddr = irttp_get_saddr(tsap);
+	self->provider.max_sdu_size = max_sdu_size;
+	self->provider.max_header_size = max_header_size;
+
+	irlan_do_provider_event(self, IRLAN_CONNECT_INDICATION, NULL);
+
+	/*  
+	 * If we are in peer mode, the client may not have got the discovery
+	 * indication it needs to make progress. If the client is still in 
+	 * IDLE state, we must kick it. 
+	 */
+	if ((self->provider.access_type == ACCESS_PEER) && 
+	    (self->client.state == IRLAN_IDLE)) 
+	{
+		irlan_client_wakeup(self, self->saddr, self->daddr);
+	}
+}
+
+/*
+ * Function irlan_provider_connect_response (handle)
+ *
+ *    Accept incoming connection
+ *
+ */
+void irlan_provider_connect_response(struct irlan_cb *self,
+				     struct tsap_cb *tsap)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	/* Just accept */
+	irttp_connect_response(tsap, IRLAN_MTU, NULL);
+}
+
+static void irlan_provider_disconnect_indication(void *instance, void *sap, 
+						 LM_REASON reason, 
+						 struct sk_buff *userdata) 
+{
+	struct irlan_cb *self;
+	struct tsap_cb *tsap;
+
+	IRDA_DEBUG(4, "%s(), reason=%d\n", __FUNCTION__ , reason);
+	
+	self = (struct irlan_cb *) instance;
+	tsap = (struct tsap_cb *) sap;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);	
+	IRDA_ASSERT(tsap != NULL, return;);
+	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
+	
+	IRDA_ASSERT(tsap == self->provider.tsap_ctrl, return;);
+	
+	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
+}
+
+/*
+ * Function irlan_parse_open_data_cmd (self, skb)
+ *
+ *    
+ *
+ */
+int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb)
+{
+	int ret;
+	
+	ret = irlan_provider_parse_command(self, CMD_OPEN_DATA_CHANNEL, skb);
+
+	/* Open data channel */
+	irlan_open_data_tsap(self);
+
+	return ret;
+}
+
+/*
+ * Function parse_command (skb)
+ *
+ *    Extract all parameters from received buffer, then feed them to 
+ *    check_params for parsing
+ *
+ */
+int irlan_provider_parse_command(struct irlan_cb *self, int cmd,
+				 struct sk_buff *skb) 
+{
+	__u8 *frame;
+	__u8 *ptr;
+	int count;
+	__u16 val_len;
+	int i;
+	char *name;
+        char *value;
+	int ret = RSP_SUCCESS;
+	
+	IRDA_ASSERT(skb != NULL, return -RSP_PROTOCOL_ERROR;);
+	
+	IRDA_DEBUG(4, "%s(), skb->len=%d\n", __FUNCTION__ , (int)skb->len);
+
+	IRDA_ASSERT(self != NULL, return -RSP_PROTOCOL_ERROR;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -RSP_PROTOCOL_ERROR;);
+	
+	if (!skb)
+		return -RSP_PROTOCOL_ERROR;
+
+	frame = skb->data;
+
+	name = kmalloc(255, GFP_ATOMIC);
+	if (!name)
+		return -RSP_INSUFFICIENT_RESOURCES;
+	value = kmalloc(1016, GFP_ATOMIC);
+	if (!value) {
+		kfree(name);
+		return -RSP_INSUFFICIENT_RESOURCES;
+	}
+
+	/* How many parameters? */
+	count = frame[1];
+
+	IRDA_DEBUG(4, "Got %d parameters\n", count);
+	
+	ptr = frame+2;
+	
+	/* For all parameters */
+ 	for (i=0; i<count;i++) {
+		ret = irlan_extract_param(ptr, name, value, &val_len);
+		if (ret < 0) {
+			IRDA_DEBUG(2, "%s(), IrLAN, Error!\n", __FUNCTION__ );
+			break;
+		}
+		ptr+=ret;
+		ret = RSP_SUCCESS;
+		irlan_check_command_param(self, name, value);
+	}
+	/* Cleanup */
+	kfree(name);
+	kfree(value);
+
+	return ret;
+}
+
+/*
+ * Function irlan_provider_send_reply (self, info)
+ *
+ *    Send reply to query to peer IrLAN layer
+ *
+ */
+void irlan_provider_send_reply(struct irlan_cb *self, int command, 
+			       int ret_code)
+{
+	struct sk_buff *skb;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
+
+	skb = dev_alloc_skb(128);
+	if (!skb)
+		return;
+
+	/* Reserve space for TTP, LMP, and LAP header */
+	skb_reserve(skb, self->provider.max_header_size);
+	skb_put(skb, 2);
+       
+	switch (command) {
+	case CMD_GET_PROVIDER_INFO:
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x02; /* 2 parameters */
+		switch (self->media) {
+		case MEDIA_802_3:
+			irlan_insert_string_param(skb, "MEDIA", "802.3");
+			break;
+		case MEDIA_802_5:
+			irlan_insert_string_param(skb, "MEDIA", "802.5");
+			break;
+		default:
+			IRDA_DEBUG(2, "%s(), unknown media type!\n", __FUNCTION__ );
+			break;
+		}
+		irlan_insert_short_param(skb, "IRLAN_VER", 0x0101);
+		break;
+
+	case CMD_GET_MEDIA_CHAR:
+		skb->data[0] = 0x00; /* Success */
+		skb->data[1] = 0x05; /* 5 parameters */
+		irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
+		irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
+		irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
+
+		switch (self->provider.access_type) {
+		case ACCESS_DIRECT:
+			irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
+			break;
+		case ACCESS_PEER:
+			irlan_insert_string_param(skb, "ACCESS_TYPE", "PEER");
+			break;
+		case ACCESS_HOSTED:
+			irlan_insert_string_param(skb, "ACCESS_TYPE", "HOSTED");
+			break;
+		default:
+			IRDA_DEBUG(2, "%s(), Unknown access type\n", __FUNCTION__ );
+			break;
+		}
+		irlan_insert_short_param(skb, "MAX_FRAME", 0x05ee);
+		break;
+	case CMD_OPEN_DATA_CHANNEL:
+		skb->data[0] = 0x00; /* Success */
+		if (self->provider.send_arb_val) {
+			skb->data[1] = 0x03; /* 3 parameters */
+			irlan_insert_short_param(skb, "CON_ARB", 
+						 self->provider.send_arb_val);
+		} else
+			skb->data[1] = 0x02; /* 2 parameters */
+		irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data);
+		irlan_insert_array_param(skb, "RECONNECT_KEY", "LINUX RULES!",
+					 12);
+		break;
+	case CMD_FILTER_OPERATION:
+		irlan_filter_request(self, skb);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown command!\n", __FUNCTION__ );
+		break;
+	}
+
+	irttp_data_request(self->provider.tsap_ctrl, skb);
+}
+
+/*
+ * Function irlan_provider_register(void)
+ *
+ *    Register provider support so we can accept incoming connections.
+ * 
+ */
+int irlan_provider_open_ctrl_tsap(struct irlan_cb *self)
+{
+	struct tsap_cb *tsap;
+	notify_t notify;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+
+	/* Check if already open */
+	if (self->provider.tsap_ctrl)
+		return -1;
+	
+	/*
+	 *  First register well known control TSAP
+	 */
+	irda_notify_init(&notify);
+	notify.data_indication       = irlan_provider_data_indication;
+	notify.connect_indication    = irlan_provider_connect_indication;
+	notify.disconnect_indication = irlan_provider_disconnect_indication;
+	notify.instance = self;
+	strlcpy(notify.name, "IrLAN ctrl (p)", sizeof(notify.name));
+
+	tsap = irttp_open_tsap(LSAP_ANY, 1, &notify);
+	if (!tsap) {
+		IRDA_DEBUG(2, "%s(), Got no tsap!\n", __FUNCTION__ );
+		return -1;
+	}
+	self->provider.tsap_ctrl = tsap;
+
+	/* Register with LM-IAS */
+	irlan_ias_register(self, tsap->stsap_sel);
+
+	return 0;
+}
+
diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c
new file mode 100644
index 00000000000..5a086f9827e
--- /dev/null
+++ b/net/irda/irlan/irlan_provider_event.c
@@ -0,0 +1,241 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_provider_event.c
+ * Version:       0.9
+ * Description:   IrLAN provider state machine)
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sat Oct 30 12:52:41 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <net/irda/irda.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irttp.h>
+
+#include <net/irda/irlan_provider.h>
+#include <net/irda/irlan_event.h>
+
+static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb);
+static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb);
+static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb);
+static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb);
+
+static int (*state[])(struct irlan_cb *self, IRLAN_EVENT event, 
+		      struct sk_buff *skb) = 
+{ 
+	irlan_provider_state_idle,
+	NULL, /* Query */
+	NULL, /* Info */
+	irlan_provider_state_info,
+	NULL, /* Media */
+	irlan_provider_state_open,
+	NULL, /* Wait */
+	NULL, /* Arb */
+	irlan_provider_state_data,
+	NULL, /* Close */
+	NULL, /* Sync */
+};
+
+void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, 
+			     struct sk_buff *skb) 
+{
+	IRDA_ASSERT(*state[ self->provider.state] != NULL, return;);
+
+	(*state[self->provider.state]) (self, event, skb);
+}
+
+/*
+ * Function irlan_provider_state_idle (event, skb, info)
+ *
+ *    IDLE, We are waiting for an indication that there is a provider
+ *    available.
+ */
+static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
+				     struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	
+	switch(event) {
+	case IRLAN_CONNECT_INDICATION:
+	     irlan_provider_connect_response( self, self->provider.tsap_ctrl);
+	     irlan_next_provider_state( self, IRLAN_INFO);
+	     break;
+	default:
+		IRDA_DEBUG(4, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irlan_provider_state_info (self, event, skb, info)
+ *
+ *    INFO, We have issued a GetInfo command and is awaiting a reply.
+ */
+static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb) 
+{
+	int ret;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	switch(event) {
+	case IRLAN_GET_INFO_CMD:
+		/* Be sure to use 802.3 in case of peer mode */
+		if (self->provider.access_type == ACCESS_PEER) {
+			self->media = MEDIA_802_3;
+			
+			/* Check if client has started yet */
+			if (self->client.state == IRLAN_IDLE) {
+				/* This should get the client going */
+				irlmp_discovery_request(8);
+			}
+		}
+
+		irlan_provider_send_reply(self, CMD_GET_PROVIDER_INFO, 
+					  RSP_SUCCESS);
+		/* Keep state */
+		break;
+	case IRLAN_GET_MEDIA_CMD: 
+		irlan_provider_send_reply(self, CMD_GET_MEDIA_CHAR, 
+					  RSP_SUCCESS);
+		/* Keep state */
+		break;		
+	case IRLAN_OPEN_DATA_CMD:
+		ret = irlan_parse_open_data_cmd(self, skb);
+		if (self->provider.access_type == ACCESS_PEER) {
+			/* FIXME: make use of random functions! */
+			self->provider.send_arb_val = (jiffies & 0xffff);
+		}
+		irlan_provider_send_reply(self, CMD_OPEN_DATA_CHANNEL, ret);
+
+		if (ret == RSP_SUCCESS) {
+			irlan_next_provider_state(self, IRLAN_OPEN);
+
+			/* Signal client that we are now open */
+			irlan_do_client_event(self, IRLAN_PROVIDER_SIGNAL, NULL);
+		}
+		break;
+	case IRLAN_LMP_DISCONNECT:  /* FALLTHROUGH */
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_provider_state(self, IRLAN_IDLE);
+		break;
+	default:
+		IRDA_DEBUG( 0, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+/*
+ * Function irlan_provider_state_open (self, event, skb, info)
+ *
+ *    OPEN, The client has issued a OpenData command and is awaiting a
+ *    reply
+ *
+ */
+static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+
+	switch(event) {
+	case IRLAN_FILTER_CONFIG_CMD:
+		irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
+		irlan_provider_send_reply(self, CMD_FILTER_OPERATION, 
+					  RSP_SUCCESS);
+		/* Keep state */
+		break;
+	case IRLAN_DATA_CONNECT_INDICATION: 
+		irlan_next_provider_state(self, IRLAN_DATA);
+		irlan_provider_connect_response(self, self->tsap_data);
+		break;
+	case IRLAN_LMP_DISCONNECT:  /* FALLTHROUGH */
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_provider_state(self, IRLAN_IDLE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irlan_provider_state_data (self, event, skb, info)
+ *
+ *    DATA, The data channel is connected, allowing data transfers between
+ *    the local and remote machines.
+ *
+ */
+static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event, 
+				     struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
+
+	switch(event) {
+	case IRLAN_FILTER_CONFIG_CMD:
+		irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
+		irlan_provider_send_reply(self, CMD_FILTER_OPERATION, 
+					  RSP_SUCCESS);
+		break;
+	case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
+	case IRLAN_LAP_DISCONNECT:
+		irlan_next_provider_state(self, IRLAN_IDLE);
+		break;
+	default:
+		IRDA_DEBUG( 0, "%s(), Unknown event %d\n", __FUNCTION__ , event);
+		break;
+	}
+	if (skb)
+		dev_kfree_skb(skb);
+	
+	return 0;
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
new file mode 100644
index 00000000000..046ad0750e4
--- /dev/null
+++ b/net/irda/irlap.c
@@ -0,0 +1,1258 @@
+/*********************************************************************
+ *
+ * Filename:      irlap.c
+ * Version:       1.0
+ * Description:   IrLAP implementation for Linux
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Tue Dec 14 09:26:44 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with this program; if not, write to the Free Software
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *     MA 02111-1307 USA
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irda_device.h>
+#include <net/irda/irqueue.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irlmp_frame.h>
+#include <net/irda/irlap_frame.h>
+#include <net/irda/irlap.h>
+#include <net/irda/timer.h>
+#include <net/irda/qos.h>
+
+static hashbin_t *irlap = NULL;
+int sysctl_slot_timeout = SLOT_TIMEOUT * 1000 / HZ;
+
+/* This is the delay of missed pf period before generating an event
+ * to the application. The spec mandate 3 seconds, but in some cases
+ * it's way too long. - Jean II */
+int sysctl_warn_noreply_time = 3;
+
+extern void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
+static void __irlap_close(struct irlap_cb *self);
+static void irlap_init_qos_capabilities(struct irlap_cb *self,
+					struct qos_info *qos_user);
+
+#ifdef CONFIG_IRDA_DEBUG
+static char *lap_reasons[] = {
+	"ERROR, NOT USED",
+	"LAP_DISC_INDICATION",
+	"LAP_NO_RESPONSE",
+	"LAP_RESET_INDICATION",
+	"LAP_FOUND_NONE",
+	"LAP_MEDIA_BUSY",
+	"LAP_PRIMARY_CONFLICT",
+	"ERROR, NOT USED",
+};
+#endif	/* CONFIG_IRDA_DEBUG */
+
+int __init irlap_init(void)
+{
+	/* Check if the compiler did its job properly.
+	 * May happen on some ARM configuration, check with Russell King. */
+	IRDA_ASSERT(sizeof(struct xid_frame) == 14, ;);
+	IRDA_ASSERT(sizeof(struct test_frame) == 10, ;);
+	IRDA_ASSERT(sizeof(struct ua_frame) == 10, ;);
+	IRDA_ASSERT(sizeof(struct snrm_frame) == 11, ;);
+
+	/* Allocate master array */
+	irlap = hashbin_new(HB_LOCK);
+	if (irlap == NULL) {
+	        IRDA_ERROR("%s: can't allocate irlap hashbin!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void __exit irlap_cleanup(void)
+{
+	IRDA_ASSERT(irlap != NULL, return;);
+
+	hashbin_delete(irlap, (FREE_FUNC) __irlap_close);
+}
+
+/*
+ * Function irlap_open (driver)
+ *
+ *    Initialize IrLAP layer
+ *
+ */
+struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
+			    const char *hw_name)
+{
+	struct irlap_cb *self;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/* Initialize the irlap structure. */
+	self = kmalloc(sizeof(struct irlap_cb), GFP_KERNEL);
+	if (self == NULL)
+		return NULL;
+
+	memset(self, 0, sizeof(struct irlap_cb));
+	self->magic = LAP_MAGIC;
+
+	/* Make a binding between the layers */
+	self->netdev = dev;
+	self->qos_dev = qos;
+	/* Copy hardware name */
+	if(hw_name != NULL) {
+		strlcpy(self->hw_name, hw_name, sizeof(self->hw_name));
+	} else {
+		self->hw_name[0] = '\0';
+	}
+
+	/* FIXME: should we get our own field? */
+	dev->atalk_ptr = self;
+
+	self->state = LAP_OFFLINE;
+
+	/* Initialize transmit queue */
+	skb_queue_head_init(&self->txq);
+	skb_queue_head_init(&self->txq_ultra);
+	skb_queue_head_init(&self->wx_list);
+
+	/* My unique IrLAP device address! */
+	/* We don't want the broadcast address, neither the NULL address
+	 * (most often used to signify "invalid"), and we don't want an
+	 * address already in use (otherwise connect won't be able
+	 * to select the proper link). - Jean II */
+	do {
+		get_random_bytes(&self->saddr, sizeof(self->saddr));
+	} while ((self->saddr == 0x0) || (self->saddr == BROADCAST) ||
+		 (hashbin_lock_find(irlap, self->saddr, NULL)) );
+	/* Copy to the driver */
+	memcpy(dev->dev_addr, &self->saddr, 4);
+
+	init_timer(&self->slot_timer);
+	init_timer(&self->query_timer);
+	init_timer(&self->discovery_timer);
+	init_timer(&self->final_timer);
+	init_timer(&self->poll_timer);
+	init_timer(&self->wd_timer);
+	init_timer(&self->backoff_timer);
+	init_timer(&self->media_busy_timer);
+
+	irlap_apply_default_connection_parameters(self);
+
+	self->N3 = 3; /* # connections attemts to try before giving up */
+
+	self->state = LAP_NDM;
+
+	hashbin_insert(irlap, (irda_queue_t *) self, self->saddr, NULL);
+
+	irlmp_register_link(self, self->saddr, &self->notify);
+
+	return self;
+}
+EXPORT_SYMBOL(irlap_open);
+
+/*
+ * Function __irlap_close (self)
+ *
+ *    Remove IrLAP and all allocated memory. Stop any pending timers.
+ *
+ */
+static void __irlap_close(struct irlap_cb *self)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Stop timers */
+	del_timer(&self->slot_timer);
+	del_timer(&self->query_timer);
+	del_timer(&self->discovery_timer);
+	del_timer(&self->final_timer);
+	del_timer(&self->poll_timer);
+	del_timer(&self->wd_timer);
+	del_timer(&self->backoff_timer);
+	del_timer(&self->media_busy_timer);
+
+	irlap_flush_all_queues(self);
+
+	self->magic = 0;
+
+	kfree(self);
+}
+
+/*
+ * Function irlap_close (self)
+ *
+ *    Remove IrLAP instance
+ *
+ */
+void irlap_close(struct irlap_cb *self)
+{
+	struct irlap_cb *lap;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* We used to send a LAP_DISC_INDICATION here, but this was
+	 * racy. This has been move within irlmp_unregister_link()
+	 * itself. Jean II */
+
+	/* Kill the LAP and all LSAPs on top of it */
+	irlmp_unregister_link(self->saddr);
+	self->notify.instance = NULL;
+
+	/* Be sure that we manage to remove ourself from the hash */
+	lap = hashbin_remove(irlap, self->saddr, NULL);
+	if (!lap) {
+		IRDA_DEBUG(1, "%s(), Didn't find myself!\n", __FUNCTION__);
+		return;
+	}
+	__irlap_close(lap);
+}
+EXPORT_SYMBOL(irlap_close);
+
+/*
+ * Function irlap_connect_indication (self, skb)
+ *
+ *    Another device is attempting to make a connection
+ *
+ */
+void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlap_init_qos_capabilities(self, NULL); /* No user QoS! */
+
+	irlmp_link_connect_indication(self->notify.instance, self->saddr,
+				      self->daddr, &self->qos_tx, skb);
+}
+
+/*
+ * Function irlap_connect_response (self, skb)
+ *
+ *    Service user has accepted incoming connection
+ *
+ */
+void irlap_connect_response(struct irlap_cb *self, struct sk_buff *userdata)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	irlap_do_event(self, CONNECT_RESPONSE, userdata, NULL);
+}
+
+/*
+ * Function irlap_connect_request (self, daddr, qos_user, sniff)
+ *
+ *    Request connection with another device, sniffing is not implemented
+ *    yet.
+ *
+ */
+void irlap_connect_request(struct irlap_cb *self, __u32 daddr,
+			   struct qos_info *qos_user, int sniff)
+{
+	IRDA_DEBUG(3, "%s(), daddr=0x%08x\n", __FUNCTION__, daddr);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	self->daddr = daddr;
+
+	/*
+	 *  If the service user specifies QoS values for this connection,
+	 *  then use them
+	 */
+	irlap_init_qos_capabilities(self, qos_user);
+
+	if ((self->state == LAP_NDM) && !self->media_busy)
+		irlap_do_event(self, CONNECT_REQUEST, NULL, NULL);
+	else
+		self->connect_pending = TRUE;
+}
+
+/*
+ * Function irlap_connect_confirm (self, skb)
+ *
+ *    Connection request has been accepted
+ *
+ */
+void irlap_connect_confirm(struct irlap_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlmp_link_connect_confirm(self->notify.instance, &self->qos_tx, skb);
+}
+
+/*
+ * Function irlap_data_indication (self, skb)
+ *
+ *    Received data frames from IR-port, so we just pass them up to
+ *    IrLMP for further processing
+ *
+ */
+void irlap_data_indication(struct irlap_cb *self, struct sk_buff *skb,
+			   int unreliable)
+{
+	/* Hide LAP header from IrLMP layer */
+	skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
+
+	irlmp_link_data_indication(self->notify.instance, skb, unreliable);
+}
+
+
+/*
+ * Function irlap_data_request (self, skb)
+ *
+ *    Queue data for transmission, must wait until XMIT state
+ *
+ */
+void irlap_data_request(struct irlap_cb *self, struct sk_buff *skb,
+			int unreliable)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
+		    return;);
+	skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
+
+	/*
+	 *  Must set frame format now so that the rest of the code knows
+	 *  if its dealing with an I or an UI frame
+	 */
+	if (unreliable)
+		skb->data[1] = UI_FRAME;
+	else
+		skb->data[1] = I_FRAME;
+
+	/* Don't forget to refcount it - see irlmp_connect_request(). */
+	skb_get(skb);
+
+	/* Add at the end of the queue (keep ordering) - Jean II */
+	skb_queue_tail(&self->txq, skb);
+
+	/*
+	 *  Send event if this frame only if we are in the right state
+	 *  FIXME: udata should be sent first! (skb_queue_head?)
+	 */
+	if ((self->state == LAP_XMIT_P) || (self->state == LAP_XMIT_S)) {
+		/* If we are not already processing the Tx queue, trigger
+		 * transmission immediately - Jean II */
+		if((skb_queue_len(&self->txq) <= 1) && (!self->local_busy))
+			irlap_do_event(self, DATA_REQUEST, skb, NULL);
+		/* Otherwise, the packets will be sent normally at the
+		 * next pf-poll - Jean II */
+	}
+}
+
+/*
+ * Function irlap_unitdata_request (self, skb)
+ *
+ *    Send Ultra data. This is data that must be sent outside any connection
+ *
+ */
+#ifdef CONFIG_IRDA_ULTRA
+void irlap_unitdata_request(struct irlap_cb *self, struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
+	       return;);
+	skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
+
+	skb->data[0] = CBROADCAST;
+	skb->data[1] = UI_FRAME;
+
+	/* Don't need to refcount, see irlmp_connless_data_request() */
+
+	skb_queue_tail(&self->txq_ultra, skb);
+
+	irlap_do_event(self, SEND_UI_FRAME, NULL, NULL);
+}
+#endif /*CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irlap_udata_indication (self, skb)
+ *
+ *    Receive Ultra data. This is data that is received outside any connection
+ *
+ */
+#ifdef CONFIG_IRDA_ULTRA
+void irlap_unitdata_indication(struct irlap_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Hide LAP header from IrLMP layer */
+	skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
+
+	irlmp_link_unitdata_indication(self->notify.instance, skb);
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irlap_disconnect_request (void)
+ *
+ *    Request to disconnect connection by service user
+ */
+void irlap_disconnect_request(struct irlap_cb *self)
+{
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Don't disconnect until all data frames are successfully sent */
+	if (skb_queue_len(&self->txq) > 0) {
+		self->disconnect_pending = TRUE;
+
+		return;
+	}
+
+	/* Check if we are in the right state for disconnecting */
+	switch (self->state) {
+	case LAP_XMIT_P:        /* FALLTROUGH */
+	case LAP_XMIT_S:        /* FALLTROUGH */
+	case LAP_CONN:          /* FALLTROUGH */
+	case LAP_RESET_WAIT:    /* FALLTROUGH */
+	case LAP_RESET_CHECK:
+		irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), disconnect pending!\n", __FUNCTION__);
+		self->disconnect_pending = TRUE;
+		break;
+	}
+}
+
+/*
+ * Function irlap_disconnect_indication (void)
+ *
+ *    Disconnect request from other device
+ *
+ */
+void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason)
+{
+	IRDA_DEBUG(1, "%s(), reason=%s\n", __FUNCTION__, lap_reasons[reason]);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Flush queues */
+	irlap_flush_all_queues(self);
+
+	switch (reason) {
+	case LAP_RESET_INDICATION:
+		IRDA_DEBUG(1, "%s(), Sending reset request!\n", __FUNCTION__);
+		irlap_do_event(self, RESET_REQUEST, NULL, NULL);
+		break;
+	case LAP_NO_RESPONSE:	   /* FALLTROUGH */
+	case LAP_DISC_INDICATION:  /* FALLTROUGH */
+	case LAP_FOUND_NONE:       /* FALLTROUGH */
+	case LAP_MEDIA_BUSY:
+		irlmp_link_disconnect_indication(self->notify.instance, self,
+						 reason, NULL);
+		break;
+	default:
+		IRDA_ERROR("%s: Unknown reason %d\n", __FUNCTION__, reason);
+	}
+}
+
+/*
+ * Function irlap_discovery_request (gen_addr_bit)
+ *
+ *    Start one single discovery operation.
+ *
+ */
+void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery)
+{
+	struct irlap_info info;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(discovery != NULL, return;);
+
+	IRDA_DEBUG(4, "%s(), nslots = %d\n", __FUNCTION__, discovery->nslots);
+
+	IRDA_ASSERT((discovery->nslots == 1) || (discovery->nslots == 6) ||
+		    (discovery->nslots == 8) || (discovery->nslots == 16),
+		    return;);
+
+	/* Discovery is only possible in NDM mode */
+	if (self->state != LAP_NDM) {
+		IRDA_DEBUG(4, "%s(), discovery only possible in NDM mode\n",
+			   __FUNCTION__);
+		irlap_discovery_confirm(self, NULL);
+		/* Note : in theory, if we are not in NDM, we could postpone
+		 * the discovery like we do for connection request.
+		 * In practice, it's not worth it. If the media was busy,
+		 * it's likely next time around it won't be busy. If we are
+		 * in REPLY state, we will get passive discovery info & event.
+		 * Jean II */
+		return;
+	}
+
+	/* Check if last discovery request finished in time, or if
+	 * it was aborted due to the media busy flag. */
+	if (self->discovery_log != NULL) {
+		hashbin_delete(self->discovery_log, (FREE_FUNC) kfree);
+		self->discovery_log = NULL;
+	}
+
+	/* All operations will occur at predictable time, no need to lock */
+	self->discovery_log = hashbin_new(HB_NOLOCK);
+
+	if (self->discovery_log == NULL) {
+		IRDA_WARNING("%s(), Unable to allocate discovery log!\n",
+			     __FUNCTION__);
+		return;
+	}
+
+	info.S = discovery->nslots; /* Number of slots */
+	info.s = 0; /* Current slot */
+
+	self->discovery_cmd = discovery;
+	info.discovery = discovery;
+
+	/* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */
+	self->slot_timeout = sysctl_slot_timeout * HZ / 1000;
+
+	irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info);
+}
+
+/*
+ * Function irlap_discovery_confirm (log)
+ *
+ *    A device has been discovered in front of this station, we
+ *    report directly to LMP.
+ */
+void irlap_discovery_confirm(struct irlap_cb *self, hashbin_t *discovery_log)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	IRDA_ASSERT(self->notify.instance != NULL, return;);
+
+	/*
+	 * Check for successful discovery, since we are then allowed to clear
+	 * the media busy condition (IrLAP 6.13.4 - p.94). This should allow
+	 * us to make connection attempts much faster and easier (i.e. no
+	 * collisions).
+	 * Setting media busy to false will also generate an event allowing
+	 * to process pending events in NDM state machine.
+	 * Note : the spec doesn't define what's a successful discovery is.
+	 * If we want Ultra to work, it's successful even if there is
+	 * nobody discovered - Jean II
+	 */
+	if (discovery_log)
+		irda_device_set_media_busy(self->netdev, FALSE);
+
+	/* Inform IrLMP */
+	irlmp_link_discovery_confirm(self->notify.instance, discovery_log);
+}
+
+/*
+ * Function irlap_discovery_indication (log)
+ *
+ *    Somebody is trying to discover us!
+ *
+ */
+void irlap_discovery_indication(struct irlap_cb *self, discovery_t *discovery)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(discovery != NULL, return;);
+
+	IRDA_ASSERT(self->notify.instance != NULL, return;);
+
+	/* A device is very likely to connect immediately after it performs
+	 * a successful discovery. This means that in our case, we are much
+	 * more likely to receive a connection request over the medium.
+	 * So, we backoff to avoid collisions.
+	 * IrLAP spec 6.13.4 suggest 100ms...
+	 * Note : this little trick actually make a *BIG* difference. If I set
+	 * my Linux box with discovery enabled and one Ultra frame sent every
+	 * second, my Palm has no trouble connecting to it every time !
+	 * Jean II */
+	irda_device_set_media_busy(self->netdev, SMALL);
+
+	irlmp_link_discovery_indication(self->notify.instance, discovery);
+}
+
+/*
+ * Function irlap_status_indication (quality_of_link)
+ */
+void irlap_status_indication(struct irlap_cb *self, int quality_of_link)
+{
+	switch (quality_of_link) {
+	case STATUS_NO_ACTIVITY:
+		IRDA_MESSAGE("IrLAP, no activity on link!\n");
+		break;
+	case STATUS_NOISY:
+		IRDA_MESSAGE("IrLAP, noisy link!\n");
+		break;
+	default:
+		break;
+	}
+	irlmp_status_indication(self->notify.instance,
+				quality_of_link, LOCK_NO_CHANGE);
+}
+
+/*
+ * Function irlap_reset_indication (void)
+ */
+void irlap_reset_indication(struct irlap_cb *self)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	if (self->state == LAP_RESET_WAIT)
+		irlap_do_event(self, RESET_REQUEST, NULL, NULL);
+	else
+		irlap_do_event(self, RESET_RESPONSE, NULL, NULL);
+}
+
+/*
+ * Function irlap_reset_confirm (void)
+ */
+void irlap_reset_confirm(void)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+}
+
+/*
+ * Function irlap_generate_rand_time_slot (S, s)
+ *
+ *    Generate a random time slot between s and S-1 where
+ *    S = Number of slots (0 -> S-1)
+ *    s = Current slot
+ */
+int irlap_generate_rand_time_slot(int S, int s)
+{
+	static int rand;
+	int slot;
+
+	IRDA_ASSERT((S - s) > 0, return 0;);
+
+	rand += jiffies;
+	rand ^= (rand << 12);
+	rand ^= (rand >> 20);
+
+	slot = s + rand % (S-s);
+
+	IRDA_ASSERT((slot >= s) || (slot < S), return 0;);
+
+	return slot;
+}
+
+/*
+ * Function irlap_update_nr_received (nr)
+ *
+ *    Remove all acknowledged frames in current window queue. This code is
+ *    not intuitive and you should not try to change it. If you think it
+ *    contains bugs, please mail a patch to the author instead.
+ */
+void irlap_update_nr_received(struct irlap_cb *self, int nr)
+{
+	struct sk_buff *skb = NULL;
+	int count = 0;
+
+	/*
+         * Remove all the ack-ed frames from the window queue.
+         */
+
+	/*
+	 *  Optimize for the common case. It is most likely that the receiver
+	 *  will acknowledge all the frames we have sent! So in that case we
+	 *  delete all frames stored in window.
+	 */
+	if (nr == self->vs) {
+		while ((skb = skb_dequeue(&self->wx_list)) != NULL) {
+			dev_kfree_skb(skb);
+		}
+		/* The last acked frame is the next to send minus one */
+		self->va = nr - 1;
+	} else {
+		/* Remove all acknowledged frames in current window */
+		while ((skb_peek(&self->wx_list) != NULL) &&
+		       (((self->va+1) % 8) != nr))
+		{
+			skb = skb_dequeue(&self->wx_list);
+			dev_kfree_skb(skb);
+
+			self->va = (self->va + 1) % 8;
+			count++;
+		}
+	}
+
+	/* Advance window */
+	self->window = self->window_size - skb_queue_len(&self->wx_list);
+}
+
+/*
+ * Function irlap_validate_ns_received (ns)
+ *
+ *    Validate the next to send (ns) field from received frame.
+ */
+int irlap_validate_ns_received(struct irlap_cb *self, int ns)
+{
+	/*  ns as expected?  */
+	if (ns == self->vr)
+		return NS_EXPECTED;
+	/*
+	 *  Stations are allowed to treat invalid NS as unexpected NS
+	 *  IrLAP, Recv ... with-invalid-Ns. p. 84
+	 */
+	return NS_UNEXPECTED;
+
+	/* return NR_INVALID; */
+}
+/*
+ * Function irlap_validate_nr_received (nr)
+ *
+ *    Validate the next to receive (nr) field from received frame.
+ *
+ */
+int irlap_validate_nr_received(struct irlap_cb *self, int nr)
+{
+	/*  nr as expected?  */
+	if (nr == self->vs) {
+		IRDA_DEBUG(4, "%s(), expected!\n", __FUNCTION__);
+		return NR_EXPECTED;
+	}
+
+	/*
+	 *  unexpected nr? (but within current window), first we check if the
+	 *  ns numbers of the frames in the current window wrap.
+	 */
+	if (self->va < self->vs) {
+		if ((nr >= self->va) && (nr <= self->vs))
+			return NR_UNEXPECTED;
+	} else {
+		if ((nr >= self->va) || (nr <= self->vs))
+			return NR_UNEXPECTED;
+	}
+
+	/* Invalid nr!  */
+	return NR_INVALID;
+}
+
+/*
+ * Function irlap_initiate_connection_state ()
+ *
+ *    Initialize the connection state parameters
+ *
+ */
+void irlap_initiate_connection_state(struct irlap_cb *self)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Next to send and next to receive */
+	self->vs = self->vr = 0;
+
+	/* Last frame which got acked (0 - 1) % 8 */
+	self->va = 7;
+
+	self->window = 1;
+
+	self->remote_busy = FALSE;
+	self->retry_count = 0;
+}
+
+/*
+ * Function irlap_wait_min_turn_around (self, qos)
+ *
+ *    Wait negotiated minimum turn around time, this function actually sets
+ *    the number of BOS's that must be sent before the next transmitted
+ *    frame in order to delay for the specified amount of time. This is
+ *    done to avoid using timers, and the forbidden udelay!
+ */
+void irlap_wait_min_turn_around(struct irlap_cb *self, struct qos_info *qos)
+{
+	__u32 min_turn_time;
+	__u32 speed;
+
+	/* Get QoS values.  */
+	speed = qos->baud_rate.value;
+	min_turn_time = qos->min_turn_time.value;
+
+	/* No need to calculate XBOFs for speeds over 115200 bps */
+	if (speed > 115200) {
+		self->mtt_required = min_turn_time;
+		return;
+	}
+
+	/*
+	 *  Send additional BOF's for the next frame for the requested
+	 *  min turn time, so now we must calculate how many chars (XBOF's) we
+	 *  must send for the requested time period (min turn time)
+	 */
+	self->xbofs_delay = irlap_min_turn_time_in_bytes(speed, min_turn_time);
+}
+
+/*
+ * Function irlap_flush_all_queues (void)
+ *
+ *    Flush all queues
+ *
+ */
+void irlap_flush_all_queues(struct irlap_cb *self)
+{
+	struct sk_buff* skb;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Free transmission queue */
+	while ((skb = skb_dequeue(&self->txq)) != NULL)
+		dev_kfree_skb(skb);
+
+	while ((skb = skb_dequeue(&self->txq_ultra)) != NULL)
+		dev_kfree_skb(skb);
+
+	/* Free sliding window buffered packets */
+	while ((skb = skb_dequeue(&self->wx_list)) != NULL)
+		dev_kfree_skb(skb);
+}
+
+/*
+ * Function irlap_setspeed (self, speed)
+ *
+ *    Change the speed of the IrDA port
+ *
+ */
+static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now)
+{
+	struct sk_buff *skb;
+
+	IRDA_DEBUG(0, "%s(), setting speed to %d\n", __FUNCTION__, speed);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	self->speed = speed;
+
+	/* Change speed now, or just piggyback speed on frames */
+	if (now) {
+		/* Send down empty frame to trigger speed change */
+		skb = dev_alloc_skb(0);
+		irlap_queue_xmit(self, skb);
+	}
+}
+
+/*
+ * Function irlap_init_qos_capabilities (self, qos)
+ *
+ *    Initialize QoS for this IrLAP session, What we do is to compute the
+ *    intersection of the QoS capabilities for the user, driver and for
+ *    IrLAP itself. Normally, IrLAP will not specify any values, but it can
+ *    be used to restrict certain values.
+ */
+static void irlap_init_qos_capabilities(struct irlap_cb *self,
+					struct qos_info *qos_user)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(self->netdev != NULL, return;);
+
+	/* Start out with the maximum QoS support possible */
+	irda_init_max_qos_capabilies(&self->qos_rx);
+
+	/* Apply drivers QoS capabilities */
+	irda_qos_compute_intersection(&self->qos_rx, self->qos_dev);
+
+	/*
+	 *  Check for user supplied QoS parameters. The service user is only
+	 *  allowed to supply these values. We check each parameter since the
+	 *  user may not have set all of them.
+	 */
+	if (qos_user) {
+		IRDA_DEBUG(1, "%s(), Found user specified QoS!\n", __FUNCTION__);
+
+		if (qos_user->baud_rate.bits)
+			self->qos_rx.baud_rate.bits &= qos_user->baud_rate.bits;
+
+		if (qos_user->max_turn_time.bits)
+			self->qos_rx.max_turn_time.bits &= qos_user->max_turn_time.bits;
+		if (qos_user->data_size.bits)
+			self->qos_rx.data_size.bits &= qos_user->data_size.bits;
+
+		if (qos_user->link_disc_time.bits)
+			self->qos_rx.link_disc_time.bits &= qos_user->link_disc_time.bits;
+	}
+
+	/* Use 500ms in IrLAP for now */
+	self->qos_rx.max_turn_time.bits &= 0x01;
+
+	/* Set data size */
+	/*self->qos_rx.data_size.bits &= 0x03;*/
+
+	irda_qos_bits_to_value(&self->qos_rx);
+}
+
+/*
+ * Function irlap_apply_default_connection_parameters (void, now)
+ *
+ *    Use the default connection and transmission parameters
+ */
+void irlap_apply_default_connection_parameters(struct irlap_cb *self)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* xbofs : Default value in NDM */
+	self->next_bofs   = 12;
+	self->bofs_count  = 12;
+
+	/* NDM Speed is 9600 */
+	irlap_change_speed(self, 9600, TRUE);
+
+	/* Set mbusy when going to NDM state */
+	irda_device_set_media_busy(self->netdev, TRUE);
+
+	/*
+	 * Generate random connection address for this session, which must
+	 * be 7 bits wide and different from 0x00 and 0xfe
+	 */
+	while ((self->caddr == 0x00) || (self->caddr == 0xfe)) {
+		get_random_bytes(&self->caddr, sizeof(self->caddr));
+		self->caddr &= 0xfe;
+	}
+
+	/* Use default values until connection has been negitiated */
+	self->slot_timeout = sysctl_slot_timeout;
+	self->final_timeout = FINAL_TIMEOUT;
+	self->poll_timeout = POLL_TIMEOUT;
+	self->wd_timeout = WD_TIMEOUT;
+
+	/* Set some default values */
+	self->qos_tx.baud_rate.value = 9600;
+	self->qos_rx.baud_rate.value = 9600;
+	self->qos_tx.max_turn_time.value = 0;
+	self->qos_rx.max_turn_time.value = 0;
+	self->qos_tx.min_turn_time.value = 0;
+	self->qos_rx.min_turn_time.value = 0;
+	self->qos_tx.data_size.value = 64;
+	self->qos_rx.data_size.value = 64;
+	self->qos_tx.window_size.value = 1;
+	self->qos_rx.window_size.value = 1;
+	self->qos_tx.additional_bofs.value = 12;
+	self->qos_rx.additional_bofs.value = 12;
+	self->qos_tx.link_disc_time.value = 0;
+	self->qos_rx.link_disc_time.value = 0;
+
+	irlap_flush_all_queues(self);
+
+	self->disconnect_pending = FALSE;
+	self->connect_pending = FALSE;
+}
+
+/*
+ * Function irlap_apply_connection_parameters (qos, now)
+ *
+ *    Initialize IrLAP with the negotiated QoS values
+ *
+ * If 'now' is false, the speed and xbofs will be changed after the next
+ * frame is sent.
+ * If 'now' is true, the speed and xbofs is changed immediately
+ */
+void irlap_apply_connection_parameters(struct irlap_cb *self, int now)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Set the negotiated xbofs value */
+	self->next_bofs   = self->qos_tx.additional_bofs.value;
+	if (now)
+		self->bofs_count = self->next_bofs;
+
+	/* Set the negotiated link speed (may need the new xbofs value) */
+	irlap_change_speed(self, self->qos_tx.baud_rate.value, now);
+
+	self->window_size = self->qos_tx.window_size.value;
+	self->window      = self->qos_tx.window_size.value;
+
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	/*
+	 *  Calculate how many bytes it is possible to transmit before the
+	 *  link must be turned around
+	 */
+	self->line_capacity =
+		irlap_max_line_capacity(self->qos_tx.baud_rate.value,
+					self->qos_tx.max_turn_time.value);
+	self->bytes_left = self->line_capacity;
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+
+
+	/*
+	 *  Initialize timeout values, some of the rules are listed on
+	 *  page 92 in IrLAP.
+	 */
+	IRDA_ASSERT(self->qos_tx.max_turn_time.value != 0, return;);
+	IRDA_ASSERT(self->qos_rx.max_turn_time.value != 0, return;);
+	/* The poll timeout applies only to the primary station.
+	 * It defines the maximum time the primary stay in XMIT mode
+	 * before timeout and turning the link around (sending a RR).
+	 * Or, this is how much we can keep the pf bit in primary mode.
+	 * Therefore, it must be lower or equal than our *OWN* max turn around.
+	 * Jean II */
+	self->poll_timeout = self->qos_tx.max_turn_time.value * HZ / 1000;
+	/* The Final timeout applies only to the primary station.
+	 * It defines the maximum time the primary wait (mostly in RECV mode)
+	 * for an answer from the secondary station before polling it again.
+	 * Therefore, it must be greater or equal than our *PARTNER*
+	 * max turn around time - Jean II */
+	self->final_timeout = self->qos_rx.max_turn_time.value * HZ / 1000;
+	/* The Watchdog Bit timeout applies only to the secondary station.
+	 * It defines the maximum time the secondary wait (mostly in RECV mode)
+	 * for poll from the primary station before getting annoyed.
+	 * Therefore, it must be greater or equal than our *PARTNER*
+	 * max turn around time - Jean II */
+	self->wd_timeout = self->final_timeout * 2;
+
+	/*
+	 * N1 and N2 are maximum retry count for *both* the final timer
+	 * and the wd timer (with a factor 2) as defined above.
+	 * After N1 retry of a timer, we give a warning to the user.
+	 * After N2 retry, we consider the link dead and disconnect it.
+	 * Jean II
+	 */
+
+	/*
+	 *  Set N1 to 0 if Link Disconnect/Threshold Time = 3 and set it to
+	 *  3 seconds otherwise. See page 71 in IrLAP for more details.
+	 *  Actually, it's not always 3 seconds, as we allow to set
+	 *  it via sysctl... Max maxtt is 500ms, and N1 need to be multiple
+	 *  of 2, so 1 second is minimum we can allow. - Jean II
+	 */
+	if (self->qos_tx.link_disc_time.value == sysctl_warn_noreply_time)
+		/*
+		 * If we set N1 to 0, it will trigger immediately, which is
+		 * not what we want. What we really want is to disable it,
+		 * Jean II
+		 */
+		self->N1 = -2; /* Disable - Need to be multiple of 2*/
+	else
+		self->N1 = sysctl_warn_noreply_time * 1000 /
+		  self->qos_rx.max_turn_time.value;
+
+	IRDA_DEBUG(4, "Setting N1 = %d\n", self->N1);
+
+	/* Set N2 to match our own disconnect time */
+	self->N2 = self->qos_tx.link_disc_time.value * 1000 /
+		self->qos_rx.max_turn_time.value;
+	IRDA_DEBUG(4, "Setting N2 = %d\n", self->N2);
+}
+
+#ifdef CONFIG_PROC_FS
+struct irlap_iter_state {
+	int id;
+};
+
+static void *irlap_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct irlap_iter_state *iter = seq->private;
+	struct irlap_cb *self;
+
+	/* Protect our access to the tsap list */
+	spin_lock_irq(&irlap->hb_spinlock);
+	iter->id = 0;
+
+	for (self = (struct irlap_cb *) hashbin_get_first(irlap); 
+	     self; self = (struct irlap_cb *) hashbin_get_next(irlap)) {
+		if (iter->id == *pos)
+			break;
+		++iter->id;
+	}
+		
+	return self;
+}
+
+static void *irlap_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct irlap_iter_state *iter = seq->private;
+
+	++*pos;
+	++iter->id;
+	return (void *) hashbin_get_next(irlap);
+}
+
+static void irlap_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_irq(&irlap->hb_spinlock);
+}
+
+static int irlap_seq_show(struct seq_file *seq, void *v)
+{
+	const struct irlap_iter_state *iter = seq->private;
+	const struct irlap_cb *self = v;
+	
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EINVAL;);
+
+	seq_printf(seq, "irlap%d ", iter->id);
+	seq_printf(seq, "state: %s\n",
+		   irlap_state[self->state]);
+
+	seq_printf(seq, "  device name: %s, ",
+		   (self->netdev) ? self->netdev->name : "bug");
+	seq_printf(seq, "hardware name: %s\n", self->hw_name);
+
+	seq_printf(seq, "  caddr: %#02x, ", self->caddr);
+	seq_printf(seq, "saddr: %#08x, ", self->saddr);
+	seq_printf(seq, "daddr: %#08x\n", self->daddr);
+
+	seq_printf(seq, "  win size: %d, ",
+		   self->window_size);
+	seq_printf(seq, "win: %d, ", self->window);
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	seq_printf(seq, "line capacity: %d, ",
+		   self->line_capacity);
+	seq_printf(seq, "bytes left: %d\n", self->bytes_left);
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+	seq_printf(seq, "  tx queue len: %d ",
+		   skb_queue_len(&self->txq));
+	seq_printf(seq, "win queue len: %d ",
+		   skb_queue_len(&self->wx_list));
+	seq_printf(seq, "rbusy: %s", self->remote_busy ?
+		   "TRUE" : "FALSE");
+	seq_printf(seq, " mbusy: %s\n", self->media_busy ?
+		   "TRUE" : "FALSE");
+
+	seq_printf(seq, "  retrans: %d ", self->retry_count);
+	seq_printf(seq, "vs: %d ", self->vs);
+	seq_printf(seq, "vr: %d ", self->vr);
+	seq_printf(seq, "va: %d\n", self->va);
+
+	seq_printf(seq, "  qos\tbps\tmaxtt\tdsize\twinsize\taddbofs\tmintt\tldisc\tcomp\n");
+
+	seq_printf(seq, "  tx\t%d\t",
+		   self->qos_tx.baud_rate.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.max_turn_time.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.data_size.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.window_size.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.additional_bofs.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.min_turn_time.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_tx.link_disc_time.value);
+	seq_printf(seq, "\n");
+
+	seq_printf(seq, "  rx\t%d\t",
+		   self->qos_rx.baud_rate.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_rx.max_turn_time.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_rx.data_size.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_rx.window_size.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_rx.additional_bofs.value);
+	seq_printf(seq, "%d\t",
+		   self->qos_rx.min_turn_time.value);
+	seq_printf(seq, "%d\n",
+		   self->qos_rx.link_disc_time.value);
+
+	return 0;
+}
+
+static struct seq_operations irlap_seq_ops = {
+	.start  = irlap_seq_start,
+	.next   = irlap_seq_next,
+	.stop   = irlap_seq_stop,
+	.show   = irlap_seq_show,
+};
+
+static int irlap_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct irlap_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
+       
+	if (!s)
+		goto out;
+
+	if (irlap == NULL) {
+		rc = -EINVAL;
+		goto out_kfree;
+	}
+
+	rc = seq_open(file, &irlap_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+struct file_operations irlap_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = irlap_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
new file mode 100644
index 00000000000..1cd89f5f3b7
--- /dev/null
+++ b/net/irda/irlap_event.c
@@ -0,0 +1,2334 @@
+/*********************************************************************
+ *
+ * Filename:      irlap_event.c
+ * Version:       0.9
+ * Description:   IrLAP state machine implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dag@brattli.net>
+ * Created at:    Sat Aug 16 00:59:29 1997
+ * Modified at:   Sat Dec 25 21:07:57 1999
+ * Modified by:   Dag Brattli <dag@brattli.net>
+ *
+ *     Copyright (c) 1998-2000 Dag Brattli <dag@brattli.net>,
+ *     Copyright (c) 1998      Thomas Davis <ratbert@radiks.net>
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlap_event.h>
+
+#include <net/irda/timer.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlap_frame.h>
+#include <net/irda/qos.h>
+#include <net/irda/parameters.h>
+#include <net/irda/irlmp.h>		/* irlmp_flow_indication(), ... */
+
+#include <net/irda/irda_device.h>
+
+#ifdef CONFIG_IRDA_FAST_RR
+int sysctl_fast_poll_increase = 50;
+#endif
+
+static int irlap_state_ndm    (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_query  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_reply  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_conn   (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_setup  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_xmit_p (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_pclose (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_nrm_p  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
+				  struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_reset  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_nrm_s  (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_xmit_s (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_sclose (struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info);
+static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event,
+				   struct sk_buff *, struct irlap_info *);
+
+#ifdef CONFIG_IRDA_DEBUG
+static const char *irlap_event[] = {
+	"DISCOVERY_REQUEST",
+	"CONNECT_REQUEST",
+	"CONNECT_RESPONSE",
+	"DISCONNECT_REQUEST",
+	"DATA_REQUEST",
+	"RESET_REQUEST",
+	"RESET_RESPONSE",
+	"SEND_I_CMD",
+	"SEND_UI_FRAME",
+	"RECV_DISCOVERY_XID_CMD",
+	"RECV_DISCOVERY_XID_RSP",
+	"RECV_SNRM_CMD",
+	"RECV_TEST_CMD",
+	"RECV_TEST_RSP",
+	"RECV_UA_RSP",
+	"RECV_DM_RSP",
+	"RECV_RD_RSP",
+	"RECV_I_CMD",
+	"RECV_I_RSP",
+	"RECV_UI_FRAME",
+	"RECV_FRMR_RSP",
+	"RECV_RR_CMD",
+	"RECV_RR_RSP",
+	"RECV_RNR_CMD",
+	"RECV_RNR_RSP",
+	"RECV_REJ_CMD",
+	"RECV_REJ_RSP",
+	"RECV_SREJ_CMD",
+	"RECV_SREJ_RSP",
+	"RECV_DISC_CMD",
+	"SLOT_TIMER_EXPIRED",
+	"QUERY_TIMER_EXPIRED",
+	"FINAL_TIMER_EXPIRED",
+	"POLL_TIMER_EXPIRED",
+	"DISCOVERY_TIMER_EXPIRED",
+	"WD_TIMER_EXPIRED",
+	"BACKOFF_TIMER_EXPIRED",
+	"MEDIA_BUSY_TIMER_EXPIRED",
+};
+#endif	/* CONFIG_IRDA_DEBUG */
+
+const char *irlap_state[] = {
+	"LAP_NDM",
+	"LAP_QUERY",
+	"LAP_REPLY",
+	"LAP_CONN",
+	"LAP_SETUP",
+	"LAP_OFFLINE",
+	"LAP_XMIT_P",
+	"LAP_PCLOSE",
+	"LAP_NRM_P",
+	"LAP_RESET_WAIT",
+	"LAP_RESET",
+	"LAP_NRM_S",
+	"LAP_XMIT_S",
+	"LAP_SCLOSE",
+	"LAP_RESET_CHECK",
+};
+
+static int (*state[])(struct irlap_cb *self, IRLAP_EVENT event,
+		      struct sk_buff *skb, struct irlap_info *info) =
+{
+	irlap_state_ndm,
+	irlap_state_query,
+	irlap_state_reply,
+	irlap_state_conn,
+	irlap_state_setup,
+	irlap_state_offline,
+	irlap_state_xmit_p,
+	irlap_state_pclose,
+	irlap_state_nrm_p,
+	irlap_state_reset_wait,
+	irlap_state_reset,
+	irlap_state_nrm_s,
+	irlap_state_xmit_s,
+	irlap_state_sclose,
+	irlap_state_reset_check,
+};
+
+/*
+ * Function irda_poll_timer_expired (data)
+ *
+ *    Poll timer has expired. Normally we must now send a RR frame to the
+ *    remote device
+ */
+static void irlap_poll_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
+}
+
+/*
+ * Calculate and set time before we will have to send back the pf bit
+ * to the peer. Use in primary.
+ * Make sure that state is XMIT_P/XMIT_S when calling this function
+ * (and that nobody messed up with the state). - Jean II
+ */
+static void irlap_start_poll_timer(struct irlap_cb *self, int timeout)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+#ifdef CONFIG_IRDA_FAST_RR
+	/*
+	 * Send out the RR frames faster if our own transmit queue is empty, or
+	 * if the peer is busy. The effect is a much faster conversation
+	 */
+	if ((skb_queue_len(&self->txq) == 0) || (self->remote_busy)) {
+		if (self->fast_RR == TRUE) {
+			/*
+			 *  Assert that the fast poll timer has not reached the
+			 *  normal poll timer yet
+			 */
+			if (self->fast_RR_timeout < timeout) {
+				/*
+				 *  FIXME: this should be a more configurable
+				 *         function
+				 */
+				self->fast_RR_timeout +=
+					(sysctl_fast_poll_increase * HZ/1000);
+
+				/* Use this fast(er) timeout instead */
+				timeout = self->fast_RR_timeout;
+			}
+		} else {
+			self->fast_RR = TRUE;
+
+			/* Start with just 0 ms */
+			self->fast_RR_timeout = 0;
+			timeout = 0;
+		}
+	} else
+		self->fast_RR = FALSE;
+
+	IRDA_DEBUG(3, "%s(), timeout=%d (%ld)\n", __FUNCTION__, timeout, jiffies);
+#endif /* CONFIG_IRDA_FAST_RR */
+
+	if (timeout == 0)
+		irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
+	else
+		irda_start_timer(&self->poll_timer, timeout, self,
+				 irlap_poll_timer_expired);
+}
+
+/*
+ * Function irlap_do_event (event, skb, info)
+ *
+ *    Rushes through the state machine without any delay. If state == XMIT
+ *    then send queued data frames.
+ */
+void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event,
+		    struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret;
+
+	if (!self || self->magic != LAP_MAGIC)
+		return;
+
+	IRDA_DEBUG(3, "%s(), event = %s, state = %s\n", __FUNCTION__,
+		   irlap_event[event], irlap_state[self->state]);
+
+	ret = (*state[self->state])(self, event, skb, info);
+
+	/*
+	 *  Check if there are any pending events that needs to be executed
+	 */
+	switch (self->state) {
+	case LAP_XMIT_P: /* FALLTHROUGH */
+	case LAP_XMIT_S:
+		/*
+		 * We just received the pf bit and are at the beginning
+		 * of a new LAP transmit window.
+		 * Check if there are any queued data frames, and do not
+		 * try to disconnect link if we send any data frames, since
+		 * that will change the state away form XMIT
+		 */
+		IRDA_DEBUG(2, "%s() : queue len = %d\n", __FUNCTION__,
+			   skb_queue_len(&self->txq));
+
+		if (skb_queue_len(&self->txq)) {
+			/* Prevent race conditions with irlap_data_request() */
+			self->local_busy = TRUE;
+
+			/* Theory of operation.
+			 * We send frames up to when we fill the window or
+			 * reach line capacity. Those frames will queue up
+			 * in the device queue, and the driver will slowly
+			 * send them.
+			 * After each frame that we send, we poll the higher
+			 * layer for more data. It's the right time to do
+			 * that because the link layer need to perform the mtt
+			 * and then send the first frame, so we can afford
+			 * to send a bit of time in kernel space.
+			 * The explicit flow indication allow to minimise
+			 * buffers (== lower latency), to avoid higher layer
+			 * polling via timers (== less context switches) and
+			 * to implement a crude scheduler - Jean II */
+
+			/* Try to send away all queued data frames */
+			while ((skb = skb_dequeue(&self->txq)) != NULL) {
+				/* Send one frame */
+				ret = (*state[self->state])(self, SEND_I_CMD,
+							    skb, NULL);
+				/* Drop reference count.
+				 * It will be increase as needed in
+				 * irlap_send_data_xxx() */
+				kfree_skb(skb);
+
+				/* Poll the higher layers for one more frame */
+				irlmp_flow_indication(self->notify.instance,
+						      FLOW_START);
+
+				if (ret == -EPROTO)
+					break; /* Try again later! */
+			}
+			/* Finished transmitting */
+			self->local_busy = FALSE;
+		} else if (self->disconnect_pending) {
+			self->disconnect_pending = FALSE;
+
+			ret = (*state[self->state])(self, DISCONNECT_REQUEST,
+						    NULL, NULL);
+		}
+		break;
+/*	case LAP_NDM: */
+/*	case LAP_CONN: */
+/*	case LAP_RESET_WAIT: */
+/*	case LAP_RESET_CHECK: */
+	default:
+		break;
+	}
+}
+
+/*
+ * Function irlap_next_state (self, state)
+ *
+ *    Switches state and provides debug information
+ *
+ */
+static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state)
+{
+	/*
+	if (!self || self->magic != LAP_MAGIC)
+		return;
+
+	IRDA_DEBUG(4, "next LAP state = %s\n", irlap_state[state]);
+	*/
+	self->state = state;
+}
+
+/*
+ * Function irlap_state_ndm (event, skb, frame)
+ *
+ *    NDM (Normal Disconnected Mode) state
+ *
+ */
+static int irlap_state_ndm(struct irlap_cb *self, IRLAP_EVENT event,
+			   struct sk_buff *skb, struct irlap_info *info)
+{
+	discovery_t *discovery_rsp;
+	int ret = 0;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case CONNECT_REQUEST:
+		IRDA_ASSERT(self->netdev != NULL, return -1;);
+
+		if (self->media_busy) {
+			/* Note : this will never happen, because we test
+			 * media busy in irlap_connect_request() and
+			 * postpone the event... - Jean II */
+			IRDA_DEBUG(0, "%s(), CONNECT_REQUEST: media busy!\n",
+				   __FUNCTION__);
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			irlap_disconnect_indication(self, LAP_MEDIA_BUSY);
+		} else {
+			irlap_send_snrm_frame(self, &self->qos_rx);
+
+			/* Start Final-bit timer */
+			irlap_start_final_timer(self, self->final_timeout);
+
+			self->retry_count = 0;
+			irlap_next_state(self, LAP_SETUP);
+		}
+		break;
+	case RECV_SNRM_CMD:
+		/* Check if the frame contains and I field */
+		if (info) {
+			self->daddr = info->daddr;
+			self->caddr = info->caddr;
+
+			irlap_next_state(self, LAP_CONN);
+
+			irlap_connect_indication(self, skb);
+		} else {
+			IRDA_DEBUG(0, "%s(), SNRM frame does not "
+				   "contain an I field!\n", __FUNCTION__);
+		}
+		break;
+	case DISCOVERY_REQUEST:
+		IRDA_ASSERT(info != NULL, return -1;);
+
+		if (self->media_busy) {
+			IRDA_DEBUG(1, "%s(), DISCOVERY_REQUEST: media busy!\n",
+				   __FUNCTION__);
+			/* irlap->log.condition = MEDIA_BUSY; */
+
+			/* This will make IrLMP try again */
+			irlap_discovery_confirm(self, NULL);
+			/* Note : the discovery log is not cleaned up here,
+			 * it will be done in irlap_discovery_request()
+			 * Jean II */
+			return 0;
+		}
+
+		self->S = info->S;
+		self->s = info->s;
+		irlap_send_discovery_xid_frame(self, info->S, info->s, TRUE,
+					       info->discovery);
+		self->frame_sent = FALSE;
+		self->s++;
+
+		irlap_start_slot_timer(self, self->slot_timeout);
+		irlap_next_state(self, LAP_QUERY);
+		break;
+	case RECV_DISCOVERY_XID_CMD:
+		IRDA_ASSERT(info != NULL, return -1;);
+
+		/* Assert that this is not the final slot */
+		if (info->s <= info->S) {
+			self->slot = irlap_generate_rand_time_slot(info->S,
+								   info->s);
+			if (self->slot == info->s) {
+				discovery_rsp = irlmp_get_discovery_response();
+				discovery_rsp->data.daddr = info->daddr;
+
+				irlap_send_discovery_xid_frame(self, info->S,
+							       self->slot,
+							       FALSE,
+							       discovery_rsp);
+				self->frame_sent = TRUE;
+			} else
+				self->frame_sent = FALSE;
+
+			/*
+			 * Go to reply state until end of discovery to
+			 * inhibit our own transmissions. Set the timer
+			 * to not stay forever there... Jean II
+			 */
+			irlap_start_query_timer(self, info->S, info->s);
+			irlap_next_state(self, LAP_REPLY);
+		} else {
+		/* This is the final slot. How is it possible ?
+		 * This would happen is both discoveries are just slightly
+		 * offset (if they are in sync, all packets are lost).
+		 * Most often, all the discovery requests will be received
+		 * in QUERY state (see my comment there), except for the
+		 * last frame that will come here.
+		 * The big trouble when it happen is that active discovery
+		 * doesn't happen, because nobody answer the discoveries
+		 * frame of the other guy, so the log shows up empty.
+		 * What should we do ?
+		 * Not much. It's too late to answer those discovery frames,
+		 * so we just pass the info to IrLMP who will put it in the
+		 * log (and post an event).
+		 * Another cause would be devices that do discovery much
+		 * slower than us, however the latest fixes should minimise
+		 * those cases...
+		 * Jean II
+		 */
+			IRDA_DEBUG(1, "%s(), Receiving final discovery request, missed the discovery slots :-(\n", __FUNCTION__);
+
+			/* Last discovery request -> in the log */
+			irlap_discovery_indication(self, info->discovery);
+		}
+		break;
+	case MEDIA_BUSY_TIMER_EXPIRED:
+		/* A bunch of events may be postponed because the media is
+		 * busy (usually immediately after we close a connection),
+		 * or while we are doing discovery (state query/reply).
+		 * In all those cases, the media busy flag will be cleared
+		 * when it's OK for us to process those postponed events.
+		 * This event is not mentioned in the state machines in the
+		 * IrLAP spec. It's because they didn't consider Ultra and
+		 * postponing connection request is optional.
+		 * Jean II */
+#ifdef CONFIG_IRDA_ULTRA
+		/* Send any pending Ultra frames if any */
+		if (!skb_queue_empty(&self->txq_ultra)) {
+			/* We don't send the frame, just post an event.
+			 * Also, previously this code was in timer.c...
+			 * Jean II */
+			ret = (*state[self->state])(self, SEND_UI_FRAME,
+						    NULL, NULL);
+		}
+#endif /* CONFIG_IRDA_ULTRA */
+		/* Check if we should try to connect.
+		 * This code was previously in irlap_do_event() */
+		if (self->connect_pending) {
+			self->connect_pending = FALSE;
+
+			/* This one *should* not pend in this state, except
+			 * if a socket try to connect and immediately
+			 * disconnect. - clear - Jean II */
+			if (self->disconnect_pending)
+				irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+			else
+				ret = (*state[self->state])(self,
+							    CONNECT_REQUEST,
+							    NULL, NULL);
+			self->disconnect_pending = FALSE;
+		}
+		/* Note : one way to test if this code works well (including
+		 * media busy and small busy) is to create a user space
+		 * application generating an Ultra packet every 3.05 sec (or
+		 * 2.95 sec) and to see how it interact with discovery.
+		 * It's fairly easy to check that no packet is lost, that the
+		 * packets are postponed during discovery and that after
+		 * discovery indication you have a 100ms "gap".
+		 * As connection request and Ultra are now processed the same
+		 * way, this avoid the tedious job of trying IrLAP connection
+		 * in all those cases...
+		 * Jean II */
+		break;
+#ifdef CONFIG_IRDA_ULTRA
+	case SEND_UI_FRAME:
+	{
+		int i;
+		/* Only allowed to repeat an operation twice */
+		for (i=0; ((i<2) && (self->media_busy == FALSE)); i++) {
+			skb = skb_dequeue(&self->txq_ultra);
+			if (skb)
+				irlap_send_ui_frame(self, skb, CBROADCAST,
+						    CMD_FRAME);
+			else
+				break;
+			/* irlap_send_ui_frame() won't increase skb reference
+			 * count, so no dev_kfree_skb() - Jean II */
+		}
+		if (i == 2) {
+			/* Force us to listen 500 ms again */
+			irda_device_set_media_busy(self->netdev, TRUE);
+		}
+		break;
+	}
+	case RECV_UI_FRAME:
+		/* Only accept broadcast frames in NDM mode */
+		if (info->caddr != CBROADCAST) {
+			IRDA_DEBUG(0, "%s(), not a broadcast frame!\n",
+				   __FUNCTION__);
+		} else
+			irlap_unitdata_indication(self, skb);
+		break;
+#endif /* CONFIG_IRDA_ULTRA */
+	case RECV_TEST_CMD:
+		/* Remove test frame header */
+		skb_pull(skb, sizeof(struct test_frame));
+
+		/*
+		 * Send response. This skb will not be sent out again, and
+		 * will only be used to send out the same info as the cmd
+		 */
+		irlap_send_test_frame(self, CBROADCAST, info->daddr, skb);
+		break;
+	case RECV_TEST_RSP:
+		IRDA_DEBUG(0, "%s() not implemented!\n", __FUNCTION__);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %s\n", __FUNCTION__,
+			   irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_query (event, skb, info)
+ *
+ *    QUERY state
+ *
+ */
+static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case RECV_DISCOVERY_XID_RSP:
+		IRDA_ASSERT(info != NULL, return -1;);
+		IRDA_ASSERT(info->discovery != NULL, return -1;);
+
+		IRDA_DEBUG(4, "%s(), daddr=%08x\n", __FUNCTION__,
+			   info->discovery->data.daddr);
+
+		if (!self->discovery_log) {
+			IRDA_WARNING("%s: discovery log is gone! "
+				     "maybe the discovery timeout has been set"
+				     " to short?\n", __FUNCTION__);
+			break;
+		}
+		hashbin_insert(self->discovery_log,
+			       (irda_queue_t *) info->discovery,
+			       info->discovery->data.daddr, NULL);
+
+		/* Keep state */
+		/* irlap_next_state(self, LAP_QUERY);  */
+
+		break;
+	case RECV_DISCOVERY_XID_CMD:
+		/* Yes, it is possible to receive those frames in this mode.
+		 * Note that most often the last discovery request won't
+		 * occur here but in NDM state (see my comment there).
+		 * What should we do ?
+		 * Not much. We are currently performing our own discovery,
+		 * therefore we can't answer those frames. We don't want
+		 * to change state either. We just pass the info to
+		 * IrLMP who will put it in the log (and post an event).
+		 * Jean II
+		 */
+
+		IRDA_ASSERT(info != NULL, return -1;);
+
+		IRDA_DEBUG(1, "%s(), Receiving discovery request (s = %d) while performing discovery :-(\n", __FUNCTION__, info->s);
+
+		/* Last discovery request ? */
+		if (info->s == 0xff)
+			irlap_discovery_indication(self, info->discovery);
+		break;
+	case SLOT_TIMER_EXPIRED:
+		/*
+		 * Wait a little longer if we detect an incoming frame. This
+		 * is not mentioned in the spec, but is a good thing to do,
+		 * since we want to work even with devices that violate the
+		 * timing requirements.
+		 */
+		if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
+			IRDA_DEBUG(2, "%s(), device is slow to answer, "
+				   "waiting some more!\n", __FUNCTION__);
+			irlap_start_slot_timer(self, msecs_to_jiffies(10));
+			self->add_wait = TRUE;
+			return ret;
+		}
+		self->add_wait = FALSE;
+
+		if (self->s < self->S) {
+			irlap_send_discovery_xid_frame(self, self->S,
+						       self->s, TRUE,
+						       self->discovery_cmd);
+			self->s++;
+			irlap_start_slot_timer(self, self->slot_timeout);
+
+			/* Keep state */
+			irlap_next_state(self, LAP_QUERY);
+		} else {
+			/* This is the final slot! */
+			irlap_send_discovery_xid_frame(self, self->S, 0xff,
+						       TRUE,
+						       self->discovery_cmd);
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			/*
+			 *  We are now finished with the discovery procedure,
+			 *  so now we must return the results
+			 */
+			irlap_discovery_confirm(self, self->discovery_log);
+
+			/* IrLMP should now have taken care of the log */
+			self->discovery_log = NULL;
+		}
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %s\n", __FUNCTION__,
+			   irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_reply (self, event, skb, info)
+ *
+ *    REPLY, we have received a XID discovery frame from a device and we
+ *    are waiting for the right time slot to send a response XID frame
+ *
+ */
+static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	discovery_t *discovery_rsp;
+	int ret=0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case QUERY_TIMER_EXPIRED:
+		IRDA_DEBUG(0, "%s(), QUERY_TIMER_EXPIRED <%ld>\n",
+			   __FUNCTION__, jiffies);
+		irlap_next_state(self, LAP_NDM);
+		break;
+	case RECV_DISCOVERY_XID_CMD:
+		IRDA_ASSERT(info != NULL, return -1;);
+		/* Last frame? */
+		if (info->s == 0xff) {
+			del_timer(&self->query_timer);
+
+			/* info->log.condition = REMOTE; */
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			irlap_discovery_indication(self, info->discovery);
+		} else {
+			/* If it's our slot, send our reply */
+			if ((info->s >= self->slot) && (!self->frame_sent)) {
+				discovery_rsp = irlmp_get_discovery_response();
+				discovery_rsp->data.daddr = info->daddr;
+
+				irlap_send_discovery_xid_frame(self, info->S,
+							       self->slot,
+							       FALSE,
+							       discovery_rsp);
+
+				self->frame_sent = TRUE;
+			}
+			/* Readjust our timer to accomodate devices
+			 * doing faster or slower discovery than us...
+			 * Jean II */
+			irlap_start_query_timer(self, info->S, info->s);
+
+			/* Keep state */
+			//irlap_next_state(self, LAP_REPLY);
+		}
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d, %s\n", __FUNCTION__,
+			   event, irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_conn (event, skb, info)
+ *
+ *    CONN, we have received a SNRM command and is waiting for the upper
+ *    layer to accept or refuse connection
+ *
+ */
+static int irlap_state_conn(struct irlap_cb *self, IRLAP_EVENT event,
+			    struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s(), event=%s\n", __FUNCTION__, irlap_event[ event]);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case CONNECT_RESPONSE:
+		skb_pull(skb, sizeof(struct snrm_frame));
+
+		IRDA_ASSERT(self->netdev != NULL, return -1;);
+
+		irlap_qos_negotiate(self, skb);
+
+		irlap_initiate_connection_state(self);
+
+		/*
+		 * Applying the parameters now will make sure we change speed
+		 * *after* we have sent the next frame
+		 */
+		irlap_apply_connection_parameters(self, FALSE);
+
+		/*
+		 * Sending this frame will force a speed change after it has
+		 * been sent (i.e. the frame will be sent at 9600).
+		 */
+		irlap_send_ua_response_frame(self, &self->qos_rx);
+
+#if 0
+		/*
+		 * We are allowed to send two frames, but this may increase
+		 * the connect latency, so lets not do it for now.
+		 */
+		/* This is full of good intentions, but doesn't work in
+		 * practice.
+		 * After sending the first UA response, we switch the
+		 * dongle to the negotiated speed, which is usually
+		 * different than 9600 kb/s.
+		 * From there, there is two solutions :
+		 * 1) The other end has received the first UA response :
+		 * it will set up the connection, move to state LAP_NRM_P,
+		 * and will ignore and drop the second UA response.
+		 * Actually, it's even worse : the other side will almost
+		 * immediately send a RR that will likely collide with the
+		 * UA response (depending on negotiated turnaround).
+		 * 2) The other end has not received the first UA response,
+		 * will stay at 9600 and will never see the second UA response.
+		 * Jean II */
+		irlap_send_ua_response_frame(self, &self->qos_rx);
+#endif
+
+		/*
+		 *  The WD-timer could be set to the duration of the P-timer
+		 *  for this case, but it is recommended to use twice the
+		 *  value (note 3 IrLAP p. 60).
+		 */
+		irlap_start_wd_timer(self, self->wd_timeout);
+		irlap_next_state(self, LAP_NRM_S);
+
+		break;
+	case RECV_DISCOVERY_XID_CMD:
+		IRDA_DEBUG(3, "%s(), event RECV_DISCOVER_XID_CMD!\n",
+			   __FUNCTION__);
+		irlap_next_state(self, LAP_NDM);
+
+		break;
+	case DISCONNECT_REQUEST:
+		IRDA_DEBUG(0, "%s(), Disconnect request!\n", __FUNCTION__);
+		irlap_send_dm_frame(self);
+		irlap_next_state( self, LAP_NDM);
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d, %s\n", __FUNCTION__, 
+			   event, irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Function irlap_state_setup (event, skb, frame)
+ *
+ *    SETUP state, The local layer has transmitted a SNRM command frame to
+ *    a remote peer layer and is awaiting a reply .
+ *
+ */
+static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case FINAL_TIMER_EXPIRED:
+		if (self->retry_count < self->N3) {
+/*
+ *  Perform random backoff, Wait a random number of time units, minimum
+ *  duration half the time taken to transmitt a SNRM frame, maximum duration
+ *  1.5 times the time taken to transmit a SNRM frame. So this time should
+ *  between 15 msecs and 45 msecs.
+ */
+			irlap_start_backoff_timer(self, msecs_to_jiffies(20 +
+						        (jiffies % 30)));
+		} else {
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			irlap_disconnect_indication(self, LAP_FOUND_NONE);
+		}
+		break;
+	case BACKOFF_TIMER_EXPIRED:
+		irlap_send_snrm_frame(self, &self->qos_rx);
+		irlap_start_final_timer(self, self->final_timeout);
+		self->retry_count++;
+		break;
+	case RECV_SNRM_CMD:
+		IRDA_DEBUG(4, "%s(), SNRM battle!\n", __FUNCTION__);
+
+		IRDA_ASSERT(skb != NULL, return 0;);
+		IRDA_ASSERT(info != NULL, return 0;);
+
+		/*
+		 *  The device with the largest device address wins the battle
+		 *  (both have sent a SNRM command!)
+		 */
+		if (info &&(info->daddr > self->saddr)) {
+			del_timer(&self->final_timer);
+			irlap_initiate_connection_state(self);
+
+			IRDA_ASSERT(self->netdev != NULL, return -1;);
+
+			skb_pull(skb, sizeof(struct snrm_frame));
+
+			irlap_qos_negotiate(self, skb);
+
+			/* Send UA frame and then change link settings */
+			irlap_apply_connection_parameters(self, FALSE);
+			irlap_send_ua_response_frame(self, &self->qos_rx);
+
+			irlap_next_state(self, LAP_NRM_S);
+			irlap_connect_confirm(self, skb);
+
+			/*
+			 *  The WD-timer could be set to the duration of the
+			 *  P-timer for this case, but it is recommended
+			 *  to use twice the value (note 3 IrLAP p. 60).
+			 */
+			irlap_start_wd_timer(self, self->wd_timeout);
+		} else {
+			/* We just ignore the other device! */
+			irlap_next_state(self, LAP_SETUP);
+		}
+		break;
+	case RECV_UA_RSP:
+		/* Stop F-timer */
+		del_timer(&self->final_timer);
+
+		/* Initiate connection state */
+		irlap_initiate_connection_state(self);
+
+		/* Negotiate connection parameters */
+		IRDA_ASSERT(skb->len > 10, return -1;);
+
+		skb_pull(skb, sizeof(struct ua_frame));
+
+		IRDA_ASSERT(self->netdev != NULL, return -1;);
+
+		irlap_qos_negotiate(self, skb);
+
+		/* Set the new link setting *now* (before the rr frame) */
+		irlap_apply_connection_parameters(self, TRUE);
+		self->retry_count = 0;
+
+		/* Wait for turnaround time to give a chance to the other
+		 * device to be ready to receive us.
+		 * Note : the time to switch speed is typically larger
+		 * than the turnaround time, but as we don't have the other
+		 * side speed switch time, that's our best guess...
+		 * Jean II */
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+
+		/* This frame will actually be sent at the new speed */
+		irlap_send_rr_frame(self, CMD_FRAME);
+
+		/* The timer is set to half the normal timer to quickly
+		 * detect a failure to negociate the new connection
+		 * parameters. IrLAP 6.11.3.2, note 3.
+		 * Note that currently we don't process this failure
+		 * properly, as we should do a quick disconnect.
+		 * Jean II */
+		irlap_start_final_timer(self, self->final_timeout/2);
+		irlap_next_state(self, LAP_NRM_P);
+
+		irlap_connect_confirm(self, skb);
+		break;
+	case RECV_DM_RSP:     /* FALLTHROUGH */
+	case RECV_DISC_CMD:
+		del_timer(&self->final_timer);
+		irlap_next_state(self, LAP_NDM);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d, %s\n", __FUNCTION__,
+			   event, irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_offline (self, event, skb, info)
+ *
+ *    OFFLINE state, not used for now!
+ *
+ */
+static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
+			       struct sk_buff *skb, struct irlap_info *info)
+{
+	IRDA_DEBUG( 0, "%s(), Unknown event\n", __FUNCTION__);
+
+	return -1;
+}
+
+/*
+ * Function irlap_state_xmit_p (self, event, skb, info)
+ *
+ *    XMIT, Only the primary station has right to transmit, and we
+ *    therefore do not expect to receive any transmissions from other
+ *    stations.
+ *
+ */
+static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
+			      struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	switch (event) {
+	case SEND_I_CMD:
+		/*
+		 *  Only send frame if send-window > 0.
+		 */
+		if ((self->window > 0) && (!self->remote_busy)) {
+			int nextfit;
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+			struct sk_buff *skb_next;
+
+			/* With DYNAMIC_WINDOW, we keep the window size
+			 * maximum, and adapt on the packets we are sending.
+			 * At 115k, we can send only 2 packets of 2048 bytes
+			 * in a 500 ms turnaround. Without this option, we
+			 * would always limit the window to 2. With this
+			 * option, if we send smaller packets, we can send
+			 * up to 7 of them (always depending on QoS).
+			 * Jean II */
+
+			/* Look at the next skb. This is safe, as we are
+			 * the only consumer of the Tx queue (if we are not,
+			 * we have other problems) - Jean II */
+			skb_next = skb_peek(&self->txq);
+
+			/* Check if a subsequent skb exist and would fit in
+			 * the current window (with respect to turnaround
+			 * time).
+			 * This allow us to properly mark the current packet
+			 * with the pf bit, to avoid falling back on the
+			 * second test below, and avoid waiting the
+			 * end of the window and sending a extra RR.
+			 * Note : (skb_next != NULL) <=> (skb_queue_len() > 0)
+			 * Jean II */
+			nextfit = ((skb_next != NULL) &&
+				   ((skb_next->len + skb->len) <=
+				    self->bytes_left));
+
+			/*
+			 * The current packet may not fit ! Because of test
+			 * above, this should not happen any more !!!
+			 *  Test if we have transmitted more bytes over the
+			 *  link than its possible to do with the current
+			 *  speed and turn-around-time.
+			 */
+			if((!nextfit) && (skb->len > self->bytes_left)) {
+				IRDA_DEBUG(0, "%s(), Not allowed to transmit"
+					   " more bytes!\n", __FUNCTION__);
+				/* Requeue the skb */
+				skb_queue_head(&self->txq, skb_get(skb));
+				/*
+				 *  We should switch state to LAP_NRM_P, but
+				 *  that is not possible since we must be sure
+				 *  that we poll the other side. Since we have
+				 *  used up our time, the poll timer should
+				 *  trigger anyway now, so we just wait for it
+				 *  DB
+				 */
+				/*
+				 * Sorry, but that's not totally true. If
+				 * we send 2000B packets, we may wait another
+				 * 1000B until our turnaround expire. That's
+				 * why we need to be proactive in avoiding
+				 * coming here. - Jean II
+				 */
+				return -EPROTO;
+			}
+
+			/* Substract space used by this skb */
+			self->bytes_left -= skb->len;
+#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/* Window has been adjusted for the max packet
+			 * size, so much simpler... - Jean II */
+			nextfit = (skb_queue_len(&self->txq) > 0);
+#endif	/* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/*
+			 *  Send data with poll bit cleared only if window > 1
+			 *  and there is more frames after this one to be sent
+			 */
+			if ((self->window > 1) && (nextfit)) {
+				/* More packet to send in current window */
+				irlap_send_data_primary(self, skb);
+				irlap_next_state(self, LAP_XMIT_P);
+			} else {
+				/* Final packet of window */
+				irlap_send_data_primary_poll(self, skb);
+				irlap_next_state(self, LAP_NRM_P);
+
+				/*
+				 * Make sure state machine does not try to send
+				 * any more frames
+				 */
+				ret = -EPROTO;
+			}
+#ifdef CONFIG_IRDA_FAST_RR
+			/* Peer may want to reply immediately */
+			self->fast_RR = FALSE;
+#endif /* CONFIG_IRDA_FAST_RR */
+		} else {
+			IRDA_DEBUG(4, "%s(), Unable to send! remote busy?\n",
+				   __FUNCTION__);
+			skb_queue_head(&self->txq, skb_get(skb));
+
+			/*
+			 *  The next ret is important, because it tells
+			 *  irlap_next_state _not_ to deliver more frames
+			 */
+			ret = -EPROTO;
+		}
+		break;
+	case POLL_TIMER_EXPIRED:
+		IRDA_DEBUG(3, "%s(), POLL_TIMER_EXPIRED <%ld>\n",
+			    __FUNCTION__, jiffies);
+		irlap_send_rr_frame(self, CMD_FRAME);
+		/* Return to NRM properly - Jean II  */
+		self->window = self->window_size;
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+		/* Allowed to transmit a maximum number of bytes again. */
+		self->bytes_left = self->line_capacity;
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+		irlap_start_final_timer(self, self->final_timeout);
+		irlap_next_state(self, LAP_NRM_P);
+		break;
+	case DISCONNECT_REQUEST:
+		del_timer(&self->poll_timer);
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_send_disc_frame(self);
+		irlap_flush_all_queues(self);
+		irlap_start_final_timer(self, self->final_timeout);
+		self->retry_count = 0;
+		irlap_next_state(self, LAP_PCLOSE);
+		break;
+	case DATA_REQUEST:
+		/* Nothing to do, irlap_do_event() will send the packet
+		 * when we return... - Jean II */
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s\n",
+			   __FUNCTION__, irlap_event[event]);
+
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_pclose (event, skb, info)
+ *
+ *    PCLOSE state
+ */
+static int irlap_state_pclose(struct irlap_cb *self, IRLAP_EVENT event,
+			      struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case RECV_UA_RSP: /* FALLTHROUGH */
+	case RECV_DM_RSP:
+		del_timer(&self->final_timer);
+
+		/* Set new link parameters */
+		irlap_apply_default_connection_parameters(self);
+
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	case FINAL_TIMER_EXPIRED:
+		if (self->retry_count < self->N3) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_disc_frame(self);
+			irlap_start_final_timer(self, self->final_timeout);
+			self->retry_count++;
+			/* Keep state */
+		} else {
+			irlap_apply_default_connection_parameters(self);
+
+			/*  Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
+		}
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d\n", __FUNCTION__, event);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_nrm_p (self, event, skb, info)
+ *
+ *   NRM_P (Normal Response Mode as Primary), The primary station has given
+ *   permissions to a secondary station to transmit IrLAP resonse frames
+ *   (by sending a frame with the P bit set). The primary station will not
+ *   transmit any frames and is expecting to receive frames only from the
+ *   secondary to which transmission permissions has been given.
+ */
+static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+	int ns_status;
+	int nr_status;
+
+	switch (event) {
+	case RECV_I_RSP: /* Optimize for the common case */
+		/* FIXME: must check for remote_busy below */
+#ifdef CONFIG_IRDA_FAST_RR
+		/*
+		 *  Reset the fast_RR so we can use the fast RR code with
+		 *  full speed the next time since peer may have more frames
+		 *  to transmitt
+		 */
+		self->fast_RR = FALSE;
+#endif /* CONFIG_IRDA_FAST_RR */
+		IRDA_ASSERT( info != NULL, return -1;);
+
+		ns_status = irlap_validate_ns_received(self, info->ns);
+		nr_status = irlap_validate_nr_received(self, info->nr);
+
+		/*
+		 *  Check for expected I(nformation) frame
+		 */
+		if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
+
+			/* Update Vr (next frame for us to receive) */
+			self->vr = (self->vr + 1) % 8;
+
+			/* Update Nr received, cleanup our retry queue */
+			irlap_update_nr_received(self, info->nr);
+
+			/*
+			 *  Got expected NR, so reset the
+			 *  retry_count. This is not done by IrLAP spec,
+			 *  which is strange!
+			 */
+			self->retry_count = 0;
+			self->ack_required = TRUE;
+
+			/*  poll bit cleared?  */
+			if (!info->pf) {
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_P);
+
+				irlap_data_indication(self, skb, FALSE);
+			} else {
+				/* No longer waiting for pf */
+				del_timer(&self->final_timer);
+
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+
+				/* Call higher layer *before* changing state
+				 * to give them a chance to send data in the
+				 * next LAP frame.
+				 * Jean II */
+				irlap_data_indication(self, skb, FALSE);
+
+				/* XMIT states are the most dangerous state
+				 * to be in, because user requests are
+				 * processed directly and may change state.
+				 * On the other hand, in NDM_P, those
+				 * requests are queued and we will process
+				 * them when we return to irlap_do_event().
+				 * Jean II
+				 */
+				irlap_next_state(self, LAP_XMIT_P);
+
+				/* This is the last frame.
+				 * Make sure it's always called in XMIT state.
+				 * - Jean II */
+				irlap_start_poll_timer(self, self->poll_timeout);
+			}
+			break;
+
+		}
+		/* Unexpected next to send (Ns) */
+		if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
+		{
+			if (!info->pf) {
+				irlap_update_nr_received(self, info->nr);
+
+				/*
+				 *  Wait until the last frame before doing
+				 *  anything
+				 */
+
+				/* Keep state */
+				irlap_next_state(self, LAP_NRM_P);
+			} else {
+				IRDA_DEBUG(4,
+				       "%s(), missing or duplicate frame!\n",
+					   __FUNCTION__);
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+				irlap_send_rr_frame(self, CMD_FRAME);
+
+				self->ack_required = FALSE;
+
+				irlap_start_final_timer(self, self->final_timeout);
+				irlap_next_state(self, LAP_NRM_P);
+			}
+			break;
+		}
+		/*
+		 *  Unexpected next to receive (Nr)
+		 */
+		if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
+		{
+			if (info->pf) {
+				self->vr = (self->vr + 1) % 8;
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				/* Resend rejected frames */
+				irlap_resend_rejected_frames(self, CMD_FRAME);
+
+				self->ack_required = FALSE;
+
+				/* Make sure we account for the time
+				 * to transmit our frames. See comemnts
+				 * in irlap_send_data_primary_poll().
+				 * Jean II */
+				irlap_start_final_timer(self, 2 * self->final_timeout);
+
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_P);
+
+				irlap_data_indication(self, skb, FALSE);
+			} else {
+				/*
+				 *  Do not resend frames until the last
+				 *  frame has arrived from the other
+				 *  device. This is not documented in
+				 *  IrLAP!!
+				 */
+				self->vr = (self->vr + 1) % 8;
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				self->ack_required = FALSE;
+
+				/* Keep state, do not move this line!*/
+				irlap_next_state(self, LAP_NRM_P);
+
+				irlap_data_indication(self, skb, FALSE);
+			}
+			break;
+		}
+		/*
+		 *  Unexpected next to send (Ns) and next to receive (Nr)
+		 *  Not documented by IrLAP!
+		 */
+		if ((ns_status == NS_UNEXPECTED) &&
+		    (nr_status == NR_UNEXPECTED))
+		{
+			IRDA_DEBUG(4, "%s(), unexpected nr and ns!\n",
+				   __FUNCTION__);
+			if (info->pf) {
+				/* Resend rejected frames */
+				irlap_resend_rejected_frames(self, CMD_FRAME);
+
+				/* Give peer some time to retransmit! 
+				 * But account for our own Tx. */
+				irlap_start_final_timer(self, 2 * self->final_timeout);
+
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_P);
+			} else {
+				/* Update Nr received */
+				/* irlap_update_nr_received( info->nr); */
+
+				self->ack_required = FALSE;
+			}
+			break;
+		}
+
+		/*
+		 *  Invalid NR or NS
+		 */
+		if ((nr_status == NR_INVALID) || (ns_status == NS_INVALID)) {
+			if (info->pf) {
+				del_timer(&self->final_timer);
+
+				irlap_next_state(self, LAP_RESET_WAIT);
+
+				irlap_disconnect_indication(self, LAP_RESET_INDICATION);
+				self->xmitflag = TRUE;
+			} else {
+				del_timer(&self->final_timer);
+
+				irlap_disconnect_indication(self, LAP_RESET_INDICATION);
+
+				self->xmitflag = FALSE;
+			}
+			break;
+		}
+		IRDA_DEBUG(1, "%s(), Not implemented!\n", __FUNCTION__);
+		IRDA_DEBUG(1, "%s(), event=%s, ns_status=%d, nr_status=%d\n",
+		       __FUNCTION__, irlap_event[event], ns_status, nr_status);
+		break;
+	case RECV_UI_FRAME:
+		/* Poll bit cleared? */
+		if (!info->pf) {
+			irlap_data_indication(self, skb, TRUE);
+			irlap_next_state(self, LAP_NRM_P);
+		} else {
+			del_timer(&self->final_timer);
+			irlap_data_indication(self, skb, TRUE);
+			irlap_next_state(self, LAP_XMIT_P);
+			IRDA_DEBUG(1, "%s: RECV_UI_FRAME: next state %s\n", __FUNCTION__, irlap_state[self->state]);
+			irlap_start_poll_timer(self, self->poll_timeout);
+		}
+		break;
+	case RECV_RR_RSP:
+		/*
+		 *  If you get a RR, the remote isn't busy anymore,
+		 *  no matter what the NR
+		 */
+		self->remote_busy = FALSE;
+
+		/*
+		 *  Nr as expected?
+		 */
+		ret = irlap_validate_nr_received(self, info->nr);
+		if (ret == NR_EXPECTED) {
+			/* Stop final timer */
+			del_timer(&self->final_timer);
+
+			/* Update Nr received */
+			irlap_update_nr_received(self, info->nr);
+
+			/*
+			 *  Got expected NR, so reset the retry_count. This
+			 *  is not done by the IrLAP standard , which is
+			 *  strange! DB.
+			 */
+			self->retry_count = 0;
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+
+			irlap_next_state(self, LAP_XMIT_P);
+
+			/* Start poll timer */
+			irlap_start_poll_timer(self, self->poll_timeout);
+		} else if (ret == NR_UNEXPECTED) {
+			IRDA_ASSERT(info != NULL, return -1;);
+			/*
+			 *  Unexpected nr!
+			 */
+
+			/* Update Nr received */
+			irlap_update_nr_received(self, info->nr);
+
+			IRDA_DEBUG(4, "RECV_RR_FRAME: Retrans:%d, nr=%d, va=%d, "
+			      "vs=%d, vr=%d\n",
+			      self->retry_count, info->nr, self->va,
+			      self->vs, self->vr);
+
+			/* Resend rejected frames */
+			irlap_resend_rejected_frames(self, CMD_FRAME);
+
+			/* Final timer ??? Jean II */
+
+			irlap_next_state(self, LAP_NRM_P);
+		} else if (ret == NR_INVALID) {
+			IRDA_DEBUG(1, "%s(), Received RR with "
+				   "invalid nr !\n", __FUNCTION__);
+			del_timer(&self->final_timer);
+
+			irlap_next_state(self, LAP_RESET_WAIT);
+
+			irlap_disconnect_indication(self, LAP_RESET_INDICATION);
+			self->xmitflag = TRUE;
+		}
+		break;
+	case RECV_RNR_RSP:
+		IRDA_ASSERT(info != NULL, return -1;);
+
+		/* Stop final timer */
+		del_timer(&self->final_timer);
+		self->remote_busy = TRUE;
+
+		/* Update Nr received */
+		irlap_update_nr_received(self, info->nr);
+		irlap_next_state(self, LAP_XMIT_P);
+
+		/* Start poll timer */
+		irlap_start_poll_timer(self, self->poll_timeout);
+		break;
+	case RECV_FRMR_RSP:
+		del_timer(&self->final_timer);
+		self->xmitflag = TRUE;
+		irlap_next_state(self, LAP_RESET_WAIT);
+		irlap_reset_indication(self);
+		break;
+	case FINAL_TIMER_EXPIRED:
+		/*
+		 *  We are allowed to wait for additional 300 ms if
+		 *  final timer expires when we are in the middle
+		 *  of receiving a frame (page 45, IrLAP). Check that
+		 *  we only do this once for each frame.
+		 */
+		if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
+			IRDA_DEBUG(1, "FINAL_TIMER_EXPIRED when receiving a "
+			      "frame! Waiting a little bit more!\n");
+			irlap_start_final_timer(self, msecs_to_jiffies(300));
+
+			/*
+			 *  Don't allow this to happen one more time in a row,
+			 *  or else we can get a pretty tight loop here if
+			 *  if we only receive half a frame. DB.
+			 */
+			self->add_wait = TRUE;
+			break;
+		}
+		self->add_wait = FALSE;
+
+		/* N2 is the disconnect timer. Until we reach it, we retry */
+		if (self->retry_count < self->N2) {
+			/* Retry sending the pf bit to the secondary */
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, CMD_FRAME);
+
+			irlap_start_final_timer(self, self->final_timeout);
+			self->retry_count++;
+			IRDA_DEBUG(4, "irlap_state_nrm_p: FINAL_TIMER_EXPIRED:"
+				   " retry_count=%d\n", self->retry_count);
+
+			/* Early warning event. I'm using a pretty liberal
+			 * interpretation of the spec and generate an event
+			 * every time the timer is multiple of N1 (and not
+			 * only the first time). This allow application
+			 * to know precisely if connectivity restart...
+			 * Jean II */
+			if((self->retry_count % self->N1) == 0)
+				irlap_status_indication(self,
+							STATUS_NO_ACTIVITY);
+
+			/* Keep state */
+		} else {
+			irlap_apply_default_connection_parameters(self);
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
+		}
+		break;
+	case RECV_REJ_RSP:
+		irlap_update_nr_received(self, info->nr);
+		if (self->remote_busy) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, CMD_FRAME);
+		} else
+			irlap_resend_rejected_frames(self, CMD_FRAME);
+		irlap_start_final_timer(self, 2 * self->final_timeout);
+		break;
+	case RECV_SREJ_RSP:
+		irlap_update_nr_received(self, info->nr);
+		if (self->remote_busy) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, CMD_FRAME);
+		} else
+			irlap_resend_rejected_frame(self, CMD_FRAME);
+		irlap_start_final_timer(self, 2 * self->final_timeout);
+		break;
+	case RECV_RD_RSP:
+		IRDA_DEBUG(1, "%s(), RECV_RD_RSP\n", __FUNCTION__);
+
+		irlap_flush_all_queues(self);
+		irlap_next_state(self, LAP_XMIT_P);
+		/* Call back the LAP state machine to do a proper disconnect */
+		irlap_disconnect_request(self);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %s\n",
+			    __FUNCTION__, irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_reset_wait (event, skb, info)
+ *
+ *    We have informed the service user of a reset condition, and is
+ *    awaiting reset of disconnect request.
+ *
+ */
+static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
+				  struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(3, "%s(), event = %s\n", __FUNCTION__, irlap_event[event]);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case RESET_REQUEST:
+		if (self->xmitflag) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_snrm_frame(self, NULL);
+			irlap_start_final_timer(self, self->final_timeout);
+			irlap_next_state(self, LAP_RESET);
+		} else {
+			irlap_start_final_timer(self, self->final_timeout);
+			irlap_next_state(self, LAP_RESET);
+		}
+		break;
+	case DISCONNECT_REQUEST:
+		irlap_wait_min_turn_around( self, &self->qos_tx);
+		irlap_send_disc_frame( self);
+		irlap_flush_all_queues( self);
+		irlap_start_final_timer( self, self->final_timeout);
+		self->retry_count = 0;
+		irlap_next_state( self, LAP_PCLOSE);
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %s\n", __FUNCTION__,
+			   irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_reset (self, event, skb, info)
+ *
+ *    We have sent a SNRM reset command to the peer layer, and is awaiting
+ *    reply.
+ *
+ */
+static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(3, "%s(), event = %s\n", __FUNCTION__, irlap_event[event]);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case RECV_DISC_CMD:
+		del_timer(&self->final_timer);
+
+		irlap_apply_default_connection_parameters(self);
+
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		irlap_disconnect_indication(self, LAP_NO_RESPONSE);
+
+		break;
+	case RECV_UA_RSP:
+		del_timer(&self->final_timer);
+
+		/* Initiate connection state */
+		irlap_initiate_connection_state(self);
+
+		irlap_reset_confirm();
+
+		self->remote_busy = FALSE;
+
+		irlap_next_state(self, LAP_XMIT_P);
+
+		irlap_start_poll_timer(self, self->poll_timeout);
+
+		break;
+	case FINAL_TIMER_EXPIRED:
+		if (self->retry_count < 3) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+
+			IRDA_ASSERT(self->netdev != NULL, return -1;);
+			irlap_send_snrm_frame(self, self->qos_dev);
+
+			self->retry_count++; /* Experimental!! */
+
+			irlap_start_final_timer(self, self->final_timeout);
+			irlap_next_state(self, LAP_RESET);
+		} else if (self->retry_count >= self->N3) {
+			irlap_apply_default_connection_parameters(self);
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+
+			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
+		}
+		break;
+	case RECV_SNRM_CMD:
+		/*
+		 * SNRM frame is not allowed to contain an I-field in this
+		 * state
+		 */
+		if (!info) {
+			IRDA_DEBUG(3, "%s(), RECV_SNRM_CMD\n", __FUNCTION__);
+			irlap_initiate_connection_state(self);
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_ua_response_frame(self, &self->qos_rx);
+			irlap_reset_confirm();
+			irlap_start_wd_timer(self, self->wd_timeout);
+			irlap_next_state(self, LAP_NDM);
+		} else {
+			IRDA_DEBUG(0,
+				   "%s(), SNRM frame contained an I field!\n",
+				   __FUNCTION__);
+		}
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %s\n",
+			   __FUNCTION__, irlap_event[event]);
+
+		ret = -1;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_xmit_s (event, skb, info)
+ *
+ *   XMIT_S, The secondary station has been given the right to transmit,
+ *   and we therefor do not expect to receive any transmissions from other
+ *   stations.
+ */
+static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
+			      struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s(), event=%s\n", __FUNCTION__, irlap_event[event]);
+
+	IRDA_ASSERT(self != NULL, return -ENODEV;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
+
+	switch (event) {
+	case SEND_I_CMD:
+		/*
+		 *  Send frame only if send window > 0
+		 */
+		if ((self->window > 0) && (!self->remote_busy)) {
+			int nextfit;
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+			struct sk_buff *skb_next;
+
+			/*
+			 * Same deal as in irlap_state_xmit_p(), so see
+			 * the comments at that point.
+			 * We are the secondary, so there are only subtle
+			 * differences. - Jean II
+			 */
+
+			/* Check if a subsequent skb exist and would fit in
+			 * the current window (with respect to turnaround
+			 * time). - Jean II */
+			skb_next = skb_peek(&self->txq);
+			nextfit = ((skb_next != NULL) &&
+				   ((skb_next->len + skb->len) <=
+				    self->bytes_left));
+
+			/*
+			 *  Test if we have transmitted more bytes over the
+			 *  link than its possible to do with the current
+			 *  speed and turn-around-time.
+			 */
+			if((!nextfit) && (skb->len > self->bytes_left)) {
+				IRDA_DEBUG(0, "%s(), Not allowed to transmit"
+					   " more bytes!\n", __FUNCTION__);
+				/* Requeue the skb */
+				skb_queue_head(&self->txq, skb_get(skb));
+
+				/*
+				 *  Switch to NRM_S, this is only possible
+				 *  when we are in secondary mode, since we
+				 *  must be sure that we don't miss any RR
+				 *  frames
+				 */
+				self->window = self->window_size;
+				self->bytes_left = self->line_capacity;
+				irlap_start_wd_timer(self, self->wd_timeout);
+
+				irlap_next_state(self, LAP_NRM_S);
+				/* Slight difference with primary :
+				 * here we would wait for the other side to
+				 * expire the turnaround. - Jean II */
+
+				return -EPROTO; /* Try again later */
+			}
+			/* Substract space used by this skb */
+			self->bytes_left -= skb->len;
+#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/* Window has been adjusted for the max packet
+			 * size, so much simpler... - Jean II */
+			nextfit = (skb_queue_len(&self->txq) > 0);
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+			/*
+			 *  Send data with final bit cleared only if window > 1
+			 *  and there is more frames to be sent
+			 */
+			if ((self->window > 1) && (nextfit)) {
+				irlap_send_data_secondary(self, skb);
+				irlap_next_state(self, LAP_XMIT_S);
+			} else {
+				irlap_send_data_secondary_final(self, skb);
+				irlap_next_state(self, LAP_NRM_S);
+
+				/*
+				 * Make sure state machine does not try to send
+				 * any more frames
+				 */
+				ret = -EPROTO;
+			}
+		} else {
+			IRDA_DEBUG(2, "%s(), Unable to send!\n", __FUNCTION__);
+			skb_queue_head(&self->txq, skb_get(skb));
+			ret = -EPROTO;
+		}
+		break;
+	case DISCONNECT_REQUEST:
+		irlap_send_rd_frame(self);
+		irlap_flush_all_queues(self);
+		irlap_start_wd_timer(self, self->wd_timeout);
+		irlap_next_state(self, LAP_SCLOSE);
+		break;
+	case DATA_REQUEST:
+		/* Nothing to do, irlap_do_event() will send the packet
+		 * when we return... - Jean II */
+		break;
+	default:
+		IRDA_DEBUG(2, "%s(), Unknown event %s\n", __FUNCTION__,
+			   irlap_event[event]);
+
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_nrm_s (event, skb, info)
+ *
+ *    NRM_S (Normal Response Mode as Secondary) state, in this state we are
+ *    expecting to receive frames from the primary station
+ *
+ */
+static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
+			     struct sk_buff *skb, struct irlap_info *info)
+{
+	int ns_status;
+	int nr_status;
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s(), event=%s\n", __FUNCTION__, irlap_event[ event]);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	switch (event) {
+	case RECV_I_CMD: /* Optimize for the common case */
+		/* FIXME: must check for remote_busy below */
+		IRDA_DEBUG(4, "%s(), event=%s nr=%d, vs=%d, ns=%d, "
+			   "vr=%d, pf=%d\n", __FUNCTION__,
+			   irlap_event[event], info->nr,
+			   self->vs, info->ns, self->vr, info->pf);
+
+		self->retry_count = 0;
+
+		ns_status = irlap_validate_ns_received(self, info->ns);
+		nr_status = irlap_validate_nr_received(self, info->nr);
+		/*
+		 *  Check for expected I(nformation) frame
+		 */
+		if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
+
+			/* Update Vr (next frame for us to receive) */
+			self->vr = (self->vr + 1) % 8;
+
+			/* Update Nr received */
+			irlap_update_nr_received(self, info->nr);
+
+			/*
+			 *  poll bit cleared?
+			 */
+			if (!info->pf) {
+
+				self->ack_required = TRUE;
+
+				/*
+				 *  Starting WD-timer here is optional, but
+				 *  not recommended. Note 6 IrLAP p. 83
+				 */
+#if 0
+				irda_start_timer(WD_TIMER, self->wd_timeout);
+#endif
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_S);
+
+				irlap_data_indication(self, skb, FALSE);
+				break;
+			} else {
+				/*
+				 *  We should wait before sending RR, and
+				 *  also before changing to XMIT_S
+				 *  state. (note 1, IrLAP p. 82)
+				 */
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+
+				/*
+				 * Give higher layers a chance to
+				 * immediately reply with some data before
+				 * we decide if we should send a RR frame
+				 * or not
+				 */
+				irlap_data_indication(self, skb, FALSE);
+
+				/* Any pending data requests?  */
+				if ((skb_queue_len(&self->txq) > 0) &&
+				    (self->window > 0))
+				{
+					self->ack_required = TRUE;
+
+					del_timer(&self->wd_timer);
+
+					irlap_next_state(self, LAP_XMIT_S);
+				} else {
+					irlap_send_rr_frame(self, RSP_FRAME);
+					irlap_start_wd_timer(self,
+							     self->wd_timeout);
+
+					/* Keep the state */
+					irlap_next_state(self, LAP_NRM_S);
+				}
+				break;
+			}
+		}
+		/*
+		 *  Check for Unexpected next to send (Ns)
+		 */
+		if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
+		{
+			/* Unexpected next to send, with final bit cleared */
+			if (!info->pf) {
+				irlap_update_nr_received(self, info->nr);
+
+				irlap_start_wd_timer(self, self->wd_timeout);
+			} else {
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+				irlap_send_rr_frame(self, RSP_FRAME);
+
+				irlap_start_wd_timer(self, self->wd_timeout);
+			}
+			break;
+		}
+
+		/*
+		 *  Unexpected Next to Receive(NR) ?
+		 */
+		if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
+		{
+			if (info->pf) {
+				IRDA_DEBUG(4, "RECV_I_RSP: frame(s) lost\n");
+
+				self->vr = (self->vr + 1) % 8;
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				/* Resend rejected frames */
+				irlap_resend_rejected_frames(self, RSP_FRAME);
+
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_S);
+
+				irlap_data_indication(self, skb, FALSE);
+				irlap_start_wd_timer(self, self->wd_timeout);
+				break;
+			}
+			/*
+			 *  This is not documented in IrLAP!! Unexpected NR
+			 *  with poll bit cleared
+			 */
+			if (!info->pf) {
+				self->vr = (self->vr + 1) % 8;
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+
+				/* Keep state, do not move this line */
+				irlap_next_state(self, LAP_NRM_S);
+
+				irlap_data_indication(self, skb, FALSE);
+				irlap_start_wd_timer(self, self->wd_timeout);
+			}
+			break;
+		}
+
+		if (ret == NR_INVALID) {
+			IRDA_DEBUG(0, "NRM_S, NR_INVALID not implemented!\n");
+		}
+		if (ret == NS_INVALID) {
+			IRDA_DEBUG(0, "NRM_S, NS_INVALID not implemented!\n");
+		}
+		break;
+	case RECV_UI_FRAME:
+		/*
+		 *  poll bit cleared?
+		 */
+		if (!info->pf) {
+			irlap_data_indication(self, skb, TRUE);
+			irlap_next_state(self, LAP_NRM_S); /* Keep state */
+		} else {
+			/*
+			 *  Any pending data requests?
+			 */
+			if ((skb_queue_len(&self->txq) > 0) &&
+			    (self->window > 0) && !self->remote_busy)
+			{
+				irlap_data_indication(self, skb, TRUE);
+
+				del_timer(&self->wd_timer);
+
+				irlap_next_state(self, LAP_XMIT_S);
+			} else {
+				irlap_data_indication(self, skb, TRUE);
+
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+
+				irlap_send_rr_frame(self, RSP_FRAME);
+				self->ack_required = FALSE;
+
+				irlap_start_wd_timer(self, self->wd_timeout);
+
+				/* Keep the state */
+				irlap_next_state(self, LAP_NRM_S);
+			}
+		}
+		break;
+	case RECV_RR_CMD:
+		self->retry_count = 0;
+
+		/*
+		 *  Nr as expected?
+		 */
+		nr_status = irlap_validate_nr_received(self, info->nr);
+		if (nr_status == NR_EXPECTED) {
+			if ((skb_queue_len( &self->txq) > 0) &&
+			    (self->window > 0)) {
+				self->remote_busy = FALSE;
+
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+				del_timer(&self->wd_timer);
+
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+				irlap_next_state(self, LAP_XMIT_S);
+			} else {
+				self->remote_busy = FALSE;
+				/* Update Nr received */
+				irlap_update_nr_received(self, info->nr);
+				irlap_wait_min_turn_around(self, &self->qos_tx);
+				irlap_start_wd_timer(self, self->wd_timeout);
+
+				/* Note : if the link is idle (this case),
+				 * we never go in XMIT_S, so we never get a
+				 * chance to process any DISCONNECT_REQUEST.
+				 * Do it now ! - Jean II */
+				if (self->disconnect_pending) {
+					/* Disconnect */
+					irlap_send_rd_frame(self);
+					irlap_flush_all_queues(self);
+
+					irlap_next_state(self, LAP_SCLOSE);
+				} else {
+					/* Just send back pf bit */
+					irlap_send_rr_frame(self, RSP_FRAME);
+
+					irlap_next_state(self, LAP_NRM_S);
+				}
+			}
+		} else if (nr_status == NR_UNEXPECTED) {
+			self->remote_busy = FALSE;
+			irlap_update_nr_received(self, info->nr);
+			irlap_resend_rejected_frames(self, RSP_FRAME);
+
+			irlap_start_wd_timer(self, self->wd_timeout);
+
+			/* Keep state */
+			irlap_next_state(self, LAP_NRM_S);
+		} else {
+			IRDA_DEBUG(1, "%s(), invalid nr not implemented!\n",
+				   __FUNCTION__);
+		}
+		break;
+	case RECV_SNRM_CMD:
+		/* SNRM frame is not allowed to contain an I-field */
+		if (!info) {
+			del_timer(&self->wd_timer);
+			IRDA_DEBUG(1, "%s(), received SNRM cmd\n", __FUNCTION__);
+			irlap_next_state(self, LAP_RESET_CHECK);
+
+			irlap_reset_indication(self);
+		} else {
+			IRDA_DEBUG(0,
+				   "%s(), SNRM frame contained an I-field!\n",
+				   __FUNCTION__);
+
+		}
+		break;
+	case RECV_REJ_CMD:
+		irlap_update_nr_received(self, info->nr);
+		if (self->remote_busy) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, RSP_FRAME);
+		} else
+			irlap_resend_rejected_frames(self, RSP_FRAME);
+		irlap_start_wd_timer(self, self->wd_timeout);
+		break;
+	case RECV_SREJ_CMD:
+		irlap_update_nr_received(self, info->nr);
+		if (self->remote_busy) {
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rr_frame(self, RSP_FRAME);
+		} else
+			irlap_resend_rejected_frame(self, RSP_FRAME);
+		irlap_start_wd_timer(self, self->wd_timeout);
+		break;
+	case WD_TIMER_EXPIRED:
+		/*
+		 *  Wait until retry_count * n matches negotiated threshold/
+		 *  disconnect time (note 2 in IrLAP p. 82)
+		 *
+		 * Similar to irlap_state_nrm_p() -> FINAL_TIMER_EXPIRED
+		 * Note : self->wd_timeout = (self->final_timeout * 2),
+		 *   which explain why we use (self->N2 / 2) here !!!
+		 * Jean II
+		 */
+		IRDA_DEBUG(1, "%s(), retry_count = %d\n", __FUNCTION__,
+			   self->retry_count);
+
+		if (self->retry_count < (self->N2 / 2)) {
+			/* No retry, just wait for primary */
+			irlap_start_wd_timer(self, self->wd_timeout);
+			self->retry_count++;
+
+			if((self->retry_count % (self->N1 / 2)) == 0)
+				irlap_status_indication(self,
+							STATUS_NO_ACTIVITY);
+		} else {
+			irlap_apply_default_connection_parameters(self);
+
+			/* Always switch state before calling upper layers */
+			irlap_next_state(self, LAP_NDM);
+			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
+		}
+		break;
+	case RECV_DISC_CMD:
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		/* Send disconnect response */
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_send_ua_response_frame(self, NULL);
+
+		del_timer(&self->wd_timer);
+		irlap_flush_all_queues(self);
+		/* Set default link parameters */
+		irlap_apply_default_connection_parameters(self);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	case RECV_DISCOVERY_XID_CMD:
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_send_rr_frame(self, RSP_FRAME);
+		self->ack_required = TRUE;
+		irlap_start_wd_timer(self, self->wd_timeout);
+		irlap_next_state(self, LAP_NRM_S);
+
+		break;
+	case RECV_TEST_CMD:
+		/* Remove test frame header (only LAP header in NRM) */
+		skb_pull(skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
+
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_start_wd_timer(self, self->wd_timeout);
+
+		/* Send response (info will be copied) */
+		irlap_send_test_frame(self, self->caddr, info->daddr, skb);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __FUNCTION__,
+			   event, irlap_event[event]);
+
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlap_state_sclose (self, event, skb, info)
+ */
+static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event,
+			      struct sk_buff *skb, struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -ENODEV;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
+
+	switch (event) {
+	case RECV_DISC_CMD:
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		/* Send disconnect response */
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_send_ua_response_frame(self, NULL);
+
+		del_timer(&self->wd_timer);
+		/* Set default link parameters */
+		irlap_apply_default_connection_parameters(self);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	case RECV_DM_RSP:
+		/* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames
+		 * shall take us down into default NDM state, like DM_RSP
+		 */
+	case RECV_RR_RSP:
+	case RECV_RNR_RSP:
+	case RECV_REJ_RSP:
+	case RECV_SREJ_RSP:
+	case RECV_I_RSP:
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		del_timer(&self->wd_timer);
+		irlap_apply_default_connection_parameters(self);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	case WD_TIMER_EXPIRED:
+		/* Always switch state before calling upper layers */
+		irlap_next_state(self, LAP_NDM);
+
+		irlap_apply_default_connection_parameters(self);
+
+		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
+		break;
+	default:
+		/* IrLAP-1.1 p.82: in SCLOSE, basically any received frame
+		 * with pf=1 shall restart the wd-timer and resend the rd:rsp
+		 */
+		if (info != NULL  &&  info->pf) {
+			del_timer(&self->wd_timer);
+			irlap_wait_min_turn_around(self, &self->qos_tx);
+			irlap_send_rd_frame(self);
+			irlap_start_wd_timer(self, self->wd_timeout);
+			break;		/* stay in SCLOSE */
+		}
+
+		IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __FUNCTION__,
+			   event, irlap_event[event]);
+
+		ret = -EINVAL;
+		break;
+	}
+
+	return -1;
+}
+
+static int irlap_state_reset_check( struct irlap_cb *self, IRLAP_EVENT event,
+				   struct sk_buff *skb,
+				   struct irlap_info *info)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(1, "%s(), event=%s\n", __FUNCTION__, irlap_event[event]);
+
+	IRDA_ASSERT(self != NULL, return -ENODEV;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
+
+	switch (event) {
+	case RESET_RESPONSE:
+		irlap_send_ua_response_frame(self, &self->qos_rx);
+		irlap_initiate_connection_state(self);
+		irlap_start_wd_timer(self, WD_TIMEOUT);
+		irlap_flush_all_queues(self);
+
+		irlap_next_state(self, LAP_NRM_S);
+		break;
+	case DISCONNECT_REQUEST:
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+		irlap_send_rd_frame(self);
+		irlap_start_wd_timer(self, WD_TIMEOUT);
+		irlap_next_state(self, LAP_SCLOSE);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __FUNCTION__,
+			   event, irlap_event[event]);
+
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
new file mode 100644
index 00000000000..040abe714aa
--- /dev/null
+++ b/net/irda/irlap_frame.c
@@ -0,0 +1,1437 @@
+/*********************************************************************
+ *
+ * Filename:      irlap_frame.c
+ * Version:       1.0
+ * Description:   Build and transmit IrLAP frames
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Aug 19 10:27:26 1997
+ * Modified at:   Wed Jan  5 08:59:04 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/skbuff.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/irda.h>
+
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irda_device.h>
+#include <net/irda/irlap.h>
+#include <net/irda/wrapper.h>
+#include <net/irda/timer.h>
+#include <net/irda/irlap_frame.h>
+#include <net/irda/qos.h>
+
+static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
+			       int command);
+
+/*
+ * Function irlap_insert_info (self, skb)
+ *
+ *    Insert minimum turnaround time and speed information into the skb. We
+ *    need to do this since it's per packet relevant information. Safe to
+ *    have this function inlined since it's only called from one place
+ */
+static inline void irlap_insert_info(struct irlap_cb *self,
+				     struct sk_buff *skb)
+{
+	struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
+
+	/*
+	 * Insert MTT (min. turn time) and speed into skb, so that the
+	 * device driver knows which settings to use
+	 */
+	cb->magic = LAP_MAGIC;
+	cb->mtt = self->mtt_required;
+	cb->next_speed = self->speed;
+
+	/* Reset */
+	self->mtt_required = 0;
+
+	/*
+	 * Delay equals negotiated BOFs count, plus the number of BOFs to
+	 * force the negotiated minimum turnaround time
+	 */
+	cb->xbofs = self->bofs_count;
+	cb->next_xbofs = self->next_bofs;
+	cb->xbofs_delay = self->xbofs_delay;
+
+	/* Reset XBOF's delay (used only for getting min turn time) */
+	self->xbofs_delay = 0;
+	/* Put the correct xbofs value for the next packet */
+	self->bofs_count = self->next_bofs;
+}
+
+/*
+ * Function irlap_queue_xmit (self, skb)
+ *
+ *    A little wrapper for dev_queue_xmit, so we can insert some common
+ *    code into it.
+ */
+void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
+{
+	/* Some common init stuff */
+	skb->dev = self->netdev;
+	skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data;
+	skb->protocol = htons(ETH_P_IRDA);
+	skb->priority = TC_PRIO_BESTEFFORT;
+
+	irlap_insert_info(self, skb);
+
+	dev_queue_xmit(skb);
+}
+
+/*
+ * Function irlap_send_snrm_cmd (void)
+ *
+ *    Transmits a connect SNRM command frame
+ */
+void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
+{
+	struct sk_buff *tx_skb;
+	struct snrm_frame *frame;
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Allocate frame */
+	tx_skb = dev_alloc_skb(64);
+	if (!tx_skb)
+		return;
+
+	frame = (struct snrm_frame *) skb_put(tx_skb, 2);
+
+	/* Insert connection address field */
+	if (qos)
+		frame->caddr = CMD_FRAME | CBROADCAST;
+	else
+		frame->caddr = CMD_FRAME | self->caddr;
+
+	/* Insert control field */
+	frame->control = SNRM_CMD | PF_BIT;
+
+	/*
+	 *  If we are establishing a connection then insert QoS paramerters
+	 */
+	if (qos) {
+		skb_put(tx_skb, 9); /* 21 left */
+		frame->saddr = cpu_to_le32(self->saddr);
+		frame->daddr = cpu_to_le32(self->daddr);
+
+		frame->ncaddr = self->caddr;
+
+		ret = irlap_insert_qos_negotiation_params(self, tx_skb);
+		if (ret < 0) {
+			dev_kfree_skb(tx_skb);
+			return;
+		}
+	}
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_recv_snrm_cmd (skb, info)
+ *
+ *    Received SNRM (Set Normal Response Mode) command frame
+ *
+ */
+static void irlap_recv_snrm_cmd(struct irlap_cb *self, struct sk_buff *skb,
+				struct irlap_info *info)
+{
+	struct snrm_frame *frame;
+
+	if (pskb_may_pull(skb,sizeof(struct snrm_frame))) {
+		frame = (struct snrm_frame *) skb->data;
+
+		/* Copy the new connection address ignoring the C/R bit */
+		info->caddr = frame->ncaddr & 0xFE;
+
+		/* Check if the new connection address is valid */
+		if ((info->caddr == 0x00) || (info->caddr == 0xfe)) {
+			IRDA_DEBUG(3, "%s(), invalid connection address!\n",
+				   __FUNCTION__);
+			return;
+		}
+
+		/* Copy peer device address */
+		info->daddr = le32_to_cpu(frame->saddr);
+		info->saddr = le32_to_cpu(frame->daddr);
+
+		/* Only accept if addressed directly to us */
+		if (info->saddr != self->saddr) {
+			IRDA_DEBUG(2, "%s(), not addressed to us!\n",
+				   __FUNCTION__);
+			return;
+		}
+		irlap_do_event(self, RECV_SNRM_CMD, skb, info);
+	} else {
+		/* Signal that this SNRM frame does not contain and I-field */
+		irlap_do_event(self, RECV_SNRM_CMD, skb, NULL);
+	}
+}
+
+/*
+ * Function irlap_send_ua_response_frame (qos)
+ *
+ *    Send UA (Unnumbered Acknowledgement) frame
+ *
+ */
+void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
+{
+	struct sk_buff *tx_skb;
+	struct ua_frame *frame;
+	int ret;
+
+	IRDA_DEBUG(2, "%s() <%ld>\n", __FUNCTION__, jiffies);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Allocate frame */
+	tx_skb = dev_alloc_skb(64);
+	if (!tx_skb)
+		return;
+
+	frame = (struct ua_frame *) skb_put(tx_skb, 10);
+
+	/* Build UA response */
+	frame->caddr = self->caddr;
+	frame->control = UA_RSP | PF_BIT;
+
+	frame->saddr = cpu_to_le32(self->saddr);
+	frame->daddr = cpu_to_le32(self->daddr);
+
+	/* Should we send QoS negotiation parameters? */
+	if (qos) {
+		ret = irlap_insert_qos_negotiation_params(self, tx_skb);
+		if (ret < 0) {
+			dev_kfree_skb(tx_skb);
+			return;
+		}
+	}
+
+	irlap_queue_xmit(self, tx_skb);
+}
+
+
+/*
+ * Function irlap_send_dm_frame (void)
+ *
+ *    Send disconnected mode (DM) frame
+ *
+ */
+void irlap_send_dm_frame( struct irlap_cb *self)
+{
+	struct sk_buff *tx_skb = NULL;
+	__u8 *frame;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	tx_skb = dev_alloc_skb(32);
+	if (!tx_skb)
+		return;
+
+	frame = skb_put(tx_skb, 2);
+
+	if (self->state == LAP_NDM)
+		frame[0] = CBROADCAST;
+	else
+		frame[0] = self->caddr;
+
+	frame[1] = DM_RSP | PF_BIT;
+
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_send_disc_frame (void)
+ *
+ *    Send disconnect (DISC) frame
+ *
+ */
+void irlap_send_disc_frame(struct irlap_cb *self)
+{
+	struct sk_buff *tx_skb = NULL;
+	__u8 *frame;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	tx_skb = dev_alloc_skb(16);
+	if (!tx_skb)
+		return;
+
+	frame = skb_put(tx_skb, 2);
+
+	frame[0] = self->caddr | CMD_FRAME;
+	frame[1] = DISC_CMD | PF_BIT;
+
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_send_discovery_xid_frame (S, s, command)
+ *
+ *    Build and transmit a XID (eXchange station IDentifier) discovery
+ *    frame.
+ */
+void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s,
+				    __u8 command, discovery_t *discovery)
+{
+	struct sk_buff *tx_skb = NULL;
+	struct xid_frame *frame;
+	__u32 bcast = BROADCAST;
+	__u8 *info;
+
+	IRDA_DEBUG(4, "%s(), s=%d, S=%d, command=%d\n", __FUNCTION__,
+		   s, S, command);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(discovery != NULL, return;);
+
+	tx_skb = dev_alloc_skb(64);
+	if (!tx_skb)
+		return;
+
+	skb_put(tx_skb, 14);
+	frame = (struct xid_frame *) tx_skb->data;
+
+	if (command) {
+		frame->caddr = CBROADCAST | CMD_FRAME;
+		frame->control =  XID_CMD | PF_BIT;
+	} else {
+		frame->caddr = CBROADCAST;
+		frame->control =  XID_RSP | PF_BIT;
+	}
+	frame->ident = XID_FORMAT;
+
+	frame->saddr = cpu_to_le32(self->saddr);
+
+	if (command)
+		frame->daddr = cpu_to_le32(bcast);
+	else
+		frame->daddr = cpu_to_le32(discovery->data.daddr);
+
+	switch (S) {
+	case 1:
+		frame->flags = 0x00;
+		break;
+	case 6:
+		frame->flags = 0x01;
+		break;
+	case 8:
+		frame->flags = 0x02;
+		break;
+	case 16:
+		frame->flags = 0x03;
+		break;
+	default:
+		frame->flags = 0x02;
+		break;
+	}
+
+	frame->slotnr = s;
+	frame->version = 0x00;
+
+	/*
+	 *  Provide info for final slot only in commands, and for all
+	 *  responses. Send the second byte of the hint only if the
+	 *  EXTENSION bit is set in the first byte.
+	 */
+	if (!command || (frame->slotnr == 0xff)) {
+		int len;
+
+		if (discovery->data.hints[0] & HINT_EXTENSION) {
+			info = skb_put(tx_skb, 2);
+			info[0] = discovery->data.hints[0];
+			info[1] = discovery->data.hints[1];
+		} else {
+			info = skb_put(tx_skb, 1);
+			info[0] = discovery->data.hints[0];
+		}
+		info = skb_put(tx_skb, 1);
+		info[0] = discovery->data.charset;
+
+		len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb));
+		info = skb_put(tx_skb, len);
+		memcpy(info, discovery->data.info, len);
+	}
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_recv_discovery_xid_rsp (skb, info)
+ *
+ *    Received a XID discovery response
+ *
+ */
+static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
+					 struct sk_buff *skb,
+					 struct irlap_info *info)
+{
+	struct xid_frame *xid;
+	discovery_t *discovery = NULL;
+	__u8 *discovery_info;
+	char *text;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
+		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		return;
+	}
+		
+	xid = (struct xid_frame *) skb->data;
+
+	info->daddr = le32_to_cpu(xid->saddr);
+	info->saddr = le32_to_cpu(xid->daddr);
+
+	/* Make sure frame is addressed to us */
+	if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
+		IRDA_DEBUG(0, "%s(), frame is not addressed to us!\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	if ((discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
+		IRDA_WARNING("%s: kmalloc failed!\n", __FUNCTION__);
+		return;
+	}
+	memset(discovery, 0, sizeof(discovery_t));
+
+	discovery->data.daddr = info->daddr;
+	discovery->data.saddr = self->saddr;
+	discovery->timestamp = jiffies;
+
+	IRDA_DEBUG(4, "%s(), daddr=%08x\n", __FUNCTION__,
+		   discovery->data.daddr);
+
+	discovery_info = skb_pull(skb, sizeof(struct xid_frame));
+
+	/* Get info returned from peer */
+	discovery->data.hints[0] = discovery_info[0];
+	if (discovery_info[0] & HINT_EXTENSION) {
+		IRDA_DEBUG(4, "EXTENSION\n");
+		discovery->data.hints[1] = discovery_info[1];
+		discovery->data.charset = discovery_info[2];
+		text = (char *) &discovery_info[3];
+	} else {
+		discovery->data.hints[1] = 0;
+		discovery->data.charset = discovery_info[1];
+		text = (char *) &discovery_info[2];
+	}
+	/*
+	 *  Terminate info string, should be safe since this is where the
+	 *  FCS bytes resides.
+	 */
+	skb->data[skb->len] = '\0';
+	strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
+	discovery->name_len = strlen(discovery->data.info);
+
+	info->discovery = discovery;
+
+	irlap_do_event(self, RECV_DISCOVERY_XID_RSP, skb, info);
+}
+
+/*
+ * Function irlap_recv_discovery_xid_cmd (skb, info)
+ *
+ *    Received a XID discovery command
+ *
+ */
+static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
+					 struct sk_buff *skb,
+					 struct irlap_info *info)
+{
+	struct xid_frame *xid;
+	discovery_t *discovery = NULL;
+	__u8 *discovery_info;
+	char *text;
+
+	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
+		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		return;
+	}
+	
+	xid = (struct xid_frame *) skb->data;
+
+	info->daddr = le32_to_cpu(xid->saddr);
+	info->saddr = le32_to_cpu(xid->daddr);
+
+	/* Make sure frame is addressed to us */
+	if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
+		IRDA_DEBUG(0, "%s(), frame is not addressed to us!\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	switch (xid->flags & 0x03) {
+	case 0x00:
+		info->S = 1;
+		break;
+	case 0x01:
+		info->S = 6;
+		break;
+	case 0x02:
+		info->S = 8;
+		break;
+	case 0x03:
+		info->S = 16;
+		break;
+	default:
+		/* Error!! */
+		return;
+	}
+	info->s = xid->slotnr;
+
+	discovery_info = skb_pull(skb, sizeof(struct xid_frame));
+
+	/*
+	 *  Check if last frame
+	 */
+	if (info->s == 0xff) {
+		/* Check if things are sane at this point... */
+		if((discovery_info == NULL) || 
+		   !pskb_may_pull(skb, 3)) {
+			IRDA_ERROR("%s: discovery frame to short!\n",
+				   __FUNCTION__);
+			return;
+		}
+
+		/*
+		 *  We now have some discovery info to deliver!
+		 */
+		discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC);
+		if (!discovery) {
+			IRDA_WARNING("%s: unable to malloc!\n", __FUNCTION__);
+			return;
+		}
+
+		discovery->data.daddr = info->daddr;
+		discovery->data.saddr = self->saddr;
+		discovery->timestamp = jiffies;
+
+		discovery->data.hints[0] = discovery_info[0];
+		if (discovery_info[0] & HINT_EXTENSION) {
+			discovery->data.hints[1] = discovery_info[1];
+			discovery->data.charset = discovery_info[2];
+			text = (char *) &discovery_info[3];
+		} else {
+			discovery->data.hints[1] = 0;
+			discovery->data.charset = discovery_info[1];
+			text = (char *) &discovery_info[2];
+		}
+		/*
+		 *  Terminate string, should be safe since this is where the
+		 *  FCS bytes resides.
+		 */
+		skb->data[skb->len] = '\0';
+		strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
+		discovery->name_len = strlen(discovery->data.info);
+
+		info->discovery = discovery;
+	} else
+		info->discovery = NULL;
+
+	irlap_do_event(self, RECV_DISCOVERY_XID_CMD, skb, info);
+}
+
+/*
+ * Function irlap_send_rr_frame (self, command)
+ *
+ *    Build and transmit RR (Receive Ready) frame. Notice that it is currently
+ *    only possible to send RR frames with the poll bit set.
+ */
+void irlap_send_rr_frame(struct irlap_cb *self, int command)
+{
+	struct sk_buff *tx_skb;
+	__u8 *frame;
+
+	tx_skb = dev_alloc_skb(16);
+	if (!tx_skb)
+		return;
+
+	frame = skb_put(tx_skb, 2);
+
+	frame[0] = self->caddr;
+	frame[0] |= (command) ? CMD_FRAME : 0;
+
+	frame[1] = RR | PF_BIT | (self->vr << 5);
+
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_send_rd_frame (self)
+ *
+ *    Request disconnect. Used by a secondary station to request the
+ *    disconnection of the link.
+ */
+void irlap_send_rd_frame(struct irlap_cb *self)
+{
+	struct sk_buff *tx_skb;
+	__u8 *frame;
+
+	tx_skb = dev_alloc_skb(16);
+	if (!tx_skb)
+		return;
+
+	frame = skb_put(tx_skb, 2);
+
+	frame[0] = self->caddr;
+	frame[1] = RD_RSP | PF_BIT;
+
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_recv_rr_frame (skb, info)
+ *
+ *    Received RR (Receive Ready) frame from peer station, no harm in
+ *    making it inline since its called only from one single place
+ *    (irlap_driver_rcv).
+ */
+static inline void irlap_recv_rr_frame(struct irlap_cb *self,
+				       struct sk_buff *skb,
+				       struct irlap_info *info, int command)
+{
+	info->nr = skb->data[1] >> 5;
+
+	/* Check if this is a command or a response frame */
+	if (command)
+		irlap_do_event(self, RECV_RR_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_RR_RSP, skb, info);
+}
+
+/*
+ * Function irlap_recv_rnr_frame (self, skb, info)
+ *
+ *    Received RNR (Receive Not Ready) frame from peer station
+ *
+ */
+static void irlap_recv_rnr_frame(struct irlap_cb *self, struct sk_buff *skb,
+				 struct irlap_info *info, int command)
+{
+	info->nr = skb->data[1] >> 5;
+
+	IRDA_DEBUG(4, "%s(), nr=%d, %ld\n", __FUNCTION__, info->nr, jiffies);
+
+	if (command)
+		irlap_do_event(self, RECV_RNR_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_RNR_RSP, skb, info);
+}
+
+static void irlap_recv_rej_frame(struct irlap_cb *self, struct sk_buff *skb,
+				 struct irlap_info *info, int command)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__);
+
+	info->nr = skb->data[1] >> 5;
+
+	/* Check if this is a command or a response frame */
+	if (command)
+		irlap_do_event(self, RECV_REJ_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_REJ_RSP, skb, info);
+}
+
+static void irlap_recv_srej_frame(struct irlap_cb *self, struct sk_buff *skb,
+				  struct irlap_info *info, int command)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__);
+
+	info->nr = skb->data[1] >> 5;
+
+	/* Check if this is a command or a response frame */
+	if (command)
+		irlap_do_event(self, RECV_SREJ_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_SREJ_RSP, skb, info);
+}
+
+static void irlap_recv_disc_frame(struct irlap_cb *self, struct sk_buff *skb,
+				  struct irlap_info *info, int command)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	/* Check if this is a command or a response frame */
+	if (command)
+		irlap_do_event(self, RECV_DISC_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_RD_RSP, skb, info);
+}
+
+/*
+ * Function irlap_recv_ua_frame (skb, frame)
+ *
+ *    Received UA (Unnumbered Acknowledgement) frame
+ *
+ */
+static inline void irlap_recv_ua_frame(struct irlap_cb *self,
+				       struct sk_buff *skb,
+				       struct irlap_info *info)
+{
+	irlap_do_event(self, RECV_UA_RSP, skb, info);
+}
+
+/*
+ * Function irlap_send_data_primary(self, skb)
+ *
+ *    Send I-frames as the primary station but without the poll bit set
+ *
+ */
+void irlap_send_data_primary(struct irlap_cb *self, struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+
+	if (skb->data[1] == I_FRAME) {
+
+		/*
+		 *  Insert frame sequence number (Vs) in control field before
+		 *  inserting into transmit window queue.
+		 */
+		skb->data[1] = I_FRAME | (self->vs << 1);
+
+		/*
+		 *  Insert frame in store, in case of retransmissions
+		 *  Increase skb reference count, see irlap_do_event()
+		 */
+		skb_get(skb);
+		skb_queue_tail(&self->wx_list, skb);
+
+		/* Copy buffer */
+		tx_skb = skb_clone(skb, GFP_ATOMIC);
+		if (tx_skb == NULL) {
+			return;
+		}
+
+		self->vs = (self->vs + 1) % 8;
+		self->ack_required = FALSE;
+		self->window -= 1;
+
+		irlap_send_i_frame( self, tx_skb, CMD_FRAME);
+	} else {
+		IRDA_DEBUG(4, "%s(), sending unreliable frame\n", __FUNCTION__);
+		irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
+		self->window -= 1;
+	}
+}
+/*
+ * Function irlap_send_data_primary_poll (self, skb)
+ *
+ *    Send I(nformation) frame as primary with poll bit set
+ */
+void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+	int transmission_time;
+
+	/* Stop P timer */
+	del_timer(&self->poll_timer);
+
+	/* Is this reliable or unreliable data? */
+	if (skb->data[1] == I_FRAME) {
+
+		/*
+		 *  Insert frame sequence number (Vs) in control field before
+		 *  inserting into transmit window queue.
+		 */
+		skb->data[1] = I_FRAME | (self->vs << 1);
+
+		/*
+		 *  Insert frame in store, in case of retransmissions
+		 *  Increase skb reference count, see irlap_do_event()
+		 */
+		skb_get(skb);
+		skb_queue_tail(&self->wx_list, skb);
+
+		/* Copy buffer */
+		tx_skb = skb_clone(skb, GFP_ATOMIC);
+		if (tx_skb == NULL) {
+			return;
+		}
+
+		/*
+		 *  Set poll bit if necessary. We do this to the copied
+		 *  skb, since retransmitted need to set or clear the poll
+		 *  bit depending on when they are sent.
+		 */
+		tx_skb->data[1] |= PF_BIT;
+
+		self->vs = (self->vs + 1) % 8;
+		self->ack_required = FALSE;
+
+		irlap_send_i_frame(self, tx_skb, CMD_FRAME);
+	} else {
+		IRDA_DEBUG(4, "%s(), sending unreliable frame\n", __FUNCTION__);
+
+		if (self->ack_required) {
+			irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
+			irlap_send_rr_frame(self, CMD_FRAME);
+			self->ack_required = FALSE;
+		} else {
+			skb->data[1] |= PF_BIT;
+			irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
+		}
+	}
+
+	/* How much time we took for transmission of all frames.
+	 * We don't know, so let assume we used the full window. Jean II */
+	transmission_time = self->final_timeout;
+
+	/* Reset parameter so that we can fill next window */
+	self->window = self->window_size;
+
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	/* Remove what we have not used. Just do a prorata of the
+	 * bytes left in window to window capacity.
+	 * See max_line_capacities[][] in qos.c for details. Jean II */
+	transmission_time -= (self->final_timeout * self->bytes_left
+			      / self->line_capacity);
+	IRDA_DEBUG(4, "%s() adjusting transmission_time : ft=%d, bl=%d, lc=%d -> tt=%d\n", __FUNCTION__, self->final_timeout, self->bytes_left, self->line_capacity, transmission_time);
+
+	/* We are allowed to transmit a maximum number of bytes again. */
+	self->bytes_left = self->line_capacity;
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+
+	/*
+	 * The network layer has a intermediate buffer between IrLAP
+	 * and the IrDA driver which can contain 8 frames. So, even
+	 * though IrLAP is currently sending the *last* frame of the
+	 * tx-window, the driver most likely has only just started
+	 * sending the *first* frame of the same tx-window.
+	 * I.e. we are always at the very begining of or Tx window.
+	 * Now, we are supposed to set the final timer from the end
+	 * of our tx-window to let the other peer reply. So, we need
+	 * to add extra time to compensate for the fact that we
+	 * are really at the start of tx-window, otherwise the final timer
+	 * might expire before he can answer...
+	 * Jean II
+	 */
+	irlap_start_final_timer(self, self->final_timeout + transmission_time);
+
+	/*
+	 * The clever amongst you might ask why we do this adjustement
+	 * only here, and not in all the other cases in irlap_event.c.
+	 * In all those other case, we only send a very short management
+	 * frame (few bytes), so the adjustement would be lost in the
+	 * noise...
+	 * The exception of course is irlap_resend_rejected_frame().
+	 * Jean II */
+}
+
+/*
+ * Function irlap_send_data_secondary_final (self, skb)
+ *
+ *    Send I(nformation) frame as secondary with final bit set
+ *
+ */
+void irlap_send_data_secondary_final(struct irlap_cb *self,
+				     struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb = NULL;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Is this reliable or unreliable data? */
+	if (skb->data[1] == I_FRAME) {
+
+		/*
+		 *  Insert frame sequence number (Vs) in control field before
+		 *  inserting into transmit window queue.
+		 */
+		skb->data[1] = I_FRAME | (self->vs << 1);
+
+		/*
+		 *  Insert frame in store, in case of retransmissions
+		 *  Increase skb reference count, see irlap_do_event()
+		 */
+		skb_get(skb);
+		skb_queue_tail(&self->wx_list, skb);
+
+		tx_skb = skb_clone(skb, GFP_ATOMIC);
+		if (tx_skb == NULL) {
+			return;
+		}
+
+		tx_skb->data[1] |= PF_BIT;
+
+		self->vs = (self->vs + 1) % 8;
+		self->ack_required = FALSE;
+
+		irlap_send_i_frame(self, tx_skb, RSP_FRAME);
+	} else {
+		if (self->ack_required) {
+			irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
+			irlap_send_rr_frame(self, RSP_FRAME);
+			self->ack_required = FALSE;
+		} else {
+			skb->data[1] |= PF_BIT;
+			irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
+		}
+	}
+
+	self->window = self->window_size;
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	/* We are allowed to transmit a maximum number of bytes again. */
+	self->bytes_left = self->line_capacity;
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+
+	irlap_start_wd_timer(self, self->wd_timeout);
+}
+
+/*
+ * Function irlap_send_data_secondary (self, skb)
+ *
+ *    Send I(nformation) frame as secondary without final bit set
+ *
+ */
+void irlap_send_data_secondary(struct irlap_cb *self, struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb = NULL;
+
+	/* Is this reliable or unreliable data? */
+	if (skb->data[1] == I_FRAME) {
+
+		/*
+		 *  Insert frame sequence number (Vs) in control field before
+		 *  inserting into transmit window queue.
+		 */
+		skb->data[1] = I_FRAME | (self->vs << 1);
+
+		/*
+		 *  Insert frame in store, in case of retransmissions
+		 *  Increase skb reference count, see irlap_do_event()
+		 */
+		skb_get(skb);
+		skb_queue_tail(&self->wx_list, skb);
+
+		tx_skb = skb_clone(skb, GFP_ATOMIC);
+		if (tx_skb == NULL) {
+			return;
+		}
+
+		self->vs = (self->vs + 1) % 8;
+		self->ack_required = FALSE;
+		self->window -= 1;
+
+		irlap_send_i_frame(self, tx_skb, RSP_FRAME);
+	} else {
+		irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
+		self->window -= 1;
+	}
+}
+
+/*
+ * Function irlap_resend_rejected_frames (nr)
+ *
+ *    Resend frames which has not been acknowledged. Should be safe to
+ *    traverse the list without locking it since this function will only be
+ *    called from interrupt context (BH)
+ */
+void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
+{
+	struct sk_buff *tx_skb;
+	struct sk_buff *skb;
+	int count;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/* Initialize variables */
+	count = skb_queue_len(&self->wx_list);
+
+	/*  Resend unacknowledged frame(s) */
+	skb = skb_peek(&self->wx_list);
+	while (skb != NULL) {
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+
+		/* We copy the skb to be retransmitted since we will have to
+		 * modify it. Cloning will confuse packet sniffers
+		 */
+		/* tx_skb = skb_clone( skb, GFP_ATOMIC); */
+		tx_skb = skb_copy(skb, GFP_ATOMIC);
+		if (!tx_skb) {
+			IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
+			return;
+		}
+		/* Unlink tx_skb from list */
+		tx_skb->next = tx_skb->prev = NULL;
+		tx_skb->list = NULL;
+
+		/* Clear old Nr field + poll bit */
+		tx_skb->data[1] &= 0x0f;
+
+		/*
+		 *  Set poll bit on the last frame retransmitted
+		 */
+		if (count-- == 1)
+			tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
+		else
+			tx_skb->data[1] &= ~PF_BIT; /* Clear p/f bit */
+
+		irlap_send_i_frame(self, tx_skb, command);
+
+		/*
+		 *  If our skb is the last buffer in the list, then
+		 *  we are finished, if not, move to the next sk-buffer
+		 */
+		if (skb == skb_peek_tail(&self->wx_list))
+			skb = NULL;
+		else
+			skb = skb->next;
+	}
+#if 0 /* Not yet */
+	/*
+	 *  We can now fill the window with additional data frames
+	 */
+	while (skb_queue_len( &self->txq) > 0) {
+
+		IRDA_DEBUG(0, "%s(), sending additional frames!\n", __FUNCTION__);
+		if ((skb_queue_len( &self->txq) > 0) &&
+		    (self->window > 0)) {
+			skb = skb_dequeue( &self->txq);
+			IRDA_ASSERT(skb != NULL, return;);
+
+			/*
+			 *  If send window > 1 then send frame with pf
+			 *  bit cleared
+			 */
+			if ((self->window > 1) &&
+			    skb_queue_len(&self->txq) > 0)
+			{
+				irlap_send_data_primary(self, skb);
+			} else {
+				irlap_send_data_primary_poll(self, skb);
+			}
+			kfree_skb(skb);
+		}
+	}
+#endif
+}
+
+void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
+{
+	struct sk_buff *tx_skb;
+	struct sk_buff *skb;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	/*  Resend unacknowledged frame(s) */
+	skb = skb_peek(&self->wx_list);
+	if (skb != NULL) {
+		irlap_wait_min_turn_around(self, &self->qos_tx);
+
+		/* We copy the skb to be retransmitted since we will have to
+		 * modify it. Cloning will confuse packet sniffers
+		 */
+		/* tx_skb = skb_clone( skb, GFP_ATOMIC); */
+		tx_skb = skb_copy(skb, GFP_ATOMIC);
+		if (!tx_skb) {
+			IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
+			return;
+		}
+		/* Unlink tx_skb from list */
+		tx_skb->next = tx_skb->prev = NULL;
+		tx_skb->list = NULL;
+
+		/* Clear old Nr field + poll bit */
+		tx_skb->data[1] &= 0x0f;
+
+		/*  Set poll/final bit */
+		tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
+
+		irlap_send_i_frame(self, tx_skb, command);
+	}
+}
+
+/*
+ * Function irlap_send_ui_frame (self, skb, command)
+ *
+ *    Contruct and transmit an Unnumbered Information (UI) frame
+ *
+ */
+void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
+			 __u8 caddr, int command)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Insert connection address */
+	skb->data[0] = caddr | ((command) ? CMD_FRAME : 0);
+
+	irlap_queue_xmit(self, skb);
+}
+
+/*
+ * Function irlap_send_i_frame (skb)
+ *
+ *    Contruct and transmit Information (I) frame
+ */
+static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
+			       int command)
+{
+	/* Insert connection address */
+	skb->data[0] = self->caddr;
+	skb->data[0] |= (command) ? CMD_FRAME : 0;
+
+	/* Insert next to receive (Vr) */
+	skb->data[1] |= (self->vr << 5);  /* insert nr */
+
+	irlap_queue_xmit(self, skb);
+}
+
+/*
+ * Function irlap_recv_i_frame (skb, frame)
+ *
+ *    Receive and parse an I (Information) frame, no harm in making it inline
+ *    since it's called only from one single place (irlap_driver_rcv).
+ */
+static inline void irlap_recv_i_frame(struct irlap_cb *self,
+				      struct sk_buff *skb,
+				      struct irlap_info *info, int command)
+{
+	info->nr = skb->data[1] >> 5;          /* Next to receive */
+	info->pf = skb->data[1] & PF_BIT;      /* Final bit */
+	info->ns = (skb->data[1] >> 1) & 0x07; /* Next to send */
+
+	/* Check if this is a command or a response frame */
+	if (command)
+		irlap_do_event(self, RECV_I_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_I_RSP, skb, info);
+}
+
+/*
+ * Function irlap_recv_ui_frame (self, skb, info)
+ *
+ *    Receive and parse an Unnumbered Information (UI) frame
+ *
+ */
+static void irlap_recv_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
+				struct irlap_info *info)
+{
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	info->pf = skb->data[1] & PF_BIT;      /* Final bit */
+
+	irlap_do_event(self, RECV_UI_FRAME, skb, info);
+}
+
+/*
+ * Function irlap_recv_frmr_frame (skb, frame)
+ *
+ *    Received Frame Reject response.
+ *
+ */
+static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
+				  struct irlap_info *info)
+{
+	__u8 *frame;
+	int w, x, y, z;
+
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(info != NULL, return;);
+
+	if (!pskb_may_pull(skb, 4)) {
+		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		return;
+	}
+
+	frame = skb->data;
+
+	info->nr = frame[2] >> 5;          /* Next to receive */
+	info->pf = frame[2] & PF_BIT;      /* Final bit */
+	info->ns = (frame[2] >> 1) & 0x07; /* Next to send */
+
+	w = frame[3] & 0x01;
+	x = frame[3] & 0x02;
+	y = frame[3] & 0x04;
+	z = frame[3] & 0x08;
+
+	if (w) {
+		IRDA_DEBUG(0, "Rejected control field is undefined or not "
+		      "implemented.\n");
+	}
+	if (x) {
+		IRDA_DEBUG(0, "Rejected control field was invalid because it "
+		      "contained a non permitted I field.\n");
+	}
+	if (y) {
+		IRDA_DEBUG(0, "Received I field exceeded the maximum negotiated "
+		      "for the existing connection or exceeded the maximum "
+		      "this station supports if no connection exists.\n");
+	}
+	if (z) {
+		IRDA_DEBUG(0, "Rejected control field control field contained an "
+		      "invalid Nr count.\n");
+	}
+	irlap_do_event(self, RECV_FRMR_RSP, skb, info);
+}
+
+/*
+ * Function irlap_send_test_frame (self, daddr)
+ *
+ *    Send a test frame response
+ *
+ */
+void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
+			   struct sk_buff *cmd)
+{
+	struct sk_buff *tx_skb;
+	struct test_frame *frame;
+	__u8 *info;
+
+	tx_skb = dev_alloc_skb(cmd->len+sizeof(struct test_frame));
+	if (!tx_skb)
+		return;
+
+	/* Broadcast frames must include saddr and daddr fields */
+	if (caddr == CBROADCAST) {
+		frame = (struct test_frame *)
+			skb_put(tx_skb, sizeof(struct test_frame));
+
+		/* Insert the swapped addresses */
+		frame->saddr = cpu_to_le32(self->saddr);
+		frame->daddr = cpu_to_le32(daddr);
+	} else
+		frame = (struct test_frame *) skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
+
+	frame->caddr = caddr;
+	frame->control = TEST_RSP | PF_BIT;
+
+	/* Copy info */
+	info = skb_put(tx_skb, cmd->len);
+	memcpy(info, cmd->data, cmd->len);
+
+	/* Return to sender */
+	irlap_wait_min_turn_around(self, &self->qos_tx);
+	irlap_queue_xmit(self, tx_skb);
+}
+
+/*
+ * Function irlap_recv_test_frame (self, skb)
+ *
+ *    Receive a test frame
+ *
+ */
+static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
+				  struct irlap_info *info, int command)
+{
+	struct test_frame *frame;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	if (!pskb_may_pull(skb, sizeof(*frame))) {
+		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		return;
+	}
+	frame = (struct test_frame *) skb->data;
+
+	/* Broadcast frames must carry saddr and daddr fields */
+	if (info->caddr == CBROADCAST) {
+		if (skb->len < sizeof(struct test_frame)) {
+			IRDA_DEBUG(0, "%s() test frame to short!\n",
+				   __FUNCTION__);
+			return;
+		}
+
+		/* Read and swap addresses */
+		info->daddr = le32_to_cpu(frame->saddr);
+		info->saddr = le32_to_cpu(frame->daddr);
+
+		/* Make sure frame is addressed to us */
+		if ((info->saddr != self->saddr) &&
+		    (info->saddr != BROADCAST)) {
+			return;
+		}
+	}
+
+	if (command)
+		irlap_do_event(self, RECV_TEST_CMD, skb, info);
+	else
+		irlap_do_event(self, RECV_TEST_RSP, skb, info);
+}
+
+/*
+ * Function irlap_driver_rcv (skb, netdev, ptype)
+ *
+ *    Called when a frame is received. Dispatches the right receive function
+ *    for processing of the frame.
+ *
+ * Note on skb management :
+ * After calling the higher layers of the IrDA stack, we always
+ * kfree() the skb, which drop the reference count (and potentially
+ * destroy it).
+ * If a higher layer of the stack want to keep the skb around (to put
+ * in a queue or pass it to the higher layer), it will need to use
+ * skb_get() to keep a reference on it. This is usually done at the
+ * LMP level in irlmp.c.
+ * Jean II
+ */
+int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
+		     struct packet_type *ptype)
+{
+	struct irlap_info info;
+	struct irlap_cb *self;
+	int command;
+	__u8 control;
+
+	/* FIXME: should we get our own field? */
+	self = (struct irlap_cb *) dev->atalk_ptr;
+
+	/* If the net device is down, then IrLAP is gone! */
+	if (!self || self->magic != LAP_MAGIC) {
+		dev_kfree_skb(skb);
+		return -1;
+	}
+
+	/* We are no longer an "old" protocol, so we need to handle
+	 * share and non linear skbs. This should never happen, so
+	 * we don't need to be clever about it. Jean II */
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+		IRDA_ERROR("%s: can't clone shared skb!\n", __FUNCTION__);
+		dev_kfree_skb(skb);
+		return -1;
+	}
+
+	/* Check if frame is large enough for parsing */
+	if (!pskb_may_pull(skb, 2)) {
+		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		dev_kfree_skb(skb);
+		return -1;
+	}
+
+	command    = skb->data[0] & CMD_FRAME;
+	info.caddr = skb->data[0] & CBROADCAST;
+
+	info.pf      = skb->data[1] &  PF_BIT;
+	info.control = skb->data[1] & ~PF_BIT; /* Mask away poll/final bit */
+
+	control = info.control;
+
+	/*  First we check if this frame has a valid connection address */
+	if ((info.caddr != self->caddr) && (info.caddr != CBROADCAST)) {
+		IRDA_DEBUG(0, "%s(), wrong connection address!\n",
+			   __FUNCTION__);
+		goto out;
+	}
+	/*
+	 *  Optimize for the common case and check if the frame is an
+	 *  I(nformation) frame. Only I-frames have bit 0 set to 0
+	 */
+	if (~control & 0x01) {
+		irlap_recv_i_frame(self, skb, &info, command);
+		goto out;
+	}
+	/*
+	 *  We now check is the frame is an S(upervisory) frame. Only
+	 *  S-frames have bit 0 set to 1 and bit 1 set to 0
+	 */
+	if (~control & 0x02) {
+		/*
+		 *  Received S(upervisory) frame, check which frame type it is
+		 *  only the first nibble is of interest
+		 */
+		switch (control & 0x0f) {
+		case RR:
+			irlap_recv_rr_frame(self, skb, &info, command);
+			break;
+		case RNR:
+			irlap_recv_rnr_frame(self, skb, &info, command);
+			break;
+		case REJ:
+			irlap_recv_rej_frame(self, skb, &info, command);
+			break;
+		case SREJ:
+			irlap_recv_srej_frame(self, skb, &info, command);
+			break;
+		default:
+			IRDA_WARNING("%s: Unknown S-frame %02x received!\n",
+				__FUNCTION__, info.control);
+			break;
+		}
+		goto out;
+	}
+	/*
+	 *  This must be a C(ontrol) frame
+	 */
+	switch (control) {
+	case XID_RSP:
+		irlap_recv_discovery_xid_rsp(self, skb, &info);
+		break;
+	case XID_CMD:
+		irlap_recv_discovery_xid_cmd(self, skb, &info);
+		break;
+	case SNRM_CMD:
+		irlap_recv_snrm_cmd(self, skb, &info);
+		break;
+	case DM_RSP:
+		irlap_do_event(self, RECV_DM_RSP, skb, &info);
+		break;
+	case DISC_CMD: /* And RD_RSP since they have the same value */
+		irlap_recv_disc_frame(self, skb, &info, command);
+		break;
+	case TEST_CMD:
+		irlap_recv_test_frame(self, skb, &info, command);
+		break;
+	case UA_RSP:
+		irlap_recv_ua_frame(self, skb, &info);
+		break;
+	case FRMR_RSP:
+		irlap_recv_frmr_frame(self, skb, &info);
+		break;
+	case UI_FRAME:
+		irlap_recv_ui_frame(self, skb, &info);
+		break;
+	default:
+		IRDA_WARNING("%s: Unknown frame %02x received!\n",
+				__FUNCTION__, info.control);
+		break;
+	}
+out:
+	/* Always drop our reference on the skb */
+	dev_kfree_skb(skb);
+	return 0;
+}
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
new file mode 100644
index 00000000000..7a4a4d7fbe6
--- /dev/null
+++ b/net/irda/irlmp.c
@@ -0,0 +1,2041 @@
+/*********************************************************************
+ *
+ * Filename:      irlmp.c
+ * Version:       1.0
+ * Description:   IrDA Link Management Protocol (LMP) layer
+ * Status:        Stable.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 17 20:54:32 1997
+ * Modified at:   Wed Jan  5 11:26:03 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/timer.h>
+#include <net/irda/qos.h>
+#include <net/irda/irlap.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irlmp_frame.h>
+
+static __u8 irlmp_find_free_slsap(void);
+static int irlmp_slsap_inuse(__u8 slsap_sel);
+
+/* Master structure */
+struct irlmp_cb *irlmp = NULL;
+
+/* These can be altered by the sysctl interface */
+int  sysctl_discovery         = 0;
+int  sysctl_discovery_timeout = 3; /* 3 seconds by default */
+EXPORT_SYMBOL(sysctl_discovery_timeout);
+int  sysctl_discovery_slots   = 6; /* 6 slots by default */
+int  sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
+char sysctl_devname[65];
+
+const char *irlmp_reasons[] = {
+	"ERROR, NOT USED",
+	"LM_USER_REQUEST",
+	"LM_LAP_DISCONNECT",
+	"LM_CONNECT_FAILURE",
+	"LM_LAP_RESET",
+	"LM_INIT_DISCONNECT",
+	"ERROR, NOT USED",
+};
+EXPORT_SYMBOL(irlmp_reasons);
+
+/*
+ * Function irlmp_init (void)
+ *
+ *    Create (allocate) the main IrLMP structure
+ *
+ */
+int __init irlmp_init(void)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+	/* Initialize the irlmp structure. */
+	irlmp = kmalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
+	if (irlmp == NULL)
+		return -ENOMEM;
+	memset(irlmp, 0, sizeof(struct irlmp_cb));
+
+	irlmp->magic = LMP_MAGIC;
+
+	irlmp->clients = hashbin_new(HB_LOCK);
+	irlmp->services = hashbin_new(HB_LOCK);
+	irlmp->links = hashbin_new(HB_LOCK);
+	irlmp->unconnected_lsaps = hashbin_new(HB_LOCK);
+	irlmp->cachelog = hashbin_new(HB_NOLOCK);
+
+	if ((irlmp->clients == NULL) ||
+	    (irlmp->services == NULL) ||
+	    (irlmp->links == NULL) ||
+	    (irlmp->unconnected_lsaps == NULL) ||
+	    (irlmp->cachelog == NULL)) {
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&irlmp->cachelog->hb_spinlock);
+
+	irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
+	strcpy(sysctl_devname, "Linux");
+
+	/* Do discovery every 3 seconds */
+	init_timer(&irlmp->discovery_timer);
+	irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
+
+	return 0;
+}
+
+/*
+ * Function irlmp_cleanup (void)
+ *
+ *    Remove IrLMP layer
+ *
+ */
+void __exit irlmp_cleanup(void) 
+{
+	/* Check for main structure */
+	IRDA_ASSERT(irlmp != NULL, return;);
+	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
+
+	del_timer(&irlmp->discovery_timer);
+
+	hashbin_delete(irlmp->links, (FREE_FUNC) kfree);
+	hashbin_delete(irlmp->unconnected_lsaps, (FREE_FUNC) kfree);
+	hashbin_delete(irlmp->clients, (FREE_FUNC) kfree);
+	hashbin_delete(irlmp->services, (FREE_FUNC) kfree);
+	hashbin_delete(irlmp->cachelog, (FREE_FUNC) kfree);
+
+	/* De-allocate main structure */
+	kfree(irlmp);
+	irlmp = NULL;
+}
+
+/*
+ * Function irlmp_open_lsap (slsap, notify)
+ *
+ *   Register with IrLMP and create a local LSAP,
+ *   returns handle to LSAP.
+ */
+struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid)
+{
+	struct lsap_cb *self;
+
+	IRDA_ASSERT(notify != NULL, return NULL;);
+	IRDA_ASSERT(irlmp != NULL, return NULL;);
+	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return NULL;);
+	IRDA_ASSERT(notify->instance != NULL, return NULL;);
+
+	/*  Does the client care which Source LSAP selector it gets?  */
+	if (slsap_sel == LSAP_ANY) {
+		slsap_sel = irlmp_find_free_slsap();
+		if (!slsap_sel)
+			return NULL;
+	} else if (irlmp_slsap_inuse(slsap_sel))
+		return NULL;
+
+	/* Allocate new instance of a LSAP connection */
+	self = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
+	if (self == NULL) {
+		IRDA_ERROR("%s: can't allocate memory\n", __FUNCTION__);
+		return NULL;
+	}
+	memset(self, 0, sizeof(struct lsap_cb));
+
+	self->magic = LMP_LSAP_MAGIC;
+	self->slsap_sel = slsap_sel;
+
+	/* Fix connectionless LSAP's */
+	if (slsap_sel == LSAP_CONNLESS) {
+#ifdef CONFIG_IRDA_ULTRA
+		self->dlsap_sel = LSAP_CONNLESS;
+		self->pid = pid;
+#endif /* CONFIG_IRDA_ULTRA */
+	} else
+		self->dlsap_sel = LSAP_ANY;
+	/* self->connected = FALSE; -> already NULL via memset() */
+
+	init_timer(&self->watchdog_timer);
+
+	self->notify = *notify;
+
+	self->lsap_state = LSAP_DISCONNECTED;
+
+	/* Insert into queue of unconnected LSAPs */
+	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
+		       (long) self, NULL);
+
+	return self;
+}
+EXPORT_SYMBOL(irlmp_open_lsap);
+
+/*
+ * Function __irlmp_close_lsap (self)
+ *
+ *    Remove an instance of LSAP
+ */
+static void __irlmp_close_lsap(struct lsap_cb *self)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+
+	/*
+	 *  Set some of the variables to preset values
+	 */
+	self->magic = 0;
+	del_timer(&self->watchdog_timer); /* Important! */
+
+	if (self->conn_skb)
+		dev_kfree_skb(self->conn_skb);
+
+	kfree(self);
+}
+
+/*
+ * Function irlmp_close_lsap (self)
+ *
+ *    Close and remove LSAP
+ *
+ */
+void irlmp_close_lsap(struct lsap_cb *self)
+{
+	struct lap_cb *lap;
+	struct lsap_cb *lsap = NULL;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+
+	/*
+	 *  Find out if we should remove this LSAP from a link or from the
+	 *  list of unconnected lsaps (not associated with a link)
+	 */
+	lap = self->lap;
+	if (lap) {
+		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
+		/* We might close a LSAP before it has completed the
+		 * connection setup. In those case, higher layers won't
+		 * send a proper disconnect request. Harmless, except
+		 * that we will forget to close LAP... - Jean II */
+		if(self->lsap_state != LSAP_DISCONNECTED) {
+			self->lsap_state = LSAP_DISCONNECTED;
+			irlmp_do_lap_event(self->lap,
+					   LM_LAP_DISCONNECT_REQUEST, NULL);
+		}
+		/* Now, remove from the link */
+		lsap = hashbin_remove(lap->lsaps, (long) self, NULL);
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+		lap->cache.valid = FALSE;
+#endif
+	}
+	self->lap = NULL;
+	/* Check if we found the LSAP! If not then try the unconnected lsaps */
+	if (!lsap) {
+		lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
+				      NULL);
+	}
+	if (!lsap) {
+		IRDA_DEBUG(0,
+		     "%s(), Looks like somebody has removed me already!\n",
+			   __FUNCTION__);
+		return;
+	}
+	__irlmp_close_lsap(self);
+}
+EXPORT_SYMBOL(irlmp_close_lsap);
+
+/*
+ * Function irlmp_register_irlap (saddr, notify)
+ *
+ *    Register IrLAP layer with IrLMP. There is possible to have multiple
+ *    instances of the IrLAP layer, each connected to different IrDA ports
+ *
+ */
+void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify)
+{
+	struct lap_cb *lap;
+
+	IRDA_ASSERT(irlmp != NULL, return;);
+	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
+	IRDA_ASSERT(notify != NULL, return;);
+
+	/*
+	 *  Allocate new instance of a LSAP connection
+	 */
+	lap = kmalloc(sizeof(struct lap_cb), GFP_KERNEL);
+	if (lap == NULL) {
+		IRDA_ERROR("%s: unable to kmalloc\n", __FUNCTION__);
+		return;
+	}
+	memset(lap, 0, sizeof(struct lap_cb));
+
+	lap->irlap = irlap;
+	lap->magic = LMP_LAP_MAGIC;
+	lap->saddr = saddr;
+	lap->daddr = DEV_ADDR_ANY;
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	lap->cache.valid = FALSE;
+#endif
+	lap->lsaps = hashbin_new(HB_LOCK);
+	if (lap->lsaps == NULL) {
+		IRDA_WARNING("%s(), unable to kmalloc lsaps\n", __FUNCTION__);
+		kfree(lap);
+		return;
+	}
+
+	lap->lap_state = LAP_STANDBY;
+
+	init_timer(&lap->idle_timer);
+
+	/*
+	 *  Insert into queue of LMP links
+	 */
+	hashbin_insert(irlmp->links, (irda_queue_t *) lap, lap->saddr, NULL);
+
+	/*
+	 *  We set only this variable so IrLAP can tell us on which link the
+	 *  different events happened on
+	 */
+	irda_notify_init(notify);
+	notify->instance = lap;
+}
+
+/*
+ * Function irlmp_unregister_irlap (saddr)
+ *
+ *    IrLAP layer has been removed!
+ *
+ */
+void irlmp_unregister_link(__u32 saddr)
+{
+	struct lap_cb *link;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/* We must remove ourselves from the hashbin *first*. This ensure
+	 * that no more LSAPs will be open on this link and no discovery
+	 * will be triggered anymore. Jean II */
+	link = hashbin_remove(irlmp->links, saddr, NULL);
+	if (link) {
+		IRDA_ASSERT(link->magic == LMP_LAP_MAGIC, return;);
+
+		/* Kill all the LSAPs on this link. Jean II */
+		link->reason = LAP_DISC_INDICATION;
+		link->daddr = DEV_ADDR_ANY;
+		irlmp_do_lap_event(link, LM_LAP_DISCONNECT_INDICATION, NULL);
+
+		/* Remove all discoveries discovered at this link */
+		irlmp_expire_discoveries(irlmp->cachelog, link->saddr, TRUE);
+
+		/* Final cleanup */
+		del_timer(&link->idle_timer);
+		link->magic = 0;
+		kfree(link);
+	}
+}
+
+/*
+ * Function irlmp_connect_request (handle, dlsap, userdata)
+ *
+ *    Connect with a peer LSAP
+ *
+ */
+int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel,
+			  __u32 saddr, __u32 daddr,
+			  struct qos_info *qos, struct sk_buff *userdata)
+{
+	struct sk_buff *tx_skb = userdata;
+	struct lap_cb *lap;
+	struct lsap_cb *lsap;
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return -EBADR;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EBADR;);
+
+	IRDA_DEBUG(2,
+	      "%s(), slsap_sel=%02x, dlsap_sel=%02x, saddr=%08x, daddr=%08x\n",
+	      __FUNCTION__, self->slsap_sel, dlsap_sel, saddr, daddr);
+
+	if (test_bit(0, &self->connected)) {
+		ret = -EISCONN;
+		goto err;
+	}
+
+	/* Client must supply destination device address */
+	if (!daddr) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Any userdata? */
+	if (tx_skb == NULL) {
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+
+		skb_reserve(tx_skb, LMP_MAX_HEADER);
+	}
+
+	/* Make room for MUX control header (3 bytes) */
+	IRDA_ASSERT(skb_headroom(tx_skb) >= LMP_CONTROL_HEADER, return -1;);
+	skb_push(tx_skb, LMP_CONTROL_HEADER);
+
+	self->dlsap_sel = dlsap_sel;
+
+	/*
+	 * Find the link to where we should try to connect since there may
+	 * be more than one IrDA port on this machine. If the client has
+	 * passed us the saddr (and already knows which link to use), then
+	 * we use that to find the link, if not then we have to look in the
+	 * discovery log and check if any of the links has discovered a
+	 * device with the given daddr
+	 */
+	if ((!saddr) || (saddr == DEV_ADDR_ANY)) {
+		discovery_t *discovery;
+		unsigned long flags;
+
+		spin_lock_irqsave(&irlmp->cachelog->hb_spinlock, flags);
+		if (daddr != DEV_ADDR_ANY)
+			discovery = hashbin_find(irlmp->cachelog, daddr, NULL);
+		else {
+			IRDA_DEBUG(2, "%s(), no daddr\n", __FUNCTION__);
+			discovery = (discovery_t *)
+				hashbin_get_first(irlmp->cachelog);
+		}
+
+		if (discovery) {
+			saddr = discovery->data.saddr;
+			daddr = discovery->data.daddr;
+		}
+		spin_unlock_irqrestore(&irlmp->cachelog->hb_spinlock, flags);
+	}
+	lap = hashbin_lock_find(irlmp->links, saddr, NULL);
+	if (lap == NULL) {
+		IRDA_DEBUG(1, "%s(), Unable to find a usable link!\n", __FUNCTION__);
+		ret = -EHOSTUNREACH;
+		goto err;
+	}
+
+	/* Check if LAP is disconnected or already connected */
+	if (lap->daddr == DEV_ADDR_ANY)
+		lap->daddr = daddr;
+	else if (lap->daddr != daddr) {
+		/* Check if some LSAPs are active on this LAP */
+		if (HASHBIN_GET_SIZE(lap->lsaps) == 0) {
+			/* No active connection, but LAP hasn't been
+			 * disconnected yet (waiting for timeout in LAP).
+			 * Maybe we could give LAP a bit of help in this case.
+			 */
+			IRDA_DEBUG(0, "%s(), sorry, but I'm waiting for LAP to timeout!\n", __FUNCTION__);
+			ret = -EAGAIN;
+			goto err;
+		}
+
+		/* LAP is already connected to a different node, and LAP
+		 * can only talk to one node at a time */
+		IRDA_DEBUG(0, "%s(), sorry, but link is busy!\n", __FUNCTION__);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	self->lap = lap;
+
+	/*
+	 *  Remove LSAP from list of unconnected LSAPs and insert it into the
+	 *  list of connected LSAPs for the particular link
+	 */
+	lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, NULL);
+
+	IRDA_ASSERT(lsap != NULL, return -1;);
+	IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
+	IRDA_ASSERT(lsap->lap != NULL, return -1;);
+	IRDA_ASSERT(lsap->lap->magic == LMP_LAP_MAGIC, return -1;);
+
+	hashbin_insert(self->lap->lsaps, (irda_queue_t *) self, (long) self,
+		       NULL);
+
+	set_bit(0, &self->connected);	/* TRUE */
+
+	/*
+	 *  User supplied qos specifications?
+	 */
+	if (qos)
+		self->qos = *qos;
+
+	irlmp_do_lsap_event(self, LM_CONNECT_REQUEST, tx_skb);
+
+	/* Drop reference count - see irlap_data_request(). */
+	dev_kfree_skb(tx_skb);
+
+	return 0;
+
+err:
+	/* Cleanup */
+	if(tx_skb)
+		dev_kfree_skb(tx_skb);
+	return ret;
+}
+EXPORT_SYMBOL(irlmp_connect_request);
+
+/*
+ * Function irlmp_connect_indication (self)
+ *
+ *    Incoming connection
+ *
+ */
+void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb)
+{
+	int max_seg_size;
+	int lap_header_size;
+	int max_header_size;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(self->lap != NULL, return;);
+
+	IRDA_DEBUG(2, "%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
+		   __FUNCTION__, self->slsap_sel, self->dlsap_sel);
+
+	/* Note : self->lap is set in irlmp_link_data_indication(),
+	 * (case CONNECT_CMD:) because we have no way to set it here.
+	 * Similarly, self->dlsap_sel is usually set in irlmp_find_lsap().
+	 * Jean II */
+
+	self->qos = *self->lap->qos;
+
+	max_seg_size = self->lap->qos->data_size.value-LMP_HEADER;
+	lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
+	max_header_size = LMP_HEADER + lap_header_size;
+
+	/* Hide LMP_CONTROL_HEADER header from layer above */
+	skb_pull(skb, LMP_CONTROL_HEADER);
+
+	if (self->notify.connect_indication) {
+		/* Don't forget to refcount it - see irlap_driver_rcv(). */
+		skb_get(skb);
+		self->notify.connect_indication(self->notify.instance, self,
+						&self->qos, max_seg_size,
+						max_header_size, skb);
+	}
+}
+
+/*
+ * Function irlmp_connect_response (handle, userdata)
+ *
+ *    Service user is accepting connection
+ *
+ */
+int irlmp_connect_response(struct lsap_cb *self, struct sk_buff *userdata)
+{
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+	IRDA_ASSERT(userdata != NULL, return -1;);
+
+	/* We set the connected bit and move the lsap to the connected list
+	 * in the state machine itself. Jean II */
+
+	IRDA_DEBUG(2, "%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
+		   __FUNCTION__, self->slsap_sel, self->dlsap_sel);
+
+	/* Make room for MUX control header (3 bytes) */
+	IRDA_ASSERT(skb_headroom(userdata) >= LMP_CONTROL_HEADER, return -1;);
+	skb_push(userdata, LMP_CONTROL_HEADER);
+
+	irlmp_do_lsap_event(self, LM_CONNECT_RESPONSE, userdata);
+
+	/* Drop reference count - see irlap_data_request(). */
+	dev_kfree_skb(userdata);
+
+	return 0;
+}
+EXPORT_SYMBOL(irlmp_connect_response);
+
+/*
+ * Function irlmp_connect_confirm (handle, skb)
+ *
+ *    LSAP connection confirmed peer device!
+ */
+void irlmp_connect_confirm(struct lsap_cb *self, struct sk_buff *skb)
+{
+	int max_header_size;
+	int lap_header_size;
+	int max_seg_size;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(skb != NULL, return;);
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+	IRDA_ASSERT(self->lap != NULL, return;);
+
+	self->qos = *self->lap->qos;
+
+	max_seg_size    = self->lap->qos->data_size.value-LMP_HEADER;
+	lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
+	max_header_size = LMP_HEADER + lap_header_size;
+
+	IRDA_DEBUG(2, "%s(), max_header_size=%d\n",
+		   __FUNCTION__, max_header_size);
+
+	/* Hide LMP_CONTROL_HEADER header from layer above */
+	skb_pull(skb, LMP_CONTROL_HEADER);
+
+	if (self->notify.connect_confirm) {
+		/* Don't forget to refcount it - see irlap_driver_rcv() */
+		skb_get(skb);
+		self->notify.connect_confirm(self->notify.instance, self,
+					     &self->qos, max_seg_size,
+					     max_header_size, skb);
+	}
+}
+
+/*
+ * Function irlmp_dup (orig, instance)
+ *
+ *    Duplicate LSAP, can be used by servers to confirm a connection on a
+ *    new LSAP so it can keep listening on the old one.
+ *
+ */
+struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
+{
+	struct lsap_cb *new;
+	unsigned long flags;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+
+	/* Only allowed to duplicate unconnected LSAP's, and only LSAPs
+	 * that have received a connect indication. Jean II */
+	if ((!hashbin_find(irlmp->unconnected_lsaps, (long) orig, NULL)) ||
+	    (orig->lap == NULL)) {
+		IRDA_DEBUG(0, "%s(), invalid LSAP (wrong state)\n",
+			   __FUNCTION__);
+		spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
+				       flags);
+		return NULL;
+	}
+
+	/* Allocate a new instance */
+	new = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
+	if (!new)  {
+		IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __FUNCTION__);
+		spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
+				       flags);
+		return NULL;
+	}
+	/* Dup */
+	memcpy(new, orig, sizeof(struct lsap_cb));
+	/* new->lap = orig->lap; => done in the memcpy() */
+	/* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */
+	new->conn_skb = NULL;
+
+	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+
+	/* Not everything is the same */
+	new->notify.instance = instance;
+
+	init_timer(&new->watchdog_timer);
+
+	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) new,
+		       (long) new, NULL);
+
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	/* Make sure that we invalidate the LSAP cache */
+	new->lap->cache.valid = FALSE;
+#endif /* CONFIG_IRDA_CACHE_LAST_LSAP */
+
+	return new;
+}
+EXPORT_SYMBOL(irlmp_dup);
+
+/*
+ * Function irlmp_disconnect_request (handle, userdata)
+ *
+ *    The service user is requesting disconnection, this will not remove the
+ *    LSAP, but only mark it as disconnected
+ */
+int irlmp_disconnect_request(struct lsap_cb *self, struct sk_buff *userdata)
+{
+	struct lsap_cb *lsap;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+	IRDA_ASSERT(userdata != NULL, return -1;);
+
+	/* Already disconnected ?
+	 * There is a race condition between irlmp_disconnect_indication()
+	 * and us that might mess up the hashbins below. This fixes it.
+	 * Jean II */
+	if (! test_and_clear_bit(0, &self->connected)) {
+		IRDA_DEBUG(0, "%s(), already disconnected!\n", __FUNCTION__);
+		dev_kfree_skb(userdata);
+		return -1;
+	}
+
+	skb_push(userdata, LMP_CONTROL_HEADER);
+
+	/*
+	 *  Do the event before the other stuff since we must know
+	 *  which lap layer that the frame should be transmitted on
+	 */
+	irlmp_do_lsap_event(self, LM_DISCONNECT_REQUEST, userdata);
+
+	/* Drop reference count - see irlap_data_request(). */
+	dev_kfree_skb(userdata);
+
+	/*
+	 *  Remove LSAP from list of connected LSAPs for the particular link
+	 *  and insert it into the list of unconnected LSAPs
+	 */
+	IRDA_ASSERT(self->lap != NULL, return -1;);
+	IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
+	IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
+
+	lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	self->lap->cache.valid = FALSE;
+#endif
+
+	IRDA_ASSERT(lsap != NULL, return -1;);
+	IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
+	IRDA_ASSERT(lsap == self, return -1;);
+
+	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
+		       (long) self, NULL);
+
+	/* Reset some values */
+	self->dlsap_sel = LSAP_ANY;
+	self->lap = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(irlmp_disconnect_request);
+
+/*
+ * Function irlmp_disconnect_indication (reason, userdata)
+ *
+ *    LSAP is being closed!
+ */
+void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason,
+				 struct sk_buff *skb)
+{
+	struct lsap_cb *lsap;
+
+	IRDA_DEBUG(1, "%s(), reason=%s\n", __FUNCTION__, irlmp_reasons[reason]);
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+
+	IRDA_DEBUG(3, "%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
+		   __FUNCTION__, self->slsap_sel, self->dlsap_sel);
+
+	/* Already disconnected ?
+	 * There is a race condition between irlmp_disconnect_request()
+	 * and us that might mess up the hashbins below. This fixes it.
+	 * Jean II */
+	if (! test_and_clear_bit(0, &self->connected)) {
+		IRDA_DEBUG(0, "%s(), already disconnected!\n", __FUNCTION__);
+		return;
+	}
+
+	/*
+	 *  Remove association between this LSAP and the link it used
+	 */
+	IRDA_ASSERT(self->lap != NULL, return;);
+	IRDA_ASSERT(self->lap->lsaps != NULL, return;);
+
+	lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	self->lap->cache.valid = FALSE;
+#endif
+
+	IRDA_ASSERT(lsap != NULL, return;);
+	IRDA_ASSERT(lsap == self, return;);
+	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) lsap,
+		       (long) lsap, NULL);
+
+	self->dlsap_sel = LSAP_ANY;
+	self->lap = NULL;
+
+	/*
+	 *  Inform service user
+	 */
+	if (self->notify.disconnect_indication) {
+		/* Don't forget to refcount it - see irlap_driver_rcv(). */
+		if(skb)
+			skb_get(skb);
+		self->notify.disconnect_indication(self->notify.instance,
+						   self, reason, skb);
+	} else {
+		IRDA_DEBUG(0, "%s(), no handler\n", __FUNCTION__);
+	}
+}
+
+/*
+ * Function irlmp_do_expiry (void)
+ *
+ *    Do a cleanup of the discovery log (remove old entries)
+ *
+ * Note : separate from irlmp_do_discovery() so that we can handle
+ * passive discovery properly.
+ */
+void irlmp_do_expiry(void)
+{
+	struct lap_cb *lap;
+
+	/*
+	 * Expire discovery on all links which are *not* connected.
+	 * On links which are connected, we can't do discovery
+	 * anymore and can't refresh the log, so we freeze the
+	 * discovery log to keep info about the device we are
+	 * connected to.
+	 * This info is mandatory if we want irlmp_connect_request()
+	 * to work properly. - Jean II
+	 */
+	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
+	while (lap != NULL) {
+		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
+
+		if (lap->lap_state == LAP_STANDBY) {
+			/* Expire discoveries discovered on this link */
+			irlmp_expire_discoveries(irlmp->cachelog, lap->saddr,
+						 FALSE);
+		}
+		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
+	}
+}
+
+/*
+ * Function irlmp_do_discovery (nslots)
+ *
+ *    Do some discovery on all links
+ *
+ * Note : log expiry is done above.
+ */
+void irlmp_do_discovery(int nslots)
+{
+	struct lap_cb *lap;
+
+	/* Make sure the value is sane */
+	if ((nslots != 1) && (nslots != 6) && (nslots != 8) && (nslots != 16)){
+		IRDA_WARNING("%s: invalid value for number of slots!\n",
+			     __FUNCTION__);
+		nslots = sysctl_discovery_slots = 8;
+	}
+
+	/* Construct new discovery info to be used by IrLAP, */
+	u16ho(irlmp->discovery_cmd.data.hints) = irlmp->hints.word;
+
+	/*
+	 *  Set character set for device name (we use ASCII), and
+	 *  copy device name. Remember to make room for a \0 at the
+	 *  end
+	 */
+	irlmp->discovery_cmd.data.charset = CS_ASCII;
+	strncpy(irlmp->discovery_cmd.data.info, sysctl_devname,
+		NICKNAME_MAX_LEN);
+	irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.data.info);
+	irlmp->discovery_cmd.nslots = nslots;
+
+	/*
+	 * Try to send discovery packets on all links
+	 */
+	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
+	while (lap != NULL) {
+		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
+
+		if (lap->lap_state == LAP_STANDBY) {
+			/* Try to discover */
+			irlmp_do_lap_event(lap, LM_LAP_DISCOVERY_REQUEST,
+					   NULL);
+		}
+		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
+	}
+}
+
+/*
+ * Function irlmp_discovery_request (nslots)
+ *
+ *    Do a discovery of devices in front of the computer
+ *
+ * If the caller has registered a client discovery callback, this
+ * allow him to receive the full content of the discovery log through
+ * this callback (as normally he will receive only new discoveries).
+ */
+void irlmp_discovery_request(int nslots)
+{
+	/* Return current cached discovery log (in full) */
+	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_LOG);
+
+	/*
+	 * Start a single discovery operation if discovery is not already
+         * running
+	 */
+	if (!sysctl_discovery) {
+		/* Check if user wants to override the default */
+		if (nslots == DISCOVERY_DEFAULT_SLOTS)
+			nslots = sysctl_discovery_slots;
+
+		irlmp_do_discovery(nslots);
+		/* Note : we never do expiry here. Expiry will run on the
+		 * discovery timer regardless of the state of sysctl_discovery
+		 * Jean II */
+	}
+}
+EXPORT_SYMBOL(irlmp_discovery_request);
+
+/*
+ * Function irlmp_get_discoveries (pn, mask, slots)
+ *
+ *    Return the current discovery log
+ *
+ * If discovery is not enabled, you should call this function again
+ * after 1 or 2 seconds (i.e. after discovery has been done).
+ */
+struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
+{
+	/* If discovery is not enabled, it's likely that the discovery log
+	 * will be empty. So, we trigger a single discovery, so that next
+	 * time the user call us there might be some results in the log.
+	 * Jean II
+	 */
+	if (!sysctl_discovery) {
+		/* Check if user wants to override the default */
+		if (nslots == DISCOVERY_DEFAULT_SLOTS)
+			nslots = sysctl_discovery_slots;
+
+		/* Start discovery - will complete sometime later */
+		irlmp_do_discovery(nslots);
+		/* Note : we never do expiry here. Expiry will run on the
+		 * discovery timer regardless of the state of sysctl_discovery
+		 * Jean II */
+	}
+
+	/* Return current cached discovery log */
+	return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE));
+}
+EXPORT_SYMBOL(irlmp_get_discoveries);
+
+/*
+ * Function irlmp_notify_client (log)
+ *
+ *    Notify all about discovered devices
+ *
+ * Clients registered with IrLMP are :
+ *	o IrComm
+ *	o IrLAN
+ *	o Any socket (in any state - ouch, that may be a lot !)
+ * The client may have defined a callback to be notified in case of
+ * partial/selective discovery based on the hints that it passed to IrLMP.
+ */
+static inline void
+irlmp_notify_client(irlmp_client_t *client,
+		    hashbin_t *log, DISCOVERY_MODE mode)
+{
+	discinfo_t *discoveries;	/* Copy of the discovery log */
+	int	number;			/* Number of nodes in the log */
+	int	i;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	/* Check if client wants or not partial/selective log (optimisation) */
+	if (!client->disco_callback)
+		return;
+
+	/*
+	 * Locking notes :
+	 * the old code was manipulating the log directly, which was
+	 * very racy. Now, we use copy_discoveries, that protects
+	 * itself while dumping the log for us.
+	 * The overhead of the copy is compensated by the fact that
+	 * we only pass new discoveries in normal mode and don't
+	 * pass the same old entry every 3s to the caller as we used
+	 * to do (virtual function calling is expensive).
+	 * Jean II
+	 */
+
+	/*
+	 * Now, check all discovered devices (if any), and notify client
+	 * only about the services that the client is interested in
+	 * We also notify only about the new devices unless the caller
+	 * explicitly request a dump of the log. Jean II
+	 */
+	discoveries = irlmp_copy_discoveries(log, &number,
+					     client->hint_mask.word,
+					     (mode == DISCOVERY_LOG));
+	/* Check if the we got some results */
+	if (discoveries == NULL)
+		return;	/* No nodes discovered */
+
+	/* Pass all entries to the listener */
+	for(i = 0; i < number; i++)
+		client->disco_callback(&(discoveries[i]), mode, client->priv);
+
+	/* Free up our buffer */
+	kfree(discoveries);
+}
+
+/*
+ * Function irlmp_discovery_confirm ( self, log)
+ *
+ *    Some device(s) answered to our discovery request! Check to see which
+ *    device it is, and give indication to the client(s)
+ *
+ */
+void irlmp_discovery_confirm(hashbin_t *log, DISCOVERY_MODE mode)
+{
+	irlmp_client_t *client;
+	irlmp_client_t *client_next;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(log != NULL, return;);
+
+	if (!(HASHBIN_GET_SIZE(log)))
+		return;
+
+	/* For each client - notify callback may touch client list */
+	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
+	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
+					 (void *) &client_next) ) {
+		/* Check if we should notify client */
+		irlmp_notify_client(client, log, mode);
+
+		client = client_next;
+	}
+}
+
+/*
+ * Function irlmp_discovery_expiry (expiry)
+ *
+ *	This device is no longer been discovered, and therefore it is being
+ *	purged from the discovery log. Inform all clients who have
+ *	registered for this event...
+ *
+ *	Note : called exclusively from discovery.c
+ *	Note : this is no longer called under discovery spinlock, so the
+ *		client can do whatever he wants in the callback.
+ */
+void irlmp_discovery_expiry(discinfo_t *expiries, int number)
+{
+	irlmp_client_t *client;
+	irlmp_client_t *client_next;
+	int		i;
+
+	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(expiries != NULL, return;);
+
+	/* For each client - notify callback may touch client list */
+	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
+	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
+					 (void *) &client_next) ) {
+
+		/* Pass all entries to the listener */
+		for(i = 0; i < number; i++) {
+			/* Check if we should notify client */
+			if ((client->expir_callback) &&
+			    (client->hint_mask.word & u16ho(expiries[i].hints)
+			     & 0x7f7f) )
+				client->expir_callback(&(expiries[i]),
+						       EXPIRY_TIMEOUT,
+						       client->priv);
+		}
+
+		/* Next client */
+		client = client_next;
+	}
+}
+
+/*
+ * Function irlmp_get_discovery_response ()
+ *
+ *    Used by IrLAP to get the discovery info it needs when answering
+ *    discovery requests by other devices.
+ */
+discovery_t *irlmp_get_discovery_response(void)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(irlmp != NULL, return NULL;);
+
+	u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word;
+
+	/*
+	 *  Set character set for device name (we use ASCII), and
+	 *  copy device name. Remember to make room for a \0 at the
+	 *  end
+	 */
+	irlmp->discovery_rsp.data.charset = CS_ASCII;
+
+	strncpy(irlmp->discovery_rsp.data.info, sysctl_devname,
+		NICKNAME_MAX_LEN);
+	irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.data.info);
+
+	return &irlmp->discovery_rsp;
+}
+
+/*
+ * Function irlmp_data_request (self, skb)
+ *
+ *    Send some data to peer device
+ *
+ * Note on skb management :
+ * After calling the lower layers of the IrDA stack, we always
+ * kfree() the skb, which drop the reference count (and potentially
+ * destroy it).
+ * IrLMP and IrLAP may queue the packet, and in those cases will need
+ * to use skb_get() to keep it around.
+ * Jean II
+ */
+int irlmp_data_request(struct lsap_cb *self, struct sk_buff *userdata)
+{
+	int	ret;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	/* Make room for MUX header */
+	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
+	skb_push(userdata, LMP_HEADER);
+
+	ret = irlmp_do_lsap_event(self, LM_DATA_REQUEST, userdata);
+
+	/* Drop reference count - see irlap_data_request(). */
+	dev_kfree_skb(userdata);
+
+	return ret;
+}
+EXPORT_SYMBOL(irlmp_data_request);
+
+/*
+ * Function irlmp_data_indication (handle, skb)
+ *
+ *    Got data from LAP layer so pass it up to upper layer
+ *
+ */
+void irlmp_data_indication(struct lsap_cb *self, struct sk_buff *skb)
+{
+	/* Hide LMP header from layer above */
+	skb_pull(skb, LMP_HEADER);
+
+	if (self->notify.data_indication) {
+		/* Don't forget to refcount it - see irlap_driver_rcv(). */
+		skb_get(skb);
+		self->notify.data_indication(self->notify.instance, self, skb);
+	}
+}
+
+/*
+ * Function irlmp_udata_request (self, skb)
+ */
+int irlmp_udata_request(struct lsap_cb *self, struct sk_buff *userdata)
+{
+	int	ret;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(userdata != NULL, return -1;);
+
+	/* Make room for MUX header */
+	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
+	skb_push(userdata, LMP_HEADER);
+
+	ret = irlmp_do_lsap_event(self, LM_UDATA_REQUEST, userdata);
+
+	/* Drop reference count - see irlap_data_request(). */
+	dev_kfree_skb(userdata);
+
+	return ret;
+}
+
+/*
+ * Function irlmp_udata_indication (self, skb)
+ *
+ *    Send unreliable data (but still within the connection)
+ *
+ */
+void irlmp_udata_indication(struct lsap_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Hide LMP header from layer above */
+	skb_pull(skb, LMP_HEADER);
+
+	if (self->notify.udata_indication) {
+		/* Don't forget to refcount it - see irlap_driver_rcv(). */
+		skb_get(skb);
+		self->notify.udata_indication(self->notify.instance, self,
+					      skb);
+	}
+}
+
+/*
+ * Function irlmp_connless_data_request (self, skb)
+ */
+#ifdef CONFIG_IRDA_ULTRA
+int irlmp_connless_data_request(struct lsap_cb *self, struct sk_buff *userdata,
+				__u8 pid)
+{
+	struct sk_buff *clone_skb;
+	struct lap_cb *lap;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(userdata != NULL, return -1;);
+
+	/* Make room for MUX and PID header */
+	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER+LMP_PID_HEADER,
+		    return -1;);
+
+	/* Insert protocol identifier */
+	skb_push(userdata, LMP_PID_HEADER);
+	if(self != NULL)
+	  userdata->data[0] = self->pid;
+	else
+	  userdata->data[0] = pid;
+
+	/* Connectionless sockets must use 0x70 */
+	skb_push(userdata, LMP_HEADER);
+	userdata->data[0] = userdata->data[1] = LSAP_CONNLESS;
+
+	/* Try to send Connectionless  packets out on all links */
+	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
+	while (lap != NULL) {
+		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return -1;);
+
+		clone_skb = skb_clone(userdata, GFP_ATOMIC);
+		if (!clone_skb) {
+			dev_kfree_skb(userdata);
+			return -ENOMEM;
+		}
+
+		irlap_unitdata_request(lap->irlap, clone_skb);
+		/* irlap_unitdata_request() don't increase refcount,
+		 * so no dev_kfree_skb() - Jean II */
+
+		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
+	}
+	dev_kfree_skb(userdata);
+
+	return 0;
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irlmp_connless_data_indication (self, skb)
+ *
+ *    Receive unreliable data outside any connection. Mostly used by Ultra
+ *
+ */
+#ifdef CONFIG_IRDA_ULTRA
+void irlmp_connless_data_indication(struct lsap_cb *self, struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/* Hide LMP and PID header from layer above */
+	skb_pull(skb, LMP_HEADER+LMP_PID_HEADER);
+
+	if (self->notify.udata_indication) {
+		/* Don't forget to refcount it - see irlap_driver_rcv(). */
+		skb_get(skb);
+		self->notify.udata_indication(self->notify.instance, self,
+					      skb);
+	}
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Propagate status indication from LAP to LSAPs (via LMP)
+ * This don't trigger any change of state in lap_cb, lmp_cb or lsap_cb,
+ * and the event is stateless, therefore we can bypass both state machines
+ * and send the event direct to the LSAP user.
+ * Jean II
+ */
+void irlmp_status_indication(struct lap_cb *self,
+			     LINK_STATUS link, LOCK_STATUS lock)
+{
+	struct lsap_cb *next;
+	struct lsap_cb *curr;
+
+	/* Send status_indication to all LSAPs using this link */
+	curr = (struct lsap_cb *) hashbin_get_first( self->lsaps);
+	while (NULL != hashbin_find_next(self->lsaps, (long) curr, NULL,
+					 (void *) &next) ) {
+		IRDA_ASSERT(curr->magic == LMP_LSAP_MAGIC, return;);
+		/*
+		 *  Inform service user if he has requested it
+		 */
+		if (curr->notify.status_indication != NULL)
+			curr->notify.status_indication(curr->notify.instance,
+						       link, lock);
+		else
+			IRDA_DEBUG(2, "%s(), no handler\n", __FUNCTION__);
+
+		curr = next;
+	}
+}
+
+/*
+ * Receive flow control indication from LAP.
+ * LAP want us to send it one more frame. We implement a simple round
+ * robin scheduler between the active sockets so that we get a bit of
+ * fairness. Note that the round robin is far from perfect, but it's
+ * better than nothing.
+ * We then poll the selected socket so that we can do synchronous
+ * refilling of IrLAP (which allow to minimise the number of buffers).
+ * Jean II
+ */
+void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow)
+{
+	struct lsap_cb *next;
+	struct lsap_cb *curr;
+	int	lsap_todo;
+
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	IRDA_ASSERT(flow == FLOW_START, return;);
+
+	/* Get the number of lsap. That's the only safe way to know
+	 * that we have looped around... - Jean II */
+	lsap_todo = HASHBIN_GET_SIZE(self->lsaps);
+	IRDA_DEBUG(4, "%s() : %d lsaps to scan\n", __FUNCTION__, lsap_todo);
+
+	/* Poll lsap in order until the queue is full or until we
+	 * tried them all.
+	 * Most often, the current LSAP will have something to send,
+	 * so we will go through this loop only once. - Jean II */
+	while((lsap_todo--) &&
+	      (IRLAP_GET_TX_QUEUE_LEN(self->irlap) < LAP_HIGH_THRESHOLD)) {
+		/* Try to find the next lsap we should poll. */
+		next = self->flow_next;
+		/* If we have no lsap, restart from first one */
+		if(next == NULL)
+			next = (struct lsap_cb *) hashbin_get_first(self->lsaps);
+		/* Verify current one and find the next one */
+		curr = hashbin_find_next(self->lsaps, (long) next, NULL,
+					 (void *) &self->flow_next);
+		/* Uh-oh... Paranoia */
+		if(curr == NULL)
+			break;
+		IRDA_DEBUG(4, "%s() : curr is %p, next was %p and is now %p, still %d to go - queue len = %d\n", __FUNCTION__, curr, next, self->flow_next, lsap_todo, IRLAP_GET_TX_QUEUE_LEN(self->irlap));
+
+		/* Inform lsap user that it can send one more packet. */
+		if (curr->notify.flow_indication != NULL)
+			curr->notify.flow_indication(curr->notify.instance,
+						     curr, flow);
+		else
+			IRDA_DEBUG(1, "%s(), no handler\n", __FUNCTION__);
+	}
+}
+
+#if 0
+/*
+ * Function irlmp_hint_to_service (hint)
+ *
+ *    Returns a list of all servics contained in the given hint bits. This
+ *    function assumes that the hint bits have the size of two bytes only
+ */
+__u8 *irlmp_hint_to_service(__u8 *hint)
+{
+	__u8 *service;
+	int i = 0;
+
+	/*
+	 * Allocate array to store services in. 16 entries should be safe
+	 * since we currently only support 2 hint bytes
+	 */
+	service = kmalloc(16, GFP_ATOMIC);
+	if (!service) {
+		IRDA_DEBUG(1, "%s(), Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	if (!hint[0]) {
+		IRDA_DEBUG(1, "<None>\n");
+		kfree(service);
+		return NULL;
+	}
+	if (hint[0] & HINT_PNP)
+		IRDA_DEBUG(1, "PnP Compatible ");
+	if (hint[0] & HINT_PDA)
+		IRDA_DEBUG(1, "PDA/Palmtop ");
+	if (hint[0] & HINT_COMPUTER)
+		IRDA_DEBUG(1, "Computer ");
+	if (hint[0] & HINT_PRINTER) {
+		IRDA_DEBUG(1, "Printer ");
+		service[i++] = S_PRINTER;
+	}
+	if (hint[0] & HINT_MODEM)
+		IRDA_DEBUG(1, "Modem ");
+	if (hint[0] & HINT_FAX)
+		IRDA_DEBUG(1, "Fax ");
+	if (hint[0] & HINT_LAN) {
+		IRDA_DEBUG(1, "LAN Access ");
+		service[i++] = S_LAN;
+	}
+	/*
+	 *  Test if extension byte exists. This byte will usually be
+	 *  there, but this is not really required by the standard.
+	 *  (IrLMP p. 29)
+	 */
+	if (hint[0] & HINT_EXTENSION) {
+		if (hint[1] & HINT_TELEPHONY) {
+			IRDA_DEBUG(1, "Telephony ");
+			service[i++] = S_TELEPHONY;
+		} if (hint[1] & HINT_FILE_SERVER)
+			IRDA_DEBUG(1, "File Server ");
+
+		if (hint[1] & HINT_COMM) {
+			IRDA_DEBUG(1, "IrCOMM ");
+			service[i++] = S_COMM;
+		}
+		if (hint[1] & HINT_OBEX) {
+			IRDA_DEBUG(1, "IrOBEX ");
+			service[i++] = S_OBEX;
+		}
+	}
+	IRDA_DEBUG(1, "\n");
+
+	/* So that client can be notified about any discovery */
+	service[i++] = S_ANY;
+
+	service[i] = S_END;
+
+	return service;
+}
+#endif
+
+static const __u16 service_hint_mapping[S_END][2] = {
+	{ HINT_PNP,		0 },			/* S_PNP */
+	{ HINT_PDA,		0 },			/* S_PDA */
+	{ HINT_COMPUTER,	0 },			/* S_COMPUTER */
+	{ HINT_PRINTER,		0 },			/* S_PRINTER */
+	{ HINT_MODEM,		0 },			/* S_MODEM */
+	{ HINT_FAX,		0 },			/* S_FAX */
+	{ HINT_LAN,		0 },			/* S_LAN */
+	{ HINT_EXTENSION,	HINT_TELEPHONY },	/* S_TELEPHONY */
+	{ HINT_EXTENSION,	HINT_COMM },		/* S_COMM */
+	{ HINT_EXTENSION,	HINT_OBEX },		/* S_OBEX */
+	{ 0xFF,			0xFF },			/* S_ANY */
+};
+
+/*
+ * Function irlmp_service_to_hint (service)
+ *
+ *    Converts a service type, to a hint bit
+ *
+ *    Returns: a 16 bit hint value, with the service bit set
+ */
+__u16 irlmp_service_to_hint(int service)
+{
+	__u16_host_order hint;
+
+	hint.byte[0] = service_hint_mapping[service][0];
+	hint.byte[1] = service_hint_mapping[service][1];
+
+	return hint.word;
+}
+EXPORT_SYMBOL(irlmp_service_to_hint);
+
+/*
+ * Function irlmp_register_service (service)
+ *
+ *    Register local service with IrLMP
+ *
+ */
+void *irlmp_register_service(__u16 hints)
+{
+	irlmp_service_t *service;
+
+	IRDA_DEBUG(4, "%s(), hints = %04x\n", __FUNCTION__, hints);
+
+	/* Make a new registration */
+	service = kmalloc(sizeof(irlmp_service_t), GFP_ATOMIC);
+	if (!service) {
+		IRDA_DEBUG(1, "%s(), Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	service->hints.word = hints;
+	hashbin_insert(irlmp->services, (irda_queue_t *) service,
+		       (long) service, NULL);
+
+	irlmp->hints.word |= hints;
+
+	return (void *)service;
+}
+EXPORT_SYMBOL(irlmp_register_service);
+
+/*
+ * Function irlmp_unregister_service (handle)
+ *
+ *    Unregister service with IrLMP.
+ *
+ *    Returns: 0 on success, -1 on error
+ */
+int irlmp_unregister_service(void *handle)
+{
+	irlmp_service_t *service;
+	unsigned long flags;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	if (!handle)
+		return -1;
+
+	/* Caller may call with invalid handle (it's legal) - Jean II */
+	service = hashbin_lock_find(irlmp->services, (long) handle, NULL);
+	if (!service) {
+		IRDA_DEBUG(1, "%s(), Unknown service!\n", __FUNCTION__);
+		return -1;
+	}
+
+	hashbin_remove_this(irlmp->services, (irda_queue_t *) service);
+	kfree(service);
+
+	/* Remove old hint bits */
+	irlmp->hints.word = 0;
+
+	/* Refresh current hint bits */
+	spin_lock_irqsave(&irlmp->services->hb_spinlock, flags);
+        service = (irlmp_service_t *) hashbin_get_first(irlmp->services);
+        while (service) {
+		irlmp->hints.word |= service->hints.word;
+
+                service = (irlmp_service_t *)hashbin_get_next(irlmp->services);
+        }
+	spin_unlock_irqrestore(&irlmp->services->hb_spinlock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(irlmp_unregister_service);
+
+/*
+ * Function irlmp_register_client (hint_mask, callback1, callback2)
+ *
+ *    Register a local client with IrLMP
+ *	First callback is selective discovery (based on hints)
+ *	Second callback is for selective discovery expiries
+ *
+ *    Returns: handle > 0 on success, 0 on error
+ */
+void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
+			    DISCOVERY_CALLBACK2 expir_clb, void *priv)
+{
+	irlmp_client_t *client;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+	IRDA_ASSERT(irlmp != NULL, return NULL;);
+
+	/* Make a new registration */
+	client = kmalloc(sizeof(irlmp_client_t), GFP_ATOMIC);
+	if (!client) {
+		IRDA_DEBUG( 1, "%s(), Unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/* Register the details */
+	client->hint_mask.word = hint_mask;
+	client->disco_callback = disco_clb;
+	client->expir_callback = expir_clb;
+	client->priv = priv;
+
+	hashbin_insert(irlmp->clients, (irda_queue_t *) client,
+		       (long) client, NULL);
+
+	return (void *) client;
+}
+EXPORT_SYMBOL(irlmp_register_client);
+
+/*
+ * Function irlmp_update_client (handle, hint_mask, callback1, callback2)
+ *
+ *    Updates specified client (handle) with possibly new hint_mask and
+ *    callback
+ *
+ *    Returns: 0 on success, -1 on error
+ */
+int irlmp_update_client(void *handle, __u16 hint_mask,
+			DISCOVERY_CALLBACK1 disco_clb,
+			DISCOVERY_CALLBACK2 expir_clb, void *priv)
+{
+	irlmp_client_t *client;
+
+	if (!handle)
+		return -1;
+
+	client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
+	if (!client) {
+		IRDA_DEBUG(1, "%s(), Unknown client!\n", __FUNCTION__);
+		return -1;
+	}
+
+	client->hint_mask.word = hint_mask;
+	client->disco_callback = disco_clb;
+	client->expir_callback = expir_clb;
+	client->priv = priv;
+
+	return 0;
+}
+EXPORT_SYMBOL(irlmp_update_client);
+
+/*
+ * Function irlmp_unregister_client (handle)
+ *
+ *    Returns: 0 on success, -1 on error
+ *
+ */
+int irlmp_unregister_client(void *handle)
+{
+	struct irlmp_client *client;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	if (!handle)
+		return -1;
+
+	/* Caller may call with invalid handle (it's legal) - Jean II */
+	client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
+	if (!client) {
+		IRDA_DEBUG(1, "%s(), Unknown client!\n", __FUNCTION__);
+		return -1;
+	}
+
+	IRDA_DEBUG(4, "%s(), removing client!\n", __FUNCTION__);
+	hashbin_remove_this(irlmp->clients, (irda_queue_t *) client);
+	kfree(client);
+
+	return 0;
+}
+EXPORT_SYMBOL(irlmp_unregister_client);
+
+/*
+ * Function irlmp_slsap_inuse (slsap)
+ *
+ *    Check if the given source LSAP selector is in use
+ *
+ * This function is clearly not very efficient. On the mitigating side, the
+ * stack make sure that in 99% of the cases, we are called only once
+ * for each socket allocation. We could probably keep a bitmap
+ * of the allocated LSAP, but I'm not sure the complexity is worth it.
+ * Jean II
+ */
+static int irlmp_slsap_inuse(__u8 slsap_sel)
+{
+	struct lsap_cb *self;
+	struct lap_cb *lap;
+	unsigned long flags;
+
+	IRDA_ASSERT(irlmp != NULL, return TRUE;);
+	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return TRUE;);
+	IRDA_ASSERT(slsap_sel != LSAP_ANY, return TRUE;);
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+#ifdef CONFIG_IRDA_ULTRA
+	/* Accept all bindings to the connectionless LSAP */
+	if (slsap_sel == LSAP_CONNLESS)
+		return FALSE;
+#endif /* CONFIG_IRDA_ULTRA */
+
+	/* Valid values are between 0 and 127 (0x0-0x6F) */
+	if (slsap_sel > LSAP_MAX)
+		return TRUE;
+
+	/*
+	 *  Check if slsap is already in use. To do this we have to loop over
+	 *  every IrLAP connection and check every LSAP associated with each
+	 *  the connection.
+	 */
+	spin_lock_irqsave(&irlmp->links->hb_spinlock, flags);
+	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
+	while (lap != NULL) {
+		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;);
+
+		/* Careful for priority inversions here !
+		 * irlmp->links is never taken while another IrDA
+		 * spinlock is held, so we are safe. Jean II */
+		spin_lock(&lap->lsaps->hb_spinlock);
+
+		/* For this IrLAP, check all the LSAPs */
+		self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
+		while (self != NULL) {
+			IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
+				    goto errlsap;);
+
+			if ((self->slsap_sel == slsap_sel)) {
+				IRDA_DEBUG(4, "Source LSAP selector=%02x in use\n",
+					   self->slsap_sel);
+				goto errlsap;
+			}
+			self = (struct lsap_cb*) hashbin_get_next(lap->lsaps);
+		}
+		spin_unlock(&lap->lsaps->hb_spinlock);
+
+		/* Next LAP */
+		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
+	}
+	spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
+
+	/*
+	 * Server sockets are typically waiting for connections and
+	 * therefore reside in the unconnected list. We don't want
+	 * to give out their LSAPs for obvious reasons...
+	 * Jean II
+	 */
+	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+
+	self = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
+	while (self != NULL) {
+		IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, goto erruncon;);
+		if ((self->slsap_sel == slsap_sel)) {
+			IRDA_DEBUG(4, "Source LSAP selector=%02x in use (unconnected)\n",
+				   self->slsap_sel);
+			goto erruncon;
+		}
+		self = (struct lsap_cb*) hashbin_get_next(irlmp->unconnected_lsaps);
+	}
+	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+
+	return FALSE;
+
+	/* Error exit from within one of the two nested loops.
+	 * Make sure we release the right spinlock in the righ order.
+	 * Jean II */
+errlsap:
+	spin_unlock(&lap->lsaps->hb_spinlock);
+IRDA_ASSERT_LABEL(errlap:)
+	spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
+	return TRUE;
+
+	/* Error exit from within the unconnected loop.
+	 * Just one spinlock to release... Jean II */
+erruncon:
+	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+	return TRUE;
+}
+
+/*
+ * Function irlmp_find_free_slsap ()
+ *
+ *    Find a free source LSAP to use. This function is called if the service
+ *    user has requested a source LSAP equal to LM_ANY
+ */
+static __u8 irlmp_find_free_slsap(void)
+{
+	__u8 lsap_sel;
+	int wrapped = 0;
+
+	IRDA_ASSERT(irlmp != NULL, return -1;);
+	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return -1;);
+
+	/* Most users don't really care which LSAPs they are given,
+	 * and therefore we automatically give them a free LSAP.
+	 * This function try to find a suitable LSAP, i.e. which is
+	 * not in use and is within the acceptable range. Jean II */
+
+	do {
+		/* Always increment to LSAP number before using it.
+		 * In theory, we could reuse the last LSAP number, as long
+		 * as it is no longer in use. Some IrDA stack do that.
+		 * However, the previous socket may be half closed, i.e.
+		 * we closed it, we think it's no longer in use, but the
+		 * other side did not receive our close and think it's
+		 * active and still send data on it.
+		 * This is similar to what is done with PIDs and TCP ports.
+		 * Also, this reduce the number of calls to irlmp_slsap_inuse()
+		 * which is an expensive function to call.
+		 * Jean II */
+		irlmp->last_lsap_sel++;
+
+		/* Check if we need to wraparound (0x70-0x7f are reserved) */
+		if (irlmp->last_lsap_sel > LSAP_MAX) {
+			/* 0x00-0x10 are also reserved for well know ports */
+			irlmp->last_lsap_sel = 0x10;
+
+			/* Make sure we terminate the loop */
+			if (wrapped++) {
+				IRDA_ERROR("%s: no more free LSAPs !\n",
+					   __FUNCTION__);
+				return 0;
+			}
+		}
+
+		/* If the LSAP is in use, try the next one.
+		 * Despite the autoincrement, we need to check if the lsap
+		 * is really in use or not, first because LSAP may be
+		 * directly allocated in irlmp_open_lsap(), and also because
+		 * we may wraparound on old sockets. Jean II */
+	} while (irlmp_slsap_inuse(irlmp->last_lsap_sel));
+
+	/* Got it ! */
+	lsap_sel = irlmp->last_lsap_sel;
+	IRDA_DEBUG(4, "%s(), found free lsap_sel=%02x\n",
+		   __FUNCTION__, lsap_sel);
+
+	return lsap_sel;
+}
+
+/*
+ * Function irlmp_convert_lap_reason (lap_reason)
+ *
+ *    Converts IrLAP disconnect reason codes to IrLMP disconnect reason
+ *    codes
+ *
+ */
+LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason)
+{
+	int reason = LM_LAP_DISCONNECT;
+
+	switch (lap_reason) {
+	case LAP_DISC_INDICATION: /* Received a disconnect request from peer */
+		IRDA_DEBUG( 1, "%s(), LAP_DISC_INDICATION\n", __FUNCTION__);
+		reason = LM_USER_REQUEST;
+		break;
+	case LAP_NO_RESPONSE:    /* To many retransmits without response */
+		IRDA_DEBUG( 1, "%s(), LAP_NO_RESPONSE\n", __FUNCTION__);
+		reason = LM_LAP_DISCONNECT;
+		break;
+	case LAP_RESET_INDICATION:
+		IRDA_DEBUG( 1, "%s(), LAP_RESET_INDICATION\n", __FUNCTION__);
+		reason = LM_LAP_RESET;
+		break;
+	case LAP_FOUND_NONE:
+	case LAP_MEDIA_BUSY:
+	case LAP_PRIMARY_CONFLICT:
+		IRDA_DEBUG(1, "%s(), LAP_FOUND_NONE, LAP_MEDIA_BUSY or LAP_PRIMARY_CONFLICT\n", __FUNCTION__);
+		reason = LM_CONNECT_FAILURE;
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknow IrLAP disconnect reason %d!\n",
+			   __FUNCTION__, lap_reason);
+		reason = LM_LAP_DISCONNECT;
+		break;
+	}
+
+	return reason;
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct irlmp_iter_state {
+	hashbin_t *hashbin;
+};
+
+#define LSAP_START_TOKEN	((void *)1)
+#define LINK_START_TOKEN	((void *)2)
+
+static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
+{
+	void *element;
+
+	spin_lock_irq(&iter->hashbin->hb_spinlock);
+	for (element = hashbin_get_first(iter->hashbin);
+	     element != NULL; 
+	     element = hashbin_get_next(iter->hashbin)) {
+		if (!off || *off-- == 0) {
+			/* NB: hashbin left locked */
+			return element;
+		}
+	}
+	spin_unlock_irq(&iter->hashbin->hb_spinlock);
+	iter->hashbin = NULL;
+	return NULL;
+}
+
+
+static void *irlmp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct irlmp_iter_state *iter = seq->private;
+	void *v;
+	loff_t off = *pos;
+
+	iter->hashbin = NULL;
+	if (off-- == 0)
+		return LSAP_START_TOKEN;
+
+	iter->hashbin = irlmp->unconnected_lsaps;
+	v = irlmp_seq_hb_idx(iter, &off);
+	if (v)
+		return v;
+
+	if (off-- == 0)
+		return LINK_START_TOKEN;
+
+	iter->hashbin = irlmp->links;
+	return irlmp_seq_hb_idx(iter, &off);
+}
+
+static void *irlmp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct irlmp_iter_state *iter = seq->private;
+
+	++*pos;
+
+	if (v == LSAP_START_TOKEN) {		/* start of list of lsaps */
+		iter->hashbin = irlmp->unconnected_lsaps;
+		v = irlmp_seq_hb_idx(iter, NULL);
+		return v ? v : LINK_START_TOKEN;
+	}
+
+	if (v == LINK_START_TOKEN) {		/* start of list of links */
+		iter->hashbin = irlmp->links;
+		return irlmp_seq_hb_idx(iter, NULL);
+	}
+
+	v = hashbin_get_next(iter->hashbin);
+
+	if (v == NULL) {			/* no more in this hash bin */
+		spin_unlock_irq(&iter->hashbin->hb_spinlock);
+
+		if (iter->hashbin == irlmp->unconnected_lsaps) 
+			v =  LINK_START_TOKEN;
+
+		iter->hashbin = NULL;
+	}
+	return v;
+}
+
+static void irlmp_seq_stop(struct seq_file *seq, void *v)
+{
+	struct irlmp_iter_state *iter = seq->private;
+
+	if (iter->hashbin)
+		spin_unlock_irq(&iter->hashbin->hb_spinlock);
+}
+
+static int irlmp_seq_show(struct seq_file *seq, void *v)
+{
+	const struct irlmp_iter_state *iter = seq->private;
+	struct lsap_cb *self = v;
+
+	if (v == LSAP_START_TOKEN)
+		seq_puts(seq, "Unconnected LSAPs:\n");
+	else if (v == LINK_START_TOKEN)
+		seq_puts(seq, "\nRegistered Link Layers:\n");
+	else if (iter->hashbin == irlmp->unconnected_lsaps) {
+		self = v;
+		IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EINVAL; );
+		seq_printf(seq, "lsap state: %s, ",
+			   irlsap_state[ self->lsap_state]);
+		seq_printf(seq,
+			   "slsap_sel: %#02x, dlsap_sel: %#02x, ",
+			   self->slsap_sel, self->dlsap_sel);
+		seq_printf(seq, "(%s)", self->notify.name);
+		seq_printf(seq, "\n");
+	} else if (iter->hashbin == irlmp->links) {
+		struct lap_cb *lap = v;
+
+		seq_printf(seq, "lap state: %s, ",
+			   irlmp_state[lap->lap_state]);
+
+		seq_printf(seq, "saddr: %#08x, daddr: %#08x, ",
+			   lap->saddr, lap->daddr);
+		seq_printf(seq, "num lsaps: %d",
+			   HASHBIN_GET_SIZE(lap->lsaps));
+		seq_printf(seq, "\n");
+
+		/* Careful for priority inversions here !
+		 * All other uses of attrib spinlock are independent of
+		 * the object spinlock, so we are safe. Jean II */
+		spin_lock(&lap->lsaps->hb_spinlock);
+
+		seq_printf(seq, "\n  Connected LSAPs:\n");
+		for (self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
+		     self != NULL;
+		     self = (struct lsap_cb *)hashbin_get_next(lap->lsaps)) {
+			IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
+				    goto outloop;);
+			seq_printf(seq, "  lsap state: %s, ",
+				   irlsap_state[ self->lsap_state]);
+			seq_printf(seq,
+				   "slsap_sel: %#02x, dlsap_sel: %#02x, ",
+				   self->slsap_sel, self->dlsap_sel);
+			seq_printf(seq, "(%s)", self->notify.name);
+			seq_putc(seq, '\n');
+
+		}
+	IRDA_ASSERT_LABEL(outloop:)
+		spin_unlock(&lap->lsaps->hb_spinlock);
+		seq_putc(seq, '\n');
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct seq_operations irlmp_seq_ops = {
+	.start  = irlmp_seq_start,
+	.next   = irlmp_seq_next,
+	.stop   = irlmp_seq_stop,
+	.show   = irlmp_seq_show,
+};
+
+static int irlmp_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct irlmp_iter_state *s;
+
+	IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &irlmp_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+struct file_operations irlmp_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = irlmp_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif /* PROC_FS */
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
new file mode 100644
index 00000000000..26649f6528e
--- /dev/null
+++ b/net/irda/irlmp_event.c
@@ -0,0 +1,912 @@
+/*********************************************************************
+ *
+ * Filename:      irlmp_event.c
+ * Version:       0.8
+ * Description:   An IrDA LMP event driver for Linux
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Tue Dec 14 23:04:16 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/timer.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irlmp_frame.h>
+#include <net/irda/irlmp_event.h>
+
+const char *irlmp_state[] = {
+	"LAP_STANDBY",
+	"LAP_U_CONNECT",
+	"LAP_ACTIVE",
+};
+
+const char *irlsap_state[] = {
+	"LSAP_DISCONNECTED",
+	"LSAP_CONNECT",
+	"LSAP_CONNECT_PEND",
+	"LSAP_DATA_TRANSFER_READY",
+	"LSAP_SETUP",
+	"LSAP_SETUP_PEND",
+};
+
+#ifdef CONFIG_IRDA_DEBUG
+static const char *irlmp_event[] = {
+	"LM_CONNECT_REQUEST",
+	"LM_CONNECT_CONFIRM",
+	"LM_CONNECT_RESPONSE",
+	"LM_CONNECT_INDICATION",
+
+	"LM_DISCONNECT_INDICATION",
+	"LM_DISCONNECT_REQUEST",
+
+	"LM_DATA_REQUEST",
+	"LM_UDATA_REQUEST",
+	"LM_DATA_INDICATION",
+	"LM_UDATA_INDICATION",
+
+	"LM_WATCHDOG_TIMEOUT",
+
+	/* IrLAP events */
+	"LM_LAP_CONNECT_REQUEST",
+	"LM_LAP_CONNECT_INDICATION",
+	"LM_LAP_CONNECT_CONFIRM",
+	"LM_LAP_DISCONNECT_INDICATION",
+	"LM_LAP_DISCONNECT_REQUEST",
+	"LM_LAP_DISCOVERY_REQUEST",
+	"LM_LAP_DISCOVERY_CONFIRM",
+	"LM_LAP_IDLE_TIMEOUT",
+};
+#endif	/* CONFIG_IRDA_DEBUG */
+
+/* LAP Connection control proto declarations */
+static void irlmp_state_standby  (struct lap_cb *, IRLMP_EVENT,
+				  struct sk_buff *);
+static void irlmp_state_u_connect(struct lap_cb *, IRLMP_EVENT,
+				  struct sk_buff *);
+static void irlmp_state_active   (struct lap_cb *, IRLMP_EVENT,
+				  struct sk_buff *);
+
+/* LSAP Connection control proto declarations */
+static int irlmp_state_disconnected(struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+static int irlmp_state_connect     (struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+static int irlmp_state_connect_pend(struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+static int irlmp_state_dtr         (struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+static int irlmp_state_setup       (struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+static int irlmp_state_setup_pend  (struct lsap_cb *, IRLMP_EVENT,
+				    struct sk_buff *);
+
+static void (*lap_state[]) (struct lap_cb *, IRLMP_EVENT, struct sk_buff *) =
+{
+	irlmp_state_standby,
+	irlmp_state_u_connect,
+	irlmp_state_active,
+};
+
+static int (*lsap_state[])( struct lsap_cb *, IRLMP_EVENT, struct sk_buff *) =
+{
+	irlmp_state_disconnected,
+	irlmp_state_connect,
+	irlmp_state_connect_pend,
+	irlmp_state_dtr,
+	irlmp_state_setup,
+	irlmp_state_setup_pend
+};
+
+static inline void irlmp_next_lap_state(struct lap_cb *self,
+					IRLMP_STATE state)
+{
+	/*
+	IRDA_DEBUG(4, "%s(), LMP LAP = %s\n", __FUNCTION__, irlmp_state[state]);
+	*/
+	self->lap_state = state;
+}
+
+static inline void irlmp_next_lsap_state(struct lsap_cb *self,
+					 LSAP_STATE state)
+{
+	/*
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_DEBUG(4, "%s(), LMP LSAP = %s\n", __FUNCTION__, irlsap_state[state]);
+	*/
+	self->lsap_state = state;
+}
+
+/* Do connection control events */
+int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event,
+			struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	IRDA_DEBUG(4, "%s(), EVENT = %s, STATE = %s\n",
+		__FUNCTION__, irlmp_event[event], irlsap_state[ self->lsap_state]);
+
+	return (*lsap_state[self->lsap_state]) (self, event, skb);
+}
+
+/*
+ * Function do_lap_event (event, skb, info)
+ *
+ *    Do IrLAP control events
+ *
+ */
+void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event,
+			struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+
+	IRDA_DEBUG(4, "%s(), EVENT = %s, STATE = %s\n", __FUNCTION__,
+		   irlmp_event[event],
+		   irlmp_state[self->lap_state]);
+
+	(*lap_state[self->lap_state]) (self, event, skb);
+}
+
+void irlmp_discovery_timer_expired(void *data)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/* We always cleanup the log (active & passive discovery) */
+	irlmp_do_expiry();
+
+	/* Active discovery is conditional */
+	if (sysctl_discovery)
+		irlmp_do_discovery(sysctl_discovery_slots);
+
+	/* Restart timer */
+	irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
+}
+
+void irlmp_watchdog_timer_expired(void *data)
+{
+	struct lsap_cb *self = (struct lsap_cb *) data;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
+
+	irlmp_do_lsap_event(self, LM_WATCHDOG_TIMEOUT, NULL);
+}
+
+void irlmp_idle_timer_expired(void *data)
+{
+	struct lap_cb *self = (struct lap_cb *) data;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+
+	irlmp_do_lap_event(self, LM_LAP_IDLE_TIMEOUT, NULL);
+}
+
+/*
+ * Send an event on all LSAPs attached to this LAP.
+ */
+static inline void
+irlmp_do_all_lsap_event(hashbin_t *	lsap_hashbin,
+			IRLMP_EVENT	event)
+{
+	struct lsap_cb *lsap;
+	struct lsap_cb *lsap_next;
+
+	/* Note : this function use the new hashbin_find_next()
+	 * function, instead of the old hashbin_get_next().
+	 * This make sure that we are always pointing one lsap
+	 * ahead, so that if the current lsap is removed as the
+	 * result of sending the event, we don't care.
+	 * Also, as we store the context ourselves, if an enumeration
+	 * of the same lsap hashbin happens as the result of sending the
+	 * event, we don't care.
+	 * The only problem is if the next lsap is removed. In that case,
+	 * hashbin_find_next() will return NULL and we will abort the
+	 * enumeration. - Jean II */
+
+	/* Also : we don't accept any skb in input. We can *NOT* pass
+	 * the same skb to multiple clients safely, we would need to
+	 * skb_clone() it. - Jean II */
+
+	lsap = (struct lsap_cb *) hashbin_get_first(lsap_hashbin);
+
+	while (NULL != hashbin_find_next(lsap_hashbin,
+					 (long) lsap,
+					 NULL,
+					 (void *) &lsap_next) ) {
+		irlmp_do_lsap_event(lsap, event, NULL);
+		lsap = lsap_next;
+	}
+}
+
+/*********************************************************************
+ *
+ *    LAP connection control states
+ *
+ ********************************************************************/
+
+/*
+ * Function irlmp_state_standby (event, skb, info)
+ *
+ *    STANDBY, The IrLAP connection does not exist.
+ *
+ */
+static void irlmp_state_standby(struct lap_cb *self, IRLMP_EVENT event,
+				struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+	IRDA_ASSERT(self->irlap != NULL, return;);
+
+	switch (event) {
+	case LM_LAP_DISCOVERY_REQUEST:
+		/* irlmp_next_station_state( LMP_DISCOVER); */
+
+		irlap_discovery_request(self->irlap, &irlmp->discovery_cmd);
+		break;
+	case LM_LAP_CONNECT_INDICATION:
+		/*  It's important to switch state first, to avoid IrLMP to
+		 *  think that the link is free since IrLMP may then start
+		 *  discovery before the connection is properly set up. DB.
+		 */
+		irlmp_next_lap_state(self, LAP_ACTIVE);
+
+		/* Just accept connection TODO, this should be fixed */
+		irlap_connect_response(self->irlap, skb);
+		break;
+	case LM_LAP_CONNECT_REQUEST:
+		IRDA_DEBUG(4, "%s() LS_CONNECT_REQUEST\n", __FUNCTION__);
+
+		irlmp_next_lap_state(self, LAP_U_CONNECT);
+
+		/* FIXME: need to set users requested QoS */
+		irlap_connect_request(self->irlap, self->daddr, NULL, 0);
+		break;
+	case LM_LAP_DISCONNECT_INDICATION:
+		IRDA_DEBUG(4, "%s(), Error LM_LAP_DISCONNECT_INDICATION\n",
+			   __FUNCTION__);
+
+		irlmp_next_lap_state(self, LAP_STANDBY);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s\n",
+			   __FUNCTION__, irlmp_event[event]);
+		break;
+	}
+}
+
+/*
+ * Function irlmp_state_u_connect (event, skb, info)
+ *
+ *    U_CONNECT, The layer above has tried to open an LSAP connection but
+ *    since the IrLAP connection does not exist, we must first start an
+ *    IrLAP connection. We are now waiting response from IrLAP.
+ * */
+static void irlmp_state_u_connect(struct lap_cb *self, IRLMP_EVENT event,
+				  struct sk_buff *skb)
+{
+	IRDA_DEBUG(2, "%s(), event=%s\n", __FUNCTION__, irlmp_event[event]);
+
+	switch (event) {
+	case LM_LAP_CONNECT_INDICATION:
+		/*  It's important to switch state first, to avoid IrLMP to
+		 *  think that the link is free since IrLMP may then start
+		 *  discovery before the connection is properly set up. DB.
+		 */
+		irlmp_next_lap_state(self, LAP_ACTIVE);
+
+		/* Just accept connection TODO, this should be fixed */
+		irlap_connect_response(self->irlap, skb);
+
+		/* Tell LSAPs that they can start sending data */
+		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
+
+		/* Note : by the time we get there (LAP retries and co),
+		 * the lsaps may already have gone. This avoid getting stuck
+		 * forever in LAP_ACTIVE state - Jean II */
+		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
+			IRDA_DEBUG(0, "%s() NO LSAPs !\n",  __FUNCTION__);
+			irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
+		}
+		break;
+	case LM_LAP_CONNECT_REQUEST:
+		/* Already trying to connect */
+		break;
+	case LM_LAP_CONNECT_CONFIRM:
+		/* For all lsap_ce E Associated do LS_Connect_confirm */
+		irlmp_next_lap_state(self, LAP_ACTIVE);
+
+		/* Tell LSAPs that they can start sending data */
+		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
+
+		/* Note : by the time we get there (LAP retries and co),
+		 * the lsaps may already have gone. This avoid getting stuck
+		 * forever in LAP_ACTIVE state - Jean II */
+		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
+			IRDA_DEBUG(0, "%s() NO LSAPs !\n",  __FUNCTION__);
+			irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
+		}
+		break;
+	case LM_LAP_DISCONNECT_INDICATION:
+		IRDA_DEBUG(4, "%s(), LM_LAP_DISCONNECT_INDICATION\n",  __FUNCTION__);
+		irlmp_next_lap_state(self, LAP_STANDBY);
+
+		/* Send disconnect event to all LSAPs using this link */
+		irlmp_do_all_lsap_event(self->lsaps,
+					LM_LAP_DISCONNECT_INDICATION);
+		break;
+	case LM_LAP_DISCONNECT_REQUEST:
+		IRDA_DEBUG(4, "%s(), LM_LAP_DISCONNECT_REQUEST\n",  __FUNCTION__);
+
+		/* One of the LSAP did timeout or was closed, if it was
+		 * the last one, try to get out of here - Jean II */
+		if (HASHBIN_GET_SIZE(self->lsaps) <= 1) {
+			irlap_disconnect_request(self->irlap);
+		}
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s\n",
+			 __FUNCTION__, irlmp_event[event]);
+		break;
+	}
+}
+
+/*
+ * Function irlmp_state_active (event, skb, info)
+ *
+ *    ACTIVE, IrLAP connection is active
+ *
+ */
+static void irlmp_state_active(struct lap_cb *self, IRLMP_EVENT event,
+			       struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	switch (event) {
+	case LM_LAP_CONNECT_REQUEST:
+		IRDA_DEBUG(4, "%s(), LS_CONNECT_REQUEST\n", __FUNCTION__);
+
+		/*
+		 * IrLAP may have a pending disconnect. We tried to close
+		 * IrLAP, but it was postponed because the link was
+		 * busy or we were still sending packets. As we now
+		 * need it, make sure it stays on. Jean II
+		 */
+		irlap_clear_disconnect(self->irlap);
+
+		/*
+		 *  LAP connection already active, just bounce back! Since we
+		 *  don't know which LSAP that tried to do this, we have to
+		 *  notify all LSAPs using this LAP, but that should be safe to
+		 *  do anyway.
+		 */
+		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
+
+		/* Needed by connect indication */
+		irlmp_do_all_lsap_event(irlmp->unconnected_lsaps,
+					LM_LAP_CONNECT_CONFIRM);
+		/* Keep state */
+		break;
+	case LM_LAP_DISCONNECT_REQUEST:
+		/*
+		 *  Need to find out if we should close IrLAP or not. If there
+		 *  is only one LSAP connection left on this link, that LSAP
+		 *  must be the one that tries to close IrLAP. It will be
+		 *  removed later and moved to the list of unconnected LSAPs
+		 */
+		if (HASHBIN_GET_SIZE(self->lsaps) > 0) {
+			/* Timer value is checked in irsysctl - Jean II */
+			irlmp_start_idle_timer(self, sysctl_lap_keepalive_time * HZ / 1000);
+		} else {
+			/* No more connections, so close IrLAP */
+
+			/* We don't want to change state just yet, because
+			 * we want to reflect accurately the real state of
+			 * the LAP, not the state we wish it was in,
+			 * so that we don't lose LM_LAP_CONNECT_REQUEST.
+			 * In some cases, IrLAP won't close the LAP
+			 * immediately. For example, it might still be
+			 * retrying packets or waiting for the pf bit.
+			 * As the LAP always send a DISCONNECT_INDICATION
+			 * in PCLOSE or SCLOSE, just change state on that.
+			 * Jean II */
+			irlap_disconnect_request(self->irlap);
+		}
+		break;
+	case LM_LAP_IDLE_TIMEOUT:
+		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
+			/* Same reasoning as above - keep state */
+			irlap_disconnect_request(self->irlap);
+		}
+		break;
+	case LM_LAP_DISCONNECT_INDICATION:
+		irlmp_next_lap_state(self, LAP_STANDBY);
+
+		/* In some case, at this point our side has already closed
+		 * all lsaps, and we are waiting for the idle_timer to
+		 * expire. If another device reconnect immediately, the
+		 * idle timer will expire in the midle of the connection
+		 * initialisation, screwing up things a lot...
+		 * Therefore, we must stop the timer... */
+		irlmp_stop_idle_timer(self);
+
+		/*
+		 *  Inform all connected LSAP's using this link
+		 */
+		irlmp_do_all_lsap_event(self->lsaps,
+					LM_LAP_DISCONNECT_INDICATION);
+
+		/* Force an expiry of the discovery log.
+		 * Now that the LAP is free, the system may attempt to
+		 * connect to another device. Unfortunately, our entries
+		 * are stale. There is a small window (<3s) before the
+		 * normal discovery will run and where irlmp_connect_request()
+		 * can get the wrong info, so make sure things get
+		 * cleaned *NOW* ;-) - Jean II */
+		irlmp_do_expiry();
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s\n",
+			 __FUNCTION__, irlmp_event[event]);
+		break;
+	}
+}
+
+/*********************************************************************
+ *
+ *    LSAP connection control states
+ *
+ ********************************************************************/
+
+/*
+ * Function irlmp_state_disconnected (event, skb, info)
+ *
+ *    DISCONNECTED
+ *
+ */
+static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event,
+				    struct sk_buff *skb)
+{
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	switch (event) {
+#ifdef CONFIG_IRDA_ULTRA
+	case LM_UDATA_INDICATION:
+		/* This is most bizzare. Those packets are  aka unreliable
+		 * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA.
+		 * Why do we pass them as Ultra ??? Jean II */
+		irlmp_connless_data_indication(self, skb);
+		break;
+#endif /* CONFIG_IRDA_ULTRA */
+	case LM_CONNECT_REQUEST:
+		IRDA_DEBUG(4, "%s(), LM_CONNECT_REQUEST\n", __FUNCTION__);
+
+		if (self->conn_skb) {
+			IRDA_WARNING("%s: busy with another request!\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+		/* Don't forget to refcount it (see irlmp_connect_request()) */
+		skb_get(skb);
+		self->conn_skb = skb;
+
+		irlmp_next_lsap_state(self, LSAP_SETUP_PEND);
+
+		/* Start watchdog timer (5 secs for now) */
+		irlmp_start_watchdog_timer(self, 5*HZ);
+
+		irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
+		break;
+	case LM_CONNECT_INDICATION:
+		if (self->conn_skb) {
+			IRDA_WARNING("%s: busy with another request!\n",
+				     __FUNCTION__);
+			return -EBUSY;
+		}
+		/* Don't forget to refcount it (see irlap_driver_rcv()) */
+		skb_get(skb);
+		self->conn_skb = skb;
+
+		irlmp_next_lsap_state(self, LSAP_CONNECT_PEND);
+
+		/* Start watchdog timer
+		 * This is not mentionned in the spec, but there is a rare
+		 * race condition that can get the socket stuck.
+		 * If we receive this event while our LAP is closing down,
+		 * the LM_LAP_CONNECT_REQUEST get lost and we get stuck in
+		 * CONNECT_PEND state forever.
+		 * The other cause of getting stuck down there is if the
+		 * higher layer never reply to the CONNECT_INDICATION.
+		 * Anyway, it make sense to make sure that we always have
+		 * a backup plan. 1 second is plenty (should be immediate).
+		 * Jean II */
+		irlmp_start_watchdog_timer(self, 1*HZ);
+
+		irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown event %s on LSAP %#02x\n",
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlmp_state_connect (self, event, skb)
+ *
+ *    CONNECT
+ *
+ */
+static int irlmp_state_connect(struct lsap_cb *self, IRLMP_EVENT event,
+				struct sk_buff *skb)
+{
+	struct lsap_cb *lsap;
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	switch (event) {
+	case LM_CONNECT_RESPONSE:
+		/*
+		 *  Bind this LSAP to the IrLAP link where the connect was
+		 *  received
+		 */
+		lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
+				      NULL);
+
+		IRDA_ASSERT(lsap == self, return -1;);
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
+
+		hashbin_insert(self->lap->lsaps, (irda_queue_t *) self,
+			       (long) self, NULL);
+
+		set_bit(0, &self->connected);	/* TRUE */
+
+		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
+				   self->slsap_sel, CONNECT_CNF, skb);
+
+		del_timer(&self->watchdog_timer);
+
+		irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
+		break;
+	case LM_WATCHDOG_TIMEOUT:
+		/* May happen, who knows...
+		 * Jean II */
+		IRDA_DEBUG(0, "%s() WATCHDOG_TIMEOUT!\n",  __FUNCTION__);
+
+		/* Disconnect, get out... - Jean II */
+		self->lap = NULL;
+		self->dlsap_sel = LSAP_ANY;
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+		break;
+	default:
+		/* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
+		 * are *not* yet bound to the IrLAP link. Jean II */
+		IRDA_DEBUG(0, "%s(), Unknown event %s on LSAP %#02x\n", 
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlmp_state_connect_pend (event, skb, info)
+ *
+ *    CONNECT_PEND
+ *
+ */
+static int irlmp_state_connect_pend(struct lsap_cb *self, IRLMP_EVENT event,
+				    struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	switch (event) {
+	case LM_CONNECT_REQUEST:
+		/* Keep state */
+		break;
+	case LM_CONNECT_RESPONSE:
+		IRDA_DEBUG(0, "%s(), LM_CONNECT_RESPONSE, "
+			   "no indication issued yet\n",  __FUNCTION__);
+		/* Keep state */
+		break;
+	case LM_DISCONNECT_REQUEST:
+		IRDA_DEBUG(0, "%s(), LM_DISCONNECT_REQUEST, "
+			   "not yet bound to IrLAP connection\n",  __FUNCTION__);
+		/* Keep state */
+		break;
+	case LM_LAP_CONNECT_CONFIRM:
+		IRDA_DEBUG(4, "%s(), LS_CONNECT_CONFIRM\n",  __FUNCTION__);
+		irlmp_next_lsap_state(self, LSAP_CONNECT);
+
+		tx_skb = self->conn_skb;
+		self->conn_skb = NULL;
+
+		irlmp_connect_indication(self, tx_skb);
+		/* Drop reference count - see irlmp_connect_indication(). */
+		dev_kfree_skb(tx_skb);
+		break;
+	case LM_WATCHDOG_TIMEOUT:
+		/* Will happen in some rare cases because of a race condition.
+		 * Just make sure we don't stay there forever...
+		 * Jean II */
+		IRDA_DEBUG(0, "%s() WATCHDOG_TIMEOUT!\n",  __FUNCTION__);
+
+		/* Go back to disconnected mode, keep the socket waiting */
+		self->lap = NULL;
+		self->dlsap_sel = LSAP_ANY;
+		if(self->conn_skb)
+			dev_kfree_skb(self->conn_skb);
+		self->conn_skb = NULL;
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+		break;
+	default:
+		/* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
+		 * are *not* yet bound to the IrLAP link. Jean II */
+		IRDA_DEBUG(0, "%s(), Unknown event %s on LSAP %#02x\n",
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlmp_state_dtr (self, event, skb)
+ *
+ *    DATA_TRANSFER_READY
+ *
+ */
+static int irlmp_state_dtr(struct lsap_cb *self, IRLMP_EVENT event,
+			   struct sk_buff *skb)
+{
+	LM_REASON reason;
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+	IRDA_ASSERT(self->lap != NULL, return -1;);
+
+	switch (event) {
+	case LM_DATA_REQUEST: /* Optimize for the common case */
+		irlmp_send_data_pdu(self->lap, self->dlsap_sel,
+				    self->slsap_sel, FALSE, skb);
+		break;
+	case LM_DATA_INDICATION: /* Optimize for the common case */
+		irlmp_data_indication(self, skb);
+		break;
+	case LM_UDATA_REQUEST:
+		IRDA_ASSERT(skb != NULL, return -1;);
+		irlmp_send_data_pdu(self->lap, self->dlsap_sel,
+				    self->slsap_sel, TRUE, skb);
+		break;
+	case LM_UDATA_INDICATION:
+		irlmp_udata_indication(self, skb);
+		break;
+	case LM_CONNECT_REQUEST:
+		IRDA_DEBUG(0, "%s(), LM_CONNECT_REQUEST, "
+			   "error, LSAP already connected\n", __FUNCTION__);
+		/* Keep state */
+		break;
+	case LM_CONNECT_RESPONSE:
+		IRDA_DEBUG(0, "%s(), LM_CONNECT_RESPONSE, "
+			   "error, LSAP already connected\n", __FUNCTION__);
+		/* Keep state */
+		break;
+	case LM_DISCONNECT_REQUEST:
+		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, self->slsap_sel,
+				   DISCONNECT, skb);
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+		/* Called only from irlmp_disconnect_request(), will
+		 * unbind from LAP over there. Jean II */
+
+		/* Try to close the LAP connection if its still there */
+		if (self->lap) {
+			IRDA_DEBUG(4, "%s(), trying to close IrLAP\n",
+				   __FUNCTION__);
+			irlmp_do_lap_event(self->lap,
+					   LM_LAP_DISCONNECT_REQUEST,
+					   NULL);
+		}
+		break;
+	case LM_LAP_DISCONNECT_INDICATION:
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		reason = irlmp_convert_lap_reason(self->lap->reason);
+
+		irlmp_disconnect_indication(self, reason, NULL);
+		break;
+	case LM_DISCONNECT_INDICATION:
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
+
+		IRDA_ASSERT(skb != NULL, return -1;);
+		IRDA_ASSERT(skb->len > 3, return -1;);
+		reason = skb->data[3];
+
+		 /* Try to close the LAP connection */
+		IRDA_DEBUG(4, "%s(), trying to close IrLAP\n", __FUNCTION__);
+		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
+
+		irlmp_disconnect_indication(self, reason, skb);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s on LSAP %#02x\n",
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlmp_state_setup (event, skb, info)
+ *
+ *    SETUP, Station Control has set up the underlying IrLAP connection.
+ *    An LSAP connection request has been transmitted to the peer
+ *    LSAP-Connection Control FSM and we are awaiting reply.
+ */
+static int irlmp_state_setup(struct lsap_cb *self, IRLMP_EVENT event,
+			     struct sk_buff *skb)
+{
+	LM_REASON reason;
+	int ret = 0;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	switch (event) {
+	case LM_CONNECT_CONFIRM:
+		irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
+
+		del_timer(&self->watchdog_timer);
+
+		irlmp_connect_confirm(self, skb);
+		break;
+	case LM_DISCONNECT_INDICATION:
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
+
+		IRDA_ASSERT(skb != NULL, return -1;);
+		IRDA_ASSERT(skb->len > 3, return -1;);
+		reason = skb->data[3];
+
+		 /* Try to close the LAP connection */
+		IRDA_DEBUG(4, "%s(), trying to close IrLAP\n",  __FUNCTION__);
+		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
+
+		irlmp_disconnect_indication(self, reason, skb);
+		break;
+	case LM_LAP_DISCONNECT_INDICATION:
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		del_timer(&self->watchdog_timer);
+
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
+
+		reason = irlmp_convert_lap_reason(self->lap->reason);
+
+		irlmp_disconnect_indication(self, reason, skb);
+		break;
+	case LM_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(0, "%s() WATCHDOG_TIMEOUT!\n", __FUNCTION__);
+
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s on LSAP %#02x\n",
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Function irlmp_state_setup_pend (event, skb, info)
+ *
+ *    SETUP_PEND, An LM_CONNECT_REQUEST has been received from the service
+ *    user to set up an LSAP connection. A request has been sent to the
+ *    LAP FSM to set up the underlying IrLAP connection, and we
+ *    are awaiting confirm.
+ */
+static int irlmp_state_setup_pend(struct lsap_cb *self, IRLMP_EVENT event,
+				  struct sk_buff *skb)
+{
+	struct sk_buff *tx_skb;
+	LM_REASON reason;
+	int ret = 0;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(irlmp != NULL, return -1;);
+
+	switch (event) {
+	case LM_LAP_CONNECT_CONFIRM:
+		IRDA_ASSERT(self->conn_skb != NULL, return -1;);
+
+		tx_skb = self->conn_skb;
+		self->conn_skb = NULL;
+
+		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
+				   self->slsap_sel, CONNECT_CMD, tx_skb);
+		/* Drop reference count - see irlap_data_request(). */
+		dev_kfree_skb(tx_skb);
+
+		irlmp_next_lsap_state(self, LSAP_SETUP);
+		break;
+	case LM_WATCHDOG_TIMEOUT:
+		IRDA_DEBUG(0, "%s() : WATCHDOG_TIMEOUT !\n",  __FUNCTION__);
+
+		IRDA_ASSERT(self->lap != NULL, return -1;);
+		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
+		break;
+	case LM_LAP_DISCONNECT_INDICATION: /* LS_Disconnect.indication */
+		del_timer( &self->watchdog_timer);
+
+		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
+
+		reason = irlmp_convert_lap_reason(self->lap->reason);
+
+		irlmp_disconnect_indication(self, reason, NULL);
+		break;
+	default:
+		IRDA_DEBUG(0, "%s(), Unknown event %s on LSAP %#02x\n",
+			   __FUNCTION__, irlmp_event[event], self->slsap_sel);
+		break;
+	}
+	return ret;
+}
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
new file mode 100644
index 00000000000..91cd268172f
--- /dev/null
+++ b/net/irda/irlmp_frame.c
@@ -0,0 +1,491 @@
+/*********************************************************************
+ *                
+ * Filename:      irlmp_frame.c
+ * Version:       0.9
+ * Description:   IrLMP frame implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Aug 19 02:09:59 1997
+ * Modified at:   Mon Dec 13 13:41:12 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/skbuff.h>
+#include <linux/kernel.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlap.h>
+#include <net/irda/timer.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irlmp_frame.h>
+#include <net/irda/discovery.h>
+
+static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap, 
+				       __u8 slsap, int status, hashbin_t *);
+
+inline void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
+				int expedited, struct sk_buff *skb)
+{
+	skb->data[0] = dlsap;
+	skb->data[1] = slsap;
+
+	if (expedited) {
+		IRDA_DEBUG(4, "%s(), sending expedited data\n", __FUNCTION__);
+		irlap_data_request(self->irlap, skb, TRUE);
+	} else
+		irlap_data_request(self->irlap, skb, FALSE);
+}
+
+/*
+ * Function irlmp_send_lcf_pdu (dlsap, slsap, opcode,skb)
+ *
+ *    Send Link Control Frame to IrLAP
+ */
+void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
+			__u8 opcode, struct sk_buff *skb) 
+{
+	__u8 *frame;
+	
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+	
+	frame = skb->data;
+	
+	frame[0] = dlsap | CONTROL_BIT;
+	frame[1] = slsap;
+
+	frame[2] = opcode;
+
+	if (opcode == DISCONNECT)
+		frame[3] = 0x01; /* Service user request */
+	else
+		frame[3] = 0x00; /* rsvd */
+
+	irlap_data_request(self->irlap, skb, FALSE);
+}
+
+/*
+ * Function irlmp_input (skb)
+ *
+ *    Used by IrLAP to pass received data frames to IrLMP layer
+ *
+ */
+void irlmp_link_data_indication(struct lap_cb *self, struct sk_buff *skb, 
+				int unreliable)
+{
+	struct lsap_cb *lsap;
+	__u8   slsap_sel;   /* Source (this) LSAP address */
+	__u8   dlsap_sel;   /* Destination LSAP address */
+	__u8   *fp;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	IRDA_ASSERT(skb->len > 2, return;);
+
+	fp = skb->data;
+
+	/*
+	 *  The next statements may be confusing, but we do this so that 
+	 *  destination LSAP of received frame is source LSAP in our view
+	 */
+	slsap_sel = fp[0] & LSAP_MASK; 
+	dlsap_sel = fp[1];	
+
+	/*
+	 *  Check if this is an incoming connection, since we must deal with
+	 *  it in a different way than other established connections.
+	 */
+	if ((fp[0] & CONTROL_BIT) && (fp[2] == CONNECT_CMD)) {
+		IRDA_DEBUG(3, "%s(), incoming connection, "
+			   "source LSAP=%d, dest LSAP=%d\n",
+			   __FUNCTION__, slsap_sel, dlsap_sel);
+		
+		/* Try to find LSAP among the unconnected LSAPs */
+		lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, CONNECT_CMD,
+				       irlmp->unconnected_lsaps);
+		
+		/* Maybe LSAP was already connected, so try one more time */
+		if (!lsap) {
+			IRDA_DEBUG(1, "%s(), incoming connection for LSAP already connected\n", __FUNCTION__);
+			lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0,
+					       self->lsaps);
+		}
+	} else
+		lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0, 
+				       self->lsaps);
+	
+	if (lsap == NULL) {
+		IRDA_DEBUG(2, "IrLMP, Sorry, no LSAP for received frame!\n");
+		IRDA_DEBUG(2, "%s(), slsap_sel = %02x, dlsap_sel = %02x\n",
+			   __FUNCTION__, slsap_sel, dlsap_sel);
+		if (fp[0] & CONTROL_BIT) {
+			IRDA_DEBUG(2, "%s(), received control frame %02x\n",
+				   __FUNCTION__, fp[2]);
+		} else {
+			IRDA_DEBUG(2, "%s(), received data frame\n", __FUNCTION__);
+		}
+		return;
+	}
+
+	/* 
+	 *  Check if we received a control frame? 
+	 */
+	if (fp[0] & CONTROL_BIT) {
+		switch (fp[2]) {
+		case CONNECT_CMD:
+			lsap->lap = self;
+			irlmp_do_lsap_event(lsap, LM_CONNECT_INDICATION, skb);
+			break;
+		case CONNECT_CNF:
+			irlmp_do_lsap_event(lsap, LM_CONNECT_CONFIRM, skb);
+			break;
+		case DISCONNECT:
+			IRDA_DEBUG(4, "%s(), Disconnect indication!\n",
+				   __FUNCTION__);
+			irlmp_do_lsap_event(lsap, LM_DISCONNECT_INDICATION, 
+					    skb);
+			break;
+		case ACCESSMODE_CMD:
+			IRDA_DEBUG(0, "Access mode cmd not implemented!\n");
+			break;
+		case ACCESSMODE_CNF:
+			IRDA_DEBUG(0, "Access mode cnf not implemented!\n");
+			break;
+		default:
+			IRDA_DEBUG(0, "%s(), Unknown control frame %02x\n",
+				   __FUNCTION__, fp[2]);
+			break;
+		}
+	} else if (unreliable) {
+		/* Optimize and bypass the state machine if possible */
+		if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
+			irlmp_udata_indication(lsap, skb);
+		else
+			irlmp_do_lsap_event(lsap, LM_UDATA_INDICATION, skb);
+	} else {	
+		/* Optimize and bypass the state machine if possible */
+		if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
+			irlmp_data_indication(lsap, skb);
+		else
+			irlmp_do_lsap_event(lsap, LM_DATA_INDICATION, skb);
+	}
+}
+
+/*
+ * Function irlmp_link_unitdata_indication (self, skb)
+ *
+ *    
+ *
+ */
+#ifdef CONFIG_IRDA_ULTRA
+void irlmp_link_unitdata_indication(struct lap_cb *self, struct sk_buff *skb)
+{
+	struct lsap_cb *lsap;
+	__u8   slsap_sel;   /* Source (this) LSAP address */
+	__u8   dlsap_sel;   /* Destination LSAP address */
+	__u8   pid;         /* Protocol identifier */
+	__u8   *fp;
+	unsigned long flags;
+	
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	IRDA_ASSERT(skb->len > 2, return;);
+
+	fp = skb->data;
+
+	/*
+	 *  The next statements may be confusing, but we do this so that 
+	 *  destination LSAP of received frame is source LSAP in our view
+	 */
+	slsap_sel = fp[0] & LSAP_MASK; 
+	dlsap_sel = fp[1];
+	pid       = fp[2];
+	
+	if (pid & 0x80) {
+		IRDA_DEBUG(0, "%s(), extension in PID not supp!\n",
+			   __FUNCTION__);
+		return;
+	}
+
+	/* Check if frame is addressed to the connectionless LSAP */
+	if ((slsap_sel != LSAP_CONNLESS) || (dlsap_sel != LSAP_CONNLESS)) {
+		IRDA_DEBUG(0, "%s(), dropping frame!\n", __FUNCTION__);
+		return;
+	}
+	
+	/* Search the connectionless LSAP */
+	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+	lsap = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
+	while (lsap != NULL) {
+		/*
+		 *  Check if source LSAP and dest LSAP selectors and PID match.
+		 */
+		if ((lsap->slsap_sel == slsap_sel) && 
+		    (lsap->dlsap_sel == dlsap_sel) && 
+		    (lsap->pid == pid)) 
+		{			
+			break;
+		}
+		lsap = (struct lsap_cb *) hashbin_get_next(irlmp->unconnected_lsaps);
+	}
+	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
+
+	if (lsap)
+		irlmp_connless_data_indication(lsap, skb);
+	else {
+		IRDA_DEBUG(0, "%s(), found no matching LSAP!\n", __FUNCTION__);
+	}
+}
+#endif /* CONFIG_IRDA_ULTRA */
+
+/*
+ * Function irlmp_link_disconnect_indication (reason, userdata)
+ *
+ *    IrLAP has disconnected 
+ *
+ */
+void irlmp_link_disconnect_indication(struct lap_cb *lap, 
+				      struct irlap_cb *irlap, 
+				      LAP_REASON reason, 
+				      struct sk_buff *skb)
+{
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(lap != NULL, return;);
+	IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
+
+	lap->reason = reason;
+	lap->daddr = DEV_ADDR_ANY;
+
+        /* FIXME: must do something with the skb if any */
+	
+	/*
+	 *  Inform station state machine
+	 */
+	irlmp_do_lap_event(lap, LM_LAP_DISCONNECT_INDICATION, NULL);
+}
+
+/*
+ * Function irlmp_link_connect_indication (qos)
+ *
+ *    Incoming LAP connection!
+ *
+ */
+void irlmp_link_connect_indication(struct lap_cb *self, __u32 saddr, 
+				   __u32 daddr, struct qos_info *qos,
+				   struct sk_buff *skb) 
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/* Copy QoS settings for this session */
+	self->qos = qos;
+
+	/* Update destination device address */
+	self->daddr = daddr;
+	IRDA_ASSERT(self->saddr == saddr, return;);
+
+	irlmp_do_lap_event(self, LM_LAP_CONNECT_INDICATION, skb);
+}
+
+/*
+ * Function irlmp_link_connect_confirm (qos)
+ *
+ *    LAP connection confirmed!
+ *
+ */
+void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, 
+				struct sk_buff *skb)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	IRDA_ASSERT(qos != NULL, return;);
+
+	/* Don't need use the skb for now */
+
+	/* Copy QoS settings for this session */
+	self->qos = qos;
+
+	irlmp_do_lap_event(self, LM_LAP_CONNECT_CONFIRM, NULL);
+}
+
+/*
+ * Function irlmp_link_discovery_indication (self, log)
+ *
+ *    Device is discovering us
+ *
+ * It's not an answer to our own discoveries, just another device trying
+ * to perform discovery, but we don't want to miss the opportunity
+ * to exploit this information, because :
+ *	o We may not actively perform discovery (just passive discovery)
+ *	o This type of discovery is much more reliable. In some cases, it
+ *	  seem that less than 50% of our discoveries get an answer, while
+ *	  we always get ~100% of these.
+ *	o Make faster discovery, statistically divide time of discovery
+ *	  events by 2 (important for the latency aspect and user feel)
+ *	o Even is we do active discovery, the other node might not
+ *	  answer our discoveries (ex: Palm). The Palm will just perform
+ *	  one active discovery and connect directly to us.
+ *
+ * However, when both devices discover each other, they might attempt to
+ * connect to each other following the discovery event, and it would create
+ * collisions on the medium (SNRM battle).
+ * The "fix" for that is to disable all connection requests in IrLAP
+ * for 100ms after a discovery indication by setting the media_busy flag.
+ * Previously, we used to postpone the event which was quite ugly. Now
+ * that IrLAP takes care of this problem, just pass the event up...
+ *
+ * Jean II
+ */
+void irlmp_link_discovery_indication(struct lap_cb *self, 
+				     discovery_t *discovery)
+{
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+
+	/* Add to main log, cleanup */
+	irlmp_add_discovery(irlmp->cachelog, discovery);
+	
+	/* Just handle it the same way as a discovery confirm,
+	 * bypass the LM_LAP state machine (see below) */
+	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_PASSIVE);
+}
+
+/*
+ * Function irlmp_link_discovery_confirm (self, log)
+ *
+ *    Called by IrLAP with a list of discoveries after the discovery
+ *    request has been carried out. A NULL log is received if IrLAP
+ *    was unable to carry out the discovery request
+ *
+ */
+void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log)
+{
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
+	
+	/* Add to main log, cleanup */
+	irlmp_add_discovery_log(irlmp->cachelog, log);
+
+	/* Propagate event to various LSAPs registered for it.
+	 * We bypass the LM_LAP state machine because
+	 *	1) We do it regardless of the LM_LAP state
+	 *	2) It doesn't affect the LM_LAP state
+	 *	3) Faster, slimer, simpler, ...
+	 * Jean II */
+	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_ACTIVE);
+}
+
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+static inline void irlmp_update_cache(struct lap_cb *lap,
+				      struct lsap_cb *lsap)
+{
+	/* Prevent concurrent read to get garbage */
+	lap->cache.valid = FALSE;
+	/* Update cache entry */
+	lap->cache.dlsap_sel = lsap->dlsap_sel;
+	lap->cache.slsap_sel = lsap->slsap_sel;
+	lap->cache.lsap = lsap;
+	lap->cache.valid = TRUE;
+}
+#endif
+
+/*
+ * Function irlmp_find_handle (self, dlsap_sel, slsap_sel, status, queue)
+ *
+ *    Find handle associated with destination and source LSAP
+ *
+ * Any IrDA connection (LSAP/TSAP) is uniquely identified by
+ * 3 parameters, the local lsap, the remote lsap and the remote address. 
+ * We may initiate multiple connections to the same remote service
+ * (they will have different local lsap), a remote device may initiate
+ * multiple connections to the same local service (they will have
+ * different remote lsap), or multiple devices may connect to the same
+ * service and may use the same remote lsap (and they will have
+ * different remote address).
+ * So, where is the remote address ? Each LAP connection is made with
+ * a single remote device, so imply a specific remote address.
+ * Jean II
+ */
+static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
+				       __u8 slsap_sel, int status,
+				       hashbin_t *queue) 
+{
+	struct lsap_cb *lsap;
+	unsigned long flags;
+	
+	/* 
+	 *  Optimize for the common case. We assume that the last frame
+	 *  received is in the same connection as the last one, so check in
+	 *  cache first to avoid the linear search
+	 */
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	if ((self->cache.valid) && 
+	    (self->cache.slsap_sel == slsap_sel) && 
+	    (self->cache.dlsap_sel == dlsap_sel)) 
+	{
+		return (self->cache.lsap);
+	}
+#endif
+
+	spin_lock_irqsave(&queue->hb_spinlock, flags);
+
+	lsap = (struct lsap_cb *) hashbin_get_first(queue);
+	while (lsap != NULL) {
+		/* 
+		 *  If this is an incoming connection, then the destination 
+		 *  LSAP selector may have been specified as LM_ANY so that 
+		 *  any client can connect. In that case we only need to check
+		 *  if the source LSAP (in our view!) match!
+		 */
+		if ((status == CONNECT_CMD) && 
+		    (lsap->slsap_sel == slsap_sel) &&      
+		    (lsap->dlsap_sel == LSAP_ANY)) {
+			/* This is where the dest lsap sel is set on incoming
+			 * lsaps */
+			lsap->dlsap_sel = dlsap_sel;
+			break;
+		}
+		/*
+		 *  Check if source LSAP and dest LSAP selectors match.
+		 */
+		if ((lsap->slsap_sel == slsap_sel) && 
+		    (lsap->dlsap_sel == dlsap_sel)) 
+			break;
+
+		lsap = (struct lsap_cb *) hashbin_get_next(queue);
+	}
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	if(lsap)
+		irlmp_update_cache(self, lsap);
+#endif
+	spin_unlock_irqrestore(&queue->hb_spinlock, flags);
+
+	/* Return what we've found or NULL */
+	return lsap;
+}
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
new file mode 100644
index 00000000000..6ffaed4544e
--- /dev/null
+++ b/net/irda/irmod.c
@@ -0,0 +1,185 @@
+/*********************************************************************
+ *                
+ * Filename:      irmod.c
+ * Version:       0.9
+ * Description:   IrDA stack main entry points
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Dec 15 13:55:39 1997
+ * Modified at:   Wed Jan  5 15:12:41 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2004 Jean Tourrilhes <jt@hpl.hp.com>
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+/*
+ * This file contains the main entry points of the IrDA stack.
+ * They are in this file and not af_irda.c because some developpers
+ * are using the IrDA stack without the socket API (compiling out
+ * af_irda.c).
+ * Jean II
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irmod.h>		/* notify_t */
+#include <net/irda/irlap.h>		/* irlap_init */
+#include <net/irda/irlmp.h>		/* irlmp_init */
+#include <net/irda/iriap.h>		/* iriap_init */
+#include <net/irda/irttp.h>		/* irttp_init */
+#include <net/irda/irda_device.h>	/* irda_device_init */
+
+/* irproc.c */
+extern void irda_proc_register(void);
+extern void irda_proc_unregister(void);
+/* irsysctl.c */
+extern int  irda_sysctl_register(void);
+extern void irda_sysctl_unregister(void);
+/* af_irda.c */
+extern int  irsock_init(void);
+extern void irsock_cleanup(void);
+/* irlap_frame.c */
+extern int  irlap_driver_rcv(struct sk_buff *, struct net_device *, 
+			     struct packet_type *);
+
+/*
+ * Module parameters
+ */
+#ifdef CONFIG_IRDA_DEBUG
+unsigned int irda_debug = IRDA_DEBUG_LEVEL;
+module_param_named(debug, irda_debug, uint, 0);
+MODULE_PARM_DESC(debug, "IRDA debugging level");
+EXPORT_SYMBOL(irda_debug);
+#endif
+
+/* Packet type handler.
+ * Tell the kernel how IrDA packets should be handled.
+ */
+static struct packet_type irda_packet_type = {
+	.type	= __constant_htons(ETH_P_IRDA),
+	.func	= irlap_driver_rcv,	/* Packet type handler irlap_frame.c */
+};
+
+/*
+ * Function irda_notify_init (notify)
+ *
+ *    Used for initializing the notify structure
+ *
+ */
+void irda_notify_init(notify_t *notify)
+{
+	notify->data_indication = NULL;
+	notify->udata_indication = NULL;
+	notify->connect_confirm = NULL;
+	notify->connect_indication = NULL;
+	notify->disconnect_indication = NULL;
+	notify->flow_indication = NULL;
+	notify->status_indication = NULL;
+	notify->instance = NULL;
+	strlcpy(notify->name, "Unknown", sizeof(notify->name));
+}
+EXPORT_SYMBOL(irda_notify_init);
+
+/*
+ * Function irda_init (void)
+ *
+ *  Protocol stack initialisation entry point.
+ *  Initialise the various components of the IrDA stack
+ */
+static int __init irda_init(void)
+{
+	IRDA_DEBUG(0, "%s()\n", __FUNCTION__);
+
+	/* Lower layer of the stack */
+ 	irlmp_init();
+	irlap_init();
+	
+	/* Higher layers of the stack */
+	iriap_init();
+ 	irttp_init();
+	irsock_init();
+	
+	/* Add IrDA packet type (Start receiving packets) */
+        dev_add_pack(&irda_packet_type);
+
+	/* External APIs */
+#ifdef CONFIG_PROC_FS
+	irda_proc_register();
+#endif
+#ifdef CONFIG_SYSCTL
+	irda_sysctl_register();
+#endif
+
+	/* Driver/dongle support */
+ 	irda_device_init();
+
+	return 0;
+}
+
+/*
+ * Function irda_cleanup (void)
+ *
+ *  Protocol stack cleanup/removal entry point.
+ *  Cleanup the various components of the IrDA stack
+ */
+static void __exit irda_cleanup(void)
+{
+	/* Remove External APIs */
+#ifdef CONFIG_SYSCTL
+	irda_sysctl_unregister();
+#endif	
+#ifdef CONFIG_PROC_FS
+	irda_proc_unregister();
+#endif
+
+	/* Remove IrDA packet type (stop receiving packets) */
+        dev_remove_pack(&irda_packet_type);
+	
+	/* Remove higher layers */
+	irsock_cleanup();
+	irttp_cleanup();
+	iriap_cleanup();
+
+	/* Remove lower layers */
+	irda_device_cleanup();
+	irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
+
+	/* Remove middle layer */
+	irlmp_cleanup();
+}
+
+/*
+ * The IrDA stack must be initialised *before* drivers get initialised,
+ * and *before* higher protocols (IrLAN/IrCOMM/IrNET) get initialised,
+ * otherwise bad things will happen (hashbins will be NULL for example).
+ * Those modules are at module_init()/device_initcall() level.
+ *
+ * On the other hand, it needs to be initialised *after* the basic
+ * networking, the /proc/net filesystem and sysctl module. Those are
+ * currently initialised in .../init/main.c (before initcalls).
+ * Also, IrDA drivers needs to be initialised *after* the random number
+ * generator (main stack and higher layer init don't need it anymore).
+ *
+ * Jean II
+ */
+subsys_initcall(irda_init);
+module_exit(irda_cleanup);
+ 
+MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>");
+MODULE_DESCRIPTION("The Linux IrDA Protocol Stack"); 
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_IRDA);
diff --git a/net/irda/irnet/Kconfig b/net/irda/irnet/Kconfig
new file mode 100644
index 00000000000..28c557f0fdd
--- /dev/null
+++ b/net/irda/irnet/Kconfig
@@ -0,0 +1,13 @@
+config IRNET
+	tristate "IrNET protocol"
+	depends on IRDA && PPP
+	help
+	  Say Y here if you want to build support for the IrNET protocol.
+	  To compile it as a module, choose M here: the module will be
+	  called irnet.  IrNET is a PPP driver, so you will also need a
+	  working PPP subsystem (driver, daemon and config)...
+
+	  IrNET is an alternate way to transfer TCP/IP traffic over IrDA.  It
+	  uses synchronous PPP over a set of point to point IrDA sockets.  You
+	  can use it between Linux machine or with W2k.
+
diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
new file mode 100644
index 00000000000..b3ee01e0def
--- /dev/null
+++ b/net/irda/irnet/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux IrDA IrNET protocol layer.
+#
+
+obj-$(CONFIG_IRNET) += irnet.o
+
+irnet-objs := irnet_ppp.o irnet_irda.o
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
new file mode 100644
index 00000000000..9004f7349a7
--- /dev/null
+++ b/net/irda/irnet/irnet.h
@@ -0,0 +1,529 @@
+/*
+ *	IrNET protocol module : Synchronous PPP over an IrDA socket.
+ *
+ *		Jean II - HPL `00 - <jt@hpl.hp.com>
+ *
+ * This file contains definitions and declarations global to the IrNET module,
+ * all grouped in one place...
+ * This file is a *private* header, so other modules don't want to know
+ * what's in there...
+ *
+ * Note : as most part of the Linux kernel, this module is available
+ * under the GNU General Public License (GPL).
+ */
+
+#ifndef IRNET_H
+#define IRNET_H
+
+/************************** DOCUMENTATION ***************************/
+/*
+ * What is IrNET
+ * -------------
+ * IrNET is a protocol allowing to carry TCP/IP traffic between two
+ * IrDA peers in an efficient fashion. It is a thin layer, passing PPP
+ * packets to IrTTP and vice versa. It uses PPP in synchronous mode,
+ * because IrTTP offer a reliable sequenced packet service (as opposed
+ * to a byte stream). In fact, you could see IrNET as carrying TCP/IP
+ * in a IrDA socket, using PPP to provide the glue.
+ *
+ * The main difference with traditional PPP over IrCOMM is that we
+ * avoid the framing and serial emulation which are a performance
+ * bottleneck. It also allows multipoint communications in a sensible
+ * fashion.
+ *
+ * The main difference with IrLAN is that we use PPP for the link
+ * management, which is more standard, interoperable and flexible than
+ * the IrLAN protocol. For example, PPP adds authentication,
+ * encryption, compression, header compression and automated routing
+ * setup. And, as IrNET let PPP do the hard work, the implementation
+ * is much simpler than IrLAN.
+ *
+ * The Linux implementation
+ * ------------------------
+ * IrNET is written on top of the Linux-IrDA stack, and interface with
+ * the generic Linux PPP driver. Because IrNET depend on recent
+ * changes of the PPP driver interface, IrNET will work only with very
+ * recent kernel (2.3.99-pre6 and up).
+ * 
+ * The present implementation offer the following features :
+ *	o simple user interface using pppd
+ *	o efficient implementation (interface directly to PPP and IrTTP)
+ *	o addressing (you can specify the name of the IrNET recipient)
+ *	o multipoint operation (limited by IrLAP specification)
+ *	o information in /proc/net/irda/irnet
+ *	o IrNET events on /dev/irnet (for user space daemon)
+ *	o IrNET daemon (irnetd) to automatically handle incoming requests
+ *	o Windows 2000 compatibility (tested, but need more work)
+ * Currently missing :
+ *	o Lot's of testing (that's your job)
+ *	o Connection retries (may be too hard to do)
+ *	o Check pppd persist mode
+ *	o User space daemon (to automatically handle incoming requests)
+ *
+ * The setup is not currently the most easy, but this should get much
+ * better when everything will get integrated...
+ *
+ * Acknowledgements
+ * ----------------
+ * This module is based on :
+ *	o The PPP driver (ppp_synctty/ppp_generic) by Paul Mackerras
+ *	o The IrLAN protocol (irlan_common/XXX) by Dag Brattli
+ *	o The IrSock interface (af_irda) by Dag Brattli
+ *	o Some other bits from the kernel and my drivers...
+ * Infinite thanks to those brave souls for providing the infrastructure
+ * upon which IrNET is built.
+ *
+ * Thanks to all my collegues in HP for helping me. In particular,
+ * thanks to Salil Pradhan and Bill Serra for W2k testing...
+ * Thanks to Luiz Magalhaes for irnetd and much testing...
+ *
+ * Thanks to Alan Cox for answering lot's of my stupid questions, and
+ * to Paul Mackerras answering my questions on how to best integrate
+ * IrNET and pppd.
+ *
+ * Jean II
+ *
+ * Note on some implementations choices...
+ * ------------------------------------
+ *	1) Direct interface vs tty/socket
+ * I could have used a tty interface to hook to ppp and use the full
+ * socket API to connect to IrDA. The code would have been easier to
+ * maintain, and maybe the code would have been smaller...
+ * Instead, we hook directly to ppp_generic and to IrTTP, which make
+ * things more complicated...
+ *
+ * The first reason is flexibility : this allow us to create IrNET
+ * instances on demand (no /dev/ircommX crap) and to allow linkname
+ * specification on pppd command line...
+ *
+ * Second reason is speed optimisation. If you look closely at the
+ * transmit and receive paths, you will notice that they are "super lean"
+ * (that's why they look ugly), with no function calls and as little data
+ * copy and modification as I could...
+ *
+ *	2) irnetd in user space
+ * irnetd is implemented in user space, which is necessary to call pppd.
+ * This also give maximum benefits in term of flexibility and customability,
+ * and allow to offer the event channel, useful for other stuff like debug.
+ *
+ * On the other hand, this require a loose coordination between the
+ * present module and irnetd. One critical area is how incoming request
+ * are handled.
+ * When irnet receive an incoming request, it send an event to irnetd and
+ * drop the incoming IrNET socket.
+ * irnetd start a pppd instance, which create a new IrNET socket. This new
+ * socket is then connected in the originating node to the pppd instance.
+ * At this point, in the originating node, the first socket is closed.
+ *
+ * I admit, this is a bit messy and waste some resources. The alternative
+ * is caching incoming socket, and that's also quite messy and waste
+ * resources.
+ * We also make connection time slower. For example, on a 115 kb/s link it
+ * adds 60ms to the connection time (770 ms). However, this is slower than
+ * the time it takes to fire up pppd on my P133...
+ *
+ *
+ * History :
+ * -------
+ *
+ * v1 - 15.5.00 - Jean II
+ *	o Basic IrNET (hook to ppp_generic & IrTTP - incl. multipoint)
+ *	o control channel on /dev/irnet (set name/address)
+ *	o event channel on /dev/irnet (for user space daemon)
+ *
+ * v2 - 5.6.00 - Jean II
+ *	o Enable DROP_NOT_READY to avoid PPP timeouts & other weirdness...
+ *	o Add DISCONNECT_TO event and rename DISCONNECT_FROM.
+ *	o Set official device number alloaction on /dev/irnet
+ *
+ * v3 - 30.8.00 - Jean II
+ *	o Update to latest Linux-IrDA changes :
+ *		- queue_t => irda_queue_t
+ *	o Update to ppp-2.4.0 :
+ *		- move irda_irnet_connect from PPPIOCATTACH to TIOCSETD
+ *	o Add EXPIRE event (depend on new IrDA-Linux patch)
+ *	o Switch from `hashbin_remove' to `hashbin_remove_this' to fix
+ *	  a multilink bug... (depend on new IrDA-Linux patch)
+ *	o fix a self->daddr to self->raddr in irda_irnet_connect to fix
+ *	  another multilink bug (darn !)
+ *	o Remove LINKNAME_IOCTL cruft
+ *
+ * v3b - 31.8.00 - Jean II
+ *	o Dump discovery log at event channel startup
+ *
+ * v4 - 28.9.00 - Jean II
+ *	o Fix interaction between poll/select and dump discovery log
+ *	o Add IRNET_BLOCKED_LINK event (depend on new IrDA-Linux patch)
+ *	o Add IRNET_NOANSWER_FROM event (mostly to help support)
+ *	o Release flow control in disconnect_indication
+ *	o Block packets while connecting (speed up connections)
+ *
+ * v5 - 11.01.01 - Jean II
+ *	o Init self->max_header_size, just in case...
+ *	o Set up ap->chan.hdrlen, to get zero copy on tx side working.
+ *	o avoid tx->ttp->flow->ppp->tx->... loop, by checking flow state
+ *		Thanks to Christian Gennerat for finding this bug !
+ *	---
+ *	o Declare the proper MTU/MRU that we can support
+ *		(but PPP doesn't read the MTU value :-()
+ *	o Declare hashbin HB_NOLOCK instead of HB_LOCAL to avoid
+ *		disabling and enabling irq twice
+ *
+ * v6 - 31.05.01 - Jean II
+ *	o Print source address in Found, Discovery, Expiry & Request events
+ *	o Print requested source address in /proc/net/irnet
+ *	o Change control channel input. Allow multiple commands in one line.
+ *	o Add saddr command to change ap->rsaddr (and use that in IrDA)
+ *	---
+ *	o Make the IrDA connection procedure totally asynchronous.
+ *	  Heavy rewrite of the IAS query code and the whole connection
+ *	  procedure. Now, irnet_connect() no longer need to be called from
+ *	  a process context...
+ *	o Enable IrDA connect retries in ppp_irnet_send(). The good thing
+ *	  is that IrDA connect retries are directly driven by PPP LCP
+ *	  retries (we retry for each LCP packet), so that everything
+ *	  is transparently controlled from pppd lcp-max-configure.
+ *	o Add ttp_connect flag to prevent rentry on the connect procedure
+ *	o Test and fixups to eliminate side effects of retries
+ *
+ * v7 - 22.08.01 - Jean II
+ *	o Cleanup : Change "saddr = 0x0" to "saddr = DEV_ADDR_ANY"
+ *	o Fix bug in BLOCK_WHEN_CONNECT introduced in v6 : due to the
+ *	  asynchronous IAS query, self->tsap is NULL when PPP send the
+ *	  first packet.  This was preventing "connect-delay 0" to work.
+ *	  Change the test in ppp_irnet_send() to self->ttp_connect.
+ *
+ * v8 - 1.11.01 - Jean II
+ *	o Tighten the use of self->ttp_connect and self->ttp_open to
+ *	  prevent various race conditions.
+ *	o Avoid leaking discovery log and skb
+ *	o Replace "self" with "server" in irnet_connect_indication() to
+ *	  better detect cut'n'paste error ;-)
+ *
+ * v9 - 29.11.01 - Jean II
+ *	o Fix event generation in disconnect indication that I broke in v8
+ *	  It was always generation "No-Answer" because I was testing ttp_open
+ *	  just after clearing it. *blush*.
+ *	o Use newly created irttp_listen() to fix potential crash when LAP
+ *	  destroyed before irnet module removed.
+ *
+ * v10 - 4.3.2 - Jean II
+ *	o When receiving a disconnect indication, don't reenable the
+ *	  PPP Tx queue, this will trigger a reconnect. Instead, close
+ *	  the channel, which will kill pppd...
+ *
+ * v11 - 20.3.02 - Jean II
+ *	o Oops ! v10 fix disabled IrNET retries and passive behaviour.
+ *	  Better fix in irnet_disconnect_indication() :
+ *	  - if connected, kill pppd via hangup.
+ *	  - if not connected, reenable ppp Tx, which trigger IrNET retry.
+ *
+ * v12 - 10.4.02 - Jean II
+ *	o Fix race condition in irnet_connect_indication().
+ *	  If the socket was already trying to connect, drop old connection
+ *	  and use new one only if acting as primary. See comments.
+ *
+ * v13 - 30.5.02 - Jean II
+ *	o Update module init code
+ *
+ * v14 - 20.2.03 - Jean II
+ *	o Add discovery hint bits in the control channel.
+ *	o Remove obsolete MOD_INC/DEC_USE_COUNT in favor of .owner
+ *
+ * v15 - 7.4.03 - Jean II
+ *	o Replace spin_lock_irqsave() with spin_lock_bh() so that we can
+ *	  use ppp_unit_number(). It's probably also better overall...
+ *	o Disable call to ppp_unregister_channel(), because we can't do it.
+ */
+
+/***************************** INCLUDES *****************************/
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tty.h>
+#include <linux/proc_fs.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/netdevice.h>
+#include <linux/miscdevice.h>
+#include <linux/poll.h>
+#include <linux/config.h>
+#include <linux/ctype.h>	/* isspace() */
+#include <asm/uaccess.h>
+#include <linux/init.h>
+
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/ppp_channel.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/iriap.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/irttp.h>
+#include <net/irda/discovery.h>
+
+/***************************** OPTIONS *****************************/
+/*
+ * Define or undefine to compile or not some optional part of the
+ * IrNET driver...
+ * Note : the present defaults make sense, play with that at your
+ * own risk...
+ */
+/* IrDA side of the business... */
+#define DISCOVERY_NOMASK	/* To enable W2k compatibility... */
+#define ADVERTISE_HINT		/* Advertise IrLAN hint bit */
+#define ALLOW_SIMULT_CONNECT	/* This seem to work, cross fingers... */
+#define DISCOVERY_EVENTS	/* Query the discovery log to post events */
+#define INITIAL_DISCOVERY	/* Dump current discovery log as events */
+#undef STREAM_COMPAT		/* Not needed - potentially messy */
+#undef CONNECT_INDIC_KICK	/* Might mess IrDA, not needed */
+#undef FAIL_SEND_DISCONNECT	/* Might mess IrDA, not needed */
+#undef PASS_CONNECT_PACKETS	/* Not needed ? Safe */
+#undef MISSING_PPP_API		/* Stuff I wish I could do */
+
+/* PPP side of the business */
+#define BLOCK_WHEN_CONNECT	/* Block packets when connecting */
+#define CONNECT_IN_SEND		/* Retry IrDA connection procedure */
+#undef FLUSH_TO_PPP		/* Not sure about this one, let's play safe */
+#undef SECURE_DEVIRNET		/* Bah... */
+
+/****************************** DEBUG ******************************/
+
+/*
+ * This set of flags enable and disable all the various warning,
+ * error and debug message of this driver.
+ * Each section can be enabled and disabled independently
+ */
+/* In the PPP part */
+#define DEBUG_CTRL_TRACE	0	/* Control channel */
+#define DEBUG_CTRL_INFO		0	/* various info */
+#define DEBUG_CTRL_ERROR	1	/* problems */
+#define DEBUG_FS_TRACE		0	/* filesystem callbacks */
+#define DEBUG_FS_INFO		0	/* various info */
+#define DEBUG_FS_ERROR		1	/* problems */
+#define DEBUG_PPP_TRACE		0	/* PPP related functions */
+#define DEBUG_PPP_INFO		0	/* various info */
+#define DEBUG_PPP_ERROR		1	/* problems */
+#define DEBUG_MODULE_TRACE	0	/* module insertion/removal */
+#define DEBUG_MODULE_ERROR	1	/* problems */
+
+/* In the IrDA part */
+#define DEBUG_IRDA_SR_TRACE	0	/* IRDA subroutines */
+#define DEBUG_IRDA_SR_INFO	0	/* various info */
+#define DEBUG_IRDA_SR_ERROR	1	/* problems */
+#define DEBUG_IRDA_SOCK_TRACE	0	/* IRDA main socket functions */
+#define DEBUG_IRDA_SOCK_INFO	0	/* various info */
+#define DEBUG_IRDA_SOCK_ERROR	1	/* problems */
+#define DEBUG_IRDA_SERV_TRACE	0	/* The IrNET server */
+#define DEBUG_IRDA_SERV_INFO	0	/* various info */
+#define DEBUG_IRDA_SERV_ERROR	1	/* problems */
+#define DEBUG_IRDA_TCB_TRACE	0	/* IRDA IrTTP callbacks */
+#define DEBUG_IRDA_CB_INFO	0	/* various info */
+#define DEBUG_IRDA_CB_ERROR	1	/* problems */
+#define DEBUG_IRDA_OCB_TRACE	0	/* IRDA other callbacks */
+#define DEBUG_IRDA_OCB_INFO	0	/* various info */
+#define DEBUG_IRDA_OCB_ERROR	1	/* problems */
+
+#define DEBUG_ASSERT		0	/* Verify all assertions */
+
+/* 
+ * These are the macros we are using to actually print the debug
+ * statements. Don't look at it, it's ugly...
+ *
+ * One of the trick is that, as the DEBUG_XXX are constant, the
+ * compiler will optimise away the if() in all cases.
+ */
+/* All error messages (will show up in the normal logs) */
+#define DERROR(dbg, format, args...) \
+	{if(DEBUG_##dbg) \
+		printk(KERN_INFO "irnet: %s(): " format, __FUNCTION__ , ##args);}
+
+/* Normal debug message (will show up in /var/log/debug) */
+#define DEBUG(dbg, format, args...) \
+	{if(DEBUG_##dbg) \
+		printk(KERN_DEBUG "irnet: %s(): " format, __FUNCTION__ , ##args);}
+
+/* Entering a function (trace) */
+#define DENTER(dbg, format, args...) \
+	{if(DEBUG_##dbg) \
+		printk(KERN_DEBUG "irnet: -> %s" format, __FUNCTION__ , ##args);}
+
+/* Entering and exiting a function in one go (trace) */
+#define DPASS(dbg, format, args...) \
+	{if(DEBUG_##dbg) \
+		printk(KERN_DEBUG "irnet: <>%s" format, __FUNCTION__ , ##args);}
+
+/* Exiting a function (trace) */
+#define DEXIT(dbg, format, args...) \
+	{if(DEBUG_##dbg) \
+		printk(KERN_DEBUG "irnet: <-%s()" format, __FUNCTION__ , ##args);}
+
+/* Exit a function with debug */
+#define DRETURN(ret, dbg, args...) \
+	{DEXIT(dbg, ": " args);\
+	return ret; }
+
+/* Exit a function on failed condition */
+#define DABORT(cond, ret, dbg, args...) \
+	{if(cond) {\
+		DERROR(dbg, args);\
+		return ret; }}
+
+/* Invalid assertion, print out an error and exit... */
+#define DASSERT(cond, ret, dbg, args...) \
+	{if((DEBUG_ASSERT) && !(cond)) {\
+		DERROR(dbg, "Invalid assertion: " args);\
+		return ret; }}
+
+/************************ CONSTANTS & MACROS ************************/
+
+/* Paranoia */
+#define IRNET_MAGIC	0xB00754
+
+/* Number of control events in the control channel buffer... */
+#define IRNET_MAX_EVENTS	8	/* Should be more than enough... */
+
+/****************************** TYPES ******************************/
+
+/*
+ * This is the main structure where we store all the data pertaining to
+ * one instance of irnet.
+ * Note : in irnet functions, a pointer this structure is usually called
+ * "ap" or "self". If the code is borrowed from the IrDA stack, it tend
+ * to be called "self", and if it is borrowed from the PPP driver it is
+ * "ap". Apart from that, it's exactly the same structure ;-)
+ */
+typedef struct irnet_socket
+{
+  /* ------------------- Instance management ------------------- */
+  /* We manage a linked list of IrNET socket instances */
+  irda_queue_t		q;		/* Must be first - for hasbin */
+  int			magic;		/* Paranoia */
+
+  /* --------------------- FileSystem part --------------------- */
+  /* "pppd" interact directly with us on a /dev/ file */
+  struct file *		file;		/* File descriptor of this instance */
+  /* TTY stuff - to keep "pppd" happy */
+  struct termios	termios;	/* Various tty flags */
+  /* Stuff for the control channel */
+  int			event_index;	/* Last read in the event log */
+
+  /* ------------------------- PPP part ------------------------- */
+  /* We interface directly to the ppp_generic driver in the kernel */
+  int			ppp_open;	/* registered with ppp_generic */
+  struct ppp_channel	chan;		/* Interface to generic ppp layer */
+
+  int			mru;		/* Max size of PPP payload */
+  u32			xaccm[8];	/* Asynchronous character map (just */
+  u32			raccm;		/* to please pppd - dummy) */
+  unsigned int		flags;		/* PPP flags (compression, ...) */
+  unsigned int		rbits;		/* Unused receive flags ??? */
+
+  /* ------------------------ IrTTP part ------------------------ */
+  /* We create a pseudo "socket" over the IrDA tranport */
+  unsigned long		ttp_open;	/* Set when IrTTP is ready */
+  unsigned long		ttp_connect;	/* Set when IrTTP is connecting */
+  struct tsap_cb *	tsap;		/* IrTTP instance (the connection) */
+
+  char			rname[NICKNAME_MAX_LEN + 1];
+					/* IrDA nickname of destination */
+  __u32			rdaddr;		/* Requested peer IrDA address */
+  __u32			rsaddr;		/* Requested local IrDA address */
+  __u32			daddr;		/* actual peer IrDA address */
+  __u32			saddr;		/* my local IrDA address */
+  __u8			dtsap_sel;	/* Remote TSAP selector */
+  __u8			stsap_sel;	/* Local TSAP selector */
+
+  __u32			max_sdu_size_rx;/* Socket parameters used for IrTTP */
+  __u32			max_sdu_size_tx;
+  __u32			max_data_size;
+  __u8			max_header_size;
+  LOCAL_FLOW		tx_flow;	/* State of the Tx path in IrTTP */
+
+  /* ------------------- IrLMP and IrIAS part ------------------- */
+  /* Used for IrDA Discovery and socket name resolution */
+  void *		ckey;		/* IrLMP client handle */
+  __u16			mask;		/* Hint bits mask (filter discov.)*/
+  int			nslots;		/* Number of slots for discovery */
+
+  struct iriap_cb *	iriap;		/* Used to query remote IAS */
+  int			errno;		/* status of the IAS query */
+
+  /* -------------------- Discovery log part -------------------- */
+  /* Used by initial discovery on the control channel
+   * and by irnet_discover_daddr_and_lsap_sel() */
+  struct irda_device_info *discoveries;	/* Copy of the discovery log */
+  int			disco_index;	/* Last read in the discovery log */
+  int			disco_number;	/* Size of the discovery log */
+
+} irnet_socket;
+
+/*
+ * This is the various event that we will generate on the control channel
+ */
+typedef enum irnet_event
+{
+  IRNET_DISCOVER,		/* New IrNET node discovered */
+  IRNET_EXPIRE,			/* IrNET node expired */
+  IRNET_CONNECT_TO,		/* IrNET socket has connected to other node */
+  IRNET_CONNECT_FROM,		/* Other node has connected to IrNET socket */
+  IRNET_REQUEST_FROM,		/* Non satisfied connection request */
+  IRNET_NOANSWER_FROM,		/* Failed connection request */
+  IRNET_BLOCKED_LINK,		/* Link (IrLAP) is blocked for > 3s */
+  IRNET_DISCONNECT_FROM,	/* IrNET socket has disconnected */
+  IRNET_DISCONNECT_TO		/* Closing IrNET socket */
+} irnet_event;
+
+/*
+ * This is the storage for an event and its arguments
+ */
+typedef struct irnet_log
+{
+  irnet_event	event;
+  int		unit;
+  __u32		saddr;
+  __u32		daddr;
+  char		name[NICKNAME_MAX_LEN + 1];	/* 21 + 1 */
+  __u16_host_order hints;			/* Discovery hint bits */
+} irnet_log;
+
+/*
+ * This is the storage for all events and related stuff...
+ */
+typedef struct irnet_ctrl_channel
+{
+  irnet_log	log[IRNET_MAX_EVENTS];	/* Event log */
+  int		index;		/* Current index in log */
+  spinlock_t	spinlock;	/* Serialize access to the event log */
+  wait_queue_head_t	rwait;	/* processes blocked on read (or poll) */
+} irnet_ctrl_channel;
+
+/**************************** PROTOTYPES ****************************/
+/*
+ * Global functions of the IrNET module
+ * Note : we list here also functions called from one file to the other.
+ */
+
+/* -------------------------- IRDA PART -------------------------- */
+extern int
+	irda_irnet_create(irnet_socket *);	/* Initialise a IrNET socket */
+extern int
+	irda_irnet_connect(irnet_socket *);	/* Try to connect over IrDA */
+extern void
+	irda_irnet_destroy(irnet_socket *);	/* Teardown  a IrNET socket */
+extern int
+	irda_irnet_init(void);		/* Initialise IrDA part of IrNET */
+extern void
+	irda_irnet_cleanup(void);	/* Teardown IrDA part of IrNET */
+/* ---------------------------- MODULE ---------------------------- */
+extern int
+	irnet_init(void);		/* Initialise IrNET module */
+
+/**************************** VARIABLES ****************************/
+
+/* Control channel stuff - allocated in irnet_irda.h */
+extern struct irnet_ctrl_channel	irnet_events;
+
+#endif /* IRNET_H */
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
new file mode 100644
index 00000000000..07ec326c71f
--- /dev/null
+++ b/net/irda/irnet/irnet_irda.c
@@ -0,0 +1,1866 @@
+/*
+ *	IrNET protocol module : Synchronous PPP over an IrDA socket.
+ *
+ *		Jean II - HPL `00 - <jt@hpl.hp.com>
+ *
+ * This file implement the IRDA interface of IrNET.
+ * Basically, we sit on top of IrTTP. We set up IrTTP, IrIAS properly,
+ * and exchange frames with IrTTP.
+ */
+
+#include "irnet_irda.h"		/* Private header */
+
+/************************* CONTROL CHANNEL *************************/
+/*
+ * When ppp is not active, /dev/irnet act as a control channel.
+ * Writing allow to set up the IrDA destination of the IrNET channel,
+ * and any application may be read events happening on IrNET...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Post an event to the control channel...
+ * Put the event in the log, and then wait all process blocked on read
+ * so they can read the log...
+ */
+static void
+irnet_post_event(irnet_socket *	ap,
+		 irnet_event	event,
+		 __u32		saddr,
+		 __u32		daddr,
+		 char *		name,
+		 __u16		hints)
+{
+  int			index;		/* In the log */
+
+  DENTER(CTRL_TRACE, "(ap=0x%p, event=%d, daddr=%08x, name=``%s'')\n",
+	 ap, event, daddr, name);
+
+  /* Protect this section via spinlock.
+   * Note : as we are the only event producer, we only need to exclude
+   * ourself when touching the log, which is nice and easy.
+   */
+  spin_lock_bh(&irnet_events.spinlock);
+
+  /* Copy the event in the log */
+  index = irnet_events.index;
+  irnet_events.log[index].event = event;
+  irnet_events.log[index].daddr = daddr;
+  irnet_events.log[index].saddr = saddr;
+  /* Try to copy IrDA nickname */
+  if(name)
+    strcpy(irnet_events.log[index].name, name);
+  else
+    irnet_events.log[index].name[0] = '\0';
+  /* Copy hints */
+  irnet_events.log[index].hints.word = hints;
+  /* Try to get ppp unit number */
+  if((ap != (irnet_socket *) NULL) && (ap->ppp_open))
+    irnet_events.log[index].unit = ppp_unit_number(&ap->chan);
+  else
+    irnet_events.log[index].unit = -1;
+
+  /* Increment the index
+   * Note that we increment the index only after the event is written,
+   * to make sure that the readers don't get garbage... */
+  irnet_events.index = (index + 1) % IRNET_MAX_EVENTS;
+
+  DEBUG(CTRL_INFO, "New event index is %d\n", irnet_events.index);
+
+  /* Spin lock end */
+  spin_unlock_bh(&irnet_events.spinlock);
+
+  /* Now : wake up everybody waiting for events... */
+  wake_up_interruptible_all(&irnet_events.rwait);
+
+  DEXIT(CTRL_TRACE, "\n");
+}
+
+/************************* IRDA SUBROUTINES *************************/
+/*
+ * These are a bunch of subroutines called from other functions
+ * down there, mostly common code or to improve readability...
+ *
+ * Note : we duplicate quite heavily some routines of af_irda.c,
+ * because our input structure (self) is quite different
+ * (struct irnet instead of struct irda_sock), which make sharing
+ * the same code impossible (at least, without templates).
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_open_tsap (self)
+ *
+ *    Open local Transport Service Access Point (TSAP)
+ *
+ * Create a IrTTP instance for us and set all the IrTTP callbacks.
+ */
+static inline int
+irnet_open_tsap(irnet_socket *	self)
+{
+  notify_t	notify;		/* Callback structure */
+
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  DABORT(self->tsap != NULL, -EBUSY, IRDA_SR_ERROR, "Already busy !\n");
+
+  /* Initialize IrTTP callbacks to be used by the IrDA stack */
+  irda_notify_init(&notify);
+  notify.connect_confirm	= irnet_connect_confirm;
+  notify.connect_indication	= irnet_connect_indication;
+  notify.disconnect_indication	= irnet_disconnect_indication;
+  notify.data_indication	= irnet_data_indication;
+  /*notify.udata_indication	= NULL;*/
+  notify.flow_indication	= irnet_flow_indication;
+  notify.status_indication	= irnet_status_indication;
+  notify.instance		= self;
+  strlcpy(notify.name, IRNET_NOTIFY_NAME, sizeof(notify.name));
+
+  /* Open an IrTTP instance */
+  self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
+			       &notify);	
+  DABORT(self->tsap == NULL, -ENOMEM,
+	 IRDA_SR_ERROR, "Unable to allocate TSAP !\n");
+
+  /* Remember which TSAP selector we actually got */
+  self->stsap_sel = self->tsap->stsap_sel;
+
+  DEXIT(IRDA_SR_TRACE, " - tsap=0x%p, sel=0x%X\n",
+	self->tsap, self->stsap_sel);
+  return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_ias_to_tsap (self, result, value)
+ *
+ *    Examine an IAS object and extract TSAP
+ *
+ * We do an IAP query to find the TSAP associated with the IrNET service.
+ * When IrIAP pass us the result of the query, this function look at
+ * the return values to check for failures and extract the TSAP if
+ * possible.
+ * Also deallocate value
+ * The failure is in self->errno
+ * Return TSAP or -1
+ */
+static inline __u8
+irnet_ias_to_tsap(irnet_socket *	self,
+		  int			result,
+		  struct ias_value *	value)
+{
+  __u8	dtsap_sel = 0;		/* TSAP we are looking for */
+
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  /* By default, no error */
+  self->errno = 0;
+
+  /* Check if request succeeded */
+  switch(result)
+    {
+      /* Standard errors : service not available */
+    case IAS_CLASS_UNKNOWN:
+    case IAS_ATTRIB_UNKNOWN:
+      DEBUG(IRDA_SR_INFO, "IAS object doesn't exist ! (%d)\n", result);
+      self->errno = -EADDRNOTAVAIL;
+      break;
+
+      /* Other errors, most likely IrDA stack failure */
+    default :
+      DEBUG(IRDA_SR_INFO, "IAS query failed ! (%d)\n", result);
+      self->errno = -EHOSTUNREACH;
+      break;
+
+      /* Success : we got what we wanted */
+    case IAS_SUCCESS:
+      break;
+    }
+
+  /* Check what was returned to us */
+  if(value != NULL)
+    {
+      /* What type of argument have we got ? */
+      switch(value->type)
+	{
+	case IAS_INTEGER:
+	  DEBUG(IRDA_SR_INFO, "result=%d\n", value->t.integer);
+	  if(value->t.integer != -1)
+	    /* Get the remote TSAP selector */
+	    dtsap_sel = value->t.integer;
+	  else 
+	    self->errno = -EADDRNOTAVAIL;
+	  break;
+	default:
+	  self->errno = -EADDRNOTAVAIL;
+	  DERROR(IRDA_SR_ERROR, "bad type ! (0x%X)\n", value->type);
+	  break;
+	}
+
+      /* Cleanup */
+      irias_delete_value(value);
+    }
+  else	/* value == NULL */
+    {
+      /* Nothing returned to us - usually result != SUCCESS */
+      if(!(self->errno))
+	{
+	  DERROR(IRDA_SR_ERROR,
+		 "IrDA bug : result == SUCCESS && value == NULL\n");
+	  self->errno = -EHOSTUNREACH;
+	}
+    }
+  DEXIT(IRDA_SR_TRACE, "\n");
+
+  /* Return the TSAP */
+  return(dtsap_sel);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_find_lsap_sel (self)
+ *
+ *    Try to lookup LSAP selector in remote LM-IAS
+ *
+ * Basically, we start a IAP query, and then go to sleep. When the query
+ * return, irnet_getvalue_confirm will wake us up, and we can examine the
+ * result of the query...
+ * Note that in some case, the query fail even before we go to sleep,
+ * creating some races...
+ */
+static inline int
+irnet_find_lsap_sel(irnet_socket *	self)
+{
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  /* This should not happen */
+  DABORT(self->iriap, -EBUSY, IRDA_SR_ERROR, "busy with a previous query.\n");
+
+  /* Create an IAP instance, will be closed in irnet_getvalue_confirm() */
+  self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+			   irnet_getvalue_confirm);
+
+  /* Treat unexpected signals as disconnect */
+  self->errno = -EHOSTUNREACH;
+
+  /* Query remote LM-IAS */
+  iriap_getvaluebyclass_request(self->iriap, self->rsaddr, self->daddr,
+				IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
+
+  /* The above request is non-blocking.
+   * After a while, IrDA will call us back in irnet_getvalue_confirm()
+   * We will then call irnet_ias_to_tsap() and finish the
+   * connection procedure */
+
+  DEXIT(IRDA_SR_TRACE, "\n");
+  return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_connect_tsap (self)
+ *
+ *    Initialise the TTP socket and initiate TTP connection
+ *
+ */
+static inline int
+irnet_connect_tsap(irnet_socket *	self)
+{
+  int		err;
+
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  /* Open a local TSAP (an IrTTP instance) */
+  err = irnet_open_tsap(self);
+  if(err != 0)
+    {
+      clear_bit(0, &self->ttp_connect);
+      DERROR(IRDA_SR_ERROR, "connect aborted!\n");
+      return(err);
+    }
+
+  /* Connect to remote device */
+  err = irttp_connect_request(self->tsap, self->dtsap_sel, 
+			      self->rsaddr, self->daddr, NULL, 
+			      self->max_sdu_size_rx, NULL);
+  if(err != 0)
+    {
+      clear_bit(0, &self->ttp_connect);
+      DERROR(IRDA_SR_ERROR, "connect aborted!\n");
+      return(err);
+    }
+
+  /* The above call is non-blocking.
+   * After a while, the IrDA stack will either call us back in
+   * irnet_connect_confirm() or irnet_disconnect_indication()
+   * See you there ;-) */
+
+  DEXIT(IRDA_SR_TRACE, "\n");
+  return(err);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_discover_next_daddr (self)
+ *
+ *    Query the IrNET TSAP of the next device in the log.
+ *
+ * Used in the TSAP discovery procedure.
+ */
+static inline int
+irnet_discover_next_daddr(irnet_socket *	self)
+{
+  /* Close the last instance of IrIAP, and open a new one.
+   * We can't reuse the IrIAP instance in the IrIAP callback */
+  if(self->iriap)
+    {
+      iriap_close(self->iriap);
+      self->iriap = NULL;
+    }
+  /* Create a new IAP instance */
+  self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
+			   irnet_discovervalue_confirm);
+  if(self->iriap == NULL)
+    return -ENOMEM;
+
+  /* Next discovery - before the call to avoid races */
+  self->disco_index++;
+
+  /* Check if we have one more address to try */
+  if(self->disco_index < self->disco_number)
+    {
+      /* Query remote LM-IAS */
+      iriap_getvaluebyclass_request(self->iriap,
+				    self->discoveries[self->disco_index].saddr,
+				    self->discoveries[self->disco_index].daddr,
+				    IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
+      /* The above request is non-blocking.
+       * After a while, IrDA will call us back in irnet_discovervalue_confirm()
+       * We will then call irnet_ias_to_tsap() and come back here again... */
+      return(0);
+    }
+  else
+    return(1);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_discover_daddr_and_lsap_sel (self)
+ *
+ *    This try to find a device with the requested service.
+ *
+ * Initiate a TSAP discovery procedure.
+ * It basically look into the discovery log. For each address in the list,
+ * it queries the LM-IAS of the device to find if this device offer
+ * the requested service.
+ * If there is more than one node supporting the service, we complain
+ * to the user (it should move devices around).
+ * If we find one node which have the requested TSAP, we connect to it.
+ *
+ * This function just start the whole procedure. It request the discovery
+ * log and submit the first IAS query.
+ * The bulk of the job is handled in irnet_discovervalue_confirm()
+ *
+ * Note : this procedure fails if there is more than one device in range
+ * on the same dongle, because IrLMP doesn't disconnect the LAP when the
+ * last LSAP is closed. Moreover, we would need to wait the LAP
+ * disconnection...
+ */
+static inline int
+irnet_discover_daddr_and_lsap_sel(irnet_socket *	self)
+{
+  int	ret;
+
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  /* Ask lmp for the current discovery log */
+  self->discoveries = irlmp_get_discoveries(&self->disco_number, self->mask,
+					    DISCOVERY_DEFAULT_SLOTS);
+
+  /* Check if the we got some results */
+  if(self->discoveries == NULL)
+    {
+      self->disco_number = -1;
+      clear_bit(0, &self->ttp_connect);
+      DRETURN(-ENETUNREACH, IRDA_SR_INFO, "No Cachelog...\n");
+    }
+  DEBUG(IRDA_SR_INFO, "Got the log (0x%p), size is %d\n",
+	self->discoveries, self->disco_number);
+
+  /* Start with the first discovery */
+  self->disco_index = -1;
+  self->daddr = DEV_ADDR_ANY;
+
+  /* This will fail if the log is empty - this is non-blocking */
+  ret = irnet_discover_next_daddr(self);
+  if(ret)
+    {
+      /* Close IAP */
+      if(self->iriap)
+	iriap_close(self->iriap);
+      self->iriap = NULL;
+
+      /* Cleanup our copy of the discovery log */
+      kfree(self->discoveries);
+      self->discoveries = NULL;
+
+      clear_bit(0, &self->ttp_connect);
+      DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
+    }
+
+  /* Follow me in irnet_discovervalue_confirm() */
+
+  DEXIT(IRDA_SR_TRACE, "\n");
+  return(0);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_dname_to_daddr (self)
+ *
+ *    Convert an IrDA nickname to a valid IrDA address
+ *
+ * It basically look into the discovery log until there is a match.
+ */
+static inline int
+irnet_dname_to_daddr(irnet_socket *	self)
+{
+  struct irda_device_info *discoveries;	/* Copy of the discovery log */
+  int	number;			/* Number of nodes in the log */
+  int	i;
+
+  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
+
+  /* Ask lmp for the current discovery log */
+  discoveries = irlmp_get_discoveries(&number, 0xffff,
+				      DISCOVERY_DEFAULT_SLOTS);
+  /* Check if the we got some results */
+  if(discoveries == NULL)
+    DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
+
+  /* 
+   * Now, check all discovered devices (if any), and connect
+   * client only about the services that the client is
+   * interested in...
+   */
+  for(i = 0; i < number; i++)
+    {
+      /* Does the name match ? */
+      if(!strncmp(discoveries[i].info, self->rname, NICKNAME_MAX_LEN))
+	{
+	  /* Yes !!! Get it.. */
+	  self->daddr = discoveries[i].daddr;
+	  DEBUG(IRDA_SR_INFO, "discovered device ``%s'' at address 0x%08x.\n",
+		self->rname, self->daddr);
+	  kfree(discoveries);
+	  DEXIT(IRDA_SR_TRACE, "\n");
+	  return 0;
+	}
+    }
+  /* No luck ! */
+  DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
+  kfree(discoveries);
+  return(-EADDRNOTAVAIL);
+}
+
+
+/************************* SOCKET ROUTINES *************************/
+/*
+ * This are the main operations on IrNET sockets, basically to create
+ * and destroy IrNET sockets. These are called from the PPP part...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Create a IrNET instance : just initialise some parameters...
+ */
+int
+irda_irnet_create(irnet_socket *	self)
+{
+  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
+
+  self->magic = IRNET_MAGIC;	/* Paranoia */
+
+  self->ttp_open = 0;		/* Prevent higher layer from accessing IrTTP */
+  self->ttp_connect = 0;	/* Not connecting yet */
+  self->rname[0] = '\0';	/* May be set via control channel */
+  self->rdaddr = DEV_ADDR_ANY;	/* May be set via control channel */
+  self->rsaddr = DEV_ADDR_ANY;	/* May be set via control channel */
+  self->daddr = DEV_ADDR_ANY;	/* Until we get connected */
+  self->saddr = DEV_ADDR_ANY;	/* Until we get connected */
+  self->max_sdu_size_rx = TTP_SAR_UNBOUND;
+
+  /* Register as a client with IrLMP */
+  self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
+#ifdef DISCOVERY_NOMASK
+  self->mask = 0xffff;		/* For W2k compatibility */
+#else /* DISCOVERY_NOMASK */
+  self->mask = irlmp_service_to_hint(S_LAN);
+#endif /* DISCOVERY_NOMASK */
+  self->tx_flow = FLOW_START;	/* Flow control from IrTTP */
+
+  DEXIT(IRDA_SOCK_TRACE, "\n");
+  return(0);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Connect to the other side :
+ *	o convert device name to an address
+ *	o find the socket number (dlsap)
+ *	o Establish the connection
+ *
+ * Note : We no longer mimic af_irda. The IAS query for finding the TSAP
+ * is done asynchronously, like the TTP connection. This allow us to
+ * call this function from any context (not only process).
+ * The downside is that following what's happening in there is tricky
+ * because it involve various functions all over the place...
+ */
+int
+irda_irnet_connect(irnet_socket *	self)
+{
+  int		err;
+
+  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
+
+  /* Check if we are already trying to connect.
+   * Because irda_irnet_connect() can be called directly by pppd plus
+   * packet retries in ppp_generic and connect may take time, plus we may
+   * race with irnet_connect_indication(), we need to be careful there... */
+  if(test_and_set_bit(0, &self->ttp_connect))
+    DRETURN(-EBUSY, IRDA_SOCK_INFO, "Already connecting...\n");
+  if((self->iriap != NULL) || (self->tsap != NULL))
+    DERROR(IRDA_SOCK_ERROR, "Socket not cleaned up...\n");
+
+  /* Insert ourselves in the hashbin so that the IrNET server can find us.
+   * Notes : 4th arg is string of 32 char max and must be null terminated
+   *	     When 4th arg is used (string), 3rd arg isn't (int)
+   *	     Can't re-insert (MUST remove first) so check for that... */
+  if((irnet_server.running) && (self->q.q_next == NULL))
+    {
+      spin_lock_bh(&irnet_server.spinlock);
+      hashbin_insert(irnet_server.list, (irda_queue_t *) self, 0, self->rname);
+      spin_unlock_bh(&irnet_server.spinlock);
+      DEBUG(IRDA_SOCK_INFO, "Inserted ``%s'' in hashbin...\n", self->rname);
+    }
+
+  /* If we don't have anything (no address, no name) */
+  if((self->rdaddr == DEV_ADDR_ANY) && (self->rname[0] == '\0'))
+    {
+      /* Try to find a suitable address */
+      if((err = irnet_discover_daddr_and_lsap_sel(self)) != 0)
+	DRETURN(err, IRDA_SOCK_INFO, "auto-connect failed!\n");
+      /* In most cases, the call above is non-blocking */
+    }
+  else
+    {
+      /* If we have only the name (no address), try to get an address */
+      if(self->rdaddr == DEV_ADDR_ANY)
+	{
+	  if((err = irnet_dname_to_daddr(self)) != 0)
+	    DRETURN(err, IRDA_SOCK_INFO, "name connect failed!\n");
+	}
+      else
+	/* Use the requested destination address */
+	self->daddr = self->rdaddr;
+
+      /* Query remote LM-IAS to find LSAP selector */
+      irnet_find_lsap_sel(self);
+      /* The above call is non blocking */
+    }
+
+  /* At this point, we are waiting for the IrDA stack to call us back,
+   * or we have already failed.
+   * We will finish the connection procedure in irnet_connect_tsap().
+   */
+  DEXIT(IRDA_SOCK_TRACE, "\n");
+  return(0);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_irnet_destroy(self)
+ *
+ *    Destroy irnet instance
+ *
+ * Note : this need to be called from a process context.
+ */
+void
+irda_irnet_destroy(irnet_socket *	self)
+{
+  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
+  if(self == NULL)
+    return;
+
+  /* Remove ourselves from hashbin (if we are queued in hashbin)
+   * Note : `irnet_server.running' protect us from calls in hashbin_delete() */
+  if((irnet_server.running) && (self->q.q_next != NULL))
+    {
+      struct irnet_socket *	entry;
+      DEBUG(IRDA_SOCK_INFO, "Removing from hash..\n");
+      spin_lock_bh(&irnet_server.spinlock);
+      entry = hashbin_remove_this(irnet_server.list, (irda_queue_t *) self);
+      self->q.q_next = NULL;
+      spin_unlock_bh(&irnet_server.spinlock);
+      DASSERT(entry == self, , IRDA_SOCK_ERROR, "Can't remove from hash.\n");
+    }
+
+  /* If we were connected, post a message */
+  if(test_bit(0, &self->ttp_open))
+    {
+      /* Note : as the disconnect comes from ppp_generic, the unit number
+       * doesn't exist anymore when we post the event, so we need to pass
+       * NULL as the first arg... */
+      irnet_post_event(NULL, IRNET_DISCONNECT_TO,
+		       self->saddr, self->daddr, self->rname, 0);
+    }
+
+  /* Prevent various IrDA callbacks from messing up things
+   * Need to be first */
+  clear_bit(0, &self->ttp_connect);
+
+  /* Prevent higher layer from accessing IrTTP */
+  clear_bit(0, &self->ttp_open);
+
+  /* Unregister with IrLMP */
+  irlmp_unregister_client(self->ckey);
+
+  /* Unregister with LM-IAS */
+  if(self->iriap)
+    { 
+      iriap_close(self->iriap);
+      self->iriap = NULL;
+    }
+
+  /* Cleanup eventual discoveries from connection attempt or control channel */
+  if(self->discoveries != NULL)
+    {
+      /* Cleanup our copy of the discovery log */
+      kfree(self->discoveries);
+      self->discoveries = NULL;
+    }
+
+  /* Close our IrTTP connection */
+  if(self->tsap)
+    {
+      DEBUG(IRDA_SOCK_INFO, "Closing our TTP connection.\n");
+      irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
+      irttp_close_tsap(self->tsap);
+      self->tsap = NULL;
+    }
+  self->stsap_sel = 0;
+
+  DEXIT(IRDA_SOCK_TRACE, "\n");
+  return;
+}
+
+
+/************************** SERVER SOCKET **************************/
+/*
+ * The IrNET service is composed of one server socket and a variable
+ * number of regular IrNET sockets. The server socket is supposed to
+ * handle incoming connections and redirect them to one IrNET sockets.
+ * It's a superset of the regular IrNET socket, but has a very distinct
+ * behaviour...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_daddr_to_dname (self)
+ *
+ *    Convert an IrDA address to a IrDA nickname
+ *
+ * It basically look into the discovery log until there is a match.
+ */
+static inline int
+irnet_daddr_to_dname(irnet_socket *	self)
+{
+  struct irda_device_info *discoveries;	/* Copy of the discovery log */
+  int	number;			/* Number of nodes in the log */
+  int	i;
+
+  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
+
+  /* Ask lmp for the current discovery log */
+  discoveries = irlmp_get_discoveries(&number, 0xffff,
+				      DISCOVERY_DEFAULT_SLOTS);
+  /* Check if the we got some results */
+  if (discoveries == NULL)
+    DRETURN(-ENETUNREACH, IRDA_SERV_INFO, "Cachelog empty...\n");
+
+  /* Now, check all discovered devices (if any) */
+  for(i = 0; i < number; i++)
+    {
+      /* Does the name match ? */
+      if(discoveries[i].daddr == self->daddr)
+	{
+	  /* Yes !!! Get it.. */
+	  strlcpy(self->rname, discoveries[i].info, sizeof(self->rname));
+	  self->rname[NICKNAME_MAX_LEN + 1] = '\0';
+	  DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n",
+		self->daddr, self->rname);
+	  kfree(discoveries);
+	  DEXIT(IRDA_SERV_TRACE, "\n");
+	  return 0;
+	}
+    }
+  /* No luck ! */
+  DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
+  kfree(discoveries);
+  return(-EADDRNOTAVAIL);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_find_socket (self)
+ *
+ *    Find the correct IrNET socket
+ *
+ * Look into the list of IrNET sockets and finds one with the right
+ * properties...
+ */
+static inline irnet_socket *
+irnet_find_socket(irnet_socket *	self)
+{
+  irnet_socket *	new = (irnet_socket *) NULL;
+  int			err;
+
+  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
+
+  /* Get the addresses of the requester */
+  self->daddr = irttp_get_daddr(self->tsap);
+  self->saddr = irttp_get_saddr(self->tsap);
+
+  /* Try to get the IrDA nickname of the requester */
+  err = irnet_daddr_to_dname(self);
+
+  /* Protect access to the instance list */
+  spin_lock_bh(&irnet_server.spinlock);
+
+  /* So now, try to get an socket having specifically
+   * requested that nickname */
+  if(err == 0)
+    {
+      new = (irnet_socket *) hashbin_find(irnet_server.list,
+					  0, self->rname);
+      if(new)
+	DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches rname ``%s''.\n",
+	      new, new->rname);
+    }
+
+  /* If no name matches, try to find an socket by the destination address */
+  /* It can be either the requested destination address (set via the
+   * control channel), or the current destination address if the
+   * socket is in the middle of a connection request */
+  if(new == (irnet_socket *) NULL)
+    {
+      new = (irnet_socket *) hashbin_get_first(irnet_server.list);
+      while(new !=(irnet_socket *) NULL)
+	{
+	  /* Does it have the same address ? */
+	  if((new->rdaddr == self->daddr) || (new->daddr == self->daddr))
+	    {
+	      /* Yes !!! Get it.. */
+	      DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches daddr %#08x.\n",
+		    new, self->daddr);
+	      break;
+	    }
+	  new = (irnet_socket *) hashbin_get_next(irnet_server.list);
+	}
+    }
+
+  /* If we don't have any socket, get the first unconnected socket */
+  if(new == (irnet_socket *) NULL)
+    {
+      new = (irnet_socket *) hashbin_get_first(irnet_server.list);
+      while(new !=(irnet_socket *) NULL)
+	{
+	  /* Is it available ? */
+	  if(!(test_bit(0, &new->ttp_open)) && (new->rdaddr == DEV_ADDR_ANY) &&
+	     (new->rname[0] == '\0') && (new->ppp_open))
+	    {
+	      /* Yes !!! Get it.. */
+	      DEBUG(IRDA_SERV_INFO, "Socket 0x%p is free.\n",
+		    new);
+	      break;
+	    }
+	  new = (irnet_socket *) hashbin_get_next(irnet_server.list);
+	}
+    }
+
+  /* Spin lock end */
+  spin_unlock_bh(&irnet_server.spinlock);
+
+  DEXIT(IRDA_SERV_TRACE, " - new = 0x%p\n", new);
+  return new;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_connect_socket (self)
+ *
+ *    Connect an incoming connection to the socket
+ *
+ */
+static inline int
+irnet_connect_socket(irnet_socket *	server,
+		     irnet_socket *	new,
+		     struct qos_info *	qos,
+		     __u32		max_sdu_size,
+		     __u8		max_header_size)
+{
+  DENTER(IRDA_SERV_TRACE, "(server=0x%p, new=0x%p)\n",
+	 server, new);
+
+  /* Now attach up the new socket */
+  new->tsap = irttp_dup(server->tsap, new);
+  DABORT(new->tsap == NULL, -1, IRDA_SERV_ERROR, "dup failed!\n");
+
+  /* Set up all the relevant parameters on the new socket */
+  new->stsap_sel = new->tsap->stsap_sel;
+  new->dtsap_sel = new->tsap->dtsap_sel;
+  new->saddr = irttp_get_saddr(new->tsap);
+  new->daddr = irttp_get_daddr(new->tsap);
+
+  new->max_header_size = max_header_size;
+  new->max_sdu_size_tx = max_sdu_size;
+  new->max_data_size   = max_sdu_size;
+#ifdef STREAM_COMPAT
+  /* If we want to receive "stream sockets" */
+  if(max_sdu_size == 0)
+    new->max_data_size = irttp_get_max_seg_size(new->tsap);
+#endif /* STREAM_COMPAT */
+
+  /* Clean up the original one to keep it in listen state */
+  irttp_listen(server->tsap);
+
+  /* Send a connection response on the new socket */
+  irttp_connect_response(new->tsap, new->max_sdu_size_rx, NULL);
+
+  /* Allow PPP to send its junk over the new socket... */
+  set_bit(0, &new->ttp_open);
+
+  /* Not connecting anymore, and clean up last possible remains
+   * of connection attempts on the socket */
+  clear_bit(0, &new->ttp_connect);
+  if(new->iriap)
+    {
+      iriap_close(new->iriap);
+      new->iriap = NULL;
+    }
+  if(new->discoveries != NULL)
+    {
+      kfree(new->discoveries);
+      new->discoveries = NULL;
+    }
+
+#ifdef CONNECT_INDIC_KICK
+  /* As currently we don't block packets in ppp_irnet_send() while passive,
+   * this is not really needed...
+   * Also, not doing it give IrDA a chance to finish the setup properly
+   * before being swamped with packets... */
+  ppp_output_wakeup(&new->chan);
+#endif /* CONNECT_INDIC_KICK */
+
+  /* Notify the control channel */
+  irnet_post_event(new, IRNET_CONNECT_FROM,
+		   new->saddr, new->daddr, server->rname, 0);
+
+  DEXIT(IRDA_SERV_TRACE, "\n");
+  return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_disconnect_server (self)
+ *
+ *    Cleanup the server socket when the incoming connection abort
+ *
+ */
+static inline void
+irnet_disconnect_server(irnet_socket *	self,
+			struct sk_buff *skb)
+{
+  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
+
+  /* Put the received packet in the black hole */
+  kfree_skb(skb);
+
+#ifdef FAIL_SEND_DISCONNECT
+  /* Tell the other party we don't want to be connected */
+  /* Hum... Is it the right thing to do ? And do we need to send
+   * a connect response before ? It looks ok without this... */
+  irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
+#endif /* FAIL_SEND_DISCONNECT */
+
+  /* Notify the control channel (see irnet_find_socket()) */
+  irnet_post_event(NULL, IRNET_REQUEST_FROM,
+		   self->saddr, self->daddr, self->rname, 0);
+
+  /* Clean up the server to keep it in listen state */
+  irttp_listen(self->tsap);
+
+  DEXIT(IRDA_SERV_TRACE, "\n");
+  return;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_setup_server (self)
+ *
+ *    Create a IrTTP server and set it up...
+ *
+ * Register the IrLAN hint bit, create a IrTTP instance for us,
+ * set all the IrTTP callbacks and create an IrIAS entry...
+ */
+static inline int
+irnet_setup_server(void)
+{
+  __u16		hints;
+
+  DENTER(IRDA_SERV_TRACE, "()\n");
+
+  /* Initialise the regular socket part of the server */
+  irda_irnet_create(&irnet_server.s);
+
+  /* Open a local TSAP (an IrTTP instance) for the server */
+  irnet_open_tsap(&irnet_server.s);
+
+  /* PPP part setup */
+  irnet_server.s.ppp_open = 0;
+  irnet_server.s.chan.private = NULL;
+  irnet_server.s.file = NULL;
+
+  /* Get the hint bit corresponding to IrLAN */
+  /* Note : we overload the IrLAN hint bit. As it is only a "hint", and as
+   * we provide roughly the same functionality as IrLAN, this is ok.
+   * In fact, the situation is similar as JetSend overloading the Obex hint
+   */
+  hints = irlmp_service_to_hint(S_LAN);
+
+#ifdef ADVERTISE_HINT
+  /* Register with IrLMP as a service (advertise our hint bit) */
+  irnet_server.skey = irlmp_register_service(hints);
+#endif /* ADVERTISE_HINT */
+
+  /* Register with LM-IAS (so that people can connect to us) */
+  irnet_server.ias_obj = irias_new_object(IRNET_SERVICE_NAME, jiffies);
+  irias_add_integer_attrib(irnet_server.ias_obj, IRNET_IAS_VALUE, 
+			   irnet_server.s.stsap_sel, IAS_KERNEL_ATTR);
+  irias_insert_object(irnet_server.ias_obj);
+
+#ifdef DISCOVERY_EVENTS
+  /* Tell IrLMP we want to be notified of newly discovered nodes */
+  irlmp_update_client(irnet_server.s.ckey, hints,
+		      irnet_discovery_indication, irnet_expiry_indication,
+		      (void *) &irnet_server.s);
+#endif
+
+  DEXIT(IRDA_SERV_TRACE, " - self=0x%p\n", &irnet_server.s);
+  return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irda_destroy_server (self)
+ *
+ *    Destroy the IrTTP server...
+ *
+ * Reverse of the previous function...
+ */
+static inline void
+irnet_destroy_server(void)
+{
+  DENTER(IRDA_SERV_TRACE, "()\n");
+
+#ifdef ADVERTISE_HINT
+  /* Unregister with IrLMP */
+  irlmp_unregister_service(irnet_server.skey);
+#endif /* ADVERTISE_HINT */
+
+  /* Unregister with LM-IAS */
+  if(irnet_server.ias_obj)
+    irias_delete_object(irnet_server.ias_obj);
+
+  /* Cleanup the socket part */
+  irda_irnet_destroy(&irnet_server.s);
+
+  DEXIT(IRDA_SERV_TRACE, "\n");
+  return;
+}
+
+
+/************************ IRDA-TTP CALLBACKS ************************/
+/*
+ * When we create a IrTTP instance, we pass to it a set of callbacks
+ * that IrTTP will call in case of various events.
+ * We take care of those events here.
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_data_indication (instance, sap, skb)
+ *
+ *    Received some data from TinyTP. Just queue it on the receive queue
+ *
+ */
+static int
+irnet_data_indication(void *	instance,
+		      void *	sap,
+		      struct sk_buff *skb)
+{
+  irnet_socket *	ap = (irnet_socket *) instance;
+  unsigned char *	p;
+  int			code = 0;
+
+  DENTER(IRDA_TCB_TRACE, "(self/ap=0x%p, skb=0x%p)\n",
+	 ap, skb);
+  DASSERT(skb != NULL, 0, IRDA_CB_ERROR, "skb is NULL !!!\n");
+
+  /* Check is ppp is ready to receive our packet */
+  if(!ap->ppp_open)
+    {
+      DERROR(IRDA_CB_ERROR, "PPP not ready, dropping packet...\n");
+      /* When we return error, TTP will need to requeue the skb and
+       * will stop the sender. IrTTP will stall until we send it a
+       * flow control request... */
+      return -ENOMEM;
+    }
+
+  /* strip address/control field if present */
+  p = skb->data;
+  if((p[0] == PPP_ALLSTATIONS) && (p[1] == PPP_UI))
+    {
+      /* chop off address/control */
+      if(skb->len < 3)
+	goto err_exit;
+      p = skb_pull(skb, 2);
+    }
+
+  /* decompress protocol field if compressed */
+  if(p[0] & 1)
+    {
+      /* protocol is compressed */
+      skb_push(skb, 1)[0] = 0;
+    }
+  else
+    if(skb->len < 2)
+      goto err_exit;
+
+  /* pass to generic ppp layer */
+  /* Note : how do I know if ppp can accept or not the packet ? This is
+   * essential if I want to manage flow control smoothly... */
+  ppp_input(&ap->chan, skb);
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+  return 0;
+
+ err_exit:
+  DERROR(IRDA_CB_ERROR, "Packet too small, dropping...\n");
+  kfree_skb(skb);
+  ppp_input_error(&ap->chan, code);
+  return 0;	/* Don't return an error code, only for flow control... */
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_disconnect_indication (instance, sap, reason, skb)
+ *
+ *    Connection has been closed. Chech reason to find out why
+ *
+ * Note : there are many cases where we come here :
+ *	o attempted to connect, timeout
+ *	o connected, link is broken, LAP has timeout
+ *	o connected, other side close the link
+ *	o connection request on the server not handled
+ */
+static void
+irnet_disconnect_indication(void *	instance,
+			    void *	sap, 
+			    LM_REASON	reason,
+			    struct sk_buff *skb)
+{
+  irnet_socket *	self = (irnet_socket *) instance;
+  int			test_open;
+  int			test_connect;
+
+  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
+
+  /* Don't care about it, but let's not leak it */
+  if(skb)
+    dev_kfree_skb(skb);
+
+  /* Prevent higher layer from accessing IrTTP */
+  test_open = test_and_clear_bit(0, &self->ttp_open);
+  /* Not connecting anymore...
+   * (note : TSAP is open, so IAP callbacks are no longer pending...) */
+  test_connect = test_and_clear_bit(0, &self->ttp_connect);
+
+  /* If both self->ttp_open and self->ttp_connect are NULL, it mean that we
+   * have a race condition with irda_irnet_destroy() or
+   * irnet_connect_indication(), so don't mess up tsap...
+   */
+  if(!(test_open || test_connect))
+    {
+      DERROR(IRDA_CB_ERROR, "Race condition detected...\n");
+      return;
+    }
+
+  /* If we were active, notify the control channel */
+  if(test_open)
+    irnet_post_event(self, IRNET_DISCONNECT_FROM,
+		     self->saddr, self->daddr, self->rname, 0);
+  else
+    /* If we were trying to connect, notify the control channel */
+    if((self->tsap) && (self != &irnet_server.s))
+      irnet_post_event(self, IRNET_NOANSWER_FROM,
+		       self->saddr, self->daddr, self->rname, 0);
+
+  /* Close our IrTTP connection, cleanup tsap */
+  if((self->tsap) && (self != &irnet_server.s))
+    {
+      DEBUG(IRDA_CB_INFO, "Closing our TTP connection.\n");
+      irttp_close_tsap(self->tsap);
+      self->tsap = NULL;
+    }
+  /* Cleanup the socket in case we want to reconnect in ppp_output_wakeup() */
+  self->stsap_sel = 0;
+  self->daddr = DEV_ADDR_ANY;
+  self->tx_flow = FLOW_START;
+
+  /* Deal with the ppp instance if it's still alive */
+  if(self->ppp_open)
+    {
+      if(test_open)
+	{
+#ifdef MISSING_PPP_API
+	  /* ppp_unregister_channel() wants a user context, which we
+	   * are guaranteed to NOT have here. What are we supposed
+	   * to do here ? Jean II */
+	  /* If we were connected, cleanup & close the PPP channel,
+	   * which will kill pppd (hangup) and the rest */
+	  ppp_unregister_channel(&self->chan);
+	  self->ppp_open = 0;
+#endif
+	}
+      else
+	{
+	  /* If we were trying to connect, flush (drain) ppp_generic
+	   * Tx queue (most often we have blocked it), which will
+	   * trigger an other attempt to connect. If we are passive,
+	   * this will empty the Tx queue after last try. */
+	  ppp_output_wakeup(&self->chan);
+	}
+    }
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_connect_confirm (instance, sap, qos, max_sdu_size, skb)
+ *
+ *    Connections has been confirmed by the remote device
+ *
+ */
+static void
+irnet_connect_confirm(void *	instance,
+		      void *	sap, 
+		      struct qos_info *qos,
+		      __u32	max_sdu_size,
+		      __u8	max_header_size, 
+		      struct sk_buff *skb)
+{
+  irnet_socket *	self = (irnet_socket *) instance;
+
+  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
+
+  /* Check if socket is closing down (via irda_irnet_destroy()) */
+  if(! test_bit(0, &self->ttp_connect))
+    {
+      DERROR(IRDA_CB_ERROR, "Socket no longer connecting. Ouch !\n");
+      return;
+    }
+
+  /* How much header space do we need to reserve */
+  self->max_header_size = max_header_size;
+
+  /* IrTTP max SDU size in transmit direction */
+  self->max_sdu_size_tx = max_sdu_size;
+  self->max_data_size = max_sdu_size;
+#ifdef STREAM_COMPAT
+  if(max_sdu_size == 0)
+    self->max_data_size = irttp_get_max_seg_size(self->tsap);
+#endif /* STREAM_COMPAT */
+
+  /* At this point, IrLMP has assigned our source address */
+  self->saddr = irttp_get_saddr(self->tsap);
+
+  /* Allow higher layer to access IrTTP */
+  set_bit(0, &self->ttp_open);
+  clear_bit(0, &self->ttp_connect);	/* Not racy, IrDA traffic is serial */
+  /* Give a kick in the ass of ppp_generic so that he sends us some data */
+  ppp_output_wakeup(&self->chan);
+
+  /* Check size of received packet */
+  if(skb->len > 0)
+    {
+#ifdef PASS_CONNECT_PACKETS
+      DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
+      /* Try to pass it to PPP */
+      irnet_data_indication(instance, sap, skb);
+#else /* PASS_CONNECT_PACKETS */
+      DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
+      kfree_skb(skb);	/* Note : will be optimised with other kfree... */
+#endif /* PASS_CONNECT_PACKETS */
+    }
+  else
+    kfree_skb(skb);
+
+  /* Notify the control channel */
+  irnet_post_event(self, IRNET_CONNECT_TO,
+		   self->saddr, self->daddr, self->rname, 0);
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_flow_indication (instance, sap, flow)
+ *
+ *    Used by TinyTP to tell us if it can accept more data or not
+ *
+ */
+static void
+irnet_flow_indication(void *	instance,
+		      void *	sap,
+		      LOCAL_FLOW flow) 
+{
+  irnet_socket *	self = (irnet_socket *) instance;
+  LOCAL_FLOW		oldflow = self->tx_flow;
+
+  DENTER(IRDA_TCB_TRACE, "(self=0x%p, flow=%d)\n", self, flow);
+
+  /* Update our state */
+  self->tx_flow = flow;
+
+  /* Check what IrTTP want us to do... */
+  switch(flow)
+    {
+    case FLOW_START:
+      DEBUG(IRDA_CB_INFO, "IrTTP wants us to start again\n");
+      /* Check if we really need to wake up PPP */
+      if(oldflow == FLOW_STOP)
+	ppp_output_wakeup(&self->chan);
+      else
+	DEBUG(IRDA_CB_INFO, "But we were already transmitting !!!\n");
+      break;
+    case FLOW_STOP:
+      DEBUG(IRDA_CB_INFO, "IrTTP wants us to slow down\n");
+      break;
+    default:
+      DEBUG(IRDA_CB_INFO, "Unknown flow command!\n");
+      break;
+    }
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_status_indication (instance, sap, reason, skb)
+ *
+ *    Link (IrLAP) status report.
+ *
+ */
+static void
+irnet_status_indication(void *	instance,
+			LINK_STATUS link,
+			LOCK_STATUS lock)
+{
+  irnet_socket *	self = (irnet_socket *) instance;
+
+  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
+
+  /* We can only get this event if we are connected */
+  switch(link)
+    {
+    case STATUS_NO_ACTIVITY:
+      irnet_post_event(self, IRNET_BLOCKED_LINK,
+		       self->saddr, self->daddr, self->rname, 0);
+      break;
+    default:
+      DEBUG(IRDA_CB_INFO, "Unknown status...\n");
+    }
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_connect_indication(instance, sap, qos, max_sdu_size, userdata)
+ *
+ *    Incoming connection
+ *
+ * In theory, this function is called only on the server socket.
+ * Some other node is attempting to connect to the IrNET service, and has
+ * sent a connection request on our server socket.
+ * We just redirect the connection to the relevant IrNET socket.
+ * 
+ * Note : we also make sure that between 2 irnet nodes, there can
+ * exist only one irnet connection.
+ */
+static void
+irnet_connect_indication(void *		instance,
+			 void *		sap, 
+			 struct qos_info *qos,
+			 __u32		max_sdu_size,
+			 __u8		max_header_size,
+			 struct sk_buff *skb)
+{
+  irnet_socket *	server = &irnet_server.s;
+  irnet_socket *	new = (irnet_socket *) NULL;
+
+  DENTER(IRDA_TCB_TRACE, "(server=0x%p)\n", server);
+  DASSERT(instance == &irnet_server, , IRDA_CB_ERROR,
+	  "Invalid instance (0x%p) !!!\n", instance);
+  DASSERT(sap == irnet_server.s.tsap, , IRDA_CB_ERROR, "Invalid sap !!!\n");
+
+  /* Try to find the most appropriate IrNET socket */
+  new = irnet_find_socket(server);
+
+  /* After all this hard work, do we have an socket ? */
+  if(new == (irnet_socket *) NULL)
+    {
+      DEXIT(IRDA_CB_INFO, ": No socket waiting for this connection.\n");
+      irnet_disconnect_server(server, skb);
+      return;
+    }
+
+  /* Is the socket already busy ? */
+  if(test_bit(0, &new->ttp_open))
+    {
+      DEXIT(IRDA_CB_INFO, ": Socket already connected.\n");
+      irnet_disconnect_server(server, skb);
+      return;
+    }
+
+  /* The following code is a bit tricky, so need comments ;-)
+   */
+  /* If ttp_connect is set, the socket is trying to connect to the other
+   * end and may have sent a IrTTP connection request and is waiting for
+   * a connection response (that may never come).
+   * Now, the pain is that the socket may have opened a tsap and is
+   * waiting on it, while the other end is trying to connect to it on
+   * another tsap.
+   * Because IrNET can be peer to peer, we need to workaround this.
+   * Furthermore, the way the irnetd script is implemented, the
+   * target will create a second IrNET connection back to the
+   * originator and expect the originator to bind this new connection
+   * to the original PPPD instance.
+   * And of course, if we don't use irnetd, we can have a race when
+   * both side try to connect simultaneously, which could leave both
+   * connections half closed (yuck).
+   * Conclusions :
+   *	1) The "originator" must accept the new connection and get rid
+   *	   of the old one so that irnetd works
+   *	2) One side must deny the new connection to avoid races,
+   *	   but both side must agree on which side it is...
+   * Most often, the originator is primary at the LAP layer.
+   * Jean II
+   */
+  /* Now, let's look at the way I wrote the test...
+   * We need to clear up the ttp_connect flag atomically to prevent
+   * irnet_disconnect_indication() to mess up the tsap we are going to close.
+   * We want to clear the ttp_connect flag only if we close the tsap,
+   * otherwise we will never close it, so we need to check for primary
+   * *before* doing the test on the flag.
+   * And of course, ALLOW_SIMULT_CONNECT can disable this entirely...
+   * Jean II
+   */
+
+  /* Socket already connecting ? On primary ? */
+  if(0
+#ifdef ALLOW_SIMULT_CONNECT
+     || ((irttp_is_primary(server->tsap) == 1)	/* primary */
+	 && (test_and_clear_bit(0, &new->ttp_connect)))
+#endif /* ALLOW_SIMULT_CONNECT */
+     )
+    {
+      DERROR(IRDA_CB_ERROR, "Socket already connecting, but going to reuse it !\n");
+
+      /* Cleanup the old TSAP if necessary - IrIAP will be cleaned up later */
+      if(new->tsap != NULL)
+	{
+	  /* Close the old connection the new socket was attempting,
+	   * so that we can hook it up to the new connection.
+	   * It's now safe to do it... */
+	  irttp_close_tsap(new->tsap);
+	  new->tsap = NULL;
+	}
+    }
+  else
+    {
+      /* Three options :
+       * 1) socket was not connecting or connected : ttp_connect should be 0.
+       * 2) we don't want to connect the socket because we are secondary or
+       * ALLOW_SIMULT_CONNECT is undefined. ttp_connect should be 1.
+       * 3) we are half way in irnet_disconnect_indication(), and it's a
+       * nice race condition... Fortunately, we can detect that by checking
+       * if tsap is still alive. On the other hand, we can't be in
+       * irda_irnet_destroy() otherwise we would not have found this
+       * socket in the hashbin.
+       * Jean II */
+      if((test_bit(0, &new->ttp_connect)) || (new->tsap != NULL))
+	{
+	  /* Don't mess this socket, somebody else in in charge... */
+	  DERROR(IRDA_CB_ERROR, "Race condition detected, socket in use, abort connect...\n");
+	  irnet_disconnect_server(server, skb);
+	  return;
+	}
+    }
+
+  /* So : at this point, we have a socket, and it is idle. Good ! */
+  irnet_connect_socket(server, new, qos, max_sdu_size, max_header_size);
+
+  /* Check size of received packet */
+  if(skb->len > 0)
+    {
+#ifdef PASS_CONNECT_PACKETS
+      DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
+      /* Try to pass it to PPP */
+      irnet_data_indication(new, new->tsap, skb);
+#else /* PASS_CONNECT_PACKETS */
+      DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
+      kfree_skb(skb);	/* Note : will be optimised with other kfree... */
+#endif /* PASS_CONNECT_PACKETS */
+    }
+  else
+    kfree_skb(skb);
+
+  DEXIT(IRDA_TCB_TRACE, "\n");
+}
+
+
+/********************** IRDA-IAS/LMP CALLBACKS **********************/
+/*
+ * These are the callbacks called by other layers of the IrDA stack,
+ * mainly LMP for discovery and IAS for name queries.
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_getvalue_confirm (result, obj_id, value, priv)
+ *
+ *    Got answer from remote LM-IAS, just connect
+ *
+ * This is the reply to a IAS query we were doing to find the TSAP of
+ * the device we want to connect to.
+ * If we have found a valid TSAP, just initiate the TTP connection
+ * on this TSAP.
+ */
+static void
+irnet_getvalue_confirm(int	result,
+		       __u16	obj_id, 
+		       struct ias_value *value,
+		       void *	priv)
+{
+  irnet_socket *	self = (irnet_socket *) priv;
+
+  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
+
+  /* Check if already connected (via irnet_connect_socket())
+   * or socket is closing down (via irda_irnet_destroy()) */
+  if(! test_bit(0, &self->ttp_connect))
+    {
+      DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
+      return;
+    }
+
+  /* We probably don't need to make any more queries */
+  iriap_close(self->iriap);
+  self->iriap = NULL;
+
+  /* Post process the IAS reply */
+  self->dtsap_sel = irnet_ias_to_tsap(self, result, value);
+
+  /* If error, just go out */
+  if(self->errno)
+    {
+      clear_bit(0, &self->ttp_connect);
+      DERROR(IRDA_OCB_ERROR, "IAS connect failed ! (0x%X)\n", self->errno);
+      return;
+    }
+
+  DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
+	self->daddr, self->dtsap_sel);
+
+  /* Start up TTP - non blocking */
+  irnet_connect_tsap(self);
+
+  DEXIT(IRDA_OCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_discovervalue_confirm (result, obj_id, value, priv)
+ *
+ *    Handle the TSAP discovery procedure state machine.
+ *    Got answer from remote LM-IAS, try next device
+ *
+ * We are doing a  TSAP discovery procedure, and we got an answer to
+ * a IAS query we were doing to find the TSAP on one of the address
+ * in the discovery log.
+ *
+ * If we have found a valid TSAP for the first time, save it. If it's
+ * not the first time we found one, complain.
+ *
+ * If we have more addresses in the log, just initiate a new query.
+ * Note that those query may fail (see irnet_discover_daddr_and_lsap_sel())
+ *
+ * Otherwise, wrap up the procedure (cleanup), check if we have found
+ * any device and connect to it.
+ */
+static void
+irnet_discovervalue_confirm(int		result,
+			    __u16	obj_id, 
+			    struct ias_value *value,
+			    void *	priv)
+{
+  irnet_socket *	self = (irnet_socket *) priv;
+  __u8			dtsap_sel;		/* TSAP we are looking for */
+
+  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
+
+  /* Check if already connected (via irnet_connect_socket())
+   * or socket is closing down (via irda_irnet_destroy()) */
+  if(! test_bit(0, &self->ttp_connect))
+    {
+      DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
+      return;
+    }
+
+  /* Post process the IAS reply */
+  dtsap_sel = irnet_ias_to_tsap(self, result, value);
+
+  /* Have we got something ? */
+  if(self->errno == 0)
+    {
+      /* We found the requested service */
+      if(self->daddr != DEV_ADDR_ANY)
+	{
+	  DERROR(IRDA_OCB_ERROR, "More than one device in range supports IrNET...\n");
+	}
+      else
+	{
+	  /* First time we found that one, save it ! */
+	  self->daddr = self->discoveries[self->disco_index].daddr;
+	  self->dtsap_sel = dtsap_sel;
+	}
+    }
+
+  /* If no failure */
+  if((self->errno == -EADDRNOTAVAIL) || (self->errno == 0))
+    {
+      int	ret;
+
+      /* Search the next node */
+      ret = irnet_discover_next_daddr(self);
+      if(!ret)
+	{
+	  /* In this case, the above request was non-blocking.
+	   * We will return here after a while... */
+	  return;
+	}
+      /* In this case, we have processed the last discovery item */
+    }
+
+  /* No more queries to be done (failure or last one) */
+
+  /* We probably don't need to make any more queries */
+  iriap_close(self->iriap);
+  self->iriap = NULL;
+
+  /* No more items : remove the log and signal termination */
+  DEBUG(IRDA_OCB_INFO, "Cleaning up log (0x%p)\n",
+	self->discoveries);
+  if(self->discoveries != NULL)
+    {
+      /* Cleanup our copy of the discovery log */
+      kfree(self->discoveries);
+      self->discoveries = NULL;
+    }
+  self->disco_number = -1;
+
+  /* Check out what we found */
+  if(self->daddr == DEV_ADDR_ANY)
+    {
+      self->daddr = DEV_ADDR_ANY;
+      clear_bit(0, &self->ttp_connect);
+      DEXIT(IRDA_OCB_TRACE, ": cannot discover IrNET in any device !!!\n");
+      return;
+    }
+
+  /* We have a valid address - just connect */
+
+  DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
+	self->daddr, self->dtsap_sel);
+
+  /* Start up TTP - non blocking */
+  irnet_connect_tsap(self);
+
+  DEXIT(IRDA_OCB_TRACE, "\n");
+}
+
+#ifdef DISCOVERY_EVENTS
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_discovery_indication (discovery)
+ *
+ *    Got a discovery indication from IrLMP, post an event
+ *
+ * Note : IrLMP take care of matching the hint mask for us, and also
+ * check if it is a "new" node for us...
+ *
+ * As IrLMP filter on the IrLAN hint bit, we get both IrLAN and IrNET
+ * nodes, so it's only at connection time that we will know if the
+ * node support IrNET, IrLAN or both. The other solution is to check
+ * in IAS the PNP ids and service name.
+ * Note : even if a node support IrNET (or IrLAN), it's no guarantee
+ * that we will be able to connect to it, the node might already be
+ * busy...
+ *
+ * One last thing : in some case, this function will trigger duplicate
+ * discovery events. On the other hand, we should catch all
+ * discoveries properly (i.e. not miss one). Filtering duplicate here
+ * is to messy, so we leave that to user space...
+ */
+static void
+irnet_discovery_indication(discinfo_t *		discovery,
+			   DISCOVERY_MODE	mode,
+			   void *		priv)
+{
+  irnet_socket *	self = &irnet_server.s;
+	
+  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
+	  "Invalid instance (0x%p) !!!\n", priv);
+
+  DEBUG(IRDA_OCB_INFO, "Discovered new IrNET/IrLAN node %s...\n",
+	discovery->info);
+
+  /* Notify the control channel */
+  irnet_post_event(NULL, IRNET_DISCOVER,
+		   discovery->saddr, discovery->daddr, discovery->info,
+		   u16ho(discovery->hints));
+
+  DEXIT(IRDA_OCB_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_expiry_indication (expiry)
+ *
+ *    Got a expiry indication from IrLMP, post an event
+ *
+ * Note : IrLMP take care of matching the hint mask for us, we only
+ * check if it is a "new" node...
+ */
+static void
+irnet_expiry_indication(discinfo_t *	expiry,
+			DISCOVERY_MODE	mode,
+			void *		priv)
+{
+  irnet_socket *	self = &irnet_server.s;
+	
+  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
+  DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
+	  "Invalid instance (0x%p) !!!\n", priv);
+
+  DEBUG(IRDA_OCB_INFO, "IrNET/IrLAN node %s expired...\n",
+	expiry->info);
+
+  /* Notify the control channel */
+  irnet_post_event(NULL, IRNET_EXPIRE,
+		   expiry->saddr, expiry->daddr, expiry->info,
+		   u16ho(expiry->hints));
+
+  DEXIT(IRDA_OCB_TRACE, "\n");
+}
+#endif /* DISCOVERY_EVENTS */
+
+
+/*********************** PROC ENTRY CALLBACKS ***********************/
+/*
+ * We create a instance in the /proc filesystem, and here we take care
+ * of that...
+ */
+
+#ifdef CONFIG_PROC_FS
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_proc_read (buf, start, offset, len, unused)
+ *
+ *    Give some info to the /proc file system
+ */
+static int
+irnet_proc_read(char *	buf,
+		char **	start,
+		off_t	offset,
+		int	len)
+{
+  irnet_socket *	self;
+  char *		state;
+  int			i = 0;
+
+  len = 0;
+	
+  /* Get the IrNET server information... */
+  len += sprintf(buf+len, "IrNET server - ");
+  len += sprintf(buf+len, "IrDA state: %s, ",
+		 (irnet_server.running ? "running" : "dead"));
+  len += sprintf(buf+len, "stsap_sel: %02x, ", irnet_server.s.stsap_sel);
+  len += sprintf(buf+len, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel);
+
+  /* Do we need to continue ? */
+  if(!irnet_server.running)
+    return len;
+
+  /* Protect access to the instance list */
+  spin_lock_bh(&irnet_server.spinlock);
+
+  /* Get the sockets one by one... */
+  self = (irnet_socket *) hashbin_get_first(irnet_server.list);
+  while(self != NULL)
+    {
+      /* Start printing info about the socket. */
+      len += sprintf(buf+len, "\nIrNET socket %d - ", i++);
+
+      /* First, get the requested configuration */
+      len += sprintf(buf+len, "Requested IrDA name: \"%s\", ", self->rname);
+      len += sprintf(buf+len, "daddr: %08x, ", self->rdaddr);
+      len += sprintf(buf+len, "saddr: %08x\n", self->rsaddr);
+
+      /* Second, get all the PPP info */
+      len += sprintf(buf+len, "	PPP state: %s",
+		 (self->ppp_open ? "registered" : "unregistered"));
+      if(self->ppp_open)
+	{
+	  len += sprintf(buf+len, ", unit: ppp%d",
+			 ppp_unit_number(&self->chan));
+	  len += sprintf(buf+len, ", channel: %d",
+			 ppp_channel_index(&self->chan));
+	  len += sprintf(buf+len, ", mru: %d",
+			 self->mru);
+	  /* Maybe add self->flags ? Later... */
+	}
+
+      /* Then, get all the IrDA specific info... */
+      if(self->ttp_open)
+	state = "connected";
+      else
+	if(self->tsap != NULL)
+	  state = "connecting";
+	else
+	  if(self->iriap != NULL)
+	    state = "searching";
+	  else
+	    if(self->ttp_connect)
+	      state = "weird";
+	    else
+	      state = "idle";
+      len += sprintf(buf+len, "\n	IrDA state: %s, ", state);
+      len += sprintf(buf+len, "daddr: %08x, ", self->daddr);
+      len += sprintf(buf+len, "stsap_sel: %02x, ", self->stsap_sel);
+      len += sprintf(buf+len, "dtsap_sel: %02x\n", self->dtsap_sel);
+
+      /* Next socket, please... */
+      self = (irnet_socket *) hashbin_get_next(irnet_server.list);
+    }
+
+  /* Spin lock end */
+  spin_unlock_bh(&irnet_server.spinlock);
+
+  return len;
+}
+#endif /* PROC_FS */
+
+
+/********************** CONFIGURATION/CLEANUP **********************/
+/*
+ * Initialisation and teardown of the IrDA part, called at module
+ * insertion and removal...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Prepare the IrNET layer for operation...
+ */
+int __init
+irda_irnet_init(void)
+{
+  int		err = 0;
+
+  DENTER(MODULE_TRACE, "()\n");
+
+  /* Pure paranoia - should be redundant */
+  memset(&irnet_server, 0, sizeof(struct irnet_root));
+
+  /* Setup start of irnet instance list */
+  irnet_server.list = hashbin_new(HB_NOLOCK); 
+  DABORT(irnet_server.list == NULL, -ENOMEM,
+	 MODULE_ERROR, "Can't allocate hashbin!\n");
+  /* Init spinlock for instance list */
+  spin_lock_init(&irnet_server.spinlock);
+
+  /* Initialise control channel */
+  init_waitqueue_head(&irnet_events.rwait);
+  irnet_events.index = 0;
+  /* Init spinlock for event logging */
+  spin_lock_init(&irnet_events.spinlock);
+
+#ifdef CONFIG_PROC_FS
+  /* Add a /proc file for irnet infos */
+  create_proc_info_entry("irnet", 0, proc_irda, irnet_proc_read);
+#endif /* CONFIG_PROC_FS */
+
+  /* Setup the IrNET server */
+  err = irnet_setup_server();
+
+  if(!err)
+    /* We are no longer functional... */
+    irnet_server.running = 1;
+
+  DEXIT(MODULE_TRACE, "\n");
+  return err;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Cleanup at exit...
+ */
+void __exit
+irda_irnet_cleanup(void)
+{
+  DENTER(MODULE_TRACE, "()\n");
+
+  /* We are no longer there... */
+  irnet_server.running = 0;
+
+#ifdef CONFIG_PROC_FS
+  /* Remove our /proc file */
+  remove_proc_entry("irnet", proc_irda);
+#endif /* CONFIG_PROC_FS */
+
+  /* Remove our IrNET server from existence */
+  irnet_destroy_server();
+
+  /* Remove all instances of IrNET socket still present */
+  hashbin_delete(irnet_server.list, (FREE_FUNC) irda_irnet_destroy);
+
+  DEXIT(MODULE_TRACE, "\n");
+}
diff --git a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h
new file mode 100644
index 00000000000..f2fecd32d8f
--- /dev/null
+++ b/net/irda/irnet/irnet_irda.h
@@ -0,0 +1,186 @@
+/*
+ *	IrNET protocol module : Synchronous PPP over an IrDA socket.
+ *
+ *		Jean II - HPL `00 - <jt@hpl.hp.com>
+ *
+ * This file contains all definitions and declarations necessary for the
+ * IRDA part of the IrNET module (dealing with IrTTP, IrIAS and co).
+ * This file is a private header, so other modules don't want to know
+ * what's in there...
+ */
+
+#ifndef IRNET_IRDA_H
+#define IRNET_IRDA_H
+
+/***************************** INCLUDES *****************************/
+/* Please add other headers in irnet.h */
+
+#include "irnet.h"		/* Module global include */
+
+/************************ CONSTANTS & MACROS ************************/
+
+/*
+ * Name of the service (socket name) used by IrNET
+ */
+/* IAS object name (or part of it) */
+#define IRNET_SERVICE_NAME	"IrNetv1"
+/* IAS attribute */
+#define IRNET_IAS_VALUE		"IrDA:TinyTP:LsapSel"
+/* LMP notify name for client (only for /proc/net/irda/irlmp) */
+#define IRNET_NOTIFY_NAME	"IrNET socket"
+/* LMP notify name for server (only for /proc/net/irda/irlmp) */
+#define IRNET_NOTIFY_NAME_SERV	"IrNET server"
+
+/****************************** TYPES ******************************/
+
+/*
+ * This is the main structure where we store all the data pertaining to
+ * the IrNET server (listen for connection requests) and the root
+ * of the IrNET socket list
+ */
+typedef struct irnet_root
+{
+  irnet_socket		s;		/* To pretend we are a client... */
+
+  /* Generic stuff */
+  int			magic;		/* Paranoia */
+  int			running;	/* Are we operational ? */
+
+  /* Link list of all IrNET instances opened */
+  hashbin_t *		list;
+  spinlock_t		spinlock;	/* Serialize access to the list */
+  /* Note : the way hashbin has been designed is absolutely not
+   * reentrant, beware... So, we blindly protect all with spinlock */
+
+  /* Handle for the hint bit advertised in IrLMP */
+  void *		skey;
+
+  /* Server socket part */
+  struct ias_object *	ias_obj;	/* Our service name + lsap in IAS */
+
+} irnet_root;
+
+
+/**************************** PROTOTYPES ****************************/
+
+/* ----------------------- CONTROL CHANNEL ----------------------- */
+static void
+	irnet_post_event(irnet_socket *,
+			 irnet_event,
+			 __u32,
+			 __u32,
+			 char *,
+			 __u16);
+/* ----------------------- IRDA SUBROUTINES ----------------------- */
+static inline int
+	irnet_open_tsap(irnet_socket *);
+static inline __u8
+	irnet_ias_to_tsap(irnet_socket *,
+			  int,
+			  struct ias_value *);
+static inline int
+	irnet_find_lsap_sel(irnet_socket *);
+static inline int
+	irnet_connect_tsap(irnet_socket *);
+static inline int
+	irnet_discover_next_daddr(irnet_socket *);
+static inline int
+	irnet_discover_daddr_and_lsap_sel(irnet_socket *);
+static inline int
+	irnet_dname_to_daddr(irnet_socket *);
+/* ------------------------ SERVER SOCKET ------------------------ */
+static inline int
+	irnet_daddr_to_dname(irnet_socket *);
+static inline irnet_socket *
+	irnet_find_socket(irnet_socket *);
+static inline int
+	irnet_connect_socket(irnet_socket *,
+			     irnet_socket *,
+			     struct qos_info *,
+			     __u32,
+			     __u8);
+static inline void
+	irnet_disconnect_server(irnet_socket *,
+				struct sk_buff *);
+static inline int
+	irnet_setup_server(void);
+static inline void
+	irnet_destroy_server(void);
+/* ---------------------- IRDA-TTP CALLBACKS ---------------------- */
+static int
+	irnet_data_indication(void *,		/* instance */
+			      void *,		/* sap */
+			      struct sk_buff *);
+static void
+	irnet_disconnect_indication(void *,
+				    void *,
+				    LM_REASON,
+				    struct sk_buff *);
+static void
+	irnet_connect_confirm(void *,
+			      void *,
+			      struct qos_info *,
+			      __u32,
+			      __u8,
+			      struct sk_buff *);
+static void
+	irnet_flow_indication(void *,
+			      void *,
+			      LOCAL_FLOW);
+static void
+	irnet_status_indication(void *,
+				LINK_STATUS,
+				LOCK_STATUS);
+static void
+	irnet_connect_indication(void *,
+				 void *,
+				 struct qos_info *,
+				 __u32,
+				 __u8,
+				 struct sk_buff *);
+/* -------------------- IRDA-IAS/LMP CALLBACKS -------------------- */
+static void
+	irnet_getvalue_confirm(int,
+			       __u16,
+			       struct ias_value *,
+			       void *);
+static void
+	irnet_discovervalue_confirm(int,
+				    __u16, 
+				    struct ias_value *,
+				    void *);
+#ifdef DISCOVERY_EVENTS
+static void
+	irnet_discovery_indication(discinfo_t *,
+				   DISCOVERY_MODE,
+				   void *);
+static void
+	irnet_expiry_indication(discinfo_t *,
+				DISCOVERY_MODE,
+				void *);
+#endif
+/* -------------------------- PROC ENTRY -------------------------- */
+#ifdef CONFIG_PROC_FS
+static int
+	irnet_proc_read(char *,
+			char **,
+			off_t,
+			int);
+#endif /* CONFIG_PROC_FS */
+
+/**************************** VARIABLES ****************************/
+
+/*
+ * The IrNET server. Listen to connection requests and co...
+ */
+static struct irnet_root	irnet_server;
+
+/* Control channel stuff (note : extern) */
+struct irnet_ctrl_channel	irnet_events;
+
+/* The /proc/net/irda directory, defined elsewhere... */
+#ifdef CONFIG_PROC_FS
+extern struct proc_dir_entry *proc_irda;
+#endif /* CONFIG_PROC_FS */
+
+#endif /* IRNET_IRDA_H */
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
new file mode 100644
index 00000000000..f8f984bb992
--- /dev/null
+++ b/net/irda/irnet/irnet_ppp.c
@@ -0,0 +1,1142 @@
+/*
+ *	IrNET protocol module : Synchronous PPP over an IrDA socket.
+ *
+ *		Jean II - HPL `00 - <jt@hpl.hp.com>
+ *
+ * This file implement the PPP interface and /dev/irnet character device.
+ * The PPP interface hook to the ppp_generic module, handle all our
+ *	relationship to the PPP code in the kernel (and by extension to pppd),
+ *	and exchange PPP frames with this module (send/receive).
+ * The /dev/irnet device is used primarily for 2 functions :
+ *	1) as a stub for pppd (the ppp daemon), so that we can appropriately
+ *	generate PPP sessions (we pretend we are a tty).
+ *	2) as a control channel (write commands, read events)
+ */
+
+#include "irnet_ppp.h"		/* Private header */
+/* Please put other headers in irnet.h - Thanks */
+
+/* Generic PPP callbacks (to call us) */
+static struct ppp_channel_ops irnet_ppp_ops = {
+	.start_xmit = ppp_irnet_send,
+	.ioctl = ppp_irnet_ioctl
+};
+
+/************************* CONTROL CHANNEL *************************/
+/*
+ * When a pppd instance is not active on /dev/irnet, it acts as a control
+ * channel.
+ * Writing allow to set up the IrDA destination of the IrNET channel,
+ * and any application may be read events happening in IrNET...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Write is used to send a command to configure a IrNET channel
+ * before it is open by pppd. The syntax is : "command argument"
+ * Currently there is only two defined commands :
+ *	o name : set the requested IrDA nickname of the IrNET peer.
+ *	o addr : set the requested IrDA address of the IrNET peer.
+ * Note : the code is crude, but effective...
+ */
+static inline ssize_t
+irnet_ctrl_write(irnet_socket *	ap,
+		 const char __user *buf,
+		 size_t		count)
+{
+  char		command[IRNET_MAX_COMMAND];
+  char *	start;		/* Current command being processed */
+  char *	next;		/* Next command to process */
+  int		length;		/* Length of current command */
+
+  DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count);
+
+  /* Check for overflow... */
+  DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM,
+	 CTRL_ERROR, "Too much data !!!\n");
+
+  /* Get the data in the driver */
+  if(copy_from_user(command, buf, count))
+    {
+      DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
+      return -EFAULT;
+    }
+
+  /* Safe terminate the string */
+  command[count] = '\0';
+  DEBUG(CTRL_INFO, "Command line received is ``%s'' (%Zd).\n",
+	command, count);
+
+  /* Check every commands in the command line */
+  next = command;
+  while(next != NULL)
+    {
+      /* Look at the next command */
+      start = next;
+
+      /* Scrap whitespaces before the command */
+      while(isspace(*start))
+	start++;
+
+      /* ',' is our command separator */
+      next = strchr(start, ',');
+      if(next)
+	{
+	  *next = '\0';			/* Terminate command */
+	  length = next - start;	/* Length */
+	  next++;			/* Skip the '\0' */
+	}
+      else
+	length = strlen(start);
+
+      DEBUG(CTRL_INFO, "Found command ``%s'' (%d).\n", start, length);
+
+      /* Check if we recognised one of the known command
+       * We can't use "switch" with strings, so hack with "continue" */
+      
+      /* First command : name -> Requested IrDA nickname */
+      if(!strncmp(start, "name", 4))
+	{
+	  /* Copy the name only if is included and not "any" */
+	  if((length > 5) && (strcmp(start + 5, "any")))
+	    {
+	      /* Strip out trailing whitespaces */
+	      while(isspace(start[length - 1]))
+		length--;
+
+	      /* Copy the name for later reuse */
+	      memcpy(ap->rname, start + 5, length - 5);
+	      ap->rname[length - 5] = '\0';
+	    }
+	  else
+	    ap->rname[0] = '\0';
+	  DEBUG(CTRL_INFO, "Got rname = ``%s''\n", ap->rname);
+
+	  /* Restart the loop */
+	  continue;
+	}
+
+      /* Second command : addr, daddr -> Requested IrDA destination address
+       * Also process : saddr -> Requested IrDA source address */
+      if((!strncmp(start, "addr", 4)) ||
+	 (!strncmp(start, "daddr", 5)) ||
+	 (!strncmp(start, "saddr", 5)))
+	{
+	  __u32		addr = DEV_ADDR_ANY;
+
+	  /* Copy the address only if is included and not "any" */
+	  if((length > 5) && (strcmp(start + 5, "any")))
+	    {
+	      char *	begp = start + 5;
+	      char *	endp;
+
+	      /* Scrap whitespaces before the command */
+	      while(isspace(*begp))
+		begp++;
+
+	      /* Convert argument to a number (last arg is the base) */
+	      addr = simple_strtoul(begp, &endp, 16);
+	      /* Has it worked  ? (endp should be start + length) */
+	      DABORT(endp <= (start + 5), -EINVAL,
+		     CTRL_ERROR, "Invalid address.\n");
+	    }
+	  /* Which type of address ? */
+	  if(start[0] == 's')
+	    {
+	      /* Save it */
+	      ap->rsaddr = addr;
+	      DEBUG(CTRL_INFO, "Got rsaddr = %08x\n", ap->rsaddr);
+	    }
+	  else
+	    {
+	      /* Save it */
+	      ap->rdaddr = addr;
+	      DEBUG(CTRL_INFO, "Got rdaddr = %08x\n", ap->rdaddr);
+	    }
+
+	  /* Restart the loop */
+	  continue;
+	}
+
+      /* Other possible command : connect N (number of retries) */
+
+      /* No command matched -> Failed... */
+      DABORT(1, -EINVAL, CTRL_ERROR, "Not a recognised IrNET command.\n");
+    }
+
+  /* Success : we have parsed all commands successfully */
+  return(count);
+}
+
+#ifdef INITIAL_DISCOVERY
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_get_discovery_log (self)
+ *
+ *    Query the content on the discovery log if not done
+ *
+ * This function query the current content of the discovery log
+ * at the startup of the event channel and save it in the internal struct.
+ */
+static void
+irnet_get_discovery_log(irnet_socket *	ap)
+{
+  __u16		mask = irlmp_service_to_hint(S_LAN);
+
+  /* Ask IrLMP for the current discovery log */
+  ap->discoveries = irlmp_get_discoveries(&ap->disco_number, mask,
+					  DISCOVERY_DEFAULT_SLOTS);
+
+  /* Check if the we got some results */
+  if(ap->discoveries == NULL)
+    ap->disco_number = -1;
+
+  DEBUG(CTRL_INFO, "Got the log (0x%p), size is %d\n",
+	ap->discoveries, ap->disco_number);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Function irnet_read_discovery_log (self, event)
+ *
+ *    Read the content on the discovery log
+ *
+ * This function dump the current content of the discovery log
+ * at the startup of the event channel.
+ * Return 1 if wrote an event on the control channel...
+ *
+ * State of the ap->disco_XXX variables :
+ * Socket creation :  discoveries = NULL ; disco_index = 0 ; disco_number = 0
+ * While reading :    discoveries = ptr  ; disco_index = X ; disco_number = Y
+ * After reading :    discoveries = NULL ; disco_index = Y ; disco_number = -1
+ */
+static inline int
+irnet_read_discovery_log(irnet_socket *	ap,
+			 char *		event)
+{
+  int		done_event = 0;
+
+  DENTER(CTRL_TRACE, "(ap=0x%p, event=0x%p)\n",
+	 ap, event);
+
+  /* Test if we have some work to do or we have already finished */
+  if(ap->disco_number == -1)
+    {
+      DEBUG(CTRL_INFO, "Already done\n");
+      return 0;
+    }
+
+  /* Test if it's the first time and therefore we need to get the log */
+  if(ap->discoveries == NULL)
+    irnet_get_discovery_log(ap);
+
+  /* Check if we have more item to dump */
+  if(ap->disco_index < ap->disco_number)
+    {
+      /* Write an event */
+      sprintf(event, "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
+	      ap->discoveries[ap->disco_index].daddr,
+	      ap->discoveries[ap->disco_index].info,
+	      ap->discoveries[ap->disco_index].saddr,
+	      ap->discoveries[ap->disco_index].hints[0],
+	      ap->discoveries[ap->disco_index].hints[1]);
+      DEBUG(CTRL_INFO, "Writing discovery %d : %s\n",
+	    ap->disco_index, ap->discoveries[ap->disco_index].info);
+
+      /* We have an event */
+      done_event = 1;
+      /* Next discovery */
+      ap->disco_index++;
+    }
+
+  /* Check if we have done the last item */
+  if(ap->disco_index >= ap->disco_number)
+    {
+      /* No more items : remove the log and signal termination */
+      DEBUG(CTRL_INFO, "Cleaning up log (0x%p)\n",
+	    ap->discoveries);
+      if(ap->discoveries != NULL)
+	{
+	  /* Cleanup our copy of the discovery log */
+	  kfree(ap->discoveries);
+	  ap->discoveries = NULL;
+	}
+      ap->disco_number = -1;
+    }
+
+  return done_event;
+}
+#endif /* INITIAL_DISCOVERY */
+
+/*------------------------------------------------------------------*/
+/*
+ * Read is used to get IrNET events
+ */
+static inline ssize_t
+irnet_ctrl_read(irnet_socket *	ap,
+		struct file *	file,
+		char __user *	buf,
+		size_t		count)
+{
+  DECLARE_WAITQUEUE(wait, current);
+  char		event[64];	/* Max event is 61 char */
+  ssize_t	ret = 0;
+
+  DENTER(CTRL_TRACE, "(ap=0x%p, count=%Zd)\n", ap, count);
+
+  /* Check if we can write an event out in one go */
+  DABORT(count < sizeof(event), -EOVERFLOW, CTRL_ERROR, "Buffer to small.\n");
+
+#ifdef INITIAL_DISCOVERY
+  /* Check if we have read the log */
+  if(irnet_read_discovery_log(ap, event))
+    {
+      /* We have an event !!! Copy it to the user */
+      if(copy_to_user(buf, event, strlen(event)))
+	{
+	  DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
+	  return -EFAULT;
+	}
+
+      DEXIT(CTRL_TRACE, "\n");
+      return(strlen(event));
+    }
+#endif /* INITIAL_DISCOVERY */
+
+  /* Put ourselves on the wait queue to be woken up */
+  add_wait_queue(&irnet_events.rwait, &wait);
+  current->state = TASK_INTERRUPTIBLE;
+  for(;;)
+    {
+      /* If there is unread events */
+      ret = 0;
+      if(ap->event_index != irnet_events.index)
+	break;
+      ret = -EAGAIN;
+      if(file->f_flags & O_NONBLOCK)
+	break;
+      ret = -ERESTARTSYS;
+      if(signal_pending(current))
+	break;
+      /* Yield and wait to be woken up */
+      schedule();
+    }
+  current->state = TASK_RUNNING;
+  remove_wait_queue(&irnet_events.rwait, &wait);
+
+  /* Did we got it ? */
+  if(ret != 0)
+    {
+      /* No, return the error code */
+      DEXIT(CTRL_TRACE, " - ret %Zd\n", ret);
+      return ret;
+    }
+
+  /* Which event is it ? */
+  switch(irnet_events.log[ap->event_index].event)
+    {
+    case IRNET_DISCOVER:
+      sprintf(event, "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].saddr,
+	      irnet_events.log[ap->event_index].hints.byte[0],
+	      irnet_events.log[ap->event_index].hints.byte[1]);
+      break;
+    case IRNET_EXPIRE:
+      sprintf(event, "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].saddr,
+	      irnet_events.log[ap->event_index].hints.byte[0],
+	      irnet_events.log[ap->event_index].hints.byte[1]);
+      break;
+    case IRNET_CONNECT_TO:
+      sprintf(event, "Connected to %08x (%s) on ppp%d\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].unit);
+      break;
+    case IRNET_CONNECT_FROM:
+      sprintf(event, "Connection from %08x (%s) on ppp%d\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].unit);
+      break;
+    case IRNET_REQUEST_FROM:
+      sprintf(event, "Request from %08x (%s) behind %08x\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].saddr);
+      break;
+    case IRNET_NOANSWER_FROM:
+      sprintf(event, "No-answer from %08x (%s) on ppp%d\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].unit);
+      break;
+    case IRNET_BLOCKED_LINK:
+      sprintf(event, "Blocked link with %08x (%s) on ppp%d\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].unit);
+      break;
+    case IRNET_DISCONNECT_FROM:
+      sprintf(event, "Disconnection from %08x (%s) on ppp%d\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name,
+	      irnet_events.log[ap->event_index].unit);
+      break;
+    case IRNET_DISCONNECT_TO:
+      sprintf(event, "Disconnected to %08x (%s)\n",
+	      irnet_events.log[ap->event_index].daddr,
+	      irnet_events.log[ap->event_index].name);
+      break;
+    default:
+      sprintf(event, "Bug\n");
+    }
+  /* Increment our event index */
+  ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS;
+
+  DEBUG(CTRL_INFO, "Event is :%s", event);
+
+  /* Copy it to the user */
+  if(copy_to_user(buf, event, strlen(event)))
+    {
+      DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
+      return -EFAULT;
+    }
+
+  DEXIT(CTRL_TRACE, "\n");
+  return(strlen(event));
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Poll : called when someone do a select on /dev/irnet.
+ * Just check if there are new events...
+ */
+static inline unsigned int
+irnet_ctrl_poll(irnet_socket *	ap,
+		struct file *	file,
+		poll_table *	wait)
+{
+  unsigned int mask;
+
+  DENTER(CTRL_TRACE, "(ap=0x%p)\n", ap);
+
+  poll_wait(file, &irnet_events.rwait, wait);
+  mask = POLLOUT | POLLWRNORM;
+  /* If there is unread events */
+  if(ap->event_index != irnet_events.index)
+    mask |= POLLIN | POLLRDNORM;
+#ifdef INITIAL_DISCOVERY
+  if(ap->disco_number != -1)
+    {
+      /* Test if it's the first time and therefore we need to get the log */
+      if(ap->discoveries == NULL)
+	irnet_get_discovery_log(ap);
+      /* Recheck */
+      if(ap->disco_number != -1)
+	mask |= POLLIN | POLLRDNORM;
+    }
+#endif /* INITIAL_DISCOVERY */
+
+  DEXIT(CTRL_TRACE, " - mask=0x%X\n", mask);
+  return mask;
+}
+
+
+/*********************** FILESYSTEM CALLBACKS ***********************/
+/*
+ * Implement the usual open, read, write functions that will be called
+ * by the file system when some action is performed on /dev/irnet.
+ * Most of those actions will in fact be performed by "pppd" or
+ * the control channel, we just act as a redirector...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Open : when somebody open /dev/irnet
+ * We basically create a new instance of irnet and initialise it.
+ */
+static int
+dev_irnet_open(struct inode *	inode,
+	       struct file *	file)
+{
+  struct irnet_socket *	ap;
+  int			err;
+
+  DENTER(FS_TRACE, "(file=0x%p)\n", file);
+
+#ifdef SECURE_DEVIRNET
+  /* This could (should?) be enforced by the permissions on /dev/irnet. */
+  if(!capable(CAP_NET_ADMIN))
+    return -EPERM;
+#endif /* SECURE_DEVIRNET */
+
+  /* Allocate a private structure for this IrNET instance */
+  ap = kmalloc(sizeof(*ap), GFP_KERNEL);
+  DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
+
+  /* initialize the irnet structure */
+  memset(ap, 0, sizeof(*ap));
+  ap->file = file;
+
+  /* PPP channel setup */
+  ap->ppp_open = 0;
+  ap->chan.private = ap;
+  ap->chan.ops = &irnet_ppp_ops;
+  ap->chan.mtu = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
+  ap->chan.hdrlen = 2 + TTP_MAX_HEADER;		/* for A/C + Max IrDA hdr */
+  /* PPP parameters */
+  ap->mru = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
+  ap->xaccm[0] = ~0U;
+  ap->xaccm[3] = 0x60000000U;
+  ap->raccm = ~0U;
+
+  /* Setup the IrDA part... */
+  err = irda_irnet_create(ap);
+  if(err)
+    {
+      DERROR(FS_ERROR, "Can't setup IrDA link...\n");
+      kfree(ap);
+      return err;
+    }
+
+  /* For the control channel */
+  ap->event_index = irnet_events.index;	/* Cancel all past events */
+
+  /* Put our stuff where we will be able to find it later */
+  file->private_data = ap;
+
+  DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
+  return 0;
+}
+
+
+/*------------------------------------------------------------------*/
+/*
+ * Close : when somebody close /dev/irnet
+ * Destroy the instance of /dev/irnet
+ */
+static int
+dev_irnet_close(struct inode *	inode,
+		struct file *	file)
+{
+  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+
+  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
+	 file, ap);
+  DABORT(ap == NULL, 0, FS_ERROR, "ap is NULL !!!\n");
+
+  /* Detach ourselves */
+  file->private_data = NULL;
+
+  /* Close IrDA stuff */
+  irda_irnet_destroy(ap);
+
+  /* Disconnect from the generic PPP layer if not already done */
+  if(ap->ppp_open)
+    {
+      DERROR(FS_ERROR, "Channel still registered - deregistering !\n");
+      ap->ppp_open = 0;
+      ppp_unregister_channel(&ap->chan);
+    }
+
+  kfree(ap);
+
+  DEXIT(FS_TRACE, "\n");
+  return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Write does nothing.
+ * (we receive packet from ppp_generic through ppp_irnet_send())
+ */
+static ssize_t
+dev_irnet_write(struct file *	file,
+		const char __user *buf,
+		size_t		count,
+		loff_t *	ppos)
+{
+  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+
+  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
+	file, ap, count);
+  DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
+
+  /* If we are connected to ppp_generic, let it handle the job */
+  if(ap->ppp_open)
+    return -EAGAIN;
+  else
+    return irnet_ctrl_write(ap, buf, count);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Read doesn't do much either.
+ * (pppd poll us, but ultimately reads through /dev/ppp)
+ */
+static ssize_t
+dev_irnet_read(struct file *	file,
+	       char __user *	buf,
+	       size_t		count,
+	       loff_t *		ppos)
+{
+  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+
+  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
+	file, ap, count);
+  DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
+
+  /* If we are connected to ppp_generic, let it handle the job */
+  if(ap->ppp_open)
+    return -EAGAIN;
+  else
+    return irnet_ctrl_read(ap, file, buf, count);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Poll : called when someone do a select on /dev/irnet
+ */
+static unsigned int
+dev_irnet_poll(struct file *	file,
+	       poll_table *	wait)
+{
+  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  unsigned int		mask;
+
+  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
+	 file, ap);
+
+  mask = POLLOUT | POLLWRNORM;
+  DABORT(ap == NULL, mask, FS_ERROR, "ap is NULL !!!\n");
+
+  /* If we are connected to ppp_generic, let it handle the job */
+  if(!ap->ppp_open)
+    mask |= irnet_ctrl_poll(ap, file, wait);
+
+  DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
+  return(mask);
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * IOCtl : Called when someone does some ioctls on /dev/irnet
+ * This is the way pppd configure us and control us while the PPP
+ * instance is active.
+ */
+static int
+dev_irnet_ioctl(struct inode *	inode,
+		struct file *	file,
+		unsigned int	cmd,
+		unsigned long	arg)
+{
+  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  int			err;
+  int			val;
+  void __user *argp = (void __user *)arg;
+
+  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p, cmd=0x%X)\n",
+	 file, ap, cmd);
+
+  /* Basic checks... */
+  DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
+#ifdef SECURE_DEVIRNET
+  if(!capable(CAP_NET_ADMIN))
+    return -EPERM;
+#endif /* SECURE_DEVIRNET */
+
+  err = -EFAULT;
+  switch(cmd)
+    {
+      /* Set discipline (should be N_SYNC_PPP or N_TTY) */
+    case TIOCSETD:
+      if(get_user(val, (int __user *)argp))
+	break;
+      if((val == N_SYNC_PPP) || (val == N_PPP))
+	{
+	  DEBUG(FS_INFO, "Entering PPP discipline.\n");
+	  /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/
+	  err = ppp_register_channel(&ap->chan);
+	  if(err == 0)
+	    {
+	      /* Our ppp side is active */
+	      ap->ppp_open = 1;
+
+	      DEBUG(FS_INFO, "Trying to establish a connection.\n");
+	      /* Setup the IrDA link now - may fail... */
+	      irda_irnet_connect(ap);
+	    }
+	  else
+	    DERROR(FS_ERROR, "Can't setup PPP channel...\n");
+	}
+      else
+	{
+	  /* In theory, should be N_TTY */
+	  DEBUG(FS_INFO, "Exiting PPP discipline.\n");
+	  /* Disconnect from the generic PPP layer */
+	  if(ap->ppp_open)
+	    {
+	      ap->ppp_open = 0;
+	      ppp_unregister_channel(&ap->chan);
+	    }
+	  else
+	    DERROR(FS_ERROR, "Channel not registered !\n");
+	  err = 0;
+	}
+      break;
+
+      /* Query PPP channel and unit number */
+    case PPPIOCGCHAN:
+      if(!ap->ppp_open)
+	break;
+      if(put_user(ppp_channel_index(&ap->chan), (int __user *)argp))
+	break;
+      DEBUG(FS_INFO, "Query channel.\n");
+      err = 0;
+      break;
+    case PPPIOCGUNIT:
+      if(!ap->ppp_open)
+	break;
+      if(put_user(ppp_unit_number(&ap->chan), (int __user *)argp))
+	break;
+      DEBUG(FS_INFO, "Query unit number.\n");
+      err = 0;
+      break;
+
+      /* All these ioctls can be passed both directly and from ppp_generic,
+       * so we just deal with them in one place...
+       */
+    case PPPIOCGFLAGS:
+    case PPPIOCSFLAGS:
+    case PPPIOCGASYNCMAP:
+    case PPPIOCSASYNCMAP:
+    case PPPIOCGRASYNCMAP:
+    case PPPIOCSRASYNCMAP:
+    case PPPIOCGXASYNCMAP:
+    case PPPIOCSXASYNCMAP:
+    case PPPIOCGMRU:
+    case PPPIOCSMRU:
+      DEBUG(FS_INFO, "Standard PPP ioctl.\n");
+      if(!capable(CAP_NET_ADMIN))
+	err = -EPERM;
+      else
+	err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
+      break;
+
+      /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */
+      /* Get termios */
+    case TCGETS:
+      DEBUG(FS_INFO, "Get termios.\n");
+      if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
+	break;
+      err = 0;
+      break;
+      /* Set termios */
+    case TCSETSF:
+      DEBUG(FS_INFO, "Set termios.\n");
+      if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
+	break;
+      err = 0;
+      break;
+
+      /* Set DTR/RTS */
+    case TIOCMBIS: 
+    case TIOCMBIC:
+      /* Set exclusive/non-exclusive mode */
+    case TIOCEXCL:
+    case TIOCNXCL:
+      DEBUG(FS_INFO, "TTY compatibility.\n");
+      err = 0;
+      break;
+
+    case TCGETA:
+      DEBUG(FS_INFO, "TCGETA\n");
+      break;
+
+    case TCFLSH:
+      DEBUG(FS_INFO, "TCFLSH\n");
+      /* Note : this will flush buffers in PPP, so it *must* be done
+       * We should also worry that we don't accept junk here and that
+       * we get rid of our own buffers */
+#ifdef FLUSH_TO_PPP
+      ppp_output_wakeup(&ap->chan);
+#endif /* FLUSH_TO_PPP */
+      err = 0;
+      break;
+
+    case FIONREAD:
+      DEBUG(FS_INFO, "FIONREAD\n");
+      val = 0;
+      if(put_user(val, (int __user *)argp))
+	break;
+      err = 0;
+      break;
+
+    default:
+      DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd);
+      err = -ENOIOCTLCMD;
+    }
+
+  DEXIT(FS_TRACE, " - err = 0x%X\n", err);
+  return err;
+}
+
+/************************** PPP CALLBACKS **************************/
+/*
+ * This are the functions that the generic PPP driver in the kernel
+ * will call to communicate to us.
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Prepare the ppp frame for transmission over the IrDA socket.
+ * We make sure that the header space is enough, and we change ppp header
+ * according to flags passed by pppd.
+ * This is not a callback, but just a helper function used in ppp_irnet_send()
+ */
+static inline struct sk_buff *
+irnet_prepare_skb(irnet_socket *	ap,
+		  struct sk_buff *	skb)
+{
+  unsigned char *	data;
+  int			proto;		/* PPP protocol */
+  int			islcp;		/* Protocol == LCP */
+  int			needaddr;	/* Need PPP address */
+
+  DENTER(PPP_TRACE, "(ap=0x%p, skb=0x%p)\n",
+	 ap, skb);
+
+  /* Extract PPP protocol from the frame */
+  data  = skb->data;
+  proto = (data[0] << 8) + data[1];
+
+  /* LCP packets with codes between 1 (configure-request)
+   * and 7 (code-reject) must be sent as though no options
+   * have been negotiated. */
+  islcp = (proto == PPP_LCP) && (1 <= data[2]) && (data[2] <= 7);
+
+  /* compress protocol field if option enabled */
+  if((data[0] == 0) && (ap->flags & SC_COMP_PROT) && (!islcp))
+    skb_pull(skb,1);
+
+  /* Check if we need address/control fields */
+  needaddr = 2*((ap->flags & SC_COMP_AC) == 0 || islcp);
+
+  /* Is the skb headroom large enough to contain all IrDA-headers? */
+  if((skb_headroom(skb) < (ap->max_header_size + needaddr)) ||
+      (skb_shared(skb)))
+    {
+      struct sk_buff *	new_skb;
+
+      DEBUG(PPP_INFO, "Reallocating skb\n");
+
+      /* Create a new skb */
+      new_skb = skb_realloc_headroom(skb, ap->max_header_size + needaddr);
+
+      /* We have to free the original skb anyway */
+      dev_kfree_skb(skb);
+
+      /* Did the realloc succeed ? */
+      DABORT(new_skb == NULL, NULL, PPP_ERROR, "Could not realloc skb\n");
+
+      /* Use the new skb instead */
+      skb = new_skb;
+    }
+
+  /* prepend address/control fields if necessary */
+  if(needaddr)
+    {
+      skb_push(skb, 2);
+      skb->data[0] = PPP_ALLSTATIONS;
+      skb->data[1] = PPP_UI;
+    }
+
+  DEXIT(PPP_TRACE, "\n");
+
+  return skb;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Send a packet to the peer over the IrTTP connection.
+ * Returns 1 iff the packet was accepted.
+ * Returns 0 iff packet was not consumed.
+ * If the packet was not accepted, we will call ppp_output_wakeup
+ * at some later time to reactivate flow control in ppp_generic.
+ */
+static int
+ppp_irnet_send(struct ppp_channel *	chan,
+	       struct sk_buff *		skb)
+{
+  irnet_socket *	self = (struct irnet_socket *) chan->private;
+  int			ret;
+
+  DENTER(PPP_TRACE, "(channel=0x%p, ap/self=0x%p)\n",
+	 chan, self);
+
+  /* Check if things are somewhat valid... */
+  DASSERT(self != NULL, 0, PPP_ERROR, "Self is NULL !!!\n");
+
+  /* Check if we are connected */
+  if(!(test_bit(0, &self->ttp_open)))
+    {
+#ifdef CONNECT_IN_SEND
+      /* Let's try to connect one more time... */
+      /* Note : we won't be connected after this call, but we should be
+       * ready for next packet... */
+      /* If we are already connecting, this will fail */
+      irda_irnet_connect(self);
+#endif /* CONNECT_IN_SEND */
+
+      DEBUG(PPP_INFO, "IrTTP not ready ! (%ld-%ld)\n",
+	    self->ttp_open, self->ttp_connect);
+
+      /* Note : we can either drop the packet or block the packet.
+       *
+       * Blocking the packet allow us a better connection time,
+       * because by calling ppp_output_wakeup() we can have
+       * ppp_generic resending the LCP request immediately to us,
+       * rather than waiting for one of pppd periodic transmission of
+       * LCP request.
+       *
+       * On the other hand, if we block all packet, all those periodic
+       * transmissions of pppd accumulate in ppp_generic, creating a
+       * backlog of LCP request. When we eventually connect later on,
+       * we have to transmit all this backlog before we can connect
+       * proper (if we don't timeout before).
+       *
+       * The current strategy is as follow :
+       * While we are attempting to connect, we block packets to get
+       * a better connection time.
+       * If we fail to connect, we drain the queue and start dropping packets
+       */
+#ifdef BLOCK_WHEN_CONNECT
+      /* If we are attempting to connect */
+      if(test_bit(0, &self->ttp_connect))
+	{
+	  /* Blocking packet, ppp_generic will retry later */
+	  return 0;
+	}
+#endif /* BLOCK_WHEN_CONNECT */
+
+      /* Dropping packet, pppd will retry later */
+      dev_kfree_skb(skb);
+      return 1;
+    }
+
+  /* Check if the queue can accept any packet, otherwise block */
+  if(self->tx_flow != FLOW_START)
+    DRETURN(0, PPP_INFO, "IrTTP queue full (%d skbs)...\n",
+	    skb_queue_len(&self->tsap->tx_queue));
+
+  /* Prepare ppp frame for transmission */
+  skb = irnet_prepare_skb(self, skb);
+  DABORT(skb == NULL, 1, PPP_ERROR, "Prepare skb for Tx failed.\n");
+
+  /* Send the packet to IrTTP */
+  ret = irttp_data_request(self->tsap, skb);
+  if(ret < 0)
+    {
+      /*   
+       * > IrTTPs tx queue is full, so we just have to
+       * > drop the frame! You might think that we should
+       * > just return -1 and don't deallocate the frame,
+       * > but that is dangerous since it's possible that
+       * > we have replaced the original skb with a new
+       * > one with larger headroom, and that would really
+       * > confuse do_dev_queue_xmit() in dev.c! I have
+       * > tried :-) DB 
+       * Correction : we verify the flow control above (self->tx_flow),
+       * so we come here only if IrTTP doesn't like the packet (empty,
+       * too large, IrTTP not connected). In those rare cases, it's ok
+       * to drop it, we don't want to see it here again...
+       * Jean II
+       */
+      DERROR(PPP_ERROR, "IrTTP doesn't like this packet !!! (0x%X)\n", ret);
+      /* irttp_data_request already free the packet */
+    }
+
+  DEXIT(PPP_TRACE, "\n");
+  return 1;	/* Packet has been consumed */
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Take care of the ioctls that ppp_generic doesn't want to deal with...
+ * Note : we are also called from dev_irnet_ioctl().
+ */
+static int
+ppp_irnet_ioctl(struct ppp_channel *	chan,
+		unsigned int		cmd,
+		unsigned long		arg)
+{
+  irnet_socket *	ap = (struct irnet_socket *) chan->private;
+  int			err;
+  int			val;
+  u32			accm[8];
+  void __user *argp = (void __user *)arg;
+
+  DENTER(PPP_TRACE, "(channel=0x%p, ap=0x%p, cmd=0x%X)\n",
+	 chan, ap, cmd);
+
+  /* Basic checks... */
+  DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
+
+  err = -EFAULT;
+  switch(cmd)
+    {
+      /* PPP flags */
+    case PPPIOCGFLAGS:
+      val = ap->flags | ap->rbits;
+      if(put_user(val, (int __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCSFLAGS:
+      if(get_user(val, (int __user *) argp))
+	break;
+      ap->flags = val & ~SC_RCV_BITS;
+      ap->rbits = val & SC_RCV_BITS;
+      err = 0;
+      break;
+
+      /* Async map stuff - all dummy to please pppd */
+    case PPPIOCGASYNCMAP:
+      if(put_user(ap->xaccm[0], (u32 __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCSASYNCMAP:
+      if(get_user(ap->xaccm[0], (u32 __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCGRASYNCMAP:
+      if(put_user(ap->raccm, (u32 __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCSRASYNCMAP:
+      if(get_user(ap->raccm, (u32 __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCGXASYNCMAP:
+      if(copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm)))
+	break;
+      err = 0;
+      break;
+    case PPPIOCSXASYNCMAP:
+      if(copy_from_user(accm, argp, sizeof(accm)))
+	break;
+      accm[2] &= ~0x40000000U;		/* can't escape 0x5e */
+      accm[3] |= 0x60000000U;		/* must escape 0x7d, 0x7e */
+      memcpy(ap->xaccm, accm, sizeof(ap->xaccm));
+      err = 0;
+      break;
+
+      /* Max PPP frame size */
+    case PPPIOCGMRU:
+      if(put_user(ap->mru, (int __user *) argp))
+	break;
+      err = 0;
+      break;
+    case PPPIOCSMRU:
+      if(get_user(val, (int __user *) argp))
+	break;
+      if(val < PPP_MRU)
+	val = PPP_MRU;
+      ap->mru = val;
+      err = 0;
+      break;
+
+    default:
+      DEBUG(PPP_INFO, "Unsupported ioctl (0x%X)\n", cmd);
+      err = -ENOIOCTLCMD;
+    }
+
+  DEXIT(PPP_TRACE, " - err = 0x%X\n", err);
+  return err;
+}
+
+/************************** INITIALISATION **************************/
+/*
+ * Module initialisation and all that jazz...
+ */
+
+/*------------------------------------------------------------------*/
+/*
+ * Hook our device callbacks in the filesystem, to connect our code
+ * to /dev/irnet
+ */
+static inline int __init
+ppp_irnet_init(void)
+{
+  int err = 0;
+
+  DENTER(MODULE_TRACE, "()\n");
+
+  /* Allocate ourselves as a minor in the misc range */
+  err = misc_register(&irnet_misc_device);
+
+  DEXIT(MODULE_TRACE, "\n");
+  return err;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Cleanup at exit...
+ */
+static inline void __exit
+ppp_irnet_cleanup(void)
+{
+  DENTER(MODULE_TRACE, "()\n");
+
+  /* De-allocate /dev/irnet minor in misc range */
+  misc_deregister(&irnet_misc_device);
+
+  DEXIT(MODULE_TRACE, "\n");
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Module main entry point
+ */
+int __init
+irnet_init(void)
+{
+  int err;
+
+  /* Initialise both parts... */
+  err = irda_irnet_init();
+  if(!err)
+    err = ppp_irnet_init();
+  return err;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Module exit
+ */
+static void __exit
+irnet_cleanup(void)
+{
+  irda_irnet_cleanup();
+  ppp_irnet_cleanup();
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Module magic
+ */
+module_init(irnet_init);
+module_exit(irnet_cleanup);
+MODULE_AUTHOR("Jean Tourrilhes <jt@hpl.hp.com>");
+MODULE_DESCRIPTION("IrNET : Synchronous PPP over IrDA"); 
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CHARDEV(10, 187);
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
new file mode 100644
index 00000000000..d2beb7df8f7
--- /dev/null
+++ b/net/irda/irnet/irnet_ppp.h
@@ -0,0 +1,119 @@
+/*
+ *	IrNET protocol module : Synchronous PPP over an IrDA socket.
+ *
+ *		Jean II - HPL `00 - <jt@hpl.hp.com>
+ *
+ * This file contains all definitions and declarations necessary for the
+ * PPP part of the IrNET module.
+ * This file is a private header, so other modules don't want to know
+ * what's in there...
+ */
+
+#ifndef IRNET_PPP_H
+#define IRNET_PPP_H
+
+/***************************** INCLUDES *****************************/
+
+#include "irnet.h"		/* Module global include */
+
+/************************ CONSTANTS & MACROS ************************/
+
+/* /dev/irnet file constants */
+#define IRNET_MAJOR	10	/* Misc range */
+#define IRNET_MINOR	187	/* Official allocation */
+
+/* IrNET control channel stuff */
+#define IRNET_MAX_COMMAND	256	/* Max length of a command line */
+
+/* PPP hardcore stuff */
+
+/* Bits in rbits (PPP flags in irnet struct) */
+#define SC_RCV_BITS	(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+/* Bit numbers in busy */
+#define XMIT_BUSY	0
+#define RECV_BUSY	1
+#define XMIT_WAKEUP	2
+#define XMIT_FULL	3
+
+/* Queue management */
+#define PPPSYNC_MAX_RQLEN	32	/* arbitrary */
+
+/****************************** TYPES ******************************/
+
+
+/**************************** PROTOTYPES ****************************/
+
+/* ----------------------- CONTROL CHANNEL ----------------------- */
+static inline ssize_t
+	irnet_ctrl_write(irnet_socket *,
+			 const char *,
+			 size_t);
+static inline ssize_t
+	irnet_ctrl_read(irnet_socket *,
+			struct file *,
+			char *,
+			size_t);
+static inline unsigned int
+	irnet_ctrl_poll(irnet_socket *,
+			struct file *,
+			poll_table *);
+/* ----------------------- CHARACTER DEVICE ----------------------- */
+static int
+	dev_irnet_open(struct inode *,	/* fs callback : open */
+		       struct file *),
+	dev_irnet_close(struct inode *,
+			struct file *);
+static ssize_t
+	dev_irnet_write(struct file *,
+			const char __user *,
+			size_t,
+			loff_t *),
+	dev_irnet_read(struct file *,
+		       char __user *,
+		       size_t,
+		       loff_t *);
+static unsigned int
+	dev_irnet_poll(struct file *,
+		       poll_table *);
+static int
+	dev_irnet_ioctl(struct inode *,
+			struct file *,
+			unsigned int,
+			unsigned long);
+/* ------------------------ PPP INTERFACE ------------------------ */
+static inline struct sk_buff *
+	irnet_prepare_skb(irnet_socket *,
+			  struct sk_buff *);
+static int
+	ppp_irnet_send(struct ppp_channel *,
+		      struct sk_buff *);
+static int
+	ppp_irnet_ioctl(struct ppp_channel *,
+			unsigned int,
+			unsigned long);
+
+/**************************** VARIABLES ****************************/
+
+/* Filesystem callbacks (to call us) */
+static struct file_operations irnet_device_fops =
+{
+	.owner		= THIS_MODULE,
+	.read		= dev_irnet_read,
+	.write		= dev_irnet_write,
+	.poll		= dev_irnet_poll,
+	.ioctl		= dev_irnet_ioctl,
+	.open		= dev_irnet_open,
+	.release	= dev_irnet_close
+  /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
+};
+
+/* Structure so that the misc major (drivers/char/misc.c) take care of us... */
+static struct miscdevice irnet_misc_device =
+{
+	IRNET_MINOR,
+	"irnet",
+	&irnet_device_fops
+};
+
+#endif /* IRNET_PPP_H */
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
new file mode 100644
index 00000000000..88b9c43f637
--- /dev/null
+++ b/net/irda/irproc.c
@@ -0,0 +1,100 @@
+/*********************************************************************
+ *                
+ * Filename:      irproc.c
+ * Version:       1.0
+ * Description:   Various entries in the /proc file system
+ * Status:        Experimental.
+ * Author:        Thomas Davis, <ratbert@radiks.net>
+ * Created at:    Sat Feb 21 21:33:24 1998
+ * Modified at:   Sun Nov 14 08:54:54 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-1999, Dag Brattli <dagb@cs.uit.no>
+ *     Copyright (c) 1998, Thomas Davis, <ratbert@radiks.net>, 
+ *     All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     I, Thomas Davis, provide no warranty for any of this software. 
+ *     This material is provided "AS-IS" and at no charge. 
+ *     
+ ********************************************************************/
+
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlmp.h>
+
+extern struct file_operations discovery_seq_fops;
+extern struct file_operations irlap_seq_fops;
+extern struct file_operations irlmp_seq_fops;
+extern struct file_operations irttp_seq_fops;
+extern struct file_operations irias_seq_fops;
+
+struct irda_entry {
+	const char *name;
+	struct file_operations *fops;
+};
+
+struct proc_dir_entry *proc_irda;
+EXPORT_SYMBOL(proc_irda);
+ 
+static struct irda_entry irda_dirs[] = {
+	{"discovery",	&discovery_seq_fops},
+	{"irttp",	&irttp_seq_fops},
+	{"irlmp",	&irlmp_seq_fops},
+	{"irlap",	&irlap_seq_fops},
+	{"irias",	&irias_seq_fops},
+};
+
+/*
+ * Function irda_proc_register (void)
+ *
+ *    Register irda entry in /proc file system
+ *
+ */
+void __init irda_proc_register(void) 
+{
+	int i;
+	struct proc_dir_entry *d;
+
+	proc_irda = proc_mkdir("irda", proc_net);
+	if (proc_irda == NULL)
+		return;
+	proc_irda->owner = THIS_MODULE;
+
+	for (i=0; i<ARRAY_SIZE(irda_dirs); i++) {
+		d = create_proc_entry(irda_dirs[i].name, 0, proc_irda);
+		if (d) 
+			d->proc_fops = irda_dirs[i].fops;
+	}
+}
+
+/*
+ * Function irda_proc_unregister (void)
+ *
+ *    Unregister irda entry in /proc file system
+ *
+ */
+void __exit irda_proc_unregister(void) 
+{
+	int i;
+
+        if (proc_irda) {
+                for (i=0; i<ARRAY_SIZE(irda_dirs); i++)
+                        remove_proc_entry(irda_dirs[i].name, proc_irda);
+
+                remove_proc_entry("irda", proc_net);
+                proc_irda = NULL;
+        }
+}
+
+
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
new file mode 100644
index 00000000000..b0dd3ea3599
--- /dev/null
+++ b/net/irda/irqueue.c
@@ -0,0 +1,915 @@
+/*********************************************************************
+ *                
+ * Filename:      irqueue.c
+ * Version:       0.3
+ * Description:   General queue implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Jun  9 13:29:31 1998
+ * Modified at:   Sun Dec 12 13:48:22 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Modified at:   Thu Jan  4 14:29:10 CET 2001
+ * Modified by:   Marc Zyngier <mzyngier@freesurf.fr>
+ * 
+ *     Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no>
+ *     Copyright (C) 1998, Dag Brattli, 
+ *     All Rights Reserved.
+ *
+ *     This code is taken from the Vortex Operating System written by Aage
+ *     Kvalnes. Aage has agreed that this code can use the GPL licence,
+ *     although he does not use that licence in his own code.
+ *     
+ *     This copyright does however _not_ include the ELF hash() function
+ *     which I currently don't know which licence or copyright it
+ *     has. Please inform me if you know.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+/*
+ * NOTE :
+ * There are various problems with this package :
+ *	o the hash function for ints is pathetic (but could be changed)
+ *	o locking is sometime suspicious (especially during enumeration)
+ *	o most users have only a few elements (== overhead)
+ *	o most users never use seach, so don't benefit from hashing
+ * Problem already fixed :
+ *	o not 64 bit compliant (most users do hashv = (int) self)
+ *	o hashbin_remove() is broken => use hashbin_remove_this()
+ * I think most users would be better served by a simple linked list
+ * (like include/linux/list.h) with a global spinlock per list.
+ * Jean II
+ */
+
+/*
+ * Notes on the concurrent access to hashbin and other SMP issues
+ * -------------------------------------------------------------
+ *	Hashbins are very often in the IrDA stack a global repository of
+ * information, and therefore used in a very asynchronous manner following
+ * various events (driver calls, timers, user calls...).
+ *	Therefore, very often it is highly important to consider the
+ * management of concurrent access to the hashbin and how to guarantee the
+ * consistency of the operations on it.
+ *
+ *	First, we need to define the objective of locking :
+ *		1) Protect user data (content pointed by the hashbin)
+ *		2) Protect hashbin structure itself (linked list in each bin)
+ *
+ *			     OLD LOCKING
+ *			     -----------
+ *
+ *	The previous locking strategy, either HB_LOCAL or HB_GLOBAL were
+ * both inadequate in *both* aspect.
+ *		o HB_GLOBAL was using a spinlock for each bin (local locking).
+ *		o HB_LOCAL was disabling irq on *all* CPUs, so use a single
+ *		  global semaphore.
+ *	The problems were :
+ *		A) Global irq disabling is no longer supported by the kernel
+ *		B) No protection for the hashbin struct global data
+ *			o hashbin_delete()
+ *			o hb_current
+ *		C) No protection for user data in some cases
+ *
+ *	A) HB_LOCAL use global irq disabling, so doesn't work on kernel
+ * 2.5.X. Even when it is supported (kernel 2.4.X and earlier), its
+ * performance is not satisfactory on SMP setups. Most hashbins were
+ * HB_LOCAL, so (A) definitely need fixing.
+ *	B) HB_LOCAL could be modified to fix (B). However, because HB_GLOBAL
+ * lock only the individual bins, it will never be able to lock the
+ * global data, so can't do (B).
+ *	C) Some functions return pointer to data that is still in the
+ * hashbin :
+ *		o hashbin_find()
+ *		o hashbin_get_first()
+ *		o hashbin_get_next()
+ *	As the data is still in the hashbin, it may be changed or free'd
+ * while the caller is examinimg the data. In those case, locking can't
+ * be done within the hashbin, but must include use of the data within
+ * the caller.
+ *	The caller can easily do this with HB_LOCAL (just disable irqs).
+ * However, this is impossible with HB_GLOBAL because the caller has no
+ * way to know the proper bin, so don't know which spinlock to use.
+ *
+ *	Quick summary : can no longer use HB_LOCAL, and HB_GLOBAL is
+ * fundamentally broken and will never work.
+ *
+ *			     NEW LOCKING
+ *			     -----------
+ *
+ *	To fix those problems, I've introduce a few changes in the
+ * hashbin locking :
+ *		1) New HB_LOCK scheme
+ *		2) hashbin->hb_spinlock
+ *		3) New hashbin usage policy
+ *
+ * HB_LOCK :
+ * -------
+ *	HB_LOCK is a locking scheme intermediate between the old HB_LOCAL
+ * and HB_GLOBAL. It uses a single spinlock to protect the whole content
+ * of the hashbin. As it is a single spinlock, it can protect the global
+ * data of the hashbin and not only the bins themselves.
+ *	HB_LOCK can only protect some of the hashbin calls, so it only lock
+ * call that can be made 100% safe and leave other call unprotected.
+ *	HB_LOCK in theory is slower than HB_GLOBAL, but as the hashbin
+ * content is always small contention is not high, so it doesn't matter
+ * much. HB_LOCK is probably faster than HB_LOCAL.
+ *
+ * hashbin->hb_spinlock :
+ * --------------------
+ *	The spinlock that HB_LOCK uses is available for caller, so that
+ * the caller can protect unprotected calls (see below).
+ *	If the caller want to do entirely its own locking (HB_NOLOCK), he
+ * can do so and may use safely this spinlock.
+ *	Locking is done like this :
+ *		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+ *	Releasing the lock :
+ *		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+ *
+ * Safe & Protected calls :
+ * ----------------------
+ *	The following calls are safe or protected via HB_LOCK :
+ *		o hashbin_new()		-> safe
+ *		o hashbin_delete()
+ *		o hashbin_insert()
+ *		o hashbin_remove_first()
+ *		o hashbin_remove()
+ *		o hashbin_remove_this()
+ *		o HASHBIN_GET_SIZE()	-> atomic
+ *
+ *	The following calls only protect the hashbin itself :
+ *		o hashbin_lock_find()
+ *		o hashbin_find_next()
+ *
+ * Unprotected calls :
+ * -----------------
+ *	The following calls need to be protected by the caller :
+ *		o hashbin_find()
+ *		o hashbin_get_first()
+ *		o hashbin_get_next()
+ *
+ * Locking Policy :
+ * --------------
+ *	If the hashbin is used only in a single thread of execution
+ * (explicitly or implicitely), you can use HB_NOLOCK
+ *	If the calling module already provide concurrent access protection,
+ * you may use HB_NOLOCK.
+ *
+ *	In all other cases, you need to use HB_LOCK and lock the hashbin
+ * every time before calling one of the unprotected calls. You also must
+ * use the pointer returned by the unprotected call within the locked
+ * region.
+ *
+ * Extra care for enumeration :
+ * --------------------------
+ *	hashbin_get_first() and hashbin_get_next() use the hashbin to
+ * store the current position, in hb_current.
+ *	As long as the hashbin remains locked, this is safe. If you unlock
+ * the hashbin, the current position may change if anybody else modify
+ * or enumerate the hashbin.
+ *	Summary : do the full enumeration while locked.
+ *
+ *	Alternatively, you may use hashbin_find_next(). But, this will
+ * be slower, is more complex to use and doesn't protect the hashbin
+ * content. So, care is needed here as well.
+ *
+ * Other issues :
+ * ------------
+ *	I believe that we are overdoing it by using spin_lock_irqsave()
+ * and we should use only spin_lock_bh() or similar. But, I don't have
+ * the balls to try it out.
+ *	Don't believe that because hashbin are now (somewhat) SMP safe
+ * that the rest of the code is. Higher layers tend to be safest,
+ * but LAP and LMP would need some serious dedicated love.
+ *
+ * Jean II
+ */
+#include <linux/module.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irqueue.h>
+
+/************************ QUEUE SUBROUTINES ************************/
+
+/*
+ * Hashbin
+ */
+#define GET_HASHBIN(x) ( x & HASHBIN_MASK )
+
+/*
+ * Function hash (name)
+ *
+ *    This function hash the input string 'name' using the ELF hash
+ *    function for strings.
+ */
+static __u32 hash( const char* name)
+{
+	__u32 h = 0;
+	__u32 g;
+	
+	while(*name) {
+		h = (h<<4) + *name++;
+		if ((g = (h & 0xf0000000)))
+			h ^=g>>24;
+		h &=~g;
+	}
+	return h;
+}
+
+/*
+ * Function enqueue_first (queue, proc)
+ *
+ *    Insert item first in queue.
+ *
+ */
+static void enqueue_first(irda_queue_t **queue, irda_queue_t* element)
+{
+	
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	/*
+	 * Check if queue is empty.
+	 */
+	if ( *queue == NULL ) {
+		/*
+		 * Queue is empty.  Insert one element into the queue.
+		 */
+		element->q_next = element->q_prev = *queue = element;
+		
+	} else {
+		/*
+		 * Queue is not empty.  Insert element into front of queue.
+		 */
+		element->q_next          = (*queue);
+		(*queue)->q_prev->q_next = element;
+		element->q_prev          = (*queue)->q_prev;
+		(*queue)->q_prev         = element;
+		(*queue)                 = element;
+	}
+}
+
+
+/*
+ * Function dequeue (queue)
+ *
+ *    Remove first entry in queue
+ *
+ */
+static irda_queue_t *dequeue_first(irda_queue_t **queue)
+{
+	irda_queue_t *ret;
+
+	IRDA_DEBUG( 4, "dequeue_first()\n");
+	
+	/*
+	 * Set return value
+	 */
+	ret =  *queue;
+	
+	if ( *queue == NULL ) {
+		/*
+		 * Queue was empty.
+		 */
+	} else if ( (*queue)->q_next == *queue ) {
+		/* 
+		 *  Queue only contained a single element. It will now be
+		 *  empty.  
+		 */
+		*queue = NULL;
+	} else {
+		/*
+		 * Queue contained several element.  Remove the first one.
+		 */
+		(*queue)->q_prev->q_next = (*queue)->q_next;
+		(*queue)->q_next->q_prev = (*queue)->q_prev;
+		*queue = (*queue)->q_next;
+	}
+	
+	/*
+	 * Return the removed entry (or NULL of queue was empty).
+	 */
+	return ret;
+}
+
+/*
+ * Function dequeue_general (queue, element)
+ *
+ *
+ */
+static irda_queue_t *dequeue_general(irda_queue_t **queue, irda_queue_t* element)
+{
+	irda_queue_t *ret;
+	
+	IRDA_DEBUG( 4, "dequeue_general()\n");
+	
+	/*
+	 * Set return value
+	 */
+	ret =  *queue;
+		
+	if ( *queue == NULL ) {
+		/*
+		 * Queue was empty.
+		 */
+	} else if ( (*queue)->q_next == *queue ) {
+		/* 
+		 *  Queue only contained a single element. It will now be
+		 *  empty.  
+		 */
+		*queue = NULL;
+		
+	} else {
+		/*
+		 *  Remove specific element.
+		 */
+		element->q_prev->q_next = element->q_next;
+		element->q_next->q_prev = element->q_prev;
+		if ( (*queue) == element)
+			(*queue) = element->q_next;
+	}
+	
+	/*
+	 * Return the removed entry (or NULL of queue was empty).
+	 */
+	return ret;
+}
+
+/************************ HASHBIN MANAGEMENT ************************/
+
+/*
+ * Function hashbin_create ( type, name )
+ *
+ *    Create hashbin!
+ *
+ */
+hashbin_t *hashbin_new(int type)
+{
+	hashbin_t* hashbin;
+	
+	/*
+	 * Allocate new hashbin
+	 */
+	hashbin = kmalloc( sizeof(hashbin_t), GFP_ATOMIC);
+	if (!hashbin)
+		return NULL;
+
+	/*
+	 * Initialize structure
+	 */
+	memset(hashbin, 0, sizeof(hashbin_t));
+	hashbin->hb_type = type;
+	hashbin->magic = HB_MAGIC;
+	//hashbin->hb_current = NULL;
+
+	/* Make sure all spinlock's are unlocked */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_init(&hashbin->hb_spinlock);
+	}
+
+	return hashbin;
+}
+EXPORT_SYMBOL(hashbin_new);
+
+
+/*
+ * Function hashbin_delete (hashbin, free_func)
+ *
+ *    Destroy hashbin, the free_func can be a user supplied special routine 
+ *    for deallocating this structure if it's complex. If not the user can 
+ *    just supply kfree, which should take care of the job.
+ */
+int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
+{
+	irda_queue_t* queue;
+	unsigned long flags = 0;
+	int i;
+
+	IRDA_ASSERT(hashbin != NULL, return -1;);
+	IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;);
+	
+	/* Synchronize */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+	}
+
+	/*
+	 *  Free the entries in the hashbin, TODO: use hashbin_clear when
+	 *  it has been shown to work
+	 */
+	for (i = 0; i < HASHBIN_SIZE; i ++ ) {
+		queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
+		while (queue ) {
+			if (free_func)
+				(*free_func)(queue);
+			queue = dequeue_first( 
+				(irda_queue_t**) &hashbin->hb_queue[i]);
+		}
+	}
+	
+	/* Cleanup local data */
+	hashbin->hb_current = NULL;
+	hashbin->magic = ~HB_MAGIC;
+
+	/* Release lock */
+	if ( hashbin->hb_type & HB_LOCK) {
+		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+	}
+
+	/*
+	 *  Free the hashbin structure
+	 */
+	kfree(hashbin);
+
+	return 0;
+}
+EXPORT_SYMBOL(hashbin_delete);
+
+/********************* HASHBIN LIST OPERATIONS *********************/
+
+/*
+ * Function hashbin_insert (hashbin, entry, name)
+ *
+ *    Insert an entry into the hashbin
+ *
+ */
+void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, 
+		    const char* name)
+{
+	unsigned long flags = 0;
+	int bin;
+
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT( hashbin != NULL, return;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return;);
+
+	/*
+	 * Locate hashbin
+	 */
+	if ( name )
+		hashv = hash( name );
+	bin = GET_HASHBIN( hashv );
+
+	/* Synchronize */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+	
+	/*
+	 * Store name and key
+	 */
+	entry->q_hash = hashv;
+	if ( name )
+		strlcpy( entry->q_name, name, sizeof(entry->q_name));
+	
+	/*
+	 * Insert new entry first
+	 */
+	enqueue_first( (irda_queue_t**) &hashbin->hb_queue[ bin ],
+		       entry);
+	hashbin->hb_size++;
+
+	/* Release lock */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+}
+EXPORT_SYMBOL(hashbin_insert);
+
+/* 
+ *  Function hashbin_remove_first (hashbin)
+ *
+ *    Remove first entry of the hashbin
+ *
+ * Note : this function no longer use hashbin_remove(), but does things
+ * similar to hashbin_remove_this(), so can be considered safe.
+ * Jean II
+ */
+void *hashbin_remove_first( hashbin_t *hashbin)
+{
+	unsigned long flags = 0;
+	irda_queue_t *entry = NULL;
+
+	/* Synchronize */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+
+	entry = hashbin_get_first( hashbin);
+	if ( entry != NULL) {
+		int	bin;
+		long	hashv;
+		/*
+		 * Locate hashbin
+		 */
+		hashv = entry->q_hash;
+		bin = GET_HASHBIN( hashv );
+
+		/*
+		 * Dequeue the entry...
+		 */
+		dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
+				 (irda_queue_t*) entry );
+		hashbin->hb_size--;
+		entry->q_next = NULL;
+		entry->q_prev = NULL;
+
+		/*
+		 *  Check if this item is the currently selected item, and in
+		 *  that case we must reset hb_current
+		 */
+		if ( entry == hashbin->hb_current)
+			hashbin->hb_current = NULL;
+	}
+
+	/* Release lock */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+
+	return entry;
+}
+
+
+/* 
+ *  Function hashbin_remove (hashbin, hashv, name)
+ *
+ *    Remove entry with the given name
+ *
+ *  The use of this function is highly discouraged, because the whole
+ *  concept behind hashbin_remove() is broken. In many cases, it's not
+ *  possible to guarantee the unicity of the index (either hashv or name),
+ *  leading to removing the WRONG entry.
+ *  The only simple safe use is :
+ *		hashbin_remove(hasbin, (int) self, NULL);
+ *  In other case, you must think hard to guarantee unicity of the index.
+ *  Jean II
+ */
+void* hashbin_remove( hashbin_t* hashbin, long hashv, const char* name)
+{
+	int bin, found = FALSE;
+	unsigned long flags = 0;
+	irda_queue_t* entry;
+
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT( hashbin != NULL, return NULL;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
+	
+	/*
+	 * Locate hashbin
+	 */
+	if ( name )
+		hashv = hash( name );
+	bin = GET_HASHBIN( hashv );
+
+	/* Synchronize */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+
+	/*
+	 * Search for entry
+	 */
+	entry = hashbin->hb_queue[ bin ];
+	if ( entry ) {
+		do {
+			/*
+			 * Check for key
+			 */
+			if ( entry->q_hash == hashv ) {
+				/*
+				 * Name compare too?
+				 */
+				if ( name ) {
+					if ( strcmp( entry->q_name, name) == 0)
+					{
+						found = TRUE;
+						break;
+					}
+				} else {
+					found = TRUE;
+					break;
+				}
+			}
+			entry = entry->q_next;
+		} while ( entry != hashbin->hb_queue[ bin ] );
+	}
+	
+	/*
+	 * If entry was found, dequeue it
+	 */
+	if ( found ) {
+		dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
+				 (irda_queue_t*) entry );
+		hashbin->hb_size--;
+
+		/*
+		 *  Check if this item is the currently selected item, and in
+		 *  that case we must reset hb_current
+		 */
+		if ( entry == hashbin->hb_current)
+			hashbin->hb_current = NULL;
+	}
+
+	/* Release lock */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+       
+	
+	/* Return */
+	if ( found ) 
+		return entry;
+	else
+		return NULL;
+	
+}
+EXPORT_SYMBOL(hashbin_remove);
+
+/* 
+ *  Function hashbin_remove_this (hashbin, entry)
+ *
+ *    Remove entry with the given name
+ *
+ * In some cases, the user of hashbin can't guarantee the unicity
+ * of either the hashv or name.
+ * In those cases, using the above function is guaranteed to cause troubles,
+ * so we use this one instead...
+ * And by the way, it's also faster, because we skip the search phase ;-)
+ */
+void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry)
+{
+	unsigned long flags = 0;
+	int	bin;
+	long	hashv;
+
+	IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT( hashbin != NULL, return NULL;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
+	IRDA_ASSERT( entry != NULL, return NULL;);
+	
+	/* Synchronize */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+
+	/* Check if valid and not already removed... */
+	if((entry->q_next == NULL) || (entry->q_prev == NULL)) {
+		entry = NULL;
+		goto out;
+	}
+
+	/*
+	 * Locate hashbin
+	 */
+	hashv = entry->q_hash;
+	bin = GET_HASHBIN( hashv );
+
+	/*
+	 * Dequeue the entry...
+	 */
+	dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
+			 (irda_queue_t*) entry );
+	hashbin->hb_size--;
+	entry->q_next = NULL;
+	entry->q_prev = NULL;
+
+	/*
+	 *  Check if this item is the currently selected item, and in
+	 *  that case we must reset hb_current
+	 */
+	if ( entry == hashbin->hb_current)
+		hashbin->hb_current = NULL;
+out:
+	/* Release lock */
+	if ( hashbin->hb_type & HB_LOCK ) {
+		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+	} /* Default is no-lock  */
+
+	return entry;
+}
+EXPORT_SYMBOL(hashbin_remove_this);
+
+/*********************** HASHBIN ENUMERATION ***********************/
+
+/*
+ * Function hashbin_common_find (hashbin, hashv, name)
+ *
+ *    Find item with the given hashv or name
+ *
+ */
+void* hashbin_find( hashbin_t* hashbin, long hashv, const char* name )
+{
+	int bin;
+	irda_queue_t* entry;
+
+	IRDA_DEBUG( 4, "hashbin_find()\n");
+
+	IRDA_ASSERT( hashbin != NULL, return NULL;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
+
+	/*
+	 * Locate hashbin
+	 */
+	if ( name )
+		hashv = hash( name );
+	bin = GET_HASHBIN( hashv );
+	
+	/*
+	 * Search for entry
+	 */
+	entry = hashbin->hb_queue[ bin];
+	if ( entry ) {
+		do {
+			/*
+			 * Check for key
+			 */
+			if ( entry->q_hash == hashv ) {
+				/*
+				 * Name compare too?
+				 */
+				if ( name ) {
+					if ( strcmp( entry->q_name, name ) == 0 ) {
+						return entry;
+					}
+				} else {
+					return entry;
+				}
+			}
+			entry = entry->q_next;
+		} while ( entry != hashbin->hb_queue[ bin ] );
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(hashbin_find);
+
+/*
+ * Function hashbin_lock_find (hashbin, hashv, name)
+ *
+ *    Find item with the given hashv or name
+ *
+ * Same, but with spinlock protection...
+ * I call it safe, but it's only safe with respect to the hashbin, not its
+ * content. - Jean II
+ */
+void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name )
+{
+	unsigned long flags = 0;
+	irda_queue_t* entry;
+
+	/* Synchronize */
+	spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+
+	/*
+	 * Search for entry
+	 */
+	entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name );
+
+	/* Release lock */
+	spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+
+	return entry;
+}
+EXPORT_SYMBOL(hashbin_lock_find);
+
+/*
+ * Function hashbin_find (hashbin, hashv, name, pnext)
+ *
+ *    Find an item with the given hashv or name, and its successor
+ *
+ * This function allow to do concurrent enumerations without the
+ * need to lock over the whole session, because the caller keep the
+ * context of the search. On the other hand, it might fail and return
+ * NULL if the entry is removed. - Jean II
+ */
+void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
+			 void ** pnext)
+{
+	unsigned long flags = 0;
+	irda_queue_t* entry;
+
+	/* Synchronize */
+	spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+
+	/*
+	 * Search for current entry
+	 * This allow to check if the current item is still in the
+	 * hashbin or has been removed.
+	 */
+	entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name );
+
+	/*
+	 * Trick hashbin_get_next() to return what we want
+	 */
+	if(entry) {
+		hashbin->hb_current = entry;
+		*pnext = hashbin_get_next( hashbin );
+	} else
+		*pnext = NULL;
+
+	/* Release lock */
+	spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+
+	return entry;
+}
+EXPORT_SYMBOL(hashbin_find_next);
+
+/*
+ * Function hashbin_get_first (hashbin)
+ *
+ *    Get a pointer to first element in hashbin, this function must be
+ *    called before any calls to hashbin_get_next()!
+ *
+ */
+irda_queue_t *hashbin_get_first( hashbin_t* hashbin) 
+{
+	irda_queue_t *entry;
+	int i;
+
+	IRDA_ASSERT( hashbin != NULL, return NULL;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
+
+	if ( hashbin == NULL)
+		return NULL;
+
+	for ( i = 0; i < HASHBIN_SIZE; i ++ ) {
+		entry = hashbin->hb_queue[ i];
+		if ( entry) {
+			hashbin->hb_current = entry;
+			return entry;
+		}
+	}
+	/*
+	 *  Did not find any item in hashbin
+	 */
+	return NULL;
+}
+EXPORT_SYMBOL(hashbin_get_first);
+
+/*
+ * Function hashbin_get_next (hashbin)
+ *
+ *    Get next item in hashbin. A series of hashbin_get_next() calls must
+ *    be started by a call to hashbin_get_first(). The function returns
+ *    NULL when all items have been traversed
+ * 
+ * The context of the search is stored within the hashbin, so you must
+ * protect yourself from concurrent enumerations. - Jean II
+ */
+irda_queue_t *hashbin_get_next( hashbin_t *hashbin)
+{
+	irda_queue_t* entry;
+	int bin;
+	int i;
+
+	IRDA_ASSERT( hashbin != NULL, return NULL;);
+	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
+
+	if ( hashbin->hb_current == NULL) {
+		IRDA_ASSERT( hashbin->hb_current != NULL, return NULL;);
+		return NULL;
+	}	
+	entry = hashbin->hb_current->q_next;
+	bin = GET_HASHBIN( entry->q_hash);
+
+	/*  
+	 *  Make sure that we are not back at the beginning of the queue
+	 *  again 
+	 */
+	if ( entry != hashbin->hb_queue[ bin ]) {
+		hashbin->hb_current = entry;
+
+		return entry;
+	}
+
+	/*
+	 *  Check that this is not the last queue in hashbin
+	 */
+	if ( bin >= HASHBIN_SIZE)
+		return NULL;
+	
+	/*
+	 *  Move to next queue in hashbin
+	 */
+	bin++;
+	for ( i = bin; i < HASHBIN_SIZE; i++ ) {
+		entry = hashbin->hb_queue[ i];
+		if ( entry) {
+			hashbin->hb_current = entry;
+			
+			return entry;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(hashbin_get_next);
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
new file mode 100644
index 00000000000..1b1c4193359
--- /dev/null
+++ b/net/irda/irsysctl.c
@@ -0,0 +1,297 @@
+/*********************************************************************
+ *                
+ * Filename:      irsysctl.c
+ * Version:       1.0
+ * Description:   Sysctl interface for IrDA
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun May 24 22:12:06 1998
+ * Modified at:   Fri Jun  4 02:50:15 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/ctype.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <net/irda/irda.h>		/* irda_debug */
+#include <net/irda/irias_object.h>
+
+#define NET_IRDA 412 /* Random number */
+enum { DISCOVERY=1, DEVNAME, DEBUG, FAST_POLL, DISCOVERY_SLOTS,
+       DISCOVERY_TIMEOUT, SLOT_TIMEOUT, MAX_BAUD_RATE, MIN_TX_TURN_TIME,
+       MAX_TX_DATA_SIZE, MAX_TX_WINDOW, MAX_NOREPLY_TIME, WARN_NOREPLY_TIME,
+       LAP_KEEPALIVE_TIME };
+
+extern int  sysctl_discovery;
+extern int  sysctl_discovery_slots;
+extern int  sysctl_discovery_timeout;
+extern int  sysctl_slot_timeout;
+extern int  sysctl_fast_poll_increase;
+extern char sysctl_devname[];
+extern int  sysctl_max_baud_rate;
+extern int  sysctl_min_tx_turn_time;
+extern int  sysctl_max_tx_data_size;
+extern int  sysctl_max_tx_window;
+extern int  sysctl_max_noreply_time;
+extern int  sysctl_warn_noreply_time;
+extern int  sysctl_lap_keepalive_time;
+
+/* this is needed for the proc_dointvec_minmax - Jean II */
+static int max_discovery_slots = 16;		/* ??? */
+static int min_discovery_slots = 1;
+/* IrLAP 6.13.2 says 25ms to 10+70ms - allow higher since some devices
+ * seems to require it. (from Dag's comment) */
+static int max_slot_timeout = 160;
+static int min_slot_timeout = 20;
+static int max_max_baud_rate = 16000000;	/* See qos.c - IrLAP spec */
+static int min_max_baud_rate = 2400;
+static int max_min_tx_turn_time = 10000;	/* See qos.c - IrLAP spec */
+static int min_min_tx_turn_time;
+static int max_max_tx_data_size = 2048;		/* See qos.c - IrLAP spec */
+static int min_max_tx_data_size = 64;
+static int max_max_tx_window = 7;		/* See qos.c - IrLAP spec */
+static int min_max_tx_window = 1;
+static int max_max_noreply_time = 40;		/* See qos.c - IrLAP spec */
+static int min_max_noreply_time = 3;
+static int max_warn_noreply_time = 3;		/* 3s == standard */
+static int min_warn_noreply_time = 1;		/* 1s == min WD_TIMER */
+static int max_lap_keepalive_time = 10000;	/* 10s */
+static int min_lap_keepalive_time = 100;	/* 100us */
+/* For other sysctl, I've no idea of the range. Maybe Dag could help
+ * us on that - Jean II */
+
+static int do_devname(ctl_table *table, int write, struct file *filp,
+		      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dostring(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write) {
+		struct ias_value *val;
+
+		val = irias_new_string_value(sysctl_devname);
+		if (val)
+			irias_object_change_attribute("Device", "DeviceName", val);
+	}
+	return ret;
+}
+
+/* One file */
+static ctl_table irda_table[] = {
+	{
+		.ctl_name	= DISCOVERY,
+		.procname	= "discovery",
+		.data		= &sysctl_discovery,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= DEVNAME,
+		.procname	= "devname",
+		.data		= sysctl_devname,
+		.maxlen		= 65,
+		.mode		= 0644,
+		.proc_handler	= &do_devname,
+		.strategy	= &sysctl_string
+	},
+#ifdef CONFIG_IRDA_DEBUG
+        {
+		.ctl_name	= DEBUG,
+		.procname	= "debug",
+		.data		= &irda_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif
+#ifdef CONFIG_IRDA_FAST_RR
+        {
+		.ctl_name	= FAST_POLL,
+		.procname	= "fast_poll_increase",
+		.data		= &sysctl_fast_poll_increase,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif
+	{
+		.ctl_name	= DISCOVERY_SLOTS,
+		.procname	= "discovery_slots",
+		.data		= &sysctl_discovery_slots,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_discovery_slots,
+		.extra2		= &max_discovery_slots
+	},
+	{
+		.ctl_name	= DISCOVERY_TIMEOUT,
+		.procname	= "discovery_timeout",
+		.data		= &sysctl_discovery_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= SLOT_TIMEOUT,
+		.procname	= "slot_timeout",
+		.data		= &sysctl_slot_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_slot_timeout,
+		.extra2		= &max_slot_timeout
+	},
+	{
+		.ctl_name	= MAX_BAUD_RATE,
+		.procname	= "max_baud_rate",
+		.data		= &sysctl_max_baud_rate,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_max_baud_rate,
+		.extra2		= &max_max_baud_rate
+	},
+	{
+		.ctl_name	= MIN_TX_TURN_TIME,
+		.procname	= "min_tx_turn_time",
+		.data		= &sysctl_min_tx_turn_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_min_tx_turn_time,
+		.extra2		= &max_min_tx_turn_time
+	},
+	{
+		.ctl_name	= MAX_TX_DATA_SIZE,
+		.procname	= "max_tx_data_size",
+		.data		= &sysctl_max_tx_data_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_max_tx_data_size,
+		.extra2		= &max_max_tx_data_size
+	},
+	{
+		.ctl_name	= MAX_TX_WINDOW,
+		.procname	= "max_tx_window",
+		.data		= &sysctl_max_tx_window,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_max_tx_window,
+		.extra2		= &max_max_tx_window
+	},
+	{
+		.ctl_name	= MAX_NOREPLY_TIME,
+		.procname	= "max_noreply_time",
+		.data		= &sysctl_max_noreply_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_max_noreply_time,
+		.extra2		= &max_max_noreply_time
+	},
+	{
+		.ctl_name	= WARN_NOREPLY_TIME,
+		.procname	= "warn_noreply_time",
+		.data		= &sysctl_warn_noreply_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_warn_noreply_time,
+		.extra2		= &max_warn_noreply_time
+	},
+	{
+		.ctl_name	= LAP_KEEPALIVE_TIME,
+		.procname	= "lap_keepalive_time",
+		.data		= &sysctl_lap_keepalive_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_lap_keepalive_time,
+		.extra2		= &max_lap_keepalive_time
+	},
+	{ .ctl_name = 0 }
+};
+
+/* One directory */
+static ctl_table irda_net_table[] = {
+	{
+		.ctl_name	= NET_IRDA,
+		.procname	= "irda",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= irda_table
+	},
+	{ .ctl_name = 0 }
+};
+
+/* The parent directory */
+static ctl_table irda_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= irda_net_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header *irda_table_header;
+
+/*
+ * Function irda_sysctl_register (void)
+ *
+ *    Register our sysctl interface
+ *
+ */
+int __init irda_sysctl_register(void)
+{
+	irda_table_header = register_sysctl_table(irda_root_table, 0);
+	if (!irda_table_header)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Function irda_sysctl_unregister (void)
+ *
+ *    Unregister our sysctl interface
+ *
+ */
+void __exit irda_sysctl_unregister(void) 
+{
+	unregister_sysctl_table(irda_table_header);
+}
+
+
+
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
new file mode 100644
index 00000000000..d091ccf773b
--- /dev/null
+++ b/net/irda/irttp.c
@@ -0,0 +1,1912 @@
+/*********************************************************************
+ *                
+ * Filename:      irttp.c
+ * Version:       1.2
+ * Description:   Tiny Transport Protocol (TTP) implementation
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:31 1997
+ * Modified at:   Wed Jan  5 11:31:27 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlmp.h>
+#include <net/irda/parameters.h>
+#include <net/irda/irttp.h>
+
+static struct irttp_cb *irttp = NULL;
+
+static void __irttp_close_tsap(struct tsap_cb *self);
+
+static int irttp_data_indication(void *instance, void *sap, 
+				 struct sk_buff *skb);
+static int irttp_udata_indication(void *instance, void *sap, 
+				  struct sk_buff *skb);
+static void irttp_disconnect_indication(void *instance, void *sap,  
+					LM_REASON reason, struct sk_buff *);
+static void irttp_connect_indication(void *instance, void *sap, 
+				     struct qos_info *qos, __u32 max_sdu_size,
+				     __u8 header_size, struct sk_buff *skb);
+static void irttp_connect_confirm(void *instance, void *sap, 
+				  struct qos_info *qos, __u32 max_sdu_size, 
+				  __u8 header_size, struct sk_buff *skb);
+static void irttp_run_tx_queue(struct tsap_cb *self);
+static void irttp_run_rx_queue(struct tsap_cb *self);
+
+static void irttp_flush_queues(struct tsap_cb *self);
+static void irttp_fragment_skb(struct tsap_cb *self, struct sk_buff *skb);
+static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self);
+static void irttp_todo_expired(unsigned long data);
+static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, 
+				    int get);
+
+static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow);
+static void irttp_status_indication(void *instance,
+				    LINK_STATUS link, LOCK_STATUS lock);
+
+/* Information for parsing parameters in IrTTP */
+static pi_minor_info_t pi_minor_call_table[] = {
+	{ NULL, 0 },                                             /* 0x00 */
+	{ irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
+};
+static pi_major_info_t pi_major_call_table[] = {{ pi_minor_call_table, 2 }};
+static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
+
+/************************ GLOBAL PROCEDURES ************************/
+
+/*
+ * Function irttp_init (void)
+ *
+ *    Initialize the IrTTP layer. Called by module initialization code
+ *
+ */
+int __init irttp_init(void)
+{
+	/* Initialize the irttp structure. */
+	if (irttp == NULL) {
+		irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL);
+		if (irttp == NULL)
+			return -ENOMEM;
+	}
+	memset(irttp, 0, sizeof(struct irttp_cb));
+
+	irttp->magic = TTP_MAGIC;
+
+	irttp->tsaps = hashbin_new(HB_LOCK);
+	if (!irttp->tsaps) {
+		IRDA_ERROR("%s: can't allocate IrTTP hashbin!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Function irttp_cleanup (void)
+ *
+ *    Called by module destruction/cleanup code
+ *
+ */
+void __exit irttp_cleanup(void) 
+{
+	/* Check for main structure */
+	IRDA_ASSERT(irttp != NULL, return;);
+	IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;);
+
+	/*
+	 *  Delete hashbin and close all TSAP instances in it
+	 */
+	hashbin_delete(irttp->tsaps, (FREE_FUNC) __irttp_close_tsap);
+
+	irttp->magic = 0;
+
+	/* De-allocate main structure */
+	kfree(irttp);
+
+	irttp = NULL;
+}
+
+/*************************** SUBROUTINES ***************************/
+
+/*
+ * Function irttp_start_todo_timer (self, timeout)
+ *
+ *    Start todo timer.
+ *
+ * Made it more effient and unsensitive to race conditions - Jean II
+ */
+static inline void irttp_start_todo_timer(struct tsap_cb *self, int timeout)
+{
+	/* Set new value for timer */
+	mod_timer(&self->todo_timer, jiffies + timeout);
+}
+
+/*
+ * Function irttp_todo_expired (data)
+ *
+ *    Todo timer has expired!
+ *
+ * One of the restriction of the timer is that it is run only on the timer
+ * interrupt which run every 10ms. This mean that even if you set the timer
+ * with a delay of 0, it may take up to 10ms before it's run.
+ * So, to minimise latency and keep cache fresh, we try to avoid using
+ * it as much as possible.
+ * Note : we can't use tasklets, because they can't be asynchronously
+ * killed (need user context), and we can't guarantee that here...
+ * Jean II
+ */
+static void irttp_todo_expired(unsigned long data)
+{
+	struct tsap_cb *self = (struct tsap_cb *) data;
+
+	/* Check that we still exist */
+	if (!self || self->magic != TTP_TSAP_MAGIC)
+		return;
+
+	IRDA_DEBUG(4, "%s(instance=%p)\n", __FUNCTION__, self);
+
+	/* Try to make some progress, especially on Tx side - Jean II */
+	irttp_run_rx_queue(self);
+	irttp_run_tx_queue(self);
+
+	/* Check if time for disconnect */
+	if (test_bit(0, &self->disconnect_pend)) {
+		/* Check if it's possible to disconnect yet */
+		if (skb_queue_empty(&self->tx_queue)) {
+			/* Make sure disconnect is not pending anymore */
+			clear_bit(0, &self->disconnect_pend);	/* FALSE */
+
+			/* Note : self->disconnect_skb may be NULL */
+			irttp_disconnect_request(self, self->disconnect_skb,
+						 P_NORMAL);
+			self->disconnect_skb = NULL;
+		} else {
+			/* Try again later */
+			irttp_start_todo_timer(self, HZ/10);
+
+			/* No reason to try and close now */
+			return;
+		}
+	}
+
+	/* Check if it's closing time */
+	if (self->close_pend)
+		/* Finish cleanup */
+		irttp_close_tsap(self);
+}
+
+/*
+ * Function irttp_flush_queues (self)
+ *
+ *     Flushes (removes all frames) in transitt-buffer (tx_list)
+ */
+void irttp_flush_queues(struct tsap_cb *self)
+{
+	struct sk_buff* skb;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	/* Deallocate frames waiting to be sent */
+	while ((skb = skb_dequeue(&self->tx_queue)) != NULL)
+		dev_kfree_skb(skb);
+
+	/* Deallocate received frames */
+	while ((skb = skb_dequeue(&self->rx_queue)) != NULL)
+		dev_kfree_skb(skb);
+
+	/* Deallocate received fragments */
+	while ((skb = skb_dequeue(&self->rx_fragments)) != NULL)
+		dev_kfree_skb(skb);
+}
+
+/*
+ * Function irttp_reassemble (self)
+ *
+ *    Makes a new (continuous) skb of all the fragments in the fragment
+ *    queue
+ *
+ */
+static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
+{
+	struct sk_buff *skb, *frag;
+	int n = 0;  /* Fragment index */
+
+	IRDA_ASSERT(self != NULL, return NULL;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return NULL;);
+
+	IRDA_DEBUG(2, "%s(), self->rx_sdu_size=%d\n", __FUNCTION__,
+		   self->rx_sdu_size);
+
+	skb = dev_alloc_skb(TTP_HEADER + self->rx_sdu_size);
+	if (!skb)
+		return NULL;
+
+	/*
+	 * Need to reserve space for TTP header in case this skb needs to
+	 * be requeued in case delivery failes
+	 */
+	skb_reserve(skb, TTP_HEADER);
+	skb_put(skb, self->rx_sdu_size);
+
+	/*
+	 *  Copy all fragments to a new buffer
+	 */
+	while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
+		memcpy(skb->data+n, frag->data, frag->len);
+		n += frag->len;
+
+		dev_kfree_skb(frag);
+	}
+
+	IRDA_DEBUG(2,
+		   "%s(), frame len=%d, rx_sdu_size=%d, rx_max_sdu_size=%d\n",
+		   __FUNCTION__, n, self->rx_sdu_size, self->rx_max_sdu_size);
+	/* Note : irttp_run_rx_queue() calculate self->rx_sdu_size
+	 * by summing the size of all fragments, so we should always
+	 * have n == self->rx_sdu_size, except in cases where we
+	 * droped the last fragment (when self->rx_sdu_size exceed
+	 * self->rx_max_sdu_size), where n < self->rx_sdu_size.
+	 * Jean II */
+	IRDA_ASSERT(n <= self->rx_sdu_size, n = self->rx_sdu_size;);
+
+	/* Set the new length */
+	skb_trim(skb, n);
+
+	self->rx_sdu_size = 0;
+
+	return skb;
+}
+
+/*
+ * Function irttp_fragment_skb (skb)
+ *
+ *    Fragments a frame and queues all the fragments for transmission
+ *
+ */
+static inline void irttp_fragment_skb(struct tsap_cb *self,
+				      struct sk_buff *skb)
+{
+	struct sk_buff *frag;
+	__u8 *frame;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	/*
+	 *  Split frame into a number of segments
+	 */
+	while (skb->len > self->max_seg_size) {
+		IRDA_DEBUG(2, "%s(), fragmenting ...\n", __FUNCTION__);
+
+		/* Make new segment */
+		frag = dev_alloc_skb(self->max_seg_size+self->max_header_size);
+		if (!frag)
+			return;
+
+		skb_reserve(frag, self->max_header_size);
+
+		/* Copy data from the original skb into this fragment. */
+		memcpy(skb_put(frag, self->max_seg_size), skb->data,
+		       self->max_seg_size);
+
+		/* Insert TTP header, with the more bit set */
+		frame = skb_push(frag, TTP_HEADER);
+		frame[0] = TTP_MORE;
+
+		/* Hide the copied data from the original skb */
+		skb_pull(skb, self->max_seg_size);
+
+		/* Queue fragment */
+		skb_queue_tail(&self->tx_queue, frag);
+	}
+	/* Queue what is left of the original skb */
+	IRDA_DEBUG(2, "%s(), queuing last segment\n", __FUNCTION__);
+
+	frame = skb_push(skb, TTP_HEADER);
+	frame[0] = 0x00; /* Clear more bit */
+
+	/* Queue fragment */
+	skb_queue_tail(&self->tx_queue, skb);
+}
+
+/*
+ * Function irttp_param_max_sdu_size (self, param)
+ *
+ *    Handle the MaxSduSize parameter in the connect frames, this function
+ *    will be called both when this parameter needs to be inserted into, and
+ *    extracted from the connect frames
+ */
+static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
+				    int get)
+{
+	struct tsap_cb *self;
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+
+	if (get)
+		param->pv.i = self->tx_max_sdu_size;
+	else
+		self->tx_max_sdu_size = param->pv.i;
+
+	IRDA_DEBUG(1, "%s(), MaxSduSize=%d\n", __FUNCTION__, param->pv.i);
+
+	return 0;
+}
+
+/*************************** CLIENT CALLS ***************************/
+/************************** LMP CALLBACKS **************************/
+/* Everything is happily mixed up. Waiting for next clean up - Jean II */
+
+/*
+ * Function irttp_open_tsap (stsap, notify)
+ *
+ *    Create TSAP connection endpoint,
+ */
+struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
+{
+	struct tsap_cb *self;
+	struct lsap_cb *lsap;
+	notify_t ttp_notify;
+
+	IRDA_ASSERT(irttp != NULL, return NULL;);
+	IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;);
+
+	/* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
+	 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
+	 * JeanII */
+	if((stsap_sel != LSAP_ANY) &&
+	   ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
+		IRDA_DEBUG(0, "%s(), invalid tsap!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	self = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
+	if (self == NULL) {
+		IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__);
+		return NULL;
+	}
+	memset(self, 0, sizeof(struct tsap_cb));
+	spin_lock_init(&self->lock);
+
+	/* Initialise todo timer */
+	init_timer(&self->todo_timer);
+	self->todo_timer.data     = (unsigned long) self;
+	self->todo_timer.function = &irttp_todo_expired;
+
+	/* Initialize callbacks for IrLMP to use */
+	irda_notify_init(&ttp_notify);
+	ttp_notify.connect_confirm = irttp_connect_confirm;
+	ttp_notify.connect_indication = irttp_connect_indication;
+	ttp_notify.disconnect_indication = irttp_disconnect_indication;
+	ttp_notify.data_indication = irttp_data_indication;
+	ttp_notify.udata_indication = irttp_udata_indication;
+	ttp_notify.flow_indication = irttp_flow_indication;
+	if(notify->status_indication != NULL)
+		ttp_notify.status_indication = irttp_status_indication;
+	ttp_notify.instance = self;
+	strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
+
+	self->magic = TTP_TSAP_MAGIC;
+	self->connected = FALSE;
+
+	skb_queue_head_init(&self->rx_queue);
+	skb_queue_head_init(&self->tx_queue);
+	skb_queue_head_init(&self->rx_fragments);
+	/*
+	 *  Create LSAP at IrLMP layer
+	 */
+	lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0);
+	if (lsap == NULL) {
+		IRDA_WARNING("%s: unable to allocate LSAP!!\n", __FUNCTION__);
+		return NULL;
+	}
+
+	/*
+	 *  If user specified LSAP_ANY as source TSAP selector, then IrLMP
+	 *  will replace it with whatever source selector which is free, so
+	 *  the stsap_sel we have might not be valid anymore
+	 */
+	self->stsap_sel = lsap->slsap_sel;
+	IRDA_DEBUG(4, "%s(), stsap_sel=%02x\n", __FUNCTION__, self->stsap_sel);
+
+	self->notify = *notify;
+	self->lsap = lsap;
+
+	hashbin_insert(irttp->tsaps, (irda_queue_t *) self, (long) self, NULL);
+
+	if (credit > TTP_RX_MAX_CREDIT)
+		self->initial_credit = TTP_RX_MAX_CREDIT;
+	else
+		self->initial_credit = credit;
+
+	return self;
+}
+EXPORT_SYMBOL(irttp_open_tsap);
+
+/*
+ * Function irttp_close (handle)
+ *
+ *    Remove an instance of a TSAP. This function should only deal with the
+ *    deallocation of the TSAP, and resetting of the TSAPs values;
+ *
+ */
+static void __irttp_close_tsap(struct tsap_cb *self)
+{
+	/* First make sure we're connected. */
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	irttp_flush_queues(self);
+
+	del_timer(&self->todo_timer);
+
+	/* This one won't be cleaned up if we are disconnect_pend + close_pend
+	 * and we receive a disconnect_indication */
+	if (self->disconnect_skb)
+		dev_kfree_skb(self->disconnect_skb);
+
+	self->connected = FALSE;
+	self->magic = ~TTP_TSAP_MAGIC;
+
+	kfree(self);
+}
+
+/*
+ * Function irttp_close (self)
+ *
+ *    Remove TSAP from list of all TSAPs and then deallocate all resources
+ *    associated with this TSAP
+ *
+ * Note : because we *free* the tsap structure, it is the responsibility
+ * of the caller to make sure we are called only once and to deal with
+ * possible race conditions. - Jean II
+ */
+int irttp_close_tsap(struct tsap_cb *self)
+{
+	struct tsap_cb *tsap;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+
+	/* Make sure tsap has been disconnected */
+	if (self->connected) {
+		/* Check if disconnect is not pending */
+		if (!test_bit(0, &self->disconnect_pend)) {
+			IRDA_WARNING("%s: TSAP still connected!\n",
+				     __FUNCTION__);
+			irttp_disconnect_request(self, NULL, P_NORMAL);
+		}
+		self->close_pend = TRUE;
+		irttp_start_todo_timer(self, HZ/10);
+
+		return 0; /* Will be back! */
+	}
+
+	tsap = hashbin_remove(irttp->tsaps, (long) self, NULL);
+
+	IRDA_ASSERT(tsap == self, return -1;);
+
+	/* Close corresponding LSAP */
+	if (self->lsap) {
+		irlmp_close_lsap(self->lsap);
+		self->lsap = NULL;
+	}
+
+	__irttp_close_tsap(self);
+
+	return 0;
+}
+EXPORT_SYMBOL(irttp_close_tsap);
+
+/*
+ * Function irttp_udata_request (self, skb)
+ *
+ *    Send unreliable data on this TSAP
+ *
+ */
+int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
+{
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	/* Check that nothing bad happens */
+	if ((skb->len == 0) || (!self->connected)) {
+		IRDA_DEBUG(1, "%s(), No data, or not connected\n",
+			   __FUNCTION__);
+		goto err;
+	}
+
+	if (skb->len > self->max_seg_size) {
+		IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n",
+			   __FUNCTION__);
+		goto err;
+	}
+
+	irlmp_udata_request(self->lsap, skb);
+	self->stats.tx_packets++;
+
+	return 0;
+
+err:
+	dev_kfree_skb(skb);
+	return -1;
+}
+EXPORT_SYMBOL(irttp_udata_request);
+
+
+/*
+ * Function irttp_data_request (handle, skb)
+ *
+ *    Queue frame for transmission. If SAR is enabled, fragement the frame
+ *    and queue the fragments for transmission
+ */
+int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
+{
+	__u8 *frame;
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	IRDA_DEBUG(2, "%s() : queue len = %d\n", __FUNCTION__,
+		   skb_queue_len(&self->tx_queue));
+
+	/* Check that nothing bad happens */
+	if ((skb->len == 0) || (!self->connected)) {
+		IRDA_WARNING("%s: No data, or not connected\n", __FUNCTION__);
+		ret = -ENOTCONN;
+		goto err;
+	}
+
+	/*
+	 *  Check if SAR is disabled, and the frame is larger than what fits
+	 *  inside an IrLAP frame
+	 */
+	if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
+		IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n",
+			   __FUNCTION__);
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	/*
+	 *  Check if SAR is enabled, and the frame is larger than the
+	 *  TxMaxSduSize
+	 */
+	if ((self->tx_max_sdu_size != 0) &&
+	    (self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
+	    (skb->len > self->tx_max_sdu_size))
+	{
+		IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
+			   __FUNCTION__);
+		ret = -EMSGSIZE;
+		goto err;
+	}
+	/*
+	 *  Check if transmit queue is full
+	 */
+	if (skb_queue_len(&self->tx_queue) >= TTP_TX_MAX_QUEUE) {
+		/*
+		 *  Give it a chance to empty itself
+		 */
+		irttp_run_tx_queue(self);
+
+		/* Drop packet. This error code should trigger the caller
+		 * to resend the data in the client code - Jean II */
+		ret = -ENOBUFS;
+		goto err;
+	}
+
+	/* Queue frame, or queue frame segments */
+	if ((self->tx_max_sdu_size == 0) || (skb->len < self->max_seg_size)) {
+		/* Queue frame */
+		IRDA_ASSERT(skb_headroom(skb) >= TTP_HEADER, return -1;);
+		frame = skb_push(skb, TTP_HEADER);
+		frame[0] = 0x00; /* Clear more bit */
+
+		skb_queue_tail(&self->tx_queue, skb);
+	} else {
+		/*
+		 *  Fragment the frame, this function will also queue the
+		 *  fragments, we don't care about the fact the transmit
+		 *  queue may be overfilled by all the segments for a little
+		 *  while
+		 */
+		irttp_fragment_skb(self, skb);
+	}
+
+	/* Check if we can accept more data from client */
+	if ((!self->tx_sdu_busy) &&
+	    (skb_queue_len(&self->tx_queue) > TTP_TX_HIGH_THRESHOLD)) {
+		/* Tx queue filling up, so stop client. */
+		if (self->notify.flow_indication) {
+			self->notify.flow_indication(self->notify.instance,
+						     self, FLOW_STOP);
+		}
+		/* self->tx_sdu_busy is the state of the client.
+		 * Update state after notifying client to avoid
+		 * race condition with irttp_flow_indication().
+		 * If the queue empty itself after our test but before
+		 * we set the flag, we will fix ourselves below in
+		 * irttp_run_tx_queue().
+		 * Jean II */
+		self->tx_sdu_busy = TRUE;
+	}
+
+	/* Try to make some progress */
+	irttp_run_tx_queue(self);
+
+	return 0;
+
+err:
+	dev_kfree_skb(skb);
+	return ret;
+}
+EXPORT_SYMBOL(irttp_data_request);
+
+/*
+ * Function irttp_run_tx_queue (self)
+ *
+ *    Transmit packets queued for transmission (if possible)
+ *
+ */
+static void irttp_run_tx_queue(struct tsap_cb *self)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+	int n;
+
+	IRDA_DEBUG(2, "%s() : send_credit = %d, queue_len = %d\n",
+		   __FUNCTION__,
+		   self->send_credit, skb_queue_len(&self->tx_queue));
+
+	/* Get exclusive access to the tx queue, otherwise don't touch it */
+	if (irda_lock(&self->tx_queue_lock) == FALSE)
+		return;
+
+	/* Try to send out frames as long as we have credits
+	 * and as long as LAP is not full. If LAP is full, it will
+	 * poll us through irttp_flow_indication() - Jean II */
+	while ((self->send_credit > 0) &&
+	       (!irlmp_lap_tx_queue_full(self->lsap)) &&
+	       (skb = skb_dequeue(&self->tx_queue)))
+	{
+		/*
+		 *  Since we can transmit and receive frames concurrently,
+		 *  the code below is a critical region and we must assure that
+		 *  nobody messes with the credits while we update them.
+		 */
+		spin_lock_irqsave(&self->lock, flags);
+
+		n = self->avail_credit;
+		self->avail_credit = 0;
+
+		/* Only room for 127 credits in frame */
+		if (n > 127) {
+			self->avail_credit = n-127;
+			n = 127;
+		}
+		self->remote_credit += n;
+		self->send_credit--;
+
+		spin_unlock_irqrestore(&self->lock, flags);
+
+		/*
+		 *  More bit must be set by the data_request() or fragment()
+		 *  functions
+		 */
+		skb->data[0] |= (n & 0x7f);
+
+		/* Detach from socket.
+		 * The current skb has a reference to the socket that sent
+		 * it (skb->sk). When we pass it to IrLMP, the skb will be
+		 * stored in in IrLAP (self->wx_list). When we are within
+		 * IrLAP, we lose the notion of socket, so we should not
+		 * have a reference to a socket. So, we drop it here.
+		 *
+		 * Why does it matter ?
+		 * When the skb is freed (kfree_skb), if it is associated
+		 * with a socket, it release buffer space on the socket
+		 * (through sock_wfree() and sock_def_write_space()).
+		 * If the socket no longer exist, we may crash. Hard.
+		 * When we close a socket, we make sure that associated packets
+		 * in IrTTP are freed. However, we have no way to cancel
+		 * the packet that we have passed to IrLAP. So, if a packet
+		 * remains in IrLAP (retry on the link or else) after we
+		 * close the socket, we are dead !
+		 * Jean II */
+		if (skb->sk != NULL) {
+			/* IrSOCK application, IrOBEX, ... */
+			skb_orphan(skb);
+		}
+			/* IrCOMM over IrTTP, IrLAN, ... */
+
+		/* Pass the skb to IrLMP - done */
+		irlmp_data_request(self->lsap, skb);
+		self->stats.tx_packets++;
+	}
+
+	/* Check if we can accept more frames from client.
+	 * We don't want to wait until the todo timer to do that, and we
+	 * can't use tasklets (grr...), so we are obliged to give control
+	 * to client. That's ok, this test will be true not too often
+	 * (max once per LAP window) and we are called from places
+	 * where we can spend a bit of time doing stuff. - Jean II */
+	if ((self->tx_sdu_busy) &&
+	    (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
+	    (!self->close_pend))
+	{
+		if (self->notify.flow_indication)
+			self->notify.flow_indication(self->notify.instance,
+						     self, FLOW_START);
+
+		/* self->tx_sdu_busy is the state of the client.
+		 * We don't really have a race here, but it's always safer
+		 * to update our state after the client - Jean II */
+		self->tx_sdu_busy = FALSE;
+	}
+
+	/* Reset lock */
+	self->tx_queue_lock = 0;
+}
+
+/*
+ * Function irttp_give_credit (self)
+ *
+ *    Send a dataless flowdata TTP-PDU and give available credit to peer
+ *    TSAP
+ */
+static inline void irttp_give_credit(struct tsap_cb *self)
+{
+	struct sk_buff *tx_skb = NULL;
+	unsigned long flags;
+	int n;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	IRDA_DEBUG(4, "%s() send=%d,avail=%d,remote=%d\n",
+		   __FUNCTION__,
+		   self->send_credit, self->avail_credit, self->remote_credit);
+
+	/* Give credit to peer */
+	tx_skb = dev_alloc_skb(64);
+	if (!tx_skb)
+		return;
+
+	/* Reserve space for LMP, and LAP header */
+	skb_reserve(tx_skb, self->max_header_size);
+
+	/*
+	 *  Since we can transmit and receive frames concurrently,
+	 *  the code below is a critical region and we must assure that
+	 *  nobody messes with the credits while we update them.
+	 */
+	spin_lock_irqsave(&self->lock, flags);
+
+	n = self->avail_credit;
+	self->avail_credit = 0;
+
+	/* Only space for 127 credits in frame */
+	if (n > 127) {
+		self->avail_credit = n - 127;
+		n = 127;
+	}
+	self->remote_credit += n;
+
+	spin_unlock_irqrestore(&self->lock, flags);
+
+	skb_put(tx_skb, 1);
+	tx_skb->data[0] = (__u8) (n & 0x7f);
+
+	irlmp_data_request(self->lsap, tx_skb);
+	self->stats.tx_packets++;
+}
+
+/*
+ * Function irttp_udata_indication (instance, sap, skb)
+ *
+ *    Received some unit-data (unreliable)
+ *
+ */
+static int irttp_udata_indication(void *instance, void *sap,
+				  struct sk_buff *skb)
+{
+	struct tsap_cb *self;
+	int err;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+	IRDA_ASSERT(skb != NULL, return -1;);
+
+	self->stats.rx_packets++;
+
+	/* Just pass data to layer above */
+	if (self->notify.udata_indication) {
+		err = self->notify.udata_indication(self->notify.instance,
+						    self,skb);
+		/* Same comment as in irttp_do_data_indication() */
+		if (!err) 
+			return 0;
+	}
+	/* Either no handler, or handler returns an error */
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Function irttp_data_indication (instance, sap, skb)
+ *
+ *    Receive segment from IrLMP.
+ *
+ */
+static int irttp_data_indication(void *instance, void *sap,
+				 struct sk_buff *skb)
+{
+	struct tsap_cb *self;
+	unsigned long flags;
+	int n;
+
+	self = (struct tsap_cb *) instance;
+
+	n = skb->data[0] & 0x7f;     /* Extract the credits */
+
+	self->stats.rx_packets++;
+
+	/*  Deal with inbound credit
+	 *  Since we can transmit and receive frames concurrently,
+	 *  the code below is a critical region and we must assure that
+	 *  nobody messes with the credits while we update them.
+	 */
+	spin_lock_irqsave(&self->lock, flags);
+	self->send_credit += n;
+	if (skb->len > 1)
+		self->remote_credit--;
+	spin_unlock_irqrestore(&self->lock, flags);
+
+	/*
+	 *  Data or dataless packet? Dataless frames contains only the
+	 *  TTP_HEADER.
+	 */
+	if (skb->len > 1) {
+		/*
+		 *  We don't remove the TTP header, since we must preserve the
+		 *  more bit, so the defragment routing knows what to do
+		 */
+		skb_queue_tail(&self->rx_queue, skb);
+	} else {
+		/* Dataless flowdata TTP-PDU */
+		dev_kfree_skb(skb);
+	}
+
+
+	/* Push data to the higher layer.
+	 * We do it synchronously because running the todo timer for each
+	 * receive packet would be too much overhead and latency.
+	 * By passing control to the higher layer, we run the risk that
+	 * it may take time or grab a lock. Most often, the higher layer
+	 * will only put packet in a queue.
+	 * Anyway, packets are only dripping through the IrDA, so we can
+	 * have time before the next packet.
+	 * Further, we are run from NET_BH, so the worse that can happen is
+	 * us missing the optimal time to send back the PF bit in LAP.
+	 * Jean II */
+	irttp_run_rx_queue(self);
+
+	/* We now give credits to peer in irttp_run_rx_queue().
+	 * We need to send credit *NOW*, otherwise we are going
+	 * to miss the next Tx window. The todo timer may take
+	 * a while before it's run... - Jean II */
+
+	/*
+	 * If the peer device has given us some credits and we didn't have
+         * anyone from before, then we need to shedule the tx queue.
+	 * We need to do that because our Tx have stopped (so we may not
+	 * get any LAP flow indication) and the user may be stopped as
+	 * well. - Jean II
+	 */
+	if (self->send_credit == n) {
+		/* Restart pushing stuff to LAP */
+		irttp_run_tx_queue(self);
+		/* Note : we don't want to schedule the todo timer
+		 * because it has horrible latency. No tasklets
+		 * because the tasklet API is broken. - Jean II */
+	}
+
+	return 0;
+}
+
+/*
+ * Function irttp_status_indication (self, reason)
+ *
+ *    Status_indication, just pass to the higher layer...
+ *
+ */
+static void irttp_status_indication(void *instance,
+				    LINK_STATUS link, LOCK_STATUS lock)
+{
+	struct tsap_cb *self;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	/* Check if client has already closed the TSAP and gone away */
+	if (self->close_pend)
+		return;
+
+	/*
+	 *  Inform service user if he has requested it
+	 */
+	if (self->notify.status_indication != NULL)
+		self->notify.status_indication(self->notify.instance,
+					       link, lock);
+	else
+		IRDA_DEBUG(2, "%s(), no handler\n", __FUNCTION__);
+}
+
+/*
+ * Function irttp_flow_indication (self, reason)
+ *
+ *    Flow_indication : IrLAP tells us to send more data.
+ *
+ */
+static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
+{
+	struct tsap_cb *self;
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	IRDA_DEBUG(4, "%s(instance=%p)\n", __FUNCTION__, self);
+
+	/* We are "polled" directly from LAP, and the LAP want to fill
+	 * its Tx window. We want to do our best to send it data, so that
+	 * we maximise the window. On the other hand, we want to limit the
+	 * amount of work here so that LAP doesn't hang forever waiting
+	 * for packets. - Jean II */
+
+	/* Try to send some packets. Currently, LAP calls us every time
+	 * there is one free slot, so we will send only one packet.
+	 * This allow the scheduler to do its round robin - Jean II */
+	irttp_run_tx_queue(self);
+
+	/* Note regarding the interraction with higher layer.
+	 * irttp_run_tx_queue() may call the client when its queue
+	 * start to empty, via notify.flow_indication(). Initially.
+	 * I wanted this to happen in a tasklet, to avoid client
+	 * grabbing the CPU, but we can't use tasklets safely. And timer
+	 * is definitely too slow.
+	 * This will happen only once per LAP window, and usually at
+	 * the third packet (unless window is smaller). LAP is still
+	 * doing mtt and sending first packet so it's sort of OK
+	 * to do that. Jean II */
+
+	/* If we need to send disconnect. try to do it now */
+	if(self->disconnect_pend)
+		irttp_start_todo_timer(self, 0);
+}
+
+/*
+ * Function irttp_flow_request (self, command)
+ *
+ *    This function could be used by the upper layers to tell IrTTP to stop
+ *    delivering frames if the receive queues are starting to get full, or
+ *    to tell IrTTP to start delivering frames again.
+ */
+void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow)
+{
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	switch (flow) {
+	case FLOW_STOP:
+		IRDA_DEBUG(1, "%s(), flow stop\n", __FUNCTION__);
+		self->rx_sdu_busy = TRUE;
+		break;
+	case FLOW_START:
+		IRDA_DEBUG(1, "%s(), flow start\n", __FUNCTION__);
+		self->rx_sdu_busy = FALSE;
+
+		/* Client say he can accept more data, try to free our
+		 * queues ASAP - Jean II */
+		irttp_run_rx_queue(self);
+
+		break;
+	default:
+		IRDA_DEBUG(1, "%s(), Unknown flow command!\n", __FUNCTION__);
+	}
+}
+EXPORT_SYMBOL(irttp_flow_request);
+
+/*
+ * Function irttp_connect_request (self, dtsap_sel, daddr, qos)
+ *
+ *    Try to connect to remote destination TSAP selector
+ *
+ */
+int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
+			  __u32 saddr, __u32 daddr,
+			  struct qos_info *qos, __u32 max_sdu_size,
+			  struct sk_buff *userdata)
+{
+	struct sk_buff *tx_skb;
+	__u8 *frame;
+	__u8 n;
+
+	IRDA_DEBUG(4, "%s(), max_sdu_size=%d\n", __FUNCTION__, max_sdu_size);
+
+	IRDA_ASSERT(self != NULL, return -EBADR;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
+
+	if (self->connected) {
+		if(userdata)
+			dev_kfree_skb(userdata);
+		return -EISCONN;
+	}
+
+	/* Any userdata supplied? */
+	if (userdata == NULL) {
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+
+		/* Reserve space for MUX_CONTROL and LAP header */
+		skb_reserve(tx_skb, TTP_MAX_HEADER);
+	} else {
+		tx_skb = userdata;
+		/*
+		 *  Check that the client has reserved enough space for
+		 *  headers
+		 */
+		IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
+			{ dev_kfree_skb(userdata); return -1; } );
+	}
+
+	/* Initialize connection parameters */
+	self->connected = FALSE;
+	self->avail_credit = 0;
+	self->rx_max_sdu_size = max_sdu_size;
+	self->rx_sdu_size = 0;
+	self->rx_sdu_busy = FALSE;
+	self->dtsap_sel = dtsap_sel;
+
+	n = self->initial_credit;
+
+	self->remote_credit = 0;
+	self->send_credit = 0;
+
+	/*
+	 *  Give away max 127 credits for now
+	 */
+	if (n > 127) {
+		self->avail_credit=n-127;
+		n = 127;
+	}
+
+	self->remote_credit = n;
+
+	/* SAR enabled? */
+	if (max_sdu_size > 0) {
+		IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
+			{ dev_kfree_skb(tx_skb); return -1; } );
+
+		/* Insert SAR parameters */
+		frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER);
+
+		frame[0] = TTP_PARAMETERS | n;
+		frame[1] = 0x04; /* Length */
+		frame[2] = 0x01; /* MaxSduSize */
+		frame[3] = 0x02; /* Value length */
+
+		put_unaligned(cpu_to_be16((__u16) max_sdu_size),
+			      (__u16 *)(frame+4));
+	} else {
+		/* Insert plain TTP header */
+		frame = skb_push(tx_skb, TTP_HEADER);
+
+		/* Insert initial credit in frame */
+		frame[0] = n & 0x7f;
+	}
+
+	/* Connect with IrLMP. No QoS parameters for now */
+	return irlmp_connect_request(self->lsap, dtsap_sel, saddr, daddr, qos,
+				     tx_skb);
+}
+EXPORT_SYMBOL(irttp_connect_request);
+
+/*
+ * Function irttp_connect_confirm (handle, qos, skb)
+ *
+ *    Sevice user confirms TSAP connection with peer.
+ *
+ */
+static void irttp_connect_confirm(void *instance, void *sap,
+				  struct qos_info *qos, __u32 max_seg_size,
+				  __u8 max_header_size, struct sk_buff *skb)
+{
+	struct tsap_cb *self;
+	int parameters;
+	int ret;
+	__u8 plen;
+	__u8 n;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	self->max_seg_size = max_seg_size - TTP_HEADER;
+	self->max_header_size = max_header_size + TTP_HEADER;
+
+	/*
+	 *  Check if we have got some QoS parameters back! This should be the
+	 *  negotiated QoS for the link.
+	 */
+	if (qos) {
+		IRDA_DEBUG(4, "IrTTP, Negotiated BAUD_RATE: %02x\n",
+		       qos->baud_rate.bits);
+		IRDA_DEBUG(4, "IrTTP, Negotiated BAUD_RATE: %d bps.\n",
+		       qos->baud_rate.value);
+	}
+
+	n = skb->data[0] & 0x7f;
+
+	IRDA_DEBUG(4, "%s(), Initial send_credit=%d\n", __FUNCTION__, n);
+
+	self->send_credit = n;
+	self->tx_max_sdu_size = 0;
+	self->connected = TRUE;
+
+	parameters = skb->data[0] & 0x80;
+
+	IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
+	skb_pull(skb, TTP_HEADER);
+
+	if (parameters) {
+		plen = skb->data[0];
+
+		ret = irda_param_extract_all(self, skb->data+1,
+					     IRDA_MIN(skb->len-1, plen),
+					     &param_info);
+
+		/* Any errors in the parameter list? */
+		if (ret < 0) {
+			IRDA_WARNING("%s: error extracting parameters\n",
+				     __FUNCTION__);
+			dev_kfree_skb(skb);
+
+			/* Do not accept this connection attempt */
+			return;
+		}
+		/* Remove parameters */
+		skb_pull(skb, IRDA_MIN(skb->len, plen+1));
+	}
+
+	IRDA_DEBUG(4, "%s() send=%d,avail=%d,remote=%d\n", __FUNCTION__,
+	      self->send_credit, self->avail_credit, self->remote_credit);
+
+	IRDA_DEBUG(2, "%s(), MaxSduSize=%d\n", __FUNCTION__,
+		   self->tx_max_sdu_size);
+
+	if (self->notify.connect_confirm) {
+		self->notify.connect_confirm(self->notify.instance, self, qos,
+					     self->tx_max_sdu_size,
+					     self->max_header_size, skb);
+	} else
+		dev_kfree_skb(skb);
+}
+
+/*
+ * Function irttp_connect_indication (handle, skb)
+ *
+ *    Some other device is connecting to this TSAP
+ *
+ */
+void irttp_connect_indication(void *instance, void *sap, struct qos_info *qos,
+			      __u32 max_seg_size, __u8 max_header_size,
+			      struct sk_buff *skb)
+{
+	struct tsap_cb *self;
+	struct lsap_cb *lsap;
+	int parameters;
+	int ret;
+	__u8 plen;
+	__u8 n;
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+	IRDA_ASSERT(skb != NULL, return;);
+
+	lsap = (struct lsap_cb *) sap;
+
+	self->max_seg_size = max_seg_size - TTP_HEADER;
+	self->max_header_size = max_header_size+TTP_HEADER;
+
+	IRDA_DEBUG(4, "%s(), TSAP sel=%02x\n", __FUNCTION__, self->stsap_sel);
+
+	/* Need to update dtsap_sel if its equal to LSAP_ANY */
+	self->dtsap_sel = lsap->dlsap_sel;
+
+	n = skb->data[0] & 0x7f;
+
+	self->send_credit = n;
+	self->tx_max_sdu_size = 0;
+
+	parameters = skb->data[0] & 0x80;
+
+	IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
+	skb_pull(skb, TTP_HEADER);
+
+	if (parameters) {
+		plen = skb->data[0];
+
+		ret = irda_param_extract_all(self, skb->data+1,
+					     IRDA_MIN(skb->len-1, plen),
+					     &param_info);
+
+		/* Any errors in the parameter list? */
+		if (ret < 0) {
+			IRDA_WARNING("%s: error extracting parameters\n",
+				     __FUNCTION__);
+			dev_kfree_skb(skb);
+
+			/* Do not accept this connection attempt */
+			return;
+		}
+
+		/* Remove parameters */
+		skb_pull(skb, IRDA_MIN(skb->len, plen+1));
+	}
+
+	if (self->notify.connect_indication) {
+		self->notify.connect_indication(self->notify.instance, self,
+						qos, self->tx_max_sdu_size,
+						self->max_header_size, skb);
+	} else
+		dev_kfree_skb(skb);
+}
+
+/*
+ * Function irttp_connect_response (handle, userdata)
+ *
+ *    Service user is accepting the connection, just pass it down to
+ *    IrLMP!
+ *
+ */
+int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
+			   struct sk_buff *userdata)
+{
+	struct sk_buff *tx_skb;
+	__u8 *frame;
+	int ret;
+	__u8 n;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+
+	IRDA_DEBUG(4, "%s(), Source TSAP selector=%02x\n", __FUNCTION__,
+		   self->stsap_sel);
+
+	/* Any userdata supplied? */
+	if (userdata == NULL) {
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+
+		/* Reserve space for MUX_CONTROL and LAP header */
+		skb_reserve(tx_skb, TTP_MAX_HEADER);
+	} else {
+		tx_skb = userdata;
+		/*
+		 *  Check that the client has reserved enough space for
+		 *  headers
+		 */
+		IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
+			{ dev_kfree_skb(userdata); return -1; } );
+	}
+
+	self->avail_credit = 0;
+	self->remote_credit = 0;
+	self->rx_max_sdu_size = max_sdu_size;
+	self->rx_sdu_size = 0;
+	self->rx_sdu_busy = FALSE;
+
+	n = self->initial_credit;
+
+	/* Frame has only space for max 127 credits (7 bits) */
+	if (n > 127) {
+		self->avail_credit = n - 127;
+		n = 127;
+	}
+
+	self->remote_credit = n;
+	self->connected = TRUE;
+
+	/* SAR enabled? */
+	if (max_sdu_size > 0) {
+		IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
+			{ dev_kfree_skb(tx_skb); return -1; } );
+
+		/* Insert TTP header with SAR parameters */
+		frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER);
+
+		frame[0] = TTP_PARAMETERS | n;
+		frame[1] = 0x04; /* Length */
+
+		/* irda_param_insert(self, IRTTP_MAX_SDU_SIZE, frame+1,  */
+/*				  TTP_SAR_HEADER, &param_info) */
+
+		frame[2] = 0x01; /* MaxSduSize */
+		frame[3] = 0x02; /* Value length */
+
+		put_unaligned(cpu_to_be16((__u16) max_sdu_size),
+			      (__u16 *)(frame+4));
+	} else {
+		/* Insert TTP header */
+		frame = skb_push(tx_skb, TTP_HEADER);
+
+		frame[0] = n & 0x7f;
+	}
+
+	ret = irlmp_connect_response(self->lsap, tx_skb);
+
+	return ret;
+}
+EXPORT_SYMBOL(irttp_connect_response);
+
+/*
+ * Function irttp_dup (self, instance)
+ *
+ *    Duplicate TSAP, can be used by servers to confirm a connection on a
+ *    new TSAP so it can keep listening on the old one.
+ */
+struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
+{
+	struct tsap_cb *new;
+	unsigned long flags;
+
+	IRDA_DEBUG(1, "%s()\n", __FUNCTION__);
+
+	/* Protect our access to the old tsap instance */
+	spin_lock_irqsave(&irttp->tsaps->hb_spinlock, flags);
+
+	/* Find the old instance */
+	if (!hashbin_find(irttp->tsaps, (long) orig, NULL)) {
+		IRDA_DEBUG(0, "%s(), unable to find TSAP\n", __FUNCTION__);
+		spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
+		return NULL;
+	}
+
+	/* Allocate a new instance */
+	new = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
+	if (!new) {
+		IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __FUNCTION__);
+		spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
+		return NULL;
+	}
+	/* Dup */
+	memcpy(new, orig, sizeof(struct tsap_cb));
+
+	/* We don't need the old instance any more */
+	spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
+
+	/* Try to dup the LSAP (may fail if we were too slow) */
+	new->lsap = irlmp_dup(orig->lsap, new);
+	if (!new->lsap) {
+		IRDA_DEBUG(0, "%s(), dup failed!\n", __FUNCTION__);
+		kfree(new);
+		return NULL;
+	}
+
+	/* Not everything should be copied */
+	new->notify.instance = instance;
+	init_timer(&new->todo_timer);
+
+	skb_queue_head_init(&new->rx_queue);
+	skb_queue_head_init(&new->tx_queue);
+	skb_queue_head_init(&new->rx_fragments);
+
+	/* This is locked */
+	hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL);
+
+	return new;
+}
+EXPORT_SYMBOL(irttp_dup);
+
+/*
+ * Function irttp_disconnect_request (self)
+ *
+ *    Close this connection please! If priority is high, the queued data
+ *    segments, if any, will be deallocated first
+ *
+ */
+int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
+			     int priority)
+{
+	int ret;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
+
+	/* Already disconnected? */
+	if (!self->connected) {
+		IRDA_DEBUG(4, "%s(), already disconnected!\n", __FUNCTION__);
+		if (userdata)
+			dev_kfree_skb(userdata);
+		return -1;
+	}
+
+	/* Disconnect already pending ?
+	 * We need to use an atomic operation to prevent reentry. This
+	 * function may be called from various context, like user, timer
+	 * for following a disconnect_indication() (i.e. net_bh).
+	 * Jean II */
+	if(test_and_set_bit(0, &self->disconnect_pend)) {
+		IRDA_DEBUG(0, "%s(), disconnect already pending\n",
+			   __FUNCTION__);
+		if (userdata)
+			dev_kfree_skb(userdata);
+
+		/* Try to make some progress */
+		irttp_run_tx_queue(self);
+		return -1;
+	}
+
+	/*
+	 *  Check if there is still data segments in the transmit queue
+	 */
+	if (skb_queue_len(&self->tx_queue) > 0) {
+		if (priority == P_HIGH) {
+			/*
+			 *  No need to send the queued data, if we are
+			 *  disconnecting right now since the data will
+			 *  not have any usable connection to be sent on
+			 */
+			IRDA_DEBUG(1, "%s(): High priority!!()\n", __FUNCTION__);
+			irttp_flush_queues(self);
+		} else if (priority == P_NORMAL) {
+			/*
+			 *  Must delay disconnect until after all data segments
+			 *  have been sent and the tx_queue is empty
+			 */
+			/* We'll reuse this one later for the disconnect */
+			self->disconnect_skb = userdata;  /* May be NULL */
+
+			irttp_run_tx_queue(self);
+
+			irttp_start_todo_timer(self, HZ/10);
+			return -1;
+		}
+	}
+	/* Note : we don't need to check if self->rx_queue is full and the
+	 * state of self->rx_sdu_busy because the disconnect response will
+	 * be sent at the LMP level (so even if the peer has its Tx queue
+	 * full of data). - Jean II */
+
+	IRDA_DEBUG(1, "%s(), Disconnecting ...\n", __FUNCTION__);
+	self->connected = FALSE;
+
+	if (!userdata) {
+		struct sk_buff *tx_skb;
+		tx_skb = dev_alloc_skb(64);
+		if (!tx_skb)
+			return -ENOMEM;
+
+		/*
+		 *  Reserve space for MUX and LAP header
+		 */
+		skb_reserve(tx_skb, TTP_MAX_HEADER);
+
+		userdata = tx_skb;
+	}
+	ret = irlmp_disconnect_request(self->lsap, userdata);
+
+	/* The disconnect is no longer pending */
+	clear_bit(0, &self->disconnect_pend);	/* FALSE */
+
+	return ret;
+}
+EXPORT_SYMBOL(irttp_disconnect_request);
+
+/*
+ * Function irttp_disconnect_indication (self, reason)
+ *
+ *    Disconnect indication, TSAP disconnected by peer?
+ *
+ */
+void irttp_disconnect_indication(void *instance, void *sap, LM_REASON reason,
+				 struct sk_buff *skb)
+{
+	struct tsap_cb *self;
+
+	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
+
+	self = (struct tsap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
+
+	/* Prevent higher layer to send more data */
+	self->connected = FALSE;
+
+	/* Check if client has already tried to close the TSAP */
+	if (self->close_pend) {
+		/* In this case, the higher layer is probably gone. Don't
+		 * bother it and clean up the remains - Jean II */
+		if (skb)
+			dev_kfree_skb(skb);
+		irttp_close_tsap(self);
+		return;
+	}
+
+	/* If we are here, we assume that is the higher layer is still
+	 * waiting for the disconnect notification and able to process it,
+	 * even if he tried to disconnect. Otherwise, it would have already
+	 * attempted to close the tsap and self->close_pend would be TRUE.
+	 * Jean II */
+
+	/* No need to notify the client if has already tried to disconnect */
+	if(self->notify.disconnect_indication)
+		self->notify.disconnect_indication(self->notify.instance, self,
+						   reason, skb);
+	else
+		if (skb)
+			dev_kfree_skb(skb);
+}
+
+/*
+ * Function irttp_do_data_indication (self, skb)
+ *
+ *    Try to deliver reassembled skb to layer above, and requeue it if that
+ *    for some reason should fail. We mark rx sdu as busy to apply back
+ *    pressure is necessary.
+ */
+static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb)
+{
+	int err;
+
+	/* Check if client has already closed the TSAP and gone away */
+	if (self->close_pend) {
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	err = self->notify.data_indication(self->notify.instance, self, skb);
+
+	/* Usually the layer above will notify that it's input queue is
+	 * starting to get filled by using the flow request, but this may
+	 * be difficult, so it can instead just refuse to eat it and just
+	 * give an error back
+	 */
+	if (err) {
+		IRDA_DEBUG(0, "%s() requeueing skb!\n", __FUNCTION__);
+
+		/* Make sure we take a break */
+		self->rx_sdu_busy = TRUE;
+
+		/* Need to push the header in again */
+		skb_push(skb, TTP_HEADER);
+		skb->data[0] = 0x00; /* Make sure MORE bit is cleared */
+
+		/* Put skb back on queue */
+		skb_queue_head(&self->rx_queue, skb);
+	}
+}
+
+/*
+ * Function irttp_run_rx_queue (self)
+ *
+ *     Check if we have any frames to be transmitted, or if we have any
+ *     available credit to give away.
+ */
+void irttp_run_rx_queue(struct tsap_cb *self)
+{
+	struct sk_buff *skb;
+	int more = 0;
+
+	IRDA_DEBUG(2, "%s() send=%d,avail=%d,remote=%d\n", __FUNCTION__,
+		   self->send_credit, self->avail_credit, self->remote_credit);
+
+	/* Get exclusive access to the rx queue, otherwise don't touch it */
+	if (irda_lock(&self->rx_queue_lock) == FALSE)
+		return;
+
+	/*
+	 *  Reassemble all frames in receive queue and deliver them
+	 */
+	while (!self->rx_sdu_busy && (skb = skb_dequeue(&self->rx_queue))) {
+		/* This bit will tell us if it's the last fragment or not */
+		more = skb->data[0] & 0x80;
+
+		/* Remove TTP header */
+		skb_pull(skb, TTP_HEADER);
+
+		/* Add the length of the remaining data */
+		self->rx_sdu_size += skb->len;
+
+		/*
+		 * If SAR is disabled, or user has requested no reassembly
+		 * of received fragments then we just deliver them
+		 * immediately. This can be requested by clients that
+		 * implements byte streams without any message boundaries
+		 */
+		if (self->rx_max_sdu_size == TTP_SAR_DISABLE) {
+			irttp_do_data_indication(self, skb);
+			self->rx_sdu_size = 0;
+
+			continue;
+		}
+
+		/* Check if this is a fragment, and not the last fragment */
+		if (more) {
+			/*
+			 *  Queue the fragment if we still are within the
+			 *  limits of the maximum size of the rx_sdu
+			 */
+			if (self->rx_sdu_size <= self->rx_max_sdu_size) {
+				IRDA_DEBUG(4, "%s(), queueing frag\n",
+					   __FUNCTION__);
+				skb_queue_tail(&self->rx_fragments, skb);
+			} else {
+				/* Free the part of the SDU that is too big */
+				dev_kfree_skb(skb);
+			}
+			continue;
+		}
+		/*
+		 *  This is the last fragment, so time to reassemble!
+		 */
+		if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
+		    (self->rx_max_sdu_size == TTP_SAR_UNBOUND))
+		{
+			/*
+			 * A little optimizing. Only queue the fragment if
+			 * there are other fragments. Since if this is the
+			 * last and only fragment, there is no need to
+			 * reassemble :-)
+			 */
+			if (!skb_queue_empty(&self->rx_fragments)) {
+				skb_queue_tail(&self->rx_fragments,
+					       skb);
+
+				skb = irttp_reassemble_skb(self);
+			}
+
+			/* Now we can deliver the reassembled skb */
+			irttp_do_data_indication(self, skb);
+		} else {
+			IRDA_DEBUG(1, "%s(), Truncated frame\n", __FUNCTION__);
+
+			/* Free the part of the SDU that is too big */
+			dev_kfree_skb(skb);
+
+			/* Deliver only the valid but truncated part of SDU */
+			skb = irttp_reassemble_skb(self);
+
+			irttp_do_data_indication(self, skb);
+		}
+		self->rx_sdu_size = 0;
+	}
+
+	/*
+	 * It's not trivial to keep track of how many credits are available
+	 * by incrementing at each packet, because delivery may fail
+	 * (irttp_do_data_indication() may requeue the frame) and because
+	 * we need to take care of fragmentation.
+	 * We want the other side to send up to initial_credit packets.
+	 * We have some frames in our queues, and we have already allowed it
+	 * to send remote_credit.
+	 * No need to spinlock, write is atomic and self correcting...
+	 * Jean II
+	 */
+	self->avail_credit = (self->initial_credit -
+			      (self->remote_credit +
+			       skb_queue_len(&self->rx_queue) +
+			       skb_queue_len(&self->rx_fragments)));
+
+	/* Do we have too much credits to send to peer ? */
+	if ((self->remote_credit <= TTP_RX_MIN_CREDIT) &&
+	    (self->avail_credit > 0)) {
+		/* Send explicit credit frame */
+		irttp_give_credit(self);
+		/* Note : do *NOT* check if tx_queue is non-empty, that
+		 * will produce deadlocks. I repeat : send a credit frame
+		 * even if we have something to send in our Tx queue.
+		 * If we have credits, it means that our Tx queue is blocked.
+		 *
+		 * Let's suppose the peer can't keep up with our Tx. He will
+		 * flow control us by not sending us any credits, and we
+		 * will stop Tx and start accumulating credits here.
+		 * Up to the point where the peer will stop its Tx queue,
+		 * for lack of credits.
+		 * Let's assume the peer application is single threaded.
+		 * It will block on Tx and never consume any Rx buffer.
+		 * Deadlock. Guaranteed. - Jean II
+		 */
+	}
+
+	/* Reset lock */
+	self->rx_queue_lock = 0;
+}
+
+#ifdef CONFIG_PROC_FS
+struct irttp_iter_state {
+	int id;
+};
+
+static void *irttp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct irttp_iter_state *iter = seq->private;
+	struct tsap_cb *self;
+
+	/* Protect our access to the tsap list */
+	spin_lock_irq(&irttp->tsaps->hb_spinlock);
+	iter->id = 0;
+
+	for (self = (struct tsap_cb *) hashbin_get_first(irttp->tsaps); 
+	     self != NULL;
+	     self = (struct tsap_cb *) hashbin_get_next(irttp->tsaps)) {
+		if (iter->id == *pos)
+			break;
+		++iter->id;
+	}
+		
+	return self;
+}
+
+static void *irttp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct irttp_iter_state *iter = seq->private;
+
+	++*pos;
+	++iter->id;
+	return (void *) hashbin_get_next(irttp->tsaps);
+}
+
+static void irttp_seq_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_irq(&irttp->tsaps->hb_spinlock);
+}
+
+static int irttp_seq_show(struct seq_file *seq, void *v)
+{
+	const struct irttp_iter_state *iter = seq->private;
+	const struct tsap_cb *self = v;
+
+	seq_printf(seq, "TSAP %d, ", iter->id);
+	seq_printf(seq, "stsap_sel: %02x, ",
+		   self->stsap_sel);
+	seq_printf(seq, "dtsap_sel: %02x\n",
+		   self->dtsap_sel);
+	seq_printf(seq, "  connected: %s, ",
+		   self->connected? "TRUE":"FALSE");
+	seq_printf(seq, "avail credit: %d, ",
+		   self->avail_credit);
+	seq_printf(seq, "remote credit: %d, ",
+		   self->remote_credit);
+	seq_printf(seq, "send credit: %d\n",
+		   self->send_credit);
+	seq_printf(seq, "  tx packets: %ld, ",
+		   self->stats.tx_packets);
+	seq_printf(seq, "rx packets: %ld, ",
+		   self->stats.rx_packets);
+	seq_printf(seq, "tx_queue len: %d ",
+		   skb_queue_len(&self->tx_queue));
+	seq_printf(seq, "rx_queue len: %d\n",
+		   skb_queue_len(&self->rx_queue));
+	seq_printf(seq, "  tx_sdu_busy: %s, ",
+		   self->tx_sdu_busy? "TRUE":"FALSE");
+	seq_printf(seq, "rx_sdu_busy: %s\n",
+		   self->rx_sdu_busy? "TRUE":"FALSE");
+	seq_printf(seq, "  max_seg_size: %d, ",
+		   self->max_seg_size);
+	seq_printf(seq, "tx_max_sdu_size: %d, ",
+		   self->tx_max_sdu_size);
+	seq_printf(seq, "rx_max_sdu_size: %d\n",
+		   self->rx_max_sdu_size);
+
+	seq_printf(seq, "  Used by (%s)\n\n",
+		   self->notify.name);
+	return 0;
+}
+
+static struct seq_operations irttp_seq_ops = {
+	.start  = irttp_seq_start,
+	.next   = irttp_seq_next,
+	.stop   = irttp_seq_stop,
+	.show   = irttp_seq_show,
+};
+
+static int irttp_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct irttp_iter_state *s;
+       
+	IRDA_ASSERT(irttp != NULL, return -EINVAL;);
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &irttp_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+struct file_operations irttp_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open           = irttp_seq_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif /* PROC_FS */
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
new file mode 100644
index 00000000000..1324942f976
--- /dev/null
+++ b/net/irda/parameters.c
@@ -0,0 +1,589 @@
+/*********************************************************************
+ *
+ * Filename:      parameters.c
+ * Version:       1.0
+ * Description:   A more general way to handle (pi,pl,pv) parameters
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Jun  7 10:25:11 1999
+ * Modified at:   Sun Jan 30 14:08:39 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with this program; if not, write to the Free Software
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *     MA 02111-1307 USA
+ *
+ ********************************************************************/
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+#include <asm/unaligned.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/parameters.h>
+
+static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
+				PV_TYPE type, PI_HANDLER func);
+static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func);
+static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func);
+static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
+				 PV_TYPE type, PI_HANDLER func);
+
+static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func);
+static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
+				PV_TYPE type, PI_HANDLER func);
+
+static int irda_param_unpack(__u8 *buf, char *fmt, ...);
+
+/* Parameter value call table. Must match PV_TYPE */
+static PV_HANDLER pv_extract_table[] = {
+	irda_extract_integer, /* Handler for any length integers */
+	irda_extract_integer, /* Handler for 8  bits integers */
+	irda_extract_integer, /* Handler for 16 bits integers */
+	irda_extract_string,  /* Handler for strings */
+	irda_extract_integer, /* Handler for 32 bits integers */
+	irda_extract_octseq,  /* Handler for octet sequences */
+	irda_extract_no_value /* Handler for no value parameters */
+};
+
+static PV_HANDLER pv_insert_table[] = {
+	irda_insert_integer, /* Handler for any length integers */
+	irda_insert_integer, /* Handler for 8  bits integers */
+	irda_insert_integer, /* Handler for 16 bits integers */
+	NULL,                /* Handler for strings */
+	irda_insert_integer, /* Handler for 32 bits integers */
+	NULL,                /* Handler for octet sequences */
+	irda_insert_no_value /* Handler for no value parameters */
+};
+
+/*
+ * Function irda_insert_no_value (self, buf, len, pi, type, func)
+ */
+static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
+				PV_TYPE type, PI_HANDLER func)
+{
+	irda_param_t p;
+	int ret;
+
+	p.pi = pi;
+	p.pl = 0;
+
+	/* Call handler for this parameter */
+	ret = (*func)(self, &p, PV_GET);
+
+	/* Extract values anyway, since handler may need them */
+	irda_param_pack(buf, "bb", p.pi, p.pl);
+
+	if (ret < 0)
+		return ret;
+
+	return 2; /* Inserted pl+2 bytes */
+}
+
+/*
+ * Function irda_extract_no_value (self, buf, len, type, func)
+ *
+ *    Extracts a parameter without a pv field (pl=0)
+ *
+ */
+static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
+				 PV_TYPE type, PI_HANDLER func)
+{
+	irda_param_t p;
+	int ret;
+
+	/* Extract values anyway, since handler may need them */
+	irda_param_unpack(buf, "bb", &p.pi, &p.pl);
+
+	/* Call handler for this parameter */
+	ret = (*func)(self, &p, PV_PUT);
+
+	if (ret < 0)
+		return ret;
+
+	return 2; /* Extracted pl+2 bytes */
+}
+
+/*
+ * Function irda_insert_integer (self, buf, len, pi, type, func)
+ */
+static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func)
+{
+	irda_param_t p;
+	int n = 0;
+	int err;
+
+	p.pi = pi;             /* In case handler needs to know */
+	p.pl = type & PV_MASK; /* The integer type codes the lenght as well */
+	p.pv.i = 0;            /* Clear value */
+
+	/* Call handler for this parameter */
+	err = (*func)(self, &p, PV_GET);
+	if (err < 0)
+		return err;
+
+	/*
+	 * If parameter lenght is still 0, then (1) this is an any length
+	 * integer, and (2) the handler function does not care which length
+	 * we choose to use, so we pick the one the gives the fewest bytes.
+	 */
+	if (p.pl == 0) {
+		if (p.pv.i < 0xff) {
+			IRDA_DEBUG(2, "%s(), using 1 byte\n", __FUNCTION__);
+			p.pl = 1;
+		} else if (p.pv.i < 0xffff) {
+			IRDA_DEBUG(2, "%s(), using 2 bytes\n", __FUNCTION__);
+			p.pl = 2;
+		} else {
+			IRDA_DEBUG(2, "%s(), using 4 bytes\n", __FUNCTION__);
+			p.pl = 4; /* Default length */
+		}
+	}
+	/* Check if buffer is long enough for insertion */
+	if (len < (2+p.pl)) {
+		IRDA_WARNING("%s: buffer to short for insertion!\n",
+			     __FUNCTION__);
+		return -1;
+	}
+	IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d, pi=%d\n", __FUNCTION__,
+		   p.pi, p.pl, p.pv.i);
+	switch (p.pl) {
+	case 1:
+		n += irda_param_pack(buf, "bbb", p.pi, p.pl, (__u8) p.pv.i);
+		break;
+	case 2:
+		if (type & PV_BIG_ENDIAN)
+			p.pv.i = cpu_to_be16((__u16) p.pv.i);
+		else
+			p.pv.i = cpu_to_le16((__u16) p.pv.i);
+		n += irda_param_pack(buf, "bbs", p.pi, p.pl, (__u16) p.pv.i);
+		break;
+	case 4:
+		if (type & PV_BIG_ENDIAN)
+			cpu_to_be32s(&p.pv.i);
+		else
+			cpu_to_le32s(&p.pv.i);
+		n += irda_param_pack(buf, "bbi", p.pi, p.pl, p.pv.i);
+
+		break;
+	default:
+		IRDA_WARNING("%s: length %d not supported\n",
+			     __FUNCTION__, p.pl);
+		/* Skip parameter */
+		return -1;
+	}
+
+	return p.pl+2; /* Inserted pl+2 bytes */
+}
+
+/*
+ * Function irda_extract integer (self, buf, len, pi, type, func)
+ *
+ *    Extract a possibly variable length integer from buffer, and call
+ *    handler for processing of the parameter
+ */
+static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
+				PV_TYPE type, PI_HANDLER func)
+{
+	irda_param_t p;
+	int n = 0;
+	int extract_len;	/* Real lenght we extract */
+	int err;
+
+	p.pi = pi;     /* In case handler needs to know */
+	p.pl = buf[1]; /* Extract lenght of value */
+	p.pv.i = 0;    /* Clear value */
+	extract_len = p.pl;	/* Default : extract all */
+
+	/* Check if buffer is long enough for parsing */
+	if (len < (2+p.pl)) {
+		IRDA_WARNING("%s: buffer to short for parsing! "
+			     "Need %d bytes, but len is only %d\n",
+			     __FUNCTION__, p.pl, len);
+		return -1;
+	}
+
+	/*
+	 * Check that the integer length is what we expect it to be. If the
+	 * handler want a 16 bits integer then a 32 bits is not good enough
+	 * PV_INTEGER means that the handler is flexible.
+	 */
+	if (((type & PV_MASK) != PV_INTEGER) && ((type & PV_MASK) != p.pl)) {
+		IRDA_ERROR("%s: invalid parameter length! "
+			   "Expected %d bytes, but value had %d bytes!\n",
+			   __FUNCTION__, type & PV_MASK, p.pl);
+
+		/* Most parameters are bit/byte fields or little endian,
+		 * so it's ok to only extract a subset of it (the subset
+		 * that the handler expect). This is necessary, as some
+		 * broken implementations seems to add extra undefined bits.
+		 * If the parameter is shorter than we expect or is big
+		 * endian, we can't play those tricks. Jean II */
+		if((p.pl < (type & PV_MASK)) || (type & PV_BIG_ENDIAN)) {
+			/* Skip parameter */
+			return p.pl+2;
+		} else {
+			/* Extract subset of it, fallthrough */
+			extract_len = type & PV_MASK;
+		}
+	}
+
+
+	switch (extract_len) {
+	case 1:
+		n += irda_param_unpack(buf+2, "b", &p.pv.i);
+		break;
+	case 2:
+		n += irda_param_unpack(buf+2, "s", &p.pv.i);
+		if (type & PV_BIG_ENDIAN)
+			p.pv.i = be16_to_cpu((__u16) p.pv.i);
+		else
+			p.pv.i = le16_to_cpu((__u16) p.pv.i);
+		break;
+	case 4:
+		n += irda_param_unpack(buf+2, "i", &p.pv.i);
+		if (type & PV_BIG_ENDIAN)
+			be32_to_cpus(&p.pv.i);
+		else
+			le32_to_cpus(&p.pv.i);
+		break;
+	default:
+		IRDA_WARNING("%s: length %d not supported\n",
+			     __FUNCTION__, p.pl);
+
+		/* Skip parameter */
+		return p.pl+2;
+	}
+
+	IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d, pi=%d\n", __FUNCTION__,
+		   p.pi, p.pl, p.pv.i);
+	/* Call handler for this parameter */
+	err = (*func)(self, &p, PV_PUT);
+	if (err < 0)
+		return err;
+
+	return p.pl+2; /* Extracted pl+2 bytes */
+}
+
+/*
+ * Function irda_extract_string (self, buf, len, type, func)
+ */
+static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func)
+{
+	char str[33];
+	irda_param_t p;
+	int err;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	p.pi = pi;     /* In case handler needs to know */
+	p.pl = buf[1]; /* Extract lenght of value */
+
+	IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __FUNCTION__,
+		   p.pi, p.pl);
+
+	/* Check if buffer is long enough for parsing */
+	if (len < (2+p.pl)) {
+		IRDA_WARNING("%s: buffer to short for parsing! "
+			     "Need %d bytes, but len is only %d\n",
+			     __FUNCTION__, p.pl, len);
+		return -1;
+	}
+
+	/* Should be safe to copy string like this since we have already
+	 * checked that the buffer is long enough */
+	strncpy(str, buf+2, p.pl);
+
+	IRDA_DEBUG(2, "%s(), str=0x%02x 0x%02x\n", __FUNCTION__,
+		   (__u8) str[0], (__u8) str[1]);
+
+	/* Null terminate string */
+	str[p.pl+1] = '\0';
+
+	p.pv.c = str; /* Handler will need to take a copy */
+
+	/* Call handler for this parameter */
+	err = (*func)(self, &p, PV_PUT);
+	if (err < 0)
+		return err;
+
+	return p.pl+2; /* Extracted pl+2 bytes */
+}
+
+/*
+ * Function irda_extract_octseq (self, buf, len, type, func)
+ */
+static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
+			       PV_TYPE type, PI_HANDLER func)
+{
+	irda_param_t p;
+
+	p.pi = pi;     /* In case handler needs to know */
+	p.pl = buf[1]; /* Extract lenght of value */
+
+	/* Check if buffer is long enough for parsing */
+	if (len < (2+p.pl)) {
+		IRDA_WARNING("%s: buffer to short for parsing! "
+			     "Need %d bytes, but len is only %d\n",
+			     __FUNCTION__, p.pl, len);
+		return -1;
+	}
+
+	IRDA_DEBUG(0, "%s(), not impl\n", __FUNCTION__);
+
+	return p.pl+2; /* Extracted pl+2 bytes */
+}
+
+/*
+ * Function irda_param_pack (skb, fmt, ...)
+ *
+ *    Format:
+ *        'i' = 32 bits integer
+ *        's' = string
+ *
+ */
+int irda_param_pack(__u8 *buf, char *fmt, ...)
+{
+	irda_pv_t arg;
+	va_list args;
+	char *p;
+	int n = 0;
+
+	va_start(args, fmt);
+
+	for (p = fmt; *p != '\0'; p++) {
+		switch (*p) {
+		case 'b':  /* 8 bits unsigned byte */
+			buf[n++] = (__u8)va_arg(args, int);
+			break;
+		case 's':  /* 16 bits unsigned short */
+			arg.i = (__u16)va_arg(args, int);
+			put_unaligned((__u16)arg.i, (__u16 *)(buf+n)); n+=2;
+			break;
+		case 'i':  /* 32 bits unsigned integer */
+			arg.i = va_arg(args, __u32);
+			put_unaligned(arg.i, (__u32 *)(buf+n)); n+=4;
+			break;
+#if 0
+		case 'c': /* \0 terminated string */
+			arg.c = va_arg(args, char *);
+			strcpy(buf+n, arg.c);
+			n += strlen(arg.c) + 1;
+			break;
+#endif
+		default:
+			va_end(args);
+			return -1;
+		}
+	}
+	va_end(args);
+
+	return 0;
+}
+EXPORT_SYMBOL(irda_param_pack);
+
+/*
+ * Function irda_param_unpack (skb, fmt, ...)
+ */
+static int irda_param_unpack(__u8 *buf, char *fmt, ...)
+{
+	irda_pv_t arg;
+	va_list args;
+	char *p;
+	int n = 0;
+
+	va_start(args, fmt);
+
+	for (p = fmt; *p != '\0'; p++) {
+		switch (*p) {
+		case 'b':  /* 8 bits byte */
+			arg.ip = va_arg(args, __u32 *);
+			*arg.ip = buf[n++];
+			break;
+		case 's':  /* 16 bits short */
+			arg.ip = va_arg(args, __u32 *);
+			*arg.ip = get_unaligned((__u16 *)(buf+n)); n+=2;
+			break;
+		case 'i':  /* 32 bits unsigned integer */
+			arg.ip = va_arg(args, __u32 *);
+			*arg.ip = get_unaligned((__u32 *)(buf+n)); n+=4;
+			break;
+#if 0
+		case 'c':   /* \0 terminated string */
+			arg.c = va_arg(args, char *);
+			strcpy(arg.c, buf+n);
+			n += strlen(arg.c) + 1;
+			break;
+#endif
+		default:
+			va_end(args);
+			return -1;
+		}
+
+	}
+	va_end(args);
+
+	return 0;
+}
+
+/*
+ * Function irda_param_insert (self, pi, buf, len, info)
+ *
+ *    Insert the specified parameter (pi) into buffer. Returns number of
+ *    bytes inserted
+ */
+int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
+		      pi_param_info_t *info)
+{
+	pi_minor_info_t *pi_minor_info;
+	__u8 pi_minor;
+	__u8 pi_major;
+	int type;
+	int ret = -1;
+	int n = 0;
+
+	IRDA_ASSERT(buf != NULL, return ret;);
+	IRDA_ASSERT(info != 0, return ret;);
+
+	pi_minor = pi & info->pi_mask;
+	pi_major = pi >> info->pi_major_offset;
+
+	/* Check if the identifier value (pi) is valid */
+	if ((pi_major > info->len-1) ||
+	    (pi_minor > info->tables[pi_major].len-1))
+	{
+		IRDA_DEBUG(0, "%s(), no handler for parameter=0x%02x\n",
+			   __FUNCTION__, pi);
+
+		/* Skip this parameter */
+		return -1;
+	}
+
+	/* Lookup the info on how to parse this parameter */
+	pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
+
+	/* Find expected data type for this parameter identifier (pi)*/
+	type = pi_minor_info->type;
+
+	/*  Check if handler has been implemented */
+	if (!pi_minor_info->func) {
+		IRDA_MESSAGE("%s: no handler for pi=%#x\n", __FUNCTION__, pi);
+		/* Skip this parameter */
+		return -1;
+	}
+
+	/* Insert parameter value */
+	ret = (*pv_insert_table[type & PV_MASK])(self, buf+n, len, pi, type,
+						 pi_minor_info->func);
+	return ret;
+}
+EXPORT_SYMBOL(irda_param_insert);
+
+/*
+ * Function irda_param_extract (self, buf, len, info)
+ *
+ *    Parse all parameters. If len is correct, then everything should be
+ *    safe. Returns the number of bytes that was parsed
+ *
+ */
+static int irda_param_extract(void *self, __u8 *buf, int len,
+			      pi_param_info_t *info)
+{
+	pi_minor_info_t *pi_minor_info;
+	__u8 pi_minor;
+	__u8 pi_major;
+	int type;
+	int ret = -1;
+	int n = 0;
+
+	IRDA_ASSERT(buf != NULL, return ret;);
+	IRDA_ASSERT(info != 0, return ret;);
+
+	pi_minor = buf[n] & info->pi_mask;
+	pi_major = buf[n] >> info->pi_major_offset;
+
+	/* Check if the identifier value (pi) is valid */
+	if ((pi_major > info->len-1) ||
+	    (pi_minor > info->tables[pi_major].len-1))
+	{
+		IRDA_DEBUG(0, "%s(), no handler for parameter=0x%02x\n",
+			   __FUNCTION__, buf[0]);
+
+		/* Skip this parameter */
+		return 2 + buf[n + 1];  /* Continue */
+	}
+
+	/* Lookup the info on how to parse this parameter */
+	pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
+
+	/* Find expected data type for this parameter identifier (pi)*/
+	type = pi_minor_info->type;
+
+	IRDA_DEBUG(3, "%s(), pi=[%d,%d], type=%d\n", __FUNCTION__,
+		   pi_major, pi_minor, type);
+
+	/*  Check if handler has been implemented */
+	if (!pi_minor_info->func) {
+		IRDA_MESSAGE("%s: no handler for pi=%#x\n",
+			     __FUNCTION__, buf[n]);
+		/* Skip this parameter */
+		return 2 + buf[n + 1]; /* Continue */
+	}
+
+	/* Parse parameter value */
+	ret = (*pv_extract_table[type & PV_MASK])(self, buf+n, len, buf[n],
+						  type, pi_minor_info->func);
+	return ret;
+}
+
+/*
+ * Function irda_param_extract_all (self, buf, len, info)
+ *
+ *    Parse all parameters. If len is correct, then everything should be
+ *    safe. Returns the number of bytes that was parsed
+ *
+ */
+int irda_param_extract_all(void *self, __u8 *buf, int len, 
+			   pi_param_info_t *info)
+{
+	int ret = -1;
+	int n = 0;
+
+	IRDA_ASSERT(buf != NULL, return ret;);
+	IRDA_ASSERT(info != 0, return ret;);
+
+	/*
+	 * Parse all parameters. Each parameter must be at least two bytes
+	 * long or else there is no point in trying to parse it
+	 */
+	while (len > 2) {
+		ret = irda_param_extract(self, buf+n, len, info);
+		if (ret < 0)
+			return ret;
+
+		n += ret;
+		len -= ret;
+	}
+	return n;
+}
+EXPORT_SYMBOL(irda_param_extract_all);
diff --git a/net/irda/qos.c b/net/irda/qos.c
new file mode 100644
index 00000000000..df732d56cc5
--- /dev/null
+++ b/net/irda/qos.c
@@ -0,0 +1,774 @@
+/*********************************************************************
+ *                                
+ * Filename:      qos.c
+ * Version:       1.0
+ * Description:   IrLAP QoS parameter negotiation
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Sep  9 00:00:26 1997
+ * Modified at:   Sun Jan 30 14:29:16 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, write to the Free Software 
+ *     Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
+ *     MA 02111-1307 USA
+ *     
+ ********************************************************************/
+
+#include <linux/config.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/parameters.h>
+#include <net/irda/qos.h>
+#include <net/irda/irlap.h>
+
+/*
+ * Maximum values of the baud rate we negociate with the other end.
+ * Most often, you don't have to change that, because Linux-IrDA will
+ * use the maximum offered by the link layer, which usually works fine.
+ * In some very rare cases, you may want to limit it to lower speeds...
+ */
+int sysctl_max_baud_rate = 16000000;
+/*
+ * Maximum value of the lap disconnect timer we negociate with the other end.
+ * Most often, the value below represent the best compromise, but some user
+ * may want to keep the LAP alive longuer or shorter in case of link failure.
+ * Remember that the threshold time (early warning) is fixed to 3s...
+ */
+int sysctl_max_noreply_time = 12;
+/*
+ * Minimum turn time to be applied before transmitting to the peer.
+ * Nonzero values (usec) are used as lower limit to the per-connection
+ * mtt value which was announced by the other end during negotiation.
+ * Might be helpful if the peer device provides too short mtt.
+ * Default is 10us which means using the unmodified value given by the
+ * peer except if it's 0 (0 is likely a bug in the other stack).
+ */
+unsigned sysctl_min_tx_turn_time = 10;
+/*
+ * Maximum data size to be used in transmission in payload of LAP frame.
+ * There is a bit of confusion in the IrDA spec :
+ * The LAP spec defines the payload of a LAP frame (I field) to be
+ * 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40).
+ * On the other hand, the PHY mention frames of 2048 bytes max (IrPHY
+ * 1.2, chapt 5.3.2.1, p41). But, this number includes the LAP header
+ * (2 bytes), and CRC (32 bits at 4 Mb/s). So, for the I field (LAP
+ * payload), that's only 2042 bytes. Oups !
+ * My nsc-ircc hardware has troubles receiving 2048 bytes frames at 4 Mb/s,
+ * so adjust to 2042... I don't know if this bug applies only for 2048
+ * bytes frames or all negotiated frame sizes, but you can use the sysctl
+ * to play with this value anyway.
+ * Jean II */
+unsigned sysctl_max_tx_data_size = 2042;
+/*
+ * Maximum transmit window, i.e. number of LAP frames between turn-around.
+ * This allow to override what the peer told us. Some peers are buggy and
+ * don't always support what they tell us.
+ * Jean II */
+unsigned sysctl_max_tx_window = 7;
+
+static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
+static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, 
+				       int get);
+static int irlap_param_max_turn_time(void *instance, irda_param_t *param, 
+				     int get);
+static int irlap_param_data_size(void *instance, irda_param_t *param, int get);
+static int irlap_param_window_size(void *instance, irda_param_t *param, 
+				   int get);
+static int irlap_param_additional_bofs(void *instance, irda_param_t *parm, 
+				       int get);
+static int irlap_param_min_turn_time(void *instance, irda_param_t *param, 
+				     int get);
+
+#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
+static __u32 irlap_requested_line_capacity(struct qos_info *qos);
+#endif
+
+static __u32 min_turn_times[]  = { 10000, 5000, 1000, 500, 100, 50, 10, 0 }; /* us */
+static __u32 baud_rates[]      = { 2400, 9600, 19200, 38400, 57600, 115200, 576000, 
+				   1152000, 4000000, 16000000 };           /* bps */
+static __u32 data_sizes[]      = { 64, 128, 256, 512, 1024, 2048 };        /* bytes */
+static __u32 add_bofs[]        = { 48, 24, 12, 5, 3, 2, 1, 0 };            /* bytes */
+static __u32 max_turn_times[]  = { 500, 250, 100, 50 };                    /* ms */
+static __u32 link_disc_times[] = { 3, 8, 12, 16, 20, 25, 30, 40 };         /* secs */
+
+static __u32 max_line_capacities[10][4] = {
+       /* 500 ms     250 ms  100 ms  50 ms (max turn time) */
+	{    100,      0,      0,     0 }, /*     2400 bps */
+	{    400,      0,      0,     0 }, /*     9600 bps */
+	{    800,      0,      0,     0 }, /*    19200 bps */
+	{   1600,      0,      0,     0 }, /*    38400 bps */
+	{   2360,      0,      0,     0 }, /*    57600 bps */
+	{   4800,   2400,    960,   480 }, /*   115200 bps */
+	{  28800,  11520,   5760,  2880 }, /*   576000 bps */
+	{  57600,  28800,  11520,  5760 }, /*  1152000 bps */
+	{ 200000, 100000,  40000, 20000 }, /*  4000000 bps */
+	{ 800000, 400000, 160000, 80000 }, /* 16000000 bps */
+};
+
+static pi_minor_info_t pi_minor_call_table_type_0[] = {
+	{ NULL, 0 },
+/* 01 */{ irlap_param_baud_rate,       PV_INTEGER | PV_LITTLE_ENDIAN },
+	{ NULL, 0 },
+	{ NULL, 0 },
+	{ NULL, 0 },
+	{ NULL, 0 },
+	{ NULL, 0 },
+	{ NULL, 0 },
+/* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS }
+};
+
+static pi_minor_info_t pi_minor_call_table_type_1[] = {
+	{ NULL, 0 },
+	{ NULL, 0 },
+/* 82 */{ irlap_param_max_turn_time,   PV_INT_8_BITS },
+/* 83 */{ irlap_param_data_size,       PV_INT_8_BITS },
+/* 84 */{ irlap_param_window_size,     PV_INT_8_BITS },
+/* 85 */{ irlap_param_additional_bofs, PV_INT_8_BITS },
+/* 86 */{ irlap_param_min_turn_time,   PV_INT_8_BITS },
+};
+
+static pi_major_info_t pi_major_call_table[] = {
+	{ pi_minor_call_table_type_0, 9 },
+	{ pi_minor_call_table_type_1, 7 },
+};
+
+static pi_param_info_t irlap_param_info = { pi_major_call_table, 2, 0x7f, 7 };
+
+/* ---------------------- LOCAL SUBROUTINES ---------------------- */
+/* Note : we start with a bunch of local subroutines.
+ * As the compiler is "one pass", this is the only way to get them to
+ * inline properly...
+ * Jean II
+ */
+/*
+ * Function value_index (value, array, size)
+ *
+ *    Returns the index to the value in the specified array
+ */
+static inline int value_index(__u32 value, __u32 *array, int size)
+{
+	int i;
+	
+	for (i=0; i < size; i++)
+		if (array[i] == value)
+			break;
+	return i;
+}
+
+/*
+ * Function index_value (index, array)
+ *
+ *    Returns value to index in array, easy!
+ *
+ */
+static inline __u32 index_value(int index, __u32 *array) 
+{
+	return array[index];
+}
+
+/*
+ * Function msb_index (word)
+ *
+ *    Returns index to most significant bit (MSB) in word
+ *
+ */
+static int msb_index (__u16 word) 
+{
+	__u16 msb = 0x8000;
+	int index = 15;   /* Current MSB */
+
+	/* Check for buggy peers.
+	 * Note : there is a small probability that it could be us, but I
+	 * would expect driver authors to catch that pretty early and be
+	 * able to check precisely what's going on. If a end user sees this,
+	 * it's very likely the peer. - Jean II */
+	if (word == 0) {
+		IRDA_WARNING("%s(), Detected buggy peer, adjust null PV to 0x1!\n",
+			 __FUNCTION__);
+		/* The only safe choice (we don't know the array size) */
+		word = 0x1;
+	}
+
+	while (msb) {
+		if (word & msb)
+			break;   /* Found it! */
+		msb >>=1;
+		index--;
+	}
+	return index;
+}
+
+/*
+ * Function value_lower_bits (value, array)
+ *
+ *    Returns a bit field marking all possibility lower than value.
+ */
+static inline int value_lower_bits(__u32 value, __u32 *array, int size, __u16 *field)
+{
+	int	i;
+	__u16	mask = 0x1;
+	__u16	result = 0x0;
+
+	for (i=0; i < size; i++) {
+		/* Add the current value to the bit field, shift mask */
+		result |= mask;
+		mask <<= 1;
+		/* Finished ? */
+		if (array[i] >= value)
+			break;
+	}
+	/* Send back a valid index */
+	if(i >= size)
+	  i = size - 1;	/* Last item */
+	*field = result;
+	return i;
+}
+
+/*
+ * Function value_highest_bit (value, array)
+ *
+ *    Returns a bit field marking the highest possibility lower than value.
+ */
+static inline int value_highest_bit(__u32 value, __u32 *array, int size, __u16 *field)
+{
+	int	i;
+	__u16	mask = 0x1;
+	__u16	result = 0x0;
+
+	for (i=0; i < size; i++) {
+		/* Finished ? */
+		if (array[i] <= value)
+			break;
+		/* Shift mask */
+		mask <<= 1;
+	}
+	/* Set the current value to the bit field */
+	result |= mask;
+	/* Send back a valid index */
+	if(i >= size)
+	  i = size - 1;	/* Last item */
+	*field = result;
+	return i;
+}
+
+/* -------------------------- MAIN CALLS -------------------------- */
+
+/*
+ * Function irda_qos_compute_intersection (qos, new)
+ *
+ *    Compute the intersection of the old QoS capabilities with new ones
+ *
+ */
+void irda_qos_compute_intersection(struct qos_info *qos, struct qos_info *new)
+{
+	IRDA_ASSERT(qos != NULL, return;);
+	IRDA_ASSERT(new != NULL, return;);
+
+	/* Apply */
+	qos->baud_rate.bits       &= new->baud_rate.bits;
+	qos->window_size.bits     &= new->window_size.bits;
+	qos->min_turn_time.bits   &= new->min_turn_time.bits;
+	qos->max_turn_time.bits   &= new->max_turn_time.bits;
+	qos->data_size.bits       &= new->data_size.bits;
+	qos->link_disc_time.bits  &= new->link_disc_time.bits;
+	qos->additional_bofs.bits &= new->additional_bofs.bits;
+
+	irda_qos_bits_to_value(qos);
+}
+
+/*
+ * Function irda_init_max_qos_capabilies (qos)
+ *
+ *    The purpose of this function is for layers and drivers to be able to
+ *    set the maximum QoS possible and then "and in" their own limitations
+ * 
+ */
+void irda_init_max_qos_capabilies(struct qos_info *qos)
+{
+	int i;
+	/* 
+	 *  These are the maximum supported values as specified on pages
+	 *  39-43 in IrLAP
+	 */
+
+	/* Use sysctl to set some configurable values... */
+	/* Set configured max speed */
+	i = value_lower_bits(sysctl_max_baud_rate, baud_rates, 10,
+			     &qos->baud_rate.bits);
+	sysctl_max_baud_rate = index_value(i, baud_rates);
+
+	/* Set configured max disc time */
+	i = value_lower_bits(sysctl_max_noreply_time, link_disc_times, 8,
+			     &qos->link_disc_time.bits);
+	sysctl_max_noreply_time = index_value(i, link_disc_times);
+
+	/* LSB is first byte, MSB is second byte */
+	qos->baud_rate.bits    &= 0x03ff;
+
+	qos->window_size.bits     = 0x7f;
+	qos->min_turn_time.bits   = 0xff;
+	qos->max_turn_time.bits   = 0x0f;
+	qos->data_size.bits       = 0x3f;
+	qos->link_disc_time.bits &= 0xff;
+	qos->additional_bofs.bits = 0xff;
+}
+EXPORT_SYMBOL(irda_init_max_qos_capabilies);
+
+/*
+ * Function irlap_adjust_qos_settings (qos)
+ *
+ *     Adjust QoS settings in case some values are not possible to use because
+ *     of other settings
+ */
+static void irlap_adjust_qos_settings(struct qos_info *qos)
+{
+	__u32 line_capacity;
+	int index;
+
+	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+	/*
+	 * Make sure the mintt is sensible.
+	 * Main culprit : Ericsson T39. - Jean II
+	 */
+	if (sysctl_min_tx_turn_time > qos->min_turn_time.value) {
+		int i;
+
+		IRDA_WARNING("%s(), Detected buggy peer, adjust mtt to %dus!\n",
+			 __FUNCTION__, sysctl_min_tx_turn_time);
+
+		/* We don't really need bits, but easier this way */
+		i = value_highest_bit(sysctl_min_tx_turn_time, min_turn_times,
+				      8, &qos->min_turn_time.bits);
+		sysctl_min_tx_turn_time = index_value(i, min_turn_times);
+		qos->min_turn_time.value = sysctl_min_tx_turn_time;
+	}
+
+	/* 
+	 * Not allowed to use a max turn time less than 500 ms if the baudrate
+	 * is less than 115200
+	 */
+	if ((qos->baud_rate.value < 115200) && 
+	    (qos->max_turn_time.value < 500))
+	{
+		IRDA_DEBUG(0, 
+			   "%s(), adjusting max turn time from %d to 500 ms\n",
+			   __FUNCTION__, qos->max_turn_time.value);
+		qos->max_turn_time.value = 500;
+	}
+	
+	/*
+	 * The data size must be adjusted according to the baud rate and max 
+	 * turn time
+	 */
+	index = value_index(qos->data_size.value, data_sizes, 6);
+	line_capacity = irlap_max_line_capacity(qos->baud_rate.value, 
+						qos->max_turn_time.value);
+
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	while ((qos->data_size.value > line_capacity) && (index > 0)) {
+		qos->data_size.value = data_sizes[index--];
+		IRDA_DEBUG(2, "%s(), reducing data size to %d\n",
+			   __FUNCTION__, qos->data_size.value);
+	}
+#else /* Use method described in section 6.6.11 of IrLAP */
+	while (irlap_requested_line_capacity(qos) > line_capacity) {
+		IRDA_ASSERT(index != 0, return;);
+
+		/* Must be able to send at least one frame */
+		if (qos->window_size.value > 1) {
+			qos->window_size.value--;
+			IRDA_DEBUG(2, "%s(), reducing window size to %d\n",
+				   __FUNCTION__, qos->window_size.value);
+		} else if (index > 1) {
+			qos->data_size.value = data_sizes[index--];
+			IRDA_DEBUG(2, "%s(), reducing data size to %d\n",
+				   __FUNCTION__, qos->data_size.value);
+		} else {
+			IRDA_WARNING("%s(), nothing more we can do!\n",
+				     __FUNCTION__);
+		}
+	}
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+	/*
+	 * Fix tx data size according to user limits - Jean II
+	 */
+	if (qos->data_size.value > sysctl_max_tx_data_size)
+		/* Allow non discrete adjustement to avoid loosing capacity */
+		qos->data_size.value = sysctl_max_tx_data_size;
+	/*
+	 * Override Tx window if user request it. - Jean II
+	 */
+	if (qos->window_size.value > sysctl_max_tx_window)
+		qos->window_size.value = sysctl_max_tx_window;
+}
+
+/*
+ * Function irlap_negotiate (qos_device, qos_session, skb)
+ *
+ *    Negotiate QoS values, not really that much negotiation :-)
+ *    We just set the QoS capabilities for the peer station
+ *
+ */
+int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb) 
+{
+	int ret;
+	
+	ret = irda_param_extract_all(self, skb->data, skb->len, 
+				     &irlap_param_info);
+	
+	/* Convert the negotiated bits to values */
+	irda_qos_bits_to_value(&self->qos_tx);
+	irda_qos_bits_to_value(&self->qos_rx);
+
+	irlap_adjust_qos_settings(&self->qos_tx);
+
+	IRDA_DEBUG(2, "Setting BAUD_RATE to %d bps.\n", 
+		   self->qos_tx.baud_rate.value);
+	IRDA_DEBUG(2, "Setting DATA_SIZE to %d bytes\n",
+		   self->qos_tx.data_size.value);
+	IRDA_DEBUG(2, "Setting WINDOW_SIZE to %d\n", 
+		   self->qos_tx.window_size.value);
+	IRDA_DEBUG(2, "Setting XBOFS to %d\n", 
+		   self->qos_tx.additional_bofs.value);
+	IRDA_DEBUG(2, "Setting MAX_TURN_TIME to %d ms.\n",
+		   self->qos_tx.max_turn_time.value);
+	IRDA_DEBUG(2, "Setting MIN_TURN_TIME to %d usecs.\n",
+		   self->qos_tx.min_turn_time.value);
+	IRDA_DEBUG(2, "Setting LINK_DISC to %d secs.\n", 
+		   self->qos_tx.link_disc_time.value);
+	return ret;
+}
+
+/*
+ * Function irlap_insert_negotiation_params (qos, fp)
+ *
+ *    Insert QoS negotiaion pararameters into frame
+ *
+ */
+int irlap_insert_qos_negotiation_params(struct irlap_cb *self, 
+					struct sk_buff *skb)
+{
+	int ret;
+
+	/* Insert data rate */
+	ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert max turnaround time */
+	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert data size */
+	ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert window size */
+	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert additional BOFs */
+	ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert minimum turnaround time */
+	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	/* Insert link disconnect/threshold time */
+	ret = irda_param_insert(self, PI_LINK_DISC, skb->tail, 
+				skb_tailroom(skb), &irlap_param_info);
+	if (ret < 0)
+		return ret;
+	skb_put(skb, ret);
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_baud_rate (instance, param, get)
+ *
+ *    Negotiate data-rate
+ *
+ */
+static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get)
+{
+	__u16 final;
+
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+
+	if (get) {
+		param->pv.i = self->qos_rx.baud_rate.bits;
+		IRDA_DEBUG(2, "%s(), baud rate = 0x%02x\n", 
+			   __FUNCTION__, param->pv.i);		
+	} else {
+		/* 
+		 *  Stations must agree on baud rate, so calculate
+		 *  intersection 
+		 */
+		IRDA_DEBUG(2, "Requested BAUD_RATE: 0x%04x\n", (__u16) param->pv.i);
+		final = (__u16) param->pv.i & self->qos_rx.baud_rate.bits;
+
+		IRDA_DEBUG(2, "Final BAUD_RATE: 0x%04x\n", final);
+		self->qos_tx.baud_rate.bits = final;
+		self->qos_rx.baud_rate.bits = final;
+	}
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_link_disconnect (instance, param, get)
+ *
+ *    Negotiate link disconnect/threshold time. 
+ *
+ */
+static int irlap_param_link_disconnect(void *instance, irda_param_t *param, 
+				       int get)
+{
+	__u16 final;
+	
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.link_disc_time.bits;
+	else {
+		/*  
+		 *  Stations must agree on link disconnect/threshold 
+		 *  time.
+		 */
+		IRDA_DEBUG(2, "LINK_DISC: %02x\n", (__u8) param->pv.i);
+		final = (__u8) param->pv.i & self->qos_rx.link_disc_time.bits;
+
+		IRDA_DEBUG(2, "Final LINK_DISC: %02x\n", final);
+		self->qos_tx.link_disc_time.bits = final;
+		self->qos_rx.link_disc_time.bits = final;
+	}
+	return 0;
+}
+
+/*
+ * Function irlap_param_max_turn_time (instance, param, get)
+ *
+ *    Negotiate the maximum turnaround time. This is a type 1 parameter and
+ *    will be negotiated independently for each station
+ *
+ */
+static int irlap_param_max_turn_time(void *instance, irda_param_t *param, 
+				     int get)
+{
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.max_turn_time.bits;
+	else
+		self->qos_tx.max_turn_time.bits = (__u8) param->pv.i;
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_data_size (instance, param, get)
+ *
+ *    Negotiate the data size. This is a type 1 parameter and
+ *    will be negotiated independently for each station
+ *
+ */
+static int irlap_param_data_size(void *instance, irda_param_t *param, int get)
+{
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.data_size.bits;
+	else
+		self->qos_tx.data_size.bits = (__u8) param->pv.i;
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_window_size (instance, param, get)
+ *
+ *    Negotiate the window size. This is a type 1 parameter and
+ *    will be negotiated independently for each station
+ *
+ */
+static int irlap_param_window_size(void *instance, irda_param_t *param, 
+				   int get)
+{
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.window_size.bits;
+	else
+		self->qos_tx.window_size.bits = (__u8) param->pv.i;
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_additional_bofs (instance, param, get)
+ *
+ *    Negotiate additional BOF characters. This is a type 1 parameter and
+ *    will be negotiated independently for each station.
+ */
+static int irlap_param_additional_bofs(void *instance, irda_param_t *param, int get)
+{
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.additional_bofs.bits;
+	else
+		self->qos_tx.additional_bofs.bits = (__u8) param->pv.i;
+
+	return 0;
+}
+
+/*
+ * Function irlap_param_min_turn_time (instance, param, get)
+ *
+ *    Negotiate the minimum turn around time. This is a type 1 parameter and
+ *    will be negotiated independently for each station
+ */
+static int irlap_param_min_turn_time(void *instance, irda_param_t *param, 
+				     int get)
+{
+	struct irlap_cb *self = (struct irlap_cb *) instance;
+	
+	IRDA_ASSERT(self != NULL, return -1;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
+	
+	if (get)
+		param->pv.i = self->qos_rx.min_turn_time.bits;
+	else
+		self->qos_tx.min_turn_time.bits = (__u8) param->pv.i;
+
+	return 0;
+}
+
+/*
+ * Function irlap_max_line_capacity (speed, max_turn_time, min_turn_time)
+ *
+ *    Calculate the maximum line capacity
+ *
+ */
+__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time)
+{
+	__u32 line_capacity;
+	int i,j;
+
+	IRDA_DEBUG(2, "%s(), speed=%d, max_turn_time=%d\n",
+		   __FUNCTION__, speed, max_turn_time);
+
+	i = value_index(speed, baud_rates, 10);
+	j = value_index(max_turn_time, max_turn_times, 4);
+
+	IRDA_ASSERT(((i >=0) && (i <10)), return 0;);
+	IRDA_ASSERT(((j >=0) && (j <4)), return 0;);
+
+	line_capacity = max_line_capacities[i][j];
+
+	IRDA_DEBUG(2, "%s(), line capacity=%d bytes\n", 
+		   __FUNCTION__, line_capacity);
+	
+	return line_capacity;
+}
+
+#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
+static __u32 irlap_requested_line_capacity(struct qos_info *qos)
+{
+	__u32 line_capacity;
+
+	line_capacity = qos->window_size.value *
+		(qos->data_size.value + 6 + qos->additional_bofs.value) +
+		irlap_min_turn_time_in_bytes(qos->baud_rate.value,
+					     qos->min_turn_time.value);
+
+	IRDA_DEBUG(2, "%s(), requested line capacity=%d\n",
+		   __FUNCTION__, line_capacity);
+
+	return line_capacity;
+}
+#endif
+
+void irda_qos_bits_to_value(struct qos_info *qos)
+{
+	int index;
+
+	IRDA_ASSERT(qos != NULL, return;);
+	
+	index = msb_index(qos->baud_rate.bits);
+	qos->baud_rate.value = baud_rates[index];
+
+	index = msb_index(qos->data_size.bits);
+	qos->data_size.value = data_sizes[index];
+
+	index = msb_index(qos->window_size.bits);
+	qos->window_size.value = index+1;
+
+	index = msb_index(qos->min_turn_time.bits);
+	qos->min_turn_time.value = min_turn_times[index];
+	
+	index = msb_index(qos->max_turn_time.bits);
+	qos->max_turn_time.value = max_turn_times[index];
+
+	index = msb_index(qos->link_disc_time.bits);
+	qos->link_disc_time.value = link_disc_times[index];
+	
+	index = msb_index(qos->additional_bofs.bits);
+	qos->additional_bofs.value = add_bofs[index];
+}
+EXPORT_SYMBOL(irda_qos_bits_to_value);
diff --git a/net/irda/timer.c b/net/irda/timer.c
new file mode 100644
index 00000000000..0e17f976add
--- /dev/null
+++ b/net/irda/timer.c
@@ -0,0 +1,233 @@
+/*********************************************************************
+ *                
+ * Filename:      timer.c
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sat Aug 16 00:59:29 1997
+ * Modified at:   Wed Dec  8 12:50:34 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <asm/system.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+
+#include <net/irda/timer.h>
+#include <net/irda/irda.h>
+#include <net/irda/irda_device.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlmp.h>
+
+extern int  sysctl_slot_timeout;
+
+static void irlap_slot_timer_expired(void* data);
+static void irlap_query_timer_expired(void* data);
+static void irlap_final_timer_expired(void* data);
+static void irlap_wd_timer_expired(void* data);
+static void irlap_backoff_timer_expired(void* data);
+static void irlap_media_busy_expired(void* data); 
+
+void irlap_start_slot_timer(struct irlap_cb *self, int timeout)
+{
+	irda_start_timer(&self->slot_timer, timeout, (void *) self, 
+			 irlap_slot_timer_expired);
+}
+
+void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
+{
+	int timeout;
+
+	/* Calculate when the peer discovery should end. Normally, we
+	 * get the end-of-discovery frame, so this is just in case
+	 * we miss it.
+	 * Basically, we multiply the number of remaining slots by our
+	 * slot time, plus add some extra time to properly receive the last
+	 * discovery packet (which is longer due to extra discovery info),
+	 * to avoid messing with for incomming connections requests and
+	 * to accomodate devices that perform discovery slower than us.
+	 * Jean II */
+	timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s)
+		   + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT);
+
+	/* Set or re-set the timer. We reset the timer for each received
+	 * discovery query, which allow us to automatically adjust to
+	 * the speed of the peer discovery (faster or slower). Jean II */
+	irda_start_timer( &self->query_timer, timeout, (void *) self, 
+			  irlap_query_timer_expired);
+}
+
+void irlap_start_final_timer(struct irlap_cb *self, int timeout)
+{
+	irda_start_timer(&self->final_timer, timeout, (void *) self, 
+			 irlap_final_timer_expired);
+}
+
+void irlap_start_wd_timer(struct irlap_cb *self, int timeout)
+{
+	irda_start_timer(&self->wd_timer, timeout, (void *) self, 
+			 irlap_wd_timer_expired);
+}
+
+void irlap_start_backoff_timer(struct irlap_cb *self, int timeout)
+{
+	irda_start_timer(&self->backoff_timer, timeout, (void *) self, 
+			 irlap_backoff_timer_expired);
+}
+
+void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout)
+{
+	irda_start_timer(&self->media_busy_timer, timeout, 
+			 (void *) self, irlap_media_busy_expired);
+}
+
+void irlap_stop_mbusy_timer(struct irlap_cb *self)
+{
+	/* If timer is activated, kill it! */
+	del_timer(&self->media_busy_timer);
+
+	/* If we are in NDM, there is a bunch of events in LAP that
+	 * that be pending due to the media_busy condition, such as
+	 * CONNECT_REQUEST and SEND_UI_FRAME. If we don't generate
+	 * an event, they will wait forever...
+	 * Jean II */
+	if (self->state == LAP_NDM)
+		irlap_do_event(self, MEDIA_BUSY_TIMER_EXPIRED, NULL, NULL);
+}
+
+void irlmp_start_watchdog_timer(struct lsap_cb *self, int timeout) 
+{
+	irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
+			 irlmp_watchdog_timer_expired);
+}
+
+void irlmp_start_discovery_timer(struct irlmp_cb *self, int timeout) 
+{
+	irda_start_timer(&self->discovery_timer, timeout, (void *) self,
+			 irlmp_discovery_timer_expired);
+}
+
+void irlmp_start_idle_timer(struct lap_cb *self, int timeout) 
+{
+	irda_start_timer(&self->idle_timer, timeout, (void *) self,
+			 irlmp_idle_timer_expired);
+}
+
+void irlmp_stop_idle_timer(struct lap_cb *self) 
+{
+	/* If timer is activated, kill it! */
+	del_timer(&self->idle_timer);
+}
+
+/*
+ * Function irlap_slot_timer_expired (data)
+ *
+ *    IrLAP slot timer has expired
+ *
+ */
+static void irlap_slot_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlap_do_event(self, SLOT_TIMER_EXPIRED, NULL, NULL);
+} 
+
+/*
+ * Function irlap_query_timer_expired (data)
+ *
+ *    IrLAP query timer has expired
+ *
+ */
+static void irlap_query_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlap_do_event(self, QUERY_TIMER_EXPIRED, NULL, NULL);
+} 
+
+/*
+ * Function irda_final_timer_expired (data)
+ *
+ *    
+ *
+ */
+static void irlap_final_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+
+	irlap_do_event(self, FINAL_TIMER_EXPIRED, NULL, NULL);
+}
+
+/*
+ * Function irda_wd_timer_expired (data)
+ *
+ *    
+ *
+ */
+static void irlap_wd_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	
+	irlap_do_event(self, WD_TIMER_EXPIRED, NULL, NULL);
+}
+
+/*
+ * Function irda_backoff_timer_expired (data)
+ *
+ *    
+ *
+ */
+static void irlap_backoff_timer_expired(void *data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+	
+	IRDA_ASSERT(self != NULL, return;);
+	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
+	
+	irlap_do_event(self, BACKOFF_TIMER_EXPIRED, NULL, NULL);
+}
+
+
+/*
+ * Function irtty_media_busy_expired (data)
+ *
+ *    
+ */
+void irlap_media_busy_expired(void* data)
+{
+	struct irlap_cb *self = (struct irlap_cb *) data;
+
+	IRDA_ASSERT(self != NULL, return;);
+
+	irda_device_set_media_busy(self->netdev, FALSE);
+	/* Note : the LAP event will be send in irlap_stop_mbusy_timer(),
+	* to catch other cases where the flag is cleared (for example
+	* after a discovery) - Jean II */
+}
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
new file mode 100644
index 00000000000..87130c1c869
--- /dev/null
+++ b/net/irda/wrapper.c
@@ -0,0 +1,491 @@
+/*********************************************************************
+ *
+ * Filename:      wrapper.c
+ * Version:       1.2
+ * Description:   IrDA SIR async wrapper layer
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Fri Jan 28 13:21:09 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * Modified at:   Fri May 28  3:11 CST 1999
+ * Modified by:   Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
+ *
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Troms� admit liability nor
+ *     provide warranty for any of this software. This material is
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/wrapper.h>
+#include <net/irda/crc.h>
+#include <net/irda/irlap.h>
+#include <net/irda/irlap_frame.h>
+#include <net/irda/irda_device.h>
+
+/************************** FRAME WRAPPING **************************/
+/*
+ * Unwrap and unstuff SIR frames
+ *
+ * Note : at FIR and MIR, HDLC framing is used and usually handled
+ * by the controller, so we come here only for SIR... Jean II
+ */
+
+/*
+ * Function stuff_byte (byte, buf)
+ *
+ *    Byte stuff one single byte and put the result in buffer pointed to by
+ *    buf. The buffer must at all times be able to have two bytes inserted.
+ *
+ * This is in a tight loop, better inline it, so need to be prior to callers.
+ * (2000 bytes on P6 200MHz, non-inlined ~370us, inline ~170us) - Jean II
+ */
+static inline int stuff_byte(__u8 byte, __u8 *buf)
+{
+	switch (byte) {
+	case BOF: /* FALLTHROUGH */
+	case EOF: /* FALLTHROUGH */
+	case CE:
+		/* Insert transparently coded */
+		buf[0] = CE;               /* Send link escape */
+		buf[1] = byte^IRDA_TRANS;    /* Complement bit 5 */
+		return 2;
+		/* break; */
+	default:
+		 /* Non-special value, no transparency required */
+		buf[0] = byte;
+		return 1;
+		/* break; */
+	}
+}
+
+/*
+ * Function async_wrap (skb, *tx_buff, buffsize)
+ *
+ *    Makes a new buffer with wrapping and stuffing, should check that
+ *    we don't get tx buffer overflow.
+ */
+int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize)
+{
+	struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
+	int xbofs;
+	int i;
+	int n;
+	union {
+		__u16 value;
+		__u8 bytes[2];
+	} fcs;
+
+	/* Initialize variables */
+	fcs.value = INIT_FCS;
+	n = 0;
+
+	/*
+	 *  Send  XBOF's for required min. turn time and for the negotiated
+	 *  additional XBOFS
+	 */
+
+	if (cb->magic != LAP_MAGIC) {
+		/*
+		 * This will happen for all frames sent from user-space.
+		 * Nothing to worry about, but we set the default number of
+		 * BOF's
+		 */
+		IRDA_DEBUG(1, "%s(), wrong magic in skb!\n", __FUNCTION__);
+		xbofs = 10;
+	} else
+		xbofs = cb->xbofs + cb->xbofs_delay;
+
+	IRDA_DEBUG(4, "%s(), xbofs=%d\n", __FUNCTION__, xbofs);
+
+	/* Check that we never use more than 115 + 48 xbofs */
+	if (xbofs > 163) {
+		IRDA_DEBUG(0, "%s(), too many xbofs (%d)\n", __FUNCTION__,
+			   xbofs);
+		xbofs = 163;
+	}
+
+	memset(tx_buff + n, XBOF, xbofs);
+	n += xbofs;
+
+	/* Start of packet character BOF */
+	tx_buff[n++] = BOF;
+
+	/* Insert frame and calc CRC */
+	for (i=0; i < skb->len; i++) {
+		/*
+		 *  Check for the possibility of tx buffer overflow. We use
+		 *  bufsize-5 since the maximum number of bytes that can be
+		 *  transmitted after this point is 5.
+		 */
+		if(n >= (buffsize-5)) {
+			IRDA_ERROR("%s(), tx buffer overflow (n=%d)\n",
+				   __FUNCTION__, n);
+			return n;
+		}
+
+		n += stuff_byte(skb->data[i], tx_buff+n);
+		fcs.value = irda_fcs(fcs.value, skb->data[i]);
+	}
+
+	/* Insert CRC in little endian format (LSB first) */
+	fcs.value = ~fcs.value;
+#ifdef __LITTLE_ENDIAN
+	n += stuff_byte(fcs.bytes[0], tx_buff+n);
+	n += stuff_byte(fcs.bytes[1], tx_buff+n);
+#else /* ifdef __BIG_ENDIAN */
+	n += stuff_byte(fcs.bytes[1], tx_buff+n);
+	n += stuff_byte(fcs.bytes[0], tx_buff+n);
+#endif
+	tx_buff[n++] = EOF;
+
+	return n;
+}
+EXPORT_SYMBOL(async_wrap_skb);
+
+/************************* FRAME UNWRAPPING *************************/
+/*
+ * Unwrap and unstuff SIR frames
+ *
+ * Complete rewrite by Jean II :
+ * More inline, faster, more compact, more logical. Jean II
+ * (16 bytes on P6 200MHz, old 5 to 7 us, new 4 to 6 us)
+ * (24 bytes on P6 200MHz, old 9 to 10 us, new 7 to 8 us)
+ * (for reference, 115200 b/s is 1 byte every 69 us)
+ * And reduce wrapper.o by ~900B in the process ;-)
+ *
+ * Then, we have the addition of ZeroCopy, which is optional
+ * (i.e. the driver must initiate it) and improve final processing.
+ * (2005 B frame + EOF on P6 200MHz, without 30 to 50 us, with 10 to 25 us)
+ *
+ * Note : at FIR and MIR, HDLC framing is used and usually handled
+ * by the controller, so we come here only for SIR... Jean II
+ */
+
+/*
+ * We can also choose where we want to do the CRC calculation. We can
+ * do it "inline", as we receive the bytes, or "postponed", when
+ * receiving the End-Of-Frame.
+ * (16 bytes on P6 200MHz, inlined 4 to 6 us, postponed 4 to 5 us)
+ * (24 bytes on P6 200MHz, inlined 7 to 8 us, postponed 5 to 7 us)
+ * With ZeroCopy :
+ * (2005 B frame on P6 200MHz, inlined 10 to 25 us, postponed 140 to 180 us)
+ * Without ZeroCopy :
+ * (2005 B frame on P6 200MHz, inlined 30 to 50 us, postponed 150 to 180 us)
+ * (Note : numbers taken with irq disabled)
+ *
+ * From those numbers, it's not clear which is the best strategy, because
+ * we end up running through a lot of data one way or another (i.e. cache
+ * misses). I personally prefer to avoid the huge latency spike of the
+ * "postponed" solution, because it come just at the time when we have
+ * lot's of protocol processing to do and it will hurt our ability to
+ * reach low link turnaround times... Jean II
+ */
+//#define POSTPONE_RX_CRC
+
+/*
+ * Function async_bump (buf, len, stats)
+ *
+ *    Got a frame, make a copy of it, and pass it up the stack! We can try
+ *    to inline it since it's only called from state_inside_frame
+ */
+static inline void
+async_bump(struct net_device *dev,
+	   struct net_device_stats *stats,
+	   iobuff_t *rx_buff)
+{
+	struct sk_buff *newskb;
+	struct sk_buff *dataskb;
+	int		docopy;
+
+	/* Check if we need to copy the data to a new skb or not.
+	 * If the driver doesn't use ZeroCopy Rx, we have to do it.
+	 * With ZeroCopy Rx, the rx_buff already point to a valid
+	 * skb. But, if the frame is small, it is more efficient to
+	 * copy it to save memory (copy will be fast anyway - that's
+	 * called Rx-copy-break). Jean II */
+	docopy = ((rx_buff->skb == NULL) ||
+		  (rx_buff->len < IRDA_RX_COPY_THRESHOLD));
+
+	/* Allocate a new skb */
+	newskb = dev_alloc_skb(docopy ? rx_buff->len + 1 : rx_buff->truesize);
+	if (!newskb)  {
+		stats->rx_dropped++;
+		/* We could deliver the current skb if doing ZeroCopy Rx,
+		 * but this would stall the Rx path. Better drop the
+		 * packet... Jean II */
+		return;
+	}
+
+	/* Align IP header to 20 bytes (i.e. increase skb->data)
+	 * Note this is only useful with IrLAN, as PPP has a variable
+	 * header size (2 or 1 bytes) - Jean II */
+	skb_reserve(newskb, 1);
+
+	if(docopy) {
+		/* Copy data without CRC (lenght already checked) */
+		memcpy(newskb->data, rx_buff->data, rx_buff->len - 2);
+		/* Deliver this skb */
+		dataskb = newskb;
+	} else {
+		/* We are using ZeroCopy. Deliver old skb */
+		dataskb = rx_buff->skb;
+		/* And hook the new skb to the rx_buff */
+		rx_buff->skb = newskb;
+		rx_buff->head = newskb->data;	/* NOT newskb->head */
+		//printk(KERN_DEBUG "ZeroCopy : len = %d, dataskb = %p, newskb = %p\n", rx_buff->len, dataskb, newskb);
+	}
+
+	/* Set proper length on skb (without CRC) */
+	skb_put(dataskb, rx_buff->len - 2);
+
+	/* Feed it to IrLAP layer */
+	dataskb->dev = dev;
+	dataskb->mac.raw  = dataskb->data;
+	dataskb->protocol = htons(ETH_P_IRDA);
+
+	netif_rx(dataskb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += rx_buff->len;
+
+	/* Clean up rx_buff (redundant with async_unwrap_bof() ???) */
+	rx_buff->data = rx_buff->head;
+	rx_buff->len = 0;
+}
+
+/*
+ * Function async_unwrap_bof(dev, byte)
+ *
+ *    Handle Beginning Of Frame character received within a frame
+ *
+ */
+static inline void
+async_unwrap_bof(struct net_device *dev,
+		 struct net_device_stats *stats,
+		 iobuff_t *rx_buff, __u8 byte)
+{
+	switch(rx_buff->state) {
+	case LINK_ESCAPE:
+	case INSIDE_FRAME:
+		/* Not supposed to happen, the previous frame is not
+		 * finished - Jean II */
+		IRDA_DEBUG(1, "%s(), Discarding incomplete frame\n",
+			   __FUNCTION__);
+		stats->rx_errors++;
+		stats->rx_missed_errors++;
+		irda_device_set_media_busy(dev, TRUE);
+		break;
+
+	case OUTSIDE_FRAME:
+	case BEGIN_FRAME:
+	default:
+		/* We may receive multiple BOF at the start of frame */ 
+		break;
+	}
+
+	/* Now receiving frame */
+	rx_buff->state = BEGIN_FRAME;
+	rx_buff->in_frame = TRUE;
+
+	/* Time to initialize receive buffer */
+	rx_buff->data = rx_buff->head;
+	rx_buff->len = 0;
+	rx_buff->fcs = INIT_FCS;
+}
+
+/*
+ * Function async_unwrap_eof(dev, byte)
+ *
+ *    Handle End Of Frame character received within a frame
+ *
+ */
+static inline void
+async_unwrap_eof(struct net_device *dev,
+		 struct net_device_stats *stats,
+		 iobuff_t *rx_buff, __u8 byte)
+{
+#ifdef POSTPONE_RX_CRC
+	int	i;
+#endif
+
+	switch(rx_buff->state) {
+	case OUTSIDE_FRAME:
+		/* Probably missed the BOF */
+		stats->rx_errors++;
+		stats->rx_missed_errors++;
+		irda_device_set_media_busy(dev, TRUE);
+		break;
+
+	case BEGIN_FRAME:
+	case LINK_ESCAPE:
+	case INSIDE_FRAME:
+	default:
+		/* Note : in the case of BEGIN_FRAME and LINK_ESCAPE,
+		 * the fcs will most likely not match and generate an
+		 * error, as expected - Jean II */
+		rx_buff->state = OUTSIDE_FRAME;
+		rx_buff->in_frame = FALSE;
+
+#ifdef POSTPONE_RX_CRC
+		/* If we haven't done the CRC as we receive bytes, we
+		 * must do it now... Jean II */
+		for(i = 0; i < rx_buff->len; i++)
+			rx_buff->fcs = irda_fcs(rx_buff->fcs,
+						rx_buff->data[i]);
+#endif
+
+		/* Test FCS and signal success if the frame is good */
+		if (rx_buff->fcs == GOOD_FCS) {
+			/* Deliver frame */
+			async_bump(dev, stats, rx_buff);
+			break;
+		} else {
+			/* Wrong CRC, discard frame!  */
+			irda_device_set_media_busy(dev, TRUE);
+
+			IRDA_DEBUG(1, "%s(), crc error\n", __FUNCTION__);
+			stats->rx_errors++;
+			stats->rx_crc_errors++;
+		}
+		break;
+	}
+}
+
+/*
+ * Function async_unwrap_ce(dev, byte)
+ *
+ *    Handle Character Escape character received within a frame
+ *
+ */
+static inline void
+async_unwrap_ce(struct net_device *dev,
+		 struct net_device_stats *stats,
+		 iobuff_t *rx_buff, __u8 byte)
+{
+	switch(rx_buff->state) {
+	case OUTSIDE_FRAME:
+		/* Activate carrier sense */
+		irda_device_set_media_busy(dev, TRUE);
+		break;
+
+	case LINK_ESCAPE:
+		IRDA_WARNING("%s: state not defined\n", __FUNCTION__);
+		break;
+
+	case BEGIN_FRAME:
+	case INSIDE_FRAME:
+	default:
+		/* Stuffed byte coming */
+		rx_buff->state = LINK_ESCAPE;
+		break;
+	}
+}
+
+/*
+ * Function async_unwrap_other(dev, byte)
+ *
+ *    Handle other characters received within a frame
+ *
+ */
+static inline void
+async_unwrap_other(struct net_device *dev,
+		   struct net_device_stats *stats,
+		   iobuff_t *rx_buff, __u8 byte)
+{
+	switch(rx_buff->state) {
+		/* This is on the critical path, case are ordered by
+		 * probability (most frequent first) - Jean II */
+	case INSIDE_FRAME:
+		/* Must be the next byte of the frame */
+		if (rx_buff->len < rx_buff->truesize)  {
+			rx_buff->data[rx_buff->len++] = byte;
+#ifndef POSTPONE_RX_CRC
+			rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
+#endif
+		} else {
+			IRDA_DEBUG(1, "%s(), Rx buffer overflow, aborting\n",
+				   __FUNCTION__);
+			rx_buff->state = OUTSIDE_FRAME;
+		}
+		break;
+
+	case LINK_ESCAPE:
+		/*
+		 *  Stuffed char, complement bit 5 of byte
+		 *  following CE, IrLAP p.114
+		 */
+		byte ^= IRDA_TRANS;
+		if (rx_buff->len < rx_buff->truesize)  {
+			rx_buff->data[rx_buff->len++] = byte;
+#ifndef POSTPONE_RX_CRC
+			rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
+#endif
+			rx_buff->state = INSIDE_FRAME;
+		} else {
+			IRDA_DEBUG(1, "%s(), Rx buffer overflow, aborting\n",
+				   __FUNCTION__);
+			rx_buff->state = OUTSIDE_FRAME;
+		}
+		break;
+
+	case OUTSIDE_FRAME:
+		/* Activate carrier sense */
+		if(byte != XBOF)
+			irda_device_set_media_busy(dev, TRUE);
+		break;
+
+	case BEGIN_FRAME:
+	default:
+		rx_buff->data[rx_buff->len++] = byte;
+#ifndef POSTPONE_RX_CRC
+		rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
+#endif
+		rx_buff->state = INSIDE_FRAME;
+		break;
+	}
+}
+
+/*
+ * Function async_unwrap_char (dev, rx_buff, byte)
+ *
+ *    Parse and de-stuff frame received from the IrDA-port
+ *
+ * This is the main entry point for SIR drivers.
+ */
+void async_unwrap_char(struct net_device *dev,
+		       struct net_device_stats *stats,
+		       iobuff_t *rx_buff, __u8 byte)
+{
+	switch(byte) {
+	case CE:
+		async_unwrap_ce(dev, stats, rx_buff, byte);
+		break;
+	case BOF:
+		async_unwrap_bof(dev, stats, rx_buff, byte);
+		break;
+	case EOF:
+		async_unwrap_eof(dev, stats, rx_buff, byte);
+		break;
+	default:
+		async_unwrap_other(dev, stats, rx_buff, byte);
+		break;
+	}
+}
+EXPORT_SYMBOL(async_unwrap_char);
+
diff --git a/net/key/Makefile b/net/key/Makefile
new file mode 100644
index 00000000000..85760804247
--- /dev/null
+++ b/net/key/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the key AF.
+#
+
+obj-$(CONFIG_NET_KEY) += af_key.o
diff --git a/net/key/af_key.c b/net/key/af_key.c
new file mode 100644
index 00000000000..ce980aa94ed
--- /dev/null
+++ b/net/key/af_key.c
@@ -0,0 +1,2903 @@
+/*
+ * net/key/af_key.c	An implementation of PF_KEYv2 sockets.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Maxim Giryaev	<gem@asplinux.ru>
+ *		David S. Miller	<davem@redhat.com>
+ *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *		Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *		Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org>
+ *		Derek Atkins <derek@ihtfp.com>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <net/sock.h>
+
+#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
+#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
+
+
+/* List of all pfkey sockets. */
+static HLIST_HEAD(pfkey_table);
+static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait);
+static DEFINE_RWLOCK(pfkey_table_lock);
+static atomic_t pfkey_table_users = ATOMIC_INIT(0);
+
+static atomic_t pfkey_socks_nr = ATOMIC_INIT(0);
+
+struct pfkey_sock {
+	/* struct sock must be the first member of struct pfkey_sock */
+	struct sock	sk;
+	int		registered;
+	int		promisc;
+};
+
+static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
+{
+	return (struct pfkey_sock *)sk;
+}
+
+static void pfkey_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Attempt to release alive pfkey socket: %p\n", sk);
+		return;
+	}
+
+	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+
+	atomic_dec(&pfkey_socks_nr);
+}
+
+static void pfkey_table_grab(void)
+{
+	write_lock_bh(&pfkey_table_lock);
+
+	if (atomic_read(&pfkey_table_users)) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		add_wait_queue_exclusive(&pfkey_table_wait, &wait);
+		for(;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (atomic_read(&pfkey_table_users) == 0)
+				break;
+			write_unlock_bh(&pfkey_table_lock);
+			schedule();
+			write_lock_bh(&pfkey_table_lock);
+		}
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pfkey_table_wait, &wait);
+	}
+}
+
+static __inline__ void pfkey_table_ungrab(void)
+{
+	write_unlock_bh(&pfkey_table_lock);
+	wake_up(&pfkey_table_wait);
+}
+
+static __inline__ void pfkey_lock_table(void)
+{
+	/* read_lock() synchronizes us to pfkey_table_grab */
+
+	read_lock(&pfkey_table_lock);
+	atomic_inc(&pfkey_table_users);
+	read_unlock(&pfkey_table_lock);
+}
+
+static __inline__ void pfkey_unlock_table(void)
+{
+	if (atomic_dec_and_test(&pfkey_table_users))
+		wake_up(&pfkey_table_wait);
+}
+
+
+static struct proto_ops pfkey_ops;
+
+static void pfkey_insert(struct sock *sk)
+{
+	pfkey_table_grab();
+	sk_add_node(sk, &pfkey_table);
+	pfkey_table_ungrab();
+}
+
+static void pfkey_remove(struct sock *sk)
+{
+	pfkey_table_grab();
+	sk_del_node_init(sk);
+	pfkey_table_ungrab();
+}
+
+static struct proto key_proto = {
+	.name	  = "KEY",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct pfkey_sock),
+};
+
+static int pfkey_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+	if (protocol != PF_KEY_V2)
+		return -EPROTONOSUPPORT;
+
+	err = -ENOMEM;
+	sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1);
+	if (sk == NULL)
+		goto out;
+	
+	sock->ops = &pfkey_ops;
+	sock_init_data(sock, sk);
+
+	sk->sk_family = PF_KEY;
+	sk->sk_destruct = pfkey_sock_destruct;
+
+	atomic_inc(&pfkey_socks_nr);
+
+	pfkey_insert(sk);
+
+	return 0;
+out:
+	return err;
+}
+
+static int pfkey_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	pfkey_remove(sk);
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+	skb_queue_purge(&sk->sk_write_queue);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
+			       int allocation, struct sock *sk)
+{
+	int err = -ENOBUFS;
+
+	sock_hold(sk);
+	if (*skb2 == NULL) {
+		if (atomic_read(&skb->users) != 1) {
+			*skb2 = skb_clone(skb, allocation);
+		} else {
+			*skb2 = skb;
+			atomic_inc(&skb->users);
+		}
+	}
+	if (*skb2 != NULL) {
+		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
+			skb_orphan(*skb2);
+			skb_set_owner_r(*skb2, sk);
+			skb_queue_tail(&sk->sk_receive_queue, *skb2);
+			sk->sk_data_ready(sk, (*skb2)->len);
+			*skb2 = NULL;
+			err = 0;
+		}
+	}
+	sock_put(sk);
+	return err;
+}
+
+/* Send SKB to all pfkey sockets matching selected criteria.  */
+#define BROADCAST_ALL		0
+#define BROADCAST_ONE		1
+#define BROADCAST_REGISTERED	2
+#define BROADCAST_PROMISC_ONLY	4
+static int pfkey_broadcast(struct sk_buff *skb, int allocation,
+			   int broadcast_flags, struct sock *one_sk)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	struct sk_buff *skb2 = NULL;
+	int err = -ESRCH;
+
+	/* XXX Do we need something like netlink_overrun?  I think
+	 * XXX PF_KEY socket apps will not mind current behavior.
+	 */
+	if (!skb)
+		return -ENOMEM;
+
+	pfkey_lock_table();
+	sk_for_each(sk, node, &pfkey_table) {
+		struct pfkey_sock *pfk = pfkey_sk(sk);
+		int err2;
+
+		/* Yes, it means that if you are meant to receive this
+		 * pfkey message you receive it twice as promiscuous
+		 * socket.
+		 */
+		if (pfk->promisc)
+			pfkey_broadcast_one(skb, &skb2, allocation, sk);
+
+		/* the exact target will be processed later */
+		if (sk == one_sk)
+			continue;
+		if (broadcast_flags != BROADCAST_ALL) {
+			if (broadcast_flags & BROADCAST_PROMISC_ONLY)
+				continue;
+			if ((broadcast_flags & BROADCAST_REGISTERED) &&
+			    !pfk->registered)
+				continue;
+			if (broadcast_flags & BROADCAST_ONE)
+				continue;
+		}
+
+		err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
+
+		/* Error is cleare after succecful sending to at least one
+		 * registered KM */
+		if ((broadcast_flags & BROADCAST_REGISTERED) && err)
+			err = err2;
+	}
+	pfkey_unlock_table();
+
+	if (one_sk != NULL)
+		err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+
+	if (skb2)
+		kfree_skb(skb2);
+	kfree_skb(skb);
+	return err;
+}
+
+static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
+{
+	*new = *orig;
+}
+
+static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
+{
+	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
+	struct sadb_msg *hdr;
+
+	if (!skb)
+		return -ENOBUFS;
+
+	/* Woe be to the platform trying to support PFKEY yet
+	 * having normal errnos outside the 1-255 range, inclusive.
+	 */
+	err = -err;
+	if (err == ERESTARTSYS ||
+	    err == ERESTARTNOHAND ||
+	    err == ERESTARTNOINTR)
+		err = EINTR;
+	if (err >= 512)
+		err = EINVAL;
+	if (err <= 0 || err >= 256)
+		BUG();
+
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	pfkey_hdr_dup(hdr, orig);
+	hdr->sadb_msg_errno = (uint8_t) err;
+	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
+			     sizeof(uint64_t));
+
+	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk);
+
+	return 0;
+}
+
+static u8 sadb_ext_min_len[] = {
+	[SADB_EXT_RESERVED]		= (u8) 0,
+	[SADB_EXT_SA]			= (u8) sizeof(struct sadb_sa),
+	[SADB_EXT_LIFETIME_CURRENT]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_LIFETIME_HARD]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_LIFETIME_SOFT]	= (u8) sizeof(struct sadb_lifetime),
+	[SADB_EXT_ADDRESS_SRC]		= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_ADDRESS_DST]		= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_ADDRESS_PROXY]	= (u8) sizeof(struct sadb_address),
+	[SADB_EXT_KEY_AUTH]		= (u8) sizeof(struct sadb_key),
+	[SADB_EXT_KEY_ENCRYPT]		= (u8) sizeof(struct sadb_key),
+	[SADB_EXT_IDENTITY_SRC]		= (u8) sizeof(struct sadb_ident),
+	[SADB_EXT_IDENTITY_DST]		= (u8) sizeof(struct sadb_ident),
+	[SADB_EXT_SENSITIVITY]		= (u8) sizeof(struct sadb_sens),
+	[SADB_EXT_PROPOSAL]		= (u8) sizeof(struct sadb_prop),
+	[SADB_EXT_SUPPORTED_AUTH]	= (u8) sizeof(struct sadb_supported),
+	[SADB_EXT_SUPPORTED_ENCRYPT]	= (u8) sizeof(struct sadb_supported),
+	[SADB_EXT_SPIRANGE]		= (u8) sizeof(struct sadb_spirange),
+	[SADB_X_EXT_KMPRIVATE]		= (u8) sizeof(struct sadb_x_kmprivate),
+	[SADB_X_EXT_POLICY]		= (u8) sizeof(struct sadb_x_policy),
+	[SADB_X_EXT_SA2]		= (u8) sizeof(struct sadb_x_sa2),
+	[SADB_X_EXT_NAT_T_TYPE]		= (u8) sizeof(struct sadb_x_nat_t_type),
+	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
+	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
+	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
+};
+
+/* Verify sadb_address_{len,prefixlen} against sa_family.  */
+static int verify_address_len(void *p)
+{
+	struct sadb_address *sp = p;
+	struct sockaddr *addr = (struct sockaddr *)(sp + 1);
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int len;
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		len  = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1);
+		len /= sizeof(uint64_t);
+		if (sp->sadb_address_len != len ||
+		    sp->sadb_address_prefixlen > 32)
+			return -EINVAL;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		len  = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1);
+		len /= sizeof(uint64_t);
+		if (sp->sadb_address_len != len ||
+		    sp->sadb_address_prefixlen > 128)
+			return -EINVAL;
+		break;
+#endif
+	default:
+		/* It is user using kernel to keep track of security
+		 * associations for another protocol, such as
+		 * OSPF/RSVP/RIPV2/MIP.  It is user's job to verify
+		 * lengths.
+		 *
+		 * XXX Actually, association/policy database is not yet
+		 * XXX able to cope with arbitrary sockaddr families.
+		 * XXX When it can, remove this -EINVAL.  -DaveM
+		 */
+		return -EINVAL;
+		break;
+	};
+
+	return 0;
+}
+
+static int present_and_same_family(struct sadb_address *src,
+				   struct sadb_address *dst)
+{
+	struct sockaddr *s_addr, *d_addr;
+
+	if (!src || !dst)
+		return 0;
+
+	s_addr = (struct sockaddr *)(src + 1);
+	d_addr = (struct sockaddr *)(dst + 1);
+	if (s_addr->sa_family != d_addr->sa_family)
+		return 0;
+	if (s_addr->sa_family != AF_INET
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	    && s_addr->sa_family != AF_INET6
+#endif
+		)
+		return 0;
+
+	return 1;
+}
+
+static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	char *p = (char *) hdr;
+	int len = skb->len;
+
+	len -= sizeof(*hdr);
+	p += sizeof(*hdr);
+	while (len > 0) {
+		struct sadb_ext *ehdr = (struct sadb_ext *) p;
+		uint16_t ext_type;
+		int ext_len;
+
+		ext_len  = ehdr->sadb_ext_len;
+		ext_len *= sizeof(uint64_t);
+		ext_type = ehdr->sadb_ext_type;
+		if (ext_len < sizeof(uint64_t) ||
+		    ext_len > len ||
+		    ext_type == SADB_EXT_RESERVED)
+			return -EINVAL;
+
+		if (ext_type <= SADB_EXT_MAX) {
+			int min = (int) sadb_ext_min_len[ext_type];
+			if (ext_len < min)
+				return -EINVAL;
+			if (ext_hdrs[ext_type-1] != NULL)
+				return -EINVAL;
+			if (ext_type == SADB_EXT_ADDRESS_SRC ||
+			    ext_type == SADB_EXT_ADDRESS_DST ||
+			    ext_type == SADB_EXT_ADDRESS_PROXY ||
+			    ext_type == SADB_X_EXT_NAT_T_OA) {
+				if (verify_address_len(p))
+					return -EINVAL;
+			}				
+			ext_hdrs[ext_type-1] = p;
+		}
+		p   += ext_len;
+		len -= ext_len;
+	}
+
+	return 0;
+}
+
+static uint16_t
+pfkey_satype2proto(uint8_t satype)
+{
+	switch (satype) {
+	case SADB_SATYPE_UNSPEC:
+		return IPSEC_PROTO_ANY;
+	case SADB_SATYPE_AH:
+		return IPPROTO_AH;
+	case SADB_SATYPE_ESP:
+		return IPPROTO_ESP;
+	case SADB_X_SATYPE_IPCOMP:
+		return IPPROTO_COMP;
+		break;
+	default:
+		return 0;
+	}
+	/* NOTREACHED */
+}
+
+static uint8_t
+pfkey_proto2satype(uint16_t proto)
+{
+	switch (proto) {
+	case IPPROTO_AH:
+		return SADB_SATYPE_AH;
+	case IPPROTO_ESP:
+		return SADB_SATYPE_ESP;
+	case IPPROTO_COMP:
+		return SADB_X_SATYPE_IPCOMP;
+		break;
+	default:
+		return 0;
+	}
+	/* NOTREACHED */
+}
+
+/* BTW, this scheme means that there is no way with PFKEY2 sockets to
+ * say specifically 'just raw sockets' as we encode them as 255.
+ */
+
+static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
+{
+	return (proto == IPSEC_PROTO_ANY ? 0 : proto);
+}
+
+static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
+{
+	return (proto ? proto : IPSEC_PROTO_ANY);
+}
+
+static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr,
+				     xfrm_address_t *xaddr)
+{
+	switch (((struct sockaddr*)(addr + 1))->sa_family) {
+	case AF_INET:
+		xaddr->a4 = 
+			((struct sockaddr_in *)(addr + 1))->sin_addr.s_addr;
+		return AF_INET;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		memcpy(xaddr->a6, 
+		       &((struct sockaddr_in6 *)(addr + 1))->sin6_addr,
+		       sizeof(struct in6_addr));
+		return AF_INET6;
+#endif
+	default:
+		return 0;
+	}
+	/* NOTREACHED */
+}
+
+static struct  xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sadb_sa *sa;
+	struct sadb_address *addr;
+	uint16_t proto;
+	unsigned short family;
+	xfrm_address_t *xaddr;
+
+	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	if (sa == NULL)
+		return NULL;
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return NULL;
+
+	/* sadb_address_len should be checked by caller */
+	addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	if (addr == NULL)
+		return NULL;
+
+	family = ((struct sockaddr *)(addr + 1))->sa_family;
+	switch (family) {
+	case AF_INET:
+		xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr;
+		break;
+#endif
+	default:
+		xaddr = NULL;
+	}
+
+	if (!xaddr)
+		return NULL;
+
+	return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family);
+}
+
+#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
+static int
+pfkey_sockaddr_size(sa_family_t family)
+{
+	switch (family) {
+	case AF_INET:
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in));
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6));
+#endif
+	default:
+		return 0;
+	}
+	/* NOTREACHED */
+}
+
+static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_sa *sa;
+	struct sadb_lifetime *lifetime;
+	struct sadb_address *addr;
+	struct sadb_key *key;
+	struct sadb_x_sa2 *sa2;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int size;
+	int auth_key_size = 0;
+	int encrypt_key_size = 0;
+	int sockaddr_size;
+	struct xfrm_encap_tmpl *natt = NULL;
+
+	/* address family check */
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return ERR_PTR(-EINVAL);
+
+	/* base, SA, (lifetime (HSC),) address(SD), (address(P),)
+	   key(AE), (identity(SD),) (sensitivity)> */
+	size = sizeof(struct sadb_msg) +sizeof(struct sadb_sa) + 
+		sizeof(struct sadb_lifetime) +
+		((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) +
+		((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) +
+			sizeof(struct sadb_address)*2 + 
+				sockaddr_size*2 +
+					sizeof(struct sadb_x_sa2);
+	/* identity & sensitivity */
+
+	if ((x->props.family == AF_INET &&
+	     x->sel.saddr.a4 != x->props.saddr.a4)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	    || (x->props.family == AF_INET6 &&
+		memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr)))
+#endif
+		)
+		size += sizeof(struct sadb_address) + sockaddr_size;
+
+	if (add_keys) {
+		if (x->aalg && x->aalg->alg_key_len) {
+			auth_key_size = 
+				PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8); 
+			size += sizeof(struct sadb_key) + auth_key_size;
+		}
+		if (x->ealg && x->ealg->alg_key_len) {
+			encrypt_key_size = 
+				PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8); 
+			size += sizeof(struct sadb_key) + encrypt_key_size;
+		}
+	}
+	if (x->encap)
+		natt = x->encap;
+
+	if (natt && natt->encap_type) {
+		size += sizeof(struct sadb_x_nat_t_type);
+		size += sizeof(struct sadb_x_nat_t_port);
+		size += sizeof(struct sadb_x_nat_t_port);
+	}
+
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return ERR_PTR(-ENOBUFS);
+
+	/* call should fill header later */
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	memset(hdr, 0, size);	/* XXX do we need this ? */
+	hdr->sadb_msg_len = size / sizeof(uint64_t);
+
+	/* sa */
+	sa = (struct sadb_sa *)  skb_put(skb, sizeof(struct sadb_sa));
+	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
+	sa->sadb_sa_exttype = SADB_EXT_SA;
+	sa->sadb_sa_spi = x->id.spi;
+	sa->sadb_sa_replay = x->props.replay_window;
+	sa->sadb_sa_state = SADB_SASTATE_DYING;
+	if (x->km.state == XFRM_STATE_VALID && !x->km.dying)
+		sa->sadb_sa_state = SADB_SASTATE_MATURE;
+	else if (x->km.state == XFRM_STATE_ACQ)
+		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
+	else if (x->km.state == XFRM_STATE_EXPIRED)
+		sa->sadb_sa_state = SADB_SASTATE_DEAD;
+	sa->sadb_sa_auth = 0;
+	if (x->aalg) {
+		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+		sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0;
+	}
+	sa->sadb_sa_encrypt = 0;
+	BUG_ON(x->ealg && x->calg);
+	if (x->ealg) {
+		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0);
+		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+	}
+	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
+	if (x->calg) {
+		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0);
+		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
+	}
+
+	sa->sadb_sa_flags = 0;
+	if (x->props.flags & XFRM_STATE_NOECN)
+		sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
+
+	/* hard time */
+	if (hsc & 2) {
+		lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+							     sizeof(struct sadb_lifetime));
+		lifetime->sadb_lifetime_len =
+			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
+		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.hard_packet_limit);
+		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit);
+		lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds;
+		lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds;
+	}
+	/* soft time */
+	if (hsc & 1) {
+		lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+							     sizeof(struct sadb_lifetime));
+		lifetime->sadb_lifetime_len =
+			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
+		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.soft_packet_limit);
+		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit);
+		lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds;
+		lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds;
+	}
+	/* current time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb,
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
+	lifetime->sadb_lifetime_allocations = x->curlft.packets;
+	lifetime->sadb_lifetime_bytes = x->curlft.bytes;
+	lifetime->sadb_lifetime_addtime = x->curlft.add_time;
+	lifetime->sadb_lifetime_usetime = x->curlft.use_time;
+	/* src address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	/* "if the ports are non-zero, then the sadb_address_proto field, 
+	   normally zero, MUST be filled in with the transport 
+	   protocol's number." - RFC2367 */
+	addr->sadb_address_proto = 0; 
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+ 		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+ 		memcpy(&sin6->sin6_addr, x->props.saddr.a6,
+		       sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+ 	}
+#endif
+	else
+		BUG();
+
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = 0; 
+	addr->sadb_address_prefixlen = 32; /* XXX */ 
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->id.daddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+		if (x->sel.saddr.a4 != x->props.saddr.a4) {
+			addr = (struct sadb_address*) skb_put(skb, 
+				sizeof(struct sadb_address)+sockaddr_size);
+			addr->sadb_address_len = 
+				(sizeof(struct sadb_address)+sockaddr_size)/
+				sizeof(uint64_t);
+			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+			addr->sadb_address_proto =
+				pfkey_proto_from_xfrm(x->sel.proto);
+			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
+			addr->sadb_address_reserved = 0;
+
+			sin = (struct sockaddr_in *) (addr + 1);
+			sin->sin_family = AF_INET;
+			sin->sin_addr.s_addr = x->sel.saddr.a4;
+			sin->sin_port = x->sel.sport;
+			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		}
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+
+		if (memcmp (x->sel.saddr.a6, x->props.saddr.a6,
+			    sizeof(struct in6_addr))) {
+			addr = (struct sadb_address *) skb_put(skb, 
+				sizeof(struct sadb_address)+sockaddr_size);
+			addr->sadb_address_len = 
+				(sizeof(struct sadb_address)+sockaddr_size)/
+				sizeof(uint64_t);
+			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+			addr->sadb_address_proto =
+				pfkey_proto_from_xfrm(x->sel.proto);
+			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
+			addr->sadb_address_reserved = 0;
+
+			sin6 = (struct sockaddr_in6 *) (addr + 1);
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = x->sel.sport;
+			sin6->sin6_flowinfo = 0;
+			memcpy(&sin6->sin6_addr, x->sel.saddr.a6,
+			       sizeof(struct in6_addr));
+			sin6->sin6_scope_id = 0;
+		}
+	}
+#endif
+	else
+		BUG();
+
+	/* auth key */
+	if (add_keys && auth_key_size) {
+		key = (struct sadb_key *) skb_put(skb, 
+						  sizeof(struct sadb_key)+auth_key_size);
+		key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) /
+			sizeof(uint64_t);
+		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
+		key->sadb_key_bits = x->aalg->alg_key_len;
+		key->sadb_key_reserved = 0;
+		memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8);
+	}
+	/* encrypt key */
+	if (add_keys && encrypt_key_size) {
+		key = (struct sadb_key *) skb_put(skb, 
+						  sizeof(struct sadb_key)+encrypt_key_size);
+		key->sadb_key_len = (sizeof(struct sadb_key) + 
+				     encrypt_key_size) / sizeof(uint64_t);
+		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
+		key->sadb_key_bits = x->ealg->alg_key_len;
+		key->sadb_key_reserved = 0;
+		memcpy(key + 1, x->ealg->alg_key, 
+		       (x->ealg->alg_key_len+7)/8);
+	}
+
+	/* sa */
+	sa2 = (struct sadb_x_sa2 *)  skb_put(skb, sizeof(struct sadb_x_sa2));
+	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
+	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
+	sa2->sadb_x_sa2_mode = x->props.mode + 1;
+	sa2->sadb_x_sa2_reserved1 = 0;
+	sa2->sadb_x_sa2_reserved2 = 0;
+	sa2->sadb_x_sa2_sequence = 0;
+	sa2->sadb_x_sa2_reqid = x->props.reqid;
+
+	if (natt && natt->encap_type) {
+		struct sadb_x_nat_t_type *n_type;
+		struct sadb_x_nat_t_port *n_port;
+
+		/* type */
+		n_type = (struct sadb_x_nat_t_type*) skb_put(skb, sizeof(*n_type));
+		n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t);
+		n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
+		n_type->sadb_x_nat_t_type_type = natt->encap_type;
+		n_type->sadb_x_nat_t_type_reserved[0] = 0;
+		n_type->sadb_x_nat_t_type_reserved[1] = 0;
+		n_type->sadb_x_nat_t_type_reserved[2] = 0;
+
+		/* source port */
+		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
+		n_port->sadb_x_nat_t_port_port = natt->encap_sport;
+		n_port->sadb_x_nat_t_port_reserved = 0;
+
+		/* dest port */
+		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
+		n_port->sadb_x_nat_t_port_port = natt->encap_dport;
+		n_port->sadb_x_nat_t_port_reserved = 0;
+	}
+
+	return skb;
+}
+
+static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, 
+						void **ext_hdrs)
+{
+	struct xfrm_state *x; 
+	struct sadb_lifetime *lifetime;
+	struct sadb_sa *sa;
+	struct sadb_key *key;
+	uint16_t proto;
+	int err;
+	
+
+	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	if (!sa ||
+	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return ERR_PTR(-EINVAL);
+	if (hdr->sadb_msg_satype == SADB_SATYPE_ESP &&
+	    !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1])
+		return ERR_PTR(-EINVAL);
+	if (hdr->sadb_msg_satype == SADB_SATYPE_AH &&
+	    !ext_hdrs[SADB_EXT_KEY_AUTH-1])
+		return ERR_PTR(-EINVAL);
+	if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] !=
+	    !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1])
+		return ERR_PTR(-EINVAL);
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return ERR_PTR(-EINVAL);
+
+	/* default error is no buffer space */
+	err = -ENOBUFS;
+
+	/* RFC2367:
+
+   Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message.
+   SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not
+   sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state.
+   Therefore, the sadb_sa_state field of all submitted SAs MUST be
+   SADB_SASTATE_MATURE and the kernel MUST return an error if this is
+   not true.
+
+           However, KAME setkey always uses SADB_SASTATE_LARVAL.
+	   Hence, we have to _ignore_ sadb_sa_state, which is also reasonable.
+	 */
+	if (sa->sadb_sa_auth > SADB_AALG_MAX ||
+	    (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP &&
+	     sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
+	    sa->sadb_sa_encrypt > SADB_EALG_MAX)
+		return ERR_PTR(-EINVAL);
+	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	if (key != NULL &&
+	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
+	    ((key->sadb_key_bits+7) / 8 == 0 ||
+	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+		return ERR_PTR(-EINVAL);
+	key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
+	if (key != NULL &&
+	    sa->sadb_sa_encrypt != SADB_EALG_NULL &&
+	    ((key->sadb_key_bits+7) / 8 == 0 ||
+	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
+		return ERR_PTR(-EINVAL);
+
+	x = xfrm_state_alloc();
+	if (x == NULL)
+		return ERR_PTR(-ENOBUFS);
+
+	x->id.proto = proto;
+	x->id.spi = sa->sadb_sa_spi;
+	x->props.replay_window = sa->sadb_sa_replay;
+	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
+		x->props.flags |= XFRM_STATE_NOECN;
+	if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
+		x->props.flags |= XFRM_STATE_DECAP_DSCP;
+
+	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
+	if (lifetime != NULL) {
+		x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
+	if (lifetime != NULL) {
+		x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	if (sa->sadb_sa_auth) {
+		int keysize = 0;
+		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
+		if (!a) {
+			err = -ENOSYS;
+			goto out;
+		}
+		if (key)
+			keysize = (key->sadb_key_bits + 7) / 8;
+		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
+		if (!x->aalg)
+			goto out;
+		strcpy(x->aalg->alg_name, a->name);
+		x->aalg->alg_key_len = 0;
+		if (key) {
+			x->aalg->alg_key_len = key->sadb_key_bits;
+			memcpy(x->aalg->alg_key, key+1, keysize);
+		}
+		x->props.aalgo = sa->sadb_sa_auth;
+		/* x->algo.flags = sa->sadb_sa_flags; */
+	}
+	if (sa->sadb_sa_encrypt) {
+		if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) {
+			struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt);
+			if (!a) {
+				err = -ENOSYS;
+				goto out;
+			}
+			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
+			if (!x->calg)
+				goto out;
+			strcpy(x->calg->alg_name, a->name);
+			x->props.calgo = sa->sadb_sa_encrypt;
+		} else {
+			int keysize = 0;
+			struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt);
+			if (!a) {
+				err = -ENOSYS;
+				goto out;
+			}
+			key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
+			if (key)
+				keysize = (key->sadb_key_bits + 7) / 8;
+			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
+			if (!x->ealg)
+				goto out;
+			strcpy(x->ealg->alg_name, a->name);
+			x->ealg->alg_key_len = 0;
+			if (key) {
+				x->ealg->alg_key_len = key->sadb_key_bits;
+				memcpy(x->ealg->alg_key, key+1, keysize);
+			}
+			x->props.ealgo = sa->sadb_sa_encrypt;
+		}
+	}
+	/* x->algo.flags = sa->sadb_sa_flags; */
+
+	x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 
+						    &x->props.saddr);
+	if (!x->props.family) {
+		err = -EAFNOSUPPORT;
+		goto out;
+	}
+	pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
+				  &x->id.daddr);
+
+	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
+		struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
+		x->props.mode = sa2->sadb_x_sa2_mode;
+		if (x->props.mode)
+			x->props.mode--;
+		x->props.reqid = sa2->sadb_x_sa2_reqid;
+	}
+
+	if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
+		struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
+
+		/* Nobody uses this, but we try. */
+		x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
+		x->sel.prefixlen_s = addr->sadb_address_prefixlen;
+	}
+
+	if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
+		struct sadb_x_nat_t_type* n_type;
+		struct xfrm_encap_tmpl *natt;
+
+		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
+		if (!x->encap)
+			goto out;
+
+		natt = x->encap;
+		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
+		natt->encap_type = n_type->sadb_x_nat_t_type_type;
+
+		if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
+			struct sadb_x_nat_t_port* n_port =
+				ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
+			natt->encap_sport = n_port->sadb_x_nat_t_port_port;
+		}
+		if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
+			struct sadb_x_nat_t_port* n_port =
+				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
+			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
+		}
+	}
+
+	x->type = xfrm_get_type(proto, x->props.family);
+	if (x->type == NULL) {
+		err = -ENOPROTOOPT;
+		goto out;
+	}
+	if (x->type->init_state(x, NULL)) {
+		err = -EINVAL;
+		goto out;
+	}
+	x->km.seq = hdr->sadb_msg_seq;
+	x->km.state = XFRM_STATE_VALID;
+	return x;
+
+out:
+	x->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(x);
+	return ERR_PTR(err);
+}
+
+static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	return -EOPNOTSUPP;
+}
+
+static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sk_buff *resp_skb;
+	struct sadb_x_sa2 *sa2;
+	struct sadb_address *saddr, *daddr;
+	struct sadb_msg *out_hdr;
+	struct xfrm_state *x = NULL;
+	u8 mode;
+	u32 reqid;
+	u8 proto;
+	unsigned short family;
+	xfrm_address_t *xsaddr = NULL, *xdaddr = NULL;
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return -EINVAL;
+
+	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
+		mode = sa2->sadb_x_sa2_mode - 1;
+		reqid = sa2->sadb_x_sa2_reqid;
+	} else {
+		mode = 0;
+		reqid = 0;
+	}
+
+	saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
+	daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+
+	family = ((struct sockaddr *)(saddr + 1))->sa_family;
+	switch (family) {
+	case AF_INET:
+		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
+		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
+		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
+		break;
+#endif
+	}
+
+	if (hdr->sadb_msg_seq) {
+		x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
+		if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
+	if (!x)
+		x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family);
+
+	if (x == NULL)
+		return -ENOENT;
+
+	resp_skb = ERR_PTR(-ENOENT);
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		struct sadb_spirange *range = ext_hdrs[SADB_EXT_SPIRANGE-1];
+		u32 min_spi, max_spi;
+
+		if (range != NULL) {
+			min_spi = range->sadb_spirange_min;
+			max_spi = range->sadb_spirange_max;
+		} else {
+			min_spi = 0x100;
+			max_spi = 0x0fffffff;
+		}
+		xfrm_alloc_spi(x, htonl(min_spi), htonl(max_spi));
+		if (x->id.spi)
+			resp_skb = pfkey_xfrm_state2msg(x, 0, 3);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		xfrm_state_put(x);
+		return  PTR_ERR(resp_skb);
+	}
+
+	out_hdr = (struct sadb_msg *) resp_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_GETSPI;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+
+	xfrm_state_put(x);
+
+	pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk);
+
+	return 0;
+}
+
+static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct xfrm_state *x;
+
+	if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8)
+		return -EOPNOTSUPP;
+
+	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
+		return 0;
+
+	x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
+	if (x == NULL)
+		return 0;
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state == XFRM_STATE_ACQ) {
+		x->km.state = XFRM_STATE_ERROR;
+		wake_up(&km_waitq);
+	}
+	spin_unlock_bh(&x->lock);
+	xfrm_state_put(x);
+	return 0;
+}
+
+
+static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	struct xfrm_state *x;
+	int err;
+
+	xfrm_probe_algs();
+	
+	x = pfkey_msg2xfrm_state(hdr, ext_hdrs);
+	if (IS_ERR(x))
+		return PTR_ERR(x);
+
+	if (hdr->sadb_msg_type == SADB_ADD)
+		err = xfrm_state_add(x);
+	else
+		err = xfrm_state_update(x);
+
+	if (err < 0) {
+		x->km.state = XFRM_STATE_DEAD;
+		xfrm_state_put(x);
+		return err;
+	}
+
+	out_skb = pfkey_xfrm_state2msg(x, 0, 3);
+	if (IS_ERR(out_skb))
+		return  PTR_ERR(out_skb); /* XXX Should we return 0 here ? */
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+
+	return 0;
+}
+
+static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct xfrm_state *x;
+
+	if (!ext_hdrs[SADB_EXT_SA-1] ||
+	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+
+	x = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
+	if (x == NULL)
+		return -ESRCH;
+
+	if (xfrm_state_kern(x)) {
+		xfrm_state_put(x);
+		return -EPERM;
+	}
+	
+	xfrm_state_delete(x);
+	xfrm_state_put(x);
+
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, 
+			BROADCAST_ALL, sk);
+
+	return 0;
+}
+
+static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	__u8 proto;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	struct xfrm_state *x;
+
+	if (!ext_hdrs[SADB_EXT_SA-1] ||
+	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
+		return -EINVAL;
+
+	x = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
+	if (x == NULL)
+		return -ESRCH;
+
+	out_skb = pfkey_xfrm_state2msg(x, 1, 3);
+	proto = x->id.proto;
+	xfrm_state_put(x);
+	if (IS_ERR(out_skb))
+		return  PTR_ERR(out_skb);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_DUMP;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
+
+	return 0;
+}
+
+static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, int allocation)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	int len, auth_len, enc_len, i;
+
+	auth_len = xfrm_count_auth_supported();
+	if (auth_len) {
+		auth_len *= sizeof(struct sadb_alg);
+		auth_len += sizeof(struct sadb_supported);
+	}
+	
+	enc_len = xfrm_count_enc_supported();
+	if (enc_len) {
+		enc_len *= sizeof(struct sadb_alg);
+		enc_len += sizeof(struct sadb_supported);
+	}
+	
+	len = enc_len + auth_len + sizeof(struct sadb_msg);
+
+	skb = alloc_skb(len + 16, allocation);
+	if (!skb)
+		goto out_put_algs;
+
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(*hdr));
+	pfkey_hdr_dup(hdr, orig);
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_len = len / sizeof(uint64_t);
+
+	if (auth_len) {
+		struct sadb_supported *sp;
+		struct sadb_alg *ap;
+
+		sp = (struct sadb_supported *) skb_put(skb, auth_len);
+		ap = (struct sadb_alg *) (sp + 1);
+
+		sp->sadb_supported_len = auth_len / sizeof(uint64_t);
+		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
+
+		for (i = 0; ; i++) {
+			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
+			if (!aalg)
+				break;
+			if (aalg->available)
+				*ap++ = aalg->desc;
+		}
+	}
+
+	if (enc_len) {
+		struct sadb_supported *sp;
+		struct sadb_alg *ap;
+
+		sp = (struct sadb_supported *) skb_put(skb, enc_len);
+		ap = (struct sadb_alg *) (sp + 1);
+
+		sp->sadb_supported_len = enc_len / sizeof(uint64_t);
+		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
+
+		for (i = 0; ; i++) {
+			struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
+			if (!ealg)
+				break;
+			if (ealg->available)
+				*ap++ = ealg->desc;
+		}
+	}
+
+out_put_algs:
+	return skb;
+}
+
+static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_sock *pfk = pfkey_sk(sk);
+	struct sk_buff *supp_skb;
+
+	if (hdr->sadb_msg_satype > SADB_SATYPE_MAX)
+		return -EINVAL;
+
+	if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) {
+		if (pfk->registered&(1<<hdr->sadb_msg_satype))
+			return -EEXIST;
+		pfk->registered |= (1<<hdr->sadb_msg_satype);
+	}
+
+	xfrm_probe_algs();
+	
+	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+	if (!supp_skb) {
+		if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
+			pfk->registered &= ~(1<<hdr->sadb_msg_satype);
+
+		return -ENOBUFS;
+	}
+
+	pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk);
+
+	return 0;
+}
+
+static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	unsigned proto;
+	struct sk_buff *skb_out;
+	struct sadb_msg *hdr_out;
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return -EINVAL;
+
+	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
+	if (!skb_out)
+		return -ENOBUFS;
+
+	xfrm_state_flush(proto);
+
+	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
+	pfkey_hdr_dup(hdr_out, hdr);
+	hdr_out->sadb_msg_errno = (uint8_t) 0;
+	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+
+	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
+
+	return 0;
+}
+
+struct pfkey_dump_data
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sock *sk;
+};
+
+static int dump_sa(struct xfrm_state *x, int count, void *ptr)
+{
+	struct pfkey_dump_data *data = ptr;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+
+	out_skb = pfkey_xfrm_state2msg(x, 1, 3);
+	if (IS_ERR(out_skb))
+		return PTR_ERR(out_skb);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_DUMP;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = count;
+	out_hdr->sadb_msg_pid = data->hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, data->sk);
+	return 0;
+}
+
+static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	u8 proto;
+	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
+
+	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
+	if (proto == 0)
+		return -EINVAL;
+
+	return xfrm_state_walk(proto, dump_sa, &data);
+}
+
+static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_sock *pfk = pfkey_sk(sk);
+	int satype = hdr->sadb_msg_satype;
+
+	if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) {
+		/* XXX we mangle packet... */
+		hdr->sadb_msg_errno = 0;
+		if (satype != 0 && satype != 1)
+			return -EINVAL;
+		pfk->promisc = satype;
+	}
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL);
+	return 0;
+}
+
+static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	int i;
+	u32 reqid = *(u32*)ptr;
+
+	for (i=0; i<xp->xfrm_nr; i++) {
+		if (xp->xfrm_vec[i].reqid == reqid)
+			return -EEXIST;
+	}
+	return 0;
+}
+
+static u32 gen_reqid(void)
+{
+	u32 start;
+	static u32 reqid = IPSEC_MANUAL_REQID_MAX;
+
+	start = reqid;
+	do {
+		++reqid;
+		if (reqid == 0)
+			reqid = IPSEC_MANUAL_REQID_MAX+1;
+		if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
+			return reqid;
+	} while (reqid != start);
+	return 0;
+}
+
+static int
+parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
+{
+	struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+
+	if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
+		return -ELOOP;
+
+	if (rq->sadb_x_ipsecrequest_mode == 0)
+		return -EINVAL;
+
+	t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+	t->mode = rq->sadb_x_ipsecrequest_mode-1;
+	if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
+		t->optional = 1;
+	else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
+		t->reqid = rq->sadb_x_ipsecrequest_reqid;
+		if (t->reqid > IPSEC_MANUAL_REQID_MAX)
+			t->reqid = 0;
+		if (!t->reqid && !(t->reqid = gen_reqid()))
+			return -ENOBUFS;
+	}
+
+	/* addresses present only in tunnel mode */
+	if (t->mode) {
+		switch (xp->family) {
+		case AF_INET:
+			sin = (void*)(rq+1);
+			if (sin->sin_family != AF_INET)
+				return -EINVAL;
+			t->saddr.a4 = sin->sin_addr.s_addr;
+			sin++;
+			if (sin->sin_family != AF_INET)
+				return -EINVAL;
+			t->id.daddr.a4 = sin->sin_addr.s_addr;
+			break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		case AF_INET6:
+			sin6 = (void *)(rq+1);
+			if (sin6->sin6_family != AF_INET6)
+				return -EINVAL;
+			memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
+			sin6++;
+			if (sin6->sin6_family != AF_INET6)
+				return -EINVAL;
+			memcpy(t->id.daddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+	}
+	/* No way to set this via kame pfkey */
+	t->aalgos = t->ealgos = t->calgos = ~0;
+	xp->xfrm_nr++;
+	return 0;
+}
+
+static int
+parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
+{
+	int err;
+	int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy);
+	struct sadb_x_ipsecrequest *rq = (void*)(pol+1);
+
+	while (len >= sizeof(struct sadb_x_ipsecrequest)) {
+		if ((err = parse_ipsecrequest(xp, rq)) < 0)
+			return err;
+		len -= rq->sadb_x_ipsecrequest_len;
+		rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len);
+	}
+	return 0;
+}
+
+static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
+{
+	int sockaddr_size = pfkey_sockaddr_size(xp->family);
+	int socklen = (xp->family == AF_INET ?
+		       sizeof(struct sockaddr_in) :
+		       sizeof(struct sockaddr_in6));
+
+	return sizeof(struct sadb_msg) +
+		(sizeof(struct sadb_lifetime) * 3) +
+		(sizeof(struct sadb_address) * 2) + 
+		(sockaddr_size * 2) +
+		sizeof(struct sadb_x_policy) +
+		(xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
+				(socklen * 2)));
+}
+
+static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
+{
+	struct sk_buff *skb;
+	int size;
+
+	size = pfkey_xfrm_policy2msg_size(xp);
+
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return ERR_PTR(-ENOBUFS);
+
+	return skb;
+}
+
+static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
+{
+	struct sadb_msg *hdr;
+	struct sadb_address *addr;
+	struct sadb_lifetime *lifetime;
+	struct sadb_x_policy *pol;
+	struct sockaddr_in   *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6  *sin6;
+#endif
+	int i;
+	int size;
+	int sockaddr_size = pfkey_sockaddr_size(xp->family);
+	int socklen = (xp->family == AF_INET ?
+		       sizeof(struct sockaddr_in) :
+		       sizeof(struct sockaddr_in6));
+
+	size = pfkey_xfrm_policy2msg_size(xp);
+
+	/* call should fill header later */
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	memset(hdr, 0, size);	/* XXX do we need this ? */
+
+	/* src address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
+	addr->sadb_address_prefixlen = xp->selector.prefixlen_s;
+	addr->sadb_address_reserved = 0;
+	/* src address */
+	if (xp->family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = xp->selector.saddr.a4;
+		sin->sin_port = xp->selector.sport;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (xp->family == AF_INET6) {
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = xp->selector.sport;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, xp->selector.saddr.a6,
+		       sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
+	addr->sadb_address_prefixlen = xp->selector.prefixlen_d; 
+	addr->sadb_address_reserved = 0;
+	if (xp->family == AF_INET) {
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = xp->selector.daddr.a4;
+		sin->sin_port = xp->selector.dport;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (xp->family == AF_INET6) {
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = xp->selector.dport;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, xp->selector.daddr.a6,
+		       sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	/* hard time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
+	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.hard_packet_limit);
+	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit);
+	lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds;
+	lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds;
+	/* soft time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
+	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.soft_packet_limit);
+	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit);
+	lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds;
+	lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds;
+	/* current time */
+	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
+						     sizeof(struct sadb_lifetime));
+	lifetime->sadb_lifetime_len =
+		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
+	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
+	lifetime->sadb_lifetime_allocations = xp->curlft.packets;
+	lifetime->sadb_lifetime_bytes = xp->curlft.bytes;
+	lifetime->sadb_lifetime_addtime = xp->curlft.add_time;
+	lifetime->sadb_lifetime_usetime = xp->curlft.use_time;
+
+	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
+	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD;
+	if (xp->action == XFRM_POLICY_ALLOW) {
+		if (xp->xfrm_nr)
+			pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
+		else
+			pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
+	}
+	pol->sadb_x_policy_dir = dir+1;
+	pol->sadb_x_policy_id = xp->index;
+	pol->sadb_x_policy_priority = xp->priority;
+
+	for (i=0; i<xp->xfrm_nr; i++) {
+		struct sadb_x_ipsecrequest *rq;
+		struct xfrm_tmpl *t = xp->xfrm_vec + i;
+		int req_size;
+
+		req_size = sizeof(struct sadb_x_ipsecrequest);
+		if (t->mode)
+			req_size += 2*socklen;
+		else
+			size -= 2*socklen;
+		rq = (void*)skb_put(skb, req_size);
+		pol->sadb_x_policy_len += req_size/8;
+		memset(rq, 0, sizeof(*rq));
+		rq->sadb_x_ipsecrequest_len = req_size;
+		rq->sadb_x_ipsecrequest_proto = t->id.proto;
+		rq->sadb_x_ipsecrequest_mode = t->mode+1;
+		rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE;
+		if (t->reqid)
+			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE;
+		if (t->optional)
+			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
+		rq->sadb_x_ipsecrequest_reqid = t->reqid;
+		if (t->mode) {
+			switch (xp->family) {
+			case AF_INET:
+				sin = (void*)(rq+1);
+				sin->sin_family = AF_INET;
+				sin->sin_addr.s_addr = t->saddr.a4;
+				sin->sin_port = 0;
+				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+				sin++;
+				sin->sin_family = AF_INET;
+				sin->sin_addr.s_addr = t->id.daddr.a4;
+				sin->sin_port = 0;
+				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+				break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				sin6 = (void*)(rq+1);
+				sin6->sin6_family = AF_INET6;
+				sin6->sin6_port = 0;
+				sin6->sin6_flowinfo = 0;
+				memcpy(&sin6->sin6_addr, t->saddr.a6,
+				       sizeof(struct in6_addr));
+				sin6->sin6_scope_id = 0;
+
+				sin6++;
+				sin6->sin6_family = AF_INET6;
+				sin6->sin6_port = 0;
+				sin6->sin6_flowinfo = 0;
+				memcpy(&sin6->sin6_addr, t->id.daddr.a6,
+				       sizeof(struct in6_addr));
+				sin6->sin6_scope_id = 0;
+				break;
+#endif
+			default:
+				break;
+			}
+		}
+	}
+	hdr->sadb_msg_len = size / sizeof(uint64_t);
+	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
+}
+
+static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	int err;
+	struct sadb_lifetime *lifetime;
+	struct sadb_address *sa;
+	struct sadb_x_policy *pol;
+	struct xfrm_policy *xp;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
+	    !ext_hdrs[SADB_X_EXT_POLICY-1])
+		return -EINVAL;
+
+	pol = ext_hdrs[SADB_X_EXT_POLICY-1];
+	if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC)
+		return -EINVAL;
+	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
+		return -EINVAL;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL)
+		return -ENOBUFS;
+
+	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
+		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
+	xp->priority = pol->sadb_x_policy_priority;
+
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 
+	xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
+	if (!xp->family) {
+		err = -EINVAL;
+		goto out;
+	}
+	xp->selector.family = xp->family;
+	xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
+	xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (xp->selector.sport)
+		xp->selector.sport_mask = ~0;
+
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
+	pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr);
+	xp->selector.prefixlen_d = sa->sadb_address_prefixlen;
+
+	/* Amusing, we set this twice.  KAME apps appear to set same value
+	 * in both addresses.
+	 */
+	xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+
+	xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (xp->selector.dport)
+		xp->selector.dport_mask = ~0;
+
+	xp->lft.soft_byte_limit = XFRM_INF;
+	xp->lft.hard_byte_limit = XFRM_INF;
+	xp->lft.soft_packet_limit = XFRM_INF;
+	xp->lft.hard_packet_limit = XFRM_INF;
+	if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) {
+		xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) {
+		xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
+		xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
+		xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
+		xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
+	}
+	xp->xfrm_nr = 0;
+	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
+	    (err = parse_ipsecrequests(xp, pol)) < 0)
+		goto out;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb)) {
+		err =  PTR_ERR(out_skb);
+		goto out;
+	}
+
+	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
+				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
+	if (err) {
+		kfree_skb(out_skb);
+		goto out;
+	}
+
+	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+
+	xfrm_pol_put(xp);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
+	out_hdr->sadb_msg_satype = 0;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	return 0;
+
+out:
+	kfree(xp);
+	return err;
+}
+
+static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	int err;
+	struct sadb_address *sa;
+	struct sadb_x_policy *pol;
+	struct xfrm_policy *xp;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	struct xfrm_selector sel;
+
+	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
+				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
+	    !ext_hdrs[SADB_X_EXT_POLICY-1])
+		return -EINVAL;
+
+	pol = ext_hdrs[SADB_X_EXT_POLICY-1];
+	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
+		return -EINVAL;
+
+	memset(&sel, 0, sizeof(sel));
+
+	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 
+	sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr);
+	sel.prefixlen_s = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (sel.sport)
+		sel.sport_mask = ~0;
+
+	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
+	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
+	sel.prefixlen_d = sa->sadb_address_prefixlen;
+	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
+	sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
+	if (sel.dport)
+		sel.dport_mask = ~0;
+
+	xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1);
+	if (xp == NULL)
+		return -ENOENT;
+
+	err = 0;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb)) {
+		err =  PTR_ERR(out_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_X_SPDDELETE;
+	out_hdr->sadb_msg_satype = 0;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	err = 0;
+
+out:
+	xfrm_pol_put(xp);
+	return err;
+}
+
+static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	int err;
+	struct sadb_x_policy *pol;
+	struct xfrm_policy *xp;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+
+	if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
+		return -EINVAL;
+
+	xp = xfrm_policy_byid(0, pol->sadb_x_policy_id,
+			      hdr->sadb_msg_type == SADB_X_SPDDELETE2);
+	if (xp == NULL)
+		return -ENOENT;
+
+	err = 0;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb)) {
+		err =  PTR_ERR(out_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
+	out_hdr->sadb_msg_satype = 0;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	err = 0;
+
+out:
+	xfrm_pol_put(xp);
+	return err;
+}
+
+static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	struct pfkey_dump_data *data = ptr;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb))
+		return PTR_ERR(out_skb);
+
+	pfkey_xfrm_policy2msg(out_skb, xp, dir);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
+	out_hdr->sadb_msg_type = SADB_X_SPDDUMP;
+	out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = count;
+	out_hdr->sadb_msg_pid = data->hdr->sadb_msg_pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, data->sk);
+	return 0;
+}
+
+static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
+
+	return xfrm_policy_walk(dump_sp, &data);
+}
+
+static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct sk_buff *skb_out;
+	struct sadb_msg *hdr_out;
+
+	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
+	if (!skb_out)
+		return -ENOBUFS;
+
+	xfrm_policy_flush();
+
+	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
+	pfkey_hdr_dup(hdr_out, hdr);
+	hdr_out->sadb_msg_errno = (uint8_t) 0;
+	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
+
+	return 0;
+}
+
+typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
+			     struct sadb_msg *hdr, void **ext_hdrs);
+static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
+	[SADB_RESERVED]		= pfkey_reserved,
+	[SADB_GETSPI]		= pfkey_getspi,
+	[SADB_UPDATE]		= pfkey_add,
+	[SADB_ADD]		= pfkey_add,
+	[SADB_DELETE]		= pfkey_delete,
+	[SADB_GET]		= pfkey_get,
+	[SADB_ACQUIRE]		= pfkey_acquire,
+	[SADB_REGISTER]		= pfkey_register,
+	[SADB_EXPIRE]		= NULL,
+	[SADB_FLUSH]		= pfkey_flush,
+	[SADB_DUMP]		= pfkey_dump,
+	[SADB_X_PROMISC]	= pfkey_promisc,
+	[SADB_X_PCHANGE]	= NULL,
+	[SADB_X_SPDUPDATE]	= pfkey_spdadd,
+	[SADB_X_SPDADD]		= pfkey_spdadd,
+	[SADB_X_SPDDELETE]	= pfkey_spddelete,
+	[SADB_X_SPDGET]		= pfkey_spdget,
+	[SADB_X_SPDACQUIRE]	= NULL,
+	[SADB_X_SPDDUMP]	= pfkey_spddump,
+	[SADB_X_SPDFLUSH]	= pfkey_spdflush,
+	[SADB_X_SPDSETIDX]	= pfkey_spdadd,
+	[SADB_X_SPDDELETE2]	= pfkey_spdget,
+};
+
+static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
+{
+	void *ext_hdrs[SADB_EXT_MAX];
+	int err;
+
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+			BROADCAST_PROMISC_ONLY, NULL);
+
+	memset(ext_hdrs, 0, sizeof(ext_hdrs));
+	err = parse_exthdrs(skb, hdr, ext_hdrs);
+	if (!err) {
+		err = -EOPNOTSUPP;
+		if (pfkey_funcs[hdr->sadb_msg_type])
+			err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs);
+	}
+	return err;
+}
+
+static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
+{
+	struct sadb_msg *hdr = NULL;
+
+	if (skb->len < sizeof(*hdr)) {
+		*errp = -EMSGSIZE;
+	} else {
+		hdr = (struct sadb_msg *) skb->data;
+		if (hdr->sadb_msg_version != PF_KEY_V2 ||
+		    hdr->sadb_msg_reserved != 0 ||
+		    (hdr->sadb_msg_type <= SADB_RESERVED ||
+		     hdr->sadb_msg_type > SADB_MAX)) {
+			hdr = NULL;
+			*errp = -EINVAL;
+		} else if (hdr->sadb_msg_len != (skb->len /
+						 sizeof(uint64_t)) ||
+			   hdr->sadb_msg_len < (sizeof(struct sadb_msg) /
+						sizeof(uint64_t))) {
+			hdr = NULL;
+			*errp = -EMSGSIZE;
+		} else {
+			*errp = 0;
+		}
+	}
+	return hdr;
+}
+
+static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+{
+	return t->aalgos & (1 << d->desc.sadb_alg_id);
+}
+
+static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+{
+	return t->ealgos & (1 << d->desc.sadb_alg_id);
+}
+
+static int count_ah_combs(struct xfrm_tmpl *t)
+{
+	int i, sz = 0;
+
+	for (i = 0; ; i++) {
+		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
+		if (!aalg)
+			break;
+		if (aalg_tmpl_set(t, aalg) && aalg->available)
+			sz += sizeof(struct sadb_comb);
+	}
+	return sz + sizeof(struct sadb_prop);
+}
+
+static int count_esp_combs(struct xfrm_tmpl *t)
+{
+	int i, k, sz = 0;
+
+	for (i = 0; ; i++) {
+		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
+		if (!ealg)
+			break;
+			
+		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+			continue;
+			
+		for (k = 1; ; k++) {
+			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
+			if (!aalg)
+				break;
+				
+			if (aalg_tmpl_set(t, aalg) && aalg->available)
+				sz += sizeof(struct sadb_comb);
+		}
+	}
+	return sz + sizeof(struct sadb_prop);
+}
+
+static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+{
+	struct sadb_prop *p;
+	int i;
+
+	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
+	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
+	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
+	p->sadb_prop_replay = 32;
+	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
+
+	for (i = 0; ; i++) {
+		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
+		if (!aalg)
+			break;
+
+		if (aalg_tmpl_set(t, aalg) && aalg->available) {
+			struct sadb_comb *c;
+			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
+			memset(c, 0, sizeof(*c));
+			p->sadb_prop_len += sizeof(struct sadb_comb)/8;
+			c->sadb_comb_auth = aalg->desc.sadb_alg_id;
+			c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
+			c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
+			c->sadb_comb_hard_addtime = 24*60*60;
+			c->sadb_comb_soft_addtime = 20*60*60;
+			c->sadb_comb_hard_usetime = 8*60*60;
+			c->sadb_comb_soft_usetime = 7*60*60;
+		}
+	}
+}
+
+static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+{
+	struct sadb_prop *p;
+	int i, k;
+
+	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
+	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
+	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
+	p->sadb_prop_replay = 32;
+	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
+
+	for (i=0; ; i++) {
+		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
+		if (!ealg)
+			break;
+	
+		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+			continue;
+			
+		for (k = 1; ; k++) {
+			struct sadb_comb *c;
+			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
+			if (!aalg)
+				break;
+			if (!(aalg_tmpl_set(t, aalg) && aalg->available))
+				continue;
+			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
+			memset(c, 0, sizeof(*c));
+			p->sadb_prop_len += sizeof(struct sadb_comb)/8;
+			c->sadb_comb_auth = aalg->desc.sadb_alg_id;
+			c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
+			c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
+			c->sadb_comb_encrypt = ealg->desc.sadb_alg_id;
+			c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits;
+			c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits;
+			c->sadb_comb_hard_addtime = 24*60*60;
+			c->sadb_comb_soft_addtime = 20*60*60;
+			c->sadb_comb_hard_usetime = 8*60*60;
+			c->sadb_comb_soft_usetime = 7*60*60;
+		}
+	}
+}
+
+static int pfkey_send_notify(struct xfrm_state *x, int hard)
+{
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	int hsc = (hard ? 2 : 1);
+
+	out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
+	if (IS_ERR(out_skb))
+		return PTR_ERR(out_skb);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = PF_KEY_V2;
+	out_hdr->sadb_msg_type = SADB_EXPIRE;
+	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_reserved = 0;
+	out_hdr->sadb_msg_seq = 0;
+	out_hdr->sadb_msg_pid = 0;
+
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+	return 0;
+}
+
+static u32 get_acqseq(void)
+{
+	u32 res;
+	static u32 acqseq;
+	static DEFINE_SPINLOCK(acqseq_lock);
+
+	spin_lock_bh(&acqseq_lock);
+	res = (++acqseq ? : ++acqseq);
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+
+static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_address *addr;
+	struct sadb_x_policy *pol;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int sockaddr_size;
+	int size;
+	
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return -EINVAL;
+
+	size = sizeof(struct sadb_msg) +
+		(sizeof(struct sadb_address) * 2) +
+		(sockaddr_size * 2) +
+		sizeof(struct sadb_x_policy);
+	
+	if (x->id.proto == IPPROTO_AH)
+		size += count_ah_combs(t);
+	else if (x->id.proto == IPPROTO_ESP)
+		size += count_esp_combs(t);
+
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = SADB_ACQUIRE;
+	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	hdr->sadb_msg_len = size / sizeof(uint64_t);
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = x->km.seq = get_acqseq();
+	hdr->sadb_msg_pid = 0;
+
+	/* src address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->props.saddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+	
+	/* dst address */
+	addr = (struct sadb_address*) skb_put(skb, 
+					      sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len =
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32; 
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->id.daddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128; 
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->id.daddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
+	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
+	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
+	pol->sadb_x_policy_dir = dir+1;
+	pol->sadb_x_policy_id = xp->index;
+
+	/* Set sadb_comb's. */
+	if (x->id.proto == IPPROTO_AH)
+		dump_ah_combs(skb, t);
+	else if (x->id.proto == IPPROTO_ESP)
+		dump_esp_combs(skb, t);
+
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+}
+
+static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+                                                u8 *data, int len, int *dir)
+{
+	struct xfrm_policy *xp;
+	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
+
+	switch (family) {
+	case AF_INET:
+		if (opt != IP_IPSEC_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		if (opt != IPV6_IPSEC_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+
+	if (len < sizeof(struct sadb_x_policy) ||
+	    pol->sadb_x_policy_len*8 > len ||
+	    pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS ||
+	    (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND))
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_ATOMIC);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
+		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
+
+	xp->lft.soft_byte_limit = XFRM_INF;
+	xp->lft.hard_byte_limit = XFRM_INF;
+	xp->lft.soft_packet_limit = XFRM_INF;
+	xp->lft.hard_packet_limit = XFRM_INF;
+	xp->family = family;
+
+	xp->xfrm_nr = 0;
+	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
+	    (*dir = parse_ipsecrequests(xp, pol)) < 0)
+		goto out;
+
+	*dir = pol->sadb_x_policy_dir-1;
+	return xp;
+
+out:
+	kfree(xp);
+	return NULL;
+}
+
+static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	struct sadb_sa *sa;
+	struct sadb_address *addr;
+	struct sadb_x_nat_t_port *n_port;
+	struct sockaddr_in *sin;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+#endif
+	int sockaddr_size;
+	int size;
+	__u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0);
+	struct xfrm_encap_tmpl *natt = NULL;
+
+	sockaddr_size = pfkey_sockaddr_size(x->props.family);
+	if (!sockaddr_size)
+		return -EINVAL;
+
+	if (!satype)
+		return -EINVAL;
+
+	if (!x->encap)
+		return -EINVAL;
+
+	natt = x->encap;
+
+	/* Build an SADB_X_NAT_T_NEW_MAPPING message:
+	 *
+	 * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) |
+	 * ADDRESS_DST (new addr) | NAT_T_DPORT (new port)
+	 */
+	
+	size = sizeof(struct sadb_msg) +
+		sizeof(struct sadb_sa) +
+		(sizeof(struct sadb_address) * 2) +
+		(sockaddr_size * 2) +
+		(sizeof(struct sadb_x_nat_t_port) * 2);
+	
+	skb =  alloc_skb(size + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING;
+	hdr->sadb_msg_satype = satype;
+	hdr->sadb_msg_len = size / sizeof(uint64_t);
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = x->km.seq = get_acqseq();
+	hdr->sadb_msg_pid = 0;
+
+	/* SA */
+	sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa));
+	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
+	sa->sadb_sa_exttype = SADB_EXT_SA;
+	sa->sadb_sa_spi = x->id.spi;
+	sa->sadb_sa_replay = 0;
+	sa->sadb_sa_state = 0;
+	sa->sadb_sa_auth = 0;
+	sa->sadb_sa_encrypt = 0;
+	sa->sadb_sa_flags = 0;
+
+	/* ADDRESS_SRC (old addr) */
+	addr = (struct sadb_address*)
+		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = x->props.saddr.a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr,
+		       x->props.saddr.a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	/* NAT_T_SPORT (old port) */
+	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
+	n_port->sadb_x_nat_t_port_port = natt->encap_sport;
+	n_port->sadb_x_nat_t_port_reserved = 0;
+
+	/* ADDRESS_DST (new addr) */
+	addr = (struct sadb_address*)
+		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
+	addr->sadb_address_len = 
+		(sizeof(struct sadb_address)+sockaddr_size)/
+			sizeof(uint64_t);
+	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
+	addr->sadb_address_proto = 0;
+	addr->sadb_address_reserved = 0;
+	if (x->props.family == AF_INET) {
+		addr->sadb_address_prefixlen = 32;
+
+		sin = (struct sockaddr_in *) (addr + 1);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = ipaddr->a4;
+		sin->sin_port = 0;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (x->props.family == AF_INET6) {
+		addr->sadb_address_prefixlen = 128;
+
+		sin6 = (struct sockaddr_in6 *) (addr + 1);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = 0;
+		sin6->sin6_flowinfo = 0;
+		memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr));
+		sin6->sin6_scope_id = 0;
+	}
+#endif
+	else
+		BUG();
+
+	/* NAT_T_DPORT (new port) */
+	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
+	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
+	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
+	n_port->sadb_x_nat_t_port_port = sport;
+	n_port->sadb_x_nat_t_port_reserved = 0;
+
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
+}
+
+static int pfkey_sendmsg(struct kiocb *kiocb,
+			 struct socket *sock, struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb = NULL;
+	struct sadb_msg *hdr = NULL;
+	int err;
+
+	err = -EOPNOTSUPP;
+	if (msg->msg_flags & MSG_OOB)
+		goto out;
+
+	err = -EMSGSIZE;
+	if ((unsigned)len > sk->sk_sndbuf - 32)
+		goto out;
+
+	err = -ENOBUFS;
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (skb == NULL)
+		goto out;
+
+	err = -EFAULT;
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
+		goto out;
+
+	hdr = pfkey_get_base_msg(skb, &err);
+	if (!hdr)
+		goto out;
+
+	down(&xfrm_cfg_sem);
+	err = pfkey_process(sk, skb, hdr);
+	up(&xfrm_cfg_sem);
+
+out:
+	if (err && hdr && pfkey_error(hdr, err, sk) == 0)
+		err = 0;
+	if (skb)
+		kfree_skb(skb);
+
+	return err ? : len;
+}
+
+static int pfkey_recvmsg(struct kiocb *kiocb,
+			 struct socket *sock, struct msghdr *msg, size_t len,
+			 int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied, err;
+
+	err = -EINVAL;
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
+		goto out;
+
+	msg->msg_namelen = 0;
+	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	skb->h.raw = skb->data;
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	err = (flags & MSG_TRUNC) ? skb->len : copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+}
+
+static struct proto_ops pfkey_ops = {
+	.family		=	PF_KEY,
+	.owner		=	THIS_MODULE,
+	/* Operations that make no sense on pfkey sockets. */
+	.bind		=	sock_no_bind,
+	.connect	=	sock_no_connect,
+	.socketpair	=	sock_no_socketpair,
+	.accept		=	sock_no_accept,
+	.getname	=	sock_no_getname,
+	.ioctl		=	sock_no_ioctl,
+	.listen		=	sock_no_listen,
+	.shutdown	=	sock_no_shutdown,
+	.setsockopt	=	sock_no_setsockopt,
+	.getsockopt	=	sock_no_getsockopt,
+	.mmap		=	sock_no_mmap,
+	.sendpage	=	sock_no_sendpage,
+
+	/* Now the operations that really occur. */
+	.release	=	pfkey_release,
+	.poll		=	datagram_poll,
+	.sendmsg	=	pfkey_sendmsg,
+	.recvmsg	=	pfkey_recvmsg,
+};
+
+static struct net_proto_family pfkey_family_ops = {
+	.family	=	PF_KEY,
+	.create	=	pfkey_create,
+	.owner	=	THIS_MODULE,
+};
+
+#ifdef CONFIG_PROC_FS
+static int pfkey_read_proc(char *buffer, char **start, off_t offset,
+			   int length, int *eof, void *data)
+{
+	off_t pos = 0;
+	off_t begin = 0;
+	int len = 0;
+	struct sock *s;
+	struct hlist_node *node;
+
+	len += sprintf(buffer,"sk       RefCnt Rmem   Wmem   User   Inode\n");
+
+	read_lock(&pfkey_table_lock);
+
+	sk_for_each(s, node, &pfkey_table) {
+		len += sprintf(buffer+len,"%p %-6d %-6u %-6u %-6u %-6lu",
+			       s,
+			       atomic_read(&s->sk_refcnt),
+			       atomic_read(&s->sk_rmem_alloc),
+			       atomic_read(&s->sk_wmem_alloc),
+			       sock_i_uid(s),
+			       sock_i_ino(s)
+			       );
+
+		buffer[len++] = '\n';
+		
+		pos = begin + len;
+		if (pos < offset) {
+			len = 0;
+			begin = pos;
+		}
+		if(pos > offset + length)
+			goto done;
+	}
+	*eof = 1;
+
+done:
+	read_unlock(&pfkey_table_lock);
+
+	*start = buffer + (offset - begin);
+	len -= (offset - begin);
+
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+#endif
+
+static struct xfrm_mgr pfkeyv2_mgr =
+{
+	.id		= "pfkeyv2",
+	.notify		= pfkey_send_notify,
+	.acquire	= pfkey_send_acquire,
+	.compile_policy	= pfkey_compile_policy,
+	.new_mapping	= pfkey_send_new_mapping,
+};
+
+static void __exit ipsec_pfkey_exit(void)
+{
+	xfrm_unregister_km(&pfkeyv2_mgr);
+	remove_proc_entry("net/pfkey", NULL);
+	sock_unregister(PF_KEY);
+	proto_unregister(&key_proto);
+}
+
+static int __init ipsec_pfkey_init(void)
+{
+	int err = proto_register(&key_proto, 0);
+
+	if (err != 0)
+		goto out;
+
+	err = sock_register(&pfkey_family_ops);
+	if (err != 0)
+		goto out_unregister_key_proto;
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)
+		goto out_sock_unregister;
+#endif
+	err = xfrm_register_km(&pfkeyv2_mgr);
+	if (err != 0)
+		goto out_remove_proc_entry;
+out:
+	return err;
+out_remove_proc_entry:
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("net/pfkey", NULL);
+out_sock_unregister:
+#endif
+	sock_unregister(PF_KEY);
+out_unregister_key_proto:
+	proto_unregister(&key_proto);
+	goto out;
+}
+
+module_init(ipsec_pfkey_init);
+module_exit(ipsec_pfkey_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_KEY);
diff --git a/net/lapb/Makefile b/net/lapb/Makefile
new file mode 100644
index 00000000000..53f7c90db16
--- /dev/null
+++ b/net/lapb/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux LAPB layer.
+#
+
+obj-$(CONFIG_LAPB) += lapb.o
+
+lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
new file mode 100644
index 00000000000..aea6616cea3
--- /dev/null
+++ b/net/lapb/lapb_iface.c
@@ -0,0 +1,449 @@
+/*
+ *	LAPB release 002
+ *
+ *	This code REQUIRES 2.1.15 or higher/ NET3.038
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	LAPB 001	Jonathan Naylor	Started Coding
+ *	LAPB 002	Jonathan Naylor	New timer architecture.
+ *	2000-10-29	Henner Eisen	lapb_data_indication() return status.
+ */
+ 
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <net/lapb.h>
+
+static struct list_head lapb_list = LIST_HEAD_INIT(lapb_list);
+static DEFINE_RWLOCK(lapb_list_lock);
+
+/*
+ *	Free an allocated lapb control block. 
+ */
+static void lapb_free_cb(struct lapb_cb *lapb)
+{
+	kfree(lapb);
+}
+
+static __inline__ void lapb_hold(struct lapb_cb *lapb)
+{
+	atomic_inc(&lapb->refcnt);
+}
+
+static __inline__ void lapb_put(struct lapb_cb *lapb)
+{
+	if (atomic_dec_and_test(&lapb->refcnt))
+		lapb_free_cb(lapb);
+}
+
+/*
+ *	Socket removal during an interrupt is now safe.
+ */
+static void __lapb_remove_cb(struct lapb_cb *lapb)
+{
+	if (lapb->node.next) {
+		list_del(&lapb->node);
+		lapb_put(lapb);
+	}
+}
+
+/*
+ *	Add a socket to the bound sockets list.
+ */
+static void __lapb_insert_cb(struct lapb_cb *lapb)
+{
+	list_add(&lapb->node, &lapb_list);
+	lapb_hold(lapb);
+}
+
+static struct lapb_cb *__lapb_devtostruct(struct net_device *dev)
+{
+	struct list_head *entry;
+	struct lapb_cb *lapb, *use = NULL;
+
+	list_for_each(entry, &lapb_list) {
+		lapb = list_entry(entry, struct lapb_cb, node);
+		if (lapb->dev == dev) {
+			use = lapb;
+			break;
+		}
+	}
+
+	if (use)
+		lapb_hold(use);
+
+	return use;
+}
+
+static struct lapb_cb *lapb_devtostruct(struct net_device *dev)
+{
+	struct lapb_cb *rc;
+
+	read_lock_bh(&lapb_list_lock);
+	rc = __lapb_devtostruct(dev);
+	read_unlock_bh(&lapb_list_lock);
+
+	return rc;
+}
+/*
+ *	Create an empty LAPB control block.
+ */
+static struct lapb_cb *lapb_create_cb(void)
+{
+	struct lapb_cb *lapb = kmalloc(sizeof(*lapb), GFP_ATOMIC);
+
+
+	if (!lapb)
+		goto out;
+
+	memset(lapb, 0x00, sizeof(*lapb));
+
+	skb_queue_head_init(&lapb->write_queue);
+	skb_queue_head_init(&lapb->ack_queue);
+
+	init_timer(&lapb->t1timer);
+	init_timer(&lapb->t2timer);
+
+	lapb->t1      = LAPB_DEFAULT_T1;
+	lapb->t2      = LAPB_DEFAULT_T2;
+	lapb->n2      = LAPB_DEFAULT_N2;
+	lapb->mode    = LAPB_DEFAULT_MODE;
+	lapb->window  = LAPB_DEFAULT_WINDOW;
+	lapb->state   = LAPB_STATE_0;
+	atomic_set(&lapb->refcnt, 1);
+out:
+	return lapb;
+}
+
+int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks)
+{
+	struct lapb_cb *lapb;
+	int rc = LAPB_BADTOKEN;
+
+	write_lock_bh(&lapb_list_lock);
+
+	lapb = __lapb_devtostruct(dev);
+	if (lapb) {
+		lapb_put(lapb);
+		goto out;
+	}
+
+	lapb = lapb_create_cb();
+	rc = LAPB_NOMEM;
+	if (!lapb)
+		goto out;
+
+	lapb->dev       = dev;
+	lapb->callbacks = *callbacks;
+
+	__lapb_insert_cb(lapb);
+
+	lapb_start_t1timer(lapb);
+
+	rc = LAPB_OK;
+out:
+	write_unlock_bh(&lapb_list_lock);
+	return rc;
+}
+
+int lapb_unregister(struct net_device *dev)
+{
+	struct lapb_cb *lapb;
+	int rc = LAPB_BADTOKEN;
+
+	write_lock_bh(&lapb_list_lock);
+	lapb = __lapb_devtostruct(dev);
+	if (!lapb)
+		goto out;
+
+	lapb_stop_t1timer(lapb);
+	lapb_stop_t2timer(lapb);
+
+	lapb_clear_queues(lapb);
+
+	__lapb_remove_cb(lapb);
+
+	lapb_put(lapb);
+	rc = LAPB_OK;
+out:
+	write_unlock_bh(&lapb_list_lock);
+	return rc;
+}
+
+int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
+{
+	int rc = LAPB_BADTOKEN;
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+
+	if (!lapb)
+		goto out;
+
+	parms->t1      = lapb->t1 / HZ;
+	parms->t2      = lapb->t2 / HZ;
+	parms->n2      = lapb->n2;
+	parms->n2count = lapb->n2count;
+	parms->state   = lapb->state;
+	parms->window  = lapb->window;
+	parms->mode    = lapb->mode;
+
+	if (!timer_pending(&lapb->t1timer))
+		parms->t1timer = 0;
+	else
+		parms->t1timer = (lapb->t1timer.expires - jiffies) / HZ;
+
+	if (!timer_pending(&lapb->t2timer))
+		parms->t2timer = 0;
+	else
+		parms->t2timer = (lapb->t2timer.expires - jiffies) / HZ;
+
+	lapb_put(lapb);
+	rc = LAPB_OK;
+out:
+	return rc;
+}
+
+int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
+{
+	int rc = LAPB_BADTOKEN;
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+
+	if (!lapb)
+		goto out;
+
+	rc = LAPB_INVALUE;
+	if (parms->t1 < 1 || parms->t2 < 1 || parms->n2 < 1)
+		goto out_put;
+
+	if (lapb->state == LAPB_STATE_0) {
+		if (((parms->mode & LAPB_EXTENDED) &&
+		     (parms->window < 1 || parms->window > 127)) ||
+		    (parms->window < 1 || parms->window > 7))
+			goto out_put;
+
+		lapb->mode    = parms->mode;
+		lapb->window  = parms->window;
+	}
+
+	lapb->t1    = parms->t1 * HZ;
+	lapb->t2    = parms->t2 * HZ;
+	lapb->n2    = parms->n2;
+
+	rc = LAPB_OK;
+out_put:
+	lapb_put(lapb);
+out:
+	return rc;
+}
+
+int lapb_connect_request(struct net_device *dev)
+{
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+	int rc = LAPB_BADTOKEN;
+
+	if (!lapb)
+		goto out;
+
+	rc = LAPB_OK;
+	if (lapb->state == LAPB_STATE_1)
+		goto out_put;
+
+	rc = LAPB_CONNECTED;
+	if (lapb->state == LAPB_STATE_3 || lapb->state == LAPB_STATE_4)
+		goto out_put;
+
+	lapb_establish_data_link(lapb);
+
+#if LAPB_DEBUG > 0
+	printk(KERN_DEBUG "lapb: (%p) S0 -> S1\n", lapb->dev);
+#endif
+	lapb->state = LAPB_STATE_1;
+
+	rc = LAPB_OK;
+out_put:
+	lapb_put(lapb);
+out:
+	return rc;
+}
+
+int lapb_disconnect_request(struct net_device *dev)
+{
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+	int rc = LAPB_BADTOKEN;
+
+	if (!lapb)
+		goto out;
+
+	switch (lapb->state) {
+		case LAPB_STATE_0:
+			rc = LAPB_NOTCONNECTED;
+			goto out_put;
+
+		case LAPB_STATE_1:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 TX DISC(1)\n", lapb->dev);
+#endif
+#if LAPB_DEBUG > 0
+			printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", lapb->dev);
+#endif
+			lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND);
+			lapb->state = LAPB_STATE_0;
+			lapb_start_t1timer(lapb);
+			rc = LAPB_NOTCONNECTED;
+			goto out_put;
+
+		case LAPB_STATE_2:
+			rc = LAPB_OK;
+			goto out_put;
+	}
+
+	lapb_clear_queues(lapb);
+	lapb->n2count = 0;
+	lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND);
+	lapb_start_t1timer(lapb);
+	lapb_stop_t2timer(lapb);
+	lapb->state = LAPB_STATE_2;
+
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S3 DISC(1)\n", lapb->dev);
+#endif
+#if LAPB_DEBUG > 0
+	printk(KERN_DEBUG "lapb: (%p) S3 -> S2\n", lapb->dev);
+#endif
+
+	rc = LAPB_OK;
+out_put:
+	lapb_put(lapb);
+out:
+	return rc;
+}
+
+int lapb_data_request(struct net_device *dev, struct sk_buff *skb)
+{
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+	int rc = LAPB_BADTOKEN;
+
+	if (!lapb)
+		goto out;
+
+	rc = LAPB_NOTCONNECTED;
+	if (lapb->state != LAPB_STATE_3 && lapb->state != LAPB_STATE_4)
+		goto out_put;
+
+	skb_queue_tail(&lapb->write_queue, skb);
+	lapb_kick(lapb);
+	rc = LAPB_OK;
+out_put:
+	lapb_put(lapb);
+out:
+	return rc;
+}
+
+int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
+{
+	struct lapb_cb *lapb = lapb_devtostruct(dev);
+	int rc = LAPB_BADTOKEN;
+
+	if (lapb) {
+		lapb_data_input(lapb, skb);
+		lapb_put(lapb);
+		rc = LAPB_OK;
+	}
+
+	return rc;
+}
+
+void lapb_connect_confirmation(struct lapb_cb *lapb, int reason)
+{
+	if (lapb->callbacks.connect_confirmation)
+		lapb->callbacks.connect_confirmation(lapb->dev, reason);
+}
+
+void lapb_connect_indication(struct lapb_cb *lapb, int reason)
+{
+	if (lapb->callbacks.connect_indication)
+		lapb->callbacks.connect_indication(lapb->dev, reason);
+}
+
+void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason)
+{
+	if (lapb->callbacks.disconnect_confirmation)
+		lapb->callbacks.disconnect_confirmation(lapb->dev, reason);
+}
+
+void lapb_disconnect_indication(struct lapb_cb *lapb, int reason)
+{
+	if (lapb->callbacks.disconnect_indication)
+		lapb->callbacks.disconnect_indication(lapb->dev, reason);
+}
+
+int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb)
+{
+	if (lapb->callbacks.data_indication)
+		return lapb->callbacks.data_indication(lapb->dev, skb);
+
+	kfree_skb(skb);
+	return NET_RX_CN_HIGH; /* For now; must be != NET_RX_DROP */ 
+}
+
+int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb)
+{
+	int used = 0;
+
+	if (lapb->callbacks.data_transmit) {
+		lapb->callbacks.data_transmit(lapb->dev, skb);
+		used = 1;
+	}
+
+	return used;
+}
+
+EXPORT_SYMBOL(lapb_register);
+EXPORT_SYMBOL(lapb_unregister);
+EXPORT_SYMBOL(lapb_getparms);
+EXPORT_SYMBOL(lapb_setparms);
+EXPORT_SYMBOL(lapb_connect_request);
+EXPORT_SYMBOL(lapb_disconnect_request);
+EXPORT_SYMBOL(lapb_data_request);
+EXPORT_SYMBOL(lapb_data_received);
+
+static int __init lapb_init(void)
+{
+	return 0;
+}
+
+static void __exit lapb_exit(void)
+{
+	WARN_ON(!list_empty(&lapb_list));
+}
+
+MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>");
+MODULE_DESCRIPTION("The X.25 Link Access Procedure B link layer protocol");
+MODULE_LICENSE("GPL");
+
+module_init(lapb_init);
+module_exit(lapb_exit);
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
new file mode 100644
index 00000000000..b0f8713f66c
--- /dev/null
+++ b/net/lapb/lapb_in.c
@@ -0,0 +1,724 @@
+/*
+ *	LAPB release 002
+ *
+ *	This code REQUIRES 2.1.15 or higher/ NET3.038
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	LAPB 001	Jonathan Naulor	Started Coding
+ *	LAPB 002	Jonathan Naylor	New timer architecture.
+ *	2000-10-29	Henner Eisen	lapb_data_indication() return status.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/lapb.h>
+
+/*
+ *	State machine for state 0, Disconnected State.
+ *	The handling of the timer(s) is in file lapb_timer.c.
+ */
+static void lapb_state0_machine(struct lapb_cb *lapb, struct sk_buff *skb,
+				struct lapb_frame *frame)
+{
+	switch (frame->type) {
+		case LAPB_SABM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S0 RX SABM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n",
+				       lapb->dev);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state     = LAPB_STATE_3;
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_connect_indication(lapb, LAPB_OK);
+			}
+			break;
+
+		case LAPB_SABME:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S0 RX SABME(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n",
+				       lapb->dev);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state     = LAPB_STATE_3;
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_connect_indication(lapb, LAPB_OK);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			}
+			break;
+
+		case LAPB_DISC:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S0 RX DISC(%d)\n",
+			       lapb->dev, frame->pf);
+			printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			lapb_send_control(lapb, LAPB_UA, frame->pf,
+					  LAPB_RESPONSE);
+			break;
+
+		default:
+			break;
+	}
+
+	kfree_skb(skb);
+}
+
+/*
+ *	State machine for state 1, Awaiting Connection State.
+ *	The handling of the timer(s) is in file lapb_timer.c.
+ */
+static void lapb_state1_machine(struct lapb_cb *lapb, struct sk_buff *skb,
+				struct lapb_frame *frame)
+{
+	switch (frame->type) {
+		case LAPB_SABM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 RX SABM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+			}
+			break;
+
+		case LAPB_SABME:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 RX SABME(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			}
+			break;
+
+		case LAPB_DISC:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 RX DISC(%d)\n",
+			       lapb->dev, frame->pf);
+			printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			lapb_send_control(lapb, LAPB_DM, frame->pf,
+					  LAPB_RESPONSE);
+			break;
+
+		case LAPB_UA:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 RX UA(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (frame->pf) {
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S1 -> S3\n",
+				       lapb->dev);
+#endif
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state     = LAPB_STATE_3;
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_connect_confirmation(lapb, LAPB_OK);
+			}
+			break;
+
+		case LAPB_DM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S1 RX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (frame->pf) {
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n",
+				       lapb->dev);
+#endif
+				lapb_clear_queues(lapb);
+				lapb->state = LAPB_STATE_0;
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb_disconnect_indication(lapb, LAPB_REFUSED);
+			}
+			break;
+	}
+
+	kfree_skb(skb);
+}
+
+/*
+ *	State machine for state 2, Awaiting Release State.
+ *	The handling of the timer(s) is in file lapb_timer.c
+ */
+static void lapb_state2_machine(struct lapb_cb *lapb, struct sk_buff *skb,
+				struct lapb_frame *frame)
+{
+	switch (frame->type) {
+		case LAPB_SABM:
+		case LAPB_SABME:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S2 RX {SABM,SABME}(%d)\n",
+			       lapb->dev, frame->pf);
+			printk(KERN_DEBUG "lapb: (%p) S2 TX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			lapb_send_control(lapb, LAPB_DM, frame->pf,
+					  LAPB_RESPONSE);
+			break;
+
+		case LAPB_DISC:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S2 RX DISC(%d)\n",
+			       lapb->dev, frame->pf);
+			printk(KERN_DEBUG "lapb: (%p) S2 TX UA(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			lapb_send_control(lapb, LAPB_UA, frame->pf,
+					  LAPB_RESPONSE);
+			break;
+
+		case LAPB_UA:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S2 RX UA(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (frame->pf) {
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n",
+				       lapb->dev);
+#endif
+				lapb->state = LAPB_STATE_0;
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb_disconnect_confirmation(lapb, LAPB_OK);
+			}
+			break;
+
+		case LAPB_DM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (frame->pf) {
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n",
+				       lapb->dev);
+#endif
+				lapb->state = LAPB_STATE_0;
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb_disconnect_confirmation(lapb,
+							     LAPB_NOTCONNECTED);
+			}
+			break;
+
+		case LAPB_I:
+		case LAPB_REJ:
+		case LAPB_RNR:
+		case LAPB_RR:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S2 RX {I,REJ,RNR,RR}"
+			       "(%d)\n", lapb->dev, frame->pf);
+			printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (frame->pf)
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			break;
+	}
+
+	kfree_skb(skb);
+}
+
+/*
+ *	State machine for state 3, Connected State.
+ *	The handling of the timer(s) is in file lapb_timer.c
+ */
+static void lapb_state3_machine(struct lapb_cb *lapb, struct sk_buff *skb,
+				struct lapb_frame *frame)
+{
+	int queued = 0;
+	int modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS :
+						     LAPB_SMODULUS;
+
+	switch (frame->type) {
+		case LAPB_SABM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX SABM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_requeue_frames(lapb);
+			}
+			break;
+
+		case LAPB_SABME:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX SABME(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_requeue_frames(lapb);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			}
+			break;
+
+		case LAPB_DISC:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX DISC(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+			printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n",
+			       lapb->dev);
+#endif
+			lapb_clear_queues(lapb);
+			lapb_send_control(lapb, LAPB_UA, frame->pf,
+					  LAPB_RESPONSE);
+			lapb_start_t1timer(lapb);
+			lapb_stop_t2timer(lapb);
+			lapb->state = LAPB_STATE_0;
+			lapb_disconnect_indication(lapb, LAPB_OK);
+			break;
+
+		case LAPB_DM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX DM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+			printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n",
+			       lapb->dev);
+#endif
+			lapb_clear_queues(lapb);
+			lapb->state = LAPB_STATE_0;
+			lapb_start_t1timer(lapb);
+			lapb_stop_t2timer(lapb);
+			lapb_disconnect_indication(lapb, LAPB_NOTCONNECTED);
+			break;
+
+		case LAPB_RNR:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX RNR(%d) R%d\n",
+			       lapb->dev, frame->pf, frame->nr);
+#endif
+			lapb->condition |= LAPB_PEER_RX_BUSY_CONDITION;
+			lapb_check_need_response(lapb, frame->cr, frame->pf);
+			if (lapb_validate_nr(lapb, frame->nr)) {
+				lapb_check_iframes_acked(lapb, frame->nr);
+			} else {
+				lapb->frmr_data = *frame;
+				lapb->frmr_type = LAPB_FRMR_Z;
+				lapb_transmit_frmr(lapb);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n",
+				       lapb->dev);
+#endif
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state   = LAPB_STATE_4;
+				lapb->n2count = 0;
+			}
+			break;
+
+		case LAPB_RR:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX RR(%d) R%d\n",
+			       lapb->dev, frame->pf, frame->nr);
+#endif
+			lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION;
+			lapb_check_need_response(lapb, frame->cr, frame->pf);
+			if (lapb_validate_nr(lapb, frame->nr)) {
+				lapb_check_iframes_acked(lapb, frame->nr);
+			} else {
+				lapb->frmr_data = *frame;
+				lapb->frmr_type = LAPB_FRMR_Z;
+				lapb_transmit_frmr(lapb);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n",
+				       lapb->dev);
+#endif
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state   = LAPB_STATE_4;
+				lapb->n2count = 0;
+			}
+			break;
+
+		case LAPB_REJ:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX REJ(%d) R%d\n",
+			       lapb->dev, frame->pf, frame->nr);
+#endif
+			lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION;
+			lapb_check_need_response(lapb, frame->cr, frame->pf);
+			if (lapb_validate_nr(lapb, frame->nr)) {
+				lapb_frames_acked(lapb, frame->nr);
+				lapb_stop_t1timer(lapb);
+				lapb->n2count = 0;
+				lapb_requeue_frames(lapb);
+			} else {
+				lapb->frmr_data = *frame;
+				lapb->frmr_type = LAPB_FRMR_Z;
+				lapb_transmit_frmr(lapb);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n",
+				       lapb->dev);
+#endif
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state   = LAPB_STATE_4;
+				lapb->n2count = 0;
+			}
+			break;
+
+		case LAPB_I:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX I(%d) S%d R%d\n",
+			       lapb->dev, frame->pf, frame->ns, frame->nr);
+#endif
+			if (!lapb_validate_nr(lapb, frame->nr)) {
+				lapb->frmr_data = *frame;
+				lapb->frmr_type = LAPB_FRMR_Z;
+				lapb_transmit_frmr(lapb);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n",
+				       lapb->dev);
+#endif
+				lapb_start_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state   = LAPB_STATE_4;
+				lapb->n2count = 0;
+				break;
+			}
+			if (lapb->condition & LAPB_PEER_RX_BUSY_CONDITION)
+				lapb_frames_acked(lapb, frame->nr);
+			else
+				lapb_check_iframes_acked(lapb, frame->nr);
+
+			if (frame->ns == lapb->vr) {
+				int cn;
+				cn = lapb_data_indication(lapb, skb);
+				queued = 1;
+				/*
+				 * If upper layer has dropped the frame, we
+				 * basically ignore any further protocol
+				 * processing. This will cause the peer
+				 * to re-transmit the frame later like
+				 * a frame lost on the wire.
+				 */
+				if (cn == NET_RX_DROP) {
+					printk(KERN_DEBUG
+					       "LAPB: rx congestion\n");
+					break;
+				}
+				lapb->vr = (lapb->vr + 1) % modulus;
+				lapb->condition &= ~LAPB_REJECT_CONDITION;
+				if (frame->pf)
+					lapb_enquiry_response(lapb);
+				else {
+					if (!(lapb->condition &
+					      LAPB_ACK_PENDING_CONDITION)) {
+						lapb->condition |= LAPB_ACK_PENDING_CONDITION;
+						lapb_start_t2timer(lapb);
+					}
+				}
+			} else {
+				if (lapb->condition & LAPB_REJECT_CONDITION) {
+					if (frame->pf)
+						lapb_enquiry_response(lapb);
+				} else {
+#if LAPB_DEBUG > 1
+					printk(KERN_DEBUG
+					       "lapb: (%p) S3 TX REJ(%d) R%d\n",
+					       lapb->dev, frame->pf, lapb->vr);
+#endif
+					lapb->condition |= LAPB_REJECT_CONDITION;
+					lapb_send_control(lapb, LAPB_REJ,
+							  frame->pf,
+							  LAPB_RESPONSE);
+					lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
+				}
+			}
+			break;
+
+		case LAPB_FRMR:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX FRMR(%d) %02X "
+			       "%02X %02X %02X %02X\n", lapb->dev, frame->pf,
+			       skb->data[0], skb->data[1], skb->data[2],
+			       skb->data[3], skb->data[4]);
+#endif
+			lapb_establish_data_link(lapb);
+#if LAPB_DEBUG > 0
+			printk(KERN_DEBUG "lapb: (%p) S3 -> S1\n",
+			       lapb->dev);
+#endif
+			lapb_requeue_frames(lapb);
+			lapb->state = LAPB_STATE_1;
+			break;
+
+		case LAPB_ILLEGAL:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S3 RX ILLEGAL(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			lapb->frmr_data = *frame;
+			lapb->frmr_type = LAPB_FRMR_W;
+			lapb_transmit_frmr(lapb);
+#if LAPB_DEBUG > 0
+			printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev);
+#endif
+			lapb_start_t1timer(lapb);
+			lapb_stop_t2timer(lapb);
+			lapb->state   = LAPB_STATE_4;
+			lapb->n2count = 0;
+			break;
+	}
+
+	if (!queued)
+		kfree_skb(skb);
+}
+
+/*
+ *	State machine for state 4, Frame Reject State.
+ *	The handling of the timer(s) is in file lapb_timer.c.
+ */
+static void lapb_state4_machine(struct lapb_cb *lapb, struct sk_buff *skb,
+				struct lapb_frame *frame)
+{
+	switch (frame->type) {
+		case LAPB_SABM:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S4 RX SABM(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n",
+				       lapb->dev);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state     = LAPB_STATE_3;
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_connect_indication(lapb, LAPB_OK);
+			}
+			break;
+
+		case LAPB_SABME:
+#if LAPB_DEBUG > 1
+			printk(KERN_DEBUG "lapb: (%p) S4 RX SABME(%d)\n",
+			       lapb->dev, frame->pf);
+#endif
+			if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n",
+				       lapb->dev);
+#endif
+				lapb_send_control(lapb, LAPB_UA, frame->pf,
+						  LAPB_RESPONSE);
+				lapb_stop_t1timer(lapb);
+				lapb_stop_t2timer(lapb);
+				lapb->state     = LAPB_STATE_3;
+				lapb->condition = 0x00;
+				lapb->n2count   = 0;
+				lapb->vs        = 0;
+				lapb->vr        = 0;
+				lapb->va        = 0;
+				lapb_connect_indication(lapb, LAPB_OK);
+			} else {
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n",
+				       lapb->dev, frame->pf);
+#endif
+				lapb_send_control(lapb, LAPB_DM, frame->pf,
+						  LAPB_RESPONSE);
+			}
+			break;
+	}
+
+	kfree_skb(skb);
+}
+
+/*
+ *	Process an incoming LAPB frame
+ */
+void lapb_data_input(struct lapb_cb *lapb, struct sk_buff *skb)
+{
+	struct lapb_frame frame;
+
+	if (lapb_decode(lapb, skb, &frame) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+
+	switch (lapb->state) {
+	case LAPB_STATE_0:
+		lapb_state0_machine(lapb, skb, &frame); break;
+	case LAPB_STATE_1:
+		lapb_state1_machine(lapb, skb, &frame); break;
+	case LAPB_STATE_2:
+		lapb_state2_machine(lapb, skb, &frame); break;
+	case LAPB_STATE_3:
+		lapb_state3_machine(lapb, skb, &frame); break;
+	case LAPB_STATE_4:
+		lapb_state4_machine(lapb, skb, &frame); break;
+	}
+
+	lapb_kick(lapb);
+}
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
new file mode 100644
index 00000000000..49a761bd931
--- /dev/null
+++ b/net/lapb/lapb_out.c
@@ -0,0 +1,224 @@
+/*
+ *	LAPB release 002
+ *
+ *	This code REQUIRES 2.1.15 or higher/ NET3.038
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	LAPB 001	Jonathan Naylor	Started Coding
+ *	LAPB 002	Jonathan Naylor	New timer architecture.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/lapb.h>
+
+/* 
+ *  This procedure is passed a buffer descriptor for an iframe. It builds
+ *  the rest of the control part of the frame and then writes it out.
+ */
+static void lapb_send_iframe(struct lapb_cb *lapb, struct sk_buff *skb, int poll_bit)
+{
+	unsigned char *frame;
+
+	if (!skb)
+		return;
+
+	if (lapb->mode & LAPB_EXTENDED) {
+		frame = skb_push(skb, 2);
+
+		frame[0] = LAPB_I;
+		frame[0] |= lapb->vs << 1;
+		frame[1] = poll_bit ? LAPB_EPF : 0;
+		frame[1] |= lapb->vr << 1;
+	} else {
+		frame = skb_push(skb, 1);
+
+		*frame = LAPB_I;
+		*frame |= poll_bit ? LAPB_SPF : 0;
+		*frame |= lapb->vr << 5;
+		*frame |= lapb->vs << 1;
+	}
+
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S%d TX I(%d) S%d R%d\n",
+	       lapb->dev, lapb->state, poll_bit, lapb->vs, lapb->vr);
+#endif
+
+	lapb_transmit_buffer(lapb, skb, LAPB_COMMAND);	
+}
+
+void lapb_kick(struct lapb_cb *lapb)
+{
+	struct sk_buff *skb, *skbn;
+	unsigned short modulus, start, end;
+
+	modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS : LAPB_SMODULUS;
+	start = !skb_peek(&lapb->ack_queue) ? lapb->va : lapb->vs;
+	end   = (lapb->va + lapb->window) % modulus;
+
+	if (!(lapb->condition & LAPB_PEER_RX_BUSY_CONDITION) &&
+	    start != end && skb_peek(&lapb->write_queue)) {
+		lapb->vs = start;
+
+		/*
+		 * Dequeue the frame and copy it.
+		 */
+		skb = skb_dequeue(&lapb->write_queue);
+
+		do {
+			if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+				skb_queue_head(&lapb->write_queue, skb);
+				break;
+			}
+
+			if (skb->sk)
+				skb_set_owner_w(skbn, skb->sk);
+
+			/*
+			 * Transmit the frame copy.
+			 */
+			lapb_send_iframe(lapb, skbn, LAPB_POLLOFF);
+
+			lapb->vs = (lapb->vs + 1) % modulus;
+
+			/*
+			 * Requeue the original data frame.
+			 */
+			skb_queue_tail(&lapb->ack_queue, skb);
+
+		} while (lapb->vs != end && (skb = skb_dequeue(&lapb->write_queue)) != NULL);
+
+		lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
+
+		if (!lapb_t1timer_running(lapb))
+			lapb_start_t1timer(lapb);
+	}
+}
+
+void lapb_transmit_buffer(struct lapb_cb *lapb, struct sk_buff *skb, int type)
+{
+	unsigned char *ptr;
+
+	ptr = skb_push(skb, 1);
+
+	if (lapb->mode & LAPB_MLP) {
+		if (lapb->mode & LAPB_DCE) {
+			if (type == LAPB_COMMAND)
+				*ptr = LAPB_ADDR_C;
+			if (type == LAPB_RESPONSE)
+				*ptr = LAPB_ADDR_D;
+		} else {
+			if (type == LAPB_COMMAND)
+				*ptr = LAPB_ADDR_D;
+			if (type == LAPB_RESPONSE)
+				*ptr = LAPB_ADDR_C;
+		}
+	} else {
+		if (lapb->mode & LAPB_DCE) {
+			if (type == LAPB_COMMAND)
+				*ptr = LAPB_ADDR_A;
+			if (type == LAPB_RESPONSE)
+				*ptr = LAPB_ADDR_B;
+		} else {
+			if (type == LAPB_COMMAND)
+				*ptr = LAPB_ADDR_B;
+			if (type == LAPB_RESPONSE)
+				*ptr = LAPB_ADDR_A;
+		}
+	}
+
+#if LAPB_DEBUG > 2
+	printk(KERN_DEBUG "lapb: (%p) S%d TX %02X %02X %02X\n",
+	       lapb->dev, lapb->state,
+	       skb->data[0], skb->data[1], skb->data[2]);
+#endif
+
+	if (!lapb_data_transmit(lapb, skb))
+		kfree_skb(skb);
+}
+
+void lapb_establish_data_link(struct lapb_cb *lapb)
+{
+	lapb->condition = 0x00;
+	lapb->n2count   = 0;
+
+	if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+		printk(KERN_DEBUG "lapb: (%p) S%d TX SABME(1)\n",
+		       lapb->dev, lapb->state);
+#endif
+		lapb_send_control(lapb, LAPB_SABME, LAPB_POLLON, LAPB_COMMAND);
+	} else {
+#if LAPB_DEBUG > 1
+		printk(KERN_DEBUG "lapb: (%p) S%d TX SABM(1)\n",
+		       lapb->dev, lapb->state);
+#endif
+		lapb_send_control(lapb, LAPB_SABM, LAPB_POLLON, LAPB_COMMAND);
+	}
+
+	lapb_start_t1timer(lapb);
+	lapb_stop_t2timer(lapb);
+}
+
+void lapb_enquiry_response(struct lapb_cb *lapb)
+{
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S%d TX RR(1) R%d\n",
+	       lapb->dev, lapb->state, lapb->vr);
+#endif
+
+	lapb_send_control(lapb, LAPB_RR, LAPB_POLLON, LAPB_RESPONSE);
+
+	lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
+}
+
+void lapb_timeout_response(struct lapb_cb *lapb)
+{
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S%d TX RR(0) R%d\n",
+	       lapb->dev, lapb->state, lapb->vr);
+#endif
+	lapb_send_control(lapb, LAPB_RR, LAPB_POLLOFF, LAPB_RESPONSE);
+
+	lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
+}
+
+void lapb_check_iframes_acked(struct lapb_cb *lapb, unsigned short nr)
+{
+	if (lapb->vs == nr) {
+		lapb_frames_acked(lapb, nr);
+		lapb_stop_t1timer(lapb);
+		lapb->n2count = 0;
+	} else if (lapb->va != nr) {
+		lapb_frames_acked(lapb, nr);
+		lapb_start_t1timer(lapb);
+	}
+}
+
+void lapb_check_need_response(struct lapb_cb *lapb, int type, int pf)
+{
+	if (type == LAPB_COMMAND && pf)
+		lapb_enquiry_response(lapb);
+}
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
new file mode 100644
index 00000000000..5de05a0bc0f
--- /dev/null
+++ b/net/lapb/lapb_subr.c
@@ -0,0 +1,313 @@
+/*
+ *	LAPB release 002
+ *
+ *	This code REQUIRES 2.1.15 or higher/ NET3.038
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	LAPB 001	Jonathan Naylor	Started Coding
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/lapb.h>
+
+/*
+ *	This routine purges all the queues of frames.
+ */
+void lapb_clear_queues(struct lapb_cb *lapb)
+{
+	skb_queue_purge(&lapb->write_queue);
+	skb_queue_purge(&lapb->ack_queue);
+}
+
+/*
+ * This routine purges the input queue of those frames that have been
+ * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
+ * SDL diagram.
+ */
+void lapb_frames_acked(struct lapb_cb *lapb, unsigned short nr)
+{
+	struct sk_buff *skb;
+	int modulus;
+
+	modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS : LAPB_SMODULUS;
+
+	/*
+	 * Remove all the ack-ed frames from the ack queue.
+	 */
+	if (lapb->va != nr)
+		while (skb_peek(&lapb->ack_queue) && lapb->va != nr) {
+		        skb = skb_dequeue(&lapb->ack_queue);
+			kfree_skb(skb);
+			lapb->va = (lapb->va + 1) % modulus;
+		}
+}
+
+void lapb_requeue_frames(struct lapb_cb *lapb)
+{
+        struct sk_buff *skb, *skb_prev = NULL;
+
+	/*
+	 * Requeue all the un-ack-ed frames on the output queue to be picked
+	 * up by lapb_kick called from the timer. This arrangement handles the
+	 * possibility of an empty output queue.
+	 */
+	while ((skb = skb_dequeue(&lapb->ack_queue)) != NULL) {
+		if (!skb_prev)
+			skb_queue_head(&lapb->write_queue, skb);
+		else
+			skb_append(skb_prev, skb);
+		skb_prev = skb;
+	}
+}
+
+/*
+ *	Validate that the value of nr is between va and vs. Return true or
+ *	false for testing.
+ */
+int lapb_validate_nr(struct lapb_cb *lapb, unsigned short nr)
+{
+	unsigned short vc = lapb->va;
+	int modulus;
+	
+	modulus = (lapb->mode & LAPB_EXTENDED) ? LAPB_EMODULUS : LAPB_SMODULUS;
+
+	while (vc != lapb->vs) {
+		if (nr == vc)
+			return 1;
+		vc = (vc + 1) % modulus;
+	}
+	
+	return nr == lapb->vs;
+}
+
+/*
+ *	This routine is the centralised routine for parsing the control
+ *	information for the different frame formats.
+ */
+int lapb_decode(struct lapb_cb *lapb, struct sk_buff *skb,
+		struct lapb_frame *frame)
+{
+	frame->type = LAPB_ILLEGAL;
+
+#if LAPB_DEBUG > 2
+	printk(KERN_DEBUG "lapb: (%p) S%d RX %02X %02X %02X\n",
+	       lapb->dev, lapb->state,
+	       skb->data[0], skb->data[1], skb->data[2]);
+#endif
+
+	/* We always need to look at 2 bytes, sometimes we need
+	 * to look at 3 and those cases are handled below.
+	 */
+	if (!pskb_may_pull(skb, 2))
+		return -1;
+
+	if (lapb->mode & LAPB_MLP) {
+		if (lapb->mode & LAPB_DCE) {
+			if (skb->data[0] == LAPB_ADDR_D)
+				frame->cr = LAPB_COMMAND;
+			if (skb->data[0] == LAPB_ADDR_C)
+				frame->cr = LAPB_RESPONSE;
+		} else {
+			if (skb->data[0] == LAPB_ADDR_C)
+				frame->cr = LAPB_COMMAND;
+			if (skb->data[0] == LAPB_ADDR_D)
+				frame->cr = LAPB_RESPONSE;
+		}
+	} else {
+		if (lapb->mode & LAPB_DCE) {
+			if (skb->data[0] == LAPB_ADDR_B)
+				frame->cr = LAPB_COMMAND;
+			if (skb->data[0] == LAPB_ADDR_A)
+				frame->cr = LAPB_RESPONSE;
+		} else {
+			if (skb->data[0] == LAPB_ADDR_A)
+				frame->cr = LAPB_COMMAND;
+			if (skb->data[0] == LAPB_ADDR_B)
+				frame->cr = LAPB_RESPONSE;
+		}
+	}
+		
+	skb_pull(skb, 1);
+
+	if (lapb->mode & LAPB_EXTENDED) {
+		if (!(skb->data[0] & LAPB_S)) {
+			if (!pskb_may_pull(skb, 2))
+				return -1;
+			/*
+			 * I frame - carries NR/NS/PF
+			 */
+			frame->type       = LAPB_I;
+			frame->ns         = (skb->data[0] >> 1) & 0x7F;
+			frame->nr         = (skb->data[1] >> 1) & 0x7F;
+			frame->pf         = skb->data[1] & LAPB_EPF;
+			frame->control[0] = skb->data[0];
+			frame->control[1] = skb->data[1];
+			skb_pull(skb, 2);
+		} else if ((skb->data[0] & LAPB_U) == 1) {
+			if (!pskb_may_pull(skb, 2))
+				return -1;
+			/*
+			 * S frame - take out PF/NR
+			 */
+			frame->type       = skb->data[0] & 0x0F;
+			frame->nr         = (skb->data[1] >> 1) & 0x7F;
+			frame->pf         = skb->data[1] & LAPB_EPF;
+			frame->control[0] = skb->data[0];
+			frame->control[1] = skb->data[1];
+			skb_pull(skb, 2);
+		} else if ((skb->data[0] & LAPB_U) == 3) {
+			/*
+			 * U frame - take out PF
+			 */
+			frame->type       = skb->data[0] & ~LAPB_SPF;
+			frame->pf         = skb->data[0] & LAPB_SPF;
+			frame->control[0] = skb->data[0];
+			frame->control[1] = 0x00;
+			skb_pull(skb, 1);
+		}
+	} else {
+		if (!(skb->data[0] & LAPB_S)) {
+			/*
+			 * I frame - carries NR/NS/PF
+			 */
+			frame->type = LAPB_I;
+			frame->ns   = (skb->data[0] >> 1) & 0x07;
+			frame->nr   = (skb->data[0] >> 5) & 0x07;
+			frame->pf   = skb->data[0] & LAPB_SPF;
+		} else if ((skb->data[0] & LAPB_U) == 1) {
+			/*
+			 * S frame - take out PF/NR
+			 */
+			frame->type = skb->data[0] & 0x0F;
+			frame->nr   = (skb->data[0] >> 5) & 0x07;
+			frame->pf   = skb->data[0] & LAPB_SPF;
+		} else if ((skb->data[0] & LAPB_U) == 3) {
+			/*
+			 * U frame - take out PF
+			 */
+			frame->type = skb->data[0] & ~LAPB_SPF;
+			frame->pf   = skb->data[0] & LAPB_SPF;
+		}
+
+		frame->control[0] = skb->data[0];
+
+		skb_pull(skb, 1);
+	}
+
+	return 0;
+}
+
+/* 
+ *	This routine is called when the HDLC layer internally  generates a
+ *	command or  response  for  the remote machine ( eg. RR, UA etc. ). 
+ *	Only supervisory or unnumbered frames are processed, FRMRs are handled
+ *	by lapb_transmit_frmr below.
+ */
+void lapb_send_control(struct lapb_cb *lapb, int frametype,
+		       int poll_bit, int type)
+{
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+
+	if ((skb = alloc_skb(LAPB_HEADER_LEN + 3, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, LAPB_HEADER_LEN + 1);
+
+	if (lapb->mode & LAPB_EXTENDED) {
+		if ((frametype & LAPB_U) == LAPB_U) {
+			dptr   = skb_put(skb, 1);
+			*dptr  = frametype;
+			*dptr |= poll_bit ? LAPB_SPF : 0;
+		} else {
+			dptr     = skb_put(skb, 2);
+			dptr[0]  = frametype;
+			dptr[1]  = (lapb->vr << 1);
+			dptr[1] |= poll_bit ? LAPB_EPF : 0;
+		}
+	} else {
+		dptr   = skb_put(skb, 1);
+		*dptr  = frametype;
+		*dptr |= poll_bit ? LAPB_SPF : 0;
+		if ((frametype & LAPB_U) == LAPB_S)	/* S frames carry NR */
+			*dptr |= (lapb->vr << 5);
+	}
+
+	lapb_transmit_buffer(lapb, skb, type);
+}
+
+/* 
+ *	This routine generates FRMRs based on information previously stored in
+ *	the LAPB control block.
+ */
+void lapb_transmit_frmr(struct lapb_cb *lapb)
+{
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+
+	if ((skb = alloc_skb(LAPB_HEADER_LEN + 7, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, LAPB_HEADER_LEN + 1);
+
+	if (lapb->mode & LAPB_EXTENDED) {
+		dptr    = skb_put(skb, 6);
+		*dptr++ = LAPB_FRMR;
+		*dptr++ = lapb->frmr_data.control[0];
+		*dptr++ = lapb->frmr_data.control[1];
+		*dptr++ = (lapb->vs << 1) & 0xFE;
+		*dptr   = (lapb->vr << 1) & 0xFE;
+		if (lapb->frmr_data.cr == LAPB_RESPONSE)
+			*dptr |= 0x01;
+		dptr++;
+		*dptr++ = lapb->frmr_type;
+
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S%d TX FRMR %02X %02X %02X %02X %02X\n",
+	       lapb->dev, lapb->state,
+	       skb->data[1], skb->data[2], skb->data[3],
+	       skb->data[4], skb->data[5]);
+#endif
+	} else {
+		dptr    = skb_put(skb, 4);
+		*dptr++ = LAPB_FRMR;
+		*dptr++ = lapb->frmr_data.control[0];
+		*dptr   = (lapb->vs << 1) & 0x0E;
+		*dptr  |= (lapb->vr << 5) & 0xE0;
+		if (lapb->frmr_data.cr == LAPB_RESPONSE)
+			*dptr |= 0x10;
+		dptr++;
+		*dptr++ = lapb->frmr_type;
+
+#if LAPB_DEBUG > 1
+	printk(KERN_DEBUG "lapb: (%p) S%d TX FRMR %02X %02X %02X\n",
+	       lapb->dev, lapb->state, skb->data[1],
+	       skb->data[2], skb->data[3]);
+#endif
+	}
+
+	lapb_transmit_buffer(lapb, skb, LAPB_RESPONSE);
+}
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
new file mode 100644
index 00000000000..2c8f0f80922
--- /dev/null
+++ b/net/lapb/lapb_timer.c
@@ -0,0 +1,189 @@
+/*
+ *	LAPB release 002
+ *
+ *	This code REQUIRES 2.1.15 or higher/ NET3.038
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	LAPB 001	Jonathan Naylor	Started Coding
+ *	LAPB 002	Jonathan Naylor	New timer architecture.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/lapb.h>
+
+static void lapb_t1timer_expiry(unsigned long);
+static void lapb_t2timer_expiry(unsigned long);
+
+void lapb_start_t1timer(struct lapb_cb *lapb)
+{
+	del_timer(&lapb->t1timer);
+
+	lapb->t1timer.data     = (unsigned long)lapb;
+	lapb->t1timer.function = &lapb_t1timer_expiry;
+	lapb->t1timer.expires  = jiffies + lapb->t1;
+
+	add_timer(&lapb->t1timer);
+}
+
+void lapb_start_t2timer(struct lapb_cb *lapb)
+{
+	del_timer(&lapb->t2timer);
+
+	lapb->t2timer.data     = (unsigned long)lapb;
+	lapb->t2timer.function = &lapb_t2timer_expiry;
+	lapb->t2timer.expires  = jiffies + lapb->t2;
+
+	add_timer(&lapb->t2timer);
+}
+
+void lapb_stop_t1timer(struct lapb_cb *lapb)
+{
+	del_timer(&lapb->t1timer);
+}
+
+void lapb_stop_t2timer(struct lapb_cb *lapb)
+{
+	del_timer(&lapb->t2timer);
+}
+
+int lapb_t1timer_running(struct lapb_cb *lapb)
+{
+	return timer_pending(&lapb->t1timer);
+}
+
+static void lapb_t2timer_expiry(unsigned long param)
+{
+	struct lapb_cb *lapb = (struct lapb_cb *)param;
+
+	if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
+		lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
+		lapb_timeout_response(lapb);
+	}
+}
+
+static void lapb_t1timer_expiry(unsigned long param)
+{
+	struct lapb_cb *lapb = (struct lapb_cb *)param;
+
+	switch (lapb->state) {
+
+		/*
+		 *	If we are a DCE, keep going DM .. DM .. DM
+		 */
+		case LAPB_STATE_0:
+			if (lapb->mode & LAPB_DCE)
+				lapb_send_control(lapb, LAPB_DM, LAPB_POLLOFF, LAPB_RESPONSE);
+			break;
+
+		/*
+		 *	Awaiting connection state, send SABM(E), up to N2 times.
+		 */
+		case LAPB_STATE_1: 
+			if (lapb->n2count == lapb->n2) {
+				lapb_clear_queues(lapb);
+				lapb->state = LAPB_STATE_0;
+				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", lapb->dev);
+#endif
+				return;
+			} else {
+				lapb->n2count++;
+				if (lapb->mode & LAPB_EXTENDED) {
+#if LAPB_DEBUG > 1
+					printk(KERN_DEBUG "lapb: (%p) S1 TX SABME(1)\n", lapb->dev);
+#endif
+					lapb_send_control(lapb, LAPB_SABME, LAPB_POLLON, LAPB_COMMAND);
+				} else {
+#if LAPB_DEBUG > 1
+					printk(KERN_DEBUG "lapb: (%p) S1 TX SABM(1)\n", lapb->dev);
+#endif
+					lapb_send_control(lapb, LAPB_SABM, LAPB_POLLON, LAPB_COMMAND);
+				}
+			}
+			break;
+
+		/*
+		 *	Awaiting disconnection state, send DISC, up to N2 times.
+		 */
+		case LAPB_STATE_2:
+			if (lapb->n2count == lapb->n2) {
+				lapb_clear_queues(lapb);
+				lapb->state = LAPB_STATE_0;
+				lapb_disconnect_confirmation(lapb, LAPB_TIMEDOUT);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n", lapb->dev);
+#endif
+				return;
+			} else {
+				lapb->n2count++;
+#if LAPB_DEBUG > 1
+				printk(KERN_DEBUG "lapb: (%p) S2 TX DISC(1)\n", lapb->dev);
+#endif
+				lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND);
+			}
+			break;
+
+		/*
+		 *	Data transfer state, restransmit I frames, up to N2 times.
+		 */
+		case LAPB_STATE_3:
+			if (lapb->n2count == lapb->n2) {
+				lapb_clear_queues(lapb);
+				lapb->state = LAPB_STATE_0;
+				lapb_stop_t2timer(lapb);
+				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n", lapb->dev);
+#endif
+				return;
+			} else {
+				lapb->n2count++;
+				lapb_requeue_frames(lapb);
+			}
+			break;
+
+		/*
+		 *	Frame reject state, restransmit FRMR frames, up to N2 times.
+		 */
+		case LAPB_STATE_4:
+			if (lapb->n2count == lapb->n2) {
+				lapb_clear_queues(lapb);
+				lapb->state = LAPB_STATE_0;
+				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
+#if LAPB_DEBUG > 0
+				printk(KERN_DEBUG "lapb: (%p) S4 -> S0\n", lapb->dev);
+#endif
+				return;
+			} else {
+				lapb->n2count++;
+				lapb_transmit_frmr(lapb);
+			}
+			break;
+	}
+
+	lapb_start_t1timer(lapb);
+}
diff --git a/net/llc/Kconfig b/net/llc/Kconfig
new file mode 100644
index 00000000000..b91c6510816
--- /dev/null
+++ b/net/llc/Kconfig
@@ -0,0 +1,10 @@
+config LLC
+	tristate
+	depends on NET
+
+config LLC2
+	tristate "ANSI/IEEE 802.2 LLC type 2 Support"
+	select LLC
+	help
+	  This is a Logical Link Layer type 2, connection oriented support. 
+	  Select this if you want to have support for PF_LLC sockets.
diff --git a/net/llc/Makefile b/net/llc/Makefile
new file mode 100644
index 00000000000..5ebd4ed2bd4
--- /dev/null
+++ b/net/llc/Makefile
@@ -0,0 +1,24 @@
+###########################################################################
+# Makefile for the Linux 802.2 LLC (fully-functional) layer.
+#
+# Copyright (c) 1997 by Procom Technology,Inc.
+#		2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+#
+# This program can be redistributed or modified under the terms of the 
+# GNU General Public License as published by the Free Software Foundation.
+# This program is distributed without any warranty or implied warranty
+# of merchantability or fitness for a particular purpose.
+#
+# See the GNU General Public License for more details.
+###########################################################################
+
+obj-$(CONFIG_LLC) += llc.o
+
+llc-y := llc_core.o llc_input.o llc_output.o
+
+obj-$(CONFIG_LLC2) += llc2.o
+
+llc2-y := llc_if.o llc_c_ev.o llc_c_ac.o llc_conn.o llc_c_st.o llc_pdu.o \
+	  llc_sap.o llc_s_ac.o llc_s_ev.o llc_s_st.o af_llc.o llc_station.o
+
+llc2-$(CONFIG_PROC_FS) += llc_proc.o
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
new file mode 100644
index 00000000000..20b4cfebd74
--- /dev/null
+++ b/net/llc/af_llc.c
@@ -0,0 +1,1079 @@
+/*
+ * af_llc.c - LLC User Interface SAPs
+ * Description:
+ *   Functions in this module are implementation of socket based llc
+ *   communications for the Linux operating system. Support of llc class
+ *   one and class two is provided via SOCK_DGRAM and SOCK_STREAM
+ *   respectively.
+ *
+ *   An llc2 connection is (mac + sap), only one llc2 sap connection
+ *   is allowed per mac. Though one sap may have multiple mac + sap
+ *   connections.
+ *
+ * Copyright (c) 2001 by Jay Schulist <jschlst@samba.org>
+ *		 2002-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/llc.h>
+#include <net/llc_sap.h>
+#include <net/llc_pdu.h>
+#include <net/llc_conn.h>
+
+/* remember: uninitialized global data is zeroed because its in .bss */
+static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
+static u16 llc_ui_sap_link_no_max[256];
+static struct sockaddr_llc llc_ui_addrnull;
+static struct proto_ops llc_ui_ops;
+
+static int llc_ui_wait_for_conn(struct sock *sk, int timeout);
+static int llc_ui_wait_for_disc(struct sock *sk, int timeout);
+static int llc_ui_wait_for_data(struct sock *sk, int timeout);
+static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout);
+
+#if 0
+#define dprintk(args...) printk(KERN_DEBUG args)
+#else
+#define dprintk(args...)
+#endif
+
+/**
+ *	llc_ui_next_link_no - return the next unused link number for a sap
+ *	@sap: Address of sap to get link number from.
+ *
+ *	Return the next unused link number for a given sap.
+ */
+static __inline__ u16 llc_ui_next_link_no(int sap)
+{
+	return llc_ui_sap_link_no_max[sap]++;
+}
+
+/**
+ *	llc_proto_type - return eth protocol for ARP header type
+ *	@arphrd: ARP header type.
+ *
+ *	Given an ARP header type return the corresponding ethernet protocol.
+ */
+static __inline__ u16 llc_proto_type(u16 arphrd)
+{
+	return arphrd == ARPHRD_IEEE802_TR ?
+		         htons(ETH_P_TR_802_2) : htons(ETH_P_802_2);
+}
+
+/**
+ *	llc_ui_addr_null - determines if a address structure is null
+ *	@addr: Address to test if null.
+ */
+static __inline__ u8 llc_ui_addr_null(struct sockaddr_llc *addr)
+{
+	return !memcmp(addr, &llc_ui_addrnull, sizeof(*addr));
+}
+
+/**
+ *	llc_ui_header_len - return length of llc header based on operation
+ *	@sk: Socket which contains a valid llc socket type.
+ *	@addr: Complete sockaddr_llc structure received from the user.
+ *
+ *	Provide the length of the llc header depending on what kind of
+ *	operation the user would like to perform and the type of socket.
+ *	Returns the correct llc header length.
+ */
+static __inline__ u8 llc_ui_header_len(struct sock *sk,
+				       struct sockaddr_llc *addr)
+{
+	u8 rc = LLC_PDU_LEN_U;
+
+	if (addr->sllc_test || addr->sllc_xid)
+		rc = LLC_PDU_LEN_U;
+	else if (sk->sk_type == SOCK_STREAM)
+		rc = LLC_PDU_LEN_I;
+	return rc;
+}
+
+/**
+ *	llc_ui_send_data - send data via reliable llc2 connection
+ *	@sk: Connection the socket is using.
+ *	@skb: Data the user wishes to send.
+ *	@addr: Source and destination fields provided by the user.
+ *	@noblock: can we block waiting for data?
+ *
+ *	Send data via reliable llc2 connection.
+ *	Returns 0 upon success, non-zero if action did not succeed.
+ */
+static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
+{
+	struct llc_sock* llc = llc_sk(sk);
+	int rc = 0;
+
+	if (llc_data_accept_state(llc->state) || llc->p_flag) {
+		int timeout = sock_sndtimeo(sk, noblock);
+
+		rc = llc_ui_wait_for_busy_core(sk, timeout);
+	}
+	if (!rc)
+		rc = llc_build_and_send_pkt(sk, skb);
+	return rc;
+}
+
+static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
+{
+	sk->sk_type	= sock->type;
+	sk->sk_sleep	= &sock->wait;
+	sk->sk_socket	= sock;
+	sock->sk	= sk;
+	sock->ops	= &llc_ui_ops;
+}
+
+static struct proto llc_proto = {
+	.name	  = "DDP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct llc_sock),
+};
+
+/**
+ *	llc_ui_create - alloc and init a new llc_ui socket
+ *	@sock: Socket to initialize and attach allocated sk to.
+ *	@protocol: Unused.
+ *
+ *	Allocate and initialize a new llc_ui socket, validate the user wants a
+ *	socket type we have available.
+ *	Returns 0 upon success, negative upon failure.
+ */
+static int llc_ui_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	int rc = -ESOCKTNOSUPPORT;
+
+	if (sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM) {
+		rc = -ENOMEM;
+		sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto);
+		if (sk) {
+			rc = 0;
+			llc_ui_sk_init(sock, sk);
+		}
+	}
+	return rc;
+}
+
+/**
+ *	llc_ui_release - shutdown socket
+ *	@sock: Socket to release.
+ *
+ *	Shutdown and deallocate an existing socket.
+ */
+static int llc_ui_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc;
+
+	if (!sk)
+		goto out;
+	sock_hold(sk);
+	lock_sock(sk);
+	llc = llc_sk(sk);
+	dprintk("%s: closing local(%02X) remote(%02X)\n", __FUNCTION__,
+		llc->laddr.lsap, llc->daddr.lsap);
+	if (!llc_send_disc(sk))
+		llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		llc_sap_remove_socket(llc->sap, sk);
+	release_sock(sk);
+	if (llc->sap && hlist_empty(&llc->sap->sk_list.list)) {
+		llc_release_sockets(llc->sap);
+		llc_sap_close(llc->sap);
+	}
+	if (llc->dev)
+		dev_put(llc->dev);
+	sock_put(sk);
+	llc_sk_free(sk);
+out:
+	return 0;
+}
+
+/**
+ *	llc_ui_autoport - provide dynamically allocate SAP number
+ *
+ *	Provide the caller with a dynamically allocated SAP number according
+ *	to the rules that are set in this function. Returns: 0, upon failure,
+ *	SAP number otherwise.
+ */
+static int llc_ui_autoport(void)
+{
+	struct llc_sap *sap;
+	int i, tries = 0;
+
+	while (tries < LLC_SAP_DYN_TRIES) {
+		for (i = llc_ui_sap_last_autoport;
+		     i < LLC_SAP_DYN_STOP; i += 2) {
+			sap = llc_sap_find(i);
+			if (!sap) {
+				llc_ui_sap_last_autoport = i + 2;
+				goto out;
+			}
+		}
+		llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
+		tries++;
+	}
+	i = 0;
+out:
+	return i;
+}
+
+/**
+ *	llc_ui_autobind - Bind a socket to a specific address.
+ *	@sk: Socket to bind an address to.
+ *	@addr: Address the user wants the socket bound to.
+ *
+ *	Bind a socket to a specific address. For llc a user is able to bind to
+ *	a specific sap only or mac + sap. If the user only specifies a sap and
+ *	a null dmac (all zeros) the user is attempting to bind to an entire
+ *	sap. This will stop anyone else on the local system from using that
+ *	sap.  If someone else has a mac + sap open the bind to null + sap will
+ *	fail.
+ *	If the user desires to bind to a specific mac + sap, it is possible to
+ *	have multiple sap connections via multiple macs.
+ *	Bind and autobind for that matter must enforce the correct sap usage
+ *	otherwise all hell will break loose.
+ *	Returns: 0 upon success, negative otherwise.
+ */
+static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap;
+	int rc = -EINVAL;
+
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+	rc = -ENODEV;
+	llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd);
+	if (!llc->dev)
+		goto out;
+	rc = -EUSERS;
+	llc->laddr.lsap = llc_ui_autoport();
+	if (!llc->laddr.lsap)
+		goto out;
+	rc = -EBUSY; /* some other network layer is using the sap */
+	sap = llc_sap_open(llc->laddr.lsap, NULL);
+	if (!sap)
+		goto out;
+	memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
+	memcpy(&llc->addr, addr, sizeof(llc->addr));
+	/* assign new connection to its SAP */
+	llc_sap_add_socket(sap, sk);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	rc = 0;
+out:
+	return rc;
+}
+
+/**
+ *	llc_ui_bind - bind a socket to a specific address.
+ *	@sock: Socket to bind an address to.
+ *	@uaddr: Address the user wants the socket bound to.
+ *	@addrlen: Length of the uaddr structure.
+ *
+ *	Bind a socket to a specific address. For llc a user is able to bind to
+ *	a specific sap only or mac + sap. If the user only specifies a sap and
+ *	a null dmac (all zeros) the user is attempting to bind to an entire
+ *	sap. This will stop anyone else on the local system from using that
+ *	sap. If someone else has a mac + sap open the bind to null + sap will
+ *	fail.
+ *	If the user desires to bind to a specific mac + sap, it is possible to
+ *	have multiple sap connections via multiple macs.
+ *	Bind and autobind for that matter must enforce the correct sap usage
+ *	otherwise all hell will break loose.
+ *	Returns: 0 upon success, negative otherwise.
+ */
+static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
+{
+	struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap;
+	int rc = -EINVAL;
+
+	dprintk("%s: binding %02X\n", __FUNCTION__, addr->sllc_sap);
+	if (!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))
+		goto out;
+	rc = -EAFNOSUPPORT;
+	if (addr->sllc_family != AF_LLC)
+		goto out;
+	if (!addr->sllc_sap) {
+		rc = -EUSERS;
+		addr->sllc_sap = llc_ui_autoport();
+		if (!addr->sllc_sap)
+			goto out;
+	}
+	sap = llc_sap_find(addr->sllc_sap);
+	if (!sap) {
+		sap = llc_sap_open(addr->sllc_sap, NULL);
+		rc = -EBUSY; /* some other network layer is using the sap */
+		if (!sap)
+			goto out;
+	} else {
+		struct llc_addr laddr, daddr;
+		struct sock *ask;
+
+		memset(&laddr, 0, sizeof(laddr));
+		memset(&daddr, 0, sizeof(daddr));
+		/*
+		 * FIXME: check if the the address is multicast,
+		 * 	  only SOCK_DGRAM can do this.
+		 */
+		memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
+		laddr.lsap = addr->sllc_sap;
+		rc = -EADDRINUSE; /* mac + sap clash. */
+		ask = llc_lookup_established(sap, &daddr, &laddr);
+		if (ask) {
+			sock_put(ask);
+			goto out;
+		}
+	}
+	llc->laddr.lsap = addr->sllc_sap;
+	memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
+	memcpy(&llc->addr, addr, sizeof(llc->addr));
+	/* assign new connection to its SAP */
+	llc_sap_add_socket(sap, sk);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	rc = 0;
+out:
+	return rc;
+}
+
+/**
+ *	llc_ui_shutdown - shutdown a connect llc2 socket.
+ *	@sock: Socket to shutdown.
+ *	@how: What part of the socket to shutdown.
+ *
+ *	Shutdown a connected llc2 socket. Currently this function only supports
+ *	shutting down both sends and receives (2), we could probably make this
+ *	function such that a user can shutdown only half the connection but not
+ *	right now.
+ *	Returns: 0 upon success, negative otherwise.
+ */
+static int llc_ui_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	int rc = -ENOTCONN;
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_ESTABLISHED)
+		goto out;
+	rc = -EINVAL;
+	if (how != 2)
+		goto out;
+	rc = llc_send_disc(sk);
+	if (!rc)
+		rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
+	/* Wake up anyone sleeping in poll */
+	sk->sk_state_change(sk);
+out:
+	release_sock(sk);
+	return rc;
+}
+
+/**
+ *	llc_ui_connect - Connect to a remote llc2 mac + sap.
+ *	@sock: Socket which will be connected to the remote destination.
+ *	@uaddr: Remote and possibly the local address of the new connection.
+ *	@addrlen: Size of uaddr structure.
+ *	@flags: Operational flags specified by the user.
+ *
+ *	Connect to a remote llc2 mac + sap. The caller must specify the
+ *	destination mac and address to connect to. If the user hasn't previously
+ *	called bind(2) with a smac the address of the first interface of the
+ *	specified arp type will be used.
+ *	This function will autobind if user did not previously call bind.
+ *	Returns: 0 upon success, negative otherwise.
+ */
+static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr,
+			  int addrlen, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
+	struct net_device *dev;
+	int rc = -EINVAL;
+
+	lock_sock(sk);
+	if (addrlen != sizeof(*addr))
+		goto out;
+	rc = -EAFNOSUPPORT;
+	if (addr->sllc_family != AF_LLC)
+		goto out;
+	/* bind connection to sap if user hasn't done it. */
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		/* bind to sap with null dev, exclusive */
+		rc = llc_ui_autobind(sock, addr);
+		if (rc)
+			goto out;
+		llc->daddr.lsap = addr->sllc_sap;
+		memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN);
+	}
+	dev = llc->dev;
+	if (sk->sk_type != SOCK_STREAM)
+		goto out;
+	rc = -EALREADY;
+	if (sock->state == SS_CONNECTING)
+		goto out;
+	sock->state = SS_CONNECTING;
+	sk->sk_state   = TCP_SYN_SENT;
+	llc->link   = llc_ui_next_link_no(llc->sap->laddr.lsap);
+	rc = llc_establish_connection(sk, dev->dev_addr,
+				      addr->sllc_mac, addr->sllc_sap);
+	if (rc) {
+		dprintk("%s: llc_ui_send_conn failed :-(\n", __FUNCTION__);
+		sock->state  = SS_UNCONNECTED;
+		sk->sk_state = TCP_CLOSE;
+		goto out;
+	}
+	rc = llc_ui_wait_for_conn(sk, sk->sk_rcvtimeo);
+	if (rc)
+		dprintk("%s: llc_ui_wait_for_conn failed=%d\n", __FUNCTION__, rc);
+out:
+	release_sock(sk);
+	return rc;
+}
+
+/**
+ *	llc_ui_listen - allow a normal socket to accept incoming connections
+ *	@sock: Socket to allow incoming connections on.
+ *	@backlog: Number of connections to queue.
+ *
+ *	Allow a normal socket to accept incoming connections.
+ *	Returns 0 upon success, negative otherwise.
+ */
+static int llc_ui_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int rc = -EINVAL;
+
+	lock_sock(sk);
+	if (sock->state != SS_UNCONNECTED)
+		goto out;
+	rc = -EOPNOTSUPP;
+	if (sk->sk_type != SOCK_STREAM)
+		goto out;
+	rc = -EAGAIN;
+	if (sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+	rc = 0;
+	if (!(unsigned)backlog)	/* BSDism */
+		backlog = 1;
+	sk->sk_max_ack_backlog = backlog;
+	if (sk->sk_state != TCP_LISTEN) {
+		sk->sk_ack_backlog = 0;
+		sk->sk_state	   = TCP_LISTEN;
+	}
+	sk->sk_socket->flags |= __SO_ACCEPTCON;
+out:
+	release_sock(sk);
+	return rc;
+}
+
+static int llc_ui_wait_for_disc(struct sock *sk, int timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int rc;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		rc = 0;
+		if (sk->sk_state != TCP_CLOSE) {
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+		} else
+			break;
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = -EAGAIN;
+		if (!timeout)
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+
+static int llc_ui_wait_for_conn(struct sock *sk, int timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int rc;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		rc = -EAGAIN;
+		if (sk->sk_state == TCP_CLOSE)
+			break;
+		rc = 0;
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+		} else
+			break;
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = -EAGAIN;
+		if (!timeout)
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+
+static int llc_ui_wait_for_data(struct sock *sk, int timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int rc = 0;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+		/*
+		 * Well, if we have backlog, try to process it now.
+		 */
+                if (sk->sk_backlog.tail) {
+			release_sock(sk);
+			lock_sock(sk);
+		}
+		rc = 0;
+		if (skb_queue_empty(&sk->sk_receive_queue)) {
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+		} else
+			break;
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = -EAGAIN;
+		if (!timeout)
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+
+static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct llc_sock *llc = llc_sk(sk);
+	int rc;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		dprintk("%s: looping...\n", __FUNCTION__);
+		__set_current_state(TASK_INTERRUPTIBLE);
+		rc = -ENOTCONN;
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+		rc = 0;
+		if (llc_data_accept_state(llc->state) || llc->p_flag) {
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+		} else
+			break;
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = -EAGAIN;
+		if (!timeout)
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+
+/**
+ *	llc_ui_accept - accept a new incoming connection.
+ *	@sock: Socket which connections arrive on.
+ *	@newsock: Socket to move incoming connection to.
+ *	@flags: User specified operational flags.
+ *
+ *	Accept a new incoming connection.
+ *	Returns 0 upon success, negative otherwise.
+ */
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk = sock->sk, *newsk;
+	struct llc_sock *llc, *newllc;
+	struct sk_buff *skb;
+	int rc = -EOPNOTSUPP;
+
+	dprintk("%s: accepting on %02X\n", __FUNCTION__,
+	        llc_sk(sk)->laddr.lsap);
+	lock_sock(sk);
+	if (sk->sk_type != SOCK_STREAM)
+		goto out;
+	rc = -EINVAL;
+	if (sock->state != SS_UNCONNECTED || sk->sk_state != TCP_LISTEN)
+		goto out;
+	/* wait for a connection to arrive. */
+	rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo);
+	if (rc)
+		goto out;
+	dprintk("%s: got a new connection on %02X\n", __FUNCTION__,
+	        llc_sk(sk)->laddr.lsap);
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	rc = -EINVAL;
+	if (!skb->sk)
+		goto frees;
+	rc = 0;
+	newsk = skb->sk;
+	/* attach connection to a new socket. */
+	llc_ui_sk_init(newsock, newsk);
+	sock_reset_flag(newsk, SOCK_ZAPPED);
+	newsk->sk_state		= TCP_ESTABLISHED;
+	newsock->state		= SS_CONNECTED;
+	llc			= llc_sk(sk);
+	newllc			= llc_sk(newsk);
+	memcpy(&newllc->addr, &llc->addr, sizeof(newllc->addr));
+	newllc->link = llc_ui_next_link_no(newllc->laddr.lsap);
+
+	/* put original socket back into a clean listen state. */
+	sk->sk_state = TCP_LISTEN;
+	sk->sk_ack_backlog--;
+	skb->sk = NULL;
+	dprintk("%s: ok success on %02X, client on %02X\n", __FUNCTION__,
+		llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap);
+frees:
+	kfree_skb(skb);
+out:
+	release_sock(sk);
+	return rc;
+}
+
+/**
+ *	llc_ui_recvmsg - copy received data to the socket user.
+ *	@sock: Socket to copy data from.
+ *	@msg: Various user space related information.
+ *	@size: Size of user buffer.
+ *	@flags: User specified flags.
+ *
+ *	Copy received data to the socket user.
+ *	Returns non-negative upon success, negative otherwise.
+ */
+static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name;
+	struct sk_buff *skb;
+	size_t copied = 0;
+	int rc = -ENOMEM, timeout;
+	int noblock = flags & MSG_DONTWAIT;
+
+	dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__,
+		llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap);
+	lock_sock(sk);
+	timeout = sock_rcvtimeo(sk, noblock);
+	rc = llc_ui_wait_for_data(sk, timeout);
+	if (rc) {
+		dprintk("%s: llc_ui_wait_for_data failed recv "
+			"in %02X from %02X\n", __FUNCTION__,
+			llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap);
+		goto out;
+	}
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	if (!skb) /* shutdown */
+		goto out;
+	copied = skb->len;
+	if (copied > size)
+		copied = size;
+	rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (rc)
+		goto dgram_free;
+	if (skb->len > copied) {
+		skb_pull(skb, copied);
+		skb_queue_head(&sk->sk_receive_queue, skb);
+	}
+	if (uaddr)
+		memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
+	msg->msg_namelen = sizeof(*uaddr);
+	if (!skb->list) {
+dgram_free:
+		kfree_skb(skb);
+	}
+out:
+	release_sock(sk);
+	return rc ? : copied;
+}
+
+/**
+ *	llc_ui_sendmsg - Transmit data provided by the socket user.
+ *	@sock: Socket to transmit data from.
+ *	@msg: Various user related information.
+ *	@len: Length of data to transmit.
+ *
+ *	Transmit data provided by the socket user.
+ *	Returns non-negative upon success, negative otherwise.
+ */
+static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	struct sockaddr_llc *addr = (struct sockaddr_llc *)msg->msg_name;
+	int flags = msg->msg_flags;
+	int noblock = flags & MSG_DONTWAIT;
+	struct net_device *dev;
+	struct sk_buff *skb;
+	size_t size = 0;
+	int rc = -EINVAL, copied = 0, hdrlen;
+
+	dprintk("%s: sending from %02X to %02X\n", __FUNCTION__,
+		llc->laddr.lsap, llc->daddr.lsap);
+	lock_sock(sk);
+	if (addr) {
+		if (msg->msg_namelen < sizeof(*addr))
+			goto release;
+	} else {
+		if (llc_ui_addr_null(&llc->addr))
+			goto release;
+		addr = &llc->addr;
+	}
+	/* must bind connection to sap if user hasn't done it. */
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		/* bind to sap with null dev, exclusive. */
+		rc = llc_ui_autobind(sock, addr);
+		if (rc)
+			goto release;
+	}
+	dev = llc->dev;
+	hdrlen = dev->hard_header_len + llc_ui_header_len(sk, addr);
+	size = hdrlen + len;
+	if (size > dev->mtu)
+		size = dev->mtu;
+	copied = size - hdrlen;
+	release_sock(sk);
+	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+	lock_sock(sk);
+	if (!skb)
+		goto release;
+	skb->sk	      = sk;
+	skb->dev      = dev;
+	skb->protocol = llc_proto_type(addr->sllc_arphrd);
+	skb_reserve(skb, hdrlen); 
+	rc = memcpy_fromiovec(skb_put(skb, copied), msg->msg_iov, copied);
+	if (rc)
+		goto out;
+	if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
+		llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
+					  addr->sllc_sap);
+		goto out;
+	}
+	if (addr->sllc_test) {
+		llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
+					    addr->sllc_sap);
+		goto out;
+	}
+	if (addr->sllc_xid) {
+		llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
+					   addr->sllc_sap);
+		goto out;
+	}
+	rc = -ENOPROTOOPT;
+	if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
+		goto out;
+	rc = llc_ui_send_data(sk, skb, noblock);
+	if (rc)
+		dprintk("%s: llc_ui_send_data failed: %d\n", __FUNCTION__, rc);
+out:
+	if (rc)
+		kfree_skb(skb);
+release:
+	if (rc)
+		dprintk("%s: failed sending from %02X to %02X: %d\n",
+			__FUNCTION__, llc->laddr.lsap, llc->daddr.lsap, rc);
+	release_sock(sk);
+	return rc ? : copied;
+}
+
+/**
+ *	llc_ui_getname - return the address info of a socket
+ *	@sock: Socket to get address of.
+ *	@uaddr: Address structure to return information.
+ *	@uaddrlen: Length of address structure.
+ *	@peer: Does user want local or remote address information.
+ *
+ *	Return the address information of a socket.
+ */
+static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
+			  int *uaddrlen, int peer)
+{
+	struct sockaddr_llc sllc;
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	int rc = 0;
+
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+	*uaddrlen = sizeof(sllc);
+	memset(uaddr, 0, *uaddrlen);
+	if (peer) {
+		rc = -ENOTCONN;
+		if (sk->sk_state != TCP_ESTABLISHED)
+			goto out;
+		if(llc->dev)
+			sllc.sllc_arphrd = llc->dev->type;
+		sllc.sllc_sap = llc->daddr.lsap;
+		memcpy(&sllc.sllc_mac, &llc->daddr.mac, IFHWADDRLEN);
+	} else {
+		rc = -EINVAL;
+		if (!llc->sap)
+			goto out;
+		sllc.sllc_sap = llc->sap->laddr.lsap;
+
+		if (llc->dev) {
+			sllc.sllc_arphrd = llc->dev->type;
+			memcpy(&sllc.sllc_mac, &llc->dev->dev_addr,
+			       IFHWADDRLEN);
+		}
+	}
+	rc = 0;
+	sllc.sllc_family = AF_LLC;
+	memcpy(uaddr, &sllc, sizeof(sllc));
+out:
+	release_sock(sk);
+	return rc;
+}
+
+/**
+ *	llc_ui_ioctl - io controls for PF_LLC
+ *	@sock: Socket to get/set info
+ *	@cmd: command
+ *	@arg: optional argument for cmd
+ *
+ *	get/set info on llc sockets
+ */
+static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
+			unsigned long arg)
+{
+	return dev_ioctl(cmd, (void __user *)arg);
+}
+
+/**
+ *	llc_ui_setsockopt - set various connection specific parameters.
+ *	@sock: Socket to set options on.
+ *	@level: Socket level user is requesting operations on.
+ *	@optname: Operation name.
+ *	@optval User provided operation data.
+ *	@optlen: Length of optval.
+ *
+ *	Set various connection specific parameters.
+ */
+static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
+			     char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	int rc = -EINVAL, opt;
+
+	lock_sock(sk);
+	if (level != SOL_LLC || optlen != sizeof(int))
+		goto out;
+	rc = get_user(opt, (int __user *)optval);
+	if (rc)
+		goto out;
+	rc = -EINVAL;
+	switch (optname) {
+	case LLC_OPT_RETRY:
+		if (opt > LLC_OPT_MAX_RETRY)
+			goto out;
+		llc->n2 = opt;
+		break;
+	case LLC_OPT_SIZE:
+		if (opt > LLC_OPT_MAX_SIZE)
+			goto out;
+		llc->n1 = opt;
+		break;
+	case LLC_OPT_ACK_TMR_EXP:
+		if (opt > LLC_OPT_MAX_ACK_TMR_EXP)
+			goto out;
+		llc->ack_timer.expire = opt;
+		break;
+	case LLC_OPT_P_TMR_EXP:
+		if (opt > LLC_OPT_MAX_P_TMR_EXP)
+			goto out;
+		llc->pf_cycle_timer.expire = opt;
+		break;
+	case LLC_OPT_REJ_TMR_EXP:
+		if (opt > LLC_OPT_MAX_REJ_TMR_EXP)
+			goto out;
+		llc->rej_sent_timer.expire = opt;
+		break;
+	case LLC_OPT_BUSY_TMR_EXP:
+		if (opt > LLC_OPT_MAX_BUSY_TMR_EXP)
+			goto out;
+		llc->busy_state_timer.expire = opt;
+		break;
+	case LLC_OPT_TX_WIN:
+		if (opt > LLC_OPT_MAX_WIN)
+			goto out;
+		llc->k = opt;
+		break;
+	case LLC_OPT_RX_WIN:
+		if (opt > LLC_OPT_MAX_WIN)
+			goto out;
+		llc->rw = opt;
+		break;
+	default:
+		rc = -ENOPROTOOPT;
+		goto out;
+	}
+	rc = 0;
+out:
+	release_sock(sk);
+	return rc;
+}
+
+/**
+ *	llc_ui_getsockopt - get connection specific socket info
+ *	@sock: Socket to get information from.
+ *	@level: Socket level user is requesting operations on.
+ *	@optname: Operation name.
+ *	@optval: Variable to return operation data in.
+ *	@optlen: Length of optval.
+ *
+ *	Get connection specific socket information.
+ */
+static int llc_ui_getsockopt(struct socket *sock, int level, int optname,
+			     char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct llc_sock *llc = llc_sk(sk);
+	int val = 0, len = 0, rc = -EINVAL;
+
+	lock_sock(sk);
+	if (level != SOL_LLC)
+		goto out;
+	rc = get_user(len, optlen);
+	if (rc)
+		goto out;
+	rc = -EINVAL;
+	if (len != sizeof(int))
+		goto out;
+	switch (optname) {
+	case LLC_OPT_RETRY:
+		val = llc->n2;				break;
+	case LLC_OPT_SIZE:
+		val = llc->n1;				break;
+	case LLC_OPT_ACK_TMR_EXP:
+		val = llc->ack_timer.expire;		break;
+	case LLC_OPT_P_TMR_EXP:
+		val = llc->pf_cycle_timer.expire;	break;
+	case LLC_OPT_REJ_TMR_EXP:
+		val = llc->rej_sent_timer.expire;	break;
+	case LLC_OPT_BUSY_TMR_EXP:
+		val = llc->busy_state_timer.expire;	break;
+	case LLC_OPT_TX_WIN:
+		val = llc->k;				break;
+	case LLC_OPT_RX_WIN:
+		val = llc->rw;				break;
+	default:
+		rc = -ENOPROTOOPT;
+		goto out;
+	}
+	rc = 0;
+	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
+		rc = -EFAULT;
+out:
+	release_sock(sk);
+	return rc;
+}
+
+static struct net_proto_family llc_ui_family_ops = {
+	.family = PF_LLC,
+	.create = llc_ui_create,
+	.owner	= THIS_MODULE,
+};
+
+static struct proto_ops llc_ui_ops = {
+	.family	     = PF_LLC,
+	.owner       = THIS_MODULE,
+	.release     = llc_ui_release,
+	.bind	     = llc_ui_bind,
+	.connect     = llc_ui_connect,
+	.socketpair  = sock_no_socketpair,
+	.accept      = llc_ui_accept,
+	.getname     = llc_ui_getname,
+	.poll	     = datagram_poll,
+	.ioctl       = llc_ui_ioctl,
+	.listen      = llc_ui_listen,
+	.shutdown    = llc_ui_shutdown,
+	.setsockopt  = llc_ui_setsockopt,
+	.getsockopt  = llc_ui_getsockopt,
+	.sendmsg     = llc_ui_sendmsg,
+	.recvmsg     = llc_ui_recvmsg,
+	.mmap	     = sock_no_mmap,
+	.sendpage    = sock_no_sendpage,
+};
+
+extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb);
+extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb);
+
+static int __init llc2_init(void)
+{
+	int rc = proto_register(&llc_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	llc_build_offset_table();
+	llc_station_init();
+	llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
+	rc = llc_proc_init();
+	if (rc != 0)
+		goto out_unregister_llc_proto;
+	sock_register(&llc_ui_family_ops);
+	llc_add_pack(LLC_DEST_SAP, llc_sap_handler);
+	llc_add_pack(LLC_DEST_CONN, llc_conn_handler);
+out:
+	return rc;
+out_unregister_llc_proto:
+	proto_unregister(&llc_proto);
+	goto out;
+}
+
+static void __exit llc2_exit(void)
+{
+	llc_station_exit();
+	llc_remove_pack(LLC_DEST_SAP);
+	llc_remove_pack(LLC_DEST_CONN);
+	sock_unregister(PF_LLC);
+	llc_proc_exit();
+	proto_unregister(&llc_proto);
+}
+
+module_init(llc2_init);
+module_exit(llc2_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003");
+MODULE_DESCRIPTION("IEEE 802.2 PF_LLC support");
+MODULE_ALIAS_NETPROTO(PF_LLC);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
new file mode 100644
index 00000000000..b218be4c10e
--- /dev/null
+++ b/net/llc/llc_c_ac.c
@@ -0,0 +1,1514 @@
+/*
+ * llc_c_ac.c - actions performed during connection state transition.
+ *
+ * Description:
+ *   Functions in this module are implementation of connection component actions
+ *   Details of actions can be found in IEEE-802.2 standard document.
+ *   All functions have one connection and one event as input argument. All of
+ *   them return 0 On success and 1 otherwise.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/netdevice.h>
+#include <net/llc_conn.h>
+#include <net/llc_sap.h>
+#include <net/sock.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_c_st.h>
+#include <net/llc_pdu.h>
+#include <net/llc.h>
+
+#include "llc_output.h"
+
+static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb);
+static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb);
+static int llc_conn_ac_data_confirm(struct sock *sk, struct sk_buff *ev);
+
+static int llc_conn_ac_inc_npta_value(struct sock *sk, struct sk_buff *skb);
+
+static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk,
+					       struct sk_buff *skb);
+
+static int llc_conn_ac_set_p_flag_1(struct sock *sk, struct sk_buff *skb);
+
+#define INCORRECT 0
+
+int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc->remote_busy_flag) {
+		u8 nr;
+		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+		llc->remote_busy_flag = 0;
+		del_timer(&llc->busy_state_timer.timer);
+		nr = LLC_I_GET_NR(pdu);
+		llc_conn_resend_i_pdu_as_cmd(sk, nr, 0);
+	}
+	return 0;
+}
+
+int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOTCONN;
+	u8 dsap;
+	struct llc_sap *sap;
+
+	llc_pdu_decode_dsap(skb, &dsap);
+	sap = llc_sap_find(dsap);
+	if (sap) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+		struct llc_sock *llc = llc_sk(sk);
+
+		llc_pdu_decode_sa(skb, llc->daddr.mac);
+		llc_pdu_decode_da(skb, llc->laddr.mac);
+		llc->dev = skb->dev;
+		ev->ind_prim = LLC_CONN_PRIM;
+		rc = 0;
+	}
+	return rc;
+}
+
+int llc_conn_ac_conn_confirm(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->cfm_prim = LLC_CONN_PRIM;
+	return 0;
+}
+
+static int llc_conn_ac_data_confirm(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->cfm_prim = LLC_DATA_PRIM;
+	return 0;
+}
+
+int llc_conn_ac_data_ind(struct sock *sk, struct sk_buff *skb)
+{
+	llc_conn_rtn_pdu(sk, skb);
+	return 0;
+}
+
+int llc_conn_ac_disc_ind(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+	u8 reason = 0;
+	int rc = 0;
+
+	if (ev->type == LLC_CONN_EV_TYPE_PDU) {
+		struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+		if (LLC_PDU_IS_RSP(pdu) &&
+		    LLC_PDU_TYPE_IS_U(pdu) &&
+		    LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_DM)
+			reason = LLC_DISC_REASON_RX_DM_RSP_PDU;
+		else if (LLC_PDU_IS_CMD(pdu) &&
+			   LLC_PDU_TYPE_IS_U(pdu) &&
+			   LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_DISC)
+			reason = LLC_DISC_REASON_RX_DISC_CMD_PDU;
+	} else if (ev->type == LLC_CONN_EV_TYPE_ACK_TMR)
+		reason = LLC_DISC_REASON_ACK_TMR_EXP;
+	else {
+		reason = 0;
+		rc = -EINVAL;
+	}
+	if (!rc) {
+		ev->reason   = reason;
+		ev->ind_prim = LLC_DISC_PRIM;
+	}
+	return rc;
+}
+
+int llc_conn_ac_disc_confirm(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->reason   = ev->status;
+	ev->cfm_prim = LLC_DISC_PRIM;
+	return 0;
+}
+
+int llc_conn_ac_rst_ind(struct sock *sk, struct sk_buff *skb)
+{
+	u8 reason = 0;
+	int rc = 1;
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+	struct llc_sock *llc = llc_sk(sk);
+
+	switch (ev->type) {
+	case LLC_CONN_EV_TYPE_PDU:
+		if (LLC_PDU_IS_RSP(pdu) &&
+		    LLC_PDU_TYPE_IS_U(pdu) &&
+		    LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_FRMR) {
+			reason = LLC_RESET_REASON_LOCAL;
+			rc = 0;
+		} else if (LLC_PDU_IS_CMD(pdu) &&
+			   LLC_PDU_TYPE_IS_U(pdu) &&
+			   LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME) {
+			reason = LLC_RESET_REASON_REMOTE;
+			rc = 0;
+		} else {
+			reason = 0;
+			rc  = 1;
+		}
+		break;
+	case LLC_CONN_EV_TYPE_ACK_TMR:
+	case LLC_CONN_EV_TYPE_P_TMR:
+	case LLC_CONN_EV_TYPE_REJ_TMR:
+	case LLC_CONN_EV_TYPE_BUSY_TMR:
+		if (llc->retry_count > llc->n2) {
+			reason = LLC_RESET_REASON_LOCAL;
+			rc = 0;
+		} else
+			rc = 1;
+		break;
+	}
+	if (!rc) {
+		ev->reason   = reason;
+		ev->ind_prim = LLC_RESET_PRIM;
+	}
+	return rc;
+}
+
+int llc_conn_ac_rst_confirm(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->reason   = 0;
+	ev->cfm_prim = LLC_RESET_PRIM;
+	return 0;
+}
+
+int llc_conn_ac_clear_remote_busy_if_f_eq_1(struct sock *sk,
+					    struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	if (LLC_PDU_IS_RSP(pdu) &&
+	    LLC_PDU_TYPE_IS_I(pdu) &&
+	    LLC_I_PF_IS_1(pdu) && llc_sk(sk)->ack_pf)
+		llc_conn_ac_clear_remote_busy(sk, skb);
+	return 0;
+}
+
+int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock *sk,
+					       struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc->data_flag == 2)
+		del_timer(&llc->rej_sent_timer.timer);
+	return 0;
+}
+
+int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_CMD);
+		llc_pdu_init_as_disc_cmd(nskb, 1);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+		llc_conn_ac_set_p_flag_1(sk, skb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit;
+
+		nskb->dev = llc->dev;
+		llc_pdu_decode_pf_bit(skb, &f_bit);
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_dm_rsp(nskb, f_bit);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit = 1;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_dm_rsp(nskb, f_bit);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	u8 f_bit;
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc->rx_pdu_hdr = *((u32 *)pdu);
+	if (LLC_PDU_IS_CMD(pdu))
+		llc_pdu_decode_pf_bit(skb, &f_bit);
+	else
+		f_bit = 0;
+	nskb = llc_alloc_frame();
+	if (nskb) {
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS,
+					 llc->vR, INCORRECT);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		u8 f_bit = 0;
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		struct llc_pdu_sn *pdu = (struct llc_pdu_sn *)&llc->rx_pdu_hdr;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS,
+					 llc->vR, INCORRECT);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb)
+{
+	u8 f_bit;
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb;
+
+	llc_pdu_decode_pf_bit(skb, &f_bit);
+	nskb = llc_alloc_frame();
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS,
+					 llc->vR, INCORRECT);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap = llc->sap;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_I, sap->laddr.lsap,
+			    llc->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR);
+	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
+	if (!rc) {
+		llc_conn_send_pdu(sk, skb);
+		llc_conn_ac_inc_vs_by_1(sk, skb);
+	}
+	return rc;
+}
+
+static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap = llc->sap;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_I, sap->laddr.lsap,
+			    llc->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
+	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
+	if (!rc) {
+		llc_conn_send_pdu(sk, skb);
+		llc_conn_ac_inc_vs_by_1(sk, skb);
+	}
+	return rc;
+}
+
+int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap = llc->sap;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_I, sap->laddr.lsap,
+			    llc->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
+	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
+	if (!rc) {
+		llc_conn_send_pdu(sk, skb);
+		llc_conn_ac_inc_vs_by_1(sk, skb);
+	}
+	return 0;
+}
+
+int llc_conn_ac_resend_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 nr = LLC_I_GET_NR(pdu);
+
+	llc_conn_resend_i_pdu_as_cmd(sk, nr, 0);
+	return 0;
+}
+
+int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk,
+						struct sk_buff *skb)
+{
+	u8 nr;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (!rc)
+			llc_conn_send_pdu(sk, nskb);
+		else
+			kfree_skb(skb);
+	}
+	if (rc) {
+		nr = LLC_I_GET_NR(pdu);
+		rc = 0;
+		llc_conn_resend_i_pdu_as_cmd(sk, nr, 0);
+	}
+	return rc;
+}
+
+int llc_conn_ac_resend_i_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 nr = LLC_I_GET_NR(pdu);
+
+	llc_conn_resend_i_pdu_as_rsp(sk, nr, 1);
+	return 0;
+}
+
+int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_CMD);
+		llc_pdu_init_as_rej_cmd(nskb, 1, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		u8 f_bit = 1;
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit = 0;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_CMD);
+		llc_pdu_init_as_rnr_cmd(nskb, 1, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit = 1;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		u8 f_bit = 0;
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (!llc->remote_busy_flag) {
+		llc->remote_busy_flag = 1;
+		mod_timer(&llc->busy_state_timer.timer,
+			 jiffies + llc->busy_state_timer.expire * HZ);
+	}
+	return 0;
+}
+
+int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_CMD);
+		llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit = 1;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+		u8 f_bit = 1;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+void llc_conn_set_p_flag(struct sock *sk, u8 value)
+{
+	int state_changed = llc_sk(sk)->p_flag && !value;
+
+	llc_sk(sk)->p_flag = value;
+
+	if (state_changed)
+		sk->sk_state_change(sk);
+}
+
+int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (nskb) {
+		struct llc_sap *sap = llc->sap;
+		u8 *dmac = llc->daddr.mac;
+
+		if (llc->dev->flags & IFF_LOOPBACK)
+			dmac = llc->dev->dev_addr;
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_CMD);
+		llc_pdu_init_as_sabme_cmd(nskb, 1);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, dmac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+		llc_conn_set_p_flag(sk, 1);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb)
+{
+	u8 f_bit;
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	llc_pdu_decode_pf_bit(skb, &f_bit);
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_ua_rsp(nskb, f_bit);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+int llc_conn_ac_set_s_flag_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->s_flag = 0;
+	return 0;
+}
+
+int llc_conn_ac_set_s_flag_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->s_flag = 1;
+	return 0;
+}
+
+int llc_conn_ac_start_p_timer(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc_conn_set_p_flag(sk, 1);
+	mod_timer(&llc->pf_cycle_timer.timer,
+		  jiffies + llc->pf_cycle_timer.expire * HZ);
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_send_ack_if_needed - check if ack is needed
+ *	@sk: current connection structure
+ *	@skb: current event
+ *
+ *	Checks number of received PDUs which have not been acknowledged, yet,
+ *	If number of them reaches to "npta"(Number of PDUs To Acknowledge) then
+ *	sends an RR response as acknowledgement for them.  Returns 0 for
+ *	success, 1 otherwise.
+ */
+int llc_conn_ac_send_ack_if_needed(struct sock *sk, struct sk_buff *skb)
+{
+	u8 pf_bit;
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc_pdu_decode_pf_bit(skb, &pf_bit);
+	llc->ack_pf |= pf_bit & 1;
+	if (!llc->ack_must_be_send) {
+		llc->first_pdu_Ns = llc->vR;
+		llc->ack_must_be_send = 1;
+		llc->ack_pf = pf_bit & 1;
+	}
+	if (((llc->vR - llc->first_pdu_Ns + 129) % 128) >= llc->npta) {
+		llc_conn_ac_send_rr_rsp_f_set_ackpf(sk, skb);
+		llc->ack_must_be_send	= 0;
+		llc->ack_pf		= 0;
+		llc_conn_ac_inc_npta_value(sk, skb);
+	}
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_rst_sendack_flag - resets ack_must_be_send flag
+ *	@sk: current connection structure
+ *	@skb: current event
+ *
+ *	This action resets ack_must_be_send flag of given connection, this flag
+ *	indicates if there is any PDU which has not been acknowledged yet.
+ *	Returns 0 for success, 1 otherwise.
+ */
+int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->ack_must_be_send = llc_sk(sk)->ack_pf = 0;
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_send_i_rsp_f_set_ackpf - acknowledge received PDUs
+ *	@sk: current connection structure
+ *	@skb: current event
+ *
+ *	Sends an I response PDU with f-bit set to ack_pf flag as acknowledge to
+ *	all received PDUs which have not been acknowledged, yet. ack_pf flag is
+ *	set to one if one PDU with p-bit set to one is received.  Returns 0 for
+ *	success, 1 otherwise.
+ */
+static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	int rc;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_sap *sap = llc->sap;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_I, sap->laddr.lsap,
+			    llc->daddr.lsap, LLC_PDU_RSP);
+	llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
+	rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
+	if (!rc) {
+		llc_conn_send_pdu(sk, skb);
+		llc_conn_ac_inc_vs_by_1(sk, skb);
+	}
+	return rc;
+}
+
+/**
+ *	llc_conn_ac_send_i_as_ack - sends an I-format PDU to acknowledge rx PDUs
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	This action sends an I-format PDU as acknowledge to received PDUs which
+ *	have not been acknowledged, yet, if there is any. By using of this
+ *	action number of acknowledgements decreases, this technic is called
+ *	piggy backing. Returns 0 for success, 1 otherwise.
+ */
+int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc->ack_must_be_send) {
+		llc_conn_ac_send_i_rsp_f_set_ackpf(sk, skb);
+		llc->ack_must_be_send = 0 ;
+		llc->ack_pf = 0;
+	} else
+		llc_conn_ac_send_i_cmd_p_set_0(sk, skb);
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_send_rr_rsp_f_set_ackpf - ack all rx PDUs not yet acked
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	This action sends an RR response with f-bit set to ack_pf flag as
+ *	acknowledge to all received PDUs which have not been acknowledged, yet,
+ *	if there is any. ack_pf flag indicates if a PDU has been received with
+ *	p-bit set to one. Returns 0 for success, 1 otherwise.
+ */
+static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk,
+					       struct sk_buff *skb)
+{
+	int rc = -ENOBUFS;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (nskb) {
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		nskb->dev = llc->dev;
+		llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap,
+				    llc->daddr.lsap, LLC_PDU_RSP);
+		llc_pdu_init_as_rr_rsp(nskb, llc->ack_pf, llc->vR);
+		rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac);
+		if (rc)
+			goto free;
+		llc_conn_send_pdu(sk, nskb);
+	}
+out:
+	return rc;
+free:
+	kfree_skb(nskb);
+	goto out;
+}
+
+/**
+ *	llc_conn_ac_inc_npta_value - tries to make value of npta greater
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	After "inc_cntr" times calling of this action, "npta" increase by one.
+ *	this action tries to make vale of "npta" greater as possible; number of
+ *	acknowledgements decreases by increasing of "npta". Returns 0 for
+ *	success, 1 otherwise.
+ */
+static int llc_conn_ac_inc_npta_value(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (!llc->inc_cntr) {
+		llc->dec_step = 0;
+		llc->dec_cntr = llc->inc_cntr = 2;
+		++llc->npta;
+		if (llc->npta > 127)
+			llc->npta = 127 ;
+	} else
+		--llc->inc_cntr;
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_adjust_npta_by_rr - decreases "npta" by one
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	After receiving "dec_cntr" times RR command, this action decreases
+ *	"npta" by one. Returns 0 for success, 1 otherwise.
+ */
+int llc_conn_ac_adjust_npta_by_rr(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (!llc->connect_step && !llc->remote_busy_flag) {
+		if (!llc->dec_step) {
+			if (!llc->dec_cntr) {
+				llc->inc_cntr = llc->dec_cntr = 2;
+				if (llc->npta > 0)
+					llc->npta = llc->npta - 1;
+			} else
+				llc->dec_cntr -=1;
+		}
+	} else
+		llc->connect_step = 0 ;
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_adjust_npta_by_rnr - decreases "npta" by one
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	After receiving "dec_cntr" times RNR command, this action decreases
+ *	"npta" by one. Returns 0 for success, 1 otherwise.
+ */
+int llc_conn_ac_adjust_npta_by_rnr(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc->remote_busy_flag)
+		if (!llc->dec_step) {
+			if (!llc->dec_cntr) {
+				llc->inc_cntr = llc->dec_cntr = 2;
+				if (llc->npta > 0)
+					--llc->npta;
+			} else
+				--llc->dec_cntr;
+		}
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_dec_tx_win_size - decreases tx window size
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	After receiving of a REJ command or response, transmit window size is
+ *	decreased by number of PDUs which are outstanding yet. Returns 0 for
+ *	success, 1 otherwise.
+ */
+int llc_conn_ac_dec_tx_win_size(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+	u8 unacked_pdu = skb_queue_len(&llc->pdu_unack_q);
+
+	llc->k -= unacked_pdu;
+	if (llc->k < 2)
+		llc->k = 2;
+	return 0;
+}
+
+/**
+ *	llc_conn_ac_inc_tx_win_size - tx window size is inc by 1
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	After receiving an RR response with f-bit set to one, transmit window
+ *	size is increased by one. Returns 0 for success, 1 otherwise.
+ */
+int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc->k += 1;
+	if (llc->k > 128)
+		llc->k = 128 ;
+	return 0;
+}
+
+int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	del_timer(&llc->pf_cycle_timer.timer);
+	del_timer(&llc->ack_timer.timer);
+	del_timer(&llc->rej_sent_timer.timer);
+	del_timer(&llc->busy_state_timer.timer);
+	llc->ack_must_be_send = 0;
+	llc->ack_pf = 0;
+	return 0;
+}
+
+int llc_conn_ac_stop_other_timers(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	del_timer(&llc->rej_sent_timer.timer);
+	del_timer(&llc->pf_cycle_timer.timer);
+	del_timer(&llc->busy_state_timer.timer);
+	llc->ack_must_be_send = 0;
+	llc->ack_pf = 0;
+	return 0;
+}
+
+int llc_conn_ac_start_ack_timer(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire * HZ);
+	return 0;
+}
+
+int llc_conn_ac_start_rej_timer(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	mod_timer(&llc->rej_sent_timer.timer,
+		  jiffies + llc->rej_sent_timer.expire * HZ);
+	return 0;
+}
+
+int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk,
+					     struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (!timer_pending(&llc->ack_timer.timer))
+		mod_timer(&llc->ack_timer.timer,
+			  jiffies + llc->ack_timer.expire * HZ);
+	return 0;
+}
+
+int llc_conn_ac_stop_ack_timer(struct sock *sk, struct sk_buff *skb)
+{
+	del_timer(&llc_sk(sk)->ack_timer.timer);
+	return 0;
+}
+
+int llc_conn_ac_stop_p_timer(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	del_timer(&llc->pf_cycle_timer.timer);
+	llc_conn_set_p_flag(sk, 0);
+	return 0;
+}
+
+int llc_conn_ac_stop_rej_timer(struct sock *sk, struct sk_buff *skb)
+{
+	del_timer(&llc_sk(sk)->rej_sent_timer.timer);
+	return 0;
+}
+
+int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb)
+{
+	int acked;
+	u16 unacked = 0;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc->last_nr = PDU_SUPV_GET_Nr(pdu);
+	acked = llc_conn_remove_acked_pdus(sk, llc->last_nr, &unacked);
+	/* On loopback we don't queue I frames in unack_pdu_q queue. */
+	if (acked > 0 || (llc->dev->flags & IFF_LOOPBACK)) {
+		llc->retry_count = 0;
+		del_timer(&llc->ack_timer.timer);
+		if (llc->failed_data_req) {
+			/* already, we did not accept data from upper layer
+			 * (tx_window full or unacceptable state). Now, we
+			 * can send data and must inform to upper layer.
+			 */
+			llc->failed_data_req = 0;
+			llc_conn_ac_data_confirm(sk, skb);
+		}
+		if (unacked)
+			mod_timer(&llc->ack_timer.timer,
+				  jiffies + llc->ack_timer.expire * HZ);
+	} else if (llc->failed_data_req) {
+		u8 f_bit;
+
+		llc_pdu_decode_pf_bit(skb, &f_bit);
+		if (f_bit == 1) {
+			llc->failed_data_req = 0;
+			llc_conn_ac_data_confirm(sk, skb);
+		}
+	}
+	return 0;
+}
+
+int llc_conn_ac_upd_p_flag(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	if (LLC_PDU_IS_RSP(pdu)) {
+		u8 f_bit;
+
+		llc_pdu_decode_pf_bit(skb, &f_bit);
+		if (f_bit) {
+			llc_conn_set_p_flag(sk, 0);
+			llc_conn_ac_stop_p_timer(sk, skb);
+		}
+	}
+	return 0;
+}
+
+int llc_conn_ac_set_data_flag_2(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->data_flag = 2;
+	return 0;
+}
+
+int llc_conn_ac_set_data_flag_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->data_flag = 0;
+	return 0;
+}
+
+int llc_conn_ac_set_data_flag_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->data_flag = 1;
+	return 0;
+}
+
+int llc_conn_ac_set_data_flag_1_if_data_flag_eq_0(struct sock *sk,
+						  struct sk_buff *skb)
+{
+	if (!llc_sk(sk)->data_flag)
+		llc_sk(sk)->data_flag = 1;
+	return 0;
+}
+
+int llc_conn_ac_set_p_flag_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_conn_set_p_flag(sk, 0);
+	return 0;
+}
+
+static int llc_conn_ac_set_p_flag_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_conn_set_p_flag(sk, 1);
+	return 0;
+}
+
+int llc_conn_ac_set_remote_busy_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->remote_busy_flag = 0;
+	return 0;
+}
+
+int llc_conn_ac_set_cause_flag_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->cause_flag = 0;
+	return 0;
+}
+
+int llc_conn_ac_set_cause_flag_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->cause_flag = 1;
+	return 0;
+}
+
+int llc_conn_ac_set_retry_cnt_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->retry_count = 0;
+	return 0;
+}
+
+int llc_conn_ac_inc_retry_cnt_by_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->retry_count++;
+	return 0;
+}
+
+int llc_conn_ac_set_vr_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->vR = 0;
+	return 0;
+}
+
+int llc_conn_ac_inc_vr_by_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->vR = PDU_GET_NEXT_Vr(llc_sk(sk)->vR);
+	return 0;
+}
+
+int llc_conn_ac_set_vs_0(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->vS = 0;
+	return 0;
+}
+
+int llc_conn_ac_set_vs_nr(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->vS = llc_sk(sk)->last_nr;
+	return 0;
+}
+
+int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->vS = (llc_sk(sk)->vS + 1) % 128;
+	return 0;
+}
+
+void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data)
+{
+	struct sock *sk = (struct sock *)timeout_data;
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+
+	bh_lock_sock(sk);
+	if (skb) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+		skb->sk  = sk;
+		ev->type = LLC_CONN_EV_TYPE_P_TMR;
+		llc_process_tmr_ev(sk, skb);
+	}
+	bh_unlock_sock(sk);
+}
+
+void llc_conn_busy_tmr_cb(unsigned long timeout_data)
+{
+	struct sock *sk = (struct sock *)timeout_data;
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+
+	bh_lock_sock(sk);
+	if (skb) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+		skb->sk  = sk;
+		ev->type = LLC_CONN_EV_TYPE_BUSY_TMR;
+		llc_process_tmr_ev(sk, skb);
+	}
+	bh_unlock_sock(sk);
+}
+
+void llc_conn_ack_tmr_cb(unsigned long timeout_data)
+{
+	struct sock* sk = (struct sock *)timeout_data;
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+
+	bh_lock_sock(sk);
+	if (skb) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+		skb->sk  = sk;
+		ev->type = LLC_CONN_EV_TYPE_ACK_TMR;
+		llc_process_tmr_ev(sk, skb);
+	}
+	bh_unlock_sock(sk);
+}
+
+void llc_conn_rej_tmr_cb(unsigned long timeout_data)
+{
+	struct sock *sk = (struct sock *)timeout_data;
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+
+	bh_lock_sock(sk);
+	if (skb) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+		skb->sk  = sk;
+		ev->type = LLC_CONN_EV_TYPE_REJ_TMR;
+		llc_process_tmr_ev(sk, skb);
+	}
+	bh_unlock_sock(sk);
+}
+
+int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk(sk)->X = llc_sk(sk)->vS;
+	llc_conn_ac_set_vs_nr(sk, skb);
+	return 0;
+}
+
+int llc_conn_ac_upd_vs(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 nr = PDU_SUPV_GET_Nr(pdu);
+
+	if (llc_circular_between(llc_sk(sk)->vS, nr, llc_sk(sk)->X))
+		llc_conn_ac_set_vs_nr(sk, skb);
+	return 0;
+}
+
+/*
+ * Non-standard actions; these not contained in IEEE specification; for
+ * our own usage
+ */
+/**
+ *	llc_conn_disc - removes connection from SAP list and frees it
+ *	@sk: closed connection
+ *	@skb: occurred event
+ */
+int llc_conn_disc(struct sock *sk, struct sk_buff *skb)
+{
+	/* FIXME: this thing seems to want to die */
+	return 0;
+}
+
+/**
+ *	llc_conn_reset - resets connection
+ *	@sk : reseting connection.
+ *	@skb: occurred event.
+ *
+ *	Stop all timers, empty all queues and reset all flags.
+ */
+int llc_conn_reset(struct sock *sk, struct sk_buff *skb)
+{
+	llc_sk_reset(sk);
+	return 0;
+}
+
+/**
+ *	llc_circular_between - designates that b is between a and c or not
+ *	@a: lower bound
+ *	@b: element to see if is between a and b
+ *	@c: upper bound
+ *
+ *	This function designates that b is between a and c or not (for example,
+ *	0 is between 127 and 1). Returns 1 if b is between a and c, 0
+ *	otherwise.
+ */
+u8 llc_circular_between(u8 a, u8 b, u8 c)
+{
+	b = b - a;
+	c = c - a;
+	return b <= c;
+}
+
+/**
+ *	llc_process_tmr_ev - timer backend
+ *	@sk: active connection
+ *	@skb: occurred event
+ *
+ *	This function is called from timer callback functions. When connection
+ *	is busy (during sending a data frame) timer expiration event must be
+ *	queued. Otherwise this event can be sent to connection state machine.
+ *	Queued events will process by llc_backlog_rcv function after sending
+ *	data frame.
+ */
+static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb)
+{
+	if (llc_sk(sk)->state == LLC_CONN_OUT_OF_SVC) {
+		printk(KERN_WARNING "%s: timer called on closed connection\n",
+		       __FUNCTION__);
+		kfree_skb(skb);
+	} else {
+		if (!sock_owned_by_user(sk))
+			llc_conn_state_process(sk, skb);
+		else {
+			llc_set_backlog_type(skb, LLC_EVENT);
+			sk_add_backlog(sk, skb);
+		}
+	}
+}
diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c
new file mode 100644
index 00000000000..cd130c3b72b
--- /dev/null
+++ b/net/llc/llc_c_ev.c
@@ -0,0 +1,769 @@
+/*
+ * llc_c_ev.c - Connection component state transition event qualifiers
+ *
+ * A 'state' consists of a number of possible event matching functions,
+ * the actions associated with each being executed when that event is
+ * matched; a 'state machine' accepts events in a serial fashion from an
+ * event queue. Each event is passed to each successive event matching
+ * function until a match is made (the event matching function returns
+ * success, or '0') or the list of event matching functions is exhausted.
+ * If a match is made, the actions associated with the event are executed
+ * and the state is changed to that event's transition state. Before some
+ * events are recognized, even after a match has been made, a certain
+ * number of 'event qualifier' functions must also be executed. If these
+ * all execute successfully, then the event is finally executed.
+ *
+ * These event functions must return 0 for success, to show a matched
+ * event, of 1 if the event does not match. Event qualifier functions
+ * must return a 0 for success or a non-zero for failure. Each function
+ * is simply responsible for verifying one single thing and returning
+ * either a success or failure.
+ *
+ * All of followed event functions are described in 802.2 LLC Protocol
+ * standard document except two functions that we added that will explain
+ * in their comments, at below.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/netdevice.h>
+#include <net/llc_conn.h>
+#include <net/llc_sap.h>
+#include <net/sock.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_pdu.h>
+
+#if 1
+#define dprintk(args...) printk(KERN_DEBUG args)
+#else
+#define dprintk(args...)
+#endif
+
+extern u16 llc_circular_between(u8 a, u8 b, u8 c);
+
+/**
+ *	llc_util_ns_inside_rx_window - check if sequence number is in rx window
+ *	@ns: sequence number of received pdu.
+ *	@vr: sequence number which receiver expects to receive.
+ *	@rw: receive window size of receiver.
+ *
+ *	Checks if sequence number of received PDU is in range of receive
+ *	window. Returns 0 for success, 1 otherwise
+ */
+static u16 llc_util_ns_inside_rx_window(u8 ns, u8 vr, u8 rw)
+{
+	return !llc_circular_between(vr, ns,
+				     (vr + rw - 1) % LLC_2_SEQ_NBR_MODULO);
+}
+
+/**
+ *	llc_util_nr_inside_tx_window - check if sequence number is in tx window
+ *	@sk: current connection.
+ *	@nr: N(R) of received PDU.
+ *
+ *	This routine checks if N(R) of received PDU is in range of transmit
+ *	window; on the other hand checks if received PDU acknowledges some
+ *	outstanding PDUs that are in transmit window. Returns 0 for success, 1
+ *	otherwise.
+ */
+static u16 llc_util_nr_inside_tx_window(struct sock *sk, u8 nr)
+{
+	u8 nr1, nr2;
+	struct sk_buff *skb;
+	struct llc_pdu_sn *pdu;
+	struct llc_sock *llc = llc_sk(sk);
+	int rc = 0;
+
+	if (llc->dev->flags & IFF_LOOPBACK)
+		goto out;
+	rc = 1;
+	if (!skb_queue_len(&llc->pdu_unack_q))
+		goto out;
+	skb = skb_peek(&llc->pdu_unack_q);
+	pdu = llc_pdu_sn_hdr(skb);
+	nr1 = LLC_I_GET_NS(pdu);
+	skb = skb_peek_tail(&llc->pdu_unack_q);
+	pdu = llc_pdu_sn_hdr(skb);
+	nr2 = LLC_I_GET_NS(pdu);
+	rc = !llc_circular_between(nr1, nr, (nr2 + 1) % LLC_2_SEQ_NBR_MODULO);
+out:
+	return rc;
+}
+
+int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->prim == LLC_CONN_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->prim == LLC_DATA_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->prim == LLC_DISC_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->prim == LLC_RESET_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type == LLC_CONN_EV_TYPE_SIMPLE &&
+	       ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_DETECTED ? 0 : 1;
+}
+
+int llc_conn_ev_local_busy_cleared(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type == LLC_CONN_EV_TYPE_SIMPLE &&
+	       ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_CLEARED ? 0 : 1;
+}
+
+int llc_conn_ev_rx_bad_pdu(struct sock *sk, struct sk_buff *skb)
+{
+	return 1;
+}
+
+int llc_conn_ev_rx_disc_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_DISC ? 0 : 1;
+}
+
+int llc_conn_ev_rx_dm_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_DM ? 0 : 1;
+}
+
+int llc_conn_ev_rx_frmr_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_FRMR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_0(pdu) &&
+	       LLC_I_GET_NS(pdu) == llc_sk(sk)->vR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_1(pdu) &&
+	       LLC_I_GET_NS(pdu) == llc_sk(sk)->vR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_0(pdu) && ns != vr &&
+	       !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_1(pdu) && ns != vr &&
+	       !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns(struct sock *sk,
+					     struct sk_buff *skb)
+{
+	struct llc_pdu_sn * pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+	u16 rc = LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr &&
+		 llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+	if (!rc)
+		dprintk("%s: matched, state=%d, ns=%d, vr=%d\n",
+			__FUNCTION__, llc_sk(sk)->state, ns, vr);
+	return rc;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_0(pdu) &&
+	       LLC_I_GET_NS(pdu) == llc_sk(sk)->vR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_1(pdu) &&
+	       LLC_I_GET_NS(pdu) == llc_sk(sk)->vR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_GET_NS(pdu) == llc_sk(sk)->vR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_0(pdu) && ns != vr &&
+	       !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) &&
+	       LLC_I_PF_IS_1(pdu) && ns != vr &&
+	       !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns(struct sock *sk,
+					      struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr &&
+	       !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+}
+
+int llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns(struct sock *sk,
+					     struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vr = llc_sk(sk)->vR;
+	u8 ns = LLC_I_GET_NS(pdu);
+	u16 rc = LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr &&
+		 llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1;
+	if (!rc)
+		dprintk("%s: matched, state=%d, ns=%d, vr=%d\n",
+			__FUNCTION__, llc_sk(sk)->state, ns, vr);
+	return rc;
+}
+
+int llc_conn_ev_rx_rej_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_REJ ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rej_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_REJ ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rej_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_REJ ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rej_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_REJ ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rej_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_REJ ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rnr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_RNR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rnr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_RNR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rnr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_RNR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rnr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_RNR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_RR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_CMD(pdu) == LLC_2_PDU_CMD_RR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_0(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_RR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_rr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	return llc_conn_space(sk, skb) &&
+	       LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) &&
+	       LLC_S_PF_IS_1(pdu) &&
+	       LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_RR ? 0 : 1;
+}
+
+int llc_conn_ev_rx_sabme_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME ? 0 : 1;
+}
+
+int llc_conn_ev_rx_ua_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_UA ? 0 : 1;
+}
+
+int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	if (LLC_PDU_IS_CMD(pdu)) {
+		if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) {
+			if (LLC_I_PF_IS_1(pdu))
+				rc = 0;
+		} else if (LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PF_IS_1(pdu))
+			rc = 0;
+	}
+	return rc;
+}
+
+int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	if (LLC_PDU_IS_CMD(pdu)) {
+		if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu))
+			rc = 0;
+		else if (LLC_PDU_TYPE_IS_U(pdu))
+			switch (LLC_U_PDU_CMD(pdu)) {
+			case LLC_2_PDU_CMD_SABME:
+			case LLC_2_PDU_CMD_DISC:
+				rc = 0;
+				break;
+			}
+	}
+	return rc;
+}
+
+int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	if (LLC_PDU_IS_RSP(pdu)) {
+		if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) {
+			if (LLC_I_PF_IS_1(pdu))
+				rc = 0;
+		} else if (LLC_PDU_TYPE_IS_U(pdu))
+			switch (LLC_U_PDU_RSP(pdu)) {
+			case LLC_2_PDU_RSP_UA:
+			case LLC_2_PDU_RSP_DM:
+			case LLC_2_PDU_RSP_FRMR:
+				if (LLC_U_PF_IS_1(pdu))
+					rc = 0;
+				break;
+			}
+	}
+	return rc;
+}
+
+int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	if (LLC_PDU_IS_RSP(pdu)) {
+		if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu))
+			rc = 0;
+		else if (LLC_PDU_TYPE_IS_U(pdu))
+			switch (LLC_U_PDU_RSP(pdu)) {
+			case LLC_2_PDU_RSP_UA:
+			case LLC_2_PDU_RSP_DM:
+			case LLC_2_PDU_RSP_FRMR:
+				rc = 0;
+				break;
+			}
+	}
+
+	return rc;
+}
+
+int llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr(struct sock *sk,
+					       struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vs = llc_sk(sk)->vS;
+	u8 nr = LLC_I_GET_NR(pdu);
+
+	if (LLC_PDU_IS_CMD(pdu) &&
+	    (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) &&
+	    nr != vs && llc_util_nr_inside_tx_window(sk, nr)) {
+		dprintk("%s: matched, state=%d, vs=%d, nr=%d\n",
+			__FUNCTION__, llc_sk(sk)->state, vs, nr);
+		rc = 0;
+	}
+	return rc;
+}
+
+int llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr(struct sock *sk,
+					       struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+	u8 vs = llc_sk(sk)->vS;
+	u8 nr = LLC_I_GET_NR(pdu);
+
+	if (LLC_PDU_IS_RSP(pdu) &&
+	    (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) &&
+	    nr != vs && llc_util_nr_inside_tx_window(sk, nr)) {
+		rc = 0;
+		dprintk("%s: matched, state=%d, vs=%d, nr=%d\n",
+			__FUNCTION__, llc_sk(sk)->state, vs, nr);
+	}
+	return rc;
+}
+
+int llc_conn_ev_rx_any_frame(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
+int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type != LLC_CONN_EV_TYPE_P_TMR;
+}
+
+int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type != LLC_CONN_EV_TYPE_ACK_TMR;
+}
+
+int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type != LLC_CONN_EV_TYPE_REJ_TMR;
+}
+
+int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type != LLC_CONN_EV_TYPE_BUSY_TMR;
+}
+
+int llc_conn_ev_init_p_f_cycle(struct sock *sk, struct sk_buff *skb)
+{
+	return 1;
+}
+
+int llc_conn_ev_tx_buffer_full(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	return ev->type == LLC_CONN_EV_TYPE_SIMPLE &&
+	       ev->prim_type == LLC_CONN_EV_TX_BUFF_FULL ? 0 : 1;
+}
+
+/* Event qualifier functions
+ *
+ * these functions simply verify the value of a state flag associated with
+ * the connection and return either a 0 for success or a non-zero value
+ * for not-success; verify the event is the type we expect
+ */
+int llc_conn_ev_qlfy_data_flag_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->data_flag != 1;
+}
+
+int llc_conn_ev_qlfy_data_flag_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->data_flag;
+}
+
+int llc_conn_ev_qlfy_data_flag_eq_2(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->data_flag != 2;
+}
+
+int llc_conn_ev_qlfy_p_flag_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->p_flag != 1;
+}
+
+/**
+ *	conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	This function determines when frame which is sent, is last frame of
+ *	transmit window, if it is then this function return zero else return
+ *	one.  This function is used for sending last frame of transmit window
+ *	as I-format command with p-bit set to one. Returns 0 if frame is last
+ *	frame, 1 otherwise.
+ */
+int llc_conn_ev_qlfy_last_frame_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return !(skb_queue_len(&llc_sk(sk)->pdu_unack_q) + 1 == llc_sk(sk)->k);
+}
+
+/**
+ *	conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window
+ *	@sk: current connection structure.
+ *	@skb: current event.
+ *
+ *	This function determines when frame which is sent, isn't last frame of
+ *	transmit window, if it isn't then this function return zero else return
+ *	one. Returns 0 if frame isn't last frame, 1 otherwise.
+ */
+int llc_conn_ev_qlfy_last_frame_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return skb_queue_len(&llc_sk(sk)->pdu_unack_q) + 1 == llc_sk(sk)->k;
+}
+
+int llc_conn_ev_qlfy_p_flag_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->p_flag;
+}
+
+int llc_conn_ev_qlfy_p_flag_eq_f(struct sock *sk, struct sk_buff *skb)
+{
+	u8 f_bit;
+
+	llc_pdu_decode_pf_bit(skb, &f_bit);
+	return llc_sk(sk)->p_flag == f_bit ? 0 : 1;
+}
+
+int llc_conn_ev_qlfy_remote_busy_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->remote_busy_flag;
+}
+
+int llc_conn_ev_qlfy_remote_busy_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return !llc_sk(sk)->remote_busy_flag;
+}
+
+int llc_conn_ev_qlfy_retry_cnt_lt_n2(struct sock *sk, struct sk_buff *skb)
+{
+	return !(llc_sk(sk)->retry_count < llc_sk(sk)->n2);
+}
+
+int llc_conn_ev_qlfy_retry_cnt_gte_n2(struct sock *sk, struct sk_buff *skb)
+{
+	return !(llc_sk(sk)->retry_count >= llc_sk(sk)->n2);
+}
+
+int llc_conn_ev_qlfy_s_flag_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return !llc_sk(sk)->s_flag;
+}
+
+int llc_conn_ev_qlfy_s_flag_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->s_flag;
+}
+
+int llc_conn_ev_qlfy_cause_flag_eq_1(struct sock *sk, struct sk_buff *skb)
+{
+	return !llc_sk(sk)->cause_flag;
+}
+
+int llc_conn_ev_qlfy_cause_flag_eq_0(struct sock *sk, struct sk_buff *skb)
+{
+	return llc_sk(sk)->cause_flag;
+}
+
+int llc_conn_ev_qlfy_set_status_conn(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_CONN;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_disc(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_DISC;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_failed(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_FAILED;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_remote_busy(struct sock *sk,
+					    struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_REMOTE_BUSY;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_refuse(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_REFUSE;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_conflict(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_CONFLICT;
+	return 0;
+}
+
+int llc_conn_ev_qlfy_set_status_rst_done(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->status = LLC_STATUS_RESET_DONE;
+	return 0;
+}
diff --git a/net/llc/llc_c_st.c b/net/llc/llc_c_st.c
new file mode 100644
index 00000000000..818a9428823
--- /dev/null
+++ b/net/llc/llc_c_st.c
@@ -0,0 +1,4946 @@
+/*
+ * llc_c_st.c - This module contains state transition of connection component.
+ *
+ * Description of event functions and actions there is in 802.2 LLC standard,
+ * or in "llc_c_ac.c" and "llc_c_ev.c" modules.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/types.h>
+#include <net/llc_if.h>
+#include <net/llc_sap.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_c_st.h>
+
+#define NONE NULL
+
+/* COMMON CONNECTION STATE transitions
+ * Common transitions for
+ * LLC_CONN_STATE_NORMAL,
+ * LLC_CONN_STATE_BUSY,
+ * LLC_CONN_STATE_REJ,
+ * LLC_CONN_STATE_AWAIT,
+ * LLC_CONN_STATE_AWAIT_BUSY and
+ * LLC_CONN_STATE_AWAIT_REJ states
+ */
+/* State transitions for LLC_CONN_EV_DISC_REQ event */
+static llc_conn_action_t llc_common_actions_1[] = {
+	[0] = llc_conn_ac_send_disc_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_1 = {
+	.ev	       = llc_conn_ev_disc_req,
+	.next_state    = LLC_CONN_STATE_D_CONN,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RESET_REQ event */
+static llc_conn_action_t llc_common_actions_2[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_2 = {
+	.ev	       = llc_conn_ev_rst_req,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_common_actions_3[] = {
+	[0] = llc_conn_ac_stop_all_timers,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[4] = llc_conn_ac_rst_ind,
+	[5] = llc_conn_ac_set_p_flag_0,
+	[6] = llc_conn_ac_set_remote_busy_0,
+	[7] = llc_conn_reset,
+	[8] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_3 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_common_actions_4[] = {
+	[0] = llc_conn_ac_stop_all_timers,
+	[1] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[2] = llc_conn_ac_disc_ind,
+	[3] = llc_conn_disc,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_FRMR_RSP_Fbit_SET_X event */
+static llc_conn_action_t llc_common_actions_5[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_rst_ind,
+	[5] = llc_conn_ac_set_cause_flag_0,
+	[6] = llc_conn_reset,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_frmr_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event */
+static llc_conn_action_t llc_common_actions_6[] = {
+	[0] = llc_conn_ac_disc_ind,
+	[1] = llc_conn_ac_stop_all_timers,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_6 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_RX_ZZZ_CMD_Pbit_SET_X_INVAL_Nr event */
+static llc_conn_action_t llc_common_actions_7a[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_7a = {
+	.ev	       = llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_7a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_X_INVAL_Ns event */
+static llc_conn_action_t llc_common_actions_7b[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_7b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_7b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_ZZZ_RSP_Fbit_SET_X_INVAL_Nr event */
+static llc_conn_action_t llc_common_actions_8a[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_8a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_X_INVAL_Ns event */
+static llc_conn_action_t llc_common_actions_8b[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_8b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_BAD_PDU event */
+static llc_conn_action_t llc_common_actions_8c[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_8c = {
+	.ev	       = llc_conn_ev_rx_bad_pdu,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_8c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event */
+static llc_conn_action_t llc_common_actions_9[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_9 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_common_actions_9,
+};
+
+/* State transitions for LLC_CONN_EV_RX_XXX_RSP_Fbit_SET_1 event */
+#if 0
+static llc_conn_ev_qfyr_t llc_common_ev_qfyrs_10[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_common_actions_10[] = {
+	[0] = llc_conn_ac_send_frmr_rsp_f_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_10 = {
+	.ev	       = llc_conn_ev_rx_xxx_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = llc_common_ev_qfyrs_10,
+	.ev_actions    = llc_common_actions_10,
+};
+#endif
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_common_ev_qfyrs_11a[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_common_actions_11a[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_0,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_11a = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_common_ev_qfyrs_11a,
+	.ev_actions    = llc_common_actions_11a,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_common_ev_qfyrs_11b[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_common_actions_11b[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_0,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_11b = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_common_ev_qfyrs_11b,
+	.ev_actions    = llc_common_actions_11b,
+};
+
+/* State transitions for LLC_CONN_EV_REJ_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_common_ev_qfyrs_11c[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_common_actions_11c[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_0,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_11c = {
+	.ev	       = llc_conn_ev_rej_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_common_ev_qfyrs_11c,
+	.ev_actions    = llc_common_actions_11c,
+};
+
+/* State transitions for LLC_CONN_EV_BUSY_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_common_ev_qfyrs_11d[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_common_actions_11d[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_stop_other_timers,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_0,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_common_state_trans_11d = {
+	.ev	       = llc_conn_ev_busy_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_common_ev_qfyrs_11d,
+	.ev_actions    = llc_common_actions_11d,
+};
+
+/*
+ * Common dummy state transition; must be last entry for all state
+ * transition groups - it'll be on .bss, so will be zeroed.
+ */
+static struct llc_conn_state_trans llc_common_state_trans_end;
+
+/* LLC_CONN_STATE_ADM transitions */
+/* State transitions for LLC_CONN_EV_CONN_REQ event */
+static llc_conn_action_t llc_adm_actions_1[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_set_retry_cnt_0,
+	[3] = llc_conn_ac_set_s_flag_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_adm_state_trans_1 = {
+	.ev	       = llc_conn_ev_conn_req,
+	.next_state    = LLC_CONN_STATE_SETUP,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_adm_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_adm_actions_2[] = {
+	[0] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_p_flag_0,
+	[5] = llc_conn_ac_set_remote_busy_0,
+	[6] = llc_conn_ac_conn_ind,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_adm_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_adm_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_adm_actions_3[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_adm_state_trans_3 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_adm_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_XXX_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_adm_actions_4[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_1,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_adm_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_xxx_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_adm_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_XXX_YYY event */
+static llc_conn_action_t llc_adm_actions_5[] = {
+	[0] = llc_conn_disc,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_adm_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_any_frame,
+	.next_state    = LLC_CONN_OUT_OF_SVC,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_adm_actions_5,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_adm_state_transitions[] = {
+	[0] = &llc_adm_state_trans_1,		/* Request */
+	[1] = &llc_common_state_trans_end,
+	[2] = &llc_common_state_trans_end,	/* local_busy */
+	[3] = &llc_common_state_trans_end,	/* init_pf_cycle */
+	[4] = &llc_common_state_trans_end,	/* timer */
+	[5] = &llc_adm_state_trans_2,		/* Receive frame */
+	[6] = &llc_adm_state_trans_3,
+	[7] = &llc_adm_state_trans_4,
+	[8] = &llc_adm_state_trans_5,
+	[9] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_SETUP transitions */
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_setup_actions_1[] = {
+	[0] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_set_s_flag_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_1 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_SETUP,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_setup_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = llc_conn_ev_qlfy_set_status_conn,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_2[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_set_remote_busy_0,
+	[5] = llc_conn_ac_conn_confirm,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_setup_ev_qfyrs_2,
+	.ev_actions    = llc_setup_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_s_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_conn,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_3[] = {
+	[0] = llc_conn_ac_set_p_flag_0,
+	[1] = llc_conn_ac_set_remote_busy_0,
+	[2] = llc_conn_ac_conn_confirm,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_3 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_setup_ev_qfyrs_3,
+	.ev_actions    = llc_setup_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_set_status_disc,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_4[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_ac_conn_confirm,
+	[3] = llc_conn_disc,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_setup_ev_qfyrs_4,
+	.ev_actions    = llc_setup_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_5[] = {
+	[0] = llc_conn_ev_qlfy_set_status_disc,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_5[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_conn_confirm,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_setup_ev_qfyrs_5,
+	.ev_actions    = llc_setup_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_7[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = llc_conn_ev_qlfy_s_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_7[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_7 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_SETUP,
+	.ev_qualifiers = llc_setup_ev_qfyrs_7,
+	.ev_actions    = llc_setup_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_setup_ev_qfyrs_8[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = llc_conn_ev_qlfy_s_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_set_status_failed,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_setup_actions_8[] = {
+	[0] = llc_conn_ac_conn_confirm,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_setup_state_trans_8 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_setup_ev_qfyrs_8,
+	.ev_actions    = llc_setup_actions_8,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_setup_state_transitions[] = {
+	 [0] = &llc_common_state_trans_end,	/* Request */
+	 [1] = &llc_common_state_trans_end,	/* local busy */
+	 [2] = &llc_common_state_trans_end,	/* init_pf_cycle */
+	 [3] = &llc_setup_state_trans_3,	/* Timer */
+	 [4] = &llc_setup_state_trans_7,
+	 [5] = &llc_setup_state_trans_8,
+	 [6] = &llc_common_state_trans_end,
+	 [7] = &llc_setup_state_trans_1,	/* Receive frame */
+	 [8] = &llc_setup_state_trans_2,
+	 [9] = &llc_setup_state_trans_4,
+	[10] = &llc_setup_state_trans_5,
+	[11] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_NORMAL transitions */
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_last_frame_eq_0,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_1[] = {
+	[0] = llc_conn_ac_send_i_as_ack,
+	[1] = llc_conn_ac_start_ack_tmr_if_not_running,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_1,
+	.ev_actions    = llc_normal_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_last_frame_eq_1,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_2[] = {
+	[0] = llc_conn_ac_send_i_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_2 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_2,
+	.ev_actions    = llc_normal_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_2_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_remote_busy,
+	[2] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_normal_actions_2_1[1];
+
+static struct llc_conn_state_trans llc_normal_state_trans_2_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_2_1,
+	.ev_actions    = llc_normal_actions_2_1,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_3[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[2] = llc_conn_ac_set_data_flag_0,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_3 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_normal_ev_qfyrs_3,
+	.ev_actions    = llc_normal_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_4[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[2] = llc_conn_ac_set_data_flag_0,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_4 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_normal_ev_qfyrs_4,
+	.ev_actions    = llc_normal_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_5a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_5a[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_start_rej_timer,
+	[5] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_5a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_normal_ev_qfyrs_5a,
+	.ev_actions    = llc_normal_actions_5a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_5b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_5b[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_start_rej_timer,
+	[5] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_5b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_normal_ev_qfyrs_5b,
+	.ev_actions    = llc_normal_actions_5b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_5c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_5c[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_start_rej_timer,
+	[5] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_5c = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_normal_ev_qfyrs_5c,
+	.ev_actions    = llc_normal_actions_5c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_6a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_6a[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_6a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_normal_ev_qfyrs_6a,
+	.ev_actions    = llc_normal_actions_6a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_6b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_6b[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_6b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_normal_ev_qfyrs_6b,
+	.ev_actions    = llc_normal_actions_6b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_normal_actions_7[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rej_rsp_f_set_1,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_7 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_8a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_8[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[5] = llc_conn_ac_send_ack_if_needed,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_8a,
+	.ev_actions    = llc_normal_actions_8,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_8b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_8b,
+	.ev_actions    = llc_normal_actions_8,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_9a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_9a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_send_ack_if_needed,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_9a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_9a,
+	.ev_actions    = llc_normal_actions_9a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_9b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_9b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_send_ack_if_needed,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_9b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_9b,
+	.ev_actions    = llc_normal_actions_9b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_normal_actions_10[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_send_ack_rsp_f_set_1,
+	[2] = llc_conn_ac_rst_sendack_flag,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_data_ind,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_10 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_10,
+};
+
+/* State transitions for * LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_normal_actions_11a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_11a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_11a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_normal_actions_11b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_11b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_11b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_11c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_11c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_inc_tx_win_size,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_11c = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_11c,
+	.ev_actions    = llc_normal_actions_11c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_normal_actions_12[] = {
+	[0] = llc_conn_ac_send_ack_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_adjust_npta_by_rr,
+	[3] = llc_conn_ac_rst_sendack_flag,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_12 = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_12,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_normal_actions_13a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_13a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_13a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_normal_actions_13b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_13b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_13b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_13c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_13c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_13c = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_13c,
+	.ev_actions    = llc_normal_actions_13c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_normal_actions_14[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_adjust_npta_by_rnr,
+	[3] = llc_conn_ac_rst_sendack_flag,
+	[4] = llc_conn_ac_set_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_14 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_14,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_15a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_15a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_dec_tx_win_size,
+	[4] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[5] = llc_conn_ac_clear_remote_busy,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_15a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_15a,
+	.ev_actions    = llc_normal_actions_15a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_15b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_15b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_dec_tx_win_size,
+	[4] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[5] = llc_conn_ac_clear_remote_busy,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_15b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_15b,
+	.ev_actions    = llc_normal_actions_15b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_16a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_16a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_dec_tx_win_size,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_16a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_16a,
+	.ev_actions    = llc_normal_actions_16a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_16b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_16b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_dec_tx_win_size,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_16b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_16b,
+	.ev_actions    = llc_normal_actions_16b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_normal_actions_17[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_dec_tx_win_size,
+	[3] = llc_conn_ac_resend_i_rsp_f_set_1,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_17 = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_normal_actions_17,
+};
+
+/* State transitions for LLC_CONN_EV_INIT_P_F_CYCLE event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_18[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_18[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_18 = {
+	.ev	       = llc_conn_ev_init_p_f_cycle,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_18,
+	.ev_actions    = llc_normal_actions_18,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_19[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_19[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[2] = llc_conn_ac_rst_vs,
+	[3] = llc_conn_ac_start_p_timer,
+	[4] = llc_conn_ac_inc_retry_cnt_by_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_19 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_normal_ev_qfyrs_19,
+	.ev_actions    = llc_normal_actions_19,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_20a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_20a[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[2] = llc_conn_ac_rst_vs,
+	[3] = llc_conn_ac_start_p_timer,
+	[4] = llc_conn_ac_inc_retry_cnt_by_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_20a = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_normal_ev_qfyrs_20a,
+	.ev_actions    = llc_normal_actions_20a,
+};
+
+/* State transitions for LLC_CONN_EV_BUSY_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_20b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_20b[] = {
+	[0] = llc_conn_ac_rst_sendack_flag,
+	[1] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[2] = llc_conn_ac_rst_vs,
+	[3] = llc_conn_ac_start_p_timer,
+	[4] = llc_conn_ac_inc_retry_cnt_by_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_20b = {
+	.ev	       = llc_conn_ev_busy_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_normal_ev_qfyrs_20b,
+	.ev_actions    = llc_normal_actions_20b,
+};
+
+/* State transitions for LLC_CONN_EV_TX_BUFF_FULL event */
+static llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_21[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_normal_actions_21[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_normal_state_trans_21 = {
+	.ev	       = llc_conn_ev_tx_buffer_full,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_normal_ev_qfyrs_21,
+	.ev_actions    = llc_normal_actions_21,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_normal_state_transitions[] = {
+	 [0] = &llc_normal_state_trans_1,	/* Requests */
+	 [1] = &llc_normal_state_trans_2,
+	 [2] = &llc_normal_state_trans_2_1,
+	 [3] = &llc_common_state_trans_1,
+	 [4] = &llc_common_state_trans_2,
+	 [5] = &llc_common_state_trans_end,
+	 [6] = &llc_normal_state_trans_21,
+	 [7] = &llc_normal_state_trans_3,	/* Local busy */
+	 [8] = &llc_normal_state_trans_4,
+	 [9] = &llc_common_state_trans_end,
+	[10] = &llc_normal_state_trans_18,	/* Init pf cycle */
+	[11] = &llc_common_state_trans_end,
+	[12] = &llc_common_state_trans_11a,	/* Timers */
+	[13] = &llc_common_state_trans_11b,
+	[14] = &llc_common_state_trans_11c,
+	[15] = &llc_common_state_trans_11d,
+	[16] = &llc_normal_state_trans_19,
+	[17] = &llc_normal_state_trans_20a,
+	[18] = &llc_normal_state_trans_20b,
+	[19] = &llc_common_state_trans_end,
+	[20] = &llc_normal_state_trans_8b,	/* Receive frames */
+	[21] = &llc_normal_state_trans_9b,
+	[22] = &llc_normal_state_trans_10,
+	[23] = &llc_normal_state_trans_11b,
+	[24] = &llc_normal_state_trans_11c,
+	[25] = &llc_normal_state_trans_5a,
+	[26] = &llc_normal_state_trans_5b,
+	[27] = &llc_normal_state_trans_5c,
+	[28] = &llc_normal_state_trans_6a,
+	[29] = &llc_normal_state_trans_6b,
+	[30] = &llc_normal_state_trans_7,
+	[31] = &llc_normal_state_trans_8a,
+	[32] = &llc_normal_state_trans_9a,
+	[33] = &llc_normal_state_trans_11a,
+	[34] = &llc_normal_state_trans_12,
+	[35] = &llc_normal_state_trans_13a,
+	[36] = &llc_normal_state_trans_13b,
+	[37] = &llc_normal_state_trans_13c,
+	[38] = &llc_normal_state_trans_14,
+	[39] = &llc_normal_state_trans_15a,
+	[40] = &llc_normal_state_trans_15b,
+	[41] = &llc_normal_state_trans_16a,
+	[42] = &llc_normal_state_trans_16b,
+	[43] = &llc_normal_state_trans_17,
+	[44] = &llc_common_state_trans_3,
+	[45] = &llc_common_state_trans_4,
+	[46] = &llc_common_state_trans_5,
+	[47] = &llc_common_state_trans_6,
+	[48] = &llc_common_state_trans_7a,
+	[49] = &llc_common_state_trans_7b,
+	[50] = &llc_common_state_trans_8a,
+	[51] = &llc_common_state_trans_8b,
+	[52] = &llc_common_state_trans_8c,
+	[53] = &llc_common_state_trans_9,
+	/* [54] = &llc_common_state_trans_10, */
+	[54] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_BUSY transitions */
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_1[] = {
+	[0] = llc_conn_ac_send_i_xxx_x_set_0,
+	[1] = llc_conn_ac_start_ack_tmr_if_not_running,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_1,
+	.ev_actions    = llc_busy_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_2[] = {
+	[0] = llc_conn_ac_send_i_xxx_x_set_0,
+	[1] = llc_conn_ac_start_ack_tmr_if_not_running,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_2 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_2,
+	.ev_actions    = llc_busy_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_2_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_remote_busy,
+	[2] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_busy_actions_2_1[1];
+
+static struct llc_conn_state_trans llc_busy_state_trans_2_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_2_1,
+	.ev_actions    = llc_busy_actions_2_1,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_3[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_start_rej_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_3 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_busy_ev_qfyrs_3,
+	.ev_actions    = llc_busy_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_4[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_start_rej_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_4 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_busy_ev_qfyrs_4,
+	.ev_actions    = llc_busy_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_5[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_5[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_5 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_busy_ev_qfyrs_5,
+	.ev_actions    = llc_busy_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_6[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_6[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_6 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_busy_ev_qfyrs_6,
+	.ev_actions    = llc_busy_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_7[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_2,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_7[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_7 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_busy_ev_qfyrs_7,
+	.ev_actions    = llc_busy_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_8[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_2,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_8[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_8 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_busy_ev_qfyrs_8,
+	.ev_actions    = llc_busy_actions_8,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_X_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_9a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_9a[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_p_flag,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_set_data_flag_1_if_data_flag_eq_0,
+	[4] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_9a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_9a,
+	.ev_actions    = llc_busy_actions_9a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_9b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_9b[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_p_flag,
+	[2] = llc_conn_ac_upd_nr_received,
+	[3] = llc_conn_ac_set_data_flag_1_if_data_flag_eq_0,
+	[4] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_9b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_9b,
+	.ev_actions    = llc_busy_actions_9b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_10a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_10a[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_data_flag_1_if_data_flag_eq_0,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_10a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_10a,
+	.ev_actions    = llc_busy_actions_10a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_10b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_10b[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_data_flag_1_if_data_flag_eq_0,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_10b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_10b,
+	.ev_actions    = llc_busy_actions_10b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_busy_actions_11[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_data_flag_1_if_data_flag_eq_0,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_11 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_11,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_busy_actions_12[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_12 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_12,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_13a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_13a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2,
+	[6] = llc_conn_ac_set_data_flag_0,
+	[7] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[8] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_13a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_13a,
+	.ev_actions    = llc_busy_actions_13a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_13b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_13b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2,
+	[6] = llc_conn_ac_set_data_flag_0,
+	[7] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[8] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_13b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_13b,
+	.ev_actions    = llc_busy_actions_13b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_14a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_14a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_14a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_14a,
+	.ev_actions    = llc_busy_actions_14a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_14b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_14b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_14b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_14b,
+	.ev_actions    = llc_busy_actions_14b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_busy_actions_15a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_15a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_15a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_busy_actions_15b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_15b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_15b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_15c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_15c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_15c = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_15c,
+	.ev_actions    = llc_busy_actions_15c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_busy_actions_16[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_16 = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_16,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_busy_actions_17a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_17a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_17a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_busy_actions_17b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_17b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_17b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_17c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_17c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_17c = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_17c,
+	.ev_actions    = llc_busy_actions_17c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_busy_actions_18[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_18 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_18,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_19a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_19a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_19a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_19a,
+	.ev_actions    = llc_busy_actions_19a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_19b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_19b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_19b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_19b,
+	.ev_actions    = llc_busy_actions_19b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_20a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_20a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_20a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_20a,
+	.ev_actions    = llc_busy_actions_20a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_20b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_20b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_20b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_20b,
+	.ev_actions    = llc_busy_actions_20b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_busy_actions_21[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_21 = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_busy_actions_21,
+};
+
+/* State transitions for LLC_CONN_EV_INIT_P_F_CYCLE event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_22[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_22[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_22 = {
+	.ev	       = llc_conn_ev_init_p_f_cycle,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_22,
+	.ev_actions    = llc_busy_actions_22,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_23[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_23[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_rst_vs,
+	[2] = llc_conn_ac_start_p_timer,
+	[3] = llc_conn_ac_inc_retry_cnt_by_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_23 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_23,
+	.ev_actions    = llc_busy_actions_23,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_24a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_24a[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = llc_conn_ac_rst_vs,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_24a = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_24a,
+	.ev_actions    = llc_busy_actions_24a,
+};
+
+/* State transitions for LLC_CONN_EV_BUSY_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_24b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_24b[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = llc_conn_ac_rst_vs,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_24b = {
+	.ev	       = llc_conn_ev_busy_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_24b,
+	.ev_actions    = llc_busy_actions_24b,
+};
+
+/* State transitions for LLC_CONN_EV_REJ_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_25[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_25[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = llc_conn_ac_rst_vs,
+	[4] = llc_conn_ac_set_data_flag_1,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_25 = {
+	.ev	       = llc_conn_ev_rej_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_25,
+	.ev_actions    = llc_busy_actions_25,
+};
+
+/* State transitions for LLC_CONN_EV_REJ_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_26[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_busy_actions_26[] = {
+	[0] = llc_conn_ac_set_data_flag_1,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_busy_state_trans_26 = {
+	.ev	       = llc_conn_ev_rej_tmr_exp,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_busy_ev_qfyrs_26,
+	.ev_actions    = llc_busy_actions_26,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_busy_state_transitions[] = {
+	 [0] = &llc_common_state_trans_1,	/* Request */
+	 [1] = &llc_common_state_trans_2,
+	 [2] = &llc_busy_state_trans_1,
+	 [3] = &llc_busy_state_trans_2,
+	 [4] = &llc_busy_state_trans_2_1,
+	 [5] = &llc_common_state_trans_end,
+	 [6] = &llc_busy_state_trans_3,		/* Local busy */
+	 [7] = &llc_busy_state_trans_4,
+	 [8] = &llc_busy_state_trans_5,
+	 [9] = &llc_busy_state_trans_6,
+	[10] = &llc_busy_state_trans_7,
+	[11] = &llc_busy_state_trans_8,
+	[12] = &llc_common_state_trans_end,
+	[13] = &llc_busy_state_trans_22,	/* Initiate PF cycle */
+	[14] = &llc_common_state_trans_end,
+	[15] = &llc_common_state_trans_11a,	/* Timer */
+	[16] = &llc_common_state_trans_11b,
+	[17] = &llc_common_state_trans_11c,
+	[18] = &llc_common_state_trans_11d,
+	[19] = &llc_busy_state_trans_23,
+	[20] = &llc_busy_state_trans_24a,
+	[21] = &llc_busy_state_trans_24b,
+	[22] = &llc_busy_state_trans_25,
+	[23] = &llc_busy_state_trans_26,
+	[24] = &llc_common_state_trans_end,
+	[25] = &llc_busy_state_trans_9a,	/* Receive frame */
+	[26] = &llc_busy_state_trans_9b,
+	[27] = &llc_busy_state_trans_10a,
+	[28] = &llc_busy_state_trans_10b,
+	[29] = &llc_busy_state_trans_11,
+	[30] = &llc_busy_state_trans_12,
+	[31] = &llc_busy_state_trans_13a,
+	[32] = &llc_busy_state_trans_13b,
+	[33] = &llc_busy_state_trans_14a,
+	[34] = &llc_busy_state_trans_14b,
+	[35] = &llc_busy_state_trans_15a,
+	[36] = &llc_busy_state_trans_15b,
+	[37] = &llc_busy_state_trans_15c,
+	[38] = &llc_busy_state_trans_16,
+	[39] = &llc_busy_state_trans_17a,
+	[40] = &llc_busy_state_trans_17b,
+	[41] = &llc_busy_state_trans_17c,
+	[42] = &llc_busy_state_trans_18,
+	[43] = &llc_busy_state_trans_19a,
+	[44] = &llc_busy_state_trans_19b,
+	[45] = &llc_busy_state_trans_20a,
+	[46] = &llc_busy_state_trans_20b,
+	[47] = &llc_busy_state_trans_21,
+	[48] = &llc_common_state_trans_3,
+	[49] = &llc_common_state_trans_4,
+	[50] = &llc_common_state_trans_5,
+	[51] = &llc_common_state_trans_6,
+	[52] = &llc_common_state_trans_7a,
+	[53] = &llc_common_state_trans_7b,
+	[54] = &llc_common_state_trans_8a,
+	[55] = &llc_common_state_trans_8b,
+	[56] = &llc_common_state_trans_8c,
+	[57] = &llc_common_state_trans_9,
+	/* [58] = &llc_common_state_trans_10, */
+	[58] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_REJ transitions */
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_1[] = {
+	[0] = llc_conn_ac_send_i_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_1,
+	.ev_actions    = llc_reject_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_0,
+	[1] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_2[] = {
+	[0] = llc_conn_ac_send_i_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_2 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_2,
+	.ev_actions    = llc_reject_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_2_1[] = {
+	[0] = llc_conn_ev_qlfy_remote_busy_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_remote_busy,
+	[2] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_reject_actions_2_1[1];
+
+static struct llc_conn_state_trans llc_reject_state_trans_2_1 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_2_1,
+	.ev_actions    = llc_reject_actions_2_1,
+};
+
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_3[] = {
+	[0] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_set_data_flag_2,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_3 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_reject_ev_qfyrs_3,
+	.ev_actions    = llc_reject_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_4[] = {
+	[0] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_set_data_flag_2,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_4 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = llc_reject_ev_qfyrs_4,
+	.ev_actions    = llc_reject_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_reject_actions_5a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_p_flag,
+	[2] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_5a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_5a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_reject_actions_5b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_p_flag,
+	[2] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_5b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_5b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1_UNEXPD_Ns event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_5c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_5c[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_p_flag,
+	[2] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_5c = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_5c,
+	.ev_actions    = llc_reject_actions_5c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_reject_actions_6[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_6 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_7a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_7a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_send_ack_xxx_x_set_0,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[6] = llc_conn_ac_stop_rej_timer,
+	[7] = NULL,
+
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_7a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_reject_ev_qfyrs_7a,
+	.ev_actions    = llc_reject_actions_7a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_7b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_7b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_send_ack_xxx_x_set_0,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_clear_remote_busy_if_f_eq_1,
+	[6] = llc_conn_ac_stop_rej_timer,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_7b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_reject_ev_qfyrs_7b,
+	.ev_actions    = llc_reject_actions_7b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_8a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_8a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_ack_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_timer,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_reject_ev_qfyrs_8a,
+	.ev_actions    = llc_reject_actions_8a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_8b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_8b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_ack_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_timer,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_reject_ev_qfyrs_8b,
+	.ev_actions    = llc_reject_actions_8b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_reject_actions_9[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_ack_rsp_f_set_1,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_stop_rej_timer,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_9 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_9,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_reject_actions_10a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_10a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_10a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_reject_actions_10b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_10b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_10b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_10c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_10c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_10c = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_10c,
+	.ev_actions    = llc_reject_actions_10c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_reject_actions_11[] = {
+	[0] = llc_conn_ac_send_ack_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_11 = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_11,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_reject_actions_12a[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_12a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_12a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_reject_actions_12b[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_12b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_12b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_12c[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_12c[] = {
+	[0] = llc_conn_ac_upd_p_flag,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_12c = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_12c,
+	.ev_actions    = llc_reject_actions_12c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_reject_actions_13[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_13 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_13,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_14a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_14a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_14a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_14a,
+	.ev_actions    = llc_reject_actions_14a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_X event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_14b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_14b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_p_flag,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_14b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_14b,
+	.ev_actions    = llc_reject_actions_14b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_15a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_15a[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_15a = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_15a,
+	.ev_actions    = llc_reject_actions_15a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_15b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_15b[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_15b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_15b,
+	.ev_actions    = llc_reject_actions_15b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_reject_actions_16[] = {
+	[0] = llc_conn_ac_set_vs_nr,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_resend_i_rsp_f_set_1,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_16 = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_reject_actions_16,
+};
+
+/* State transitions for LLC_CONN_EV_INIT_P_F_CYCLE event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_17[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_17[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_17 = {
+	.ev	       = llc_conn_ev_init_p_f_cycle,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_17,
+	.ev_actions    = llc_reject_actions_17,
+};
+
+/* State transitions for LLC_CONN_EV_REJ_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_18[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_18[] = {
+	[0] = llc_conn_ac_send_rej_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_start_rej_timer,
+	[3] = llc_conn_ac_inc_retry_cnt_by_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_18 = {
+	.ev	       = llc_conn_ev_rej_tmr_exp,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_18,
+	.ev_actions    = llc_reject_actions_18,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_19[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_19[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_start_rej_timer,
+	[3] = llc_conn_ac_inc_retry_cnt_by_1,
+	[4] = llc_conn_ac_rst_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_19 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_19,
+	.ev_actions    = llc_reject_actions_19,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_20a[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_20a[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_start_rej_timer,
+	[3] = llc_conn_ac_inc_retry_cnt_by_1,
+	[4] = llc_conn_ac_rst_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_20a = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_20a,
+	.ev_actions    = llc_reject_actions_20a,
+};
+
+/* State transitions for LLC_CONN_EV_BUSY_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_20b[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_reject_actions_20b[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_start_rej_timer,
+	[3] = llc_conn_ac_inc_retry_cnt_by_1,
+	[4] = llc_conn_ac_rst_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_reject_state_trans_20b = {
+	.ev	       = llc_conn_ev_busy_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_reject_ev_qfyrs_20b,
+	.ev_actions    = llc_reject_actions_20b,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_reject_state_transitions[] = {
+	 [0] = &llc_common_state_trans_1,	/* Request */
+	 [1] = &llc_common_state_trans_2,
+	 [2] = &llc_common_state_trans_end,
+	 [3] = &llc_reject_state_trans_1,
+	 [4] = &llc_reject_state_trans_2,
+	 [5] = &llc_reject_state_trans_2_1,
+	 [6] = &llc_reject_state_trans_3,	/* Local busy */
+	 [7] = &llc_reject_state_trans_4,
+	 [8] = &llc_common_state_trans_end,
+	 [9] = &llc_reject_state_trans_17,	/* Initiate PF cycle */
+	[10] = &llc_common_state_trans_end,
+	[11] = &llc_common_state_trans_11a,	/* Timer */
+	[12] = &llc_common_state_trans_11b,
+	[13] = &llc_common_state_trans_11c,
+	[14] = &llc_common_state_trans_11d,
+	[15] = &llc_reject_state_trans_18,
+	[16] = &llc_reject_state_trans_19,
+	[17] = &llc_reject_state_trans_20a,
+	[18] = &llc_reject_state_trans_20b,
+	[19] = &llc_common_state_trans_end,
+	[20] = &llc_common_state_trans_3,	/* Receive frame */
+	[21] = &llc_common_state_trans_4,
+	[22] = &llc_common_state_trans_5,
+	[23] = &llc_common_state_trans_6,
+	[24] = &llc_common_state_trans_7a,
+	[25] = &llc_common_state_trans_7b,
+	[26] = &llc_common_state_trans_8a,
+	[27] = &llc_common_state_trans_8b,
+	[28] = &llc_common_state_trans_8c,
+	[29] = &llc_common_state_trans_9,
+	/* [30] = &llc_common_state_trans_10, */
+	[30] = &llc_reject_state_trans_5a,
+	[31] = &llc_reject_state_trans_5b,
+	[32] = &llc_reject_state_trans_5c,
+	[33] = &llc_reject_state_trans_6,
+	[34] = &llc_reject_state_trans_7a,
+	[35] = &llc_reject_state_trans_7b,
+	[36] = &llc_reject_state_trans_8a,
+	[37] = &llc_reject_state_trans_8b,
+	[38] = &llc_reject_state_trans_9,
+	[39] = &llc_reject_state_trans_10a,
+	[40] = &llc_reject_state_trans_10b,
+	[41] = &llc_reject_state_trans_10c,
+	[42] = &llc_reject_state_trans_11,
+	[43] = &llc_reject_state_trans_12a,
+	[44] = &llc_reject_state_trans_12b,
+	[45] = &llc_reject_state_trans_12c,
+	[46] = &llc_reject_state_trans_13,
+	[47] = &llc_reject_state_trans_14a,
+	[48] = &llc_reject_state_trans_14b,
+	[49] = &llc_reject_state_trans_15a,
+	[50] = &llc_reject_state_trans_15b,
+	[51] = &llc_reject_state_trans_16,
+	[52] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_AWAIT transitions */
+/* State transitions for LLC_CONN_EV_DATA_REQ event */
+static llc_conn_ev_qfyr_t llc_await_ev_qfyrs_1_0[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_await_actions_1_0[1];
+
+static struct llc_conn_state_trans llc_await_state_trans_1_0 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_await_ev_qfyrs_1_0,
+	.ev_actions    = llc_await_actions_1_0,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_action_t llc_await_actions_1[] = {
+	[0] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_set_data_flag_0,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_1 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_actions_2[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_stop_p_timer,
+	[4] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[5] = llc_conn_ac_start_rej_timer,
+	[6] = llc_conn_ac_clear_remote_busy,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_actions_3a[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_3a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_3a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_actions_3b[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_3b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_3b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_actions_4[] = {
+	[0] = llc_conn_ac_send_rej_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_start_rej_timer,
+	[4] = llc_conn_ac_start_p_timer,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_5[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr,
+	[6] = llc_conn_ac_clear_remote_busy,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_6a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_6a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_6a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_6b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_6b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_6b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_7[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_7 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_8a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_8a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_8b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_8b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_9a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_9a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_9a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_9b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_9b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_9b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_9c[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_9c = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_9c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_9d[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_9d = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_9d,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_10a[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_10a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_10a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_10b[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_10b = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_10b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_11[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_11 = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_11,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_12a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_12a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_12a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_actions_12b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_12b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_12b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_actions_13[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_13 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_actions_13,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_await_ev_qfyrs_14[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_actions_14[] = {
+	[0] = llc_conn_ac_send_rr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_state_trans_14 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_await_ev_qfyrs_14,
+	.ev_actions    = llc_await_actions_14,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_await_state_transitions[] = {
+	 [0] = &llc_common_state_trans_1,	/* Request */
+	 [1] = &llc_common_state_trans_2,
+	 [2] = &llc_await_state_trans_1_0,
+	 [3] = &llc_common_state_trans_end,
+	 [4] = &llc_await_state_trans_1,	/* Local busy */
+	 [5] = &llc_common_state_trans_end,
+	 [6] = &llc_common_state_trans_end,	/* Initiate PF Cycle */
+	 [7] = &llc_common_state_trans_11a,	/* Timer */
+	 [8] = &llc_common_state_trans_11b,
+	 [9] = &llc_common_state_trans_11c,
+	[10] = &llc_common_state_trans_11d,
+	[11] = &llc_await_state_trans_14,
+	[12] = &llc_common_state_trans_end,
+	[13] = &llc_common_state_trans_3,	/* Receive frame */
+	[14] = &llc_common_state_trans_4,
+	[15] = &llc_common_state_trans_5,
+	[16] = &llc_common_state_trans_6,
+	[17] = &llc_common_state_trans_7a,
+	[18] = &llc_common_state_trans_7b,
+	[19] = &llc_common_state_trans_8a,
+	[20] = &llc_common_state_trans_8b,
+	[21] = &llc_common_state_trans_8c,
+	[22] = &llc_common_state_trans_9,
+	/* [23] = &llc_common_state_trans_10, */
+	[23] = &llc_await_state_trans_2,
+	[24] = &llc_await_state_trans_3a,
+	[25] = &llc_await_state_trans_3b,
+	[26] = &llc_await_state_trans_4,
+	[27] = &llc_await_state_trans_5,
+	[28] = &llc_await_state_trans_6a,
+	[29] = &llc_await_state_trans_6b,
+	[30] = &llc_await_state_trans_7,
+	[31] = &llc_await_state_trans_8a,
+	[32] = &llc_await_state_trans_8b,
+	[33] = &llc_await_state_trans_9a,
+	[34] = &llc_await_state_trans_9b,
+	[35] = &llc_await_state_trans_9c,
+	[36] = &llc_await_state_trans_9d,
+	[37] = &llc_await_state_trans_10a,
+	[38] = &llc_await_state_trans_10b,
+	[39] = &llc_await_state_trans_11,
+	[40] = &llc_await_state_trans_12a,
+	[41] = &llc_await_state_trans_12b,
+	[42] = &llc_await_state_trans_13,
+	[43] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_AWAIT_BUSY transitions */
+/* State transitions for LLC_CONN_EV_DATA_CONN_REQ event */
+static llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_1_0[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_await_busy_actions_1_0[1];
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_await_busy_ev_qfyrs_1_0,
+	.ev_actions    = llc_await_busy_actions_1_0,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_1[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_1,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_busy_actions_1[] = {
+	[0] = llc_conn_ac_send_rej_xxx_x_set_0,
+	[1] = llc_conn_ac_start_rej_timer,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_1 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_await_busy_ev_qfyrs_1,
+	.ev_actions    = llc_await_busy_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_0,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_busy_actions_2[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_2 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = llc_await_busy_ev_qfyrs_2,
+	.ev_actions    = llc_await_busy_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_CLEARED event */
+static llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_data_flag_eq_2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_busy_actions_3[] = {
+	[0] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_3 = {
+	.ev	       = llc_conn_ev_local_busy_cleared,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_await_busy_ev_qfyrs_3,
+	.ev_actions    = llc_await_busy_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_busy_actions_4[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_stop_p_timer,
+	[4] = llc_conn_ac_set_data_flag_1,
+	[5] = llc_conn_ac_clear_remote_busy,
+	[6] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_busy_actions_5a[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_data_flag_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_5a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_5a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_busy_actions_5b[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_data_flag_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_5b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_5b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_busy_actions_6[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_data_flag_1,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_6 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_7[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_inc_vr_by_1,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_stop_p_timer,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_upd_vs,
+	[6] = llc_conn_ac_set_data_flag_0,
+	[7] = llc_conn_ac_clear_remote_busy,
+	[8] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[9] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_7 = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_8a[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_inc_vr_by_1,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_8a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_8b[] = {
+	[0] = llc_conn_ac_opt_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_inc_vr_by_1,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_8b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_9[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_inc_vr_by_1,
+	[2] = llc_conn_ac_data_ind,
+	[3] = llc_conn_ac_upd_nr_received,
+	[4] = llc_conn_ac_upd_vs,
+	[5] = llc_conn_ac_set_data_flag_0,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_9 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_9,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_10a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_10a = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_10a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_10b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_10b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_10b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_11a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_11a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_11a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_11b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_11b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_11b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_11c[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_11c = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_11c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_11d[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_11d = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_11d,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_12a[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_12a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_12a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_12b[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_12b = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_12b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_13[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_13 = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_13,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_14a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_14a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_14a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_busy_actions_14b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_14b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_14b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_busy_actions_15[] = {
+	[0] = llc_conn_ac_send_rnr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_15 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_busy_actions_15,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_16[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_busy_actions_16[] = {
+	[0] = llc_conn_ac_send_rnr_cmd_p_set_1,
+	[1] = llc_conn_ac_start_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_busy_state_trans_16 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = llc_await_busy_ev_qfyrs_16,
+	.ev_actions    = llc_await_busy_actions_16,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_await_busy_state_transitions[] = {
+	 [0] = &llc_common_state_trans_1,		/* Request */
+	 [1] = &llc_common_state_trans_2,
+	 [2] = &llc_await_busy_state_trans_1_0,
+	 [3] = &llc_common_state_trans_end,
+	 [4] = &llc_await_busy_state_trans_1,		/* Local busy */
+	 [5] = &llc_await_busy_state_trans_2,
+	 [6] = &llc_await_busy_state_trans_3,
+	 [7] = &llc_common_state_trans_end,
+	 [8] = &llc_common_state_trans_end,		/* Initiate PF cycle */
+	 [9] = &llc_common_state_trans_11a,		/* Timer */
+	[10] = &llc_common_state_trans_11b,
+	[11] = &llc_common_state_trans_11c,
+	[12] = &llc_common_state_trans_11d,
+	[13] = &llc_await_busy_state_trans_16,
+	[14] = &llc_common_state_trans_end,
+	[15] = &llc_await_busy_state_trans_4,		/* Receive frame */
+	[16] = &llc_await_busy_state_trans_5a,
+	[17] = &llc_await_busy_state_trans_5b,
+	[18] = &llc_await_busy_state_trans_6,
+	[19] = &llc_await_busy_state_trans_7,
+	[20] = &llc_await_busy_state_trans_8a,
+	[21] = &llc_await_busy_state_trans_8b,
+	[22] = &llc_await_busy_state_trans_9,
+	[23] = &llc_await_busy_state_trans_10a,
+	[24] = &llc_await_busy_state_trans_10b,
+	[25] = &llc_await_busy_state_trans_11a,
+	[26] = &llc_await_busy_state_trans_11b,
+	[27] = &llc_await_busy_state_trans_11c,
+	[28] = &llc_await_busy_state_trans_11d,
+	[29] = &llc_await_busy_state_trans_12a,
+	[30] = &llc_await_busy_state_trans_12b,
+	[31] = &llc_await_busy_state_trans_13,
+	[32] = &llc_await_busy_state_trans_14a,
+	[33] = &llc_await_busy_state_trans_14b,
+	[34] = &llc_await_busy_state_trans_15,
+	[35] = &llc_common_state_trans_3,
+	[36] = &llc_common_state_trans_4,
+	[37] = &llc_common_state_trans_5,
+	[38] = &llc_common_state_trans_6,
+	[39] = &llc_common_state_trans_7a,
+	[40] = &llc_common_state_trans_7b,
+	[41] = &llc_common_state_trans_8a,
+	[42] = &llc_common_state_trans_8b,
+	[43] = &llc_common_state_trans_8c,
+	[44] = &llc_common_state_trans_9,
+	/* [45] = &llc_common_state_trans_10, */
+	[45] = &llc_common_state_trans_end,
+};
+
+/* ----------------- LLC_CONN_STATE_AWAIT_REJ transitions --------------- */
+/* State transitions for LLC_CONN_EV_DATA_CONN_REQ event */
+static llc_conn_ev_qfyr_t llc_await_reject_ev_qfyrs_1_0[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_await_reject_actions_1_0[1];
+
+static struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_await_reject_ev_qfyrs_1_0,
+	.ev_actions    = llc_await_reject_actions_1_0,
+};
+
+/* State transitions for LLC_CONN_EV_LOCAL_BUSY_DETECTED event */
+static llc_conn_action_t llc_await_rejct_actions_1[] = {
+	[0] = llc_conn_ac_send_rnr_xxx_x_set_0,
+	[1] = llc_conn_ac_set_data_flag_2,
+	[2] = NULL
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_1 = {
+	.ev	       = llc_conn_ev_local_busy_detected,
+	.next_state    = LLC_CONN_STATE_AWAIT_BUSY,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_rejct_actions_2a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = NULL
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_2a = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_2a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_rejct_actions_2b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = NULL
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_2b = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_2b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_rejct_actions_3[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = NULL
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_3 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_4[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_stop_rej_timer,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_upd_vs,
+	[6] = llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr,
+	[7] = llc_conn_ac_clear_remote_busy,
+	[8] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_5a[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[3] = llc_conn_ac_stop_rej_timer,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_upd_vs,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_5a = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_5a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_5b[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_xxx_x_set_0,
+	[3] = llc_conn_ac_stop_rej_timer,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_upd_vs,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_5b = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_5b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_6[] = {
+	[0] = llc_conn_ac_inc_vr_by_1,
+	[1] = llc_conn_ac_data_ind,
+	[2] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[3] = llc_conn_ac_stop_rej_timer,
+	[4] = llc_conn_ac_upd_nr_received,
+	[5] = llc_conn_ac_upd_vs,
+	[6] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_6 = {
+	.ev	       = llc_conn_ev_rx_i_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_7a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_7a = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_7a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_7b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_7b = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_7b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_I_RSP_Fbit_SET_1_UNEXPD_Ns event */
+static llc_conn_action_t llc_await_rejct_actions_7c[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_resend_i_xxx_x_set_0,
+	[4] = llc_conn_ac_clear_remote_busy,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_7c = {
+	.ev	       = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_7c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_8a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_8a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_8a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_8b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_8b = {
+	.ev	       = llc_conn_ev_rx_rr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_8b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_8c[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_8c = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_8c,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_8d[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_clear_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_8d = {
+	.ev	       = llc_conn_ev_rx_rej_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_8d,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_9a[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_9a = {
+	.ev	       = llc_conn_ev_rx_rr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_9a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_REJ_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_9b[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_clear_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_9b = {
+	.ev	       = llc_conn_ev_rx_rej_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_9b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_10[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_stop_p_timer,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_10 = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_1,
+	.next_state    = LLC_CONN_STATE_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_10,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_11a[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_11a = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_11a,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_RSP_Fbit_SET_0 event */
+static llc_conn_action_t llc_await_rejct_actions_11b[] = {
+	[0] = llc_conn_ac_upd_nr_received,
+	[1] = llc_conn_ac_upd_vs,
+	[2] = llc_conn_ac_set_remote_busy,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_11b = {
+	.ev	       = llc_conn_ev_rx_rnr_rsp_fbit_set_0,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_11b,
+};
+
+/* State transitions for LLC_CONN_EV_RX_RNR_CMD_Pbit_SET_1 event */
+static llc_conn_action_t llc_await_rejct_actions_12[] = {
+	[0] = llc_conn_ac_send_rr_rsp_f_set_1,
+	[1] = llc_conn_ac_upd_nr_received,
+	[2] = llc_conn_ac_upd_vs,
+	[3] = llc_conn_ac_set_remote_busy,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_12 = {
+	.ev	       = llc_conn_ev_rx_rnr_cmd_pbit_set_1,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_await_rejct_actions_12,
+};
+
+/* State transitions for LLC_CONN_EV_P_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_await_rejct_ev_qfyrs_13[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_await_rejct_actions_13[] = {
+	[0] = llc_conn_ac_send_rej_cmd_p_set_1,
+	[1] = llc_conn_ac_stop_p_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = {
+	.ev	       = llc_conn_ev_p_tmr_exp,
+	.next_state    = LLC_CONN_STATE_AWAIT_REJ,
+	.ev_qualifiers = llc_await_rejct_ev_qfyrs_13,
+	.ev_actions    = llc_await_rejct_actions_13,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = {
+	 [0] = &llc_await_reject_state_trans_1_0,
+	 [1] = &llc_common_state_trans_1,		/* requests */
+	 [2] = &llc_common_state_trans_2,
+	 [3] = &llc_common_state_trans_end,
+	 [4] = &llc_await_rejct_state_trans_1,		/* local busy */
+	 [5] = &llc_common_state_trans_end,
+	 [6] = &llc_common_state_trans_end,		/* Initiate PF cycle */
+	 [7] = &llc_await_rejct_state_trans_13,	/* timers */
+	 [8] = &llc_common_state_trans_11a,
+	 [9] = &llc_common_state_trans_11b,
+	[10] = &llc_common_state_trans_11c,
+	[11] = &llc_common_state_trans_11d,
+	[12] = &llc_common_state_trans_end,
+	[13] = &llc_await_rejct_state_trans_2a,	/* receive frames */
+	[14] = &llc_await_rejct_state_trans_2b,
+	[15] = &llc_await_rejct_state_trans_3,
+	[16] = &llc_await_rejct_state_trans_4,
+	[17] = &llc_await_rejct_state_trans_5a,
+	[18] = &llc_await_rejct_state_trans_5b,
+	[19] = &llc_await_rejct_state_trans_6,
+	[20] = &llc_await_rejct_state_trans_7a,
+	[21] = &llc_await_rejct_state_trans_7b,
+	[22] = &llc_await_rejct_state_trans_7c,
+	[23] = &llc_await_rejct_state_trans_8a,
+	[24] = &llc_await_rejct_state_trans_8b,
+	[25] = &llc_await_rejct_state_trans_8c,
+	[26] = &llc_await_rejct_state_trans_8d,
+	[27] = &llc_await_rejct_state_trans_9a,
+	[28] = &llc_await_rejct_state_trans_9b,
+	[29] = &llc_await_rejct_state_trans_10,
+	[30] = &llc_await_rejct_state_trans_11a,
+	[31] = &llc_await_rejct_state_trans_11b,
+	[32] = &llc_await_rejct_state_trans_12,
+	[33] = &llc_common_state_trans_3,
+	[34] = &llc_common_state_trans_4,
+	[35] = &llc_common_state_trans_5,
+	[36] = &llc_common_state_trans_6,
+	[37] = &llc_common_state_trans_7a,
+	[38] = &llc_common_state_trans_7b,
+	[39] = &llc_common_state_trans_8a,
+	[40] = &llc_common_state_trans_8b,
+	[41] = &llc_common_state_trans_8c,
+	[42] = &llc_common_state_trans_9,
+	/* [43] = &llc_common_state_trans_10, */
+	[43] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_D_CONN transitions */
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event,
+ * cause_flag = 1 */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_1[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_conflict,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_1[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_ac_disc_confirm,
+	[3] = llc_conn_disc,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_1 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_1,
+	.ev_actions    = llc_d_conn_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_1_1[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_set_status_conflict,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_1_1[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_1_1,
+	.ev_actions    = llc_d_conn_actions_1_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event,
+ * cause_flag = 1
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[2] = llc_conn_ev_qlfy_set_status_disc,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_2[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_disc_confirm,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_2,
+	.ev_actions    = llc_d_conn_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_2_1[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_set_status_disc,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_2_1[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_2_1,
+	.ev_actions    = llc_d_conn_actions_2_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_d_conn_actions_3[] = {
+	[0] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_3 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_D_CONN,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_d_conn_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event,
+ * cause_flag = 1
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_disc,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_4[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_disc_confirm,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_4,
+	.ev_actions    = llc_d_conn_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_4_1[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_set_status_disc,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_4_1[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_4_1,
+	.ev_actions    = llc_d_conn_actions_4_1,
+};
+
+/*
+ * State transition for
+ * LLC_CONN_EV_DATA_CONN_REQ event
+ */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_5[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_d_conn_actions_5[1];
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_5 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_D_CONN,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_5,
+	.ev_actions    = llc_d_conn_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_6[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_6[] = {
+	[0] = llc_conn_ac_send_disc_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_6 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_D_CONN,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_6,
+	.ev_actions    = llc_d_conn_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event, cause_flag = 1 */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_7[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[2] = llc_conn_ev_qlfy_set_status_failed,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_7[] = {
+	[0] = llc_conn_ac_disc_confirm,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_7 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_7,
+	.ev_actions    = llc_d_conn_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event, cause_flag = 0 */
+static llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_8[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_set_status_failed,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_d_conn_actions_8[] = {
+	[0] = llc_conn_disc,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_d_conn_state_trans_8 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_d_conn_ev_qfyrs_8,
+	.ev_actions    = llc_d_conn_actions_8,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_d_conn_state_transitions[] = {
+	 [0] = &llc_d_conn_state_trans_5,	/* Request */
+	 [1] = &llc_common_state_trans_end,
+	 [2] = &llc_common_state_trans_end,	/* Local busy */
+	 [3] = &llc_common_state_trans_end,	/* Initiate PF cycle */
+	 [4] = &llc_d_conn_state_trans_6,	/* Timer */
+	 [5] = &llc_d_conn_state_trans_7,
+	 [6] = &llc_d_conn_state_trans_8,
+	 [7] = &llc_common_state_trans_end,
+	 [8] = &llc_d_conn_state_trans_1,	/* Receive frame */
+	 [9] = &llc_d_conn_state_trans_1_1,
+	[10] = &llc_d_conn_state_trans_2,
+	[11] = &llc_d_conn_state_trans_2_1,
+	[12] = &llc_d_conn_state_trans_3,
+	[13] = &llc_d_conn_state_trans_4,
+	[14] = &llc_d_conn_state_trans_4_1,
+	[15] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_RESET transitions */
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_rst_actions_1[] = {
+	[0] = llc_conn_ac_set_vs_0,
+	[1] = llc_conn_ac_set_vr_0,
+	[2] = llc_conn_ac_set_s_flag_1,
+	[3] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_1 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_rst_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event,
+ * cause_flag = 1
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_2[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[2] = llc_conn_ev_qlfy_set_status_conn,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_2[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_rst_confirm,
+	[5] = llc_conn_ac_set_remote_busy_0,
+	[6] = llc_conn_reset,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_rst_ev_qfyrs_2,
+	.ev_actions    = llc_rst_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_UA_RSP_Fbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_2_1[] = {
+	[0] = llc_conn_ev_qlfy_p_flag_eq_f,
+	[1] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_set_status_rst_done,
+	[3] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_2_1[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_ac_set_vs_0,
+	[2] = llc_conn_ac_set_vr_0,
+	[3] = llc_conn_ac_upd_p_flag,
+	[4] = llc_conn_ac_rst_confirm,
+	[5] = llc_conn_ac_set_remote_busy_0,
+	[6] = llc_conn_reset,
+	[7] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_2_1 = {
+	.ev	       = llc_conn_ev_rx_ua_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_rst_ev_qfyrs_2_1,
+	.ev_actions    = llc_rst_actions_2_1,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_3[] = {
+	[0] = llc_conn_ev_qlfy_s_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_rst_done,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_3[] = {
+	[0] = llc_conn_ac_set_p_flag_0,
+	[1] = llc_conn_ac_set_remote_busy_0,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_3 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = llc_rst_ev_qfyrs_3,
+	.ev_actions    = llc_rst_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event,
+ * cause_flag = 1
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_4[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_disc,
+	[2] = NULL,
+};
+static llc_conn_action_t llc_rst_actions_4[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_ac_disc_ind,
+	[2] = llc_conn_ac_stop_ack_timer,
+	[3] = llc_conn_disc,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_4,
+	.ev_actions    = llc_rst_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_4_1[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_set_status_refuse,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_4_1[] = {
+	[0] = llc_conn_ac_send_dm_rsp_f_set_p,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_4_1 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_4_1,
+	.ev_actions    = llc_rst_actions_4_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event,
+ * cause_flag = 1
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_5[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[1] = llc_conn_ev_qlfy_set_status_disc,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_5[] = {
+	[0] = llc_conn_ac_disc_ind,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_5,
+	.ev_actions    = llc_rst_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event,
+ * cause_flag = 0
+ */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_5_1[] = {
+	[0] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[1] = llc_conn_ev_qlfy_set_status_refuse,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_5_1[] = {
+	[0] = llc_conn_ac_stop_ack_timer,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_5_1 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_5_1,
+	.ev_actions    = llc_rst_actions_5_1,
+};
+
+/* State transitions for DATA_CONN_REQ event */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_6[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_rst_actions_6[1];
+
+static struct llc_conn_state_trans llc_rst_state_trans_6 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_rst_ev_qfyrs_6,
+	.ev_actions    = llc_rst_actions_6,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_7[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = llc_conn_ev_qlfy_s_flag_eq_0,
+	[2] = NULL,
+};
+
+static llc_conn_action_t llc_rst_actions_7[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_7 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_rst_ev_qfyrs_7,
+	.ev_actions    = llc_rst_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_8[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = llc_conn_ev_qlfy_s_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_cause_flag_eq_1,
+	[3] = llc_conn_ev_qlfy_set_status_failed,
+	[4] = NULL,
+};
+static llc_conn_action_t llc_rst_actions_8[] = {
+	[0] = llc_conn_ac_disc_ind,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_8 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_8,
+	.ev_actions    = llc_rst_actions_8,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_8_1[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = llc_conn_ev_qlfy_s_flag_eq_0,
+	[2] = llc_conn_ev_qlfy_cause_flag_eq_0,
+	[3] = llc_conn_ev_qlfy_set_status_failed,
+	[4] = NULL,
+};
+static llc_conn_action_t llc_rst_actions_8_1[] = {
+	[0] = llc_conn_ac_disc_ind,
+	[1] = llc_conn_disc,
+	[2] = NULL,
+};
+
+static struct llc_conn_state_trans llc_rst_state_trans_8_1 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = llc_rst_ev_qfyrs_8_1,
+	.ev_actions    = llc_rst_actions_8_1,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_rst_state_transitions[] = {
+	 [0] = &llc_rst_state_trans_6,		/* Request */
+	 [1] = &llc_common_state_trans_end,
+	 [2] = &llc_common_state_trans_end,	/* Local busy */
+	 [3] = &llc_common_state_trans_end,	/* Initiate PF cycle */
+	 [4] = &llc_rst_state_trans_3,		/* Timer */
+	 [5] = &llc_rst_state_trans_7,
+	 [6] = &llc_rst_state_trans_8,
+	 [7] = &llc_rst_state_trans_8_1,
+	 [8] = &llc_common_state_trans_end,
+	 [9] = &llc_rst_state_trans_1,		/* Receive frame */
+	[10] = &llc_rst_state_trans_2,
+	[11] = &llc_rst_state_trans_2_1,
+	[12] = &llc_rst_state_trans_4,
+	[13] = &llc_rst_state_trans_4_1,
+	[14] = &llc_rst_state_trans_5,
+	[15] = &llc_rst_state_trans_5_1,
+	[16] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_ERROR transitions */
+/* State transitions for LLC_CONN_EV_RX_SABME_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_error_actions_1[] = {
+	[0] = llc_conn_ac_set_vs_0,
+	[1] = llc_conn_ac_set_vr_0,
+	[2] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[3] = llc_conn_ac_rst_ind,
+	[4] = llc_conn_ac_set_p_flag_0,
+	[5] = llc_conn_ac_set_remote_busy_0,
+	[6] = llc_conn_ac_stop_ack_timer,
+	[7] = llc_conn_reset,
+	[8] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_1 = {
+	.ev	       = llc_conn_ev_rx_sabme_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_NORMAL,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_error_actions_1,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DISC_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_error_actions_2[] = {
+	[0] = llc_conn_ac_send_ua_rsp_f_set_p,
+	[1] = llc_conn_ac_disc_ind,
+	[2] = llc_conn_ac_stop_ack_timer,
+	[3] = llc_conn_disc,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_2 = {
+	.ev	       = llc_conn_ev_rx_disc_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_error_actions_2,
+};
+
+/* State transitions for LLC_CONN_EV_RX_DM_RSP_Fbit_SET_X event */
+static llc_conn_action_t llc_error_actions_3[] = {
+	[0] = llc_conn_ac_disc_ind,
+	[1] = llc_conn_ac_stop_ack_timer,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_3 = {
+	.ev	       = llc_conn_ev_rx_dm_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_error_actions_3,
+};
+
+/* State transitions for LLC_CONN_EV_RX_FRMR_RSP_Fbit_SET_X event */
+static llc_conn_action_t llc_error_actions_4[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_set_retry_cnt_0,
+	[3] = llc_conn_ac_set_cause_flag_0,
+	[4] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_4 = {
+	.ev	       = llc_conn_ev_rx_frmr_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_error_actions_4,
+};
+
+/* State transitions for LLC_CONN_EV_RX_XXX_CMD_Pbit_SET_X event */
+static llc_conn_action_t llc_error_actions_5[] = {
+	[0] = llc_conn_ac_resend_frmr_rsp_f_set_p,
+	[1] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_5 = {
+	.ev	       = llc_conn_ev_rx_xxx_cmd_pbit_set_x,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_error_actions_5,
+};
+
+/* State transitions for LLC_CONN_EV_RX_XXX_RSP_Fbit_SET_X event */
+static struct llc_conn_state_trans llc_error_state_trans_6 = {
+	.ev	       = llc_conn_ev_rx_xxx_rsp_fbit_set_x,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = NONE,
+	.ev_actions    = NONE,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_error_ev_qfyrs_7[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_lt_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_error_actions_7[] = {
+	[0] = llc_conn_ac_resend_frmr_rsp_f_set_0,
+	[1] = llc_conn_ac_start_ack_timer,
+	[2] = llc_conn_ac_inc_retry_cnt_by_1,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_7 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = llc_error_ev_qfyrs_7,
+	.ev_actions    = llc_error_actions_7,
+};
+
+/* State transitions for LLC_CONN_EV_ACK_TMR_EXP event */
+static llc_conn_ev_qfyr_t llc_error_ev_qfyrs_8[] = {
+	[0] = llc_conn_ev_qlfy_retry_cnt_gte_n2,
+	[1] = NULL,
+};
+
+static llc_conn_action_t llc_error_actions_8[] = {
+	[0] = llc_conn_ac_send_sabme_cmd_p_set_x,
+	[1] = llc_conn_ac_set_s_flag_0,
+	[2] = llc_conn_ac_start_ack_timer,
+	[3] = llc_conn_ac_set_retry_cnt_0,
+	[4] = llc_conn_ac_set_cause_flag_0,
+	[5] = NULL,
+};
+
+static struct llc_conn_state_trans llc_error_state_trans_8 = {
+	.ev	       = llc_conn_ev_ack_tmr_exp,
+	.next_state    = LLC_CONN_STATE_RESET,
+	.ev_qualifiers = llc_error_ev_qfyrs_8,
+	.ev_actions    = llc_error_actions_8,
+};
+
+/* State transitions for LLC_CONN_EV_DATA_CONN_REQ event */
+static llc_conn_ev_qfyr_t llc_error_ev_qfyrs_9[] = {
+	[0] = llc_conn_ev_qlfy_set_status_refuse,
+	[1] = NULL,
+};
+
+/* just one member, NULL, .bss zeroes it */
+static llc_conn_action_t llc_error_actions_9[1];
+
+static struct llc_conn_state_trans llc_error_state_trans_9 = {
+	.ev	       = llc_conn_ev_data_req,
+	.next_state    = LLC_CONN_STATE_ERROR,
+	.ev_qualifiers = llc_error_ev_qfyrs_9,
+	.ev_actions    = llc_error_actions_9,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_error_state_transitions[] = {
+	 [0] = &llc_error_state_trans_9,	/* Request */
+	 [1] = &llc_common_state_trans_end,
+	 [2] = &llc_common_state_trans_end,	/* Local busy */
+	 [3] = &llc_common_state_trans_end,	/* Initiate PF cycle */
+	 [4] = &llc_error_state_trans_7,	/* Timer */
+	 [5] = &llc_error_state_trans_8,
+	 [6] = &llc_common_state_trans_end,
+	 [7] = &llc_error_state_trans_1,	/* Receive frame */
+	 [8] = &llc_error_state_trans_2,
+	 [9] = &llc_error_state_trans_3,
+	[10] = &llc_error_state_trans_4,
+	[11] = &llc_error_state_trans_5,
+	[12] = &llc_error_state_trans_6,
+	[13] = &llc_common_state_trans_end,
+};
+
+/* LLC_CONN_STATE_TEMP transitions */
+/* State transitions for LLC_CONN_EV_DISC_REQ event */
+static llc_conn_action_t llc_temp_actions_1[] = {
+	[0] = llc_conn_ac_stop_all_timers,
+	[1] = llc_conn_ac_send_disc_cmd_p_set_x,
+	[2] = llc_conn_disc,
+	[3] = NULL,
+};
+
+static struct llc_conn_state_trans llc_temp_state_trans_1 = {
+	.ev	       = llc_conn_ev_disc_req,
+	.next_state    = LLC_CONN_STATE_ADM,
+	.ev_qualifiers = NONE,
+	.ev_actions    = llc_temp_actions_1,
+};
+
+/*
+ * Array of pointers;
+ * one to each transition
+ */
+static struct llc_conn_state_trans *llc_temp_state_transitions[] = {
+	[0] = &llc_temp_state_trans_1,		/* requests */
+	[1] = &llc_common_state_trans_end,
+	[2] = &llc_common_state_trans_end,	/* local busy */
+	[3] = &llc_common_state_trans_end,	/* init_pf_cycle */
+	[4] = &llc_common_state_trans_end,	/* timer */
+	[5] = &llc_common_state_trans_end,	/* receive */
+};
+
+/* Connection State Transition Table */
+struct llc_conn_state llc_conn_state_table[NBR_CONN_STATES] = {
+	[LLC_CONN_STATE_ADM - 1] = {
+		.current_state	= LLC_CONN_STATE_ADM,
+		.transitions	= llc_adm_state_transitions,
+	},
+	[LLC_CONN_STATE_SETUP - 1] = {
+		.current_state	= LLC_CONN_STATE_SETUP,
+		.transitions	= llc_setup_state_transitions,
+	},
+	[LLC_CONN_STATE_NORMAL - 1] = {
+		.current_state	= LLC_CONN_STATE_NORMAL,
+		.transitions	= llc_normal_state_transitions,
+	},
+	[LLC_CONN_STATE_BUSY - 1] = {
+		.current_state	= LLC_CONN_STATE_BUSY,
+		.transitions	= llc_busy_state_transitions,
+	},
+	[LLC_CONN_STATE_REJ - 1] = {
+		.current_state	= LLC_CONN_STATE_REJ,
+		.transitions	= llc_reject_state_transitions,
+	},
+	[LLC_CONN_STATE_AWAIT - 1] = {
+		.current_state	= LLC_CONN_STATE_AWAIT,
+		.transitions	= llc_await_state_transitions,
+	},
+	[LLC_CONN_STATE_AWAIT_BUSY - 1] = {
+		.current_state	= LLC_CONN_STATE_AWAIT_BUSY,
+		.transitions	= llc_await_busy_state_transitions,
+	},
+	[LLC_CONN_STATE_AWAIT_REJ - 1] = {
+		.current_state	= LLC_CONN_STATE_AWAIT_REJ,
+		.transitions	= llc_await_rejct_state_transitions,
+	},
+	[LLC_CONN_STATE_D_CONN - 1] = {
+		.current_state	= LLC_CONN_STATE_D_CONN,
+		.transitions	= llc_d_conn_state_transitions,
+	},
+	[LLC_CONN_STATE_RESET - 1] = {
+		.current_state	= LLC_CONN_STATE_RESET,
+		.transitions	= llc_rst_state_transitions,
+	},
+	[LLC_CONN_STATE_ERROR - 1] = {
+		.current_state	= LLC_CONN_STATE_ERROR,
+		.transitions	= llc_error_state_transitions,
+	},
+	[LLC_CONN_STATE_TEMP - 1] = {
+		.current_state	= LLC_CONN_STATE_TEMP,
+		.transitions	= llc_temp_state_transitions,
+	},
+};
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
new file mode 100644
index 00000000000..eba812a9c69
--- /dev/null
+++ b/net/llc/llc_conn.c
@@ -0,0 +1,915 @@
+/*
+ * llc_conn.c - Driver routines for connection component.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ *		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <net/llc_sap.h>
+#include <net/llc_conn.h>
+#include <net/sock.h>
+#include <linux/tcp.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_c_st.h>
+#include <net/llc_pdu.h>
+
+#if 0
+#define dprintk(args...) printk(KERN_DEBUG args)
+#else
+#define dprintk(args...)
+#endif
+
+static int llc_find_offset(int state, int ev_type);
+static void llc_conn_send_pdus(struct sock *sk);
+static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
+static int llc_exec_conn_trans_actions(struct sock *sk,
+				       struct llc_conn_state_trans *trans,
+				       struct sk_buff *ev);
+static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
+							struct sk_buff *skb);
+
+/* Offset table on connection states transition diagram */
+static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV];
+
+/**
+ *	llc_conn_state_process - sends event to connection state machine
+ *	@sk: connection
+ *	@skb: occurred event
+ *
+ *	Sends an event to connection state machine. After processing event
+ *	(executing it's actions and changing state), upper layer will be
+ *	indicated or confirmed, if needed. Returns 0 for success, 1 for
+ *	failure. The socket lock has to be held before calling this function.
+ */
+int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	/*
+	 * We have to hold the skb, because llc_conn_service will kfree it in
+	 * the sending path and we need to look at the skb->cb, where we encode
+	 * llc_conn_state_ev.
+	 */
+	skb_get(skb);
+	ev->ind_prim = ev->cfm_prim = 0;
+	rc = llc_conn_service(sk, skb); /* sending event to state machine */
+	if (rc) {
+		printk(KERN_ERR "%s: llc_conn_service failed\n", __FUNCTION__);
+		goto out_kfree_skb;
+	}
+
+	if (!ev->ind_prim && !ev->cfm_prim) {
+		/* indicate or confirm not required */
+		if (!skb->list)
+			goto out_kfree_skb;
+		goto out_skb_put;
+	}
+
+	if (ev->ind_prim && ev->cfm_prim) /* Paranoia */
+		skb_get(skb);
+
+	switch (ev->ind_prim) {
+	case LLC_DATA_PRIM:
+		llc_save_primitive(skb, LLC_DATA_PRIM);
+		if (sock_queue_rcv_skb(sk, skb)) {
+			/*
+			 * shouldn't happen
+			 */
+			printk(KERN_ERR "%s: sock_queue_rcv_skb failed!\n",
+			       __FUNCTION__);
+			kfree_skb(skb);
+		}
+		break;
+	case LLC_CONN_PRIM: {
+		struct sock *parent = skb->sk;
+
+		skb->sk = sk;
+		skb_queue_tail(&parent->sk_receive_queue, skb);
+		sk->sk_state_change(parent);
+	}
+		break;
+	case LLC_DISC_PRIM:
+		sock_hold(sk);
+		if (sk->sk_type == SOCK_STREAM &&
+		    sk->sk_state == TCP_ESTABLISHED) {
+			sk->sk_shutdown       = SHUTDOWN_MASK;
+			sk->sk_socket->state  = SS_UNCONNECTED;
+			sk->sk_state          = TCP_CLOSE;
+			if (!sock_flag(sk, SOCK_DEAD)) {
+				sk->sk_state_change(sk);
+				sock_set_flag(sk, SOCK_DEAD);
+			}
+		}
+		kfree_skb(skb);
+		sock_put(sk);
+		break;
+	case LLC_RESET_PRIM:
+		/*
+		 * FIXME:
+		 * RESET is not being notified to upper layers for now
+		 */
+		printk(KERN_INFO "%s: received a reset ind!\n", __FUNCTION__);
+		kfree_skb(skb);
+		break;
+	default:
+		if (ev->ind_prim) {
+			printk(KERN_INFO "%s: received unknown %d prim!\n",
+				__FUNCTION__, ev->ind_prim);
+			kfree_skb(skb);
+		}
+		/* No indication */
+		break;
+	}
+
+	switch (ev->cfm_prim) {
+	case LLC_DATA_PRIM:
+		if (!llc_data_accept_state(llc->state))
+			sk->sk_write_space(sk);
+		else
+			rc = llc->failed_data_req = 1;
+		break;
+	case LLC_CONN_PRIM:
+		if (sk->sk_type == SOCK_STREAM &&
+		    sk->sk_state == TCP_SYN_SENT) {
+			if (ev->status) {
+				sk->sk_socket->state = SS_UNCONNECTED;
+				sk->sk_state         = TCP_CLOSE;
+			} else {
+				sk->sk_socket->state = SS_CONNECTED;
+				sk->sk_state         = TCP_ESTABLISHED;
+			}
+			sk->sk_state_change(sk);
+		}
+		break;
+	case LLC_DISC_PRIM:
+		sock_hold(sk);
+		if (sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_CLOSING) {
+			sk->sk_socket->state = SS_UNCONNECTED;
+			sk->sk_state         = TCP_CLOSE;
+			sk->sk_state_change(sk);
+		}
+		sock_put(sk);
+		break;
+	case LLC_RESET_PRIM:
+		/*
+		 * FIXME:
+		 * RESET is not being notified to upper layers for now
+		 */
+		printk(KERN_INFO "%s: received a reset conf!\n", __FUNCTION__);
+		break;
+	default:
+		if (ev->cfm_prim) {
+			printk(KERN_INFO "%s: received unknown %d prim!\n",
+					__FUNCTION__, ev->cfm_prim);
+			break;
+		}
+		goto out_skb_put; /* No confirmation */
+	}
+out_kfree_skb:
+	kfree_skb(skb);
+out_skb_put:
+	kfree_skb(skb);
+	return rc;
+}
+
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+{
+	/* queue PDU to send to MAC layer */
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	llc_conn_send_pdus(sk);
+}
+
+/**
+ *	llc_conn_rtn_pdu - sends received data pdu to upper layer
+ *	@sk: Active connection
+ *	@skb: Received data frame
+ *
+ *	Sends received data pdu to upper layer (by using indicate function).
+ *	Prepares service parameters (prim and prim_data). calling indication
+ *	function will be done in llc_conn_state_process.
+ */
+void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+	ev->ind_prim = LLC_DATA_PRIM;
+}
+
+/**
+ *	llc_conn_resend_i_pdu_as_cmd - resend all all unacknowledged I PDUs
+ *	@sk: active connection
+ *	@nr: NR
+ *	@first_p_bit: p_bit value of first pdu
+ *
+ *	Resend all unacknowledged I PDUs, starting with the NR; send first as
+ *	command PDU with P bit equal first_p_bit; if more than one send
+ *	subsequent as command PDUs with P bit equal zero (0).
+ */
+void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
+{
+	struct sk_buff *skb;
+	struct llc_pdu_sn *pdu;
+	u16 nbr_unack_pdus;
+	struct llc_sock *llc;
+	u8 howmany_resend = 0;
+
+	llc_conn_remove_acked_pdus(sk, nr, &nbr_unack_pdus);
+	if (!nbr_unack_pdus)
+		goto out;
+	/*
+	 * Process unack PDUs only if unack queue is not empty; remove
+	 * appropriate PDUs, fix them up, and put them on mac_pdu_q.
+	 */
+	llc = llc_sk(sk);
+
+	while ((skb = skb_dequeue(&llc->pdu_unack_q)) != NULL) {
+		pdu = llc_pdu_sn_hdr(skb);
+		llc_pdu_set_cmd_rsp(skb, LLC_PDU_CMD);
+		llc_pdu_set_pf_bit(skb, first_p_bit);
+		skb_queue_tail(&sk->sk_write_queue, skb);
+		first_p_bit = 0;
+		llc->vS = LLC_I_GET_NS(pdu);
+		howmany_resend++;
+	}
+	if (howmany_resend > 0)
+		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
+	/* any PDUs to re-send are queued up; start sending to MAC */
+	llc_conn_send_pdus(sk);
+out:;
+}
+
+/**
+ *	llc_conn_resend_i_pdu_as_rsp - Resend all unacknowledged I PDUs
+ *	@sk: active connection.
+ *	@nr: NR
+ *	@first_f_bit: f_bit value of first pdu.
+ *
+ *	Resend all unacknowledged I PDUs, starting with the NR; send first as
+ *	response PDU with F bit equal first_f_bit; if more than one send
+ *	subsequent as response PDUs with F bit equal zero (0).
+ */
+void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
+{
+	struct sk_buff *skb;
+	u16 nbr_unack_pdus;
+	struct llc_sock *llc = llc_sk(sk);
+	u8 howmany_resend = 0;
+
+	llc_conn_remove_acked_pdus(sk, nr, &nbr_unack_pdus);
+	if (!nbr_unack_pdus)
+		goto out;
+	/*
+	 * Process unack PDUs only if unack queue is not empty; remove
+	 * appropriate PDUs, fix them up, and put them on mac_pdu_q
+	 */
+	while ((skb = skb_dequeue(&llc->pdu_unack_q)) != NULL) {
+		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+		llc_pdu_set_cmd_rsp(skb, LLC_PDU_RSP);
+		llc_pdu_set_pf_bit(skb, first_f_bit);
+		skb_queue_tail(&sk->sk_write_queue, skb);
+		first_f_bit = 0;
+		llc->vS = LLC_I_GET_NS(pdu);
+		howmany_resend++;
+	}
+	if (howmany_resend > 0)
+		llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
+	/* any PDUs to re-send are queued up; start sending to MAC */
+	llc_conn_send_pdus(sk);
+out:;
+}
+
+/**
+ *	llc_conn_remove_acked_pdus - Removes acknowledged pdus from tx queue
+ *	@sk: active connection
+ *	nr: NR
+ *	how_many_unacked: size of pdu_unack_q after removing acked pdus
+ *
+ *	Removes acknowledged pdus from transmit queue (pdu_unack_q). Returns
+ *	the number of pdus that removed from queue.
+ */
+int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked)
+{
+	int pdu_pos, i;
+	struct sk_buff *skb;
+	struct llc_pdu_sn *pdu;
+	int nbr_acked = 0;
+	struct llc_sock *llc = llc_sk(sk);
+	int q_len = skb_queue_len(&llc->pdu_unack_q);
+
+	if (!q_len)
+		goto out;
+	skb = skb_peek(&llc->pdu_unack_q);
+	pdu = llc_pdu_sn_hdr(skb);
+
+	/* finding position of last acked pdu in queue */
+	pdu_pos = ((int)LLC_2_SEQ_NBR_MODULO + (int)nr -
+			(int)LLC_I_GET_NS(pdu)) % LLC_2_SEQ_NBR_MODULO;
+
+	for (i = 0; i < pdu_pos && i < q_len; i++) {
+		skb = skb_dequeue(&llc->pdu_unack_q);
+		if (skb)
+			kfree_skb(skb);
+		nbr_acked++;
+	}
+out:
+	*how_many_unacked = skb_queue_len(&llc->pdu_unack_q);
+	return nbr_acked;
+}
+
+/**
+ *	llc_conn_send_pdus - Sends queued PDUs
+ *	@sk: active connection
+ *
+ *	Sends queued pdus to MAC layer for transmission.
+ */
+static void llc_conn_send_pdus(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+		if (LLC_PDU_TYPE_IS_I(pdu) &&
+		    !(skb->dev->flags & IFF_LOOPBACK)) {
+			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
+			skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
+			if (!skb2)
+				break;
+			skb = skb2;
+		}
+		dev_queue_xmit(skb);
+	}
+}
+
+/**
+ *	llc_conn_service - finds transition and changes state of connection
+ *	@sk: connection
+ *	@skb: happened event
+ *
+ *	This function finds transition that matches with happened event, then
+ *	executes related actions and finally changes state of connection.
+ *	Returns 0 for success, 1 for failure.
+ */
+static int llc_conn_service(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = 1;
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_conn_state_trans *trans;
+
+	if (llc->state > NBR_CONN_STATES)
+		goto out;
+	rc = 0;
+	trans = llc_qualify_conn_ev(sk, skb);
+	if (trans) {
+		rc = llc_exec_conn_trans_actions(sk, trans, skb);
+		if (!rc && trans->next_state != NO_STATE_CHANGE) {
+			llc->state = trans->next_state;
+			if (!llc_data_accept_state(llc->state))
+				sk->sk_state_change(sk);
+		}
+	}
+out:
+	return rc;
+}
+
+/**
+ *	llc_qualify_conn_ev - finds transition for event
+ *	@sk: connection
+ *	@skb: happened event
+ *
+ *	This function finds transition that matches with happened event.
+ *	Returns pointer to found transition on success, %NULL otherwise.
+ */
+static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk,
+							struct sk_buff *skb)
+{
+	struct llc_conn_state_trans **next_trans;
+	llc_conn_ev_qfyr_t *next_qualifier;
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+	struct llc_sock *llc = llc_sk(sk);
+	struct llc_conn_state *curr_state =
+					&llc_conn_state_table[llc->state - 1];
+
+	/* search thru events for this state until
+	 * list exhausted or until no more
+	 */
+	for (next_trans = curr_state->transitions +
+		llc_find_offset(llc->state - 1, ev->type);
+	     (*next_trans)->ev; next_trans++) {
+		if (!((*next_trans)->ev)(sk, skb)) {
+			/* got POSSIBLE event match; the event may require
+			 * qualification based on the values of a number of
+			 * state flags; if all qualifications are met (i.e.,
+			 * if all qualifying functions return success, or 0,
+			 * then this is THE event we're looking for
+			 */
+			for (next_qualifier = (*next_trans)->ev_qualifiers;
+			     next_qualifier && *next_qualifier &&
+			     !(*next_qualifier)(sk, skb); next_qualifier++)
+				/* nothing */;
+			if (!next_qualifier || !*next_qualifier)
+				/* all qualifiers executed successfully; this is
+				 * our transition; return it so we can perform
+				 * the associated actions & change the state
+				 */
+				return *next_trans;
+		}
+	}
+	return NULL;
+}
+
+/**
+ *	llc_exec_conn_trans_actions - executes related actions
+ *	@sk: connection
+ *	@trans: transition that it's actions must be performed
+ *	@skb: event
+ *
+ *	Executes actions that is related to happened event. Returns 0 for
+ *	success, 1 to indicate failure of at least one action.
+ */
+static int llc_exec_conn_trans_actions(struct sock *sk,
+				       struct llc_conn_state_trans *trans,
+				       struct sk_buff *skb)
+{
+	int rc = 0;
+	llc_conn_action_t *next_action;
+
+	for (next_action = trans->ev_actions;
+	     next_action && *next_action; next_action++) {
+		int rc2 = (*next_action)(sk, skb);
+
+		if (rc2 == 2) {
+			rc = rc2;
+			break;
+		} else if (rc2)
+			rc = 1;
+	}
+	return rc;
+}
+
+/**
+ *	llc_lookup_established - Finds connection for the remote/local sap/mac
+ *	@sap: SAP
+ *	@daddr: address of remote LLC (MAC + SAP)
+ *	@laddr: address of local LLC (MAC + SAP)
+ *
+ *	Search connection list of the SAP and finds connection using the remote
+ *	mac, remote sap, local mac, and local sap. Returns pointer for
+ *	connection found, %NULL otherwise.
+ */
+struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr,
+				    struct llc_addr *laddr)
+{
+	struct sock *rc;
+	struct hlist_node *node;
+
+	read_lock_bh(&sap->sk_list.lock);
+	sk_for_each(rc, node, &sap->sk_list.list) {
+		struct llc_sock *llc = llc_sk(rc);
+
+		if (llc->laddr.lsap == laddr->lsap &&
+		    llc->daddr.lsap == daddr->lsap &&
+		    llc_mac_match(llc->laddr.mac, laddr->mac) &&
+		    llc_mac_match(llc->daddr.mac, daddr->mac)) {
+			sock_hold(rc);
+			goto found;
+		}
+	}
+	rc = NULL;
+found:
+	read_unlock_bh(&sap->sk_list.lock);
+	return rc;
+}
+
+/**
+ *	llc_lookup_listener - Finds listener for local MAC + SAP
+ *	@sap: SAP
+ *	@laddr: address of local LLC (MAC + SAP)
+ *
+ *	Search connection list of the SAP and finds connection listening on
+ *	local mac, and local sap. Returns pointer for parent socket found,
+ *	%NULL otherwise.
+ */
+static struct sock *llc_lookup_listener(struct llc_sap *sap,
+					struct llc_addr *laddr)
+{
+	struct sock *rc;
+	struct hlist_node *node;
+
+	read_lock_bh(&sap->sk_list.lock);
+	sk_for_each(rc, node, &sap->sk_list.list) {
+		struct llc_sock *llc = llc_sk(rc);
+
+		if (rc->sk_type == SOCK_STREAM && rc->sk_state == TCP_LISTEN &&
+		    llc->laddr.lsap == laddr->lsap &&
+		    (llc_mac_match(llc->laddr.mac, laddr->mac) ||
+		     llc_mac_null(llc->laddr.mac))) {
+			sock_hold(rc);
+			goto found;
+		}
+	}
+	rc = NULL;
+found:
+	read_unlock_bh(&sap->sk_list.lock);
+	return rc;
+}
+
+/**
+ *	llc_data_accept_state - designates if in this state data can be sent.
+ *	@state: state of connection.
+ *
+ *	Returns 0 if data can be sent, 1 otherwise.
+ */
+u8 llc_data_accept_state(u8 state)
+{
+	return state != LLC_CONN_STATE_NORMAL && state != LLC_CONN_STATE_BUSY &&
+	       state != LLC_CONN_STATE_REJ;
+}
+
+/**
+ *	find_next_offset - finds offset for next category of transitions
+ *	@state: state table.
+ *	@offset: start offset.
+ *
+ *	Finds offset of next category of transitions in transition table.
+ *	Returns the start index of next category.
+ */
+static u16 find_next_offset(struct llc_conn_state *state, u16 offset)
+{
+	u16 cnt = 0;
+	struct llc_conn_state_trans **next_trans;
+
+	for (next_trans = state->transitions + offset;
+	     (*next_trans)->ev; next_trans++)
+		++cnt;
+	return cnt;
+}
+
+/**
+ *	llc_build_offset_table - builds offset table of connection
+ *
+ *	Fills offset table of connection state transition table
+ *	(llc_offset_table).
+ */
+void __init llc_build_offset_table(void)
+{
+	struct llc_conn_state *curr_state;
+	int state, ev_type, next_offset;
+
+	for (state = 0; state < NBR_CONN_STATES; state++) {
+		curr_state = &llc_conn_state_table[state];
+		next_offset = 0;
+		for (ev_type = 0; ev_type < NBR_CONN_EV; ev_type++) {
+			llc_offset_table[state][ev_type] = next_offset;
+			next_offset += find_next_offset(curr_state,
+							next_offset) + 1;
+		}
+	}
+}
+
+/**
+ *	llc_find_offset - finds start offset of category of transitions
+ *	@state: state of connection
+ *	@ev_type: type of happened event
+ *
+ *	Finds start offset of desired category of transitions. Returns the
+ *	desired start offset.
+ */
+static int llc_find_offset(int state, int ev_type)
+{
+	int rc = 0;
+	/* at this stage, llc_offset_table[..][2] is not important. it is for
+	 * init_pf_cycle and I don't know what is it.
+	 */
+	switch (ev_type) {
+	case LLC_CONN_EV_TYPE_PRIM:
+		rc = llc_offset_table[state][0]; break;
+	case LLC_CONN_EV_TYPE_PDU:
+		rc = llc_offset_table[state][4]; break;
+	case LLC_CONN_EV_TYPE_SIMPLE:
+		rc = llc_offset_table[state][1]; break;
+	case LLC_CONN_EV_TYPE_P_TMR:
+	case LLC_CONN_EV_TYPE_ACK_TMR:
+	case LLC_CONN_EV_TYPE_REJ_TMR:
+	case LLC_CONN_EV_TYPE_BUSY_TMR:
+		rc = llc_offset_table[state][3]; break;
+	}
+	return rc;
+}
+
+/**
+ *	llc_sap_add_socket - adds a socket to a SAP
+ *	@sap: SAP
+ *	@sk: socket
+ *
+ *	This function adds a socket to sk_list of a SAP.
+ */
+void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk)
+{
+	write_lock_bh(&sap->sk_list.lock);
+	llc_sk(sk)->sap = sap;
+	sk_add_node(sk, &sap->sk_list.list);
+	write_unlock_bh(&sap->sk_list.lock);
+}
+
+/**
+ *	llc_sap_remove_socket - removes a socket from SAP
+ *	@sap: SAP
+ *	@sk: socket
+ *
+ *	This function removes a connection from sk_list.list of a SAP if
+ *	the connection was in this list.
+ */
+void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk)
+{
+	write_lock_bh(&sap->sk_list.lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&sap->sk_list.lock);
+}
+
+/**
+ *	llc_conn_rcv - sends received pdus to the connection state machine
+ *	@sk: current connection structure.
+ *	@skb: received frame.
+ *
+ *	Sends received pdus to the connection state machine.
+ */
+static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (!llc->dev)
+		llc->dev = skb->dev;
+	ev->type   = LLC_CONN_EV_TYPE_PDU;
+	ev->reason = 0;
+	return llc_conn_state_process(sk, skb);
+}
+
+void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_addr saddr, daddr;
+	struct sock *sk;
+
+	llc_pdu_decode_sa(skb, saddr.mac);
+	llc_pdu_decode_ssap(skb, &saddr.lsap);
+	llc_pdu_decode_da(skb, daddr.mac);
+	llc_pdu_decode_dsap(skb, &daddr.lsap);
+
+	sk = llc_lookup_established(sap, &saddr, &daddr);
+	if (!sk) {
+		/*
+		 * Didn't find an active connection; verify if there
+		 * is a listening socket for this llc addr
+		 */
+		struct llc_sock *llc;
+		struct sock *parent = llc_lookup_listener(sap, &daddr);
+
+		if (!parent) {
+			dprintk("llc_lookup_listener failed!\n");
+			goto drop;
+		}
+
+		sk = llc_sk_alloc(parent->sk_family, GFP_ATOMIC, parent->sk_prot);
+		if (!sk) {
+			sock_put(parent);
+			goto drop;
+		}
+		llc = llc_sk(sk);
+		memcpy(&llc->laddr, &daddr, sizeof(llc->laddr));
+		memcpy(&llc->daddr, &saddr, sizeof(llc->daddr));
+		llc_sap_add_socket(sap, sk);
+		sock_hold(sk);
+		sock_put(parent);
+		skb->sk = parent;
+	} else
+		skb->sk = sk;
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk))
+		llc_conn_rcv(sk, skb);
+	else {
+		dprintk("%s: adding to backlog...\n", __FUNCTION__);
+		llc_set_backlog_type(skb, LLC_PACKET);
+		sk_add_backlog(sk, skb);
+	}
+	bh_unlock_sock(sk);
+	sock_put(sk);
+	return;
+drop:
+	kfree_skb(skb);
+}
+
+#undef LLC_REFCNT_DEBUG
+#ifdef LLC_REFCNT_DEBUG
+static atomic_t llc_sock_nr;
+#endif
+
+/**
+ *	llc_release_sockets - releases all sockets in a sap
+ *	@sap: sap to release its sockets
+ *
+ *	Releases all connections of a sap. Returns 0 if all actions complete
+ *	successfully, nonzero otherwise
+ */
+int llc_release_sockets(struct llc_sap *sap)
+{
+	int rc = 0;
+	struct sock *sk;
+	struct hlist_node *node;
+
+	write_lock_bh(&sap->sk_list.lock);
+
+	sk_for_each(sk, node, &sap->sk_list.list) {
+		llc_sk(sk)->state = LLC_CONN_STATE_TEMP;
+
+		if (llc_send_disc(sk))
+			rc = 1;
+	}
+
+	write_unlock_bh(&sap->sk_list.lock);
+	return rc;
+}
+
+/**
+ *	llc_backlog_rcv - Processes rx frames and expired timers.
+ *	@sk: LLC sock (p8022 connection)
+ *	@skb: queued rx frame or event
+ *
+ *	This function processes frames that has received and timers that has
+ *	expired during sending an I pdu (refer to data_req_handler).  frames
+ *	queue by llc_rcv function (llc_mac.c) and timers queue by timer
+ *	callback functions(llc_c_ac.c).
+ */
+static int llc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int rc = 0;
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc_backlog_type(skb) == LLC_PACKET) {
+		if (llc->state > 1) /* not closed */
+			rc = llc_conn_rcv(sk, skb);
+		else
+			goto out_kfree_skb;
+	} else if (llc_backlog_type(skb) == LLC_EVENT) {
+		/* timer expiration event */
+		if (llc->state > 1)  /* not closed */
+			rc = llc_conn_state_process(sk, skb);
+		else
+			goto out_kfree_skb;
+	} else {
+		printk(KERN_ERR "%s: invalid skb in backlog\n", __FUNCTION__);
+		goto out_kfree_skb;
+	}
+out:
+	return rc;
+out_kfree_skb:
+	kfree_skb(skb);
+	goto out;
+}
+
+/**
+ *     llc_sk_init - Initializes a socket with default llc values.
+ *     @sk: socket to initialize.
+ *
+ *     Initializes a socket with default llc values.
+ */
+static void llc_sk_init(struct sock* sk)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc->state    = LLC_CONN_STATE_ADM;
+	llc->inc_cntr = llc->dec_cntr = 2;
+	llc->dec_step = llc->connect_step = 1;
+
+	init_timer(&llc->ack_timer.timer);
+	llc->ack_timer.expire	      = LLC_ACK_TIME;
+	llc->ack_timer.timer.data     = (unsigned long)sk;
+	llc->ack_timer.timer.function = llc_conn_ack_tmr_cb;
+
+	init_timer(&llc->pf_cycle_timer.timer);
+	llc->pf_cycle_timer.expire	   = LLC_P_TIME;
+	llc->pf_cycle_timer.timer.data     = (unsigned long)sk;
+	llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb;
+
+	init_timer(&llc->rej_sent_timer.timer);
+	llc->rej_sent_timer.expire	   = LLC_REJ_TIME;
+	llc->rej_sent_timer.timer.data     = (unsigned long)sk;
+	llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb;
+
+	init_timer(&llc->busy_state_timer.timer);
+	llc->busy_state_timer.expire	     = LLC_BUSY_TIME;
+	llc->busy_state_timer.timer.data     = (unsigned long)sk;
+	llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb;
+
+	llc->n2 = 2;   /* max retransmit */
+	llc->k  = 2;   /* tx win size, will adjust dynam */
+	llc->rw = 128; /* rx win size (opt and equal to
+		        * tx_win of remote LLC) */
+	skb_queue_head_init(&llc->pdu_unack_q);
+	sk->sk_backlog_rcv = llc_backlog_rcv;
+}
+
+/**
+ *	llc_sk_alloc - Allocates LLC sock
+ *	@family: upper layer protocol family
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ *	Allocates a LLC sock and initializes it. Returns the new LLC sock
+ *	or %NULL if there's no memory available for one
+ */
+struct sock *llc_sk_alloc(int family, int priority, struct proto *prot)
+{
+	struct sock *sk = sk_alloc(family, priority, prot, 1);
+
+	if (!sk)
+		goto out;
+	llc_sk_init(sk);
+	sock_init_data(NULL, sk);
+#ifdef LLC_REFCNT_DEBUG
+	atomic_inc(&llc_sock_nr);
+	printk(KERN_DEBUG "LLC socket %p created in %s, now we have %d alive\n", sk,
+		__FUNCTION__, atomic_read(&llc_sock_nr));
+#endif
+out:
+	return sk;
+}
+
+/**
+ *	llc_sk_free - Frees a LLC socket
+ *	@sk - socket to free
+ *
+ *	Frees a LLC socket
+ */
+void llc_sk_free(struct sock *sk)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc->state = LLC_CONN_OUT_OF_SVC;
+	/* Stop all (possibly) running timers */
+	llc_conn_ac_stop_all_timers(sk, NULL);
+#ifdef DEBUG_LLC_CONN_ALLOC
+	printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __FUNCTION__,
+		skb_queue_len(&llc->pdu_unack_q),
+		skb_queue_len(&sk->sk_write_queue));
+#endif
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&llc->pdu_unack_q);
+#ifdef LLC_REFCNT_DEBUG
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n",
+			sk, __FUNCTION__, atomic_read(&sk->sk_refcnt));
+		printk(KERN_DEBUG "%d LLC sockets are still alive\n",
+			atomic_read(&llc_sock_nr));
+	} else {
+		atomic_dec(&llc_sock_nr);
+		printk(KERN_DEBUG "LLC socket %p released in %s, %d are still alive\n", sk,
+			__FUNCTION__, atomic_read(&llc_sock_nr));
+	}
+#endif
+	sock_put(sk);
+}
+
+/**
+ *	llc_sk_reset - resets a connection
+ *	@sk: LLC socket to reset
+ *
+ *	Resets a connection to the out of service state. Stops its timers
+ *	and frees any frames in the queues of the connection.
+ */
+void llc_sk_reset(struct sock *sk)
+{
+	struct llc_sock *llc = llc_sk(sk);
+
+	llc_conn_ac_stop_all_timers(sk, NULL);
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&llc->pdu_unack_q);
+	llc->remote_busy_flag	= 0;
+	llc->cause_flag		= 0;
+	llc->retry_count	= 0;
+	llc_conn_set_p_flag(sk, 0);
+	llc->f_flag		= 0;
+	llc->s_flag		= 0;
+	llc->ack_pf		= 0;
+	llc->first_pdu_Ns	= 0;
+	llc->ack_must_be_send	= 0;
+	llc->dec_step		= 1;
+	llc->inc_cntr		= 2;
+	llc->dec_cntr		= 2;
+	llc->X			= 0;
+	llc->failed_data_req	= 0 ;
+	llc->last_nr		= 0;
+}
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
new file mode 100644
index 00000000000..5ff02c080a0
--- /dev/null
+++ b/net/llc/llc_core.c
@@ -0,0 +1,179 @@
+/*
+ * llc_core.c - Minimum needed routines for sap handling and module init/exit
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <net/llc.h>
+
+LIST_HEAD(llc_sap_list);
+DEFINE_RWLOCK(llc_sap_list_lock);
+
+unsigned char llc_station_mac_sa[ETH_ALEN];
+
+/**
+ *	llc_sap_alloc - allocates and initializes sap.
+ *
+ *	Allocates and initializes sap.
+ */
+static struct llc_sap *llc_sap_alloc(void)
+{
+	struct llc_sap *sap = kmalloc(sizeof(*sap), GFP_ATOMIC);
+
+	if (sap) {
+		memset(sap, 0, sizeof(*sap));
+		sap->state = LLC_SAP_STATE_ACTIVE;
+		memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN);
+		rwlock_init(&sap->sk_list.lock);
+	}
+	return sap;
+}
+
+/**
+ *	llc_add_sap - add sap to station list
+ *	@sap: Address of the sap
+ *
+ *	Adds a sap to the LLC's station sap list.
+ */
+static void llc_add_sap(struct llc_sap *sap)
+{
+	write_lock_bh(&llc_sap_list_lock);
+	list_add_tail(&sap->node, &llc_sap_list);
+	write_unlock_bh(&llc_sap_list_lock);
+}
+
+/**
+ *	llc_del_sap - del sap from station list
+ *	@sap: Address of the sap
+ *
+ *	Removes a sap to the LLC's station sap list.
+ */
+static void llc_del_sap(struct llc_sap *sap)
+{
+	write_lock_bh(&llc_sap_list_lock);
+	list_del(&sap->node);
+	write_unlock_bh(&llc_sap_list_lock);
+}
+
+/**
+ *	llc_sap_find - searchs a SAP in station
+ *	@sap_value: sap to be found
+ *
+ *	Searchs for a sap in the sap list of the LLC's station upon the sap ID.
+ *	Returns the sap or %NULL if not found.
+ */
+struct llc_sap *llc_sap_find(unsigned char sap_value)
+{
+	struct llc_sap* sap;
+
+	read_lock_bh(&llc_sap_list_lock);
+	list_for_each_entry(sap, &llc_sap_list, node)
+		if (sap->laddr.lsap == sap_value)
+			goto out;
+	sap = NULL;
+out:
+	read_unlock_bh(&llc_sap_list_lock);
+	return sap;
+}
+
+/**
+ *	llc_sap_open - open interface to the upper layers.
+ *	@lsap: SAP number.
+ *	@func: rcv func for datalink protos
+ *
+ *	Interface function to upper layer. Each one who wants to get a SAP
+ *	(for example NetBEUI) should call this function. Returns the opened
+ *	SAP for success, NULL for failure.
+ */
+struct llc_sap *llc_sap_open(unsigned char lsap,
+			     int (*func)(struct sk_buff *skb,
+					 struct net_device *dev,
+					 struct packet_type *pt))
+{
+	struct llc_sap *sap = llc_sap_find(lsap);
+
+	if (sap) { /* SAP already exists */
+		sap = NULL;
+		goto out;
+	}
+	sap = llc_sap_alloc();
+	if (!sap)
+		goto out;
+	sap->laddr.lsap = lsap;
+	sap->rcv_func	= func;
+	llc_add_sap(sap);
+out:
+	return sap;
+}
+
+/**
+ *	llc_sap_close - close interface for upper layers.
+ *	@sap: SAP to be closed.
+ *
+ *	Close interface function to upper layer. Each one who wants to
+ *	close an open SAP (for example NetBEUI) should call this function.
+ * 	Removes this sap from the list of saps in the station and then
+ * 	frees the memory for this sap.
+ */
+void llc_sap_close(struct llc_sap *sap)
+{
+	WARN_ON(!hlist_empty(&sap->sk_list.list));
+	llc_del_sap(sap);
+	kfree(sap);
+}
+
+static struct packet_type llc_packet_type = {
+	.type = __constant_htons(ETH_P_802_2),
+	.func = llc_rcv,
+};
+
+static struct packet_type llc_tr_packet_type = {
+	.type = __constant_htons(ETH_P_TR_802_2),
+	.func = llc_rcv,
+};
+
+static int __init llc_init(void)
+{
+	if (dev_base->next)
+		memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN);
+	else
+		memset(llc_station_mac_sa, 0, ETH_ALEN);
+	dev_add_pack(&llc_packet_type);
+	dev_add_pack(&llc_tr_packet_type);
+	return 0;
+}
+
+static void __exit llc_exit(void)
+{
+	dev_remove_pack(&llc_packet_type);
+	dev_remove_pack(&llc_tr_packet_type);
+}
+
+module_init(llc_init);
+module_exit(llc_exit);
+
+EXPORT_SYMBOL(llc_station_mac_sa);
+EXPORT_SYMBOL(llc_sap_list);
+EXPORT_SYMBOL(llc_sap_list_lock);
+EXPORT_SYMBOL(llc_sap_find);
+EXPORT_SYMBOL(llc_sap_open);
+EXPORT_SYMBOL(llc_sap_close);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003");
+MODULE_DESCRIPTION("LLC IEEE 802.2 core support");
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
new file mode 100644
index 00000000000..0f9fc48aeaf
--- /dev/null
+++ b/net/llc/llc_if.c
@@ -0,0 +1,157 @@
+/*
+ * llc_if.c - Defines LLC interface to upper layer
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#include <asm/errno.h>
+#include <net/llc_if.h>
+#include <net/llc_sap.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_conn.h>
+#include <net/sock.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_c_st.h>
+
+u8 llc_mac_null_var[IFHWADDRLEN];
+
+/**
+ *	llc_build_and_send_pkt - Connection data sending for upper layers.
+ *	@sk: connection
+ *	@skb: packet to send
+ *
+ *	This function is called when upper layer wants to send data using
+ *	connection oriented communication mode. During sending data, connection
+ *	will be locked and received frames and expired timers will be queued.
+ *	Returns 0 for success, -ECONNABORTED when the connection already
+ *	closed and -EBUSY when sending data is not permitted in this state or
+ *	LLC has send an I pdu with p bit set to 1 and is waiting for it's
+ *	response.
+ */
+int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
+{
+	struct llc_conn_state_ev *ev;
+	int rc = -ECONNABORTED;
+	struct llc_sock *llc = llc_sk(sk);
+
+	if (llc->state == LLC_CONN_STATE_ADM)
+		goto out;
+	rc = -EBUSY;
+	if (llc_data_accept_state(llc->state)) { /* data_conn_refuse */
+		llc->failed_data_req = 1;
+		goto out;
+	}
+	if (llc->p_flag) {
+		llc->failed_data_req = 1;
+		goto out;
+	}
+	ev = llc_conn_ev(skb);
+	ev->type      = LLC_CONN_EV_TYPE_PRIM;
+	ev->prim      = LLC_DATA_PRIM;
+	ev->prim_type = LLC_PRIM_TYPE_REQ;
+	skb->dev      = llc->dev;
+	rc = llc_conn_state_process(sk, skb);
+out:
+	return rc;
+}
+
+/**
+ *	llc_establish_connection - Called by upper layer to establish a conn
+ *	@sk: connection
+ *	@lmac: local mac address
+ *	@dmac: destination mac address
+ *	@dsap: destination sap
+ *
+ *	Upper layer calls this to establish an LLC connection with a remote
+ *	machine. This function packages a proper event and sends it connection
+ *	component state machine. Success or failure of connection
+ *	establishment will inform to upper layer via calling it's confirm
+ *	function and passing proper information.
+ */
+int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap)
+{
+	int rc = -EISCONN;
+	struct llc_addr laddr, daddr;
+	struct sk_buff *skb;
+	struct llc_sock *llc = llc_sk(sk);
+	struct sock *existing;
+
+	laddr.lsap = llc->sap->laddr.lsap;
+	daddr.lsap = dsap;
+	memcpy(daddr.mac, dmac, sizeof(daddr.mac));
+	memcpy(laddr.mac, lmac, sizeof(laddr.mac));
+	existing = llc_lookup_established(llc->sap, &daddr, &laddr);
+	if (existing) {
+		if (existing->sk_state == TCP_ESTABLISHED) {
+			sk = existing;
+			goto out_put;
+		} else
+			sock_put(existing);
+	}
+	sock_hold(sk);
+	rc = -ENOMEM;
+	skb = alloc_skb(0, GFP_ATOMIC);
+	if (skb) {
+		struct llc_conn_state_ev *ev = llc_conn_ev(skb);
+
+		ev->type      = LLC_CONN_EV_TYPE_PRIM;
+		ev->prim      = LLC_CONN_PRIM;
+		ev->prim_type = LLC_PRIM_TYPE_REQ;
+		rc = llc_conn_state_process(sk, skb);
+	}
+out_put:
+	sock_put(sk);
+	return rc;
+}
+
+/**
+ *	llc_send_disc - Called by upper layer to close a connection
+ *	@sk: connection to be closed
+ *
+ *	Upper layer calls this when it wants to close an established LLC
+ *	connection with a remote machine. This function packages a proper event
+ *	and sends it to connection component state machine. Returns 0 for
+ *	success, 1 otherwise.
+ */
+int llc_send_disc(struct sock *sk)
+{
+	u16 rc = 1;
+	struct llc_conn_state_ev *ev;
+	struct sk_buff *skb;
+
+	sock_hold(sk);
+	if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_ESTABLISHED ||
+	    llc_sk(sk)->state == LLC_CONN_STATE_ADM ||
+	    llc_sk(sk)->state == LLC_CONN_OUT_OF_SVC)
+		goto out;
+	/*
+	 * Postpone unassigning the connection from its SAP and returning the
+	 * connection until all ACTIONs have been completely executed
+	 */
+	skb = alloc_skb(0, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+	sk->sk_state  = TCP_CLOSING;
+	ev	      = llc_conn_ev(skb);
+	ev->type      = LLC_CONN_EV_TYPE_PRIM;
+	ev->prim      = LLC_DISC_PRIM;
+	ev->prim_type = LLC_PRIM_TYPE_REQ;
+	rc = llc_conn_state_process(sk, skb);
+out:
+	sock_put(sk);
+	return rc;
+}
+
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
new file mode 100644
index 00000000000..4da6976efc9
--- /dev/null
+++ b/net/llc/llc_input.c
@@ -0,0 +1,189 @@
+/*
+ * llc_input.c - Minimal input path for LLC
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/netdevice.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/llc_sap.h>
+
+#if 0
+#define dprintk(args...) printk(KERN_DEBUG args)
+#else
+#define dprintk(args...)
+#endif
+
+/*
+ * Packet handler for the station, registerable because in the minimal
+ * LLC core that is taking shape only the very minimal subset of LLC that
+ * is needed for things like IPX, Appletalk, etc will stay, with all the
+ * rest in the llc1 and llc2 modules.
+ */
+static void (*llc_station_handler)(struct sk_buff *skb);
+
+/*
+ * Packet handlers for LLC_DEST_SAP and LLC_DEST_CONN.
+ */
+static void (*llc_type_handlers[2])(struct llc_sap *sap,
+				    struct sk_buff *skb);
+
+void llc_add_pack(int type, void (*handler)(struct llc_sap *sap,
+					    struct sk_buff *skb))
+{
+	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
+		llc_type_handlers[type - 1] = handler;
+}
+
+void llc_remove_pack(int type)
+{
+	if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
+		llc_type_handlers[type - 1] = NULL;
+}
+
+void llc_set_station_handler(void (*handler)(struct sk_buff *skb))
+{
+	llc_station_handler = handler;
+}
+
+/**
+ *	llc_pdu_type - returns which LLC component must handle for PDU
+ *	@skb: input skb
+ *
+ *	This function returns which LLC component must handle this PDU.
+ */
+static __inline__ int llc_pdu_type(struct sk_buff *skb)
+{
+	int type = LLC_DEST_CONN; /* I-PDU or S-PDU type */
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	if ((pdu->ctrl_1 & LLC_PDU_TYPE_MASK) != LLC_PDU_TYPE_U)
+		goto out;
+	switch (LLC_U_PDU_CMD(pdu)) {
+	case LLC_1_PDU_CMD_XID:
+	case LLC_1_PDU_CMD_UI:
+	case LLC_1_PDU_CMD_TEST:
+		type = LLC_DEST_SAP;
+		break;
+	case LLC_2_PDU_CMD_SABME:
+	case LLC_2_PDU_CMD_DISC:
+	case LLC_2_PDU_RSP_UA:
+	case LLC_2_PDU_RSP_DM:
+	case LLC_2_PDU_RSP_FRMR:
+		break;
+	default:
+		type = LLC_DEST_INVALID;
+		break;
+	}
+out:
+	return type;
+}
+
+/**
+ *	llc_fixup_skb - initializes skb pointers
+ *	@skb: This argument points to incoming skb
+ *
+ *	Initializes internal skb pointer to start of network layer by deriving
+ *	length of LLC header; finds length of LLC control field in LLC header
+ *	by looking at the two lowest-order bits of the first control field
+ *	byte; field is either 3 or 4 bytes long.
+ */
+static inline int llc_fixup_skb(struct sk_buff *skb)
+{
+	u8 llc_len = 2;
+	struct llc_pdu_sn *pdu;
+
+	if (!pskb_may_pull(skb, sizeof(*pdu)))
+		return 0;
+
+	pdu = (struct llc_pdu_sn *)skb->data;
+	if ((pdu->ctrl_1 & LLC_PDU_TYPE_MASK) == LLC_PDU_TYPE_U)
+		llc_len = 1;
+	llc_len += 2;
+	skb->h.raw += llc_len;
+	skb_pull(skb, llc_len);
+	if (skb->protocol == htons(ETH_P_802_2)) {
+		u16 pdulen = eth_hdr(skb)->h_proto,
+		    data_size = ntohs(pdulen) - llc_len;
+
+		skb_trim(skb, data_size);
+	}
+	return 1;
+}
+
+/**
+ *	llc_rcv - 802.2 entry point from net lower layers
+ *	@skb: received pdu
+ *	@dev: device that receive pdu
+ *	@pt: packet type
+ *
+ *	When the system receives a 802.2 frame this function is called. It
+ *	checks SAP and connection of received pdu and passes frame to
+ *	llc_{station,sap,conn}_rcv for sending to proper state machine. If
+ *	the frame is related to a busy connection (a connection is sending
+ *	data now), it queues this frame in the connection's backlog.
+ */
+int llc_rcv(struct sk_buff *skb, struct net_device *dev,
+	    struct packet_type *pt)
+{
+	struct llc_sap *sap;
+	struct llc_pdu_sn *pdu;
+	int dest;
+
+	/*
+	 * When the interface is in promisc. mode, drop all the crap that it
+	 * receives, do not try to analyse it.
+	 */
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
+		dprintk("%s: PACKET_OTHERHOST\n", __FUNCTION__);
+		goto drop;
+	}
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
+	if (unlikely(!llc_fixup_skb(skb)))
+		goto drop;
+	pdu = llc_pdu_sn_hdr(skb);
+	if (unlikely(!pdu->dsap)) /* NULL DSAP, refer to station */
+	       goto handle_station;
+	sap = llc_sap_find(pdu->dsap);
+	if (unlikely(!sap)) {/* unknown SAP */
+		dprintk("%s: llc_sap_find(%02X) failed!\n", __FUNCTION__,
+		        pdu->dsap);
+		goto drop;
+	}
+	/*
+	 * First the upper layer protocols that don't need the full
+	 * LLC functionality
+	 */
+	if (sap->rcv_func) {
+		sap->rcv_func(skb, dev, pt);
+		goto out;
+	}
+	dest = llc_pdu_type(skb);
+	if (unlikely(!dest || !llc_type_handlers[dest - 1]))
+		goto drop;
+	llc_type_handlers[dest - 1](sap, skb);
+out:
+	return 0;
+drop:
+	kfree_skb(skb);
+	goto out;
+handle_station:
+	if (!llc_station_handler)
+		goto drop;
+	llc_station_handler(skb);
+	goto out;
+}
+
+EXPORT_SYMBOL(llc_add_pack);
+EXPORT_SYMBOL(llc_remove_pack);
+EXPORT_SYMBOL(llc_set_station_handler);
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
new file mode 100644
index 00000000000..ab5784cf163
--- /dev/null
+++ b/net/llc/llc_output.c
@@ -0,0 +1,107 @@
+/*
+ * llc_output.c - LLC minimal output path
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License version 2 for more details.
+ */
+
+#include <linux/if_arp.h>
+#include <linux/if_tr.h>
+#include <linux/netdevice.h>
+#include <linux/trdevice.h>
+#include <linux/skbuff.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+
+/**
+ *	llc_mac_hdr_init - fills MAC header fields
+ *	@skb: Address of the frame to initialize its MAC header
+ *	@sa: The MAC source address
+ *	@da: The MAC destination address
+ *
+ *	Fills MAC header fields, depending on MAC type. Returns 0, If MAC type
+ *	is a valid type and initialization completes correctly 1, otherwise.
+ */
+int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da)
+{
+	int rc = 0;
+
+	switch (skb->dev->type) {
+#ifdef CONFIG_TR
+	case ARPHRD_IEEE802_TR: {
+		struct net_device *dev = skb->dev;
+		struct trh_hdr *trh;
+		
+		skb->mac.raw = skb_push(skb, sizeof(*trh));
+		trh = tr_hdr(skb);
+		trh->ac = AC;
+		trh->fc = LLC_FRAME;
+		if (sa)
+			memcpy(trh->saddr, sa, dev->addr_len);
+		else
+			memset(trh->saddr, 0, dev->addr_len);
+		if (da) {
+			memcpy(trh->daddr, da, dev->addr_len);
+			tr_source_route(skb, trh, dev);
+			skb->mac.raw = skb->data;
+		}
+		break;
+	}
+#endif
+	case ARPHRD_ETHER:
+	case ARPHRD_LOOPBACK: {
+		unsigned short len = skb->len;
+		struct ethhdr *eth;
+
+		skb->mac.raw = skb_push(skb, sizeof(*eth));
+		eth = eth_hdr(skb);
+		eth->h_proto = htons(len);
+		memcpy(eth->h_dest, da, ETH_ALEN);
+		memcpy(eth->h_source, sa, ETH_ALEN);
+		break;
+	}
+	default:
+		printk(KERN_WARNING "device type not supported: %d\n",
+		       skb->dev->type);
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+/**
+ *	llc_build_and_send_ui_pkt - unitdata request interface for upper layers
+ *	@sap: sap to use
+ *	@skb: packet to send
+ *	@dmac: destination mac address
+ *	@dsap: destination sap
+ *
+ *	Upper layers calls this function when upper layer wants to send data
+ *	using connection-less mode communication (UI pdu).
+ *
+ *	Accept data frame from network layer to be sent using connection-
+ *	less mode communication; timeout/retries handled by network layer;
+ *	package primitive as an event and send to SAP event handler
+ */
+int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
+			      unsigned char *dmac, unsigned char dsap)
+{
+	int rc;
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap,
+			    dsap, LLC_PDU_CMD);
+	llc_pdu_init_as_ui_cmd(skb);
+	rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac);
+	if (!rc)
+		rc = dev_queue_xmit(skb);
+	return rc;
+}
+
+EXPORT_SYMBOL(llc_mac_hdr_init);
+EXPORT_SYMBOL(llc_build_and_send_ui_pkt);
diff --git a/net/llc/llc_output.h b/net/llc/llc_output.h
new file mode 100644
index 00000000000..179edf753f0
--- /dev/null
+++ b/net/llc/llc_output.h
@@ -0,0 +1,20 @@
+#ifndef LLC_OUTPUT_H
+#define LLC_OUTPUT_H
+/*
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License version 2 for more details.
+ */
+
+struct sk_buff;
+
+int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da);
+
+#endif /* LLC_OUTPUT_H */
diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c
new file mode 100644
index 00000000000..a28ce525d20
--- /dev/null
+++ b/net/llc/llc_pdu.c
@@ -0,0 +1,372 @@
+/*
+ * llc_pdu.c - access to PDU internals
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ *		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <linux/netdevice.h>
+#include <net/llc_pdu.h>
+
+static void llc_pdu_decode_pdu_type(struct sk_buff *skb, u8 *type);
+static u8 llc_pdu_get_pf_bit(struct llc_pdu_sn *pdu);
+
+void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type)
+{
+	llc_pdu_un_hdr(skb)->ssap |= pdu_type;
+}
+
+/**
+ *	pdu_set_pf_bit - sets poll/final bit in LLC header
+ *	@pdu_frame: input frame that p/f bit must be set into it.
+ *	@bit_value: poll/final bit (0 or 1).
+ *
+ *	This function sets poll/final bit in LLC header (based on type of PDU).
+ *	in I or S pdus, p/f bit is right bit of fourth byte in header. in U
+ *	pdus p/f bit is fifth bit of third byte.
+ */
+void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value)
+{
+	u8 pdu_type;
+	struct llc_pdu_sn *pdu;
+
+	llc_pdu_decode_pdu_type(skb, &pdu_type);
+	pdu = llc_pdu_sn_hdr(skb);
+	
+	switch (pdu_type) {
+	case LLC_PDU_TYPE_I:
+	case LLC_PDU_TYPE_S:
+		pdu->ctrl_2 = (pdu->ctrl_2 & 0xFE) | bit_value;
+		break;
+	case LLC_PDU_TYPE_U:
+		pdu->ctrl_1 |= (pdu->ctrl_1 & 0xEF) | (bit_value << 4);
+		break;
+	}
+}
+
+/**
+ *	llc_pdu_decode_pf_bit - extracs poll/final bit from LLC header
+ *	@skb: input skb that p/f bit must be extracted from it
+ *	@pf_bit: poll/final bit (0 or 1)
+ *
+ *	This function extracts poll/final bit from LLC header (based on type of
+ *	PDU). In I or S pdus, p/f bit is right bit of fourth byte in header. In
+ *	U pdus p/f bit is fifth bit of third byte.
+ */
+void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit)
+{
+	u8 pdu_type;
+	struct llc_pdu_sn *pdu;
+
+	llc_pdu_decode_pdu_type(skb, &pdu_type);
+	pdu = llc_pdu_sn_hdr(skb);
+
+	switch (pdu_type) {
+	case LLC_PDU_TYPE_I:
+	case LLC_PDU_TYPE_S:
+		*pf_bit = pdu->ctrl_2 & LLC_S_PF_BIT_MASK;
+		break;
+	case LLC_PDU_TYPE_U:
+		*pf_bit = (pdu->ctrl_1 & LLC_U_PF_BIT_MASK) >> 4;
+		break;
+	}
+}
+
+/**
+ *	llc_pdu_init_as_disc_cmd - Builds DISC PDU
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *
+ *	Builds a pdu frame as a DISC command.
+ */
+void llc_pdu_init_as_disc_cmd(struct sk_buff *skb, u8 p_bit)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_U;
+	pdu->ctrl_1 |= LLC_2_PDU_CMD_DISC;
+	pdu->ctrl_1 |= ((p_bit & 1) << 4) & LLC_U_PF_BIT_MASK;
+}
+
+/**
+ *	llc_pdu_init_as_i_cmd - builds I pdu
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *	@ns: The sequence number of the data PDU
+ *	@nr: The seq. number of the expected I PDU from the remote
+ *
+ *	Builds a pdu frame as an I command.
+ */
+void llc_pdu_init_as_i_cmd(struct sk_buff *skb, u8 p_bit, u8 ns, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_I;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= (p_bit & LLC_I_PF_BIT_MASK); /* p/f bit */
+	pdu->ctrl_1 |= (ns << 1) & 0xFE;   /* set N(S) in bits 2..8 */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE;   /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_rej_cmd - builds REJ PDU
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *	@nr: The seq. number of the expected I PDU from the remote
+ *
+ *	Builds a pdu frame as a REJ command.
+ */
+void llc_pdu_init_as_rej_cmd(struct sk_buff *skb, u8 p_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_CMD_REJ;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= p_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_rnr_cmd - builds RNR pdu
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *	@nr: The seq. number of the expected I PDU from the remote
+ *
+ *	Builds a pdu frame as an RNR command.
+ */
+void llc_pdu_init_as_rnr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_CMD_RNR;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= p_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_rr_cmd - Builds RR pdu
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *	@nr: The seq. number of the expected I PDU from the remote
+ *
+ *	Builds a pdu frame as an RR command.
+ */
+void llc_pdu_init_as_rr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_CMD_RR;
+	pdu->ctrl_2  = p_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_sabme_cmd - builds SABME pdu
+ *	@skb: Address of the skb to build
+ *	@p_bit: The P bit to set in the PDU
+ *
+ *	Builds a pdu frame as an SABME command.
+ */
+void llc_pdu_init_as_sabme_cmd(struct sk_buff *skb, u8 p_bit)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_U;
+	pdu->ctrl_1 |= LLC_2_PDU_CMD_SABME;
+	pdu->ctrl_1 |= ((p_bit & 1) << 4) & LLC_U_PF_BIT_MASK;
+}
+
+/**
+ *	llc_pdu_init_as_dm_rsp - builds DM response pdu
+ *	@skb: Address of the skb to build
+ *	@f_bit: The F bit to set in the PDU
+ *
+ *	Builds a pdu frame as a DM response.
+ */
+void llc_pdu_init_as_dm_rsp(struct sk_buff *skb, u8 f_bit)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_U;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_DM;
+	pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK;
+}
+
+/**
+ *	llc_pdu_init_as_frmr_rsp - builds FRMR response PDU
+ *	@skb: Address of the frame to build
+ *	@prev_pdu: The rejected PDU frame
+ *	@f_bit: The F bit to set in the PDU
+ *	@vs: tx state vari value for the data link conn at the rejecting LLC
+ *	@vr: rx state var value for the data link conn at the rejecting LLC
+ *	@vzyxw: completely described in the IEEE Std 802.2 document (Pg 55)
+ *
+ *	Builds a pdu frame as a FRMR response.
+ */
+void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu,
+			      u8 f_bit, u8 vs, u8 vr, u8 vzyxw)
+{
+	struct llc_frmr_info *frmr_info;
+	u8 prev_pf = 0;
+	u8 *ctrl;
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_U;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_FRMR;
+	pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK;
+
+	frmr_info = (struct llc_frmr_info *)&pdu->ctrl_2;
+	ctrl = (u8 *)&prev_pdu->ctrl_1;
+	FRMR_INFO_SET_REJ_CNTRL(frmr_info,ctrl);
+	FRMR_INFO_SET_Vs(frmr_info, vs);
+	FRMR_INFO_SET_Vr(frmr_info, vr);
+	prev_pf = llc_pdu_get_pf_bit(prev_pdu);
+	FRMR_INFO_SET_C_R_BIT(frmr_info, prev_pf);
+	FRMR_INFO_SET_INVALID_PDU_CTRL_IND(frmr_info, vzyxw);
+	FRMR_INFO_SET_INVALID_PDU_INFO_IND(frmr_info, vzyxw);
+	FRMR_INFO_SET_PDU_INFO_2LONG_IND(frmr_info, vzyxw);
+	FRMR_INFO_SET_PDU_INVALID_Nr_IND(frmr_info, vzyxw);
+	FRMR_INFO_SET_PDU_INVALID_Ns_IND(frmr_info, vzyxw);
+	skb_put(skb, 5);
+}
+
+/**
+ *	llc_pdu_init_as_rr_rsp - builds RR response pdu
+ *	@skb: Address of the skb to build
+ *	@f_bit: The F bit to set in the PDU
+ *	@nr: The seq. number of the expected data PDU from the remote
+ *
+ *	Builds a pdu frame as an RR response.
+ */
+void llc_pdu_init_as_rr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_RR;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE;  /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_rej_rsp - builds REJ response pdu
+ *	@skb: Address of the skb to build
+ *	@f_bit: The F bit to set in the PDU
+ *	@nr: The seq. number of the expected data PDU from the remote
+ *
+ *	Builds a pdu frame as a REJ response.
+ */
+void llc_pdu_init_as_rej_rsp(struct sk_buff *skb, u8 f_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_REJ;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE;  /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_rnr_rsp - builds RNR response pdu
+ *	@skb: Address of the frame to build
+ *	@f_bit: The F bit to set in the PDU
+ *	@nr: The seq. number of the expected data PDU from the remote
+ *
+ *	Builds a pdu frame as an RNR response.
+ */
+void llc_pdu_init_as_rnr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr)
+{
+	struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_S;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_RNR;
+	pdu->ctrl_2  = 0;
+	pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK;
+	pdu->ctrl_1 &= 0x0F;    /* setting bits 5..8 to zero(reserved) */
+	pdu->ctrl_2 |= (nr << 1) & 0xFE;  /* set N(R) in bits 10..16 */
+}
+
+/**
+ *	llc_pdu_init_as_ua_rsp - builds UA response pdu
+ *	@skb: Address of the frame to build
+ *	@f_bit: The F bit to set in the PDU
+ *
+ *	Builds a pdu frame as a UA response.
+ */
+void llc_pdu_init_as_ua_rsp(struct sk_buff *skb, u8 f_bit)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	pdu->ctrl_1  = LLC_PDU_TYPE_U;
+	pdu->ctrl_1 |= LLC_2_PDU_RSP_UA;
+	pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK;
+}
+
+/**
+ *	llc_pdu_decode_pdu_type - designates PDU type
+ *	@skb: input skb that type of it must be designated.
+ *	@type: type of PDU (output argument).
+ *
+ *	This function designates type of PDU (I, S or U).
+ */
+static void llc_pdu_decode_pdu_type(struct sk_buff *skb, u8 *type)
+{
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	if (pdu->ctrl_1 & 1) {
+		if ((pdu->ctrl_1 & LLC_PDU_TYPE_U) == LLC_PDU_TYPE_U)
+			*type = LLC_PDU_TYPE_U;
+		else
+			*type = LLC_PDU_TYPE_S;
+	} else
+		*type = LLC_PDU_TYPE_I;
+}
+
+/**
+ *	llc_pdu_get_pf_bit - extracts p/f bit of input PDU
+ *	@pdu: pointer to LLC header.
+ *
+ *	This function extracts p/f bit of input PDU. at first examines type of
+ *	PDU and then extracts p/f bit. Returns the p/f bit.
+ */
+static u8 llc_pdu_get_pf_bit(struct llc_pdu_sn *pdu)
+{
+	u8 pdu_type;
+	u8 pf_bit = 0;
+
+	if (pdu->ctrl_1 & 1) {
+		if ((pdu->ctrl_1 & LLC_PDU_TYPE_U) == LLC_PDU_TYPE_U)
+			pdu_type = LLC_PDU_TYPE_U;
+		else
+			pdu_type = LLC_PDU_TYPE_S;
+	} else
+		pdu_type = LLC_PDU_TYPE_I;
+	switch (pdu_type) {
+	case LLC_PDU_TYPE_I:
+	case LLC_PDU_TYPE_S:
+		pf_bit = pdu->ctrl_2 & LLC_S_PF_BIT_MASK;
+		break;
+	case LLC_PDU_TYPE_U:
+		pf_bit = (pdu->ctrl_1 & LLC_U_PF_BIT_MASK) >> 4;
+		break;
+	}
+	return pf_bit;
+}
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
new file mode 100644
index 00000000000..36e8db3fa1a
--- /dev/null
+++ b/net/llc/llc_proc.c
@@ -0,0 +1,267 @@
+/*
+ * proc_llc.c - proc interface for LLC
+ *
+ * Copyright (c) 2001 by Jay Schulist <jschlst@samba.org>
+ *		 2002-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/errno.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <net/llc.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_st.h>
+#include <net/llc_conn.h>
+
+static void llc_ui_format_mac(struct seq_file *seq, unsigned char *mac)
+{
+	seq_printf(seq, "%02X:%02X:%02X:%02X:%02X:%02X",
+		   mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+static struct sock *llc_get_sk_idx(loff_t pos)
+{
+	struct list_head *sap_entry;
+	struct llc_sap *sap;
+	struct hlist_node *node;
+	struct sock *sk = NULL;
+
+	list_for_each(sap_entry, &llc_sap_list) {
+		sap = list_entry(sap_entry, struct llc_sap, node);
+
+		read_lock_bh(&sap->sk_list.lock);
+		sk_for_each(sk, node, &sap->sk_list.list) {
+			if (!pos)
+				goto found;
+			--pos;
+		}
+		read_unlock_bh(&sap->sk_list.lock);
+	}
+	sk = NULL;
+found:
+	return sk;
+}
+
+static void *llc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&llc_sap_list_lock);
+	return l ? llc_get_sk_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock* sk, *next;
+	struct llc_sock *llc;
+	struct llc_sap *sap;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		sk = llc_get_sk_idx(0);
+		goto out;
+	}
+	sk = v;
+	next = sk_next(sk);
+	if (next) {
+		sk = next;
+		goto out;
+	}
+	llc = llc_sk(sk);
+	sap = llc->sap;
+	read_unlock_bh(&sap->sk_list.lock);
+	sk = NULL;
+	for (;;) {
+		if (sap->node.next == &llc_sap_list)
+			break;
+		sap = list_entry(sap->node.next, struct llc_sap, node);
+		read_lock_bh(&sap->sk_list.lock);
+		if (!hlist_empty(&sap->sk_list.list)) {
+			sk = sk_head(&sap->sk_list.list);
+			break;
+		}
+		read_unlock_bh(&sap->sk_list.lock);
+	}
+out:
+	return sk;
+}
+
+static void llc_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v && v != SEQ_START_TOKEN) {
+		struct sock *sk = v;
+		struct llc_sock *llc = llc_sk(sk);
+		struct llc_sap *sap = llc->sap;
+
+		read_unlock_bh(&sap->sk_list.lock);
+	}
+	read_unlock_bh(&llc_sap_list_lock);
+}
+
+static int llc_seq_socket_show(struct seq_file *seq, void *v)
+{
+	struct sock* sk;
+	struct llc_sock *llc;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "SKt Mc local_mac_sap        remote_mac_sap   "
+			      "    tx_queue rx_queue st uid link\n");
+		goto out;
+	}
+	sk = v;
+	llc = llc_sk(sk);
+
+	/* FIXME: check if the address is multicast */
+	seq_printf(seq, "%2X  %2X ", sk->sk_type, 0);
+
+	if (llc->dev)
+		llc_ui_format_mac(seq, llc->dev->dev_addr);
+	else
+		seq_printf(seq, "00:00:00:00:00:00");
+	seq_printf(seq, "@%02X ", llc->sap->laddr.lsap);
+	llc_ui_format_mac(seq, llc->daddr.mac);
+	seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap,
+		   atomic_read(&sk->sk_wmem_alloc),
+		   atomic_read(&sk->sk_rmem_alloc),
+		   sk->sk_state,
+		   sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1,
+		   llc->link);
+out:
+	return 0;
+}
+
+static char *llc_conn_state_names[] = {
+	[LLC_CONN_STATE_ADM] =        "adm", 
+	[LLC_CONN_STATE_SETUP] =      "setup", 
+	[LLC_CONN_STATE_NORMAL] =     "normal",
+	[LLC_CONN_STATE_BUSY] =       "busy", 
+	[LLC_CONN_STATE_REJ] =        "rej", 
+	[LLC_CONN_STATE_AWAIT] =      "await", 
+	[LLC_CONN_STATE_AWAIT_BUSY] = "await_busy",
+	[LLC_CONN_STATE_AWAIT_REJ] =  "await_rej",
+	[LLC_CONN_STATE_D_CONN]	=     "d_conn",
+	[LLC_CONN_STATE_RESET] =      "reset", 
+	[LLC_CONN_STATE_ERROR] =      "error", 
+	[LLC_CONN_STATE_TEMP] =       "temp", 
+};
+
+static int llc_seq_core_show(struct seq_file *seq, void *v)
+{
+	struct sock* sk;
+	struct llc_sock *llc;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Connection list:\n"
+			      "dsap state      retr txw rxw pf ff sf df rs cs "
+			      "tack tpfc trs tbs blog busr\n");
+		goto out;
+	}
+	sk = v;
+	llc = llc_sk(sk);
+
+	seq_printf(seq, " %02X  %-10s %3d  %3d %3d %2d %2d %2d %2d %2d %2d "
+			"%4d %4d %3d %3d %4d %4d\n",
+		   llc->daddr.lsap, llc_conn_state_names[llc->state],
+		   llc->retry_count, llc->k, llc->rw, llc->p_flag, llc->f_flag,
+		   llc->s_flag, llc->data_flag, llc->remote_busy_flag,
+		   llc->cause_flag, timer_pending(&llc->ack_timer.timer),
+		   timer_pending(&llc->pf_cycle_timer.timer),
+		   timer_pending(&llc->rej_sent_timer.timer),
+		   timer_pending(&llc->busy_state_timer.timer),
+		   !!sk->sk_backlog.tail, !!sock_owned_by_user(sk));
+out:
+	return 0;
+}
+
+static struct seq_operations llc_seq_socket_ops = {
+	.start  = llc_seq_start,
+	.next   = llc_seq_next,
+	.stop   = llc_seq_stop,
+	.show   = llc_seq_socket_show,
+};
+
+static struct seq_operations llc_seq_core_ops = {
+	.start  = llc_seq_start,
+	.next   = llc_seq_next,
+	.stop   = llc_seq_stop,
+	.show   = llc_seq_core_show,
+};
+
+static int llc_seq_socket_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &llc_seq_socket_ops);
+}
+
+static int llc_seq_core_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &llc_seq_core_ops);
+}
+
+static struct file_operations llc_seq_socket_fops = {
+	.owner		= THIS_MODULE,
+	.open		= llc_seq_socket_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct file_operations llc_seq_core_fops = {
+	.owner		= THIS_MODULE,
+	.open		= llc_seq_core_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct proc_dir_entry *llc_proc_dir;
+
+int __init llc_proc_init(void)
+{
+	int rc = -ENOMEM;
+	struct proc_dir_entry *p;
+
+	llc_proc_dir = proc_mkdir("llc", proc_net);
+	if (!llc_proc_dir)
+		goto out;
+	llc_proc_dir->owner = THIS_MODULE;
+
+	p = create_proc_entry("socket", S_IRUGO, llc_proc_dir);
+	if (!p)
+		goto out_socket;
+
+	p->proc_fops = &llc_seq_socket_fops;
+
+	p = create_proc_entry("core", S_IRUGO, llc_proc_dir);
+	if (!p)
+		goto out_core;
+
+	p->proc_fops = &llc_seq_core_fops;
+
+	rc = 0;
+out:
+	return rc;
+out_core:
+	remove_proc_entry("socket", llc_proc_dir);
+out_socket:
+	remove_proc_entry("llc", proc_net);
+	goto out;
+}
+
+void llc_proc_exit(void)
+{
+	remove_proc_entry("socket", llc_proc_dir);
+	remove_proc_entry("core", llc_proc_dir);
+	remove_proc_entry("llc", proc_net);
+}
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
new file mode 100644
index 00000000000..ed8ba7de612
--- /dev/null
+++ b/net/llc/llc_s_ac.c
@@ -0,0 +1,205 @@
+/*
+ * llc_s_ac.c - actions performed during sap state transition.
+ *
+ * Description :
+ *   Functions in this module are implementation of sap component actions.
+ *   Details of actions can be found in IEEE-802.2 standard document.
+ *   All functions have one sap and one event as input argument. All of
+ *   them return 0 On success and 1 otherwise.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ *		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <linux/netdevice.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/llc_s_ac.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_sap.h>
+#include "llc_output.h"
+
+/**
+ *	llc_sap_action_unit_data_ind - forward UI PDU to network layer
+ *	@sap: SAP
+ *	@skb: the event to forward
+ *
+ *	Received a UI PDU from MAC layer; forward to network layer as a
+ *	UNITDATA INDICATION; verify our event is the kind we expect
+ */
+int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb)
+{
+	llc_sap_rtn_pdu(sap, skb);
+	return 0;
+}
+
+/**
+ *	llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Sends a UI PDU to the MAC layer in response to a UNITDATA REQUEST
+ *	primitive from the network layer. Verifies event is a primitive type of
+ *	event. Verify the primitive is a UNITDATA REQUEST.
+ */
+int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	int rc;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+			    ev->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_ui_cmd(skb);
+	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
+	if (!rc)
+		rc = dev_queue_xmit(skb);
+	return rc;
+}
+
+/**
+ *	llc_sap_action_send_xid_c - send XID PDU as response to XID REQ
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Send a XID command PDU to MAC layer in response to a XID REQUEST
+ *	primitive from the network layer. Verify event is a primitive type
+ *	event. Verify the primitive is a XID REQUEST.
+ */
+int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	int rc;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+			    ev->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
+	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
+	if (!rc)
+		rc = dev_queue_xmit(skb);
+	return rc;
+}
+
+/**
+ *	llc_sap_action_send_xid_r - send XID PDU resp to MAC for received XID
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Send XID response PDU to MAC in response to an earlier received XID
+ *	command PDU. Verify event is a PDU type event
+ */
+int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb)
+{
+	u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap;
+	int rc = 1;
+	struct sk_buff *nskb;
+
+	llc_pdu_decode_sa(skb, mac_da);
+	llc_pdu_decode_da(skb, mac_sa);
+	llc_pdu_decode_ssap(skb, &dsap);
+	nskb = llc_alloc_frame();
+	if (!nskb)
+		goto out;
+	nskb->dev = skb->dev;
+	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap,
+			    LLC_PDU_RSP);
+	llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0);
+	rc = llc_mac_hdr_init(nskb, mac_sa, mac_da);
+	if (!rc)
+		rc = dev_queue_xmit(nskb);
+out:
+	return rc;
+}
+
+/**
+ *	llc_sap_action_send_test_c - send TEST PDU to MAC in resp to TEST REQ
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Send a TEST command PDU to the MAC layer in response to a TEST REQUEST
+ *	primitive from the network layer. Verify event is a primitive type
+ *	event; verify the primitive is a TEST REQUEST.
+ */
+int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	int rc;
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+			    ev->daddr.lsap, LLC_PDU_CMD);
+	llc_pdu_init_as_test_cmd(skb);
+	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
+	if (!rc)
+		rc = dev_queue_xmit(skb);
+	return rc;
+}
+
+int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb)
+{
+	u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap;
+	struct sk_buff *nskb;
+	int rc = 1;
+
+	llc_pdu_decode_sa(skb, mac_da);
+	llc_pdu_decode_da(skb, mac_sa);
+	llc_pdu_decode_ssap(skb, &dsap);
+	nskb = llc_alloc_frame();
+	if (!nskb)
+		goto out;
+	nskb->dev = skb->dev;
+	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap,
+			    LLC_PDU_RSP);
+	llc_pdu_init_as_test_rsp(nskb, skb);
+	rc = llc_mac_hdr_init(nskb, mac_sa, mac_da);
+	if (!rc)
+		rc = dev_queue_xmit(nskb);
+out:
+	return rc;
+}
+
+/**
+ *	llc_sap_action_report_status - report data link status to layer mgmt
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Report data link status to layer management. Verify our event is the
+ *	kind we expect.
+ */
+int llc_sap_action_report_status(struct llc_sap *sap, struct sk_buff *skb)
+{
+	return 0;
+}
+
+/**
+ *	llc_sap_action_xid_ind - send XID PDU resp to net layer via XID IND
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Send a XID response PDU to the network layer via a XID INDICATION
+ *	primitive.
+ */
+int llc_sap_action_xid_ind(struct llc_sap *sap, struct sk_buff *skb)
+{
+	llc_sap_rtn_pdu(sap, skb);
+	return 0;
+}
+
+/**
+ *	llc_sap_action_test_ind - send TEST PDU to net layer via TEST IND
+ *	@sap: SAP
+ *	@skb: the event to send
+ *
+ *	Send a TEST response PDU to the network layer via a TEST INDICATION
+ *	primitive. Verify our event is a PDU type event.
+ */
+int llc_sap_action_test_ind(struct llc_sap *sap, struct sk_buff *skb)
+{
+	llc_sap_rtn_pdu(sap, skb);
+	return 0;
+}
diff --git a/net/llc/llc_s_ev.c b/net/llc/llc_s_ev.c
new file mode 100644
index 00000000000..a74d2a1d658
--- /dev/null
+++ b/net/llc/llc_s_ev.c
@@ -0,0 +1,115 @@
+/*
+ * llc_s_ev.c - Defines SAP component events
+ *
+ * The followed event functions are SAP component events which are described
+ * in 802.2 LLC protocol standard document.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ *		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/socket.h>
+#include <net/sock.h>
+#include <net/llc_if.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_pdu.h>
+
+int llc_sap_ev_activation_req(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_SIMPLE &&
+	       ev->prim_type == LLC_SAP_EV_ACTIVATION_REQ ? 0 : 1;
+}
+
+int llc_sap_ev_rx_ui(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PDU && LLC_PDU_IS_CMD(pdu) &&
+	       LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_UI ? 0 : 1;
+}
+
+int llc_sap_ev_unitdata_req(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PRIM &&
+	       ev->prim == LLC_DATAUNIT_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+
+}
+
+int llc_sap_ev_xid_req(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PRIM &&
+	       ev->prim == LLC_XID_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_sap_ev_rx_xid_c(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PDU && LLC_PDU_IS_CMD(pdu) &&
+	       LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID ? 0 : 1;
+}
+
+int llc_sap_ev_rx_xid_r(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PDU && LLC_PDU_IS_RSP(pdu) &&
+	       LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID ? 0 : 1;
+}
+
+int llc_sap_ev_test_req(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PRIM &&
+	       ev->prim == LLC_TEST_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+int llc_sap_ev_rx_test_c(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PDU && LLC_PDU_IS_CMD(pdu) &&
+	       LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST ? 0 : 1;
+}
+
+int llc_sap_ev_rx_test_r(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_PDU && LLC_PDU_IS_RSP(pdu) &&
+	       LLC_PDU_TYPE_IS_U(pdu) &&
+	       LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_TEST ? 0 : 1;
+}
+
+int llc_sap_ev_deactivation_req(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	return ev->type == LLC_SAP_EV_TYPE_SIMPLE &&
+	       ev->prim_type == LLC_SAP_EV_DEACTIVATION_REQ ? 0 : 1;
+}
diff --git a/net/llc/llc_s_st.c b/net/llc/llc_s_st.c
new file mode 100644
index 00000000000..6a43201aa32
--- /dev/null
+++ b/net/llc/llc_s_st.c
@@ -0,0 +1,183 @@
+/*
+ * llc_s_st.c - Defines SAP component state machine transitions.
+ *
+ * The followed transitions are SAP component state machine transitions
+ * which are described in 802.2 LLC protocol standard document.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ *		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/types.h>
+#include <net/llc_if.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_s_ac.h>
+#include <net/llc_s_st.h>
+
+/* dummy last-transition indicator; common to all state transition groups
+ * last entry for this state
+ * all members are zeros, .bss zeroes it
+ */
+static struct llc_sap_state_trans llc_sap_state_trans_end;
+
+/* state LLC_SAP_STATE_INACTIVE transition for
+ * LLC_SAP_EV_ACTIVATION_REQ event
+ */
+static llc_sap_action_t llc_sap_inactive_state_actions_1[] = {
+	[0] = llc_sap_action_report_status,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = {
+	.ev =		llc_sap_ev_activation_req,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_inactive_state_actions_1,
+};
+
+/* array of pointers; one to each transition */
+static struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = {
+	[0] = &llc_sap_inactive_state_trans_1,
+	[1] = &llc_sap_state_trans_end,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_RX_UI event */
+static llc_sap_action_t llc_sap_active_state_actions_1[] = {
+	[0] = llc_sap_action_unitdata_ind,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_1 = {
+	.ev =		llc_sap_ev_rx_ui,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_1,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_UNITDATA_REQ event */
+static llc_sap_action_t llc_sap_active_state_actions_2[] = {
+	[0] = llc_sap_action_send_ui,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_2 = {
+	.ev =		llc_sap_ev_unitdata_req,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_2,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_XID_REQ event */
+static llc_sap_action_t llc_sap_active_state_actions_3[] = {
+	[0] = llc_sap_action_send_xid_c,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_3 = {
+	.ev =		llc_sap_ev_xid_req,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_3,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_RX_XID_C event */
+static llc_sap_action_t llc_sap_active_state_actions_4[] = {
+	[0] = llc_sap_action_send_xid_r,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_4 = {
+	.ev =		llc_sap_ev_rx_xid_c,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_4,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_RX_XID_R event */
+static llc_sap_action_t llc_sap_active_state_actions_5[] = {
+	[0] = llc_sap_action_xid_ind,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_5 = {
+	.ev =		llc_sap_ev_rx_xid_r,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_5,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_TEST_REQ event */
+static llc_sap_action_t llc_sap_active_state_actions_6[] = {
+	[0] = llc_sap_action_send_test_c,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_6 = {
+	.ev =		llc_sap_ev_test_req,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_6,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_RX_TEST_C event */
+static llc_sap_action_t llc_sap_active_state_actions_7[] = {
+	[0] = llc_sap_action_send_test_r,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_7 = {
+	.ev =		llc_sap_ev_rx_test_c,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_7
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for LLC_SAP_EV_RX_TEST_R event */
+static llc_sap_action_t llc_sap_active_state_actions_8[] = {
+	[0] = llc_sap_action_test_ind,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_8 = {
+	.ev =		llc_sap_ev_rx_test_r,
+	.next_state =	LLC_SAP_STATE_ACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_8,
+};
+
+/* state LLC_SAP_STATE_ACTIVE transition for
+ * LLC_SAP_EV_DEACTIVATION_REQ event
+ */
+static llc_sap_action_t llc_sap_active_state_actions_9[] = {
+	[0] = llc_sap_action_report_status,
+	[1] = NULL,
+};
+
+static struct llc_sap_state_trans llc_sap_active_state_trans_9 = {
+	.ev =		llc_sap_ev_deactivation_req,
+	.next_state =	LLC_SAP_STATE_INACTIVE,
+	.ev_actions =	llc_sap_active_state_actions_9
+};
+
+/* array of pointers; one to each transition */
+static struct llc_sap_state_trans *llc_sap_active_state_transitions[] = {
+	[0] = &llc_sap_active_state_trans_2,
+	[1] = &llc_sap_active_state_trans_1,
+	[2] = &llc_sap_active_state_trans_3,
+	[3] = &llc_sap_active_state_trans_4,
+	[4] = &llc_sap_active_state_trans_5,
+	[5] = &llc_sap_active_state_trans_6,
+	[6] = &llc_sap_active_state_trans_7,
+	[7] = &llc_sap_active_state_trans_8,
+	[8] = &llc_sap_active_state_trans_9,
+	[9] = &llc_sap_state_trans_end,
+};
+
+/* SAP state transition table */
+struct llc_sap_state llc_sap_state_table[LLC_NR_SAP_STATES] = {
+	[LLC_SAP_STATE_INACTIVE - 1] = {
+		.curr_state	= LLC_SAP_STATE_INACTIVE,
+		.transitions	= llc_sap_inactive_state_transitions,
+       	},
+	[LLC_SAP_STATE_ACTIVE - 1] = {
+		.curr_state	= LLC_SAP_STATE_ACTIVE,
+		.transitions	= llc_sap_active_state_transitions,
+	},
+};
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
new file mode 100644
index 00000000000..965c94eb4bb
--- /dev/null
+++ b/net/llc/llc_sap.c
@@ -0,0 +1,316 @@
+/*
+ * llc_sap.c - driver routines for SAP component.
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+
+#include <net/llc.h>
+#include <net/llc_if.h>
+#include <net/llc_conn.h>
+#include <net/llc_pdu.h>
+#include <net/llc_sap.h>
+#include <net/llc_s_ac.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_s_st.h>
+#include <net/sock.h>
+#include <linux/tcp.h>
+#include <linux/llc.h>
+
+/**
+ *	llc_alloc_frame - allocates sk_buff for frame
+ *
+ *	Allocates an sk_buff for frame and initializes sk_buff fields.
+ *	Returns allocated skb or %NULL when out of memory.
+ */
+struct sk_buff *llc_alloc_frame(void)
+{
+	struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
+
+	if (skb) {
+		skb_reserve(skb, 50);
+		skb->nh.raw   = skb->h.raw = skb->data;
+		skb->protocol = htons(ETH_P_802_2);
+		skb->dev      = dev_base->next;
+		skb->mac.raw  = skb->head;
+	}
+	return skb;
+}
+
+void llc_save_primitive(struct sk_buff* skb, u8 prim)
+{
+	struct sockaddr_llc *addr = llc_ui_skb_cb(skb);
+
+       /* save primitive for use by the user. */
+	addr->sllc_family = skb->sk->sk_family;
+	addr->sllc_arphrd = skb->dev->type;
+	addr->sllc_test   = prim == LLC_TEST_PRIM;
+	addr->sllc_xid    = prim == LLC_XID_PRIM;
+	addr->sllc_ua     = prim == LLC_DATAUNIT_PRIM;
+	llc_pdu_decode_sa(skb, addr->sllc_mac);
+	llc_pdu_decode_ssap(skb, &addr->sllc_sap);
+}
+
+/**
+ *	llc_sap_rtn_pdu - Informs upper layer on rx of an UI, XID or TEST pdu.
+ *	@sap: pointer to SAP
+ *	@skb: received pdu
+ */
+void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	switch (LLC_U_PDU_RSP(pdu)) {
+	case LLC_1_PDU_CMD_TEST:
+		ev->prim = LLC_TEST_PRIM;	break;
+	case LLC_1_PDU_CMD_XID:
+		ev->prim = LLC_XID_PRIM;	break;
+	case LLC_1_PDU_CMD_UI:
+		ev->prim = LLC_DATAUNIT_PRIM;	break;
+	}
+	ev->ind_cfm_flag = LLC_IND;
+}
+
+/**
+ *	llc_find_sap_trans - finds transition for event
+ *	@sap: pointer to SAP
+ *	@skb: happened event
+ *
+ *	This function finds transition that matches with happened event.
+ *	Returns the pointer to found transition on success or %NULL for
+ *	failure.
+ */
+static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap,
+						      struct sk_buff* skb)
+{
+	int i = 0;
+	struct llc_sap_state_trans *rc = NULL;
+	struct llc_sap_state_trans **next_trans;
+	struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1];
+	/*
+	 * Search thru events for this state until list exhausted or until
+	 * its obvious the event is not valid for the current state
+	 */
+	for (next_trans = curr_state->transitions; next_trans[i]->ev; i++)
+		if (!next_trans[i]->ev(sap, skb)) {
+			rc = next_trans[i]; /* got event match; return it */
+			break;
+		}
+	return rc;
+}
+
+/**
+ *	llc_exec_sap_trans_actions - execute actions related to event
+ *	@sap: pointer to SAP
+ *	@trans: pointer to transition that it's actions must be performed
+ *	@skb: happened event.
+ *
+ *	This function executes actions that is related to happened event.
+ *	Returns 0 for success and 1 for failure of at least one action.
+ */
+static int llc_exec_sap_trans_actions(struct llc_sap *sap,
+				      struct llc_sap_state_trans *trans,
+				      struct sk_buff *skb)
+{
+	int rc = 0;
+	llc_sap_action_t *next_action = trans->ev_actions;
+
+	for (; next_action && *next_action; next_action++)
+		if ((*next_action)(sap, skb))
+			rc = 1;
+	return rc;
+}
+
+/**
+ *	llc_sap_next_state - finds transition, execs actions & change SAP state
+ *	@sap: pointer to SAP
+ *	@skb: happened event
+ *
+ *	This function finds transition that matches with happened event, then
+ *	executes related actions and finally changes state of SAP. It returns
+ *	0 on success and 1 for failure.
+ */
+static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb)
+{
+	int rc = 1;
+	struct llc_sap_state_trans *trans;
+
+	if (sap->state > LLC_NR_SAP_STATES)
+		goto out;
+	trans = llc_find_sap_trans(sap, skb);
+	if (!trans)
+		goto out;
+	/*
+	 * Got the state to which we next transition; perform the actions
+	 * associated with this transition before actually transitioning to the
+	 * next state
+	 */
+	rc = llc_exec_sap_trans_actions(sap, trans, skb);
+	if (rc)
+		goto out;
+	/*
+	 * Transition SAP to next state if all actions execute successfully
+	 */
+	sap->state = trans->next_state;
+out:
+	return rc;
+}
+
+/**
+ *	llc_sap_state_process - sends event to SAP state machine
+ *	@sap: sap to use
+ *	@skb: pointer to occurred event
+ *
+ *	After executing actions of the event, upper layer will be indicated
+ *	if needed(on receiving an UI frame). sk can be null for the
+ *	datalink_proto case.
+ */
+static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	/*
+	 * We have to hold the skb, because llc_sap_next_state
+	 * will kfree it in the sending path and we need to
+	 * look at the skb->cb, where we encode llc_sap_state_ev.
+	 */
+	skb_get(skb);
+	ev->ind_cfm_flag = 0;
+	llc_sap_next_state(sap, skb);
+	if (ev->ind_cfm_flag == LLC_IND) {
+		if (skb->sk->sk_state == TCP_LISTEN)
+			kfree_skb(skb);
+		else {
+			llc_save_primitive(skb, ev->prim);
+
+			/* queue skb to the user. */
+			if (sock_queue_rcv_skb(skb->sk, skb))
+				kfree_skb(skb);
+		}
+	} 
+	kfree_skb(skb);
+}
+
+/**
+ *	llc_build_and_send_test_pkt - TEST interface for upper layers.
+ *	@sap: sap to use
+ *	@skb: packet to send
+ *	@dmac: destination mac address
+ *	@dsap: destination sap
+ *
+ *	This function is called when upper layer wants to send a TEST pdu.
+ *	Returns 0 for success, 1 otherwise.
+ */
+void llc_build_and_send_test_pkt(struct llc_sap *sap, 
+				 struct sk_buff *skb, u8 *dmac, u8 dsap)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	ev->saddr.lsap = sap->laddr.lsap;
+	ev->daddr.lsap = dsap;
+	memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN);
+	memcpy(ev->daddr.mac, dmac, IFHWADDRLEN);
+	
+	ev->type      = LLC_SAP_EV_TYPE_PRIM;
+	ev->prim      = LLC_TEST_PRIM;
+	ev->prim_type = LLC_PRIM_TYPE_REQ;
+	llc_sap_state_process(sap, skb);
+}
+
+/**
+ *	llc_build_and_send_xid_pkt - XID interface for upper layers
+ *	@sap: sap to use
+ *	@skb: packet to send
+ *	@dmac: destination mac address
+ *	@dsap: destination sap
+ *
+ *	This function is called when upper layer wants to send a XID pdu.
+ *	Returns 0 for success, 1 otherwise.
+ */
+void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb,
+				u8 *dmac, u8 dsap)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	ev->saddr.lsap = sap->laddr.lsap;
+	ev->daddr.lsap = dsap;
+	memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN);
+	memcpy(ev->daddr.mac, dmac, IFHWADDRLEN);
+
+	ev->type      = LLC_SAP_EV_TYPE_PRIM;
+	ev->prim      = LLC_XID_PRIM;
+	ev->prim_type = LLC_PRIM_TYPE_REQ;
+	llc_sap_state_process(sap, skb);
+}
+
+/**
+ *	llc_sap_rcv - sends received pdus to the sap state machine
+ *	@sap: current sap component structure.
+ *	@skb: received frame.
+ *
+ *	Sends received pdus to the sap state machine.
+ */
+static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+
+	ev->type   = LLC_SAP_EV_TYPE_PDU;
+	ev->reason = 0;
+	llc_sap_state_process(sap, skb);
+}
+
+/**
+ *	llc_lookup_dgram - Finds dgram socket for the local sap/mac
+ *	@sap: SAP
+ *	@laddr: address of local LLC (MAC + SAP)
+ *
+ *	Search socket list of the SAP and finds connection using the local
+ *	mac, and local sap. Returns pointer for socket found, %NULL otherwise.
+ */
+static struct sock *llc_lookup_dgram(struct llc_sap *sap,
+				     struct llc_addr *laddr)
+{
+	struct sock *rc;
+	struct hlist_node *node;
+
+	read_lock_bh(&sap->sk_list.lock);
+	sk_for_each(rc, node, &sap->sk_list.list) {
+		struct llc_sock *llc = llc_sk(rc);
+
+		if (rc->sk_type == SOCK_DGRAM &&
+		    llc->laddr.lsap == laddr->lsap &&
+		    llc_mac_match(llc->laddr.mac, laddr->mac)) {
+			sock_hold(rc);
+			goto found;
+		}
+	}
+	rc = NULL;
+found:
+	read_unlock_bh(&sap->sk_list.lock);
+	return rc;
+}
+
+void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
+{
+	struct llc_addr laddr;
+	struct sock *sk;
+
+	llc_pdu_decode_da(skb, laddr.mac);
+	llc_pdu_decode_dsap(skb, &laddr.lsap);
+
+	sk = llc_lookup_dgram(sap, &laddr);
+	if (sk) {
+		skb->sk = sk;
+		llc_sap_rcv(sap, skb);
+		sock_put(sk);
+	} else
+		kfree_skb(skb);
+}
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
new file mode 100644
index 00000000000..8fe48a24bad
--- /dev/null
+++ b/net/llc/llc_station.c
@@ -0,0 +1,713 @@
+/*
+ * llc_station.c - station component of LLC
+ *
+ * Copyright (c) 1997 by Procom Technology, Inc.
+ * 		 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program can be redistributed or modified under the terms of the
+ * GNU General Public License as published by the Free Software Foundation.
+ * This program is distributed without any warranty or implied warranty
+ * of merchantability or fitness for a particular purpose.
+ *
+ * See the GNU General Public License for more details.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <net/llc.h>
+#include <net/llc_sap.h>
+#include <net/llc_conn.h>
+#include <net/llc_c_ac.h>
+#include <net/llc_s_ac.h>
+#include <net/llc_c_ev.h>
+#include <net/llc_c_st.h>
+#include <net/llc_s_ev.h>
+#include <net/llc_s_st.h>
+#include <net/llc_pdu.h>
+
+/**
+ * struct llc_station - LLC station component
+ *
+ * SAP and connection resource manager, one per adapter.
+ *
+ * @state - state of station
+ * @xid_r_count - XID response PDU counter
+ * @mac_sa - MAC source address
+ * @sap_list - list of related SAPs
+ * @ev_q - events entering state mach.
+ * @mac_pdu_q - PDUs ready to send to MAC
+ */
+struct llc_station {
+	u8			    state;
+	u8			    xid_r_count;
+	struct timer_list	    ack_timer;
+	u8			    retry_count;
+	u8			    maximum_retry;
+	struct {
+		struct sk_buff_head list;
+		spinlock_t	    lock;
+	} ev_q;
+	struct sk_buff_head	    mac_pdu_q;
+};
+
+/* Types of events (possible values in 'ev->type') */
+#define LLC_STATION_EV_TYPE_SIMPLE	1
+#define LLC_STATION_EV_TYPE_CONDITION	2
+#define LLC_STATION_EV_TYPE_PRIM	3
+#define LLC_STATION_EV_TYPE_PDU		4       /* command/response PDU */
+#define LLC_STATION_EV_TYPE_ACK_TMR	5
+#define LLC_STATION_EV_TYPE_RPT_STATUS	6
+
+/* Events */
+#define LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK		1
+#define LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK		2
+#define LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY	3
+#define LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY	4
+#define LLC_STATION_EV_RX_NULL_DSAP_XID_C			5
+#define LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ	6
+#define LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ	7
+#define LLC_STATION_EV_RX_NULL_DSAP_TEST_C			8
+#define LLC_STATION_EV_DISABLE_REQ				9
+
+struct llc_station_state_ev {
+	u8		 type;
+	u8		 prim;
+	u8		 prim_type;
+	u8		 reason;
+	struct list_head node; /* node in station->ev_q.list */
+};
+
+static __inline__ struct llc_station_state_ev *
+					llc_station_ev(struct sk_buff *skb)
+{
+	return (struct llc_station_state_ev *)skb->cb;
+}
+
+typedef int (*llc_station_ev_t)(struct sk_buff *skb);
+
+#define LLC_STATION_STATE_DOWN		1	/* initial state */
+#define LLC_STATION_STATE_DUP_ADDR_CHK	2
+#define LLC_STATION_STATE_UP		3
+
+#define LLC_NBR_STATION_STATES		3	/* size of state table */
+
+typedef int (*llc_station_action_t)(struct sk_buff *skb);
+
+/* Station component state table structure */
+struct llc_station_state_trans {
+	llc_station_ev_t ev;
+	u8 next_state;
+	llc_station_action_t *ev_actions;
+};
+
+struct llc_station_state {
+	u8 curr_state;
+	struct llc_station_state_trans **transitions;
+};
+
+static struct llc_station llc_main_station;
+
+static int llc_stat_ev_enable_with_dup_addr_check(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);	
+	
+	return ev->type == LLC_STATION_EV_TYPE_SIMPLE &&
+	       ev->prim_type ==
+	       		      LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK ? 0 : 1;
+}
+
+static int llc_stat_ev_enable_without_dup_addr_check(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);	
+	
+	return ev->type == LLC_STATION_EV_TYPE_SIMPLE &&
+	       ev->prim_type ==
+			LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK ? 0 : 1;
+}
+
+static int llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);	
+	
+	return ev->type == LLC_STATION_EV_TYPE_ACK_TMR &&
+		llc_main_station.retry_count <
+		llc_main_station.maximum_retry ? 0 : 1;
+}
+
+static int llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);	
+	
+	return ev->type == LLC_STATION_EV_TYPE_ACK_TMR &&
+		llc_main_station.retry_count ==
+		llc_main_station.maximum_retry ? 0 : 1;
+}
+
+static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);	
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_STATION_EV_TYPE_PDU &&
+	       LLC_PDU_IS_CMD(pdu) &&			/* command PDU */
+	       LLC_PDU_TYPE_IS_U(pdu) &&		/* U type PDU */
+	       LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID &&
+	       !pdu->dsap ? 0 : 1;			/* NULL DSAP value */
+}
+
+static int llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_STATION_EV_TYPE_PDU &&
+	       LLC_PDU_IS_RSP(pdu) &&			/* response PDU */
+	       LLC_PDU_TYPE_IS_U(pdu) &&		/* U type PDU */
+	       LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID &&
+	       !pdu->dsap &&				/* NULL DSAP value */
+	       !llc_main_station.xid_r_count ? 0 : 1;
+}
+
+static int llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_STATION_EV_TYPE_PDU &&
+	       LLC_PDU_IS_RSP(pdu) &&			/* response PDU */
+	       LLC_PDU_TYPE_IS_U(pdu) &&		/* U type PDU */
+	       LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID &&
+	       !pdu->dsap &&				/* NULL DSAP value */
+	       llc_main_station.xid_r_count == 1 ? 0 : 1;
+}
+
+static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+	struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+
+	return ev->type == LLC_STATION_EV_TYPE_PDU &&
+	       LLC_PDU_IS_CMD(pdu) &&			/* command PDU */
+	       LLC_PDU_TYPE_IS_U(pdu) &&		/* U type PDU */
+	       LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST &&
+	       !pdu->dsap ? 0 : 1;			/* NULL DSAP */
+}
+
+static int llc_stat_ev_disable_req(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+
+	return ev->type == LLC_STATION_EV_TYPE_PRIM &&
+	       ev->prim == LLC_DISABLE_PRIM &&
+	       ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
+}
+
+/**
+ *	llc_station_send_pdu - queues PDU to send
+ *	@skb: Address of the PDU
+ *
+ *	Queues a PDU to send to the MAC layer.
+ */
+static void llc_station_send_pdu(struct sk_buff *skb)
+{
+	skb_queue_tail(&llc_main_station.mac_pdu_q, skb);
+	while ((skb = skb_dequeue(&llc_main_station.mac_pdu_q)) != NULL)
+		if (dev_queue_xmit(skb))
+			break;
+}
+
+static int llc_station_ac_start_ack_timer(struct sk_buff *skb)
+{
+	mod_timer(&llc_main_station.ack_timer, jiffies + LLC_ACK_TIME * HZ);
+	return 0;
+}
+
+static int llc_station_ac_set_retry_cnt_0(struct sk_buff *skb)
+{
+	llc_main_station.retry_count = 0;
+	return 0;
+}
+
+static int llc_station_ac_inc_retry_cnt_by_1(struct sk_buff *skb)
+{
+	llc_main_station.retry_count++;
+	return 0;
+}
+
+static int llc_station_ac_set_xid_r_cnt_0(struct sk_buff *skb)
+{
+	llc_main_station.xid_r_count = 0;
+	return 0;
+}
+
+static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb)
+{
+	llc_main_station.xid_r_count++;
+	return 0;
+}
+
+static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb)
+{
+	int rc = 1;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (!nskb)
+		goto out;
+	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD);
+	llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127);
+	rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, llc_station_mac_sa);
+	if (rc)
+		goto free;
+	llc_station_send_pdu(nskb);
+out:
+	return rc;
+free:
+	kfree_skb(skb);
+	goto out;
+}
+
+static int llc_station_ac_send_xid_r(struct sk_buff *skb)
+{
+	u8 mac_da[ETH_ALEN], dsap;
+	int rc = 1;
+	struct sk_buff* nskb = llc_alloc_frame();
+
+	if (!nskb)
+		goto out;
+	rc = 0;
+	nskb->dev = skb->dev;
+	llc_pdu_decode_sa(skb, mac_da);
+	llc_pdu_decode_ssap(skb, &dsap);
+	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP);
+	llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127);
+	rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da);
+	if (rc)
+		goto free;
+	llc_station_send_pdu(nskb);
+out:
+	return rc;
+free:
+	kfree_skb(skb);
+	goto out;
+}
+
+static int llc_station_ac_send_test_r(struct sk_buff *skb)
+{
+	u8 mac_da[ETH_ALEN], dsap;
+	int rc = 1;
+	struct sk_buff *nskb = llc_alloc_frame();
+
+	if (!nskb)
+		goto out;
+	rc = 0;
+	nskb->dev = skb->dev;
+	llc_pdu_decode_sa(skb, mac_da);
+	llc_pdu_decode_ssap(skb, &dsap);
+	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP);
+       	llc_pdu_init_as_test_rsp(nskb, skb);
+	rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da);
+	if (rc)
+		goto free;
+	llc_station_send_pdu(nskb);
+out:
+	return rc;
+free:
+	kfree_skb(skb);
+	goto out;
+}
+
+static int llc_station_ac_report_status(struct sk_buff *skb)
+{
+	return 0;
+}
+
+/* COMMON STATION STATE transitions */
+
+/* dummy last-transition indicator; common to all state transition groups
+ * last entry for this state
+ * all members are zeros, .bss zeroes it
+ */
+static struct llc_station_state_trans llc_stat_state_trans_end;
+
+/* DOWN STATE transitions */
+
+/* state transition for LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK event */
+static llc_station_action_t llc_stat_down_state_actions_1[] = {
+	[0] = llc_station_ac_start_ack_timer,
+	[1] = llc_station_ac_set_retry_cnt_0,
+	[2] = llc_station_ac_set_xid_r_cnt_0,
+	[3] = llc_station_ac_send_null_dsap_xid_c,
+	[4] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_down_state_trans_1 = {
+	.ev	    = llc_stat_ev_enable_with_dup_addr_check,
+	.next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
+	.ev_actions = llc_stat_down_state_actions_1,
+};
+
+/* state transition for LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK event */
+static llc_station_action_t llc_stat_down_state_actions_2[] = {
+	[0] = llc_station_ac_report_status,	/* STATION UP */
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_down_state_trans_2 = {
+	.ev	    = llc_stat_ev_enable_without_dup_addr_check,
+	.next_state = LLC_STATION_STATE_UP,
+	.ev_actions = llc_stat_down_state_actions_2,
+};
+
+/* array of pointers; one to each transition */
+static struct llc_station_state_trans *llc_stat_dwn_state_trans[] = {
+	[0] = &llc_stat_down_state_trans_1,
+	[1] = &llc_stat_down_state_trans_2,
+	[2] = &llc_stat_state_trans_end,
+};
+
+/* UP STATE transitions */
+/* state transition for LLC_STATION_EV_DISABLE_REQ event */
+static llc_station_action_t llc_stat_up_state_actions_1[] = {
+	[0] = llc_station_ac_report_status,	/* STATION DOWN */
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_up_state_trans_1 = {
+	.ev	    = llc_stat_ev_disable_req,
+	.next_state = LLC_STATION_STATE_DOWN,
+	.ev_actions = llc_stat_up_state_actions_1,
+};
+
+/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */
+static llc_station_action_t llc_stat_up_state_actions_2[] = {
+	[0] = llc_station_ac_send_xid_r,
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_up_state_trans_2 = {
+	.ev	    = llc_stat_ev_rx_null_dsap_xid_c,
+	.next_state = LLC_STATION_STATE_UP,
+	.ev_actions = llc_stat_up_state_actions_2,
+};
+
+/* state transition for LLC_STATION_EV_RX_NULL_DSAP_TEST_C event */
+static llc_station_action_t llc_stat_up_state_actions_3[] = {
+	[0] = llc_station_ac_send_test_r,
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_up_state_trans_3 = {
+	.ev	    = llc_stat_ev_rx_null_dsap_test_c,
+	.next_state = LLC_STATION_STATE_UP,
+	.ev_actions = llc_stat_up_state_actions_3,
+};
+
+/* array of pointers; one to each transition */
+static struct llc_station_state_trans *llc_stat_up_state_trans [] = {
+	[0] = &llc_stat_up_state_trans_1,
+	[1] = &llc_stat_up_state_trans_2,
+	[2] = &llc_stat_up_state_trans_3,
+	[3] = &llc_stat_state_trans_end,
+};
+
+/* DUP ADDR CHK STATE transitions */
+/* state transition for LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ
+ * event
+ */
+static llc_station_action_t llc_stat_dupaddr_state_actions_1[] = {
+	[0] = llc_station_ac_inc_xid_r_cnt_by_1,
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_1 = {
+	.ev	    = llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq,
+	.next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
+	.ev_actions = llc_stat_dupaddr_state_actions_1,
+};
+
+/* state transition for LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ
+ * event
+ */
+static llc_station_action_t llc_stat_dupaddr_state_actions_2[] = {
+	[0] = llc_station_ac_report_status,	/* DUPLICATE ADDRESS FOUND */
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_2 = {
+	.ev	    = llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq,
+	.next_state = LLC_STATION_STATE_DOWN,
+	.ev_actions = llc_stat_dupaddr_state_actions_2,
+};
+
+/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */
+static llc_station_action_t llc_stat_dupaddr_state_actions_3[] = {
+	[0] = llc_station_ac_send_xid_r,
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_3 = {
+	.ev	    = llc_stat_ev_rx_null_dsap_xid_c,
+	.next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
+	.ev_actions = llc_stat_dupaddr_state_actions_3,
+};
+
+/* state transition for LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY
+ * event
+ */
+static llc_station_action_t llc_stat_dupaddr_state_actions_4[] = {
+	[0] = llc_station_ac_start_ack_timer,
+	[1] = llc_station_ac_inc_retry_cnt_by_1,
+	[2] = llc_station_ac_set_xid_r_cnt_0,
+	[3] = llc_station_ac_send_null_dsap_xid_c,
+	[4] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_4 = {
+	.ev	    = llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry,
+	.next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
+	.ev_actions = llc_stat_dupaddr_state_actions_4,
+};
+
+/* state transition for LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY
+ * event
+ */
+static llc_station_action_t llc_stat_dupaddr_state_actions_5[] = {
+	[0] = llc_station_ac_report_status,	/* STATION UP */
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_5 = {
+	.ev	    = llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry,
+	.next_state = LLC_STATION_STATE_UP,
+	.ev_actions = llc_stat_dupaddr_state_actions_5,
+};
+
+/* state transition for LLC_STATION_EV_DISABLE_REQ event */
+static llc_station_action_t llc_stat_dupaddr_state_actions_6[] = {
+	[0] = llc_station_ac_report_status,	/* STATION DOWN */
+	[1] = NULL,
+};
+
+static struct llc_station_state_trans llc_stat_dupaddr_state_trans_6 = {
+	.ev	    = llc_stat_ev_disable_req,
+	.next_state = LLC_STATION_STATE_DOWN,
+	.ev_actions = llc_stat_dupaddr_state_actions_6,
+};
+
+/* array of pointers; one to each transition */
+static struct llc_station_state_trans *llc_stat_dupaddr_state_trans[] = {
+	[0] = &llc_stat_dupaddr_state_trans_6,	/* Request */
+	[1] = &llc_stat_dupaddr_state_trans_4,	/* Timer */
+	[2] = &llc_stat_dupaddr_state_trans_5,
+	[3] = &llc_stat_dupaddr_state_trans_1,	/* Receive frame */
+	[4] = &llc_stat_dupaddr_state_trans_2,
+	[5] = &llc_stat_dupaddr_state_trans_3,
+	[6] = &llc_stat_state_trans_end,
+};
+
+static struct llc_station_state
+			llc_station_state_table[LLC_NBR_STATION_STATES] = {
+	[LLC_STATION_STATE_DOWN - 1] = {
+		.curr_state  = LLC_STATION_STATE_DOWN,
+		.transitions = llc_stat_dwn_state_trans,
+	},
+	[LLC_STATION_STATE_DUP_ADDR_CHK - 1] = {
+		.curr_state  = LLC_STATION_STATE_DUP_ADDR_CHK,
+		.transitions = llc_stat_dupaddr_state_trans,
+	},
+	[LLC_STATION_STATE_UP - 1] = {
+		.curr_state  = LLC_STATION_STATE_UP,
+		.transitions = llc_stat_up_state_trans,
+	},
+};
+
+/**
+ *	llc_exec_station_trans_actions - executes actions for transition
+ *	@trans: Address of the transition
+ *	@skb: Address of the event that caused the transition
+ *
+ *	Executes actions of a transition of the station state machine. Returns
+ *	0 if all actions complete successfully, nonzero otherwise.
+ */
+static u16 llc_exec_station_trans_actions(struct llc_station_state_trans *trans,
+					  struct sk_buff *skb)
+{
+	u16 rc = 0;
+	llc_station_action_t *next_action = trans->ev_actions;
+
+	for (; next_action && *next_action; next_action++)
+		if ((*next_action)(skb))
+			rc = 1;
+	return rc;
+}
+
+/**
+ *	llc_find_station_trans - finds transition for this event
+ *	@skb: Address of the event
+ *
+ *	Search thru events of the current state of the station until list
+ *	exhausted or it's obvious that the event is not valid for the current
+ *	state. Returns the address of the transition if cound, %NULL otherwise.
+ */
+static struct llc_station_state_trans *
+				llc_find_station_trans(struct sk_buff *skb)
+{
+	int i = 0;
+	struct llc_station_state_trans *rc = NULL;
+	struct llc_station_state_trans **next_trans;
+	struct llc_station_state *curr_state =
+				&llc_station_state_table[llc_main_station.state - 1];
+
+	for (next_trans = curr_state->transitions; next_trans[i]->ev; i++)
+		if (!next_trans[i]->ev(skb)) {
+			rc = next_trans[i];
+			break;
+		}
+	return rc;
+}
+
+/**
+ *	llc_station_free_ev - frees an event
+ *	@skb: Address of the event
+ *
+ *	Frees an event.
+ */
+static void llc_station_free_ev(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+
+	if (ev->type == LLC_STATION_EV_TYPE_PDU)
+		kfree_skb(skb);
+}
+
+/**
+ *	llc_station_next_state - processes event and goes to the next state
+ *	@skb: Address of the event
+ *
+ *	Processes an event, executes any transitions related to that event and
+ *	updates the state of the station.
+ */
+static u16 llc_station_next_state(struct sk_buff *skb)
+{
+	u16 rc = 1;
+	struct llc_station_state_trans *trans;
+
+	if (llc_main_station.state > LLC_NBR_STATION_STATES)
+		goto out;
+	trans = llc_find_station_trans(skb);
+	if (trans) {
+		/* got the state to which we next transition; perform the
+		 * actions associated with this transition before actually
+		 * transitioning to the next state
+		 */
+		rc = llc_exec_station_trans_actions(trans, skb);
+		if (!rc)
+			/* transition station to next state if all actions
+			 * execute successfully; done; wait for next event
+			 */
+			llc_main_station.state = trans->next_state;
+	} else
+		/* event not recognized in current state; re-queue it for
+		 * processing again at a later time; return failure
+		 */
+		rc = 0;
+out:
+	llc_station_free_ev(skb);
+	return rc;
+}
+
+/**
+ *	llc_station_service_events - service events in the queue
+ *
+ *	Get an event from the station event queue (if any); attempt to service
+ *	the event; if event serviced, get the next event (if any) on the event
+ *	queue; if event not service, re-queue the event on the event queue and
+ *	attempt to service the next event; when serviced all events in queue,
+ *	finished; if don't transition to different state, just service all
+ *	events once; if transition to new state, service all events again.
+ *	Caller must hold llc_main_station.ev_q.lock.
+ */
+static void llc_station_service_events(void)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&llc_main_station.ev_q.list)) != NULL)
+		llc_station_next_state(skb);
+}
+
+/**
+ *	llc_station_state_process: queue event and try to process queue.
+ *	@skb: Address of the event
+ *
+ *	Queues an event (on the station event queue) for handling by the
+ *	station state machine and attempts to process any queued-up events.
+ */
+static void llc_station_state_process(struct sk_buff *skb)
+{
+	spin_lock_bh(&llc_main_station.ev_q.lock);
+	skb_queue_tail(&llc_main_station.ev_q.list, skb);
+	llc_station_service_events();
+	spin_unlock_bh(&llc_main_station.ev_q.lock);
+}
+
+static void llc_station_ack_tmr_cb(unsigned long timeout_data)
+{
+	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
+
+	if (skb) {
+		struct llc_station_state_ev *ev = llc_station_ev(skb);
+
+		ev->type = LLC_STATION_EV_TYPE_ACK_TMR;
+		llc_station_state_process(skb);
+	}
+}
+
+/*
+ *	llc_station_rcv - send received pdu to the station state machine
+ *	@skb: received frame.
+ *
+ *	Sends data unit to station state machine.
+ */
+static void llc_station_rcv(struct sk_buff *skb)
+{
+	struct llc_station_state_ev *ev = llc_station_ev(skb);
+
+	ev->type   = LLC_STATION_EV_TYPE_PDU;
+	ev->reason = 0;
+	llc_station_state_process(skb);
+}
+
+int __init llc_station_init(void)
+{
+	u16 rc = -ENOBUFS;
+	struct sk_buff *skb;
+	struct llc_station_state_ev *ev;
+
+	skb_queue_head_init(&llc_main_station.mac_pdu_q);
+	skb_queue_head_init(&llc_main_station.ev_q.list);
+	spin_lock_init(&llc_main_station.ev_q.lock);
+	init_timer(&llc_main_station.ack_timer);
+	llc_main_station.ack_timer.data     = (unsigned long)&llc_main_station;
+	llc_main_station.ack_timer.function = llc_station_ack_tmr_cb;
+
+	skb = alloc_skb(0, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+	rc = 0;
+	llc_set_station_handler(llc_station_rcv);
+	ev = llc_station_ev(skb);
+	memset(ev, 0, sizeof(*ev));
+	llc_main_station.ack_timer.expires = jiffies + 3 * HZ;
+	llc_main_station.maximum_retry	= 1;
+	llc_main_station.state		= LLC_STATION_STATE_DOWN;
+	ev->type	= LLC_STATION_EV_TYPE_SIMPLE;
+	ev->prim_type	= LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
+	rc = llc_station_next_state(skb);
+out:
+	return rc;
+}
+
+void __exit llc_station_exit(void)
+{
+	llc_set_station_handler(NULL);
+}
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
new file mode 100644
index 00000000000..39d9c2dcd03
--- /dev/null
+++ b/net/netlink/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the netlink driver.
+#
+
+obj-y  				:= af_netlink.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
new file mode 100644
index 00000000000..1d5905c90cd
--- /dev/null
+++ b/net/netlink/af_netlink.c
@@ -0,0 +1,1454 @@
+/*
+ * NETLINK      Kernel-user communication protocol.
+ *
+ * 		Authors:	Alan Cox <alan@redhat.com>
+ * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ * 
+ * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
+ *                               added netlink_proto_exit
+ * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
+ * 				 use nlk_sk, as sk->protinfo is on a diet 8)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/major.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/security.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/random.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <net/sock.h>
+#include <net/scm.h>
+
+#define Nprintk(a...)
+
+struct netlink_sock {
+	/* struct sock has to be the first member of netlink_sock */
+	struct sock		sk;
+	u32			pid;
+	unsigned int		groups;
+	u32			dst_pid;
+	unsigned int		dst_groups;
+	unsigned long		state;
+	wait_queue_head_t	wait;
+	struct netlink_callback	*cb;
+	spinlock_t		cb_lock;
+	void			(*data_ready)(struct sock *sk, int bytes);
+};
+
+static inline struct netlink_sock *nlk_sk(struct sock *sk)
+{
+	return (struct netlink_sock *)sk;
+}
+
+struct nl_pid_hash {
+	struct hlist_head *table;
+	unsigned long rehash_time;
+
+	unsigned int mask;
+	unsigned int shift;
+
+	unsigned int entries;
+	unsigned int max_shift;
+
+	u32 rnd;
+};
+
+struct netlink_table {
+	struct nl_pid_hash hash;
+	struct hlist_head mc_list;
+	unsigned int nl_nonroot;
+};
+
+static struct netlink_table *nl_table;
+
+static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
+
+static int netlink_dump(struct sock *sk);
+static void netlink_destroy_callback(struct netlink_callback *cb);
+
+static DEFINE_RWLOCK(nl_table_lock);
+static atomic_t nl_table_users = ATOMIC_INIT(0);
+
+static struct notifier_block *netlink_chain;
+
+static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
+{
+	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
+}
+
+static void netlink_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Freeing alive netlink socket %p\n", sk);
+		return;
+	}
+	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(!nlk_sk(sk)->cb);
+}
+
+/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
+ * Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+ * this, _but_ remember, it adds useless work on UP machines.
+ */
+
+static void netlink_table_grab(void)
+{
+	write_lock_bh(&nl_table_lock);
+
+	if (atomic_read(&nl_table_users)) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		add_wait_queue_exclusive(&nl_table_wait, &wait);
+		for(;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (atomic_read(&nl_table_users) == 0)
+				break;
+			write_unlock_bh(&nl_table_lock);
+			schedule();
+			write_lock_bh(&nl_table_lock);
+		}
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&nl_table_wait, &wait);
+	}
+}
+
+static __inline__ void netlink_table_ungrab(void)
+{
+	write_unlock_bh(&nl_table_lock);
+	wake_up(&nl_table_wait);
+}
+
+static __inline__ void
+netlink_lock_table(void)
+{
+	/* read_lock() synchronizes us to netlink_table_grab */
+
+	read_lock(&nl_table_lock);
+	atomic_inc(&nl_table_users);
+	read_unlock(&nl_table_lock);
+}
+
+static __inline__ void
+netlink_unlock_table(void)
+{
+	if (atomic_dec_and_test(&nl_table_users))
+		wake_up(&nl_table_wait);
+}
+
+static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
+{
+	struct nl_pid_hash *hash = &nl_table[protocol].hash;
+	struct hlist_head *head;
+	struct sock *sk;
+	struct hlist_node *node;
+
+	read_lock(&nl_table_lock);
+	head = nl_pid_hashfn(hash, pid);
+	sk_for_each(sk, node, head) {
+		if (nlk_sk(sk)->pid == pid) {
+			sock_hold(sk);
+			goto found;
+		}
+	}
+	sk = NULL;
+found:
+	read_unlock(&nl_table_lock);
+	return sk;
+}
+
+static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
+{
+	if (size <= PAGE_SIZE)
+		return kmalloc(size, GFP_ATOMIC);
+	else
+		return (struct hlist_head *)
+			__get_free_pages(GFP_ATOMIC, get_order(size));
+}
+
+static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
+{
+	if (size <= PAGE_SIZE)
+		kfree(table);
+	else
+		free_pages((unsigned long)table, get_order(size));
+}
+
+static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
+{
+	unsigned int omask, mask, shift;
+	size_t osize, size;
+	struct hlist_head *otable, *table;
+	int i;
+
+	omask = mask = hash->mask;
+	osize = size = (mask + 1) * sizeof(*table);
+	shift = hash->shift;
+
+	if (grow) {
+		if (++shift > hash->max_shift)
+			return 0;
+		mask = mask * 2 + 1;
+		size *= 2;
+	}
+
+	table = nl_pid_hash_alloc(size);
+	if (!table)
+		return 0;
+
+	memset(table, 0, size);
+	otable = hash->table;
+	hash->table = table;
+	hash->mask = mask;
+	hash->shift = shift;
+	get_random_bytes(&hash->rnd, sizeof(hash->rnd));
+
+	for (i = 0; i <= omask; i++) {
+		struct sock *sk;
+		struct hlist_node *node, *tmp;
+
+		sk_for_each_safe(sk, node, tmp, &otable[i])
+			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
+	}
+
+	nl_pid_hash_free(otable, osize);
+	hash->rehash_time = jiffies + 10 * 60 * HZ;
+	return 1;
+}
+
+static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
+{
+	int avg = hash->entries >> hash->shift;
+
+	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
+		return 1;
+
+	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
+		nl_pid_hash_rehash(hash, 0);
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct proto_ops netlink_ops;
+
+static int netlink_insert(struct sock *sk, u32 pid)
+{
+	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+	struct hlist_head *head;
+	int err = -EADDRINUSE;
+	struct sock *osk;
+	struct hlist_node *node;
+	int len;
+
+	netlink_table_grab();
+	head = nl_pid_hashfn(hash, pid);
+	len = 0;
+	sk_for_each(osk, node, head) {
+		if (nlk_sk(osk)->pid == pid)
+			break;
+		len++;
+	}
+	if (node)
+		goto err;
+
+	err = -EBUSY;
+	if (nlk_sk(sk)->pid)
+		goto err;
+
+	err = -ENOMEM;
+	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
+		goto err;
+
+	if (len && nl_pid_hash_dilute(hash, len))
+		head = nl_pid_hashfn(hash, pid);
+	hash->entries++;
+	nlk_sk(sk)->pid = pid;
+	sk_add_node(sk, head);
+	err = 0;
+
+err:
+	netlink_table_ungrab();
+	return err;
+}
+
+static void netlink_remove(struct sock *sk)
+{
+	netlink_table_grab();
+	nl_table[sk->sk_protocol].hash.entries--;
+	sk_del_node_init(sk);
+	if (nlk_sk(sk)->groups)
+		__sk_del_bind_node(sk);
+	netlink_table_ungrab();
+}
+
+static struct proto netlink_proto = {
+	.name	  = "NETLINK",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct netlink_sock),
+};
+
+static int netlink_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct netlink_sock *nlk;
+
+	sock->state = SS_UNCONNECTED;
+
+	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	if (protocol<0 || protocol >= MAX_LINKS)
+		return -EPROTONOSUPPORT;
+
+	sock->ops = &netlink_ops;
+
+	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	nlk = nlk_sk(sk);
+
+	spin_lock_init(&nlk->cb_lock);
+	init_waitqueue_head(&nlk->wait);
+	sk->sk_destruct = netlink_sock_destruct;
+
+	sk->sk_protocol = protocol;
+	return 0;
+}
+
+static int netlink_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk;
+
+	if (!sk)
+		return 0;
+
+	netlink_remove(sk);
+	nlk = nlk_sk(sk);
+
+	spin_lock(&nlk->cb_lock);
+	if (nlk->cb) {
+		nlk->cb->done(nlk->cb);
+		netlink_destroy_callback(nlk->cb);
+		nlk->cb = NULL;
+		__sock_put(sk);
+	}
+	spin_unlock(&nlk->cb_lock);
+
+	/* OK. Socket is unlinked, and, therefore,
+	   no new packets will arrive */
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+	wake_up_interruptible_all(&nlk->wait);
+
+	skb_queue_purge(&sk->sk_write_queue);
+
+	if (nlk->pid && !nlk->groups) {
+		struct netlink_notify n = {
+						.protocol = sk->sk_protocol,
+						.pid = nlk->pid,
+					  };
+		notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
+	}	
+	
+	sock_put(sk);
+	return 0;
+}
+
+static int netlink_autobind(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+	struct hlist_head *head;
+	struct sock *osk;
+	struct hlist_node *node;
+	s32 pid = current->pid;
+	int err;
+	static s32 rover = -4097;
+
+retry:
+	cond_resched();
+	netlink_table_grab();
+	head = nl_pid_hashfn(hash, pid);
+	sk_for_each(osk, node, head) {
+		if (nlk_sk(osk)->pid == pid) {
+			/* Bind collision, search negative pid values. */
+			pid = rover--;
+			if (rover > -4097)
+				rover = -4097;
+			netlink_table_ungrab();
+			goto retry;
+		}
+	}
+	netlink_table_ungrab();
+
+	err = netlink_insert(sk, pid);
+	if (err == -EADDRINUSE)
+		goto retry;
+	return 0;
+}
+
+static inline int netlink_capable(struct socket *sock, unsigned int flag) 
+{ 
+	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
+	       capable(CAP_NET_ADMIN);
+} 
+
+static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+	int err;
+	
+	if (nladdr->nl_family != AF_NETLINK)
+		return -EINVAL;
+
+	/* Only superuser is allowed to listen multicasts */
+	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_RECV))
+		return -EPERM;
+
+	if (nlk->pid) {
+		if (nladdr->nl_pid != nlk->pid)
+			return -EINVAL;
+	} else {
+		err = nladdr->nl_pid ?
+			netlink_insert(sk, nladdr->nl_pid) :
+			netlink_autobind(sock);
+		if (err)
+			return err;
+	}
+
+	if (!nladdr->nl_groups && !nlk->groups)
+		return 0;
+
+	netlink_table_grab();
+	if (nlk->groups && !nladdr->nl_groups)
+		__sk_del_bind_node(sk);
+	else if (!nlk->groups && nladdr->nl_groups)
+		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
+	nlk->groups = nladdr->nl_groups;
+	netlink_table_ungrab();
+
+	return 0;
+}
+
+static int netlink_connect(struct socket *sock, struct sockaddr *addr,
+			   int alen, int flags)
+{
+	int err = 0;
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
+
+	if (addr->sa_family == AF_UNSPEC) {
+		sk->sk_state	= NETLINK_UNCONNECTED;
+		nlk->dst_pid	= 0;
+		nlk->dst_groups = 0;
+		return 0;
+	}
+	if (addr->sa_family != AF_NETLINK)
+		return -EINVAL;
+
+	/* Only superuser is allowed to send multicasts */
+	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+		return -EPERM;
+
+	if (!nlk->pid)
+		err = netlink_autobind(sock);
+
+	if (err == 0) {
+		sk->sk_state	= NETLINK_CONNECTED;
+		nlk->dst_pid 	= nladdr->nl_pid;
+		nlk->dst_groups = nladdr->nl_groups;
+	}
+
+	return err;
+}
+
+static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
+	
+	nladdr->nl_family = AF_NETLINK;
+	nladdr->nl_pad = 0;
+	*addr_len = sizeof(*nladdr);
+
+	if (peer) {
+		nladdr->nl_pid = nlk->dst_pid;
+		nladdr->nl_groups = nlk->dst_groups;
+	} else {
+		nladdr->nl_pid = nlk->pid;
+		nladdr->nl_groups = nlk->groups;
+	}
+	return 0;
+}
+
+static void netlink_overrun(struct sock *sk)
+{
+	if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
+		sk->sk_err = ENOBUFS;
+		sk->sk_error_report(sk);
+	}
+}
+
+static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
+{
+	int protocol = ssk->sk_protocol;
+	struct sock *sock;
+	struct netlink_sock *nlk;
+
+	sock = netlink_lookup(protocol, pid);
+	if (!sock)
+		return ERR_PTR(-ECONNREFUSED);
+
+	/* Don't bother queuing skb if kernel socket has no input function */
+	nlk = nlk_sk(sock);
+	if ((nlk->pid == 0 && !nlk->data_ready) ||
+	    (sock->sk_state == NETLINK_CONNECTED &&
+	     nlk->dst_pid != nlk_sk(ssk)->pid)) {
+		sock_put(sock);
+		return ERR_PTR(-ECONNREFUSED);
+	}
+	return sock;
+}
+
+struct sock *netlink_getsockbyfilp(struct file *filp)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct sock *sock;
+
+	if (!S_ISSOCK(inode->i_mode))
+		return ERR_PTR(-ENOTSOCK);
+
+	sock = SOCKET_I(inode)->sk;
+	if (sock->sk_family != AF_NETLINK)
+		return ERR_PTR(-EINVAL);
+
+	sock_hold(sock);
+	return sock;
+}
+
+/*
+ * Attach a skb to a netlink socket.
+ * The caller must hold a reference to the destination socket. On error, the
+ * reference is dropped. The skb is not send to the destination, just all
+ * all error checks are performed and memory in the queue is reserved.
+ * Return values:
+ * < 0: error. skb freed, reference to sock dropped.
+ * 0: continue
+ * 1: repeat lookup - reference dropped while waiting for socket memory.
+ */
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
+{
+	struct netlink_sock *nlk;
+
+	nlk = nlk_sk(sk);
+
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    test_bit(0, &nlk->state)) {
+		DECLARE_WAITQUEUE(wait, current);
+		if (!timeo) {
+			if (!nlk->pid)
+				netlink_overrun(sk);
+			sock_put(sk);
+			kfree_skb(skb);
+			return -EAGAIN;
+		}
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&nlk->wait, &wait);
+
+		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+		     test_bit(0, &nlk->state)) &&
+		    !sock_flag(sk, SOCK_DEAD))
+			timeo = schedule_timeout(timeo);
+
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&nlk->wait, &wait);
+		sock_put(sk);
+
+		if (signal_pending(current)) {
+			kfree_skb(skb);
+			return sock_intr_errno(timeo);
+		}
+		return 1;
+	}
+	skb_set_owner_r(skb, sk);
+	return 0;
+}
+
+int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
+{
+	struct netlink_sock *nlk;
+	int len = skb->len;
+
+	nlk = nlk_sk(sk);
+
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, len);
+	sock_put(sk);
+	return len;
+}
+
+void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	sock_put(sk);
+}
+
+static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation)
+{
+	int delta;
+
+	skb_orphan(skb);
+
+	delta = skb->end - skb->tail;
+	if (delta * 2 < skb->truesize)
+		return skb;
+
+	if (skb_shared(skb)) {
+		struct sk_buff *nskb = skb_clone(skb, allocation);
+		if (!nskb)
+			return skb;
+		kfree_skb(skb);
+		skb = nskb;
+	}
+
+	if (!pskb_expand_head(skb, 0, -delta, allocation))
+		skb->truesize -= delta;
+
+	return skb;
+}
+
+int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
+{
+	struct sock *sk;
+	int err;
+	long timeo;
+
+	skb = netlink_trim(skb, gfp_any());
+
+	timeo = sock_sndtimeo(ssk, nonblock);
+retry:
+	sk = netlink_getsockbypid(ssk, pid);
+	if (IS_ERR(sk)) {
+		kfree_skb(skb);
+		return PTR_ERR(sk);
+	}
+	err = netlink_attachskb(sk, skb, nonblock, timeo);
+	if (err == 1)
+		goto retry;
+	if (err)
+		return err;
+
+	return netlink_sendskb(sk, skb, ssk->sk_protocol);
+}
+
+static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+	    !test_bit(0, &nlk->state)) {
+		skb_set_owner_r(skb, sk);
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		sk->sk_data_ready(sk, skb->len);
+		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
+	}
+	return -1;
+}
+
+struct netlink_broadcast_data {
+	struct sock *exclude_sk;
+	u32 pid;
+	u32 group;
+	int failure;
+	int congested;
+	int delivered;
+	int allocation;
+	struct sk_buff *skb, *skb2;
+};
+
+static inline int do_one_broadcast(struct sock *sk,
+				   struct netlink_broadcast_data *p)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	int val;
+
+	if (p->exclude_sk == sk)
+		goto out;
+
+	if (nlk->pid == p->pid || !(nlk->groups & p->group))
+		goto out;
+
+	if (p->failure) {
+		netlink_overrun(sk);
+		goto out;
+	}
+
+	sock_hold(sk);
+	if (p->skb2 == NULL) {
+		if (atomic_read(&p->skb->users) != 1) {
+			p->skb2 = skb_clone(p->skb, p->allocation);
+		} else {
+			p->skb2 = p->skb;
+			atomic_inc(&p->skb->users);
+		}
+	}
+	if (p->skb2 == NULL) {
+		netlink_overrun(sk);
+		/* Clone failed. Notify ALL listeners. */
+		p->failure = 1;
+	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+		netlink_overrun(sk);
+	} else {
+		p->congested |= val;
+		p->delivered = 1;
+		p->skb2 = NULL;
+	}
+	sock_put(sk);
+
+out:
+	return 0;
+}
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, int allocation)
+{
+	struct netlink_broadcast_data info;
+	struct hlist_node *node;
+	struct sock *sk;
+
+	skb = netlink_trim(skb, allocation);
+
+	info.exclude_sk = ssk;
+	info.pid = pid;
+	info.group = group;
+	info.failure = 0;
+	info.congested = 0;
+	info.delivered = 0;
+	info.allocation = allocation;
+	info.skb = skb;
+	info.skb2 = NULL;
+
+	/* While we sleep in clone, do not allow to change socket list */
+
+	netlink_lock_table();
+
+	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
+		do_one_broadcast(sk, &info);
+
+	netlink_unlock_table();
+
+	if (info.skb2)
+		kfree_skb(info.skb2);
+	kfree_skb(skb);
+
+	if (info.delivered) {
+		if (info.congested && (allocation & __GFP_WAIT))
+			yield();
+		return 0;
+	}
+	if (info.failure)
+		return -ENOBUFS;
+	return -ESRCH;
+}
+
+struct netlink_set_err_data {
+	struct sock *exclude_sk;
+	u32 pid;
+	u32 group;
+	int code;
+};
+
+static inline int do_one_set_err(struct sock *sk,
+				 struct netlink_set_err_data *p)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (sk == p->exclude_sk)
+		goto out;
+
+	if (nlk->pid == p->pid || !(nlk->groups & p->group))
+		goto out;
+
+	sk->sk_err = p->code;
+	sk->sk_error_report(sk);
+out:
+	return 0;
+}
+
+void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+{
+	struct netlink_set_err_data info;
+	struct hlist_node *node;
+	struct sock *sk;
+
+	info.exclude_sk = ssk;
+	info.pid = pid;
+	info.group = group;
+	info.code = code;
+
+	read_lock(&nl_table_lock);
+
+	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
+		do_one_set_err(sk, &info);
+
+	read_unlock(&nl_table_lock);
+}
+
+static inline void netlink_rcv_wake(struct sock *sk)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (!skb_queue_len(&sk->sk_receive_queue))
+		clear_bit(0, &nlk->state);
+	if (!test_bit(0, &nlk->state))
+		wake_up_interruptible(&nlk->wait);
+}
+
+static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			   struct msghdr *msg, size_t len)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sockaddr_nl *addr=msg->msg_name;
+	u32 dst_pid;
+	u32 dst_groups;
+	struct sk_buff *skb;
+	int err;
+	struct scm_cookie scm;
+
+	if (msg->msg_flags&MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (NULL == siocb->scm)
+		siocb->scm = &scm;
+	err = scm_send(sock, msg, siocb->scm);
+	if (err < 0)
+		return err;
+
+	if (msg->msg_namelen) {
+		if (addr->nl_family != AF_NETLINK)
+			return -EINVAL;
+		dst_pid = addr->nl_pid;
+		dst_groups = addr->nl_groups;
+		if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+			return -EPERM;
+	} else {
+		dst_pid = nlk->dst_pid;
+		dst_groups = nlk->dst_groups;
+	}
+
+	if (!nlk->pid) {
+		err = netlink_autobind(sock);
+		if (err)
+			goto out;
+	}
+
+	err = -EMSGSIZE;
+	if (len > sk->sk_sndbuf - 32)
+		goto out;
+	err = -ENOBUFS;
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (skb==NULL)
+		goto out;
+
+	NETLINK_CB(skb).pid	= nlk->pid;
+	NETLINK_CB(skb).groups	= nlk->groups;
+	NETLINK_CB(skb).dst_pid = dst_pid;
+	NETLINK_CB(skb).dst_groups = dst_groups;
+	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
+	/* What can I do? Netlink is asynchronous, so that
+	   we will have to save current capabilities to
+	   check them, when this message will be delivered
+	   to corresponding kernel module.   --ANK (980802)
+	 */
+
+	err = -EFAULT;
+	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	err = security_netlink_send(sk, skb);
+	if (err) {
+		kfree_skb(skb);
+		goto out;
+	}
+
+	if (dst_groups) {
+		atomic_inc(&skb->users);
+		netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
+	}
+	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+
+out:
+	return err;
+}
+
+static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
+			   struct msghdr *msg, size_t len,
+			   int flags)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+	struct scm_cookie scm;
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	int noblock = flags&MSG_DONTWAIT;
+	size_t copied;
+	struct sk_buff *skb;
+	int err;
+
+	if (flags&MSG_OOB)
+		return -EOPNOTSUPP;
+
+	copied = 0;
+
+	skb = skb_recv_datagram(sk,flags,noblock,&err);
+	if (skb==NULL)
+		goto out;
+
+	msg->msg_namelen = 0;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	skb->h.raw = skb->data;
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	if (msg->msg_name) {
+		struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
+		addr->nl_family = AF_NETLINK;
+		addr->nl_pad    = 0;
+		addr->nl_pid	= NETLINK_CB(skb).pid;
+		addr->nl_groups	= NETLINK_CB(skb).dst_groups;
+		msg->msg_namelen = sizeof(*addr);
+	}
+
+	if (NULL == siocb->scm) {
+		memset(&scm, 0, sizeof(scm));
+		siocb->scm = &scm;
+	}
+	siocb->scm->creds = *NETLINK_CREDS(skb);
+	skb_free_datagram(sk, skb);
+
+	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
+		netlink_dump(sk);
+
+	scm_recv(sock, msg, siocb->scm, flags);
+
+out:
+	netlink_rcv_wake(sk);
+	return err ? : copied;
+}
+
+static void netlink_data_ready(struct sock *sk, int len)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (nlk->data_ready)
+		nlk->data_ready(sk, len);
+	netlink_rcv_wake(sk);
+}
+
+/*
+ *	We export these functions to other modules. They provide a 
+ *	complete set of kernel non-blocking support for message
+ *	queueing.
+ */
+
+struct sock *
+netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
+{
+	struct socket *sock;
+	struct sock *sk;
+
+	if (!nl_table)
+		return NULL;
+
+	if (unit<0 || unit>=MAX_LINKS)
+		return NULL;
+
+	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
+		return NULL;
+
+	if (netlink_create(sock, unit) < 0) {
+		sock_release(sock);
+		return NULL;
+	}
+	sk = sock->sk;
+	sk->sk_data_ready = netlink_data_ready;
+	if (input)
+		nlk_sk(sk)->data_ready = input;
+
+	if (netlink_insert(sk, 0)) {
+		sock_release(sock);
+		return NULL;
+	}
+	return sk;
+}
+
+void netlink_set_nonroot(int protocol, unsigned int flags)
+{ 
+	if ((unsigned int)protocol < MAX_LINKS) 
+		nl_table[protocol].nl_nonroot = flags;
+} 
+
+static void netlink_destroy_callback(struct netlink_callback *cb)
+{
+	if (cb->skb)
+		kfree_skb(cb->skb);
+	kfree(cb);
+}
+
+/*
+ * It looks a bit ugly.
+ * It would be better to create kernel thread.
+ */
+
+static int netlink_dump(struct sock *sk)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_callback *cb;
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	int len;
+	
+	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	spin_lock(&nlk->cb_lock);
+
+	cb = nlk->cb;
+	if (cb == NULL) {
+		spin_unlock(&nlk->cb_lock);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	len = cb->dump(skb, cb);
+
+	if (len > 0) {
+		spin_unlock(&nlk->cb_lock);
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		sk->sk_data_ready(sk, len);
+		return 0;
+	}
+
+	nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int));
+	nlh->nlmsg_flags |= NLM_F_MULTI;
+	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+
+	cb->done(cb);
+	nlk->cb = NULL;
+	spin_unlock(&nlk->cb_lock);
+
+	netlink_destroy_callback(cb);
+	__sock_put(sk);
+	return 0;
+}
+
+int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+		       struct nlmsghdr *nlh,
+		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
+		       int (*done)(struct netlink_callback*))
+{
+	struct netlink_callback *cb;
+	struct sock *sk;
+	struct netlink_sock *nlk;
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (cb == NULL)
+		return -ENOBUFS;
+
+	memset(cb, 0, sizeof(*cb));
+	cb->dump = dump;
+	cb->done = done;
+	cb->nlh = nlh;
+	atomic_inc(&skb->users);
+	cb->skb = skb;
+
+	sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
+	if (sk == NULL) {
+		netlink_destroy_callback(cb);
+		return -ECONNREFUSED;
+	}
+	nlk = nlk_sk(sk);
+	/* A dump is in progress... */
+	spin_lock(&nlk->cb_lock);
+	if (nlk->cb) {
+		spin_unlock(&nlk->cb_lock);
+		netlink_destroy_callback(cb);
+		sock_put(sk);
+		return -EBUSY;
+	}
+	nlk->cb = cb;
+	sock_hold(sk);
+	spin_unlock(&nlk->cb_lock);
+
+	netlink_dump(sk);
+	sock_put(sk);
+	return 0;
+}
+
+void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *rep;
+	struct nlmsgerr *errmsg;
+	int size;
+
+	if (err == 0)
+		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
+	else
+		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb) {
+		struct sock *sk;
+
+		sk = netlink_lookup(in_skb->sk->sk_protocol,
+				    NETLINK_CB(in_skb).pid);
+		if (sk) {
+			sk->sk_err = ENOBUFS;
+			sk->sk_error_report(sk);
+			sock_put(sk);
+		}
+		return;
+	}
+
+	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+			  NLMSG_ERROR, sizeof(struct nlmsgerr));
+	errmsg = NLMSG_DATA(rep);
+	errmsg->error = err;
+	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
+	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+}
+
+
+#ifdef CONFIG_PROC_FS
+struct nl_seq_iter {
+	int link;
+	int hash_idx;
+};
+
+static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
+{
+	struct nl_seq_iter *iter = seq->private;
+	int i, j;
+	struct sock *s;
+	struct hlist_node *node;
+	loff_t off = 0;
+
+	for (i=0; i<MAX_LINKS; i++) {
+		struct nl_pid_hash *hash = &nl_table[i].hash;
+
+		for (j = 0; j <= hash->mask; j++) {
+			sk_for_each(s, node, &hash->table[j]) {
+				if (off == pos) {
+					iter->link = i;
+					iter->hash_idx = j;
+					return s;
+				}
+				++off;
+			}
+		}
+	}
+	return NULL;
+}
+
+static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&nl_table_lock);
+	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *s;
+	struct nl_seq_iter *iter;
+	int i, j;
+
+	++*pos;
+
+	if (v == SEQ_START_TOKEN)
+		return netlink_seq_socket_idx(seq, 0);
+		
+	s = sk_next(v);
+	if (s)
+		return s;
+
+	iter = seq->private;
+	i = iter->link;
+	j = iter->hash_idx + 1;
+
+	do {
+		struct nl_pid_hash *hash = &nl_table[i].hash;
+
+		for (; j <= hash->mask; j++) {
+			s = sk_head(&hash->table[j]);
+			if (s) {
+				iter->link = i;
+				iter->hash_idx = j;
+				return s;
+			}
+		}
+
+		j = 0;
+	} while (++i < MAX_LINKS);
+
+	return NULL;
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&nl_table_lock);
+}
+
+
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+			 "sk       Eth Pid    Groups   "
+			 "Rmem     Wmem     Dump     Locks\n");
+	else {
+		struct sock *s = v;
+		struct netlink_sock *nlk = nlk_sk(s);
+
+		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
+			   s,
+			   s->sk_protocol,
+			   nlk->pid,
+			   nlk->groups,
+			   atomic_read(&s->sk_rmem_alloc),
+			   atomic_read(&s->sk_wmem_alloc),
+			   nlk->cb,
+			   atomic_read(&s->sk_refcnt)
+			);
+
+	}
+	return 0;
+}
+
+static struct seq_operations netlink_seq_ops = {
+	.start  = netlink_seq_start,
+	.next   = netlink_seq_next,
+	.stop   = netlink_seq_stop,
+	.show   = netlink_seq_show,
+};
+
+
+static int netlink_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	struct nl_seq_iter *iter;
+	int err;
+
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
+	err = seq_open(file, &netlink_seq_ops);
+	if (err) {
+		kfree(iter);
+		return err;
+	}
+
+	memset(iter, 0, sizeof(*iter));
+	seq = file->private_data;
+	seq->private = iter;
+	return 0;
+}
+
+static struct file_operations netlink_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= netlink_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif
+
+int netlink_register_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_register(&netlink_chain, nb);
+}
+
+int netlink_unregister_notifier(struct notifier_block *nb)
+{
+	return notifier_chain_unregister(&netlink_chain, nb);
+}
+                
+static struct proto_ops netlink_ops = {
+	.family =	PF_NETLINK,
+	.owner =	THIS_MODULE,
+	.release =	netlink_release,
+	.bind =		netlink_bind,
+	.connect =	netlink_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	netlink_getname,
+	.poll =		datagram_poll,
+	.ioctl =	sock_no_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	sock_no_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	netlink_sendmsg,
+	.recvmsg =	netlink_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct net_proto_family netlink_family_ops = {
+	.family = PF_NETLINK,
+	.create = netlink_create,
+	.owner	= THIS_MODULE,	/* for consistency 8) */
+};
+
+extern void netlink_skb_parms_too_large(void);
+
+static int __init netlink_proto_init(void)
+{
+	struct sk_buff *dummy_skb;
+	int i;
+	unsigned long max;
+	unsigned int order;
+	int err = proto_register(&netlink_proto, 0);
+
+	if (err != 0)
+		goto out;
+
+	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
+		netlink_skb_parms_too_large();
+
+	nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
+	if (!nl_table) {
+enomem:
+		printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
+		return -ENOMEM;
+	}
+
+	memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
+
+	if (num_physpages >= (128 * 1024))
+		max = num_physpages >> (21 - PAGE_SHIFT);
+	else
+		max = num_physpages >> (23 - PAGE_SHIFT);
+
+	order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
+	max = (1UL << order) / sizeof(struct hlist_head);
+	order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1;
+
+	for (i = 0; i < MAX_LINKS; i++) {
+		struct nl_pid_hash *hash = &nl_table[i].hash;
+
+		hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
+		if (!hash->table) {
+			while (i-- > 0)
+				nl_pid_hash_free(nl_table[i].hash.table,
+						 1 * sizeof(*hash->table));
+			kfree(nl_table);
+			goto enomem;
+		}
+		memset(hash->table, 0, 1 * sizeof(*hash->table));
+		hash->max_shift = order;
+		hash->shift = 0;
+		hash->mask = 0;
+		hash->rehash_time = jiffies;
+	}
+
+	sock_register(&netlink_family_ops);
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
+#endif
+	/* The netlink device handler may be needed early. */ 
+	rtnetlink_init();
+out:
+	return err;
+}
+
+static void __exit netlink_proto_exit(void)
+{
+	sock_unregister(PF_NETLINK);
+	proc_net_remove("netlink");
+	kfree(nl_table);
+	nl_table = NULL;
+	proto_unregister(&netlink_proto);
+}
+
+core_initcall(netlink_proto_init);
+module_exit(netlink_proto_exit);
+
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_NETPROTO(PF_NETLINK);
+
+EXPORT_SYMBOL(netlink_ack);
+EXPORT_SYMBOL(netlink_broadcast);
+EXPORT_SYMBOL(netlink_dump_start);
+EXPORT_SYMBOL(netlink_kernel_create);
+EXPORT_SYMBOL(netlink_register_notifier);
+EXPORT_SYMBOL(netlink_set_err);
+EXPORT_SYMBOL(netlink_set_nonroot);
+EXPORT_SYMBOL(netlink_unicast);
+EXPORT_SYMBOL(netlink_unregister_notifier);
+
diff --git a/net/netrom/Makefile b/net/netrom/Makefile
new file mode 100644
index 00000000000..2660f5a1699
--- /dev/null
+++ b/net/netrom/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the Linux NET/ROM layer.
+#
+
+obj-$(CONFIG_NETROM) += netrom.o
+
+netrom-y		:= af_netrom.o nr_dev.o nr_in.o nr_loopback.o \
+			   nr_out.o nr_route.o nr_subr.o nr_timer.o
+netrom-$(CONFIG_SYSCTL)	+= sysctl_net_netrom.o
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
new file mode 100644
index 00000000000..31ed4a9a1d0
--- /dev/null
+++ b/net/netrom/af_netrom.c
@@ -0,0 +1,1485 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/stat.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/netrom.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/arp.h>
+#include <linux/init.h>
+
+static int nr_ndevs = 4;
+
+int sysctl_netrom_default_path_quality            = NR_DEFAULT_QUAL;
+int sysctl_netrom_obsolescence_count_initialiser  = NR_DEFAULT_OBS;
+int sysctl_netrom_network_ttl_initialiser         = NR_DEFAULT_TTL;
+int sysctl_netrom_transport_timeout               = NR_DEFAULT_T1;
+int sysctl_netrom_transport_maximum_tries         = NR_DEFAULT_N2;
+int sysctl_netrom_transport_acknowledge_delay     = NR_DEFAULT_T2;
+int sysctl_netrom_transport_busy_delay            = NR_DEFAULT_T4;
+int sysctl_netrom_transport_requested_window_size = NR_DEFAULT_WINDOW;
+int sysctl_netrom_transport_no_activity_timeout   = NR_DEFAULT_IDLE;
+int sysctl_netrom_routing_control                 = NR_DEFAULT_ROUTING;
+int sysctl_netrom_link_fails_count                = NR_DEFAULT_FAILS;
+
+static unsigned short circuit = 0x101;
+
+static HLIST_HEAD(nr_list);
+static DEFINE_SPINLOCK(nr_list_lock);
+
+static struct proto_ops nr_proto_ops;
+
+/*
+ *	Socket removal during an interrupt is now safe.
+ */
+static void nr_remove_socket(struct sock *sk)
+{
+	spin_lock_bh(&nr_list_lock);
+	sk_del_node_init(sk);
+	spin_unlock_bh(&nr_list_lock);
+}
+
+/*
+ *	Kill all bound sockets on a dropped device.
+ */
+static void nr_kill_by_device(struct net_device *dev)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_list_lock);
+	sk_for_each(s, node, &nr_list)
+		if (nr_sk(s)->device == dev)
+			nr_disconnect(s, ENETUNREACH);
+	spin_unlock_bh(&nr_list_lock);
+}
+
+/*
+ *	Handle device status changes.
+ */
+static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = (struct net_device *)ptr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	nr_kill_by_device(dev);
+	nr_rt_device_down(dev);
+	
+	return NOTIFY_DONE;
+}
+
+/*
+ *	Add a socket to the bound sockets list.
+ */
+static void nr_insert_socket(struct sock *sk)
+{
+	spin_lock_bh(&nr_list_lock);
+	sk_add_node(sk, &nr_list);
+	spin_unlock_bh(&nr_list_lock);
+}
+
+/*
+ *	Find a socket that wants to accept the Connect Request we just
+ *	received.
+ */
+static struct sock *nr_find_listener(ax25_address *addr)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_list_lock);
+	sk_for_each(s, node, &nr_list)
+		if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
+		    s->sk_state == TCP_LISTEN) {
+		    	bh_lock_sock(s);
+			goto found;
+		}
+	s = NULL;
+found:
+	spin_unlock_bh(&nr_list_lock);
+	return s;
+}
+
+/*
+ *	Find a connected NET/ROM socket given my circuit IDs.
+ */
+static struct sock *nr_find_socket(unsigned char index, unsigned char id)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_list_lock);
+	sk_for_each(s, node, &nr_list) {
+		struct nr_sock *nr = nr_sk(s);
+		
+		if (nr->my_index == index && nr->my_id == id) {
+			bh_lock_sock(s);
+			goto found;
+		}
+	}
+	s = NULL;
+found:
+	spin_unlock_bh(&nr_list_lock);
+	return s;
+}
+
+/*
+ *	Find a connected NET/ROM socket given their circuit IDs.
+ */
+static struct sock *nr_find_peer(unsigned char index, unsigned char id,
+	ax25_address *dest)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_list_lock);
+	sk_for_each(s, node, &nr_list) {
+		struct nr_sock *nr = nr_sk(s);
+		
+		if (nr->your_index == index && nr->your_id == id &&
+		    !ax25cmp(&nr->dest_addr, dest)) {
+		    	bh_lock_sock(s);
+			goto found;
+		}
+	}
+	s = NULL;
+found:
+	spin_unlock_bh(&nr_list_lock);
+	return s;
+}
+
+/*
+ *	Find next free circuit ID.
+ */
+static unsigned short nr_find_next_circuit(void)
+{
+	unsigned short id = circuit;
+	unsigned char i, j;
+	struct sock *sk;
+
+	for (;;) {
+		i = id / 256;
+		j = id % 256;
+
+		if (i != 0 && j != 0) {
+			if ((sk=nr_find_socket(i, j)) == NULL)
+				break;
+			bh_unlock_sock(sk);
+		}
+
+		id++;
+	}
+
+	return id;
+}
+
+/*
+ *	Deferred destroy.
+ */
+void nr_destroy_socket(struct sock *);
+
+/*
+ *	Handler for deferred kills.
+ */
+static void nr_destroy_timer(unsigned long data)
+{
+	struct sock *sk=(struct sock *)data;
+	bh_lock_sock(sk);
+	sock_hold(sk);
+	nr_destroy_socket(sk);
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ *	This is called from user mode and the timers. Thus it protects itself
+ *	against interrupt users but doesn't worry about being called during
+ *	work. Once it is removed from the queue no interrupt or bottom half
+ *	will touch it and we are (fairly 8-) ) safe.
+ */
+void nr_destroy_socket(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	nr_remove_socket(sk);
+
+	nr_stop_heartbeat(sk);
+	nr_stop_t1timer(sk);
+	nr_stop_t2timer(sk);
+	nr_stop_t4timer(sk);
+	nr_stop_idletimer(sk);
+
+	nr_clear_queues(sk);		/* Flush the queues */
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		if (skb->sk != sk) { /* A pending connection */
+			/* Queue the unaccepted socket for death */
+			sock_set_flag(skb->sk, SOCK_DEAD);
+			nr_start_heartbeat(skb->sk);
+			nr_sk(skb->sk)->state = NR_STATE_0;
+		}
+
+		kfree_skb(skb);
+	}
+
+	if (atomic_read(&sk->sk_wmem_alloc) ||
+	    atomic_read(&sk->sk_rmem_alloc)) {
+		/* Defer: outstanding buffers */
+		sk->sk_timer.function = nr_destroy_timer;
+		sk->sk_timer.expires  = jiffies + 2 * HZ;
+		add_timer(&sk->sk_timer);
+	} else
+		sock_put(sk);
+}
+
+/*
+ *	Handling for system calls applied via the various interfaces to a
+ *	NET/ROM socket object.
+ */
+
+static int nr_setsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+	int opt;
+
+	if (level != SOL_NETROM)
+		return -ENOPROTOOPT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(opt, (int __user *)optval))
+		return -EFAULT;
+
+	switch (optname) {
+	case NETROM_T1:
+		if (opt < 1)
+			return -EINVAL;
+		nr->t1 = opt * HZ;
+		return 0;
+
+	case NETROM_T2:
+		if (opt < 1)
+			return -EINVAL;
+		nr->t2 = opt * HZ;
+		return 0;
+
+	case NETROM_N2:
+		if (opt < 1 || opt > 31)
+			return -EINVAL;
+		nr->n2 = opt;
+		return 0;
+
+	case NETROM_T4:
+		if (opt < 1)
+			return -EINVAL;
+		nr->t4 = opt * HZ;
+		return 0;
+
+	case NETROM_IDLE:
+		if (opt < 0)
+			return -EINVAL;
+		nr->idle = opt * 60 * HZ;
+		return 0;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+static int nr_getsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+	int val = 0;
+	int len; 
+
+	if (level != SOL_NETROM)
+		return -ENOPROTOOPT;
+	
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+		
+	switch (optname) {
+	case NETROM_T1:
+		val = nr->t1 / HZ;
+		break;
+
+	case NETROM_T2:
+		val = nr->t2 / HZ;
+		break;
+
+	case NETROM_N2:
+		val = nr->n2;
+		break;
+
+	case NETROM_T4:
+		val = nr->t4 / HZ;
+		break;
+
+	case NETROM_IDLE:
+		val = nr->idle / (60 * HZ);
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return copy_to_user(optval, &val, len) ? -EFAULT : 0;
+}
+
+static int nr_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_LISTEN) {
+		memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = TCP_LISTEN;
+		release_sock(sk);
+		return 0;
+	}
+	release_sock(sk);
+
+	return -EOPNOTSUPP;
+}
+
+static struct proto nr_proto = {
+	.name	  = "NETROM",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct nr_sock),
+};
+
+static int nr_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct nr_sock *nr;
+
+	if (sock->type != SOCK_SEQPACKET || protocol != 0)
+		return -ESOCKTNOSUPPORT;
+
+	if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL)
+		return -ENOMEM;
+
+	nr = nr_sk(sk);
+
+	sock_init_data(sock, sk);
+
+	sock->ops    = &nr_proto_ops;
+	sk->sk_protocol = protocol;
+
+	skb_queue_head_init(&nr->ack_queue);
+	skb_queue_head_init(&nr->reseq_queue);
+	skb_queue_head_init(&nr->frag_queue);
+
+	nr_init_timers(sk);
+
+	nr->t1     = sysctl_netrom_transport_timeout;
+	nr->t2     = sysctl_netrom_transport_acknowledge_delay;
+	nr->n2     = sysctl_netrom_transport_maximum_tries;
+	nr->t4     = sysctl_netrom_transport_busy_delay;
+	nr->idle   = sysctl_netrom_transport_no_activity_timeout;
+	nr->window = sysctl_netrom_transport_requested_window_size;
+
+	nr->bpqext = 1;
+	nr->state  = NR_STATE_0;
+
+	return 0;
+}
+
+static struct sock *nr_make_new(struct sock *osk)
+{
+	struct sock *sk;
+	struct nr_sock *nr, *onr;
+
+	if (osk->sk_type != SOCK_SEQPACKET)
+		return NULL;
+
+	if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+		return NULL;
+
+	nr = nr_sk(sk);
+
+	sock_init_data(NULL, sk);
+
+	sk->sk_type     = osk->sk_type;
+	sk->sk_socket   = osk->sk_socket;
+	sk->sk_priority = osk->sk_priority;
+	sk->sk_protocol = osk->sk_protocol;
+	sk->sk_rcvbuf   = osk->sk_rcvbuf;
+	sk->sk_sndbuf   = osk->sk_sndbuf;
+	sk->sk_state    = TCP_ESTABLISHED;
+	sk->sk_sleep    = osk->sk_sleep;
+
+	if (sock_flag(osk, SOCK_ZAPPED))
+		sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (sock_flag(osk, SOCK_DBG))
+		sock_set_flag(sk, SOCK_DBG);
+
+	skb_queue_head_init(&nr->ack_queue);
+	skb_queue_head_init(&nr->reseq_queue);
+	skb_queue_head_init(&nr->frag_queue);
+
+	nr_init_timers(sk);
+
+	onr = nr_sk(osk);
+
+	nr->t1      = onr->t1;
+	nr->t2      = onr->t2;
+	nr->n2      = onr->n2;
+	nr->t4      = onr->t4;
+	nr->idle    = onr->idle;
+	nr->window  = onr->window;
+
+	nr->device  = onr->device;
+	nr->bpqext  = onr->bpqext;
+
+	return sk;
+}
+
+static int nr_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr;
+
+	if (sk == NULL) return 0;
+
+	sock_hold(sk);
+	lock_sock(sk);
+	nr = nr_sk(sk);
+
+	switch (nr->state) {
+	case NR_STATE_0:
+	case NR_STATE_1:
+	case NR_STATE_2:
+		nr_disconnect(sk, 0);
+		nr_destroy_socket(sk);
+		break;
+
+	case NR_STATE_3:
+		nr_clear_queues(sk);
+		nr->n2count = 0;
+		nr_write_internal(sk, NR_DISCREQ);
+		nr_start_t1timer(sk);
+		nr_stop_t2timer(sk);
+		nr_stop_t4timer(sk);
+		nr_stop_idletimer(sk);
+		nr->state    = NR_STATE_2;
+		sk->sk_state    = TCP_CLOSE;
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		sk->sk_state_change(sk);
+		sock_orphan(sk);
+		sock_set_flag(sk, SOCK_DESTROY);
+		sk->sk_socket   = NULL;
+		break;
+
+	default:
+		sk->sk_socket = NULL;
+		break;
+	}
+
+	sock->sk   = NULL;	
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+	struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
+	struct net_device *dev;
+	ax25_address *user, *source;
+
+	lock_sock(sk);
+	if (!sock_flag(sk, SOCK_ZAPPED)) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if (addr_len < sizeof(struct sockaddr_ax25) || addr_len > sizeof(struct full_sockaddr_ax25)) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if (addr_len < (addr->fsa_ax25.sax25_ndigis * sizeof(ax25_address) + sizeof(struct sockaddr_ax25))) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if (addr->fsa_ax25.sax25_family != AF_NETROM) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if ((dev = nr_dev_get(&addr->fsa_ax25.sax25_call)) == NULL) {
+		SOCK_DEBUG(sk, "NET/ROM: bind failed: invalid node callsign\n");
+		release_sock(sk);
+		return -EADDRNOTAVAIL;
+	}
+
+	/*
+	 * Only the super user can set an arbitrary user callsign.
+	 */
+	if (addr->fsa_ax25.sax25_ndigis == 1) {
+		if (!capable(CAP_NET_BIND_SERVICE)) {
+			dev_put(dev);
+			release_sock(sk);
+			return -EACCES;
+		}
+		nr->user_addr   = addr->fsa_digipeater[0];
+		nr->source_addr = addr->fsa_ax25.sax25_call;
+	} else {
+		source = &addr->fsa_ax25.sax25_call;
+
+		if ((user = ax25_findbyuid(current->euid)) == NULL) {
+			if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
+				release_sock(sk);
+				dev_put(dev);
+				return -EPERM;
+			}
+			user = source;
+		}
+
+		nr->user_addr   = *user;
+		nr->source_addr = *source;
+	}
+
+	nr->device = dev;
+	nr_insert_socket(sk);
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	dev_put(dev);
+	release_sock(sk);
+	SOCK_DEBUG(sk, "NET/ROM: socket is bound\n");
+	return 0;
+}
+
+static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
+	int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+	struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
+	ax25_address *user, *source = NULL;
+	struct net_device *dev;
+
+	lock_sock(sk);
+	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
+		sock->state = SS_CONNECTED;
+		release_sock(sk);
+		return 0;	/* Connect completed during a ERESTARTSYS event */
+	}
+
+	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
+		sock->state = SS_UNCONNECTED;
+		release_sock(sk);
+		return -ECONNREFUSED;
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED) {
+		release_sock(sk);
+		return -EISCONN;	/* No reconnect on a seqpacket socket */
+	}
+
+	sk->sk_state   = TCP_CLOSE;	
+	sock->state = SS_UNCONNECTED;
+
+	if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if (addr->sax25_family != AF_NETROM) {
+		release_sock(sk);
+		return -EINVAL;
+	}
+	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
+		sock_reset_flag(sk, SOCK_ZAPPED);
+
+		if ((dev = nr_dev_first()) == NULL) {
+			release_sock(sk);
+			return -ENETUNREACH;
+		}
+		source = (ax25_address *)dev->dev_addr;
+
+		if ((user = ax25_findbyuid(current->euid)) == NULL) {
+			if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
+				dev_put(dev);
+				release_sock(sk);
+				return -EPERM;
+			}
+			user = source;
+		}
+
+		nr->user_addr   = *user;
+		nr->source_addr = *source;
+		nr->device      = dev;
+
+		dev_put(dev);
+		nr_insert_socket(sk);		/* Finish the bind */
+	}
+
+	nr->dest_addr = addr->sax25_call;
+
+	release_sock(sk);
+	circuit = nr_find_next_circuit();
+	lock_sock(sk);
+
+	nr->my_index = circuit / 256;
+	nr->my_id    = circuit % 256;
+
+	circuit++;
+
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state  = SS_CONNECTING;
+	sk->sk_state = TCP_SYN_SENT;
+
+	nr_establish_data_link(sk);
+
+	nr->state = NR_STATE_1;
+
+	nr_start_heartbeat(sk);
+
+	/* Now the loop */
+	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
+		release_sock(sk);
+		return -EINPROGRESS;
+	}
+		
+	/*
+	 * A Connect Ack with Choke or timeout or failed routing will go to
+	 * closed.
+	 */
+	if (sk->sk_state == TCP_SYN_SENT) {
+		struct task_struct *tsk = current;
+		DECLARE_WAITQUEUE(wait, tsk);
+
+		add_wait_queue(sk->sk_sleep, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (sk->sk_state != TCP_SYN_SENT)
+				break;
+			release_sock(sk);
+			if (!signal_pending(tsk)) {
+				schedule();
+				lock_sock(sk);
+				continue;
+			}
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -ERESTARTSYS;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &wait);
+	}
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		sock->state = SS_UNCONNECTED;
+		release_sock(sk);
+		return sock_error(sk);	/* Always set at this point */
+	}
+
+	sock->state = SS_CONNECTED;
+	release_sock(sk);
+
+	return 0;
+}
+
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	struct sk_buff *skb;
+	struct sock *newsk;
+	struct sock *sk;
+	int err = 0;
+
+	if ((sk = sock->sk) == NULL)
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sk->sk_state != TCP_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 *	The write queue this time is holding sockets ready to use
+	 *	hooked into the SABM we saved
+	 */
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb)
+			break;
+
+		current->state = TASK_INTERRUPTIBLE;
+		release_sock(sk);
+		if (flags & O_NONBLOCK) {
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -EWOULDBLOCK;
+		}
+		if (!signal_pending(tsk)) {
+			schedule();
+			lock_sock(sk);
+			continue;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &wait);
+		return -ERESTARTSYS;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	newsk = skb->sk;
+	newsk->sk_socket = newsock;
+	newsk->sk_sleep = &newsock->wait;
+
+	/* Now attach up the new socket */
+	kfree_skb(skb);
+	sk->sk_ack_backlog--;
+	newsock->sk = newsk;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
+	int *uaddr_len, int peer)
+{
+	struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+
+	lock_sock(sk);
+	if (peer != 0) {
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			release_sock(sk);
+			return -ENOTCONN;
+		}
+		sax->fsa_ax25.sax25_family = AF_NETROM;
+		sax->fsa_ax25.sax25_ndigis = 1;
+		sax->fsa_ax25.sax25_call   = nr->user_addr;
+		sax->fsa_digipeater[0]     = nr->dest_addr;
+		*uaddr_len = sizeof(struct full_sockaddr_ax25);
+	} else {
+		sax->fsa_ax25.sax25_family = AF_NETROM;
+		sax->fsa_ax25.sax25_ndigis = 0;
+		sax->fsa_ax25.sax25_call   = nr->source_addr;
+		*uaddr_len = sizeof(struct sockaddr_ax25);
+	}
+	release_sock(sk);
+
+	return 0;
+}
+
+int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
+{
+	struct sock *sk;
+	struct sock *make;	
+	struct nr_sock *nr_make;
+	ax25_address *src, *dest, *user;
+	unsigned short circuit_index, circuit_id;
+	unsigned short peer_circuit_index, peer_circuit_id;
+	unsigned short frametype, flags, window, timeout;
+	int ret;
+
+	skb->sk = NULL;		/* Initially we don't know who it's for */
+
+	/*
+	 *	skb->data points to the netrom frame start
+	 */
+
+	src  = (ax25_address *)(skb->data + 0);
+	dest = (ax25_address *)(skb->data + 7);
+
+	circuit_index      = skb->data[15];
+	circuit_id         = skb->data[16];
+	peer_circuit_index = skb->data[17];
+	peer_circuit_id    = skb->data[18];
+	frametype          = skb->data[19] & 0x0F;
+	flags              = skb->data[19] & 0xF0;
+
+#ifdef CONFIG_INET
+	/*
+	 * Check for an incoming IP over NET/ROM frame.
+	 */
+	if (frametype == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
+		skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
+		skb->h.raw = skb->data;
+
+		return nr_rx_ip(skb, dev);
+	}
+#endif
+
+	/*
+	 * Find an existing socket connection, based on circuit ID, if it's
+	 * a Connect Request base it on their circuit ID.
+	 *
+	 * Circuit ID 0/0 is not valid but it could still be a "reset" for a
+	 * circuit that no longer exists at the other end ...
+	 */
+
+	sk = NULL;
+
+	if (circuit_index == 0 && circuit_id == 0) {
+		if (frametype == NR_CONNACK && flags == NR_CHOKE_FLAG)
+			sk = nr_find_peer(peer_circuit_index, peer_circuit_id, src);
+	} else {
+		if (frametype == NR_CONNREQ)
+			sk = nr_find_peer(circuit_index, circuit_id, src);
+		else
+			sk = nr_find_socket(circuit_index, circuit_id);
+	}
+
+	if (sk != NULL) {
+		skb->h.raw = skb->data;
+
+		if (frametype == NR_CONNACK && skb->len == 22)
+			nr_sk(sk)->bpqext = 1;
+		else
+			nr_sk(sk)->bpqext = 0;
+
+		ret = nr_process_rx_frame(sk, skb);
+		bh_unlock_sock(sk);
+		return ret;
+	}
+
+	/*
+	 * Now it should be a CONNREQ.
+	 */
+	if (frametype != NR_CONNREQ) {
+		/*
+		 * Here it would be nice to be able to send a reset but
+		 * NET/ROM doesn't have one. The following hack would
+		 * have been a way to extend the protocol but apparently
+		 * it kills BPQ boxes... :-(
+		 */
+#if 0
+		/*
+		 * Never reply to a CONNACK/CHOKE.
+		 */
+		if (frametype != NR_CONNACK || flags != NR_CHOKE_FLAG)
+			nr_transmit_refusal(skb, 1);
+#endif
+		return 0;
+	}
+
+	sk = nr_find_listener(dest);
+
+	user = (ax25_address *)(skb->data + 21);
+
+	if (sk == NULL || sk_acceptq_is_full(sk) ||
+	    (make = nr_make_new(sk)) == NULL) {
+		nr_transmit_refusal(skb, 0);
+		if (sk)
+			bh_unlock_sock(sk);
+		return 0;
+	}
+
+	window = skb->data[20];
+
+	skb->sk             = make;
+	make->sk_state	    = TCP_ESTABLISHED;
+
+	/* Fill in his circuit details */
+	nr_make = nr_sk(make);
+	nr_make->source_addr = *dest;
+	nr_make->dest_addr   = *src;
+	nr_make->user_addr   = *user;
+
+	nr_make->your_index  = circuit_index;
+	nr_make->your_id     = circuit_id;
+
+	bh_unlock_sock(sk);
+	circuit = nr_find_next_circuit();
+	bh_lock_sock(sk);
+
+	nr_make->my_index    = circuit / 256;
+	nr_make->my_id       = circuit % 256;
+
+	circuit++;
+
+	/* Window negotiation */
+	if (window < nr_make->window)
+		nr_make->window = window;
+
+	/* L4 timeout negotiation */
+	if (skb->len == 37) {
+		timeout = skb->data[36] * 256 + skb->data[35];
+		if (timeout * HZ < nr_make->t1)
+			nr_make->t1 = timeout * HZ;
+		nr_make->bpqext = 1;
+	} else {
+		nr_make->bpqext = 0;
+	}
+
+	nr_write_internal(make, NR_CONNACK);
+
+	nr_make->condition = 0x00;
+	nr_make->vs        = 0;
+	nr_make->va        = 0;
+	nr_make->vr        = 0;
+	nr_make->vl        = 0;
+	nr_make->state     = NR_STATE_3;
+	sk->sk_ack_backlog++;
+
+	nr_insert_socket(make);
+
+	skb_queue_head(&sk->sk_receive_queue, skb);
+
+	nr_start_heartbeat(make);
+	nr_start_idletimer(make);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+
+	bh_unlock_sock(sk);
+	return 1;
+}
+
+static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
+		      struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct nr_sock *nr = nr_sk(sk);
+	struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)msg->msg_name;
+	int err;
+	struct sockaddr_ax25 sax;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int size;
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		err = -EADDRNOTAVAIL;
+		goto out;
+	}
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (nr->device == NULL) {
+		err = -ENETUNREACH;
+		goto out;
+	}
+
+	if (usax) {
+		if (msg->msg_namelen < sizeof(sax)) {
+			err = -EINVAL;
+			goto out;
+		}
+		sax = *usax;
+		if (ax25cmp(&nr->dest_addr, &sax.sax25_call) != 0) {
+			err = -EISCONN;
+			goto out;
+		}
+		if (sax.sax25_family != AF_NETROM) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+		sax.sax25_family = AF_NETROM;
+		sax.sax25_call   = nr->dest_addr;
+	}
+
+	SOCK_DEBUG(sk, "NET/ROM: sendto: Addresses built.\n");
+
+	/* Build a packet */
+	SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n");
+	size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN;
+
+	if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
+		goto out;
+
+	skb_reserve(skb, size - len);
+
+	/*
+	 *	Push down the NET/ROM header
+	 */
+
+	asmptr = skb_push(skb, NR_TRANSPORT_LEN);
+	SOCK_DEBUG(sk, "Building NET/ROM Header.\n");
+
+	/* Build a NET/ROM Transport header */
+
+	*asmptr++ = nr->your_index;
+	*asmptr++ = nr->your_id;
+	*asmptr++ = 0;		/* To be filled in later */
+	*asmptr++ = 0;		/*      Ditto            */
+	*asmptr++ = NR_INFO;
+	SOCK_DEBUG(sk, "Built header.\n");
+
+	/*
+	 *	Put the data on the end
+	 */
+
+	skb->h.raw = skb_put(skb, len);
+
+	asmptr = skb->h.raw;
+	SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
+
+	/* User data follows immediately after the NET/ROM transport header */
+	if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) {
+		kfree_skb(skb);
+		err = -EFAULT;
+		goto out;
+	}
+
+	SOCK_DEBUG(sk, "NET/ROM: Transmitting buffer\n");
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		kfree_skb(skb);
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	nr_output(sk, skb);	/* Shove it onto the queue */
+
+	err = len;
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
+		      struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
+	size_t copied;
+	struct sk_buff *skb;
+	int er;
+
+	/*
+	 * This works for seqpacket too. The receiver has ordered the queue for
+	 * us! We do one quick check first though
+	 */
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		release_sock(sk);
+		return -ENOTCONN;
+	}
+
+	/* Now we can treat all alike */
+	if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) {
+		release_sock(sk);
+		return er;
+	}
+
+	skb->h.raw = skb->data;
+	copied     = skb->len;
+
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	if (sax != NULL) {
+		sax->sax25_family = AF_NETROM;
+		memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN);
+	}
+
+	msg->msg_namelen = sizeof(*sax);
+
+	skb_free_datagram(sk, skb);
+
+	release_sock(sk);
+	return copied;
+}
+
+
+static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	lock_sock(sk);
+	switch (cmd) {
+	case TIOCOUTQ: {
+		long amount;
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		release_sock(sk);
+		return put_user(amount, (int __user *)argp);
+	}
+
+	case TIOCINQ: {
+		struct sk_buff *skb;
+		long amount = 0L;
+		/* These two are safe on a single CPU system as only user tasks fiddle here */
+		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+			amount = skb->len;
+		release_sock(sk);
+		return put_user(amount, (int __user *)argp);
+	}
+
+	case SIOCGSTAMP:
+		ret = -EINVAL;
+		if (sk != NULL)
+			ret = sock_get_timestamp(sk, argp);
+		release_sock(sk);
+		return ret;
+
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+	case SIOCGIFMETRIC:
+	case SIOCSIFMETRIC:
+		release_sock(sk);
+		return -EINVAL;
+
+	case SIOCADDRT:
+	case SIOCDELRT:
+	case SIOCNRDECOBS:
+		release_sock(sk);
+		if (!capable(CAP_NET_ADMIN)) return -EPERM;
+		return nr_rt_ioctl(cmd, argp);
+
+	default:
+		release_sock(sk);
+		return dev_ioctl(cmd, argp);
+	}
+	release_sock(sk);
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *nr_info_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock *s;
+	struct hlist_node *node;
+	int i = 1;
+
+	spin_lock_bh(&nr_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	sk_for_each(s, node, &nr_list) {
+		if (i == *pos)
+			return s;
+		++i;
+	}
+	return NULL;
+}
+
+static void *nr_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return (v == SEQ_START_TOKEN) ? sk_head(&nr_list) 
+		: sk_next((struct sock *)v);
+}
+	
+static void nr_info_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&nr_list_lock);
+}
+
+static int nr_info_show(struct seq_file *seq, void *v)
+{
+	struct sock *s = v;
+	struct net_device *dev;
+	struct nr_sock *nr;
+	const char *devname;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+"user_addr dest_node src_node  dev    my  your  st  vs  vr  va    t1     t2     t4      idle   n2  wnd Snd-Q Rcv-Q inode\n");
+
+	else {
+
+		bh_lock_sock(s);
+		nr = nr_sk(s);
+
+		if ((dev = nr->device) == NULL)
+			devname = "???";
+		else
+			devname = dev->name;
+
+		seq_printf(seq, "%-9s ", ax2asc(&nr->user_addr));
+		seq_printf(seq, "%-9s ", ax2asc(&nr->dest_addr));
+		seq_printf(seq, 
+"%-9s %-3s  %02X/%02X %02X/%02X %2d %3d %3d %3d %3lu/%03lu %2lu/%02lu %3lu/%03lu %3lu/%03lu %2d/%02d %3d %5d %5d %ld\n",
+			ax2asc(&nr->source_addr),
+			devname,
+			nr->my_index,
+			nr->my_id,
+			nr->your_index,
+			nr->your_id,
+			nr->state,
+			nr->vs,
+			nr->vr,
+			nr->va,
+			ax25_display_timer(&nr->t1timer) / HZ,
+			nr->t1 / HZ,
+			ax25_display_timer(&nr->t2timer) / HZ,
+			nr->t2 / HZ,
+			ax25_display_timer(&nr->t4timer) / HZ,
+			nr->t4 / HZ,
+			ax25_display_timer(&nr->idletimer) / (60 * HZ),
+			nr->idle / (60 * HZ),
+			nr->n2count,
+			nr->n2,
+			nr->window,
+			atomic_read(&s->sk_wmem_alloc),
+			atomic_read(&s->sk_rmem_alloc),
+			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+
+		bh_unlock_sock(s);
+	}
+	return 0;
+}
+
+static struct seq_operations nr_info_seqops = {
+	.start = nr_info_start,
+	.next = nr_info_next,
+	.stop = nr_info_stop,
+	.show = nr_info_show,
+};
+ 
+static int nr_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &nr_info_seqops);
+}
+ 
+static struct file_operations nr_info_fops = {
+	.owner = THIS_MODULE,
+	.open = nr_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+#endif	/* CONFIG_PROC_FS */
+
+static struct net_proto_family nr_family_ops = {
+	.family		=	PF_NETROM,
+	.create		=	nr_create,
+	.owner		=	THIS_MODULE,
+};
+
+static struct proto_ops nr_proto_ops = {
+	.family		=	PF_NETROM,
+	.owner		=	THIS_MODULE,
+	.release	=	nr_release,
+	.bind		=	nr_bind,
+	.connect	=	nr_connect,
+	.socketpair	=	sock_no_socketpair,
+	.accept		=	nr_accept,
+	.getname	=	nr_getname,
+	.poll		=	datagram_poll,
+	.ioctl		=	nr_ioctl,
+	.listen		=	nr_listen,
+	.shutdown	=	sock_no_shutdown,
+	.setsockopt	=	nr_setsockopt,
+	.getsockopt	=	nr_getsockopt,
+	.sendmsg	=	nr_sendmsg,
+	.recvmsg	=	nr_recvmsg,
+	.mmap		=	sock_no_mmap,
+	.sendpage	=	sock_no_sendpage,
+};
+
+static struct notifier_block nr_dev_notifier = {
+	.notifier_call	=	nr_device_event,
+};
+
+static struct net_device **dev_nr;
+
+static char banner[] __initdata = KERN_INFO "G4KLX NET/ROM for Linux. Version 0.7 for AX25.037 Linux 2.4\n";
+
+static int __init nr_proto_init(void)
+{
+	int i;
+	int rc = proto_register(&nr_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
+		printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
+		return -1;
+	}
+
+	dev_nr = kmalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+	if (dev_nr == NULL) {
+		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
+		return -1;
+	}
+
+	memset(dev_nr, 0x00, nr_ndevs * sizeof(struct net_device *));
+
+	for (i = 0; i < nr_ndevs; i++) {
+		char name[IFNAMSIZ];
+		struct net_device *dev;
+
+		sprintf(name, "nr%d", i);
+		dev = alloc_netdev(sizeof(struct net_device_stats), name,
+					  nr_setup);
+		if (!dev) {
+			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+			goto fail;
+		}
+		
+		dev->base_addr = i;
+		if (register_netdev(dev)) {
+			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+			free_netdev(dev);
+			goto fail;
+		}
+		dev_nr[i] = dev;
+	}
+
+	if (sock_register(&nr_family_ops)) {
+		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+		goto fail;
+	}
+		
+	register_netdevice_notifier(&nr_dev_notifier);
+	printk(banner);
+
+	ax25_protocol_register(AX25_P_NETROM, nr_route_frame);
+	ax25_linkfail_register(nr_link_failed);
+
+#ifdef CONFIG_SYSCTL
+	nr_register_sysctl();
+#endif
+
+	nr_loopback_init();
+
+	proc_net_fops_create("nr", S_IRUGO, &nr_info_fops);
+	proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops);
+	proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops);
+out:
+	return rc;
+fail:
+	while (--i >= 0) {
+		unregister_netdev(dev_nr[i]);
+		free_netdev(dev_nr[i]);
+	}
+	kfree(dev_nr);
+	proto_unregister(&nr_proto);
+	rc = -1;
+	goto out;
+}
+
+module_init(nr_proto_init);
+
+module_param(nr_ndevs, int, 0);
+MODULE_PARM_DESC(nr_ndevs, "number of NET/ROM devices");
+
+MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
+MODULE_DESCRIPTION("The amateur radio NET/ROM network and transport layer protocol");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_NETROM);
+
+static void __exit nr_exit(void)
+{
+	int i;
+
+	proc_net_remove("nr");
+	proc_net_remove("nr_neigh");
+	proc_net_remove("nr_nodes");
+	nr_loopback_clear();
+
+	nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+	nr_unregister_sysctl();
+#endif
+
+	ax25_linkfail_release(nr_link_failed);
+	ax25_protocol_release(AX25_P_NETROM);
+
+	unregister_netdevice_notifier(&nr_dev_notifier);
+
+	sock_unregister(PF_NETROM);
+
+	for (i = 0; i < nr_ndevs; i++) {
+		struct net_device *dev = dev_nr[i];
+		if (dev) {
+			unregister_netdev(dev);
+			free_netdev(dev);
+		}
+	}
+
+	kfree(dev_nr);
+	proto_unregister(&nr_proto);
+}
+module_exit(nr_exit);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
new file mode 100644
index 00000000000..220bf7494f7
--- /dev/null
+++ b/net/netrom/nr_dev.c
@@ -0,0 +1,220 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>	/* For the statistics structure. */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+
+#include <net/ip.h>
+#include <net/arp.h>
+
+#include <net/ax25.h>
+#include <net/netrom.h>
+
+#ifdef CONFIG_INET
+
+/*
+ *	Only allow IP over NET/ROM frames through if the netrom device is up.
+ */
+
+int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = netdev_priv(dev);
+
+	if (!netif_running(dev)) {
+		stats->rx_errors++;
+		return 0;
+	}
+
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+
+	skb->protocol = htons(ETH_P_IP);
+
+	/* Spoof incoming device */
+	skb->dev      = dev;
+	skb->h.raw    = skb->data;
+	skb->nh.raw   = skb->data;
+	skb->pkt_type = PACKET_HOST;
+
+	ip_rcv(skb, skb->dev, NULL);
+
+	return 1;
+}
+
+
+static int nr_rebuild_header(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct net_device_stats *stats = netdev_priv(dev);
+	struct sk_buff *skbn;
+	unsigned char *bp = skb->data;
+	int len;
+
+	if (arp_find(bp + 7, skb)) {
+		return 1;
+	}
+
+	bp[6] &= ~AX25_CBIT;
+	bp[6] &= ~AX25_EBIT;
+	bp[6] |= AX25_SSSID_SPARE;
+	bp    += AX25_ADDR_LEN;
+
+	bp[6] &= ~AX25_CBIT;
+	bp[6] |= AX25_EBIT;
+	bp[6] |= AX25_SSSID_SPARE;
+
+	if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	if (skb->sk != NULL)
+		skb_set_owner_w(skbn, skb->sk);
+
+	kfree_skb(skb);
+
+	len = skbn->len;
+
+	if (!nr_route_frame(skbn, NULL)) {
+		kfree_skb(skbn);
+		stats->tx_errors++;
+	}
+
+	stats->tx_packets++;
+	stats->tx_bytes += len;
+
+	return 1;
+}
+
+#else
+
+static int nr_rebuild_header(struct sk_buff *skb)
+{
+	return 1;
+}
+
+#endif
+
+static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	void *daddr, void *saddr, unsigned len)
+{
+	unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
+
+	memcpy(buff, (saddr != NULL) ? saddr : dev->dev_addr, dev->addr_len);
+	buff[6] &= ~AX25_CBIT;
+	buff[6] &= ~AX25_EBIT;
+	buff[6] |= AX25_SSSID_SPARE;
+	buff    += AX25_ADDR_LEN;
+
+	if (daddr != NULL)
+		memcpy(buff, daddr, dev->addr_len);
+	buff[6] &= ~AX25_CBIT;
+	buff[6] |= AX25_EBIT;
+	buff[6] |= AX25_SSSID_SPARE;
+	buff    += AX25_ADDR_LEN;
+
+	*buff++ = sysctl_netrom_network_ttl_initialiser;
+
+	*buff++ = NR_PROTO_IP;
+	*buff++ = NR_PROTO_IP;
+	*buff++ = 0;
+	*buff++ = 0;
+	*buff++ = NR_PROTOEXT;
+
+	if (daddr != NULL)
+		return 37;
+
+	return -37;
+}
+
+static int nr_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sa = addr;
+
+	if (dev->flags & IFF_UP)
+		ax25_listen_release((ax25_address *)dev->dev_addr, NULL);
+
+	memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+
+	if (dev->flags & IFF_UP)
+		ax25_listen_register((ax25_address *)dev->dev_addr, NULL);
+
+	return 0;
+}
+
+static int nr_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	ax25_listen_register((ax25_address *)dev->dev_addr, NULL);
+	return 0;
+}
+
+static int nr_close(struct net_device *dev)
+{
+	ax25_listen_release((ax25_address *)dev->dev_addr, NULL);
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static int nr_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = netdev_priv(dev);
+	dev_kfree_skb(skb);
+	stats->tx_errors++;
+	return 0;
+}
+
+static struct net_device_stats *nr_get_stats(struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
+void nr_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->mtu		= NR_MAX_PACKET_SIZE;
+	dev->hard_start_xmit	= nr_xmit;
+	dev->open		= nr_open;
+	dev->stop		= nr_close;
+
+	dev->hard_header	= nr_header;
+	dev->hard_header_len	= NR_NETWORK_LEN + NR_TRANSPORT_LEN;
+	dev->addr_len		= AX25_ADDR_LEN;
+	dev->type		= ARPHRD_NETROM;
+	dev->tx_queue_len	= 40;
+	dev->rebuild_header	= nr_rebuild_header;
+	dev->set_mac_address    = nr_set_mac_address;
+
+	/* New-style flags. */
+	dev->flags		= 0;
+
+	dev->get_stats 		= nr_get_stats;
+}
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
new file mode 100644
index 00000000000..9c44b379412
--- /dev/null
+++ b/net/netrom/nr_in.c
@@ -0,0 +1,290 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/ip.h>			/* For ip_rcv */
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/netrom.h>
+
+static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
+{
+	struct sk_buff *skbo, *skbn = skb;
+	struct nr_sock *nr = nr_sk(sk);
+
+	skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
+
+	nr_start_idletimer(sk);
+
+	if (more) {
+		nr->fraglen += skb->len;
+		skb_queue_tail(&nr->frag_queue, skb);
+		return 0;
+	}
+
+	if (!more && nr->fraglen > 0) {	/* End of fragment */
+		nr->fraglen += skb->len;
+		skb_queue_tail(&nr->frag_queue, skb);
+
+		if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
+			return 1;
+
+		skbn->h.raw = skbn->data;
+
+		while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
+			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			kfree_skb(skbo);
+		}
+
+		nr->fraglen = 0;
+	}
+
+	return sock_queue_rcv_skb(sk, skbn);
+}
+
+/*
+ * State machine for state 1, Awaiting Connection State.
+ * The handling of the timer(s) is in file nr_timer.c.
+ * Handling of state 0 and connection release is in netrom.c.
+ */
+static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
+	int frametype)
+{
+	switch (frametype) {
+	case NR_CONNACK: {
+		struct nr_sock *nr = nr_sk(sk);
+
+		nr_stop_t1timer(sk);
+		nr_start_idletimer(sk);
+		nr->your_index = skb->data[17];
+		nr->your_id    = skb->data[18];
+		nr->vs	       = 0;
+		nr->va	       = 0;
+		nr->vr	       = 0;
+		nr->vl	       = 0;
+		nr->state      = NR_STATE_3;
+		nr->n2count    = 0;
+		nr->window     = skb->data[20];
+		sk->sk_state   = TCP_ESTABLISHED;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		break;
+	}
+
+	case NR_CONNACK | NR_CHOKE_FLAG:
+		nr_disconnect(sk, ECONNREFUSED);
+		break;
+
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * State machine for state 2, Awaiting Release State.
+ * The handling of the timer(s) is in file nr_timer.c
+ * Handling of state 0 and connection release is in netrom.c.
+ */
+static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
+	int frametype)
+{
+	switch (frametype) {
+	case NR_CONNACK | NR_CHOKE_FLAG:
+		nr_disconnect(sk, ECONNRESET);
+		break;
+
+	case NR_DISCREQ:
+		nr_write_internal(sk, NR_DISCACK);
+
+	case NR_DISCACK:
+		nr_disconnect(sk, 0);
+		break;
+
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * State machine for state 3, Connected State.
+ * The handling of the timer(s) is in file nr_timer.c
+ * Handling of state 0 and connection release is in netrom.c.
+ */
+static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	struct nr_sock *nrom = nr_sk(sk);
+	struct sk_buff_head temp_queue;
+	struct sk_buff *skbn;
+	unsigned short save_vr;
+	unsigned short nr, ns;
+	int queued = 0;
+
+	nr = skb->data[18];
+	ns = skb->data[17];
+
+	switch (frametype) {
+	case NR_CONNREQ:
+		nr_write_internal(sk, NR_CONNACK);
+		break;
+
+	case NR_DISCREQ:
+		nr_write_internal(sk, NR_DISCACK);
+		nr_disconnect(sk, 0);
+		break;
+
+	case NR_CONNACK | NR_CHOKE_FLAG:
+	case NR_DISCACK:
+		nr_disconnect(sk, ECONNRESET);
+		break;
+
+	case NR_INFOACK:
+	case NR_INFOACK | NR_CHOKE_FLAG:
+	case NR_INFOACK | NR_NAK_FLAG:
+	case NR_INFOACK | NR_NAK_FLAG | NR_CHOKE_FLAG:
+		if (frametype & NR_CHOKE_FLAG) {
+			nrom->condition |= NR_COND_PEER_RX_BUSY;
+			nr_start_t4timer(sk);
+		} else {
+			nrom->condition &= ~NR_COND_PEER_RX_BUSY;
+			nr_stop_t4timer(sk);
+		}
+		if (!nr_validate_nr(sk, nr)) {
+			break;
+		}
+		if (frametype & NR_NAK_FLAG) {
+			nr_frames_acked(sk, nr);
+			nr_send_nak_frame(sk);
+		} else {
+			if (nrom->condition & NR_COND_PEER_RX_BUSY) {
+				nr_frames_acked(sk, nr);
+			} else {
+				nr_check_iframes_acked(sk, nr);
+			}
+		}
+		break;
+
+	case NR_INFO:
+	case NR_INFO | NR_NAK_FLAG:
+	case NR_INFO | NR_CHOKE_FLAG:
+	case NR_INFO | NR_MORE_FLAG:
+	case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG:
+	case NR_INFO | NR_CHOKE_FLAG | NR_MORE_FLAG:
+	case NR_INFO | NR_NAK_FLAG | NR_MORE_FLAG:
+	case NR_INFO | NR_NAK_FLAG | NR_CHOKE_FLAG | NR_MORE_FLAG:
+		if (frametype & NR_CHOKE_FLAG) {
+			nrom->condition |= NR_COND_PEER_RX_BUSY;
+			nr_start_t4timer(sk);
+		} else {
+			nrom->condition &= ~NR_COND_PEER_RX_BUSY;
+			nr_stop_t4timer(sk);
+		}
+		if (nr_validate_nr(sk, nr)) {
+			if (frametype & NR_NAK_FLAG) {
+				nr_frames_acked(sk, nr);
+				nr_send_nak_frame(sk);
+			} else {
+				if (nrom->condition & NR_COND_PEER_RX_BUSY) {
+					nr_frames_acked(sk, nr);
+				} else {
+					nr_check_iframes_acked(sk, nr);
+				}
+			}
+		}
+		queued = 1;
+		skb_queue_head(&nrom->reseq_queue, skb);
+		if (nrom->condition & NR_COND_OWN_RX_BUSY)
+			break;
+		skb_queue_head_init(&temp_queue);
+		do {
+			save_vr = nrom->vr;
+			while ((skbn = skb_dequeue(&nrom->reseq_queue)) != NULL) {
+				ns = skbn->data[17];
+				if (ns == nrom->vr) {
+					if (nr_queue_rx_frame(sk, skbn, frametype & NR_MORE_FLAG) == 0) {
+						nrom->vr = (nrom->vr + 1) % NR_MODULUS;
+					} else {
+						nrom->condition |= NR_COND_OWN_RX_BUSY;
+						skb_queue_tail(&temp_queue, skbn);
+					}
+				} else if (nr_in_rx_window(sk, ns)) {
+					skb_queue_tail(&temp_queue, skbn);
+				} else {
+					kfree_skb(skbn);
+				}
+			}
+			while ((skbn = skb_dequeue(&temp_queue)) != NULL) {
+				skb_queue_tail(&nrom->reseq_queue, skbn);
+			}
+		} while (save_vr != nrom->vr);
+		/*
+		 * Window is full, ack it immediately.
+		 */
+		if (((nrom->vl + nrom->window) % NR_MODULUS) == nrom->vr) {
+			nr_enquiry_response(sk);
+		} else {
+			if (!(nrom->condition & NR_COND_ACK_PENDING)) {
+				nrom->condition |= NR_COND_ACK_PENDING;
+				nr_start_t2timer(sk);
+			}
+		}
+		break;
+
+	default:
+		break;
+	}
+	return queued;
+}
+
+/* Higher level upcall for a LAPB frame - called with sk locked */
+int nr_process_rx_frame(struct sock *sk, struct sk_buff *skb)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	int queued = 0, frametype;
+
+	if (nr->state == NR_STATE_0)
+		return 0;
+
+	frametype = skb->data[19];
+
+	switch (nr->state) {
+	case NR_STATE_1:
+		queued = nr_state1_machine(sk, skb, frametype);
+		break;
+	case NR_STATE_2:
+		queued = nr_state2_machine(sk, skb, frametype);
+		break;
+	case NR_STATE_3:
+		queued = nr_state3_machine(sk, skb, frametype);
+		break;
+	}
+
+	nr_kick(sk);
+
+	return queued;
+}
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
new file mode 100644
index 00000000000..165b2abce11
--- /dev/null
+++ b/net/netrom/nr_loopback.c
@@ -0,0 +1,76 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi)
+ */
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/timer.h>
+#include <net/ax25.h>
+#include <linux/skbuff.h>
+#include <net/netrom.h>
+#include <linux/init.h>
+
+static void nr_loopback_timer(unsigned long);
+
+static struct sk_buff_head loopback_queue;
+static struct timer_list loopback_timer = TIMER_INITIALIZER(nr_loopback_timer, 0, 0);
+
+void __init nr_loopback_init(void)
+{
+	skb_queue_head_init(&loopback_queue);
+}
+
+static inline int nr_loopback_running(void)
+{
+	return timer_pending(&loopback_timer);
+}
+
+int nr_loopback_queue(struct sk_buff *skb)
+{
+	struct sk_buff *skbn;
+
+	if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
+		memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
+		skbn->h.raw = skbn->data;
+
+		skb_queue_tail(&loopback_queue, skbn);
+
+		if (!nr_loopback_running())
+			mod_timer(&loopback_timer, jiffies + 10);
+	}
+
+	kfree_skb(skb);
+	return 1;
+}
+
+static void nr_loopback_timer(unsigned long param)
+{
+	struct sk_buff *skb;
+	ax25_address *nr_dest;
+	struct net_device *dev;
+
+	if ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+		nr_dest = (ax25_address *)(skb->data + 7);
+
+		dev = nr_dev_get(nr_dest);
+
+		if (dev == NULL || nr_rx_frame(skb, dev) == 0)
+			kfree_skb(skb);
+
+		if (dev != NULL)
+			dev_put(dev);
+
+		if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
+			mod_timer(&loopback_timer, jiffies + 10);
+	}
+}
+
+void __exit nr_loopback_clear(void)
+{
+	del_timer_sync(&loopback_timer);
+	skb_queue_purge(&loopback_queue);
+}
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
new file mode 100644
index 00000000000..7939ded9c98
--- /dev/null
+++ b/net/netrom/nr_out.c
@@ -0,0 +1,274 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/netrom.h>
+
+/*
+ *	This is where all NET/ROM frames pass, except for IP-over-NET/ROM which
+ *	cannot be fragmented in this manner.
+ */
+void nr_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *skbn;
+	unsigned char transport[NR_TRANSPORT_LEN];
+	int err, frontlen, len;
+
+	if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
+		/* Save a copy of the Transport Header */
+		memcpy(transport, skb->data, NR_TRANSPORT_LEN);
+		skb_pull(skb, NR_TRANSPORT_LEN);
+
+		frontlen = skb_headroom(skb);
+
+		while (skb->len > 0) {
+			if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL)
+				return;
+
+			skb_reserve(skbn, frontlen);
+
+			len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
+
+			/* Copy the user data */
+			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_pull(skb, len);
+
+			/* Duplicate the Transport Header */
+			skb_push(skbn, NR_TRANSPORT_LEN);
+			memcpy(skbn->data, transport, NR_TRANSPORT_LEN);
+
+			if (skb->len > 0)
+				skbn->data[4] |= NR_MORE_FLAG;
+
+			skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */
+		}
+
+		kfree_skb(skb);
+	} else {
+		skb_queue_tail(&sk->sk_write_queue, skb);		/* Throw it on the queue */
+	}
+
+	nr_kick(sk);
+}
+
+/*
+ *	This procedure is passed a buffer descriptor for an iframe. It builds
+ *	the rest of the control part of the frame and then writes it out.
+ */
+static void nr_send_iframe(struct sock *sk, struct sk_buff *skb)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	if (skb == NULL)
+		return;
+
+	skb->data[2] = nr->vs;
+	skb->data[3] = nr->vr;
+
+	if (nr->condition & NR_COND_OWN_RX_BUSY)
+		skb->data[4] |= NR_CHOKE_FLAG;
+
+	nr_start_idletimer(sk);
+
+	nr_transmit_buffer(sk, skb);
+}
+
+void nr_send_nak_frame(struct sock *sk)
+{
+	struct sk_buff *skb, *skbn;
+	struct nr_sock *nr = nr_sk(sk);
+
+	if ((skb = skb_peek(&nr->ack_queue)) == NULL)
+		return;
+
+	if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL)
+		return;
+
+	skbn->data[2] = nr->va;
+	skbn->data[3] = nr->vr;
+
+	if (nr->condition & NR_COND_OWN_RX_BUSY)
+		skbn->data[4] |= NR_CHOKE_FLAG;
+
+	nr_transmit_buffer(sk, skbn);
+
+	nr->condition &= ~NR_COND_ACK_PENDING;
+	nr->vl         = nr->vr;
+
+	nr_stop_t1timer(sk);
+}
+
+void nr_kick(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	struct sk_buff *skb, *skbn;
+	unsigned short start, end;
+
+	if (nr->state != NR_STATE_3)
+		return;
+
+	if (nr->condition & NR_COND_PEER_RX_BUSY)
+		return;
+
+	if (!skb_peek(&sk->sk_write_queue))
+		return;
+
+	start = (skb_peek(&nr->ack_queue) == NULL) ? nr->va : nr->vs;
+	end   = (nr->va + nr->window) % NR_MODULUS;
+
+	if (start == end)
+		return;
+
+	nr->vs = start;
+
+	/*
+	 * Transmit data until either we're out of data to send or
+	 * the window is full.
+	 */
+
+	/*
+	 * Dequeue the frame and copy it.
+	 */
+	skb = skb_dequeue(&sk->sk_write_queue);
+
+	do {
+		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+			skb_queue_head(&sk->sk_write_queue, skb);
+			break;
+		}
+
+		skb_set_owner_w(skbn, sk);
+
+		/*
+		 * Transmit the frame copy.
+		 */
+		nr_send_iframe(sk, skbn);
+
+		nr->vs = (nr->vs + 1) % NR_MODULUS;
+
+		/*
+		 * Requeue the original data frame.
+		 */
+		skb_queue_tail(&nr->ack_queue, skb);
+
+	} while (nr->vs != end &&
+		 (skb = skb_dequeue(&sk->sk_write_queue)) != NULL);
+
+	nr->vl         = nr->vr;
+	nr->condition &= ~NR_COND_ACK_PENDING;
+
+	if (!nr_t1timer_running(sk))
+		nr_start_t1timer(sk);
+}
+
+void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	unsigned char *dptr;
+
+	/*
+	 *	Add the protocol byte and network header.
+	 */
+	dptr = skb_push(skb, NR_NETWORK_LEN);
+
+	memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN);
+	dptr[6] &= ~AX25_CBIT;
+	dptr[6] &= ~AX25_EBIT;
+	dptr[6] |= AX25_SSSID_SPARE;
+	dptr += AX25_ADDR_LEN;
+
+	memcpy(dptr, &nr->dest_addr, AX25_ADDR_LEN);
+	dptr[6] &= ~AX25_CBIT;
+	dptr[6] |= AX25_EBIT;
+	dptr[6] |= AX25_SSSID_SPARE;
+	dptr += AX25_ADDR_LEN;
+
+	*dptr++ = sysctl_netrom_network_ttl_initialiser;
+
+	if (!nr_route_frame(skb, NULL)) {
+		kfree_skb(skb);
+		nr_disconnect(sk, ENETUNREACH);
+	}
+}
+
+/*
+ * The following routines are taken from page 170 of the 7th ARRL Computer
+ * Networking Conference paper, as is the whole state machine.
+ */
+
+void nr_establish_data_link(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	nr->condition = 0x00;
+	nr->n2count   = 0;
+
+	nr_write_internal(sk, NR_CONNREQ);
+
+	nr_stop_t2timer(sk);
+	nr_stop_t4timer(sk);
+	nr_stop_idletimer(sk);
+	nr_start_t1timer(sk);
+}
+
+/*
+ * Never send a NAK when we are CHOKEd.
+ */
+void nr_enquiry_response(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	int frametype = NR_INFOACK;
+
+	if (nr->condition & NR_COND_OWN_RX_BUSY) {
+		frametype |= NR_CHOKE_FLAG;
+	} else {
+		if (skb_peek(&nr->reseq_queue) != NULL)
+			frametype |= NR_NAK_FLAG;
+	}
+
+	nr_write_internal(sk, frametype);
+
+	nr->vl         = nr->vr;
+	nr->condition &= ~NR_COND_ACK_PENDING;
+}
+
+void nr_check_iframes_acked(struct sock *sk, unsigned short nr)
+{
+	struct nr_sock *nrom = nr_sk(sk);
+
+	if (nrom->vs == nr) {
+		nr_frames_acked(sk, nr);
+		nr_stop_t1timer(sk);
+		nrom->n2count = 0;
+	} else {
+		if (nrom->va != nr) {
+			nr_frames_acked(sk, nr);
+			nr_start_t1timer(sk);
+		}
+	}
+}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
new file mode 100644
index 00000000000..7a86b36cba5
--- /dev/null
+++ b/net/netrom/nr_route.c
@@ -0,0 +1,1041 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/arp.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/netfilter.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <net/netrom.h>
+#include <linux/seq_file.h>
+
+static unsigned int nr_neigh_no = 1;
+
+static HLIST_HEAD(nr_node_list);
+static DEFINE_SPINLOCK(nr_node_list_lock);
+static HLIST_HEAD(nr_neigh_list);
+static DEFINE_SPINLOCK(nr_neigh_list_lock);
+
+static struct nr_node *nr_node_get(ax25_address *callsign)
+{
+	struct nr_node *found = NULL;
+	struct nr_node *nr_node;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_node_list_lock);
+	nr_node_for_each(nr_node, node, &nr_node_list)
+		if (ax25cmp(callsign, &nr_node->callsign) == 0) {
+			nr_node_hold(nr_node);
+			found = nr_node;
+			break;
+		}
+	spin_unlock_bh(&nr_node_list_lock);
+	return found;
+}
+
+static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
+					 struct net_device *dev)
+{
+	struct nr_neigh *found = NULL;
+	struct nr_neigh *nr_neigh;
+	struct hlist_node *node;
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	nr_neigh_for_each(nr_neigh, node, &nr_neigh_list)
+		if (ax25cmp(callsign, &nr_neigh->callsign) == 0 &&
+		    nr_neigh->dev == dev) {
+			nr_neigh_hold(nr_neigh);
+			found = nr_neigh;
+			break;
+		}
+	spin_unlock_bh(&nr_neigh_list_lock);
+	return found;
+}
+
+static void nr_remove_neigh(struct nr_neigh *);
+
+/*
+ *	Add a new route to a node, and in the process add the node and the
+ *	neighbour if it is new.
+ */
+static int nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax25,
+	ax25_digi *ax25_digi, struct net_device *dev, int quality, int obs_count)
+{
+	struct nr_node  *nr_node;
+	struct nr_neigh *nr_neigh;
+	struct nr_route nr_route;
+	int i, found;
+	struct net_device *odev;
+
+	if ((odev=nr_dev_get(nr)) != NULL) {	/* Can't add routes to ourself */
+		dev_put(odev);
+		return -EINVAL;
+	}
+
+	nr_node = nr_node_get(nr);
+
+	nr_neigh = nr_neigh_get_dev(ax25, dev);
+
+	/*
+	 * The L2 link to a neighbour has failed in the past
+	 * and now a frame comes from this neighbour. We assume
+	 * it was a temporary trouble with the link and reset the
+	 * routes now (and not wait for a node broadcast).
+	 */
+	if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) {
+		struct nr_node *nr_nodet;
+		struct hlist_node *node;
+
+		spin_lock_bh(&nr_node_list_lock);
+		nr_node_for_each(nr_nodet, node, &nr_node_list) {
+			nr_node_lock(nr_nodet);
+			for (i = 0; i < nr_nodet->count; i++)
+				if (nr_nodet->routes[i].neighbour == nr_neigh)
+					if (i < nr_nodet->which)
+						nr_nodet->which = i;
+			nr_node_unlock(nr_nodet);
+		}
+		spin_unlock_bh(&nr_node_list_lock);
+	}
+
+	if (nr_neigh != NULL)
+		nr_neigh->failed = 0;
+
+	if (quality == 0 && nr_neigh != NULL && nr_node != NULL) {
+		nr_neigh_put(nr_neigh);
+		nr_node_put(nr_node);
+		return 0;
+	}
+
+	if (nr_neigh == NULL) {
+		if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) {
+			if (nr_node)
+				nr_node_put(nr_node);
+			return -ENOMEM;
+		}
+
+		nr_neigh->callsign = *ax25;
+		nr_neigh->digipeat = NULL;
+		nr_neigh->ax25     = NULL;
+		nr_neigh->dev      = dev;
+		nr_neigh->quality  = sysctl_netrom_default_path_quality;
+		nr_neigh->locked   = 0;
+		nr_neigh->count    = 0;
+		nr_neigh->number   = nr_neigh_no++;
+		nr_neigh->failed   = 0;
+		atomic_set(&nr_neigh->refcount, 1);
+
+		if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
+			if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) {
+				kfree(nr_neigh);
+				if (nr_node)
+					nr_node_put(nr_node);
+				return -ENOMEM;
+			}
+			memcpy(nr_neigh->digipeat, ax25_digi,
+					sizeof(*ax25_digi));
+		}
+
+		spin_lock_bh(&nr_neigh_list_lock);
+		hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list);
+		nr_neigh_hold(nr_neigh);
+		spin_unlock_bh(&nr_neigh_list_lock);
+	}
+
+	if (quality != 0 && ax25cmp(nr, ax25) == 0 && !nr_neigh->locked)
+		nr_neigh->quality = quality;
+
+	if (nr_node == NULL) {
+		if ((nr_node = kmalloc(sizeof(*nr_node), GFP_ATOMIC)) == NULL) {
+			if (nr_neigh)
+				nr_neigh_put(nr_neigh);
+			return -ENOMEM;
+		}
+
+		nr_node->callsign = *nr;
+		strcpy(nr_node->mnemonic, mnemonic);
+
+		nr_node->which = 0;
+		nr_node->count = 1;
+		atomic_set(&nr_node->refcount, 1);
+		spin_lock_init(&nr_node->node_lock);
+
+		nr_node->routes[0].quality   = quality;
+		nr_node->routes[0].obs_count = obs_count;
+		nr_node->routes[0].neighbour = nr_neigh;
+
+		nr_neigh_hold(nr_neigh);
+		nr_neigh->count++;
+
+		spin_lock_bh(&nr_node_list_lock);
+		hlist_add_head(&nr_node->node_node, &nr_node_list);
+		/* refcount initialized at 1 */
+		spin_unlock_bh(&nr_node_list_lock);
+
+		return 0;
+	}
+	nr_node_lock(nr_node);
+
+	if (quality != 0)
+		strcpy(nr_node->mnemonic, mnemonic);
+
+	for (found = 0, i = 0; i < nr_node->count; i++) {
+		if (nr_node->routes[i].neighbour == nr_neigh) {
+			nr_node->routes[i].quality   = quality;
+			nr_node->routes[i].obs_count = obs_count;
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		/* We have space at the bottom, slot it in */
+		if (nr_node->count < 3) {
+			nr_node->routes[2] = nr_node->routes[1];
+			nr_node->routes[1] = nr_node->routes[0];
+
+			nr_node->routes[0].quality   = quality;
+			nr_node->routes[0].obs_count = obs_count;
+			nr_node->routes[0].neighbour = nr_neigh;
+
+			nr_node->which++;
+			nr_node->count++;
+			nr_neigh_hold(nr_neigh);
+			nr_neigh->count++;
+		} else {
+			/* It must be better than the worst */
+			if (quality > nr_node->routes[2].quality) {
+				nr_node->routes[2].neighbour->count--;
+				nr_neigh_put(nr_node->routes[2].neighbour);
+
+				if (nr_node->routes[2].neighbour->count == 0 && !nr_node->routes[2].neighbour->locked)
+					nr_remove_neigh(nr_node->routes[2].neighbour);
+
+				nr_node->routes[2].quality   = quality;
+				nr_node->routes[2].obs_count = obs_count;
+				nr_node->routes[2].neighbour = nr_neigh;
+
+				nr_neigh_hold(nr_neigh);
+				nr_neigh->count++;
+			}
+		}
+	}
+
+	/* Now re-sort the routes in quality order */
+	switch (nr_node->count) {
+	case 3:
+		if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
+			switch (nr_node->which) {
+				case 0:  nr_node->which = 1; break;
+				case 1:  nr_node->which = 0; break;
+				default: break;
+			}
+			nr_route           = nr_node->routes[0];
+			nr_node->routes[0] = nr_node->routes[1];
+			nr_node->routes[1] = nr_route;
+		}
+		if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
+			switch (nr_node->which) {
+			case 1:  nr_node->which = 2;
+				break;
+
+			case 2:  nr_node->which = 1;
+				break;
+
+			default:
+				break;
+			}
+			nr_route           = nr_node->routes[1];
+			nr_node->routes[1] = nr_node->routes[2];
+			nr_node->routes[2] = nr_route;
+		}
+	case 2:
+		if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
+			switch (nr_node->which) {
+			case 0:  nr_node->which = 1;
+				break;
+
+			case 1:  nr_node->which = 0;
+				break;
+
+			default: break;
+			}
+			nr_route           = nr_node->routes[0];
+			nr_node->routes[0] = nr_node->routes[1];
+			nr_node->routes[1] = nr_route;
+			}
+	case 1:
+		break;
+	}
+
+	for (i = 0; i < nr_node->count; i++) {
+		if (nr_node->routes[i].neighbour == nr_neigh) {
+			if (i < nr_node->which)
+				nr_node->which = i;
+			break;
+		}
+	}
+
+	nr_neigh_put(nr_neigh);
+	nr_node_unlock(nr_node);
+	nr_node_put(nr_node);
+	return 0;
+}
+
+static inline void __nr_remove_node(struct nr_node *nr_node)
+{
+	hlist_del_init(&nr_node->node_node);
+	nr_node_put(nr_node);
+}
+
+#define nr_remove_node_locked(__node) \
+	__nr_remove_node(__node)
+
+static void nr_remove_node(struct nr_node *nr_node)
+{
+	spin_lock_bh(&nr_node_list_lock);
+	__nr_remove_node(nr_node);
+	spin_unlock_bh(&nr_node_list_lock);
+}
+
+static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
+{
+	hlist_del_init(&nr_neigh->neigh_node);
+	nr_neigh_put(nr_neigh);
+}
+
+#define nr_remove_neigh_locked(__neigh) \
+	__nr_remove_neigh(__neigh)
+
+static void nr_remove_neigh(struct nr_neigh *nr_neigh)
+{
+	spin_lock_bh(&nr_neigh_list_lock);
+	__nr_remove_neigh(nr_neigh);
+	spin_unlock_bh(&nr_neigh_list_lock);
+}
+
+/*
+ *	"Delete" a node. Strictly speaking remove a route to a node. The node
+ *	is only deleted if no routes are left to it.
+ */
+static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct net_device *dev)
+{
+	struct nr_node  *nr_node;
+	struct nr_neigh *nr_neigh;
+	int i;
+
+	nr_node = nr_node_get(callsign);
+
+	if (nr_node == NULL)
+		return -EINVAL;
+
+	nr_neigh = nr_neigh_get_dev(neighbour, dev);
+
+	if (nr_neigh == NULL) {
+		nr_node_put(nr_node);
+		return -EINVAL;
+	}
+
+	nr_node_lock(nr_node);
+	for (i = 0; i < nr_node->count; i++) {
+		if (nr_node->routes[i].neighbour == nr_neigh) {
+			nr_neigh->count--;
+			nr_neigh_put(nr_neigh);
+
+			if (nr_neigh->count == 0 && !nr_neigh->locked)
+				nr_remove_neigh(nr_neigh);
+			nr_neigh_put(nr_neigh);
+
+			nr_node->count--;
+
+			if (nr_node->count == 0) {
+				nr_remove_node(nr_node);
+			} else {
+				switch (i) {
+				case 0:
+					nr_node->routes[0] = nr_node->routes[1];
+				case 1:
+					nr_node->routes[1] = nr_node->routes[2];
+				case 2:
+					break;
+				}
+				nr_node_put(nr_node);
+			}
+			nr_node_unlock(nr_node);
+
+			return 0;
+		}
+	}
+	nr_neigh_put(nr_neigh);
+	nr_node_unlock(nr_node);
+	nr_node_put(nr_node);
+
+	return -EINVAL;
+}
+
+/*
+ *	Lock a neighbour with a quality.
+ */
+static int nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality)
+{
+	struct nr_neigh *nr_neigh;
+
+	nr_neigh = nr_neigh_get_dev(callsign, dev);
+	if (nr_neigh) {
+		nr_neigh->quality = quality;
+		nr_neigh->locked  = 1;
+		nr_neigh_put(nr_neigh);
+		return 0;
+	}
+
+	if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL)
+		return -ENOMEM;
+
+	nr_neigh->callsign = *callsign;
+	nr_neigh->digipeat = NULL;
+	nr_neigh->ax25     = NULL;
+	nr_neigh->dev      = dev;
+	nr_neigh->quality  = quality;
+	nr_neigh->locked   = 1;
+	nr_neigh->count    = 0;
+	nr_neigh->number   = nr_neigh_no++;
+	nr_neigh->failed   = 0;
+	atomic_set(&nr_neigh->refcount, 1);
+
+	if (ax25_digi != NULL && ax25_digi->ndigi > 0) {
+		if ((nr_neigh->digipeat = kmalloc(sizeof(*ax25_digi), GFP_KERNEL)) == NULL) {
+			kfree(nr_neigh);
+			return -ENOMEM;
+		}
+		memcpy(nr_neigh->digipeat, ax25_digi, sizeof(*ax25_digi));
+	}
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list);
+	/* refcount is initialized at 1 */
+	spin_unlock_bh(&nr_neigh_list_lock);
+
+	return 0;
+}
+
+/*
+ *	"Delete" a neighbour. The neighbour is only removed if the number
+ *	of nodes that may use it is zero.
+ */
+static int nr_del_neigh(ax25_address *callsign, struct net_device *dev, unsigned int quality)
+{
+	struct nr_neigh *nr_neigh;
+
+	nr_neigh = nr_neigh_get_dev(callsign, dev);
+
+	if (nr_neigh == NULL) return -EINVAL;
+
+	nr_neigh->quality = quality;
+	nr_neigh->locked  = 0;
+
+	if (nr_neigh->count == 0)
+		nr_remove_neigh(nr_neigh);
+	nr_neigh_put(nr_neigh);
+
+	return 0;
+}
+
+/*
+ *	Decrement the obsolescence count by one. If a route is reduced to a
+ *	count of zero, remove it. Also remove any unlocked neighbours with
+ *	zero nodes routing via it.
+ */
+static int nr_dec_obs(void)
+{
+	struct nr_neigh *nr_neigh;
+	struct nr_node  *s;
+	struct hlist_node *node, *nodet;
+	int i;
+
+	spin_lock_bh(&nr_node_list_lock);
+	nr_node_for_each_safe(s, node, nodet, &nr_node_list) {
+		nr_node_lock(s);
+		for (i = 0; i < s->count; i++) {
+			switch (s->routes[i].obs_count) {
+			case 0:		/* A locked entry */
+				break;
+
+			case 1:		/* From 1 -> 0 */
+				nr_neigh = s->routes[i].neighbour;
+
+				nr_neigh->count--;
+				nr_neigh_put(nr_neigh);
+
+				if (nr_neigh->count == 0 && !nr_neigh->locked)
+					nr_remove_neigh(nr_neigh);
+
+				s->count--;
+
+				switch (i) {
+					case 0:
+						s->routes[0] = s->routes[1];
+					case 1:
+						s->routes[1] = s->routes[2];
+					case 2:
+						break;
+				}
+				break;
+
+			default:
+				s->routes[i].obs_count--;
+				break;
+
+			}
+		}
+
+		if (s->count <= 0)
+			nr_remove_node_locked(s);
+		nr_node_unlock(s);
+	}
+	spin_unlock_bh(&nr_node_list_lock);
+
+	return 0;
+}
+
+/*
+ *	A device has been removed. Remove its routes and neighbours.
+ */
+void nr_rt_device_down(struct net_device *dev)
+{
+	struct nr_neigh *s;
+	struct hlist_node *node, *nodet, *node2, *node2t;
+	struct nr_node  *t;
+	int i;
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) {
+		if (s->dev == dev) {
+			spin_lock_bh(&nr_node_list_lock);
+			nr_node_for_each_safe(t, node2, node2t, &nr_node_list) {
+				nr_node_lock(t);
+				for (i = 0; i < t->count; i++) {
+					if (t->routes[i].neighbour == s) {
+						t->count--;
+
+						switch (i) {
+						case 0:
+							t->routes[0] = t->routes[1];
+						case 1:
+							t->routes[1] = t->routes[2];
+						case 2:
+							break;
+						}
+					}
+				}
+
+				if (t->count <= 0)
+					nr_remove_node_locked(t);
+				nr_node_unlock(t);
+			}
+			spin_unlock_bh(&nr_node_list_lock);
+
+			nr_remove_neigh_locked(s);
+		}
+	}
+	spin_unlock_bh(&nr_neigh_list_lock);
+}
+
+/*
+ *	Check that the device given is a valid AX.25 interface that is "up".
+ *	Or a valid ethernet interface with an AX.25 callsign binding.
+ */
+static struct net_device *nr_ax25_dev_get(char *devname)
+{
+	struct net_device *dev;
+
+	if ((dev = dev_get_by_name(devname)) == NULL)
+		return NULL;
+
+	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+		return dev;
+
+	dev_put(dev);
+	return NULL;
+}
+
+/*
+ *	Find the first active NET/ROM device, usually "nr0".
+ */
+struct net_device *nr_dev_first(void)
+{
+	struct net_device *dev, *first = NULL;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
+			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+				first = dev;
+	}
+	if (first)
+		dev_hold(first);
+	read_unlock(&dev_base_lock);
+
+	return first;
+}
+
+/*
+ *	Find the NET/ROM device for the given callsign.
+ */
+struct net_device *nr_dev_get(ax25_address *addr)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
+			dev_hold(dev);
+			goto out;
+		}
+	}
+out:
+	read_unlock(&dev_base_lock);
+	return dev;
+}
+
+static ax25_digi *nr_call_to_digi(int ndigis, ax25_address *digipeaters)
+{
+	static ax25_digi ax25_digi;
+	int i;
+
+	if (ndigis == 0)
+		return NULL;
+
+	for (i = 0; i < ndigis; i++) {
+		ax25_digi.calls[i]    = digipeaters[i];
+		ax25_digi.repeated[i] = 0;
+	}
+
+	ax25_digi.ndigi      = ndigis;
+	ax25_digi.lastrepeat = -1;
+
+	return &ax25_digi;
+}
+
+/*
+ *	Handle the ioctls that control the routing functions.
+ */
+int nr_rt_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct nr_route_struct nr_route;
+	struct net_device *dev;
+	int ret;
+
+	switch (cmd) {
+	case SIOCADDRT:
+		if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct)))
+			return -EFAULT;
+		if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
+			return -EINVAL;
+		if (nr_route.ndigis < 0 || nr_route.ndigis > AX25_MAX_DIGIS) {
+			dev_put(dev);
+			return -EINVAL;
+		}
+		switch (nr_route.type) {
+		case NETROM_NODE:
+			ret = nr_add_node(&nr_route.callsign,
+				nr_route.mnemonic,
+				&nr_route.neighbour,
+				nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters),
+				dev, nr_route.quality,
+				nr_route.obs_count);
+			break;
+		case NETROM_NEIGH:
+			ret = nr_add_neigh(&nr_route.callsign,
+				nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters),
+				dev, nr_route.quality);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+		dev_put(dev);
+		return ret;
+
+	case SIOCDELRT:
+		if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct)))
+			return -EFAULT;
+		if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
+			return -EINVAL;
+		switch (nr_route.type) {
+		case NETROM_NODE:
+			ret = nr_del_node(&nr_route.callsign,
+				&nr_route.neighbour, dev);
+			break;
+		case NETROM_NEIGH:
+			ret = nr_del_neigh(&nr_route.callsign,
+				dev, nr_route.quality);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+		dev_put(dev);
+		return ret;
+
+	case SIOCNRDECOBS:
+		return nr_dec_obs();
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * 	A level 2 link has timed out, therefore it appears to be a poor link,
+ *	then don't use that neighbour until it is reset.
+ */
+void nr_link_failed(ax25_cb *ax25, int reason)
+{
+	struct nr_neigh *s, *nr_neigh = NULL;
+	struct hlist_node *node;
+	struct nr_node  *nr_node = NULL;
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	nr_neigh_for_each(s, node, &nr_neigh_list)
+		if (s->ax25 == ax25) {
+			nr_neigh_hold(s);
+			nr_neigh = s;
+			break;
+		}
+	spin_unlock_bh(&nr_neigh_list_lock);
+
+	if (nr_neigh == NULL) return;
+
+	nr_neigh->ax25 = NULL;
+	ax25_cb_put(ax25);
+
+	if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+		nr_neigh_put(nr_neigh);
+		return;
+	}
+	spin_lock_bh(&nr_node_list_lock);
+	nr_node_for_each(nr_node, node, &nr_node_list)
+		nr_node_lock(nr_node);
+		if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh)
+			nr_node->which++;
+		nr_node_unlock(nr_node);
+	spin_unlock_bh(&nr_node_list_lock);
+	nr_neigh_put(nr_neigh);
+}
+
+/*
+ *	Route a frame to an appropriate AX.25 connection. A NULL ax25_cb
+ *	indicates an internally generated frame.
+ */
+int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
+{
+	ax25_address *nr_src, *nr_dest;
+	struct nr_neigh *nr_neigh;
+	struct nr_node  *nr_node;
+	struct net_device *dev;
+	unsigned char *dptr;
+	ax25_cb *ax25s;
+	int ret;
+	struct sk_buff *skbn;
+
+
+	nr_src  = (ax25_address *)(skb->data + 0);
+	nr_dest = (ax25_address *)(skb->data + 7);
+
+	if (ax25 != NULL)
+		nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
+			    ax25->ax25_dev->dev, 0, sysctl_netrom_obsolescence_count_initialiser);
+
+	if ((dev = nr_dev_get(nr_dest)) != NULL) {	/* Its for me */
+		if (ax25 == NULL)			/* Its from me */
+			ret = nr_loopback_queue(skb);
+		else
+			ret = nr_rx_frame(skb, dev);
+		dev_put(dev);
+		return ret;
+	}
+
+	if (!sysctl_netrom_routing_control && ax25 != NULL)
+		return 0;
+
+	/* Its Time-To-Live has expired */
+	if (skb->data[14] == 1) {
+		return 0;
+	}
+
+	nr_node = nr_node_get(nr_dest);
+	if (nr_node == NULL)
+		return 0;
+	nr_node_lock(nr_node);
+
+	if (nr_node->which >= nr_node->count) {
+		nr_node_unlock(nr_node);
+		nr_node_put(nr_node);
+		return 0;
+	}
+
+	nr_neigh = nr_node->routes[nr_node->which].neighbour;
+
+	if ((dev = nr_dev_first()) == NULL) {
+		nr_node_unlock(nr_node);
+		nr_node_put(nr_node);
+		return 0;
+	}
+
+	/* We are going to change the netrom headers so we should get our
+	   own skb, we also did not know until now how much header space
+	   we had to reserve... - RXQ */
+	if ((skbn=skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC)) == NULL) {
+		nr_node_unlock(nr_node);
+		nr_node_put(nr_node);
+		dev_put(dev);
+		return 0;
+	}
+	kfree_skb(skb);
+	skb=skbn;
+	skb->data[14]--;
+
+	dptr  = skb_push(skb, 1);
+	*dptr = AX25_P_NETROM;
+
+	ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev);
+	if (nr_neigh->ax25 && ax25s) {
+		/* We were already holding this ax25_cb */
+		ax25_cb_put(ax25s);
+	}
+	nr_neigh->ax25 = ax25s;
+
+	dev_put(dev);
+	ret = (nr_neigh->ax25 != NULL);
+	nr_node_unlock(nr_node);
+	nr_node_put(nr_node);
+	return ret;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *nr_node_start(struct seq_file *seq, loff_t *pos)
+{
+	struct nr_node *nr_node;
+	struct hlist_node *node;
+	int i = 1;
+ 
+ 	spin_lock_bh(&nr_node_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	nr_node_for_each(nr_node, node, &nr_node_list) {
+		if (i == *pos)
+			return nr_node;
+		++i;
+	}
+
+	return NULL;
+}
+
+static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct hlist_node *node;
+	++*pos;
+	
+	node = (v == SEQ_START_TOKEN)  
+		? nr_node_list.first
+		: ((struct nr_node *)v)->node_node.next;
+
+	return hlist_entry(node, struct nr_node, node_node);
+}
+
+static void nr_node_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&nr_node_list_lock);
+}
+
+static int nr_node_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq,
+			 "callsign  mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n");
+	else {
+		struct nr_node *nr_node = v;
+		nr_node_lock(nr_node);
+		seq_printf(seq, "%-9s %-7s  %d %d",
+			ax2asc(&nr_node->callsign),
+			(nr_node->mnemonic[0] == '\0') ? "*" : nr_node->mnemonic,
+			nr_node->which + 1,
+			nr_node->count);
+
+		for (i = 0; i < nr_node->count; i++) {
+			seq_printf(seq, "  %3d   %d %05d",
+				nr_node->routes[i].quality,
+				nr_node->routes[i].obs_count,
+				nr_node->routes[i].neighbour->number);
+		}
+		nr_node_unlock(nr_node);
+
+		seq_puts(seq, "\n");
+	}
+	return 0;
+}
+
+static struct seq_operations nr_node_seqops = {
+	.start = nr_node_start,
+	.next = nr_node_next,
+	.stop = nr_node_stop,
+	.show = nr_node_show,
+};
+
+static int nr_node_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &nr_node_seqops);
+}
+
+struct file_operations nr_nodes_fops = {
+	.owner = THIS_MODULE,
+	.open = nr_node_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static void *nr_neigh_start(struct seq_file *seq, loff_t *pos)
+{
+	struct nr_neigh *nr_neigh;
+	struct hlist_node *node;
+	int i = 1;
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) {
+		if (i == *pos)
+			return nr_neigh;
+	}
+	return NULL;
+}
+
+static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct hlist_node *node;
+	++*pos;
+	
+	node = (v == SEQ_START_TOKEN)  
+		? nr_neigh_list.first
+		: ((struct nr_neigh *)v)->neigh_node.next;
+
+	return hlist_entry(node, struct nr_neigh, neigh_node);
+}
+
+static void nr_neigh_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&nr_neigh_list_lock);
+}
+
+static int nr_neigh_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "addr  callsign  dev  qual lock count failed digipeaters\n");
+	else {
+		struct nr_neigh *nr_neigh = v;
+
+		seq_printf(seq, "%05d %-9s %-4s  %3d    %d   %3d    %3d",
+			nr_neigh->number,
+			ax2asc(&nr_neigh->callsign),
+			nr_neigh->dev ? nr_neigh->dev->name : "???",
+			nr_neigh->quality,
+			nr_neigh->locked,
+			nr_neigh->count,
+			nr_neigh->failed);
+
+		if (nr_neigh->digipeat != NULL) {
+			for (i = 0; i < nr_neigh->digipeat->ndigi; i++)
+				seq_printf(seq, " %s", 
+					   ax2asc(&nr_neigh->digipeat->calls[i]));
+		}
+
+		seq_puts(seq, "\n");
+	}
+	return 0;
+}
+
+static struct seq_operations nr_neigh_seqops = {
+	.start = nr_neigh_start,
+	.next = nr_neigh_next,
+	.stop = nr_neigh_stop,
+	.show = nr_neigh_show,
+};
+
+static int nr_neigh_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &nr_neigh_seqops);
+}
+
+struct file_operations nr_neigh_fops = {
+	.owner = THIS_MODULE,
+	.open = nr_neigh_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+/*
+ *	Free all memory associated with the nodes and routes lists.
+ */
+void __exit nr_rt_free(void)
+{
+	struct nr_neigh *s = NULL;
+	struct nr_node  *t = NULL;
+	struct hlist_node *node, *nodet;
+
+	spin_lock_bh(&nr_neigh_list_lock);
+	spin_lock_bh(&nr_node_list_lock);
+	nr_node_for_each_safe(t, node, nodet, &nr_node_list) {
+		nr_node_lock(t);
+		nr_remove_node_locked(t);
+		nr_node_unlock(t);
+	}
+	nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) {
+		while(s->count) {
+			s->count--;
+			nr_neigh_put(s);
+		}
+		nr_remove_neigh_locked(s);
+	}
+	spin_unlock_bh(&nr_node_list_lock);
+	spin_unlock_bh(&nr_neigh_list_lock);
+}
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
new file mode 100644
index 00000000000..0627347b14b
--- /dev/null
+++ b/net/netrom/nr_subr.c
@@ -0,0 +1,283 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/netrom.h>
+
+/*
+ *	This routine purges all of the queues of frames.
+ */
+void nr_clear_queues(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&nr->ack_queue);
+	skb_queue_purge(&nr->reseq_queue);
+	skb_queue_purge(&nr->frag_queue);
+}
+
+/*
+ * This routine purges the input queue of those frames that have been
+ * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
+ * SDL diagram.
+ */
+void nr_frames_acked(struct sock *sk, unsigned short nr)
+{
+	struct nr_sock *nrom = nr_sk(sk);
+	struct sk_buff *skb;
+
+	/*
+	 * Remove all the ack-ed frames from the ack queue.
+	 */
+	if (nrom->va != nr) {
+		while (skb_peek(&nrom->ack_queue) != NULL && nrom->va != nr) {
+		        skb = skb_dequeue(&nrom->ack_queue);
+			kfree_skb(skb);
+			nrom->va = (nrom->va + 1) % NR_MODULUS;
+		}
+	}
+}
+
+/*
+ * Requeue all the un-ack-ed frames on the output queue to be picked
+ * up by nr_kick called from the timer. This arrangement handles the
+ * possibility of an empty output queue.
+ */
+void nr_requeue_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *skb_prev = NULL;
+
+	while ((skb = skb_dequeue(&nr_sk(sk)->ack_queue)) != NULL) {
+		if (skb_prev == NULL)
+			skb_queue_head(&sk->sk_write_queue, skb);
+		else
+			skb_append(skb_prev, skb);
+		skb_prev = skb;
+	}
+}
+
+/*
+ *	Validate that the value of nr is between va and vs. Return true or
+ *	false for testing.
+ */
+int nr_validate_nr(struct sock *sk, unsigned short nr)
+{
+	struct nr_sock *nrom = nr_sk(sk);
+	unsigned short vc = nrom->va;
+
+	while (vc != nrom->vs) {
+		if (nr == vc) return 1;
+		vc = (vc + 1) % NR_MODULUS;
+	}
+
+	return nr == nrom->vs;
+}
+
+/*
+ *	Check that ns is within the receive window.
+ */
+int nr_in_rx_window(struct sock *sk, unsigned short ns)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	unsigned short vc = nr->vr;
+	unsigned short vt = (nr->vl + nr->window) % NR_MODULUS;
+
+	while (vc != vt) {
+		if (ns == vc) return 1;
+		vc = (vc + 1) % NR_MODULUS;
+	}
+
+	return 0;
+}
+
+/*
+ *  This routine is called when the HDLC layer internally generates a
+ *  control frame.
+ */
+void nr_write_internal(struct sock *sk, int frametype)
+{
+	struct nr_sock *nr = nr_sk(sk);
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+	int len, timeout;
+
+	len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
+
+	switch (frametype & 0x0F) {
+	case NR_CONNREQ:
+		len += 17;
+		break;
+	case NR_CONNACK:
+		len += (nr->bpqext) ? 2 : 1;
+		break;
+	case NR_DISCREQ:
+	case NR_DISCACK:
+	case NR_INFOACK:
+		break;
+	default:
+		printk(KERN_ERR "NET/ROM: nr_write_internal - invalid frame type %d\n", frametype);
+		return;
+	}
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	/*
+	 *	Space for AX.25 and NET/ROM network header
+	 */
+	skb_reserve(skb, NR_NETWORK_LEN);
+
+	dptr = skb_put(skb, skb_tailroom(skb));
+
+	switch (frametype & 0x0F) {
+	case NR_CONNREQ:
+		timeout  = nr->t1 / HZ;
+		*dptr++  = nr->my_index;
+		*dptr++  = nr->my_id;
+		*dptr++  = 0;
+		*dptr++  = 0;
+		*dptr++  = frametype;
+		*dptr++  = nr->window;
+		memcpy(dptr, &nr->user_addr, AX25_ADDR_LEN);
+		dptr[6] &= ~AX25_CBIT;
+		dptr[6] &= ~AX25_EBIT;
+		dptr[6] |= AX25_SSSID_SPARE;
+		dptr    += AX25_ADDR_LEN;
+		memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN);
+		dptr[6] &= ~AX25_CBIT;
+		dptr[6] &= ~AX25_EBIT;
+		dptr[6] |= AX25_SSSID_SPARE;
+		dptr    += AX25_ADDR_LEN;
+		*dptr++  = timeout % 256;
+		*dptr++  = timeout / 256;
+		break;
+
+	case NR_CONNACK:
+		*dptr++ = nr->your_index;
+		*dptr++ = nr->your_id;
+		*dptr++ = nr->my_index;
+		*dptr++ = nr->my_id;
+		*dptr++ = frametype;
+		*dptr++ = nr->window;
+		if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+		break;
+
+	case NR_DISCREQ:
+	case NR_DISCACK:
+		*dptr++ = nr->your_index;
+		*dptr++ = nr->your_id;
+		*dptr++ = 0;
+		*dptr++ = 0;
+		*dptr++ = frametype;
+		break;
+
+	case NR_INFOACK:
+		*dptr++ = nr->your_index;
+		*dptr++ = nr->your_id;
+		*dptr++ = 0;
+		*dptr++ = nr->vr;
+		*dptr++ = frametype;
+		break;
+	}
+
+	nr_transmit_buffer(sk, skb);
+}
+
+/*
+ * This routine is called when a Connect Acknowledge with the Choke Flag
+ * set is needed to refuse a connection.
+ */
+void nr_transmit_refusal(struct sk_buff *skb, int mine)
+{
+	struct sk_buff *skbn;
+	unsigned char *dptr;
+	int len;
+
+	len = NR_NETWORK_LEN + NR_TRANSPORT_LEN + 1;
+
+	if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skbn, 0);
+
+	dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
+
+	memcpy(dptr, skb->data + 7, AX25_ADDR_LEN);
+	dptr[6] &= ~AX25_CBIT;
+	dptr[6] &= ~AX25_EBIT;
+	dptr[6] |= AX25_SSSID_SPARE;
+	dptr += AX25_ADDR_LEN;
+
+	memcpy(dptr, skb->data + 0, AX25_ADDR_LEN);
+	dptr[6] &= ~AX25_CBIT;
+	dptr[6] |= AX25_EBIT;
+	dptr[6] |= AX25_SSSID_SPARE;
+	dptr += AX25_ADDR_LEN;
+
+	*dptr++ = sysctl_netrom_network_ttl_initialiser;
+
+	if (mine) {
+		*dptr++ = 0;
+		*dptr++ = 0;
+		*dptr++ = skb->data[15];
+		*dptr++ = skb->data[16];
+	} else {
+		*dptr++ = skb->data[15];
+		*dptr++ = skb->data[16];
+		*dptr++ = 0;
+		*dptr++ = 0;
+	}
+
+	*dptr++ = NR_CONNACK | NR_CHOKE_FLAG;
+	*dptr++ = 0;
+
+	if (!nr_route_frame(skbn, NULL))
+		kfree_skb(skbn);
+}
+
+void nr_disconnect(struct sock *sk, int reason)
+{
+	nr_stop_t1timer(sk);
+	nr_stop_t2timer(sk);
+	nr_stop_t4timer(sk);
+	nr_stop_idletimer(sk);
+
+	nr_clear_queues(sk);
+
+	nr_sk(sk)->state = NR_STATE_0;
+
+	sk->sk_state     = TCP_CLOSE;
+	sk->sk_err       = reason;
+	sk->sk_shutdown |= SEND_SHUTDOWN;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+	}
+}
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
new file mode 100644
index 00000000000..faabda8088b
--- /dev/null
+++ b/net/netrom/nr_timer.c
@@ -0,0 +1,260 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/netrom.h>
+
+static void nr_heartbeat_expiry(unsigned long);
+static void nr_t1timer_expiry(unsigned long);
+static void nr_t2timer_expiry(unsigned long);
+static void nr_t4timer_expiry(unsigned long);
+static void nr_idletimer_expiry(unsigned long);
+
+void nr_init_timers(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	init_timer(&nr->t1timer);
+	nr->t1timer.data     = (unsigned long)sk;
+	nr->t1timer.function = &nr_t1timer_expiry;
+	
+	init_timer(&nr->t2timer);
+	nr->t2timer.data     = (unsigned long)sk;
+	nr->t2timer.function = &nr_t2timer_expiry;
+
+	init_timer(&nr->t4timer);
+	nr->t4timer.data     = (unsigned long)sk;
+	nr->t4timer.function = &nr_t4timer_expiry;
+
+	init_timer(&nr->idletimer);
+	nr->idletimer.data     = (unsigned long)sk;
+	nr->idletimer.function = &nr_idletimer_expiry;
+
+	/* initialized by sock_init_data */
+	sk->sk_timer.data     = (unsigned long)sk;
+	sk->sk_timer.function = &nr_heartbeat_expiry;
+}
+
+void nr_start_t1timer(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	mod_timer(&nr->t1timer, jiffies + nr->t1);
+}
+
+void nr_start_t2timer(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	mod_timer(&nr->t2timer, jiffies + nr->t2);
+}
+
+void nr_start_t4timer(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	mod_timer(&nr->t4timer, jiffies + nr->t4);
+}
+
+void nr_start_idletimer(struct sock *sk)
+{
+	struct nr_sock *nr = nr_sk(sk);
+
+	if (nr->idle > 0)
+		mod_timer(&nr->idletimer, jiffies + nr->idle);
+}
+
+void nr_start_heartbeat(struct sock *sk)
+{
+	mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
+}
+
+void nr_stop_t1timer(struct sock *sk)
+{
+	del_timer(&nr_sk(sk)->t1timer);
+}
+
+void nr_stop_t2timer(struct sock *sk)
+{
+	del_timer(&nr_sk(sk)->t2timer);
+}
+
+void nr_stop_t4timer(struct sock *sk)
+{
+	del_timer(&nr_sk(sk)->t4timer);
+}
+
+void nr_stop_idletimer(struct sock *sk)
+{
+	del_timer(&nr_sk(sk)->idletimer);
+}
+
+void nr_stop_heartbeat(struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+}
+
+int nr_t1timer_running(struct sock *sk)
+{
+	return timer_pending(&nr_sk(sk)->t1timer);
+}
+
+static void nr_heartbeat_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct nr_sock *nr = nr_sk(sk);
+
+	bh_lock_sock(sk);
+	switch (nr->state) {
+	case NR_STATE_0:
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+			sock_hold(sk);
+			nr_destroy_socket(sk);
+			bh_unlock_sock(sk);
+			sock_put(sk);
+			return;
+		}
+		break;
+
+	case NR_STATE_3:
+		/*
+		 * Check for the state of the receive buffer.
+		 */
+		if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
+		    (nr->condition & NR_COND_OWN_RX_BUSY)) {
+			nr->condition &= ~NR_COND_OWN_RX_BUSY;
+			nr->condition &= ~NR_COND_ACK_PENDING;
+			nr->vl         = nr->vr;
+			nr_write_internal(sk, NR_INFOACK);
+			break;
+		}
+		break;
+	}
+
+	nr_start_heartbeat(sk);
+	bh_unlock_sock(sk);
+}
+
+static void nr_t2timer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct nr_sock *nr = nr_sk(sk);
+
+	bh_lock_sock(sk);
+	if (nr->condition & NR_COND_ACK_PENDING) {
+		nr->condition &= ~NR_COND_ACK_PENDING;
+		nr_enquiry_response(sk);
+	}
+	bh_unlock_sock(sk);
+}
+
+static void nr_t4timer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+
+	bh_lock_sock(sk);
+	nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
+	bh_unlock_sock(sk);
+}
+
+static void nr_idletimer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct nr_sock *nr = nr_sk(sk);
+
+	bh_lock_sock(sk);
+
+	nr_clear_queues(sk);
+
+	nr->n2count = 0;
+	nr_write_internal(sk, NR_DISCREQ);
+	nr->state = NR_STATE_2;
+
+	nr_start_t1timer(sk);
+	nr_stop_t2timer(sk);
+	nr_stop_t4timer(sk);
+
+	sk->sk_state     = TCP_CLOSE;
+	sk->sk_err       = 0;
+	sk->sk_shutdown |= SEND_SHUTDOWN;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+	}
+	bh_unlock_sock(sk);
+}
+
+static void nr_t1timer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct nr_sock *nr = nr_sk(sk);
+
+	bh_lock_sock(sk);
+	switch (nr->state) {
+	case NR_STATE_1:
+		if (nr->n2count == nr->n2) {
+			nr_disconnect(sk, ETIMEDOUT);
+			bh_unlock_sock(sk);
+			return;
+		} else {
+			nr->n2count++;
+			nr_write_internal(sk, NR_CONNREQ);
+		}
+		break;
+
+	case NR_STATE_2:
+		if (nr->n2count == nr->n2) {
+			nr_disconnect(sk, ETIMEDOUT);
+			bh_unlock_sock(sk);
+			return;
+		} else {
+			nr->n2count++;
+			nr_write_internal(sk, NR_DISCREQ);
+		}
+		break;
+
+	case NR_STATE_3:
+		if (nr->n2count == nr->n2) {
+			nr_disconnect(sk, ETIMEDOUT);
+			bh_unlock_sock(sk);
+			return;
+		} else {
+			nr->n2count++;
+			nr_requeue_frames(sk);
+		}
+		break;
+	}
+
+	nr_start_t1timer(sk);
+	bh_unlock_sock(sk);
+}
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
new file mode 100644
index 00000000000..c9ed50382ea
--- /dev/null
+++ b/net/netrom/sysctl_net_netrom.c
@@ -0,0 +1,189 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
+ */
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <net/ax25.h>
+#include <net/netrom.h>
+
+/*
+ *	Values taken from NET/ROM documentation.
+ */
+static int min_quality[] = {0}, max_quality[] = {255};
+static int min_obs[]     = {0}, max_obs[]     = {255};
+static int min_ttl[]     = {0}, max_ttl[]     = {255};
+static int min_t1[]      = {5 * HZ};
+static int max_t1[]      = {600 * HZ};
+static int min_n2[]      = {2}, max_n2[]      = {127};
+static int min_t2[]      = {1 * HZ};
+static int max_t2[]      = {60 * HZ};
+static int min_t4[]      = {1 * HZ};
+static int max_t4[]      = {1000 * HZ};
+static int min_window[]  = {1}, max_window[]  = {127};
+static int min_idle[]    = {0 * HZ};
+static int max_idle[]    = {65535 * HZ};
+static int min_route[]   = {0}, max_route[]   = {1};
+static int min_fails[]   = {1}, max_fails[]   = {10};
+
+static struct ctl_table_header *nr_table_header;
+
+static ctl_table nr_table[] = {
+        {
+		.ctl_name	= NET_NETROM_DEFAULT_PATH_QUALITY,
+		.procname	= "default_path_quality",
+		.data		= &sysctl_netrom_default_path_quality,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_quality,
+		.extra2		= &max_quality
+	},
+        {
+		.ctl_name	= NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER,
+		.procname	= "obsolescence_count_initialiser",
+		.data		= &sysctl_netrom_obsolescence_count_initialiser,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_obs,
+		.extra2		= &max_obs
+	},
+        {
+		.ctl_name	= NET_NETROM_NETWORK_TTL_INITIALISER,
+		.procname	= "network_ttl_initialiser",
+		.data		= &sysctl_netrom_network_ttl_initialiser,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_ttl,
+		.extra2		= &max_ttl
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_TIMEOUT,
+		.procname	= "transport_timeout",
+		.data		= &sysctl_netrom_transport_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t1,
+		.extra2		= &max_t1
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_MAXIMUM_TRIES,
+		.procname	= "transport_maximum_tries",
+		.data		= &sysctl_netrom_transport_maximum_tries,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_n2,
+		.extra2		= &max_n2
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY,
+		.procname	= "transport_acknowledge_delay",
+		.data		= &sysctl_netrom_transport_acknowledge_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t2,
+		.extra2		= &max_t2
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_BUSY_DELAY,
+		.procname	= "transport_busy_delay",
+		.data		= &sysctl_netrom_transport_busy_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_t4,
+		.extra2		= &max_t4
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE,
+		.procname	= "transport_requested_window_size",
+		.data		= &sysctl_netrom_transport_requested_window_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_window,
+		.extra2		= &max_window
+	},
+        {
+		.ctl_name	= NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT,
+		.procname	= "transport_no_activity_timeout",
+		.data		= &sysctl_netrom_transport_no_activity_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_idle,
+		.extra2		= &max_idle
+	},
+        {
+		.ctl_name	= NET_NETROM_ROUTING_CONTROL,
+		.procname	= "routing_control",
+		.data		= &sysctl_netrom_routing_control,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_route,
+		.extra2		= &max_route
+	},
+        {
+		.ctl_name	= NET_NETROM_LINK_FAILS_COUNT,
+		.procname	= "link_fails_count",
+		.data		= &sysctl_netrom_link_fails_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_fails,
+		.extra2		= &max_fails
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nr_dir_table[] = {
+	{
+		.ctl_name	= NET_NETROM,
+		.procname	= "netrom",
+		.mode		= 0555,
+		.child		= nr_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nr_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= nr_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+
+void __init nr_register_sysctl(void)
+{
+	nr_table_header = register_sysctl_table(nr_root_table, 1);
+}
+
+void nr_unregister_sysctl(void)
+{
+	unregister_sysctl_table(nr_table_header);
+}
diff --git a/net/nonet.c b/net/nonet.c
new file mode 100644
index 00000000000..e5241dceaa5
--- /dev/null
+++ b/net/nonet.c
@@ -0,0 +1,30 @@
+/*
+ * net/nonet.c
+ *
+ * Dummy functions to allow us to configure network support entirely
+ * out of the kernel.
+ *
+ * Distributed under the terms of the GNU GPL version 2.
+ * Copyright (c) Matthew Wilcox 2003
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+void __init sock_init(void)
+{
+	printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n");
+}
+
+static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
+{
+	return -ENXIO;
+}
+
+struct file_operations bad_sock_fops = {
+	.owner = THIS_MODULE,
+	.open = sock_no_open,
+};
diff --git a/net/packet/Makefile b/net/packet/Makefile
new file mode 100644
index 00000000000..81183eabfde
--- /dev/null
+++ b/net/packet/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the packet AF.
+#
+
+obj-$(CONFIG_PACKET) += af_packet.o
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
new file mode 100644
index 00000000000..64acea0adaa
--- /dev/null
+++ b/net/packet/af_packet.c
@@ -0,0 +1,1907 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		PACKET - implements raw packet sockets.
+ *
+ * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
+ *
+ * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *		Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *
+ * Fixes:	
+ *		Alan Cox	:	verify_area() now used correctly
+ *		Alan Cox	:	new skbuff lists, look ma no backlogs!
+ *		Alan Cox	:	tidied skbuff lists.
+ *		Alan Cox	:	Now uses generic datagram routines I
+ *					added. Also fixed the peek/read crash
+ *					from all old Linux datagram code.
+ *		Alan Cox	:	Uses the improved datagram code.
+ *		Alan Cox	:	Added NULL's for socket options.
+ *		Alan Cox	:	Re-commented the code.
+ *		Alan Cox	:	Use new kernel side addressing
+ *		Rob Janssen	:	Correct MTU usage.
+ *		Dave Platt	:	Counter leaks caused by incorrect
+ *					interrupt locking and some slightly
+ *					dubious gcc output. Can you read
+ *					compiler: it said _VOLATILE_
+ *	Richard Kooijman	:	Timestamp fixes.
+ *		Alan Cox	:	New buffers. Use sk->mac.raw.
+ *		Alan Cox	:	sendmsg/recvmsg support.
+ *		Alan Cox	:	Protocol setting support
+ *	Alexey Kuznetsov	:	Untied from IPv4 stack.
+ *	Cyrus Durgin		:	Fixed kerneld for kmod.
+ *	Michal Ostrowski        :       Module initialization cleanup.
+ *         Ulises Alonso        :       Frame number limit removal and 
+ *                                      packet_set_ring memory leak.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+ 
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <linux/wireless.h>
+#include <linux/kmod.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/poll.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_INET
+#include <net/inet_common.h>
+#endif
+
+#define CONFIG_SOCK_PACKET	1
+
+/*
+   Proposed replacement for SIOC{ADD,DEL}MULTI and
+   IFF_PROMISC, IFF_ALLMULTI flags.
+
+   It is more expensive, but I believe,
+   it is really correct solution: reentereble, safe and fault tolerant.
+
+   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
+   reference count and global flag, so that real status is
+   (gflag|(count != 0)), so that we can use obsolete faulty interface
+   not harming clever users.
+ */
+#define CONFIG_PACKET_MULTICAST	1
+
+/*
+   Assumptions:
+   - if device has no dev->hard_header routine, it adds and removes ll header
+     inside itself. In this case ll header is invisible outside of device,
+     but higher levels still should reserve dev->hard_header_len.
+     Some devices are enough clever to reallocate skb, when header
+     will not fit to reserved space (tunnel), another ones are silly
+     (PPP).
+   - packet socket receives packets with pulled ll header,
+     so that SOCK_RAW should push it back.
+
+On receive:
+-----------
+
+Incoming, dev->hard_header!=NULL
+   mac.raw -> ll header
+   data    -> data
+
+Outgoing, dev->hard_header!=NULL
+   mac.raw -> ll header
+   data    -> ll header
+
+Incoming, dev->hard_header==NULL
+   mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
+              PPP makes it, that is wrong, because introduce assymetry
+	      between rx and tx paths.
+   data    -> data
+
+Outgoing, dev->hard_header==NULL
+   mac.raw -> data. ll header is still not built!
+   data    -> data
+
+Resume
+  If dev->hard_header==NULL we are unlikely to restore sensible ll header.
+
+
+On transmit:
+------------
+
+dev->hard_header != NULL
+   mac.raw -> ll header
+   data    -> ll header
+
+dev->hard_header == NULL (ll header is added by device, we cannot control it)
+   mac.raw -> data
+   data -> data
+
+   We should set nh.raw on output to correct posistion,
+   packet classifier depends on it.
+ */
+
+/* List of all packet sockets. */
+static HLIST_HEAD(packet_sklist);
+static DEFINE_RWLOCK(packet_sklist_lock);
+
+static atomic_t packet_socks_nr;
+
+
+/* Private packet socket structures. */
+
+#ifdef CONFIG_PACKET_MULTICAST
+struct packet_mclist
+{
+	struct packet_mclist	*next;
+	int			ifindex;
+	int			count;
+	unsigned short		type;
+	unsigned short		alen;
+	unsigned char		addr[8];
+};
+#endif
+#ifdef CONFIG_PACKET_MMAP
+static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
+#endif
+
+static void packet_flush_mclist(struct sock *sk);
+
+struct packet_sock {
+	/* struct sock has to be the first member of packet_sock */
+	struct sock		sk;
+	struct tpacket_stats	stats;
+#ifdef CONFIG_PACKET_MMAP
+	char *			*pg_vec;
+	unsigned int		head;
+	unsigned int            frames_per_block;
+	unsigned int		frame_size;
+	unsigned int		frame_max;
+	int			copy_thresh;
+#endif
+	struct packet_type	prot_hook;
+	spinlock_t		bind_lock;
+	char			running;	/* prot_hook is attached*/
+	int			ifindex;	/* bound device		*/
+	unsigned short		num;
+#ifdef CONFIG_PACKET_MULTICAST
+	struct packet_mclist	*mclist;
+#endif
+#ifdef CONFIG_PACKET_MMAP
+	atomic_t		mapped;
+	unsigned int            pg_vec_order;
+	unsigned int		pg_vec_pages;
+	unsigned int		pg_vec_len;
+#endif
+};
+
+#ifdef CONFIG_PACKET_MMAP
+
+static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
+{
+	unsigned int pg_vec_pos, frame_offset;
+	char *frame;
+
+	pg_vec_pos = position / po->frames_per_block;
+	frame_offset = position % po->frames_per_block;
+
+	frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
+	
+	return frame;
+}
+#endif
+
+static inline struct packet_sock *pkt_sk(struct sock *sk)
+{
+	return (struct packet_sock *)sk;
+}
+
+static void packet_sock_destruct(struct sock *sk)
+{
+	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Attempt to release alive packet socket: %p\n", sk);
+		return;
+	}
+
+	atomic_dec(&packet_socks_nr);
+#ifdef PACKET_REFCNT_DEBUG
+	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
+#endif
+}
+
+
+static struct proto_ops packet_ops;
+
+#ifdef CONFIG_SOCK_PACKET
+static struct proto_ops packet_ops_spkt;
+
+static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+{
+	struct sock *sk;
+	struct sockaddr_pkt *spkt;
+
+	/*
+	 *	When we registered the protocol we saved the socket in the data
+	 *	field for just this event.
+	 */
+
+	sk = pt->af_packet_priv;
+	
+	/*
+	 *	Yank back the headers [hope the device set this
+	 *	right or kerboom...]
+	 *
+	 *	Incoming packets have ll header pulled,
+	 *	push it back.
+	 *
+	 *	For outgoing ones skb->data == skb->mac.raw
+	 *	so that this procedure is noop.
+	 */
+
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		goto out;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		goto oom;
+
+	/* drop any routing info */
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	spkt = (struct sockaddr_pkt*)skb->cb;
+
+	skb_push(skb, skb->data-skb->mac.raw);
+
+	/*
+	 *	The SOCK_PACKET socket receives _all_ frames.
+	 */
+
+	spkt->spkt_family = dev->type;
+	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
+	spkt->spkt_protocol = skb->protocol;
+
+	/*
+	 *	Charge the memory to the socket. This is done specifically
+	 *	to prevent sockets using all the memory up.
+	 */
+
+	if (sock_queue_rcv_skb(sk,skb) == 0)
+		return 0;
+
+out:
+	kfree_skb(skb);
+oom:
+	return 0;
+}
+
+
+/*
+ *	Output a raw packet to a device layer. This bypasses all the other
+ *	protocol layers and you must therefore supply it with a complete frame
+ */
+ 
+static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	unsigned short proto=0;
+	int err;
+	
+	/*
+	 *	Get and verify the address. 
+	 */
+
+	if (saddr)
+	{
+		if (msg->msg_namelen < sizeof(struct sockaddr))
+			return(-EINVAL);
+		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
+			proto=saddr->spkt_protocol;
+	}
+	else
+		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */
+
+	/*
+	 *	Find the device first to size check it 
+	 */
+
+	saddr->spkt_device[13] = 0;
+	dev = dev_get_by_name(saddr->spkt_device);
+	err = -ENODEV;
+	if (dev == NULL)
+		goto out_unlock;
+	
+	/*
+	 *	You may not queue a frame bigger than the mtu. This is the lowest level
+	 *	raw protocol and you must do your own fragmentation at this level.
+	 */
+	 
+	err = -EMSGSIZE;
+ 	if(len>dev->mtu+dev->hard_header_len)
+		goto out_unlock;
+
+	err = -ENOBUFS;
+	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
+
+	/*
+	 *	If the write buffer is full, then tough. At this level the user gets to
+	 *	deal with the problem - do your own algorithmic backoffs. That's far
+	 *	more flexible.
+	 */
+	 
+	if (skb == NULL) 
+		goto out_unlock;
+
+	/*
+	 *	Fill it in 
+	 */
+	 
+	/* FIXME: Save some space for broken drivers that write a
+	 * hard header at transmission time by themselves. PPP is the
+	 * notable one here. This should really be fixed at the driver level.
+	 */
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb->nh.raw = skb->data;
+
+	/* Try to align data part correctly */
+	if (dev->hard_header) {
+		skb->data -= dev->hard_header_len;
+		skb->tail -= dev->hard_header_len;
+		if (len < dev->hard_header_len)
+			skb->nh.raw = skb->data;
+	}
+
+	/* Returns -EFAULT on error */
+	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+	skb->protocol = proto;
+	skb->dev = dev;
+	skb->priority = sk->sk_priority;
+	if (err)
+		goto out_free;
+
+	err = -ENETDOWN;
+	if (!(dev->flags & IFF_UP))
+		goto out_free;
+
+	/*
+	 *	Now send it
+	 */
+
+	dev_queue_xmit(skb);
+	dev_put(dev);
+	return(len);
+
+out_free:
+	kfree_skb(skb);
+out_unlock:
+	if (dev)
+		dev_put(dev);
+	return err;
+}
+#endif
+
+static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+{
+	struct sk_filter *filter;
+
+	bh_lock_sock(sk);
+	filter = sk->sk_filter;
+	/*
+	 * Our caller already checked that filter != NULL but we need to
+	 * verify that under bh_lock_sock() to be safe
+	 */
+	if (likely(filter != NULL))
+		res = sk_run_filter(skb, filter->insns, filter->len);
+	bh_unlock_sock(sk);
+
+	return res;
+}
+
+/*
+   This function makes lazy skb cloning in hope that most of packets
+   are discarded by BPF.
+
+   Note tricky part: we DO mangle shared skb! skb->data, skb->len
+   and skb->cb are mangled. It works because (and until) packets
+   falling here are owned by current CPU. Output packets are cloned
+   by dev_queue_xmit_nit(), input packets are processed by net_bh
+   sequencially, so that if we return skb to original state on exit,
+   we will not harm anyone.
+ */
+
+static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+{
+	struct sock *sk;
+	struct sockaddr_ll *sll;
+	struct packet_sock *po;
+	u8 * skb_head = skb->data;
+	int skb_len = skb->len;
+	unsigned snaplen;
+
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		goto drop;
+
+	sk = pt->af_packet_priv;
+	po = pkt_sk(sk);
+
+	skb->dev = dev;
+
+	if (dev->hard_header) {
+		/* The device has an explicit notion of ll header,
+		   exported to higher levels.
+
+		   Otherwise, the device hides datails of it frame
+		   structure, so that corresponding packet head
+		   never delivered to user.
+		 */
+		if (sk->sk_type != SOCK_DGRAM)
+			skb_push(skb, skb->data - skb->mac.raw);
+		else if (skb->pkt_type == PACKET_OUTGOING) {
+			/* Special case: outgoing packets have ll header at head */
+			skb_pull(skb, skb->nh.raw - skb->data);
+		}
+	}
+
+	snaplen = skb->len;
+
+	if (sk->sk_filter) {
+		unsigned res = run_filter(skb, sk, snaplen);
+		if (res == 0)
+			goto drop_n_restore;
+		if (snaplen > res)
+			snaplen = res;
+	}
+
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		goto drop_n_acct;
+
+	if (skb_shared(skb)) {
+		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+		if (nskb == NULL)
+			goto drop_n_acct;
+
+		if (skb_head != skb->data) {
+			skb->data = skb_head;
+			skb->len = skb_len;
+		}
+		kfree_skb(skb);
+		skb = nskb;
+	}
+
+	sll = (struct sockaddr_ll*)skb->cb;
+	sll->sll_family = AF_PACKET;
+	sll->sll_hatype = dev->type;
+	sll->sll_protocol = skb->protocol;
+	sll->sll_pkttype = skb->pkt_type;
+	sll->sll_ifindex = dev->ifindex;
+	sll->sll_halen = 0;
+
+	if (dev->hard_header_parse)
+		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+
+	if (pskb_trim(skb, snaplen))
+		goto drop_n_acct;
+
+	skb_set_owner_r(skb, sk);
+	skb->dev = NULL;
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	spin_lock(&sk->sk_receive_queue.lock);
+	po->stats.tp_packets++;
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_unlock(&sk->sk_receive_queue.lock);
+	sk->sk_data_ready(sk, skb->len);
+	return 0;
+
+drop_n_acct:
+	spin_lock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops++;
+	spin_unlock(&sk->sk_receive_queue.lock);
+
+drop_n_restore:
+	if (skb_head != skb->data && skb_shared(skb)) {
+		skb->data = skb_head;
+		skb->len = skb_len;
+	}
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+#ifdef CONFIG_PACKET_MMAP
+static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
+{
+	struct sock *sk;
+	struct packet_sock *po;
+	struct sockaddr_ll *sll;
+	struct tpacket_hdr *h;
+	u8 * skb_head = skb->data;
+	int skb_len = skb->len;
+	unsigned snaplen;
+	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
+	unsigned short macoff, netoff;
+	struct sk_buff *copy_skb = NULL;
+
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		goto drop;
+
+	sk = pt->af_packet_priv;
+	po = pkt_sk(sk);
+
+	if (dev->hard_header) {
+		if (sk->sk_type != SOCK_DGRAM)
+			skb_push(skb, skb->data - skb->mac.raw);
+		else if (skb->pkt_type == PACKET_OUTGOING) {
+			/* Special case: outgoing packets have ll header at head */
+			skb_pull(skb, skb->nh.raw - skb->data);
+			if (skb->ip_summed == CHECKSUM_HW)
+				status |= TP_STATUS_CSUMNOTREADY;
+		}
+	}
+
+	snaplen = skb->len;
+
+	if (sk->sk_filter) {
+		unsigned res = run_filter(skb, sk, snaplen);
+		if (res == 0)
+			goto drop_n_restore;
+		if (snaplen > res)
+			snaplen = res;
+	}
+
+	if (sk->sk_type == SOCK_DGRAM) {
+		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+	} else {
+		unsigned maclen = skb->nh.raw - skb->data;
+		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
+		macoff = netoff - maclen;
+	}
+
+	if (macoff + snaplen > po->frame_size) {
+		if (po->copy_thresh &&
+		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
+		    (unsigned)sk->sk_rcvbuf) {
+			if (skb_shared(skb)) {
+				copy_skb = skb_clone(skb, GFP_ATOMIC);
+			} else {
+				copy_skb = skb_get(skb);
+				skb_head = skb->data;
+			}
+			if (copy_skb)
+				skb_set_owner_r(copy_skb, sk);
+		}
+		snaplen = po->frame_size - macoff;
+		if ((int)snaplen < 0)
+			snaplen = 0;
+	}
+	if (snaplen > skb->len-skb->data_len)
+		snaplen = skb->len-skb->data_len;
+
+	spin_lock(&sk->sk_receive_queue.lock);
+	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
+	
+	if (h->tp_status)
+		goto ring_is_full;
+	po->head = po->head != po->frame_max ? po->head+1 : 0;
+	po->stats.tp_packets++;
+	if (copy_skb) {
+		status |= TP_STATUS_COPY;
+		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
+	}
+	if (!po->stats.tp_drops)
+		status &= ~TP_STATUS_LOSING;
+	spin_unlock(&sk->sk_receive_queue.lock);
+
+	memcpy((u8*)h + macoff, skb->data, snaplen);
+
+	h->tp_len = skb->len;
+	h->tp_snaplen = snaplen;
+	h->tp_mac = macoff;
+	h->tp_net = netoff;
+	if (skb->stamp.tv_sec == 0) { 
+		do_gettimeofday(&skb->stamp);
+		sock_enable_timestamp(sk);
+	}
+	h->tp_sec = skb->stamp.tv_sec;
+	h->tp_usec = skb->stamp.tv_usec;
+
+	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
+	sll->sll_halen = 0;
+	if (dev->hard_header_parse)
+		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+	sll->sll_family = AF_PACKET;
+	sll->sll_hatype = dev->type;
+	sll->sll_protocol = skb->protocol;
+	sll->sll_pkttype = skb->pkt_type;
+	sll->sll_ifindex = dev->ifindex;
+
+	h->tp_status = status;
+	mb();
+
+	{
+		struct page *p_start, *p_end;
+		u8 *h_end = (u8 *)h + macoff + snaplen - 1;
+
+		p_start = virt_to_page(h);
+		p_end = virt_to_page(h_end);
+		while (p_start <= p_end) {
+			flush_dcache_page(p_start);
+			p_start++;
+		}
+	}
+
+	sk->sk_data_ready(sk, 0);
+
+drop_n_restore:
+	if (skb_head != skb->data && skb_shared(skb)) {
+		skb->data = skb_head;
+		skb->len = skb_len;
+	}
+drop:
+        kfree_skb(skb);
+	return 0;
+
+ring_is_full:
+	po->stats.tp_drops++;
+	spin_unlock(&sk->sk_receive_queue.lock);
+
+	sk->sk_data_ready(sk, 0);
+	if (copy_skb)
+		kfree_skb(copy_skb);
+	goto drop_n_restore;
+}
+
+#endif
+
+
+static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	unsigned short proto;
+	unsigned char *addr;
+	int ifindex, err, reserve = 0;
+
+	/*
+	 *	Get and verify the address. 
+	 */
+	 
+	if (saddr == NULL) {
+		struct packet_sock *po = pkt_sk(sk);
+
+		ifindex	= po->ifindex;
+		proto	= po->num;
+		addr	= NULL;
+	} else {
+		err = -EINVAL;
+		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
+			goto out;
+		ifindex	= saddr->sll_ifindex;
+		proto	= saddr->sll_protocol;
+		addr	= saddr->sll_addr;
+	}
+
+
+	dev = dev_get_by_index(ifindex);
+	err = -ENXIO;
+	if (dev == NULL)
+		goto out_unlock;
+	if (sock->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
+
+	err = -EMSGSIZE;
+	if (len > dev->mtu+reserve)
+		goto out_unlock;
+
+	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
+				msg->msg_flags & MSG_DONTWAIT, &err);
+	if (skb==NULL)
+		goto out_unlock;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb->nh.raw = skb->data;
+
+	if (dev->hard_header) {
+		int res;
+		err = -EINVAL;
+		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
+		if (sock->type != SOCK_DGRAM) {
+			skb->tail = skb->data;
+			skb->len = 0;
+		} else if (res < 0)
+			goto out_free;
+	}
+
+	/* Returns -EFAULT on error */
+	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+	if (err)
+		goto out_free;
+
+	skb->protocol = proto;
+	skb->dev = dev;
+	skb->priority = sk->sk_priority;
+
+	err = -ENETDOWN;
+	if (!(dev->flags & IFF_UP))
+		goto out_free;
+
+	/*
+	 *	Now send it
+	 */
+
+	err = dev_queue_xmit(skb);
+	if (err > 0 && (err = net_xmit_errno(err)) != 0)
+		goto out_unlock;
+
+	dev_put(dev);
+
+	return(len);
+
+out_free:
+	kfree_skb(skb);
+out_unlock:
+	if (dev)
+		dev_put(dev);
+out:
+	return err;
+}
+
+/*
+ *	Close a PACKET socket. This is fairly simple. We immediately go
+ *	to 'closed' state and remove our protocol entry in the device list.
+ */
+
+static int packet_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct packet_sock *po;
+
+	if (!sk)
+		return 0;
+
+	po = pkt_sk(sk);
+
+	write_lock_bh(&packet_sklist_lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&packet_sklist_lock);
+
+	/*
+	 *	Unhook packet receive handler.
+	 */
+
+	if (po->running) {
+		/*
+		 *	Remove the protocol hook
+		 */
+		dev_remove_pack(&po->prot_hook);
+		po->running = 0;
+		po->num = 0;
+		__sock_put(sk);
+	}
+
+#ifdef CONFIG_PACKET_MULTICAST
+	packet_flush_mclist(sk);
+#endif
+
+#ifdef CONFIG_PACKET_MMAP
+	if (po->pg_vec) {
+		struct tpacket_req req;
+		memset(&req, 0, sizeof(req));
+		packet_set_ring(sk, &req, 1);
+	}
+#endif
+
+	/*
+	 *	Now the socket is dead. No more input will appear.
+	 */
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	/* Purge queues */
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	sock_put(sk);
+	return 0;
+}
+
+/*
+ *	Attach a packet hook.
+ */
+
+static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	/*
+	 *	Detach an existing hook if present.
+	 */
+
+	lock_sock(sk);
+
+	spin_lock(&po->bind_lock);
+	if (po->running) {
+		__sock_put(sk);
+		po->running = 0;
+		po->num = 0;
+		spin_unlock(&po->bind_lock);
+		dev_remove_pack(&po->prot_hook);
+		spin_lock(&po->bind_lock);
+	}
+
+	po->num = protocol;
+	po->prot_hook.type = protocol;
+	po->prot_hook.dev = dev;
+
+	po->ifindex = dev ? dev->ifindex : 0;
+
+	if (protocol == 0)
+		goto out_unlock;
+
+	if (dev) {
+		if (dev->flags&IFF_UP) {
+			dev_add_pack(&po->prot_hook);
+			sock_hold(sk);
+			po->running = 1;
+		} else {
+			sk->sk_err = ENETDOWN;
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_error_report(sk);
+		}
+	} else {
+		dev_add_pack(&po->prot_hook);
+		sock_hold(sk);
+		po->running = 1;
+	}
+
+out_unlock:
+	spin_unlock(&po->bind_lock);
+	release_sock(sk);
+	return 0;
+}
+
+/*
+ *	Bind a packet socket to a device
+ */
+
+#ifdef CONFIG_SOCK_PACKET
+
+static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk=sock->sk;
+	char name[15];
+	struct net_device *dev;
+	int err = -ENODEV;
+	
+	/*
+	 *	Check legality
+	 */
+	 
+	if(addr_len!=sizeof(struct sockaddr))
+		return -EINVAL;
+	strlcpy(name,uaddr->sa_data,sizeof(name));
+
+	dev = dev_get_by_name(name);
+	if (dev) {
+		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
+		dev_put(dev);
+	}
+	return err;
+}
+#endif
+
+static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
+	struct sock *sk=sock->sk;
+	struct net_device *dev = NULL;
+	int err;
+
+
+	/*
+	 *	Check legality
+	 */
+	 
+	if (addr_len < sizeof(struct sockaddr_ll))
+		return -EINVAL;
+	if (sll->sll_family != AF_PACKET)
+		return -EINVAL;
+
+	if (sll->sll_ifindex) {
+		err = -ENODEV;
+		dev = dev_get_by_index(sll->sll_ifindex);
+		if (dev == NULL)
+			goto out;
+	}
+	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
+	if (dev)
+		dev_put(dev);
+
+out:
+	return err;
+}
+
+static struct proto packet_proto = {
+	.name	  = "PACKET",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct packet_sock),
+};
+
+/*
+ *	Create a packet of type SOCK_PACKET. 
+ */
+
+static int packet_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct packet_sock *po;
+	int err;
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
+#ifdef CONFIG_SOCK_PACKET
+	    && sock->type != SOCK_PACKET
+#endif
+	    )
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+
+	err = -ENOBUFS;
+	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
+	if (sk == NULL)
+		goto out;
+
+	sock->ops = &packet_ops;
+#ifdef CONFIG_SOCK_PACKET
+	if (sock->type == SOCK_PACKET)
+		sock->ops = &packet_ops_spkt;
+#endif
+	sock_init_data(sock, sk);
+
+	po = pkt_sk(sk);
+	sk->sk_family = PF_PACKET;
+	po->num = protocol;
+
+	sk->sk_destruct = packet_sock_destruct;
+	atomic_inc(&packet_socks_nr);
+
+	/*
+	 *	Attach a protocol block
+	 */
+
+	spin_lock_init(&po->bind_lock);
+	po->prot_hook.func = packet_rcv;
+#ifdef CONFIG_SOCK_PACKET
+	if (sock->type == SOCK_PACKET)
+		po->prot_hook.func = packet_rcv_spkt;
+#endif
+	po->prot_hook.af_packet_priv = sk;
+
+	if (protocol) {
+		po->prot_hook.type = protocol;
+		dev_add_pack(&po->prot_hook);
+		sock_hold(sk);
+		po->running = 1;
+	}
+
+	write_lock_bh(&packet_sklist_lock);
+	sk_add_node(sk, &packet_sklist);
+	write_unlock_bh(&packet_sklist_lock);
+	return(0);
+out:
+	return err;
+}
+
+/*
+ *	Pull a packet from our receive queue and hand it to the user.
+ *	If necessary we block.
+ */
+
+static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
+			  struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied, err;
+
+	err = -EINVAL;
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
+		goto out;
+
+#if 0
+	/* What error should we return now? EUNATTACH? */
+	if (pkt_sk(sk)->ifindex < 0)
+		return -ENODEV;
+#endif
+
+	/*
+	 *	If the address length field is there to be filled in, we fill
+	 *	it in now.
+	 */
+
+	if (sock->type == SOCK_PACKET)
+		msg->msg_namelen = sizeof(struct sockaddr_pkt);
+	else
+		msg->msg_namelen = sizeof(struct sockaddr_ll);
+
+	/*
+	 *	Call the generic datagram receiver. This handles all sorts
+	 *	of horrible races and re-entrancy so we can forget about it
+	 *	in the protocol layers.
+	 *
+	 *	Now it will return ENETDOWN, if device have just gone down,
+	 *	but then it will block.
+	 */
+
+	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
+
+	/*
+	 *	An error occurred so return it. Because skb_recv_datagram() 
+	 *	handles the blocking we don't see and worry about blocking
+	 *	retries.
+	 */
+
+	if(skb==NULL)
+		goto out;
+
+	/*
+	 *	You lose any data beyond the buffer you gave. If it worries a
+	 *	user program they can ask the device for its MTU anyway.
+	 */
+
+	copied = skb->len;
+	if (copied > len)
+	{
+		copied=len;
+		msg->msg_flags|=MSG_TRUNC;
+	}
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (msg->msg_name)
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+
+	/*
+	 *	Free or return the buffer as appropriate. Again this
+	 *	hides all the races and re-entrancy issues from us.
+	 */
+	err = (flags&MSG_TRUNC) ? skb->len : copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+}
+
+#ifdef CONFIG_SOCK_PACKET
+static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
+			       int *uaddr_len, int peer)
+{
+	struct net_device *dev;
+	struct sock *sk	= sock->sk;
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	uaddr->sa_family = AF_PACKET;
+	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
+	if (dev) {
+		strlcpy(uaddr->sa_data, dev->name, 15);
+		dev_put(dev);
+	} else
+		memset(uaddr->sa_data, 0, 14);
+	*uaddr_len = sizeof(*uaddr);
+
+	return 0;
+}
+#endif
+
+static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
+			  int *uaddr_len, int peer)
+{
+	struct net_device *dev;
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	sll->sll_family = AF_PACKET;
+	sll->sll_ifindex = po->ifindex;
+	sll->sll_protocol = po->num;
+	dev = dev_get_by_index(po->ifindex);
+	if (dev) {
+		sll->sll_hatype = dev->type;
+		sll->sll_halen = dev->addr_len;
+		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+		dev_put(dev);
+	} else {
+		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
+		sll->sll_halen = 0;
+	}
+	*uaddr_len = sizeof(*sll);
+
+	return 0;
+}
+
+#ifdef CONFIG_PACKET_MULTICAST
+static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
+{
+	switch (i->type) {
+	case PACKET_MR_MULTICAST:
+		if (what > 0)
+			dev_mc_add(dev, i->addr, i->alen, 0);
+		else
+			dev_mc_delete(dev, i->addr, i->alen, 0);
+		break;
+	case PACKET_MR_PROMISC:
+		dev_set_promiscuity(dev, what);
+		break;
+	case PACKET_MR_ALLMULTI:
+		dev_set_allmulti(dev, what);
+		break;
+	default:;
+	}
+}
+
+static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
+{
+	for ( ; i; i=i->next) {
+		if (i->ifindex == dev->ifindex)
+			packet_dev_mc(dev, i, what);
+	}
+}
+
+static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	struct packet_mclist *ml, *i;
+	struct net_device *dev;
+	int err;
+
+	rtnl_lock();
+
+	err = -ENODEV;
+	dev = __dev_get_by_index(mreq->mr_ifindex);
+	if (!dev)
+		goto done;
+
+	err = -EINVAL;
+	if (mreq->mr_alen > dev->addr_len)
+		goto done;
+
+	err = -ENOBUFS;
+	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
+	if (i == NULL)
+		goto done;
+
+	err = 0;
+	for (ml = po->mclist; ml; ml = ml->next) {
+		if (ml->ifindex == mreq->mr_ifindex &&
+		    ml->type == mreq->mr_type &&
+		    ml->alen == mreq->mr_alen &&
+		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
+			ml->count++;
+			/* Free the new element ... */
+			kfree(i);
+			goto done;
+		}
+	}
+
+	i->type = mreq->mr_type;
+	i->ifindex = mreq->mr_ifindex;
+	i->alen = mreq->mr_alen;
+	memcpy(i->addr, mreq->mr_address, i->alen);
+	i->count = 1;
+	i->next = po->mclist;
+	po->mclist = i;
+	packet_dev_mc(dev, i, +1);
+
+done:
+	rtnl_unlock();
+	return err;
+}
+
+static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
+{
+	struct packet_mclist *ml, **mlp;
+
+	rtnl_lock();
+
+	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
+		if (ml->ifindex == mreq->mr_ifindex &&
+		    ml->type == mreq->mr_type &&
+		    ml->alen == mreq->mr_alen &&
+		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
+			if (--ml->count == 0) {
+				struct net_device *dev;
+				*mlp = ml->next;
+				dev = dev_get_by_index(ml->ifindex);
+				if (dev) {
+					packet_dev_mc(dev, ml, -1);
+					dev_put(dev);
+				}
+				kfree(ml);
+			}
+			rtnl_unlock();
+			return 0;
+		}
+	}
+	rtnl_unlock();
+	return -EADDRNOTAVAIL;
+}
+
+static void packet_flush_mclist(struct sock *sk)
+{
+	struct packet_sock *po = pkt_sk(sk);
+	struct packet_mclist *ml;
+
+	if (!po->mclist)
+		return;
+
+	rtnl_lock();
+	while ((ml = po->mclist) != NULL) {
+		struct net_device *dev;
+
+		po->mclist = ml->next;
+		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
+			packet_dev_mc(dev, ml, -1);
+			dev_put(dev);
+		}
+		kfree(ml);
+	}
+	rtnl_unlock();
+}
+#endif
+
+static int
+packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	int ret;
+
+	if (level != SOL_PACKET)
+		return -ENOPROTOOPT;
+
+	switch(optname)	{
+#ifdef CONFIG_PACKET_MULTICAST
+	case PACKET_ADD_MEMBERSHIP:	
+	case PACKET_DROP_MEMBERSHIP:
+	{
+		struct packet_mreq mreq;
+		if (optlen<sizeof(mreq))
+			return -EINVAL;
+		if (copy_from_user(&mreq,optval,sizeof(mreq)))
+			return -EFAULT;
+		if (optname == PACKET_ADD_MEMBERSHIP)
+			ret = packet_mc_add(sk, &mreq);
+		else
+			ret = packet_mc_drop(sk, &mreq);
+		return ret;
+	}
+#endif
+#ifdef CONFIG_PACKET_MMAP
+	case PACKET_RX_RING:
+	{
+		struct tpacket_req req;
+
+		if (optlen<sizeof(req))
+			return -EINVAL;
+		if (copy_from_user(&req,optval,sizeof(req)))
+			return -EFAULT;
+		return packet_set_ring(sk, &req, 0);
+	}
+	case PACKET_COPY_THRESH:
+	{
+		int val;
+
+		if (optlen!=sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val,optval,sizeof(val)))
+			return -EFAULT;
+
+		pkt_sk(sk)->copy_thresh = val;
+		return 0;
+	}
+#endif
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+static int packet_getsockopt(struct socket *sock, int level, int optname,
+			     char __user *optval, int __user *optlen)
+{
+	int len;
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+
+	if (level != SOL_PACKET)
+		return -ENOPROTOOPT;
+
+  	if (get_user(len,optlen))
+  		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+		
+	switch(optname)	{
+	case PACKET_STATISTICS:
+	{
+		struct tpacket_stats st;
+
+		if (len > sizeof(struct tpacket_stats))
+			len = sizeof(struct tpacket_stats);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		st = po->stats;
+		memset(&po->stats, 0, sizeof(st));
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		st.tp_packets += st.tp_drops;
+
+		if (copy_to_user(optval, &st, len))
+			return -EFAULT;
+		break;
+	}
+	default:
+		return -ENOPROTOOPT;
+	}
+
+  	if (put_user(len, optlen))
+  		return -EFAULT;
+  	return 0;
+}
+
+
+static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	struct net_device *dev = (struct net_device*)data;
+
+	read_lock(&packet_sklist_lock);
+	sk_for_each(sk, node, &packet_sklist) {
+		struct packet_sock *po = pkt_sk(sk);
+
+		switch (msg) {
+		case NETDEV_UNREGISTER:
+#ifdef CONFIG_PACKET_MULTICAST
+			if (po->mclist)
+				packet_dev_mclist(dev, po->mclist, -1);
+			// fallthrough
+#endif
+		case NETDEV_DOWN:
+			if (dev->ifindex == po->ifindex) {
+				spin_lock(&po->bind_lock);
+				if (po->running) {
+					__dev_remove_pack(&po->prot_hook);
+					__sock_put(sk);
+					po->running = 0;
+					sk->sk_err = ENETDOWN;
+					if (!sock_flag(sk, SOCK_DEAD))
+						sk->sk_error_report(sk);
+				}
+				if (msg == NETDEV_UNREGISTER) {
+					po->ifindex = -1;
+					po->prot_hook.dev = NULL;
+				}
+				spin_unlock(&po->bind_lock);
+			}
+			break;
+		case NETDEV_UP:
+			spin_lock(&po->bind_lock);
+			if (dev->ifindex == po->ifindex && po->num &&
+			    !po->running) {
+				dev_add_pack(&po->prot_hook);
+				sock_hold(sk);
+				po->running = 1;
+			}
+			spin_unlock(&po->bind_lock);
+			break;
+		}
+	}
+	read_unlock(&packet_sklist_lock);
+	return NOTIFY_DONE;
+}
+
+
+static int packet_ioctl(struct socket *sock, unsigned int cmd,
+			unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	switch(cmd) {
+		case SIOCOUTQ:
+		{
+			int amount = atomic_read(&sk->sk_wmem_alloc);
+			return put_user(amount, (int __user *)arg);
+		}
+		case SIOCINQ:
+		{
+			struct sk_buff *skb;
+			int amount = 0;
+
+			spin_lock_bh(&sk->sk_receive_queue.lock);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb)
+				amount = skb->len;
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
+			return put_user(amount, (int __user *)arg);
+		}
+		case SIOCGSTAMP:
+			return sock_get_timestamp(sk, (struct timeval __user *)arg);
+			
+#ifdef CONFIG_INET
+		case SIOCADDRT:
+		case SIOCDELRT:
+		case SIOCDARP:
+		case SIOCGARP:
+		case SIOCSARP:
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCGIFNETMASK:
+		case SIOCSIFNETMASK:
+		case SIOCGIFDSTADDR:
+		case SIOCSIFDSTADDR:
+		case SIOCSIFFLAGS:
+			return inet_dgram_ops.ioctl(sock, cmd, arg);
+#endif
+
+		default:
+			return dev_ioctl(cmd, (void __user *)arg);
+	}
+	return 0;
+}
+
+#ifndef CONFIG_PACKET_MMAP
+#define packet_mmap sock_no_mmap
+#define packet_poll datagram_poll
+#else
+
+static unsigned int packet_poll(struct file * file, struct socket *sock,
+				poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+	unsigned int mask = datagram_poll(file, sock, wait);
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (po->pg_vec) {
+		unsigned last = po->head ? po->head-1 : po->frame_max;
+		struct tpacket_hdr *h;
+
+		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
+
+		if (h->tp_status)
+			mask |= POLLIN | POLLRDNORM;
+	}
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+	return mask;
+}
+
+
+/* Dirty? Well, I still did not learn better way to account
+ * for user mmaps.
+ */
+
+static void packet_mm_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket * sock = SOCKET_I(inode);
+	struct sock *sk = sock->sk;
+	
+	if (sk)
+		atomic_inc(&pkt_sk(sk)->mapped);
+}
+
+static void packet_mm_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct socket * sock = SOCKET_I(inode);
+	struct sock *sk = sock->sk;
+	
+	if (sk)
+		atomic_dec(&pkt_sk(sk)->mapped);
+}
+
+static struct vm_operations_struct packet_mmap_ops = {
+	.open =	packet_mm_open,
+	.close =packet_mm_close,
+};
+
+static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
+{
+	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
+}
+
+static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
+{
+	int i;
+
+	for (i=0; i<len; i++) {
+		if (pg_vec[i]) {
+			struct page *page, *pend;
+
+			pend = pg_vec_endpage(pg_vec[i], order);
+			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
+				ClearPageReserved(page);
+			free_pages((unsigned long)pg_vec[i], order);
+		}
+	}
+	kfree(pg_vec);
+}
+
+
+static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
+{
+	char **pg_vec = NULL;
+	struct packet_sock *po = pkt_sk(sk);
+	int was_running, num, order = 0;
+	int err = 0;
+	
+	if (req->tp_block_nr) {
+		int i, l;
+
+		/* Sanity tests and some calculations */
+
+		if (po->pg_vec)
+			return -EBUSY;
+
+		if ((int)req->tp_block_size <= 0)
+			return -EINVAL;
+		if (req->tp_block_size&(PAGE_SIZE-1))
+			return -EINVAL;
+		if (req->tp_frame_size < TPACKET_HDRLEN)
+			return -EINVAL;
+		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
+			return -EINVAL;
+
+		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
+		if (po->frames_per_block <= 0)
+			return -EINVAL;
+		if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
+			return -EINVAL;
+		/* OK! */
+
+		/* Allocate page vector */
+		while ((PAGE_SIZE<<order) < req->tp_block_size)
+			order++;
+
+		err = -ENOMEM;
+
+		pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
+		if (pg_vec == NULL)
+			goto out;
+		memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
+
+		for (i=0; i<req->tp_block_nr; i++) {
+			struct page *page, *pend;
+			pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
+			if (!pg_vec[i])
+				goto out_free_pgvec;
+
+			pend = pg_vec_endpage(pg_vec[i], order);
+			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
+				SetPageReserved(page);
+		}
+		/* Page vector is allocated */
+
+		l = 0;
+		for (i=0; i<req->tp_block_nr; i++) {
+			char *ptr = pg_vec[i];
+			struct tpacket_hdr *header;
+			int k;
+
+			for (k=0; k<po->frames_per_block; k++) {
+				
+				header = (struct tpacket_hdr*)ptr;
+				header->tp_status = TP_STATUS_KERNEL;
+				ptr += req->tp_frame_size;
+			}
+		}
+		/* Done */
+	} else {
+		if (req->tp_frame_nr)
+			return -EINVAL;
+	}
+
+	lock_sock(sk);
+
+	/* Detach socket from network */
+	spin_lock(&po->bind_lock);
+	was_running = po->running;
+	num = po->num;
+	if (was_running) {
+		__dev_remove_pack(&po->prot_hook);
+		po->num = 0;
+		po->running = 0;
+		__sock_put(sk);
+	}
+	spin_unlock(&po->bind_lock);
+		
+	synchronize_net();
+
+	err = -EBUSY;
+	if (closing || atomic_read(&po->mapped) == 0) {
+		err = 0;
+#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
+
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		pg_vec = XC(po->pg_vec, pg_vec);
+		po->frame_max = req->tp_frame_nr-1;
+		po->head = 0;
+		po->frame_size = req->tp_frame_size;
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+		order = XC(po->pg_vec_order, order);
+		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
+
+		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
+		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
+		skb_queue_purge(&sk->sk_receive_queue);
+#undef XC
+		if (atomic_read(&po->mapped))
+			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
+	}
+
+	spin_lock(&po->bind_lock);
+	if (was_running && !po->running) {
+		sock_hold(sk);
+		po->running = 1;
+		po->num = num;
+		dev_add_pack(&po->prot_hook);
+	}
+	spin_unlock(&po->bind_lock);
+
+	release_sock(sk);
+
+out_free_pgvec:
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->tp_block_nr);
+out:
+	return err;
+}
+
+static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+	unsigned long size;
+	unsigned long start;
+	int err = -EINVAL;
+	int i;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	size = vma->vm_end - vma->vm_start;
+
+	lock_sock(sk);
+	if (po->pg_vec == NULL)
+		goto out;
+	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
+		goto out;
+
+	atomic_inc(&po->mapped);
+	start = vma->vm_start;
+	err = -EAGAIN;
+	for (i=0; i<po->pg_vec_len; i++) {
+		if (remap_pfn_range(vma, start,
+				     __pa(po->pg_vec[i]) >> PAGE_SHIFT,
+				     po->pg_vec_pages*PAGE_SIZE,
+				     vma->vm_page_prot))
+			goto out;
+		start += po->pg_vec_pages*PAGE_SIZE;
+	}
+	vma->vm_ops = &packet_mmap_ops;
+	err = 0;
+
+out:
+	release_sock(sk);
+	return err;
+}
+#endif
+
+
+#ifdef CONFIG_SOCK_PACKET
+static struct proto_ops packet_ops_spkt = {
+	.family =	PF_PACKET,
+	.owner =	THIS_MODULE,
+	.release =	packet_release,
+	.bind =		packet_bind_spkt,
+	.connect =	sock_no_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	packet_getname_spkt,
+	.poll =		datagram_poll,
+	.ioctl =	packet_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	sock_no_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	packet_sendmsg_spkt,
+	.recvmsg =	packet_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+#endif
+
+static struct proto_ops packet_ops = {
+	.family =	PF_PACKET,
+	.owner =	THIS_MODULE,
+	.release =	packet_release,
+	.bind =		packet_bind,
+	.connect =	sock_no_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	packet_getname, 
+	.poll =		packet_poll,
+	.ioctl =	packet_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	sock_no_shutdown,
+	.setsockopt =	packet_setsockopt,
+	.getsockopt =	packet_getsockopt,
+	.sendmsg =	packet_sendmsg,
+	.recvmsg =	packet_recvmsg,
+	.mmap =		packet_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct net_proto_family packet_family_ops = {
+	.family =	PF_PACKET,
+	.create =	packet_create,
+	.owner	=	THIS_MODULE,
+};
+
+static struct notifier_block packet_netdev_notifier = {
+	.notifier_call =packet_notifier,
+};
+
+#ifdef CONFIG_PROC_FS
+static inline struct sock *packet_seq_idx(loff_t off)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &packet_sklist) {
+		if (!off--)
+			return s;
+	}
+	return NULL;
+}
+
+static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&packet_sklist_lock);
+	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return  (v == SEQ_START_TOKEN) 
+		? sk_head(&packet_sklist) 
+		: sk_next((struct sock*)v) ;
+}
+
+static void packet_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&packet_sklist_lock);		
+}
+
+static int packet_seq_show(struct seq_file *seq, void *v) 
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
+	else {
+		struct sock *s = v;
+		const struct packet_sock *po = pkt_sk(s);
+
+		seq_printf(seq,
+			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
+			   s,
+			   atomic_read(&s->sk_refcnt),
+			   s->sk_type,
+			   ntohs(po->num),
+			   po->ifindex,
+			   po->running,
+			   atomic_read(&s->sk_rmem_alloc),
+			   sock_i_uid(s),
+			   sock_i_ino(s) );
+	}
+
+	return 0;
+}
+
+static struct seq_operations packet_seq_ops = {
+	.start	= packet_seq_start,
+	.next	= packet_seq_next,
+	.stop	= packet_seq_stop,
+	.show	= packet_seq_show,
+};
+
+static int packet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &packet_seq_ops);
+}
+
+static struct file_operations packet_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= packet_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+#endif
+
+static void __exit packet_exit(void)
+{
+	proc_net_remove("packet");
+	unregister_netdevice_notifier(&packet_netdev_notifier);
+	sock_unregister(PF_PACKET);
+	proto_unregister(&packet_proto);
+}
+
+static int __init packet_init(void)
+{
+	int rc = proto_register(&packet_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	sock_register(&packet_family_ops);
+	register_netdevice_notifier(&packet_netdev_notifier);
+	proc_net_fops_create("packet", 0, &packet_seq_fops);
+out:
+	return rc;
+}
+
+module_init(packet_init);
+module_exit(packet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_PACKET);
diff --git a/net/rose/Makefile b/net/rose/Makefile
new file mode 100644
index 00000000000..fa248116fd5
--- /dev/null
+++ b/net/rose/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the Linux Rose (X.25 PLP) layer.
+#
+
+obj-$(CONFIG_ROSE) += rose.o
+
+rose-y	  := af_rose.o rose_dev.o rose_in.o rose_link.o rose_loopback.o \
+	     rose_out.o rose_route.o rose_subr.o rose_timer.o
+rose-$(CONFIG_SYSCTL) += sysctl_net_rose.o
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
new file mode 100644
index 00000000000..7eb6a5bf93e
--- /dev/null
+++ b/net/rose/af_rose.c
@@ -0,0 +1,1589 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk)
+ * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
+ * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/stat.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <net/rose.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/tcp.h>
+#include <net/ip.h>
+#include <net/arp.h>
+
+static int rose_ndevs = 10;
+
+int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0;
+int sysctl_rose_call_request_timeout    = ROSE_DEFAULT_T1;
+int sysctl_rose_reset_request_timeout   = ROSE_DEFAULT_T2;
+int sysctl_rose_clear_request_timeout   = ROSE_DEFAULT_T3;
+int sysctl_rose_no_activity_timeout     = ROSE_DEFAULT_IDLE;
+int sysctl_rose_ack_hold_back_timeout   = ROSE_DEFAULT_HB;
+int sysctl_rose_routing_control         = ROSE_DEFAULT_ROUTING;
+int sysctl_rose_link_fail_timeout       = ROSE_DEFAULT_FAIL_TIMEOUT;
+int sysctl_rose_maximum_vcs             = ROSE_DEFAULT_MAXVC;
+int sysctl_rose_window_size             = ROSE_DEFAULT_WINDOW_SIZE;
+
+static HLIST_HEAD(rose_list);
+static DEFINE_SPINLOCK(rose_list_lock);
+
+static struct proto_ops rose_proto_ops;
+
+ax25_address rose_callsign;
+
+/*
+ *	Convert a ROSE address into text.
+ */
+const char *rose2asc(const rose_address *addr)
+{
+	static char buffer[11];
+
+	if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 &&
+	    addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 &&
+	    addr->rose_addr[4] == 0x00) {
+		strcpy(buffer, "*");
+	} else {
+		sprintf(buffer, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF,
+						addr->rose_addr[1] & 0xFF,
+						addr->rose_addr[2] & 0xFF,
+						addr->rose_addr[3] & 0xFF,
+						addr->rose_addr[4] & 0xFF);
+	}
+
+	return buffer;
+}
+
+/*
+ *	Compare two ROSE addresses, 0 == equal.
+ */
+int rosecmp(rose_address *addr1, rose_address *addr2)
+{
+	int i;
+
+	for (i = 0; i < 5; i++)
+		if (addr1->rose_addr[i] != addr2->rose_addr[i])
+			return 1;
+
+	return 0;
+}
+
+/*
+ *	Compare two ROSE addresses for only mask digits, 0 == equal.
+ */
+int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask)
+{
+	int i, j;
+
+	if (mask > 10)
+		return 1;
+
+	for (i = 0; i < mask; i++) {
+		j = i / 2;
+
+		if ((i % 2) != 0) {
+			if ((addr1->rose_addr[j] & 0x0F) != (addr2->rose_addr[j] & 0x0F))
+				return 1;
+		} else {
+			if ((addr1->rose_addr[j] & 0xF0) != (addr2->rose_addr[j] & 0xF0))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ *	Socket removal during an interrupt is now safe.
+ */
+static void rose_remove_socket(struct sock *sk)
+{
+	spin_lock_bh(&rose_list_lock);
+	sk_del_node_init(sk);
+	spin_unlock_bh(&rose_list_lock);
+}
+
+/*
+ *	Kill all bound sockets on a broken link layer connection to a
+ *	particular neighbour.
+ */
+void rose_kill_by_neigh(struct rose_neigh *neigh)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&rose_list_lock);
+	sk_for_each(s, node, &rose_list) {
+		struct rose_sock *rose = rose_sk(s);
+
+		if (rose->neighbour == neigh) {
+			rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
+			rose->neighbour->use--;
+			rose->neighbour = NULL;
+		}
+	}
+	spin_unlock_bh(&rose_list_lock);
+}
+
+/*
+ *	Kill all bound sockets on a dropped device.
+ */
+static void rose_kill_by_device(struct net_device *dev)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&rose_list_lock);
+	sk_for_each(s, node, &rose_list) {
+		struct rose_sock *rose = rose_sk(s);
+
+		if (rose->device == dev) {
+			rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
+			rose->neighbour->use--;
+			rose->device = NULL;
+		}
+	}
+	spin_unlock_bh(&rose_list_lock);
+}
+
+/*
+ *	Handle device status changes.
+ */
+static int rose_device_event(struct notifier_block *this, unsigned long event,
+	void *ptr)
+{
+	struct net_device *dev = (struct net_device *)ptr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	switch (dev->type) {
+	case ARPHRD_ROSE:
+		rose_kill_by_device(dev);
+		break;
+	case ARPHRD_AX25:
+		rose_link_device_down(dev);
+		rose_rt_device_down(dev);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ *	Add a socket to the bound sockets list.
+ */
+static void rose_insert_socket(struct sock *sk)
+{
+
+	spin_lock_bh(&rose_list_lock);
+	sk_add_node(sk, &rose_list);
+	spin_unlock_bh(&rose_list_lock);
+}
+
+/*
+ *	Find a socket that wants to accept the Call Request we just
+ *	received.
+ */
+static struct sock *rose_find_listener(rose_address *addr, ax25_address *call)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&rose_list_lock);
+	sk_for_each(s, node, &rose_list) {
+		struct rose_sock *rose = rose_sk(s);
+
+		if (!rosecmp(&rose->source_addr, addr) &&
+		    !ax25cmp(&rose->source_call, call) &&
+		    !rose->source_ndigis && s->sk_state == TCP_LISTEN)
+			goto found;
+	}
+
+	sk_for_each(s, node, &rose_list) {
+		struct rose_sock *rose = rose_sk(s);
+
+		if (!rosecmp(&rose->source_addr, addr) &&
+		    !ax25cmp(&rose->source_call, &null_ax25_address) &&
+		    s->sk_state == TCP_LISTEN)
+			goto found;
+	}
+	s = NULL;
+found:
+	spin_unlock_bh(&rose_list_lock);
+	return s;
+}
+
+/*
+ *	Find a connected ROSE socket given my LCI and device.
+ */
+struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&rose_list_lock);
+	sk_for_each(s, node, &rose_list) {
+		struct rose_sock *rose = rose_sk(s);
+
+		if (rose->lci == lci && rose->neighbour == neigh)
+			goto found;
+	}
+	s = NULL;
+found:
+	spin_unlock_bh(&rose_list_lock);
+	return s;
+}
+
+/*
+ *	Find a unique LCI for a given device.
+ */
+unsigned int rose_new_lci(struct rose_neigh *neigh)
+{
+	int lci;
+
+	if (neigh->dce_mode) {
+		for (lci = 1; lci <= sysctl_rose_maximum_vcs; lci++)
+			if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL)
+				return lci;
+	} else {
+		for (lci = sysctl_rose_maximum_vcs; lci > 0; lci--)
+			if (rose_find_socket(lci, neigh) == NULL && rose_route_free_lci(lci, neigh) == NULL)
+				return lci;
+	}
+
+	return 0;
+}
+
+/*
+ *	Deferred destroy.
+ */
+void rose_destroy_socket(struct sock *);
+
+/*
+ *	Handler for deferred kills.
+ */
+static void rose_destroy_timer(unsigned long data)
+{
+	rose_destroy_socket((struct sock *)data);
+}
+
+/*
+ *	This is called from user mode and the timers. Thus it protects itself
+ *	against interrupt users but doesn't worry about being called during
+ *	work.  Once it is removed from the queue no interrupt or bottom half
+ *	will touch it and we are (fairly 8-) ) safe.
+ */
+void rose_destroy_socket(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	rose_remove_socket(sk);
+	rose_stop_heartbeat(sk);
+	rose_stop_idletimer(sk);
+	rose_stop_timer(sk);
+
+	rose_clear_queues(sk);		/* Flush the queues */
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		if (skb->sk != sk) {	/* A pending connection */
+			/* Queue the unaccepted socket for death */
+			sock_set_flag(skb->sk, SOCK_DEAD);
+			rose_start_heartbeat(skb->sk);
+			rose_sk(skb->sk)->state = ROSE_STATE_0;
+		}
+
+		kfree_skb(skb);
+	}
+
+	if (atomic_read(&sk->sk_wmem_alloc) ||
+	    atomic_read(&sk->sk_rmem_alloc)) {
+		/* Defer: outstanding buffers */
+		init_timer(&sk->sk_timer);
+		sk->sk_timer.expires  = jiffies + 10 * HZ;
+		sk->sk_timer.function = rose_destroy_timer;
+		sk->sk_timer.data     = (unsigned long)sk;
+		add_timer(&sk->sk_timer);
+	} else
+		sock_put(sk);
+}
+
+/*
+ *	Handling for system calls applied via the various interfaces to a
+ *	ROSE socket object.
+ */
+
+static int rose_setsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	int opt;
+
+	if (level != SOL_ROSE)
+		return -ENOPROTOOPT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(opt, (int __user *)optval))
+		return -EFAULT;
+
+	switch (optname) {
+	case ROSE_DEFER:
+		rose->defer = opt ? 1 : 0;
+		return 0;
+
+	case ROSE_T1:
+		if (opt < 1)
+			return -EINVAL;
+		rose->t1 = opt * HZ;
+		return 0;
+
+	case ROSE_T2:
+		if (opt < 1)
+			return -EINVAL;
+		rose->t2 = opt * HZ;
+		return 0;
+
+	case ROSE_T3:
+		if (opt < 1)
+			return -EINVAL;
+		rose->t3 = opt * HZ;
+		return 0;
+
+	case ROSE_HOLDBACK:
+		if (opt < 1)
+			return -EINVAL;
+		rose->hb = opt * HZ;
+		return 0;
+
+	case ROSE_IDLE:
+		if (opt < 0)
+			return -EINVAL;
+		rose->idle = opt * 60 * HZ;
+		return 0;
+
+	case ROSE_QBITINCL:
+		rose->qbitincl = opt ? 1 : 0;
+		return 0;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+static int rose_getsockopt(struct socket *sock, int level, int optname,
+	char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	int val = 0;
+	int len;
+
+	if (level != SOL_ROSE)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+
+	switch (optname) {
+	case ROSE_DEFER:
+		val = rose->defer;
+		break;
+
+	case ROSE_T1:
+		val = rose->t1 / HZ;
+		break;
+
+	case ROSE_T2:
+		val = rose->t2 / HZ;
+		break;
+
+	case ROSE_T3:
+		val = rose->t3 / HZ;
+		break;
+
+	case ROSE_HOLDBACK:
+		val = rose->hb / HZ;
+		break;
+
+	case ROSE_IDLE:
+		val = rose->idle / (60 * HZ);
+		break;
+
+	case ROSE_QBITINCL:
+		val = rose->qbitincl;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return copy_to_user(optval, &val, len) ? -EFAULT : 0;
+}
+
+static int rose_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk->sk_state != TCP_LISTEN) {
+		struct rose_sock *rose = rose_sk(sk);
+
+		rose->dest_ndigis = 0;
+		memset(&rose->dest_addr, 0, ROSE_ADDR_LEN);
+		memset(&rose->dest_call, 0, AX25_ADDR_LEN);
+		memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = TCP_LISTEN;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static struct proto rose_proto = {
+	.name	  = "ROSE",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct rose_sock),
+};
+
+static int rose_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct rose_sock *rose;
+
+	if (sock->type != SOCK_SEQPACKET || protocol != 0)
+		return -ESOCKTNOSUPPORT;
+
+	if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+		return -ENOMEM;
+
+	rose = rose_sk(sk);
+
+	sock_init_data(sock, sk);
+
+	skb_queue_head_init(&rose->ack_queue);
+#ifdef M_BIT
+	skb_queue_head_init(&rose->frag_queue);
+	rose->fraglen    = 0;
+#endif
+
+	sock->ops    = &rose_proto_ops;
+	sk->sk_protocol = protocol;
+
+	init_timer(&rose->timer);
+	init_timer(&rose->idletimer);
+
+	rose->t1   = sysctl_rose_call_request_timeout;
+	rose->t2   = sysctl_rose_reset_request_timeout;
+	rose->t3   = sysctl_rose_clear_request_timeout;
+	rose->hb   = sysctl_rose_ack_hold_back_timeout;
+	rose->idle = sysctl_rose_no_activity_timeout;
+
+	rose->state = ROSE_STATE_0;
+
+	return 0;
+}
+
+static struct sock *rose_make_new(struct sock *osk)
+{
+	struct sock *sk;
+	struct rose_sock *rose, *orose;
+
+	if (osk->sk_type != SOCK_SEQPACKET)
+		return NULL;
+
+	if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+		return NULL;
+
+	rose = rose_sk(sk);
+
+	sock_init_data(NULL, sk);
+
+	skb_queue_head_init(&rose->ack_queue);
+#ifdef M_BIT
+	skb_queue_head_init(&rose->frag_queue);
+	rose->fraglen  = 0;
+#endif
+
+	sk->sk_type     = osk->sk_type;
+	sk->sk_socket   = osk->sk_socket;
+	sk->sk_priority = osk->sk_priority;
+	sk->sk_protocol = osk->sk_protocol;
+	sk->sk_rcvbuf   = osk->sk_rcvbuf;
+	sk->sk_sndbuf   = osk->sk_sndbuf;
+	sk->sk_state    = TCP_ESTABLISHED;
+	sk->sk_sleep    = osk->sk_sleep;
+
+	if (sock_flag(osk, SOCK_ZAPPED))
+		sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (sock_flag(osk, SOCK_DBG))
+		sock_set_flag(sk, SOCK_DBG);
+
+	init_timer(&rose->timer);
+	init_timer(&rose->idletimer);
+
+	orose		= rose_sk(osk);
+	rose->t1	= orose->t1;
+	rose->t2	= orose->t2;
+	rose->t3	= orose->t3;
+	rose->hb	= orose->hb;
+	rose->idle	= orose->idle;
+	rose->defer	= orose->defer;
+	rose->device	= orose->device;
+	rose->qbitincl	= orose->qbitincl;
+
+	return sk;
+}
+
+static int rose_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose;
+
+	if (sk == NULL) return 0;
+
+	rose = rose_sk(sk);
+
+	switch (rose->state) {
+	case ROSE_STATE_0:
+		rose_disconnect(sk, 0, -1, -1);
+		rose_destroy_socket(sk);
+		break;
+
+	case ROSE_STATE_2:
+		rose->neighbour->use--;
+		rose_disconnect(sk, 0, -1, -1);
+		rose_destroy_socket(sk);
+		break;
+
+	case ROSE_STATE_1:
+	case ROSE_STATE_3:
+	case ROSE_STATE_4:
+	case ROSE_STATE_5:
+		rose_clear_queues(sk);
+		rose_stop_idletimer(sk);
+		rose_write_internal(sk, ROSE_CLEAR_REQUEST);
+		rose_start_t3timer(sk);
+		rose->state  = ROSE_STATE_2;
+		sk->sk_state    = TCP_CLOSE;
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+		sock_set_flag(sk, SOCK_DESTROY);
+		break;
+
+	default:
+		break;
+	}
+
+	sock->sk = NULL;
+
+	return 0;
+}
+
+static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
+	struct net_device *dev;
+	ax25_address *user, *source;
+	int n;
+
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		return -EINVAL;
+
+	if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
+		return -EINVAL;
+
+	if (addr->srose_family != AF_ROSE)
+		return -EINVAL;
+
+	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
+		return -EINVAL;
+
+	if (addr->srose_ndigis > ROSE_MAX_DIGIS)
+		return -EINVAL;
+
+	if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) {
+		SOCK_DEBUG(sk, "ROSE: bind failed: invalid address\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	source = &addr->srose_call;
+
+	if ((user = ax25_findbyuid(current->euid)) == NULL) {
+		if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
+			return -EACCES;
+		user = source;
+	}
+
+	rose->source_addr   = addr->srose_addr;
+	rose->source_call   = *user;
+	rose->device        = dev;
+	rose->source_ndigis = addr->srose_ndigis;
+
+	if (addr_len == sizeof(struct full_sockaddr_rose)) {
+		struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr;
+		for (n = 0 ; n < addr->srose_ndigis ; n++)
+			rose->source_digis[n] = full_addr->srose_digis[n];
+	} else {
+		if (rose->source_ndigis == 1) {
+			rose->source_digis[0] = addr->srose_digi;
+		}
+	}
+
+	rose_insert_socket(sk);
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	SOCK_DEBUG(sk, "ROSE: socket is bound\n");
+	return 0;
+}
+
+static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
+	unsigned char cause, diagnostic;
+	ax25_address *user;
+	struct net_device *dev;
+	int n;
+
+	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
+		sock->state = SS_CONNECTED;
+		return 0;	/* Connect completed during a ERESTARTSYS event */
+	}
+
+	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
+		sock->state = SS_UNCONNECTED;
+		return -ECONNREFUSED;
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED)
+		return -EISCONN;	/* No reconnect on a seqpacket socket */
+
+	sk->sk_state   = TCP_CLOSE;
+	sock->state = SS_UNCONNECTED;
+
+	if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
+		return -EINVAL;
+
+	if (addr->srose_family != AF_ROSE)
+		return -EINVAL;
+
+	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
+		return -EINVAL;
+
+	if (addr->srose_ndigis > ROSE_MAX_DIGIS)
+		return -EINVAL;
+
+	/* Source + Destination digis should not exceed ROSE_MAX_DIGIS */
+	if ((rose->source_ndigis + addr->srose_ndigis) > ROSE_MAX_DIGIS)
+		return -EINVAL;
+
+	rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause,
+					 &diagnostic);
+	if (!rose->neighbour)
+		return -ENETUNREACH;
+
+	rose->lci = rose_new_lci(rose->neighbour);
+	if (!rose->lci)
+		return -ENETUNREACH;
+
+	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
+		sock_reset_flag(sk, SOCK_ZAPPED);
+
+		if ((dev = rose_dev_first()) == NULL)
+			return -ENETUNREACH;
+
+		if ((user = ax25_findbyuid(current->euid)) == NULL)
+			return -EINVAL;
+
+		memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
+		rose->source_call = *user;
+		rose->device      = dev;
+
+		rose_insert_socket(sk);		/* Finish the bind */
+	}
+
+	rose->dest_addr   = addr->srose_addr;
+	rose->dest_call   = addr->srose_call;
+	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
+	rose->dest_ndigis = addr->srose_ndigis;
+
+	if (addr_len == sizeof(struct full_sockaddr_rose)) {
+		struct full_sockaddr_rose *full_addr = (struct full_sockaddr_rose *)uaddr;
+		for (n = 0 ; n < addr->srose_ndigis ; n++)
+			rose->dest_digis[n] = full_addr->srose_digis[n];
+	} else {
+		if (rose->dest_ndigis == 1) {
+			rose->dest_digis[0] = addr->srose_digi;
+		}
+	}
+
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state   = SS_CONNECTING;
+	sk->sk_state     = TCP_SYN_SENT;
+
+	rose->state = ROSE_STATE_1;
+
+	rose->neighbour->use++;
+
+	rose_write_internal(sk, ROSE_CALL_REQUEST);
+	rose_start_heartbeat(sk);
+	rose_start_t1timer(sk);
+
+	/* Now the loop */
+	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
+		return -EINPROGRESS;
+
+	/*
+	 * A Connect Ack with Choke or timeout or failed routing will go to
+	 * closed.
+	 */
+	if (sk->sk_state == TCP_SYN_SENT) {
+		struct task_struct *tsk = current;
+		DECLARE_WAITQUEUE(wait, tsk);
+
+		add_wait_queue(sk->sk_sleep, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (sk->sk_state != TCP_SYN_SENT)
+				break;
+			if (!signal_pending(tsk)) {
+				schedule();
+				continue;
+			}
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -ERESTARTSYS;
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(sk->sk_sleep, &wait);
+	}
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		sock->state = SS_UNCONNECTED;
+		return sock_error(sk);	/* Always set at this point */
+	}
+
+	sock->state = SS_CONNECTED;
+
+	return 0;
+}
+
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	struct sk_buff *skb;
+	struct sock *newsk;
+	struct sock *sk;
+	int err = 0;
+
+	if ((sk = sock->sk) == NULL)
+		return -EINVAL;
+
+	lock_sock(sk);
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sk->sk_state != TCP_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 *	The write queue this time is holding sockets ready to use
+	 *	hooked into the SABM we saved
+	 */
+	add_wait_queue(sk->sk_sleep, &wait);
+	for (;;) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb)
+			break;
+
+		current->state = TASK_INTERRUPTIBLE;
+		release_sock(sk);
+		if (flags & O_NONBLOCK) {
+			current->state = TASK_RUNNING;
+			remove_wait_queue(sk->sk_sleep, &wait);
+			return -EWOULDBLOCK;
+		}
+		if (!signal_pending(tsk)) {
+			schedule();
+			lock_sock(sk);
+			continue;
+		}
+		return -ERESTARTSYS;
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	newsk = skb->sk;
+	newsk->sk_socket = newsock;
+	newsk->sk_sleep = &newsock->wait;
+
+	/* Now attach up the new socket */
+	skb->sk = NULL;
+	kfree_skb(skb);
+	sk->sk_ack_backlog--;
+	newsock->sk = newsk;
+
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
+	int *uaddr_len, int peer)
+{
+	struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	int n;
+
+	if (peer != 0) {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+		srose->srose_family = AF_ROSE;
+		srose->srose_addr   = rose->dest_addr;
+		srose->srose_call   = rose->dest_call;
+		srose->srose_ndigis = rose->dest_ndigis;
+		for (n = 0; n < rose->dest_ndigis; n++)
+			srose->srose_digis[n] = rose->dest_digis[n];
+	} else {
+		srose->srose_family = AF_ROSE;
+		srose->srose_addr   = rose->source_addr;
+		srose->srose_call   = rose->source_call;
+		srose->srose_ndigis = rose->source_ndigis;
+		for (n = 0; n < rose->source_ndigis; n++)
+			srose->srose_digis[n] = rose->source_digis[n];
+	}
+
+	*uaddr_len = sizeof(struct full_sockaddr_rose);
+	return 0;
+}
+
+int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
+{
+	struct sock *sk;
+	struct sock *make;
+	struct rose_sock *make_rose;
+	struct rose_facilities_struct facilities;
+	int n, len;
+
+	skb->sk = NULL;		/* Initially we don't know who it's for */
+
+	/*
+	 *	skb->data points to the rose frame start
+	 */
+	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
+
+	len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
+	len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
+	if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+		rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
+		return 0;
+	}
+
+	sk = rose_find_listener(&facilities.source_addr, &facilities.source_call);
+
+	/*
+	 * We can't accept the Call Request.
+	 */
+	if (sk == NULL || sk_acceptq_is_full(sk) ||
+	    (make = rose_make_new(sk)) == NULL) {
+		rose_transmit_clear_request(neigh, lci, ROSE_NETWORK_CONGESTION, 120);
+		return 0;
+	}
+
+	skb->sk     = make;
+	make->sk_state = TCP_ESTABLISHED;
+	make_rose = rose_sk(make);
+
+	make_rose->lci           = lci;
+	make_rose->dest_addr     = facilities.dest_addr;
+	make_rose->dest_call     = facilities.dest_call;
+	make_rose->dest_ndigis   = facilities.dest_ndigis;
+	for (n = 0 ; n < facilities.dest_ndigis ; n++)
+		make_rose->dest_digis[n] = facilities.dest_digis[n];
+	make_rose->source_addr   = facilities.source_addr;
+	make_rose->source_call   = facilities.source_call;
+	make_rose->source_ndigis = facilities.source_ndigis;
+	for (n = 0 ; n < facilities.source_ndigis ; n++)
+		make_rose->source_digis[n]= facilities.source_digis[n];
+	make_rose->neighbour     = neigh;
+	make_rose->device        = dev;
+	make_rose->facilities    = facilities;
+
+	make_rose->neighbour->use++;
+
+	if (rose_sk(sk)->defer) {
+		make_rose->state = ROSE_STATE_5;
+	} else {
+		rose_write_internal(make, ROSE_CALL_ACCEPTED);
+		make_rose->state = ROSE_STATE_3;
+		rose_start_idletimer(make);
+	}
+
+	make_rose->condition = 0x00;
+	make_rose->vs        = 0;
+	make_rose->va        = 0;
+	make_rose->vr        = 0;
+	make_rose->vl        = 0;
+	sk->sk_ack_backlog++;
+
+	rose_insert_socket(make);
+
+	skb_queue_head(&sk->sk_receive_queue, skb);
+
+	rose_start_heartbeat(make);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+
+	return 1;
+}
+
+static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	struct sockaddr_rose *usrose = (struct sockaddr_rose *)msg->msg_name;
+	int err;
+	struct full_sockaddr_rose srose;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int n, size, qbit = 0;
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
+		return -EINVAL;
+
+	if (sock_flag(sk, SOCK_ZAPPED))
+		return -EADDRNOTAVAIL;
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		return -EPIPE;
+	}
+
+	if (rose->neighbour == NULL || rose->device == NULL)
+		return -ENETUNREACH;
+
+	if (usrose != NULL) {
+		if (msg->msg_namelen != sizeof(struct sockaddr_rose) && msg->msg_namelen != sizeof(struct full_sockaddr_rose))
+			return -EINVAL;
+		memset(&srose, 0, sizeof(struct full_sockaddr_rose));
+		memcpy(&srose, usrose, msg->msg_namelen);
+		if (rosecmp(&rose->dest_addr, &srose.srose_addr) != 0 ||
+		    ax25cmp(&rose->dest_call, &srose.srose_call) != 0)
+			return -EISCONN;
+		if (srose.srose_ndigis != rose->dest_ndigis)
+			return -EISCONN;
+		if (srose.srose_ndigis == rose->dest_ndigis) {
+			for (n = 0 ; n < srose.srose_ndigis ; n++)
+				if (ax25cmp(&rose->dest_digis[n],
+					    &srose.srose_digis[n]))
+					return -EISCONN;
+		}
+		if (srose.srose_family != AF_ROSE)
+			return -EINVAL;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+
+		srose.srose_family = AF_ROSE;
+		srose.srose_addr   = rose->dest_addr;
+		srose.srose_call   = rose->dest_call;
+		srose.srose_ndigis = rose->dest_ndigis;
+		for (n = 0 ; n < rose->dest_ndigis ; n++)
+			srose.srose_digis[n] = rose->dest_digis[n];
+	}
+
+	SOCK_DEBUG(sk, "ROSE: sendto: Addresses built.\n");
+
+	/* Build a packet */
+	SOCK_DEBUG(sk, "ROSE: sendto: building packet.\n");
+	size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
+
+	if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
+		return err;
+
+	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN);
+
+	/*
+	 *	Put the data on the end
+	 */
+	SOCK_DEBUG(sk, "ROSE: Appending user data\n");
+
+	asmptr = skb->h.raw = skb_put(skb, len);
+
+	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	/*
+	 *	If the Q BIT Include socket option is in force, the first
+	 *	byte of the user data is the logical value of the Q Bit.
+	 */
+	if (rose->qbitincl) {
+		qbit = skb->data[0];
+		skb_pull(skb, 1);
+	}
+
+	/*
+	 *	Push down the ROSE header
+	 */
+	asmptr = skb_push(skb, ROSE_MIN_LEN);
+
+	SOCK_DEBUG(sk, "ROSE: Building Network Header.\n");
+
+	/* Build a ROSE Network header */
+	asmptr[0] = ((rose->lci >> 8) & 0x0F) | ROSE_GFI;
+	asmptr[1] = (rose->lci >> 0) & 0xFF;
+	asmptr[2] = ROSE_DATA;
+
+	if (qbit)
+		asmptr[0] |= ROSE_Q_BIT;
+
+	SOCK_DEBUG(sk, "ROSE: Built header.\n");
+
+	SOCK_DEBUG(sk, "ROSE: Transmitting buffer\n");
+
+	if (sk->sk_state != TCP_ESTABLISHED) {
+		kfree_skb(skb);
+		return -ENOTCONN;
+	}
+
+#ifdef M_BIT
+#define ROSE_PACLEN (256-ROSE_MIN_LEN)
+	if (skb->len - ROSE_MIN_LEN > ROSE_PACLEN) {
+		unsigned char header[ROSE_MIN_LEN];
+		struct sk_buff *skbn;
+		int frontlen;
+		int lg;
+
+		/* Save a copy of the Header */
+		memcpy(header, skb->data, ROSE_MIN_LEN);
+		skb_pull(skb, ROSE_MIN_LEN);
+
+		frontlen = skb_headroom(skb);
+
+		while (skb->len > 0) {
+			if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_PACLEN, 0, &err)) == NULL) {
+				kfree_skb(skb);
+				return err;
+			}
+
+			skbn->sk   = sk;
+			skbn->free = 1;
+			skbn->arp  = 1;
+
+			skb_reserve(skbn, frontlen);
+
+			lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
+
+			/* Copy the user data */
+			memcpy(skb_put(skbn, lg), skb->data, lg);
+			skb_pull(skb, lg);
+
+			/* Duplicate the Header */
+			skb_push(skbn, ROSE_MIN_LEN);
+			memcpy(skbn->data, header, ROSE_MIN_LEN);
+
+			if (skb->len > 0)
+				skbn->data[2] |= M_BIT;
+
+			skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */
+		}
+
+		skb->free = 1;
+		kfree_skb(skb);
+	} else {
+		skb_queue_tail(&sk->sk_write_queue, skb);		/* Throw it on the queue */
+	}
+#else
+	skb_queue_tail(&sk->sk_write_queue, skb);	/* Shove it onto the queue */
+#endif
+
+	rose_kick(sk);
+
+	return len;
+}
+
+
+static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name;
+	size_t copied;
+	unsigned char *asmptr;
+	struct sk_buff *skb;
+	int n, er, qbit;
+
+	/*
+	 * This works for seqpacket too. The receiver has ordered the queue for
+	 * us! We do one quick check first though
+	 */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	/* Now we can treat all alike */
+	if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL)
+		return er;
+
+	qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
+
+	skb_pull(skb, ROSE_MIN_LEN);
+
+	if (rose->qbitincl) {
+		asmptr  = skb_push(skb, 1);
+		*asmptr = qbit;
+	}
+
+	skb->h.raw = skb->data;
+	copied     = skb->len;
+
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	if (srose != NULL) {
+		srose->srose_family = AF_ROSE;
+		srose->srose_addr   = rose->dest_addr;
+		srose->srose_call   = rose->dest_call;
+		srose->srose_ndigis = rose->dest_ndigis;
+		if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) {
+			struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name;
+			for (n = 0 ; n < rose->dest_ndigis ; n++)
+				full_srose->srose_digis[n] = rose->dest_digis[n];
+			msg->msg_namelen = sizeof(struct full_sockaddr_rose);
+		} else {
+			if (rose->dest_ndigis >= 1) {
+				srose->srose_ndigis = 1;
+				srose->srose_digi = rose->dest_digis[0];
+			}
+			msg->msg_namelen = sizeof(struct sockaddr_rose);
+		}
+	}
+
+	skb_free_datagram(sk, skb);
+
+	return copied;
+}
+
+
+static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct rose_sock *rose = rose_sk(sk);
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case TIOCOUTQ: {
+		long amount;
+		amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		if (amount < 0)
+			amount = 0;
+		return put_user(amount, (unsigned int __user *)argp);
+	}
+
+	case TIOCINQ: {
+		struct sk_buff *skb;
+		long amount = 0L;
+		/* These two are safe on a single CPU system as only user tasks fiddle here */
+		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+			amount = skb->len;
+		return put_user(amount, (unsigned int __user *)argp);
+	}
+
+	case SIOCGSTAMP:
+		if (sk != NULL) 
+			return sock_get_timestamp(sk, (struct timeval __user *)argp);
+		return -EINVAL;
+
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+	case SIOCGIFMETRIC:
+	case SIOCSIFMETRIC:
+		return -EINVAL;
+
+	case SIOCADDRT:
+	case SIOCDELRT:
+	case SIOCRSCLRRT:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		return rose_rt_ioctl(cmd, argp);
+
+	case SIOCRSGCAUSE: {
+		struct rose_cause_struct rose_cause;
+		rose_cause.cause      = rose->cause;
+		rose_cause.diagnostic = rose->diagnostic;
+		return copy_to_user(argp, &rose_cause, sizeof(struct rose_cause_struct)) ? -EFAULT : 0;
+	}
+
+	case SIOCRSSCAUSE: {
+		struct rose_cause_struct rose_cause;
+		if (copy_from_user(&rose_cause, argp, sizeof(struct rose_cause_struct)))
+			return -EFAULT;
+		rose->cause      = rose_cause.cause;
+		rose->diagnostic = rose_cause.diagnostic;
+		return 0;
+	}
+
+	case SIOCRSSL2CALL:
+		if (!capable(CAP_NET_ADMIN)) return -EPERM;
+		if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
+			ax25_listen_release(&rose_callsign, NULL);
+		if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address)))
+			return -EFAULT;
+		if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
+			ax25_listen_register(&rose_callsign, NULL);
+		return 0;
+
+	case SIOCRSGL2CALL:
+		return copy_to_user(argp, &rose_callsign, sizeof(ax25_address)) ? -EFAULT : 0;
+
+	case SIOCRSACCEPT:
+		if (rose->state == ROSE_STATE_5) {
+			rose_write_internal(sk, ROSE_CALL_ACCEPTED);
+			rose_start_idletimer(sk);
+			rose->condition = 0x00;
+			rose->vs        = 0;
+			rose->va        = 0;
+			rose->vr        = 0;
+			rose->vl        = 0;
+			rose->state     = ROSE_STATE_3;
+		}
+		return 0;
+
+	default:
+		return dev_ioctl(cmd, argp);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static void *rose_info_start(struct seq_file *seq, loff_t *pos)
+{
+	int i;
+	struct sock *s;
+	struct hlist_node *node;
+
+	spin_lock_bh(&rose_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+	
+	i = 1;
+	sk_for_each(s, node, &rose_list) {
+		if (i == *pos)
+			return s;
+		++i;
+	}
+	return NULL;
+}
+
+static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return (v == SEQ_START_TOKEN) ? sk_head(&rose_list) 
+		: sk_next((struct sock *)v);
+}
+	
+static void rose_info_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&rose_list_lock);
+}
+
+static int rose_info_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, 
+			 "dest_addr  dest_call src_addr   src_call  dev   lci neigh st vs vr va   t  t1  t2  t3  hb    idle Snd-Q Rcv-Q inode\n");
+
+	else {
+		struct sock *s = v;
+		struct rose_sock *rose = rose_sk(s);
+		const char *devname, *callsign;
+		const struct net_device *dev = rose->device;
+
+		if (!dev)
+			devname = "???";
+		else
+			devname = dev->name;
+		
+		seq_printf(seq, "%-10s %-9s ",
+			rose2asc(&rose->dest_addr),
+			ax2asc(&rose->dest_call));
+
+		if (ax25cmp(&rose->source_call, &null_ax25_address) == 0)
+			callsign = "??????-?";
+		else
+			callsign = ax2asc(&rose->source_call);
+
+		seq_printf(seq,
+			   "%-10s %-9s %-5s %3.3X %05d  %d  %d  %d  %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n",
+			rose2asc(&rose->source_addr),
+			callsign,
+			devname,
+			rose->lci & 0x0FFF,
+			(rose->neighbour) ? rose->neighbour->number : 0,
+			rose->state,
+			rose->vs,
+			rose->vr,
+			rose->va,
+			ax25_display_timer(&rose->timer) / HZ,
+			rose->t1 / HZ,
+			rose->t2 / HZ,
+			rose->t3 / HZ,
+			rose->hb / HZ,
+			ax25_display_timer(&rose->idletimer) / (60 * HZ),
+			rose->idle / (60 * HZ),
+			atomic_read(&s->sk_wmem_alloc),
+			atomic_read(&s->sk_rmem_alloc),
+			s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+	}
+
+	return 0;
+}
+
+static struct seq_operations rose_info_seqops = {
+	.start = rose_info_start,
+	.next = rose_info_next,
+	.stop = rose_info_stop,
+	.show = rose_info_show,
+};
+
+static int rose_info_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rose_info_seqops);
+}
+
+static struct file_operations rose_info_fops = {
+	.owner = THIS_MODULE,
+	.open = rose_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+#endif	/* CONFIG_PROC_FS */
+
+static struct net_proto_family rose_family_ops = {
+	.family		=	PF_ROSE,
+	.create		=	rose_create,
+	.owner		=	THIS_MODULE,
+};
+
+static struct proto_ops rose_proto_ops = {
+	.family		=	PF_ROSE,
+	.owner		=	THIS_MODULE,
+	.release	=	rose_release,
+	.bind		=	rose_bind,
+	.connect	=	rose_connect,
+	.socketpair	=	sock_no_socketpair,
+	.accept		=	rose_accept,
+	.getname	=	rose_getname,
+	.poll		=	datagram_poll,
+	.ioctl		=	rose_ioctl,
+	.listen		=	rose_listen,
+	.shutdown	=	sock_no_shutdown,
+	.setsockopt	=	rose_setsockopt,
+	.getsockopt	=	rose_getsockopt,
+	.sendmsg	=	rose_sendmsg,
+	.recvmsg	=	rose_recvmsg,
+	.mmap		=	sock_no_mmap,
+	.sendpage	=	sock_no_sendpage,
+};
+
+static struct notifier_block rose_dev_notifier = {
+	.notifier_call	=	rose_device_event,
+};
+
+static struct net_device **dev_rose;
+
+static const char banner[] = KERN_INFO "F6FBB/G4KLX ROSE for Linux. Version 0.62 for AX25.037 Linux 2.4\n";
+
+static int __init rose_proto_init(void)
+{
+	int i;
+	int rc = proto_register(&rose_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	rose_callsign = null_ax25_address;
+
+	if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) {
+		printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n");
+		return -1;
+	}
+
+	dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+	if (dev_rose == NULL) {
+		printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n");
+		return -1;
+	}
+
+	memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*));
+	for (i = 0; i < rose_ndevs; i++) {
+		struct net_device *dev;
+		char name[IFNAMSIZ];
+
+		sprintf(name, "rose%d", i);
+		dev = alloc_netdev(sizeof(struct net_device_stats), 
+				   name, rose_setup);
+		if (!dev) {
+			printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n");
+			goto fail;
+		}
+		if (register_netdev(dev)) {
+			printk(KERN_ERR "ROSE: netdevice regeistration failed\n");
+			free_netdev(dev);
+			goto fail;
+		}
+		dev_rose[i] = dev;
+	}
+
+	sock_register(&rose_family_ops);
+	register_netdevice_notifier(&rose_dev_notifier);
+	printk(banner);
+
+	ax25_protocol_register(AX25_P_ROSE, rose_route_frame);
+	ax25_linkfail_register(rose_link_failed);
+
+#ifdef CONFIG_SYSCTL
+	rose_register_sysctl();
+#endif
+	rose_loopback_init();
+
+	rose_add_loopback_neigh();
+
+	proc_net_fops_create("rose", S_IRUGO, &rose_info_fops);
+	proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops);
+	proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops);
+	proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops);
+out:
+	return rc;
+fail:
+	while (--i >= 0) {
+		unregister_netdev(dev_rose[i]);
+		free_netdev(dev_rose[i]);
+	}
+	kfree(dev_rose);
+	proto_unregister(&rose_proto);
+	return -ENOMEM;
+}
+module_init(rose_proto_init);
+
+module_param(rose_ndevs, int, 0);
+MODULE_PARM_DESC(rose_ndevs, "number of ROSE devices");
+
+MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
+MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_ROSE);
+
+static void __exit rose_exit(void)
+{
+	int i;
+
+	proc_net_remove("rose");
+	proc_net_remove("rose_neigh");
+	proc_net_remove("rose_nodes");
+	proc_net_remove("rose_routes");
+	rose_loopback_clear();
+
+	rose_rt_free();
+
+	ax25_protocol_release(AX25_P_ROSE);
+	ax25_linkfail_release(rose_link_failed);
+
+	if (ax25cmp(&rose_callsign, &null_ax25_address) != 0)
+		ax25_listen_release(&rose_callsign, NULL);
+
+#ifdef CONFIG_SYSCTL
+	rose_unregister_sysctl();
+#endif
+	unregister_netdevice_notifier(&rose_dev_notifier);
+
+	sock_unregister(PF_ROSE);
+
+	for (i = 0; i < rose_ndevs; i++) {
+		struct net_device *dev = dev_rose[i];
+
+		if (dev) {
+			unregister_netdev(dev);
+			free_netdev(dev);
+		}
+	}
+
+	kfree(dev_rose);
+	proto_unregister(&rose_proto);
+}
+
+module_exit(rose_exit);
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
new file mode 100644
index 00000000000..a8ed9a1d09f
--- /dev/null
+++ b/net/rose/rose_dev.c
@@ -0,0 +1,154 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+
+#include <net/ip.h>
+#include <net/arp.h>
+
+#include <net/ax25.h>
+#include <net/rose.h>
+
+static int rose_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	void *daddr, void *saddr, unsigned len)
+{
+	unsigned char *buff = skb_push(skb, ROSE_MIN_LEN + 2);
+
+	*buff++ = ROSE_GFI | ROSE_Q_BIT;
+	*buff++ = 0x00;
+	*buff++ = ROSE_DATA;
+	*buff++ = 0x7F;
+	*buff++ = AX25_P_IP;
+
+	if (daddr != NULL)
+		return 37;
+
+	return -37;
+}
+
+static int rose_rebuild_header(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct net_device_stats *stats = netdev_priv(dev);
+	unsigned char *bp = (unsigned char *)skb->data;
+	struct sk_buff *skbn;
+
+#ifdef CONFIG_INET
+	if (arp_find(bp + 7, skb)) {
+		return 1;
+	}
+
+	if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	if (skb->sk != NULL)
+		skb_set_owner_w(skbn, skb->sk);
+
+	kfree_skb(skb);
+
+	if (!rose_route_frame(skbn, NULL)) {
+		kfree_skb(skbn);
+		stats->tx_errors++;
+		return 1;
+	}
+
+	stats->tx_packets++;
+	stats->tx_bytes += skbn->len;
+#endif
+	return 1;
+}
+
+static int rose_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sa = addr;
+
+	rose_del_loopback_node((rose_address *)dev->dev_addr);
+
+	memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+
+	rose_add_loopback_node((rose_address *)dev->dev_addr);
+
+	return 0;
+}
+
+static int rose_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	rose_add_loopback_node((rose_address *)dev->dev_addr);
+	return 0;
+}
+
+static int rose_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	rose_del_loopback_node((rose_address *)dev->dev_addr);
+	return 0;
+}
+
+static int rose_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = netdev_priv(dev);
+
+	if (!netif_running(dev)) {
+		printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n");
+		return 1;
+	}
+	dev_kfree_skb(skb);
+	stats->tx_errors++;
+	return 0;
+}
+
+static struct net_device_stats *rose_get_stats(struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
+void rose_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+	dev->mtu		= ROSE_MAX_PACKET_SIZE - 2;
+	dev->hard_start_xmit	= rose_xmit;
+	dev->open		= rose_open;
+	dev->stop		= rose_close;
+
+	dev->hard_header	= rose_header;
+	dev->hard_header_len	= AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
+	dev->addr_len		= ROSE_ADDR_LEN;
+	dev->type		= ARPHRD_ROSE;
+	dev->rebuild_header	= rose_rebuild_header;
+	dev->set_mac_address    = rose_set_mac_address;
+
+	/* New-style flags. */
+	dev->flags		= 0;
+	dev->get_stats = rose_get_stats;
+}
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
new file mode 100644
index 00000000000..ef475a1bb1b
--- /dev/null
+++ b/net/rose/rose_in.c
@@ -0,0 +1,297 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ *
+ * Most of this code is based on the SDL diagrams published in the 7th ARRL
+ * Computer Networking Conference papers. The diagrams have mistakes in them,
+ * but are mostly correct. Before you modify the code could you read the SDL
+ * diagrams as the code is not obvious and probably very easy to break.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/ip.h>			/* For ip_rcv */
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/rose.h>
+
+/*
+ * State machine for state 1, Awaiting Call Accepted State.
+ * The handling of the timer(s) is in file rose_timer.c.
+ * Handling of state 0 and connection release is in af_rose.c.
+ */
+static int rose_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	switch (frametype) {
+	case ROSE_CALL_ACCEPTED:
+		rose_stop_timer(sk);
+		rose_start_idletimer(sk);
+		rose->condition = 0x00;
+		rose->vs        = 0;
+		rose->va        = 0;
+		rose->vr        = 0;
+		rose->vl        = 0;
+		rose->state     = ROSE_STATE_3;
+		sk->sk_state	= TCP_ESTABLISHED;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		break;
+
+	case ROSE_CLEAR_REQUEST:
+		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
+		rose_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]);
+		rose->neighbour->use--;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 2, Awaiting Clear Confirmation State.
+ * The handling of the timer(s) is in file rose_timer.c
+ * Handling of state 0 and connection release is in af_rose.c.
+ */
+static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	switch (frametype) {
+	case ROSE_CLEAR_REQUEST:
+		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
+		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
+		rose->neighbour->use--;
+		break;
+
+	case ROSE_CLEAR_CONFIRMATION:
+		rose_disconnect(sk, 0, -1, -1);
+		rose->neighbour->use--;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 3, Connected State.
+ * The handling of the timer(s) is in file rose_timer.c
+ * Handling of state 0 and connection release is in af_rose.c.
+ */
+static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
+{
+	struct rose_sock *rose = rose_sk(sk);
+	int queued = 0;
+
+	switch (frametype) {
+	case ROSE_RESET_REQUEST:
+		rose_stop_timer(sk);
+		rose_start_idletimer(sk);
+		rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+		rose->condition = 0x00;
+		rose->vs        = 0;
+		rose->vr        = 0;
+		rose->va        = 0;
+		rose->vl        = 0;
+		rose_requeue_frames(sk);
+		break;
+
+	case ROSE_CLEAR_REQUEST:
+		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
+		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
+		rose->neighbour->use--;
+		break;
+
+	case ROSE_RR:
+	case ROSE_RNR:
+		if (!rose_validate_nr(sk, nr)) {
+			rose_write_internal(sk, ROSE_RESET_REQUEST);
+			rose->condition = 0x00;
+			rose->vs        = 0;
+			rose->vr        = 0;
+			rose->va        = 0;
+			rose->vl        = 0;
+			rose->state     = ROSE_STATE_4;
+			rose_start_t2timer(sk);
+			rose_stop_idletimer(sk);
+		} else {
+			rose_frames_acked(sk, nr);
+			if (frametype == ROSE_RNR) {
+				rose->condition |= ROSE_COND_PEER_RX_BUSY;
+			} else {
+				rose->condition &= ~ROSE_COND_PEER_RX_BUSY;
+			}
+		}
+		break;
+
+	case ROSE_DATA:	/* XXX */
+		rose->condition &= ~ROSE_COND_PEER_RX_BUSY;
+		if (!rose_validate_nr(sk, nr)) {
+			rose_write_internal(sk, ROSE_RESET_REQUEST);
+			rose->condition = 0x00;
+			rose->vs        = 0;
+			rose->vr        = 0;
+			rose->va        = 0;
+			rose->vl        = 0;
+			rose->state     = ROSE_STATE_4;
+			rose_start_t2timer(sk);
+			rose_stop_idletimer(sk);
+			break;
+		}
+		rose_frames_acked(sk, nr);
+		if (ns == rose->vr) {
+			rose_start_idletimer(sk);
+			if (sock_queue_rcv_skb(sk, skb) == 0) {
+				rose->vr = (rose->vr + 1) % ROSE_MODULUS;
+				queued = 1;
+			} else {
+				/* Should never happen ! */
+				rose_write_internal(sk, ROSE_RESET_REQUEST);
+				rose->condition = 0x00;
+				rose->vs        = 0;
+				rose->vr        = 0;
+				rose->va        = 0;
+				rose->vl        = 0;
+				rose->state     = ROSE_STATE_4;
+				rose_start_t2timer(sk);
+				rose_stop_idletimer(sk);
+				break;
+			}
+			if (atomic_read(&sk->sk_rmem_alloc) >
+			    (sk->sk_rcvbuf / 2))
+				rose->condition |= ROSE_COND_OWN_RX_BUSY;
+		}
+		/*
+		 * If the window is full, ack the frame, else start the
+		 * acknowledge hold back timer.
+		 */
+		if (((rose->vl + sysctl_rose_window_size) % ROSE_MODULUS) == rose->vr) {
+			rose->condition &= ~ROSE_COND_ACK_PENDING;
+			rose_stop_timer(sk);
+			rose_enquiry_response(sk);
+		} else {
+			rose->condition |= ROSE_COND_ACK_PENDING;
+			rose_start_hbtimer(sk);
+		}
+		break;
+
+	default:
+		printk(KERN_WARNING "ROSE: unknown %02X in state 3\n", frametype);
+		break;
+	}
+
+	return queued;
+}
+
+/*
+ * State machine for state 4, Awaiting Reset Confirmation State.
+ * The handling of the timer(s) is in file rose_timer.c
+ * Handling of state 0 and connection release is in af_rose.c.
+ */
+static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	switch (frametype) {
+	case ROSE_RESET_REQUEST:
+		rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+	case ROSE_RESET_CONFIRMATION:
+		rose_stop_timer(sk);
+		rose_start_idletimer(sk);
+		rose->condition = 0x00;
+		rose->va        = 0;
+		rose->vr        = 0;
+		rose->vs        = 0;
+		rose->vl        = 0;
+		rose->state     = ROSE_STATE_3;
+		rose_requeue_frames(sk);
+		break;
+
+	case ROSE_CLEAR_REQUEST:
+		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
+		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
+		rose->neighbour->use--;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 5, Awaiting Call Acceptance State.
+ * The handling of the timer(s) is in file rose_timer.c
+ * Handling of state 0 and connection release is in af_rose.c.
+ */
+static int rose_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	if (frametype == ROSE_CLEAR_REQUEST) {
+		rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION);
+		rose_disconnect(sk, 0, skb->data[3], skb->data[4]);
+		rose_sk(sk)->neighbour->use--;
+	}
+
+	return 0;
+}
+
+/* Higher level upcall for a LAPB frame */
+int rose_process_rx_frame(struct sock *sk, struct sk_buff *skb)
+{
+	struct rose_sock *rose = rose_sk(sk);
+	int queued = 0, frametype, ns, nr, q, d, m;
+
+	if (rose->state == ROSE_STATE_0)
+		return 0;
+
+	frametype = rose_decode(skb, &ns, &nr, &q, &d, &m);
+
+	switch (rose->state) {
+	case ROSE_STATE_1:
+		queued = rose_state1_machine(sk, skb, frametype);
+		break;
+	case ROSE_STATE_2:
+		queued = rose_state2_machine(sk, skb, frametype);
+		break;
+	case ROSE_STATE_3:
+		queued = rose_state3_machine(sk, skb, frametype, ns, nr, q, d, m);
+		break;
+	case ROSE_STATE_4:
+		queued = rose_state4_machine(sk, skb, frametype);
+		break;
+	case ROSE_STATE_5:
+		queued = rose_state5_machine(sk, skb, frametype);
+		break;
+	}
+
+	rose_kick(sk);
+
+	return queued;
+}
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
new file mode 100644
index 00000000000..09e9e9d04d9
--- /dev/null
+++ b/net/rose/rose_link.c
@@ -0,0 +1,288 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/netfilter.h>
+#include <net/rose.h>
+
+static void rose_ftimer_expiry(unsigned long);
+static void rose_t0timer_expiry(unsigned long);
+
+static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
+static void rose_transmit_restart_request(struct rose_neigh *neigh);
+
+void rose_start_ftimer(struct rose_neigh *neigh)
+{
+	del_timer(&neigh->ftimer);
+
+	neigh->ftimer.data     = (unsigned long)neigh;
+	neigh->ftimer.function = &rose_ftimer_expiry;
+	neigh->ftimer.expires  = jiffies + sysctl_rose_link_fail_timeout;
+
+	add_timer(&neigh->ftimer);
+}
+
+static void rose_start_t0timer(struct rose_neigh *neigh)
+{
+	del_timer(&neigh->t0timer);
+
+	neigh->t0timer.data     = (unsigned long)neigh;
+	neigh->t0timer.function = &rose_t0timer_expiry;
+	neigh->t0timer.expires  = jiffies + sysctl_rose_restart_request_timeout;
+
+	add_timer(&neigh->t0timer);
+}
+
+void rose_stop_ftimer(struct rose_neigh *neigh)
+{
+	del_timer(&neigh->ftimer);
+}
+
+void rose_stop_t0timer(struct rose_neigh *neigh)
+{
+	del_timer(&neigh->t0timer);
+}
+
+int rose_ftimer_running(struct rose_neigh *neigh)
+{
+	return timer_pending(&neigh->ftimer);
+}
+
+static int rose_t0timer_running(struct rose_neigh *neigh)
+{
+	return timer_pending(&neigh->t0timer);
+}
+
+static void rose_ftimer_expiry(unsigned long param)
+{
+}
+
+static void rose_t0timer_expiry(unsigned long param)
+{
+	struct rose_neigh *neigh = (struct rose_neigh *)param;
+
+	rose_transmit_restart_request(neigh);
+
+	neigh->dce_mode = 0;
+
+	rose_start_t0timer(neigh);
+}
+
+/*
+ *	Interface to ax25_send_frame. Changes my level 2 callsign depending
+ *	on whether we have a global ROSE callsign or use the default port
+ *	callsign.
+ */
+static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
+{
+	ax25_address *rose_call;
+
+	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
+		rose_call = (ax25_address *)neigh->dev->dev_addr;
+	else
+		rose_call = &rose_callsign;
+
+	neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
+
+	return (neigh->ax25 != NULL);
+}
+
+/*
+ *	Interface to ax25_link_up. Changes my level 2 callsign depending
+ *	on whether we have a global ROSE callsign or use the default port
+ *	callsign.
+ */
+static int rose_link_up(struct rose_neigh *neigh)
+{
+	ax25_address *rose_call;
+
+	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
+		rose_call = (ax25_address *)neigh->dev->dev_addr;
+	else
+		rose_call = &rose_callsign;
+
+	neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
+
+	return (neigh->ax25 != NULL);
+}
+
+/*
+ *	This handles all restart and diagnostic frames.
+ */
+void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigned short frametype)
+{
+	struct sk_buff *skbn;
+
+	switch (frametype) {
+	case ROSE_RESTART_REQUEST:
+		rose_stop_t0timer(neigh);
+		neigh->restarted = 1;
+		neigh->dce_mode  = (skb->data[3] == ROSE_DTE_ORIGINATED);
+		rose_transmit_restart_confirmation(neigh);
+		break;
+
+	case ROSE_RESTART_CONFIRMATION:
+		rose_stop_t0timer(neigh);
+		neigh->restarted = 1;
+		break;
+
+	case ROSE_DIAGNOSTIC:
+		printk(KERN_WARNING "ROSE: received diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]);
+		break;
+
+	default:
+		printk(KERN_WARNING "ROSE: received unknown %02X with LCI 000\n", frametype);
+		break;
+	}
+
+	if (neigh->restarted) {
+		while ((skbn = skb_dequeue(&neigh->queue)) != NULL)
+			if (!rose_send_frame(skbn, neigh))
+				kfree_skb(skbn);
+	}
+}
+
+/*
+ *	This routine is called when a Restart Request is needed
+ */
+static void rose_transmit_restart_request(struct rose_neigh *neigh)
+{
+	struct sk_buff *skb;
+	unsigned char *dptr;
+	int len;
+
+	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
+
+	dptr = skb_put(skb, ROSE_MIN_LEN + 3);
+
+	*dptr++ = AX25_P_ROSE;
+	*dptr++ = ROSE_GFI;
+	*dptr++ = 0x00;
+	*dptr++ = ROSE_RESTART_REQUEST;
+	*dptr++ = ROSE_DTE_ORIGINATED;
+	*dptr++ = 0;
+
+	if (!rose_send_frame(skb, neigh))
+		kfree_skb(skb);
+}
+
+/*
+ * This routine is called when a Restart Confirmation is needed
+ */
+static void rose_transmit_restart_confirmation(struct rose_neigh *neigh)
+{
+	struct sk_buff *skb;
+	unsigned char *dptr;
+	int len;
+
+	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
+
+	dptr = skb_put(skb, ROSE_MIN_LEN + 1);
+
+	*dptr++ = AX25_P_ROSE;
+	*dptr++ = ROSE_GFI;
+	*dptr++ = 0x00;
+	*dptr++ = ROSE_RESTART_CONFIRMATION;
+
+	if (!rose_send_frame(skb, neigh))
+		kfree_skb(skb);
+}
+
+/*
+ * This routine is called when a Clear Request is needed outside of the context
+ * of a connected socket.
+ */
+void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, unsigned char cause, unsigned char diagnostic)
+{
+	struct sk_buff *skb;
+	unsigned char *dptr;
+	int len;
+
+	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN);
+
+	dptr = skb_put(skb, ROSE_MIN_LEN + 3);
+
+	*dptr++ = AX25_P_ROSE;
+	*dptr++ = ((lci >> 8) & 0x0F) | ROSE_GFI;
+	*dptr++ = ((lci >> 0) & 0xFF);
+	*dptr++ = ROSE_CLEAR_REQUEST;
+	*dptr++ = cause;
+	*dptr++ = diagnostic;
+
+	if (!rose_send_frame(skb, neigh))
+		kfree_skb(skb);
+}
+
+void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh)
+{
+	unsigned char *dptr;
+
+#if 0
+	if (call_fw_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) {
+		kfree_skb(skb);
+		return;
+	}
+#endif
+
+	if (neigh->loopback) {
+		rose_loopback_queue(skb, neigh);
+		return;
+	}
+
+	if (!rose_link_up(neigh))
+		neigh->restarted = 0;
+
+	dptr = skb_push(skb, 1);
+	*dptr++ = AX25_P_ROSE;
+
+	if (neigh->restarted) {
+		if (!rose_send_frame(skb, neigh))
+			kfree_skb(skb);
+	} else {
+		skb_queue_tail(&neigh->queue, skb);
+
+		if (!rose_t0timer_running(neigh)) {
+			rose_transmit_restart_request(neigh);
+			neigh->dce_mode = 0;
+			rose_start_t0timer(neigh);
+		}
+	}
+}
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
new file mode 100644
index 00000000000..103b4d38f88
--- /dev/null
+++ b/net/rose/rose_loopback.c
@@ -0,0 +1,111 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/timer.h>
+#include <net/ax25.h>
+#include <linux/skbuff.h>
+#include <net/rose.h>
+#include <linux/init.h>
+
+static struct sk_buff_head loopback_queue;
+static struct timer_list loopback_timer;
+
+static void rose_set_loopback_timer(void);
+
+void rose_loopback_init(void)
+{
+	skb_queue_head_init(&loopback_queue);
+
+	init_timer(&loopback_timer);
+}
+
+static int rose_loopback_running(void)
+{
+	return timer_pending(&loopback_timer);
+}
+
+int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
+{
+	struct sk_buff *skbn;
+
+	skbn = skb_clone(skb, GFP_ATOMIC);
+
+	kfree_skb(skb);
+
+	if (skbn != NULL) {
+		skb_queue_tail(&loopback_queue, skbn);
+
+		if (!rose_loopback_running())
+			rose_set_loopback_timer();
+	}
+
+	return 1;
+}
+
+static void rose_loopback_timer(unsigned long);
+
+static void rose_set_loopback_timer(void)
+{
+	del_timer(&loopback_timer);
+
+	loopback_timer.data     = 0;
+	loopback_timer.function = &rose_loopback_timer;
+	loopback_timer.expires  = jiffies + 10;
+
+	add_timer(&loopback_timer);
+}
+
+static void rose_loopback_timer(unsigned long param)
+{
+	struct sk_buff *skb;
+	struct net_device *dev;
+	rose_address *dest;
+	struct sock *sk;
+	unsigned short frametype;
+	unsigned int lci_i, lci_o;
+
+	while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+		lci_i     = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
+		frametype = skb->data[2];
+		dest      = (rose_address *)(skb->data + 4);
+		lci_o     = 0xFFF - lci_i;
+
+		skb->h.raw = skb->data;
+
+		if ((sk = rose_find_socket(lci_o, rose_loopback_neigh)) != NULL) {
+			if (rose_process_rx_frame(sk, skb) == 0)
+				kfree_skb(skb);
+			continue;
+		}
+
+		if (frametype == ROSE_CALL_REQUEST) {
+			if ((dev = rose_dev_get(dest)) != NULL) {
+				if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
+					kfree_skb(skb);
+			} else {
+				kfree_skb(skb);
+			}
+		} else {
+			kfree_skb(skb);
+		}
+	}
+}
+
+void __exit rose_loopback_clear(void)
+{
+	struct sk_buff *skb;
+
+	del_timer(&loopback_timer);
+
+	while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+		skb->sk = NULL;
+		kfree_skb(skb);
+	}
+}
diff --git a/net/rose/rose_out.c b/net/rose/rose_out.c
new file mode 100644
index 00000000000..2965ffc83b9
--- /dev/null
+++ b/net/rose/rose_out.c
@@ -0,0 +1,126 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/rose.h>
+
+/*
+ *	This procedure is passed a buffer descriptor for an iframe. It builds
+ *	the rest of the control part of the frame and then writes it out.
+ */
+static void rose_send_iframe(struct sock *sk, struct sk_buff *skb)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	if (skb == NULL)
+		return;
+
+	skb->data[2] |= (rose->vr << 5) & 0xE0;
+	skb->data[2] |= (rose->vs << 1) & 0x0E;
+
+	rose_start_idletimer(sk);
+
+	rose_transmit_link(skb, rose->neighbour);
+}
+
+void rose_kick(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+	struct sk_buff *skb, *skbn;
+	unsigned short start, end;
+
+	if (rose->state != ROSE_STATE_3)
+		return;
+
+	if (rose->condition & ROSE_COND_PEER_RX_BUSY)
+		return;
+
+	if (!skb_peek(&sk->sk_write_queue))
+		return;
+
+	start = (skb_peek(&rose->ack_queue) == NULL) ? rose->va : rose->vs;
+	end   = (rose->va + sysctl_rose_window_size) % ROSE_MODULUS;
+
+	if (start == end)
+		return;
+
+	rose->vs = start;
+
+	/*
+	 * Transmit data until either we're out of data to send or
+	 * the window is full.
+	 */
+
+	skb  = skb_dequeue(&sk->sk_write_queue);
+
+	do {
+		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+			skb_queue_head(&sk->sk_write_queue, skb);
+			break;
+		}
+
+		skb_set_owner_w(skbn, sk);
+
+		/*
+		 * Transmit the frame copy.
+		 */
+		rose_send_iframe(sk, skbn);
+
+		rose->vs = (rose->vs + 1) % ROSE_MODULUS;
+
+		/*
+		 * Requeue the original data frame.
+		 */
+		skb_queue_tail(&rose->ack_queue, skb);
+
+	} while (rose->vs != end &&
+		 (skb = skb_dequeue(&sk->sk_write_queue)) != NULL);
+
+	rose->vl         = rose->vr;
+	rose->condition &= ~ROSE_COND_ACK_PENDING;
+
+	rose_stop_timer(sk);
+}
+
+/*
+ * The following routines are taken from page 170 of the 7th ARRL Computer
+ * Networking Conference paper, as is the whole state machine.
+ */
+
+void rose_enquiry_response(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	if (rose->condition & ROSE_COND_OWN_RX_BUSY)
+		rose_write_internal(sk, ROSE_RNR);
+	else
+		rose_write_internal(sk, ROSE_RR);
+
+	rose->vl         = rose->vr;
+	rose->condition &= ~ROSE_COND_ACK_PENDING;
+
+	rose_stop_timer(sk);
+}
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
new file mode 100644
index 00000000000..ff73ebb912b
--- /dev/null
+++ b/net/rose/rose_route.c
@@ -0,0 +1,1343 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/arp.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/netfilter.h>
+#include <linux/init.h>
+#include <net/rose.h>
+#include <linux/seq_file.h>
+
+static unsigned int rose_neigh_no = 1;
+
+static struct rose_node  *rose_node_list;
+static DEFINE_SPINLOCK(rose_node_list_lock);
+static struct rose_neigh *rose_neigh_list;
+static DEFINE_SPINLOCK(rose_neigh_list_lock);
+static struct rose_route *rose_route_list;
+static DEFINE_SPINLOCK(rose_route_list_lock);
+
+struct rose_neigh *rose_loopback_neigh;
+
+static void rose_remove_neigh(struct rose_neigh *);
+
+/*
+ *	Add a new route to a node, and in the process add the node and the
+ *	neighbour if it is new.
+ */
+static int rose_add_node(struct rose_route_struct *rose_route,
+	struct net_device *dev)
+{
+	struct rose_node  *rose_node, *rose_tmpn, *rose_tmpp;
+	struct rose_neigh *rose_neigh;
+	int i, res = 0;
+
+	spin_lock_bh(&rose_node_list_lock);
+	spin_lock_bh(&rose_neigh_list_lock);
+
+	rose_node = rose_node_list;
+	while (rose_node != NULL) {
+		if ((rose_node->mask == rose_route->mask) &&
+		    (rosecmpm(&rose_route->address, &rose_node->address,
+		              rose_route->mask) == 0))
+			break;
+		rose_node = rose_node->next;
+	}
+
+	if (rose_node != NULL && rose_node->loopback) {
+		res = -EINVAL;
+		goto out;
+	}
+
+	rose_neigh = rose_neigh_list;
+	while (rose_neigh != NULL) {
+		if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0
+		    && rose_neigh->dev == dev)
+			break;
+		rose_neigh = rose_neigh->next;
+	}
+
+	if (rose_neigh == NULL) {
+		rose_neigh = kmalloc(sizeof(*rose_neigh), GFP_ATOMIC);
+		if (rose_neigh == NULL) {
+			res = -ENOMEM;
+			goto out;
+		}
+
+		rose_neigh->callsign  = rose_route->neighbour;
+		rose_neigh->digipeat  = NULL;
+		rose_neigh->ax25      = NULL;
+		rose_neigh->dev       = dev;
+		rose_neigh->count     = 0;
+		rose_neigh->use       = 0;
+		rose_neigh->dce_mode  = 0;
+		rose_neigh->loopback  = 0;
+		rose_neigh->number    = rose_neigh_no++;
+		rose_neigh->restarted = 0;
+
+		skb_queue_head_init(&rose_neigh->queue);
+
+		init_timer(&rose_neigh->ftimer);
+		init_timer(&rose_neigh->t0timer);
+
+		if (rose_route->ndigis != 0) {
+			if ((rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
+				kfree(rose_neigh);
+				res = -ENOMEM;
+				goto out;
+			}
+
+			rose_neigh->digipeat->ndigi      = rose_route->ndigis;
+			rose_neigh->digipeat->lastrepeat = -1;
+
+			for (i = 0; i < rose_route->ndigis; i++) {
+				rose_neigh->digipeat->calls[i]    =
+					rose_route->digipeaters[i];
+				rose_neigh->digipeat->repeated[i] = 0;
+			}
+		}
+
+		rose_neigh->next = rose_neigh_list;
+		rose_neigh_list  = rose_neigh;
+	}
+
+	/*
+	 * This is a new node to be inserted into the list. Find where it needs
+	 * to be inserted into the list, and insert it. We want to be sure
+	 * to order the list in descending order of mask size to ensure that
+	 * later when we are searching this list the first match will be the
+	 * best match.
+	 */
+	if (rose_node == NULL) {
+		rose_tmpn = rose_node_list;
+		rose_tmpp = NULL;
+
+		while (rose_tmpn != NULL) {
+			if (rose_tmpn->mask > rose_route->mask) {
+				rose_tmpp = rose_tmpn;
+				rose_tmpn = rose_tmpn->next;
+			} else {
+				break;
+			}
+		}
+
+		/* create new node */
+		rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC);
+		if (rose_node == NULL) {
+			res = -ENOMEM;
+			goto out;
+		}
+
+		rose_node->address      = rose_route->address;
+		rose_node->mask         = rose_route->mask;
+		rose_node->count        = 1;
+		rose_node->loopback     = 0;
+		rose_node->neighbour[0] = rose_neigh;
+
+		if (rose_tmpn == NULL) {
+			if (rose_tmpp == NULL) {	/* Empty list */
+				rose_node_list  = rose_node;
+				rose_node->next = NULL;
+			} else {
+				rose_tmpp->next = rose_node;
+				rose_node->next = NULL;
+			}
+		} else {
+			if (rose_tmpp == NULL) {	/* 1st node */
+				rose_node->next = rose_node_list;
+				rose_node_list  = rose_node;
+			} else {
+				rose_tmpp->next = rose_node;
+				rose_node->next = rose_tmpn;
+			}
+		}
+		rose_neigh->count++;
+
+		goto out;
+	}
+
+	/* We have space, slot it in */
+	if (rose_node->count < 3) {
+		rose_node->neighbour[rose_node->count] = rose_neigh;
+		rose_node->count++;
+		rose_neigh->count++;
+	}
+
+out:
+	spin_unlock_bh(&rose_neigh_list_lock);
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return res;
+}
+
+/*
+ * Caller is holding rose_node_list_lock.
+ */
+static void rose_remove_node(struct rose_node *rose_node)
+{
+	struct rose_node *s;
+
+	if ((s = rose_node_list) == rose_node) {
+		rose_node_list = rose_node->next;
+		kfree(rose_node);
+		return;
+	}
+
+	while (s != NULL && s->next != NULL) {
+		if (s->next == rose_node) {
+			s->next = rose_node->next;
+			kfree(rose_node);
+			return;
+		}
+
+		s = s->next;
+	}
+}
+
+/*
+ * Caller is holding rose_neigh_list_lock.
+ */
+static void rose_remove_neigh(struct rose_neigh *rose_neigh)
+{
+	struct rose_neigh *s;
+
+	rose_stop_ftimer(rose_neigh);
+	rose_stop_t0timer(rose_neigh);
+
+	skb_queue_purge(&rose_neigh->queue);
+
+	spin_lock_bh(&rose_neigh_list_lock);
+
+	if ((s = rose_neigh_list) == rose_neigh) {
+		rose_neigh_list = rose_neigh->next;
+		spin_unlock_bh(&rose_neigh_list_lock);
+		if (rose_neigh->digipeat != NULL)
+			kfree(rose_neigh->digipeat);
+		kfree(rose_neigh);
+		return;
+	}
+
+	while (s != NULL && s->next != NULL) {
+		if (s->next == rose_neigh) {
+			s->next = rose_neigh->next;
+			spin_unlock_bh(&rose_neigh_list_lock);
+			if (rose_neigh->digipeat != NULL)
+				kfree(rose_neigh->digipeat);
+			kfree(rose_neigh);
+			return;
+		}
+
+		s = s->next;
+	}
+	spin_unlock_bh(&rose_neigh_list_lock);
+}
+
+/*
+ * Caller is holding rose_route_list_lock.
+ */
+static void rose_remove_route(struct rose_route *rose_route)
+{
+	struct rose_route *s;
+
+	if (rose_route->neigh1 != NULL)
+		rose_route->neigh1->use--;
+
+	if (rose_route->neigh2 != NULL)
+		rose_route->neigh2->use--;
+
+	if ((s = rose_route_list) == rose_route) {
+		rose_route_list = rose_route->next;
+		kfree(rose_route);
+		return;
+	}
+
+	while (s != NULL && s->next != NULL) {
+		if (s->next == rose_route) {
+			s->next = rose_route->next;
+			kfree(rose_route);
+			return;
+		}
+
+		s = s->next;
+	}
+}
+
+/*
+ *	"Delete" a node. Strictly speaking remove a route to a node. The node
+ *	is only deleted if no routes are left to it.
+ */
+static int rose_del_node(struct rose_route_struct *rose_route,
+	struct net_device *dev)
+{
+	struct rose_node  *rose_node;
+	struct rose_neigh *rose_neigh;
+	int i, err = 0;
+
+	spin_lock_bh(&rose_node_list_lock);
+	spin_lock_bh(&rose_neigh_list_lock);
+
+	rose_node = rose_node_list;
+	while (rose_node != NULL) {
+		if ((rose_node->mask == rose_route->mask) &&
+		    (rosecmpm(&rose_route->address, &rose_node->address,
+		              rose_route->mask) == 0))
+			break;
+		rose_node = rose_node->next;
+	}
+
+	if (rose_node == NULL || rose_node->loopback) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	rose_neigh = rose_neigh_list;
+	while (rose_neigh != NULL) {
+		if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0
+		    && rose_neigh->dev == dev)
+			break;
+		rose_neigh = rose_neigh->next;
+	}
+
+	if (rose_neigh == NULL) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < rose_node->count; i++) {
+		if (rose_node->neighbour[i] == rose_neigh) {
+			rose_neigh->count--;
+
+			if (rose_neigh->count == 0 && rose_neigh->use == 0)
+				rose_remove_neigh(rose_neigh);
+
+			rose_node->count--;
+
+			if (rose_node->count == 0) {
+				rose_remove_node(rose_node);
+			} else {
+				switch (i) {
+				case 0:
+					rose_node->neighbour[0] =
+						rose_node->neighbour[1];
+				case 1:
+					rose_node->neighbour[1] =
+						rose_node->neighbour[2];
+				case 2:
+					break;
+				}
+			}
+			goto out;
+		}
+	}
+	err = -EINVAL;
+
+out:
+	spin_unlock_bh(&rose_neigh_list_lock);
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return err;
+}
+
+/*
+ *	Add the loopback neighbour.
+ */
+int rose_add_loopback_neigh(void)
+{
+	if ((rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_ATOMIC)) == NULL)
+		return -ENOMEM;
+
+	rose_loopback_neigh->callsign  = null_ax25_address;
+	rose_loopback_neigh->digipeat  = NULL;
+	rose_loopback_neigh->ax25      = NULL;
+	rose_loopback_neigh->dev       = NULL;
+	rose_loopback_neigh->count     = 0;
+	rose_loopback_neigh->use       = 0;
+	rose_loopback_neigh->dce_mode  = 1;
+	rose_loopback_neigh->loopback  = 1;
+	rose_loopback_neigh->number    = rose_neigh_no++;
+	rose_loopback_neigh->restarted = 1;
+
+	skb_queue_head_init(&rose_loopback_neigh->queue);
+
+	init_timer(&rose_loopback_neigh->ftimer);
+	init_timer(&rose_loopback_neigh->t0timer);
+
+	spin_lock_bh(&rose_neigh_list_lock);
+	rose_loopback_neigh->next = rose_neigh_list;
+	rose_neigh_list           = rose_loopback_neigh;
+	spin_unlock_bh(&rose_neigh_list_lock);
+
+	return 0;
+}
+
+/*
+ *	Add a loopback node.
+ */
+int rose_add_loopback_node(rose_address *address)
+{
+	struct rose_node *rose_node;
+	unsigned int err = 0;
+
+	spin_lock_bh(&rose_node_list_lock);
+
+	rose_node = rose_node_list;
+	while (rose_node != NULL) {
+		if ((rose_node->mask == 10) &&
+		     (rosecmpm(address, &rose_node->address, 10) == 0) &&
+		     rose_node->loopback)
+			break;
+		rose_node = rose_node->next;
+	}
+
+	if (rose_node != NULL)
+		goto out;
+
+	if ((rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC)) == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rose_node->address      = *address;
+	rose_node->mask         = 10;
+	rose_node->count        = 1;
+	rose_node->loopback     = 1;
+	rose_node->neighbour[0] = rose_loopback_neigh;
+
+	/* Insert at the head of list. Address is always mask=10 */
+	rose_node->next = rose_node_list;
+	rose_node_list  = rose_node;
+
+	rose_loopback_neigh->count++;
+
+out:
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return 0;
+}
+
+/*
+ *	Delete a loopback node.
+ */
+void rose_del_loopback_node(rose_address *address)
+{
+	struct rose_node *rose_node;
+
+	spin_lock_bh(&rose_node_list_lock);
+
+	rose_node = rose_node_list;
+	while (rose_node != NULL) {
+		if ((rose_node->mask == 10) &&
+		    (rosecmpm(address, &rose_node->address, 10) == 0) &&
+		    rose_node->loopback)
+			break;
+		rose_node = rose_node->next;
+	}
+
+	if (rose_node == NULL)
+		goto out;
+
+	rose_remove_node(rose_node);
+
+	rose_loopback_neigh->count--;
+
+out:
+	spin_unlock_bh(&rose_node_list_lock);
+}
+
+/*
+ *	A device has been removed. Remove its routes and neighbours.
+ */
+void rose_rt_device_down(struct net_device *dev)
+{
+	struct rose_neigh *s, *rose_neigh;
+	struct rose_node  *t, *rose_node;
+	int i;
+
+	spin_lock_bh(&rose_node_list_lock);
+	spin_lock_bh(&rose_neigh_list_lock);
+	rose_neigh = rose_neigh_list;
+	while (rose_neigh != NULL) {
+		s          = rose_neigh;
+		rose_neigh = rose_neigh->next;
+
+		if (s->dev != dev)
+			continue;
+
+		rose_node = rose_node_list;
+
+		while (rose_node != NULL) {
+			t         = rose_node;
+			rose_node = rose_node->next;
+
+			for (i = 0; i < t->count; i++) {
+				if (t->neighbour[i] != s)
+					continue;
+
+				t->count--;
+
+				switch (i) {
+				case 0:
+					t->neighbour[0] = t->neighbour[1];
+				case 1:
+					t->neighbour[1] = t->neighbour[2];
+				case 2:
+					break;
+				}
+			}
+
+			if (t->count <= 0)
+				rose_remove_node(t);
+		}
+
+		rose_remove_neigh(s);
+	}
+	spin_unlock_bh(&rose_neigh_list_lock);
+	spin_unlock_bh(&rose_node_list_lock);
+}
+
+#if 0 /* Currently unused */
+/*
+ *	A device has been removed. Remove its links.
+ */
+void rose_route_device_down(struct net_device *dev)
+{
+	struct rose_route *s, *rose_route;
+
+	spin_lock_bh(&rose_route_list_lock);
+	rose_route = rose_route_list;
+	while (rose_route != NULL) {
+		s          = rose_route;
+		rose_route = rose_route->next;
+
+		if (s->neigh1->dev == dev || s->neigh2->dev == dev)
+			rose_remove_route(s);
+	}
+	spin_unlock_bh(&rose_route_list_lock);
+}
+#endif
+
+/*
+ *	Clear all nodes and neighbours out, except for neighbours with
+ *	active connections going through them.
+ *  Do not clear loopback neighbour and nodes.
+ */
+static int rose_clear_routes(void)
+{
+	struct rose_neigh *s, *rose_neigh;
+	struct rose_node  *t, *rose_node;
+
+	spin_lock_bh(&rose_node_list_lock);
+	spin_lock_bh(&rose_neigh_list_lock);
+
+	rose_neigh = rose_neigh_list;
+	rose_node  = rose_node_list;
+
+	while (rose_node != NULL) {
+		t         = rose_node;
+		rose_node = rose_node->next;
+		if (!t->loopback)
+			rose_remove_node(t);
+	}
+
+	while (rose_neigh != NULL) {
+		s          = rose_neigh;
+		rose_neigh = rose_neigh->next;
+
+		if (s->use == 0 && !s->loopback) {
+			s->count = 0;
+			rose_remove_neigh(s);
+		}
+	}
+
+	spin_unlock_bh(&rose_neigh_list_lock);
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return 0;
+}
+
+/*
+ *	Check that the device given is a valid AX.25 interface that is "up".
+ */
+static struct net_device *rose_ax25_dev_get(char *devname)
+{
+	struct net_device *dev;
+
+	if ((dev = dev_get_by_name(devname)) == NULL)
+		return NULL;
+
+	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+		return dev;
+
+	dev_put(dev);
+	return NULL;
+}
+
+/*
+ *	Find the first active ROSE device, usually "rose0".
+ */
+struct net_device *rose_dev_first(void)
+{
+	struct net_device *dev, *first = NULL;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
+			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+				first = dev;
+	}
+	read_unlock(&dev_base_lock);
+
+	return first;
+}
+
+/*
+ *	Find the ROSE device for the given address.
+ */
+struct net_device *rose_dev_get(rose_address *addr)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
+			dev_hold(dev);
+			goto out;
+		}
+	}
+out:
+	read_unlock(&dev_base_lock);
+	return dev;
+}
+
+static int rose_dev_exists(rose_address *addr)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
+			goto out;
+	}
+out:
+	read_unlock(&dev_base_lock);
+	return dev != NULL;
+}
+
+
+
+
+struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neigh)
+{
+	struct rose_route *rose_route;
+
+	for (rose_route = rose_route_list; rose_route != NULL; rose_route = rose_route->next)
+		if ((rose_route->neigh1 == neigh && rose_route->lci1 == lci) ||
+		    (rose_route->neigh2 == neigh && rose_route->lci2 == lci))
+			return rose_route;
+
+	return NULL;
+}
+
+/*
+ *	Find a neighbour given a ROSE address.
+ */
+struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
+	unsigned char *diagnostic)
+{
+	struct rose_neigh *res = NULL;
+	struct rose_node *node;
+	int failed = 0;
+	int i;
+
+	spin_lock_bh(&rose_node_list_lock);
+	for (node = rose_node_list; node != NULL; node = node->next) {
+		if (rosecmpm(addr, &node->address, node->mask) == 0) {
+			for (i = 0; i < node->count; i++) {
+				if (!rose_ftimer_running(node->neighbour[i])) {
+					res = node->neighbour[i];
+					goto out;
+				} else
+					failed = 1;
+			}
+			break;
+		}
+	}
+
+	if (failed) {
+		*cause      = ROSE_OUT_OF_ORDER;
+		*diagnostic = 0;
+	} else {
+		*cause      = ROSE_NOT_OBTAINABLE;
+		*diagnostic = 0;
+	}
+
+out:
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return res;
+}
+
+/*
+ *	Handle the ioctls that control the routing functions.
+ */
+int rose_rt_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct rose_route_struct rose_route;
+	struct net_device *dev;
+	int err;
+
+	switch (cmd) {
+	case SIOCADDRT:
+		if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct)))
+			return -EFAULT;
+		if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL)
+			return -EINVAL;
+		if (rose_dev_exists(&rose_route.address)) { /* Can't add routes to ourself */
+			dev_put(dev);
+			return -EINVAL;
+		}
+		if (rose_route.mask > 10) /* Mask can't be more than 10 digits */
+			return -EINVAL;
+		if (rose_route.ndigis > 8) /* No more than 8 digipeats */
+			return -EINVAL;
+		err = rose_add_node(&rose_route, dev);
+		dev_put(dev);
+		return err;
+
+	case SIOCDELRT:
+		if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct)))
+			return -EFAULT;
+		if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL)
+			return -EINVAL;
+		err = rose_del_node(&rose_route, dev);
+		dev_put(dev);
+		return err;
+
+	case SIOCRSCLRRT:
+		return rose_clear_routes();
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh)
+{
+	struct rose_route *rose_route, *s;
+
+	rose_neigh->restarted = 0;
+
+	rose_stop_t0timer(rose_neigh);
+	rose_start_ftimer(rose_neigh);
+
+	skb_queue_purge(&rose_neigh->queue);
+
+	spin_lock_bh(&rose_route_list_lock);
+
+	rose_route = rose_route_list;
+
+	while (rose_route != NULL) {
+		if ((rose_route->neigh1 == rose_neigh && rose_route->neigh2 == rose_neigh) ||
+		    (rose_route->neigh1 == rose_neigh && rose_route->neigh2 == NULL)       ||
+		    (rose_route->neigh2 == rose_neigh && rose_route->neigh1 == NULL)) {
+			s = rose_route->next;
+			rose_remove_route(rose_route);
+			rose_route = s;
+			continue;
+		}
+
+		if (rose_route->neigh1 == rose_neigh) {
+			rose_route->neigh1->use--;
+			rose_route->neigh1 = NULL;
+			rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0);
+		}
+
+		if (rose_route->neigh2 == rose_neigh) {
+			rose_route->neigh2->use--;
+			rose_route->neigh2 = NULL;
+			rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0);
+		}
+
+		rose_route = rose_route->next;
+	}
+	spin_unlock_bh(&rose_route_list_lock);
+}
+
+/*
+ * 	A level 2 link has timed out, therefore it appears to be a poor link,
+ *	then don't use that neighbour until it is reset. Blow away all through
+ *	routes and connections using this route.
+ */
+void rose_link_failed(ax25_cb *ax25, int reason)
+{
+	struct rose_neigh *rose_neigh;
+
+	spin_lock_bh(&rose_neigh_list_lock);
+	rose_neigh = rose_neigh_list;
+	while (rose_neigh != NULL) {
+		if (rose_neigh->ax25 == ax25)
+			break;
+		rose_neigh = rose_neigh->next;
+	}
+
+	if (rose_neigh != NULL) {
+		rose_neigh->ax25 = NULL;
+
+		rose_del_route_by_neigh(rose_neigh);
+		rose_kill_by_neigh(rose_neigh);
+	}
+	spin_unlock_bh(&rose_neigh_list_lock);
+}
+
+/*
+ * 	A device has been "downed" remove its link status. Blow away all
+ *	through routes and connections that use this device.
+ */
+void rose_link_device_down(struct net_device *dev)
+{
+	struct rose_neigh *rose_neigh;
+
+	for (rose_neigh = rose_neigh_list; rose_neigh != NULL; rose_neigh = rose_neigh->next) {
+		if (rose_neigh->dev == dev) {
+			rose_del_route_by_neigh(rose_neigh);
+			rose_kill_by_neigh(rose_neigh);
+		}
+	}
+}
+
+/*
+ *	Route a frame to an appropriate AX.25 connection.
+ */
+int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
+{
+	struct rose_neigh *rose_neigh, *new_neigh;
+	struct rose_route *rose_route;
+	struct rose_facilities_struct facilities;
+	rose_address *src_addr, *dest_addr;
+	struct sock *sk;
+	unsigned short frametype;
+	unsigned int lci, new_lci;
+	unsigned char cause, diagnostic;
+	struct net_device *dev;
+	int len, res = 0;
+
+#if 0
+	if (call_in_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT)
+		return res;
+#endif
+
+	frametype = skb->data[2];
+	lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
+	src_addr  = (rose_address *)(skb->data + 9);
+	dest_addr = (rose_address *)(skb->data + 4);
+
+	spin_lock_bh(&rose_node_list_lock);
+	spin_lock_bh(&rose_neigh_list_lock);
+	spin_lock_bh(&rose_route_list_lock);
+
+	rose_neigh = rose_neigh_list;
+	while (rose_neigh != NULL) {
+		if (ax25cmp(&ax25->dest_addr, &rose_neigh->callsign) == 0 &&
+		    ax25->ax25_dev->dev == rose_neigh->dev)
+			break;
+		rose_neigh = rose_neigh->next;
+	}
+
+	if (rose_neigh == NULL) {
+		printk("rose_route : unknown neighbour or device %s\n",
+		       ax2asc(&ax25->dest_addr));
+		goto out;
+	}
+
+	/*
+	 *	Obviously the link is working, halt the ftimer.
+	 */
+	rose_stop_ftimer(rose_neigh);
+
+	/*
+	 *	LCI of zero is always for us, and its always a restart
+	 * 	frame.
+	 */
+	if (lci == 0) {
+		rose_link_rx_restart(skb, rose_neigh, frametype);
+		goto out;
+	}
+
+	/*
+	 *	Find an existing socket.
+	 */
+	if ((sk = rose_find_socket(lci, rose_neigh)) != NULL) {
+		if (frametype == ROSE_CALL_REQUEST) {
+			struct rose_sock *rose = rose_sk(sk);
+
+			/* Remove an existing unused socket */
+			rose_clear_queues(sk);
+			rose->cause	 = ROSE_NETWORK_CONGESTION;
+			rose->diagnostic = 0;
+			rose->neighbour->use--;
+			rose->neighbour	 = NULL;
+			rose->lci	 = 0;
+			rose->state	 = ROSE_STATE_0;
+			sk->sk_state	 = TCP_CLOSE;
+			sk->sk_err	 = 0;
+			sk->sk_shutdown	 |= SEND_SHUTDOWN;
+			if (!sock_flag(sk, SOCK_DEAD)) {
+				sk->sk_state_change(sk);
+				sock_set_flag(sk, SOCK_DEAD);
+			}
+		}
+		else {
+			skb->h.raw = skb->data;
+			res = rose_process_rx_frame(sk, skb);
+			goto out;
+		}
+	}
+
+	/*
+	 *	Is is a Call Request and is it for us ?
+	 */
+	if (frametype == ROSE_CALL_REQUEST)
+		if ((dev = rose_dev_get(dest_addr)) != NULL) {
+			res = rose_rx_call_request(skb, dev, rose_neigh, lci);
+			dev_put(dev);
+			goto out;
+		}
+
+	if (!sysctl_rose_routing_control) {
+		rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 0);
+		goto out;
+	}
+
+	/*
+	 *	Route it to the next in line if we have an entry for it.
+	 */
+	rose_route = rose_route_list;
+	while (rose_route != NULL) {
+		if (rose_route->lci1 == lci &&
+		    rose_route->neigh1 == rose_neigh) {
+			if (frametype == ROSE_CALL_REQUEST) {
+				/* F6FBB - Remove an existing unused route */
+				rose_remove_route(rose_route);
+				break;
+			} else if (rose_route->neigh2 != NULL) {
+				skb->data[0] &= 0xF0;
+				skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F;
+				skb->data[1]  = (rose_route->lci2 >> 0) & 0xFF;
+				rose_transmit_link(skb, rose_route->neigh2);
+				if (frametype == ROSE_CLEAR_CONFIRMATION)
+					rose_remove_route(rose_route);
+				res = 1;
+				goto out;
+			} else {
+				if (frametype == ROSE_CLEAR_CONFIRMATION)
+					rose_remove_route(rose_route);
+				goto out;
+			}
+		}
+		if (rose_route->lci2 == lci &&
+		    rose_route->neigh2 == rose_neigh) {
+			if (frametype == ROSE_CALL_REQUEST) {
+				/* F6FBB - Remove an existing unused route */
+				rose_remove_route(rose_route);
+				break;
+			} else if (rose_route->neigh1 != NULL) {
+				skb->data[0] &= 0xF0;
+				skb->data[0] |= (rose_route->lci1 >> 8) & 0x0F;
+				skb->data[1]  = (rose_route->lci1 >> 0) & 0xFF;
+				rose_transmit_link(skb, rose_route->neigh1);
+				if (frametype == ROSE_CLEAR_CONFIRMATION)
+					rose_remove_route(rose_route);
+				res = 1;
+				goto out;
+			} else {
+				if (frametype == ROSE_CLEAR_CONFIRMATION)
+					rose_remove_route(rose_route);
+				goto out;
+			}
+		}
+		rose_route = rose_route->next;
+	}
+
+	/*
+	 *	We know that:
+	 *	1. The frame isn't for us,
+	 *	2. It isn't "owned" by any existing route.
+	 */
+	if (frametype != ROSE_CALL_REQUEST)	/* XXX */
+		return 0;
+
+	len  = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
+	len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
+
+	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
+
+	if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+		rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
+		goto out;
+	}
+
+	/*
+	 *	Check for routing loops.
+	 */
+	rose_route = rose_route_list;
+	while (rose_route != NULL) {
+		if (rose_route->rand == facilities.rand &&
+		    rosecmp(src_addr, &rose_route->src_addr) == 0 &&
+		    ax25cmp(&facilities.dest_call, &rose_route->src_call) == 0 &&
+		    ax25cmp(&facilities.source_call, &rose_route->dest_call) == 0) {
+			rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 120);
+			goto out;
+		}
+		rose_route = rose_route->next;
+	}
+
+	if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic)) == NULL) {
+		rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic);
+		goto out;
+	}
+
+	if ((new_lci = rose_new_lci(new_neigh)) == 0) {
+		rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71);
+		goto out;
+	}
+
+	if ((rose_route = kmalloc(sizeof(*rose_route), GFP_ATOMIC)) == NULL) {
+		rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120);
+		goto out;
+	}
+
+	rose_route->lci1      = lci;
+	rose_route->src_addr  = *src_addr;
+	rose_route->dest_addr = *dest_addr;
+	rose_route->src_call  = facilities.dest_call;
+	rose_route->dest_call = facilities.source_call;
+	rose_route->rand      = facilities.rand;
+	rose_route->neigh1    = rose_neigh;
+	rose_route->lci2      = new_lci;
+	rose_route->neigh2    = new_neigh;
+
+	rose_route->neigh1->use++;
+	rose_route->neigh2->use++;
+
+	rose_route->next = rose_route_list;
+	rose_route_list  = rose_route;
+
+	skb->data[0] &= 0xF0;
+	skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F;
+	skb->data[1]  = (rose_route->lci2 >> 0) & 0xFF;
+
+	rose_transmit_link(skb, rose_route->neigh2);
+	res = 1;
+
+out:
+	spin_unlock_bh(&rose_route_list_lock);
+	spin_unlock_bh(&rose_neigh_list_lock);
+	spin_unlock_bh(&rose_node_list_lock);
+
+	return res;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *rose_node_start(struct seq_file *seq, loff_t *pos)
+{
+	struct rose_node *rose_node;
+	int i = 1;
+
+	spin_lock_bh(&rose_neigh_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (rose_node = rose_node_list; rose_node && i < *pos; 
+	     rose_node = rose_node->next, ++i);
+
+	return (i == *pos) ? rose_node : NULL;
+}
+
+static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	
+	return (v == SEQ_START_TOKEN) ? rose_node_list 
+		: ((struct rose_node *)v)->next;
+}
+
+static void rose_node_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&rose_neigh_list_lock);
+}
+
+static int rose_node_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "address    mask n neigh neigh neigh\n");
+	else {
+		const struct rose_node *rose_node = v;
+		/* if (rose_node->loopback) {
+			seq_printf(seq, "%-10s %04d 1 loopback\n",
+				rose2asc(&rose_node->address),
+				rose_node->mask);
+		} else { */
+			seq_printf(seq, "%-10s %04d %d",
+				rose2asc(&rose_node->address),
+				rose_node->mask,
+				rose_node->count);
+
+			for (i = 0; i < rose_node->count; i++)
+				seq_printf(seq, " %05d",
+					rose_node->neighbour[i]->number);
+
+			seq_puts(seq, "\n");
+		/* } */
+	}
+	return 0;
+}
+
+static struct seq_operations rose_node_seqops = {
+	.start = rose_node_start,
+	.next = rose_node_next,
+	.stop = rose_node_stop,
+	.show = rose_node_show,
+};
+
+static int rose_nodes_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rose_node_seqops);
+}
+
+struct file_operations rose_nodes_fops = {
+	.owner = THIS_MODULE,
+	.open = rose_nodes_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
+{
+	struct rose_neigh *rose_neigh;
+	int i = 1;
+
+	spin_lock_bh(&rose_neigh_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (rose_neigh = rose_neigh_list; rose_neigh && i < *pos; 
+	     rose_neigh = rose_neigh->next, ++i);
+
+	return (i == *pos) ? rose_neigh : NULL;
+}
+
+static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	
+	return (v == SEQ_START_TOKEN) ? rose_neigh_list 
+		: ((struct rose_neigh *)v)->next;
+}
+
+static void rose_neigh_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&rose_neigh_list_lock);
+}
+
+static int rose_neigh_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, 
+			 "addr  callsign  dev  count use mode restart  t0  tf digipeaters\n");
+	else {
+		struct rose_neigh *rose_neigh = v;
+
+		/* if (!rose_neigh->loopback) { */
+		seq_printf(seq, "%05d %-9s %-4s   %3d %3d  %3s     %3s %3lu %3lu",
+			   rose_neigh->number,
+			   (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(&rose_neigh->callsign),
+			   rose_neigh->dev ? rose_neigh->dev->name : "???",
+			   rose_neigh->count,
+			   rose_neigh->use,
+			   (rose_neigh->dce_mode) ? "DCE" : "DTE",
+			   (rose_neigh->restarted) ? "yes" : "no",
+			   ax25_display_timer(&rose_neigh->t0timer) / HZ,
+			   ax25_display_timer(&rose_neigh->ftimer)  / HZ);
+
+		if (rose_neigh->digipeat != NULL) {
+			for (i = 0; i < rose_neigh->digipeat->ndigi; i++)
+				seq_printf(seq, " %s", ax2asc(&rose_neigh->digipeat->calls[i]));
+		}
+
+		seq_puts(seq, "\n");
+	}
+	return 0;
+}
+
+
+static struct seq_operations rose_neigh_seqops = {
+	.start = rose_neigh_start,
+	.next = rose_neigh_next,
+	.stop = rose_neigh_stop,
+	.show = rose_neigh_show,
+};
+
+static int rose_neigh_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rose_neigh_seqops);
+}
+
+struct file_operations rose_neigh_fops = {
+	.owner = THIS_MODULE,
+	.open = rose_neigh_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+
+static void *rose_route_start(struct seq_file *seq, loff_t *pos)
+{
+	struct rose_route *rose_route;
+	int i = 1;
+
+	spin_lock_bh(&rose_route_list_lock);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (rose_route = rose_route_list; rose_route && i < *pos; 
+	     rose_route = rose_route->next, ++i);
+
+	return (i == *pos) ? rose_route : NULL;
+}
+
+static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	
+	return (v == SEQ_START_TOKEN) ? rose_route_list 
+		: ((struct rose_route *)v)->next;
+}
+
+static void rose_route_stop(struct seq_file *seq, void *v)
+{
+	spin_unlock_bh(&rose_route_list_lock);
+}
+
+static int rose_route_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, 
+			 "lci  address     callsign   neigh  <-> lci  address     callsign   neigh\n");
+	else {
+		struct rose_route *rose_route = v;
+
+		if (rose_route->neigh1) 
+			seq_printf(seq,
+				   "%3.3X  %-10s  %-9s  %05d      ",
+				   rose_route->lci1,
+				   rose2asc(&rose_route->src_addr),
+				   ax2asc(&rose_route->src_call),
+				   rose_route->neigh1->number);
+		else 
+			seq_puts(seq, 
+				 "000  *           *          00000      ");
+
+		if (rose_route->neigh2) 
+			seq_printf(seq,
+				   "%3.3X  %-10s  %-9s  %05d\n",
+				rose_route->lci2,
+				rose2asc(&rose_route->dest_addr),
+				ax2asc(&rose_route->dest_call),
+				rose_route->neigh2->number);
+		 else 
+			 seq_puts(seq,
+				  "000  *           *          00000\n");
+		}
+	return 0;
+}
+
+static struct seq_operations rose_route_seqops = {
+	.start = rose_route_start,
+	.next = rose_route_next,
+	.stop = rose_route_stop,
+	.show = rose_route_show,
+};
+
+static int rose_route_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rose_route_seqops);
+}
+
+struct file_operations rose_routes_fops = {
+	.owner = THIS_MODULE,
+	.open = rose_route_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ *	Release all memory associated with ROSE routing structures.
+ */
+void __exit rose_rt_free(void)
+{
+	struct rose_neigh *s, *rose_neigh = rose_neigh_list;
+	struct rose_node  *t, *rose_node  = rose_node_list;
+	struct rose_route *u, *rose_route = rose_route_list;
+
+	while (rose_neigh != NULL) {
+		s          = rose_neigh;
+		rose_neigh = rose_neigh->next;
+
+		rose_remove_neigh(s);
+	}
+
+	while (rose_node != NULL) {
+		t         = rose_node;
+		rose_node = rose_node->next;
+
+		rose_remove_node(t);
+	}
+
+	while (rose_route != NULL) {
+		u          = rose_route;
+		rose_route = rose_route->next;
+
+		rose_remove_route(u);
+	}
+}
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
new file mode 100644
index 00000000000..7db7e1cedc3
--- /dev/null
+++ b/net/rose/rose_subr.c
@@ -0,0 +1,519 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/rose.h>
+
+static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose);
+
+/*
+ *	This routine purges all of the queues of frames.
+ */
+void rose_clear_queues(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&rose_sk(sk)->ack_queue);
+}
+
+/*
+ * This routine purges the input queue of those frames that have been
+ * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
+ * SDL diagram.
+ */
+void rose_frames_acked(struct sock *sk, unsigned short nr)
+{
+	struct sk_buff *skb;
+	struct rose_sock *rose = rose_sk(sk);
+
+	/*
+	 * Remove all the ack-ed frames from the ack queue.
+	 */
+	if (rose->va != nr) {
+		while (skb_peek(&rose->ack_queue) != NULL && rose->va != nr) {
+			skb = skb_dequeue(&rose->ack_queue);
+			kfree_skb(skb);
+			rose->va = (rose->va + 1) % ROSE_MODULUS;
+		}
+	}
+}
+
+void rose_requeue_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *skb_prev = NULL;
+
+	/*
+	 * Requeue all the un-ack-ed frames on the output queue to be picked
+	 * up by rose_kick. This arrangement handles the possibility of an
+	 * empty output queue.
+	 */
+	while ((skb = skb_dequeue(&rose_sk(sk)->ack_queue)) != NULL) {
+		if (skb_prev == NULL)
+			skb_queue_head(&sk->sk_write_queue, skb);
+		else
+			skb_append(skb_prev, skb);
+		skb_prev = skb;
+	}
+}
+
+/*
+ *	Validate that the value of nr is between va and vs. Return true or
+ *	false for testing.
+ */
+int rose_validate_nr(struct sock *sk, unsigned short nr)
+{
+	struct rose_sock *rose = rose_sk(sk);
+	unsigned short vc = rose->va;
+
+	while (vc != rose->vs) {
+		if (nr == vc) return 1;
+		vc = (vc + 1) % ROSE_MODULUS;
+	}
+
+	return nr == rose->vs;
+}
+
+/*
+ *  This routine is called when the packet layer internally generates a
+ *  control frame.
+ */
+void rose_write_internal(struct sock *sk, int frametype)
+{
+	struct rose_sock *rose = rose_sk(sk);
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+	unsigned char  lci1, lci2;
+	char buffer[100];
+	int len, faclen = 0;
+
+	len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+
+	switch (frametype) {
+	case ROSE_CALL_REQUEST:
+		len   += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
+		faclen = rose_create_facilities(buffer, rose);
+		len   += faclen;
+		break;
+	case ROSE_CALL_ACCEPTED:
+	case ROSE_CLEAR_REQUEST:
+	case ROSE_RESET_REQUEST:
+		len   += 2;
+		break;
+	}
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	/*
+	 *	Space for AX.25 header and PID.
+	 */
+	skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1);
+
+	dptr = skb_put(skb, skb_tailroom(skb));
+
+	lci1 = (rose->lci >> 8) & 0x0F;
+	lci2 = (rose->lci >> 0) & 0xFF;
+
+	switch (frametype) {
+	case ROSE_CALL_REQUEST:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr++ = frametype;
+		*dptr++ = 0xAA;
+		memcpy(dptr, &rose->dest_addr,  ROSE_ADDR_LEN);
+		dptr   += ROSE_ADDR_LEN;
+		memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
+		dptr   += ROSE_ADDR_LEN;
+		memcpy(dptr, buffer, faclen);
+		dptr   += faclen;
+		break;
+
+	case ROSE_CALL_ACCEPTED:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr++ = frametype;
+		*dptr++ = 0x00;		/* Address length */
+		*dptr++ = 0;		/* Facilities length */
+		break;
+
+	case ROSE_CLEAR_REQUEST:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr++ = frametype;
+		*dptr++ = rose->cause;
+		*dptr++ = rose->diagnostic;
+		break;
+
+	case ROSE_RESET_REQUEST:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr++ = frametype;
+		*dptr++ = ROSE_DTE_ORIGINATED;
+		*dptr++ = 0;
+		break;
+
+	case ROSE_RR:
+	case ROSE_RNR:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr   = frametype;
+		*dptr++ |= (rose->vr << 5) & 0xE0;
+		break;
+
+	case ROSE_CLEAR_CONFIRMATION:
+	case ROSE_RESET_CONFIRMATION:
+		*dptr++ = ROSE_GFI | lci1;
+		*dptr++ = lci2;
+		*dptr++  = frametype;
+		break;
+
+	default:
+		printk(KERN_ERR "ROSE: rose_write_internal - invalid frametype %02X\n", frametype);
+		kfree_skb(skb);
+		return;
+	}
+
+	rose_transmit_link(skb, rose->neighbour);
+}
+
+int rose_decode(struct sk_buff *skb, int *ns, int *nr, int *q, int *d, int *m)
+{
+	unsigned char *frame;
+
+	frame = skb->data;
+
+	*ns = *nr = *q = *d = *m = 0;
+
+	switch (frame[2]) {
+	case ROSE_CALL_REQUEST:
+	case ROSE_CALL_ACCEPTED:
+	case ROSE_CLEAR_REQUEST:
+	case ROSE_CLEAR_CONFIRMATION:
+	case ROSE_RESET_REQUEST:
+	case ROSE_RESET_CONFIRMATION:
+		return frame[2];
+	default:
+		break;
+	}
+
+	if ((frame[2] & 0x1F) == ROSE_RR  ||
+	    (frame[2] & 0x1F) == ROSE_RNR) {
+		*nr = (frame[2] >> 5) & 0x07;
+		return frame[2] & 0x1F;
+	}
+
+	if ((frame[2] & 0x01) == ROSE_DATA) {
+		*q  = (frame[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
+		*d  = (frame[0] & ROSE_D_BIT) == ROSE_D_BIT;
+		*m  = (frame[2] & ROSE_M_BIT) == ROSE_M_BIT;
+		*nr = (frame[2] >> 5) & 0x07;
+		*ns = (frame[2] >> 1) & 0x07;
+		return ROSE_DATA;
+	}
+
+	return ROSE_ILLEGAL;
+}
+
+static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *facilities, int len)
+{
+	unsigned char *pt;
+	unsigned char l, lg, n = 0;
+	int fac_national_digis_received = 0;
+
+	do {
+		switch (*p & 0xC0) {
+		case 0x00:
+			p   += 2;
+			n   += 2;
+			len -= 2;
+			break;
+
+		case 0x40:
+			if (*p == FAC_NATIONAL_RAND)
+				facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
+			p   += 3;
+			n   += 3;
+			len -= 3;
+			break;
+
+		case 0x80:
+			p   += 4;
+			n   += 4;
+			len -= 4;
+			break;
+
+		case 0xC0:
+			l = p[1];
+			if (*p == FAC_NATIONAL_DEST_DIGI) {
+				if (!fac_national_digis_received) {
+					memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
+					facilities->source_ndigis = 1;
+				}
+			}
+			else if (*p == FAC_NATIONAL_SRC_DIGI) {
+				if (!fac_national_digis_received) {
+					memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
+					facilities->dest_ndigis = 1;
+				}
+			}
+			else if (*p == FAC_NATIONAL_FAIL_CALL) {
+				memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
+			}
+			else if (*p == FAC_NATIONAL_FAIL_ADD) {
+				memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
+			}
+			else if (*p == FAC_NATIONAL_DIGIS) {
+				fac_national_digis_received = 1;
+				facilities->source_ndigis = 0;
+				facilities->dest_ndigis   = 0;
+				for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
+					if (pt[6] & AX25_HBIT)
+						memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
+					else
+						memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
+				}
+			}
+			p   += l + 2;
+			n   += l + 2;
+			len -= l + 2;
+			break;
+		}
+	} while (*p != 0x00 && len > 0);
+
+	return n;
+}
+
+static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *facilities, int len)
+{
+	unsigned char l, n = 0;
+	char callsign[11];
+
+	do {
+		switch (*p & 0xC0) {
+		case 0x00:
+			p   += 2;
+			n   += 2;
+			len -= 2;
+			break;
+
+		case 0x40:
+			p   += 3;
+			n   += 3;
+			len -= 3;
+			break;
+
+		case 0x80:
+			p   += 4;
+			n   += 4;
+			len -= 4;
+			break;
+
+		case 0xC0:
+			l = p[1];
+			if (*p == FAC_CCITT_DEST_NSAP) {
+				memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
+				memcpy(callsign, p + 12,   l - 10);
+				callsign[l - 10] = '\0';
+				facilities->source_call = *asc2ax(callsign);
+			}
+			if (*p == FAC_CCITT_SRC_NSAP) {
+				memcpy(&facilities->dest_addr, p + 7, ROSE_ADDR_LEN);
+				memcpy(callsign, p + 12, l - 10);
+				callsign[l - 10] = '\0';
+				facilities->dest_call = *asc2ax(callsign);
+			}
+			p   += l + 2;
+			n   += l + 2;
+			len -= l + 2;
+			break;
+		}
+	} while (*p != 0x00 && len > 0);
+
+	return n;
+}
+
+int rose_parse_facilities(unsigned char *p,
+	struct rose_facilities_struct *facilities)
+{
+	int facilities_len, len;
+
+	facilities_len = *p++;
+
+	if (facilities_len == 0)
+		return 0;
+
+	while (facilities_len > 0) {
+		if (*p == 0x00) {
+			facilities_len--;
+			p++;
+
+			switch (*p) {
+			case FAC_NATIONAL:		/* National */
+				len = rose_parse_national(p + 1, facilities, facilities_len - 1);
+				facilities_len -= len + 1;
+				p += len + 1;
+				break;
+
+			case FAC_CCITT:		/* CCITT */
+				len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
+				facilities_len -= len + 1;
+				p += len + 1;
+				break;
+
+			default:
+				printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
+				facilities_len--;
+				p++;
+				break;
+			}
+		} else
+			break;	/* Error in facilities format */
+	}
+
+	return 1;
+}
+
+static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
+{
+	unsigned char *p = buffer + 1;
+	char *callsign;
+	int len, nb;
+
+	/* National Facilities */
+	if (rose->rand != 0 || rose->source_ndigis == 1 || rose->dest_ndigis == 1) {
+		*p++ = 0x00;
+		*p++ = FAC_NATIONAL;
+
+		if (rose->rand != 0) {
+			*p++ = FAC_NATIONAL_RAND;
+			*p++ = (rose->rand >> 8) & 0xFF;
+			*p++ = (rose->rand >> 0) & 0xFF;
+		}
+
+		/* Sent before older facilities */
+		if ((rose->source_ndigis > 0) || (rose->dest_ndigis > 0)) {
+			int maxdigi = 0;
+			*p++ = FAC_NATIONAL_DIGIS;
+			*p++ = AX25_ADDR_LEN * (rose->source_ndigis + rose->dest_ndigis);
+			for (nb = 0 ; nb < rose->source_ndigis ; nb++) {
+				if (++maxdigi >= ROSE_MAX_DIGIS)
+					break;
+				memcpy(p, &rose->source_digis[nb], AX25_ADDR_LEN);
+				p[6] |= AX25_HBIT;
+				p += AX25_ADDR_LEN;
+			}
+			for (nb = 0 ; nb < rose->dest_ndigis ; nb++) {
+				if (++maxdigi >= ROSE_MAX_DIGIS)
+					break;
+				memcpy(p, &rose->dest_digis[nb], AX25_ADDR_LEN);
+				p[6] &= ~AX25_HBIT;
+				p += AX25_ADDR_LEN;
+			}
+		}
+
+		/* For compatibility */
+		if (rose->source_ndigis > 0) {
+			*p++ = FAC_NATIONAL_SRC_DIGI;
+			*p++ = AX25_ADDR_LEN;
+			memcpy(p, &rose->source_digis[0], AX25_ADDR_LEN);
+			p   += AX25_ADDR_LEN;
+		}
+
+		/* For compatibility */
+		if (rose->dest_ndigis > 0) {
+			*p++ = FAC_NATIONAL_DEST_DIGI;
+			*p++ = AX25_ADDR_LEN;
+			memcpy(p, &rose->dest_digis[0], AX25_ADDR_LEN);
+			p   += AX25_ADDR_LEN;
+		}
+	}
+
+	*p++ = 0x00;
+	*p++ = FAC_CCITT;
+
+	*p++ = FAC_CCITT_DEST_NSAP;
+
+	callsign = ax2asc(&rose->dest_call);
+
+	*p++ = strlen(callsign) + 10;
+	*p++ = (strlen(callsign) + 9) * 2;		/* ??? */
+
+	*p++ = 0x47; *p++ = 0x00; *p++ = 0x11;
+	*p++ = ROSE_ADDR_LEN * 2;
+	memcpy(p, &rose->dest_addr, ROSE_ADDR_LEN);
+	p   += ROSE_ADDR_LEN;
+
+	memcpy(p, callsign, strlen(callsign));
+	p   += strlen(callsign);
+
+	*p++ = FAC_CCITT_SRC_NSAP;
+
+	callsign = ax2asc(&rose->source_call);
+
+	*p++ = strlen(callsign) + 10;
+	*p++ = (strlen(callsign) + 9) * 2;		/* ??? */
+
+	*p++ = 0x47; *p++ = 0x00; *p++ = 0x11;
+	*p++ = ROSE_ADDR_LEN * 2;
+	memcpy(p, &rose->source_addr, ROSE_ADDR_LEN);
+	p   += ROSE_ADDR_LEN;
+
+	memcpy(p, callsign, strlen(callsign));
+	p   += strlen(callsign);
+
+	len       = p - buffer;
+	buffer[0] = len - 1;
+
+	return len;
+}
+
+void rose_disconnect(struct sock *sk, int reason, int cause, int diagnostic)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	rose_stop_timer(sk);
+	rose_stop_idletimer(sk);
+
+	rose_clear_queues(sk);
+
+	rose->lci   = 0;
+	rose->state = ROSE_STATE_0;
+
+	if (cause != -1)
+		rose->cause = cause;
+
+	if (diagnostic != -1)
+		rose->diagnostic = diagnostic;
+
+	sk->sk_state     = TCP_CLOSE;
+	sk->sk_err       = reason;
+	sk->sk_shutdown |= SEND_SHUTDOWN;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+	}
+}
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
new file mode 100644
index 00000000000..84dd4403f79
--- /dev/null
+++ b/net/rose/rose_timer.c
@@ -0,0 +1,216 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
+ * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <net/ax25.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/system.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <net/rose.h>
+
+static void rose_heartbeat_expiry(unsigned long);
+static void rose_timer_expiry(unsigned long);
+static void rose_idletimer_expiry(unsigned long);
+
+void rose_start_heartbeat(struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+
+	sk->sk_timer.data     = (unsigned long)sk;
+	sk->sk_timer.function = &rose_heartbeat_expiry;
+	sk->sk_timer.expires  = jiffies + 5 * HZ;
+
+	add_timer(&sk->sk_timer);
+}
+
+void rose_start_t1timer(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	del_timer(&rose->timer);
+
+	rose->timer.data     = (unsigned long)sk;
+	rose->timer.function = &rose_timer_expiry;
+	rose->timer.expires  = jiffies + rose->t1;
+
+	add_timer(&rose->timer);
+}
+
+void rose_start_t2timer(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	del_timer(&rose->timer);
+
+	rose->timer.data     = (unsigned long)sk;
+	rose->timer.function = &rose_timer_expiry;
+	rose->timer.expires  = jiffies + rose->t2;
+
+	add_timer(&rose->timer);
+}
+
+void rose_start_t3timer(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	del_timer(&rose->timer);
+
+	rose->timer.data     = (unsigned long)sk;
+	rose->timer.function = &rose_timer_expiry;
+	rose->timer.expires  = jiffies + rose->t3;
+
+	add_timer(&rose->timer);
+}
+
+void rose_start_hbtimer(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	del_timer(&rose->timer);
+
+	rose->timer.data     = (unsigned long)sk;
+	rose->timer.function = &rose_timer_expiry;
+	rose->timer.expires  = jiffies + rose->hb;
+
+	add_timer(&rose->timer);
+}
+
+void rose_start_idletimer(struct sock *sk)
+{
+	struct rose_sock *rose = rose_sk(sk);
+
+	del_timer(&rose->idletimer);
+
+	if (rose->idle > 0) {
+		rose->idletimer.data     = (unsigned long)sk;
+		rose->idletimer.function = &rose_idletimer_expiry;
+		rose->idletimer.expires  = jiffies + rose->idle;
+
+		add_timer(&rose->idletimer);
+	}
+}
+
+void rose_stop_heartbeat(struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+}
+
+void rose_stop_timer(struct sock *sk)
+{
+	del_timer(&rose_sk(sk)->timer);
+}
+
+void rose_stop_idletimer(struct sock *sk)
+{
+	del_timer(&rose_sk(sk)->idletimer);
+}
+
+static void rose_heartbeat_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct rose_sock *rose = rose_sk(sk);
+
+	bh_lock_sock(sk);
+	switch (rose->state) {
+	case ROSE_STATE_0:
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+			rose_destroy_socket(sk);
+			return;
+		}
+		break;
+
+	case ROSE_STATE_3:
+		/*
+		 * Check for the state of the receive buffer.
+		 */
+		if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
+		    (rose->condition & ROSE_COND_OWN_RX_BUSY)) {
+			rose->condition &= ~ROSE_COND_OWN_RX_BUSY;
+			rose->condition &= ~ROSE_COND_ACK_PENDING;
+			rose->vl         = rose->vr;
+			rose_write_internal(sk, ROSE_RR);
+			rose_stop_timer(sk);	/* HB */
+			break;
+		}
+		break;
+	}
+
+	rose_start_heartbeat(sk);
+	bh_unlock_sock(sk);
+}
+
+static void rose_timer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+	struct rose_sock *rose = rose_sk(sk);
+
+	bh_lock_sock(sk);
+	switch (rose->state) {
+	case ROSE_STATE_1:	/* T1 */
+	case ROSE_STATE_4:	/* T2 */
+		rose_write_internal(sk, ROSE_CLEAR_REQUEST);
+		rose->state = ROSE_STATE_2;
+		rose_start_t3timer(sk);
+		break;
+
+	case ROSE_STATE_2:	/* T3 */
+		rose->neighbour->use--;
+		rose_disconnect(sk, ETIMEDOUT, -1, -1);
+		break;
+
+	case ROSE_STATE_3:	/* HB */
+		if (rose->condition & ROSE_COND_ACK_PENDING) {
+			rose->condition &= ~ROSE_COND_ACK_PENDING;
+			rose_enquiry_response(sk);
+		}
+		break;
+	}
+	bh_unlock_sock(sk);
+}
+
+static void rose_idletimer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+
+	bh_lock_sock(sk);
+	rose_clear_queues(sk);
+
+	rose_write_internal(sk, ROSE_CLEAR_REQUEST);
+	rose_sk(sk)->state = ROSE_STATE_2;
+
+	rose_start_t3timer(sk);
+
+	sk->sk_state     = TCP_CLOSE;
+	sk->sk_err       = 0;
+	sk->sk_shutdown |= SEND_SHUTDOWN;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+	}
+	bh_unlock_sock(sk);
+}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
new file mode 100644
index 00000000000..8548c7cf564
--- /dev/null
+++ b/net/rose/sysctl_net_rose.c
@@ -0,0 +1,169 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com)
+ */
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <net/ax25.h>
+#include <net/rose.h>
+
+static int min_timer[]  = {1 * HZ};
+static int max_timer[]  = {300 * HZ};
+static int min_idle[]   = {0 * HZ};
+static int max_idle[]   = {65535 * HZ};
+static int min_route[1],       max_route[] = {1};
+static int min_ftimer[] = {60 * HZ};
+static int max_ftimer[] = {600 * HZ};
+static int min_maxvcs[] = {1}, max_maxvcs[] = {254};
+static int min_window[] = {1}, max_window[] = {7};
+
+static struct ctl_table_header *rose_table_header;
+
+static ctl_table rose_table[] = {
+        {
+		.ctl_name	= NET_ROSE_RESTART_REQUEST_TIMEOUT,
+		.procname	= "restart_request_timeout",
+		.data		= &sysctl_rose_restart_request_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_timer,
+		.extra2		= &max_timer
+	},
+        {
+		.ctl_name	= NET_ROSE_CALL_REQUEST_TIMEOUT,
+		.procname	= "call_request_timeout",
+		.data		= &sysctl_rose_call_request_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_timer,
+		.extra2		= &max_timer
+	},
+        {
+		.ctl_name	= NET_ROSE_RESET_REQUEST_TIMEOUT,
+		.procname	= "reset_request_timeout",
+		.data		= &sysctl_rose_reset_request_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_timer,
+		.extra2		= &max_timer
+	},
+        {
+		.ctl_name	= NET_ROSE_CLEAR_REQUEST_TIMEOUT,
+		.procname	= "clear_request_timeout",
+		.data		= &sysctl_rose_clear_request_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_timer,
+		.extra2		= &max_timer
+	},
+        {
+		.ctl_name	= NET_ROSE_NO_ACTIVITY_TIMEOUT,
+		.procname	= "no_activity_timeout",
+		.data		= &sysctl_rose_no_activity_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_idle,
+		.extra2		= &max_idle
+	},
+        {
+		.ctl_name	= NET_ROSE_ACK_HOLD_BACK_TIMEOUT,
+		.procname	= "acknowledge_hold_back_timeout",
+		.data		= &sysctl_rose_ack_hold_back_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_timer,
+		.extra2		= &max_timer
+	},
+        {
+		.ctl_name	= NET_ROSE_ROUTING_CONTROL,
+		.procname	= "routing_control",
+		.data		= &sysctl_rose_routing_control,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_route,
+		.extra2		= &max_route
+	},
+        {
+		.ctl_name	= NET_ROSE_LINK_FAIL_TIMEOUT,
+		.procname	= "link_fail_timeout",
+		.data		= &sysctl_rose_link_fail_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_ftimer,
+		.extra2		= &max_ftimer
+	},
+        {
+		.ctl_name	= NET_ROSE_MAX_VCS,
+		.procname	= "maximum_virtual_circuits",
+		.data		= &sysctl_rose_maximum_vcs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_maxvcs,
+		.extra2		= &max_maxvcs
+	},
+        {
+		.ctl_name	= NET_ROSE_WINDOW_SIZE,
+		.procname	= "window_size",
+		.data		= &sysctl_rose_window_size,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_window,
+		.extra2		= &max_window
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table rose_dir_table[] = {
+	{
+		.ctl_name	= NET_ROSE,
+		.procname	= "rose",
+		.mode		= 0555,
+		.child		= rose_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table rose_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= rose_dir_table
+	},
+	{ .ctl_name = 0 }
+};
+
+void __init rose_register_sysctl(void)
+{
+	rose_table_header = register_sysctl_table(rose_root_table, 1);
+}
+
+void rose_unregister_sysctl(void)
+{
+	unregister_sysctl_table(rose_table_header);
+}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
new file mode 100644
index 00000000000..6efcb6f162a
--- /dev/null
+++ b/net/rxrpc/Makefile
@@ -0,0 +1,25 @@
+#
+# Makefile for Linux kernel Rx RPC
+#
+
+#CFLAGS += -finstrument-functions
+
+rxrpc-objs := \
+	call.o \
+	connection.o \
+	krxiod.o \
+	krxsecd.o \
+	krxtimod.o \
+	main.o \
+	peer.o \
+	rxrpc_syms.o \
+	transport.o
+
+ifeq ($(CONFIG_PROC_FS),y)
+rxrpc-objs += proc.o
+endif
+ifeq ($(CONFIG_SYSCTL),y)
+rxrpc-objs += sysctl.o
+endif
+
+obj-$(CONFIG_RXRPC) := rxrpc.o
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
new file mode 100644
index 00000000000..5cfd4cadee4
--- /dev/null
+++ b/net/rxrpc/call.c
@@ -0,0 +1,2278 @@
+/* call.c: Rx call routines
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include "internal.h"
+
+__RXACCT_DECL(atomic_t rxrpc_call_count);
+__RXACCT_DECL(atomic_t rxrpc_message_count);
+
+LIST_HEAD(rxrpc_calls);
+DECLARE_RWSEM(rxrpc_calls_sem);
+
+unsigned rxrpc_call_rcv_timeout			= HZ/3;
+static unsigned rxrpc_call_acks_timeout		= HZ/3;
+static unsigned rxrpc_call_dfr_ack_timeout	= HZ/20;
+static unsigned short rxrpc_call_max_resend	= HZ/10;
+
+const char *rxrpc_call_states[] = {
+	"COMPLETE",
+	"ERROR",
+	"SRVR_RCV_OPID",
+	"SRVR_RCV_ARGS",
+	"SRVR_GOT_ARGS",
+	"SRVR_SND_REPLY",
+	"SRVR_RCV_FINAL_ACK",
+	"CLNT_SND_ARGS",
+	"CLNT_RCV_REPLY",
+	"CLNT_GOT_REPLY"
+};
+
+const char *rxrpc_call_error_states[] = {
+	"NO_ERROR",
+	"LOCAL_ABORT",
+	"PEER_ABORT",
+	"LOCAL_ERROR",
+	"REMOTE_ERROR"
+};
+
+const char *rxrpc_pkts[] = {
+	"?00",
+	"data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
+	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
+};
+
+static const char *rxrpc_acks[] = {
+	"---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
+	"-?-"
+};
+
+static const char _acktype[] = "NA-";
+
+static void rxrpc_call_receive_packet(struct rxrpc_call *call);
+static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
+					   struct rxrpc_message *msg);
+static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
+					  struct rxrpc_message *msg);
+static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
+					rxrpc_seq_t higest);
+static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
+static int __rxrpc_call_read_data(struct rxrpc_call *call);
+
+static int rxrpc_call_record_ACK(struct rxrpc_call *call,
+				 struct rxrpc_message *msg,
+				 rxrpc_seq_t seq,
+				 size_t count);
+
+static int rxrpc_call_flush(struct rxrpc_call *call);
+
+#define _state(call) \
+	_debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
+
+static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
+{
+	wake_up(&call->waitq);
+}
+
+static void rxrpc_call_default_error_func(struct rxrpc_call *call)
+{
+	wake_up(&call->waitq);
+}
+
+static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
+{
+	switch (call->app_err_state) {
+	case RXRPC_ESTATE_LOCAL_ABORT:
+		call->app_abort_code = -call->app_errno;
+	case RXRPC_ESTATE_PEER_ABORT:
+		call->app_errno = -ECONNABORTED;
+	default:
+		break;
+	}
+}
+
+static void __rxrpc_call_acks_timeout(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
+
+	call->flags |= RXRPC_CALL_ACKS_TIMO;
+	rxrpc_krxiod_queue_call(call);
+}
+
+static void __rxrpc_call_rcv_timeout(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
+
+	call->flags |= RXRPC_CALL_RCV_TIMO;
+	rxrpc_krxiod_queue_call(call);
+}
+
+static void __rxrpc_call_ackr_timeout(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
+
+	call->flags |= RXRPC_CALL_ACKR_TIMO;
+	rxrpc_krxiod_queue_call(call);
+}
+
+/*****************************************************************************/
+/*
+ * calculate a timeout based on an RTT value
+ */
+static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
+						      unsigned long val)
+{
+	unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
+
+	expiry += 10;
+	if (expiry < HZ / 25)
+		expiry = HZ / 25;
+	if (expiry > HZ)
+		expiry = HZ;
+
+	_leave(" = %lu jiffies", expiry);
+	return jiffies + expiry;
+} /* end __rxrpc_rtt_based_timeout() */
+
+/*****************************************************************************/
+/*
+ * create a new call record
+ */
+static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
+				      struct rxrpc_call **_call)
+{
+	struct rxrpc_call *call;
+
+	_enter("%p", conn);
+
+	/* allocate and initialise a call record */
+	call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
+	if (!call) {
+		_leave(" ENOMEM");
+		return -ENOMEM;
+	}
+
+	atomic_set(&call->usage, 1);
+
+	init_waitqueue_head(&call->waitq);
+	spin_lock_init(&call->lock);
+	INIT_LIST_HEAD(&call->link);
+	INIT_LIST_HEAD(&call->acks_pendq);
+	INIT_LIST_HEAD(&call->rcv_receiveq);
+	INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
+	INIT_LIST_HEAD(&call->app_readyq);
+	INIT_LIST_HEAD(&call->app_unreadyq);
+	INIT_LIST_HEAD(&call->app_link);
+	INIT_LIST_HEAD(&call->app_attn_link);
+
+	init_timer(&call->acks_timeout);
+	call->acks_timeout.data = (unsigned long) call;
+	call->acks_timeout.function = __rxrpc_call_acks_timeout;
+
+	init_timer(&call->rcv_timeout);
+	call->rcv_timeout.data = (unsigned long) call;
+	call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
+
+	init_timer(&call->ackr_dfr_timo);
+	call->ackr_dfr_timo.data = (unsigned long) call;
+	call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
+
+	call->conn = conn;
+	call->ackr_win_bot = 1;
+	call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
+	call->ackr_prev_seq = 0;
+	call->app_mark = RXRPC_APP_MARK_EOF;
+	call->app_attn_func = rxrpc_call_default_attn_func;
+	call->app_error_func = rxrpc_call_default_error_func;
+	call->app_aemap_func = rxrpc_call_default_aemap_func;
+	call->app_scr_alloc = call->app_scratch;
+
+	call->cjif = jiffies;
+
+	_leave(" = 0 (%p)", call);
+
+	*_call = call;
+
+	return 0;
+} /* end __rxrpc_create_call() */
+
+/*****************************************************************************/
+/*
+ * create a new call record for outgoing calls
+ */
+int rxrpc_create_call(struct rxrpc_connection *conn,
+		      rxrpc_call_attn_func_t attn,
+		      rxrpc_call_error_func_t error,
+		      rxrpc_call_aemap_func_t aemap,
+		      struct rxrpc_call **_call)
+{
+	DECLARE_WAITQUEUE(myself, current);
+
+	struct rxrpc_call *call;
+	int ret, cix, loop;
+
+	_enter("%p", conn);
+
+	/* allocate and initialise a call record */
+	ret = __rxrpc_create_call(conn, &call);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
+	if (attn)
+		call->app_attn_func = attn;
+	if (error)
+		call->app_error_func = error;
+	if (aemap)
+		call->app_aemap_func = aemap;
+
+	_state(call);
+
+	spin_lock(&conn->lock);
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&conn->chanwait, &myself);
+
+ try_again:
+	/* try to find an unused channel */
+	for (cix = 0; cix < 4; cix++)
+		if (!conn->channels[cix])
+			goto obtained_chan;
+
+	/* no free channels - wait for one to become available */
+	ret = -EINTR;
+	if (signal_pending(current))
+		goto error_unwait;
+
+	spin_unlock(&conn->lock);
+
+	schedule();
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	spin_lock(&conn->lock);
+	goto try_again;
+
+	/* got a channel - now attach to the connection */
+ obtained_chan:
+	remove_wait_queue(&conn->chanwait, &myself);
+	set_current_state(TASK_RUNNING);
+
+	/* concoct a unique call number */
+ next_callid:
+	call->call_id = htonl(++conn->call_counter);
+	for (loop = 0; loop < 4; loop++)
+		if (conn->channels[loop] &&
+		    conn->channels[loop]->call_id == call->call_id)
+			goto next_callid;
+
+	rxrpc_get_connection(conn);
+	conn->channels[cix] = call; /* assign _after_ done callid check loop */
+	do_gettimeofday(&conn->atime);
+	call->chan_ix = htonl(cix);
+
+	spin_unlock(&conn->lock);
+
+	down_write(&rxrpc_calls_sem);
+	list_add_tail(&call->call_link, &rxrpc_calls);
+	up_write(&rxrpc_calls_sem);
+
+	__RXACCT(atomic_inc(&rxrpc_call_count));
+	*_call = call;
+
+	_leave(" = 0 (call=%p cix=%u)", call, cix);
+	return 0;
+
+ error_unwait:
+	remove_wait_queue(&conn->chanwait, &myself);
+	set_current_state(TASK_RUNNING);
+	spin_unlock(&conn->lock);
+
+	free_page((unsigned long) call);
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_create_call() */
+
+/*****************************************************************************/
+/*
+ * create a new call record for incoming calls
+ */
+int rxrpc_incoming_call(struct rxrpc_connection *conn,
+			struct rxrpc_message *msg,
+			struct rxrpc_call **_call)
+{
+	struct rxrpc_call *call;
+	unsigned cix;
+	int ret;
+
+	cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
+
+	_enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
+
+	/* allocate and initialise a call record */
+	ret = __rxrpc_create_call(conn, &call);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	call->pkt_rcv_count = 1;
+	call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
+	call->app_mark = sizeof(uint32_t);
+
+	_state(call);
+
+	/* attach to the connection */
+	ret = -EBUSY;
+	call->chan_ix = htonl(cix);
+	call->call_id = msg->hdr.callNumber;
+
+	spin_lock(&conn->lock);
+
+	if (!conn->channels[cix] ||
+	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
+	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
+	    ) {
+		conn->channels[cix] = call;
+		rxrpc_get_connection(conn);
+		ret = 0;
+	}
+
+	spin_unlock(&conn->lock);
+
+	if (ret < 0) {
+		free_page((unsigned long) call);
+		call = NULL;
+	}
+
+	if (ret == 0) {
+		down_write(&rxrpc_calls_sem);
+		list_add_tail(&call->call_link, &rxrpc_calls);
+		up_write(&rxrpc_calls_sem);
+		__RXACCT(atomic_inc(&rxrpc_call_count));
+		*_call = call;
+	}
+
+	_leave(" = %d [%p]", ret, call);
+	return ret;
+} /* end rxrpc_incoming_call() */
+
+/*****************************************************************************/
+/*
+ * free a call record
+ */
+void rxrpc_put_call(struct rxrpc_call *call)
+{
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_message *msg;
+
+	_enter("%p{u=%d}",call,atomic_read(&call->usage));
+
+	/* sanity check */
+	if (atomic_read(&call->usage) <= 0)
+		BUG();
+
+	/* to prevent a race, the decrement and the de-list must be effectively
+	 * atomic */
+	spin_lock(&conn->lock);
+	if (likely(!atomic_dec_and_test(&call->usage))) {
+		spin_unlock(&conn->lock);
+		_leave("");
+		return;
+	}
+
+	if (conn->channels[ntohl(call->chan_ix)] == call)
+		conn->channels[ntohl(call->chan_ix)] = NULL;
+
+	spin_unlock(&conn->lock);
+
+	wake_up(&conn->chanwait);
+
+	rxrpc_put_connection(conn);
+
+	/* clear the timers and dequeue from krxiod */
+	del_timer_sync(&call->acks_timeout);
+	del_timer_sync(&call->rcv_timeout);
+	del_timer_sync(&call->ackr_dfr_timo);
+
+	rxrpc_krxiod_dequeue_call(call);
+
+	/* clean up the contents of the struct */
+	if (call->snd_nextmsg)
+		rxrpc_put_message(call->snd_nextmsg);
+
+	if (call->snd_ping)
+		rxrpc_put_message(call->snd_ping);
+
+	while (!list_empty(&call->acks_pendq)) {
+		msg = list_entry(call->acks_pendq.next,
+				 struct rxrpc_message, link);
+		list_del(&msg->link);
+		rxrpc_put_message(msg);
+	}
+
+	while (!list_empty(&call->rcv_receiveq)) {
+		msg = list_entry(call->rcv_receiveq.next,
+				 struct rxrpc_message, link);
+		list_del(&msg->link);
+		rxrpc_put_message(msg);
+	}
+
+	while (!list_empty(&call->app_readyq)) {
+		msg = list_entry(call->app_readyq.next,
+				 struct rxrpc_message, link);
+		list_del(&msg->link);
+		rxrpc_put_message(msg);
+	}
+
+	while (!list_empty(&call->app_unreadyq)) {
+		msg = list_entry(call->app_unreadyq.next,
+				 struct rxrpc_message, link);
+		list_del(&msg->link);
+		rxrpc_put_message(msg);
+	}
+
+	module_put(call->owner);
+
+	down_write(&rxrpc_calls_sem);
+	list_del(&call->call_link);
+	up_write(&rxrpc_calls_sem);
+
+	__RXACCT(atomic_dec(&rxrpc_call_count));
+	free_page((unsigned long) call);
+
+	_leave(" [destroyed]");
+} /* end rxrpc_put_call() */
+
+/*****************************************************************************/
+/*
+ * actually generate a normal ACK
+ */
+static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
+					      rxrpc_seq_t seq)
+{
+	struct rxrpc_message *msg;
+	struct kvec diov[3];
+	__be32 aux[4];
+	int delta, ret;
+
+	/* ACKs default to DELAY */
+	if (!call->ackr.reason)
+		call->ackr.reason = RXRPC_ACK_DELAY;
+
+	_proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+	       jiffies - call->cjif,
+	       ntohs(call->ackr.maxSkew),
+	       ntohl(call->ackr.firstPacket),
+	       ntohl(call->ackr.previousPacket),
+	       ntohl(call->ackr.serial),
+	       rxrpc_acks[call->ackr.reason],
+	       call->ackr.nAcks);
+
+	aux[0] = htonl(call->conn->peer->if_mtu);	/* interface MTU */
+	aux[1] = htonl(1444);				/* max MTU */
+	aux[2] = htonl(16);				/* rwind */
+	aux[3] = htonl(4);				/* max packets */
+
+	diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
+	diov[0].iov_base = &call->ackr;
+	diov[1].iov_len  = call->ackr_pend_cnt + 3;
+	diov[1].iov_base = call->ackr_array;
+	diov[2].iov_len  = sizeof(aux);
+	diov[2].iov_base = &aux;
+
+	/* build and send the message */
+	ret = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
+				3, diov, GFP_KERNEL, &msg);
+	if (ret < 0)
+		goto out;
+
+	msg->seq = seq;
+	msg->hdr.seq = htonl(seq);
+	msg->hdr.flags |= RXRPC_SLOW_START_OK;
+
+	ret = rxrpc_conn_sendmsg(call->conn, msg);
+	rxrpc_put_message(msg);
+	if (ret < 0)
+		goto out;
+	call->pkt_snd_count++;
+
+	/* count how many actual ACKs there were at the front */
+	for (delta = 0; delta < call->ackr_pend_cnt; delta++)
+		if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
+			break;
+
+	call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
+
+	/* crank the ACK window around */
+	if (delta == 0) {
+		/* un-ACK'd window */
+	}
+	else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
+		/* partially ACK'd window
+		 * - shuffle down to avoid losing out-of-sequence packets
+		 */
+		call->ackr_win_bot += delta;
+		call->ackr_win_top += delta;
+
+		memmove(&call->ackr_array[0],
+			&call->ackr_array[delta],
+			call->ackr_pend_cnt);
+
+		memset(&call->ackr_array[call->ackr_pend_cnt],
+		       RXRPC_ACK_TYPE_NACK,
+		       sizeof(call->ackr_array) - call->ackr_pend_cnt);
+	}
+	else {
+		/* fully ACK'd window
+		 * - just clear the whole thing
+		 */
+		memset(&call->ackr_array,
+		       RXRPC_ACK_TYPE_NACK,
+		       sizeof(call->ackr_array));
+	}
+
+	/* clear this ACK */
+	memset(&call->ackr, 0, sizeof(call->ackr));
+
+ out:
+	if (!call->app_call_state)
+		printk("___ STATE 0 ___\n");
+	return ret;
+} /* end __rxrpc_call_gen_normal_ACK() */
+
+/*****************************************************************************/
+/*
+ * note the reception of a packet in the call's ACK records and generate an
+ * appropriate ACK packet if necessary
+ * - returns 0 if packet should be processed, 1 if packet should be ignored
+ *   and -ve on an error
+ */
+static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
+				   struct rxrpc_header *hdr,
+				   struct rxrpc_ackpacket *ack)
+{
+	struct rxrpc_message *msg;
+	rxrpc_seq_t seq;
+	unsigned offset;
+	int ret = 0, err;
+	u8 special_ACK, do_ACK, force;
+
+	_enter("%p,%p { seq=%d tp=%d fl=%02x }",
+	       call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
+
+	seq = ntohl(hdr->seq);
+	offset = seq - call->ackr_win_bot;
+	do_ACK = RXRPC_ACK_DELAY;
+	special_ACK = 0;
+	force = (seq == 1);
+
+	if (call->ackr_high_seq < seq)
+		call->ackr_high_seq = seq;
+
+	/* deal with generation of obvious special ACKs first */
+	if (ack && ack->reason == RXRPC_ACK_PING) {
+		special_ACK = RXRPC_ACK_PING_RESPONSE;
+		ret = 1;
+		goto gen_ACK;
+	}
+
+	if (seq < call->ackr_win_bot) {
+		special_ACK = RXRPC_ACK_DUPLICATE;
+		ret = 1;
+		goto gen_ACK;
+	}
+
+	if (seq >= call->ackr_win_top) {
+		special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
+		ret = 1;
+		goto gen_ACK;
+	}
+
+	if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
+		special_ACK = RXRPC_ACK_DUPLICATE;
+		ret = 1;
+		goto gen_ACK;
+	}
+
+	/* okay... it's a normal data packet inside the ACK window */
+	call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
+
+	if (offset < call->ackr_pend_cnt) {
+	}
+	else if (offset > call->ackr_pend_cnt) {
+		do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
+		call->ackr_pend_cnt = offset;
+		goto gen_ACK;
+	}
+
+	if (hdr->flags & RXRPC_REQUEST_ACK) {
+		do_ACK = RXRPC_ACK_REQUESTED;
+	}
+
+	/* generate an ACK on the final packet of a reply just received */
+	if (hdr->flags & RXRPC_LAST_PACKET) {
+		if (call->conn->out_clientflag)
+			force = 1;
+	}
+	else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
+		do_ACK = RXRPC_ACK_REQUESTED;
+	}
+
+	/* re-ACK packets previously received out-of-order */
+	for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
+		if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
+			break;
+
+	call->ackr_pend_cnt = offset;
+
+	/* generate an ACK if we fill up the window */
+	if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
+		force = 1;
+
+ gen_ACK:
+	_debug("%05lu ACKs pend=%u norm=%s special=%s%s",
+	       jiffies - call->cjif,
+	       call->ackr_pend_cnt,
+	       rxrpc_acks[do_ACK],
+	       rxrpc_acks[special_ACK],
+	       force ? " immediate" :
+	       do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
+	       hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
+	       " defer"
+	       );
+
+	/* send any pending normal ACKs if need be */
+	if (call->ackr_pend_cnt > 0) {
+		/* fill out the appropriate form */
+		call->ackr.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
+		call->ackr.maxSkew	= htons(min(call->ackr_high_seq - seq,
+						    65535U));
+		call->ackr.firstPacket	= htonl(call->ackr_win_bot);
+		call->ackr.previousPacket = call->ackr_prev_seq;
+		call->ackr.serial	= hdr->serial;
+		call->ackr.nAcks	= call->ackr_pend_cnt;
+
+		if (do_ACK == RXRPC_ACK_REQUESTED)
+			call->ackr.reason = do_ACK;
+
+		/* generate the ACK immediately if necessary */
+		if (special_ACK || force) {
+			err = __rxrpc_call_gen_normal_ACK(
+				call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
+			if (err < 0) {
+				ret = err;
+				goto out;
+			}
+		}
+	}
+
+	if (call->ackr.reason == RXRPC_ACK_REQUESTED)
+		call->ackr_dfr_seq = seq;
+
+	/* start the ACK timer if not running if there are any pending deferred
+	 * ACKs */
+	if (call->ackr_pend_cnt > 0 &&
+	    call->ackr.reason != RXRPC_ACK_REQUESTED &&
+	    !timer_pending(&call->ackr_dfr_timo)
+	    ) {
+		unsigned long timo;
+
+		timo = rxrpc_call_dfr_ack_timeout + jiffies;
+
+		_debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
+
+		spin_lock(&call->lock);
+		mod_timer(&call->ackr_dfr_timo, timo);
+		spin_unlock(&call->lock);
+	}
+	else if ((call->ackr_pend_cnt == 0 ||
+		  call->ackr.reason == RXRPC_ACK_REQUESTED) &&
+		 timer_pending(&call->ackr_dfr_timo)
+		 ) {
+		/* stop timer if no pending ACKs */
+		_debug("CLEAR ACKR TIMER");
+		del_timer_sync(&call->ackr_dfr_timo);
+	}
+
+	/* send a special ACK if one is required */
+	if (special_ACK) {
+		struct rxrpc_ackpacket ack;
+		struct kvec diov[2];
+		uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
+
+		/* fill out the appropriate form */
+		ack.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
+		ack.maxSkew	= htons(min(call->ackr_high_seq - seq,
+					    65535U));
+		ack.firstPacket	= htonl(call->ackr_win_bot);
+		ack.previousPacket = call->ackr_prev_seq;
+		ack.serial	= hdr->serial;
+		ack.reason	= special_ACK;
+		ack.nAcks	= 0;
+
+		_proto("Rx Sending s-ACK"
+		       " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+		       ntohs(ack.maxSkew),
+		       ntohl(ack.firstPacket),
+		       ntohl(ack.previousPacket),
+		       ntohl(ack.serial),
+		       rxrpc_acks[ack.reason],
+		       ack.nAcks);
+
+		diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
+		diov[0].iov_base = &ack;
+		diov[1].iov_len  = sizeof(acks);
+		diov[1].iov_base = acks;
+
+		/* build and send the message */
+		err = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
+					hdr->seq ? 2 : 1, diov,
+					GFP_KERNEL,
+					&msg);
+		if (err < 0) {
+			ret = err;
+			goto out;
+		}
+
+		msg->seq = seq;
+		msg->hdr.seq = htonl(seq);
+		msg->hdr.flags |= RXRPC_SLOW_START_OK;
+
+		err = rxrpc_conn_sendmsg(call->conn, msg);
+		rxrpc_put_message(msg);
+		if (err < 0) {
+			ret = err;
+			goto out;
+		}
+		call->pkt_snd_count++;
+	}
+
+ out:
+	if (hdr->seq)
+		call->ackr_prev_seq = hdr->seq;
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_call_generate_ACK() */
+
+/*****************************************************************************/
+/*
+ * handle work to be done on a call
+ * - includes packet reception and timeout processing
+ */
+void rxrpc_call_do_stuff(struct rxrpc_call *call)
+{
+	_enter("%p{flags=%lx}", call, call->flags);
+
+	/* handle packet reception */
+	if (call->flags & RXRPC_CALL_RCV_PKT) {
+		_debug("- receive packet");
+		call->flags &= ~RXRPC_CALL_RCV_PKT;
+		rxrpc_call_receive_packet(call);
+	}
+
+	/* handle overdue ACKs */
+	if (call->flags & RXRPC_CALL_ACKS_TIMO) {
+		_debug("- overdue ACK timeout");
+		call->flags &= ~RXRPC_CALL_ACKS_TIMO;
+		rxrpc_call_resend(call, call->snd_seq_count);
+	}
+
+	/* handle lack of reception */
+	if (call->flags & RXRPC_CALL_RCV_TIMO) {
+		_debug("- reception timeout");
+		call->flags &= ~RXRPC_CALL_RCV_TIMO;
+		rxrpc_call_abort(call, -EIO);
+	}
+
+	/* handle deferred ACKs */
+	if (call->flags & RXRPC_CALL_ACKR_TIMO ||
+	    (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
+	    ) {
+		_debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
+		       jiffies - call->cjif,
+		       rxrpc_acks[call->ackr.reason],
+		       call->ackr.nAcks);
+
+		call->flags &= ~RXRPC_CALL_ACKR_TIMO;
+
+		if (call->ackr.nAcks > 0 &&
+		    call->app_call_state != RXRPC_CSTATE_ERROR) {
+			/* generate ACK */
+			__rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
+			call->ackr_dfr_seq = 0;
+		}
+	}
+
+	_leave("");
+
+} /* end rxrpc_call_do_stuff() */
+
+/*****************************************************************************/
+/*
+ * send an abort message at call or connection level
+ * - must be called with call->lock held
+ * - the supplied error code is sent as the packet data
+ */
+static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
+{
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_message *msg;
+	struct kvec diov[1];
+	int ret;
+	__be32 _error;
+
+	_enter("%p{%08x},%p{%d},%d",
+	       conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
+
+	/* if this call is already aborted, then just wake up any waiters */
+	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
+		spin_unlock(&call->lock);
+		call->app_error_func(call);
+		_leave(" = 0");
+		return 0;
+	}
+
+	rxrpc_get_call(call);
+
+	/* change the state _with_ the lock still held */
+	call->app_call_state	= RXRPC_CSTATE_ERROR;
+	call->app_err_state	= RXRPC_ESTATE_LOCAL_ABORT;
+	call->app_errno		= errno;
+	call->app_mark		= RXRPC_APP_MARK_EOF;
+	call->app_read_buf	= NULL;
+	call->app_async_read	= 0;
+
+	_state(call);
+
+	/* ask the app to translate the error code */
+	call->app_aemap_func(call);
+
+	spin_unlock(&call->lock);
+
+	/* flush any outstanding ACKs */
+	del_timer_sync(&call->acks_timeout);
+	del_timer_sync(&call->rcv_timeout);
+	del_timer_sync(&call->ackr_dfr_timo);
+
+	if (rxrpc_call_is_ack_pending(call))
+		__rxrpc_call_gen_normal_ACK(call, 0);
+
+	/* send the abort packet only if we actually traded some other
+	 * packets */
+	ret = 0;
+	if (call->pkt_snd_count || call->pkt_rcv_count) {
+		/* actually send the abort */
+		_proto("Rx Sending Call ABORT { data=%d }",
+		       call->app_abort_code);
+
+		_error = htonl(call->app_abort_code);
+
+		diov[0].iov_len  = sizeof(_error);
+		diov[0].iov_base = &_error;
+
+		ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
+					1, diov, GFP_KERNEL, &msg);
+		if (ret == 0) {
+			ret = rxrpc_conn_sendmsg(conn, msg);
+			rxrpc_put_message(msg);
+		}
+	}
+
+	/* tell the app layer to let go */
+	call->app_error_func(call);
+
+	rxrpc_put_call(call);
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end __rxrpc_call_abort() */
+
+/*****************************************************************************/
+/*
+ * send an abort message at call or connection level
+ * - the supplied error code is sent as the packet data
+ */
+int rxrpc_call_abort(struct rxrpc_call *call, int error)
+{
+	spin_lock(&call->lock);
+
+	return __rxrpc_call_abort(call, error);
+
+} /* end rxrpc_call_abort() */
+
+/*****************************************************************************/
+/*
+ * process packets waiting for this call
+ */
+static void rxrpc_call_receive_packet(struct rxrpc_call *call)
+{
+	struct rxrpc_message *msg;
+	struct list_head *_p;
+
+	_enter("%p", call);
+
+	rxrpc_get_call(call); /* must not go away too soon if aborted by
+			       * app-layer */
+
+	while (!list_empty(&call->rcv_receiveq)) {
+		/* try to get next packet */
+		_p = NULL;
+		spin_lock(&call->lock);
+		if (!list_empty(&call->rcv_receiveq)) {
+			_p = call->rcv_receiveq.next;
+			list_del_init(_p);
+		}
+		spin_unlock(&call->lock);
+
+		if (!_p)
+			break;
+
+		msg = list_entry(_p, struct rxrpc_message, link);
+
+		_proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
+		       jiffies - call->cjif,
+		       rxrpc_pkts[msg->hdr.type],
+		       ntohl(msg->hdr.serial),
+		       msg->seq,
+		       msg->hdr.flags & RXRPC_JUMBO_PACKET	? 'j' : '-',
+		       msg->hdr.flags & RXRPC_MORE_PACKETS	? 'm' : '-',
+		       msg->hdr.flags & RXRPC_LAST_PACKET	? 'l' : '-',
+		       msg->hdr.flags & RXRPC_REQUEST_ACK	? 'r' : '-',
+		       msg->hdr.flags & RXRPC_CLIENT_INITIATED	? 'C' : 'S'
+		       );
+
+		switch (msg->hdr.type) {
+			/* deal with data packets */
+		case RXRPC_PACKET_TYPE_DATA:
+			/* ACK the packet if necessary */
+			switch (rxrpc_call_generate_ACK(call, &msg->hdr,
+							NULL)) {
+			case 0: /* useful packet */
+				rxrpc_call_receive_data_packet(call, msg);
+				break;
+			case 1: /* duplicate or out-of-window packet */
+				break;
+			default:
+				rxrpc_put_message(msg);
+				goto out;
+			}
+			break;
+
+			/* deal with ACK packets */
+		case RXRPC_PACKET_TYPE_ACK:
+			rxrpc_call_receive_ack_packet(call, msg);
+			break;
+
+			/* deal with abort packets */
+		case RXRPC_PACKET_TYPE_ABORT: {
+			__be32 _dbuf, *dp;
+
+			dp = skb_header_pointer(msg->pkt, msg->offset,
+						sizeof(_dbuf), &_dbuf);
+			if (dp == NULL)
+				printk("Rx Received short ABORT packet\n");
+
+			_proto("Rx Received Call ABORT { data=%d }",
+			       (dp ? ntohl(*dp) : 0));
+
+			spin_lock(&call->lock);
+			call->app_call_state	= RXRPC_CSTATE_ERROR;
+			call->app_err_state	= RXRPC_ESTATE_PEER_ABORT;
+			call->app_abort_code	= (dp ? ntohl(*dp) : 0);
+			call->app_errno		= -ECONNABORTED;
+			call->app_mark		= RXRPC_APP_MARK_EOF;
+			call->app_read_buf	= NULL;
+			call->app_async_read	= 0;
+
+			/* ask the app to translate the error code */
+			call->app_aemap_func(call);
+			_state(call);
+			spin_unlock(&call->lock);
+			call->app_error_func(call);
+			break;
+		}
+		default:
+			/* deal with other packet types */
+			_proto("Rx Unsupported packet type %u (#%u)",
+			       msg->hdr.type, msg->seq);
+			break;
+		}
+
+		rxrpc_put_message(msg);
+	}
+
+ out:
+	rxrpc_put_call(call);
+	_leave("");
+} /* end rxrpc_call_receive_packet() */
+
+/*****************************************************************************/
+/*
+ * process next data packet
+ * - as the next data packet arrives:
+ *   - it is queued on app_readyq _if_ it is the next one expected
+ *     (app_ready_seq+1)
+ *   - it is queued on app_unreadyq _if_ it is not the next one expected
+ *   - if a packet placed on app_readyq completely fills a hole leading up to
+ *     the first packet on app_unreadyq, then packets now in sequence are
+ *     tranferred to app_readyq
+ * - the application layer can only see packets on app_readyq
+ *   (app_ready_qty bytes)
+ * - the application layer is prodded every time a new packet arrives
+ */
+static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
+					   struct rxrpc_message *msg)
+{
+	const struct rxrpc_operation *optbl, *op;
+	struct rxrpc_message *pmsg;
+	struct list_head *_p;
+	int ret, lo, hi, rmtimo;
+	__be32 opid;
+
+	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
+
+	rxrpc_get_message(msg);
+
+	/* add to the unready queue if we'd have to create a hole in the ready
+	 * queue otherwise */
+	if (msg->seq != call->app_ready_seq + 1) {
+		_debug("Call add packet %d to unreadyq", msg->seq);
+
+		/* insert in seq order */
+		list_for_each(_p, &call->app_unreadyq) {
+			pmsg = list_entry(_p, struct rxrpc_message, link);
+			if (pmsg->seq > msg->seq)
+				break;
+		}
+
+		list_add_tail(&msg->link, _p);
+
+		_leave(" [unreadyq]");
+		return;
+	}
+
+	/* next in sequence - simply append into the call's ready queue */
+	_debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
+	       msg->seq, msg->dsize, call->app_ready_qty);
+
+	spin_lock(&call->lock);
+	call->app_ready_seq = msg->seq;
+	call->app_ready_qty += msg->dsize;
+	list_add_tail(&msg->link, &call->app_readyq);
+
+	/* move unready packets to the readyq if we got rid of a hole */
+	while (!list_empty(&call->app_unreadyq)) {
+		pmsg = list_entry(call->app_unreadyq.next,
+				  struct rxrpc_message, link);
+
+		if (pmsg->seq != call->app_ready_seq + 1)
+			break;
+
+		/* next in sequence - just move list-to-list */
+		_debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
+		       pmsg->seq, pmsg->dsize, call->app_ready_qty);
+
+		call->app_ready_seq = pmsg->seq;
+		call->app_ready_qty += pmsg->dsize;
+		list_del_init(&pmsg->link);
+		list_add_tail(&pmsg->link, &call->app_readyq);
+	}
+
+	/* see if we've got the last packet yet */
+	if (!list_empty(&call->app_readyq)) {
+		pmsg = list_entry(call->app_readyq.prev,
+				  struct rxrpc_message, link);
+		if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
+			call->app_last_rcv = 1;
+			_debug("Last packet on readyq");
+		}
+	}
+
+	switch (call->app_call_state) {
+		/* do nothing if call already aborted */
+	case RXRPC_CSTATE_ERROR:
+		spin_unlock(&call->lock);
+		_leave(" [error]");
+		return;
+
+		/* extract the operation ID from an incoming call if that's not
+		 * yet been done */
+	case RXRPC_CSTATE_SRVR_RCV_OPID:
+		spin_unlock(&call->lock);
+
+		/* handle as yet insufficient data for the operation ID */
+		if (call->app_ready_qty < 4) {
+			if (call->app_last_rcv)
+				/* trouble - last packet seen */
+				rxrpc_call_abort(call, -EINVAL);
+
+			_leave("");
+			return;
+		}
+
+		/* pull the operation ID out of the buffer */
+		ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
+		if (ret < 0) {
+			printk("Unexpected error from read-data: %d\n", ret);
+			if (call->app_call_state != RXRPC_CSTATE_ERROR)
+				rxrpc_call_abort(call, ret);
+			_leave("");
+			return;
+		}
+		call->app_opcode = ntohl(opid);
+
+		/* locate the operation in the available ops table */
+		optbl = call->conn->service->ops_begin;
+		lo = 0;
+		hi = call->conn->service->ops_end - optbl;
+
+		while (lo < hi) {
+			int mid = (hi + lo) / 2;
+			op = &optbl[mid];
+			if (call->app_opcode == op->id)
+				goto found_op;
+			if (call->app_opcode > op->id)
+				lo = mid + 1;
+			else
+				hi = mid;
+		}
+
+		/* search failed */
+		kproto("Rx Client requested operation %d from %s service",
+		       call->app_opcode, call->conn->service->name);
+		rxrpc_call_abort(call, -EINVAL);
+		_leave(" [inval]");
+		return;
+
+	found_op:
+		_proto("Rx Client requested operation %s from %s service",
+		       op->name, call->conn->service->name);
+
+		/* we're now waiting for the argument block (unless the call
+		 * was aborted) */
+		spin_lock(&call->lock);
+		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
+		    call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
+			if (!call->app_last_rcv)
+				call->app_call_state =
+					RXRPC_CSTATE_SRVR_RCV_ARGS;
+			else if (call->app_ready_qty > 0)
+				call->app_call_state =
+					RXRPC_CSTATE_SRVR_GOT_ARGS;
+			else
+				call->app_call_state =
+					RXRPC_CSTATE_SRVR_SND_REPLY;
+			call->app_mark = op->asize;
+			call->app_user = op->user;
+		}
+		spin_unlock(&call->lock);
+
+		_state(call);
+		break;
+
+	case RXRPC_CSTATE_SRVR_RCV_ARGS:
+		/* change state if just received last packet of arg block */
+		if (call->app_last_rcv)
+			call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
+		spin_unlock(&call->lock);
+
+		_state(call);
+		break;
+
+	case RXRPC_CSTATE_CLNT_RCV_REPLY:
+		/* change state if just received last packet of reply block */
+		rmtimo = 0;
+		if (call->app_last_rcv) {
+			call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
+			rmtimo = 1;
+		}
+		spin_unlock(&call->lock);
+
+		if (rmtimo) {
+			del_timer_sync(&call->acks_timeout);
+			del_timer_sync(&call->rcv_timeout);
+			del_timer_sync(&call->ackr_dfr_timo);
+		}
+
+		_state(call);
+		break;
+
+	default:
+		/* deal with data reception in an unexpected state */
+		printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
+		__rxrpc_call_abort(call, -EBADMSG);
+		_leave("");
+		return;
+	}
+
+	if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
+	    call->app_last_rcv)
+		BUG();
+
+	/* otherwise just invoke the data function whenever we can satisfy its desire for more
+	 * data
+	 */
+	_proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
+	       call->app_call_state, call->app_ready_qty, call->app_mark,
+	       call->app_last_rcv ? " last-rcvd" : "");
+
+	spin_lock(&call->lock);
+
+	ret = __rxrpc_call_read_data(call);
+	switch (ret) {
+	case 0:
+		spin_unlock(&call->lock);
+		call->app_attn_func(call);
+		break;
+	case -EAGAIN:
+		spin_unlock(&call->lock);
+		break;
+	case -ECONNABORTED:
+		spin_unlock(&call->lock);
+		break;
+	default:
+		__rxrpc_call_abort(call, ret);
+		break;
+	}
+
+	_state(call);
+
+	_leave("");
+
+} /* end rxrpc_call_receive_data_packet() */
+
+/*****************************************************************************/
+/*
+ * received an ACK packet
+ */
+static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
+					  struct rxrpc_message *msg)
+{
+	struct rxrpc_ackpacket _ack, *ap;
+	rxrpc_serial_net_t serial;
+	rxrpc_seq_t seq;
+	int ret;
+
+	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
+
+	/* extract the basic ACK record */
+	ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
+	if (ap == NULL) {
+		printk("Rx Received short ACK packet\n");
+		return;
+	}
+	msg->offset += sizeof(_ack);
+
+	serial = ap->serial;
+	seq = ntohl(ap->firstPacket);
+
+	_proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
+	       ntohl(msg->hdr.serial),
+	       ntohs(ap->bufferSpace),
+	       ntohs(ap->maxSkew),
+	       seq,
+	       ntohl(ap->previousPacket),
+	       ntohl(serial),
+	       rxrpc_acks[ap->reason],
+	       call->ackr.nAcks
+	       );
+
+	/* check the other side isn't ACK'ing a sequence number I haven't sent
+	 * yet */
+	if (ap->nAcks > 0 &&
+	    (seq > call->snd_seq_count ||
+	     seq + ap->nAcks - 1 > call->snd_seq_count)) {
+		printk("Received ACK (#%u-#%u) for unsent packet\n",
+		       seq, seq + ap->nAcks - 1);
+		rxrpc_call_abort(call, -EINVAL);
+		_leave("");
+		return;
+	}
+
+	/* deal with RTT calculation */
+	if (serial) {
+		struct rxrpc_message *rttmsg;
+
+		/* find the prompting packet */
+		spin_lock(&call->lock);
+		if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
+			/* it was a ping packet */
+			rttmsg = call->snd_ping;
+			call->snd_ping = NULL;
+			spin_unlock(&call->lock);
+
+			if (rttmsg) {
+				rttmsg->rttdone = 1;
+				rxrpc_peer_calculate_rtt(call->conn->peer,
+							 rttmsg, msg);
+				rxrpc_put_message(rttmsg);
+			}
+		}
+		else {
+			struct list_head *_p;
+
+			/* it ought to be a data packet - look in the pending
+			 * ACK list */
+			list_for_each(_p, &call->acks_pendq) {
+				rttmsg = list_entry(_p, struct rxrpc_message,
+						    link);
+				if (rttmsg->hdr.serial == serial) {
+					if (rttmsg->rttdone)
+						/* never do RTT twice without
+						 * resending */
+						break;
+
+					rttmsg->rttdone = 1;
+					rxrpc_peer_calculate_rtt(
+						call->conn->peer, rttmsg, msg);
+					break;
+				}
+			}
+			spin_unlock(&call->lock);
+		}
+	}
+
+	switch (ap->reason) {
+		/* deal with negative/positive acknowledgement of data
+		 * packets */
+	case RXRPC_ACK_REQUESTED:
+	case RXRPC_ACK_DELAY:
+	case RXRPC_ACK_IDLE:
+		rxrpc_call_definitively_ACK(call, seq - 1);
+
+	case RXRPC_ACK_DUPLICATE:
+	case RXRPC_ACK_OUT_OF_SEQUENCE:
+	case RXRPC_ACK_EXCEEDS_WINDOW:
+		call->snd_resend_cnt = 0;
+		ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
+		if (ret < 0)
+			rxrpc_call_abort(call, ret);
+		break;
+
+		/* respond to ping packets immediately */
+	case RXRPC_ACK_PING:
+		rxrpc_call_generate_ACK(call, &msg->hdr, ap);
+		break;
+
+		/* only record RTT on ping response packets */
+	case RXRPC_ACK_PING_RESPONSE:
+		if (call->snd_ping) {
+			struct rxrpc_message *rttmsg;
+
+			/* only do RTT stuff if the response matches the
+			 * retained ping */
+			rttmsg = NULL;
+			spin_lock(&call->lock);
+			if (call->snd_ping &&
+			    call->snd_ping->hdr.serial == ap->serial) {
+				rttmsg = call->snd_ping;
+				call->snd_ping = NULL;
+			}
+			spin_unlock(&call->lock);
+
+			if (rttmsg) {
+				rttmsg->rttdone = 1;
+				rxrpc_peer_calculate_rtt(call->conn->peer,
+							 rttmsg, msg);
+				rxrpc_put_message(rttmsg);
+			}
+		}
+		break;
+
+	default:
+		printk("Unsupported ACK reason %u\n", ap->reason);
+		break;
+	}
+
+	_leave("");
+} /* end rxrpc_call_receive_ack_packet() */
+
+/*****************************************************************************/
+/*
+ * record definitive ACKs for all messages up to and including the one with the
+ * 'highest' seq
+ */
+static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
+					rxrpc_seq_t highest)
+{
+	struct rxrpc_message *msg;
+	int now_complete;
+
+	_enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
+
+	while (call->acks_dftv_seq < highest) {
+		call->acks_dftv_seq++;
+
+		_proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
+
+		/* discard those at front of queue until message with highest
+		 * ACK is found */
+		spin_lock(&call->lock);
+		msg = NULL;
+		if (!list_empty(&call->acks_pendq)) {
+			msg = list_entry(call->acks_pendq.next,
+					 struct rxrpc_message, link);
+			list_del_init(&msg->link); /* dequeue */
+			if (msg->state == RXRPC_MSG_SENT)
+				call->acks_pend_cnt--;
+		}
+		spin_unlock(&call->lock);
+
+		/* insanity check */
+		if (!msg)
+			panic("%s(): acks_pendq unexpectedly empty\n",
+			      __FUNCTION__);
+
+		if (msg->seq != call->acks_dftv_seq)
+			panic("%s(): Packet #%u expected at front of acks_pendq"
+			      " (#%u found)\n",
+			      __FUNCTION__, call->acks_dftv_seq, msg->seq);
+
+		/* discard the message */
+		msg->state = RXRPC_MSG_DONE;
+		rxrpc_put_message(msg);
+	}
+
+	/* if all sent packets are definitively ACK'd then prod any sleepers just in case */
+	now_complete = 0;
+	spin_lock(&call->lock);
+	if (call->acks_dftv_seq == call->snd_seq_count) {
+		if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
+			call->app_call_state = RXRPC_CSTATE_COMPLETE;
+			_state(call);
+			now_complete = 1;
+		}
+	}
+	spin_unlock(&call->lock);
+
+	if (now_complete) {
+		del_timer_sync(&call->acks_timeout);
+		del_timer_sync(&call->rcv_timeout);
+		del_timer_sync(&call->ackr_dfr_timo);
+		call->app_attn_func(call);
+	}
+
+	_leave("");
+} /* end rxrpc_call_definitively_ACK() */
+
+/*****************************************************************************/
+/*
+ * record the specified amount of ACKs/NAKs
+ */
+static int rxrpc_call_record_ACK(struct rxrpc_call *call,
+				 struct rxrpc_message *msg,
+				 rxrpc_seq_t seq,
+				 size_t count)
+{
+	struct rxrpc_message *dmsg;
+	struct list_head *_p;
+	rxrpc_seq_t highest;
+	unsigned ix;
+	size_t chunk;
+	char resend, now_complete;
+	u8 acks[16];
+
+	_enter("%p{apc=%u ads=%u},%p,%u,%Zu",
+	       call, call->acks_pend_cnt, call->acks_dftv_seq,
+	       msg, seq, count);
+
+	/* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
+	 * ACKs) */
+	if (seq <= call->acks_dftv_seq) {
+		unsigned delta = call->acks_dftv_seq - seq;
+
+		if (count <= delta) {
+			_leave(" = 0 [all definitively ACK'd]");
+			return 0;
+		}
+
+		seq += delta;
+		count -= delta;
+		msg->offset += delta;
+	}
+
+	highest = seq + count - 1;
+	resend = 0;
+	while (count > 0) {
+		/* extract up to 16 ACK slots at a time */
+		chunk = min(count, sizeof(acks));
+		count -= chunk;
+
+		memset(acks, 2, sizeof(acks));
+
+		if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
+			printk("Rx Received short ACK packet\n");
+			_leave(" = -EINVAL");
+			return -EINVAL;
+		}
+		msg->offset += chunk;
+
+		/* check that the ACK set is valid */
+		for (ix = 0; ix < chunk; ix++) {
+			switch (acks[ix]) {
+			case RXRPC_ACK_TYPE_ACK:
+				break;
+			case RXRPC_ACK_TYPE_NACK:
+				resend = 1;
+				break;
+			default:
+				printk("Rx Received unsupported ACK state"
+				       " %u\n", acks[ix]);
+				_leave(" = -EINVAL");
+				return -EINVAL;
+			}
+		}
+
+		_proto("Rx ACK of packets #%u-#%u "
+		       "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
+		       seq, (unsigned) (seq + chunk - 1),
+		       _acktype[acks[0x0]],
+		       _acktype[acks[0x1]],
+		       _acktype[acks[0x2]],
+		       _acktype[acks[0x3]],
+		       _acktype[acks[0x4]],
+		       _acktype[acks[0x5]],
+		       _acktype[acks[0x6]],
+		       _acktype[acks[0x7]],
+		       _acktype[acks[0x8]],
+		       _acktype[acks[0x9]],
+		       _acktype[acks[0xA]],
+		       _acktype[acks[0xB]],
+		       _acktype[acks[0xC]],
+		       _acktype[acks[0xD]],
+		       _acktype[acks[0xE]],
+		       _acktype[acks[0xF]],
+		       call->acks_pend_cnt
+		       );
+
+		/* mark the packets in the ACK queue as being provisionally
+		 * ACK'd */
+		ix = 0;
+		spin_lock(&call->lock);
+
+		/* find the first packet ACK'd/NAK'd here */
+		list_for_each(_p, &call->acks_pendq) {
+			dmsg = list_entry(_p, struct rxrpc_message, link);
+			if (dmsg->seq == seq)
+				goto found_first;
+			_debug("- %u: skipping #%u", ix, dmsg->seq);
+		}
+		goto bad_queue;
+
+	found_first:
+		do {
+			_debug("- %u: processing #%u (%c) apc=%u",
+			       ix, dmsg->seq, _acktype[acks[ix]],
+			       call->acks_pend_cnt);
+
+			if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
+				if (dmsg->state == RXRPC_MSG_SENT)
+					call->acks_pend_cnt--;
+				dmsg->state = RXRPC_MSG_ACKED;
+			}
+			else {
+				if (dmsg->state == RXRPC_MSG_ACKED)
+					call->acks_pend_cnt++;
+				dmsg->state = RXRPC_MSG_SENT;
+			}
+			ix++;
+			seq++;
+
+			_p = dmsg->link.next;
+			dmsg = list_entry(_p, struct rxrpc_message, link);
+		} while(ix < chunk &&
+			_p != &call->acks_pendq &&
+			dmsg->seq == seq);
+
+		if (ix < chunk)
+			goto bad_queue;
+
+		spin_unlock(&call->lock);
+	}
+
+	if (resend)
+		rxrpc_call_resend(call, highest);
+
+	/* if all packets are provisionally ACK'd, then wake up anyone who's
+	 * waiting for that */
+	now_complete = 0;
+	spin_lock(&call->lock);
+	if (call->acks_pend_cnt == 0) {
+		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
+			call->app_call_state = RXRPC_CSTATE_COMPLETE;
+			_state(call);
+		}
+		now_complete = 1;
+	}
+	spin_unlock(&call->lock);
+
+	if (now_complete) {
+		_debug("- wake up waiters");
+		del_timer_sync(&call->acks_timeout);
+		del_timer_sync(&call->rcv_timeout);
+		del_timer_sync(&call->ackr_dfr_timo);
+		call->app_attn_func(call);
+	}
+
+	_leave(" = 0 (apc=%u)", call->acks_pend_cnt);
+	return 0;
+
+ bad_queue:
+	panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
+	      __FUNCTION__, seq);
+
+} /* end rxrpc_call_record_ACK() */
+
+/*****************************************************************************/
+/*
+ * transfer data from the ready packet queue to the asynchronous read buffer
+ * - since this func is the only one going to look at packets queued on
+ *   app_readyq, we don't need a lock to modify or access them, only to modify
+ *   the queue pointers
+ * - called with call->lock held
+ * - the buffer must be in kernel space
+ * - returns:
+ *	0 if buffer filled
+ *	-EAGAIN if buffer not filled and more data to come
+ *	-EBADMSG if last packet received and insufficient data left
+ *	-ECONNABORTED if the call has in an error state
+ */
+static int __rxrpc_call_read_data(struct rxrpc_call *call)
+{
+	struct rxrpc_message *msg;
+	size_t qty;
+	int ret;
+
+	_enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
+	       call,
+	       call->app_async_read, call->app_read_buf,
+	       call->app_ready_qty, call->app_mark);
+
+	/* check the state */
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_SRVR_RCV_ARGS:
+	case RXRPC_CSTATE_CLNT_RCV_REPLY:
+		if (call->app_last_rcv) {
+			printk("%s(%p,%p,%Zd):"
+			       " Inconsistent call state (%s, last pkt)",
+			       __FUNCTION__,
+			       call, call->app_read_buf, call->app_mark,
+			       rxrpc_call_states[call->app_call_state]);
+			BUG();
+		}
+		break;
+
+	case RXRPC_CSTATE_SRVR_RCV_OPID:
+	case RXRPC_CSTATE_SRVR_GOT_ARGS:
+	case RXRPC_CSTATE_CLNT_GOT_REPLY:
+		break;
+
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+		if (!call->app_last_rcv) {
+			printk("%s(%p,%p,%Zd):"
+			       " Inconsistent call state (%s, not last pkt)",
+			       __FUNCTION__,
+			       call, call->app_read_buf, call->app_mark,
+			       rxrpc_call_states[call->app_call_state]);
+			BUG();
+		}
+		_debug("Trying to read data from call in SND_REPLY state");
+		break;
+
+	case RXRPC_CSTATE_ERROR:
+		_leave(" = -ECONNABORTED");
+		return -ECONNABORTED;
+
+	default:
+		printk("reading in unexpected state [[[ %u ]]]\n",
+		       call->app_call_state);
+		BUG();
+	}
+
+	/* handle the case of not having an async buffer */
+	if (!call->app_async_read) {
+		if (call->app_mark == RXRPC_APP_MARK_EOF) {
+			ret = call->app_last_rcv ? 0 : -EAGAIN;
+		}
+		else {
+			if (call->app_mark >= call->app_ready_qty) {
+				call->app_mark = RXRPC_APP_MARK_EOF;
+				ret = 0;
+			}
+			else {
+				ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
+			}
+		}
+
+		_leave(" = %d [no buf]", ret);
+		return 0;
+	}
+
+	while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
+		msg = list_entry(call->app_readyq.next,
+				 struct rxrpc_message, link);
+
+		/* drag as much data as we need out of this packet */
+		qty = min(call->app_mark, msg->dsize);
+
+		_debug("reading %Zu from skb=%p off=%lu",
+		       qty, msg->pkt, msg->offset);
+
+		if (call->app_read_buf)
+			if (skb_copy_bits(msg->pkt, msg->offset,
+					  call->app_read_buf, qty) < 0)
+				panic("%s: Failed to copy data from packet:"
+				      " (%p,%p,%Zd)",
+				      __FUNCTION__,
+				      call, call->app_read_buf, qty);
+
+		/* if that packet is now empty, discard it */
+		call->app_ready_qty -= qty;
+		msg->dsize -= qty;
+
+		if (msg->dsize == 0) {
+			list_del_init(&msg->link);
+			rxrpc_put_message(msg);
+		}
+		else {
+			msg->offset += qty;
+		}
+
+		call->app_mark -= qty;
+		if (call->app_read_buf)
+			call->app_read_buf += qty;
+	}
+
+	if (call->app_mark == 0) {
+		call->app_async_read = 0;
+		call->app_mark = RXRPC_APP_MARK_EOF;
+		call->app_read_buf = NULL;
+
+		/* adjust the state if used up all packets */
+		if (list_empty(&call->app_readyq) && call->app_last_rcv) {
+			switch (call->app_call_state) {
+			case RXRPC_CSTATE_SRVR_RCV_OPID:
+				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
+				call->app_mark = RXRPC_APP_MARK_EOF;
+				_state(call);
+				del_timer_sync(&call->rcv_timeout);
+				break;
+			case RXRPC_CSTATE_SRVR_GOT_ARGS:
+				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
+				_state(call);
+				del_timer_sync(&call->rcv_timeout);
+				break;
+			default:
+				call->app_call_state = RXRPC_CSTATE_COMPLETE;
+				_state(call);
+				del_timer_sync(&call->acks_timeout);
+				del_timer_sync(&call->ackr_dfr_timo);
+				del_timer_sync(&call->rcv_timeout);
+				break;
+			}
+		}
+
+		_leave(" = 0");
+		return 0;
+	}
+
+	if (call->app_last_rcv) {
+		_debug("Insufficient data (%Zu/%Zu)",
+		       call->app_ready_qty, call->app_mark);
+		call->app_async_read = 0;
+		call->app_mark = RXRPC_APP_MARK_EOF;
+		call->app_read_buf = NULL;
+
+		_leave(" = -EBADMSG");
+		return -EBADMSG;
+	}
+
+	_leave(" = -EAGAIN");
+	return -EAGAIN;
+} /* end __rxrpc_call_read_data() */
+
+/*****************************************************************************/
+/*
+ * attempt to read the specified amount of data from the call's ready queue
+ * into the buffer provided
+ * - since this func is the only one going to look at packets queued on
+ *   app_readyq, we don't need a lock to modify or access them, only to modify
+ *   the queue pointers
+ * - if the buffer pointer is NULL, then data is merely drained, not copied
+ * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there is
+ *   enough data or an error will be generated
+ *   - note that the caller must have added the calling task to the call's wait
+ *     queue beforehand
+ * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
+ *   function doesn't read all available data
+ */
+int rxrpc_call_read_data(struct rxrpc_call *call,
+			 void *buffer, size_t size, int flags)
+{
+	int ret;
+
+	_enter("%p{arq=%Zu},%p,%Zd,%x",
+	       call, call->app_ready_qty, buffer, size, flags);
+
+	spin_lock(&call->lock);
+
+	if (unlikely(!!call->app_read_buf)) {
+		spin_unlock(&call->lock);
+		_leave(" = -EBUSY");
+		return -EBUSY;
+	}
+
+	call->app_mark = size;
+	call->app_read_buf = buffer;
+	call->app_async_read = 1;
+	call->app_read_count++;
+
+	/* read as much data as possible */
+	ret = __rxrpc_call_read_data(call);
+	switch (ret) {
+	case 0:
+		if (flags & RXRPC_CALL_READ_ALL &&
+		    (!call->app_last_rcv || call->app_ready_qty > 0)) {
+			_leave(" = -EBADMSG");
+			__rxrpc_call_abort(call, -EBADMSG);
+			return -EBADMSG;
+		}
+
+		spin_unlock(&call->lock);
+		call->app_attn_func(call);
+		_leave(" = 0");
+		return ret;
+
+	case -ECONNABORTED:
+		spin_unlock(&call->lock);
+		_leave(" = %d [aborted]", ret);
+		return ret;
+
+	default:
+		__rxrpc_call_abort(call, ret);
+		_leave(" = %d", ret);
+		return ret;
+
+	case -EAGAIN:
+		spin_unlock(&call->lock);
+
+		if (!(flags & RXRPC_CALL_READ_BLOCK)) {
+			_leave(" = -EAGAIN");
+			return -EAGAIN;
+		}
+
+		/* wait for the data to arrive */
+		_debug("blocking for data arrival");
+
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!call->app_async_read || signal_pending(current))
+				break;
+			schedule();
+		}
+		set_current_state(TASK_RUNNING);
+
+		if (signal_pending(current)) {
+			_leave(" = -EINTR");
+			return -EINTR;
+		}
+
+		if (call->app_call_state == RXRPC_CSTATE_ERROR) {
+			_leave(" = -ECONNABORTED");
+			return -ECONNABORTED;
+		}
+
+		_leave(" = 0");
+		return 0;
+	}
+
+} /* end rxrpc_call_read_data() */
+
+/*****************************************************************************/
+/*
+ * write data to a call
+ * - the data may not be sent immediately if it doesn't fill a buffer
+ * - if we can't queue all the data for buffering now, siov[] will have been
+ *   adjusted to take account of what has been sent
+ */
+int rxrpc_call_write_data(struct rxrpc_call *call,
+			  size_t sioc,
+			  struct kvec *siov,
+			  u8 rxhdr_flags,
+			  int alloc_flags,
+			  int dup_data,
+			  size_t *size_sent)
+{
+	struct rxrpc_message *msg;
+	struct kvec *sptr;
+	size_t space, size, chunk, tmp;
+	char *buf;
+	int ret;
+
+	_enter("%p,%Zu,%p,%02x,%x,%d,%p",
+	       call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
+	       size_sent);
+
+	*size_sent = 0;
+	size = 0;
+	ret = -EINVAL;
+
+	/* can't send more if we've sent last packet from this end */
+	switch (call->app_call_state) {
+	case RXRPC_CSTATE_SRVR_SND_REPLY:
+	case RXRPC_CSTATE_CLNT_SND_ARGS:
+		break;
+	case RXRPC_CSTATE_ERROR:
+		ret = call->app_errno;
+	default:
+		goto out;
+	}
+
+	/* calculate how much data we've been given */
+	sptr = siov;
+	for (; sioc > 0; sptr++, sioc--) {
+		if (!sptr->iov_len)
+			continue;
+
+		if (!sptr->iov_base)
+			goto out;
+
+		size += sptr->iov_len;
+	}
+
+	_debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
+
+	do {
+		/* make sure there's a message under construction */
+		if (!call->snd_nextmsg) {
+			/* no - allocate a message with no data yet attached */
+			ret = rxrpc_conn_newmsg(call->conn, call,
+						RXRPC_PACKET_TYPE_DATA,
+						0, NULL, alloc_flags,
+						&call->snd_nextmsg);
+			if (ret < 0)
+				goto out;
+			_debug("- allocated new message [ds=%Zu]",
+			       call->snd_nextmsg->dsize);
+		}
+
+		msg = call->snd_nextmsg;
+		msg->hdr.flags |= rxhdr_flags;
+
+		/* deal with zero-length terminal packet */
+		if (size == 0) {
+			if (rxhdr_flags & RXRPC_LAST_PACKET) {
+				ret = rxrpc_call_flush(call);
+				if (ret < 0)
+					goto out;
+			}
+			break;
+		}
+
+		/* work out how much space current packet has available */
+		space = call->conn->mtu_size - msg->dsize;
+		chunk = min(space, size);
+
+		_debug("- [before] space=%Zu chunk=%Zu", space, chunk);
+
+		while (!siov->iov_len)
+			siov++;
+
+		/* if we are going to have to duplicate the data then coalesce
+		 * it too */
+		if (dup_data) {
+			/* don't allocate more that 1 page at a time */
+			if (chunk > PAGE_SIZE)
+				chunk = PAGE_SIZE;
+
+			/* allocate a data buffer and attach to the message */
+			buf = kmalloc(chunk, alloc_flags);
+			if (unlikely(!buf)) {
+				if (msg->dsize ==
+				    sizeof(struct rxrpc_header)) {
+					/* discard an empty msg and wind back
+					 * the seq counter */
+					rxrpc_put_message(msg);
+					call->snd_nextmsg = NULL;
+					call->snd_seq_count--;
+				}
+
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			tmp = msg->dcount++;
+			set_bit(tmp, &msg->dfree);
+			msg->data[tmp].iov_base = buf;
+			msg->data[tmp].iov_len = chunk;
+			msg->dsize += chunk;
+			*size_sent += chunk;
+			size -= chunk;
+
+			/* load the buffer with data */
+			while (chunk > 0) {
+				tmp = min(chunk, siov->iov_len);
+				memcpy(buf, siov->iov_base, tmp);
+				buf += tmp;
+				siov->iov_base += tmp;
+				siov->iov_len -= tmp;
+				if (!siov->iov_len)
+					siov++;
+				chunk -= tmp;
+			}
+		}
+		else {
+			/* we want to attach the supplied buffers directly */
+			while (chunk > 0 &&
+			       msg->dcount < RXRPC_MSG_MAX_IOCS) {
+				tmp = msg->dcount++;
+				msg->data[tmp].iov_base = siov->iov_base;
+				msg->data[tmp].iov_len = siov->iov_len;
+				msg->dsize += siov->iov_len;
+				*size_sent += siov->iov_len;
+				size -= siov->iov_len;
+				chunk -= siov->iov_len;
+				siov++;
+			}
+		}
+
+		_debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
+
+		/* dispatch the message when full, final or requesting ACK */
+		if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
+			ret = rxrpc_call_flush(call);
+			if (ret < 0)
+				goto out;
+		}
+
+	} while(size > 0);
+
+	ret = 0;
+ out:
+	_leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
+	return ret;
+
+} /* end rxrpc_call_write_data() */
+
+/*****************************************************************************/
+/*
+ * flush outstanding packets to the network
+ */
+static int rxrpc_call_flush(struct rxrpc_call *call)
+{
+	struct rxrpc_message *msg;
+	int ret = 0;
+
+	_enter("%p", call);
+
+	rxrpc_get_call(call);
+
+	/* if there's a packet under construction, then dispatch it now */
+	if (call->snd_nextmsg) {
+		msg = call->snd_nextmsg;
+		call->snd_nextmsg = NULL;
+
+		if (msg->hdr.flags & RXRPC_LAST_PACKET) {
+			msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
+			if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
+				msg->hdr.flags |= RXRPC_REQUEST_ACK;
+		}
+		else {
+			msg->hdr.flags |= RXRPC_MORE_PACKETS;
+		}
+
+		_proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
+		       msg->dsize, msg->dcount, msg->dfree);
+
+		/* queue and adjust call state */
+		spin_lock(&call->lock);
+		list_add_tail(&msg->link, &call->acks_pendq);
+
+		/* decide what to do depending on current state and if this is
+		 * the last packet */
+		ret = -EINVAL;
+		switch (call->app_call_state) {
+		case RXRPC_CSTATE_SRVR_SND_REPLY:
+			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
+				call->app_call_state =
+					RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
+				_state(call);
+			}
+			break;
+
+		case RXRPC_CSTATE_CLNT_SND_ARGS:
+			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
+				call->app_call_state =
+					RXRPC_CSTATE_CLNT_RCV_REPLY;
+				_state(call);
+			}
+			break;
+
+		case RXRPC_CSTATE_ERROR:
+			ret = call->app_errno;
+		default:
+			spin_unlock(&call->lock);
+			goto out;
+		}
+
+		call->acks_pend_cnt++;
+
+		mod_timer(&call->acks_timeout,
+			  __rxrpc_rtt_based_timeout(call,
+						    rxrpc_call_acks_timeout));
+
+		spin_unlock(&call->lock);
+
+		ret = rxrpc_conn_sendmsg(call->conn, msg);
+		if (ret == 0)
+			call->pkt_snd_count++;
+	}
+
+ out:
+	rxrpc_put_call(call);
+
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end rxrpc_call_flush() */
+
+/*****************************************************************************/
+/*
+ * resend NAK'd or unacknowledged packets up to the highest one specified
+ */
+static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
+{
+	struct rxrpc_message *msg;
+	struct list_head *_p;
+	rxrpc_seq_t seq = 0;
+
+	_enter("%p,%u", call, highest);
+
+	_proto("Rx Resend required");
+
+	/* handle too many resends */
+	if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
+		_debug("Aborting due to too many resends (rcv=%d)",
+		       call->pkt_rcv_count);
+		rxrpc_call_abort(call,
+				 call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
+		_leave("");
+		return;
+	}
+
+	spin_lock(&call->lock);
+	call->snd_resend_cnt++;
+	for (;;) {
+		/* determine which the next packet we might need to ACK is */
+		if (seq <= call->acks_dftv_seq)
+			seq = call->acks_dftv_seq;
+		seq++;
+
+		if (seq > highest)
+			break;
+
+		/* look for the packet in the pending-ACK queue */
+		list_for_each(_p, &call->acks_pendq) {
+			msg = list_entry(_p, struct rxrpc_message, link);
+			if (msg->seq == seq)
+				goto found_msg;
+		}
+
+		panic("%s(%p,%d):"
+		      " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
+		      __FUNCTION__, call, highest,
+		      call->acks_dftv_seq, call->snd_seq_count, seq);
+
+	found_msg:
+		if (msg->state != RXRPC_MSG_SENT)
+			continue; /* only un-ACK'd packets */
+
+		rxrpc_get_message(msg);
+		spin_unlock(&call->lock);
+
+		/* send each message again (and ignore any errors we might
+		 * incur) */
+		_proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
+		       msg->dsize, msg->dcount, msg->dfree);
+
+		if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
+			call->pkt_snd_count++;
+
+		rxrpc_put_message(msg);
+
+		spin_lock(&call->lock);
+	}
+
+	/* reset the timeout */
+	mod_timer(&call->acks_timeout,
+		  __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
+
+	spin_unlock(&call->lock);
+
+	_leave("");
+} /* end rxrpc_call_resend() */
+
+/*****************************************************************************/
+/*
+ * handle an ICMP error being applied to a call
+ */
+void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
+{
+	_enter("%p{%u},%d", call, ntohl(call->call_id), errno);
+
+	/* if this call is already aborted, then just wake up any waiters */
+	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
+		call->app_error_func(call);
+	}
+	else {
+		/* tell the app layer what happened */
+		spin_lock(&call->lock);
+		call->app_call_state = RXRPC_CSTATE_ERROR;
+		_state(call);
+		if (local)
+			call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
+		else
+			call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
+		call->app_errno		= errno;
+		call->app_mark		= RXRPC_APP_MARK_EOF;
+		call->app_read_buf	= NULL;
+		call->app_async_read	= 0;
+
+		/* map the error */
+		call->app_aemap_func(call);
+
+		del_timer_sync(&call->acks_timeout);
+		del_timer_sync(&call->rcv_timeout);
+		del_timer_sync(&call->ackr_dfr_timo);
+
+		spin_unlock(&call->lock);
+
+		call->app_error_func(call);
+	}
+
+	_leave("");
+} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
new file mode 100644
index 00000000000..61463c74f8c
--- /dev/null
+++ b/net/rxrpc/connection.c
@@ -0,0 +1,778 @@
+/* connection.c: Rx connection routines
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include "internal.h"
+
+__RXACCT_DECL(atomic_t rxrpc_connection_count);
+
+LIST_HEAD(rxrpc_conns);
+DECLARE_RWSEM(rxrpc_conns_sem);
+unsigned long rxrpc_conn_timeout = 60 * 60;
+
+static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
+
+static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
+{
+	struct rxrpc_connection *conn =
+		list_entry(timer, struct rxrpc_connection, timeout);
+
+	_debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
+
+	rxrpc_conn_do_timeout(conn);
+}
+
+static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
+	.timed_out	= __rxrpc_conn_timeout,
+};
+
+/*****************************************************************************/
+/*
+ * create a new connection record
+ */
+static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
+					    struct rxrpc_connection **_conn)
+{
+	struct rxrpc_connection *conn;
+
+	_enter("%p",peer);
+
+	/* allocate and initialise a connection record */
+	conn = kmalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
+	if (!conn) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(conn, 0, sizeof(struct rxrpc_connection));
+	atomic_set(&conn->usage, 1);
+
+	INIT_LIST_HEAD(&conn->link);
+	INIT_LIST_HEAD(&conn->id_link);
+	init_waitqueue_head(&conn->chanwait);
+	spin_lock_init(&conn->lock);
+	rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
+
+	do_gettimeofday(&conn->atime);
+	conn->mtu_size = 1024;
+	conn->peer = peer;
+	conn->trans = peer->trans;
+
+	__RXACCT(atomic_inc(&rxrpc_connection_count));
+	*_conn = conn;
+	_leave(" = 0 (%p)", conn);
+
+	return 0;
+} /* end __rxrpc_create_connection() */
+
+/*****************************************************************************/
+/*
+ * create a new connection record for outgoing connections
+ */
+int rxrpc_create_connection(struct rxrpc_transport *trans,
+			    __be16 port,
+			    __be32 addr,
+			    uint16_t service_id,
+			    void *security,
+			    struct rxrpc_connection **_conn)
+{
+	struct rxrpc_connection *candidate, *conn;
+	struct rxrpc_peer *peer;
+	struct list_head *_p;
+	__be32 connid;
+	int ret;
+
+	_enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
+
+	/* get a peer record */
+	ret = rxrpc_peer_lookup(trans, addr, &peer);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* allocate and initialise a connection record */
+	ret = __rxrpc_create_connection(peer, &candidate);
+	if (ret < 0) {
+		rxrpc_put_peer(peer);
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* fill in the specific bits */
+	candidate->addr.sin_family	= AF_INET;
+	candidate->addr.sin_port	= port;
+	candidate->addr.sin_addr.s_addr	= addr;
+
+	candidate->in_epoch		= rxrpc_epoch;
+	candidate->out_epoch		= rxrpc_epoch;
+	candidate->in_clientflag	= 0;
+	candidate->out_clientflag	= RXRPC_CLIENT_INITIATED;
+	candidate->service_id		= htons(service_id);
+
+	/* invent a unique connection ID */
+	write_lock(&peer->conn_idlock);
+
+ try_next_id:
+	connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
+	peer->conn_idcounter += RXRPC_MAXCALLS;
+
+	list_for_each(_p, &peer->conn_idlist) {
+		conn = list_entry(_p, struct rxrpc_connection, id_link);
+		if (connid == conn->conn_id)
+			goto try_next_id;
+		if (connid > conn->conn_id)
+			break;
+	}
+
+	_debug("selected candidate conn ID %x.%u",
+	       ntohl(peer->addr.s_addr), ntohl(connid));
+
+	candidate->conn_id = connid;
+	list_add_tail(&candidate->id_link, _p);
+
+	write_unlock(&peer->conn_idlock);
+
+	/* attach to peer */
+	candidate->peer = peer;
+
+	write_lock(&peer->conn_lock);
+
+	/* search the peer's transport graveyard list */
+	spin_lock(&peer->conn_gylock);
+	list_for_each(_p, &peer->conn_graveyard) {
+		conn = list_entry(_p, struct rxrpc_connection, link);
+		if (conn->addr.sin_port	== candidate->addr.sin_port	&&
+		    conn->security_ix	== candidate->security_ix	&&
+		    conn->service_id	== candidate->service_id	&& 
+		    conn->in_clientflag	== 0)
+			goto found_in_graveyard;
+	}
+	spin_unlock(&peer->conn_gylock);
+
+	/* pick the new candidate */
+	_debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
+	atomic_inc(&peer->conn_count);
+	conn = candidate;
+	candidate = NULL;
+
+ make_active:
+	list_add_tail(&conn->link, &peer->conn_active);
+	write_unlock(&peer->conn_lock);
+
+	if (candidate) {
+		write_lock(&peer->conn_idlock);
+		list_del(&candidate->id_link);
+		write_unlock(&peer->conn_idlock);
+
+		__RXACCT(atomic_dec(&rxrpc_connection_count));
+		kfree(candidate);
+	}
+	else {
+		down_write(&rxrpc_conns_sem);
+		list_add_tail(&conn->proc_link, &rxrpc_conns);
+		up_write(&rxrpc_conns_sem);
+	}
+
+	*_conn = conn;
+	_leave(" = 0 (%p)", conn);
+
+	return 0;
+
+	/* handle resurrecting a connection from the graveyard */
+ found_in_graveyard:
+	_debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
+	rxrpc_get_connection(conn);
+	rxrpc_krxtimod_del_timer(&conn->timeout);
+	list_del_init(&conn->link);
+	spin_unlock(&peer->conn_gylock);
+	goto make_active;
+} /* end rxrpc_create_connection() */
+
+/*****************************************************************************/
+/*
+ * lookup the connection for an incoming packet
+ * - create a new connection record for unrecorded incoming connections
+ */
+int rxrpc_connection_lookup(struct rxrpc_peer *peer,
+			    struct rxrpc_message *msg,
+			    struct rxrpc_connection **_conn)
+{
+	struct rxrpc_connection *conn, *candidate = NULL;
+	struct list_head *_p;
+	int ret, fresh = 0;
+	__be32 x_epoch, x_connid;
+	__be16 x_port, x_servid;
+	__u32 x_secix;
+	u8 x_clflag;
+
+	_enter("%p{{%hu}},%u,%hu",
+	       peer,
+	       peer->trans->port,
+	       ntohs(msg->pkt->h.uh->source),
+	       ntohs(msg->hdr.serviceId));
+
+	x_port		= msg->pkt->h.uh->source;
+	x_epoch		= msg->hdr.epoch;
+	x_clflag	= msg->hdr.flags & RXRPC_CLIENT_INITIATED;
+	x_connid	= htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
+	x_servid	= msg->hdr.serviceId;
+	x_secix		= msg->hdr.securityIndex;
+
+	/* [common case] search the transport's active list first */
+	read_lock(&peer->conn_lock);
+	list_for_each(_p, &peer->conn_active) {
+		conn = list_entry(_p, struct rxrpc_connection, link);
+		if (conn->addr.sin_port		== x_port	&&
+		    conn->in_epoch		== x_epoch	&&
+		    conn->conn_id		== x_connid	&&
+		    conn->security_ix		== x_secix	&&
+		    conn->service_id		== x_servid	&& 
+		    conn->in_clientflag		== x_clflag)
+			goto found_active;
+	}
+	read_unlock(&peer->conn_lock);
+
+	/* [uncommon case] not active 
+	 * - create a candidate for a new record if an inbound connection
+	 * - only examine the graveyard for an outbound connection
+	 */
+	if (x_clflag) {
+		ret = __rxrpc_create_connection(peer, &candidate);
+		if (ret < 0) {
+			_leave(" = %d", ret);
+			return ret;
+		}
+
+		/* fill in the specifics */
+		candidate->addr.sin_family	= AF_INET;
+		candidate->addr.sin_port	= x_port;
+		candidate->addr.sin_addr.s_addr = msg->pkt->nh.iph->saddr;
+		candidate->in_epoch		= x_epoch;
+		candidate->out_epoch		= x_epoch;
+		candidate->in_clientflag	= RXRPC_CLIENT_INITIATED;
+		candidate->out_clientflag	= 0;
+		candidate->conn_id		= x_connid;
+		candidate->service_id		= x_servid;
+		candidate->security_ix		= x_secix;
+	}
+
+	/* search the active list again, just in case it appeared whilst we
+	 * were busy */
+	write_lock(&peer->conn_lock);
+	list_for_each(_p, &peer->conn_active) {
+		conn = list_entry(_p, struct rxrpc_connection, link);
+		if (conn->addr.sin_port		== x_port	&&
+		    conn->in_epoch		== x_epoch	&&
+		    conn->conn_id		== x_connid	&&
+		    conn->security_ix		== x_secix	&&
+		    conn->service_id		== x_servid	&& 
+		    conn->in_clientflag		== x_clflag)
+			goto found_active_second_chance;
+	}
+
+	/* search the transport's graveyard list */
+	spin_lock(&peer->conn_gylock);
+	list_for_each(_p, &peer->conn_graveyard) {
+		conn = list_entry(_p, struct rxrpc_connection, link);
+		if (conn->addr.sin_port		== x_port	&&
+		    conn->in_epoch		== x_epoch	&&
+		    conn->conn_id		== x_connid	&&
+		    conn->security_ix		== x_secix	&&
+		    conn->service_id		== x_servid	&& 
+		    conn->in_clientflag		== x_clflag)
+			goto found_in_graveyard;
+	}
+	spin_unlock(&peer->conn_gylock);
+
+	/* outbound connections aren't created here */
+	if (!x_clflag) {
+		write_unlock(&peer->conn_lock);
+		_leave(" = -ENOENT");
+		return -ENOENT;
+	}
+
+	/* we can now add the new candidate to the list */
+	_debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
+	rxrpc_get_peer(peer);
+	conn = candidate;
+	candidate = NULL;
+	atomic_inc(&peer->conn_count);
+	fresh = 1;
+
+ make_active:
+	list_add_tail(&conn->link, &peer->conn_active);
+
+ success_uwfree:
+	write_unlock(&peer->conn_lock);
+
+	if (candidate) {
+		write_lock(&peer->conn_idlock);
+		list_del(&candidate->id_link);
+		write_unlock(&peer->conn_idlock);
+
+		__RXACCT(atomic_dec(&rxrpc_connection_count));
+		kfree(candidate);
+	}
+
+	if (fresh) {
+		down_write(&rxrpc_conns_sem);
+		list_add_tail(&conn->proc_link, &rxrpc_conns);
+		up_write(&rxrpc_conns_sem);
+	}
+
+ success:
+	*_conn = conn;
+	_leave(" = 0 (%p)", conn);
+	return 0;
+
+	/* handle the connection being found in the active list straight off */
+ found_active:
+	rxrpc_get_connection(conn);
+	read_unlock(&peer->conn_lock);
+	goto success;
+
+	/* handle resurrecting a connection from the graveyard */
+ found_in_graveyard:
+	_debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
+	rxrpc_get_peer(peer);
+	rxrpc_get_connection(conn);
+	rxrpc_krxtimod_del_timer(&conn->timeout);
+	list_del_init(&conn->link);
+	spin_unlock(&peer->conn_gylock);
+	goto make_active;
+
+	/* handle finding the connection on the second time through the active
+	 * list */
+ found_active_second_chance:
+	rxrpc_get_connection(conn);
+	goto success_uwfree;
+
+} /* end rxrpc_connection_lookup() */
+
+/*****************************************************************************/
+/*
+ * finish using a connection record
+ * - it will be transferred to the peer's connection graveyard when refcount
+ *   reaches 0
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+	struct rxrpc_peer *peer;
+
+	if (!conn)
+		return;
+
+	_enter("%p{u=%d p=%hu}",
+	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
+
+	peer = conn->peer;
+	spin_lock(&peer->conn_gylock);
+
+	/* sanity check */
+	if (atomic_read(&conn->usage) <= 0)
+		BUG();
+
+	if (likely(!atomic_dec_and_test(&conn->usage))) {
+		spin_unlock(&peer->conn_gylock);
+		_leave("");
+		return;
+	}
+
+	/* move to graveyard queue */
+	_debug("burying connection: {%08x}", ntohl(conn->conn_id));
+	list_del(&conn->link);
+	list_add_tail(&conn->link, &peer->conn_graveyard);
+
+	rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
+
+	spin_unlock(&peer->conn_gylock);
+
+	rxrpc_put_peer(conn->peer);
+
+	_leave(" [killed]");
+} /* end rxrpc_put_connection() */
+
+/*****************************************************************************/
+/*
+ * free a connection record
+ */
+static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("%p{u=%d p=%hu}",
+	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
+
+	peer = conn->peer;
+
+	if (atomic_read(&conn->usage) < 0)
+		BUG();
+
+	/* remove from graveyard if still dead */
+	spin_lock(&peer->conn_gylock);
+	if (atomic_read(&conn->usage) == 0) {
+		list_del_init(&conn->link);
+	}
+	else {
+		conn = NULL;
+	}
+	spin_unlock(&peer->conn_gylock);
+
+	if (!conn) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	_debug("--- Destroying Connection %p{%08x} ---",
+	       conn, ntohl(conn->conn_id));
+
+	down_write(&rxrpc_conns_sem);
+	list_del(&conn->proc_link);
+	up_write(&rxrpc_conns_sem);
+
+	write_lock(&peer->conn_idlock);
+	list_del(&conn->id_link);
+	write_unlock(&peer->conn_idlock);
+
+	__RXACCT(atomic_dec(&rxrpc_connection_count));
+	kfree(conn);
+
+	/* if the graveyard is now empty, wake up anyone waiting for that */
+	if (atomic_dec_and_test(&peer->conn_count))
+		wake_up(&peer->conn_gy_waitq);
+
+	_leave(" [destroyed]");
+} /* end rxrpc_conn_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * clear all connection records from a peer endpoint
+ */
+void rxrpc_conn_clearall(struct rxrpc_peer *peer)
+{
+	DECLARE_WAITQUEUE(myself, current);
+
+	struct rxrpc_connection *conn;
+	int err;
+
+	_enter("%p", peer);
+
+	/* there shouldn't be any active conns remaining */
+	if (!list_empty(&peer->conn_active))
+		BUG();
+
+	/* manually timeout all conns in the graveyard */
+	spin_lock(&peer->conn_gylock);
+	while (!list_empty(&peer->conn_graveyard)) {
+		conn = list_entry(peer->conn_graveyard.next,
+				  struct rxrpc_connection, link);
+		err = rxrpc_krxtimod_del_timer(&conn->timeout);
+		spin_unlock(&peer->conn_gylock);
+
+		if (err == 0)
+			rxrpc_conn_do_timeout(conn);
+
+		spin_lock(&peer->conn_gylock);
+	}
+	spin_unlock(&peer->conn_gylock);
+
+	/* wait for the the conn graveyard to be completely cleared */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	add_wait_queue(&peer->conn_gy_waitq, &myself);
+
+	while (atomic_read(&peer->conn_count) != 0) {
+		schedule();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	remove_wait_queue(&peer->conn_gy_waitq, &myself);
+	set_current_state(TASK_RUNNING);
+
+	_leave("");
+} /* end rxrpc_conn_clearall() */
+
+/*****************************************************************************/
+/*
+ * allocate and prepare a message for sending out through the transport
+ * endpoint
+ */
+int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
+		      struct rxrpc_call *call,
+		      uint8_t type,
+		      int dcount,
+		      struct kvec diov[],
+		      int alloc_flags,
+		      struct rxrpc_message **_msg)
+{
+	struct rxrpc_message *msg;
+	int loop;
+
+	_enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
+
+	if (dcount > 3) {
+		_leave(" = -EINVAL");
+		return -EINVAL;
+	}
+
+	msg = kmalloc(sizeof(struct rxrpc_message), alloc_flags);
+	if (!msg) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(msg, 0, sizeof(*msg));
+	atomic_set(&msg->usage, 1);
+
+	INIT_LIST_HEAD(&msg->link);
+
+	msg->state = RXRPC_MSG_PREPARED;
+
+	msg->hdr.epoch		= conn->out_epoch;
+	msg->hdr.cid		= conn->conn_id | (call ? call->chan_ix : 0);
+	msg->hdr.callNumber	= call ? call->call_id : 0;
+	msg->hdr.type		= type;
+	msg->hdr.flags		= conn->out_clientflag;
+	msg->hdr.securityIndex	= conn->security_ix;
+	msg->hdr.serviceId	= conn->service_id;
+
+	/* generate sequence numbers for data packets */
+	if (call) {
+		switch (type) {
+		case RXRPC_PACKET_TYPE_DATA:
+			msg->seq = ++call->snd_seq_count;
+			msg->hdr.seq = htonl(msg->seq);
+			break;
+		case RXRPC_PACKET_TYPE_ACK:
+			/* ACK sequence numbers are complicated. The following
+			 * may be wrong:
+			 * - jumbo packet ACKs should have a seq number
+			 * - normal ACKs should not
+			 */
+		default:
+			break;
+		}
+	}
+
+	msg->dcount = dcount + 1;
+	msg->dsize = sizeof(msg->hdr);
+	msg->data[0].iov_len = sizeof(msg->hdr);
+	msg->data[0].iov_base = &msg->hdr;
+
+	for (loop=0; loop < dcount; loop++) {
+		msg->dsize += diov[loop].iov_len;
+		msg->data[loop+1].iov_len  = diov[loop].iov_len;
+		msg->data[loop+1].iov_base = diov[loop].iov_base;
+	}
+
+	__RXACCT(atomic_inc(&rxrpc_message_count));
+	*_msg = msg;
+	_leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
+	return 0;
+} /* end rxrpc_conn_newmsg() */
+
+/*****************************************************************************/
+/*
+ * free a message
+ */
+void __rxrpc_put_message(struct rxrpc_message *msg)
+{
+	int loop;
+
+	_enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
+
+	if (msg->pkt)
+		kfree_skb(msg->pkt);
+	rxrpc_put_connection(msg->conn);
+
+	for (loop = 0; loop < 8; loop++)
+		if (test_bit(loop, &msg->dfree))
+			kfree(msg->data[loop].iov_base);
+
+	__RXACCT(atomic_dec(&rxrpc_message_count));
+	kfree(msg);
+
+	_leave("");
+} /* end __rxrpc_put_message() */
+
+/*****************************************************************************/
+/*
+ * send a message out through the transport endpoint
+ */
+int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
+		       struct rxrpc_message *msg)
+{
+	struct msghdr msghdr;
+	int ret;
+
+	_enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
+
+	/* fill in some fields in the header */
+	spin_lock(&conn->lock);
+	msg->hdr.serial = htonl(++conn->serial_counter);
+	msg->rttdone = 0;
+	spin_unlock(&conn->lock);
+
+	/* set up the message to be transmitted */
+	msghdr.msg_name		= &conn->addr;
+	msghdr.msg_namelen	= sizeof(conn->addr);
+	msghdr.msg_control	= NULL;
+	msghdr.msg_controllen	= 0;
+	msghdr.msg_flags	= MSG_CONFIRM | MSG_DONTWAIT;
+
+	_net("Sending message type %d of %Zd bytes to %08x:%d",
+	     msg->hdr.type,
+	     msg->dsize,
+	     ntohl(conn->addr.sin_addr.s_addr),
+	     ntohs(conn->addr.sin_port));
+
+	/* send the message */
+	ret = kernel_sendmsg(conn->trans->socket, &msghdr,
+			     msg->data, msg->dcount, msg->dsize);
+	if (ret < 0) {
+		msg->state = RXRPC_MSG_ERROR;
+	} else {
+		msg->state = RXRPC_MSG_SENT;
+		ret = 0;
+
+		spin_lock(&conn->lock);
+		do_gettimeofday(&conn->atime);
+		msg->stamp = conn->atime;
+		spin_unlock(&conn->lock);
+	}
+
+	_leave(" = %d", ret);
+
+	return ret;
+} /* end rxrpc_conn_sendmsg() */
+
+/*****************************************************************************/
+/*
+ * deal with a subsequent call packet
+ */
+int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
+				   struct rxrpc_call *call,
+				   struct rxrpc_message *msg)
+{
+	struct rxrpc_message *pmsg;
+	struct list_head *_p;
+	unsigned cix, seq;
+	int ret = 0;
+
+	_enter("%p,%p,%p", conn, call, msg);
+
+	if (!call) {
+		cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
+
+		spin_lock(&conn->lock);
+		call = conn->channels[cix];
+
+		if (!call || call->call_id != msg->hdr.callNumber) {
+			spin_unlock(&conn->lock);
+			rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
+			goto out;
+		}
+		else {
+			rxrpc_get_call(call);
+			spin_unlock(&conn->lock);
+		}
+	}
+	else {
+		rxrpc_get_call(call);
+	}
+
+	_proto("Received packet %%%u [%u] on call %hu:%u:%u",
+	       ntohl(msg->hdr.serial),
+	       ntohl(msg->hdr.seq),
+	       ntohs(msg->hdr.serviceId),
+	       ntohl(conn->conn_id),
+	       ntohl(call->call_id));
+
+	call->pkt_rcv_count++;
+
+	if (msg->pkt->dst && msg->pkt->dst->dev)
+		conn->peer->if_mtu =
+			msg->pkt->dst->dev->mtu -
+			msg->pkt->dst->dev->hard_header_len;
+
+	/* queue on the call in seq order */
+	rxrpc_get_message(msg);
+	seq = msg->seq;
+
+	spin_lock(&call->lock);
+	list_for_each(_p, &call->rcv_receiveq) {
+		pmsg = list_entry(_p, struct rxrpc_message, link);
+		if (pmsg->seq > seq)
+			break;
+	}
+	list_add_tail(&msg->link, _p);
+
+	/* reset the activity timeout */
+	call->flags |= RXRPC_CALL_RCV_PKT;
+	mod_timer(&call->rcv_timeout,jiffies + rxrpc_call_rcv_timeout * HZ);
+
+	spin_unlock(&call->lock);
+
+	rxrpc_krxiod_queue_call(call);
+
+	rxrpc_put_call(call);
+ out:
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_conn_receive_call_packet() */
+
+/*****************************************************************************/
+/*
+ * handle an ICMP error being applied to a connection
+ */
+void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
+			     int local, int errno)
+{
+	struct rxrpc_call *calls[4];
+	int loop;
+
+	_enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
+
+	/* get a ref to all my calls in one go */
+	memset(calls, 0, sizeof(calls));
+	spin_lock(&conn->lock);
+
+	for (loop = 3; loop >= 0; loop--) {
+		if (conn->channels[loop]) {
+			calls[loop] = conn->channels[loop];
+			rxrpc_get_call(calls[loop]);
+		}
+	}
+
+	spin_unlock(&conn->lock);
+
+	/* now kick them all */
+	for (loop = 3; loop >= 0; loop--) {
+		if (calls[loop]) {
+			rxrpc_call_handle_error(calls[loop], local, errno);
+			rxrpc_put_call(calls[loop]);
+		}
+	}
+
+	_leave("");
+} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
new file mode 100644
index 00000000000..70e52f6b0b6
--- /dev/null
+++ b/net/rxrpc/internal.h
@@ -0,0 +1,106 @@
+/* internal.h: internal Rx RPC stuff
+ *
+ * Copyright (c) 2002   David Howells (dhowells@redhat.com).
+ */
+
+#ifndef RXRPC_INTERNAL_H
+#define RXRPC_INTERNAL_H
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+
+/*
+ * debug accounting
+ */
+#if 1
+#define __RXACCT_DECL(X) X
+#define __RXACCT(X) do { X; } while(0) 
+#else
+#define __RXACCT_DECL(X)
+#define __RXACCT(X) do { } while(0)
+#endif
+
+__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
+__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
+__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
+__RXACCT_DECL(extern atomic_t rxrpc_call_count);
+__RXACCT_DECL(extern atomic_t rxrpc_message_count);
+
+/*
+ * debug tracing
+ */
+#define kenter(FMT, a...)	printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
+#define kleave(FMT, a...)	printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
+#define kdebug(FMT, a...)	printk("    "FMT"\n" , ##a)
+#define kproto(FMT, a...)	printk("### "FMT"\n" , ##a)
+#define knet(FMT, a...)		printk("    "FMT"\n" , ##a)
+
+#if 0
+#define _enter(FMT, a...)	kenter(FMT , ##a)
+#define _leave(FMT, a...)	kleave(FMT , ##a)
+#define _debug(FMT, a...)	kdebug(FMT , ##a)
+#define _proto(FMT, a...)	kproto(FMT , ##a)
+#define _net(FMT, a...)		knet(FMT , ##a)
+#else
+#define _enter(FMT, a...)	do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
+#define _leave(FMT, a...)	do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
+#define _debug(FMT, a...)	do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
+#define _proto(FMT, a...)	do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
+#define _net(FMT, a...)		do { if (rxrpc_knet)   knet  (FMT , ##a); } while(0)
+#endif
+
+static inline void rxrpc_discard_my_signals(void)
+{
+	while (signal_pending(current)) {
+		siginfo_t sinfo;
+
+		spin_lock_irq(&current->sighand->siglock);
+		dequeue_signal(current, &current->blocked, &sinfo);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
+/*
+ * call.c
+ */
+extern struct list_head rxrpc_calls;
+extern struct rw_semaphore rxrpc_calls_sem;
+
+/*
+ * connection.c
+ */
+extern struct list_head rxrpc_conns;
+extern struct rw_semaphore rxrpc_conns_sem;
+extern unsigned long rxrpc_conn_timeout;
+
+extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
+
+/*
+ * peer.c
+ */
+extern struct list_head rxrpc_peers;
+extern struct rw_semaphore rxrpc_peers_sem;
+extern unsigned long rxrpc_peer_timeout;
+
+extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
+				     struct rxrpc_message *msg,
+				     struct rxrpc_message *resp);
+
+extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
+
+
+/*
+ * proc.c
+ */
+#ifdef CONFIG_PROC_FS
+extern int rxrpc_proc_init(void);
+extern void rxrpc_proc_cleanup(void);
+#endif
+
+/*
+ * transport.c
+ */
+extern struct list_head rxrpc_proc_transports;
+extern struct rw_semaphore rxrpc_proc_transports_sem;
+
+#endif /* RXRPC_INTERNAL_H */
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
new file mode 100644
index 00000000000..2b537f425a1
--- /dev/null
+++ b/net/rxrpc/krxiod.c
@@ -0,0 +1,261 @@
+/* krxiod.c: Rx I/O daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <rxrpc/krxiod.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/call.h>
+#include "internal.h"
+
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
+static DECLARE_COMPLETION(rxrpc_krxiod_dead);
+
+static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
+
+static LIST_HEAD(rxrpc_krxiod_transportq);
+static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
+
+static LIST_HEAD(rxrpc_krxiod_callq);
+static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
+
+static volatile int rxrpc_krxiod_die;
+
+/*****************************************************************************/
+/*
+ * Rx I/O daemon
+ */
+static int rxrpc_krxiod(void *arg)
+{
+	DECLARE_WAITQUEUE(krxiod,current);
+
+	printk("Started krxiod %d\n",current->pid);
+
+	daemonize("krxiod");
+
+	/* loop around waiting for work to do */
+	do {
+		/* wait for work or to be told to exit */
+		_debug("### Begin Wait");
+		if (!atomic_read(&rxrpc_krxiod_qcount)) {
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
+
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (atomic_read(&rxrpc_krxiod_qcount) ||
+				    rxrpc_krxiod_die ||
+				    signal_pending(current))
+					break;
+
+				schedule();
+			}
+
+			remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
+			set_current_state(TASK_RUNNING);
+		}
+		_debug("### End Wait");
+
+		/* do work if been given some to do */
+		_debug("### Begin Work");
+
+		/* see if there's a transport in need of attention */
+		if (!list_empty(&rxrpc_krxiod_transportq)) {
+			struct rxrpc_transport *trans = NULL;
+
+			spin_lock_irq(&rxrpc_krxiod_transportq_lock);
+
+			if (!list_empty(&rxrpc_krxiod_transportq)) {
+				trans = list_entry(
+					rxrpc_krxiod_transportq.next,
+					struct rxrpc_transport,
+					krxiodq_link);
+
+				list_del_init(&trans->krxiodq_link);
+				atomic_dec(&rxrpc_krxiod_qcount);
+
+				/* make sure it hasn't gone away and doesn't go
+				 * away */
+				if (atomic_read(&trans->usage)>0)
+					rxrpc_get_transport(trans);
+				else
+					trans = NULL;
+			}
+
+			spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
+
+			if (trans) {
+				rxrpc_trans_receive_packet(trans);
+				rxrpc_put_transport(trans);
+			}
+		}
+
+		/* see if there's a call in need of attention */
+		if (!list_empty(&rxrpc_krxiod_callq)) {
+			struct rxrpc_call *call = NULL;
+
+			spin_lock_irq(&rxrpc_krxiod_callq_lock);
+
+			if (!list_empty(&rxrpc_krxiod_callq)) {
+				call = list_entry(rxrpc_krxiod_callq.next,
+						  struct rxrpc_call,
+						  rcv_krxiodq_lk);
+				list_del_init(&call->rcv_krxiodq_lk);
+				atomic_dec(&rxrpc_krxiod_qcount);
+
+				/* make sure it hasn't gone away and doesn't go
+				 * away */
+				if (atomic_read(&call->usage) > 0) {
+					_debug("@@@ KRXIOD"
+					       " Begin Attend Call %p", call);
+					rxrpc_get_call(call);
+				}
+				else {
+					call = NULL;
+				}
+			}
+
+			spin_unlock_irq(&rxrpc_krxiod_callq_lock);
+
+			if (call) {
+				rxrpc_call_do_stuff(call);
+				rxrpc_put_call(call);
+				_debug("@@@ KRXIOD End Attend Call %p", call);
+			}
+		}
+
+		_debug("### End Work");
+
+		try_to_freeze(PF_FREEZE);
+
+                /* discard pending signals */
+		rxrpc_discard_my_signals();
+
+	} while (!rxrpc_krxiod_die);
+
+	/* and that's all */
+	complete_and_exit(&rxrpc_krxiod_dead, 0);
+
+} /* end rxrpc_krxiod() */
+
+/*****************************************************************************/
+/*
+ * start up a krxiod daemon
+ */
+int __init rxrpc_krxiod_init(void)
+{
+	return kernel_thread(rxrpc_krxiod, NULL, 0);
+
+} /* end rxrpc_krxiod_init() */
+
+/*****************************************************************************/
+/*
+ * kill the krxiod daemon and wait for it to complete
+ */
+void rxrpc_krxiod_kill(void)
+{
+	rxrpc_krxiod_die = 1;
+	wake_up_all(&rxrpc_krxiod_sleepq);
+	wait_for_completion(&rxrpc_krxiod_dead);
+
+} /* end rxrpc_krxiod_kill() */
+
+/*****************************************************************************/
+/*
+ * queue a transport for attention by krxiod
+ */
+void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
+{
+	unsigned long flags;
+
+	_enter("");
+
+	if (list_empty(&trans->krxiodq_link)) {
+		spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
+
+		if (list_empty(&trans->krxiodq_link)) {
+			if (atomic_read(&trans->usage) > 0) {
+				list_add_tail(&trans->krxiodq_link,
+					      &rxrpc_krxiod_transportq);
+				atomic_inc(&rxrpc_krxiod_qcount);
+			}
+		}
+
+		spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
+		wake_up_all(&rxrpc_krxiod_sleepq);
+	}
+
+	_leave("");
+
+} /* end rxrpc_krxiod_queue_transport() */
+
+/*****************************************************************************/
+/*
+ * dequeue a transport from krxiod's attention queue
+ */
+void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
+{
+	unsigned long flags;
+
+	_enter("");
+
+	spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
+	if (!list_empty(&trans->krxiodq_link)) {
+		list_del_init(&trans->krxiodq_link);
+		atomic_dec(&rxrpc_krxiod_qcount);
+	}
+	spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
+
+	_leave("");
+
+} /* end rxrpc_krxiod_dequeue_transport() */
+
+/*****************************************************************************/
+/*
+ * queue a call for attention by krxiod
+ */
+void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
+{
+	unsigned long flags;
+
+	if (list_empty(&call->rcv_krxiodq_lk)) {
+		spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
+		if (atomic_read(&call->usage) > 0) {
+			list_add_tail(&call->rcv_krxiodq_lk,
+				      &rxrpc_krxiod_callq);
+			atomic_inc(&rxrpc_krxiod_qcount);
+		}
+		spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
+	}
+	wake_up_all(&rxrpc_krxiod_sleepq);
+
+} /* end rxrpc_krxiod_queue_call() */
+
+/*****************************************************************************/
+/*
+ * dequeue a call from krxiod's attention queue
+ */
+void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
+	if (!list_empty(&call->rcv_krxiodq_lk)) {
+		list_del_init(&call->rcv_krxiodq_lk);
+		atomic_dec(&rxrpc_krxiod_qcount);
+	}
+	spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
+
+} /* end rxrpc_krxiod_dequeue_call() */
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
new file mode 100644
index 00000000000..6020c89d922
--- /dev/null
+++ b/net/rxrpc/krxsecd.c
@@ -0,0 +1,270 @@
+/* krxsecd.c: Rx security daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This daemon deals with:
+ * - consulting the application as to whether inbound peers and calls should be authorised
+ * - generating security challenges for inbound connections
+ * - responding to security challenges on outbound connections
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <rxrpc/krxsecd.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/message.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/call.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include "internal.h"
+
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
+static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
+static volatile int rxrpc_krxsecd_die;
+
+static atomic_t rxrpc_krxsecd_qcount;
+
+/* queue of unprocessed inbound messages with seqno #1 and
+ * RXRPC_CLIENT_INITIATED flag set */
+static LIST_HEAD(rxrpc_krxsecd_initmsgq);
+static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
+
+static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
+
+/*****************************************************************************/
+/*
+ * Rx security daemon
+ */
+static int rxrpc_krxsecd(void *arg)
+{
+	DECLARE_WAITQUEUE(krxsecd, current);
+
+	int die;
+
+	printk("Started krxsecd %d\n", current->pid);
+
+	daemonize("krxsecd");
+
+	/* loop around waiting for work to do */
+	do {
+		/* wait for work or to be told to exit */
+		_debug("### Begin Wait");
+		if (!atomic_read(&rxrpc_krxsecd_qcount)) {
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
+
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (atomic_read(&rxrpc_krxsecd_qcount) ||
+				    rxrpc_krxsecd_die ||
+				    signal_pending(current))
+					break;
+
+				schedule();
+			}
+
+			remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
+			set_current_state(TASK_RUNNING);
+		}
+		die = rxrpc_krxsecd_die;
+		_debug("### End Wait");
+
+		/* see if there're incoming calls in need of authenticating */
+		_debug("### Begin Inbound Calls");
+
+		if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
+			struct rxrpc_message *msg = NULL;
+
+			spin_lock(&rxrpc_krxsecd_initmsgq_lock);
+
+			if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
+				msg = list_entry(rxrpc_krxsecd_initmsgq.next,
+						 struct rxrpc_message, link);
+				list_del_init(&msg->link);
+				atomic_dec(&rxrpc_krxsecd_qcount);
+			}
+
+			spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
+
+			if (msg) {
+				rxrpc_krxsecd_process_incoming_call(msg);
+				rxrpc_put_message(msg);
+			}
+		}
+
+		_debug("### End Inbound Calls");
+
+		try_to_freeze(PF_FREEZE);
+
+                /* discard pending signals */
+		rxrpc_discard_my_signals();
+
+	} while (!die);
+
+	/* and that's all */
+	complete_and_exit(&rxrpc_krxsecd_dead, 0);
+
+} /* end rxrpc_krxsecd() */
+
+/*****************************************************************************/
+/*
+ * start up a krxsecd daemon
+ */
+int __init rxrpc_krxsecd_init(void)
+{
+	return kernel_thread(rxrpc_krxsecd, NULL, 0);
+
+} /* end rxrpc_krxsecd_init() */
+
+/*****************************************************************************/
+/*
+ * kill the krxsecd daemon and wait for it to complete
+ */
+void rxrpc_krxsecd_kill(void)
+{
+	rxrpc_krxsecd_die = 1;
+	wake_up_all(&rxrpc_krxsecd_sleepq);
+	wait_for_completion(&rxrpc_krxsecd_dead);
+
+} /* end rxrpc_krxsecd_kill() */
+
+/*****************************************************************************/
+/*
+ * clear all pending incoming calls for the specified transport
+ */
+void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
+{
+	LIST_HEAD(tmp);
+
+	struct rxrpc_message *msg;
+	struct list_head *_p, *_n;
+
+	_enter("%p",trans);
+
+	/* move all the messages for this transport onto a temp list */
+	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
+
+	list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
+		msg = list_entry(_p, struct rxrpc_message, link);
+		if (msg->trans == trans) {
+			list_del(&msg->link);
+			list_add_tail(&msg->link, &tmp);
+			atomic_dec(&rxrpc_krxsecd_qcount);
+		}
+	}
+
+	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
+
+	/* zap all messages on the temp list */
+	while (!list_empty(&tmp)) {
+		msg = list_entry(tmp.next, struct rxrpc_message, link);
+		list_del_init(&msg->link);
+		rxrpc_put_message(msg);
+	}
+
+	_leave("");
+} /* end rxrpc_krxsecd_clear_transport() */
+
+/*****************************************************************************/
+/*
+ * queue a message on the incoming calls list
+ */
+void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
+{
+	_enter("%p", msg);
+
+	/* queue for processing by krxsecd */
+	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
+
+	if (!rxrpc_krxsecd_die) {
+		rxrpc_get_message(msg);
+		list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
+		atomic_inc(&rxrpc_krxsecd_qcount);
+	}
+
+	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
+
+	wake_up(&rxrpc_krxsecd_sleepq);
+
+	_leave("");
+} /* end rxrpc_krxsecd_queue_incoming_call() */
+
+/*****************************************************************************/
+/*
+ * process the initial message of an incoming call
+ */
+void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
+{
+	struct rxrpc_transport *trans = msg->trans;
+	struct rxrpc_service *srv;
+	struct rxrpc_call *call;
+	struct list_head *_p;
+	unsigned short sid;
+	int ret;
+
+	_enter("%p{tr=%p}", msg, trans);
+
+	ret = rxrpc_incoming_call(msg->conn, msg, &call);
+	if (ret < 0)
+		goto out;
+
+	/* find the matching service on the transport */
+	sid = ntohs(msg->hdr.serviceId);
+	srv = NULL;
+
+	spin_lock(&trans->lock);
+	list_for_each(_p, &trans->services) {
+		srv = list_entry(_p, struct rxrpc_service, link);
+		if (srv->service_id == sid && try_module_get(srv->owner)) {
+			/* found a match (made sure it won't vanish) */
+			_debug("found service '%s'", srv->name);
+			call->owner = srv->owner;
+			break;
+		}
+	}
+	spin_unlock(&trans->lock);
+
+	/* report the new connection
+	 * - the func must inc the call's usage count to keep it
+	 */
+	ret = -ENOENT;
+	if (_p != &trans->services) {
+		/* attempt to accept the call */
+		call->conn->service = srv;
+		call->app_attn_func = srv->attn_func;
+		call->app_error_func = srv->error_func;
+		call->app_aemap_func = srv->aemap_func;
+
+		ret = srv->new_call(call);
+
+		/* send an abort if an error occurred */
+		if (ret < 0) {
+			rxrpc_call_abort(call, ret);
+		}
+		else {
+			/* formally receive and ACK the new packet */
+			ret = rxrpc_conn_receive_call_packet(call->conn,
+							     call, msg);
+		}
+	}
+
+	rxrpc_put_call(call);
+ out:
+	if (ret < 0)
+		rxrpc_trans_immediate_abort(trans, msg, ret);
+
+	_leave(" (%d)", ret);
+} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
new file mode 100644
index 00000000000..249c2b0290b
--- /dev/null
+++ b/net/rxrpc/krxtimod.c
@@ -0,0 +1,203 @@
+/* krxtimod.c: RXRPC timeout daemon
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/krxtimod.h>
+#include <asm/errno.h>
+#include "internal.h"
+
+static DECLARE_COMPLETION(krxtimod_alive);
+static DECLARE_COMPLETION(krxtimod_dead);
+static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
+static int krxtimod_die;
+
+static LIST_HEAD(krxtimod_list);
+static DEFINE_SPINLOCK(krxtimod_lock);
+
+static int krxtimod(void *arg);
+
+/*****************************************************************************/
+/*
+ * start the timeout daemon
+ */
+int rxrpc_krxtimod_start(void)
+{
+	int ret;
+
+	ret = kernel_thread(krxtimod, NULL, 0);
+	if (ret < 0)
+		return ret;
+
+	wait_for_completion(&krxtimod_alive);
+
+	return ret;
+} /* end rxrpc_krxtimod_start() */
+
+/*****************************************************************************/
+/*
+ * stop the timeout daemon
+ */
+void rxrpc_krxtimod_kill(void)
+{
+	/* get rid of my daemon */
+	krxtimod_die = 1;
+	wake_up(&krxtimod_sleepq);
+	wait_for_completion(&krxtimod_dead);
+
+} /* end rxrpc_krxtimod_kill() */
+
+/*****************************************************************************/
+/*
+ * timeout processing daemon
+ */
+static int krxtimod(void *arg)
+{
+	DECLARE_WAITQUEUE(myself, current);
+
+	rxrpc_timer_t *timer;
+
+	printk("Started krxtimod %d\n", current->pid);
+
+	daemonize("krxtimod");
+
+	complete(&krxtimod_alive);
+
+	/* loop around looking for things to attend to */
+ loop:
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&krxtimod_sleepq, &myself);
+
+	for (;;) {
+		unsigned long jif;
+		signed long timeout;
+
+		/* deal with the server being asked to die */
+		if (krxtimod_die) {
+			remove_wait_queue(&krxtimod_sleepq, &myself);
+			_leave("");
+			complete_and_exit(&krxtimod_dead, 0);
+		}
+
+		try_to_freeze(PF_FREEZE);
+
+		/* discard pending signals */
+		rxrpc_discard_my_signals();
+
+		/* work out the time to elapse before the next event */
+		spin_lock(&krxtimod_lock);
+		if (list_empty(&krxtimod_list)) {
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
+		else {
+			timer = list_entry(krxtimod_list.next,
+					   rxrpc_timer_t, link);
+			timeout = timer->timo_jif;
+			jif = jiffies;
+
+			if (time_before_eq((unsigned long) timeout, jif))
+				goto immediate;
+
+			else {
+				timeout = (long) timeout - (long) jiffies;
+			}
+		}
+		spin_unlock(&krxtimod_lock);
+
+		schedule_timeout(timeout);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+
+	/* the thing on the front of the queue needs processing
+	 * - we come here with the lock held and timer pointing to the expired
+	 *   entry
+	 */
+ immediate:
+	remove_wait_queue(&krxtimod_sleepq, &myself);
+	set_current_state(TASK_RUNNING);
+
+	_debug("@@@ Begin Timeout of %p", timer);
+
+	/* dequeue the timer */
+	list_del_init(&timer->link);
+	spin_unlock(&krxtimod_lock);
+
+	/* call the timeout function */
+	timer->ops->timed_out(timer);
+
+	_debug("@@@ End Timeout");
+	goto loop;
+
+} /* end krxtimod() */
+
+/*****************************************************************************/
+/*
+ * (re-)queue a timer
+ */
+void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
+{
+	struct list_head *_p;
+	rxrpc_timer_t *ptimer;
+
+	_enter("%p,%lu", timer, timeout);
+
+	spin_lock(&krxtimod_lock);
+
+	list_del(&timer->link);
+
+	/* the timer was deferred or reset - put it back in the queue at the
+	 * right place */
+	timer->timo_jif = jiffies + timeout;
+
+	list_for_each(_p, &krxtimod_list) {
+		ptimer = list_entry(_p, rxrpc_timer_t, link);
+		if (time_before(timer->timo_jif, ptimer->timo_jif))
+			break;
+	}
+
+	list_add_tail(&timer->link, _p); /* insert before stopping point */
+
+	spin_unlock(&krxtimod_lock);
+
+	wake_up(&krxtimod_sleepq);
+
+	_leave("");
+} /* end rxrpc_krxtimod_add_timer() */
+
+/*****************************************************************************/
+/*
+ * dequeue a timer
+ * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
+ */
+int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
+{
+	int ret = 0;
+
+	_enter("%p", timer);
+
+	spin_lock(&krxtimod_lock);
+
+	if (list_empty(&timer->link))
+		ret = -ENOENT;
+	else
+		list_del_init(&timer->link);
+
+	spin_unlock(&krxtimod_lock);
+
+	wake_up(&krxtimod_sleepq);
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_krxtimod_del_timer() */
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
new file mode 100644
index 00000000000..36fdcbcd80d
--- /dev/null
+++ b/net/rxrpc/main.c
@@ -0,0 +1,180 @@
+/* main.c: Rx RPC interface
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/krxiod.h>
+#include <rxrpc/krxsecd.h>
+#include <rxrpc/krxtimod.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include "internal.h"
+
+MODULE_DESCRIPTION("Rx RPC implementation");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+__be32 rxrpc_epoch;
+
+/*****************************************************************************/
+/*
+ * initialise the Rx module
+ */
+static int __init rxrpc_initialise(void)
+{
+	int ret;
+
+	/* my epoch value */
+	rxrpc_epoch = htonl(xtime.tv_sec);
+
+	/* register the /proc interface */
+#ifdef CONFIG_PROC_FS
+	ret = rxrpc_proc_init();
+	if (ret<0)
+		return ret;
+#endif
+
+	/* register the sysctl files */
+#ifdef CONFIG_SYSCTL
+	ret = rxrpc_sysctl_init();
+	if (ret<0)
+		goto error_proc;
+#endif
+
+	/* start the krxtimod daemon */
+	ret = rxrpc_krxtimod_start();
+	if (ret<0)
+		goto error_sysctl;
+
+	/* start the krxiod daemon */
+	ret = rxrpc_krxiod_init();
+	if (ret<0)
+		goto error_krxtimod;
+
+	/* start the krxsecd daemon */
+	ret = rxrpc_krxsecd_init();
+	if (ret<0)
+		goto error_krxiod;
+
+	kdebug("\n\n");
+
+	return 0;
+
+ error_krxiod:
+	rxrpc_krxiod_kill();
+ error_krxtimod:
+	rxrpc_krxtimod_kill();
+ error_sysctl:
+#ifdef CONFIG_SYSCTL
+	rxrpc_sysctl_cleanup();
+#endif
+ error_proc:
+#ifdef CONFIG_PROC_FS
+	rxrpc_proc_cleanup();
+#endif
+	return ret;
+} /* end rxrpc_initialise() */
+
+module_init(rxrpc_initialise);
+
+/*****************************************************************************/
+/*
+ * clean up the Rx module
+ */
+static void __exit rxrpc_cleanup(void)
+{
+	kenter("");
+
+	__RXACCT(printk("Outstanding Messages   : %d\n",
+			atomic_read(&rxrpc_message_count)));
+	__RXACCT(printk("Outstanding Calls      : %d\n",
+			atomic_read(&rxrpc_call_count)));
+	__RXACCT(printk("Outstanding Connections: %d\n",
+			atomic_read(&rxrpc_connection_count)));
+	__RXACCT(printk("Outstanding Peers      : %d\n",
+			atomic_read(&rxrpc_peer_count)));
+	__RXACCT(printk("Outstanding Transports : %d\n",
+			atomic_read(&rxrpc_transport_count)));
+
+	rxrpc_krxsecd_kill();
+	rxrpc_krxiod_kill();
+	rxrpc_krxtimod_kill();
+#ifdef CONFIG_SYSCTL
+	rxrpc_sysctl_cleanup();
+#endif
+#ifdef CONFIG_PROC_FS
+	rxrpc_proc_cleanup();
+#endif
+
+	__RXACCT(printk("Outstanding Messages   : %d\n",
+			atomic_read(&rxrpc_message_count)));
+	__RXACCT(printk("Outstanding Calls      : %d\n",
+			atomic_read(&rxrpc_call_count)));
+	__RXACCT(printk("Outstanding Connections: %d\n",
+			atomic_read(&rxrpc_connection_count)));
+	__RXACCT(printk("Outstanding Peers      : %d\n",
+			atomic_read(&rxrpc_peer_count)));
+	__RXACCT(printk("Outstanding Transports : %d\n",
+			atomic_read(&rxrpc_transport_count)));
+
+	kleave("");
+} /* end rxrpc_cleanup() */
+
+module_exit(rxrpc_cleanup);
+
+/*****************************************************************************/
+/*
+ * clear the dead space between task_struct and kernel stack
+ * - called by supplying -finstrument-functions to gcc
+ */
+#if 0
+void __cyg_profile_func_enter (void *this_fn, void *call_site)
+__attribute__((no_instrument_function));
+
+void __cyg_profile_func_enter (void *this_fn, void *call_site)
+{
+       asm volatile("  movl    %%esp,%%edi     \n"
+                    "  andl    %0,%%edi        \n"
+                    "  addl    %1,%%edi        \n"
+                    "  movl    %%esp,%%ecx     \n"
+                    "  subl    %%edi,%%ecx     \n"
+                    "  shrl    $2,%%ecx        \n"
+                    "  movl    $0xedededed,%%eax     \n"
+                    "  rep stosl               \n"
+                    :
+                    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
+                    : "eax", "ecx", "edi", "memory", "cc"
+                    );
+}
+
+void __cyg_profile_func_exit(void *this_fn, void *call_site)
+__attribute__((no_instrument_function));
+
+void __cyg_profile_func_exit(void *this_fn, void *call_site)
+{
+       asm volatile("  movl    %%esp,%%edi     \n"
+                    "  andl    %0,%%edi        \n"
+                    "  addl    %1,%%edi        \n"
+                    "  movl    %%esp,%%ecx     \n"
+                    "  subl    %%edi,%%ecx     \n"
+                    "  shrl    $2,%%ecx        \n"
+                    "  movl    $0xdadadada,%%eax     \n"
+                    "  rep stosl               \n"
+                    :
+                    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
+                    : "eax", "ecx", "edi", "memory", "cc"
+                    );
+}
+#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
new file mode 100644
index 00000000000..ed38f5b17c1
--- /dev/null
+++ b/net/rxrpc/peer.c
@@ -0,0 +1,399 @@
+/* peer.c: Rx RPC peer management
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#include "internal.h"
+
+__RXACCT_DECL(atomic_t rxrpc_peer_count);
+LIST_HEAD(rxrpc_peers);
+DECLARE_RWSEM(rxrpc_peers_sem);
+unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
+
+static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
+
+static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
+{
+	struct rxrpc_peer *peer =
+		list_entry(timer, struct rxrpc_peer, timeout);
+
+	_debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
+
+	rxrpc_peer_do_timeout(peer);
+}
+
+static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
+	.timed_out	= __rxrpc_peer_timeout,
+};
+
+/*****************************************************************************/
+/*
+ * create a peer record
+ */
+static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
+			       struct rxrpc_peer **_peer)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("%p,%08x", trans, ntohl(addr));
+
+	/* allocate and initialise a peer record */
+	peer = kmalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
+	if (!peer) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(peer, 0, sizeof(struct rxrpc_peer));
+	atomic_set(&peer->usage, 1);
+
+	INIT_LIST_HEAD(&peer->link);
+	INIT_LIST_HEAD(&peer->proc_link);
+	INIT_LIST_HEAD(&peer->conn_idlist);
+	INIT_LIST_HEAD(&peer->conn_active);
+	INIT_LIST_HEAD(&peer->conn_graveyard);
+	spin_lock_init(&peer->conn_gylock);
+	init_waitqueue_head(&peer->conn_gy_waitq);
+	rwlock_init(&peer->conn_idlock);
+	rwlock_init(&peer->conn_lock);
+	atomic_set(&peer->conn_count, 0);
+	spin_lock_init(&peer->lock);
+	rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
+
+	peer->addr.s_addr = addr;
+
+	peer->trans = trans;
+	peer->ops = trans->peer_ops;
+
+	__RXACCT(atomic_inc(&rxrpc_peer_count));
+	*_peer = peer;
+	_leave(" = 0 (%p)", peer);
+
+	return 0;
+} /* end __rxrpc_create_peer() */
+
+/*****************************************************************************/
+/*
+ * find a peer record on the specified transport
+ * - returns (if successful) with peer record usage incremented
+ * - resurrects it from the graveyard if found there
+ */
+int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
+		      struct rxrpc_peer **_peer)
+{
+	struct rxrpc_peer *peer, *candidate = NULL;
+	struct list_head *_p;
+	int ret;
+
+	_enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
+
+	/* [common case] search the transport's active list first */
+	read_lock(&trans->peer_lock);
+	list_for_each(_p, &trans->peer_active) {
+		peer = list_entry(_p, struct rxrpc_peer, link);
+		if (peer->addr.s_addr == addr)
+			goto found_active;
+	}
+	read_unlock(&trans->peer_lock);
+
+	/* [uncommon case] not active - create a candidate for a new record */
+	ret = __rxrpc_create_peer(trans, addr, &candidate);
+	if (ret < 0) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	/* search the active list again, just in case it appeared whilst we
+	 * were busy */
+	write_lock(&trans->peer_lock);
+	list_for_each(_p, &trans->peer_active) {
+		peer = list_entry(_p, struct rxrpc_peer, link);
+		if (peer->addr.s_addr == addr)
+			goto found_active_second_chance;
+	}
+
+	/* search the transport's graveyard list */
+	spin_lock(&trans->peer_gylock);
+	list_for_each(_p, &trans->peer_graveyard) {
+		peer = list_entry(_p, struct rxrpc_peer, link);
+		if (peer->addr.s_addr == addr)
+			goto found_in_graveyard;
+	}
+	spin_unlock(&trans->peer_gylock);
+
+	/* we can now add the new candidate to the list
+	 * - tell the application layer that this peer has been added
+	 */
+	rxrpc_get_transport(trans);
+	peer = candidate;
+	candidate = NULL;
+
+	if (peer->ops && peer->ops->adding) {
+		ret = peer->ops->adding(peer);
+		if (ret < 0) {
+			write_unlock(&trans->peer_lock);
+			__RXACCT(atomic_dec(&rxrpc_peer_count));
+			kfree(peer);
+			rxrpc_put_transport(trans);
+			_leave(" = %d", ret);
+			return ret;
+		}
+	}
+
+	atomic_inc(&trans->peer_count);
+
+ make_active:
+	list_add_tail(&peer->link, &trans->peer_active);
+
+ success_uwfree:
+	write_unlock(&trans->peer_lock);
+
+	if (candidate) {
+		__RXACCT(atomic_dec(&rxrpc_peer_count));
+		kfree(candidate);
+	}
+
+	if (list_empty(&peer->proc_link)) {
+		down_write(&rxrpc_peers_sem);
+		list_add_tail(&peer->proc_link, &rxrpc_peers);
+		up_write(&rxrpc_peers_sem);
+	}
+
+ success:
+	*_peer = peer;
+
+	_leave(" = 0 (%p{u=%d cc=%d})",
+	       peer,
+	       atomic_read(&peer->usage),
+	       atomic_read(&peer->conn_count));
+	return 0;
+
+	/* handle the peer being found in the active list straight off */
+ found_active:
+	rxrpc_get_peer(peer);
+	read_unlock(&trans->peer_lock);
+	goto success;
+
+	/* handle resurrecting a peer from the graveyard */
+ found_in_graveyard:
+	rxrpc_get_peer(peer);
+	rxrpc_get_transport(peer->trans);
+	rxrpc_krxtimod_del_timer(&peer->timeout);
+	list_del_init(&peer->link);
+	spin_unlock(&trans->peer_gylock);
+	goto make_active;
+
+	/* handle finding the peer on the second time through the active
+	 * list */
+ found_active_second_chance:
+	rxrpc_get_peer(peer);
+	goto success_uwfree;
+
+} /* end rxrpc_peer_lookup() */
+
+/*****************************************************************************/
+/*
+ * finish with a peer record
+ * - it gets sent to the graveyard from where it can be resurrected or timed
+ *   out
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+	struct rxrpc_transport *trans = peer->trans;
+
+	_enter("%p{cc=%d a=%08x}",
+	       peer,
+	       atomic_read(&peer->conn_count),
+	       ntohl(peer->addr.s_addr));
+
+	/* sanity check */
+	if (atomic_read(&peer->usage) <= 0)
+		BUG();
+
+	write_lock(&trans->peer_lock);
+	spin_lock(&trans->peer_gylock);
+	if (likely(!atomic_dec_and_test(&peer->usage))) {
+		spin_unlock(&trans->peer_gylock);
+		write_unlock(&trans->peer_lock);
+		_leave("");
+		return;
+	}
+
+	/* move to graveyard queue */
+	list_del(&peer->link);
+	write_unlock(&trans->peer_lock);
+
+	list_add_tail(&peer->link, &trans->peer_graveyard);
+
+	BUG_ON(!list_empty(&peer->conn_active));
+
+	rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
+
+	spin_unlock(&trans->peer_gylock);
+
+	rxrpc_put_transport(trans);
+
+	_leave(" [killed]");
+} /* end rxrpc_put_peer() */
+
+/*****************************************************************************/
+/*
+ * handle a peer timing out in the graveyard
+ * - called from krxtimod
+ */
+static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
+{
+	struct rxrpc_transport *trans = peer->trans;
+
+	_enter("%p{u=%d cc=%d a=%08x}",
+	       peer,
+	       atomic_read(&peer->usage),
+	       atomic_read(&peer->conn_count),
+	       ntohl(peer->addr.s_addr));
+
+	BUG_ON(atomic_read(&peer->usage) < 0);
+
+	/* remove from graveyard if still dead */
+	spin_lock(&trans->peer_gylock);
+	if (atomic_read(&peer->usage) == 0)
+		list_del_init(&peer->link);
+	else
+		peer = NULL;
+	spin_unlock(&trans->peer_gylock);
+
+	if (!peer) {
+		_leave("");
+		return; /* resurrected */
+	}
+
+	/* clear all connections on this peer */
+	rxrpc_conn_clearall(peer);
+
+	BUG_ON(!list_empty(&peer->conn_active));
+	BUG_ON(!list_empty(&peer->conn_graveyard));
+
+	/* inform the application layer */
+	if (peer->ops && peer->ops->discarding)
+		peer->ops->discarding(peer);
+
+	if (!list_empty(&peer->proc_link)) {
+		down_write(&rxrpc_peers_sem);
+		list_del(&peer->proc_link);
+		up_write(&rxrpc_peers_sem);
+	}
+
+	__RXACCT(atomic_dec(&rxrpc_peer_count));
+	kfree(peer);
+
+	/* if the graveyard is now empty, wake up anyone waiting for that */
+	if (atomic_dec_and_test(&trans->peer_count))
+		wake_up(&trans->peer_gy_waitq);
+
+	_leave(" [destroyed]");
+} /* end rxrpc_peer_do_timeout() */
+
+/*****************************************************************************/
+/*
+ * clear all peer records from a transport endpoint
+ */
+void rxrpc_peer_clearall(struct rxrpc_transport *trans)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	struct rxrpc_peer *peer;
+	int err;
+
+	_enter("%p",trans);
+
+	/* there shouldn't be any active peers remaining */
+	BUG_ON(!list_empty(&trans->peer_active));
+
+	/* manually timeout all peers in the graveyard */
+	spin_lock(&trans->peer_gylock);
+	while (!list_empty(&trans->peer_graveyard)) {
+		peer = list_entry(trans->peer_graveyard.next,
+				  struct rxrpc_peer, link);
+		_debug("Clearing peer %p\n", peer);
+		err = rxrpc_krxtimod_del_timer(&peer->timeout);
+		spin_unlock(&trans->peer_gylock);
+
+		if (err == 0)
+			rxrpc_peer_do_timeout(peer);
+
+		spin_lock(&trans->peer_gylock);
+	}
+	spin_unlock(&trans->peer_gylock);
+
+	/* wait for the the peer graveyard to be completely cleared */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	add_wait_queue(&trans->peer_gy_waitq, &myself);
+
+	while (atomic_read(&trans->peer_count) != 0) {
+		schedule();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+
+	remove_wait_queue(&trans->peer_gy_waitq, &myself);
+	set_current_state(TASK_RUNNING);
+
+	_leave("");
+} /* end rxrpc_peer_clearall() */
+
+/*****************************************************************************/
+/*
+ * calculate and cache the Round-Trip-Time for a message and its response
+ */
+void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
+			      struct rxrpc_message *msg,
+			      struct rxrpc_message *resp)
+{
+	unsigned long long rtt;
+	int loop;
+
+	_enter("%p,%p,%p", peer, msg, resp);
+
+	/* calculate the latest RTT */
+	rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
+	rtt *= 1000000UL;
+	rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
+
+	/* add to cache */
+	peer->rtt_cache[peer->rtt_point] = rtt;
+	peer->rtt_point++;
+	peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
+
+	if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
+		peer->rtt_usage++;
+
+	/* recalculate RTT */
+	rtt = 0;
+	for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
+		rtt += peer->rtt_cache[loop];
+
+	do_div(rtt, peer->rtt_usage);
+	peer->rtt = rtt;
+
+	_leave(" RTT=%lu.%lums",
+	       (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
+
+} /* end rxrpc_peer_calculate_rtt() */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
new file mode 100644
index 00000000000..3b5ecd8e240
--- /dev/null
+++ b/net/rxrpc/proc.c
@@ -0,0 +1,617 @@
+/* proc.c: /proc interface for RxRPC
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <rxrpc/rxrpc.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include "internal.h"
+
+static struct proc_dir_entry *proc_rxrpc;
+
+static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
+static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
+static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
+static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
+static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
+
+static struct seq_operations rxrpc_proc_transports_ops = {
+	.start	= rxrpc_proc_transports_start,
+	.next	= rxrpc_proc_transports_next,
+	.stop	= rxrpc_proc_transports_stop,
+	.show	= rxrpc_proc_transports_show,
+};
+
+static struct file_operations rxrpc_proc_transports_fops = {
+	.open		= rxrpc_proc_transports_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
+static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
+static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
+static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
+static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
+
+static struct seq_operations rxrpc_proc_peers_ops = {
+	.start	= rxrpc_proc_peers_start,
+	.next	= rxrpc_proc_peers_next,
+	.stop	= rxrpc_proc_peers_stop,
+	.show	= rxrpc_proc_peers_show,
+};
+
+static struct file_operations rxrpc_proc_peers_fops = {
+	.open		= rxrpc_proc_peers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
+static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
+static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
+static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
+static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
+
+static struct seq_operations rxrpc_proc_conns_ops = {
+	.start	= rxrpc_proc_conns_start,
+	.next	= rxrpc_proc_conns_next,
+	.stop	= rxrpc_proc_conns_stop,
+	.show	= rxrpc_proc_conns_show,
+};
+
+static struct file_operations rxrpc_proc_conns_fops = {
+	.open		= rxrpc_proc_conns_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
+static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
+static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
+static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
+static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
+
+static struct seq_operations rxrpc_proc_calls_ops = {
+	.start	= rxrpc_proc_calls_start,
+	.next	= rxrpc_proc_calls_next,
+	.stop	= rxrpc_proc_calls_stop,
+	.show	= rxrpc_proc_calls_show,
+};
+
+static struct file_operations rxrpc_proc_calls_fops = {
+	.open		= rxrpc_proc_calls_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static const char *rxrpc_call_states7[] = {
+	"complet",
+	"error  ",
+	"rcv_op ",
+	"rcv_arg",
+	"got_arg",
+	"snd_rpl",
+	"fin_ack",
+	"snd_arg",
+	"rcv_rpl",
+	"got_rpl"
+};
+
+static const char *rxrpc_call_error_states7[] = {
+	"no_err ",
+	"loc_abt",
+	"rmt_abt",
+	"loc_err",
+	"rmt_err"
+};
+
+/*****************************************************************************/
+/*
+ * initialise the /proc/net/rxrpc/ directory
+ */
+int rxrpc_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	proc_rxrpc = proc_mkdir("rxrpc", proc_net);
+	if (!proc_rxrpc)
+		goto error;
+	proc_rxrpc->owner = THIS_MODULE;
+
+	p = create_proc_entry("calls", 0, proc_rxrpc);
+	if (!p)
+		goto error_proc;
+	p->proc_fops = &rxrpc_proc_calls_fops;
+	p->owner = THIS_MODULE;
+
+	p = create_proc_entry("connections", 0, proc_rxrpc);
+	if (!p)
+		goto error_calls;
+	p->proc_fops = &rxrpc_proc_conns_fops;
+	p->owner = THIS_MODULE;
+
+	p = create_proc_entry("peers", 0, proc_rxrpc);
+	if (!p)
+		goto error_calls;
+	p->proc_fops = &rxrpc_proc_peers_fops;
+	p->owner = THIS_MODULE;
+
+	p = create_proc_entry("transports", 0, proc_rxrpc);
+	if (!p)
+		goto error_conns;
+	p->proc_fops = &rxrpc_proc_transports_fops;
+	p->owner = THIS_MODULE;
+
+	return 0;
+
+ error_conns:
+	remove_proc_entry("connections", proc_rxrpc);
+ error_calls:
+	remove_proc_entry("calls", proc_rxrpc);
+ error_proc:
+	remove_proc_entry("rxrpc", proc_net);
+ error:
+	return -ENOMEM;
+} /* end rxrpc_proc_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the /proc/net/rxrpc/ directory
+ */
+void rxrpc_proc_cleanup(void)
+{
+	remove_proc_entry("transports", proc_rxrpc);
+	remove_proc_entry("peers", proc_rxrpc);
+	remove_proc_entry("connections", proc_rxrpc);
+	remove_proc_entry("calls", proc_rxrpc);
+
+	remove_proc_entry("rxrpc", proc_net);
+
+} /* end rxrpc_proc_cleanup() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
+ */
+static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &rxrpc_proc_transports_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+} /* end rxrpc_proc_transports_open() */
+
+/*****************************************************************************/
+/*
+ * set up the iterator to start reading from the transports list and return the first item
+ */
+static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	down_read(&rxrpc_proc_transports_sem);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &rxrpc_proc_transports)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_proc_transports ? _p : NULL;
+} /* end rxrpc_proc_transports_start() */
+
+/*****************************************************************************/
+/*
+ * move to next call in transports list
+ */
+static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
+
+	return _p != &rxrpc_proc_transports ? _p : NULL;
+} /* end rxrpc_proc_transports_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the transports list
+ */
+static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
+{
+	up_read(&rxrpc_proc_transports_sem);
+
+} /* end rxrpc_proc_transports_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of call lines
+ */
+static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
+{
+	struct rxrpc_transport *trans =
+		list_entry(v, struct rxrpc_transport, proc_link);
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "LOCAL USE\n");
+		return 0;
+	}
+
+	/* display one transport per line on subsequent lines */
+	seq_printf(m, "%5hu %3d\n",
+		   trans->port,
+		   atomic_read(&trans->usage)
+		   );
+
+	return 0;
+} /* end rxrpc_proc_transports_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
+ */
+static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &rxrpc_proc_peers_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+} /* end rxrpc_proc_peers_open() */
+
+/*****************************************************************************/
+/*
+ * set up the iterator to start reading from the peers list and return the
+ * first item
+ */
+static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	down_read(&rxrpc_peers_sem);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &rxrpc_peers)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_peers ? _p : NULL;
+} /* end rxrpc_proc_peers_start() */
+
+/*****************************************************************************/
+/*
+ * move to next conn in peers list
+ */
+static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
+
+	return _p != &rxrpc_peers ? _p : NULL;
+} /* end rxrpc_proc_peers_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the peers list
+ */
+static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
+{
+	up_read(&rxrpc_peers_sem);
+
+} /* end rxrpc_proc_peers_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of conn lines
+ */
+static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
+{
+	struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
+	signed long timeout;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "LOCAL REMOTE   USAGE CONNS  TIMEOUT"
+			 "   MTU RTT(uS)\n");
+		return 0;
+	}
+
+	/* display one peer per line on subsequent lines */
+	timeout = 0;
+	if (!list_empty(&peer->timeout.link))
+		timeout = (signed long) peer->timeout.timo_jif -
+			(signed long) jiffies;
+
+	seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
+		   peer->trans->port,
+		   ntohl(peer->addr.s_addr),
+		   atomic_read(&peer->usage),
+		   atomic_read(&peer->conn_count),
+		   timeout,
+		   peer->if_mtu,
+		   (long) peer->rtt
+		   );
+
+	return 0;
+} /* end rxrpc_proc_peers_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/net/rxrpc/connections" which provides a summary of extant
+ * connections
+ */
+static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &rxrpc_proc_conns_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+} /* end rxrpc_proc_conns_open() */
+
+/*****************************************************************************/
+/*
+ * set up the iterator to start reading from the conns list and return the
+ * first item
+ */
+static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	down_read(&rxrpc_conns_sem);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &rxrpc_conns)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_conns ? _p : NULL;
+} /* end rxrpc_proc_conns_start() */
+
+/*****************************************************************************/
+/*
+ * move to next conn in conns list
+ */
+static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
+
+	return _p != &rxrpc_conns ? _p : NULL;
+} /* end rxrpc_proc_conns_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the conns list
+ */
+static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
+{
+	up_read(&rxrpc_conns_sem);
+
+} /* end rxrpc_proc_conns_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of conn lines
+ */
+static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
+{
+	struct rxrpc_connection *conn;
+	signed long timeout;
+
+	conn = list_entry(v, struct rxrpc_connection, proc_link);
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m,
+			 "LOCAL REMOTE   RPORT SRVC CONN     END SERIALNO "
+			 "CALLNO     MTU  TIMEOUT"
+			 "\n");
+		return 0;
+	}
+
+	/* display one conn per line on subsequent lines */
+	timeout = 0;
+	if (!list_empty(&conn->timeout.link))
+		timeout = (signed long) conn->timeout.timo_jif -
+			(signed long) jiffies;
+
+	seq_printf(m,
+		   "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
+		   conn->trans->port,
+		   ntohl(conn->addr.sin_addr.s_addr),
+		   ntohs(conn->addr.sin_port),
+		   ntohs(conn->service_id),
+		   ntohl(conn->conn_id),
+		   conn->out_clientflag ? "CLT" : "SRV",
+		   conn->serial_counter,
+		   conn->call_counter,
+		   conn->mtu_size,
+		   timeout
+		   );
+
+	return 0;
+} /* end rxrpc_proc_conns_show() */
+
+/*****************************************************************************/
+/*
+ * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
+ */
+static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &rxrpc_proc_calls_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+} /* end rxrpc_proc_calls_open() */
+
+/*****************************************************************************/
+/*
+ * set up the iterator to start reading from the calls list and return the
+ * first item
+ */
+static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	down_read(&rxrpc_calls_sem);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &rxrpc_calls)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_calls ? _p : NULL;
+} /* end rxrpc_proc_calls_start() */
+
+/*****************************************************************************/
+/*
+ * move to next call in calls list
+ */
+static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
+
+	return _p != &rxrpc_calls ? _p : NULL;
+} /* end rxrpc_proc_calls_next() */
+
+/*****************************************************************************/
+/*
+ * clean up after reading from the calls list
+ */
+static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
+{
+	up_read(&rxrpc_calls_sem);
+
+} /* end rxrpc_proc_calls_stop() */
+
+/*****************************************************************************/
+/*
+ * display a header line followed by a load of call lines
+ */
+static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
+{
+	struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m,
+			 "LOCAL REMOT SRVC CONN     CALL     DIR USE "
+			 " L STATE   OPCODE ABORT    ERRNO\n"
+			 );
+		return 0;
+	}
+
+	/* display one call per line on subsequent lines */
+	seq_printf(m,
+		   "%5hu %5hu %04hx %08x %08x %s %3u%c"
+		   " %c %-7.7s %6d %08x %5d\n",
+		   call->conn->trans->port,
+		   ntohs(call->conn->addr.sin_port),
+		   ntohs(call->conn->service_id),
+		   ntohl(call->conn->conn_id),
+		   ntohl(call->call_id),
+		   call->conn->service ? "SVC" : "CLT",
+		   atomic_read(&call->usage),
+		   waitqueue_active(&call->waitq) ? 'w' : ' ',
+		   call->app_last_rcv ? 'Y' : '-',
+		   (call->app_call_state!=RXRPC_CSTATE_ERROR ?
+		    rxrpc_call_states7[call->app_call_state] :
+		    rxrpc_call_error_states7[call->app_err_state]),
+		   call->app_opcode,
+		   call->app_abort_code,
+		   call->app_errno
+		   );
+
+	return 0;
+} /* end rxrpc_proc_calls_show() */
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
new file mode 100644
index 00000000000..56adf16fed0
--- /dev/null
+++ b/net/rxrpc/rxrpc_syms.c
@@ -0,0 +1,35 @@
+/* rxrpc_syms.c: exported Rx RPC layer interface symbols
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <rxrpc/transport.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/krxiod.h>
+
+/* call.c */
+EXPORT_SYMBOL(rxrpc_create_call);
+EXPORT_SYMBOL(rxrpc_put_call);
+EXPORT_SYMBOL(rxrpc_call_abort);
+EXPORT_SYMBOL(rxrpc_call_read_data);
+EXPORT_SYMBOL(rxrpc_call_write_data);
+
+/* connection.c */
+EXPORT_SYMBOL(rxrpc_create_connection);
+EXPORT_SYMBOL(rxrpc_put_connection);
+
+/* transport.c */
+EXPORT_SYMBOL(rxrpc_create_transport);
+EXPORT_SYMBOL(rxrpc_put_transport);
+EXPORT_SYMBOL(rxrpc_add_service);
+EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 00000000000..fbf98729c74
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,122 @@
+/* sysctl.c: Rx RPC control
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <rxrpc/types.h>
+#include <rxrpc/rxrpc.h>
+#include <asm/errno.h>
+#include "internal.h"
+
+int rxrpc_ktrace;
+int rxrpc_kdebug;
+int rxrpc_kproto;
+int rxrpc_knet;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *rxrpc_sysctl = NULL;
+
+static ctl_table rxrpc_sysctl_table[] = {
+        {
+		.ctl_name	= 1,
+		.procname	= "kdebug",
+		.data		= &rxrpc_kdebug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= 2,
+		.procname	= "ktrace",
+		.data		= &rxrpc_ktrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= 3,
+		.procname	= "kproto",
+		.data		= &rxrpc_kproto,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= 4,
+		.procname	= "knet",
+		.data		= &rxrpc_knet,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+        {
+		.ctl_name	= 5,
+		.procname	= "peertimo",
+		.data		= &rxrpc_peer_timeout,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax
+	},
+        {
+		.ctl_name	= 6,
+		.procname	= "conntimo",
+		.data		= &rxrpc_conn_timeout,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table rxrpc_dir_sysctl_table[] = {
+	{
+		.ctl_name	= 1,
+		.procname	= "rxrpc",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= rxrpc_sysctl_table
+	},
+	{ .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+
+/*****************************************************************************/
+/*
+ * initialise the sysctl stuff for Rx RPC
+ */
+int rxrpc_sysctl_init(void)
+{
+#ifdef CONFIG_SYSCTL
+	rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table, 0);
+	if (!rxrpc_sysctl)
+		return -ENOMEM;
+#endif /* CONFIG_SYSCTL */
+
+	return 0;
+} /* end rxrpc_sysctl_init() */
+
+/*****************************************************************************/
+/*
+ * clean up the sysctl stuff for Rx RPC
+ */
+void rxrpc_sysctl_cleanup(void)
+{
+#ifdef CONFIG_SYSCTL
+	if (rxrpc_sysctl) {
+		unregister_sysctl_table(rxrpc_sysctl);
+		rxrpc_sysctl = NULL;
+	}
+#endif /* CONFIG_SYSCTL */
+
+} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
new file mode 100644
index 00000000000..9bce7794130
--- /dev/null
+++ b/net/rxrpc/transport.c
@@ -0,0 +1,854 @@
+/* transport.c: Rx Transport routines
+ *
+ * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <rxrpc/transport.h>
+#include <rxrpc/peer.h>
+#include <rxrpc/connection.h>
+#include <rxrpc/call.h>
+#include <rxrpc/message.h>
+#include <rxrpc/krxiod.h>
+#include <rxrpc/krxsecd.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/ipv6.h>	/* this should _really_ be in errqueue.h.. */
+#endif
+#include <linux/errqueue.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include "internal.h"
+
+struct errormsg {
+	struct cmsghdr			cmsg;		/* control message header */
+	struct sock_extended_err	ee;		/* extended error information */
+	struct sockaddr_in		icmp_src;	/* ICMP packet source address */
+};
+
+static DEFINE_SPINLOCK(rxrpc_transports_lock);
+static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
+
+__RXACCT_DECL(atomic_t rxrpc_transport_count);
+LIST_HEAD(rxrpc_proc_transports);
+DECLARE_RWSEM(rxrpc_proc_transports_sem);
+
+static void rxrpc_data_ready(struct sock *sk, int count);
+static void rxrpc_error_report(struct sock *sk);
+static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
+					struct list_head *msgq);
+static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
+
+/*****************************************************************************/
+/*
+ * create a new transport endpoint using the specified UDP port
+ */
+int rxrpc_create_transport(unsigned short port,
+			   struct rxrpc_transport **_trans)
+{
+	struct rxrpc_transport *trans;
+	struct sockaddr_in sin;
+	mm_segment_t oldfs;
+	struct sock *sock;
+	int ret, opt;
+
+	_enter("%hu", port);
+
+	trans = kmalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
+	if (!trans)
+		return -ENOMEM;
+
+	memset(trans, 0, sizeof(struct rxrpc_transport));
+	atomic_set(&trans->usage, 1);
+	INIT_LIST_HEAD(&trans->services);
+	INIT_LIST_HEAD(&trans->link);
+	INIT_LIST_HEAD(&trans->krxiodq_link);
+	spin_lock_init(&trans->lock);
+	INIT_LIST_HEAD(&trans->peer_active);
+	INIT_LIST_HEAD(&trans->peer_graveyard);
+	spin_lock_init(&trans->peer_gylock);
+	init_waitqueue_head(&trans->peer_gy_waitq);
+	rwlock_init(&trans->peer_lock);
+	atomic_set(&trans->peer_count, 0);
+	trans->port = port;
+
+	/* create a UDP socket to be my actual transport endpoint */
+	ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
+	if (ret < 0)
+		goto error;
+
+	/* use the specified port */
+	if (port) {
+		memset(&sin, 0, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_port = htons(port);
+		ret = trans->socket->ops->bind(trans->socket,
+					       (struct sockaddr *) &sin,
+					       sizeof(sin));
+		if (ret < 0)
+			goto error;
+	}
+
+	opt = 1;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
+					     (char *) &opt, sizeof(opt));
+	set_fs(oldfs);
+
+	spin_lock(&rxrpc_transports_lock);
+	list_add(&trans->link, &rxrpc_transports);
+	spin_unlock(&rxrpc_transports_lock);
+
+	/* set the socket up */
+	sock = trans->socket->sk;
+	sock->sk_user_data	= trans;
+	sock->sk_data_ready	= rxrpc_data_ready;
+	sock->sk_error_report	= rxrpc_error_report;
+
+	down_write(&rxrpc_proc_transports_sem);
+	list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
+	up_write(&rxrpc_proc_transports_sem);
+
+	__RXACCT(atomic_inc(&rxrpc_transport_count));
+
+	*_trans = trans;
+	_leave(" = 0 (%p)", trans);
+	return 0;
+
+ error:
+	/* finish cleaning up the transport (not really needed here, but...) */
+	if (trans->socket)
+		trans->socket->ops->shutdown(trans->socket, 2);
+
+	/* close the socket */
+	if (trans->socket) {
+		trans->socket->sk->sk_user_data = NULL;
+		sock_release(trans->socket);
+		trans->socket = NULL;
+	}
+
+	kfree(trans);
+
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_create_transport() */
+
+/*****************************************************************************/
+/*
+ * destroy a transport endpoint
+ */
+void rxrpc_put_transport(struct rxrpc_transport *trans)
+{
+	_enter("%p{u=%d p=%hu}",
+	       trans, atomic_read(&trans->usage), trans->port);
+
+	BUG_ON(atomic_read(&trans->usage) <= 0);
+
+	/* to prevent a race, the decrement and the dequeue must be
+	 * effectively atomic */
+	spin_lock(&rxrpc_transports_lock);
+	if (likely(!atomic_dec_and_test(&trans->usage))) {
+		spin_unlock(&rxrpc_transports_lock);
+		_leave("");
+		return;
+	}
+
+	list_del(&trans->link);
+	spin_unlock(&rxrpc_transports_lock);
+
+	/* finish cleaning up the transport */
+	if (trans->socket)
+		trans->socket->ops->shutdown(trans->socket, 2);
+
+	rxrpc_krxsecd_clear_transport(trans);
+	rxrpc_krxiod_dequeue_transport(trans);
+
+	/* discard all peer information */
+	rxrpc_peer_clearall(trans);
+
+	down_write(&rxrpc_proc_transports_sem);
+	list_del(&trans->proc_link);
+	up_write(&rxrpc_proc_transports_sem);
+	__RXACCT(atomic_dec(&rxrpc_transport_count));
+
+	/* close the socket */
+	if (trans->socket) {
+		trans->socket->sk->sk_user_data = NULL;
+		sock_release(trans->socket);
+		trans->socket = NULL;
+	}
+
+	kfree(trans);
+
+	_leave("");
+} /* end rxrpc_put_transport() */
+
+/*****************************************************************************/
+/*
+ * add a service to a transport to be listened upon
+ */
+int rxrpc_add_service(struct rxrpc_transport *trans,
+		      struct rxrpc_service *newsrv)
+{
+	struct rxrpc_service *srv;
+	struct list_head *_p;
+	int ret = -EEXIST;
+
+	_enter("%p{%hu},%p{%hu}",
+	       trans, trans->port, newsrv, newsrv->service_id);
+
+	/* verify that the service ID is not already present */
+	spin_lock(&trans->lock);
+
+	list_for_each(_p, &trans->services) {
+		srv = list_entry(_p, struct rxrpc_service, link);
+		if (srv->service_id == newsrv->service_id)
+			goto out;
+	}
+
+	/* okay - add the transport to the list */
+	list_add_tail(&newsrv->link, &trans->services);
+	rxrpc_get_transport(trans);
+	ret = 0;
+
+ out:
+	spin_unlock(&trans->lock);
+
+	_leave("= %d", ret);
+	return ret;
+} /* end rxrpc_add_service() */
+
+/*****************************************************************************/
+/*
+ * remove a service from a transport
+ */
+void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
+{
+	_enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
+
+	spin_lock(&trans->lock);
+	list_del(&srv->link);
+	spin_unlock(&trans->lock);
+
+	rxrpc_put_transport(trans);
+
+	_leave("");
+} /* end rxrpc_del_service() */
+
+/*****************************************************************************/
+/*
+ * INET callback when data has been received on the socket.
+ */
+static void rxrpc_data_ready(struct sock *sk, int count)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
+
+	/* queue the transport for attention by krxiod */
+	trans = (struct rxrpc_transport *) sk->sk_user_data;
+	if (trans)
+		rxrpc_krxiod_queue_transport(trans);
+
+	/* wake up anyone waiting on the socket */
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+
+	_leave("");
+} /* end rxrpc_data_ready() */
+
+/*****************************************************************************/
+/*
+ * INET callback when an ICMP error packet is received
+ * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
+ */
+static void rxrpc_error_report(struct sock *sk)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("%p{t=%p}", sk, sk->sk_user_data);
+
+	/* queue the transport for attention by krxiod */
+	trans = (struct rxrpc_transport *) sk->sk_user_data;
+	if (trans) {
+		trans->error_rcvd = 1;
+		rxrpc_krxiod_queue_transport(trans);
+	}
+
+	/* wake up anyone waiting on the socket */
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+
+	_leave("");
+} /* end rxrpc_error_report() */
+
+/*****************************************************************************/
+/*
+ * split a message up, allocating message records and filling them in
+ * from the contents of a socket buffer
+ */
+static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
+			      struct sk_buff *pkt,
+			      struct list_head *msgq)
+{
+	struct rxrpc_message *msg;
+	int ret;
+
+	_enter("");
+
+	msg = kmalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
+	if (!msg) {
+		_leave(" = -ENOMEM");
+		return -ENOMEM;
+	}
+
+	memset(msg, 0, sizeof(*msg));
+	atomic_set(&msg->usage, 1);
+	list_add_tail(&msg->link,msgq);
+
+	/* dig out the Rx routing parameters */
+	if (skb_copy_bits(pkt, sizeof(struct udphdr),
+			  &msg->hdr, sizeof(msg->hdr)) < 0) {
+		ret = -EBADMSG;
+		goto error;
+	}
+
+	msg->trans = trans;
+	msg->state = RXRPC_MSG_RECEIVED;
+	msg->stamp = pkt->stamp;
+	if (msg->stamp.tv_sec == 0) {
+		do_gettimeofday(&msg->stamp); 
+		if (pkt->sk) 
+			sock_enable_timestamp(pkt->sk);
+	} 
+	msg->seq = ntohl(msg->hdr.seq);
+
+	/* attach the packet */
+	skb_get(pkt);
+	msg->pkt = pkt;
+
+	msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
+	msg->dsize = msg->pkt->len - msg->offset;
+
+	_net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
+	     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
+	     ntohl(msg->hdr.epoch),
+	     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
+	     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
+	     ntohl(msg->hdr.callNumber),
+	     rxrpc_pkts[msg->hdr.type],
+	     msg->hdr.flags,
+	     ntohs(msg->hdr.serviceId),
+	     msg->hdr.securityIndex);
+
+	__RXACCT(atomic_inc(&rxrpc_message_count));
+
+	/* split off jumbo packets */
+	while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+	       msg->hdr.flags & RXRPC_JUMBO_PACKET
+	       ) {
+		struct rxrpc_jumbo_header jumbo;
+		struct rxrpc_message *jumbomsg = msg;
+
+		_debug("split jumbo packet");
+
+		/* quick sanity check */
+		ret = -EBADMSG;
+		if (msg->dsize <
+		    RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
+			goto error;
+		if (msg->hdr.flags & RXRPC_LAST_PACKET)
+			goto error;
+
+		/* dig out the secondary header */
+		if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
+				  &jumbo, sizeof(jumbo)) < 0)
+			goto error;
+
+		/* allocate a new message record */
+		ret = -ENOMEM;
+		msg = kmalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
+		if (!msg)
+			goto error;
+
+		memcpy(msg, jumbomsg, sizeof(*msg));
+		list_add_tail(&msg->link, msgq);
+
+		/* adjust the jumbo packet */
+		jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
+
+		/* attach the packet here too */
+		skb_get(pkt);
+
+		/* adjust the parameters */
+		msg->seq++;
+		msg->hdr.seq = htonl(msg->seq);
+		msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
+		msg->offset += RXRPC_JUMBO_DATALEN +
+			sizeof(struct rxrpc_jumbo_header);
+		msg->dsize -= RXRPC_JUMBO_DATALEN +
+			sizeof(struct rxrpc_jumbo_header);
+		msg->hdr.flags = jumbo.flags;
+		msg->hdr._rsvd = jumbo._rsvd;
+
+		_net("Rx Split jumbo packet from %s"
+		     " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
+		     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
+		     ntohl(msg->hdr.epoch),
+		     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
+		     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
+		     ntohl(msg->hdr.callNumber),
+		     rxrpc_pkts[msg->hdr.type],
+		     msg->hdr.flags,
+		     ntohs(msg->hdr.serviceId),
+		     msg->hdr.securityIndex);
+
+		__RXACCT(atomic_inc(&rxrpc_message_count));
+	}
+
+	_leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
+	return 0;
+
+ error:
+	while (!list_empty(msgq)) {
+		msg = list_entry(msgq->next, struct rxrpc_message, link);
+		list_del_init(&msg->link);
+
+		rxrpc_put_message(msg);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_incoming_msg() */
+
+/*****************************************************************************/
+/*
+ * accept a new call
+ * - called from krxiod in process context
+ */
+void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
+{
+	struct rxrpc_message *msg;
+	struct rxrpc_peer *peer;
+	struct sk_buff *pkt;
+	int ret;
+	__be32 addr;
+	__be16 port;
+
+	LIST_HEAD(msgq);
+
+	_enter("%p{%d}", trans, trans->port);
+
+	for (;;) {
+		/* deal with outstanting errors first */
+		if (trans->error_rcvd)
+			rxrpc_trans_receive_error_report(trans);
+
+		/* attempt to receive a packet */
+		pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
+		if (!pkt) {
+			if (ret == -EAGAIN) {
+				_leave(" EAGAIN");
+				return;
+			}
+
+			/* an icmp error may have occurred */
+			rxrpc_krxiod_queue_transport(trans);
+			_leave(" error %d\n", ret);
+			return;
+		}
+
+		/* we'll probably need to checksum it (didn't call
+		 * sock_recvmsg) */
+		if (pkt->ip_summed != CHECKSUM_UNNECESSARY) {
+			if ((unsigned short)
+			    csum_fold(skb_checksum(pkt, 0, pkt->len,
+						   pkt->csum))) {
+				kfree_skb(pkt);
+				rxrpc_krxiod_queue_transport(trans);
+				_leave(" CSUM failed");
+				return;
+			}
+		}
+
+		addr = pkt->nh.iph->saddr;
+		port = pkt->h.uh->source;
+
+		_net("Rx Received UDP packet from %08x:%04hu",
+		     ntohl(addr), ntohs(port));
+
+		/* unmarshall the Rx parameters and split jumbo packets */
+		ret = rxrpc_incoming_msg(trans, pkt, &msgq);
+		if (ret < 0) {
+			kfree_skb(pkt);
+			rxrpc_krxiod_queue_transport(trans);
+			_leave(" bad packet");
+			return;
+		}
+
+		BUG_ON(list_empty(&msgq));
+
+		msg = list_entry(msgq.next, struct rxrpc_message, link);
+
+		/* locate the record for the peer from which it
+		 * originated */
+		ret = rxrpc_peer_lookup(trans, addr, &peer);
+		if (ret < 0) {
+			kdebug("Rx No connections from that peer");
+			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
+			goto finished_msg;
+		}
+
+		/* try and find a matching connection */
+		ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
+		if (ret < 0) {
+			kdebug("Rx Unknown Connection");
+			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
+			rxrpc_put_peer(peer);
+			goto finished_msg;
+		}
+		rxrpc_put_peer(peer);
+
+		/* deal with the first packet of a new call */
+		if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
+		    msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+		    ntohl(msg->hdr.seq) == 1
+		    ) {
+			_debug("Rx New server call");
+			rxrpc_trans_receive_new_call(trans, &msgq);
+			goto finished_msg;
+		}
+
+		/* deal with subsequent packet(s) of call */
+		_debug("Rx Call packet");
+		while (!list_empty(&msgq)) {
+			msg = list_entry(msgq.next, struct rxrpc_message, link);
+			list_del_init(&msg->link);
+
+			ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
+			if (ret < 0) {
+				rxrpc_trans_immediate_abort(trans, msg, ret);
+				rxrpc_put_message(msg);
+				goto finished_msg;
+			}
+
+			rxrpc_put_message(msg);
+		}
+
+		goto finished_msg;
+
+		/* dispose of the packets */
+	finished_msg:
+		while (!list_empty(&msgq)) {
+			msg = list_entry(msgq.next, struct rxrpc_message, link);
+			list_del_init(&msg->link);
+
+			rxrpc_put_message(msg);
+		}
+		kfree_skb(pkt);
+	}
+
+	_leave("");
+
+} /* end rxrpc_trans_receive_packet() */
+
+/*****************************************************************************/
+/*
+ * accept a new call from a client trying to connect to one of my services
+ * - called in process context
+ */
+static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
+					struct list_head *msgq)
+{
+	struct rxrpc_message *msg;
+
+	_enter("");
+
+	/* only bother with the first packet */
+	msg = list_entry(msgq->next, struct rxrpc_message, link);
+	list_del_init(&msg->link);
+	rxrpc_krxsecd_queue_incoming_call(msg);
+	rxrpc_put_message(msg);
+
+	_leave(" = 0");
+
+	return 0;
+} /* end rxrpc_trans_receive_new_call() */
+
+/*****************************************************************************/
+/*
+ * perform an immediate abort without connection or call structures
+ */
+int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
+				struct rxrpc_message *msg,
+				int error)
+{
+	struct rxrpc_header ahdr;
+	struct sockaddr_in sin;
+	struct msghdr msghdr;
+	struct kvec iov[2];
+	__be32 _error;
+	int len, ret;
+
+	_enter("%p,%p,%d", trans, msg, error);
+
+	/* don't abort an abort packet */
+	if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
+		_leave(" = 0");
+		return 0;
+	}
+
+	_error = htonl(-error);
+
+	/* set up the message to be transmitted */
+	memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
+	ahdr.epoch	= msg->hdr.epoch;
+	ahdr.serial	= htonl(1);
+	ahdr.seq	= 0;
+	ahdr.type	= RXRPC_PACKET_TYPE_ABORT;
+	ahdr.flags	= RXRPC_LAST_PACKET;
+	ahdr.flags	|= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
+
+	iov[0].iov_len	= sizeof(ahdr);
+	iov[0].iov_base	= &ahdr;
+	iov[1].iov_len	= sizeof(_error);
+	iov[1].iov_base	= &_error;
+
+	len = sizeof(ahdr) + sizeof(_error);
+
+	memset(&sin,0,sizeof(sin));
+	sin.sin_family		= AF_INET;
+	sin.sin_port		= msg->pkt->h.uh->source;
+	sin.sin_addr.s_addr	= msg->pkt->nh.iph->saddr;
+
+	msghdr.msg_name		= &sin;
+	msghdr.msg_namelen	= sizeof(sin);
+	msghdr.msg_control	= NULL;
+	msghdr.msg_controllen	= 0;
+	msghdr.msg_flags	= MSG_DONTWAIT;
+
+	_net("Sending message type %d of %d bytes to %08x:%d",
+	     ahdr.type,
+	     len,
+	     ntohl(sin.sin_addr.s_addr),
+	     ntohs(sin.sin_port));
+
+	/* send the message */
+	ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
+
+	_leave(" = %d", ret);
+	return ret;
+} /* end rxrpc_trans_immediate_abort() */
+
+/*****************************************************************************/
+/*
+ * receive an ICMP error report and percolate it to all connections
+ * heading to the affected host or port
+ */
+static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
+{
+	struct rxrpc_connection *conn;
+	struct sockaddr_in sin;
+	struct rxrpc_peer *peer;
+	struct list_head connq, *_p;
+	struct errormsg emsg;
+	struct msghdr msg;
+	__be16 port;
+	int local, err;
+
+	_enter("%p", trans);
+
+	for (;;) {
+		trans->error_rcvd = 0;
+
+		/* try and receive an error message */
+		msg.msg_name	= &sin;
+		msg.msg_namelen	= sizeof(sin);
+		msg.msg_control	= &emsg;
+		msg.msg_controllen = sizeof(emsg);
+		msg.msg_flags	= 0;
+
+		err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
+				   MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
+
+		if (err == -EAGAIN) {
+			_leave("");
+			return;
+		}
+
+		if (err < 0) {
+			printk("%s: unable to recv an error report: %d\n",
+			       __FUNCTION__, err);
+			_leave("");
+			return;
+		}
+
+		msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
+
+		if (msg.msg_controllen < sizeof(emsg.cmsg) ||
+		    msg.msg_namelen < sizeof(sin)) {
+			printk("%s: short control message"
+			       " (nlen=%u clen=%Zu fl=%x)\n",
+			       __FUNCTION__,
+			       msg.msg_namelen,
+			       msg.msg_controllen,
+			       msg.msg_flags);
+			continue;
+		}
+
+		_net("Rx Received control message"
+		     " { len=%Zu level=%u type=%u }",
+		     emsg.cmsg.cmsg_len,
+		     emsg.cmsg.cmsg_level,
+		     emsg.cmsg.cmsg_type);
+
+		if (sin.sin_family != AF_INET) {
+			printk("Rx Ignoring error report with non-INET address"
+			       " (fam=%u)",
+			       sin.sin_family);
+			continue;
+		}
+
+		_net("Rx Received message pertaining to host addr=%x port=%hu",
+		     ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+
+		if (emsg.cmsg.cmsg_level != SOL_IP ||
+		    emsg.cmsg.cmsg_type != IP_RECVERR) {
+			printk("Rx Ignoring unknown error report"
+			       " { level=%u type=%u }",
+			       emsg.cmsg.cmsg_level,
+			       emsg.cmsg.cmsg_type);
+			continue;
+		}
+
+		if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
+			printk("%s: short error message (%Zu)\n",
+			       __FUNCTION__, msg.msg_controllen);
+			_leave("");
+			return;
+		}
+
+		port = sin.sin_port;
+
+		switch (emsg.ee.ee_origin) {
+		case SO_EE_ORIGIN_ICMP:
+			local = 0;
+			switch (emsg.ee.ee_type) {
+			case ICMP_DEST_UNREACH:
+				switch (emsg.ee.ee_code) {
+				case ICMP_NET_UNREACH:
+					_net("Rx Received ICMP Network Unreachable");
+					port = 0;
+					err = -ENETUNREACH;
+					break;
+				case ICMP_HOST_UNREACH:
+					_net("Rx Received ICMP Host Unreachable");
+					port = 0;
+					err = -EHOSTUNREACH;
+					break;
+				case ICMP_PORT_UNREACH:
+					_net("Rx Received ICMP Port Unreachable");
+					err = -ECONNREFUSED;
+					break;
+				case ICMP_NET_UNKNOWN:
+					_net("Rx Received ICMP Unknown Network");
+					port = 0;
+					err = -ENETUNREACH;
+					break;
+				case ICMP_HOST_UNKNOWN:
+					_net("Rx Received ICMP Unknown Host");
+					port = 0;
+					err = -EHOSTUNREACH;
+					break;
+				default:
+					_net("Rx Received ICMP DestUnreach { code=%u }",
+					     emsg.ee.ee_code);
+					err = emsg.ee.ee_errno;
+					break;
+				}
+				break;
+
+			case ICMP_TIME_EXCEEDED:
+				_net("Rx Received ICMP TTL Exceeded");
+				err = emsg.ee.ee_errno;
+				break;
+
+			default:
+				_proto("Rx Received ICMP error { type=%u code=%u }",
+				       emsg.ee.ee_type, emsg.ee.ee_code);
+				err = emsg.ee.ee_errno;
+				break;
+			}
+			break;
+
+		case SO_EE_ORIGIN_LOCAL:
+			_proto("Rx Received local error { error=%d }",
+			       emsg.ee.ee_errno);
+			local = 1;
+			err = emsg.ee.ee_errno;
+			break;
+
+		case SO_EE_ORIGIN_NONE:
+		case SO_EE_ORIGIN_ICMP6:
+		default:
+			_proto("Rx Received error report { orig=%u }",
+			       emsg.ee.ee_origin);
+			local = 0;
+			err = emsg.ee.ee_errno;
+			break;
+		}
+
+		/* find all the connections between this transport and the
+		 * affected destination */
+		INIT_LIST_HEAD(&connq);
+
+		if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
+				      &peer) == 0) {
+			read_lock(&peer->conn_lock);
+			list_for_each(_p, &peer->conn_active) {
+				conn = list_entry(_p, struct rxrpc_connection,
+						  link);
+				if (port && conn->addr.sin_port != port)
+					continue;
+				if (!list_empty(&conn->err_link))
+					continue;
+
+				rxrpc_get_connection(conn);
+				list_add_tail(&conn->err_link, &connq);
+			}
+			read_unlock(&peer->conn_lock);
+
+			/* service all those connections */
+			while (!list_empty(&connq)) {
+				conn = list_entry(connq.next,
+						  struct rxrpc_connection,
+						  err_link);
+				list_del(&conn->err_link);
+
+				rxrpc_conn_handle_error(conn, local, err);
+
+				rxrpc_put_connection(conn);
+			}
+
+			rxrpc_put_peer(peer);
+		}
+	}
+
+	_leave("");
+	return;
+} /* end rxrpc_trans_receive_error_report() */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
new file mode 100644
index 00000000000..3d1d902dd1a
--- /dev/null
+++ b/net/sched/Kconfig
@@ -0,0 +1,508 @@
+#
+# Traffic control configuration.
+# 
+choice
+	prompt "Packet scheduler clock source"
+	depends on NET_SCHED
+	default NET_SCH_CLK_JIFFIES
+	help
+	  Packet schedulers need a monotonic clock that increments at a static
+	  rate. The kernel provides several suitable interfaces, each with
+	  different properties:
+	  
+	  - high resolution (us or better)
+	  - fast to read (minimal locking, no i/o access)
+	  - synchronized on all processors
+	  - handles cpu clock frequency changes
+
+	  but nothing provides all of the above.
+
+config NET_SCH_CLK_JIFFIES
+	bool "Timer interrupt"
+	help
+	  Say Y here if you want to use the timer interrupt (jiffies) as clock
+	  source. This clock source is fast, synchronized on all processors and
+	  handles cpu clock frequency changes, but its resolution is too low
+	  for accurate shaping except at very low speed.
+
+config NET_SCH_CLK_GETTIMEOFDAY
+	bool "gettimeofday"
+	help
+	  Say Y here if you want to use gettimeofday as clock source. This clock
+	  source has high resolution, is synchronized on all processors and
+	  handles cpu clock frequency changes, but it is slow.
+
+	  Choose this if you need a high resolution clock source but can't use
+	  the CPU's cycle counter.
+
+config NET_SCH_CLK_CPU
+	bool "CPU cycle counter"
+	depends on X86_TSC || X86_64 || ALPHA || SPARC64 || PPC64 || IA64
+	help
+	  Say Y here if you want to use the CPU's cycle counter as clock source.
+	  This is a cheap and high resolution clock source, but on some
+	  architectures it is not synchronized on all processors and doesn't
+	  handle cpu clock frequency changes.
+
+	  The useable cycle counters are:
+
+	  	x86/x86_64	- Timestamp Counter
+		alpha		- Cycle Counter
+		sparc64		- %ticks register
+		ppc64		- Time base
+		ia64		- Interval Time Counter
+
+	  Choose this if your CPU's cycle counter is working properly.
+
+endchoice
+
+config NET_SCH_CBQ
+	tristate "CBQ packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Class-Based Queueing (CBQ) packet
+	  scheduling algorithm for some of your network devices.  This
+	  algorithm classifies the waiting packets into a tree-like hierarchy
+	  of classes; the leaves of this tree are in turn scheduled by
+	  separate algorithms (called "disciplines" in this context).
+
+	  See the top of <file:net/sched/sch_cbq.c> for references about the
+	  CBQ algorithm.
+
+	  CBQ is a commonly used scheduler, so if you're unsure, you should
+	  say Y here. Then say Y to all the queueing algorithms below that you
+	  want to use as CBQ disciplines.  Then say Y to "Packet classifier
+	  API" and say Y to all the classifiers you want to use; a classifier
+	  is a routine that allows you to sort your outgoing traffic into
+	  classes based on a certain criterion.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_cbq.
+
+config NET_SCH_HTB
+	tristate "HTB packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Hierarchical Token Buckets (HTB)
+	  packet scheduling algorithm for some of your network devices. See
+	  <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
+	  in-depth articles.
+
+	  HTB is very similar to the CBQ regarding its goals however is has 
+	  different properties and different algorithm.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_htb.
+
+config NET_SCH_HFSC
+	tristate "HFSC packet scheduler"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Hierarchical Fair Service Curve
+	  (HFSC) packet scheduling algorithm for some of your network devices.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_hfsc.
+
+#tristate '  H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ
+config NET_SCH_ATM
+	tristate "ATM pseudo-scheduler"
+	depends on NET_SCHED && ATM
+	---help---
+	  Say Y here if you want to use the ATM pseudo-scheduler.  This
+	  provides a framework for invoking classifiers (aka "filters"), which
+	  in turn select classes of this queuing discipline.  Each class maps
+	  the flow(s) it is handling to a given virtual circuit (see the top of
+	  <file:net/sched/sch_atm.c>).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_atm.
+
+config NET_SCH_PRIO
+	tristate "The simplest PRIO pseudoscheduler"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use an n-band priority queue packet
+	  "scheduler" for some of your network devices or as a leaf discipline
+	  for the CBQ scheduling algorithm. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_prio.
+
+config NET_SCH_RED
+	tristate "RED queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Random Early Detection (RED)
+	  packet scheduling algorithm for some of your network devices (see
+	  the top of <file:net/sched/sch_red.c> for details and references
+	  about the algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_red.
+
+config NET_SCH_SFQ
+	tristate "SFQ queue"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
+	  packet scheduling algorithm for some of your network devices or as a
+	  leaf discipline for the CBQ scheduling algorithm (see the top of
+	  <file:net/sched/sch_sfq.c> for details and references about the SFQ
+	  algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_sfq.
+
+config NET_SCH_TEQL
+	tristate "TEQL queue"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the True Link Equalizer (TLE) packet
+	  scheduling algorithm for some of your network devices or as a leaf
+	  discipline for the CBQ scheduling algorithm. This queueing
+	  discipline allows the combination of several physical devices into
+	  one virtual device. (see the top of <file:net/sched/sch_teql.c> for
+	  details).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_teql.
+
+config NET_SCH_TBF
+	tristate "TBF queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Simple Token Bucket Filter (TBF)
+	  packet scheduling algorithm for some of your network devices or as a
+	  leaf discipline for the CBQ scheduling algorithm (see the top of
+	  <file:net/sched/sch_tbf.c> for a description of the TBF algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_tbf.
+
+config NET_SCH_GRED
+	tristate "GRED queue"
+	depends on NET_SCHED
+	help
+	  Say Y here if you want to use the Generic Random Early Detection
+	  (RED) packet scheduling algorithm for some of your network devices
+	  (see the top of <file:net/sched/sch_red.c> for details and
+	  references about the algorithm).
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_gred.
+
+config NET_SCH_DSMARK
+	tristate "Diffserv field marker"
+	depends on NET_SCHED
+	help
+	  Say Y if you want to schedule packets according to the
+	  Differentiated Services architecture proposed in RFC 2475.
+	  Technical information on this method, with pointers to associated
+	  RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_dsmark.
+
+config NET_SCH_NETEM
+	tristate "Network emulator"
+	depends on NET_SCHED
+	help
+	  Say Y if you want to emulate network delay, loss, and packet
+	  re-ordering. This is often useful to simulate networks when
+	  testing applications or protocols.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_netem.
+
+	  If unsure, say N.
+
+config NET_SCH_INGRESS
+	tristate "Ingress Qdisc"
+	depends on NET_SCHED 
+	help
+	  If you say Y here, you will be able to police incoming bandwidth
+	  and drop packets when this bandwidth exceeds your desired rate.
+	  If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_ingress.
+
+config NET_QOS
+	bool "QoS support"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to include Quality Of Service scheduling
+	  features, which means that you will be able to request certain
+	  rate-of-flow limits for your network devices.
+
+	  This Quality of Service (QoS) support will enable you to use
+	  Differentiated Services (diffserv) and Resource Reservation Protocol
+	  (RSVP) on your Linux router if you also say Y to "Packet classifier
+	  API" and to some classifiers below. Documentation and software is at
+	  <http://diffserv.sourceforge.net/>.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about QoS support.
+
+config NET_ESTIMATOR
+	bool "Rate estimator"
+	depends on NET_QOS
+	help
+	  In order for Quality of Service scheduling to work, the current
+	  rate-of-flow for a network device has to be estimated; if you say Y
+	  here, the kernel will do just that.
+
+config NET_CLS
+	bool "Packet classifier API"
+	depends on NET_SCHED
+	---help---
+	  The CBQ scheduling algorithm requires that network packets which are
+	  scheduled to be sent out over a network device be classified
+	  according to some criterion. If you say Y here, you will get a
+	  choice of several different packet classifiers with the following
+	  questions.
+
+	  This will enable you to use Differentiated Services (diffserv) and
+	  Resource Reservation Protocol (RSVP) on your Linux router.
+	  Documentation and software is at
+	  <http://diffserv.sourceforge.net/>.
+
+config NET_CLS_BASIC
+	tristate "Basic classifier"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  only extended matches and actions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_basic.
+
+config NET_CLS_TCINDEX
+	tristate "TC index classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to the tc_index field of the skb. You will want this
+	  feature if you want to implement Differentiated Services using
+	  sch_dsmark. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_tcindex.
+
+config NET_CLS_ROUTE4
+	tristate "Routing table based classifier"
+	depends on NET_CLS
+	select NET_CLS_ROUTE
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to the route table entry they matched. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_route.
+
+config NET_CLS_ROUTE
+	bool
+	default n
+
+config NET_CLS_FW
+	tristate "Firewall based classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to firewall criteria you specified.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_fw.
+
+config NET_CLS_U32
+	tristate "U32 classifier"
+	depends on NET_CLS
+	help
+	  If you say Y here, you will be able to classify outgoing packets
+	  according to their destination address. If unsure, say Y.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_u32.
+
+config CLS_U32_PERF
+	bool "U32 classifier performance counters"
+	depends on NET_CLS_U32
+	help
+	  gathers stats that could be used to tune u32 classifier performance.
+	  Requires a new iproute2
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_CLS_IND
+	bool "classify input device (slows things u32/fw) "
+	depends on NET_CLS_U32 || NET_CLS_FW
+	help
+	  This option will be killed eventually when a 
+          metadata action appears because it slows things a little
+          Available only for u32 and fw classifiers.
+	  Requires a new iproute2
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config CLS_U32_MARK
+	bool "Use nfmark as a key in U32 classifier"
+	depends on NET_CLS_U32 && NETFILTER
+	help
+	  This allows you to match mark in a u32 filter.
+	  Example:
+	  tc filter add dev eth0 protocol ip parent 1:0 prio 5 u32 \
+		match mark 0x0090 0xffff \
+		match ip dst 4.4.4.4 \
+		flowid 1:90
+	  You must use a new iproute2 to use this feature.
+
+config NET_CLS_RSVP
+	tristate "Special RSVP classifier"
+	depends on NET_CLS && NET_QOS
+	---help---
+	  The Resource Reservation Protocol (RSVP) permits end systems to
+	  request a minimum and maximum data flow rate for a connection; this
+	  is important for real time data such as streaming sound or video.
+
+	  Say Y here if you want to be able to classify outgoing packets based
+	  on their RSVP requests.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_rsvp.
+
+config NET_CLS_RSVP6
+	tristate "Special RSVP classifier for IPv6"
+	depends on NET_CLS && NET_QOS
+	---help---
+	  The Resource Reservation Protocol (RSVP) permits end systems to
+	  request a minimum and maximum data flow rate for a connection; this
+	  is important for real time data such as streaming sound or video.
+
+	  Say Y here if you want to be able to classify outgoing packets based
+	  on their RSVP requests and you are using the new Internet Protocol
+	  IPv6 as opposed to the older and more common IPv4.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_rsvp6.
+
+config NET_EMATCH
+	bool "Extended Matches"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to use extended matches on top of classifiers
+	  and select the extended matches below.
+
+	  Extended matches are small classification helpers not worth writing
+	  a separate classifier.
+
+	  You must have a recent version of the iproute2 tools in order to use
+	  extended matches.
+
+config NET_EMATCH_STACK
+	int "Stack size"
+	depends on NET_EMATCH
+	default "32"
+	---help---
+	  Size of the local stack variable used while evaluating the tree of
+	  ematches. Limits the depth of the tree, i.e. the number of
+	  encapsulated precedences. Every level requires 4 bytes of addtional
+	  stack space.
+
+config NET_EMATCH_CMP
+	tristate "Simple packet data comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  simple packet data comparisons for 8, 16, and 32bit values.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_cmp.
+
+config NET_EMATCH_NBYTE
+	tristate "Multi byte comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  multiple byte comparisons mainly useful for IPv6 address comparisons.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_nbyte.
+
+config NET_EMATCH_U32
+	tristate "U32 hashing key"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  the famous u32 key in combination with logic relations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_u32.
+
+config NET_EMATCH_META
+	tristate "Metadata"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be ablt to classify packets based on
+	  metadata such as load average, netfilter attributes, socket
+	  attributes and routing decisions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_meta.
+
+config NET_CLS_ACT
+	bool "Packet ACTION"
+	depends on EXPERIMENTAL && NET_CLS && NET_QOS
+	---help---
+	This option requires you have a new iproute2. It enables
+	tc extensions which can be used with tc classifiers.
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_ACT_POLICE
+	tristate "Policing Actions"
+        depends on NET_CLS_ACT 
+        ---help---
+        If you are using a newer iproute2 select this one, otherwise use one
+	below to select a policer.
+	  You MUST NOT turn this on if you dont have an update iproute2.
+
+config NET_ACT_GACT
+        tristate "generic Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        You must have new iproute2 to use this feature.
+        This adds simple filtering actions like drop, accept etc.
+
+config GACT_PROB
+        bool "generic Actions probability"
+        depends on NET_ACT_GACT
+        ---help---
+        Allows generic actions to be randomly or deterministically used.
+
+config NET_ACT_MIRRED
+        tristate "Packet In/Egress redirecton/mirror Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        requires new iproute2
+        This allows packets to be mirrored or redirected to netdevices
+
+config NET_ACT_IPT
+        tristate "iptables Actions"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        ---help---
+        requires new iproute2
+        This allows iptables targets to be used by tc filters
+
+config NET_ACT_PEDIT
+        tristate "Generic Packet Editor Actions"
+        depends on NET_CLS_ACT
+        ---help---
+        requires new iproute2
+        This allows for packets to be generically edited
+
+config NET_CLS_POLICE
+	bool "Traffic policing (needed for in/egress)"
+	depends on NET_CLS && NET_QOS && NET_CLS_ACT!=y
+	help
+	  Say Y to support traffic policing (bandwidth limits).  Needed for
+	  ingress and egress rate limiting.
+
diff --git a/net/sched/Makefile b/net/sched/Makefile
new file mode 100644
index 00000000000..431e55786ef
--- /dev/null
+++ b/net/sched/Makefile
@@ -0,0 +1,41 @@
+#
+# Makefile for the Linux Traffic Control Unit.
+#
+
+obj-y	:= sch_generic.o
+
+obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o
+obj-$(CONFIG_NET_CLS)		+= cls_api.o
+obj-$(CONFIG_NET_CLS_ACT)       += act_api.o
+obj-$(CONFIG_NET_ACT_POLICE)	+= police.o
+obj-$(CONFIG_NET_CLS_POLICE)	+= police.o
+obj-$(CONFIG_NET_ACT_GACT)      += gact.o
+obj-$(CONFIG_NET_ACT_MIRRED)    += mirred.o
+obj-$(CONFIG_NET_ACT_IPT)       += ipt.o
+obj-$(CONFIG_NET_ACT_PEDIT)     += pedit.o
+obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
+obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
+obj-$(CONFIG_NET_SCH_HPFQ)	+= sch_hpfq.o
+obj-$(CONFIG_NET_SCH_HFSC)	+= sch_hfsc.o
+obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
+obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
+obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o 
+obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
+obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
+obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
+obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
+obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
+obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
+obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
+obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
+obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
+obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
+obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
+obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
+obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
+obj-$(CONFIG_NET_EMATCH)	+= ematch.o
+obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
+obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
+obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
+obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
new file mode 100644
index 00000000000..5e6cc371b39
--- /dev/null
+++ b/net/sched/act_api.c
@@ -0,0 +1,894 @@
+/*
+ * net/sched/act_api.c	Packet action API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Author:	Jamal Hadi Salim
+ *
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/sch_generic.h>
+#include <net/act_api.h>
+
+#if 1 /* control */
+#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
+#else
+#define DPRINTK(format, args...)
+#endif
+#if 0 /* data */
+#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args)
+#else
+#define D2PRINTK(format, args...)
+#endif
+
+static struct tc_action_ops *act_base = NULL;
+static DEFINE_RWLOCK(act_mod_lock);
+
+int tcf_register_action(struct tc_action_ops *act)
+{
+	struct tc_action_ops *a, **ap;
+
+	write_lock(&act_mod_lock);
+	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+			write_unlock(&act_mod_lock);
+			return -EEXIST;
+		}
+	}
+	act->next = NULL;
+	*ap = act;
+	write_unlock(&act_mod_lock);
+	return 0;
+}
+
+int tcf_unregister_action(struct tc_action_ops *act)
+{
+	struct tc_action_ops *a, **ap;
+	int err = -ENOENT;
+
+	write_lock(&act_mod_lock);
+	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
+		if (a == act)
+			break;
+	if (a) {
+		*ap = a->next;
+		a->next = NULL;
+		err = 0;
+	}
+	write_unlock(&act_mod_lock);
+	return err;
+}
+
+/* lookup by name */
+static struct tc_action_ops *tc_lookup_action_n(char *kind)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (kind) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (strcmp(kind, a->kind) == 0) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+
+/* lookup by rtattr */
+static struct tc_action_ops *tc_lookup_action(struct rtattr *kind)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (kind) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (rtattr_strcmp(kind, a->kind) == 0) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+
+#if 0
+/* lookup by id */
+static struct tc_action_ops *tc_lookup_action_id(u32 type)
+{
+	struct tc_action_ops *a = NULL;
+
+	if (type) {
+		read_lock(&act_mod_lock);
+		for (a = act_base; a; a = a->next) {
+			if (a->type == type) {
+				if (!try_module_get(a->owner)) {
+					read_unlock(&act_mod_lock);
+					return NULL;
+				}
+				break;
+			}
+		}
+		read_unlock(&act_mod_lock);
+	}
+	return a;
+}
+#endif
+
+int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
+                    struct tcf_result *res)
+{
+	struct tc_action *a;
+	int ret = -1;
+
+	if (skb->tc_verd & TC_NCLS) {
+		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+		D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
+		         skb, skb->input_dev ? skb->input_dev->name : "xxx",
+		         skb->dev->name);
+		ret = TC_ACT_OK;
+		goto exec_done;
+	}
+	while ((a = act) != NULL) {
+repeat:
+		if (a->ops && a->ops->act) {
+			ret = a->ops->act(&skb, a);
+			if (TC_MUNGED & skb->tc_verd) {
+				/* copied already, allow trampling */
+				skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+				skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+			}
+			if (ret != TC_ACT_PIPE)
+				goto exec_done;
+			if (ret == TC_ACT_REPEAT)
+				goto repeat;	/* we need a ttl - JHS */
+		}
+		act = a->next;
+	}
+exec_done:
+	if (skb->tc_classid > 0) {
+		res->classid = skb->tc_classid;
+		res->class = 0;
+		skb->tc_classid = 0;
+	}
+	return ret;
+}
+
+void tcf_action_destroy(struct tc_action *act, int bind)
+{
+	struct tc_action *a;
+
+	for (a = act; a; a = act) {
+		if (a->ops && a->ops->cleanup) {
+			DPRINTK("tcf_action_destroy destroying %p next %p\n",
+			        a, a->next);
+			if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
+				module_put(a->ops->owner);
+			act = act->next;
+			kfree(a);
+		} else { /*FIXME: Remove later - catch insertion bugs*/
+			printk("tcf_action_destroy: BUG? destroying NULL ops\n");
+			act = act->next;
+			kfree(a);
+		}
+	}
+}
+
+int
+tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	int err = -EINVAL;
+
+	if (a->ops == NULL || a->ops->dump == NULL)
+		return err;
+	return a->ops->dump(skb, a, bind, ref);
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	int err = -EINVAL;
+	unsigned char *b = skb->tail;
+	struct rtattr *r;
+
+	if (a->ops == NULL || a->ops->dump == NULL)
+		return err;
+
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	if (tcf_action_copy_stats(skb, a, 0))
+		goto rtattr_failure;
+	r = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+		r->rta_len = skb->tail - (u8*)r;
+		return err;
+	}
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int
+tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+{
+	struct tc_action *a;
+	int err = -EINVAL;
+	unsigned char *b = skb->tail;
+	struct rtattr *r ;
+
+	while ((a = act) != NULL) {
+		r = (struct rtattr*) skb->tail;
+		act = a->next;
+		RTA_PUT(skb, a->order, 0, NULL);
+		err = tcf_action_dump_1(skb, a, bind, ref);
+		if (err < 0)
+			goto rtattr_failure;
+		r->rta_len = skb->tail - (u8*)r;
+	}
+
+	return 0;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -err;
+}
+
+struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
+                                    char *name, int ovr, int bind, int *err)
+{
+	struct tc_action *a;
+	struct tc_action_ops *a_o;
+	char act_name[IFNAMSIZ];
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct rtattr *kind;
+
+	*err = -EINVAL;
+
+	if (name == NULL) {
+		if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+			goto err_out;
+		kind = tb[TCA_ACT_KIND-1];
+		if (kind == NULL)
+			goto err_out;
+		if (rtattr_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+			goto err_out;
+	} else {
+		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+			goto err_out;
+	}
+
+	a_o = tc_lookup_action_n(act_name);
+	if (a_o == NULL) {
+#ifdef CONFIG_KMOD
+		rtnl_unlock();
+		request_module(act_name);
+		rtnl_lock();
+
+		a_o = tc_lookup_action_n(act_name);
+
+		/* We dropped the RTNL semaphore in order to
+		 * perform the module load.  So, even if we
+		 * succeeded in loading the module we have to
+		 * tell the caller to replay the request.  We
+		 * indicate this using -EAGAIN.
+		 */
+		if (a_o != NULL) {
+			*err = -EAGAIN;
+			goto err_mod;
+		}
+#endif
+		goto err_out;
+	}
+
+	*err = -ENOMEM;
+	a = kmalloc(sizeof(*a), GFP_KERNEL);
+	if (a == NULL)
+		goto err_mod;
+	memset(a, 0, sizeof(*a));
+
+	/* backward compatibility for policer */
+	if (name == NULL)
+		*err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind);
+	else
+		*err = a_o->init(rta, est, a, ovr, bind);
+	if (*err < 0)
+		goto err_free;
+
+	/* module count goes up only when brand new policy is created
+	   if it exists and is only bound to in a_o->init() then
+	   ACT_P_CREATED is not returned (a zero is).
+	*/
+	if (*err != ACT_P_CREATED)
+		module_put(a_o->owner);
+	a->ops = a_o;
+	DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
+
+	*err = 0;
+	return a;
+
+err_free:
+	kfree(a);
+err_mod:
+	module_put(a_o->owner);
+err_out:
+	return NULL;
+}
+
+struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est,
+                                  char *name, int ovr, int bind, int *err)
+{
+	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct tc_action *head = NULL, *act, *act_prev = NULL;
+	int i;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) {
+		*err = -EINVAL;
+		return head;
+	}
+
+	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		act = tcf_action_init_1(tb[i], est, name, ovr, bind, err);
+		if (act == NULL)
+			goto err;
+		act->order = i+1;
+
+		if (head == NULL)
+			head = act;
+		else
+			act_prev->next = act;
+		act_prev = act;
+	}
+	return head;
+
+err:
+	if (head != NULL)
+		tcf_action_destroy(head, bind);
+	return NULL;
+}
+
+int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
+			  int compat_mode)
+{
+	int err = 0;
+	struct gnet_dump d;
+	struct tcf_act_hdr *h = a->priv;
+	
+	if (h == NULL)
+		goto errout;
+
+	/* compat_mode being true specifies a call that is supposed
+	 * to add additional backward compatiblity statistic TLVs.
+	 */
+	if (compat_mode) {
+		if (a->type == TCA_OLD_COMPAT)
+			err = gnet_stats_start_copy_compat(skb, 0,
+				TCA_STATS, TCA_XSTATS, h->stats_lock, &d);
+		else
+			return 0;
+	} else
+		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
+			h->stats_lock, &d);
+
+	if (err < 0)
+		goto errout;
+
+	if (a->ops != NULL && a->ops->get_stats != NULL)
+		if (a->ops->get_stats(skb, a) < 0)
+			goto errout;
+
+	if (gnet_stats_copy_basic(&d, &h->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(&d, &h->qstats) < 0)
+		goto errout;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto errout;
+
+	return 0;
+
+errout:
+	return -1;
+}
+
+static int
+tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
+             unsigned flags, int event, int bind, int ref)
+{
+	struct tcamsg *t;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct rtattr *x;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
+	nlh->nlmsg_flags = flags;
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+	
+	x = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	if (tcf_action_dump(skb, a, bind, ref) < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8*)x;
+	
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
+{
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+	if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+	return err;
+}
+
+static struct tc_action *
+tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err)
+{
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct tc_action *a;
+	int index;
+
+	*err = -EINVAL;
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+		return NULL;
+
+	if (tb[TCA_ACT_INDEX - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_ACT_INDEX - 1]) < sizeof(index))
+		return NULL;
+	index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]);
+
+	*err = -ENOMEM;
+	a = kmalloc(sizeof(struct tc_action), GFP_KERNEL);
+	if (a == NULL)
+		return NULL;
+	memset(a, 0, sizeof(struct tc_action));
+
+	*err = -EINVAL;
+	a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]);
+	if (a->ops == NULL)
+		goto err_free;
+	if (a->ops->lookup == NULL)
+		goto err_mod;
+	*err = -ENOENT;
+	if (a->ops->lookup(a, index) == 0)
+		goto err_mod;
+
+	module_put(a->ops->owner);
+	*err = 0;
+	return a;
+err_mod:
+	module_put(a->ops->owner);
+err_free:
+	kfree(a);
+	return NULL;
+}
+
+static void cleanup_a(struct tc_action *act)
+{
+	struct tc_action *a;
+
+	for (a = act; a; a = act) {
+		act = a->next;
+		kfree(a);
+	}
+}
+
+static struct tc_action *create_a(int i)
+{
+	struct tc_action *act;
+
+	act = kmalloc(sizeof(*act), GFP_KERNEL);
+	if (act == NULL) {
+		printk("create_a: failed to alloc!\n");
+		return NULL;
+	}
+	memset(act, 0, sizeof(*act));
+	act->order = i;
+	return act;
+}
+
+static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
+{
+	struct sk_buff *skb;
+	unsigned char *b;
+	struct nlmsghdr *nlh;
+	struct tcamsg *t;
+	struct netlink_callback dcb;
+	struct rtattr *x;
+	struct rtattr *tb[TCA_ACT_MAX+1];
+	struct rtattr *kind;
+	struct tc_action *a = create_a(0);
+	int err = -EINVAL;
+
+	if (a == NULL) {
+		printk("tca_action_flush: couldnt create tc_action\n");
+		return err;
+	}
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb) {
+		printk("tca_action_flush: failed skb alloc\n");
+		kfree(a);
+		return -ENOBUFS;
+	}
+
+	b = (unsigned char *)skb->tail;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
+		goto err_out;
+
+	kind = tb[TCA_ACT_KIND-1];
+	a->ops = tc_lookup_action(kind);
+	if (a->ops == NULL)
+		goto err_out;
+
+	nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t));
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+
+	x = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+	if (err < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8 *) x;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_flags |= NLM_F_ROOT;
+	module_put(a->ops->owner);
+	kfree(a);
+	err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	if (err > 0)
+		return 0;
+
+	return err;
+
+rtattr_failure:
+	module_put(a->ops->owner);
+nlmsg_failure:
+err_out:
+	kfree_skb(skb);
+	kfree(a);
+	return err;
+}
+
+static int
+tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
+{
+	int i, ret = 0;
+	struct rtattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct tc_action *head = NULL, *act, *act_prev = NULL;
+
+	if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0)
+		return -EINVAL;
+
+	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+		if (tb[0] != NULL && tb[1] == NULL)
+			return tca_action_flush(tb[0], n, pid);
+	}
+
+	for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		act = tcf_action_get_1(tb[i], n, pid, &ret);
+		if (act == NULL)
+			goto err;
+		act->order = i+1;
+
+		if (head == NULL)
+			head = act;
+		else
+			act_prev->next = act;
+		act_prev = act;
+	}
+
+	if (event == RTM_GETACTION)
+		ret = act_get_notify(pid, n, head, event);
+	else { /* delete */
+		struct sk_buff *skb;
+
+		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+
+		if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event,
+		                 0, 1) <= 0) {
+			kfree_skb(skb);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		/* now do the delete */
+		tcf_action_destroy(head, 0);
+		ret = rtnetlink_send(skb, pid, RTMGRP_TC,
+		                     n->nlmsg_flags&NLM_F_ECHO);
+		if (ret > 0)
+			return 0;
+		return ret;
+	}
+err:
+	cleanup_a(head);
+	return ret;
+}
+
+static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
+                          unsigned flags)
+{
+	struct tcamsg *t;
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+	struct rtattr *x;
+	unsigned char *b;
+	int err = 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	b = (unsigned char *)skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
+	nlh->nlmsg_flags = flags;
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+	
+	x = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	if (tcf_action_dump(skb, a, 0, 0) < 0)
+		goto rtattr_failure;
+
+	x->rta_len = skb->tail - (u8*)x;
+	
+	nlh->nlmsg_len = skb->tail - b;
+	NETLINK_CB(skb).dst_groups = RTMGRP_TC;
+	
+	err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO);
+	if (err > 0)
+		err = 0;
+	return err;
+
+rtattr_failure:
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+	
+static int
+tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int ovr)
+{
+	int ret = 0;
+	struct tc_action *act;
+	struct tc_action *a;
+	u32 seq = n->nlmsg_seq;
+
+	act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret);
+	if (act == NULL)
+		goto done;
+
+	/* dump then free all the actions after update; inserted policy
+	 * stays intact
+	 * */
+	ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+	for (a = act; a; a = act) {
+		act = a->next;
+		kfree(a);
+	}
+done:
+	return ret;
+}
+
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct rtattr **tca = arg;
+	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	int ret = 0, ovr = 0;
+
+	if (tca[TCA_ACT_TAB-1] == NULL) {
+		printk("tc_ctl_action: received NO action attribs\n");
+		return -EINVAL;
+	}
+
+	/* n->nlmsg_flags&NLM_F_CREATE
+	 * */
+	switch (n->nlmsg_type) {
+	case RTM_NEWACTION:
+		/* we are going to assume all other flags
+		 * imply create only if it doesnt exist
+		 * Note that CREATE | EXCL implies that
+		 * but since we want avoid ambiguity (eg when flags
+		 * is zero) then just set this
+		 */
+		if (n->nlmsg_flags&NLM_F_REPLACE)
+			ovr = 1;
+replay:
+		ret = tcf_action_add(tca[TCA_ACT_TAB-1], n, pid, ovr);
+		if (ret == -EAGAIN)
+			goto replay;
+		break;
+	case RTM_DELACTION:
+		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_DELACTION);
+		break;
+	case RTM_GETACTION:
+		ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_GETACTION);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+static char *
+find_dump_kind(struct nlmsghdr *n)
+{
+	struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
+	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+	struct rtattr *rta[TCAA_MAX + 1];
+	struct rtattr *kind;
+	int min_len = NLMSG_LENGTH(sizeof(struct tcamsg));
+	int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len);
+	struct rtattr *attr = (void *) n + NLMSG_ALIGN(min_len);
+
+	if (rtattr_parse(rta, TCAA_MAX, attr, attrlen) < 0)
+		return NULL;
+	tb1 = rta[TCA_ACT_TAB - 1];
+	if (tb1 == NULL)
+		return NULL;
+
+	if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(tb1),
+	                 NLMSG_ALIGN(RTA_PAYLOAD(tb1))) < 0)
+		return NULL;
+	if (tb[0] == NULL)
+		return NULL;
+
+	if (rtattr_parse(tb2, TCA_ACT_MAX, RTA_DATA(tb[0]),
+	                 RTA_PAYLOAD(tb[0])) < 0)
+		return NULL;
+	kind = tb2[TCA_ACT_KIND-1];
+
+	return (char *) RTA_DATA(kind);
+}
+
+static int
+tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct rtattr *x;
+	struct tc_action_ops *a_o;
+	struct tc_action a;
+	int ret = 0;
+	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+	char *kind = find_dump_kind(cb->nlh);
+
+	if (kind == NULL) {
+		printk("tc_dump_action: action bad kind\n");
+		return 0;
+	}
+
+	a_o = tc_lookup_action_n(kind);
+	if (a_o == NULL) {
+		printk("failed to find %s\n", kind);
+		return 0;
+	}
+
+	memset(&a, 0, sizeof(struct tc_action));
+	a.ops = a_o;
+
+	if (a_o->walk == NULL) {
+		printk("tc_dump_action: %s !capable of dumping table\n", kind);
+		goto rtattr_failure;
+	}
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	                cb->nlh->nlmsg_type, sizeof(*t));
+	t = NLMSG_DATA(nlh);
+	t->tca_family = AF_UNSPEC;
+
+	x = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
+
+	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
+	if (ret < 0)
+		goto rtattr_failure;
+
+	if (ret > 0) {
+		x->rta_len = skb->tail - (u8 *) x;
+		ret = skb->len;
+	} else
+		skb_trim(skb, (u8*)x - skb->data);
+
+	nlh->nlmsg_len = skb->tail - b;
+	if (NETLINK_CB(cb->skb).pid && ret)
+		nlh->nlmsg_flags |= NLM_F_MULTI;
+	module_put(a_o->owner);
+	return skb->len;
+
+rtattr_failure:
+nlmsg_failure:
+	module_put(a_o->owner);
+	skb_trim(skb, b - skb->data);
+	return skb->len;
+}
+
+static int __init tc_action_init(void)
+{
+	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+
+	if (link_p) {
+		link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
+		link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
+	}
+
+	printk("TC classifier action (bugs to netdev@oss.sgi.com cc "
+	       "hadi@cyberus.ca)\n");
+	return 0;
+}
+
+subsys_initcall(tc_action_init);
+
+EXPORT_SYMBOL(tcf_register_action);
+EXPORT_SYMBOL(tcf_unregister_action);
+EXPORT_SYMBOL(tcf_action_exec);
+EXPORT_SYMBOL(tcf_action_dump_1);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
new file mode 100644
index 00000000000..56e66c3fe0f
--- /dev/null
+++ b/net/sched/cls_api.c
@@ -0,0 +1,642 @@
+/*
+ * net/sched/cls_api.c	Packet classifier API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+#if 0 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+/* The list of all installed classifier types */
+
+static struct tcf_proto_ops *tcf_proto_base;
+
+/* Protects list of registered TC modules. It is pure SMP lock. */
+static DEFINE_RWLOCK(cls_mod_lock);
+
+/* Find classifier type by string name */
+
+static struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind)
+{
+	struct tcf_proto_ops *t = NULL;
+
+	if (kind) {
+		read_lock(&cls_mod_lock);
+		for (t = tcf_proto_base; t; t = t->next) {
+			if (rtattr_strcmp(kind, t->kind) == 0) {
+				if (!try_module_get(t->owner))
+					t = NULL;
+				break;
+			}
+		}
+		read_unlock(&cls_mod_lock);
+	}
+	return t;
+}
+
+/* Register(unregister) new classifier type */
+
+int register_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+	struct tcf_proto_ops *t, **tp;
+	int rc = -EEXIST;
+
+	write_lock(&cls_mod_lock);
+	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+		if (!strcmp(ops->kind, t->kind))
+			goto out;
+
+	ops->next = NULL;
+	*tp = ops;
+	rc = 0;
+out:
+	write_unlock(&cls_mod_lock);
+	return rc;
+}
+
+int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+	struct tcf_proto_ops *t, **tp;
+	int rc = -ENOENT;
+
+	write_lock(&cls_mod_lock);
+	for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+		if (t == ops)
+			break;
+
+	if (!t)
+		goto out;
+	*tp = t->next;
+	rc = 0;
+out:
+	write_unlock(&cls_mod_lock);
+	return rc;
+}
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct tcf_proto *tp, unsigned long fh, int event);
+
+
+/* Select new prio value from the range, managed by kernel. */
+
+static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
+{
+	u32 first = TC_H_MAKE(0xC0000000U,0U);
+
+	if (tp)
+		first = tp->prio-1;
+
+	return first;
+}
+
+/* Add/change/delete/get a filter node */
+
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct rtattr **tca;
+	struct tcmsg *t;
+	u32 protocol;
+	u32 prio;
+	u32 nprio;
+	u32 parent;
+	struct net_device *dev;
+	struct Qdisc  *q;
+	struct tcf_proto **back, **chain;
+	struct tcf_proto *tp;
+	struct tcf_proto_ops *tp_ops;
+	struct Qdisc_class_ops *cops;
+	unsigned long cl;
+	unsigned long fh;
+	int err;
+
+replay:
+	tca = arg;
+	t = NLMSG_DATA(n);
+	protocol = TC_H_MIN(t->tcm_info);
+	prio = TC_H_MAJ(t->tcm_info);
+	nprio = prio;
+	parent = t->tcm_parent;
+	cl = 0;
+
+	if (prio == 0) {
+		/* If no priority is given, user wants we allocated it. */
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			return -ENOENT;
+		prio = TC_H_MAKE(0x80000000U,0U);
+	}
+
+	/* Find head of filter chain. */
+
+	/* Find link */
+	if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	/* Find qdisc */
+	if (!parent) {
+		q = dev->qdisc_sleeping;
+		parent = q->handle;
+	} else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL)
+		return -EINVAL;
+
+	/* Is it classful? */
+	if ((cops = q->ops->cl_ops) == NULL)
+		return -EINVAL;
+
+	/* Do we search for filter, attached to class? */
+	if (TC_H_MIN(parent)) {
+		cl = cops->get(q, parent);
+		if (cl == 0)
+			return -ENOENT;
+	}
+
+	/* And the last stroke */
+	chain = cops->tcf_chain(q, cl);
+	err = -EINVAL;
+	if (chain == NULL)
+		goto errout;
+
+	/* Check the chain for existence of proto-tcf with this priority */
+	for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+		if (tp->prio >= prio) {
+			if (tp->prio == prio) {
+				if (!nprio || (tp->protocol != protocol && protocol))
+					goto errout;
+			} else
+				tp = NULL;
+			break;
+		}
+	}
+
+	if (tp == NULL) {
+		/* Proto-tcf does not exist, create new one */
+
+		if (tca[TCA_KIND-1] == NULL || !protocol)
+			goto errout;
+
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto errout;
+
+
+		/* Create new proto tcf */
+
+		err = -ENOBUFS;
+		if ((tp = kmalloc(sizeof(*tp), GFP_KERNEL)) == NULL)
+			goto errout;
+		err = -EINVAL;
+		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]);
+		if (tp_ops == NULL) {
+#ifdef CONFIG_KMOD
+			struct rtattr *kind = tca[TCA_KIND-1];
+			char name[IFNAMSIZ];
+
+			if (kind != NULL &&
+			    rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+				rtnl_unlock();
+				request_module("cls_%s", name);
+				rtnl_lock();
+				tp_ops = tcf_proto_lookup_ops(kind);
+				/* We dropped the RTNL semaphore in order to
+				 * perform the module load.  So, even if we
+				 * succeeded in loading the module we have to
+				 * replay the request.  We indicate this using
+				 * -EAGAIN.
+				 */
+				if (tp_ops != NULL) {
+					module_put(tp_ops->owner);
+					err = -EAGAIN;
+				}
+			}
+#endif
+			kfree(tp);
+			goto errout;
+		}
+		memset(tp, 0, sizeof(*tp));
+		tp->ops = tp_ops;
+		tp->protocol = protocol;
+		tp->prio = nprio ? : tcf_auto_prio(*back);
+		tp->q = q;
+		tp->classify = tp_ops->classify;
+		tp->classid = parent;
+		if ((err = tp_ops->init(tp)) != 0) {
+			module_put(tp_ops->owner);
+			kfree(tp);
+			goto errout;
+		}
+
+		qdisc_lock_tree(dev);
+		tp->next = *back;
+		*back = tp;
+		qdisc_unlock_tree(dev);
+
+	} else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind))
+		goto errout;
+
+	fh = tp->ops->get(tp, t->tcm_handle);
+
+	if (fh == 0) {
+		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
+			qdisc_lock_tree(dev);
+			*back = tp->next;
+			qdisc_unlock_tree(dev);
+
+			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+			tcf_destroy(tp);
+			err = 0;
+			goto errout;
+		}
+
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto errout;
+	} else {
+		switch (n->nlmsg_type) {
+		case RTM_NEWTFILTER:	
+			err = -EEXIST;
+			if (n->nlmsg_flags&NLM_F_EXCL)
+				goto errout;
+			break;
+		case RTM_DELTFILTER:
+			err = tp->ops->delete(tp, fh);
+			if (err == 0)
+				tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+			goto errout;
+		case RTM_GETTFILTER:
+			err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+			goto errout;
+		default:
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+
+	err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
+	if (err == 0)
+		tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+
+errout:
+	if (cl)
+		cops->put(q, cl);
+	if (err == -EAGAIN)
+		/* Replay the request. */
+		goto replay;
+	return err;
+}
+
+static int
+tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
+	      u32 pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = tp->q->dev->ifindex;
+	tcm->tcm_parent = tp->classid;
+	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind);
+	tcm->tcm_handle = fh;
+	if (RTM_DELTFILTER != event) {
+		tcm->tcm_handle = 0;
+		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
+			goto rtattr_failure;
+	}
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct tcf_proto *tp, unsigned long fh, int event)
+{
+	struct sk_buff *skb;
+	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+}
+
+struct tcf_dump_args
+{
+	struct tcf_walker w;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg)
+{
+	struct tcf_dump_args *a = (void*)arg;
+
+	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
+			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
+}
+
+static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct net_device *dev;
+	struct Qdisc *q;
+	struct tcf_proto *tp, **chain;
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	unsigned long cl = 0;
+	struct Qdisc_class_ops *cops;
+	struct tcf_dump_args arg;
+
+	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+		return skb->len;
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return skb->len;
+
+	read_lock_bh(&qdisc_tree_lock);
+	if (!tcm->tcm_parent)
+		q = dev->qdisc_sleeping;
+	else
+		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+	if (!q)
+		goto out;
+	if ((cops = q->ops->cl_ops) == NULL)
+		goto errout;
+	if (TC_H_MIN(tcm->tcm_parent)) {
+		cl = cops->get(q, tcm->tcm_parent);
+		if (cl == 0)
+			goto errout;
+	}
+	chain = cops->tcf_chain(q, cl);
+	if (chain == NULL)
+		goto errout;
+
+	s_t = cb->args[0];
+
+	for (tp=*chain, t=0; tp; tp = tp->next, t++) {
+		if (t < s_t) continue;
+		if (TC_H_MAJ(tcm->tcm_info) &&
+		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
+			continue;
+		if (TC_H_MIN(tcm->tcm_info) &&
+		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		if (cb->args[1] == 0) {
+			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) {
+				break;
+			}
+			cb->args[1] = 1;
+		}
+		if (tp->ops->walk == NULL)
+			continue;
+		arg.w.fn = tcf_node_dump;
+		arg.skb = skb;
+		arg.cb = cb;
+		arg.w.stop = 0;
+		arg.w.skip = cb->args[1]-1;
+		arg.w.count = 0;
+		tp->ops->walk(tp, &arg.w);
+		cb->args[1] = arg.w.count+1;
+		if (arg.w.stop)
+			break;
+	}
+
+	cb->args[0] = t;
+
+errout:
+	if (cl)
+		cops->put(q, cl);
+out:
+	read_unlock_bh(&qdisc_tree_lock);
+	dev_put(dev);
+	return skb->len;
+}
+
+void
+tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (exts->action) {
+		tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
+		exts->action = NULL;
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (exts->police) {
+		tcf_police_release(exts->police, TCA_ACT_UNBIND);
+		exts->police = NULL;
+	}
+#endif
+}
+
+
+int
+tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
+	          struct rtattr *rate_tlv, struct tcf_exts *exts,
+	          struct tcf_ext_map *map)
+{
+	memset(exts, 0, sizeof(*exts));
+	
+#ifdef CONFIG_NET_CLS_ACT
+	{
+		int err;
+		struct tc_action *act;
+
+		if (map->police && tb[map->police-1]) {
+			act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police",
+				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+			if (act == NULL)
+				return err;
+
+			act->type = TCA_OLD_COMPAT;
+			exts->action = act;
+		} else if (map->action && tb[map->action-1]) {
+			act = tcf_action_init(tb[map->action-1], rate_tlv, NULL,
+				TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+			if (act == NULL)
+				return err;
+
+			exts->action = act;
+		}
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (map->police && tb[map->police-1]) {
+		struct tcf_police *p;
+		
+		p = tcf_police_locate(tb[map->police-1], rate_tlv);
+		if (p == NULL)
+			return -EINVAL;
+
+		exts->police = p;
+	} else if (map->action && tb[map->action-1])
+		return -EOPNOTSUPP;
+#else
+	if ((map->action && tb[map->action-1]) ||
+	    (map->police && tb[map->police-1]))
+		return -EOPNOTSUPP;
+#endif
+
+	return 0;
+}
+
+void
+tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
+	        struct tcf_exts *src)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (src->action) {
+		struct tc_action *act;
+		tcf_tree_lock(tp);
+		act = xchg(&dst->action, src->action);
+		tcf_tree_unlock(tp);
+		if (act)
+			tcf_action_destroy(act, TCA_ACT_UNBIND);
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (src->police) {
+		struct tcf_police *p;
+		tcf_tree_lock(tp);
+		p = xchg(&dst->police, src->police);
+		tcf_tree_unlock(tp);
+		if (p)
+			tcf_police_release(p, TCA_ACT_UNBIND);
+	}
+#endif
+}
+
+int
+tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
+	      struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (map->action && exts->action) {
+		/*
+		 * again for backward compatible mode - we want
+		 * to work with both old and new modes of entering
+		 * tc data even if iproute2  was newer - jhs
+		 */
+		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+
+		if (exts->action->type != TCA_OLD_COMPAT) {
+			RTA_PUT(skb, map->action, 0, NULL);
+			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+				goto rtattr_failure;
+			p_rta->rta_len = skb->tail - (u8*)p_rta;
+		} else if (map->police) {
+			RTA_PUT(skb, map->police, 0, NULL);
+			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+				goto rtattr_failure;
+			p_rta->rta_len = skb->tail - (u8*)p_rta;
+		}
+	}
+#elif defined CONFIG_NET_CLS_POLICE
+	if (map->police && exts->police) {
+		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+
+		RTA_PUT(skb, map->police, 0, NULL);
+
+		if (tcf_police_dump(skb, exts->police) < 0)
+			goto rtattr_failure;
+
+		p_rta->rta_len = skb->tail - (u8*)p_rta;
+	}
+#endif
+	return 0;
+rtattr_failure: __attribute__ ((unused))
+	return -1;
+}
+
+int
+tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
+	            struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (exts->action)
+		if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
+			goto rtattr_failure;
+#elif defined CONFIG_NET_CLS_POLICE
+	if (exts->police)
+		if (tcf_police_dump_stats(skb, exts->police) < 0)
+			goto rtattr_failure;
+#endif
+	return 0;
+rtattr_failure: __attribute__ ((unused))
+	return -1;
+}
+
+static int __init tc_filter_init(void)
+{
+	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+
+	/* Setup rtnetlink links. It is made here to avoid
+	   exporting large number of public symbols.
+	 */
+
+	if (link_p) {
+		link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
+		link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
+	}
+	return 0;
+}
+
+subsys_initcall(tc_filter_init);
+
+EXPORT_SYMBOL(register_tcf_proto_ops);
+EXPORT_SYMBOL(unregister_tcf_proto_ops);
+EXPORT_SYMBOL(tcf_exts_validate);
+EXPORT_SYMBOL(tcf_exts_destroy);
+EXPORT_SYMBOL(tcf_exts_change);
+EXPORT_SYMBOL(tcf_exts_dump);
+EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
new file mode 100644
index 00000000000..0d2d4415f33
--- /dev/null
+++ b/net/sched/cls_basic.c
@@ -0,0 +1,303 @@
+/*
+ * net/sched/cls_basic.c	Basic Packet Classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct basic_head
+{
+	u32			hgenerator;
+	struct list_head	flist;
+};
+
+struct basic_filter
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+};
+
+static struct tcf_ext_map basic_ext_map = {
+	.action = TCA_BASIC_ACT,
+	.police = TCA_BASIC_POLICE
+};
+
+static int basic_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	int r;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+		*res = f->res;
+		r = tcf_exts_exec(skb, &f->exts, res);
+		if (r < 0)
+			continue;
+		return r;
+	}
+	return -1;
+}
+
+static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+static void basic_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int basic_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void basic_delete_filter(struct tcf_proto *tp,
+				       struct basic_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+static void basic_destroy(struct tcf_proto *tp)
+{
+	struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+	struct basic_filter *f, *n;
+	
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		basic_delete_filter(tp, f);
+	}
+}
+
+static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *t, *f = (struct basic_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			basic_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+				  unsigned long base, struct rtattr **tb,
+				  struct rtattr *est)
+{
+	int err = -EINVAL;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tb[TCA_BASIC_CLASSID-1])
+		if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
+			return err;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t);
+	if (err < 0)
+		goto errout;
+
+	if (tb[TCA_BASIC_CLASSID-1]) {
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+		        struct rtattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct rtattr *tb[TCA_BASIC_MAX];
+	struct basic_filter *f = (struct basic_filter *) *arg;
+
+	if (tca[TCA_OPTIONS-1] == NULL)
+		return -EINVAL;
+
+	if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			return -EINVAL;
+		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	}
+
+	err = -ENOBUFS;
+	if (head == NULL) {
+		head = kmalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			goto errout;
+
+		memset(head, 0, sizeof(*head));
+		INIT_LIST_HEAD(&head->flist);
+		tp->root = head;
+	}
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (f == NULL)
+		goto errout;
+	memset(f, 0, sizeof(*f));
+
+	err = -EINVAL;
+	if (handle)
+		f->handle = handle;
+	else {
+		int i = 0x80000000;
+		do {
+			if (++head->hgenerator == 0x7FFFFFFF)
+				head->hgenerator = 1;
+		} while (--i > 0 && basic_get(tp, head->hgenerator));
+
+		if (i <= 0) {
+			printk(KERN_ERR "Insufficient number of handles\n");
+			goto errout;
+		}
+
+		f->handle = head->hgenerator;
+	}
+
+	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	if (err < 0)
+		goto errout;
+
+	tcf_tree_lock(tp);
+	list_add(&f->link, &head->flist);
+	tcf_tree_unlock(tp);
+	*arg = (unsigned long) f;
+
+	return 0;
+errout:
+	if (*arg == 0UL && f)
+		kfree(f);
+
+	return err;
+}
+
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int basic_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct basic_filter *f = (struct basic_filter *) fh;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = (skb->tail - b);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_basic_ops = {
+	.kind		=	"basic",
+	.classify	=	basic_classify,
+	.init		=	basic_init,
+	.destroy	=	basic_destroy,
+	.get		=	basic_get,
+	.put		=	basic_put,
+	.change		=	basic_change,
+	.delete		=	basic_delete,
+	.walk		=	basic_walk,
+	.dump		=	basic_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_basic(void)
+{
+	return register_tcf_proto_ops(&cls_basic_ops);
+}
+
+static void __exit exit_basic(void) 
+{
+	unregister_tcf_proto_ops(&cls_basic_ops);
+}
+
+module_init(init_basic)
+module_exit(exit_basic)
+MODULE_LICENSE("GPL");
+
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
new file mode 100644
index 00000000000..fdfc83af3d1
--- /dev/null
+++ b/net/sched/cls_fw.c
@@ -0,0 +1,378 @@
+/*
+ * net/sched/cls_fw.c	Classifier mapping ipchains' fwmark to traffic class.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
+ * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
+ *
+ * JHS: We should remove the CONFIG_NET_CLS_IND from here
+ * eventually when the meta match extension is made available
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/netfilter.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct fw_head
+{
+	struct fw_filter *ht[256];
+};
+
+struct fw_filter
+{
+	struct fw_filter	*next;
+	u32			id;
+	struct tcf_result	res;
+#ifdef CONFIG_NET_CLS_IND
+	char			indev[IFNAMSIZ];
+#endif /* CONFIG_NET_CLS_IND */
+	struct tcf_exts		exts;
+};
+
+static struct tcf_ext_map fw_ext_map = {
+	.action = TCA_FW_ACT,
+	.police = TCA_FW_POLICE
+};
+
+static __inline__ int fw_hash(u32 handle)
+{
+	return handle&0xFF;
+}
+
+static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f;
+	int r;
+#ifdef CONFIG_NETFILTER
+	u32 id = skb->nfmark;
+#else
+	u32 id = 0;
+#endif
+
+	if (head != NULL) {
+		for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+			if (f->id == id) {
+				*res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+				if (!tcf_match_indev(skb, f->indev))
+					continue;
+#endif /* CONFIG_NET_CLS_IND */
+				r = tcf_exts_exec(skb, &f->exts, res);
+				if (r < 0)
+					continue;
+
+				return r;
+			}
+		}
+	} else {
+		/* old method */
+		if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+			res->classid = id;
+			res->class = 0;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f;
+
+	if (head == NULL)
+		return 0;
+
+	for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+		if (f->id == handle)
+			return (unsigned long)f;
+	}
+	return 0;
+}
+
+static void fw_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int fw_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void
+fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void fw_destroy(struct tcf_proto *tp)
+{
+	struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+	struct fw_filter *f;
+	int h;
+
+	if (head == NULL)
+		return;
+
+	for (h=0; h<256; h++) {
+		while ((f=head->ht[h]) != NULL) {
+			head->ht[h] = f->next;
+			fw_delete_filter(tp, f);
+		}
+	}
+	kfree(head);
+}
+
+static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f = (struct fw_filter*)arg;
+	struct fw_filter **fp;
+
+	if (head == NULL || f == NULL)
+		goto out;
+
+	for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+			fw_delete_filter(tp, f);
+			return 0;
+		}
+	}
+out:
+	return -EINVAL;
+}
+
+static int
+fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
+	struct rtattr **tb, struct rtattr **tca, unsigned long base)
+{
+	struct tcf_exts e;
+	int err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_FW_CLASSID-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != sizeof(u32))
+			goto errout;
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_FW_INDEV-1]) {
+		err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV-1]);
+		if (err < 0)
+			goto errout;
+	}
+#endif /* CONFIG_NET_CLS_IND */
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int fw_change(struct tcf_proto *tp, unsigned long base,
+		     u32 handle,
+		     struct rtattr **tca,
+		     unsigned long *arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_filter *f = (struct fw_filter *) *arg;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_FW_MAX];
+	int err;
+
+	if (!opt)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_FW_MAX, opt) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (f->id != handle && handle)
+			return -EINVAL;
+		return fw_change_attrs(tp, f, tb, tca, base);
+	}
+
+	if (!handle)
+		return -EINVAL;
+
+	if (head == NULL) {
+		head = kmalloc(sizeof(struct fw_head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+		memset(head, 0, sizeof(*head));
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL);
+	if (f == NULL)
+		return -ENOBUFS;
+	memset(f, 0, sizeof(*f));
+
+	f->id = handle;
+
+	err = fw_change_attrs(tp, f, tb, tca, base);
+	if (err < 0)
+		goto errout;
+
+	f->next = head->ht[fw_hash(handle)];
+	tcf_tree_lock(tp);
+	head->ht[fw_hash(handle)] = f;
+	tcf_tree_unlock(tp);
+
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	if (f)
+		kfree(f);
+	return err;
+}
+
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct fw_head *head = (struct fw_head*)tp->root;
+	int h;
+
+	if (head == NULL)
+		arg->stop = 1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h < 256; h++) {
+		struct fw_filter *f;
+
+		for (f = head->ht[h]; f; f = f->next) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static int fw_dump(struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct fw_filter *f = (struct fw_filter*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->id;
+
+	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+		return skb->len;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid);
+#ifdef CONFIG_NET_CLS_IND
+	if (strlen(f->indev))
+		RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
+#endif /* CONFIG_NET_CLS_IND */
+
+	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+		goto rtattr_failure;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fw_ops = {
+	.next		=	NULL,
+	.kind		=	"fw",
+	.classify	=	fw_classify,
+	.init		=	fw_init,
+	.destroy	=	fw_destroy,
+	.get		=	fw_get,
+	.put		=	fw_put,
+	.change		=	fw_change,
+	.delete		=	fw_delete,
+	.walk		=	fw_walk,
+	.dump		=	fw_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_fw(void)
+{
+	return register_tcf_proto_ops(&cls_fw_ops);
+}
+
+static void __exit exit_fw(void) 
+{
+	unregister_tcf_proto_ops(&cls_fw_ops);
+}
+
+module_init(init_fw)
+module_exit(exit_fw)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
new file mode 100644
index 00000000000..02996ac05c7
--- /dev/null
+++ b/net/sched/cls_route.c
@@ -0,0 +1,639 @@
+/*
+ * net/sched/cls_route.c	ROUTE4 classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+/*
+   1. For now we assume that route tags < 256.
+      It allows to use direct table lookups, instead of hash tables.
+   2. For now we assume that "from TAG" and "fromdev DEV" statements
+      are mutually  exclusive.
+   3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ */
+
+struct route4_fastmap
+{
+	struct route4_filter	*filter;
+	u32			id;
+	int			iif;
+};
+
+struct route4_head
+{
+	struct route4_fastmap	fastmap[16];
+	struct route4_bucket	*table[256+1];
+};
+
+struct route4_bucket
+{
+	/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
+	struct route4_filter	*ht[16+16+1];
+};
+
+struct route4_filter
+{
+	struct route4_filter	*next;
+	u32			id;
+	int			iif;
+
+	struct tcf_result	res;
+	struct tcf_exts		exts;
+	u32			handle;
+	struct route4_bucket	*bkt;
+};
+
+#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+
+static struct tcf_ext_map route_ext_map = {
+	.police = TCA_ROUTE4_POLICE,
+	.action = TCA_ROUTE4_ACT
+};
+
+static __inline__ int route4_fastmap_hash(u32 id, int iif)
+{
+	return id&0xF;
+}
+
+static inline
+void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
+{
+	spin_lock_bh(&dev->queue_lock);
+	memset(head->fastmap, 0, sizeof(head->fastmap));
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+static void __inline__
+route4_set_fastmap(struct route4_head *head, u32 id, int iif,
+		   struct route4_filter *f)
+{
+	int h = route4_fastmap_hash(id, iif);
+	head->fastmap[h].id = id;
+	head->fastmap[h].iif = iif;
+	head->fastmap[h].filter = f;
+}
+
+static __inline__ int route4_hash_to(u32 id)
+{
+	return id&0xFF;
+}
+
+static __inline__ int route4_hash_from(u32 id)
+{
+	return (id>>16)&0xF;
+}
+
+static __inline__ int route4_hash_iif(int iif)
+{
+	return 16 + ((iif>>16)&0xF);
+}
+
+static __inline__ int route4_hash_wild(void)
+{
+	return 32;
+}
+
+#define ROUTE4_APPLY_RESULT()					\
+{								\
+	*res = f->res;						\
+	if (tcf_exts_is_available(&f->exts)) {			\
+		int r = tcf_exts_exec(skb, &f->exts, res);	\
+		if (r < 0) {					\
+			dont_cache = 1;				\
+			continue;				\
+		}						\
+		return r;					\
+	} else if (!dont_cache)					\
+		route4_set_fastmap(head, id, iif, f);		\
+	return 0;						\
+}
+
+static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			   struct tcf_result *res)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct dst_entry *dst;
+	struct route4_bucket *b;
+	struct route4_filter *f;
+	u32 id, h;
+	int iif, dont_cache = 0;
+
+	if ((dst = skb->dst) == NULL)
+		goto failure;
+
+	id = dst->tclassid;
+	if (head == NULL)
+		goto old_method;
+
+	iif = ((struct rtable*)dst)->fl.iif;
+
+	h = route4_fastmap_hash(id, iif);
+	if (id == head->fastmap[h].id &&
+	    iif == head->fastmap[h].iif &&
+	    (f = head->fastmap[h].filter) != NULL) {
+		if (f == ROUTE4_FAILURE)
+			goto failure;
+
+		*res = f->res;
+		return 0;
+	}
+
+	h = route4_hash_to(id);
+
+restart:
+	if ((b = head->table[h]) != NULL) {
+		for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+			if (f->id == id)
+				ROUTE4_APPLY_RESULT();
+
+		for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+			if (f->iif == iif)
+				ROUTE4_APPLY_RESULT();
+
+		for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+			ROUTE4_APPLY_RESULT();
+
+	}
+	if (h < 256) {
+		h = 256;
+		id &= ~0xFFFF;
+		goto restart;
+	}
+
+	if (!dont_cache)
+		route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
+failure:
+	return -1;
+
+old_method:
+	if (id && (TC_H_MAJ(id) == 0 ||
+		   !(TC_H_MAJ(id^tp->q->handle)))) {
+		res->classid = id;
+		res->class = 0;
+		return 0;
+	}
+	return -1;
+}
+
+static inline u32 to_hash(u32 id)
+{
+	u32 h = id&0xFF;
+	if (id&0x8000)
+		h += 256;
+	return h;
+}
+
+static inline u32 from_hash(u32 id)
+{
+	id &= 0xFFFF;
+	if (id == 0xFFFF)
+		return 32;
+	if (!(id & 0x8000)) {
+		if (id > 255)
+			return 256;
+		return id&0xF;
+	}
+	return 16 + (id&0xF);
+}
+
+static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_bucket *b;
+	struct route4_filter *f;
+	unsigned h1, h2;
+
+	if (!head)
+		return 0;
+
+	h1 = to_hash(handle);
+	if (h1 > 256)
+		return 0;
+
+	h2 = from_hash(handle>>16);
+	if (h2 > 32)
+		return 0;
+
+	if ((b = head->table[h1]) != NULL) {
+		for (f = b->ht[h2]; f; f = f->next)
+			if (f->handle == handle)
+				return (unsigned long)f;
+	}
+	return 0;
+}
+
+static void route4_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int route4_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void
+route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void route4_destroy(struct tcf_proto *tp)
+{
+	struct route4_head *head = xchg(&tp->root, NULL);
+	int h1, h2;
+
+	if (head == NULL)
+		return;
+
+	for (h1=0; h1<=256; h1++) {
+		struct route4_bucket *b;
+
+		if ((b = head->table[h1]) != NULL) {
+			for (h2=0; h2<=32; h2++) {
+				struct route4_filter *f;
+
+				while ((f = b->ht[h2]) != NULL) {
+					b->ht[h2] = f->next;
+					route4_delete_filter(tp, f);
+				}
+			}
+			kfree(b);
+		}
+	}
+	kfree(head);
+}
+
+static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_filter **fp, *f = (struct route4_filter*)arg;
+	unsigned h = 0;
+	struct route4_bucket *b;
+	int i;
+
+	if (!head || !f)
+		return -EINVAL;
+
+	h = f->handle;
+	b = f->bkt;
+
+	for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+
+			route4_reset_fastmap(tp->q->dev, head, f->id);
+			route4_delete_filter(tp, f);
+
+			/* Strip tree */
+
+			for (i=0; i<=32; i++)
+				if (b->ht[i])
+					return 0;
+
+			/* OK, session has no flows */
+			tcf_tree_lock(tp);
+			head->table[to_hash(h)] = NULL;
+			tcf_tree_unlock(tp);
+
+			kfree(b);
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
+	struct route4_filter *f, u32 handle, struct route4_head *head,
+	struct rtattr **tb, struct rtattr *est, int new)
+{
+	int err;
+	u32 id = 0, to = 0, nhandle = 0x8000;
+	struct route4_filter *fp;
+	unsigned int h1;
+	struct route4_bucket *b;
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_ROUTE4_CLASSID-1])
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < sizeof(u32))
+			goto errout;
+
+	if (tb[TCA_ROUTE4_TO-1]) {
+		if (new && handle & 0x8000)
+			goto errout;
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < sizeof(u32))
+			goto errout;
+		to = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
+		if (to > 0xFF)
+			goto errout;
+		nhandle = to;
+	}
+
+	if (tb[TCA_ROUTE4_FROM-1]) {
+		if (tb[TCA_ROUTE4_IIF-1])
+			goto errout;
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < sizeof(u32))
+			goto errout;
+		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]);
+		if (id > 0xFF)
+			goto errout;
+		nhandle |= id << 16;
+	} else if (tb[TCA_ROUTE4_IIF-1]) {
+		if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < sizeof(u32))
+			goto errout;
+		id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
+		if (id > 0x7FFF)
+			goto errout;
+		nhandle |= (id | 0x8000) << 16;
+	} else
+		nhandle |= 0xFFFF << 16;
+
+	if (handle && new) {
+		nhandle |= handle & 0x7F00;
+		if (nhandle != handle)
+			goto errout;
+	}
+
+	h1 = to_hash(nhandle);
+	if ((b = head->table[h1]) == NULL) {
+		err = -ENOBUFS;
+		b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+		if (b == NULL)
+			goto errout;
+		memset(b, 0, sizeof(*b));
+
+		tcf_tree_lock(tp);
+		head->table[h1] = b;
+		tcf_tree_unlock(tp);
+	} else {
+		unsigned int h2 = from_hash(nhandle >> 16);
+		err = -EEXIST;
+		for (fp = b->ht[h2]; fp; fp = fp->next)
+			if (fp->handle == f->handle)
+				goto errout;
+	}
+
+	tcf_tree_lock(tp);
+	if (tb[TCA_ROUTE4_TO-1])
+		f->id = to;
+
+	if (tb[TCA_ROUTE4_FROM-1])
+		f->id = to | id<<16;
+	else if (tb[TCA_ROUTE4_IIF-1])
+		f->iif = id;
+
+	f->handle = nhandle;
+	f->bkt = b;
+	tcf_tree_unlock(tp);
+
+	if (tb[TCA_ROUTE4_CLASSID-1]) {
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int route4_change(struct tcf_proto *tp, unsigned long base,
+		       u32 handle,
+		       struct rtattr **tca,
+		       unsigned long *arg)
+{
+	struct route4_head *head = tp->root;
+	struct route4_filter *f, *f1, **fp;
+	struct route4_bucket *b;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_ROUTE4_MAX];
+	unsigned int h, th;
+	u32 old_handle = 0;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_ROUTE4_MAX, opt) < 0)
+		return -EINVAL;
+
+	if ((f = (struct route4_filter*)*arg) != NULL) {
+		if (f->handle != handle && handle)
+			return -EINVAL;
+
+		if (f->bkt)
+			old_handle = f->handle;
+
+		err = route4_set_parms(tp, base, f, handle, head, tb,
+			tca[TCA_RATE-1], 0);
+		if (err < 0)
+			return err;
+
+		goto reinsert;
+	}
+
+	err = -ENOBUFS;
+	if (head == NULL) {
+		head = kmalloc(sizeof(struct route4_head), GFP_KERNEL);
+		if (head == NULL)
+			goto errout;
+		memset(head, 0, sizeof(struct route4_head));
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL);
+	if (f == NULL)
+		goto errout;
+	memset(f, 0, sizeof(*f));
+
+	err = route4_set_parms(tp, base, f, handle, head, tb,
+		tca[TCA_RATE-1], 1);
+	if (err < 0)
+		goto errout;
+
+reinsert:
+	h = from_hash(f->handle >> 16);
+	for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+		if (f->handle < f1->handle)
+			break;
+
+	f->next = f1;
+	tcf_tree_lock(tp);
+	*fp = f;
+
+	if (old_handle && f->handle != old_handle) {
+		th = to_hash(old_handle);
+		h = from_hash(old_handle >> 16);
+		if ((b = head->table[th]) != NULL) {
+			for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
+				if (*fp == f) {
+					*fp = f->next;
+					break;
+				}
+			}
+		}
+	}
+	tcf_tree_unlock(tp);
+
+	route4_reset_fastmap(tp->q->dev, head, f->id);
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	if (f)
+		kfree(f);
+	return err;
+}
+
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct route4_head *head = tp->root;
+	unsigned h, h1;
+
+	if (head == NULL)
+		arg->stop = 1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h <= 256; h++) {
+		struct route4_bucket *b = head->table[h];
+
+		if (b) {
+			for (h1 = 0; h1 <= 32; h1++) {
+				struct route4_filter *f;
+
+				for (f = b->ht[h1]; f; f = f->next) {
+					if (arg->count < arg->skip) {
+						arg->count++;
+						continue;
+					}
+					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+						arg->stop = 1;
+						return;
+					}
+					arg->count++;
+				}
+			}
+		}
+	}
+}
+
+static int route4_dump(struct tcf_proto *tp, unsigned long fh,
+		       struct sk_buff *skb, struct tcmsg *t)
+{
+	struct route4_filter *f = (struct route4_filter*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	u32 id;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (!(f->handle&0x8000)) {
+		id = f->id&0xFF;
+		RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id);
+	}
+	if (f->handle&0x80000000) {
+		if ((f->handle>>16) != 0xFFFF)
+			RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif);
+	} else {
+		id = f->id>>16;
+		RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id);
+	}
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid);
+
+	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+		goto rtattr_failure;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_route4_ops = {
+	.next		=	NULL,
+	.kind		=	"route",
+	.classify	=	route4_classify,
+	.init		=	route4_init,
+	.destroy	=	route4_destroy,
+	.get		=	route4_get,
+	.put		=	route4_put,
+	.change		=	route4_change,
+	.delete		=	route4_delete,
+	.walk		=	route4_walk,
+	.dump		=	route4_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_route4(void)
+{
+	return register_tcf_proto_ops(&cls_route4_ops);
+}
+
+static void __exit exit_route4(void)
+{
+	unregister_tcf_proto_ops(&cls_route4_ops);
+}
+
+module_init(init_route4)
+module_exit(exit_route4)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
new file mode 100644
index 00000000000..ad2613790d8
--- /dev/null
+++ b/net/sched/cls_rsvp.c
@@ -0,0 +1,43 @@
+/*
+ * net/sched/cls_rsvp.c	Special RSVP packet classifier for IPv4.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define RSVP_DST_LEN	1
+#define RSVP_ID		"rsvp"
+#define RSVP_OPS	cls_rsvp_ops
+
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
new file mode 100644
index 00000000000..232fb919681
--- /dev/null
+++ b/net/sched/cls_rsvp.h
@@ -0,0 +1,667 @@
+/*
+ * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+/*
+   Comparing to general packet classification problem,
+   RSVP needs only sevaral relatively simple rules:
+
+   * (dst, protocol) are always specified,
+     so that we are able to hash them.
+   * src may be exact, or may be wildcard, so that
+     we can keep a hash table plus one wildcard entry.
+   * source port (or flow label) is important only if src is given.
+
+   IMPLEMENTATION.
+
+   We use a two level hash table: The top level is keyed by
+   destination address and protocol ID, every bucket contains a list
+   of "rsvp sessions", identified by destination address, protocol and
+   DPI(="Destination Port ID"): triple (key, mask, offset).
+
+   Every bucket has a smaller hash table keyed by source address
+   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
+   Every bucket is again a list of "RSVP flows", selected by
+   source address and SPI(="Source Port ID" here rather than
+   "security parameter index"): triple (key, mask, offset).
+
+
+   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
+   and all fragmented packets go to the best-effort traffic class.
+
+
+   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
+   only one "Generalized Port Identifier". So that for classic
+   ah, esp (and udp,tcp) both *pi should coincide or one of them
+   should be wildcard.
+
+   At first sight, this redundancy is just a waste of CPU
+   resources. But DPI and SPI add the possibility to assign different
+   priorities to GPIs. Look also at note 4 about tunnels below.
+
+
+   NOTE 3. One complication is the case of tunneled packets.
+   We implement it as following: if the first lookup
+   matches a special session with "tunnelhdr" value not zero,
+   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
+   In this case, we pull tunnelhdr bytes and restart lookup
+   with tunnel ID added to the list of keys. Simple and stupid 8)8)
+   It's enough for PIMREG and IPIP.
+
+
+   NOTE 4. Two GPIs make it possible to parse even GRE packets.
+   F.e. DPI can select ETH_P_IP (and necessary flags to make
+   tunnelhdr correct) in GRE protocol field and SPI matches
+   GRE key. Is it not nice? 8)8)
+
+
+   Well, as result, despite its simplicity, we get a pretty
+   powerful classification engine.  */
+
+#include <linux/config.h>
+
+struct rsvp_head
+{
+	u32			tmap[256/32];
+	u32			hgenerator;
+	u8			tgenerator;
+	struct rsvp_session	*ht[256];
+};
+
+struct rsvp_session
+{
+	struct rsvp_session	*next;
+	u32			dst[RSVP_DST_LEN];
+	struct tc_rsvp_gpi 	dpi;
+	u8			protocol;
+	u8			tunnelid;
+	/* 16 (src,sport) hash slots, and one wildcard source slot */
+	struct rsvp_filter	*ht[16+1];
+};
+
+
+struct rsvp_filter
+{
+	struct rsvp_filter	*next;
+	u32			src[RSVP_DST_LEN];
+	struct tc_rsvp_gpi	spi;
+	u8			tunnelhdr;
+
+	struct tcf_result	res;
+	struct tcf_exts		exts;
+
+	u32			handle;
+	struct rsvp_session	*sess;
+};
+
+static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
+{
+	unsigned h = dst[RSVP_DST_LEN-1];
+	h ^= h>>16;
+	h ^= h>>8;
+	return (h ^ protocol ^ tunnelid) & 0xFF;
+}
+
+static __inline__ unsigned hash_src(u32 *src)
+{
+	unsigned h = src[RSVP_DST_LEN-1];
+	h ^= h>>16;
+	h ^= h>>8;
+	h ^= h>>4;
+	return h & 0xF;
+}
+
+static struct tcf_ext_map rsvp_ext_map = {
+	.police = TCA_RSVP_POLICE,
+	.action = TCA_RSVP_ACT
+};
+
+#define RSVP_APPLY_RESULT()				\
+{							\
+	int r = tcf_exts_exec(skb, &f->exts, res);	\
+	if (r < 0)					\
+		continue;				\
+	else if (r > 0)					\
+		return r;				\
+}
+	
+static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			 struct tcf_result *res)
+{
+	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session *s;
+	struct rsvp_filter *f;
+	unsigned h1, h2;
+	u32 *dst, *src;
+	u8 protocol;
+	u8 tunnelid = 0;
+	u8 *xprt;
+#if RSVP_DST_LEN == 4
+	struct ipv6hdr *nhptr = skb->nh.ipv6h;
+#else
+	struct iphdr *nhptr = skb->nh.iph;
+#endif
+
+restart:
+
+#if RSVP_DST_LEN == 4
+	src = &nhptr->saddr.s6_addr32[0];
+	dst = &nhptr->daddr.s6_addr32[0];
+	protocol = nhptr->nexthdr;
+	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+#else
+	src = &nhptr->saddr;
+	dst = &nhptr->daddr;
+	protocol = nhptr->protocol;
+	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
+	if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+		return -1;
+#endif
+
+	h1 = hash_dst(dst, protocol, tunnelid);
+	h2 = hash_src(src);
+
+	for (s = sht[h1]; s; s = s->next) {
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		    protocol == s->protocol &&
+		    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
+#if RSVP_DST_LEN == 4
+		    && dst[0] == s->dst[0]
+		    && dst[1] == s->dst[1]
+		    && dst[2] == s->dst[2]
+#endif
+		    && tunnelid == s->tunnelid) {
+
+			for (f = s->ht[h2]; f; f = f->next) {
+				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
+				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+#if RSVP_DST_LEN == 4
+				    && src[0] == f->src[0]
+				    && src[1] == f->src[1]
+				    && src[2] == f->src[2]
+#endif
+				    ) {
+					*res = f->res;
+					RSVP_APPLY_RESULT();
+
+matched:
+					if (f->tunnelhdr == 0)
+						return 0;
+
+					tunnelid = f->res.classid;
+					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+					goto restart;
+				}
+			}
+
+			/* And wildcard bucket... */
+			for (f = s->ht[16]; f; f = f->next) {
+				*res = f->res;
+				RSVP_APPLY_RESULT();
+				goto matched;
+			}
+			return -1;
+		}
+	}
+	return -1;
+}
+
+static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+{
+	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session *s;
+	struct rsvp_filter *f;
+	unsigned h1 = handle&0xFF;
+	unsigned h2 = (handle>>8)&0xFF;
+
+	if (h2 > 16)
+		return 0;
+
+	for (s = sht[h1]; s; s = s->next) {
+		for (f = s->ht[h2]; f; f = f->next) {
+			if (f->handle == handle)
+				return (unsigned long)f;
+		}
+	}
+	return 0;
+}
+
+static void rsvp_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int rsvp_init(struct tcf_proto *tp)
+{
+	struct rsvp_head *data;
+
+	data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
+	if (data) {
+		memset(data, 0, sizeof(struct rsvp_head));
+		tp->root = data;
+		return 0;
+	}
+	return -ENOBUFS;
+}
+
+static inline void
+rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static void rsvp_destroy(struct tcf_proto *tp)
+{
+	struct rsvp_head *data = xchg(&tp->root, NULL);
+	struct rsvp_session **sht;
+	int h1, h2;
+
+	if (data == NULL)
+		return;
+
+	sht = data->ht;
+
+	for (h1=0; h1<256; h1++) {
+		struct rsvp_session *s;
+
+		while ((s = sht[h1]) != NULL) {
+			sht[h1] = s->next;
+
+			for (h2=0; h2<=16; h2++) {
+				struct rsvp_filter *f;
+
+				while ((f = s->ht[h2]) != NULL) {
+					s->ht[h2] = f->next;
+					rsvp_delete_filter(tp, f);
+				}
+			}
+			kfree(s);
+		}
+	}
+	kfree(data);
+}
+
+static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
+	unsigned h = f->handle;
+	struct rsvp_session **sp;
+	struct rsvp_session *s = f->sess;
+	int i;
+
+	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+		if (*fp == f) {
+			tcf_tree_lock(tp);
+			*fp = f->next;
+			tcf_tree_unlock(tp);
+			rsvp_delete_filter(tp, f);
+
+			/* Strip tree */
+
+			for (i=0; i<=16; i++)
+				if (s->ht[i])
+					return 0;
+
+			/* OK, session has no flows */
+			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+			     *sp; sp = &(*sp)->next) {
+				if (*sp == s) {
+					tcf_tree_lock(tp);
+					*sp = s->next;
+					tcf_tree_unlock(tp);
+
+					kfree(s);
+					return 0;
+				}
+			}
+
+			return 0;
+		}
+	}
+	return 0;
+}
+
+static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+{
+	struct rsvp_head *data = tp->root;
+	int i = 0xFFFF;
+
+	while (i-- > 0) {
+		u32 h;
+		if ((data->hgenerator += 0x10000) == 0)
+			data->hgenerator = 0x10000;
+		h = data->hgenerator|salt;
+		if (rsvp_get(tp, h) == 0)
+			return h;
+	}
+	return 0;
+}
+
+static int tunnel_bts(struct rsvp_head *data)
+{
+	int n = data->tgenerator>>5;
+	u32 b = 1<<(data->tgenerator&0x1F);
+	
+	if (data->tmap[n]&b)
+		return 0;
+	data->tmap[n] |= b;
+	return 1;
+}
+
+static void tunnel_recycle(struct rsvp_head *data)
+{
+	struct rsvp_session **sht = data->ht;
+	u32 tmap[256/32];
+	int h1, h2;
+
+	memset(tmap, 0, sizeof(tmap));
+
+	for (h1=0; h1<256; h1++) {
+		struct rsvp_session *s;
+		for (s = sht[h1]; s; s = s->next) {
+			for (h2=0; h2<=16; h2++) {
+				struct rsvp_filter *f;
+
+				for (f = s->ht[h2]; f; f = f->next) {
+					if (f->tunnelhdr == 0)
+						continue;
+					data->tgenerator = f->res.classid;
+					tunnel_bts(data);
+				}
+			}
+		}
+	}
+
+	memcpy(data->tmap, tmap, sizeof(tmap));
+}
+
+static u32 gen_tunnel(struct rsvp_head *data)
+{
+	int i, k;
+
+	for (k=0; k<2; k++) {
+		for (i=255; i>0; i--) {
+			if (++data->tgenerator == 0)
+				data->tgenerator = 1;
+			if (tunnel_bts(data))
+				return data->tgenerator;
+		}
+		tunnel_recycle(data);
+	}
+	return 0;
+}
+
+static int rsvp_change(struct tcf_proto *tp, unsigned long base,
+		       u32 handle,
+		       struct rtattr **tca,
+		       unsigned long *arg)
+{
+	struct rsvp_head *data = tp->root;
+	struct rsvp_filter *f, **fp;
+	struct rsvp_session *s, **sp;
+	struct tc_rsvp_pinfo *pinfo = NULL;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_RSVP_MAX];
+	struct tcf_exts e;
+	unsigned h1, h2;
+	u32 *dst;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
+		return -EINVAL;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	if (err < 0)
+		return err;
+
+	if ((f = (struct rsvp_filter*)*arg) != NULL) {
+		/* Node exists: adjust only classid */
+
+		if (f->handle != handle && handle)
+			goto errout2;
+		if (tb[TCA_RSVP_CLASSID-1]) {
+			f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+			tcf_bind_filter(tp, &f->res, base);
+		}
+
+		tcf_exts_change(tp, &f->exts, &e);
+		return 0;
+	}
+
+	/* Now more serious part... */
+	err = -EINVAL;
+	if (handle)
+		goto errout2;
+	if (tb[TCA_RSVP_DST-1] == NULL)
+		goto errout2;
+
+	err = -ENOBUFS;
+	f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
+	if (f == NULL)
+		goto errout2;
+
+	memset(f, 0, sizeof(*f));
+	h2 = 16;
+	if (tb[TCA_RSVP_SRC-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
+			goto errout;
+		memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+		h2 = hash_src(f->src);
+	}
+	if (tb[TCA_RSVP_PINFO-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
+			goto errout;
+		pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
+		f->spi = pinfo->spi;
+		f->tunnelhdr = pinfo->tunnelhdr;
+	}
+	if (tb[TCA_RSVP_CLASSID-1]) {
+		err = -EINVAL;
+		if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
+			goto errout;
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
+	}
+
+	err = -EINVAL;
+	if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
+		goto errout;
+	dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
+	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
+
+	err = -ENOMEM;
+	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
+		goto errout;
+
+	if (f->tunnelhdr) {
+		err = -EINVAL;
+		if (f->res.classid > 255)
+			goto errout;
+
+		err = -ENOMEM;
+		if (f->res.classid == 0 &&
+		    (f->res.classid = gen_tunnel(data)) == 0)
+			goto errout;
+	}
+
+	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		    pinfo && pinfo->protocol == s->protocol &&
+		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
+#if RSVP_DST_LEN == 4
+		    && dst[0] == s->dst[0]
+		    && dst[1] == s->dst[1]
+		    && dst[2] == s->dst[2]
+#endif
+		    && pinfo->tunnelid == s->tunnelid) {
+
+insert:
+			/* OK, we found appropriate session */
+
+			fp = &s->ht[h2];
+
+			f->sess = s;
+			if (f->tunnelhdr == 0)
+				tcf_bind_filter(tp, &f->res, base);
+
+			tcf_exts_change(tp, &f->exts, &e);
+
+			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
+				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+					break;
+			f->next = *fp;
+			wmb();
+			*fp = f;
+
+			*arg = (unsigned long)f;
+			return 0;
+		}
+	}
+
+	/* No session found. Create new one. */
+
+	err = -ENOBUFS;
+	s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
+	if (s == NULL)
+		goto errout;
+	memset(s, 0, sizeof(*s));
+	memcpy(s->dst, dst, sizeof(s->dst));
+
+	if (pinfo) {
+		s->dpi = pinfo->dpi;
+		s->protocol = pinfo->protocol;
+		s->tunnelid = pinfo->tunnelid;
+	}
+	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
+		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+			break;
+	}
+	s->next = *sp;
+	wmb();
+	*sp = s;
+	
+	goto insert;
+
+errout:
+	if (f)
+		kfree(f);
+errout2:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct rsvp_head *head = tp->root;
+	unsigned h, h1;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h < 256; h++) {
+		struct rsvp_session *s;
+
+		for (s = head->ht[h]; s; s = s->next) {
+			for (h1 = 0; h1 <= 16; h1++) {
+				struct rsvp_filter *f;
+
+				for (f = s->ht[h1]; f; f = f->next) {
+					if (arg->count < arg->skip) {
+						arg->count++;
+						continue;
+					}
+					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+						arg->stop = 1;
+						return;
+					}
+					arg->count++;
+				}
+			}
+		}
+	}
+}
+
+static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
+		     struct sk_buff *skb, struct tcmsg *t)
+{
+	struct rsvp_filter *f = (struct rsvp_filter*)fh;
+	struct rsvp_session *s;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_rsvp_pinfo pinfo;
+
+	if (f == NULL)
+		return skb->len;
+	s = f->sess;
+
+	t->tcm_handle = f->handle;
+
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
+	pinfo.dpi = s->dpi;
+	pinfo.spi = f->spi;
+	pinfo.protocol = s->protocol;
+	pinfo.tunnelid = s->tunnelid;
+	pinfo.tunnelhdr = f->tunnelhdr;
+	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
+	if (f->res.classid)
+		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
+	if (((f->handle>>8)&0xFF) != 16)
+		RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
+
+	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb->tail - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+		goto rtattr_failure;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops RSVP_OPS = {
+	.next		=	NULL,
+	.kind		=	RSVP_ID,
+	.classify	=	rsvp_classify,
+	.init		=	rsvp_init,
+	.destroy	=	rsvp_destroy,
+	.get		=	rsvp_get,
+	.put		=	rsvp_put,
+	.change		=	rsvp_change,
+	.delete		=	rsvp_delete,
+	.walk		=	rsvp_walk,
+	.dump		=	rsvp_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_rsvp(void)
+{
+	return register_tcf_proto_ops(&RSVP_OPS);
+}
+
+static void __exit exit_rsvp(void) 
+{
+	unregister_tcf_proto_ops(&RSVP_OPS);
+}
+
+module_init(init_rsvp)
+module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
new file mode 100644
index 00000000000..fde51f7848e
--- /dev/null
+++ b/net/sched/cls_rsvp6.c
@@ -0,0 +1,44 @@
+/*
+ * net/sched/cls_rsvp6.c	Special RSVP packet classifier for IPv6.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define RSVP_DST_LEN	4
+#define RSVP_ID		"rsvp6"
+#define RSVP_OPS	cls_rsvp6_ops
+
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
new file mode 100644
index 00000000000..404d9d83a7f
--- /dev/null
+++ b/net/sched/cls_tcindex.c
@@ -0,0 +1,537 @@
+/*
+ * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
+ *
+ * Written 1998,1999 by Werner Almesberger, EPFL ICA
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <net/route.h>
+
+
+/*
+ * Not quite sure if we need all the xchgs Alexey uses when accessing things.
+ * Can always add them later ... :)
+ */
+
+/*
+ * Passing parameters to the root seems to be done more awkwardly than really
+ * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
+ * verified. FIXME.
+ */
+
+#define PERFECT_HASH_THRESHOLD	64	/* use perfect hash if not bigger */
+#define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
+
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define	PRIV(tp)	((struct tcindex_data *) (tp)->root)
+
+
+struct tcindex_filter_result {
+	struct tcf_exts		exts;
+	struct tcf_result	res;
+};
+
+struct tcindex_filter {
+	u16 key;
+	struct tcindex_filter_result result;
+	struct tcindex_filter *next;
+};
+
+
+struct tcindex_data {
+	struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
+	struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
+				      NULL if unused */
+	u16 mask;		/* AND key with mask */
+	int shift;		/* shift ANDed key to the right */
+	int hash;		/* hash table size; 0 if undefined */
+	int alloc_hash;		/* allocated size */
+	int fall_through;	/* 0: only classify if explicit match */
+};
+
+static struct tcf_ext_map tcindex_ext_map = {
+	.police = TCA_TCINDEX_POLICE,
+	.action = TCA_TCINDEX_ACT
+};
+
+static inline int
+tcindex_filter_is_set(struct tcindex_filter_result *r)
+{
+	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+}
+
+static struct tcindex_filter_result *
+tcindex_lookup(struct tcindex_data *p, u16 key)
+{
+	struct tcindex_filter *f;
+
+	if (p->perfect)
+		return tcindex_filter_is_set(p->perfect + key) ?
+			p->perfect + key : NULL;
+	else if (p->h) {
+		for (f = p->h[key % p->hash]; f; f = f->next)
+			if (f->key == key)
+				return &f->result;
+	}
+
+	return NULL;
+}
+
+
+static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			    struct tcf_result *res)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *f;
+	int key = (skb->tc_index & p->mask) >> p->shift;
+
+	D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
+
+	f = tcindex_lookup(p, key);
+	if (!f) {
+		if (!p->fall_through)
+			return -1;
+		res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+		res->class = 0;
+		D2PRINTK("alg 0x%x\n",res->classid);
+		return 0;
+	}
+	*res = f->res;
+	D2PRINTK("map 0x%x\n",res->classid);
+
+	return tcf_exts_exec(skb, &f->exts, res);
+}
+
+
+static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r;
+
+	DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
+	if (p->perfect && handle >= p->alloc_hash)
+		return 0;
+	r = tcindex_lookup(p, handle);
+	return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+}
+
+
+static void tcindex_put(struct tcf_proto *tp, unsigned long f)
+{
+	DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
+}
+
+
+static int tcindex_init(struct tcf_proto *tp)
+{
+	struct tcindex_data *p;
+
+	DPRINTK("tcindex_init(tp %p)\n",tp);
+	p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memset(p, 0, sizeof(*p));
+	p->mask = 0xffff;
+	p->hash = DEFAULT_HASH_SIZE;
+	p->fall_through = 1;
+
+	tp->root = p;
+	return 0;
+}
+
+
+static int
+__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+	struct tcindex_filter *f = NULL;
+
+	DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
+	if (p->perfect) {
+		if (!r->res.class)
+			return -ENOENT;
+	} else {
+		int i;
+		struct tcindex_filter **walk = NULL;
+
+		for (i = 0; i < p->hash; i++)
+			for (walk = p->h+i; *walk; walk = &(*walk)->next)
+				if (&(*walk)->result == r)
+					goto found;
+		return -ENOENT;
+
+found:
+		f = *walk;
+		if (lock)
+			tcf_tree_lock(tp);
+		*walk = f->next;
+		if (lock)
+			tcf_tree_unlock(tp);
+	}
+	tcf_unbind_filter(tp, &r->res);
+	tcf_exts_destroy(tp, &r->exts);
+	if (f)
+		kfree(f);
+	return 0;
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	return __tcindex_delete(tp, arg, 1);
+}
+
+static inline int
+valid_perfect_hash(struct tcindex_data *p)
+{
+	return  p->hash > (p->mask >> p->shift);
+}
+
+static int
+tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
+		  struct tcindex_data *p, struct tcindex_filter_result *r,
+		  struct rtattr **tb, struct rtattr *est)
+{
+	int err, balloc = 0;
+	struct tcindex_filter_result new_filter_result, *old_r = r;
+	struct tcindex_filter_result cr;
+	struct tcindex_data cp;
+	struct tcindex_filter *f = NULL; /* make gcc behave */
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map);
+	if (err < 0)
+		return err;
+	
+	memcpy(&cp, p, sizeof(cp));
+	memset(&new_filter_result, 0, sizeof(new_filter_result));
+
+	if (old_r)
+		memcpy(&cr, r, sizeof(cr));
+	else
+		memset(&cr, 0, sizeof(cr));
+
+	err = -EINVAL;
+	if (tb[TCA_TCINDEX_HASH-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32))
+			goto errout;
+		cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
+	}
+
+	if (tb[TCA_TCINDEX_MASK-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16))
+			goto errout;
+		cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
+	}
+
+	if (tb[TCA_TCINDEX_SHIFT-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16))
+			goto errout;
+		cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+	}
+
+	err = -EBUSY;
+	/* Hash already allocated, make sure that we still meet the
+	 * requirements for the allocated hash.
+	 */
+	if (cp.perfect) {
+		if (!valid_perfect_hash(&cp) ||
+		    cp.hash > cp.alloc_hash)
+			goto errout;
+	} else if (cp.h && cp.hash != cp.alloc_hash)
+		goto errout;
+
+	err = -EINVAL;
+	if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32))
+			goto errout;
+		cp.fall_through =
+			*(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
+	}
+
+	if (!cp.hash) {
+		/* Hash not specified, use perfect hash if the upper limit
+		 * of the hashing index is below the threshold.
+		 */
+		if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
+			cp.hash = (cp.mask >> cp.shift)+1;
+		else
+			cp.hash = DEFAULT_HASH_SIZE;
+	}
+
+	if (!cp.perfect && !cp.h)
+		cp.alloc_hash = cp.hash;
+
+	/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
+	 * but then, we'd fail handles that may become valid after some future
+	 * mask change. While this is extremely unlikely to ever matter,
+	 * the check below is safer (and also more backwards-compatible).
+	 */
+	if (cp.perfect || valid_perfect_hash(&cp))
+		if (handle >= cp.alloc_hash)
+			goto errout;
+
+
+	err = -ENOMEM;
+	if (!cp.perfect && !cp.h) {
+		if (valid_perfect_hash(&cp)) {
+			cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL);
+			if (!cp.perfect)
+				goto errout;
+			memset(cp.perfect, 0, cp.hash * sizeof(*r));
+			balloc = 1;
+		} else {
+			cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL);
+			if (!cp.h)
+				goto errout;
+			memset(cp.h, 0, cp.hash * sizeof(f));
+			balloc = 2;
+		}
+	}
+
+	if (cp.perfect)
+		r = cp.perfect + handle;
+	else
+		r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+
+	if (r == &new_filter_result) {
+		f = kmalloc(sizeof(*f), GFP_KERNEL);
+		if (!f)
+			goto errout_alloc;
+		memset(f, 0, sizeof(*f));
+ 	}
+
+	if (tb[TCA_TCINDEX_CLASSID-1]) {
+		cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
+		tcf_bind_filter(tp, &cr.res, base);
+ 	}
+
+	tcf_exts_change(tp, &cr.exts, &e);
+
+	tcf_tree_lock(tp);
+	if (old_r && old_r != r)
+		memset(old_r, 0, sizeof(*old_r));
+
+	memcpy(p, &cp, sizeof(cp));
+	memcpy(r, &cr, sizeof(cr));
+
+	if (r == &new_filter_result) {
+		struct tcindex_filter **fp;
+
+		f->key = handle;
+		f->result = new_filter_result;
+		f->next = NULL;
+		for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
+			/* nothing */;
+		*fp = f;
+ 	}
+	tcf_tree_unlock(tp);
+
+	return 0;
+
+errout_alloc:
+	if (balloc == 1)
+		kfree(cp.perfect);
+	else if (balloc == 2)
+		kfree(cp.h);
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int
+tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+	       struct rtattr **tca, unsigned long *arg)
+{
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_TCINDEX_MAX];
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+
+	DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
+	    "p %p,r %p,*arg 0x%lx\n",
+	    tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+
+	if (!opt)
+		return 0;
+
+	if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0)
+		return -EINVAL;
+
+	return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]);
+}
+
+
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter *f,*next;
+	int i;
+
+	DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
+	if (p->perfect) {
+		for (i = 0; i < p->hash; i++) {
+			if (!p->perfect[i].res.class)
+				continue;
+			if (walker->count >= walker->skip) {
+				if (walker->fn(tp,
+				    (unsigned long) (p->perfect+i), walker)
+				     < 0) {
+					walker->stop = 1;
+					return;
+				}
+			}
+			walker->count++;
+		}
+	}
+	if (!p->h)
+		return;
+	for (i = 0; i < p->hash; i++) {
+		for (f = p->h[i]; f; f = next) {
+			next = f->next;
+			if (walker->count >= walker->skip) {
+				if (walker->fn(tp,(unsigned long) &f->result,
+				    walker) < 0) {
+					walker->stop = 1;
+					return;
+				}
+			}
+			walker->count++;
+		}
+	}
+}
+
+
+static int tcindex_destroy_element(struct tcf_proto *tp,
+    unsigned long arg, struct tcf_walker *walker)
+{
+	return __tcindex_delete(tp, arg, 0);
+}
+
+
+static void tcindex_destroy(struct tcf_proto *tp)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcf_walker walker;
+
+	DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
+	walker.count = 0;
+	walker.skip = 0;
+	walker.fn = &tcindex_destroy_element;
+	tcindex_walk(tp,&walker);
+	if (p->perfect)
+		kfree(p->perfect);
+	if (p->h)
+		kfree(p->h);
+	kfree(p);
+	tp->root = NULL;
+}
+
+
+static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
+    struct sk_buff *skb, struct tcmsg *t)
+{
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
+	    tp,fh,skb,t,p,r,b);
+	DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	if (!fh) {
+		t->tcm_handle = ~0; /* whatever ... */
+		RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
+		RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
+		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
+		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
+		    &p->fall_through);
+		rta->rta_len = skb->tail-b;
+	} else {
+		if (p->perfect) {
+			t->tcm_handle = r-p->perfect;
+		} else {
+			struct tcindex_filter *f;
+			int i;
+
+			t->tcm_handle = 0;
+			for (i = 0; !t->tcm_handle && i < p->hash; i++) {
+				for (f = p->h[i]; !t->tcm_handle && f;
+				     f = f->next) {
+					if (&f->result == r)
+						t->tcm_handle = f->key;
+				}
+			}
+		}
+		DPRINTK("handle = %d\n",t->tcm_handle);
+		if (r->res.class)
+			RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
+
+		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+			goto rtattr_failure;
+		rta->rta_len = skb->tail-b;
+
+		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+			goto rtattr_failure;
+	}
+	
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_tcindex_ops = {
+	.next		=	NULL,
+	.kind		=	"tcindex",
+	.classify	=	tcindex_classify,
+	.init		=	tcindex_init,
+	.destroy	=	tcindex_destroy,
+	.get		=	tcindex_get,
+	.put		=	tcindex_put,
+	.change		=	tcindex_change,
+	.delete		=	tcindex_delete,
+	.walk		=	tcindex_walk,
+	.dump		=	tcindex_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_tcindex(void)
+{
+	return register_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+static void __exit exit_tcindex(void) 
+{
+	unregister_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+module_init(init_tcindex)
+module_exit(exit_tcindex)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
new file mode 100644
index 00000000000..364b87d8645
--- /dev/null
+++ b/net/sched/cls_u32.c
@@ -0,0 +1,828 @@
+/*
+ * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *	The filters are packed to hash tables of key nodes
+ *	with a set of 32bit key/mask pairs at every node.
+ *	Nodes reference next level hash tables etc.
+ *
+ *	This scheme is the best universal classifier I managed to
+ *	invent; it is not super-fast, but it is not slow (provided you
+ *	program it correctly), and general enough.  And its relative
+ *	speed grows as the number of rules becomes larger.
+ *
+ *	It seems that it represents the best middle point between
+ *	speed and manageability both by human and by machine.
+ *
+ *	It is especially useful for link sharing combined with QoS;
+ *	pure RSVP doesn't need such a general approach and can use
+ *	much simpler (and faster) schemes, sort of cls_rsvp.c.
+ *
+ *	JHS: We should remove the CONFIG_NET_CLS_IND from here
+ *	eventually when the meta match extension is made available
+ *
+ *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct tc_u_knode
+{
+	struct tc_u_knode	*next;
+	u32			handle;
+	struct tc_u_hnode	*ht_up;
+	struct tcf_exts		exts;
+#ifdef CONFIG_NET_CLS_IND
+	char                     indev[IFNAMSIZ];
+#endif
+	u8			fshift;
+	struct tcf_result	res;
+	struct tc_u_hnode	*ht_down;
+#ifdef CONFIG_CLS_U32_PERF
+	struct tc_u32_pcnt	*pf;
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+	struct tc_u32_mark	mark;
+#endif
+	struct tc_u32_sel	sel;
+};
+
+struct tc_u_hnode
+{
+	struct tc_u_hnode	*next;
+	u32			handle;
+	u32			prio;
+	struct tc_u_common	*tp_c;
+	int			refcnt;
+	unsigned		divisor;
+	struct tc_u_knode	*ht[1];
+};
+
+struct tc_u_common
+{
+	struct tc_u_common	*next;
+	struct tc_u_hnode	*hlist;
+	struct Qdisc		*q;
+	int			refcnt;
+	u32			hgenerator;
+};
+
+static struct tcf_ext_map u32_ext_map = {
+	.action = TCA_U32_ACT,
+	.police = TCA_U32_POLICE
+};
+
+static struct tc_u_common *u32_list;
+
+static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift)
+{
+	unsigned h = (key & sel->hmask)>>fshift;
+
+	return h;
+}
+
+static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
+{
+	struct {
+		struct tc_u_knode *knode;
+		u8		  *ptr;
+	} stack[TC_U32_MAXDEPTH];
+
+	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+	u8 *ptr = skb->nh.raw;
+	struct tc_u_knode *n;
+	int sdepth = 0;
+	int off2 = 0;
+	int sel = 0;
+#ifdef CONFIG_CLS_U32_PERF
+	int j;
+#endif
+	int i, r;
+
+next_ht:
+	n = ht->ht[sel];
+
+next_knode:
+	if (n) {
+		struct tc_u32_key *key = n->sel.keys;
+
+#ifdef CONFIG_CLS_U32_PERF
+		n->pf->rcnt +=1;
+		j = 0;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+		if ((skb->nfmark & n->mark.mask) != n->mark.val) {
+			n = n->next;
+			goto next_knode;
+		} else {
+			n->mark.success++;
+		}
+#endif
+
+		for (i = n->sel.nkeys; i>0; i--, key++) {
+
+			if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
+				n = n->next;
+				goto next_knode;
+			}
+#ifdef CONFIG_CLS_U32_PERF
+			n->pf->kcnts[j] +=1;
+			j++;
+#endif
+		}
+		if (n->ht_down == NULL) {
+check_terminal:
+			if (n->sel.flags&TC_U32_TERMINAL) {
+
+				*res = n->res;
+#ifdef CONFIG_NET_CLS_IND
+				if (!tcf_match_indev(skb, n->indev)) {
+					n = n->next;
+					goto next_knode;
+				}
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+				n->pf->rhit +=1;
+#endif
+				r = tcf_exts_exec(skb, &n->exts, res);
+				if (r < 0) {
+					n = n->next;
+					goto next_knode;
+				}
+
+				return r;
+			}
+			n = n->next;
+			goto next_knode;
+		}
+
+		/* PUSH */
+		if (sdepth >= TC_U32_MAXDEPTH)
+			goto deadloop;
+		stack[sdepth].knode = n;
+		stack[sdepth].ptr = ptr;
+		sdepth++;
+
+		ht = n->ht_down;
+		sel = 0;
+		if (ht->divisor)
+			sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
+
+		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+			goto next_ht;
+
+		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+			off2 = n->sel.off + 3;
+			if (n->sel.flags&TC_U32_VAROFFSET)
+				off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
+			off2 &= ~3;
+		}
+		if (n->sel.flags&TC_U32_EAT) {
+			ptr += off2;
+			off2 = 0;
+		}
+
+		if (ptr < skb->tail)
+			goto next_ht;
+	}
+
+	/* POP */
+	if (sdepth--) {
+		n = stack[sdepth].knode;
+		ht = n->ht_up;
+		ptr = stack[sdepth].ptr;
+		goto check_terminal;
+	}
+	return -1;
+
+deadloop:
+	if (net_ratelimit())
+		printk("cls_u32: dead loop\n");
+	return -1;
+}
+
+static __inline__ struct tc_u_hnode *
+u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
+{
+	struct tc_u_hnode *ht;
+
+	for (ht = tp_c->hlist; ht; ht = ht->next)
+		if (ht->handle == handle)
+			break;
+
+	return ht;
+}
+
+static __inline__ struct tc_u_knode *
+u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
+{
+	unsigned sel;
+	struct tc_u_knode *n = NULL;
+
+	sel = TC_U32_HASH(handle);
+	if (sel > ht->divisor)
+		goto out;
+
+	for (n = ht->ht[sel]; n; n = n->next)
+		if (n->handle == handle)
+			break;
+out:
+	return n;
+}
+
+
+static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+{
+	struct tc_u_hnode *ht;
+	struct tc_u_common *tp_c = tp->data;
+
+	if (TC_U32_HTID(handle) == TC_U32_ROOT)
+		ht = tp->root;
+	else
+		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
+
+	if (!ht)
+		return 0;
+
+	if (TC_U32_KEY(handle) == 0)
+		return (unsigned long)ht;
+
+	return (unsigned long)u32_lookup_key(ht, handle);
+}
+
+static void u32_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static u32 gen_new_htid(struct tc_u_common *tp_c)
+{
+	int i = 0x800;
+
+	do {
+		if (++tp_c->hgenerator == 0x7FF)
+			tp_c->hgenerator = 1;
+	} while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+
+	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+}
+
+static int u32_init(struct tcf_proto *tp)
+{
+	struct tc_u_hnode *root_ht;
+	struct tc_u_common *tp_c;
+
+	for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
+		if (tp_c->q == tp->q)
+			break;
+
+	root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
+	if (root_ht == NULL)
+		return -ENOBUFS;
+
+	memset(root_ht, 0, sizeof(*root_ht));
+	root_ht->divisor = 0;
+	root_ht->refcnt++;
+	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+	root_ht->prio = tp->prio;
+
+	if (tp_c == NULL) {
+		tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
+		if (tp_c == NULL) {
+			kfree(root_ht);
+			return -ENOBUFS;
+		}
+		memset(tp_c, 0, sizeof(*tp_c));
+		tp_c->q = tp->q;
+		tp_c->next = u32_list;
+		u32_list = tp_c;
+	}
+
+	tp_c->refcnt++;
+	root_ht->next = tp_c->hlist;
+	tp_c->hlist = root_ht;
+	root_ht->tp_c = tp_c;
+
+	tp->root = root_ht;
+	tp->data = tp_c;
+	return 0;
+}
+
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+{
+	tcf_unbind_filter(tp, &n->res);
+	tcf_exts_destroy(tp, &n->exts);
+	if (n->ht_down)
+		n->ht_down->refcnt--;
+#ifdef CONFIG_CLS_U32_PERF
+	if (n && (NULL != n->pf))
+		kfree(n->pf);
+#endif
+	kfree(n);
+	return 0;
+}
+
+static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
+{
+	struct tc_u_knode **kp;
+	struct tc_u_hnode *ht = key->ht_up;
+
+	if (ht) {
+		for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
+			if (*kp == key) {
+				tcf_tree_lock(tp);
+				*kp = key->next;
+				tcf_tree_unlock(tp);
+
+				u32_destroy_key(tp, key);
+				return 0;
+			}
+		}
+	}
+	BUG_TRAP(0);
+	return 0;
+}
+
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+	struct tc_u_knode *n;
+	unsigned h;
+
+	for (h=0; h<=ht->divisor; h++) {
+		while ((n = ht->ht[h]) != NULL) {
+			ht->ht[h] = n->next;
+
+			u32_destroy_key(tp, n);
+		}
+	}
+}
+
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode **hn;
+
+	BUG_TRAP(!ht->refcnt);
+
+	u32_clear_hnode(tp, ht);
+
+	for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
+		if (*hn == ht) {
+			*hn = ht->next;
+			kfree(ht);
+			return 0;
+		}
+	}
+
+	BUG_TRAP(0);
+	return -ENOENT;
+}
+
+static void u32_destroy(struct tcf_proto *tp)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+
+	BUG_TRAP(root_ht != NULL);
+
+	if (root_ht && --root_ht->refcnt == 0)
+		u32_destroy_hnode(tp, root_ht);
+
+	if (--tp_c->refcnt == 0) {
+		struct tc_u_hnode *ht;
+		struct tc_u_common **tp_cp;
+
+		for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
+			if (*tp_cp == tp_c) {
+				*tp_cp = tp_c->next;
+				break;
+			}
+		}
+
+		for (ht=tp_c->hlist; ht; ht = ht->next)
+			u32_clear_hnode(tp, ht);
+
+		while ((ht = tp_c->hlist) != NULL) {
+			tp_c->hlist = ht->next;
+
+			BUG_TRAP(ht->refcnt == 0);
+
+			kfree(ht);
+		};
+
+		kfree(tp_c);
+	}
+
+	tp->data = NULL;
+}
+
+static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+
+	if (ht == NULL)
+		return 0;
+
+	if (TC_U32_KEY(ht->handle))
+		return u32_delete_key(tp, (struct tc_u_knode*)ht);
+
+	if (tp->root == ht)
+		return -EINVAL;
+
+	if (--ht->refcnt == 0)
+		u32_destroy_hnode(tp, ht);
+
+	return 0;
+}
+
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+{
+	struct tc_u_knode *n;
+	unsigned i = 0x7FF;
+
+	for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+		if (i < TC_U32_NODE(n->handle))
+			i = TC_U32_NODE(n->handle);
+	i++;
+
+	return handle|(i>0xFFF ? 0xFFF : i);
+}
+
+static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
+			 struct tc_u_hnode *ht,
+			 struct tc_u_knode *n, struct rtattr **tb,
+			 struct rtattr *est)
+{
+	int err;
+	struct tcf_exts e;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (tb[TCA_U32_LINK-1]) {
+		u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
+		struct tc_u_hnode *ht_down = NULL;
+
+		if (TC_U32_KEY(handle))
+			goto errout;
+
+		if (handle) {
+			ht_down = u32_lookup_ht(ht->tp_c, handle);
+
+			if (ht_down == NULL)
+				goto errout;
+			ht_down->refcnt++;
+		}
+
+		tcf_tree_lock(tp);
+		ht_down = xchg(&n->ht_down, ht_down);
+		tcf_tree_unlock(tp);
+
+		if (ht_down)
+			ht_down->refcnt--;
+	}
+	if (tb[TCA_U32_CLASSID-1]) {
+		n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
+		tcf_bind_filter(tp, &n->res, base);
+	}
+
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_U32_INDEV-1]) {
+		int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
+		if (err < 0)
+			goto errout;
+	}
+#endif
+	tcf_exts_change(tp, &n->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+		      struct rtattr **tca,
+		      unsigned long *arg)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *ht;
+	struct tc_u_knode *n;
+	struct tc_u32_sel *s;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_U32_MAX];
+	u32 htid;
+	int err;
+
+	if (opt == NULL)
+		return handle ? -EINVAL : 0;
+
+	if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0)
+		return -EINVAL;
+
+	if ((n = (struct tc_u_knode*)*arg) != NULL) {
+		if (TC_U32_KEY(n->handle) == 0)
+			return -EINVAL;
+
+		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
+	}
+
+	if (tb[TCA_U32_DIVISOR-1]) {
+		unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);
+
+		if (--divisor > 0x100)
+			return -EINVAL;
+		if (TC_U32_KEY(handle))
+			return -EINVAL;
+		if (handle == 0) {
+			handle = gen_new_htid(tp->data);
+			if (handle == 0)
+				return -ENOMEM;
+		}
+		ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		if (ht == NULL)
+			return -ENOBUFS;
+		memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
+		ht->tp_c = tp_c;
+		ht->refcnt = 0;
+		ht->divisor = divisor;
+		ht->handle = handle;
+		ht->prio = tp->prio;
+		ht->next = tp_c->hlist;
+		tp_c->hlist = ht;
+		*arg = (unsigned long)ht;
+		return 0;
+	}
+
+	if (tb[TCA_U32_HASH-1]) {
+		htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
+		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
+			ht = tp->root;
+			htid = ht->handle;
+		} else {
+			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
+			if (ht == NULL)
+				return -EINVAL;
+		}
+	} else {
+		ht = tp->root;
+		htid = ht->handle;
+	}
+
+	if (ht->divisor < TC_U32_HASH(htid))
+		return -EINVAL;
+
+	if (handle) {
+		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
+			return -EINVAL;
+		handle = htid | TC_U32_NODE(handle);
+	} else
+		handle = gen_new_kid(ht, htid);
+
+	if (tb[TCA_U32_SEL-1] == 0 ||
+	    RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
+		return -EINVAL;
+
+	s = RTA_DATA(tb[TCA_U32_SEL-1]);
+
+	n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+	if (n == NULL)
+		return -ENOBUFS;
+
+	memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
+#ifdef CONFIG_CLS_U32_PERF
+	n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
+	if (n->pf == NULL) {
+		kfree(n);
+		return -ENOBUFS;
+	}
+	memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64));
+#endif
+
+	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+	n->ht_up = ht;
+	n->handle = handle;
+{
+	u8 i = 0;
+	u32 mask = s->hmask;
+	if (mask) {
+		while (!(mask & 1)) {
+			i++;
+			mask>>=1;
+		}
+	}
+	n->fshift = i;
+}
+
+#ifdef CONFIG_CLS_U32_MARK
+	if (tb[TCA_U32_MARK-1]) {
+		struct tc_u32_mark *mark;
+
+		if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) {
+#ifdef CONFIG_CLS_U32_PERF
+			kfree(n->pf);
+#endif
+			kfree(n);
+			return -EINVAL;
+		}
+		mark = RTA_DATA(tb[TCA_U32_MARK-1]);
+		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
+		n->mark.success = 0;
+	}
+#endif
+
+	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]);
+	if (err == 0) {
+		struct tc_u_knode **ins;
+		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
+			if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+				break;
+
+		n->next = *ins;
+		wmb();
+		*ins = n;
+
+		*arg = (unsigned long)n;
+		return 0;
+	}
+#ifdef CONFIG_CLS_U32_PERF
+	if (n && (NULL != n->pf))
+		kfree(n->pf);
+#endif
+	kfree(n);
+	return err;
+}
+
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *ht;
+	struct tc_u_knode *n;
+	unsigned h;
+
+	if (arg->stop)
+		return;
+
+	for (ht = tp_c->hlist; ht; ht = ht->next) {
+		if (ht->prio != tp->prio)
+			continue;
+		if (arg->count >= arg->skip) {
+			if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+		}
+		arg->count++;
+		for (h = 0; h <= ht->divisor; h++) {
+			for (n = ht->ht[h]; n; n = n->next) {
+				if (arg->count < arg->skip) {
+					arg->count++;
+					continue;
+				}
+				if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+					arg->stop = 1;
+					return;
+				}
+				arg->count++;
+			}
+		}
+	}
+}
+
+static int u32_dump(struct tcf_proto *tp, unsigned long fh,
+		     struct sk_buff *skb, struct tcmsg *t)
+{
+	struct tc_u_knode *n = (struct tc_u_knode*)fh;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	if (n == NULL)
+		return skb->len;
+
+	t->tcm_handle = n->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (TC_U32_KEY(n->handle) == 0) {
+		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
+		u32 divisor = ht->divisor+1;
+		RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
+	} else {
+		RTA_PUT(skb, TCA_U32_SEL,
+			sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
+			&n->sel);
+		if (n->ht_up) {
+			u32 htid = n->handle & 0xFFFFF000;
+			RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
+		}
+		if (n->res.classid)
+			RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
+		if (n->ht_down)
+			RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
+
+#ifdef CONFIG_CLS_U32_MARK
+		if (n->mark.val || n->mark.mask)
+			RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
+#endif
+
+		if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+			goto rtattr_failure;
+
+#ifdef CONFIG_NET_CLS_IND
+		if(strlen(n->indev))
+			RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+		RTA_PUT(skb, TCA_U32_PCNT, 
+		sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
+			n->pf);
+#endif
+	}
+
+	rta->rta_len = skb->tail - b;
+	if (TC_U32_KEY(n->handle))
+		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+			goto rtattr_failure;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_u32_ops = {
+	.next		=	NULL,
+	.kind		=	"u32",
+	.classify	=	u32_classify,
+	.init		=	u32_init,
+	.destroy	=	u32_destroy,
+	.get		=	u32_get,
+	.put		=	u32_put,
+	.change		=	u32_change,
+	.delete		=	u32_delete,
+	.walk		=	u32_walk,
+	.dump		=	u32_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_u32(void)
+{
+	printk("u32 classifier\n");
+#ifdef CONFIG_CLS_U32_PERF
+	printk("    Perfomance counters on\n");
+#endif
+#ifdef CONFIG_NET_CLS_POLICE
+	printk("    OLD policer on \n");
+#endif
+#ifdef CONFIG_NET_CLS_IND
+	printk("    input device check on \n");
+#endif
+#ifdef CONFIG_NET_CLS_ACT
+	printk("    Actions configured \n");
+#endif
+	return register_tcf_proto_ops(&cls_u32_ops);
+}
+
+static void __exit exit_u32(void) 
+{
+	unregister_tcf_proto_ops(&cls_u32_ops);
+}
+
+module_init(init_u32)
+module_exit(exit_u32)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
new file mode 100644
index 00000000000..bf1f00f8b1b
--- /dev/null
+++ b/net/sched/em_cmp.c
@@ -0,0 +1,101 @@
+/*
+ * net/sched/em_cmp.c	Simple packet data comparison ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_cmp.h>
+#include <net/pkt_cls.h>
+
+static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
+{
+	return unlikely(cmp->flags & TCF_EM_CMP_TRANS);
+}
+
+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
+	u32 val = 0;
+
+	if (!tcf_valid_offset(skb, ptr, cmp->align))
+		return 0;
+
+	switch (cmp->align) {
+		case TCF_EM_ALIGN_U8:
+			val = *ptr;
+			break;
+
+		case TCF_EM_ALIGN_U16:
+			val = *ptr << 8;
+			val |= *(ptr+1);
+
+			if (cmp_needs_transformation(cmp))
+				val = be16_to_cpu(val);
+			break;
+
+		case TCF_EM_ALIGN_U32:
+			/* Worth checking boundries? The branching seems
+			 * to get worse. Visit again. */
+			val = *ptr << 24;
+			val |= *(ptr+1) << 16;
+			val |= *(ptr+2) << 8;
+			val |= *(ptr+3);
+
+			if (cmp_needs_transformation(cmp))
+				val = be32_to_cpu(val);
+			break;
+
+		default:
+			return 0;
+	}
+
+	if (cmp->mask)
+		val &= cmp->mask;
+
+	switch (cmp->opnd) {
+		case TCF_EM_OPND_EQ:
+			return val == cmp->val;
+		case TCF_EM_OPND_LT:
+			return val < cmp->val;
+		case TCF_EM_OPND_GT:
+			return val > cmp->val;
+	}
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_cmp_ops = {
+	.kind	  = TCF_EM_CMP,
+	.datalen  = sizeof(struct tcf_em_cmp),
+	.match	  = em_cmp_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_cmp_ops.link)
+};
+
+static int __init init_em_cmp(void)
+{
+	return tcf_em_register(&em_cmp_ops);
+}
+
+static void __exit exit_em_cmp(void) 
+{
+	tcf_em_unregister(&em_cmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_cmp);
+module_exit(exit_em_cmp);
+
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
new file mode 100644
index 00000000000..f1eeaf65cee
--- /dev/null
+++ b/net/sched/em_meta.c
@@ -0,0 +1,661 @@
+/*
+ * net/sched/em_meta.c	Metadata ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ * 
+ * 	The metadata ematch compares two meta objects where each object
+ * 	represents either a meta value stored in the kernel or a static
+ * 	value provided by userspace. The objects are not provided by
+ * 	userspace itself but rather a definition providing the information
+ * 	to build them. Every object is of a certain type which must be
+ * 	equal to the object it is being compared to.
+ *
+ * 	The definition of a objects conists of the type (meta type), a
+ * 	identifier (meta id) and additional type specific information.
+ * 	The meta id is either TCF_META_TYPE_VALUE for values provided by
+ * 	userspace or a index to the meta operations table consisting of
+ * 	function pointers to type specific meta data collectors returning
+ * 	the value of the requested meta value.
+ *
+ * 	         lvalue                                   rvalue
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 def  | id: INDEV |                           | id: VALUE |
+ * 	      | data:     |                           | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                       |
+ * 	            ---> meta_ops[INT][INDEV](...)          |
+ *                            |                            |
+ * 	            -----------                             |
+ * 	            V                                       V
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 obj  | id: INDEV |                           | id: VALUE |
+ * 	      | data: 2   |<--data got filled out     | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                         |
+ * 	            --------------> 2  equals 3 <--------------
+ *
+ * 	This is a simplified schema, the complexity varies depending
+ * 	on the meta type. Obviously, the length of the data must also
+ * 	be provided for non-numeric types.
+ *
+ * 	Additionaly, type dependant modifiers such as shift operators
+ * 	or mask may be applied to extend the functionaliy. As of now,
+ * 	the variable length type supports shifting the byte string to
+ * 	the right, eating up any number of octets and thus supporting
+ * 	wildcard interface name comparisons such as "ppp%" matching
+ * 	ppp0..9.
+ *
+ * 	NOTE: Certain meta values depend on other subsystems and are
+ * 	      only available if that subsytem is enabled in the kernel.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/tc_ematch/tc_em_meta.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/pkt_cls.h>
+
+struct meta_obj
+{
+	unsigned long		value;
+	unsigned int		len;
+};
+
+struct meta_value
+{
+	struct tcf_meta_val	hdr;
+	unsigned long		val;
+	unsigned int		len;
+};
+
+struct meta_match
+{
+	struct meta_value	lvalue;
+	struct meta_value	rvalue;
+};
+
+static inline int meta_id(struct meta_value *v)
+{
+	return TCF_META_ID(v->hdr.kind);
+}
+
+static inline int meta_type(struct meta_value *v)
+{
+	return TCF_META_TYPE(v->hdr.kind);
+}
+
+#define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \
+	struct tcf_pkt_info *info, struct meta_value *v, \
+	struct meta_obj *dst, int *err)
+
+/**************************************************************************
+ * System status & misc
+ **************************************************************************/
+
+META_COLLECTOR(int_random)
+{
+	get_random_bytes(&dst->value, sizeof(dst->value));
+}
+
+static inline unsigned long fixed_loadavg(int load)
+{
+	int rnd_load = load + (FIXED_1/200);
+	int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT;
+
+	return ((rnd_load >> FSHIFT) * 100) + rnd_frac;
+}
+
+META_COLLECTOR(int_loadavg_0)
+{
+	dst->value = fixed_loadavg(avenrun[0]);
+}
+
+META_COLLECTOR(int_loadavg_1)
+{
+	dst->value = fixed_loadavg(avenrun[1]);
+}
+
+META_COLLECTOR(int_loadavg_2)
+{
+	dst->value = fixed_loadavg(avenrun[2]);
+}
+
+/**************************************************************************
+ * Device names & indices
+ **************************************************************************/
+
+static inline int int_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = dev->ifindex;
+	return 0;
+}
+
+static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = (unsigned long) dev->name;
+	dst->len = strlen(dev->name);
+	return 0;
+}
+
+META_COLLECTOR(int_dev)
+{
+	*err = int_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(var_dev)
+{
+	*err = var_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(int_indev)
+{
+	*err = int_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(var_indev)
+{
+	*err = var_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(int_realdev)
+{
+	*err = int_dev(skb->real_dev, dst);
+}
+
+META_COLLECTOR(var_realdev)
+{
+	*err = var_dev(skb->real_dev, dst);
+}
+
+/**************************************************************************
+ * skb attributes
+ **************************************************************************/
+
+META_COLLECTOR(int_priority)
+{
+	dst->value = skb->priority;
+}
+
+META_COLLECTOR(int_protocol)
+{
+	/* Let userspace take care of the byte ordering */
+	dst->value = skb->protocol;
+}
+
+META_COLLECTOR(int_security)
+{
+	dst->value = skb->security;
+}
+
+META_COLLECTOR(int_pkttype)
+{
+	dst->value = skb->pkt_type;
+}
+
+META_COLLECTOR(int_pktlen)
+{
+	dst->value = skb->len;
+}
+
+META_COLLECTOR(int_datalen)
+{
+	dst->value = skb->data_len;
+}
+
+META_COLLECTOR(int_maclen)
+{
+	dst->value = skb->mac_len;
+}
+
+/**************************************************************************
+ * Netfilter
+ **************************************************************************/
+
+#ifdef CONFIG_NETFILTER
+META_COLLECTOR(int_nfmark)
+{
+	dst->value = skb->nfmark;
+}
+#endif
+
+/**************************************************************************
+ * Traffic Control
+ **************************************************************************/
+
+META_COLLECTOR(int_tcindex)
+{
+	dst->value = skb->tc_index;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+META_COLLECTOR(int_tcverd)
+{
+	dst->value = skb->tc_verd;
+}
+
+META_COLLECTOR(int_tcclassid)
+{
+	dst->value = skb->tc_classid;
+}
+#endif
+
+/**************************************************************************
+ * Routing
+ **************************************************************************/
+
+#ifdef CONFIG_NET_CLS_ROUTE
+META_COLLECTOR(int_rtclassid)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = skb->dst->tclassid;
+}
+#endif
+
+META_COLLECTOR(int_rtiif)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = ((struct rtable*) skb->dst)->fl.iif;
+}
+
+/**************************************************************************
+ * Meta value collectors assignment table
+ **************************************************************************/
+
+struct meta_ops
+{
+	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
+			       struct meta_value *, struct meta_obj *, int *);
+};
+
+/* Meta value operations table listing all meta value collectors and
+ * assigns them to a type and meta id. */
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		[TCF_META_ID_DEV]	= { .get = meta_var_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_var_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_var_realdev }
+	},
+	[TCF_META_TYPE_INT] = {
+		[TCF_META_ID_RANDOM]	= { .get = meta_int_random },
+		[TCF_META_ID_LOADAVG_0]	= { .get = meta_int_loadavg_0 },
+		[TCF_META_ID_LOADAVG_1]	= { .get = meta_int_loadavg_1 },
+		[TCF_META_ID_LOADAVG_2]	= { .get = meta_int_loadavg_2 },
+		[TCF_META_ID_DEV]	= { .get = meta_int_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_int_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_int_realdev },
+		[TCF_META_ID_PRIORITY]	= { .get = meta_int_priority },
+		[TCF_META_ID_PROTOCOL]	= { .get = meta_int_protocol },
+		[TCF_META_ID_SECURITY]	= { .get = meta_int_security },
+		[TCF_META_ID_PKTTYPE]	= { .get = meta_int_pkttype },
+		[TCF_META_ID_PKTLEN]	= { .get = meta_int_pktlen },
+		[TCF_META_ID_DATALEN]	= { .get = meta_int_datalen },
+		[TCF_META_ID_MACLEN]	= { .get = meta_int_maclen },
+#ifdef CONFIG_NETFILTER
+		[TCF_META_ID_NFMARK]	= { .get = meta_int_nfmark },
+#endif
+		[TCF_META_ID_TCINDEX]	= { .get = meta_int_tcindex },
+#ifdef CONFIG_NET_CLS_ACT
+		[TCF_META_ID_TCVERDICT]	= { .get = meta_int_tcverd },
+		[TCF_META_ID_TCCLASSID]	= { .get = meta_int_tcclassid },
+#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+		[TCF_META_ID_RTCLASSID]	= { .get = meta_int_rtclassid },
+#endif
+		[TCF_META_ID_RTIIF]	= { .get = meta_int_rtiif }
+	}
+};
+
+static inline struct meta_ops * meta_ops(struct meta_value *val)
+{
+	return &__meta_ops[meta_type(val)][meta_id(val)];
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_VAR
+ **************************************************************************/
+
+static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	int r = a->len - b->len;
+
+	if (r == 0)
+		r = memcmp((void *) a->value, (void *) b->value, a->len);
+
+	return r;
+}
+
+static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
+{
+	int len = RTA_PAYLOAD(rta);
+
+	dst->val = (unsigned long) kmalloc(len, GFP_KERNEL);
+	if (dst->val == 0UL)
+		return -ENOMEM;
+	memcpy((void *) dst->val, RTA_DATA(rta), len);
+	dst->len = len;
+	return 0;
+}
+
+static void meta_var_destroy(struct meta_value *v)
+{
+	if (v->val)
+		kfree((void *) v->val);
+}
+
+static void meta_var_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	int shift = v->hdr.shift;
+
+	if (shift && shift < dst->len)
+		dst->len -= shift;
+}
+
+static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->val && v->len)
+		RTA_PUT(skb, tlv, v->len, (void *) v->val);
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_INT
+ **************************************************************************/
+
+static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	/* Let gcc optimize it, the unlikely is not really based on
+	 * some numbers but jump free code for mismatches seems
+	 * more logical. */
+	if (unlikely(a == b))
+		return 0;
+	else if (a < b)
+		return -1;
+	else
+		return 1;
+}
+
+static int meta_int_change(struct meta_value *dst, struct rtattr *rta)
+{
+	if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) {
+		dst->val = *(unsigned long *) RTA_DATA(rta);
+		dst->len = sizeof(unsigned long);
+	} else if (RTA_PAYLOAD(rta) == sizeof(u32)) {
+		dst->val = *(u32 *) RTA_DATA(rta);
+		dst->len = sizeof(u32);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static void meta_int_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	if (v->hdr.shift)
+		dst->value >>= v->hdr.shift;
+
+	if (v->val)
+		dst->value &= v->val;
+}
+
+static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->len == sizeof(unsigned long))
+		RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
+	else if (v->len == sizeof(u32)) {
+		u32 d = v->val;
+		RTA_PUT(skb, tlv, sizeof(d), &d);
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations table
+ **************************************************************************/
+
+struct meta_type_ops
+{
+	void	(*destroy)(struct meta_value *);
+	int	(*compare)(struct meta_obj *, struct meta_obj *);
+	int	(*change)(struct meta_value *, struct rtattr *);
+	void	(*apply_extras)(struct meta_value *, struct meta_obj *);
+	int	(*dump)(struct sk_buff *, struct meta_value *, int);
+};
+
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		.destroy = meta_var_destroy,
+		.compare = meta_var_compare,
+		.change = meta_var_change,
+		.apply_extras = meta_var_apply_extras,
+		.dump = meta_var_dump
+	},
+	[TCF_META_TYPE_INT] = {
+		.compare = meta_int_compare,
+		.change = meta_int_change,
+		.apply_extras = meta_int_apply_extras,
+		.dump = meta_int_dump
+	}
+};
+
+static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+{
+	return &__meta_type_ops[meta_type(v)];
+}
+
+/**************************************************************************
+ * Core
+ **************************************************************************/
+
+static inline int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, 
+			   struct meta_value *v, struct meta_obj *dst)
+{
+	int err = 0;
+
+	if (meta_id(v) == TCF_META_ID_VALUE) {
+		dst->value = v->val;
+		dst->len = v->len;
+		return 0;
+	}
+
+	meta_ops(v)->get(skb, info, v, dst, &err);
+	if (err < 0)
+		return err;
+
+	if (meta_type_ops(v)->apply_extras)
+	    meta_type_ops(v)->apply_extras(v, dst);
+
+	return 0;
+}
+
+static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
+			 struct tcf_pkt_info *info)
+{
+	int r;
+	struct meta_match *meta = (struct meta_match *) m->data;
+	struct meta_obj l_value, r_value;
+
+	if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 ||
+	    meta_get(skb, info, &meta->rvalue, &r_value) < 0)
+		return 0;
+
+	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
+
+	switch (meta->lvalue.hdr.op) {
+		case TCF_EM_OPND_EQ:
+			return !r;
+		case TCF_EM_OPND_LT:
+			return r < 0;
+		case TCF_EM_OPND_GT:
+			return r > 0;
+	}
+
+	return 0;
+}
+
+static inline void meta_delete(struct meta_match *meta)
+{
+	struct meta_type_ops *ops = meta_type_ops(&meta->lvalue);
+
+	if (ops && ops->destroy) {
+		ops->destroy(&meta->lvalue);
+		ops->destroy(&meta->rvalue);
+	}
+
+	kfree(meta);
+}
+
+static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta)
+{
+	if (rta) {
+		if (RTA_PAYLOAD(rta) == 0)
+			return -EINVAL;
+
+		return meta_type_ops(dst)->change(dst, rta);
+	}
+
+	return 0;
+}
+
+static inline int meta_is_supported(struct meta_value *val)
+{
+	return (!meta_id(val) || meta_ops(val)->get);
+}
+
+static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+			  struct tcf_ematch *m)
+{
+	int err = -EINVAL;
+	struct rtattr *tb[TCA_EM_META_MAX];
+	struct tcf_meta_hdr *hdr;
+	struct meta_match *meta = NULL;
+	
+	if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0)
+		goto errout;
+
+	if (tb[TCA_EM_META_HDR-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr))
+		goto errout;
+	hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]);
+
+	if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
+	    TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
+	    TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX ||
+	    TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
+		goto errout;
+
+	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	if (meta == NULL)
+		goto errout;
+	memset(meta, 0, sizeof(*meta));
+
+	memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
+	memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
+
+	if (!meta_is_supported(&meta->lvalue) ||
+	    !meta_is_supported(&meta->rvalue)) {
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+
+	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 ||
+	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0)
+		goto errout;
+
+	m->datalen = sizeof(*meta);
+	m->data = (unsigned long) meta;
+
+	err = 0;
+errout:
+	if (err && meta)
+		meta_delete(meta);
+	return err;
+}
+
+static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+	if (m)
+		meta_delete((struct meta_match *) m->data);
+}
+
+static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct meta_match *meta = (struct meta_match *) em->data;
+	struct tcf_meta_hdr hdr;
+	struct meta_type_ops *ops;
+
+	memset(&hdr, 0, sizeof(hdr));
+	memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
+	memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
+
+	RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+
+	ops = meta_type_ops(&meta->lvalue);
+	if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
+	    ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
+		goto rtattr_failure;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}		
+
+static struct tcf_ematch_ops em_meta_ops = {
+	.kind	  = TCF_EM_META,
+	.change	  = em_meta_change,
+	.match	  = em_meta_match,
+	.destroy  = em_meta_destroy,
+	.dump	  = em_meta_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_meta_ops.link)
+};
+
+static int __init init_em_meta(void)
+{
+	return tcf_em_register(&em_meta_ops);
+}
+
+static void __exit exit_em_meta(void) 
+{
+	tcf_em_unregister(&em_meta_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_meta);
+module_exit(exit_em_meta);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
new file mode 100644
index 00000000000..71ea926a9f0
--- /dev/null
+++ b/net/sched/em_nbyte.c
@@ -0,0 +1,82 @@
+/*
+ * net/sched/em_nbyte.c	N-Byte ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_nbyte.h>
+#include <net/pkt_cls.h>
+
+struct nbyte_data
+{
+	struct tcf_em_nbyte	hdr;
+	char			pattern[0];
+};
+	
+static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
+			   struct tcf_ematch *em)
+{
+	struct tcf_em_nbyte *nbyte = data;
+
+	if (data_len < sizeof(*nbyte) ||
+	    data_len < (sizeof(*nbyte) + nbyte->len))
+		return -EINVAL;
+
+	em->datalen = sizeof(*nbyte) + nbyte->len;
+	em->data = (unsigned long) kmalloc(em->datalen, GFP_KERNEL);
+	if (em->data == 0UL)
+		return -ENOBUFS;
+
+	memcpy((void *) em->data, data, em->datalen);
+
+	return 0;
+}
+
+static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
+			  struct tcf_pkt_info *info)
+{
+	struct nbyte_data *nbyte = (struct nbyte_data *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer);
+
+	ptr += nbyte->hdr.off;
+
+	if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
+		return 0;
+
+	return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+}
+
+static struct tcf_ematch_ops em_nbyte_ops = {
+	.kind	  = TCF_EM_NBYTE,
+	.change	  = em_nbyte_change,
+	.match	  = em_nbyte_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_nbyte_ops.link)
+};
+
+static int __init init_em_nbyte(void)
+{
+	return tcf_em_register(&em_nbyte_ops);
+}
+
+static void __exit exit_em_nbyte(void) 
+{
+	tcf_em_unregister(&em_nbyte_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_nbyte);
+module_exit(exit_em_nbyte);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
new file mode 100644
index 00000000000..34e7e51e601
--- /dev/null
+++ b/net/sched/em_u32.c
@@ -0,0 +1,63 @@
+/*
+ * net/sched/em_u32.c	U32 Ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Based on net/sched/cls_u32.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
+	unsigned char *ptr = skb->nh.raw;
+	
+	if (info) {
+		if (info->ptr)
+			ptr = info->ptr;
+		ptr += (info->nexthdr & key->offmask);
+	}
+
+	ptr += key->off;
+
+	if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
+		return 0;
+	
+	return !(((*(u32*) ptr)  ^ key->val) & key->mask);
+}
+
+static struct tcf_ematch_ops em_u32_ops = {
+	.kind	  = TCF_EM_U32,
+	.datalen  = sizeof(struct tc_u32_key),
+	.match	  = em_u32_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_u32_ops.link)
+};
+
+static int __init init_em_u32(void)
+{
+	return tcf_em_register(&em_u32_ops);
+}
+
+static void __exit exit_em_u32(void) 
+{
+	tcf_em_unregister(&em_u32_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_u32);
+module_exit(exit_em_u32);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
new file mode 100644
index 00000000000..ebfe2e7d21b
--- /dev/null
+++ b/net/sched/ematch.c
@@ -0,0 +1,524 @@
+/*
+ * net/sched/ematch.c		Extended Match API
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * An extended match (ematch) is a small classification tool not worth
+ * writing a full classifier for. Ematches can be interconnected to form
+ * a logic expression and get attached to classifiers to extend their
+ * functionatlity.
+ *
+ * The userspace part transforms the logic expressions into an array
+ * consisting of multiple sequences of interconnected ematches separated
+ * by markers. Precedence is implemented by a special ematch kind
+ * referencing a sequence beyond the marker of the current sequence
+ * causing the current position in the sequence to be pushed onto a stack
+ * to allow the current position to be overwritten by the position referenced
+ * in the special ematch. Matching continues in the new sequence until a
+ * marker is reached causing the position to be restored from the stack.
+ *
+ * Example:
+ *          A AND (B1 OR B2) AND C AND D
+ *
+ *              ------->-PUSH-------
+ *    -->--    /         -->--      \   -->--
+ *   /     \  /         /     \      \ /     \
+ * +-------+-------+-------+-------+-------+--------+
+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
+ * +-------+-------+-------+-------+-------+--------+
+ *                    \                      /
+ *                     --------<-POP---------
+ *
+ * where B is a virtual ematch referencing to sequence starting with B1.
+ * 
+ * ==========================================================================
+ *
+ * How to write an ematch in 60 seconds
+ * ------------------------------------
+ * 
+ *   1) Provide a matcher function:
+ *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
+ *                          struct tcf_pkt_info *info)
+ *      {
+ *      	struct mydata *d = (struct mydata *) m->data;
+ *
+ *      	if (...matching goes here...)
+ *      		return 1;
+ *      	else
+ *      		return 0;
+ *      }
+ *
+ *   2) Fill out a struct tcf_ematch_ops:
+ *      static struct tcf_ematch_ops my_ops = {
+ *      	.kind = unique id,
+ *      	.datalen = sizeof(struct mydata),
+ *      	.match = my_match,
+ *      	.owner = THIS_MODULE,
+ *      };
+ *
+ *   3) Register/Unregister your ematch:
+ *      static int __init init_my_ematch(void)
+ *      {
+ *      	return tcf_em_register(&my_ops);
+ *      }
+ *
+ *      static void __exit exit_my_ematch(void)
+ *      {
+ *      	return tcf_em_unregister(&my_ops);
+ *      }
+ *
+ *      module_init(init_my_ematch);
+ *      module_exit(exit_my_ematch);
+ *
+ *   4) By now you should have two more seconds left, barely enough to
+ *      open up a beer to watch the compilation going.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <config/net/ematch/stack.h>
+
+static LIST_HEAD(ematch_ops);
+static DEFINE_RWLOCK(ematch_mod_lock);
+
+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+{
+	struct tcf_ematch_ops *e = NULL;
+
+	read_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (kind == e->kind) {
+			if (!try_module_get(e->owner))
+				e = NULL;
+			read_unlock(&ematch_mod_lock);
+			return e;
+		}
+	}
+	read_unlock(&ematch_mod_lock);
+
+	return NULL;
+}
+
+/**
+ * tcf_em_register - register an extended match
+ * 
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their presence.
+ * The given @ops must have kind set to a unique identifier and the
+ * callback match() must be implemented. All other callbacks are optional
+ * and a fallback implementation is used instead.
+ *
+ * Returns -EEXISTS if an ematch of the same kind has already registered.
+ */
+int tcf_em_register(struct tcf_ematch_ops *ops)
+{
+	int err = -EEXIST;
+	struct tcf_ematch_ops *e;
+
+	if (ops->match == NULL)
+		return -EINVAL;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link)
+		if (ops->kind == e->kind)
+			goto errout;
+
+	list_add_tail(&ops->link, &ematch_ops);
+	err = 0;
+errout:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+/**
+ * tcf_em_unregister - unregster and extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their disappearance
+ * for examples when the module gets unloaded. The @ops parameter must be
+ * the same as the one used for registration.
+ *
+ * Returns -ENOENT if no matching ematch was found.
+ */
+int tcf_em_unregister(struct tcf_ematch_ops *ops)
+{
+	int err = 0;
+	struct tcf_ematch_ops *e;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (e == ops) {
+			list_del(&e->link);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
+						   int index)
+{
+	return &tree->matches[index];
+}
+
+
+static int tcf_em_validate(struct tcf_proto *tp,
+			   struct tcf_ematch_tree_hdr *tree_hdr,
+			   struct tcf_ematch *em, struct rtattr *rta, int idx)
+{
+	int err = -EINVAL;
+	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
+	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
+	void *data = (void *) em_hdr + sizeof(*em_hdr);
+
+	if (!TCF_EM_REL_VALID(em_hdr->flags))
+		goto errout;
+
+	if (em_hdr->kind == TCF_EM_CONTAINER) {
+		/* Special ematch called "container", carries an index
+		 * referencing an external ematch sequence. */
+		u32 ref;
+
+		if (data_len < sizeof(ref))
+			goto errout;
+		ref = *(u32 *) data;
+
+		if (ref >= tree_hdr->nmatches)
+			goto errout;
+
+		/* We do not allow backward jumps to avoid loops and jumps
+		 * to our own position are of course illegal. */
+		if (ref <= idx)
+			goto errout;
+
+		
+		em->data = ref;
+	} else {
+		/* Note: This lookup will increase the module refcnt
+		 * of the ematch module referenced. In case of a failure,
+		 * a destroy function is called by the underlying layer
+		 * which automatically releases the reference again, therefore
+		 * the module MUST not be given back under any circumstances
+		 * here. Be aware, the destroy function assumes that the
+		 * module is held if the ops field is non zero. */
+		em->ops = tcf_em_lookup(em_hdr->kind);
+
+		if (em->ops == NULL) {
+			err = -ENOENT;
+			goto errout;
+		}
+
+		/* ematch module provides expected length of data, so we
+		 * can do a basic sanity check. */
+		if (em->ops->datalen && data_len < em->ops->datalen)
+			goto errout;
+
+		if (em->ops->change) {
+			err = em->ops->change(tp, data, data_len, em);
+			if (err < 0)
+				goto errout;
+		} else if (data_len > 0) {
+			/* ematch module doesn't provide an own change
+			 * procedure and expects us to allocate and copy
+			 * the ematch data.
+			 *
+			 * TCF_EM_SIMPLE may be specified stating that the
+			 * data only consists of a u32 integer and the module
+			 * does not expected a memory reference but rather
+			 * the value carried. */
+			if (em_hdr->flags & TCF_EM_SIMPLE) {
+				if (data_len < sizeof(u32))
+					goto errout;
+				em->data = *(u32 *) data;
+			} else {
+				void *v = kmalloc(data_len, GFP_KERNEL);
+				if (v == NULL) {
+					err = -ENOBUFS;
+					goto errout;
+				}
+				memcpy(v, data, data_len);
+				em->data = (unsigned long) v;
+			}
+		}
+	}
+
+	em->matchid = em_hdr->matchid;
+	em->flags = em_hdr->flags;
+	em->datalen = data_len;
+
+	err = 0;
+errout:
+	return err;
+}
+
+/**
+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
+ *
+ * @tp: classifier kind handle
+ * @rta: ematch tree configuration TLV
+ * @tree: destination ematch tree variable to store the resulting
+ *        ematch tree.
+ *
+ * This function validates the given configuration TLV @rta and builds an
+ * ematch tree in @tree. The resulting tree must later be copied into
+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
+ * provide the ematch tree variable of the private classifier data directly,
+ * the changes would not be locked properly.
+ *
+ * Returns a negative error code if the configuration TLV contains errors.
+ */
+int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
+			 struct tcf_ematch_tree *tree)
+{
+	int idx, list_len, matches_len, err = -EINVAL;
+	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
+	struct rtattr *rt_match, *rt_hdr, *rt_list;
+	struct tcf_ematch_tree_hdr *tree_hdr;
+	struct tcf_ematch *em;
+
+	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
+		goto errout;
+
+	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
+	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
+
+	if (rt_hdr == NULL || rt_list == NULL)
+		goto errout;
+
+	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
+	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
+		goto errout;
+
+	tree_hdr = RTA_DATA(rt_hdr);
+	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
+
+	rt_match = RTA_DATA(rt_list);
+	list_len = RTA_PAYLOAD(rt_list);
+	matches_len = tree_hdr->nmatches * sizeof(*em);
+
+	tree->matches = kmalloc(matches_len, GFP_KERNEL);
+	if (tree->matches == NULL)
+		goto errout;
+	memset(tree->matches, 0, matches_len);
+
+	/* We do not use rtattr_parse_nested here because the maximum
+	 * number of attributes is unknown. This saves us the allocation
+	 * for a tb buffer which would serve no purpose at all.
+	 * 
+	 * The array of rt attributes is parsed in the order as they are
+	 * provided, their type must be incremental from 1 to n. Even
+	 * if it does not serve any real purpose, a failure of sticking
+	 * to this policy will result in parsing failure. */
+	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
+		err = -EINVAL;
+
+		if (rt_match->rta_type != (idx + 1))
+			goto errout_abort;
+
+		if (idx >= tree_hdr->nmatches)
+			goto errout_abort;
+
+		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
+			goto errout_abort;
+
+		em = tcf_em_get_match(tree, idx);
+
+		err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
+		if (err < 0)
+			goto errout_abort;
+
+		rt_match = RTA_NEXT(rt_match, list_len);
+	}
+
+	/* Check if the number of matches provided by userspace actually
+	 * complies with the array of matches. The number was used for
+	 * the validation of references and a mismatch could lead to
+	 * undefined references during the matching process. */
+	if (idx != tree_hdr->nmatches) {
+		err = -EINVAL;
+		goto errout_abort;
+	}
+
+	err = 0;
+errout:
+	return err;
+
+errout_abort:
+	tcf_em_tree_destroy(tp, tree);
+	return err;
+}
+
+/**
+ * tcf_em_tree_destroy - destroy an ematch tree
+ *
+ * @tp: classifier kind handle
+ * @tree: ematch tree to be deleted
+ *
+ * This functions destroys an ematch tree previously created by
+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
+ * the ematch tree is not in use before calling this function.
+ */
+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+{
+	int i;
+
+	if (tree->matches == NULL)
+		return;
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+
+		if (em->ops) {
+			if (em->ops->destroy)
+				em->ops->destroy(tp, em);
+			else if (!tcf_em_is_simple(em) && em->data)
+				kfree((void *) em->data);
+			module_put(em->ops->owner);
+		}
+	}
+	
+	tree->hdr.nmatches = 0;
+	kfree(tree->matches);
+}
+
+/**
+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
+ *
+ * @skb: skb holding the rtnl message
+ * @t: ematch tree to be dumped
+ * @tlv: TLV type to be used to encapsulate the tree
+ *
+ * This function dumps a ematch tree into a rtnl message. It is valid to
+ * call this function while the ematch tree is in use.
+ *
+ * Returns -1 if the skb tailroom is insufficient.
+ */
+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
+{
+	int i;
+	struct rtattr * top_start = (struct rtattr*) skb->tail;
+	struct rtattr * list_start;
+
+	RTA_PUT(skb, tlv, 0, NULL);
+	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+
+	list_start = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+		struct tcf_ematch_hdr em_hdr = {
+			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
+			.matchid = em->matchid,
+			.flags = em->flags
+		};
+
+		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+
+		if (em->ops && em->ops->dump) {
+			if (em->ops->dump(skb, em) < 0)
+				goto rtattr_failure;
+		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
+			u32 u = em->data;
+			RTA_PUT_NOHDR(skb, sizeof(u), &u);
+		} else if (em->datalen > 0)
+			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
+
+		match_start->rta_len = skb->tail - (u8*) match_start;
+	}
+
+	list_start->rta_len = skb->tail - (u8 *) list_start;
+	top_start->rta_len = skb->tail - (u8 *) top_start;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
+			       struct tcf_pkt_info *info)
+{
+	int r = em->ops->match(skb, em, info);
+	return tcf_em_is_inverted(em) ? !r : r;
+}
+
+/* Do not use this function directly, use tcf_em_tree_match instead */
+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
+			struct tcf_pkt_info *info)
+{
+	int stackp = 0, match_idx = 0, res = 0;
+	struct tcf_ematch *cur_match;
+	int stack[CONFIG_NET_EMATCH_STACK];
+
+proceed:
+	while (match_idx < tree->hdr.nmatches) {
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_is_container(cur_match)) {
+			if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
+				goto stack_overflow;
+
+			stack[stackp++] = match_idx;
+			match_idx = cur_match->data;
+			goto proceed;
+		}
+
+		res = tcf_em_match(skb, cur_match, info);
+
+		if (tcf_em_early_end(cur_match, res))
+			break;
+
+		match_idx++;
+	}
+
+pop_stack:
+	if (stackp > 0) {
+		match_idx = stack[--stackp];
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_early_end(cur_match, res))
+			goto pop_stack;
+		else {
+			match_idx++;
+			goto proceed;
+		}
+	}
+
+	return res;
+
+stack_overflow:
+	if (net_ratelimit())
+		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
+	return -1;
+}
+
+EXPORT_SYMBOL(tcf_em_register);
+EXPORT_SYMBOL(tcf_em_unregister);
+EXPORT_SYMBOL(tcf_em_tree_validate);
+EXPORT_SYMBOL(tcf_em_tree_destroy);
+EXPORT_SYMBOL(tcf_em_tree_dump);
+EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
new file mode 100644
index 00000000000..5d3ae03e22a
--- /dev/null
+++ b/net/sched/estimator.c
@@ -0,0 +1,197 @@
+/*
+ * net/sched/estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to built an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * The stored value for avbps is scaled by 2^5, so that maximal
+     rate is ~1Gbit, avpps is scaled by 2^10.
+
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct qdisc_estimator
+{
+	struct qdisc_estimator	*next;
+	struct tc_stats		*stats;
+	spinlock_t		*stats_lock;
+	unsigned		interval;
+	int			ewma_log;
+	u64			last_bytes;
+	u32			last_packets;
+	u32			avpps;
+	u32			avbps;
+};
+
+struct qdisc_estimator_head
+{
+	struct timer_list	timer;
+	struct qdisc_estimator	*list;
+};
+
+static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
+
+/* Estimator array lock */
+static DEFINE_RWLOCK(est_lock);
+
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct qdisc_estimator *e;
+
+	read_lock(&est_lock);
+	for (e = elist[idx].list; e; e = e->next) {
+		struct tc_stats *st = e->stats;
+		u64 nbytes;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		nbytes = st->bytes;
+		npackets = st->packets;
+		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = nbytes;
+		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		st->bps = (e->avbps+0xF)>>5;
+
+		rate = (npackets - e->last_packets)<<(12 - idx);
+		e->last_packets = npackets;
+		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+		e->stats->pps = (e->avpps+0x1FF)>>10;
+		spin_unlock(e->stats_lock);
+	}
+
+	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	read_unlock(&est_lock);
+}
+
+int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
+{
+	struct qdisc_estimator *est;
+	struct tc_estimator *parm = RTA_DATA(opt);
+
+	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kmalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOBUFS;
+
+	memset(est, 0, sizeof(*est));
+	est->interval = parm->interval + 2;
+	est->stats = stats;
+	est->stats_lock = stats_lock;
+	est->ewma_log = parm->ewma_log;
+	est->last_bytes = stats->bytes;
+	est->avbps = stats->bps<<5;
+	est->last_packets = stats->packets;
+	est->avpps = stats->pps<<10;
+
+	est->next = elist[est->interval].list;
+	if (est->next == NULL) {
+		init_timer(&elist[est->interval].timer);
+		elist[est->interval].timer.data = est->interval;
+		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
+		elist[est->interval].timer.function = est_timer;
+		add_timer(&elist[est->interval].timer);
+	}
+	write_lock_bh(&est_lock);
+	elist[est->interval].list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+void qdisc_kill_estimator(struct tc_stats *stats)
+{
+	int idx;
+	struct qdisc_estimator *est, **pest;
+
+	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
+		int killed = 0;
+		pest = &elist[idx].list;
+		while ((est=*pest) != NULL) {
+			if (est->stats != stats) {
+				pest = &est->next;
+				continue;
+			}
+
+			write_lock_bh(&est_lock);
+			*pest = est->next;
+			write_unlock_bh(&est_lock);
+
+			kfree(est);
+			killed++;
+		}
+		if (killed && elist[idx].list == NULL)
+			del_timer(&elist[idx].timer);
+	}
+}
+
+EXPORT_SYMBOL(qdisc_kill_estimator);
+EXPORT_SYMBOL(qdisc_new_estimator);
diff --git a/net/sched/gact.c b/net/sched/gact.c
new file mode 100644
index 00000000000..a811c89fef7
--- /dev/null
+++ b/net/sched/gact.c
@@ -0,0 +1,231 @@
+/*
+ * net/sched/gact.c	Generic actions
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * copyright 	Jamal Hadi Salim (2002-4)
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_gact.h>
+#include <net/tc_act/tc_gact.h>
+
+/* use generic hash table */
+#define MY_TAB_SIZE	16
+#define MY_TAB_MASK	15
+
+static u32 idx_gen;
+static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(gact_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_gact
+#define tc_st		tc_gact
+#define tcf_t_lock	gact_lock
+#define tcf_ht		tcf_gact_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+#ifdef CONFIG_GACT_PROB
+static int gact_net_rand(struct tcf_gact *p)
+{
+	if (net_random()%p->pval)
+		return p->action;
+	return p->paction;
+}
+
+static int gact_determ(struct tcf_gact *p)
+{
+	if (p->bstats.packets%p->pval)
+		return p->action;
+	return p->paction;
+}
+
+typedef int (*g_rand)(struct tcf_gact *p);
+static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+#endif
+
+static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
+                         struct tc_action *a, int ovr, int bind)
+{
+	struct rtattr *tb[TCA_GACT_MAX];
+	struct tc_gact *parm;
+	struct tcf_gact *p;
+	int ret = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_GACT_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]);
+
+	if (tb[TCA_GACT_PROB-1] != NULL)
+#ifdef CONFIG_GACT_PROB
+		if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
+			return -EINVAL;
+#else
+		return -EOPNOTSUPP;
+#endif
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_hash_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->action = parm->action;
+#ifdef CONFIG_GACT_PROB
+	if (tb[TCA_GACT_PROB-1] != NULL) {
+		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
+		p->paction = p_parm->paction;
+		p->pval    = p_parm->pval;
+		p->ptype   = p_parm->ptype;
+	}
+#endif
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+}
+
+static int
+tcf_gact_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_gact *p = PRIV(a, gact);
+
+	if (p != NULL)
+		return tcf_hash_release(p, bind);
+	return 0;
+}
+
+static int
+tcf_gact(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_gact *p = PRIV(a, gact);
+	struct sk_buff *skb = *pskb;
+	int action = TC_ACT_SHOT;
+
+	spin_lock(&p->lock);
+#ifdef CONFIG_GACT_PROB
+	if (p->ptype && gact_rand[p->ptype] != NULL)
+		action = gact_rand[p->ptype](p);
+	else
+		action = p->action;
+#else
+	action = p->action;
+#endif
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+	if (action == TC_ACT_SHOT)
+		p->qstats.drops++;
+	p->tm.lastuse = jiffies;
+	spin_unlock(&p->lock);
+
+	return action;
+}
+
+static int
+tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_gact opt;
+	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_t t;
+
+	opt.index = p->index;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	opt.action = p->action;
+	RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
+#ifdef CONFIG_GACT_PROB
+	if (p->ptype) {
+		struct tc_gact_p p_opt;
+		p_opt.paction = p->paction;
+		p_opt.pval = p->pval;
+		p_opt.ptype = p->ptype;
+		RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
+	}
+#endif
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tc_action_ops act_gact_ops = {
+	.kind		=	"gact",
+	.type		=	TCA_ACT_GACT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_gact,
+	.dump		=	tcf_gact_dump,
+	.cleanup	=	tcf_gact_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_gact_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Classifier actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+gact_init_module(void)
+{
+#ifdef CONFIG_GACT_PROB
+	printk("GACT probability on\n");
+#else
+	printk("GACT probability NOT on\n");
+#endif
+	return tcf_register_action(&act_gact_ops);
+}
+
+static void __exit
+gact_cleanup_module(void)
+{
+	tcf_unregister_action(&act_gact_ops);
+}
+
+module_init(gact_init_module);
+module_exit(gact_cleanup_module);
diff --git a/net/sched/ipt.c b/net/sched/ipt.c
new file mode 100644
index 00000000000..b114d994d52
--- /dev/null
+++ b/net/sched/ipt.c
@@ -0,0 +1,326 @@
+/*
+ * net/sched/ipt.c	iptables target interface
+ *
+ *TODO: Add other tables. For now we only support the ipv4 table targets
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Copyright:	Jamal Hadi Salim (2002-4)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/kmod.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_ipt.h>
+#include <net/tc_act/tc_ipt.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+
+static u32 idx_gen;
+static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
+/* ipt hash table lock */
+static DEFINE_RWLOCK(ipt_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_ipt
+#define tcf_t_lock	ipt_lock
+#define tcf_ht		tcf_ipt_ht
+
+#define CONFIG_NET_ACT_INIT
+#include <net/pkt_act.h>
+
+static int
+ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+{
+	struct ipt_target *target;
+	int ret = 0;
+
+	target = ipt_find_target(t->u.user.name, t->u.user.revision);
+	if (!target)
+		return -ENOENT;
+
+	DPRINTK("ipt_init_target: found %s\n", target->name);
+	t->u.kernel.target = target;
+
+	if (t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(table, NULL, t->data,
+					       t->u.target_size - sizeof(*t),
+					       hook)) {
+		DPRINTK("ipt_init_target: check failed for `%s'.\n",
+			t->u.kernel.target->name);
+		module_put(t->u.kernel.target->me);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void
+ipt_destroy_target(struct ipt_entry_target *t)
+{
+	if (t->u.kernel.target->destroy)
+		t->u.kernel.target->destroy(t->data,
+		                            t->u.target_size - sizeof(*t));
+        module_put(t->u.kernel.target->me);
+}
+
+static int
+tcf_ipt_release(struct tcf_ipt *p, int bind)
+{
+	int ret = 0;
+	if (p) {
+		if (bind)
+			p->bindcnt--;
+		p->refcnt--;
+		if (p->bindcnt <= 0 && p->refcnt <= 0) {
+			ipt_destroy_target(p->t);
+			kfree(p->tname);
+			kfree(p->t);
+			tcf_hash_destroy(p);
+			ret = ACT_P_DELETED;
+		}
+	}
+	return ret;
+}
+
+static int
+tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+             int ovr, int bind)
+{
+	struct rtattr *tb[TCA_IPT_MAX];
+	struct tcf_ipt *p;
+	struct ipt_entry_target *td, *t;
+	char *tname;
+	int ret = 0, err;
+	u32 hook = 0;
+	u32 index = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_IPT_HOOK-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
+		return -EINVAL;
+	if (tb[TCA_IPT_TARG-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
+		return -EINVAL;
+	td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);
+	if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
+		return -EINVAL;
+
+	if (tb[TCA_IPT_INDEX-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
+		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
+
+	p = tcf_hash_check(index, a, ovr, bind);
+	if (p == NULL) {
+		p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_ipt_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
+
+	err = -ENOMEM;
+	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+	if (tname == NULL)
+		goto err1;
+	if (tb[TCA_IPT_TABLE - 1] == NULL ||
+	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
+		strcpy(tname, "mangle");
+
+	t = kmalloc(td->u.target_size, GFP_KERNEL);
+	if (t == NULL)
+		goto err2;
+	memcpy(t, td, td->u.target_size);
+
+	if ((err = ipt_init_target(t, tname, hook)) < 0)
+		goto err3;
+
+	spin_lock_bh(&p->lock);
+	if (ret != ACT_P_CREATED) {
+		ipt_destroy_target(p->t);
+		kfree(p->tname);
+		kfree(p->t);
+	}
+	p->tname = tname;
+	p->t     = t;
+	p->hook  = hook;
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+
+err3:
+	kfree(t);
+err2:
+	kfree(tname);
+err1:
+	kfree(p);
+	return err;
+}
+
+static int
+tcf_ipt_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_ipt *p = PRIV(a, ipt);
+	return tcf_ipt_release(p, bind);
+}
+
+static int
+tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
+{
+	int ret = 0, result = 0;
+	struct tcf_ipt *p = PRIV(a, ipt);
+	struct sk_buff *skb = *pskb;
+
+	if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			return TC_ACT_UNSPEC;
+	}
+
+	spin_lock(&p->lock);
+
+	p->tm.lastuse = jiffies;
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+	/* yes, we have to worry about both in and out dev
+	 worry later - danger - this API seems to have changed
+	 from earlier kernels */
+
+	ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL,
+					    p->hook, p->t->data, NULL);
+	switch (ret) {
+	case NF_ACCEPT:
+		result = TC_ACT_OK;
+		break;
+	case NF_DROP:
+		result = TC_ACT_SHOT;
+		p->qstats.drops++;
+		break;
+	case IPT_CONTINUE:
+		result = TC_ACT_PIPE;
+		break;
+	default:
+		if (net_ratelimit())
+			printk("Bogus netfilter code %d assume ACCEPT\n", ret);
+		result = TC_POLICE_OK;
+		break;
+	}
+	spin_unlock(&p->lock);
+	return result;
+
+}
+
+static int
+tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	struct ipt_entry_target *t;
+	struct tcf_t tm;
+	struct tc_cnt c;
+	unsigned char *b = skb->tail;
+	struct tcf_ipt *p = PRIV(a, ipt);
+
+	/* for simple targets kernel size == user size
+	** user name = target name
+	** for foolproof you need to not assume this
+	*/
+
+	t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
+	if (t == NULL)
+		goto rtattr_failure;
+
+	c.bindcnt = p->bindcnt - bind;
+	c.refcnt = p->refcnt - ref;
+	memcpy(t, p->t, p->t->u.user.target_size);
+	strcpy(t->u.user.name, p->t->u.kernel.target->name);
+
+	DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
+		strlen(p->tname));
+	DPRINTK("\tdump target name %s size %d size user %d "
+	        "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
+	        p->t->u.target_size, p->t->u.user.target_size,
+	        p->t->data[0], p->t->data[1]);
+	RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
+	RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
+	RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
+	RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
+	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname);
+	tm.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	tm.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
+	kfree(t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	kfree(t);
+	return -1;
+}
+
+static struct tc_action_ops act_ipt_ops = {
+	.kind		=	"ipt",
+	.type		=	TCA_ACT_IPT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_ipt,
+	.dump		=	tcf_ipt_dump,
+	.cleanup	=	tcf_ipt_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_ipt_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Iptables target actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+ipt_init_module(void)
+{
+	return tcf_register_action(&act_ipt_ops);
+}
+
+static void __exit
+ipt_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ipt_ops);
+}
+
+module_init(ipt_init_module);
+module_exit(ipt_cleanup_module);
diff --git a/net/sched/mirred.c b/net/sched/mirred.c
new file mode 100644
index 00000000000..f309ce33680
--- /dev/null
+++ b/net/sched/mirred.c
@@ -0,0 +1,276 @@
+/*
+ * net/sched/mirred.c	packet mirroring and redirect actions
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Jamal Hadi Salim (2002-4)
+ *
+ * TODO: Add ingress support (and socket redirect support)
+ *
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+
+
+/* use generic hash table */
+#define MY_TAB_SIZE     8
+#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
+static u32 idx_gen;
+static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(mirred_lock);
+
+/* ovewrride the defaults */
+#define tcf_st		tcf_mirred
+#define tc_st		tc_mirred
+#define tcf_t_lock	mirred_lock
+#define tcf_ht		tcf_mirred_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+static inline int
+tcf_mirred_release(struct tcf_mirred *p, int bind)
+{
+	if (p) {
+		if (bind)
+			p->bindcnt--;
+		p->refcnt--;
+		if(!p->bindcnt && p->refcnt <= 0) {
+			dev_put(p->dev);
+			tcf_hash_destroy(p);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+                int ovr, int bind)
+{
+	struct rtattr *tb[TCA_MIRRED_MAX];
+	struct tc_mirred *parm;
+	struct tcf_mirred *p;
+	struct net_device *dev = NULL;
+	int ret = 0;
+	int ok_push = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_MIRRED_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_MIRRED_PARMS-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_MIRRED_PARMS-1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
+
+	if (parm->ifindex) {
+		dev = __dev_get_by_index(parm->ifindex);
+		if (dev == NULL)
+			return -ENODEV;
+		switch (dev->type) {
+			case ARPHRD_TUNNEL:
+			case ARPHRD_TUNNEL6:
+			case ARPHRD_SIT:
+			case ARPHRD_IPGRE:
+			case ARPHRD_VOID:
+			case ARPHRD_NONE:
+				ok_push = 0;
+				break;
+			default:
+				ok_push = 1;
+				break;
+		}
+	}
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		if (!parm->ifindex)
+			return -EINVAL;
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_mirred_release(p, bind);
+			return -EEXIST;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->action = parm->action;
+	p->eaction = parm->eaction;
+	if (parm->ifindex) {
+		p->ifindex = parm->ifindex;
+		if (ret != ACT_P_CREATED)
+			dev_put(p->dev);
+		p->dev = dev;
+		dev_hold(dev);
+		p->ok_push = ok_push;
+	}
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+
+	DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
+	        "ifindex %d\n", parm->index, parm->action, parm->eaction,
+	        dev->name, parm->ifindex);
+	return ret;
+}
+
+static int
+tcf_mirred_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_mirred *p = PRIV(a, mirred);
+
+	if (p != NULL)
+		return tcf_mirred_release(p, bind);
+	return 0;
+}
+
+static int
+tcf_mirred(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_mirred *p = PRIV(a, mirred);
+	struct net_device *dev;
+	struct sk_buff *skb2 = NULL;
+	struct sk_buff *skb = *pskb;
+	u32 at = G_TC_AT(skb->tc_verd);
+
+	spin_lock(&p->lock);
+
+	dev = p->dev;
+	p->tm.lastuse = jiffies;
+
+	if (!(dev->flags&IFF_UP) ) {
+		if (net_ratelimit())
+			printk("mirred to Houston: device %s is gone!\n",
+			       dev->name);
+bad_mirred:
+		if (skb2 != NULL)
+			kfree_skb(skb2);
+		p->qstats.overlimits++;
+		p->bstats.bytes += skb->len;
+		p->bstats.packets++;
+		spin_unlock(&p->lock);
+		/* should we be asking for packet to be dropped?
+		 * may make sense for redirect case only
+		*/
+		return TC_ACT_SHOT;
+	}
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (skb2 == NULL)
+		goto bad_mirred;
+	if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) {
+		if (net_ratelimit())
+			printk("tcf_mirred unknown action %d\n", p->eaction);
+		goto bad_mirred;
+	}
+
+	p->bstats.bytes += skb2->len;
+	p->bstats.packets++;
+	if (!(at & AT_EGRESS))
+		if (p->ok_push)
+			skb_push(skb2, skb2->dev->hard_header_len);
+
+	/* mirror is always swallowed */
+	if (p->eaction != TCA_EGRESS_MIRROR)
+		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+
+	skb2->dev = dev;
+	skb2->input_dev = skb->dev;
+	dev_queue_xmit(skb2);
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_mirred opt;
+	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_t t;
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	opt.eaction = p->eaction;
+	opt.ifindex = p->ifindex;
+	DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
+	         p->index, p->action, p->eaction, p->ifindex);
+	RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
+	return skb->len;
+
+      rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tc_action_ops act_mirred_ops = {
+	.kind		=	"mirred",
+	.type		=	TCA_ACT_MIRRED,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_mirred,
+	.dump		=	tcf_mirred_dump,
+	.cleanup	=	tcf_mirred_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_mirred_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002)");
+MODULE_DESCRIPTION("Device Mirror/redirect actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+mirred_init_module(void)
+{
+	printk("Mirror/redirect action on\n");
+	return tcf_register_action(&act_mirred_ops);
+}
+
+static void __exit
+mirred_cleanup_module(void)
+{
+	tcf_unregister_action(&act_mirred_ops);
+}
+
+module_init(mirred_init_module);
+module_exit(mirred_cleanup_module);
diff --git a/net/sched/pedit.c b/net/sched/pedit.c
new file mode 100644
index 00000000000..678be6a645f
--- /dev/null
+++ b/net/sched/pedit.c
@@ -0,0 +1,288 @@
+/*
+ * net/sched/pedit.c	Generic packet editor
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Jamal Hadi Salim (2002-4)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_pedit.h>
+
+
+#define PEDIT_DEB 1
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+static u32 idx_gen;
+static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
+static DEFINE_RWLOCK(pedit_lock);
+
+#define tcf_st		tcf_pedit
+#define tc_st		tc_pedit
+#define tcf_t_lock	pedit_lock
+#define tcf_ht		tcf_pedit_ht
+
+#define CONFIG_NET_ACT_INIT 1
+#include <net/pkt_act.h>
+
+static int
+tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
+               int ovr, int bind)
+{
+	struct rtattr *tb[TCA_PEDIT_MAX];
+	struct tc_pedit *parm;
+	int ret = 0;
+	struct tcf_pedit *p;
+	struct tc_pedit_key *keys = NULL;
+	int ksize;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_PEDIT_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_PEDIT_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_PEDIT_PARMS-1]);
+	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
+	if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
+		return -EINVAL;
+
+	p = tcf_hash_check(parm->index, a, ovr, bind);
+	if (p == NULL) {
+		if (!parm->nkeys)
+			return -EINVAL;
+		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
+		if (p == NULL)
+			return -ENOMEM;
+		keys = kmalloc(ksize, GFP_KERNEL);
+		if (keys == NULL) {
+			kfree(p);
+			return -ENOMEM;
+		}
+		ret = ACT_P_CREATED;
+	} else {
+		if (!ovr) {
+			tcf_hash_release(p, bind);
+			return -EEXIST;
+		}
+		if (p->nkeys && p->nkeys != parm->nkeys) {
+			keys = kmalloc(ksize, GFP_KERNEL);
+			if (keys == NULL)
+				return -ENOMEM;
+		}
+	}
+
+	spin_lock_bh(&p->lock);
+	p->flags = parm->flags;
+	p->action = parm->action;
+	if (keys) {
+		kfree(p->keys);
+		p->keys = keys;
+		p->nkeys = parm->nkeys;
+	}
+	memcpy(p->keys, parm->keys, ksize);
+	spin_unlock_bh(&p->lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(p);
+	return ret;
+}
+
+static int
+tcf_pedit_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_pedit *p = PRIV(a, pedit);
+
+	if (p != NULL) {
+		struct tc_pedit_key *keys = p->keys;
+		if (tcf_hash_release(p, bind)) {
+			kfree(keys);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int
+tcf_pedit(struct sk_buff **pskb, struct tc_action *a)
+{
+	struct tcf_pedit *p = PRIV(a, pedit);
+	struct sk_buff *skb = *pskb;
+	int i, munged = 0;
+	u8 *pptr;
+
+	if (!(skb->tc_verd & TC_OK2MUNGE)) {
+		/* should we set skb->cloned? */
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+			return p->action;
+		}
+	}
+
+	pptr = skb->nh.raw;
+
+	spin_lock(&p->lock);
+
+	p->tm.lastuse = jiffies;
+
+	if (p->nkeys > 0) {
+		struct tc_pedit_key *tkey = p->keys;
+
+		for (i = p->nkeys; i > 0; i--, tkey++) {
+			u32 *ptr;
+			int offset = tkey->off;
+
+			if (tkey->offmask) {
+				if (skb->len > tkey->at) {
+					 char *j = pptr + tkey->at;
+					 offset += ((*j & tkey->offmask) >> 
+					           tkey->shift);
+				} else {
+					goto bad;
+				}
+			}
+
+			if (offset % 4) {
+				printk("offset must be on 32 bit boundaries\n");
+				goto bad;
+			}
+			if (skb->len < 0 || (offset > 0 && offset > skb->len)) {
+				printk("offset %d cant exceed pkt length %d\n",
+				       offset, skb->len);
+				goto bad;
+			}
+
+			ptr = (u32 *)(pptr+offset);
+			/* just do it, baby */
+			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
+			munged++;
+		}
+		
+		if (munged)
+			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
+		goto done;
+	} else {
+		printk("pedit BUG: index %d\n",p->index);
+	}
+
+bad:
+	p->qstats.overlimits++;
+done:
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tc_pedit *opt;
+	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_t t;
+	int s; 
+		
+	s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
+
+	/* netlink spinlocks held above us - must use ATOMIC */
+	opt = kmalloc(s, GFP_ATOMIC);
+	if (opt == NULL)
+		return -ENOBUFS;
+	memset(opt, 0, s);
+
+	memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
+	opt->index = p->index;
+	opt->nkeys = p->nkeys;
+	opt->flags = p->flags;
+	opt->action = p->action;
+	opt->refcnt = p->refcnt - ref;
+	opt->bindcnt = p->bindcnt - bind;
+
+
+#ifdef PEDIT_DEB
+	{                
+		/* Debug - get rid of later */
+		int i;
+		struct tc_pedit_key *key = opt->keys;
+
+		for (i=0; i<opt->nkeys; i++, key++) {
+			printk( "\n key #%d",i);
+			printk( "  at %d: val %08x mask %08x",
+			(unsigned int)key->off,
+			(unsigned int)key->val,
+			(unsigned int)key->mask);
+		}
+	}
+#endif
+
+	RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
+	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static
+struct tc_action_ops act_pedit_ops = {
+	.kind		=	"pedit",
+	.type		=	TCA_ACT_PEDIT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_pedit,
+	.dump		=	tcf_pedit_dump,
+	.cleanup	=	tcf_pedit_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_pedit_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Packet Editor actions");
+MODULE_LICENSE("GPL");
+
+static int __init
+pedit_init_module(void)
+{
+	return tcf_register_action(&act_pedit_ops);
+}
+
+static void __exit
+pedit_cleanup_module(void)
+{
+	tcf_unregister_action(&act_pedit_ops);
+}
+
+module_init(pedit_init_module);
+module_exit(pedit_cleanup_module);
+
diff --git a/net/sched/police.c b/net/sched/police.c
new file mode 100644
index 00000000000..c03545faf52
--- /dev/null
+++ b/net/sched/police.c
@@ -0,0 +1,612 @@
+/*
+ * net/sched/police.c	Input police filter.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * 		J Hadi Salim (action changes)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+
+#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
+#define PRIV(a) ((struct tcf_police *) (a)->priv)
+
+/* use generic hash table */
+#define MY_TAB_SIZE     16
+#define MY_TAB_MASK     15
+static u32 idx_gen;
+static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
+/* Policer hash table lock */
+static DEFINE_RWLOCK(police_lock);
+
+/* Each policer is serialized by its individual spinlock */
+
+static __inline__ unsigned tcf_police_hash(u32 index)
+{
+	return index&0xF;
+}
+
+static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
+{
+	struct tcf_police *p;
+
+	read_lock(&police_lock);
+	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
+		if (p->index == index)
+			break;
+	}
+	read_unlock(&police_lock);
+	return p;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+                              int type, struct tc_action *a)
+{
+	struct tcf_police *p;
+	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	struct rtattr *r;
+
+	read_lock(&police_lock);
+
+	s_i = cb->args[0];
+
+	for (i = 0; i < MY_TAB_SIZE; i++) {
+		p = tcf_police_ht[tcf_police_hash(i)];
+
+		for (; p; p = p->next) {
+			index++;
+			if (index < s_i)
+				continue;
+			a->priv = p;
+			a->order = index;
+			r = (struct rtattr*) skb->tail;
+			RTA_PUT(skb, a->order, 0, NULL);
+			if (type == RTM_DELACTION)
+				err = tcf_action_dump_1(skb, a, 0, 1);
+			else
+				err = tcf_action_dump_1(skb, a, 0, 0);
+			if (err < 0) {
+				index--;
+				skb_trim(skb, (u8*)r - skb->data);
+				goto done;
+			}
+			r->rta_len = skb->tail - (u8*)r;
+			n_i++;
+		}
+	}
+done:
+	read_unlock(&police_lock);
+	if (n_i)
+		cb->args[0] += n_i;
+	return n_i;
+
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	goto done;
+}
+
+static inline int
+tcf_hash_search(struct tc_action *a, u32 index)
+{
+	struct tcf_police *p = tcf_police_lookup(index);
+
+	if (p != NULL) {
+		a->priv = p;
+		return 1;
+	} else {
+		return 0;
+	}
+}
+#endif
+
+static inline u32 tcf_police_new_index(void)
+{
+	do {
+		if (++idx_gen == 0)
+			idx_gen = 1;
+	} while (tcf_police_lookup(idx_gen));
+
+	return idx_gen;
+}
+
+void tcf_police_destroy(struct tcf_police *p)
+{
+	unsigned h = tcf_police_hash(p->index);
+	struct tcf_police **p1p;
+	
+	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
+		if (*p1p == p) {
+			write_lock_bh(&police_lock);
+			*p1p = p->next;
+			write_unlock_bh(&police_lock);
+#ifdef CONFIG_NET_ESTIMATOR
+			gen_kill_estimator(&p->bstats, &p->rate_est);
+#endif
+			if (p->R_tab)
+				qdisc_put_rtab(p->R_tab);
+			if (p->P_tab)
+				qdisc_put_rtab(p->P_tab);
+			kfree(p);
+			return;
+		}
+	}
+	BUG_TRAP(0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
+                                 struct tc_action *a, int ovr, int bind)
+{
+	unsigned h;
+	int ret = 0, err;
+	struct rtattr *tb[TCA_POLICE_MAX];
+	struct tc_police *parm;
+	struct tcf_police *p;
+	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+		return -EINVAL;
+
+	if (tb[TCA_POLICE_TBF-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+		return -EINVAL;
+	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+	if (tb[TCA_POLICE_RESULT-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+		return -EINVAL;
+	if (tb[TCA_POLICE_RESULT-1] != NULL &&
+	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+		return -EINVAL;
+
+	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+		a->priv = p;
+		if (bind) {
+			p->bindcnt += 1;
+			p->refcnt += 1;
+		}
+		if (ovr)
+			goto override;
+		return ret;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return -ENOMEM;
+	memset(p, 0, sizeof(*p));
+
+	ret = ACT_P_CREATED;
+	p->refcnt = 1;
+	spin_lock_init(&p->lock);
+	p->stats_lock = &p->lock;
+	if (bind)
+		p->bindcnt = 1;
+override:
+	if (parm->rate.rate) {
+		err = -ENOMEM;
+		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (R_tab == NULL)
+			goto failure;
+		if (parm->peakrate.rate) {
+			P_tab = qdisc_get_rtab(&parm->peakrate,
+					       tb[TCA_POLICE_PEAKRATE-1]);
+			if (p->P_tab == NULL) {
+				qdisc_put_rtab(R_tab);
+				goto failure;
+			}
+		}
+	}
+	/* No failure allowed after this point */
+	spin_lock_bh(&p->lock);
+	if (R_tab != NULL) {
+		qdisc_put_rtab(p->R_tab);
+		p->R_tab = R_tab;
+	}
+	if (P_tab != NULL) {
+		qdisc_put_rtab(p->P_tab);
+		p->P_tab = P_tab;
+	}
+
+	if (tb[TCA_POLICE_RESULT-1])
+		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	p->toks = p->burst = parm->burst;
+	p->mtu = parm->mtu;
+	if (p->mtu == 0) {
+		p->mtu = ~0;
+		if (p->R_tab)
+			p->mtu = 255<<p->R_tab->rate.cell_log;
+	}
+	if (p->P_tab)
+		p->ptoks = L2T_P(p, p->mtu);
+	p->action = parm->action;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tb[TCA_POLICE_AVRATE-1])
+		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+	if (est)
+		gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+
+	spin_unlock_bh(&p->lock);
+	if (ret != ACT_P_CREATED)
+		return ret;
+
+	PSCHED_GET_TIME(p->t_c);
+	p->index = parm->index ? : tcf_police_new_index();
+	h = tcf_police_hash(p->index);
+	write_lock_bh(&police_lock);
+	p->next = tcf_police_ht[h];
+	tcf_police_ht[h] = p;
+	write_unlock_bh(&police_lock);
+
+	a->priv = p;
+	return ret;
+
+failure:
+	if (ret == ACT_P_CREATED)
+		kfree(p);
+	return err;
+}
+
+static int tcf_act_police_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_police *p = PRIV(a);
+
+	if (p != NULL)
+		return tcf_police_release(p, bind);
+	return 0;
+}
+
+static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
+{
+	psched_time_t now;
+	struct sk_buff *skb = *pskb;
+	struct tcf_police *p = PRIV(a);
+	long toks;
+	long ptoks = 0;
+
+	spin_lock(&p->lock);
+
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+		p->qstats.overlimits++;
+		spin_unlock(&p->lock);
+		return p->action;
+	}
+#endif
+
+	if (skb->len <= p->mtu) {
+		if (p->R_tab == NULL) {
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+		if (p->P_tab) {
+			ptoks = toks + p->ptoks;
+			if (ptoks > (long)L2T_P(p, p->mtu))
+				ptoks = (long)L2T_P(p, p->mtu);
+			ptoks -= L2T_P(p, skb->len);
+		}
+		toks += p->toks;
+		if (toks > (long)p->burst)
+			toks = p->burst;
+		toks -= L2T(p, skb->len);
+
+		if ((toks|ptoks) >= 0) {
+			p->t_c = now;
+			p->toks = toks;
+			p->ptoks = ptoks;
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+	}
+
+	p->qstats.overlimits++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+static int
+tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_police opt;
+	struct tcf_police *p = PRIV(a);
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.mtu = p->mtu;
+	opt.burst = p->burst;
+	opt.refcnt = p->refcnt - ref;
+	opt.bindcnt = p->bindcnt - bind;
+	if (p->R_tab)
+		opt.rate = p->R_tab->rate;
+	else
+		memset(&opt.rate, 0, sizeof(opt.rate));
+	if (p->P_tab)
+		opt.peakrate = p->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+	if (p->result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+MODULE_AUTHOR("Alexey Kuznetsov");
+MODULE_DESCRIPTION("Policing actions");
+MODULE_LICENSE("GPL");
+
+static struct tc_action_ops act_police_ops = {
+	.kind		=	"police",
+	.type		=	TCA_ID_POLICE,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_act_police,
+	.dump		=	tcf_act_police_dump,
+	.cleanup	=	tcf_act_police_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_act_police_locate,
+	.walk		=	tcf_generic_walker
+};
+
+static int __init
+police_init_module(void)
+{
+	return tcf_register_action(&act_police_ops);
+}
+
+static void __exit
+police_cleanup_module(void)
+{
+	tcf_unregister_action(&act_police_ops);
+}
+
+module_init(police_init_module);
+module_exit(police_cleanup_module);
+
+#endif
+
+struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+	unsigned h;
+	struct tcf_police *p;
+	struct rtattr *tb[TCA_POLICE_MAX];
+	struct tc_police *parm;
+
+	if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+		return NULL;
+
+	if (tb[TCA_POLICE_TBF-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+		return NULL;
+
+	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+		p->refcnt++;
+		return p;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+
+	memset(p, 0, sizeof(*p));
+	p->refcnt = 1;
+	spin_lock_init(&p->lock);
+	p->stats_lock = &p->lock;
+	if (parm->rate.rate) {
+		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (p->R_tab == NULL)
+			goto failure;
+		if (parm->peakrate.rate) {
+			p->P_tab = qdisc_get_rtab(&parm->peakrate,
+			                          tb[TCA_POLICE_PEAKRATE-1]);
+			if (p->P_tab == NULL)
+				goto failure;
+		}
+	}
+	if (tb[TCA_POLICE_RESULT-1]) {
+		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+			goto failure;
+		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	}
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tb[TCA_POLICE_AVRATE-1]) {
+		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
+			goto failure;
+		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+	}
+#endif
+	p->toks = p->burst = parm->burst;
+	p->mtu = parm->mtu;
+	if (p->mtu == 0) {
+		p->mtu = ~0;
+		if (p->R_tab)
+			p->mtu = 255<<p->R_tab->rate.cell_log;
+	}
+	if (p->P_tab)
+		p->ptoks = L2T_P(p, p->mtu);
+	PSCHED_GET_TIME(p->t_c);
+	p->index = parm->index ? : tcf_police_new_index();
+	p->action = parm->action;
+#ifdef CONFIG_NET_ESTIMATOR
+	if (est)
+		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+	h = tcf_police_hash(p->index);
+	write_lock_bh(&police_lock);
+	p->next = tcf_police_ht[h];
+	tcf_police_ht[h] = p;
+	write_unlock_bh(&police_lock);
+	return p;
+
+failure:
+	if (p->R_tab)
+		qdisc_put_rtab(p->R_tab);
+	kfree(p);
+	return NULL;
+}
+
+int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+{
+	psched_time_t now;
+	long toks;
+	long ptoks = 0;
+
+	spin_lock(&p->lock);
+
+	p->bstats.bytes += skb->len;
+	p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+		p->qstats.overlimits++;
+		spin_unlock(&p->lock);
+		return p->action;
+	}
+#endif
+
+	if (skb->len <= p->mtu) {
+		if (p->R_tab == NULL) {
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+		if (p->P_tab) {
+			ptoks = toks + p->ptoks;
+			if (ptoks > (long)L2T_P(p, p->mtu))
+				ptoks = (long)L2T_P(p, p->mtu);
+			ptoks -= L2T_P(p, skb->len);
+		}
+		toks += p->toks;
+		if (toks > (long)p->burst)
+			toks = p->burst;
+		toks -= L2T(p, skb->len);
+
+		if ((toks|ptoks) >= 0) {
+			p->t_c = now;
+			p->toks = toks;
+			p->ptoks = ptoks;
+			spin_unlock(&p->lock);
+			return p->result;
+		}
+	}
+
+	p->qstats.overlimits++;
+	spin_unlock(&p->lock);
+	return p->action;
+}
+
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_police opt;
+
+	opt.index = p->index;
+	opt.action = p->action;
+	opt.mtu = p->mtu;
+	opt.burst = p->burst;
+	if (p->R_tab)
+		opt.rate = p->R_tab->rate;
+	else
+		memset(&opt.rate, 0, sizeof(opt.rate));
+	if (p->P_tab)
+		opt.peakrate = p->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+	if (p->result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+	if (p->ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+{
+	struct gnet_dump d;
+	
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+			TCA_XSTATS, p->stats_lock, &d) < 0)
+		goto errout;
+	
+	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(&d, &p->qstats) < 0)
+		goto errout;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto errout;
+
+	return 0;
+
+errout:
+	return -1;
+}
+
+
+EXPORT_SYMBOL(tcf_police);
+EXPORT_SYMBOL(tcf_police_destroy);
+EXPORT_SYMBOL(tcf_police_dump);
+EXPORT_SYMBOL(tcf_police_dump_stats);
+EXPORT_SYMBOL(tcf_police_hash);
+EXPORT_SYMBOL(tcf_police_ht);
+EXPORT_SYMBOL(tcf_police_locate);
+EXPORT_SYMBOL(tcf_police_lookup);
+EXPORT_SYMBOL(tcf_police_new_index);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
new file mode 100644
index 00000000000..4323a74eea3
--- /dev/null
+++ b/net/sched/sch_api.c
@@ -0,0 +1,1296 @@
+/*
+ * net/sched/sch_api.c	Packet scheduler API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Fixes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
+ * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/bitops.h>
+
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
+			struct Qdisc *old, struct Qdisc *new);
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			 struct Qdisc *q, unsigned long cl, int event);
+
+/*
+
+   Short review.
+   -------------
+
+   This file consists of two interrelated parts:
+
+   1. queueing disciplines manager frontend.
+   2. traffic classes manager frontend.
+
+   Generally, queueing discipline ("qdisc") is a black box,
+   which is able to enqueue packets and to dequeue them (when
+   device is ready to send something) in order and at times
+   determined by algorithm hidden in it.
+
+   qdisc's are divided to two categories:
+   - "queues", which have no internal structure visible from outside.
+   - "schedulers", which split all the packets to "traffic classes",
+     using "packet classifiers" (look at cls_api.c)
+
+   In turn, classes may have child qdiscs (as rule, queues)
+   attached to them etc. etc. etc.
+
+   The goal of the routines in this file is to translate
+   information supplied by user in the form of handles
+   to more intelligible for kernel form, to make some sanity
+   checks and part of work, which is common to all qdiscs
+   and to provide rtnetlink notifications.
+
+   All real intelligent work is done inside qdisc modules.
+
+
+
+   Every discipline has two major routines: enqueue and dequeue.
+
+   ---dequeue
+
+   dequeue usually returns a skb to send. It is allowed to return NULL,
+   but it does not mean that queue is empty, it just means that
+   discipline does not want to send anything this time.
+   Queue is really empty if q->q.qlen == 0.
+   For complicated disciplines with multiple queues q->q is not
+   real packet queue, but however q->q.qlen must be valid.
+
+   ---enqueue
+
+   enqueue returns 0, if packet was enqueued successfully.
+   If packet (this one or another one) was dropped, it returns
+   not zero error code.
+   NET_XMIT_DROP 	- this packet dropped
+     Expected action: do not backoff, but wait until queue will clear.
+   NET_XMIT_CN	 	- probably this packet enqueued, but another one dropped.
+     Expected action: backoff or ignore
+   NET_XMIT_POLICED	- dropped by police.
+     Expected action: backoff or error to real-time apps.
+
+   Auxiliary routines:
+
+   ---requeue
+
+   requeues once dequeued packet. It is used for non-standard or
+   just buggy devices, which can defer output even if dev->tbusy=0.
+
+   ---reset
+
+   returns qdisc to initial state: purge all buffers, clear all
+   timers, counters (except for statistics) etc.
+
+   ---init
+
+   initializes newly created qdisc.
+
+   ---destroy
+
+   destroys resources allocated by init and during lifetime of qdisc.
+
+   ---change
+
+   changes qdisc parameters.
+ */
+
+/* Protects list of registered TC modules. It is pure SMP lock. */
+static DEFINE_RWLOCK(qdisc_mod_lock);
+
+
+/************************************************
+ *	Queueing disciplines manipulation.	*
+ ************************************************/
+
+
+/* The list of all installed queueing disciplines. */
+
+static struct Qdisc_ops *qdisc_base;
+
+/* Register/uregister queueing discipline */
+
+int register_qdisc(struct Qdisc_ops *qops)
+{
+	struct Qdisc_ops *q, **qp;
+	int rc = -EEXIST;
+
+	write_lock(&qdisc_mod_lock);
+	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
+		if (!strcmp(qops->id, q->id))
+			goto out;
+
+	if (qops->enqueue == NULL)
+		qops->enqueue = noop_qdisc_ops.enqueue;
+	if (qops->requeue == NULL)
+		qops->requeue = noop_qdisc_ops.requeue;
+	if (qops->dequeue == NULL)
+		qops->dequeue = noop_qdisc_ops.dequeue;
+
+	qops->next = NULL;
+	*qp = qops;
+	rc = 0;
+out:
+	write_unlock(&qdisc_mod_lock);
+	return rc;
+}
+
+int unregister_qdisc(struct Qdisc_ops *qops)
+{
+	struct Qdisc_ops *q, **qp;
+	int err = -ENOENT;
+
+	write_lock(&qdisc_mod_lock);
+	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+		if (q == qops)
+			break;
+	if (q) {
+		*qp = q->next;
+		q->next = NULL;
+		err = 0;
+	}
+	write_unlock(&qdisc_mod_lock);
+	return err;
+}
+
+/* We know handle. Find qdisc among all qdisc's attached to device
+   (root qdisc, all its children, children of children etc.)
+ */
+
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+	struct Qdisc *q;
+
+	read_lock_bh(&qdisc_tree_lock);
+	list_for_each_entry(q, &dev->qdisc_list, list) {
+		if (q->handle == handle) {
+			read_unlock_bh(&qdisc_tree_lock);
+			return q;
+		}
+	}
+	read_unlock_bh(&qdisc_tree_lock);
+	return NULL;
+}
+
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+{
+	unsigned long cl;
+	struct Qdisc *leaf;
+	struct Qdisc_class_ops *cops = p->ops->cl_ops;
+
+	if (cops == NULL)
+		return NULL;
+	cl = cops->get(p, classid);
+
+	if (cl == 0)
+		return NULL;
+	leaf = cops->leaf(p, cl);
+	cops->put(p, cl);
+	return leaf;
+}
+
+/* Find queueing discipline by name */
+
+static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
+{
+	struct Qdisc_ops *q = NULL;
+
+	if (kind) {
+		read_lock(&qdisc_mod_lock);
+		for (q = qdisc_base; q; q = q->next) {
+			if (rtattr_strcmp(kind, q->id) == 0) {
+				if (!try_module_get(q->owner))
+					q = NULL;
+				break;
+			}
+		}
+		read_unlock(&qdisc_mod_lock);
+	}
+	return q;
+}
+
+static struct qdisc_rate_table *qdisc_rtab_list;
+
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
+{
+	struct qdisc_rate_table *rtab;
+
+	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
+		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
+			rtab->refcnt++;
+			return rtab;
+		}
+	}
+
+	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
+		return NULL;
+
+	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
+	if (rtab) {
+		rtab->rate = *r;
+		rtab->refcnt = 1;
+		memcpy(rtab->data, RTA_DATA(tab), 1024);
+		rtab->next = qdisc_rtab_list;
+		qdisc_rtab_list = rtab;
+	}
+	return rtab;
+}
+
+void qdisc_put_rtab(struct qdisc_rate_table *tab)
+{
+	struct qdisc_rate_table *rtab, **rtabp;
+
+	if (!tab || --tab->refcnt)
+		return;
+
+	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+		if (rtab == tab) {
+			*rtabp = rtab->next;
+			kfree(rtab);
+			return;
+		}
+	}
+}
+
+
+/* Allocate an unique handle from space managed by kernel */
+
+static u32 qdisc_alloc_handle(struct net_device *dev)
+{
+	int i = 0x10000;
+	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
+
+	do {
+		autohandle += TC_H_MAKE(0x10000U, 0);
+		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
+			autohandle = TC_H_MAKE(0x80000000U, 0);
+	} while	(qdisc_lookup(dev, autohandle) && --i > 0);
+
+	return i>0 ? autohandle : 0;
+}
+
+/* Attach toplevel qdisc to device dev */
+
+static struct Qdisc *
+dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
+{
+	struct Qdisc *oqdisc;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	qdisc_lock_tree(dev);
+	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
+		oqdisc = dev->qdisc_ingress;
+		/* Prune old scheduler */
+		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
+			/* delete */
+			qdisc_reset(oqdisc);
+			dev->qdisc_ingress = NULL;
+		} else {  /* new */
+			dev->qdisc_ingress = qdisc;
+		}
+
+	} else {
+
+		oqdisc = dev->qdisc_sleeping;
+
+		/* Prune old scheduler */
+		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+			qdisc_reset(oqdisc);
+
+		/* ... and graft new one */
+		if (qdisc == NULL)
+			qdisc = &noop_qdisc;
+		dev->qdisc_sleeping = qdisc;
+		dev->qdisc = &noop_qdisc;
+	}
+
+	qdisc_unlock_tree(dev);
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return oqdisc;
+}
+
+
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+   to device "dev".
+
+   Old qdisc is not destroyed but returned in *old.
+ */
+
+static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+		       u32 classid,
+		       struct Qdisc *new, struct Qdisc **old)
+{
+	int err = 0;
+	struct Qdisc *q = *old;
+
+
+	if (parent == NULL) { 
+		if (q && q->flags&TCQ_F_INGRESS) {
+			*old = dev_graft_qdisc(dev, q);
+		} else {
+			*old = dev_graft_qdisc(dev, new);
+		}
+	} else {
+		struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+
+		err = -EINVAL;
+
+		if (cops) {
+			unsigned long cl = cops->get(parent, classid);
+			if (cl) {
+				err = cops->graft(parent, cl, new, old);
+				if (new)
+					new->parent = classid;
+				cops->put(parent, cl);
+			}
+		}
+	}
+	return err;
+}
+
+/*
+   Allocate and initialize new qdisc.
+
+   Parameters are passed via opt.
+ */
+
+static struct Qdisc *
+qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
+{
+	int err;
+	struct rtattr *kind = tca[TCA_KIND-1];
+	void *p = NULL;
+	struct Qdisc *sch;
+	struct Qdisc_ops *ops;
+	int size;
+
+	ops = qdisc_lookup_ops(kind);
+#ifdef CONFIG_KMOD
+	if (ops == NULL && kind != NULL) {
+		char name[IFNAMSIZ];
+		if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+			/* We dropped the RTNL semaphore in order to
+			 * perform the module load.  So, even if we
+			 * succeeded in loading the module we have to
+			 * tell the caller to replay the request.  We
+			 * indicate this using -EAGAIN.
+			 * We replay the request because the device may
+			 * go away in the mean time.
+			 */
+			rtnl_unlock();
+			request_module("sch_%s", name);
+			rtnl_lock();
+			ops = qdisc_lookup_ops(kind);
+			if (ops != NULL) {
+				/* We will try again qdisc_lookup_ops,
+				 * so don't keep a reference.
+				 */
+				module_put(ops->owner);
+				err = -EAGAIN;
+				goto err_out;
+			}
+		}
+	}
+#endif
+
+	err = -EINVAL;
+	if (ops == NULL)
+		goto err_out;
+
+	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+	size += ops->priv_size + QDISC_ALIGN_CONST;
+
+	p = kmalloc(size, GFP_KERNEL);
+	err = -ENOBUFS;
+	if (!p)
+		goto err_out2;
+	memset(p, 0, size);
+	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
+	                       & ~QDISC_ALIGN_CONST);
+	sch->padded = (char *)sch - (char *)p;
+
+	INIT_LIST_HEAD(&sch->list);
+	skb_queue_head_init(&sch->q);
+
+	if (handle == TC_H_INGRESS)
+		sch->flags |= TCQ_F_INGRESS;
+
+	sch->ops = ops;
+	sch->enqueue = ops->enqueue;
+	sch->dequeue = ops->dequeue;
+	sch->dev = dev;
+	dev_hold(dev);
+	atomic_set(&sch->refcnt, 1);
+	sch->stats_lock = &dev->queue_lock;
+	if (handle == 0) {
+		handle = qdisc_alloc_handle(dev);
+		err = -ENOMEM;
+		if (handle == 0)
+			goto err_out3;
+	}
+
+	if (handle == TC_H_INGRESS)
+                sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
+        else
+                sch->handle = handle;
+
+	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+		qdisc_lock_tree(dev);
+		list_add_tail(&sch->list, &dev->qdisc_list);
+		qdisc_unlock_tree(dev);
+
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1])
+			gen_new_estimator(&sch->bstats, &sch->rate_est,
+				sch->stats_lock, tca[TCA_RATE-1]);
+#endif
+		return sch;
+	}
+err_out3:
+	dev_put(dev);
+err_out2:
+	module_put(ops->owner);
+err_out:
+	*errp = err;
+	if (p)
+		kfree(p);
+	return NULL;
+}
+
+static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
+{
+	if (tca[TCA_OPTIONS-1]) {
+		int err;
+
+		if (sch->ops->change == NULL)
+			return -EINVAL;
+		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
+		if (err)
+			return err;
+	}
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tca[TCA_RATE-1])
+		gen_replace_estimator(&sch->bstats, &sch->rate_est,
+			sch->stats_lock, tca[TCA_RATE-1]);
+#endif
+	return 0;
+}
+
+struct check_loop_arg
+{
+	struct qdisc_walker 	w;
+	struct Qdisc		*p;
+	int			depth;
+};
+
+static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
+
+static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
+{
+	struct check_loop_arg	arg;
+
+	if (q->ops->cl_ops == NULL)
+		return 0;
+
+	arg.w.stop = arg.w.skip = arg.w.count = 0;
+	arg.w.fn = check_loop_fn;
+	arg.depth = depth;
+	arg.p = p;
+	q->ops->cl_ops->walk(q, &arg.w);
+	return arg.w.stop ? -ELOOP : 0;
+}
+
+static int
+check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
+{
+	struct Qdisc *leaf;
+	struct Qdisc_class_ops *cops = q->ops->cl_ops;
+	struct check_loop_arg *arg = (struct check_loop_arg *)w;
+
+	leaf = cops->leaf(q, cl);
+	if (leaf) {
+		if (leaf == arg->p || arg->depth > 7)
+			return -ELOOP;
+		return check_loop(leaf, arg->p, arg->depth + 1);
+	}
+	return 0;
+}
+
+/*
+ * Delete/get qdisc.
+ */
+
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct tcmsg *tcm = NLMSG_DATA(n);
+	struct rtattr **tca = arg;
+	struct net_device *dev;
+	u32 clid = tcm->tcm_parent;
+	struct Qdisc *q = NULL;
+	struct Qdisc *p = NULL;
+	int err;
+
+	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	if (clid) {
+		if (clid != TC_H_ROOT) {
+			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
+				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+					return -ENOENT;
+				q = qdisc_leaf(p, clid);
+			} else { /* ingress */
+				q = dev->qdisc_ingress;
+                        }
+		} else {
+			q = dev->qdisc_sleeping;
+		}
+		if (!q)
+			return -ENOENT;
+
+		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
+			return -EINVAL;
+	} else {
+		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+			return -ENOENT;
+	}
+
+	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+		return -EINVAL;
+
+	if (n->nlmsg_type == RTM_DELQDISC) {
+		if (!clid)
+			return -EINVAL;
+		if (q->handle == 0)
+			return -ENOENT;
+		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
+			return err;
+		if (q) {
+			qdisc_notify(skb, n, clid, q, NULL);
+			spin_lock_bh(&dev->queue_lock);
+			qdisc_destroy(q);
+			spin_unlock_bh(&dev->queue_lock);
+		}
+	} else {
+		qdisc_notify(skb, n, clid, NULL, q);
+	}
+	return 0;
+}
+
+/*
+   Create/change qdisc.
+ */
+
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct tcmsg *tcm;
+	struct rtattr **tca;
+	struct net_device *dev;
+	u32 clid;
+	struct Qdisc *q, *p;
+	int err;
+
+replay:
+	/* Reinit, just in case something touches this. */
+	tcm = NLMSG_DATA(n);
+	tca = arg;
+	clid = tcm->tcm_parent;
+	q = p = NULL;
+
+	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	if (clid) {
+		if (clid != TC_H_ROOT) {
+			if (clid != TC_H_INGRESS) {
+				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+					return -ENOENT;
+				q = qdisc_leaf(p, clid);
+			} else { /*ingress */
+				q = dev->qdisc_ingress;
+			}
+		} else {
+			q = dev->qdisc_sleeping;
+		}
+
+		/* It may be default qdisc, ignore it */
+		if (q && q->handle == 0)
+			q = NULL;
+
+		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
+			if (tcm->tcm_handle) {
+				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+					return -EEXIST;
+				if (TC_H_MIN(tcm->tcm_handle))
+					return -EINVAL;
+				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+					goto create_n_graft;
+				if (n->nlmsg_flags&NLM_F_EXCL)
+					return -EEXIST;
+				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+					return -EINVAL;
+				if (q == p ||
+				    (p && check_loop(q, p, 0)))
+					return -ELOOP;
+				atomic_inc(&q->refcnt);
+				goto graft;
+			} else {
+				if (q == NULL)
+					goto create_n_graft;
+
+				/* This magic test requires explanation.
+				 *
+				 *   We know, that some child q is already
+				 *   attached to this parent and have choice:
+				 *   either to change it or to create/graft new one.
+				 *
+				 *   1. We are allowed to create/graft only
+				 *   if CREATE and REPLACE flags are set.
+				 *
+				 *   2. If EXCL is set, requestor wanted to say,
+				 *   that qdisc tcm_handle is not expected
+				 *   to exist, so that we choose create/graft too.
+				 *
+				 *   3. The last case is when no flags are set.
+				 *   Alas, it is sort of hole in API, we
+				 *   cannot decide what to do unambiguously.
+				 *   For now we select create/graft, if
+				 *   user gave KIND, which does not match existing.
+				 */
+				if ((n->nlmsg_flags&NLM_F_CREATE) &&
+				    (n->nlmsg_flags&NLM_F_REPLACE) &&
+				    ((n->nlmsg_flags&NLM_F_EXCL) ||
+				     (tca[TCA_KIND-1] &&
+				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
+					goto create_n_graft;
+			}
+		}
+	} else {
+		if (!tcm->tcm_handle)
+			return -EINVAL;
+		q = qdisc_lookup(dev, tcm->tcm_handle);
+	}
+
+	/* Change qdisc parameters */
+	if (q == NULL)
+		return -ENOENT;
+	if (n->nlmsg_flags&NLM_F_EXCL)
+		return -EEXIST;
+	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+		return -EINVAL;
+	err = qdisc_change(q, tca);
+	if (err == 0)
+		qdisc_notify(skb, n, clid, NULL, q);
+	return err;
+
+create_n_graft:
+	if (!(n->nlmsg_flags&NLM_F_CREATE))
+		return -ENOENT;
+	if (clid == TC_H_INGRESS)
+		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
+        else
+		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
+	if (q == NULL) {
+		if (err == -EAGAIN)
+			goto replay;
+		return err;
+	}
+
+graft:
+	if (1) {
+		struct Qdisc *old_q = NULL;
+		err = qdisc_graft(dev, p, clid, q, &old_q);
+		if (err) {
+			if (q) {
+				spin_lock_bh(&dev->queue_lock);
+				qdisc_destroy(q);
+				spin_unlock_bh(&dev->queue_lock);
+			}
+			return err;
+		}
+		qdisc_notify(skb, n, clid, old_q, q);
+		if (old_q) {
+			spin_lock_bh(&dev->queue_lock);
+			qdisc_destroy(old_q);
+			spin_unlock_bh(&dev->queue_lock);
+		}
+	}
+	return 0;
+}
+
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+			 u32 pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+	struct gnet_dump d;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_parent = clid;
+	tcm->tcm_handle = q->handle;
+	tcm->tcm_info = atomic_read(&q->refcnt);
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	if (q->ops->dump && q->ops->dump(q, skb) < 0)
+		goto rtattr_failure;
+	q->qstats.qlen = q->q.qlen;
+
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+			TCA_XSTATS, q->stats_lock, &d) < 0)
+		goto rtattr_failure;
+
+	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+		goto rtattr_failure;
+
+	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
+		goto rtattr_failure;
+	
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto rtattr_failure;
+	
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			u32 clid, struct Qdisc *old, struct Qdisc *new)
+{
+	struct sk_buff *skb;
+	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (old && old->handle) {
+		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+			goto err_out;
+	}
+	if (new) {
+		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+			goto err_out;
+	}
+
+	if (skb->len)
+		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+
+err_out:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, q_idx;
+	int s_idx, s_q_idx;
+	struct net_device *dev;
+	struct Qdisc *q;
+
+	s_idx = cb->args[0];
+	s_q_idx = q_idx = cb->args[1];
+	read_lock(&dev_base_lock);
+	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (idx > s_idx)
+			s_q_idx = 0;
+		read_lock_bh(&qdisc_tree_lock);
+		q_idx = 0;
+		list_for_each_entry(q, &dev->qdisc_list, list) {
+			if (q_idx < s_q_idx) {
+				q_idx++;
+				continue;
+			}
+			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
+				read_unlock_bh(&qdisc_tree_lock);
+				goto done;
+			}
+			q_idx++;
+		}
+		read_unlock_bh(&qdisc_tree_lock);
+	}
+
+done:
+	read_unlock(&dev_base_lock);
+
+	cb->args[0] = idx;
+	cb->args[1] = q_idx;
+
+	return skb->len;
+}
+
+
+
+/************************************************
+ *	Traffic classes manipulation.		*
+ ************************************************/
+
+
+
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct tcmsg *tcm = NLMSG_DATA(n);
+	struct rtattr **tca = arg;
+	struct net_device *dev;
+	struct Qdisc *q = NULL;
+	struct Qdisc_class_ops *cops;
+	unsigned long cl = 0;
+	unsigned long new_cl;
+	u32 pid = tcm->tcm_parent;
+	u32 clid = tcm->tcm_handle;
+	u32 qid = TC_H_MAJ(clid);
+	int err;
+
+	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	/*
+	   parent == TC_H_UNSPEC - unspecified parent.
+	   parent == TC_H_ROOT   - class is root, which has no parent.
+	   parent == X:0	 - parent is root class.
+	   parent == X:Y	 - parent is a node in hierarchy.
+	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
+
+	   handle == 0:0	 - generate handle from kernel pool.
+	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
+	   handle == X:Y	 - clear.
+	   handle == X:0	 - root class.
+	 */
+
+	/* Step 1. Determine qdisc handle X:0 */
+
+	if (pid != TC_H_ROOT) {
+		u32 qid1 = TC_H_MAJ(pid);
+
+		if (qid && qid1) {
+			/* If both majors are known, they must be identical. */
+			if (qid != qid1)
+				return -EINVAL;
+		} else if (qid1) {
+			qid = qid1;
+		} else if (qid == 0)
+			qid = dev->qdisc_sleeping->handle;
+
+		/* Now qid is genuine qdisc handle consistent
+		   both with parent and child.
+
+		   TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 */
+		if (pid)
+			pid = TC_H_MAKE(qid, pid);
+	} else {
+		if (qid == 0)
+			qid = dev->qdisc_sleeping->handle;
+	}
+
+	/* OK. Locate qdisc */
+	if ((q = qdisc_lookup(dev, qid)) == NULL) 
+		return -ENOENT;
+
+	/* An check that it supports classes */
+	cops = q->ops->cl_ops;
+	if (cops == NULL)
+		return -EINVAL;
+
+	/* Now try to get class */
+	if (clid == 0) {
+		if (pid == TC_H_ROOT)
+			clid = qid;
+	} else
+		clid = TC_H_MAKE(qid, clid);
+
+	if (clid)
+		cl = cops->get(q, clid);
+
+	if (cl == 0) {
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto out;
+	} else {
+		switch (n->nlmsg_type) {
+		case RTM_NEWTCLASS:	
+			err = -EEXIST;
+			if (n->nlmsg_flags&NLM_F_EXCL)
+				goto out;
+			break;
+		case RTM_DELTCLASS:
+			err = cops->delete(q, cl);
+			if (err == 0)
+				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
+			goto out;
+		case RTM_GETTCLASS:
+			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
+			goto out;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	new_cl = cl;
+	err = cops->change(q, clid, pid, tca, &new_cl);
+	if (err == 0)
+		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
+
+out:
+	if (cl)
+		cops->put(q, cl);
+
+	return err;
+}
+
+
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+			  unsigned long cl,
+			  u32 pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;
+	struct gnet_dump d;
+	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_parent = q->handle;
+	tcm->tcm_handle = q->handle;
+	tcm->tcm_info = 0;
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+		goto rtattr_failure;
+
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+			TCA_XSTATS, q->stats_lock, &d) < 0)
+		goto rtattr_failure;
+
+	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+		goto rtattr_failure;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto rtattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct Qdisc *q, unsigned long cl, int event)
+{
+	struct sk_buff *skb;
+	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+}
+
+struct qdisc_dump_args
+{
+	struct qdisc_walker w;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
+{
+	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
+
+	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
+			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
+}
+
+static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct net_device *dev;
+	struct Qdisc *q;
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct qdisc_dump_args arg;
+
+	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+		return 0;
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return 0;
+
+	s_t = cb->args[0];
+	t = 0;
+
+	read_lock_bh(&qdisc_tree_lock);
+	list_for_each_entry(q, &dev->qdisc_list, list) {
+		if (t < s_t || !q->ops->cl_ops ||
+		    (tcm->tcm_parent &&
+		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
+			t++;
+			continue;
+		}
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		arg.w.fn = qdisc_class_dump;
+		arg.skb = skb;
+		arg.cb = cb;
+		arg.w.stop  = 0;
+		arg.w.skip = cb->args[1];
+		arg.w.count = 0;
+		q->ops->cl_ops->walk(q, &arg.w);
+		cb->args[1] = arg.w.count;
+		if (arg.w.stop)
+			break;
+		t++;
+	}
+	read_unlock_bh(&qdisc_tree_lock);
+
+	cb->args[0] = t;
+
+	dev_put(dev);
+	return skb->len;
+}
+
+/* Main classifier routine: scans classifier chain attached
+   to this qdisc, (optionally) tests for protocol and asks
+   specific classifiers.
+ */
+int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
+	struct tcf_result *res)
+{
+	int err = 0;
+	u32 protocol = skb->protocol;
+#ifdef CONFIG_NET_CLS_ACT
+	struct tcf_proto *otp = tp;
+reclassify:
+#endif
+	protocol = skb->protocol;
+
+	for ( ; tp; tp = tp->next) {
+		if ((tp->protocol == protocol ||
+			tp->protocol == __constant_htons(ETH_P_ALL)) &&
+			(err = tp->classify(skb, tp, res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+			if ( TC_ACT_RECLASSIFY == err) {
+				__u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
+				tp = otp;
+
+				if (MAX_REC_LOOP < verd++) {
+					printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
+						tp->prio&0xffff, ntohs(tp->protocol));
+					return TC_ACT_SHOT;
+				}
+				skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
+				goto reclassify;
+			} else {
+				if (skb->tc_verd) 
+					skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+				return err;
+			}
+#else
+
+			return err;
+#endif
+		}
+
+	}
+	return -1;
+}
+
+static int psched_us_per_tick = 1;
+static int psched_tick_per_us = 1;
+
+#ifdef CONFIG_PROC_FS
+static int psched_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%08x %08x %08x %08x\n",
+		      psched_tick_per_us, psched_us_per_tick,
+		      1000000, HZ);
+
+	return 0;
+}
+
+static int psched_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psched_show, PDE(inode)->data);
+}
+
+static struct file_operations psched_fops = {
+	.owner = THIS_MODULE,
+	.open = psched_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};	
+#endif
+
+#ifdef CONFIG_NET_SCH_CLK_CPU
+psched_tdiff_t psched_clock_per_hz;
+int psched_clock_scale;
+EXPORT_SYMBOL(psched_clock_per_hz);
+EXPORT_SYMBOL(psched_clock_scale);
+
+psched_time_t psched_time_base;
+cycles_t psched_time_mark;
+EXPORT_SYMBOL(psched_time_mark);
+EXPORT_SYMBOL(psched_time_base);
+
+/*
+ * Periodically adjust psched_time_base to avoid overflow
+ * with 32-bit get_cycles(). Safe up to 4GHz CPU.
+ */
+static void psched_tick(unsigned long);
+static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);
+
+static void psched_tick(unsigned long dummy)
+{
+	if (sizeof(cycles_t) == sizeof(u32)) {
+		psched_time_t dummy_stamp;
+		PSCHED_GET_TIME(dummy_stamp);
+		psched_timer.expires = jiffies + 1*HZ;
+		add_timer(&psched_timer);
+	}
+}
+
+int __init psched_calibrate_clock(void)
+{
+	psched_time_t stamp, stamp1;
+	struct timeval tv, tv1;
+	psched_tdiff_t delay;
+	long rdelay;
+	unsigned long stop;
+
+	psched_tick(0);
+	stop = jiffies + HZ/10;
+	PSCHED_GET_TIME(stamp);
+	do_gettimeofday(&tv);
+	while (time_before(jiffies, stop)) {
+		barrier();
+		cpu_relax();
+	}
+	PSCHED_GET_TIME(stamp1);
+	do_gettimeofday(&tv1);
+
+	delay = PSCHED_TDIFF(stamp1, stamp);
+	rdelay = tv1.tv_usec - tv.tv_usec;
+	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
+	if (rdelay > delay)
+		return -1;
+	delay /= rdelay;
+	psched_tick_per_us = delay;
+	while ((delay>>=1) != 0)
+		psched_clock_scale++;
+	psched_us_per_tick = 1<<psched_clock_scale;
+	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
+	return 0;
+}
+#endif
+
+static int __init pktsched_init(void)
+{
+	struct rtnetlink_link *link_p;
+
+#ifdef CONFIG_NET_SCH_CLK_CPU
+	if (psched_calibrate_clock() < 0)
+		return -1;
+#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
+	psched_tick_per_us = HZ<<PSCHED_JSCALE;
+	psched_us_per_tick = 1000000;
+#endif
+
+	link_p = rtnetlink_links[PF_UNSPEC];
+
+	/* Setup rtnetlink links. It is made here to avoid
+	   exporting large number of public symbols.
+	 */
+
+	if (link_p) {
+		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
+		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
+		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
+		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
+		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
+	}
+
+	register_qdisc(&pfifo_qdisc_ops);
+	register_qdisc(&bfifo_qdisc_ops);
+	proc_net_fops_create("psched", 0, &psched_fops);
+
+	return 0;
+}
+
+subsys_initcall(pktsched_init);
+
+EXPORT_SYMBOL(qdisc_get_rtab);
+EXPORT_SYMBOL(qdisc_put_rtab);
+EXPORT_SYMBOL(register_qdisc);
+EXPORT_SYMBOL(unregister_qdisc);
+EXPORT_SYMBOL(tc_classify);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
new file mode 100644
index 00000000000..93ebce40aca
--- /dev/null
+++ b/net/sched/sch_atm.c
@@ -0,0 +1,735 @@
+/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/file.h> /* for fput */
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+
+extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
+
+#if 0 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+/*
+ * The ATM queuing discipline provides a framework for invoking classifiers
+ * (aka "filters"), which in turn select classes of this queuing discipline.
+ * Each class maps the flow(s) it is handling to a given VC. Multiple classes
+ * may share the same VC.
+ *
+ * When creating a class, VCs are specified by passing the number of the open
+ * socket descriptor by which the calling process references the VC. The kernel
+ * keeps the VC open at least until all classes using it are removed.
+ *
+ * In this file, most functions are named atm_tc_* to avoid confusion with all
+ * the atm_* in net/atm. This naming convention differs from what's used in the
+ * rest of net/sched.
+ *
+ * Known bugs:
+ *  - sometimes messes up the IP stack
+ *  - any manipulations besides the few operations described in the README, are
+ *    untested and likely to crash the system
+ *  - should lock the flow while there is data in the queue (?)
+ */
+
+
+#define PRIV(sch) qdisc_priv(sch)
+#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
+
+
+struct atm_flow_data {
+	struct Qdisc		*q;		/* FIFO, TBF, etc. */
+	struct tcf_proto	*filter_list;
+	struct atm_vcc		*vcc;		/* VCC; NULL if VCC is closed */
+	void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */
+	struct atm_qdisc_data	*parent;	/* parent qdisc */
+	struct socket		*sock;		/* for closing */
+	u32			classid;	/* x:y type ID */
+	int			ref;		/* reference count */
+	struct gnet_stats_basic	bstats;
+	struct gnet_stats_queue	qstats;
+	spinlock_t		*stats_lock;
+	struct atm_flow_data	*next;
+	struct atm_flow_data	*excess;	/* flow for excess traffic;
+						   NULL to set CLP instead */
+	int			hdr_len;
+	unsigned char		hdr[0];		/* header data; MUST BE LAST */
+};
+
+struct atm_qdisc_data {
+	struct atm_flow_data	link;		/* unclassified skbs go here */
+	struct atm_flow_data	*flows;		/* NB: "link" is also on this
+						   list */
+	struct tasklet_struct	task;		/* requeue tasklet */
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int find_flow(struct atm_qdisc_data *qdisc,struct atm_flow_data *flow)
+{
+	struct atm_flow_data *walk;
+
+	DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc,flow);
+	for (walk = qdisc->flows; walk; walk = walk->next)
+		if (walk == flow) return 1;
+	DPRINTK("find_flow: not found\n");
+	return 0;
+}
+
+
+static __inline__ struct atm_flow_data *lookup_flow(struct Qdisc *sch,
+    u32 classid)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+        for (flow = p->flows; flow; flow = flow->next)
+		if (flow->classid == classid) break;
+	return flow;
+}
+
+
+static int atm_tc_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch,
+	    p,flow,new,old);
+	if (!find_flow(p,flow)) return -EINVAL;
+	if (!new) new = &noop_qdisc;
+	*old = xchg(&flow->q,new);
+	if (*old) qdisc_reset(*old);
+        return 0;
+}
+
+
+static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl)
+{
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+
+	DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch,flow);
+	return flow ? flow->q : NULL;
+}
+
+
+static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid)
+{
+	struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
+	flow = lookup_flow(sch,classid);
+        if (flow) flow->ref++;
+	DPRINTK("atm_tc_get: flow %p\n",flow);
+	return (unsigned long) flow;
+}
+
+
+static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return atm_tc_get(sch,classid);
+}
+
+
+static void destroy_filters(struct atm_flow_data *flow)
+{
+	struct tcf_proto *filter;
+
+	while ((filter = flow->filter_list)) {
+		DPRINTK("destroy_filters: destroying filter %p\n",filter);
+		flow->filter_list = filter->next;
+		tcf_destroy(filter);
+	}
+}
+
+
+/*
+ * atm_tc_put handles all destructions, including the ones that are explicitly
+ * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
+ * anything that still seems to be in use.
+ */
+
+static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+	struct atm_flow_data **prev;
+
+	DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+	if (--flow->ref) return;
+	DPRINTK("atm_tc_put: destroying\n");
+	for (prev = &p->flows; *prev; prev = &(*prev)->next)
+		if (*prev == flow) break;
+	if (!*prev) {
+		printk(KERN_CRIT "atm_tc_put: class %p not found\n",flow);
+		return;
+	}
+	*prev = flow->next;
+	DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
+	qdisc_destroy(flow->q);
+	destroy_filters(flow);
+	if (flow->sock) {
+		DPRINTK("atm_tc_put: f_count %d\n",
+		    file_count(flow->sock->file));
+		flow->vcc->pop = flow->old_pop;
+		sockfd_put(flow->sock);
+	}
+	if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess);
+	if (flow != &p->link) kfree(flow);
+	/*
+	 * If flow == &p->link, the qdisc no longer works at this point and
+	 * needs to be removed. (By the caller of atm_tc_put.)
+	 */
+}
+
+
+static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb)
+{
+	struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
+
+	D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n",vcc,skb,p);
+	VCC2FLOW(vcc)->old_pop(vcc,skb);
+	tasklet_schedule(&p->task);
+}
+
+static const u8 llc_oui_ip[] = {
+	0xaa,		/* DSAP: non-ISO */
+	0xaa,		/* SSAP: non-ISO */
+	0x03,		/* Ctrl: Unnumbered Information Command PDU */
+	0x00,		/* OUI: EtherType */
+	0x00, 0x00,
+	0x08, 0x00 };	/* Ethertype IP (0800) */
+
+static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) *arg;
+	struct atm_flow_data *excess = NULL;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_ATM_MAX];
+	struct socket *sock;
+	int fd,error,hdr_len;
+	void *hdr;
+
+	DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
+	    "flow %p,opt %p)\n",sch,p,classid,parent,flow,opt);
+	/*
+	 * The concept of parents doesn't apply for this qdisc.
+	 */
+	if (parent && parent != TC_H_ROOT && parent != sch->handle)
+		return -EINVAL;
+	/*
+	 * ATM classes cannot be changed. In order to change properties of the
+	 * ATM connection, that socket needs to be modified directly (via the
+	 * native ATM API. In order to send a flow to a different VC, the old
+	 * class needs to be removed and a new one added. (This may be changed
+	 * later.)
+	 */
+	if (flow) return -EBUSY;
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt))
+		return -EINVAL;
+	if (!tb[TCA_ATM_FD-1] || RTA_PAYLOAD(tb[TCA_ATM_FD-1]) < sizeof(fd))
+		return -EINVAL;
+	fd = *(int *) RTA_DATA(tb[TCA_ATM_FD-1]);
+	DPRINTK("atm_tc_change: fd %d\n",fd);
+	if (tb[TCA_ATM_HDR-1]) {
+		hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR-1]);
+		hdr = RTA_DATA(tb[TCA_ATM_HDR-1]);
+	}
+	else {
+		hdr_len = RFC1483LLC_LEN;
+		hdr = NULL; /* default LLC/SNAP for IP */
+	}
+	if (!tb[TCA_ATM_EXCESS-1]) excess = NULL;
+	else {
+		if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS-1]) != sizeof(u32))
+			return -EINVAL;
+		excess = (struct atm_flow_data *) atm_tc_get(sch,
+		    *(u32 *) RTA_DATA(tb[TCA_ATM_EXCESS-1]));
+		if (!excess) return -ENOENT;
+	}
+	DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n",
+	    opt->rta_type,RTA_PAYLOAD(opt),hdr_len);
+	if (!(sock = sockfd_lookup(fd,&error))) return error; /* f_count++ */
+	DPRINTK("atm_tc_change: f_count %d\n",file_count(sock->file));
+        if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
+		error = -EPROTOTYPE;
+                goto err_out;
+	}
+	/* @@@ should check if the socket is really operational or we'll crash
+	   on vcc->send */
+	if (classid) {
+		if (TC_H_MAJ(classid ^ sch->handle)) {
+			DPRINTK("atm_tc_change: classid mismatch\n");
+			error = -EINVAL;
+			goto err_out;
+		}
+		if (find_flow(p,flow)) {
+			error = -EEXIST;
+			goto err_out;
+		}
+	}
+	else {
+		int i;
+		unsigned long cl;
+
+		for (i = 1; i < 0x8000; i++) {
+			classid = TC_H_MAKE(sch->handle,0x8000 | i);
+			if (!(cl = atm_tc_get(sch,classid))) break;
+			atm_tc_put(sch,cl);
+		}
+	}
+	DPRINTK("atm_tc_change: new id %x\n",classid);
+	flow = kmalloc(sizeof(struct atm_flow_data)+hdr_len,GFP_KERNEL);
+	DPRINTK("atm_tc_change: flow %p\n",flow);
+	if (!flow) {
+		error = -ENOBUFS;
+		goto err_out;
+	}
+	memset(flow,0,sizeof(*flow));
+	flow->filter_list = NULL;
+	if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+		flow->q = &noop_qdisc;
+	DPRINTK("atm_tc_change: qdisc %p\n",flow->q);
+	flow->sock = sock;
+        flow->vcc = ATM_SD(sock); /* speedup */
+	flow->vcc->user_back = flow;
+        DPRINTK("atm_tc_change: vcc %p\n",flow->vcc);
+	flow->old_pop = flow->vcc->pop;
+	flow->parent = p;
+	flow->vcc->pop = sch_atm_pop;
+	flow->classid = classid;
+	flow->ref = 1;
+	flow->excess = excess;
+	flow->next = p->link.next;
+	p->link.next = flow;
+	flow->hdr_len = hdr_len;
+	if (hdr)
+		memcpy(flow->hdr,hdr,hdr_len);
+	else
+		memcpy(flow->hdr,llc_oui_ip,sizeof(llc_oui_ip));
+	*arg = (unsigned long) flow;
+	return 0;
+err_out:
+	if (excess) atm_tc_put(sch,(unsigned long) excess);
+	sockfd_put(sock);
+	return error;
+}
+
+
+static int atm_tc_delete(struct Qdisc *sch,unsigned long arg)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+	if (!find_flow(PRIV(sch),flow)) return -EINVAL;
+	if (flow->filter_list || flow == &p->link) return -EBUSY;
+	/*
+	 * Reference count must be 2: one for "keepalive" (set at class
+	 * creation), and one for the reference held when calling delete.
+	 */
+	if (flow->ref < 2) {
+		printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n",flow->ref);
+		return -EINVAL;
+	}
+	if (flow->ref > 2) return -EBUSY; /* catch references via excess, etc.*/
+	atm_tc_put(sch,arg);
+	return 0;
+}
+
+
+static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
+	if (walker->stop) return;
+	for (flow = p->flows; flow; flow = flow->next) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch,(unsigned long) flow,walker) < 0) {
+				walker->stop = 1;
+				break;
+			}
+		walker->count++;
+	}
+}
+
+
+static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+
+	DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch,p,flow);
+        return flow ? &flow->filter_list : &p->link.filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = NULL ; /* @@@ */
+	struct tcf_result res;
+	int result;
+	int ret = NET_XMIT_POLICED;
+
+	D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	result = TC_POLICE_OK; /* be nice to gcc */
+	if (TC_H_MAJ(skb->priority) != sch->handle ||
+	    !(flow = (struct atm_flow_data *) atm_tc_get(sch,skb->priority)))
+		for (flow = p->flows; flow; flow = flow->next)
+			if (flow->filter_list) {
+				result = tc_classify(skb,flow->filter_list,
+				    &res);
+				if (result < 0) continue;
+				flow = (struct atm_flow_data *) res.class;
+				if (!flow) flow = lookup_flow(sch,res.classid);
+				break;
+			}
+	if (!flow) flow = &p->link;
+	else {
+		if (flow->vcc)
+			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
+			/*@@@ looks good ... but it's not supposed to work :-)*/
+#ifdef CONFIG_NET_CLS_POLICE
+		switch (result) {
+			case TC_POLICE_SHOT:
+				kfree_skb(skb);
+				break;
+			case TC_POLICE_RECLASSIFY:
+				if (flow->excess) flow = flow->excess;
+				else {
+					ATM_SKB(skb)->atm_options |=
+					    ATM_ATMOPT_CLP;
+					break;
+				}
+				/* fall through */
+			case TC_POLICE_OK:
+				/* fall through */
+			default:
+				break;
+		}
+#endif
+	}
+	if (
+#ifdef CONFIG_NET_CLS_POLICE
+	    result == TC_POLICE_SHOT ||
+#endif
+	    (ret = flow->q->enqueue(skb,flow->q)) != 0) {
+		sch->qstats.drops++;
+		if (flow) flow->qstats.drops++;
+		return ret;
+	}
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	flow->bstats.bytes += skb->len;
+	flow->bstats.packets++;
+	/*
+	 * Okay, this may seem weird. We pretend we've dropped the packet if
+	 * it goes via ATM. The reason for this is that the outer qdisc
+	 * expects to be able to q->dequeue the packet later on if we return
+	 * success at this place. Also, sch->q.qdisc needs to reflect whether
+	 * there is a packet egligible for dequeuing or not. Note that the
+	 * statistics of the outer qdisc are necessarily wrong because of all
+	 * this. There's currently no correct solution for this.
+	 */
+	if (flow == &p->link) {
+		sch->q.qlen++;
+		return 0;
+	}
+	tasklet_schedule(&p->task);
+	return NET_XMIT_BYPASS;
+}
+
+
+/*
+ * Dequeue packets and send them over ATM. Note that we quite deliberately
+ * avoid checking net_device's flow control here, simply because sch_atm
+ * uses its own channels, which have nothing to do with any CLIP/LANE/or
+ * non-ATM interfaces.
+ */
+
+
+static void sch_atm_dequeue(unsigned long data)
+{
+	struct Qdisc *sch = (struct Qdisc *) data;
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+	struct sk_buff *skb;
+
+	D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->link.next; flow; flow = flow->next)
+		/*
+		 * If traffic is properly shaped, this won't generate nasty
+		 * little bursts. Otherwise, it may ... (but that's okay)
+		 */
+		while ((skb = flow->q->dequeue(flow->q))) {
+			if (!atm_may_send(flow->vcc,skb->truesize)) {
+				(void) flow->q->ops->requeue(skb,flow->q);
+				break;
+			}
+			D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
+			/* remove any LL header somebody else has attached */
+			skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+			if (skb_headroom(skb) < flow->hdr_len) {
+				struct sk_buff *new;
+
+				new = skb_realloc_headroom(skb,flow->hdr_len);
+				dev_kfree_skb(skb);
+				if (!new) continue;
+				skb = new;
+			}
+			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
+			    skb->nh.iph,skb->data);
+			ATM_SKB(skb)->vcc = flow->vcc;
+			memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
+			    flow->hdr_len);
+			atomic_add(skb->truesize,
+				   &sk_atm(flow->vcc)->sk_wmem_alloc);
+			/* atm.atm_options are already set by atm_tc_enqueue */
+			(void) flow->vcc->send(flow->vcc,skb);
+		}
+}
+
+
+static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct sk_buff *skb;
+
+	D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	tasklet_schedule(&p->task);
+	skb = p->link.q->dequeue(p->link.q);
+	if (skb) sch->q.qlen--;
+	return skb;
+}
+
+
+static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	int ret;
+
+	D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	ret = p->link.q->ops->requeue(skb,p->link.q);
+	if (!ret) {
+        sch->q.qlen++;
+        sch->qstats.requeues++;
+    } else {
+		sch->qstats.drops++;
+		p->link.qstats.drops++;
+	}
+	return ret;
+}
+
+
+static unsigned int atm_tc_drop(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+	unsigned int len;
+
+	DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->flows; flow; flow = flow->next)
+		if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
+			return len;
+	return 0;
+}
+
+
+static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	p->flows = &p->link;
+	if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
+		p->link.q = &noop_qdisc;
+	DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q);
+	p->link.filter_list = NULL;
+	p->link.vcc = NULL;
+	p->link.sock = NULL;
+	p->link.classid = sch->handle;
+	p->link.ref = 1;
+	p->link.next = NULL;
+	tasklet_init(&p->task,sch_atm_dequeue,(unsigned long) sch);
+	return 0;
+}
+
+
+static void atm_tc_reset(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch,p);
+	for (flow = p->flows; flow; flow = flow->next) qdisc_reset(flow->q);
+	sch->q.qlen = 0;
+}
+
+
+static void atm_tc_destroy(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow;
+
+	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
+	/* races ? */
+	while ((flow = p->flows)) {
+		destroy_filters(flow);
+		if (flow->ref > 1)
+			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
+			    flow->ref);
+		atm_tc_put(sch,(unsigned long) flow);
+		if (p->flows == flow) {
+			printk(KERN_ERR "atm_destroy: putting flow %p didn't "
+			    "kill it\n",flow);
+			p->flows = flow->next; /* brute force */
+			break;
+		}
+	}
+	tasklet_kill(&p->task);
+}
+
+
+static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
+    struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct atm_qdisc_data *p = PRIV(sch);
+	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
+	    sch,p,flow,skb,tcm);
+	if (!find_flow(p,flow)) return -EINVAL;
+	tcm->tcm_handle = flow->classid;
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_ATM_HDR,flow->hdr_len,flow->hdr);
+	if (flow->vcc) {
+		struct sockaddr_atmpvc pvc;
+		int state;
+
+		pvc.sap_family = AF_ATMPVC;
+		pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
+		pvc.sap_addr.vpi = flow->vcc->vpi;
+		pvc.sap_addr.vci = flow->vcc->vci;
+		RTA_PUT(skb,TCA_ATM_ADDR,sizeof(pvc),&pvc);
+		state = ATM_VF2VS(flow->vcc->flags);
+		RTA_PUT(skb,TCA_ATM_STATE,sizeof(state),&state);
+	}
+	if (flow->excess)
+		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(u32),&flow->classid);
+	else {
+		static u32 zero;
+
+		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
+	}
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+static int
+atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+	struct gnet_dump *d)
+{
+	struct atm_flow_data *flow = (struct atm_flow_data *) arg;
+
+	flow->qstats.qlen = flow->q->q.qlen;
+
+	if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &flow->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct Qdisc_class_ops atm_class_ops = {
+	.graft		=	atm_tc_graft,
+	.leaf		=	atm_tc_leaf,
+	.get		=	atm_tc_get,
+	.put		=	atm_tc_put,
+	.change		=	atm_tc_change,
+	.delete		=	atm_tc_delete,
+	.walk		=	atm_tc_walk,
+	.tcf_chain	=	atm_tc_find_tcf,
+	.bind_tcf	=	atm_tc_bind_filter,
+	.unbind_tcf	=	atm_tc_put,
+	.dump		=	atm_tc_dump_class,
+	.dump_stats	=	atm_tc_dump_class_stats,
+};
+
+static struct Qdisc_ops atm_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&atm_class_ops,
+	.id		=	"atm",
+	.priv_size	=	sizeof(struct atm_qdisc_data),
+	.enqueue	=	atm_tc_enqueue,
+	.dequeue	=	atm_tc_dequeue,
+	.requeue	=	atm_tc_requeue,
+	.drop		=	atm_tc_drop,
+	.init		=	atm_tc_init,
+	.reset		=	atm_tc_reset,
+	.destroy	=	atm_tc_destroy,
+	.change		=	NULL,
+	.dump		=	atm_tc_dump,
+	.owner		=	THIS_MODULE,
+};
+
+
+static int __init atm_init(void)
+{
+	return register_qdisc(&atm_qdisc_ops);
+}
+
+static void __exit atm_exit(void) 
+{
+	unregister_qdisc(&atm_qdisc_ops);
+}
+
+module_init(atm_init)
+module_exit(atm_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
new file mode 100644
index 00000000000..d43e3b8cbf6
--- /dev/null
+++ b/net/sched/sch_cbq.c
@@ -0,0 +1,2124 @@
+/*
+ * net/sched/sch_cbq.c	Class-Based Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/*	Class-Based Queueing (CBQ) algorithm.
+	=======================================
+
+	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
+	         Management Models for Packet Networks",
+		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
+
+	         [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
+
+	         [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
+		 Parameters", 1996
+
+		 [4] Sally Floyd and Michael Speer, "Experimental Results
+		 for Class-Based Queueing", 1998, not published.
+
+	-----------------------------------------------------------------------
+
+	Algorithm skeleton was taken from NS simulator cbq.cc.
+	If someone wants to check this code against the LBL version,
+	he should take into account that ONLY the skeleton was borrowed,
+	the implementation is different. Particularly:
+
+	--- The WRR algorithm is different. Our version looks more
+        reasonable (I hope) and works when quanta are allowed to be
+        less than MTU, which is always the case when real time classes
+        have small rates. Note, that the statement of [3] is
+        incomplete, delay may actually be estimated even if class
+        per-round allotment is less than MTU. Namely, if per-round
+        allotment is W*r_i, and r_1+...+r_k = r < 1
+
+	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
+
+	In the worst case we have IntServ estimate with D = W*r+k*MTU
+	and C = MTU*r. The proof (if correct at all) is trivial.
+
+
+	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
+	interpret some places, which look like wrong translations
+	from NS. Anyone is advised to find these differences
+	and explain to me, why I am wrong 8).
+
+	--- Linux has no EOI event, so that we cannot estimate true class
+	idle time. Workaround is to consider the next dequeue event
+	as sign that previous packet is finished. This is wrong because of
+	internal device queueing, but on a permanently loaded link it is true.
+	Moreover, combined with clock integrator, this scheme looks
+	very close to an ideal solution.  */
+
+struct cbq_sched_data;
+
+
+struct cbq_class
+{
+	struct cbq_class	*next;		/* hash table link */
+	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
+
+/* Parameters */
+	u32			classid;
+	unsigned char		priority;	/* class priority */
+	unsigned char		priority2;	/* priority to be used after overlimit */
+	unsigned char		ewma_log;	/* time constant for idle time calculation */
+	unsigned char		ovl_strategy;
+#ifdef CONFIG_NET_CLS_POLICE
+	unsigned char		police;
+#endif
+
+	u32			defmap;
+
+	/* Link-sharing scheduler parameters */
+	long			maxidle;	/* Class parameters: see below. */
+	long			offtime;
+	long			minidle;
+	u32			avpkt;
+	struct qdisc_rate_table	*R_tab;
+
+	/* Overlimit strategy parameters */
+	void			(*overlimit)(struct cbq_class *cl);
+	long			penalty;
+
+	/* General scheduler (WRR) parameters */
+	long			allot;
+	long			quantum;	/* Allotment per WRR round */
+	long			weight;		/* Relative allotment: see below */
+
+	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
+	struct cbq_class	*split;		/* Ptr to split node */
+	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
+	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
+	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
+						   parent otherwise */
+	struct cbq_class	*sibling;	/* Sibling chain */
+	struct cbq_class	*children;	/* Pointer to children chain */
+
+	struct Qdisc		*q;		/* Elementary queueing discipline */
+
+
+/* Variables */
+	unsigned char		cpriority;	/* Effective priority */
+	unsigned char		delayed;
+	unsigned char		level;		/* level of the class in hierarchy:
+						   0 for leaf classes, and maximal
+						   level of children + 1 for nodes.
+						 */
+
+	psched_time_t		last;		/* Last end of service */
+	psched_time_t		undertime;
+	long			avgidle;
+	long			deficit;	/* Saved deficit for WRR */
+	unsigned long		penalized;
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue qstats;
+	struct gnet_stats_rate_est rate_est;
+	spinlock_t		*stats_lock;
+	struct tc_cbq_xstats	xstats;
+
+	struct tcf_proto	*filter_list;
+
+	int			refcnt;
+	int			filters;
+
+	struct cbq_class 	*defaults[TC_PRIO_MAX+1];
+};
+
+struct cbq_sched_data
+{
+	struct cbq_class	*classes[16];		/* Hash table of all classes */
+	int			nclasses[TC_CBQ_MAXPRIO+1];
+	unsigned		quanta[TC_CBQ_MAXPRIO+1];
+
+	struct cbq_class	link;
+
+	unsigned		activemask;
+	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
+								   with backlog */
+
+#ifdef CONFIG_NET_CLS_POLICE
+	struct cbq_class	*rx_class;
+#endif
+	struct cbq_class	*tx_class;
+	struct cbq_class	*tx_borrowed;
+	int			tx_len;
+	psched_time_t		now;		/* Cached timestamp */
+	psched_time_t		now_rt;		/* Cached real time */
+	unsigned		pmask;
+
+	struct timer_list	delay_timer;
+	struct timer_list	wd_timer;	/* Watchdog timer,
+						   started when CBQ has
+						   backlog, but cannot
+						   transmit just now */
+	long			wd_expires;
+	int			toplevel;
+	u32			hgenerator;
+};
+
+
+#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
+
+
+static __inline__ unsigned cbq_hash(u32 h)
+{
+	h ^= h>>8;
+	h ^= h>>4;
+	return h&0xF;
+}
+
+static __inline__ struct cbq_class *
+cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
+{
+	struct cbq_class *cl;
+
+	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
+		if (cl->classid == classid)
+			return cl;
+	return NULL;
+}
+
+#ifdef CONFIG_NET_CLS_POLICE
+
+static struct cbq_class *
+cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
+{
+	struct cbq_class *cl, *new;
+
+	for (cl = this->tparent; cl; cl = cl->tparent)
+		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
+			return new;
+
+	return NULL;
+}
+
+#endif
+
+/* Classify packet. The procedure is pretty complicated, but
+   it allows us to combine link sharing and priority scheduling
+   transparently.
+
+   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+   so that it resolves to split nodes. Then packets are classified
+   by logical priority, or a more specific classifier may be attached
+   to the split node.
+ */
+
+static struct cbq_class *
+cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *head = &q->link;
+	struct cbq_class **defmap;
+	struct cbq_class *cl = NULL;
+	u32 prio = skb->priority;
+	struct tcf_result res;
+
+	/*
+	 *  Step 1. If skb->priority points to one of our classes, use it.
+	 */
+	if (TC_H_MAJ(prio^sch->handle) == 0 &&
+	    (cl = cbq_class_lookup(q, prio)) != NULL)
+		return cl;
+
+	*qerr = NET_XMIT_DROP;
+	for (;;) {
+		int result = 0;
+		defmap = head->defaults;
+
+		/*
+		 * Step 2+n. Apply classifier.
+		 */
+		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
+			goto fallback;
+
+		if ((cl = (void*)res.class) == NULL) {
+			if (TC_H_MAJ(res.classid))
+				cl = cbq_class_lookup(q, res.classid);
+			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+				cl = defmap[TC_PRIO_BESTEFFORT];
+
+			if (cl == NULL || cl->level >= head->level)
+				goto fallback;
+		}
+
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_QUEUED:
+		case TC_ACT_STOLEN: 
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT:
+			return NULL;
+		}
+#elif defined(CONFIG_NET_CLS_POLICE)
+		switch (result) {
+		case TC_POLICE_RECLASSIFY:
+			return cbq_reclassify(skb, cl);
+		case TC_POLICE_SHOT:
+			return NULL;
+		default:
+			break;
+		}
+#endif
+		if (cl->level == 0)
+			return cl;
+
+		/*
+		 * Step 3+n. If classifier selected a link sharing class,
+		 *	   apply agency specific classifier.
+		 *	   Repeat this procdure until we hit a leaf node.
+		 */
+		head = cl;
+	}
+
+fallback:
+	cl = head;
+
+	/*
+	 * Step 4. No success...
+	 */
+	if (TC_H_MAJ(prio) == 0 &&
+	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
+		return head;
+
+	return cl;
+}
+
+/*
+   A packet has just been enqueued on the empty class.
+   cbq_activate_class adds it to the tail of active class list
+   of its priority band.
+ */
+
+static __inline__ void cbq_activate_class(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	int prio = cl->cpriority;
+	struct cbq_class *cl_tail;
+
+	cl_tail = q->active[prio];
+	q->active[prio] = cl;
+
+	if (cl_tail != NULL) {
+		cl->next_alive = cl_tail->next_alive;
+		cl_tail->next_alive = cl;
+	} else {
+		cl->next_alive = cl;
+		q->activemask |= (1<<prio);
+	}
+}
+
+/*
+   Unlink class from active chain.
+   Note that this same procedure is done directly in cbq_dequeue*
+   during round-robin procedure.
+ */
+
+static void cbq_deactivate_class(struct cbq_class *this)
+{
+	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+	int prio = this->cpriority;
+	struct cbq_class *cl;
+	struct cbq_class *cl_prev = q->active[prio];
+
+	do {
+		cl = cl_prev->next_alive;
+		if (cl == this) {
+			cl_prev->next_alive = cl->next_alive;
+			cl->next_alive = NULL;
+
+			if (cl == q->active[prio]) {
+				q->active[prio] = cl_prev;
+				if (cl == q->active[prio]) {
+					q->active[prio] = NULL;
+					q->activemask &= ~(1<<prio);
+					return;
+				}
+			}
+
+			cl = cl_prev->next_alive;
+			return;
+		}
+	} while ((cl_prev = cl) != q->active[prio]);
+}
+
+static void
+cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+	int toplevel = q->toplevel;
+
+	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+		psched_time_t now;
+		psched_tdiff_t incr;
+
+		PSCHED_GET_TIME(now);
+		incr = PSCHED_TDIFF(now, q->now_rt);
+		PSCHED_TADD2(q->now, incr, now);
+
+		do {
+			if (PSCHED_TLESS(cl->undertime, now)) {
+				q->toplevel = cl->level;
+				return;
+			}
+		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+	}
+}
+
+static int
+cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	int len = skb->len;
+	int ret;
+	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
+
+#ifdef CONFIG_NET_CLS_POLICE
+	q->rx_class = cl;
+#endif
+	if (cl == NULL) {
+		if (ret == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+
+#ifdef CONFIG_NET_CLS_POLICE
+	cl->q->__parent = sch;
+#endif
+	if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->bstats.packets++;
+		sch->bstats.bytes+=len;
+		cbq_mark_toplevel(q, cl);
+		if (!cl->next_alive)
+			cbq_activate_class(cl);
+		return ret;
+	}
+
+	sch->qstats.drops++;
+	cbq_mark_toplevel(q, cl);
+	cl->qstats.drops++;
+	return ret;
+}
+
+static int
+cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl;
+	int ret;
+
+	if ((cl = q->tx_class) == NULL) {
+		kfree_skb(skb);
+		sch->qstats.drops++;
+		return NET_XMIT_CN;
+	}
+	q->tx_class = NULL;
+
+	cbq_mark_toplevel(q, cl);
+
+#ifdef CONFIG_NET_CLS_POLICE
+	q->rx_class = cl;
+	cl->q->__parent = sch;
+#endif
+	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		if (!cl->next_alive)
+			cbq_activate_class(cl);
+		return 0;
+	}
+	sch->qstats.drops++;
+	cl->qstats.drops++;
+	return ret;
+}
+
+/* Overlimit actions */
+
+/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
+
+static void cbq_ovl_classic(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+
+	if (!cl->delayed) {
+		delay += cl->offtime;
+
+		/* 
+		   Class goes to sleep, so that it will have no
+		   chance to work avgidle. Let's forgive it 8)
+
+		   BTW cbq-2.0 has a crap in this
+		   place, apparently they forgot to shift it by cl->ewma_log.
+		 */
+		if (cl->avgidle < 0)
+			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
+		if (cl->avgidle < cl->minidle)
+			cl->avgidle = cl->minidle;
+		if (delay <= 0)
+			delay = 1;
+		PSCHED_TADD2(q->now, delay, cl->undertime);
+
+		cl->xstats.overactions++;
+		cl->delayed = 1;
+	}
+	if (q->wd_expires == 0 || q->wd_expires > delay)
+		q->wd_expires = delay;
+
+	/* Dirty work! We must schedule wakeups based on
+	   real available rate, rather than leaf rate,
+	   which may be tiny (even zero).
+	 */
+	if (q->toplevel == TC_CBQ_MAXLEVEL) {
+		struct cbq_class *b;
+		psched_tdiff_t base_delay = q->wd_expires;
+
+		for (b = cl->borrow; b; b = b->borrow) {
+			delay = PSCHED_TDIFF(b->undertime, q->now);
+			if (delay < base_delay) {
+				if (delay <= 0)
+					delay = 1;
+				base_delay = delay;
+			}
+		}
+
+		q->wd_expires = base_delay;
+	}
+}
+
+/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
+   they go overlimit
+ */
+
+static void cbq_ovl_rclassic(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	struct cbq_class *this = cl;
+
+	do {
+		if (cl->level > q->toplevel) {
+			cl = NULL;
+			break;
+		}
+	} while ((cl = cl->borrow) != NULL);
+
+	if (cl == NULL)
+		cl = this;
+	cbq_ovl_classic(cl);
+}
+
+/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
+
+static void cbq_ovl_delay(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+
+	if (!cl->delayed) {
+		unsigned long sched = jiffies;
+
+		delay += cl->offtime;
+		if (cl->avgidle < 0)
+			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
+		if (cl->avgidle < cl->minidle)
+			cl->avgidle = cl->minidle;
+		PSCHED_TADD2(q->now, delay, cl->undertime);
+
+		if (delay > 0) {
+			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+			cl->penalized = sched;
+			cl->cpriority = TC_CBQ_MAXPRIO;
+			q->pmask |= (1<<TC_CBQ_MAXPRIO);
+			if (del_timer(&q->delay_timer) &&
+			    (long)(q->delay_timer.expires - sched) > 0)
+				q->delay_timer.expires = sched;
+			add_timer(&q->delay_timer);
+			cl->delayed = 1;
+			cl->xstats.overactions++;
+			return;
+		}
+		delay = 1;
+	}
+	if (q->wd_expires == 0 || q->wd_expires > delay)
+		q->wd_expires = delay;
+}
+
+/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
+
+static void cbq_ovl_lowprio(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+
+	cl->penalized = jiffies + cl->penalty;
+
+	if (cl->cpriority != cl->priority2) {
+		cl->cpriority = cl->priority2;
+		q->pmask |= (1<<cl->cpriority);
+		cl->xstats.overactions++;
+	}
+	cbq_ovl_classic(cl);
+}
+
+/* TC_CBQ_OVL_DROP: penalize class by dropping */
+
+static void cbq_ovl_drop(struct cbq_class *cl)
+{
+	if (cl->q->ops->drop)
+		if (cl->q->ops->drop(cl->q))
+			cl->qdisc->q.qlen--;
+	cl->xstats.overactions++;
+	cbq_ovl_classic(cl);
+}
+
+static void cbq_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
+}
+
+static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+{
+	struct cbq_class *cl;
+	struct cbq_class *cl_prev = q->active[prio];
+	unsigned long now = jiffies;
+	unsigned long sched = now;
+
+	if (cl_prev == NULL)
+		return now;
+
+	do {
+		cl = cl_prev->next_alive;
+		if ((long)(now - cl->penalized) > 0) {
+			cl_prev->next_alive = cl->next_alive;
+			cl->next_alive = NULL;
+			cl->cpriority = cl->priority;
+			cl->delayed = 0;
+			cbq_activate_class(cl);
+
+			if (cl == q->active[prio]) {
+				q->active[prio] = cl_prev;
+				if (cl == q->active[prio]) {
+					q->active[prio] = NULL;
+					return 0;
+				}
+			}
+
+			cl = cl_prev->next_alive;
+		} else if ((long)(sched - cl->penalized) > 0)
+			sched = cl->penalized;
+	} while ((cl_prev = cl) != q->active[prio]);
+
+	return (long)(sched - now);
+}
+
+static void cbq_undelay(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	long delay = 0;
+	unsigned pmask;
+
+	pmask = q->pmask;
+	q->pmask = 0;
+
+	while (pmask) {
+		int prio = ffz(~pmask);
+		long tmp;
+
+		pmask &= ~(1<<prio);
+
+		tmp = cbq_undelay_prio(q, prio);
+		if (tmp > 0) {
+			q->pmask |= 1<<prio;
+			if (tmp < delay || delay == 0)
+				delay = tmp;
+		}
+	}
+
+	if (delay) {
+		q->delay_timer.expires = jiffies + delay;
+		add_timer(&q->delay_timer);
+	}
+
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
+}
+
+
+#ifdef CONFIG_NET_CLS_POLICE
+
+static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
+{
+	int len = skb->len;
+	struct Qdisc *sch = child->__parent;
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = q->rx_class;
+
+	q->rx_class = NULL;
+
+	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+
+		cbq_mark_toplevel(q, cl);
+
+		q->rx_class = cl;
+		cl->q->__parent = sch;
+
+		if (cl->q->enqueue(skb, cl->q) == 0) {
+			sch->q.qlen++;
+			sch->bstats.packets++;
+			sch->bstats.bytes+=len;
+			if (!cl->next_alive)
+				cbq_activate_class(cl);
+			return 0;
+		}
+		sch->qstats.drops++;
+		return 0;
+	}
+
+	sch->qstats.drops++;
+	return -1;
+}
+#endif
+
+/* 
+   It is mission critical procedure.
+
+   We "regenerate" toplevel cutoff, if transmitting class
+   has backlog and it is not regulated. It is not part of
+   original CBQ description, but looks more reasonable.
+   Probably, it is wrong. This question needs further investigation.
+*/
+
+static __inline__ void
+cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
+		    struct cbq_class *borrowed)
+{
+	if (cl && q->toplevel >= borrowed->level) {
+		if (cl->q->q.qlen > 1) {
+			do {
+				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+					q->toplevel = borrowed->level;
+					return;
+				}
+			} while ((borrowed=borrowed->borrow) != NULL);
+		}
+#if 0	
+	/* It is not necessary now. Uncommenting it
+	   will save CPU cycles, but decrease fairness.
+	 */
+		q->toplevel = TC_CBQ_MAXLEVEL;
+#endif
+	}
+}
+
+static void
+cbq_update(struct cbq_sched_data *q)
+{
+	struct cbq_class *this = q->tx_class;
+	struct cbq_class *cl = this;
+	int len = q->tx_len;
+
+	q->tx_class = NULL;
+
+	for ( ; cl; cl = cl->share) {
+		long avgidle = cl->avgidle;
+		long idle;
+
+		cl->bstats.packets++;
+		cl->bstats.bytes += len;
+
+		/*
+		   (now - last) is total time between packet right edges.
+		   (last_pktlen/rate) is "virtual" busy time, so that
+
+		         idle = (now - last) - last_pktlen/rate
+		 */
+
+		idle = PSCHED_TDIFF(q->now, cl->last);
+		if ((unsigned long)idle > 128*1024*1024) {
+			avgidle = cl->maxidle;
+		} else {
+			idle -= L2T(cl, len);
+
+		/* true_avgidle := (1-W)*true_avgidle + W*idle,
+		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
+		   cl->avgidle == true_avgidle/W,
+		   hence:
+		 */
+			avgidle += idle - (avgidle>>cl->ewma_log);
+		}
+
+		if (avgidle <= 0) {
+			/* Overlimit or at-limit */
+
+			if (avgidle < cl->minidle)
+				avgidle = cl->minidle;
+
+			cl->avgidle = avgidle;
+
+			/* Calculate expected time, when this class
+			   will be allowed to send.
+			   It will occur, when:
+			   (1-W)*true_avgidle + W*delay = 0, i.e.
+			   idle = (1/W - 1)*(-true_avgidle)
+			   or
+			   idle = (1 - W)*(-cl->avgidle);
+			 */
+			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
+
+			/*
+			   That is not all.
+			   To maintain the rate allocated to the class,
+			   we add to undertime virtual clock,
+			   necessary to complete transmitted packet.
+			   (len/phys_bandwidth has been already passed
+			   to the moment of cbq_update)
+			 */
+
+			idle -= L2T(&q->link, len);
+			idle += L2T(cl, len);
+
+			PSCHED_AUDIT_TDIFF(idle);
+
+			PSCHED_TADD2(q->now, idle, cl->undertime);
+		} else {
+			/* Underlimit */
+
+			PSCHED_SET_PASTPERFECT(cl->undertime);
+			if (avgidle > cl->maxidle)
+				cl->avgidle = cl->maxidle;
+			else
+				cl->avgidle = avgidle;
+		}
+		cl->last = q->now;
+	}
+
+	cbq_update_toplevel(q, this, q->tx_borrowed);
+}
+
+static __inline__ struct cbq_class *
+cbq_under_limit(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	struct cbq_class *this_cl = cl;
+
+	if (cl->tparent == NULL)
+		return cl;
+
+	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
+	    !PSCHED_TLESS(q->now, cl->undertime)) {
+		cl->delayed = 0;
+		return cl;
+	}
+
+	do {
+		/* It is very suspicious place. Now overlimit
+		   action is generated for not bounded classes
+		   only if link is completely congested.
+		   Though it is in agree with ancestor-only paradigm,
+		   it looks very stupid. Particularly,
+		   it means that this chunk of code will either
+		   never be called or result in strong amplification
+		   of burstiness. Dangerous, silly, and, however,
+		   no another solution exists.
+		 */
+		if ((cl = cl->borrow) == NULL) {
+			this_cl->qstats.overlimits++;
+			this_cl->overlimit(this_cl);
+			return NULL;
+		}
+		if (cl->level > q->toplevel)
+			return NULL;
+	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
+		 PSCHED_TLESS(q->now, cl->undertime));
+
+	cl->delayed = 0;
+	return cl;
+}
+
+static __inline__ struct sk_buff *
+cbq_dequeue_prio(struct Qdisc *sch, int prio)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl_tail, *cl_prev, *cl;
+	struct sk_buff *skb;
+	int deficit;
+
+	cl_tail = cl_prev = q->active[prio];
+	cl = cl_prev->next_alive;
+
+	do {
+		deficit = 0;
+
+		/* Start round */
+		do {
+			struct cbq_class *borrow = cl;
+
+			if (cl->q->q.qlen &&
+			    (borrow = cbq_under_limit(cl)) == NULL)
+				goto skip_class;
+
+			if (cl->deficit <= 0) {
+				/* Class exhausted its allotment per
+				   this round. Switch to the next one.
+				 */
+				deficit = 1;
+				cl->deficit += cl->quantum;
+				goto next_class;
+			}
+
+			skb = cl->q->dequeue(cl->q);
+
+			/* Class did not give us any skb :-(
+			   It could occur even if cl->q->q.qlen != 0 
+			   f.e. if cl->q == "tbf"
+			 */
+			if (skb == NULL)
+				goto skip_class;
+
+			cl->deficit -= skb->len;
+			q->tx_class = cl;
+			q->tx_borrowed = borrow;
+			if (borrow != cl) {
+#ifndef CBQ_XSTATS_BORROWS_BYTES
+				borrow->xstats.borrows++;
+				cl->xstats.borrows++;
+#else
+				borrow->xstats.borrows += skb->len;
+				cl->xstats.borrows += skb->len;
+#endif
+			}
+			q->tx_len = skb->len;
+
+			if (cl->deficit <= 0) {
+				q->active[prio] = cl;
+				cl = cl->next_alive;
+				cl->deficit += cl->quantum;
+			}
+			return skb;
+
+skip_class:
+			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
+				/* Class is empty or penalized.
+				   Unlink it from active chain.
+				 */
+				cl_prev->next_alive = cl->next_alive;
+				cl->next_alive = NULL;
+
+				/* Did cl_tail point to it? */
+				if (cl == cl_tail) {
+					/* Repair it! */
+					cl_tail = cl_prev;
+
+					/* Was it the last class in this band? */
+					if (cl == cl_tail) {
+						/* Kill the band! */
+						q->active[prio] = NULL;
+						q->activemask &= ~(1<<prio);
+						if (cl->q->q.qlen)
+							cbq_activate_class(cl);
+						return NULL;
+					}
+
+					q->active[prio] = cl_tail;
+				}
+				if (cl->q->q.qlen)
+					cbq_activate_class(cl);
+
+				cl = cl_prev;
+			}
+
+next_class:
+			cl_prev = cl;
+			cl = cl->next_alive;
+		} while (cl_prev != cl_tail);
+	} while (deficit);
+
+	q->active[prio] = cl_prev;
+
+	return NULL;
+}
+
+static __inline__ struct sk_buff *
+cbq_dequeue_1(struct Qdisc *sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	unsigned activemask;
+
+	activemask = q->activemask&0xFF;
+	while (activemask) {
+		int prio = ffz(~activemask);
+		activemask &= ~(1<<prio);
+		skb = cbq_dequeue_prio(sch, prio);
+		if (skb)
+			return skb;
+	}
+	return NULL;
+}
+
+static struct sk_buff *
+cbq_dequeue(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	psched_time_t now;
+	psched_tdiff_t incr;
+
+	PSCHED_GET_TIME(now);
+	incr = PSCHED_TDIFF(now, q->now_rt);
+
+	if (q->tx_class) {
+		psched_tdiff_t incr2;
+		/* Time integrator. We calculate EOS time
+		   by adding expected packet transmission time.
+		   If real time is greater, we warp artificial clock,
+		   so that:
+
+		   cbq_time = max(real_time, work);
+		 */
+		incr2 = L2T(&q->link, q->tx_len);
+		PSCHED_TADD(q->now, incr2);
+		cbq_update(q);
+		if ((incr -= incr2) < 0)
+			incr = 0;
+	}
+	PSCHED_TADD(q->now, incr);
+	q->now_rt = now;
+
+	for (;;) {
+		q->wd_expires = 0;
+
+		skb = cbq_dequeue_1(sch);
+		if (skb) {
+			sch->q.qlen--;
+			sch->flags &= ~TCQ_F_THROTTLED;
+			return skb;
+		}
+
+		/* All the classes are overlimit.
+
+		   It is possible, if:
+
+		   1. Scheduler is empty.
+		   2. Toplevel cutoff inhibited borrowing.
+		   3. Root class is overlimit.
+
+		   Reset 2d and 3d conditions and retry.
+
+		   Note, that NS and cbq-2.0 are buggy, peeking
+		   an arbitrary class is appropriate for ancestor-only
+		   sharing, but not for toplevel algorithm.
+
+		   Our version is better, but slower, because it requires
+		   two passes, but it is unavoidable with top-level sharing.
+		*/
+
+		if (q->toplevel == TC_CBQ_MAXLEVEL &&
+		    PSCHED_IS_PASTPERFECT(q->link.undertime))
+			break;
+
+		q->toplevel = TC_CBQ_MAXLEVEL;
+		PSCHED_SET_PASTPERFECT(q->link.undertime);
+	}
+
+	/* No packets in scheduler or nobody wants to give them to us :-(
+	   Sigh... start watchdog timer in the last case. */
+
+	if (sch->q.qlen) {
+		sch->qstats.overlimits++;
+		if (q->wd_expires) {
+			long delay = PSCHED_US2JIFFIE(q->wd_expires);
+			if (delay <= 0)
+				delay = 1;
+			mod_timer(&q->wd_timer, jiffies + delay);
+			sch->flags |= TCQ_F_THROTTLED;
+		}
+	}
+	return NULL;
+}
+
+/* CBQ class maintanance routines */
+
+static void cbq_adjust_levels(struct cbq_class *this)
+{
+	if (this == NULL)
+		return;
+
+	do {
+		int level = 0;
+		struct cbq_class *cl;
+
+		if ((cl = this->children) != NULL) {
+			do {
+				if (cl->level > level)
+					level = cl->level;
+			} while ((cl = cl->sibling) != this->children);
+		}
+		this->level = level+1;
+	} while ((this = this->tparent) != NULL);
+}
+
+static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
+{
+	struct cbq_class *cl;
+	unsigned h;
+
+	if (q->quanta[prio] == 0)
+		return;
+
+	for (h=0; h<16; h++) {
+		for (cl = q->classes[h]; cl; cl = cl->next) {
+			/* BUGGGG... Beware! This expression suffer of
+			   arithmetic overflows!
+			 */
+			if (cl->priority == prio) {
+				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
+					q->quanta[prio];
+			}
+			if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
+				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
+				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+			}
+		}
+	}
+}
+
+static void cbq_sync_defmap(struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+	struct cbq_class *split = cl->split;
+	unsigned h;
+	int i;
+
+	if (split == NULL)
+		return;
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+			split->defaults[i] = NULL;
+	}
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		int level = split->level;
+
+		if (split->defaults[i])
+			continue;
+
+		for (h=0; h<16; h++) {
+			struct cbq_class *c;
+
+			for (c = q->classes[h]; c; c = c->next) {
+				if (c->split == split && c->level < level &&
+				    c->defmap&(1<<i)) {
+					split->defaults[i] = c;
+					level = c->level;
+				}
+			}
+		}
+	}
+}
+
+static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
+{
+	struct cbq_class *split = NULL;
+
+	if (splitid == 0) {
+		if ((split = cl->split) == NULL)
+			return;
+		splitid = split->classid;
+	}
+
+	if (split == NULL || split->classid != splitid) {
+		for (split = cl->tparent; split; split = split->tparent)
+			if (split->classid == splitid)
+				break;
+	}
+
+	if (split == NULL)
+		return;
+
+	if (cl->split != split) {
+		cl->defmap = 0;
+		cbq_sync_defmap(cl);
+		cl->split = split;
+		cl->defmap = def&mask;
+	} else
+		cl->defmap = (cl->defmap&~mask)|(def&mask);
+
+	cbq_sync_defmap(cl);
+}
+
+static void cbq_unlink_class(struct cbq_class *this)
+{
+	struct cbq_class *cl, **clp;
+	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+
+	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
+		if (cl == this) {
+			*clp = cl->next;
+			cl->next = NULL;
+			break;
+		}
+	}
+
+	if (this->tparent) {
+		clp=&this->sibling;
+		cl = *clp;
+		do {
+			if (cl == this) {
+				*clp = cl->sibling;
+				break;
+			}
+			clp = &cl->sibling;
+		} while ((cl = *clp) != this->sibling);
+
+		if (this->tparent->children == this) {
+			this->tparent->children = this->sibling;
+			if (this->sibling == this)
+				this->tparent->children = NULL;
+		}
+	} else {
+		BUG_TRAP(this->sibling == this);
+	}
+}
+
+static void cbq_link_class(struct cbq_class *this)
+{
+	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+	unsigned h = cbq_hash(this->classid);
+	struct cbq_class *parent = this->tparent;
+
+	this->sibling = this;
+	this->next = q->classes[h];
+	q->classes[h] = this;
+
+	if (parent == NULL)
+		return;
+
+	if (parent->children == NULL) {
+		parent->children = this;
+	} else {
+		this->sibling = parent->children->sibling;
+		parent->children->sibling = this;
+	}
+}
+
+static unsigned int cbq_drop(struct Qdisc* sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl, *cl_head;
+	int prio;
+	unsigned int len;
+
+	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
+		if ((cl_head = q->active[prio]) == NULL)
+			continue;
+
+		cl = cl_head;
+		do {
+			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
+				sch->q.qlen--;
+				return len;
+			}
+		} while ((cl = cl->next_alive) != cl_head);
+	}
+	return 0;
+}
+
+static void
+cbq_reset(struct Qdisc* sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl;
+	int prio;
+	unsigned h;
+
+	q->activemask = 0;
+	q->pmask = 0;
+	q->tx_class = NULL;
+	q->tx_borrowed = NULL;
+	del_timer(&q->wd_timer);
+	del_timer(&q->delay_timer);
+	q->toplevel = TC_CBQ_MAXLEVEL;
+	PSCHED_GET_TIME(q->now);
+	q->now_rt = q->now;
+
+	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
+		q->active[prio] = NULL;
+
+	for (h = 0; h < 16; h++) {
+		for (cl = q->classes[h]; cl; cl = cl->next) {
+			qdisc_reset(cl->q);
+
+			cl->next_alive = NULL;
+			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->avgidle = cl->maxidle;
+			cl->deficit = cl->quantum;
+			cl->cpriority = cl->priority;
+		}
+	}
+	sch->q.qlen = 0;
+}
+
+
+static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
+{
+	if (lss->change&TCF_CBQ_LSS_FLAGS) {
+		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+	}
+	if (lss->change&TCF_CBQ_LSS_EWMA)
+		cl->ewma_log = lss->ewma_log;
+	if (lss->change&TCF_CBQ_LSS_AVPKT)
+		cl->avpkt = lss->avpkt;
+	if (lss->change&TCF_CBQ_LSS_MINIDLE)
+		cl->minidle = -(long)lss->minidle;
+	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+		cl->maxidle = lss->maxidle;
+		cl->avgidle = lss->maxidle;
+	}
+	if (lss->change&TCF_CBQ_LSS_OFFTIME)
+		cl->offtime = lss->offtime;
+	return 0;
+}
+
+static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+	q->nclasses[cl->priority]--;
+	q->quanta[cl->priority] -= cl->weight;
+	cbq_normalize_quanta(q, cl->priority);
+}
+
+static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+	q->nclasses[cl->priority]++;
+	q->quanta[cl->priority] += cl->weight;
+	cbq_normalize_quanta(q, cl->priority);
+}
+
+static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
+{
+	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+
+	if (wrr->allot)
+		cl->allot = wrr->allot;
+	if (wrr->weight)
+		cl->weight = wrr->weight;
+	if (wrr->priority) {
+		cl->priority = wrr->priority-1;
+		cl->cpriority = cl->priority;
+		if (cl->priority >= cl->priority2)
+			cl->priority2 = TC_CBQ_MAXPRIO-1;
+	}
+
+	cbq_addprio(q, cl);
+	return 0;
+}
+
+static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
+{
+	switch (ovl->strategy) {
+	case TC_CBQ_OVL_CLASSIC:
+		cl->overlimit = cbq_ovl_classic;
+		break;
+	case TC_CBQ_OVL_DELAY:
+		cl->overlimit = cbq_ovl_delay;
+		break;
+	case TC_CBQ_OVL_LOWPRIO:
+		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
+		    ovl->priority2-1 <= cl->priority)
+			return -EINVAL;
+		cl->priority2 = ovl->priority2-1;
+		cl->overlimit = cbq_ovl_lowprio;
+		break;
+	case TC_CBQ_OVL_DROP:
+		cl->overlimit = cbq_ovl_drop;
+		break;
+	case TC_CBQ_OVL_RCLASSIC:
+		cl->overlimit = cbq_ovl_rclassic;
+		break;
+	default:
+		return -EINVAL;
+	}
+	cl->penalty = (ovl->penalty*HZ)/1000;
+	return 0;
+}
+
+#ifdef CONFIG_NET_CLS_POLICE
+static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
+{
+	cl->police = p->police;
+
+	if (cl->q->handle) {
+		if (p->police == TC_POLICE_RECLASSIFY)
+			cl->q->reshape_fail = cbq_reshape_fail;
+		else
+			cl->q->reshape_fail = NULL;
+	}
+	return 0;
+}
+#endif
+
+static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
+{
+	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
+	return 0;
+}
+
+static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_CBQ_MAX];
+	struct tc_ratespec *r;
+
+	if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
+	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
+		return -EINVAL;
+
+	if (tb[TCA_CBQ_LSSOPT-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
+		return -EINVAL;
+
+	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
+
+	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
+		return -EINVAL;
+
+	q->link.refcnt = 1;
+	q->link.sibling = &q->link;
+	q->link.classid = sch->handle;
+	q->link.qdisc = sch;
+	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+		q->link.q = &noop_qdisc;
+
+	q->link.priority = TC_CBQ_MAXPRIO-1;
+	q->link.priority2 = TC_CBQ_MAXPRIO-1;
+	q->link.cpriority = TC_CBQ_MAXPRIO-1;
+	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
+	q->link.overlimit = cbq_ovl_classic;
+	q->link.allot = psched_mtu(sch->dev);
+	q->link.quantum = q->link.allot;
+	q->link.weight = q->link.R_tab->rate.rate;
+
+	q->link.ewma_log = TC_CBQ_DEF_EWMA;
+	q->link.avpkt = q->link.allot/2;
+	q->link.minidle = -0x7FFFFFFF;
+	q->link.stats_lock = &sch->dev->queue_lock;
+
+	init_timer(&q->wd_timer);
+	q->wd_timer.data = (unsigned long)sch;
+	q->wd_timer.function = cbq_watchdog;
+	init_timer(&q->delay_timer);
+	q->delay_timer.data = (unsigned long)sch;
+	q->delay_timer.function = cbq_undelay;
+	q->toplevel = TC_CBQ_MAXLEVEL;
+	PSCHED_GET_TIME(q->now);
+	q->now_rt = q->now;
+
+	cbq_link_class(&q->link);
+
+	if (tb[TCA_CBQ_LSSOPT-1])
+		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
+
+	cbq_addprio(q, &q->link);
+	return 0;
+}
+
+static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+
+	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_cbq_lssopt opt;
+
+	opt.flags = 0;
+	if (cl->borrow == NULL)
+		opt.flags |= TCF_CBQ_LSS_BOUNDED;
+	if (cl->share == NULL)
+		opt.flags |= TCF_CBQ_LSS_ISOLATED;
+	opt.ewma_log = cl->ewma_log;
+	opt.level = cl->level;
+	opt.avpkt = cl->avpkt;
+	opt.maxidle = cl->maxidle;
+	opt.minidle = (u32)(-cl->minidle);
+	opt.offtime = cl->offtime;
+	opt.change = ~0;
+	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_cbq_wrropt opt;
+
+	opt.flags = 0;
+	opt.allot = cl->allot;
+	opt.priority = cl->priority+1;
+	opt.cpriority = cl->cpriority+1;
+	opt.weight = cl->weight;
+	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_cbq_ovl opt;
+
+	opt.strategy = cl->ovl_strategy;
+	opt.priority2 = cl->priority2+1;
+	opt.penalty = (cl->penalty*1000)/HZ;
+	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_cbq_fopt opt;
+
+	if (cl->split || cl->defmap) {
+		opt.split = cl->split ? cl->split->classid : 0;
+		opt.defmap = cl->defmap;
+		opt.defchange = ~0;
+		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
+	}
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+#ifdef CONFIG_NET_CLS_POLICE
+static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_cbq_police opt;
+
+	if (cl->police) {
+		opt.police = cl->police;
+		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
+	}
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+#endif
+
+static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
+{
+	if (cbq_dump_lss(skb, cl) < 0 ||
+	    cbq_dump_rate(skb, cl) < 0 ||
+	    cbq_dump_wrr(skb, cl) < 0 ||
+	    cbq_dump_ovl(skb, cl) < 0 ||
+#ifdef CONFIG_NET_CLS_POLICE
+	    cbq_dump_police(skb, cl) < 0 ||
+#endif
+	    cbq_dump_fopt(skb, cl) < 0)
+		return -1;
+	return 0;
+}
+
+static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	if (cbq_dump_attr(skb, &q->link) < 0)
+		goto rtattr_failure;
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+
+	q->link.xstats.avgidle = q->link.avgidle;
+	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
+}
+
+static int
+cbq_dump_class(struct Qdisc *sch, unsigned long arg,
+	       struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct cbq_class *cl = (struct cbq_class*)arg;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+
+	if (cl->tparent)
+		tcm->tcm_parent = cl->tparent->classid;
+	else
+		tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle = cl->classid;
+	tcm->tcm_info = cl->q->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	if (cbq_dump_attr(skb, cl) < 0)
+		goto rtattr_failure;
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+	struct gnet_dump *d)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	cl->qstats.qlen = cl->q->q.qlen;
+	cl->xstats.avgidle = cl->avgidle;
+	cl->xstats.undertime = 0;
+
+	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
+		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
+}
+
+static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	if (cl) {
+		if (new == NULL) {
+			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
+				return -ENOBUFS;
+		} else {
+#ifdef CONFIG_NET_CLS_POLICE
+			if (cl->police == TC_POLICE_RECLASSIFY)
+				new->reshape_fail = cbq_reshape_fail;
+#endif
+		}
+		sch_tree_lock(sch);
+		*old = cl->q;
+		cl->q = new;
+		sch->q.qlen -= (*old)->q.qlen;
+		qdisc_reset(*old);
+		sch_tree_unlock(sch);
+
+		return 0;
+	}
+	return -ENOENT;
+}
+
+static struct Qdisc *
+cbq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	return cl ? cl->q : NULL;
+}
+
+static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = cbq_class_lookup(q, classid);
+
+	if (cl) {
+		cl->refcnt++;
+		return (unsigned long)cl;
+	}
+	return 0;
+}
+
+static void cbq_destroy_filters(struct cbq_class *cl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = cl->filter_list) != NULL) {
+		cl->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+}
+
+static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+
+	BUG_TRAP(!cl->filters);
+
+	cbq_destroy_filters(cl);
+	qdisc_destroy(cl->q);
+	qdisc_put_rtab(cl->R_tab);
+#ifdef CONFIG_NET_ESTIMATOR
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
+#endif
+	if (cl != &q->link)
+		kfree(cl);
+}
+
+static void
+cbq_destroy(struct Qdisc* sch)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl;
+	unsigned h;
+
+#ifdef CONFIG_NET_CLS_POLICE
+	q->rx_class = NULL;
+#endif
+	/*
+	 * Filters must be destroyed first because we don't destroy the
+	 * classes from root to leafs which means that filters can still
+	 * be bound to classes which have been destroyed already. --TGR '04
+	 */
+	for (h = 0; h < 16; h++)
+		for (cl = q->classes[h]; cl; cl = cl->next)
+			cbq_destroy_filters(cl);
+
+	for (h = 0; h < 16; h++) {
+		struct cbq_class *next;
+
+		for (cl = q->classes[h]; cl; cl = next) {
+			next = cl->next;
+			cbq_destroy_class(sch, cl);
+		}
+	}
+}
+
+static void cbq_put(struct Qdisc *sch, unsigned long arg)
+{
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	if (--cl->refcnt == 0) {
+#ifdef CONFIG_NET_CLS_POLICE
+		struct cbq_sched_data *q = qdisc_priv(sch);
+
+		spin_lock_bh(&sch->dev->queue_lock);
+		if (q->rx_class == cl)
+			q->rx_class = NULL;
+		spin_unlock_bh(&sch->dev->queue_lock);
+#endif
+
+		cbq_destroy_class(sch, cl);
+	}
+}
+
+static int
+cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
+		 unsigned long *arg)
+{
+	int err;
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = (struct cbq_class*)*arg;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_CBQ_MAX];
+	struct cbq_class *parent;
+	struct qdisc_rate_table *rtab = NULL;
+
+	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
+		return -EINVAL;
+
+	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
+		return -EINVAL;
+
+	if (tb[TCA_CBQ_FOPT-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
+		return -EINVAL;
+
+	if (tb[TCA_CBQ_RATE-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
+			return -EINVAL;
+
+	if (tb[TCA_CBQ_LSSOPT-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
+			return -EINVAL;
+
+	if (tb[TCA_CBQ_WRROPT-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
+			return -EINVAL;
+
+#ifdef CONFIG_NET_CLS_POLICE
+	if (tb[TCA_CBQ_POLICE-1] &&
+	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
+			return -EINVAL;
+#endif
+
+	if (cl) {
+		/* Check parent */
+		if (parentid) {
+			if (cl->tparent && cl->tparent->classid != parentid)
+				return -EINVAL;
+			if (!cl->tparent && parentid != TC_H_ROOT)
+				return -EINVAL;
+		}
+
+		if (tb[TCA_CBQ_RATE-1]) {
+			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
+			if (rtab == NULL)
+				return -EINVAL;
+		}
+
+		/* Change class parameters */
+		sch_tree_lock(sch);
+
+		if (cl->next_alive != NULL)
+			cbq_deactivate_class(cl);
+
+		if (rtab) {
+			rtab = xchg(&cl->R_tab, rtab);
+			qdisc_put_rtab(rtab);
+		}
+
+		if (tb[TCA_CBQ_LSSOPT-1])
+			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
+
+		if (tb[TCA_CBQ_WRROPT-1]) {
+			cbq_rmprio(q, cl);
+			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
+		}
+
+		if (tb[TCA_CBQ_OVL_STRATEGY-1])
+			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
+
+#ifdef CONFIG_NET_CLS_POLICE
+		if (tb[TCA_CBQ_POLICE-1])
+			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
+#endif
+
+		if (tb[TCA_CBQ_FOPT-1])
+			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
+
+		if (cl->q->q.qlen)
+			cbq_activate_class(cl);
+
+		sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1])
+			gen_replace_estimator(&cl->bstats, &cl->rate_est,
+				cl->stats_lock, tca[TCA_RATE-1]);
+#endif
+		return 0;
+	}
+
+	if (parentid == TC_H_ROOT)
+		return -EINVAL;
+
+	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
+	    tb[TCA_CBQ_LSSOPT-1] == NULL)
+		return -EINVAL;
+
+	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
+	if (rtab == NULL)
+		return -EINVAL;
+
+	if (classid) {
+		err = -EINVAL;
+		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+			goto failure;
+	} else {
+		int i;
+		classid = TC_H_MAKE(sch->handle,0x8000);
+
+		for (i=0; i<0x8000; i++) {
+			if (++q->hgenerator >= 0x8000)
+				q->hgenerator = 1;
+			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
+				break;
+		}
+		err = -ENOSR;
+		if (i >= 0x8000)
+			goto failure;
+		classid = classid|q->hgenerator;
+	}
+
+	parent = &q->link;
+	if (parentid) {
+		parent = cbq_class_lookup(q, parentid);
+		err = -EINVAL;
+		if (parent == NULL)
+			goto failure;
+	}
+
+	err = -ENOBUFS;
+	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+	if (cl == NULL)
+		goto failure;
+	memset(cl, 0, sizeof(*cl));
+	cl->R_tab = rtab;
+	rtab = NULL;
+	cl->refcnt = 1;
+	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+		cl->q = &noop_qdisc;
+	cl->classid = classid;
+	cl->tparent = parent;
+	cl->qdisc = sch;
+	cl->allot = parent->allot;
+	cl->quantum = cl->allot;
+	cl->weight = cl->R_tab->rate.rate;
+	cl->stats_lock = &sch->dev->queue_lock;
+
+	sch_tree_lock(sch);
+	cbq_link_class(cl);
+	cl->borrow = cl->tparent;
+	if (cl->tparent != &q->link)
+		cl->share = cl->tparent;
+	cbq_adjust_levels(parent);
+	cl->minidle = -0x7FFFFFFF;
+	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
+	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
+	if (cl->ewma_log==0)
+		cl->ewma_log = q->link.ewma_log;
+	if (cl->maxidle==0)
+		cl->maxidle = q->link.maxidle;
+	if (cl->avpkt==0)
+		cl->avpkt = q->link.avpkt;
+	cl->overlimit = cbq_ovl_classic;
+	if (tb[TCA_CBQ_OVL_STRATEGY-1])
+		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
+#ifdef CONFIG_NET_CLS_POLICE
+	if (tb[TCA_CBQ_POLICE-1])
+		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
+#endif
+	if (tb[TCA_CBQ_FOPT-1])
+		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
+	sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tca[TCA_RATE-1])
+		gen_new_estimator(&cl->bstats, &cl->rate_est,
+			cl->stats_lock, tca[TCA_RATE-1]);
+#endif
+
+	*arg = (unsigned long)cl;
+	return 0;
+
+failure:
+	qdisc_put_rtab(rtab);
+	return err;
+}
+
+static int cbq_delete(struct Qdisc *sch, unsigned long arg)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	if (cl->filters || cl->children || cl == &q->link)
+		return -EBUSY;
+
+	sch_tree_lock(sch);
+
+	if (cl->next_alive)
+		cbq_deactivate_class(cl);
+
+	if (q->tx_borrowed == cl)
+		q->tx_borrowed = q->tx_class;
+	if (q->tx_class == cl) {
+		q->tx_class = NULL;
+		q->tx_borrowed = NULL;
+	}
+#ifdef CONFIG_NET_CLS_POLICE
+	if (q->rx_class == cl)
+		q->rx_class = NULL;
+#endif
+
+	cbq_unlink_class(cl);
+	cbq_adjust_levels(cl->tparent);
+	cl->defmap = 0;
+	cbq_sync_defmap(cl);
+
+	cbq_rmprio(q, cl);
+	sch_tree_unlock(sch);
+
+	if (--cl->refcnt == 0)
+		cbq_destroy_class(sch, cl);
+
+	return 0;
+}
+
+static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *cl = (struct cbq_class *)arg;
+
+	if (cl == NULL)
+		cl = &q->link;
+
+	return &cl->filter_list;
+}
+
+static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
+				     u32 classid)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	struct cbq_class *p = (struct cbq_class*)parent;
+	struct cbq_class *cl = cbq_class_lookup(q, classid);
+
+	if (cl) {
+		if (p && p->level <= cl->level)
+			return 0;
+		cl->filters++;
+		return (unsigned long)cl;
+	}
+	return 0;
+}
+
+static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
+{
+	struct cbq_class *cl = (struct cbq_class*)arg;
+
+	cl->filters--;
+}
+
+static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct cbq_sched_data *q = qdisc_priv(sch);
+	unsigned h;
+
+	if (arg->stop)
+		return;
+
+	for (h = 0; h < 16; h++) {
+		struct cbq_class *cl;
+
+		for (cl = q->classes[h]; cl; cl = cl->next) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static struct Qdisc_class_ops cbq_class_ops = {
+	.graft		=	cbq_graft,
+	.leaf		=	cbq_leaf,
+	.get		=	cbq_get,
+	.put		=	cbq_put,
+	.change		=	cbq_change_class,
+	.delete		=	cbq_delete,
+	.walk		=	cbq_walk,
+	.tcf_chain	=	cbq_find_tcf,
+	.bind_tcf	=	cbq_bind_filter,
+	.unbind_tcf	=	cbq_unbind_filter,
+	.dump		=	cbq_dump_class,
+	.dump_stats	=	cbq_dump_class_stats,
+};
+
+static struct Qdisc_ops cbq_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&cbq_class_ops,
+	.id		=	"cbq",
+	.priv_size	=	sizeof(struct cbq_sched_data),
+	.enqueue	=	cbq_enqueue,
+	.dequeue	=	cbq_dequeue,
+	.requeue	=	cbq_requeue,
+	.drop		=	cbq_drop,
+	.init		=	cbq_init,
+	.reset		=	cbq_reset,
+	.destroy	=	cbq_destroy,
+	.change		=	NULL,
+	.dump		=	cbq_dump,
+	.dump_stats	=	cbq_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cbq_module_init(void)
+{
+	return register_qdisc(&cbq_qdisc_ops);
+}
+static void __exit cbq_module_exit(void) 
+{
+	unregister_qdisc(&cbq_qdisc_ops);
+}
+module_init(cbq_module_init)
+module_exit(cbq_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
new file mode 100644
index 00000000000..8a3db9d95ba
--- /dev/null
+++ b/net/sched/sch_dsmark.c
@@ -0,0 +1,479 @@
+/* net/sched/sch_dsmark.c - Differentiated Services field marker */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h> /* for pkt_sched */
+#include <linux/rtnetlink.h>
+#include <net/pkt_sched.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <asm/byteorder.h>
+
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define PRIV(sch) qdisc_priv(sch)
+
+
+/*
+ * classid	class		marking
+ * -------	-----		-------
+ *   n/a	  0		n/a
+ *   x:0	  1		use entry [0]
+ *   ...	 ...		...
+ *   x:y y>0	 y+1		use entry [y]
+ *   ...	 ...		...
+ * x:indices-1	indices		use entry [indices-1]
+ *   ...	 ...		...
+ *   x:y	 y+1		use entry [y & (indices-1)]
+ *   ...	 ...		...
+ * 0xffff	0x10000		use entry [indices-1]
+ */
+
+
+#define NO_DEFAULT_INDEX	(1 << 16)
+
+struct dsmark_qdisc_data {
+	struct Qdisc		*q;
+	struct tcf_proto	*filter_list;
+	__u8			*mask;	/* "owns" the array */
+	__u8			*value;
+	__u16			indices;
+	__u32			default_index;	/* index range is 0...0xffff */
+	int			set_tc_index;
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
+	    old);
+	if (!new)
+		new = &noop_qdisc;
+	sch_tree_lock(sch);
+	*old = xchg(&p->q,new);
+	if (*old)
+		qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch); /* @@@ move up ? */
+        return 0;
+}
+
+
+static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	return p->q;
+}
+
+
+static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
+{
+	struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch);
+
+	DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid);
+	return TC_H_MIN(classid)+1;
+}
+
+
+static unsigned long dsmark_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return dsmark_get(sch,classid);
+}
+
+
+static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+
+static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_DSMARK_MAX];
+
+	DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
+	    "arg 0x%lx\n",sch,p,classid,parent,*arg);
+	if (*arg > p->indices)
+		return -ENOENT;
+	if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
+		return -EINVAL;
+	if (tb[TCA_DSMARK_MASK-1]) {
+		if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
+			return -EINVAL;
+		p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
+	}
+	if (tb[TCA_DSMARK_VALUE-1]) {
+		if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
+			return -EINVAL;
+		p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
+	}
+	return 0;
+}
+
+
+static int dsmark_delete(struct Qdisc *sch,unsigned long arg)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	if (!arg || arg > p->indices)
+		return -EINVAL;
+	p->mask[arg-1] = 0xff;
+	p->value[arg-1] = 0;
+	return 0;
+}
+
+
+static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	int i;
+
+	DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker);
+	if (walker->stop)
+		return;
+	for (i = 0; i < p->indices; i++) {
+		if (p->mask[i] == 0xff && !p->value[i])
+			continue;
+		if (walker->count >= walker->skip) {
+			if (walker->fn(sch, i+1, walker) < 0) {
+				walker->stop = 1;
+				break;
+			}
+		}
+                walker->count++;
+        }
+}
+
+
+static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	return &p->filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct tcf_result res;
+	int result;
+	int ret = NET_XMIT_POLICED;
+
+	D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+	if (p->set_tc_index) {
+		/* FIXME: Safe with non-linear skbs? --RR */
+		switch (skb->protocol) {
+			case __constant_htons(ETH_P_IP):
+				skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+					& ~INET_ECN_MASK;
+				break;
+			case __constant_htons(ETH_P_IPV6):
+				skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+					& ~INET_ECN_MASK;
+				break;
+			default:
+				skb->tc_index = 0;
+				break;
+		};
+	}
+	result = TC_POLICE_OK; /* be nice to gcc */
+	if (TC_H_MAJ(skb->priority) == sch->handle) {
+		skb->tc_index = TC_H_MIN(skb->priority);
+	} else {
+		result = tc_classify(skb,p->filter_list,&res);
+		D2PRINTK("result %d class 0x%04x\n",result,res.classid);
+		switch (result) {
+#ifdef CONFIG_NET_CLS_POLICE
+			case TC_POLICE_SHOT:
+				kfree_skb(skb);
+				break;
+#if 0
+			case TC_POLICE_RECLASSIFY:
+				/* FIXME: what to do here ??? */
+#endif
+#endif
+			case TC_POLICE_OK:
+				skb->tc_index = TC_H_MIN(res.classid);
+				break;
+			case TC_POLICE_UNSPEC:
+				/* fall through */
+			default:
+				if (p->default_index != NO_DEFAULT_INDEX)
+					skb->tc_index = p->default_index;
+				break;
+		};
+	}
+	if (
+#ifdef CONFIG_NET_CLS_POLICE
+	    result == TC_POLICE_SHOT ||
+#endif
+
+	    ((ret = p->q->enqueue(skb,p->q)) != 0)) {
+		sch->qstats.drops++;
+		return ret;
+	}
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	sch->q.qlen++;
+	return ret;
+}
+
+
+static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct sk_buff *skb;
+	int index;
+
+	D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
+	skb = p->q->ops->dequeue(p->q);
+	if (!skb)
+		return NULL;
+	sch->q.qlen--;
+	index = skb->tc_index & (p->indices-1);
+	D2PRINTK("index %d->%d\n",skb->tc_index,index);
+	switch (skb->protocol) {
+		case __constant_htons(ETH_P_IP):
+			ipv4_change_dsfield(skb->nh.iph,
+			    p->mask[index],p->value[index]);
+			break;
+		case __constant_htons(ETH_P_IPV6):
+			ipv6_change_dsfield(skb->nh.ipv6h,
+			    p->mask[index],p->value[index]);
+			break;
+		default:
+			/*
+			 * Only complain if a change was actually attempted.
+			 * This way, we can send non-IP traffic through dsmark
+			 * and don't need yet another qdisc as a bypass.
+			 */
+			if (p->mask[index] != 0xff || p->value[index])
+				printk(KERN_WARNING "dsmark_dequeue: "
+				       "unsupported protocol %d\n",
+				       htons(skb->protocol));
+			break;
+	};
+	return skb;
+}
+
+
+static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	int ret;
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
+        if ((ret = p->q->ops->requeue(skb, p->q)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return 0;
+	}
+	sch->qstats.drops++;
+	return ret;
+}
+
+
+static unsigned int dsmark_drop(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned int len;
+	
+	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
+	if (!p->q->ops->drop)
+		return 0;
+	if (!(len = p->q->ops->drop(p->q)))
+		return 0;
+	sch->q.qlen--;
+	return len;
+}
+
+
+static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct rtattr *tb[TCA_DSMARK_MAX];
+	__u16 tmp;
+
+	DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	if (!opt ||
+	    rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 ||
+	    !tb[TCA_DSMARK_INDICES-1] ||
+	    RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16))
+                return -EINVAL;
+	p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]);
+	if (!p->indices)
+		return -EINVAL;
+	for (tmp = p->indices; tmp != 1; tmp >>= 1) {
+		if (tmp & 1)
+			return -EINVAL;
+	}
+	p->default_index = NO_DEFAULT_INDEX;
+	if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) {
+		if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16))
+			return -EINVAL;
+		p->default_index =
+		    *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
+	}
+	p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1];
+	p->mask = kmalloc(p->indices*2,GFP_KERNEL);
+	if (!p->mask)
+		return -ENOMEM;
+	p->value = p->mask+p->indices;
+	memset(p->mask,0xff,p->indices);
+	memset(p->value,0,p->indices);
+	if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
+		p->q = &noop_qdisc;
+	DPRINTK("dsmark_init: qdisc %p\n",&p->q);
+	return 0;
+}
+
+
+static void dsmark_reset(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p);
+	qdisc_reset(p->q);
+	sch->q.qlen = 0;
+}
+
+
+static void dsmark_destroy(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	struct tcf_proto *tp;
+
+	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p);
+	while (p->filter_list) {
+		tp = p->filter_list;
+		p->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+	qdisc_destroy(p->q);
+	kfree(p->mask);
+}
+
+
+static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
+    struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl);
+	if (!cl || cl > p->indices)
+		return -EINVAL;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1);
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]);
+	RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]);
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+
+static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct dsmark_qdisc_data *p = PRIV(sch);
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb,TCA_OPTIONS,0,NULL);
+	RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices);
+	if (p->default_index != NO_DEFAULT_INDEX) {
+		__u16 tmp = p->default_index;
+
+		RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
+	}
+	if (p->set_tc_index)
+		RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL);
+	rta->rta_len = skb->tail-b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb,b-skb->data);
+	return -1;
+}
+
+static struct Qdisc_class_ops dsmark_class_ops = {
+	.graft		=	dsmark_graft,
+	.leaf		=	dsmark_leaf,
+	.get		=	dsmark_get,
+	.put		=	dsmark_put,
+	.change		=	dsmark_change,
+	.delete		=	dsmark_delete,
+	.walk		=	dsmark_walk,
+	.tcf_chain	=	dsmark_find_tcf,
+	.bind_tcf	=	dsmark_bind_filter,
+	.unbind_tcf	=	dsmark_put,
+	.dump		=	dsmark_dump_class,
+};
+
+static struct Qdisc_ops dsmark_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&dsmark_class_ops,
+	.id		=	"dsmark",
+	.priv_size	=	sizeof(struct dsmark_qdisc_data),
+	.enqueue	=	dsmark_enqueue,
+	.dequeue	=	dsmark_dequeue,
+	.requeue	=	dsmark_requeue,
+	.drop		=	dsmark_drop,
+	.init		=	dsmark_init,
+	.reset		=	dsmark_reset,
+	.destroy	=	dsmark_destroy,
+	.change		=	NULL,
+	.dump		=	dsmark_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init dsmark_module_init(void)
+{
+	return register_qdisc(&dsmark_qdisc_ops);
+}
+static void __exit dsmark_module_exit(void) 
+{
+	unregister_qdisc(&dsmark_qdisc_ops);
+}
+module_init(dsmark_module_init)
+module_exit(dsmark_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
new file mode 100644
index 00000000000..4888305c96d
--- /dev/null
+++ b/net/sched/sch_fifo.c
@@ -0,0 +1,212 @@
+/*
+ * net/sched/sch_fifo.c	The simplest FIFO queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* 1 band FIFO pseudo-"scheduler" */
+
+struct fifo_sched_data
+{
+	unsigned limit;
+};
+
+static int
+bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (sch->qstats.backlog + skb->len <= q->limit) {
+		__skb_queue_tail(&sch->q, skb);
+		sch->qstats.backlog += skb->len;
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+	sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+	if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
+#endif
+		kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int
+bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+bfifo_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb)
+		sch->qstats.backlog -= skb->len;
+	return skb;
+}
+
+static unsigned int 
+fifo_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		kfree_skb(skb);
+		return len;
+	}
+	return 0;
+}
+
+static void
+fifo_reset(struct Qdisc* sch)
+{
+	skb_queue_purge(&sch->q);
+	sch->qstats.backlog = 0;
+}
+
+static int
+pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (sch->q.qlen < q->limit) {
+		__skb_queue_tail(&sch->q, skb);
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+	sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+	if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
+#endif
+		kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int
+pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.requeues++;
+	return 0;
+}
+
+
+static struct sk_buff *
+pfifo_dequeue(struct Qdisc* sch)
+{
+	return __skb_dequeue(&sch->q);
+}
+
+static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (opt == NULL) {
+		unsigned int limit = sch->dev->tx_queue_len ? : 1;
+
+		if (sch->ops == &bfifo_qdisc_ops)
+			q->limit = limit*sch->dev->mtu;
+		else	
+			q->limit = limit;
+	} else {
+		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
+		if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+			return -EINVAL;
+		q->limit = ctl->limit;
+	}
+	return 0;
+}
+
+static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_fifo_qopt opt;
+
+	opt.limit = q->limit;
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+struct Qdisc_ops pfifo_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"pfifo",
+	.priv_size	=	sizeof(struct fifo_sched_data),
+	.enqueue	=	pfifo_enqueue,
+	.dequeue	=	pfifo_dequeue,
+	.requeue	=	pfifo_requeue,
+	.drop		=	fifo_drop,
+	.init		=	fifo_init,
+	.reset		=	fifo_reset,
+	.destroy	=	NULL,
+	.change		=	fifo_init,
+	.dump		=	fifo_dump,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc_ops bfifo_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"bfifo",
+	.priv_size	=	sizeof(struct fifo_sched_data),
+	.enqueue	=	bfifo_enqueue,
+	.dequeue	=	bfifo_dequeue,
+	.requeue	=	bfifo_requeue,
+	.drop		=	fifo_drop,
+	.init		=	fifo_init,
+	.reset		=	fifo_reset,
+	.destroy	=	NULL,
+	.change		=	fifo_init,
+	.dump		=	fifo_dump,
+	.owner		=	THIS_MODULE,
+};
+
+EXPORT_SYMBOL(bfifo_qdisc_ops);
+EXPORT_SYMBOL(pfifo_qdisc_ops);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
new file mode 100644
index 00000000000..8c01e023f02
--- /dev/null
+++ b/net/sched/sch_generic.c
@@ -0,0 +1,609 @@
+/*
+ * net/sched/sch_generic.c	Generic packet scheduler routines.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
+ *              - Ingress support
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* Main transmission queue. */
+
+/* Main qdisc structure lock. 
+
+   However, modifications
+   to data, participating in scheduling must be additionally
+   protected with dev->queue_lock spinlock.
+
+   The idea is the following:
+   - enqueue, dequeue are serialized via top level device
+     spinlock dev->queue_lock.
+   - tree walking is protected by read_lock_bh(qdisc_tree_lock)
+     and this lock is used only in process context.
+   - updates to tree are made under rtnl semaphore or
+     from softirq context (__qdisc_destroy rcu-callback)
+     hence this lock needs local bh disabling.
+
+   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+ */
+DEFINE_RWLOCK(qdisc_tree_lock);
+
+void qdisc_lock_tree(struct net_device *dev)
+{
+	write_lock_bh(&qdisc_tree_lock);
+	spin_lock_bh(&dev->queue_lock);
+}
+
+void qdisc_unlock_tree(struct net_device *dev)
+{
+	spin_unlock_bh(&dev->queue_lock);
+	write_unlock_bh(&qdisc_tree_lock);
+}
+
+/* 
+   dev->queue_lock serializes queue accesses for this device
+   AND dev->qdisc pointer itself.
+
+   dev->xmit_lock serializes accesses to device driver.
+
+   dev->queue_lock and dev->xmit_lock are mutually exclusive,
+   if one is grabbed, another must be free.
+ */
+
+
+/* Kick device.
+   Note, that this procedure can be called by a watchdog timer, so that
+   we do not check dev->tbusy flag here.
+
+   Returns:  0  - queue is empty.
+            >0  - queue is not empty, but throttled.
+	    <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.
+
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+*/
+
+int qdisc_restart(struct net_device *dev)
+{
+	struct Qdisc *q = dev->qdisc;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	if ((skb = q->dequeue(q)) != NULL) {
+		unsigned nolock = (dev->features & NETIF_F_LLTX);
+		/*
+		 * When the driver has LLTX set it does its own locking
+		 * in start_xmit. No need to add additional overhead by
+		 * locking again. These checks are worth it because
+		 * even uncongested locks can be quite expensive.
+		 * The driver can do trylock like here too, in case
+		 * of lock congestion it should return -1 and the packet
+		 * will be requeued.
+		 */
+		if (!nolock) {
+			if (!spin_trylock(&dev->xmit_lock)) {
+			collision:
+				/* So, someone grabbed the driver. */
+				
+				/* It may be transient configuration error,
+				   when hard_start_xmit() recurses. We detect
+				   it by checking xmit owner and drop the
+				   packet when deadloop is detected.
+				*/
+				if (dev->xmit_lock_owner == smp_processor_id()) {
+					kfree_skb(skb);
+					if (net_ratelimit())
+						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+					return -1;
+				}
+				__get_cpu_var(netdev_rx_stat).cpu_collision++;
+				goto requeue;
+			}
+			/* Remember that the driver is grabbed by us. */
+			dev->xmit_lock_owner = smp_processor_id();
+		}
+		
+		{
+			/* And release queue */
+			spin_unlock(&dev->queue_lock);
+
+			if (!netif_queue_stopped(dev)) {
+				int ret;
+				if (netdev_nit)
+					dev_queue_xmit_nit(skb, dev);
+
+				ret = dev->hard_start_xmit(skb, dev);
+				if (ret == NETDEV_TX_OK) { 
+					if (!nolock) {
+						dev->xmit_lock_owner = -1;
+						spin_unlock(&dev->xmit_lock);
+					}
+					spin_lock(&dev->queue_lock);
+					return -1;
+				}
+				if (ret == NETDEV_TX_LOCKED && nolock) {
+					spin_lock(&dev->queue_lock);
+					goto collision; 
+				}
+			}
+
+			/* NETDEV_TX_BUSY - we need to requeue */
+			/* Release the driver */
+			if (!nolock) { 
+				dev->xmit_lock_owner = -1;
+				spin_unlock(&dev->xmit_lock);
+			} 
+			spin_lock(&dev->queue_lock);
+			q = dev->qdisc;
+		}
+
+		/* Device kicked us out :(
+		   This is possible in three cases:
+
+		   0. driver is locked
+		   1. fastroute is enabled
+		   2. device cannot determine busy state
+		      before start of transmission (f.e. dialout)
+		   3. device is buggy (ppp)
+		 */
+
+requeue:
+		q->ops->requeue(skb, q);
+		netif_schedule(dev);
+		return 1;
+	}
+	return q->q.qlen;
+}
+
+static void dev_watchdog(unsigned long arg)
+{
+	struct net_device *dev = (struct net_device *)arg;
+
+	spin_lock(&dev->xmit_lock);
+	if (dev->qdisc != &noop_qdisc) {
+		if (netif_device_present(dev) &&
+		    netif_running(dev) &&
+		    netif_carrier_ok(dev)) {
+			if (netif_queue_stopped(dev) &&
+			    (jiffies - dev->trans_start) > dev->watchdog_timeo) {
+				printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
+				dev->tx_timeout(dev);
+			}
+			if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+				dev_hold(dev);
+		}
+	}
+	spin_unlock(&dev->xmit_lock);
+
+	dev_put(dev);
+}
+
+static void dev_watchdog_init(struct net_device *dev)
+{
+	init_timer(&dev->watchdog_timer);
+	dev->watchdog_timer.data = (unsigned long)dev;
+	dev->watchdog_timer.function = dev_watchdog;
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+	if (dev->tx_timeout) {
+		if (dev->watchdog_timeo <= 0)
+			dev->watchdog_timeo = 5*HZ;
+		if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+			dev_hold(dev);
+	}
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	__netdev_watchdog_up(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	if (del_timer(&dev->watchdog_timer))
+		__dev_put(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+
+/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
+   under all circumstances. It is difficult to invent anything faster or
+   cheaper.
+ */
+
+static int
+noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+{
+	kfree_skb(skb);
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *
+noop_dequeue(struct Qdisc * qdisc)
+{
+	return NULL;
+}
+
+static int
+noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	if (net_ratelimit())
+		printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
+	kfree_skb(skb);
+	return NET_XMIT_CN;
+}
+
+struct Qdisc_ops noop_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"noop",
+	.priv_size	=	0,
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.requeue	=	noop_requeue,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc noop_qdisc = {
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.flags		=	TCQ_F_BUILTIN,
+	.ops		=	&noop_qdisc_ops,	
+	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
+};
+
+static struct Qdisc_ops noqueue_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"noqueue",
+	.priv_size	=	0,
+	.enqueue	=	noop_enqueue,
+	.dequeue	=	noop_dequeue,
+	.requeue	=	noop_requeue,
+	.owner		=	THIS_MODULE,
+};
+
+static struct Qdisc noqueue_qdisc = {
+	.enqueue	=	NULL,
+	.dequeue	=	noop_dequeue,
+	.flags		=	TCQ_F_BUILTIN,
+	.ops		=	&noqueue_qdisc_ops,
+	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
+};
+
+
+static const u8 prio2band[TC_PRIO_MAX+1] =
+	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+
+/* 3-band FIFO queue: old style, but should be a bit faster than
+   generic prio+fifo combination.
+ */
+
+static int
+pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	list += prio2band[skb->priority&TC_PRIO_MAX];
+
+	if (list->qlen < qdisc->dev->tx_queue_len) {
+		__skb_queue_tail(list, skb);
+		qdisc->q.qlen++;
+		qdisc->bstats.bytes += skb->len;
+		qdisc->bstats.packets++;
+		return 0;
+	}
+	qdisc->qstats.drops++;
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static struct sk_buff *
+pfifo_fast_dequeue(struct Qdisc* qdisc)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct sk_buff *skb;
+
+	for (prio = 0; prio < 3; prio++, list++) {
+		skb = __skb_dequeue(list);
+		if (skb) {
+			qdisc->q.qlen--;
+			return skb;
+		}
+	}
+	return NULL;
+}
+
+static int
+pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	list += prio2band[skb->priority&TC_PRIO_MAX];
+
+	__skb_queue_head(list, skb);
+	qdisc->q.qlen++;
+	qdisc->qstats.requeues++;
+	return 0;
+}
+
+static void
+pfifo_fast_reset(struct Qdisc* qdisc)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio=0; prio < 3; prio++)
+		skb_queue_purge(list+prio);
+	qdisc->q.qlen = 0;
+}
+
+static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+	unsigned char	 *b = skb->tail;
+	struct tc_prio_qopt opt;
+
+	opt.bands = 3; 
+	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
+{
+	int i;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (i=0; i<3; i++)
+		skb_queue_head_init(list+i);
+
+	return 0;
+}
+
+static struct Qdisc_ops pfifo_fast_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"pfifo_fast",
+	.priv_size	=	3 * sizeof(struct sk_buff_head),
+	.enqueue	=	pfifo_fast_enqueue,
+	.dequeue	=	pfifo_fast_dequeue,
+	.requeue	=	pfifo_fast_requeue,
+	.init		=	pfifo_fast_init,
+	.reset		=	pfifo_fast_reset,
+	.dump		=	pfifo_fast_dump,
+	.owner		=	THIS_MODULE,
+};
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+{
+	void *p;
+	struct Qdisc *sch;
+	int size;
+
+	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
+	size += ops->priv_size + QDISC_ALIGN_CONST;
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (!p)
+		return NULL;
+	memset(p, 0, size);
+
+	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) 
+			       & ~QDISC_ALIGN_CONST);
+	sch->padded = (char *)sch - (char *)p;
+
+	INIT_LIST_HEAD(&sch->list);
+	skb_queue_head_init(&sch->q);
+	sch->ops = ops;
+	sch->enqueue = ops->enqueue;
+	sch->dequeue = ops->dequeue;
+	sch->dev = dev;
+	dev_hold(dev);
+	sch->stats_lock = &dev->queue_lock;
+	atomic_set(&sch->refcnt, 1);
+	if (!ops->init || ops->init(sch, NULL) == 0)
+		return sch;
+
+	dev_put(dev);
+	kfree(p);
+	return NULL;
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_reset(struct Qdisc *qdisc)
+{
+	struct Qdisc_ops *ops = qdisc->ops;
+
+	if (ops->reset)
+		ops->reset(qdisc);
+}
+
+/* this is the rcu callback function to clean up a qdisc when there 
+ * are no further references to it */
+
+static void __qdisc_destroy(struct rcu_head *head)
+{
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
+	struct Qdisc_ops  *ops = qdisc->ops;
+
+#ifdef CONFIG_NET_ESTIMATOR
+	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+#endif
+	write_lock(&qdisc_tree_lock);
+	if (ops->reset)
+		ops->reset(qdisc);
+	if (ops->destroy)
+		ops->destroy(qdisc);
+	write_unlock(&qdisc_tree_lock);
+	module_put(ops->owner);
+
+	dev_put(qdisc->dev);
+	kfree((char *) qdisc - qdisc->padded);
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	struct list_head cql = LIST_HEAD_INIT(cql);
+	struct Qdisc *cq, *q, *n;
+
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+		!atomic_dec_and_test(&qdisc->refcnt))
+		return;
+
+	if (!list_empty(&qdisc->list)) {
+		if (qdisc->ops->cl_ops == NULL)
+			list_del(&qdisc->list);
+		else
+			list_move(&qdisc->list, &cql);
+	}
+
+	/* unlink inner qdiscs from dev->qdisc_list immediately */
+	list_for_each_entry(cq, &cql, list)
+		list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list)
+			if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) {
+				if (q->ops->cl_ops == NULL)
+					list_del_init(&q->list);
+				else
+					list_move_tail(&q->list, &cql);
+			}
+	list_for_each_entry_safe(cq, n, &cql, list)
+		list_del_init(&cq->list);
+
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+}
+
+void dev_activate(struct net_device *dev)
+{
+	/* No queueing discipline is attached to device;
+	   create default one i.e. pfifo_fast for devices,
+	   which need queueing and noqueue_qdisc for
+	   virtual interfaces
+	 */
+
+	if (dev->qdisc_sleeping == &noop_qdisc) {
+		struct Qdisc *qdisc;
+		if (dev->tx_queue_len) {
+			qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
+			if (qdisc == NULL) {
+				printk(KERN_INFO "%s: activation failed\n", dev->name);
+				return;
+			}
+			write_lock_bh(&qdisc_tree_lock);
+			list_add_tail(&qdisc->list, &dev->qdisc_list);
+			write_unlock_bh(&qdisc_tree_lock);
+		} else {
+			qdisc =  &noqueue_qdisc;
+		}
+		write_lock_bh(&qdisc_tree_lock);
+		dev->qdisc_sleeping = qdisc;
+		write_unlock_bh(&qdisc_tree_lock);
+	}
+
+	spin_lock_bh(&dev->queue_lock);
+	rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
+	if (dev->qdisc != &noqueue_qdisc) {
+		dev->trans_start = jiffies;
+		dev_watchdog_up(dev);
+	}
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+	struct Qdisc *qdisc;
+
+	spin_lock_bh(&dev->queue_lock);
+	qdisc = dev->qdisc;
+	dev->qdisc = &noop_qdisc;
+
+	qdisc_reset(qdisc);
+
+	spin_unlock_bh(&dev->queue_lock);
+
+	dev_watchdog_down(dev);
+
+	while (test_bit(__LINK_STATE_SCHED, &dev->state))
+		yield();
+
+	spin_unlock_wait(&dev->xmit_lock);
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+	qdisc_lock_tree(dev);
+	dev->qdisc = &noop_qdisc;
+	dev->qdisc_sleeping = &noop_qdisc;
+	INIT_LIST_HEAD(&dev->qdisc_list);
+	qdisc_unlock_tree(dev);
+
+	dev_watchdog_init(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+	struct Qdisc *qdisc;
+
+	qdisc_lock_tree(dev);
+	qdisc = dev->qdisc_sleeping;
+	dev->qdisc = &noop_qdisc;
+	dev->qdisc_sleeping = &noop_qdisc;
+	qdisc_destroy(qdisc);
+#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
+        if ((qdisc = dev->qdisc_ingress) != NULL) {
+		dev->qdisc_ingress = NULL;
+		qdisc_destroy(qdisc);
+        }
+#endif
+	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
+	qdisc_unlock_tree(dev);
+}
+
+EXPORT_SYMBOL(__netdev_watchdog_up);
+EXPORT_SYMBOL(noop_qdisc);
+EXPORT_SYMBOL(noop_qdisc_ops);
+EXPORT_SYMBOL(qdisc_create_dflt);
+EXPORT_SYMBOL(qdisc_destroy);
+EXPORT_SYMBOL(qdisc_reset);
+EXPORT_SYMBOL(qdisc_restart);
+EXPORT_SYMBOL(qdisc_lock_tree);
+EXPORT_SYMBOL(qdisc_unlock_tree);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
new file mode 100644
index 00000000000..25c171c3271
--- /dev/null
+++ b/net/sched/sch_gred.c
@@ -0,0 +1,630 @@
+/*
+ * net/sched/sch_gred.c	Generic Random Early Detection queue.
+ *
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
+ *
+ *             991129: -  Bug fix with grio mode
+ *		       - a better sing. AvgQ mode with Grio(WRED)
+ *		       - A finer grained VQ dequeue based on sugestion
+ *		         from Ren Liu
+ *		       - More error checks
+ *
+ *
+ *
+ *  For all the glorious comments look at Alexey's sch_red.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#if 1 /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0 /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+struct gred_sched_data;
+struct gred_sched;
+
+struct gred_sched_data
+{
+/* Parameters */
+	u32		limit;		/* HARD maximal queue length	*/
+	u32		qth_min;	/* Min average length threshold: A scaled */
+	u32		qth_max;	/* Max average length threshold: A scaled */
+	u32      	DP;		/* the drop pramaters */
+	char		Wlog;		/* log(W)		*/
+	char		Plog;		/* random number bits	*/
+	u32		Scell_max;
+	u32		Rmask;
+	u32		bytesin;	/* bytes seen on virtualQ so far*/
+	u32		packetsin;	/* packets seen on virtualQ so far*/
+	u32		backlog;	/* bytes on the virtualQ */
+	u32		forced;	/* packets dropped for exceeding limits */
+	u32		early;	/* packets dropped as a warning */
+	u32		other;	/* packets dropped by invoking drop() */
+	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
+	char		Scell_log;
+	u8		Stab[256];
+	u8              prio;        /* the prio of this vq */
+
+/* Variables */
+	unsigned long	qave;		/* Average queue length: A scaled */
+	int		qcount;		/* Packets since last random number generation */
+	u32		qR;		/* Cached random number */
+
+	psched_time_t	qidlestart;	/* Start of idle period	*/
+};
+
+struct gred_sched
+{
+	struct gred_sched_data *tab[MAX_DPs];
+	u32 		DPs;   
+	u32 		def; 
+	u8 		initd; 
+	u8 		grio; 
+	u8 		eqp; 
+};
+
+static int
+gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	psched_time_t now;
+	struct gred_sched_data *q=NULL;
+	struct gred_sched *t= qdisc_priv(sch);
+	unsigned long	qave=0;	
+	int i=0;
+
+	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
+		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
+		goto do_enqueue;
+	}
+
+
+	if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
+		printk("GRED: setting to default (%d)\n ",t->def);
+		if (!(q=t->tab[t->def])) {
+			DPRINTK("GRED: setting to default FAILED! dropping!! "
+			    "(%d)\n ", t->def);
+			goto drop;
+		}
+		/* fix tc_index? --could be controvesial but needed for
+		   requeueing */
+		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
+	}
+
+	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
+	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
+	    sch->qstats.backlog);
+	/* sum up all the qaves of prios <= to ours to get the new qave*/
+	if (!t->eqp && t->grio) {
+		for (i=0;i<t->DPs;i++) {
+			if ((!t->tab[i]) || (i==q->DP))	
+				continue; 
+				
+			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
+				qave +=t->tab[i]->qave;
+		}
+			
+	}
+
+	q->packetsin++;
+	q->bytesin+=skb->len;
+
+	if (t->eqp && t->grio) {
+		qave=0;
+		q->qave=t->tab[t->def]->qave;
+		q->qidlestart=t->tab[t->def]->qidlestart;
+	}
+
+	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+		long us_idle;
+		PSCHED_GET_TIME(now);
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
+	} else {
+		if (t->eqp) {
+			q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
+		} else {
+			q->qave += q->backlog - (q->qave >> q->Wlog);
+		}
+
+	}
+	
+
+	if (t->eqp && t->grio) 
+		t->tab[t->def]->qave=q->qave;
+
+	if ((q->qave+qave) < q->qth_min) {
+		q->qcount = -1;
+enqueue:
+		if (q->backlog + skb->len <= q->limit) {
+			q->backlog += skb->len;
+do_enqueue:
+			__skb_queue_tail(&sch->q, skb);
+			sch->qstats.backlog += skb->len;
+			sch->bstats.bytes += skb->len;
+			sch->bstats.packets++;
+			return 0;
+		} else {
+			q->pdrop++;
+		}
+
+drop:
+		kfree_skb(skb);
+		sch->qstats.drops++;
+		return NET_XMIT_DROP;
+	}
+	if ((q->qave+qave) >= q->qth_max) {
+		q->qcount = -1;
+		sch->qstats.overlimits++;
+		q->forced++;
+		goto drop;
+	}
+	if (++q->qcount) {
+		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
+			goto enqueue;
+		q->qcount = 0;
+		q->qR = net_random()&q->Rmask;
+		sch->qstats.overlimits++;
+		q->early++;
+		goto drop;
+	}
+	q->qR = net_random()&q->Rmask;
+	goto enqueue;
+}
+
+static int
+gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+	q= t->tab[(skb->tc_index&0xf)];
+/* error checking here -- probably unnecessary */
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	q->backlog += skb->len;
+	return 0;
+}
+
+static struct sk_buff *
+gred_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb) {
+		sch->qstats.backlog -= skb->len;
+		q= t->tab[(skb->tc_index&0xf)];
+		if (q) {
+			q->backlog -= skb->len;
+			if (!q->backlog && !t->eqp)
+				PSCHED_GET_TIME(q->qidlestart);
+		} else {
+			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
+		}
+		return skb;
+	}
+
+	if (t->eqp) {
+			q= t->tab[t->def];
+			if (!q)	
+				D2PRINTK("no default VQ set: Results will be "
+				       "screwed up\n");
+			else
+				PSCHED_GET_TIME(q->qidlestart);
+	}
+
+	return NULL;
+}
+
+static unsigned int gred_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		sch->qstats.drops++;
+		q= t->tab[(skb->tc_index&0xf)];
+		if (q) {
+			q->backlog -= len;
+			q->other++;
+			if (!q->backlog && !t->eqp)
+				PSCHED_GET_TIME(q->qidlestart);
+		} else {
+			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
+		}
+
+		kfree_skb(skb);
+		return len;
+	}
+
+	q=t->tab[t->def];
+	if (!q) {
+		D2PRINTK("no default VQ set: Results might be screwed up\n");
+		return 0;
+	}
+
+	PSCHED_GET_TIME(q->qidlestart);
+	return 0;
+
+}
+
+static void gred_reset(struct Qdisc* sch)
+{
+	int i;
+	struct gred_sched_data *q;
+	struct gred_sched *t= qdisc_priv(sch);
+
+	__skb_queue_purge(&sch->q);
+
+	sch->qstats.backlog = 0;
+
+        for (i=0;i<t->DPs;i++) {
+	        q= t->tab[i];
+		if (!q)	
+			continue; 
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+		q->qave = 0;
+		q->qcount = -1;
+		q->backlog = 0;
+		q->other=0;
+		q->forced=0;
+		q->pdrop=0;
+		q->early=0;
+	}
+}
+
+static int gred_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct gred_sched_data *q;
+	struct tc_gred_qopt *ctl;
+	struct tc_gred_sopt *sopt;
+	struct rtattr *tb[TCA_GRED_STAB];
+	struct rtattr *tb2[TCA_GRED_DPS];
+	int i;
+
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
+		return -EINVAL;
+
+	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
+		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
+
+	    if (tb2[TCA_GRED_DPS-1] == 0) 
+			return -EINVAL;
+
+		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
+		table->DPs=sopt->DPs;   
+		table->def=sopt->def_DP; 
+		table->grio=sopt->grio; 
+		table->initd=0;
+		/* probably need to clear all the table DP entries as well */
+		return 0;
+	    }
+
+
+	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
+		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
+		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
+			return -EINVAL;
+
+	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
+	if (ctl->DP > MAX_DPs-1 ) {
+		/* misbehaving is punished! Put in the default drop probability */
+		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
+			"set to default at %d\n",ctl->DP,table->def);
+		ctl->DP=table->def;
+	}
+	
+	if (table->tab[ctl->DP] == NULL) {
+		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
+					    GFP_KERNEL);
+		if (NULL == table->tab[ctl->DP])
+			return -ENOMEM;
+		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
+	}
+	q= table->tab[ctl->DP]; 
+
+	if (table->grio) {
+		if (ctl->prio <=0) {
+			if (table->def && table->tab[table->def]) {
+				DPRINTK("\nGRED: DP %u does not have a prio"
+					"setting default to %d\n",ctl->DP,
+					table->tab[table->def]->prio);
+				q->prio=table->tab[table->def]->prio;
+			} else { 
+				DPRINTK("\nGRED: DP %u does not have a prio"
+					" setting default to 8\n",ctl->DP);
+				q->prio=8;
+			}
+		} else {
+			q->prio=ctl->prio;
+		}
+	} else {
+		q->prio=8;
+	}
+
+
+	q->DP=ctl->DP;
+	q->Wlog = ctl->Wlog;
+	q->Plog = ctl->Plog;
+	q->limit = ctl->limit;
+	q->Scell_log = ctl->Scell_log;
+	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+	q->Scell_max = (255<<q->Scell_log);
+	q->qth_min = ctl->qth_min<<ctl->Wlog;
+	q->qth_max = ctl->qth_max<<ctl->Wlog;
+	q->qave=0;
+	q->backlog=0;
+	q->qcount = -1;
+	q->other=0;
+	q->forced=0;
+	q->pdrop=0;
+	q->early=0;
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
+
+	if ( table->initd && table->grio) {
+	/* this looks ugly but it's not in the fast path */
+		for (i=0;i<table->DPs;i++) {
+			if ((!table->tab[i]) || (i==q->DP) )    
+				continue; 
+			if (table->tab[i]->prio == q->prio ){
+				/* WRED mode detected */
+				table->eqp=1;
+				break;
+			}
+		}
+	}
+
+	if (!table->initd) {
+		table->initd=1;
+		/* 
+        	the first entry also goes into the default until
+        	over-written 
+		*/
+
+		if (table->tab[table->def] == NULL) {
+			table->tab[table->def]=
+				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
+			if (NULL == table->tab[table->def])
+				return -ENOMEM;
+
+			memset(table->tab[table->def], 0,
+			       (sizeof(struct gred_sched_data)));
+		}
+		q= table->tab[table->def]; 
+		q->DP=table->def;
+		q->Wlog = ctl->Wlog;
+		q->Plog = ctl->Plog;
+		q->limit = ctl->limit;
+		q->Scell_log = ctl->Scell_log;
+		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+		q->Scell_max = (255<<q->Scell_log);
+		q->qth_min = ctl->qth_min<<ctl->Wlog;
+		q->qth_max = ctl->qth_max<<ctl->Wlog;
+
+		if (table->grio)
+			q->prio=table->tab[ctl->DP]->prio;
+		else
+			q->prio=8;
+
+		q->qcount = -1;
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
+	}
+	return 0;
+
+}
+
+static int gred_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	struct tc_gred_sopt *sopt;
+	struct rtattr *tb[TCA_GRED_STAB];
+	struct rtattr *tb2[TCA_GRED_DPS];
+
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
+		return -EINVAL;
+
+	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
+		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
+
+	    if (tb2[TCA_GRED_DPS-1] == 0) 
+			return -EINVAL;
+
+		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
+		table->DPs=sopt->DPs;   
+		table->def=sopt->def_DP; 
+		table->grio=sopt->grio; 
+		table->initd=0;
+		return 0;
+	}
+
+	DPRINTK("\n GRED_INIT error!\n");
+	return -EINVAL;
+}
+
+static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	unsigned long qave;
+	struct rtattr *rta;
+	struct tc_gred_qopt *opt = NULL ;
+	struct tc_gred_qopt *dst;
+	struct gred_sched *table = qdisc_priv(sch);
+	struct gred_sched_data *q;
+	int i;
+	unsigned char	 *b = skb->tail;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
+
+	if (opt  == NULL) {
+		DPRINTK("gred_dump:failed to malloc for %Zd\n",
+		    sizeof(struct tc_gred_qopt)*MAX_DPs);
+		goto rtattr_failure;
+	}
+
+	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
+
+	if (!table->initd) {
+		DPRINTK("NO GRED Queues setup!\n");
+	}
+
+	for (i=0;i<MAX_DPs;i++) {
+		dst= &opt[i]; 
+		q= table->tab[i]; 
+
+		if (!q) {
+			/* hack -- fix at some point with proper message
+			   This is how we indicate to tc that there is no VQ
+			   at this DP */
+
+			dst->DP=MAX_DPs+i;
+			continue;
+		}
+
+		dst->limit=q->limit;
+		dst->qth_min=q->qth_min>>q->Wlog;
+		dst->qth_max=q->qth_max>>q->Wlog;
+		dst->DP=q->DP;
+		dst->backlog=q->backlog;
+		if (q->qave) {
+			if (table->eqp && table->grio) {
+				q->qidlestart=table->tab[table->def]->qidlestart;
+				q->qave=table->tab[table->def]->qave;
+			}
+			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+				long idle;
+				psched_time_t now;
+				PSCHED_GET_TIME(now);
+				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
+				dst->qave = qave >> q->Wlog;
+
+			} else {
+				dst->qave = q->qave >> q->Wlog;
+			}
+		} else {
+			dst->qave = 0;
+		}
+		
+
+		dst->Wlog = q->Wlog;
+		dst->Plog = q->Plog;
+		dst->Scell_log = q->Scell_log;
+		dst->other = q->other;
+		dst->forced = q->forced;
+		dst->early = q->early;
+		dst->pdrop = q->pdrop;
+		dst->prio = q->prio;
+		dst->packets=q->packetsin;
+		dst->bytesin=q->bytesin;
+	}
+
+	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
+	rta->rta_len = skb->tail - b;
+
+	kfree(opt);
+	return skb->len;
+
+rtattr_failure:
+	if (opt)
+		kfree(opt);
+	DPRINTK("gred_dump: FAILURE!!!!\n");
+
+/* also free the opt struct here */
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static void gred_destroy(struct Qdisc *sch)
+{
+	struct gred_sched *table = qdisc_priv(sch);
+	int i;
+
+	for (i = 0;i < table->DPs; i++) {
+		if (table->tab[i])
+			kfree(table->tab[i]);
+	}
+}
+
+static struct Qdisc_ops gred_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"gred",
+	.priv_size	=	sizeof(struct gred_sched),
+	.enqueue	=	gred_enqueue,
+	.dequeue	=	gred_dequeue,
+	.requeue	=	gred_requeue,
+	.drop		=	gred_drop,
+	.init		=	gred_init,
+	.reset		=	gred_reset,
+	.destroy	=	gred_destroy,
+	.change		=	gred_change,
+	.dump		=	gred_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init gred_module_init(void)
+{
+	return register_qdisc(&gred_qdisc_ops);
+}
+static void __exit gred_module_exit(void) 
+{
+	unregister_qdisc(&gred_qdisc_ops);
+}
+module_init(gred_module_init)
+module_exit(gred_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
new file mode 100644
index 00000000000..c26764bc410
--- /dev/null
+++ b/net/sched/sch_hfsc.c
@@ -0,0 +1,1822 @@
+/*
+ * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * 2003-10-17 - Ported from altq
+ */
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve.  the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+
+#define HFSC_DEBUG 1
+
+/*
+ * kernel internal service curve representation:
+ *   coordinates are given by 64 bit unsigned integers.
+ *   x-axis: unit is clock count.
+ *   y-axis: unit is byte.
+ *
+ *   The service curve parameters are converted to the internal
+ *   representation. The slope values are scaled to avoid overflow.
+ *   the inverse slope values as well as the y-projection of the 1st
+ *   segment are kept in order to to avoid 64-bit divide operations
+ *   that are expensive on 32-bit architectures.
+ */
+
+struct internal_sc
+{
+	u64	sm1;	/* scaled slope of the 1st segment */
+	u64	ism1;	/* scaled inverse-slope of the 1st segment */
+	u64	dx;	/* the x-projection of the 1st segment */
+	u64	dy;	/* the y-projection of the 1st segment */
+	u64	sm2;	/* scaled slope of the 2nd segment */
+	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc
+{
+	u64	x;	/* current starting position on x-axis */
+	u64	y;	/* current starting position on y-axis */
+	u64	sm1;	/* scaled slope of the 1st segment */
+	u64	ism1;	/* scaled inverse-slope of the 1st segment */
+	u64	dx;	/* the x-projection of the 1st segment */
+	u64	dy;	/* the y-projection of the 1st segment */
+	u64	sm2;	/* scaled slope of the 2nd segment */
+	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
+};
+
+enum hfsc_class_flags
+{
+	HFSC_RSC = 0x1,
+	HFSC_FSC = 0x2,
+	HFSC_USC = 0x4
+};
+
+struct hfsc_class
+{
+	u32		classid;	/* class id */
+	unsigned int	refcnt;		/* usage count */
+
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue qstats;
+	struct gnet_stats_rate_est rate_est;
+	spinlock_t	*stats_lock;
+	unsigned int	level;		/* class level in hierarchy */
+	struct tcf_proto *filter_list;	/* filter list */
+	unsigned int	filter_cnt;	/* filter count */
+
+	struct hfsc_sched *sched;	/* scheduler data */
+	struct hfsc_class *cl_parent;	/* parent class */
+	struct list_head siblings;	/* sibling classes */
+	struct list_head children;	/* child classes */
+	struct Qdisc	*qdisc;		/* leaf qdisc */
+
+	struct rb_node el_node;		/* qdisc's eligible tree member */
+	struct rb_root vt_tree;		/* active children sorted by cl_vt */
+	struct rb_node vt_node;		/* parent's vt_tree member */
+	struct rb_root cf_tree;		/* active children sorted by cl_f */
+	struct rb_node cf_node;		/* parent's cf_heap member */
+	struct list_head hlist;		/* hash list member */
+	struct list_head dlist;		/* drop list member */
+
+	u64	cl_total;		/* total work in bytes */
+	u64	cl_cumul;		/* cumulative work in bytes done by
+					   real-time criteria */
+
+	u64 	cl_d;			/* deadline*/
+	u64 	cl_e;			/* eligible time */
+	u64	cl_vt;			/* virtual time */
+	u64	cl_f;			/* time when this class will fit for
+					   link-sharing, max(myf, cfmin) */
+	u64	cl_myf;			/* my fit-time (calculated from this
+					   class's own upperlimit curve) */
+	u64	cl_myfadj;		/* my fit-time adjustment (to cancel
+					   history dependence) */
+	u64	cl_cfmin;		/* earliest children's fit-time (used
+					   with cl_myf to obtain cl_f) */
+	u64	cl_cvtmin;		/* minimal virtual time among the
+					   children fit for link-sharing
+					   (monotonic within a period) */
+	u64	cl_vtadj;		/* intra-period cumulative vt
+					   adjustment */
+	u64	cl_vtoff;		/* inter-period cumulative vt offset */
+	u64	cl_cvtmax;		/* max child's vt in the last period */
+	u64	cl_cvtoff;		/* cumulative cvtmax of all periods */
+	u64	cl_pcvtoff;		/* parent's cvtoff at initalization
+					   time */
+
+	struct internal_sc cl_rsc;	/* internal real-time service curve */
+	struct internal_sc cl_fsc;	/* internal fair service curve */
+	struct internal_sc cl_usc;	/* internal upperlimit service curve */
+	struct runtime_sc cl_deadline;	/* deadline curve */
+	struct runtime_sc cl_eligible;	/* eligible curve */
+	struct runtime_sc cl_virtual;	/* virtual curve */
+	struct runtime_sc cl_ulimit;	/* upperlimit curve */
+
+	unsigned long	cl_flags;	/* which curves are valid */
+	unsigned long	cl_vtperiod;	/* vt period sequence number */
+	unsigned long	cl_parentperiod;/* parent's vt period sequence number*/
+	unsigned long	cl_nactive;	/* number of active children */
+};
+
+#define HFSC_HSIZE	16
+
+struct hfsc_sched
+{
+	u16	defcls;				/* default class id */
+	struct hfsc_class root;			/* root class */
+	struct list_head clhash[HFSC_HSIZE];	/* class hash */
+	struct rb_root eligible;		/* eligible tree */
+	struct list_head droplist;		/* active leaf class list (for
+						   dropping) */
+	struct sk_buff_head requeue;		/* requeued packet */
+	struct timer_list wd_timer;		/* watchdog timer */
+};
+
+/*
+ * macros
+ */
+#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
+#include <linux/time.h>
+#undef PSCHED_GET_TIME
+#define PSCHED_GET_TIME(stamp)						\
+do {									\
+	struct timeval tv;						\
+	do_gettimeofday(&tv);						\
+	(stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec;			\
+} while (0)
+#endif
+
+#if HFSC_DEBUG
+#define ASSERT(cond)							\
+do {									\
+	if (unlikely(!(cond)))						\
+		printk("assertion %s failed at %s:%i (%s)\n",		\
+		       #cond, __FILE__, __LINE__, __FUNCTION__);	\
+} while (0)
+#else
+#define ASSERT(cond)
+#endif /* HFSC_DEBUG */
+
+#define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
+
+
+/*
+ * eligible tree holds backlogged classes being sorted by their eligible times.
+ * there is one eligible tree per hfsc instance.
+ */
+
+static void
+eltree_insert(struct hfsc_class *cl)
+{
+	struct rb_node **p = &cl->sched->eligible.rb_node;
+	struct rb_node *parent = NULL;
+	struct hfsc_class *cl1;
+
+	while (*p != NULL) {
+		parent = *p;
+		cl1 = rb_entry(parent, struct hfsc_class, el_node);
+		if (cl->cl_e >= cl1->cl_e)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&cl->el_node, parent, p);
+	rb_insert_color(&cl->el_node, &cl->sched->eligible);
+}
+
+static inline void
+eltree_remove(struct hfsc_class *cl)
+{
+	rb_erase(&cl->el_node, &cl->sched->eligible);
+}
+
+static inline void
+eltree_update(struct hfsc_class *cl)
+{
+	eltree_remove(cl);
+	eltree_insert(cl);
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+static inline struct hfsc_class *
+eltree_get_mindl(struct hfsc_sched *q, u64 cur_time)
+{
+	struct hfsc_class *p, *cl = NULL;
+	struct rb_node *n;
+
+	for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) {
+		p = rb_entry(n, struct hfsc_class, el_node);
+		if (p->cl_e > cur_time)
+			break;
+		if (cl == NULL || p->cl_d < cl->cl_d)
+			cl = p;
+	}
+	return cl;
+}
+
+/* find the class with minimum eligible time among the eligible classes */
+static inline struct hfsc_class *
+eltree_get_minel(struct hfsc_sched *q)
+{
+	struct rb_node *n;
+	
+	n = rb_first(&q->eligible);
+	if (n == NULL)
+		return NULL;
+	return rb_entry(n, struct hfsc_class, el_node);
+}
+
+/*
+ * vttree holds holds backlogged child classes being sorted by their virtual
+ * time. each intermediate class has one vttree.
+ */
+static void
+vttree_insert(struct hfsc_class *cl)
+{
+	struct rb_node **p = &cl->cl_parent->vt_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct hfsc_class *cl1;
+
+	while (*p != NULL) {
+		parent = *p;
+		cl1 = rb_entry(parent, struct hfsc_class, vt_node);
+		if (cl->cl_vt >= cl1->cl_vt)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&cl->vt_node, parent, p);
+	rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+static inline void
+vttree_remove(struct hfsc_class *cl)
+{
+	rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+static inline void
+vttree_update(struct hfsc_class *cl)
+{
+	vttree_remove(cl);
+	vttree_insert(cl);
+}
+
+static inline struct hfsc_class *
+vttree_firstfit(struct hfsc_class *cl, u64 cur_time)
+{
+	struct hfsc_class *p;
+	struct rb_node *n;
+
+	for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) {
+		p = rb_entry(n, struct hfsc_class, vt_node);
+		if (p->cl_f <= cur_time)
+			return p;
+	}
+	return NULL;
+}
+
+/*
+ * get the leaf class with the minimum vt in the hierarchy
+ */
+static struct hfsc_class *
+vttree_get_minvt(struct hfsc_class *cl, u64 cur_time)
+{
+	/* if root-class's cfmin is bigger than cur_time nothing to do */
+	if (cl->cl_cfmin > cur_time)
+		return NULL;
+
+	while (cl->level > 0) {
+		cl = vttree_firstfit(cl, cur_time);
+		if (cl == NULL)
+			return NULL;
+		/*
+		 * update parent's cl_cvtmin.
+		 */
+		if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+			cl->cl_parent->cl_cvtmin = cl->cl_vt;
+	}
+	return cl;
+}
+
+static void
+cftree_insert(struct hfsc_class *cl)
+{
+	struct rb_node **p = &cl->cl_parent->cf_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct hfsc_class *cl1;
+
+	while (*p != NULL) {
+		parent = *p;
+		cl1 = rb_entry(parent, struct hfsc_class, cf_node);
+		if (cl->cl_f >= cl1->cl_f)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&cl->cf_node, parent, p);
+	rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_remove(struct hfsc_class *cl)
+{
+	rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_update(struct hfsc_class *cl)
+{
+	cftree_remove(cl);
+	cftree_insert(cl);
+}
+
+/*
+ * service curve support functions
+ *
+ *  external service curve parameters
+ *	m: bps
+ *	d: us
+ *  internal service curve parameters
+ *	sm: (bytes/psched_us) << SM_SHIFT
+ *	ism: (psched_us/byte) << ISM_SHIFT
+ *	dx: psched_us
+ *
+ * Clock source resolution (CONFIG_NET_SCH_CLK_*)
+ *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
+ *  CPU: resolution is between 0.5us and 1us.
+ *  GETTIMEOFDAY: resolution is exactly 1us.
+ *
+ * sm and ism are scaled in order to keep effective digits.
+ * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
+ * digits in decimal using the following table.
+ *
+ * Note: We can afford the additional accuracy (altq hfsc keeps at most
+ * 3 effective digits) thanks to the fact that linux clock is bounded
+ * much more tightly.
+ *
+ *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
+ *  ------------+-------------------------------------------------------
+ *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-e    62500e-3
+ *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
+ *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
+ *
+ *  0.5us/byte    160        16         1.6        0.16       0.016
+ *  us/byte       80         8          0.8        0.08       0.008
+ *  1.27us/byte   63         6.3        0.63       0.063      0.0063
+ */
+#define	SM_SHIFT	20
+#define	ISM_SHIFT	18
+
+#define	SM_MASK		((1ULL << SM_SHIFT) - 1)
+#define	ISM_MASK	((1ULL << ISM_SHIFT) - 1)
+
+static inline u64
+seg_x2y(u64 x, u64 sm)
+{
+	u64 y;
+
+	/*
+	 * compute
+	 *	y = x * sm >> SM_SHIFT
+	 * but divide it for the upper and lower bits to avoid overflow
+	 */
+	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+	return y;
+}
+
+static inline u64
+seg_y2x(u64 y, u64 ism)
+{
+	u64 x;
+
+	if (y == 0)
+		x = 0;
+	else if (ism == HT_INFINITY)
+		x = HT_INFINITY;
+	else {
+		x = (y >> ISM_SHIFT) * ism
+		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+	}
+	return x;
+}
+
+/* Convert m (bps) into sm (bytes/psched us) */
+static u64
+m2sm(u32 m)
+{
+	u64 sm;
+
+	sm = ((u64)m << SM_SHIFT);
+	sm += PSCHED_JIFFIE2US(HZ) - 1;
+	do_div(sm, PSCHED_JIFFIE2US(HZ));
+	return sm;
+}
+
+/* convert m (bps) into ism (psched us/byte) */
+static u64
+m2ism(u32 m)
+{
+	u64 ism;
+
+	if (m == 0)
+		ism = HT_INFINITY;
+	else {
+		ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+		ism += m - 1;
+		do_div(ism, m);
+	}
+	return ism;
+}
+
+/* convert d (us) into dx (psched us) */
+static u64
+d2dx(u32 d)
+{
+	u64 dx;
+
+	dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+	dx += 1000000 - 1;
+	do_div(dx, 1000000);
+	return dx;
+}
+
+/* convert sm (bytes/psched us) into m (bps) */
+static u32
+sm2m(u64 sm)
+{
+	u64 m;
+
+	m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+	return (u32)m;
+}
+
+/* convert dx (psched us) into d (us) */
+static u32
+dx2d(u64 dx)
+{
+	u64 d;
+
+	d = dx * 1000000;
+	do_div(d, PSCHED_JIFFIE2US(HZ));
+	return (u32)d;
+}
+
+static void
+sc2isc(struct tc_service_curve *sc, struct internal_sc *isc)
+{
+	isc->sm1  = m2sm(sc->m1);
+	isc->ism1 = m2ism(sc->m1);
+	isc->dx   = d2dx(sc->d);
+	isc->dy   = seg_x2y(isc->dx, isc->sm1);
+	isc->sm2  = m2sm(sc->m2);
+	isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
+{
+	rtsc->x	   = x;
+	rtsc->y    = y;
+	rtsc->sm1  = isc->sm1;
+	rtsc->ism1 = isc->ism1;
+	rtsc->dx   = isc->dx;
+	rtsc->dy   = isc->dy;
+	rtsc->sm2  = isc->sm2;
+	rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the y-projection of the runtime service curve by the
+ * given x-projection value
+ */
+static u64
+rtsc_y2x(struct runtime_sc *rtsc, u64 y)
+{
+	u64 x;
+
+	if (y < rtsc->y)
+		x = rtsc->x;
+	else if (y <= rtsc->y + rtsc->dy) {
+		/* x belongs to the 1st segment */
+		if (rtsc->dy == 0)
+			x = rtsc->x + rtsc->dx;
+		else
+			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+	} else {
+		/* x belongs to the 2nd segment */
+		x = rtsc->x + rtsc->dx
+		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+	}
+	return x;
+}
+
+static u64
+rtsc_x2y(struct runtime_sc *rtsc, u64 x)
+{
+	u64 y;
+
+	if (x <= rtsc->x)
+		y = rtsc->y;
+	else if (x <= rtsc->x + rtsc->dx)
+		/* y belongs to the 1st segment */
+		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+	else
+		/* y belongs to the 2nd segment */
+		y = rtsc->y + rtsc->dy
+		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+	return y;
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
+{
+	u64 y1, y2, dx, dy;
+	u32 dsm;
+
+	if (isc->sm1 <= isc->sm2) {
+		/* service curve is convex */
+		y1 = rtsc_x2y(rtsc, x);
+		if (y1 < y)
+			/* the current rtsc is smaller */
+			return;
+		rtsc->x = x;
+		rtsc->y = y;
+		return;
+	}
+
+	/*
+	 * service curve is concave
+	 * compute the two y values of the current rtsc
+	 *	y1: at x
+	 *	y2: at (x + dx)
+	 */
+	y1 = rtsc_x2y(rtsc, x);
+	if (y1 <= y) {
+		/* rtsc is below isc, no change to rtsc */
+		return;
+	}
+
+	y2 = rtsc_x2y(rtsc, x + isc->dx);
+	if (y2 >= y + isc->dy) {
+		/* rtsc is above isc, replace rtsc by isc */
+		rtsc->x = x;
+		rtsc->y = y;
+		rtsc->dx = isc->dx;
+		rtsc->dy = isc->dy;
+		return;
+	}
+
+	/*
+	 * the two curves intersect
+	 * compute the offsets (dx, dy) using the reverse
+	 * function of seg_x2y()
+	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+	 */
+	dx = (y1 - y) << SM_SHIFT;
+	dsm = isc->sm1 - isc->sm2;
+	do_div(dx, dsm);
+	/*
+	 * check if (x, y1) belongs to the 1st segment of rtsc.
+	 * if so, add the offset.
+	 */
+	if (rtsc->x + rtsc->dx > x)
+		dx += rtsc->x + rtsc->dx - x;
+	dy = seg_x2y(dx, isc->sm1);
+
+	rtsc->x = x;
+	rtsc->y = y;
+	rtsc->dx = dx;
+	rtsc->dy = dy;
+	return;
+}
+
+static void
+init_ed(struct hfsc_class *cl, unsigned int next_len)
+{
+	u64 cur_time;
+
+	PSCHED_GET_TIME(cur_time);
+
+	/* update the deadline curve */
+	rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+
+	/*
+	 * update the eligible curve.
+	 * for concave, it is equal to the deadline curve.
+	 * for convex, it is a linear curve with slope m2.
+	 */
+	cl->cl_eligible = cl->cl_deadline;
+	if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
+		cl->cl_eligible.dx = 0;
+		cl->cl_eligible.dy = 0;
+	}
+
+	/* compute e and d */
+	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+	eltree_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, unsigned int next_len)
+{
+	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+	eltree_update(cl);
+}
+
+static inline void
+update_d(struct hfsc_class *cl, unsigned int next_len)
+{
+	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static inline void
+update_cfmin(struct hfsc_class *cl)
+{
+	struct rb_node *n = rb_first(&cl->cf_tree);
+	struct hfsc_class *p;
+
+	if (n == NULL) {
+		cl->cl_cfmin = 0;
+		return;
+	}
+	p = rb_entry(n, struct hfsc_class, cf_node);
+	cl->cl_cfmin = p->cl_f;
+}
+
+static void
+init_vf(struct hfsc_class *cl, unsigned int len)
+{
+	struct hfsc_class *max_cl;
+	struct rb_node *n;
+	u64 vt, f, cur_time;
+	int go_active;
+
+	cur_time = 0;
+	go_active = 1;
+	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+		if (go_active && cl->cl_nactive++ == 0)
+			go_active = 1;
+		else
+			go_active = 0;
+
+		if (go_active) {
+			n = rb_last(&cl->cl_parent->vt_tree);
+			if (n != NULL) {
+				max_cl = rb_entry(n, struct hfsc_class,vt_node);
+				/*
+				 * set vt to the average of the min and max
+				 * classes.  if the parent's period didn't
+				 * change, don't decrease vt of the class.
+				 */
+				vt = max_cl->cl_vt;
+				if (cl->cl_parent->cl_cvtmin != 0)
+					vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+				if (cl->cl_parent->cl_vtperiod !=
+				    cl->cl_parentperiod || vt > cl->cl_vt)
+					cl->cl_vt = vt;
+			} else {
+				/*
+				 * first child for a new parent backlog period.
+				 * add parent's cvtmax to cvtoff to make a new
+				 * vt (vtoff + vt) larger than the vt in the
+				 * last period for all children.
+				 */
+				vt = cl->cl_parent->cl_cvtmax;
+				cl->cl_parent->cl_cvtoff += vt;
+				cl->cl_parent->cl_cvtmax = 0;
+				cl->cl_parent->cl_cvtmin = 0;
+				cl->cl_vt = 0;
+			}
+
+			cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
+							cl->cl_pcvtoff;
+
+			/* update the virtual curve */
+			vt = cl->cl_vt + cl->cl_vtoff;
+			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
+			                              cl->cl_total);
+			if (cl->cl_virtual.x == vt) {
+				cl->cl_virtual.x -= cl->cl_vtoff;
+				cl->cl_vtoff = 0;
+			}
+			cl->cl_vtadj = 0;
+
+			cl->cl_vtperiod++;  /* increment vt period */
+			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+			if (cl->cl_parent->cl_nactive == 0)
+				cl->cl_parentperiod++;
+			cl->cl_f = 0;
+
+			vttree_insert(cl);
+			cftree_insert(cl);
+
+			if (cl->cl_flags & HFSC_USC) {
+				/* class has upper limit curve */
+				if (cur_time == 0)
+					PSCHED_GET_TIME(cur_time);
+
+				/* update the ulimit curve */
+				rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
+				         cl->cl_total);
+				/* compute myf */
+				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+				                      cl->cl_total);
+				cl->cl_myfadj = 0;
+			}
+		}
+
+		f = max(cl->cl_myf, cl->cl_cfmin);
+		if (f != cl->cl_f) {
+			cl->cl_f = f;
+			cftree_update(cl);
+			update_cfmin(cl->cl_parent);
+		}
+	}
+}
+
+static void
+update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
+{
+	u64 f; /* , myf_bound, delta; */
+	int go_passive = 0;
+
+	if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC)
+		go_passive = 1;
+
+	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+		cl->cl_total += len;
+
+		if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0)
+			continue;
+
+		if (go_passive && --cl->cl_nactive == 0)
+			go_passive = 1;
+		else
+			go_passive = 0;
+
+		if (go_passive) {
+			/* no more active child, going passive */
+
+			/* update cvtmax of the parent class */
+			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+				cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+			/* remove this class from the vt tree */
+			vttree_remove(cl);
+
+			cftree_remove(cl);
+			update_cfmin(cl->cl_parent);
+
+			continue;
+		}
+
+		/*
+		 * update vt and f
+		 */
+		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+		            - cl->cl_vtoff + cl->cl_vtadj;
+
+		/*
+		 * if vt of the class is smaller than cvtmin,
+		 * the class was skipped in the past due to non-fit.
+		 * if so, we need to adjust vtadj.
+		 */
+		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+			cl->cl_vt = cl->cl_parent->cl_cvtmin;
+		}
+
+		/* update the vt tree */
+		vttree_update(cl);
+
+		if (cl->cl_flags & HFSC_USC) {
+			cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
+			                                      cl->cl_total);
+#if 0
+			/*
+			 * This code causes classes to stay way under their
+			 * limit when multiple classes are used at gigabit
+			 * speed. needs investigation. -kaber
+			 */
+			/*
+			 * if myf lags behind by more than one clock tick
+			 * from the current time, adjust myfadj to prevent
+			 * a rate-limited class from going greedy.
+			 * in a steady state under rate-limiting, myf
+			 * fluctuates within one clock tick.
+			 */
+			myf_bound = cur_time - PSCHED_JIFFIE2US(1);
+			if (cl->cl_myf < myf_bound) {
+				delta = cur_time - cl->cl_myf;
+				cl->cl_myfadj += delta;
+				cl->cl_myf += delta;
+			}
+#endif
+		}
+
+		f = max(cl->cl_myf, cl->cl_cfmin);
+		if (f != cl->cl_f) {
+			cl->cl_f = f;
+			cftree_update(cl);
+			update_cfmin(cl->cl_parent);
+		}
+	}
+}
+
+static void
+set_active(struct hfsc_class *cl, unsigned int len)
+{
+	if (cl->cl_flags & HFSC_RSC)
+		init_ed(cl, len);
+	if (cl->cl_flags & HFSC_FSC)
+		init_vf(cl, len);
+
+	list_add_tail(&cl->dlist, &cl->sched->droplist);
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+	if (cl->cl_flags & HFSC_RSC)
+		eltree_remove(cl);
+
+	list_del(&cl->dlist);
+
+	/*
+	 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+	 * needs to be called explicitly to remove a class from vttree.
+	 */
+}
+
+/*
+ * hack to get length of first packet in queue.
+ */
+static unsigned int
+qdisc_peek_len(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+	unsigned int len;
+
+	skb = sch->dequeue(sch);
+	if (skb == NULL) {
+		if (net_ratelimit())
+			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
+		return 0;
+	}
+	len = skb->len;
+	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
+		if (net_ratelimit())
+			printk("qdisc_peek_len: failed to requeue\n");
+		return 0;
+	}
+	return len;
+}
+
+static void
+hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
+{
+	unsigned int len = cl->qdisc->q.qlen;
+
+	qdisc_reset(cl->qdisc);
+	if (len > 0) {
+		update_vf(cl, 0, 0);
+		set_passive(cl);
+		sch->q.qlen -= len;
+	}
+}
+
+static void
+hfsc_adjust_levels(struct hfsc_class *cl)
+{
+	struct hfsc_class *p;
+	unsigned int level;
+
+	do {
+		level = 0;
+		list_for_each_entry(p, &cl->children, siblings) {
+			if (p->level > level)
+				level = p->level;
+		}
+		cl->level = level + 1;
+	} while ((cl = cl->cl_parent) != NULL);
+}
+
+static inline unsigned int
+hfsc_hash(u32 h)
+{
+	h ^= h >> 8;
+	h ^= h >> 4;
+
+	return h & (HFSC_HSIZE - 1);
+}
+
+static inline struct hfsc_class *
+hfsc_find_class(u32 classid, struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+
+	list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
+		if (cl->classid == classid)
+			return cl;
+	}
+	return NULL;
+}
+
+static void
+hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc,
+                u64 cur_time)
+{
+	sc2isc(rsc, &cl->cl_rsc);
+	rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+	cl->cl_eligible = cl->cl_deadline;
+	if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
+		cl->cl_eligible.dx = 0;
+		cl->cl_eligible.dy = 0;
+	}
+	cl->cl_flags |= HFSC_RSC;
+}
+
+static void
+hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
+{
+	sc2isc(fsc, &cl->cl_fsc);
+	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+	cl->cl_flags |= HFSC_FSC;
+}
+
+static void
+hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
+                u64 cur_time)
+{
+	sc2isc(usc, &cl->cl_usc);
+	rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total);
+	cl->cl_flags |= HFSC_USC;
+}
+
+static int
+hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+                  struct rtattr **tca, unsigned long *arg)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl = (struct hfsc_class *)*arg;
+	struct hfsc_class *parent = NULL;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_HFSC_MAX];
+	struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
+	u64 cur_time;
+
+	if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt))
+		return -EINVAL;
+
+	if (tb[TCA_HFSC_RSC-1]) {
+		if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc))
+			return -EINVAL;
+		rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]);
+		if (rsc->m1 == 0 && rsc->m2 == 0)
+			rsc = NULL;
+	}
+
+	if (tb[TCA_HFSC_FSC-1]) {
+		if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc))
+			return -EINVAL;
+		fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
+		if (fsc->m1 == 0 && fsc->m2 == 0)
+			fsc = NULL;
+	}
+
+	if (tb[TCA_HFSC_USC-1]) {
+		if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc))
+			return -EINVAL;
+		usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
+		if (usc->m1 == 0 && usc->m2 == 0)
+			usc = NULL;
+	}
+
+	if (cl != NULL) {
+		if (parentid) {
+			if (cl->cl_parent && cl->cl_parent->classid != parentid)
+				return -EINVAL;
+			if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
+				return -EINVAL;
+		}
+		PSCHED_GET_TIME(cur_time);
+
+		sch_tree_lock(sch);
+		if (rsc != NULL)
+			hfsc_change_rsc(cl, rsc, cur_time);
+		if (fsc != NULL)
+			hfsc_change_fsc(cl, fsc);
+		if (usc != NULL)
+			hfsc_change_usc(cl, usc, cur_time);
+
+		if (cl->qdisc->q.qlen != 0) {
+			if (cl->cl_flags & HFSC_RSC)
+				update_ed(cl, qdisc_peek_len(cl->qdisc));
+			if (cl->cl_flags & HFSC_FSC)
+				update_vf(cl, 0, cur_time);
+		}
+		sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1])
+			gen_replace_estimator(&cl->bstats, &cl->rate_est,
+				cl->stats_lock, tca[TCA_RATE-1]);
+#endif
+		return 0;
+	}
+
+	if (parentid == TC_H_ROOT)
+		return -EEXIST;
+
+	parent = &q->root;
+	if (parentid) {
+		parent = hfsc_find_class(parentid, sch);
+		if (parent == NULL)
+			return -ENOENT;
+	}
+
+	if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
+		return -EINVAL;
+	if (hfsc_find_class(classid, sch))
+		return -EEXIST;
+
+	if (rsc == NULL && fsc == NULL)
+		return -EINVAL;
+
+	cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
+	if (cl == NULL)
+		return -ENOBUFS;
+	memset(cl, 0, sizeof(struct hfsc_class));
+
+	if (rsc != NULL)
+		hfsc_change_rsc(cl, rsc, 0);
+	if (fsc != NULL)
+		hfsc_change_fsc(cl, fsc);
+	if (usc != NULL)
+		hfsc_change_usc(cl, usc, 0);
+
+	cl->refcnt    = 1;
+	cl->classid   = classid;
+	cl->sched     = q;
+	cl->cl_parent = parent;
+	cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+	if (cl->qdisc == NULL)
+		cl->qdisc = &noop_qdisc;
+	cl->stats_lock = &sch->dev->queue_lock;
+	INIT_LIST_HEAD(&cl->children);
+	cl->vt_tree = RB_ROOT;
+	cl->cf_tree = RB_ROOT;
+
+	sch_tree_lock(sch);
+	list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
+	list_add_tail(&cl->siblings, &parent->children);
+	if (parent->level == 0)
+		hfsc_purge_queue(sch, parent);
+	hfsc_adjust_levels(parent);
+	cl->cl_pcvtoff = parent->cl_cvtoff;
+	sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+	if (tca[TCA_RATE-1])
+		gen_new_estimator(&cl->bstats, &cl->rate_est,
+			cl->stats_lock, tca[TCA_RATE-1]);
+#endif
+	*arg = (unsigned long)cl;
+	return 0;
+}
+
+static void
+hfsc_destroy_filters(struct tcf_proto **fl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = *fl) != NULL) {
+		*fl = tp->next;
+		tcf_destroy(tp);
+	}
+}
+
+static void
+hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+
+	hfsc_destroy_filters(&cl->filter_list);
+	qdisc_destroy(cl->qdisc);
+#ifdef CONFIG_NET_ESTIMATOR
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
+#endif
+	if (cl != &q->root)
+		kfree(cl);
+}
+
+static int
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+		return -EBUSY;
+
+	sch_tree_lock(sch);
+
+	list_del(&cl->hlist);
+	list_del(&cl->siblings);
+	hfsc_adjust_levels(cl->cl_parent);
+	hfsc_purge_queue(sch, cl);
+	if (--cl->refcnt == 0)
+		hfsc_destroy_class(sch, cl);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static struct hfsc_class *
+hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	struct tcf_result res;
+	struct tcf_proto *tcf;
+	int result;
+
+	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 &&
+	    (cl = hfsc_find_class(skb->priority, sch)) != NULL)
+		if (cl->level == 0)
+			return cl;
+
+	*qerr = NET_XMIT_DROP;
+	tcf = q->root.filter_list;
+	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_QUEUED:
+		case TC_ACT_STOLEN: 
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT: 
+			return NULL;
+		}
+#elif defined(CONFIG_NET_CLS_POLICE)
+		if (result == TC_POLICE_SHOT)
+			return NULL;
+#endif
+		if ((cl = (struct hfsc_class *)res.class) == NULL) {
+			if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+				break; /* filter selected invalid classid */
+		}
+
+		if (cl->level == 0)
+			return cl; /* hit leaf class */
+
+		/* apply inner filter chain */
+		tcf = cl->filter_list;
+	}
+
+	/* classification failed, try default class */
+	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+	if (cl == NULL || cl->level > 0)
+		return NULL;
+
+	return cl;
+}
+
+static int
+hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+                 struct Qdisc **old)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	if (cl == NULL)
+		return -ENOENT;
+	if (cl->level > 0)
+		return -EINVAL;
+	if (new == NULL) {
+		new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+		if (new == NULL)
+			new = &noop_qdisc;
+	}
+
+	sch_tree_lock(sch);
+	hfsc_purge_queue(sch, cl);
+	*old = xchg(&cl->qdisc, new);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static struct Qdisc *
+hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	if (cl != NULL && cl->level == 0)
+		return cl->qdisc;
+
+	return NULL;
+}
+
+static unsigned long
+hfsc_get_class(struct Qdisc *sch, u32 classid)
+{
+	struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+	if (cl != NULL)
+		cl->refcnt++;
+
+	return (unsigned long)cl;
+}
+
+static void
+hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	if (--cl->refcnt == 0)
+		hfsc_destroy_class(sch, cl);
+}
+
+static unsigned long
+hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+	struct hfsc_class *p = (struct hfsc_class *)parent;
+	struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+	if (cl != NULL) {
+		if (p != NULL && p->level <= cl->level)
+			return 0;
+		cl->filter_cnt++;
+	}
+
+	return (unsigned long)cl;
+}
+
+static void
+hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	cl->filter_cnt--;
+}
+
+static struct tcf_proto **
+hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+	if (cl == NULL)
+		cl = &q->root;
+
+	return &cl->filter_list;
+}
+
+static int
+hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
+{
+	struct tc_service_curve tsc;
+
+	tsc.m1 = sm2m(sc->sm1);
+	tsc.d  = dx2d(sc->dx);
+	tsc.m2 = sm2m(sc->sm2);
+	RTA_PUT(skb, attr, sizeof(tsc), &tsc);
+
+	return skb->len;
+
+ rtattr_failure:
+	return -1;
+}
+
+static inline int
+hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
+{
+	if ((cl->cl_flags & HFSC_RSC) &&
+	    (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
+		goto rtattr_failure;
+
+	if ((cl->cl_flags & HFSC_FSC) &&
+	    (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
+		goto rtattr_failure;
+
+	if ((cl->cl_flags & HFSC_USC) &&
+	    (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
+		goto rtattr_failure;
+
+	return skb->len;
+
+ rtattr_failure:
+	return -1;
+}
+
+static int
+hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
+                struct tcmsg *tcm)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta = (struct rtattr *)b;
+
+	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
+	tcm->tcm_handle = cl->classid;
+	if (cl->level == 0)
+		tcm->tcm_info = cl->qdisc->handle;
+
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	if (hfsc_dump_curves(skb, cl) < 0)
+		goto rtattr_failure;
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+ rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+	struct gnet_dump *d)
+{
+	struct hfsc_class *cl = (struct hfsc_class *)arg;
+	struct tc_hfsc_stats xstats;
+
+	cl->qstats.qlen = cl->qdisc->q.qlen;
+	xstats.level   = cl->level;
+	xstats.period  = cl->cl_vtperiod;
+	xstats.work    = cl->cl_total;
+	xstats.rtwork  = cl->cl_cumul;
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+#endif
+	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+
+
+static void
+hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < HFSC_HSIZE; i++) {
+		list_for_each_entry(cl, &q->clhash[i], hlist) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static void
+hfsc_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
+}
+
+static void
+hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	u64 next_time = 0;
+	long delay;
+
+	if ((cl = eltree_get_minel(q)) != NULL)
+		next_time = cl->cl_e;
+	if (q->root.cl_cfmin != 0) {
+		if (next_time == 0 || next_time > q->root.cl_cfmin)
+			next_time = q->root.cl_cfmin;
+	}
+	ASSERT(next_time != 0);
+	delay = next_time - cur_time;
+	delay = PSCHED_US2JIFFIE(delay);
+
+	sch->flags |= TCQ_F_THROTTLED;
+	mod_timer(&q->wd_timer, jiffies + delay);
+}
+
+static int
+hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct tc_hfsc_qopt *qopt;
+	unsigned int i;
+
+	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -EINVAL;
+	qopt = RTA_DATA(opt);
+
+	sch->stats_lock = &sch->dev->queue_lock;
+
+	q->defcls = qopt->defcls;
+	for (i = 0; i < HFSC_HSIZE; i++)
+		INIT_LIST_HEAD(&q->clhash[i]);
+	q->eligible = RB_ROOT;
+	INIT_LIST_HEAD(&q->droplist);
+	skb_queue_head_init(&q->requeue);
+
+	q->root.refcnt  = 1;
+	q->root.classid = sch->handle;
+	q->root.sched   = q;
+	q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+	if (q->root.qdisc == NULL)
+		q->root.qdisc = &noop_qdisc;
+	q->root.stats_lock = &sch->dev->queue_lock;
+	INIT_LIST_HEAD(&q->root.children);
+	q->root.vt_tree = RB_ROOT;
+	q->root.cf_tree = RB_ROOT;
+
+	list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
+
+	init_timer(&q->wd_timer);
+	q->wd_timer.function = hfsc_watchdog;
+	q->wd_timer.data = (unsigned long)sch;
+
+	return 0;
+}
+
+static int
+hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct tc_hfsc_qopt *qopt;
+
+	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -EINVAL;
+	qopt = RTA_DATA(opt);
+
+	sch_tree_lock(sch);
+	q->defcls = qopt->defcls;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static void
+hfsc_reset_class(struct hfsc_class *cl)
+{
+	cl->cl_total        = 0;
+	cl->cl_cumul        = 0;
+	cl->cl_d            = 0;
+	cl->cl_e            = 0;
+	cl->cl_vt           = 0;
+	cl->cl_vtadj        = 0;
+	cl->cl_vtoff        = 0;
+	cl->cl_cvtmin       = 0;
+	cl->cl_cvtmax       = 0;
+	cl->cl_cvtoff       = 0;
+	cl->cl_pcvtoff      = 0;
+	cl->cl_vtperiod     = 0;
+	cl->cl_parentperiod = 0;
+	cl->cl_f            = 0;
+	cl->cl_myf          = 0;
+	cl->cl_myfadj       = 0;
+	cl->cl_cfmin        = 0;
+	cl->cl_nactive      = 0;
+
+	cl->vt_tree = RB_ROOT;
+	cl->cf_tree = RB_ROOT;
+	qdisc_reset(cl->qdisc);
+
+	if (cl->cl_flags & HFSC_RSC)
+		rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
+	if (cl->cl_flags & HFSC_FSC)
+		rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
+	if (cl->cl_flags & HFSC_USC)
+		rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
+}
+
+static void
+hfsc_reset_qdisc(struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	unsigned int i;
+
+	for (i = 0; i < HFSC_HSIZE; i++) {
+		list_for_each_entry(cl, &q->clhash[i], hlist)
+			hfsc_reset_class(cl);
+	}
+	__skb_queue_purge(&q->requeue);
+	q->eligible = RB_ROOT;
+	INIT_LIST_HEAD(&q->droplist);
+	del_timer(&q->wd_timer);
+	sch->flags &= ~TCQ_F_THROTTLED;
+	sch->q.qlen = 0;
+}
+
+static void
+hfsc_destroy_qdisc(struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl, *next;
+	unsigned int i;
+
+	for (i = 0; i < HFSC_HSIZE; i++) {
+		list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
+			hfsc_destroy_class(sch, cl);
+	}
+	__skb_queue_purge(&q->requeue);
+	del_timer(&q->wd_timer);
+}
+
+static int
+hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	unsigned char *b = skb->tail;
+	struct tc_hfsc_qopt qopt;
+
+	qopt.defcls = q->defcls;
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+	return skb->len;
+
+ rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct hfsc_class *cl;
+	unsigned int len;
+	int err;
+
+	cl = hfsc_classify(skb, sch, &err);
+	if (cl == NULL) {
+		if (err == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return err;
+	}
+
+	len = skb->len;
+	err = cl->qdisc->enqueue(skb, cl->qdisc);
+	if (unlikely(err != NET_XMIT_SUCCESS)) {
+		cl->qstats.drops++;
+		sch->qstats.drops++;
+		return err;
+	}
+
+	if (cl->qdisc->q.qlen == 1)
+		set_active(cl, len);
+
+	cl->bstats.packets++;
+	cl->bstats.bytes += len;
+	sch->bstats.packets++;
+	sch->bstats.bytes += len;
+	sch->q.qlen++;
+
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *
+hfsc_dequeue(struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	struct sk_buff *skb;
+	u64 cur_time;
+	unsigned int next_len;
+	int realtime = 0;
+
+	if (sch->q.qlen == 0)
+		return NULL;
+	if ((skb = __skb_dequeue(&q->requeue)))
+		goto out;
+
+	PSCHED_GET_TIME(cur_time);
+
+	/*
+	 * if there are eligible classes, use real-time criteria.
+	 * find the class with the minimum deadline among
+	 * the eligible classes.
+	 */
+	if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+		realtime = 1;
+	} else {
+		/*
+		 * use link-sharing criteria
+		 * get the class with the minimum vt in the hierarchy
+		 */
+		cl = vttree_get_minvt(&q->root, cur_time);
+		if (cl == NULL) {
+			sch->qstats.overlimits++;
+			hfsc_schedule_watchdog(sch, cur_time);
+			return NULL;
+		}
+	}
+
+	skb = cl->qdisc->dequeue(cl->qdisc);
+	if (skb == NULL) {
+		if (net_ratelimit())
+			printk("HFSC: Non-work-conserving qdisc ?\n");
+		return NULL;
+	}
+
+	update_vf(cl, skb->len, cur_time);
+	if (realtime)
+		cl->cl_cumul += skb->len;
+
+	if (cl->qdisc->q.qlen != 0) {
+		if (cl->cl_flags & HFSC_RSC) {
+			/* update ed */
+			next_len = qdisc_peek_len(cl->qdisc);
+			if (realtime)
+				update_ed(cl, next_len);
+			else
+				update_d(cl, next_len);
+		}
+	} else {
+		/* the class becomes passive */
+		set_passive(cl);
+	}
+
+ out:
+	sch->flags &= ~TCQ_F_THROTTLED;
+	sch->q.qlen--;
+
+	return skb;
+}
+
+static int
+hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+
+	__skb_queue_head(&q->requeue, skb);
+	sch->q.qlen++;
+	sch->qstats.requeues++;
+	return NET_XMIT_SUCCESS;
+}
+
+static unsigned int
+hfsc_drop(struct Qdisc *sch)
+{
+	struct hfsc_sched *q = qdisc_priv(sch);
+	struct hfsc_class *cl;
+	unsigned int len;
+
+	list_for_each_entry(cl, &q->droplist, dlist) {
+		if (cl->qdisc->ops->drop != NULL &&
+		    (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
+			if (cl->qdisc->q.qlen == 0) {
+				update_vf(cl, 0, 0);
+				set_passive(cl);
+			} else {
+				list_move_tail(&cl->dlist, &q->droplist);
+			}
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+			sch->q.qlen--;
+			return len;
+		}
+	}
+	return 0;
+}
+
+static struct Qdisc_class_ops hfsc_class_ops = {
+	.change		= hfsc_change_class,
+	.delete		= hfsc_delete_class,
+	.graft		= hfsc_graft_class,
+	.leaf		= hfsc_class_leaf,
+	.get		= hfsc_get_class,
+	.put		= hfsc_put_class,
+	.bind_tcf	= hfsc_bind_tcf,
+	.unbind_tcf	= hfsc_unbind_tcf,
+	.tcf_chain	= hfsc_tcf_chain,
+	.dump		= hfsc_dump_class,
+	.dump_stats	= hfsc_dump_class_stats,
+	.walk		= hfsc_walk
+};
+
+static struct Qdisc_ops hfsc_qdisc_ops = {
+	.id		= "hfsc",
+	.init		= hfsc_init_qdisc,
+	.change		= hfsc_change_qdisc,
+	.reset		= hfsc_reset_qdisc,
+	.destroy	= hfsc_destroy_qdisc,
+	.dump		= hfsc_dump_qdisc,
+	.enqueue	= hfsc_enqueue,
+	.dequeue	= hfsc_dequeue,
+	.requeue	= hfsc_requeue,
+	.drop		= hfsc_drop,
+	.cl_ops		= &hfsc_class_ops,
+	.priv_size	= sizeof(struct hfsc_sched),
+	.owner		= THIS_MODULE
+};
+
+static int __init
+hfsc_init(void)
+{
+	return register_qdisc(&hfsc_qdisc_ops);
+}
+
+static void __exit
+hfsc_cleanup(void)
+{
+	unregister_qdisc(&hfsc_qdisc_ops);
+}
+
+MODULE_LICENSE("GPL");
+module_init(hfsc_init);
+module_exit(hfsc_cleanup);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
new file mode 100644
index 00000000000..a85935e7d53
--- /dev/null
+++ b/net/sched/sch_htb.c
@@ -0,0 +1,1759 @@
+/* vim: ts=8 sw=8
+ * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Martin Devera, <devik@cdi.cz>
+ *
+ * Credits (in time order) for older HTB versions:
+ *              Stef Coene <stef.coene@docum.org>
+ *			HTB support at LARTC mailing list
+ *		Ondrej Kraus, <krauso@barr.cz> 
+ *			found missing INIT_QDISC(htb)
+ *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
+ *			helped a lot to locate nasty class stall bug
+ *		Andi Kleen, Jamal Hadi, Bert Hubert
+ *			code review and helpful comments on shaping
+ *		Tomasz Wrona, <tw@eter.tym.pl>
+ *			created test case so that I was able to fix nasty bug
+ *		Wilfried Weissmann
+ *			spotted bug in dequeue code and helped with fix
+ *		Jiri Fojtasek
+ *			fixed requeue routine
+ *		and many others. thanks.
+ *
+ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/rbtree.h>
+
+/* HTB algorithm.
+    Author: devik@cdi.cz
+    ========================================================================
+    HTB is like TBF with multiple classes. It is also similar to CBQ because
+    it allows to assign priority to each class in hierarchy. 
+    In fact it is another implementation of Floyd's formal sharing.
+
+    Levels:
+    Each class is assigned level. Leaf has ALWAYS level 0 and root 
+    classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
+    one less than their parent.
+*/
+
+#define HTB_HSIZE 16	/* classid hash size */
+#define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#undef HTB_DEBUG	/* compile debugging support (activated by tc tool) */
+#define HTB_RATECM 1    /* whether to use rate computer */
+#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
+#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
+#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
+#define HTB_VER 0x30011	/* major must be matched with number suplied by TC as version */
+
+#if HTB_VER >> 16 != TC_HTB_PROTOVER
+#error "Mismatched sch_htb.c and pkt_sch.h"
+#endif
+
+/* debugging support; S is subsystem, these are defined:
+  0 - netlink messages
+  1 - enqueue
+  2 - drop & requeue
+  3 - dequeue main
+  4 - dequeue one prio DRR part
+  5 - dequeue class accounting
+  6 - class overlimit status computation
+  7 - hint tree
+  8 - event queue
+ 10 - rate estimator
+ 11 - classifier 
+ 12 - fast dequeue cache
+
+ L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
+ q->debug uint32 contains 16 2-bit fields one for subsystem starting
+ from LSB
+ */
+#ifdef HTB_DEBUG
+#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
+#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
+	printk(KERN_DEBUG FMT,##ARG)
+#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
+#define HTB_PASSQ q,
+#define HTB_ARGQ struct htb_sched *q,
+#define static
+#undef __inline__
+#define __inline__
+#undef inline
+#define inline
+#define HTB_CMAGIC 0xFEFAFEF1
+#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
+		if ((N)->rb_color == -1) break; \
+		rb_erase(N,R); \
+		(N)->rb_color = -1; } while (0)
+#else
+#define HTB_DBG_COND(S,L) (0)
+#define HTB_DBG(S,L,FMT,ARG...)
+#define HTB_PASSQ
+#define HTB_ARGQ
+#define HTB_CHCL(cl)
+#define htb_safe_rb_erase(N,R) rb_erase(N,R)
+#endif
+
+
+/* used internaly to keep status of single class */
+enum htb_cmode {
+    HTB_CANT_SEND,		/* class can't send and can't borrow */
+    HTB_MAY_BORROW,		/* class can't send but may borrow */
+    HTB_CAN_SEND		/* class can send */
+};
+
+/* interior & leaf nodes; props specific to leaves are marked L: */
+struct htb_class
+{
+#ifdef HTB_DEBUG
+	unsigned magic;
+#endif
+    /* general class parameters */
+    u32 classid;
+    struct gnet_stats_basic bstats;
+    struct gnet_stats_queue qstats;
+    struct gnet_stats_rate_est rate_est;
+    struct tc_htb_xstats xstats;/* our special stats */
+    int refcnt;			/* usage count of this class */
+
+#ifdef HTB_RATECM
+    /* rate measurement counters */
+    unsigned long rate_bytes,sum_bytes;
+    unsigned long rate_packets,sum_packets;
+#endif
+
+    /* topology */
+    int level;			/* our level (see above) */
+    struct htb_class *parent;	/* parent class */
+    struct list_head hlist;	/* classid hash list item */
+    struct list_head sibling;	/* sibling list item */
+    struct list_head children;	/* children list */
+
+    union {
+	    struct htb_class_leaf {
+		    struct Qdisc *q;
+		    int prio;
+		    int aprio;	
+		    int quantum;
+		    int deficit[TC_HTB_MAXDEPTH];
+		    struct list_head drop_list;
+	    } leaf;
+	    struct htb_class_inner {
+		    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
+		    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
+            /* When class changes from state 1->2 and disconnects from 
+               parent's feed then we lost ptr value and start from the
+              first child again. Here we store classid of the
+              last valid ptr (used when ptr is NULL). */
+              u32 last_ptr_id[TC_HTB_NUMPRIO];
+	    } inner;
+    } un;
+    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
+    struct rb_node pq_node;		 /* node for event queue */
+    unsigned long pq_key;	/* the same type as jiffies global */
+    
+    int prio_activity;		/* for which prios are we active */
+    enum htb_cmode cmode;	/* current mode of the class */
+
+    /* class attached filters */
+    struct tcf_proto *filter_list;
+    int filter_cnt;
+
+    int warned;		/* only one warning about non work conserving .. */
+
+    /* token bucket parameters */
+    struct qdisc_rate_table *rate;	/* rate table of the class itself */
+    struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
+    long buffer,cbuffer;		/* token bucket depth/rate */
+    long mbuffer;			/* max wait time */
+    long tokens,ctokens;		/* current number of tokens */
+    psched_time_t t_c;			/* checkpoint time */
+};
+
+/* TODO: maybe compute rate when size is too large .. or drop ? */
+static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
+	int size)
+{ 
+    int slot = size >> rate->rate.cell_log;
+    if (slot > 255) {
+	cl->xstats.giants++;
+	slot = 255;
+    }
+    return rate->data[slot];
+}
+
+struct htb_sched
+{
+    struct list_head root;			/* root classes list */
+    struct list_head hash[HTB_HSIZE];		/* hashed by classid */
+    struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
+    
+    /* self list - roots of self generating tree */
+    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+    int row_mask[TC_HTB_MAXDEPTH];
+    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+
+    /* self wait list - roots of wait PQs per row */
+    struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+
+    /* time of nearest event per level (row) */
+    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+
+    /* cached value of jiffies in dequeue */
+    unsigned long jiffies;
+
+    /* whether we hit non-work conserving class during this dequeue; we use */
+    int nwc_hit;	/* this to disable mindelay complaint in dequeue */
+
+    int defcls;		/* class where unclassified flows go to */
+    u32 debug;		/* subsystem debug levels */
+
+    /* filters for qdisc itself */
+    struct tcf_proto *filter_list;
+    int filter_cnt;
+
+    int rate2quantum;		/* quant = rate / rate2quantum */
+    psched_time_t now;		/* cached dequeue time */
+    struct timer_list timer;	/* send delay timer */
+#ifdef HTB_RATECM
+    struct timer_list rttim;	/* rate computer timer */
+    int recmp_bucket;		/* which hash bucket to recompute next */
+#endif
+    
+    /* non shaped skbs; let them go directly thru */
+    struct sk_buff_head direct_queue;
+    int direct_qlen;  /* max qlen of above */
+
+    long direct_pkts;
+};
+
+/* compute hash of size HTB_HSIZE for given handle */
+static __inline__ int htb_hash(u32 h) 
+{
+#if HTB_HSIZE != 16
+ #error "Declare new hash for your HTB_HSIZE"
+#endif
+    h ^= h>>8;	/* stolen from cbq_hash */
+    h ^= h>>4;
+    return h & 0xf;
+}
+
+/* find class in global hash table using given handle */
+static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct list_head *p;
+	if (TC_H_MAJ(handle) != sch->handle) 
+		return NULL;
+	
+	list_for_each (p,q->hash+htb_hash(handle)) {
+		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		if (cl->classid == handle)
+			return cl;
+	}
+	return NULL;
+}
+
+/**
+ * htb_classify - classify a packet into class
+ *
+ * It returns NULL if the packet should be dropped or -1 if the packet
+ * should be passed directly thru. In all other cases leaf class is returned.
+ * We allow direct class selection by classid in priority. The we examine
+ * filters in qdisc and in inner nodes (if higher filter points to the inner
+ * node). If we end up with classid MAJOR:0 we enqueue the skb into special
+ * internal fifo (direct). These packets then go directly thru. If we still 
+ * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
+ * then finish and return direct queue.
+ */
+#define HTB_DIRECT (struct htb_class*)-1
+static inline u32 htb_classid(struct htb_class *cl)
+{
+	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
+}
+
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl;
+	struct tcf_result res;
+	struct tcf_proto *tcf;
+	int result;
+
+	/* allow to select class by setting skb->priority to valid classid;
+	   note that nfmark can be used too by attaching filter fw with no
+	   rules in it */
+	if (skb->priority == sch->handle)
+		return HTB_DIRECT;  /* X:0 (direct flow) selected */
+	if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
+		return cl;
+
+	*qerr = NET_XMIT_DROP;
+	tcf = q->filter_list;
+	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_QUEUED:
+		case TC_ACT_STOLEN: 
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT:
+			return NULL;
+		}
+#elif defined(CONFIG_NET_CLS_POLICE)
+		if (result == TC_POLICE_SHOT)
+			return HTB_DIRECT;
+#endif
+		if ((cl = (void*)res.class) == NULL) {
+			if (res.classid == sch->handle)
+				return HTB_DIRECT;  /* X:0 (direct flow) */
+			if ((cl = htb_find(res.classid,sch)) == NULL)
+				break; /* filter selected invalid classid */
+		}
+		if (!cl->level)
+			return cl; /* we hit leaf; return it */
+
+		/* we have got inner class; apply inner filter chain */
+		tcf = cl->filter_list;
+	}
+	/* classification failed; try to use default class */
+	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+	if (!cl || cl->level)
+		return HTB_DIRECT; /* bad default .. this is safe bet */
+	return cl;
+}
+
+#ifdef HTB_DEBUG
+static void htb_next_rb_node(struct rb_node **n);
+#define HTB_DUMTREE(root,memb) if(root) { \
+	struct rb_node *n = (root)->rb_node; \
+	while (n->rb_left) n = n->rb_left; \
+	while (n) { \
+		struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
+		printk(" %x",cl->classid); htb_next_rb_node (&n); \
+	} }
+
+static void htb_debug_dump (struct htb_sched *q)
+{
+	int i,p;
+	printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
+	/* rows */
+	for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
+		printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
+		for (p=0;p<TC_HTB_NUMPRIO;p++) {
+			if (!q->row[i][p].rb_node) continue;
+			printk(" p%d:",p);
+			HTB_DUMTREE(q->row[i]+p,node[p]);
+		}
+		printk("\n");
+	}
+	/* classes */
+	for (i = 0; i < HTB_HSIZE; i++) {
+		struct list_head *l;
+		list_for_each (l,q->hash+i) {
+			struct htb_class *cl = list_entry(l,struct htb_class,hlist);
+			long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
+			printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
+					"pa=%x f:",
+				cl->classid,cl->cmode,cl->tokens,cl->ctokens,
+				cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
+				cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
+			if (cl->level)
+			for (p=0;p<TC_HTB_NUMPRIO;p++) {
+				if (!cl->un.inner.feed[p].rb_node) continue;
+				printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
+				HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
+			}
+			printk("\n");
+		}
+	}
+}
+#endif
+/**
+ * htb_add_to_id_tree - adds class to the round robin list
+ *
+ * Routine adds class to the list (actually tree) sorted by classid.
+ * Make sure that class is not already on such list for given prio.
+ */
+static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
+		struct htb_class *cl,int prio)
+{
+	struct rb_node **p = &root->rb_node, *parent = NULL;
+	HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
+#ifdef HTB_DEBUG
+	if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
+	HTB_CHCL(cl);
+	if (*p) {
+		struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
+		HTB_CHCL(x);
+	}
+#endif
+	while (*p) {
+		struct htb_class *c; parent = *p;
+		c = rb_entry(parent, struct htb_class, node[prio]);
+		HTB_CHCL(c);
+		if (cl->classid > c->classid)
+			p = &parent->rb_right;
+		else 
+			p = &parent->rb_left;
+	}
+	rb_link_node(&cl->node[prio], parent, p);
+	rb_insert_color(&cl->node[prio], root);
+}
+
+/**
+ * htb_add_to_wait_tree - adds class to the event queue with delay
+ *
+ * The class is added to priority event queue to indicate that class will
+ * change its mode in cl->pq_key microseconds. Make sure that class is not
+ * already in the queue.
+ */
+static void htb_add_to_wait_tree (struct htb_sched *q,
+		struct htb_class *cl,long delay,int debug_hint)
+{
+	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
+	HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
+#ifdef HTB_DEBUG
+	if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
+	HTB_CHCL(cl);
+	if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
+		printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
+#endif
+	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
+	if (cl->pq_key == q->jiffies)
+		cl->pq_key++;
+
+	/* update the nearest event cache */
+	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+		q->near_ev_cache[cl->level] = cl->pq_key;
+	
+	while (*p) {
+		struct htb_class *c; parent = *p;
+		c = rb_entry(parent, struct htb_class, pq_node);
+		if (time_after_eq(cl->pq_key, c->pq_key))
+			p = &parent->rb_right;
+		else 
+			p = &parent->rb_left;
+	}
+	rb_link_node(&cl->pq_node, parent, p);
+	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
+}
+
+/**
+ * htb_next_rb_node - finds next node in binary tree
+ *
+ * When we are past last key we return NULL.
+ * Average complexity is 2 steps per call.
+ */
+static void htb_next_rb_node(struct rb_node **n)
+{
+	*n = rb_next(*n);
+}
+
+/**
+ * htb_add_class_to_row - add class to its row
+ *
+ * The class is added to row at priorities marked in mask.
+ * It does nothing if mask == 0.
+ */
+static inline void htb_add_class_to_row(struct htb_sched *q, 
+		struct htb_class *cl,int mask)
+{
+	HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
+			cl->classid,mask,q->row_mask[cl->level]);
+	HTB_CHCL(cl);
+	q->row_mask[cl->level] |= mask;
+	while (mask) {
+		int prio = ffz(~mask);
+		mask &= ~(1 << prio);
+		htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+	}
+}
+
+/**
+ * htb_remove_class_from_row - removes class from its row
+ *
+ * The class is removed from row at priorities marked in mask.
+ * It does nothing if mask == 0.
+ */
+static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
+		struct htb_class *cl,int mask)
+{
+	int m = 0;
+	HTB_CHCL(cl);
+	while (mask) {
+		int prio = ffz(~mask);
+		mask &= ~(1 << prio);
+		if (q->ptr[cl->level][prio] == cl->node+prio)
+			htb_next_rb_node(q->ptr[cl->level]+prio);
+		htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
+		if (!q->row[cl->level][prio].rb_node) 
+			m |= 1 << prio;
+	}
+	HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
+			cl->classid,mask,q->row_mask[cl->level],m);
+	q->row_mask[cl->level] &= ~m;
+}
+
+/**
+ * htb_activate_prios - creates active classe's feed chain
+ *
+ * The class is connected to ancestors and/or appropriate rows
+ * for priorities it is participating on. cl->cmode must be new 
+ * (activated) mode. It does nothing if cl->prio_activity == 0.
+ */
+static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+{
+	struct htb_class *p = cl->parent;
+	long m,mask = cl->prio_activity;
+	HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
+	HTB_CHCL(cl);
+
+	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
+		HTB_CHCL(p);
+		m = mask; while (m) {
+			int prio = ffz(~m);
+			m &= ~(1 << prio);
+			
+			if (p->un.inner.feed[prio].rb_node)
+				/* parent already has its feed in use so that
+				   reset bit in mask as parent is already ok */
+				mask &= ~(1 << prio);
+			
+			htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+		}
+		HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
+				p->classid,p->prio_activity,mask,p->cmode);
+		p->prio_activity |= mask;
+		cl = p; p = cl->parent;
+		HTB_CHCL(cl);
+	}
+	if (cl->cmode == HTB_CAN_SEND && mask)
+		htb_add_class_to_row(q,cl,mask);
+}
+
+/**
+ * htb_deactivate_prios - remove class from feed chain
+ *
+ * cl->cmode must represent old mode (before deactivation). It does 
+ * nothing if cl->prio_activity == 0. Class is removed from all feed
+ * chains and rows.
+ */
+static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
+{
+	struct htb_class *p = cl->parent;
+	long m,mask = cl->prio_activity;
+	HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
+	HTB_CHCL(cl);
+
+	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
+		m = mask; mask = 0; 
+		while (m) {
+			int prio = ffz(~m);
+			m &= ~(1 << prio);
+			
+			if (p->un.inner.ptr[prio] == cl->node+prio) {
+				/* we are removing child which is pointed to from
+				   parent feed - forget the pointer but remember
+				   classid */
+				p->un.inner.last_ptr_id[prio] = cl->classid;
+				p->un.inner.ptr[prio] = NULL;
+			}
+			
+			htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
+			
+			if (!p->un.inner.feed[prio].rb_node) 
+				mask |= 1 << prio;
+		}
+		HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
+				p->classid,p->prio_activity,mask,p->cmode);
+		p->prio_activity &= ~mask;
+		cl = p; p = cl->parent;
+		HTB_CHCL(cl);
+	}
+	if (cl->cmode == HTB_CAN_SEND && mask) 
+		htb_remove_class_from_row(q,cl,mask);
+}
+
+/**
+ * htb_class_mode - computes and returns current class mode
+ *
+ * It computes cl's mode at time cl->t_c+diff and returns it. If mode
+ * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
+ * from now to time when cl will change its state. 
+ * Also it is worth to note that class mode doesn't change simply
+ * at cl->{c,}tokens == 0 but there can rather be hysteresis of 
+ * 0 .. -cl->{c,}buffer range. It is meant to limit number of
+ * mode transitions per time unit. The speed gain is about 1/6.
+ */
+static __inline__ enum htb_cmode 
+htb_class_mode(struct htb_class *cl,long *diff)
+{
+    long toks;
+
+    if ((toks = (cl->ctokens + *diff)) < (
+#if HTB_HYSTERESIS
+	    cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
+#endif
+       	    0)) {
+	    *diff = -toks;
+	    return HTB_CANT_SEND;
+    }
+    if ((toks = (cl->tokens + *diff)) >= (
+#if HTB_HYSTERESIS
+	    cl->cmode == HTB_CAN_SEND ? -cl->buffer :
+#endif
+	    0))
+	    return HTB_CAN_SEND;
+
+    *diff = -toks;
+    return HTB_MAY_BORROW;
+}
+
+/**
+ * htb_change_class_mode - changes classe's mode
+ *
+ * This should be the only way how to change classe's mode under normal
+ * cirsumstances. Routine will update feed lists linkage, change mode
+ * and add class to the wait event queue if appropriate. New mode should
+ * be different from old one and cl->pq_key has to be valid if changing
+ * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
+ */
+static void 
+htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
+{ 
+	enum htb_cmode new_mode = htb_class_mode(cl,diff);
+	
+	HTB_CHCL(cl);
+	HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
+
+	if (new_mode == cl->cmode)
+		return;	
+	
+	if (cl->prio_activity) { /* not necessary: speed optimization */
+		if (cl->cmode != HTB_CANT_SEND) 
+			htb_deactivate_prios(q,cl);
+		cl->cmode = new_mode;
+		if (new_mode != HTB_CANT_SEND) 
+			htb_activate_prios(q,cl);
+	} else 
+		cl->cmode = new_mode;
+}
+
+/**
+ * htb_activate - inserts leaf cl into appropriate active feeds 
+ *
+ * Routine learns (new) priority of leaf and activates feed chain
+ * for the prio. It can be called on already active leaf safely.
+ * It also adds leaf into droplist.
+ */
+static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+{
+	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
+	HTB_CHCL(cl);
+	if (!cl->prio_activity) {
+		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+		htb_activate_prios(q,cl);
+		list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+	}
+}
+
+/**
+ * htb_deactivate - remove leaf cl from active feeds 
+ *
+ * Make sure that leaf is active. In the other words it can't be called
+ * with non-active leaf. It also removes class from the drop list.
+ */
+static __inline__ void 
+htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+{
+	BUG_TRAP(cl->prio_activity);
+	HTB_CHCL(cl);
+	htb_deactivate_prios(q,cl);
+	cl->prio_activity = 0;
+	list_del_init(&cl->un.leaf.drop_list);
+}
+
+static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+    int ret;
+    struct htb_sched *q = qdisc_priv(sch);
+    struct htb_class *cl = htb_classify(skb,sch,&ret);
+
+    if (cl == HTB_DIRECT) {
+	/* enqueue to helper queue */
+	if (q->direct_queue.qlen < q->direct_qlen) {
+	    __skb_queue_tail(&q->direct_queue, skb);
+	    q->direct_pkts++;
+	}
+#ifdef CONFIG_NET_CLS_ACT
+    } else if (!cl) {
+	if (ret == NET_XMIT_DROP)
+		sch->qstats.drops++;
+	kfree_skb (skb);
+	return ret;
+#endif
+    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
+	sch->qstats.drops++;
+	cl->qstats.drops++;
+	return NET_XMIT_DROP;
+    } else {
+	cl->bstats.packets++; cl->bstats.bytes += skb->len;
+	htb_activate (q,cl);
+    }
+
+    sch->q.qlen++;
+    sch->bstats.packets++; sch->bstats.bytes += skb->len;
+    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
+    return NET_XMIT_SUCCESS;
+}
+
+/* TODO: requeuing packet charges it to policers again !! */
+static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+    struct htb_sched *q = qdisc_priv(sch);
+    int ret =  NET_XMIT_SUCCESS;
+    struct htb_class *cl = htb_classify(skb,sch, &ret);
+    struct sk_buff *tskb;
+
+    if (cl == HTB_DIRECT || !cl) {
+	/* enqueue to helper queue */
+	if (q->direct_queue.qlen < q->direct_qlen && cl) {
+	    __skb_queue_head(&q->direct_queue, skb);
+	} else {
+            __skb_queue_head(&q->direct_queue, skb);
+            tskb = __skb_dequeue_tail(&q->direct_queue);
+            kfree_skb (tskb);
+            sch->qstats.drops++;
+            return NET_XMIT_CN;	
+	}
+    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
+	sch->qstats.drops++;
+	cl->qstats.drops++;
+	return NET_XMIT_DROP;
+    } else 
+	    htb_activate (q,cl);
+
+    sch->q.qlen++;
+    sch->qstats.requeues++;
+    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
+    return NET_XMIT_SUCCESS;
+}
+
+static void htb_timer(unsigned long arg)
+{
+    struct Qdisc *sch = (struct Qdisc*)arg;
+    sch->flags &= ~TCQ_F_THROTTLED;
+    wmb();
+    netif_schedule(sch->dev);
+}
+
+#ifdef HTB_RATECM
+#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
+static void htb_rate_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct list_head *p;
+
+	/* lock queue so that we can muck with it */
+	HTB_QLOCK(sch);
+	HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
+
+	q->rttim.expires = jiffies + HZ;
+	add_timer(&q->rttim);
+
+	/* scan and recompute one bucket at time */
+	if (++q->recmp_bucket >= HTB_HSIZE) 
+		q->recmp_bucket = 0;
+	list_for_each (p,q->hash+q->recmp_bucket) {
+		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
+				cl->classid,cl->sum_bytes,cl->sum_packets);
+		RT_GEN (cl->sum_bytes,cl->rate_bytes);
+		RT_GEN (cl->sum_packets,cl->rate_packets);
+	}
+	HTB_QUNLOCK(sch);
+}
+#endif
+
+/**
+ * htb_charge_class - charges amount "bytes" to leaf and ancestors
+ *
+ * Routine assumes that packet "bytes" long was dequeued from leaf cl
+ * borrowing from "level". It accounts bytes to ceil leaky bucket for
+ * leaf and all ancestors and to rate bucket for ancestors at levels
+ * "level" and higher. It also handles possible change of mode resulting
+ * from the update. Note that mode can also increase here (MAY_BORROW to
+ * CAN_SEND) because we can use more precise clock that event queue here.
+ * In such case we remove class from event queue first.
+ */
+static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
+		int level,int bytes)
+{	
+	long toks,diff;
+	enum htb_cmode old_mode;
+	HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
+
+#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
+	if (toks > cl->B) toks = cl->B; \
+	toks -= L2T(cl, cl->R, bytes); \
+	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
+	cl->T = toks
+
+	while (cl) {
+		HTB_CHCL(cl);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
+#ifdef HTB_DEBUG
+		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
+			if (net_ratelimit())
+				printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
+				       cl->classid, diff,
+#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
+				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
+				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
+#else
+				       (unsigned long long) q->now,
+				       (unsigned long long) cl->t_c,
+#endif
+				       q->jiffies);
+			diff = 1000;
+		}
+#endif
+		if (cl->level >= level) {
+			if (cl->level == level) cl->xstats.lends++;
+			HTB_ACCNT (tokens,buffer,rate);
+		} else {
+			cl->xstats.borrows++;
+			cl->tokens += diff; /* we moved t_c; update tokens */
+		}
+		HTB_ACCNT (ctokens,cbuffer,ceil);
+		cl->t_c = q->now;
+		HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
+
+		old_mode = cl->cmode; diff = 0;
+		htb_change_class_mode(q,cl,&diff);
+		if (old_mode != cl->cmode) {
+			if (old_mode != HTB_CAN_SEND)
+				htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+			if (cl->cmode != HTB_CAN_SEND)
+				htb_add_to_wait_tree (q,cl,diff,1);
+		}
+		
+#ifdef HTB_RATECM
+		/* update rate counters */
+		cl->sum_bytes += bytes; cl->sum_packets++;
+#endif
+
+		/* update byte stats except for leaves which are already updated */
+		if (cl->level) {
+			cl->bstats.bytes += bytes;
+			cl->bstats.packets++;
+		}
+		cl = cl->parent;
+	}
+}
+
+/**
+ * htb_do_events - make mode changes to classes at the level
+ *
+ * Scans event queue for pending events and applies them. Returns jiffies to
+ * next pending event (0 for no event in pq).
+ * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ */
+static long htb_do_events(struct htb_sched *q,int level)
+{
+	int i;
+	HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
+			level,q->wait_pq[level].rb_node,q->row_mask[level]);
+	for (i = 0; i < 500; i++) {
+		struct htb_class *cl;
+		long diff;
+		struct rb_node *p = q->wait_pq[level].rb_node;
+		if (!p) return 0;
+		while (p->rb_left) p = p->rb_left;
+
+		cl = rb_entry(p, struct htb_class, pq_node);
+		if (time_after(cl->pq_key, q->jiffies)) {
+			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
+			return cl->pq_key - q->jiffies;
+		}
+		htb_safe_rb_erase(p,q->wait_pq+level);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
+#ifdef HTB_DEBUG
+		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
+			if (net_ratelimit())
+				printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
+				       cl->classid, diff,
+#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
+				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
+				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
+#else
+				       (unsigned long long) q->now,
+				       (unsigned long long) cl->t_c,
+#endif
+				       q->jiffies);
+			diff = 1000;
+		}
+#endif
+		htb_change_class_mode(q,cl,&diff);
+		if (cl->cmode != HTB_CAN_SEND)
+			htb_add_to_wait_tree (q,cl,diff,2);
+	}
+	if (net_ratelimit())
+		printk(KERN_WARNING "htb: too many events !\n");
+	return HZ/10;
+}
+
+/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
+   is no such one exists. */
+static struct rb_node *
+htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+{
+	struct rb_node *r = NULL;
+	while (n) {
+		struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
+		if (id == cl->classid) return n;
+		
+		if (id > cl->classid) {
+			n = n->rb_right;
+		} else {
+			r = n;
+			n = n->rb_left;
+		}
+	}
+	return r;
+}
+
+/**
+ * htb_lookup_leaf - returns next leaf class in DRR order
+ *
+ * Find leaf where current feed pointers points to.
+ */
+static struct htb_class *
+htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+{
+	int i;
+	struct {
+		struct rb_node *root;
+		struct rb_node **pptr;
+		u32 *pid;
+	} stk[TC_HTB_MAXDEPTH],*sp = stk;
+	
+	BUG_TRAP(tree->rb_node);
+	sp->root = tree->rb_node;
+	sp->pptr = pptr;
+	sp->pid = pid;
+
+	for (i = 0; i < 65535; i++) {
+		HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
+		
+		if (!*sp->pptr && *sp->pid) { 
+			/* ptr was invalidated but id is valid - try to recover 
+			   the original or next ptr */
+			*sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+		}
+		*sp->pid = 0; /* ptr is valid now so that remove this hint as it
+			         can become out of date quickly */
+		if (!*sp->pptr) { /* we are at right end; rewind & go up */
+			*sp->pptr = sp->root;
+			while ((*sp->pptr)->rb_left) 
+				*sp->pptr = (*sp->pptr)->rb_left;
+			if (sp > stk) {
+				sp--;
+				BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
+				htb_next_rb_node (sp->pptr);
+			}
+		} else {
+			struct htb_class *cl;
+			cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
+			HTB_CHCL(cl);
+			if (!cl->level) 
+				return cl;
+			(++sp)->root = cl->un.inner.feed[prio].rb_node;
+			sp->pptr = cl->un.inner.ptr+prio;
+			sp->pid = cl->un.inner.last_ptr_id+prio;
+		}
+	}
+	BUG_TRAP(0);
+	return NULL;
+}
+
+/* dequeues packet at given priority and level; call only if
+   you are sure that there is active class at prio/level */
+static struct sk_buff *
+htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+{
+	struct sk_buff *skb = NULL;
+	struct htb_class *cl,*start;
+	/* look initial class up in the row */
+	start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
+			q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+	
+	do {
+next:
+		BUG_TRAP(cl); 
+		if (!cl) return NULL;
+		HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
+				prio,level,cl->classid,cl->un.leaf.deficit[level]);
+
+		/* class can be empty - it is unlikely but can be true if leaf
+		   qdisc drops packets in enqueue routine or if someone used
+		   graft operation on the leaf since last dequeue; 
+		   simply deactivate and skip such class */
+		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+			struct htb_class *next;
+			htb_deactivate(q,cl);
+
+			/* row/level might become empty */
+			if ((q->row_mask[level] & (1 << prio)) == 0)
+				return NULL; 
+			
+			next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
+					prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+
+			if (cl == start) /* fix start if we just deleted it */
+				start = next;
+			cl = next;
+			goto next;
+		}
+	
+		if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 
+			break;
+		if (!cl->warned) {
+			printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
+			cl->warned = 1;
+		}
+		q->nwc_hit++;
+		htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+		cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
+				q->last_ptr_id[level]+prio);
+
+	} while (cl != start);
+
+	if (likely(skb != NULL)) {
+		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
+				level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
+			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+			htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+		}
+		/* this used to be after charge_class but this constelation
+		   gives us slightly better performance */
+		if (!cl->un.leaf.q->q.qlen)
+			htb_deactivate (q,cl);
+		htb_charge_class (q,cl,level,skb->len);
+	}
+	return skb;
+}
+
+static void htb_delay_by(struct Qdisc *sch,long delay)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	if (delay <= 0) delay = 1;
+	if (unlikely(delay > 5*HZ)) {
+		if (net_ratelimit())
+			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
+		delay = 5*HZ;
+	}
+	/* why don't use jiffies here ? because expires can be in past */
+	mod_timer(&q->timer, q->jiffies + delay);
+	sch->flags |= TCQ_F_THROTTLED;
+	sch->qstats.overlimits++;
+	HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
+}
+
+static struct sk_buff *htb_dequeue(struct Qdisc *sch)
+{
+	struct sk_buff *skb = NULL;
+	struct htb_sched *q = qdisc_priv(sch);
+	int level;
+	long min_delay;
+#ifdef HTB_DEBUG
+	int evs_used = 0;
+#endif
+
+	q->jiffies = jiffies;
+	HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
+			sch->q.qlen);
+
+	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
+	if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+		sch->flags &= ~TCQ_F_THROTTLED;
+		sch->q.qlen--;
+		return skb;
+	}
+
+	if (!sch->q.qlen) goto fin;
+	PSCHED_GET_TIME(q->now);
+
+	min_delay = LONG_MAX;
+	q->nwc_hit = 0;
+	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
+		/* common case optimization - skip event handler quickly */
+		int m;
+		long delay;
+		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
+			delay = htb_do_events(q,level);
+			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
+#ifdef HTB_DEBUG
+			evs_used++;
+#endif
+		} else
+			delay = q->near_ev_cache[level] - q->jiffies;	
+		
+		if (delay && min_delay > delay) 
+			min_delay = delay;
+		m = ~q->row_mask[level];
+		while (m != (int)(-1)) {
+			int prio = ffz (m);
+			m |= 1 << prio;
+			skb = htb_dequeue_tree(q,prio,level);
+			if (likely(skb != NULL)) {
+				sch->q.qlen--;
+				sch->flags &= ~TCQ_F_THROTTLED;
+				goto fin;
+			}
+		}
+	}
+#ifdef HTB_DEBUG
+	if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
+		if (min_delay == LONG_MAX) {
+			printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
+					evs_used,q->jiffies,jiffies);
+			htb_debug_dump(q);
+		} else 
+			printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
+					"too small rate\n",min_delay);
+	}
+#endif
+	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+fin:
+	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
+	return skb;
+}
+
+/* try to drop from each class (by prio) until one succeed */
+static unsigned int htb_drop(struct Qdisc* sch)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
+		struct list_head *p;
+		list_for_each (p,q->drops+prio) {
+			struct htb_class *cl = list_entry(p, struct htb_class,
+							  un.leaf.drop_list);
+			unsigned int len;
+			if (cl->un.leaf.q->ops->drop && 
+				(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+				sch->q.qlen--;
+				if (!cl->un.leaf.q->q.qlen)
+					htb_deactivate (q,cl);
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+/* reset all classes */
+/* always caled under BH & queue lock */
+static void htb_reset(struct Qdisc* sch)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	int i;
+	HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
+
+	for (i = 0; i < HTB_HSIZE; i++) {
+		struct list_head *p;
+		list_for_each (p,q->hash+i) {
+			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+			if (cl->level)
+				memset(&cl->un.inner,0,sizeof(cl->un.inner));
+			else {
+				if (cl->un.leaf.q) 
+					qdisc_reset(cl->un.leaf.q);
+				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
+			}
+			cl->prio_activity = 0;
+			cl->cmode = HTB_CAN_SEND;
+#ifdef HTB_DEBUG
+			cl->pq_node.rb_color = -1;
+			memset(cl->node,255,sizeof(cl->node));
+#endif
+
+		}
+	}
+	sch->flags &= ~TCQ_F_THROTTLED;
+	del_timer(&q->timer);
+	__skb_queue_purge(&q->direct_queue);
+	sch->q.qlen = 0;
+	memset(q->row,0,sizeof(q->row));
+	memset(q->row_mask,0,sizeof(q->row_mask));
+	memset(q->wait_pq,0,sizeof(q->wait_pq));
+	memset(q->ptr,0,sizeof(q->ptr));
+	for (i = 0; i < TC_HTB_NUMPRIO; i++)
+		INIT_LIST_HEAD(q->drops+i);
+}
+
+static int htb_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_HTB_INIT];
+	struct tc_htb_glob *gopt;
+	int i;
+#ifdef HTB_DEBUG
+	printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
+			  HTB_VER >> 16,HTB_VER & 0xffff);
+#endif
+	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
+			tb[TCA_HTB_INIT-1] == NULL ||
+			RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+		return -EINVAL;
+	}
+	gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+	if (gopt->version != HTB_VER >> 16) {
+		printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+				HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+		return -EINVAL;
+	}
+	q->debug = gopt->debug;
+	HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
+
+	INIT_LIST_HEAD(&q->root);
+	for (i = 0; i < HTB_HSIZE; i++)
+		INIT_LIST_HEAD(q->hash+i);
+	for (i = 0; i < TC_HTB_NUMPRIO; i++)
+		INIT_LIST_HEAD(q->drops+i);
+
+	init_timer(&q->timer);
+	skb_queue_head_init(&q->direct_queue);
+
+	q->direct_qlen = sch->dev->tx_queue_len;
+	if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+		q->direct_qlen = 2;
+	q->timer.function = htb_timer;
+	q->timer.data = (unsigned long)sch;
+
+#ifdef HTB_RATECM
+	init_timer(&q->rttim);
+	q->rttim.function = htb_rate_timer;
+	q->rttim.data = (unsigned long)sch;
+	q->rttim.expires = jiffies + HZ;
+	add_timer(&q->rttim);
+#endif
+	if ((q->rate2quantum = gopt->rate2quantum) < 1)
+		q->rate2quantum = 1;
+	q->defcls = gopt->defcls;
+
+	return 0;
+}
+
+static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_htb_glob gopt;
+	HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
+	HTB_QLOCK(sch);
+	gopt.direct_pkts = q->direct_pkts;
+
+#ifdef HTB_DEBUG
+	if (HTB_DBG_COND(0,2))
+		htb_debug_dump(q);
+#endif
+	gopt.version = HTB_VER;
+	gopt.rate2quantum = q->rate2quantum;
+	gopt.defcls = q->defcls;
+	gopt.debug = q->debug;
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
+	rta->rta_len = skb->tail - b;
+	HTB_QUNLOCK(sch);
+	return skb->len;
+rtattr_failure:
+	HTB_QUNLOCK(sch);
+	skb_trim(skb, skb->tail - skb->data);
+	return -1;
+}
+
+static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
+	struct sk_buff *skb, struct tcmsg *tcm)
+{
+#ifdef HTB_DEBUG
+	struct htb_sched *q = qdisc_priv(sch);
+#endif
+	struct htb_class *cl = (struct htb_class*)arg;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_htb_opt opt;
+
+	HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
+
+	HTB_QLOCK(sch);
+	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
+	tcm->tcm_handle = cl->classid;
+	if (!cl->level && cl->un.leaf.q)
+		tcm->tcm_info = cl->un.leaf.q->handle;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	memset (&opt,0,sizeof(opt));
+
+	opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
+	opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
+	opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
+	opt.level = cl->level; 
+	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+	HTB_QUNLOCK(sch);
+	return skb->len;
+rtattr_failure:
+	HTB_QUNLOCK(sch);
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+	struct gnet_dump *d)
+{
+	struct htb_class *cl = (struct htb_class*)arg;
+
+#ifdef HTB_RATECM
+	cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
+	cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
+#endif
+
+	if (!cl->level && cl->un.leaf.q)
+		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
+	cl->xstats.tokens = cl->tokens;
+	cl->xstats.ctokens = cl->ctokens;
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
+}
+
+static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+	struct Qdisc **old)
+{
+	struct htb_class *cl = (struct htb_class*)arg;
+
+	if (cl && !cl->level) {
+		if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
+					&pfifo_qdisc_ops)) == NULL)
+					return -ENOBUFS;
+		sch_tree_lock(sch);
+		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+			if (cl->prio_activity)
+				htb_deactivate (qdisc_priv(sch),cl);
+
+			/* TODO: is it correct ? Why CBQ doesn't do it ? */
+			sch->q.qlen -= (*old)->q.qlen;	
+			qdisc_reset(*old);
+		}
+		sch_tree_unlock(sch);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct htb_class *cl = (struct htb_class*)arg;
+	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+}
+
+static unsigned long htb_get(struct Qdisc *sch, u32 classid)
+{
+#ifdef HTB_DEBUG
+	struct htb_sched *q = qdisc_priv(sch);
+#endif
+	struct htb_class *cl = htb_find(classid,sch);
+	HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
+	if (cl) 
+		cl->refcnt++;
+	return (unsigned long)cl;
+}
+
+static void htb_destroy_filters(struct tcf_proto **fl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = *fl) != NULL) {
+		*fl = tp->next;
+		tcf_destroy(tp);
+	}
+}
+
+static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
+	if (!cl->level) {
+		BUG_TRAP(cl->un.leaf.q);
+		sch->q.qlen -= cl->un.leaf.q->q.qlen;
+		qdisc_destroy(cl->un.leaf.q);
+	}
+	qdisc_put_rtab(cl->rate);
+	qdisc_put_rtab(cl->ceil);
+	
+	htb_destroy_filters (&cl->filter_list);
+	
+	while (!list_empty(&cl->children)) 
+		htb_destroy_class (sch,list_entry(cl->children.next,
+					struct htb_class,sibling));
+
+	/* note: this delete may happen twice (see htb_delete) */
+	list_del(&cl->hlist);
+	list_del(&cl->sibling);
+	
+	if (cl->prio_activity)
+		htb_deactivate (q,cl);
+	
+	if (cl->cmode != HTB_CAN_SEND)
+		htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+	
+	kfree(cl);
+}
+
+/* always caled under BH & queue lock */
+static void htb_destroy(struct Qdisc* sch)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	HTB_DBG(0,1,"htb_destroy q=%p\n",q);
+
+	del_timer_sync (&q->timer);
+#ifdef HTB_RATECM
+	del_timer_sync (&q->rttim);
+#endif
+	/* This line used to be after htb_destroy_class call below
+	   and surprisingly it worked in 2.4. But it must precede it 
+	   because filter need its target class alive to be able to call
+	   unbind_filter on it (without Oops). */
+	htb_destroy_filters(&q->filter_list);
+	
+	while (!list_empty(&q->root)) 
+		htb_destroy_class (sch,list_entry(q->root.next,
+					struct htb_class,sibling));
+
+	__skb_queue_purge(&q->direct_queue);
+}
+
+static int htb_delete(struct Qdisc *sch, unsigned long arg)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = (struct htb_class*)arg;
+	HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+
+	// TODO: why don't allow to delete subtree ? references ? does
+	// tc subsys quarantee us that in htb_destroy it holds no class
+	// refs so that we can remove children safely there ?
+	if (!list_empty(&cl->children) || cl->filter_cnt)
+		return -EBUSY;
+	
+	sch_tree_lock(sch);
+	
+	/* delete from hash and active; remainder in destroy_class */
+	list_del_init(&cl->hlist);
+	if (cl->prio_activity)
+		htb_deactivate (q,cl);
+
+	if (--cl->refcnt == 0)
+		htb_destroy_class(sch,cl);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static void htb_put(struct Qdisc *sch, unsigned long arg)
+{
+#ifdef HTB_DEBUG
+	struct htb_sched *q = qdisc_priv(sch);
+#endif
+	struct htb_class *cl = (struct htb_class*)arg;
+	HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+
+	if (--cl->refcnt == 0)
+		htb_destroy_class(sch,cl);
+}
+
+static int htb_change_class(struct Qdisc *sch, u32 classid, 
+		u32 parentid, struct rtattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = (struct htb_class*)*arg,*parent;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
+	struct rtattr *tb[TCA_HTB_RTAB];
+	struct tc_htb_opt *hopt;
+
+	/* extract all subattrs from opt attr */
+	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
+			tb[TCA_HTB_PARMS-1] == NULL ||
+			RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+		goto failure;
+	
+	parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
+
+	hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
+	HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
+	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
+	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
+	if (!rtab || !ctab) goto failure;
+
+	if (!cl) { /* new class */
+		struct Qdisc *new_q;
+		/* check for valid classid */
+		if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+			goto failure;
+
+		/* check maximal depth */
+		if (parent && parent->parent && parent->parent->level < 2) {
+			printk(KERN_ERR "htb: tree is too deep\n");
+			goto failure;
+		}
+		err = -ENOBUFS;
+		if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+			goto failure;
+		
+		memset(cl, 0, sizeof(*cl));
+		cl->refcnt = 1;
+		INIT_LIST_HEAD(&cl->sibling);
+		INIT_LIST_HEAD(&cl->hlist);
+		INIT_LIST_HEAD(&cl->children);
+		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
+#ifdef HTB_DEBUG
+		cl->magic = HTB_CMAGIC;
+#endif
+
+		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
+		   so that can't be used inside of sch_tree_lock
+		   -- thanks to Karlis Peisenieks */
+		new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+		sch_tree_lock(sch);
+		if (parent && !parent->level) {
+			/* turn parent into inner node */
+			sch->q.qlen -= parent->un.leaf.q->q.qlen;
+			qdisc_destroy (parent->un.leaf.q);
+			if (parent->prio_activity) 
+				htb_deactivate (q,parent);
+
+			/* remove from evt list because of level change */
+			if (parent->cmode != HTB_CAN_SEND) {
+				htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+				parent->cmode = HTB_CAN_SEND;
+			}
+			parent->level = (parent->parent ? parent->parent->level
+					: TC_HTB_MAXDEPTH) - 1;
+			memset (&parent->un.inner,0,sizeof(parent->un.inner));
+		}
+		/* leaf (we) needs elementary qdisc */
+		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
+
+		cl->classid = classid; cl->parent = parent;
+
+		/* set class to be in HTB_CAN_SEND state */
+		cl->tokens = hopt->buffer;
+		cl->ctokens = hopt->cbuffer;
+		cl->mbuffer = 60000000; /* 1min */
+		PSCHED_GET_TIME(cl->t_c);
+		cl->cmode = HTB_CAN_SEND;
+
+		/* attach to the hash list and parent's family */
+		list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
+		list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
+#ifdef HTB_DEBUG
+		{ 
+			int i;
+			for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
+			cl->pq_node.rb_color = -1;
+		}
+#endif
+	} else sch_tree_lock(sch);
+
+	/* it used to be a nasty bug here, we have to check that node
+           is really leaf before changing cl->un.leaf ! */
+	if (!cl->level) {
+		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
+		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+			printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
+			cl->un.leaf.quantum = 1000;
+		}
+		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+			printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
+			cl->un.leaf.quantum = 200000;
+		}
+		if (hopt->quantum)
+			cl->un.leaf.quantum = hopt->quantum;
+		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
+			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
+	}
+
+	cl->buffer = hopt->buffer;
+	cl->cbuffer = hopt->cbuffer;
+	if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
+	if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+	sch_tree_unlock(sch);
+
+	*arg = (unsigned long)cl;
+	return 0;
+
+failure:
+	if (rtab) qdisc_put_rtab(rtab);
+	if (ctab) qdisc_put_rtab(ctab);
+	return err;
+}
+
+static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = (struct htb_class *)arg;
+	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
+	HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+	return fl;
+}
+
+static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
+	u32 classid)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = htb_find (classid,sch);
+	HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+	/*if (cl && !cl->level) return 0;
+	  The line above used to be there to prevent attaching filters to 
+	  leaves. But at least tc_index filter uses this just to get class 
+	  for other reasons so that we have to allow for it.
+	  ----
+	  19.6.2002 As Werner explained it is ok - bind filter is just
+	  another way to "lock" the class - unlike "get" this lock can
+	  be broken by class during destroy IIUC.
+	 */
+	if (cl) 
+		cl->filter_cnt++; 
+	else 
+		q->filter_cnt++;
+	return (unsigned long)cl;
+}
+
+static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = (struct htb_class *)arg;
+	HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
+	if (cl) 
+		cl->filter_cnt--; 
+	else 
+		q->filter_cnt--;
+}
+
+static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+	int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < HTB_HSIZE; i++) {
+		struct list_head *p;
+		list_for_each (p,q->hash+i) {
+			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static struct Qdisc_class_ops htb_class_ops = {
+	.graft		=	htb_graft,
+	.leaf		=	htb_leaf,
+	.get		=	htb_get,
+	.put		=	htb_put,
+	.change		=	htb_change_class,
+	.delete		=	htb_delete,
+	.walk		=	htb_walk,
+	.tcf_chain	=	htb_find_tcf,
+	.bind_tcf	=	htb_bind_filter,
+	.unbind_tcf	=	htb_unbind_filter,
+	.dump		=	htb_dump_class,
+	.dump_stats	=	htb_dump_class_stats,
+};
+
+static struct Qdisc_ops htb_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&htb_class_ops,
+	.id		=	"htb",
+	.priv_size	=	sizeof(struct htb_sched),
+	.enqueue	=	htb_enqueue,
+	.dequeue	=	htb_dequeue,
+	.requeue	=	htb_requeue,
+	.drop		=	htb_drop,
+	.init		=	htb_init,
+	.reset		=	htb_reset,
+	.destroy	=	htb_destroy,
+	.change		=	NULL /* htb_change */,
+	.dump		=	htb_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init htb_module_init(void)
+{
+    return register_qdisc(&htb_qdisc_ops);
+}
+static void __exit htb_module_exit(void) 
+{
+    unregister_qdisc(&htb_qdisc_ops);
+}
+module_init(htb_module_init)
+module_exit(htb_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
new file mode 100644
index 00000000000..8edc32a6ad2
--- /dev/null
+++ b/net/sched/sch_ingress.c
@@ -0,0 +1,436 @@
+/* net/sched/sch_ingress.c - Ingress qdisc 
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:     Jamal Hadi Salim 1999
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/smp.h>
+#include <net/pkt_sched.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+#include <linux/kmod.h>
+#include <linux/stat.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+
+#undef DEBUG_INGRESS
+
+#ifdef DEBUG_INGRESS  /* control */
+#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define DPRINTK(format,args...)
+#endif
+
+#if 0  /* data */
+#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
+#else
+#define D2PRINTK(format,args...)
+#endif
+
+
+#define PRIV(sch) qdisc_priv(sch)
+
+
+/* Thanks to Doron Oz for this hack
+*/
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+static int nf_registered; 
+#endif
+#endif
+
+struct ingress_qdisc_data {
+	struct Qdisc		*q;
+	struct tcf_proto	*filter_list;
+};
+
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+
+static int ingress_graft(struct Qdisc *sch,unsigned long arg,
+    struct Qdisc *new,struct Qdisc **old)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+
+	DPRINTK("ingress_graft(sch %p,[qdisc %p],new %p,old %p)\n",
+		sch, p, new, old);
+	DPRINTK("\n ingress_graft: You cannot add qdiscs to classes");
+        return 1;
+}
+
+
+static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+
+static unsigned long ingress_get(struct Qdisc *sch,u32 classid)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	return TC_H_MIN(classid) + 1;
+}
+
+
+static unsigned long ingress_bind_filter(struct Qdisc *sch,
+    unsigned long parent, u32 classid)
+{
+	return ingress_get(sch, classid);
+}
+
+
+static void ingress_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+
+static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
+    struct rtattr **tca, unsigned long *arg)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_change(sch %p,[qdisc %p],classid %x,parent %x),"
+		"arg 0x%lx\n", sch, p, classid, parent, *arg);
+	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
+	return 0;
+}
+
+
+
+static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+	DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
+}
+
+
+static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch,unsigned long cl)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+	return &p->filter_list;
+}
+
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+
+static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+	struct tcf_result res;
+	int result;
+
+	D2PRINTK("ingress_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	result = tc_classify(skb, p->filter_list, &res);
+	D2PRINTK("result %d class 0x%04x\n", result, res.classid);
+	/*
+	 * Unlike normal "enqueue" functions, ingress_enqueue returns a
+	 * firewall FW_* code.
+	 */
+#ifdef CONFIG_NET_CLS_ACT
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+	switch (result) {
+		case TC_ACT_SHOT:
+			result = TC_ACT_SHOT;
+			sch->qstats.drops++;
+			break;
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			result = TC_ACT_STOLEN;
+			break;
+		case TC_ACT_RECLASSIFY: 
+		case TC_ACT_OK:
+		case TC_ACT_UNSPEC:
+		default:
+			skb->tc_index = TC_H_MIN(res.classid);
+			result = TC_ACT_OK;
+			break;
+	};
+/* backward compat */
+#else
+#ifdef	CONFIG_NET_CLS_POLICE  
+	switch (result) {
+		case TC_POLICE_SHOT:
+		result = NF_DROP;
+		sch->qstats.drops++;
+		break;
+		case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? */
+		case TC_POLICE_OK:
+		case TC_POLICE_UNSPEC:
+		default:
+		sch->bstats.packets++;
+		sch->bstats.bytes += skb->len;
+		result = NF_ACCEPT;
+		break;
+	};
+
+#else
+	D2PRINTK("Overriding result to ACCEPT\n");
+	result = NF_ACCEPT;
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+#endif
+#endif
+
+	return result;
+}
+
+
+static struct sk_buff *ingress_dequeue(struct Qdisc *sch)
+{
+/*
+	struct ingress_qdisc_data *p = PRIV(sch);
+	D2PRINTK("ingress_dequeue(sch %p,[qdisc %p])\n",sch,PRIV(p));
+*/
+	return NULL;
+}
+
+
+static int ingress_requeue(struct sk_buff *skb,struct Qdisc *sch)
+{
+/*
+	struct ingress_qdisc_data *p = PRIV(sch);
+	D2PRINTK("ingress_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,PRIV(p));
+*/
+	return 0;
+}
+
+static unsigned int ingress_drop(struct Qdisc *sch)
+{
+#ifdef DEBUG_INGRESS
+	struct ingress_qdisc_data *p = PRIV(sch);
+#endif
+	DPRINTK("ingress_drop(sch %p,[qdisc %p])\n", sch, p);
+	return 0;
+}
+
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+static unsigned int
+ing_hook(unsigned int hook, struct sk_buff **pskb,
+                             const struct net_device *indev,
+                             const struct net_device *outdev,
+	                     int (*okfn)(struct sk_buff *))
+{
+	
+	struct Qdisc *q;
+	struct sk_buff *skb = *pskb;
+        struct net_device *dev = skb->dev;
+	int fwres=NF_ACCEPT;
+
+	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
+		skb->sk ? "(owned)" : "(unowned)",
+		skb->dev ? (*pskb)->dev->name : "(no dev)",
+		skb->len);
+
+/* 
+revisit later: Use a private since lock dev->queue_lock is also
+used on the egress (might slow things for an iota)
+*/
+
+	if (dev->qdisc_ingress) {
+		spin_lock(&dev->queue_lock);
+		if ((q = dev->qdisc_ingress) != NULL)
+			fwres = q->enqueue(skb, q);
+		spin_unlock(&dev->queue_lock);
+        }
+			
+	return fwres;
+}
+
+/* after ipt_filter */
+static struct nf_hook_ops ing_ops = {
+	.hook           = ing_hook,
+	.owner		= THIS_MODULE,
+	.pf             = PF_INET,
+	.hooknum        = NF_IP_PRE_ROUTING,
+	.priority       = NF_IP_PRI_FILTER + 1,
+};
+
+static struct nf_hook_ops ing6_ops = {
+	.hook           = ing_hook,
+	.owner		= THIS_MODULE,
+	.pf             = PF_INET6,
+	.hooknum        = NF_IP6_PRE_ROUTING,
+	.priority       = NF_IP6_PRI_FILTER + 1,
+};
+
+#endif
+#endif
+
+static int ingress_init(struct Qdisc *sch,struct rtattr *opt)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+/* Make sure either netfilter or preferably CLS_ACT is
+* compiled in */
+#ifndef CONFIG_NET_CLS_ACT
+#ifndef CONFIG_NETFILTER
+	printk("You MUST compile classifier actions into the kernel\n");
+	return -EINVAL;
+#else
+	printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
+#endif
+#endif
+                                                                                
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+	if (!nf_registered) {
+		if (nf_register_hook(&ing_ops) < 0) {
+			printk("ingress qdisc registration error \n");
+			return -EINVAL;
+		}
+		nf_registered++;
+
+		if (nf_register_hook(&ing6_ops) < 0) {
+			printk("IPv6 ingress qdisc registration error, " \
+			    "disabling IPv6 support.\n");
+		} else
+			nf_registered++;
+	}
+#endif
+#endif
+
+	DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
+	p->q = &noop_qdisc;
+	return 0;
+}
+
+
+static void ingress_reset(struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+
+	DPRINTK("ingress_reset(sch %p,[qdisc %p])\n", sch, p);
+
+/*
+#if 0
+*/
+/* for future use */
+	qdisc_reset(p->q);
+/*
+#endif
+*/
+}
+
+/* ------------------------------------------------------------- */
+
+
+/* ------------------------------------------------------------- */
+
+static void ingress_destroy(struct Qdisc *sch)
+{
+	struct ingress_qdisc_data *p = PRIV(sch);
+	struct tcf_proto *tp;
+
+	DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
+	while (p->filter_list) {
+		tp = p->filter_list;
+		p->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+#if 0
+/* for future use */
+	qdisc_destroy(p->q);
+#endif
+}
+
+
+static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	rta->rta_len = skb->tail - b;
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct Qdisc_class_ops ingress_class_ops = {
+	.graft		=	ingress_graft,
+	.leaf		=	ingress_leaf,
+	.get		=	ingress_get,
+	.put		=	ingress_put,
+	.change		=	ingress_change,
+	.delete		=	NULL,
+	.walk		=	ingress_walk,
+	.tcf_chain	=	ingress_find_tcf,
+	.bind_tcf	=	ingress_bind_filter,
+	.unbind_tcf	=	ingress_put,
+	.dump		=	NULL,
+};
+
+static struct Qdisc_ops ingress_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&ingress_class_ops,
+	.id		=	"ingress",
+	.priv_size	=	sizeof(struct ingress_qdisc_data),
+	.enqueue	=	ingress_enqueue,
+	.dequeue	=	ingress_dequeue,
+	.requeue	=	ingress_requeue,
+	.drop		=	ingress_drop,
+	.init		=	ingress_init,
+	.reset		=	ingress_reset,
+	.destroy	=	ingress_destroy,
+	.change		=	NULL,
+	.dump		=	ingress_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init ingress_module_init(void)
+{
+	int ret = 0;
+
+	if ((ret = register_qdisc(&ingress_qdisc_ops)) < 0) {
+		printk("Unable to register Ingress qdisc\n");
+		return ret;
+	}
+
+	return ret;
+}
+static void __exit ingress_module_exit(void) 
+{
+	unregister_qdisc(&ingress_qdisc_ops);
+#ifndef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NETFILTER
+	if (nf_registered) {
+		nf_unregister_hook(&ing_ops);
+		if (nf_registered > 1)
+			nf_unregister_hook(&ing6_ops);
+	}
+#endif
+#endif
+}
+module_init(ingress_module_init)
+module_exit(ingress_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
new file mode 100644
index 00000000000..31c29deb139
--- /dev/null
+++ b/net/sched/sch_netem.c
@@ -0,0 +1,598 @@
+/*
+ * net/sched/sch_netem.c	Network emulator
+ *
+ * 		This program is free software; you can redistribute it and/or
+ * 		modify it under the terms of the GNU General Public License
+ * 		as published by the Free Software Foundation; either version
+ * 		2 of the License, or (at your option) any later version.
+ *
+ *  		Many of the algorithms and ideas for this came from
+ *		NIST Net which is not copyrighted. 
+ *
+ * Authors:	Stephen Hemminger <shemminger@osdl.org>
+ *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/pkt_sched.h>
+
+/*	Network Emulation Queuing algorithm.
+	====================================
+
+	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
+		 Network Emulation Tool
+		 [2] Luigi Rizzo, DummyNet for FreeBSD
+
+	 ----------------------------------------------------------------
+
+	 This started out as a simple way to delay outgoing packets to
+	 test TCP but has grown to include most of the functionality
+	 of a full blown network emulator like NISTnet. It can delay
+	 packets and add random jitter (and correlation). The random
+	 distribution can be loaded from a table as well to provide
+	 normal, Pareto, or experimental curves. Packet loss,
+	 duplication, and reordering can also be emulated.
+
+	 This qdisc does not do classification that can be handled in
+	 layering other disciplines.  It does not need to do bandwidth
+	 control either since that can be handled by using token
+	 bucket or other rate control.
+
+	 The simulator is limited by the Linux timer resolution
+	 and will create packet bursts on the HZ boundary (1ms).
+*/
+
+struct netem_sched_data {
+	struct Qdisc	*qdisc;
+	struct sk_buff_head delayed;
+	struct timer_list timer;
+
+	u32 latency;
+	u32 loss;
+	u32 limit;
+	u32 counter;
+	u32 gap;
+	u32 jitter;
+	u32 duplicate;
+
+	struct crndstate {
+		unsigned long last;
+		unsigned long rho;
+	} delay_cor, loss_cor, dup_cor;
+
+	struct disttable {
+		u32  size;
+		s16 table[0];
+	} *delay_dist;
+};
+
+/* Time stamp put into socket buffer control block */
+struct netem_skb_cb {
+	psched_time_t	time_to_send;
+};
+
+/* init_crandom - initialize correlated random number generator
+ * Use entropy source for initial seed.
+ */
+static void init_crandom(struct crndstate *state, unsigned long rho)
+{
+	state->rho = rho;
+	state->last = net_random();
+}
+
+/* get_crandom - correlated random number generator
+ * Next number depends on last value.
+ * rho is scaled to avoid floating point.
+ */
+static unsigned long get_crandom(struct crndstate *state)
+{
+	u64 value, rho;
+	unsigned long answer;
+
+	if (state->rho == 0)	/* no correllation */
+		return net_random();
+
+	value = net_random();
+	rho = (u64)state->rho + 1;
+	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
+	state->last = answer;
+	return answer;
+}
+
+/* tabledist - return a pseudo-randomly distributed value with mean mu and
+ * std deviation sigma.  Uses table lookup to approximate the desired
+ * distribution, and a uniformly-distributed pseudo-random source.
+ */
+static long tabledist(unsigned long mu, long sigma, 
+		      struct crndstate *state, const struct disttable *dist)
+{
+	long t, x;
+	unsigned long rnd;
+
+	if (sigma == 0)
+		return mu;
+
+	rnd = get_crandom(state);
+
+	/* default uniform distribution */
+	if (dist == NULL) 
+		return (rnd % (2*sigma)) - sigma + mu;
+
+	t = dist->table[rnd % dist->size];
+	x = (sigma % NETEM_DIST_SCALE) * t;
+	if (x >= 0)
+		x += NETEM_DIST_SCALE/2;
+	else
+		x -= NETEM_DIST_SCALE/2;
+
+	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
+}
+
+/* Put skb in the private delayed queue. */
+static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+	psched_tdiff_t td;
+	psched_time_t now;
+	
+	PSCHED_GET_TIME(now);
+	td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
+	PSCHED_TADD2(now, td, cb->time_to_send);
+	
+	/* Always queue at tail to keep packets in order */
+	if (likely(q->delayed.qlen < q->limit)) {
+		__skb_queue_tail(&q->delayed, skb);
+		if (!timer_pending(&q->timer)) {
+			q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
+			add_timer(&q->timer);
+		}
+		return NET_XMIT_SUCCESS;
+	}
+
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb2;
+	int ret;
+
+	pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);
+
+	/* Random packet drop 0 => none, ~0 => all */
+	if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
+		pr_debug("netem_enqueue: random loss\n");
+		sch->qstats.drops++;
+		kfree_skb(skb);
+		return 0;	/* lie about loss so TCP doesn't know */
+	}
+
+	/* Random duplication */
+	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)
+	    && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+		pr_debug("netem_enqueue: dup %p\n", skb2);
+
+		if (delay_skb(sch, skb2)) {
+			sch->q.qlen++;
+			sch->bstats.bytes += skb2->len;
+			sch->bstats.packets++;
+		} else
+			sch->qstats.drops++;
+	}
+
+	/* If doing simple delay then gap == 0 so all packets
+	 * go into the delayed holding queue
+	 * otherwise if doing out of order only "1 out of gap"
+	 * packets will be delayed.
+	 */
+	if (q->counter < q->gap) {
+		++q->counter;
+		ret = q->qdisc->enqueue(skb, q->qdisc);
+	} else {
+		q->counter = 0;
+		ret = delay_skb(sch, skb);
+	}
+
+	if (likely(ret == NET_XMIT_SUCCESS)) {
+		sch->q.qlen++;
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+	} else
+		sch->qstats.drops++;
+
+	return ret;
+}
+
+/* Requeue packets but don't change time stamp */
+static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+	}
+
+	return ret;
+}
+
+static unsigned int netem_drop(struct Qdisc* sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+		sch->q.qlen--;
+		sch->qstats.drops++;
+	}
+	return len;
+}
+
+/* Dequeue packet.
+ *  Move all packets that are ready to send from the delay holding
+ *  list to the underlying qdisc, then just call dequeue
+ */
+static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = q->qdisc->dequeue(q->qdisc);
+	if (skb) 
+		sch->q.qlen--;
+	return skb;
+}
+
+static void netem_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = sch->dev;
+	struct sk_buff *skb;
+	psched_time_t now;
+
+	pr_debug("netem_watchdog: fired @%lu\n", jiffies);
+
+	spin_lock_bh(&dev->queue_lock);
+	PSCHED_GET_TIME(now);
+
+	while ((skb = skb_peek(&q->delayed)) != NULL) {
+		const struct netem_skb_cb *cb
+			= (const struct netem_skb_cb *)skb->cb;
+		long delay 
+			= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+		pr_debug("netem_watchdog: skb %p@%lu %ld\n",
+			 skb, jiffies, delay);
+
+		/* if more time remaining? */
+		if (delay > 0) {
+			mod_timer(&q->timer, jiffies + delay);
+			break;
+		}
+		__skb_unlink(skb, &q->delayed);
+
+		if (q->qdisc->enqueue(skb, q->qdisc)) {
+			sch->q.qlen--;
+			sch->qstats.drops++;
+		}
+	}
+	qdisc_run(dev);
+	spin_unlock_bh(&dev->queue_lock);
+}
+
+static void netem_reset(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset(q->qdisc);
+	skb_queue_purge(&q->delayed);
+
+	sch->q.qlen = 0;
+	del_timer_sync(&q->timer);
+}
+
+static int set_fifo_limit(struct Qdisc *q, int limit)
+{
+        struct rtattr *rta;
+	int ret = -ENOMEM;
+
+	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+	if (rta) {
+		rta->rta_type = RTM_NEWQDISC;
+		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 
+		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+		
+		ret = q->ops->change(q, rta);
+		kfree(rta);
+	}
+	return ret;
+}
+
+/*
+ * Distribution data is a variable size payload containing
+ * signed 16 bit values.
+ */
+static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
+	const __s16 *data = RTA_DATA(attr);
+	struct disttable *d;
+	int i;
+
+	if (n > 65536)
+		return -EINVAL;
+
+	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->size = n;
+	for (i = 0; i < n; i++)
+		d->table[i] = data[i];
+	
+	spin_lock_bh(&sch->dev->queue_lock);
+	d = xchg(&q->delay_dist, d);
+	spin_unlock_bh(&sch->dev->queue_lock);
+
+	kfree(d);
+	return 0;
+}
+
+static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_corr *c = RTA_DATA(attr);
+
+	if (RTA_PAYLOAD(attr) != sizeof(*c))
+		return -EINVAL;
+
+	init_crandom(&q->delay_cor, c->delay_corr);
+	init_crandom(&q->loss_cor, c->loss_corr);
+	init_crandom(&q->dup_cor, c->dup_corr);
+	return 0;
+}
+
+static int netem_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct tc_netem_qopt *qopt;
+	int ret;
+	
+	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = RTA_DATA(opt);
+	ret = set_fifo_limit(q->qdisc, qopt->limit);
+	if (ret) {
+		pr_debug("netem: can't set fifo limit\n");
+		return ret;
+	}
+	
+	q->latency = qopt->latency;
+	q->jitter = qopt->jitter;
+	q->limit = qopt->limit;
+	q->gap = qopt->gap;
+	q->loss = qopt->loss;
+	q->duplicate = qopt->duplicate;
+
+	/* Handle nested options after initial queue options.
+	 * Should have put all options in nested format but too late now.
+	 */ 
+	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
+		struct rtattr *tb[TCA_NETEM_MAX];
+		if (rtattr_parse(tb, TCA_NETEM_MAX, 
+				 RTA_DATA(opt) + sizeof(*qopt),
+				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
+			return -EINVAL;
+
+		if (tb[TCA_NETEM_CORR-1]) {
+			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
+			if (ret)
+				return ret;
+		}
+
+		if (tb[TCA_NETEM_DELAY_DIST-1]) {
+			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
+			if (ret)
+				return ret;
+		}
+	}
+
+
+	return 0;
+}
+
+static int netem_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if (!opt)
+		return -EINVAL;
+
+	skb_queue_head_init(&q->delayed);
+	init_timer(&q->timer);
+	q->timer.function = netem_watchdog;
+	q->timer.data = (unsigned long) sch;
+	q->counter = 0;
+
+	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+	if (!q->qdisc) {
+		pr_debug("netem: qdisc create failed\n");
+		return -ENOMEM;
+	}
+
+	ret = netem_change(sch, opt);
+	if (ret) {
+		pr_debug("netem: change failed\n");
+		qdisc_destroy(q->qdisc);
+	}
+	return ret;
+}
+
+static void netem_destroy(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->timer);
+	qdisc_destroy(q->qdisc);
+	kfree(q->delay_dist);
+}
+
+static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	const struct netem_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta = (struct rtattr *) b;
+	struct tc_netem_qopt qopt;
+	struct tc_netem_corr cor;
+
+	qopt.latency = q->latency;
+	qopt.jitter = q->jitter;
+	qopt.limit = q->limit;
+	qopt.loss = q->loss;
+	qopt.gap = q->gap;
+	qopt.duplicate = q->duplicate;
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+
+	cor.delay_corr = q->delay_cor.rho;
+	cor.loss_corr = q->loss_cor.rho;
+	cor.dup_corr = q->dup_cor.rho;
+	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (cl != 1) 	/* only one class */
+		return -ENOENT;
+
+	tcm->tcm_handle |= TC_H_MIN(1);
+	tcm->tcm_info = q->qdisc->handle;
+
+	return 0;
+}
+
+static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = xchg(&q->qdisc, new);
+	qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	return q->qdisc;
+}
+
+static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void netem_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 
+			    struct rtattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static int netem_delete(struct Qdisc *sch, unsigned long arg)
+{
+	return -ENOSYS;
+}
+
+static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	return NULL;
+}
+
+static struct Qdisc_class_ops netem_class_ops = {
+	.graft		=	netem_graft,
+	.leaf		=	netem_leaf,
+	.get		=	netem_get,
+	.put		=	netem_put,
+	.change		=	netem_change_class,
+	.delete		=	netem_delete,
+	.walk		=	netem_walk,
+	.tcf_chain	=	netem_find_tcf,
+	.dump		=	netem_dump_class,
+};
+
+static struct Qdisc_ops netem_qdisc_ops = {
+	.id		=	"netem",
+	.cl_ops		=	&netem_class_ops,
+	.priv_size	=	sizeof(struct netem_sched_data),
+	.enqueue	=	netem_enqueue,
+	.dequeue	=	netem_dequeue,
+	.requeue	=	netem_requeue,
+	.drop		=	netem_drop,
+	.init		=	netem_init,
+	.reset		=	netem_reset,
+	.destroy	=	netem_destroy,
+	.change		=	netem_change,
+	.dump		=	netem_dump,
+	.owner		=	THIS_MODULE,
+};
+
+
+static int __init netem_module_init(void)
+{
+	return register_qdisc(&netem_qdisc_ops);
+}
+static void __exit netem_module_exit(void)
+{
+	unregister_qdisc(&netem_qdisc_ops);
+}
+module_init(netem_module_init)
+module_exit(netem_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
new file mode 100644
index 00000000000..3ac0f495bad
--- /dev/null
+++ b/net/sched/sch_prio.c
@@ -0,0 +1,444 @@
+/*
+ * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>: 
+ *              Init --  EINVAL when opt undefined
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+struct prio_sched_data
+{
+	int bands;
+	struct tcf_proto *filter_list;
+	u8  prio2band[TC_PRIO_MAX+1];
+	struct Qdisc *queues[TCQ_PRIO_BANDS];
+};
+
+
+static struct Qdisc *
+prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	u32 band = skb->priority;
+	struct tcf_result res;
+
+	*qerr = NET_XMIT_DROP;
+	if (TC_H_MAJ(skb->priority) != sch->handle) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (tc_classify(skb, q->filter_list, &res)) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT:
+			return NULL;
+		};
+
+		if (!q->filter_list ) {
+#else
+		if (!q->filter_list || tc_classify(skb, q->filter_list, &res)) {
+#endif
+			if (TC_H_MAJ(band))
+				band = 0;
+			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+		}
+		band = res.classid;
+	}
+	band = TC_H_MIN(band) - 1;
+	if (band > q->bands)
+		return q->queues[q->prio2band[0]];
+
+	return q->queues[band];
+}
+
+static int
+prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+	sch->qstats.drops++;
+	return ret; 
+}
+
+
+static int
+prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct Qdisc *qdisc;
+	int ret;
+
+	qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+	if (qdisc == NULL) {
+		if (ret == NET_XMIT_DROP)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+#endif
+
+	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+		return 0;
+	}
+	sch->qstats.drops++;
+	return NET_XMIT_DROP;
+}
+
+
+static struct sk_buff *
+prio_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+	struct Qdisc *qdisc;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		qdisc = q->queues[prio];
+		skb = qdisc->dequeue(qdisc);
+		if (skb) {
+			sch->q.qlen--;
+			return skb;
+		}
+	}
+	return NULL;
+
+}
+
+static unsigned int prio_drop(struct Qdisc* sch)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+	unsigned int len;
+	struct Qdisc *qdisc;
+
+	for (prio = q->bands-1; prio >= 0; prio--) {
+		qdisc = q->queues[prio];
+		if ((len = qdisc->ops->drop(qdisc)) != 0) {
+			sch->q.qlen--;
+			return len;
+		}
+	}
+	return 0;
+}
+
+
+static void
+prio_reset(struct Qdisc* sch)
+{
+	int prio;
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	for (prio=0; prio<q->bands; prio++)
+		qdisc_reset(q->queues[prio]);
+	sch->q.qlen = 0;
+}
+
+static void
+prio_destroy(struct Qdisc* sch)
+{
+	int prio;
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *tp;
+
+	while ((tp = q->filter_list) != NULL) {
+		q->filter_list = tp->next;
+		tcf_destroy(tp);
+	}
+
+	for (prio=0; prio<q->bands; prio++)
+		qdisc_destroy(q->queues[prio]);
+}
+
+static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct tc_prio_qopt *qopt = RTA_DATA(opt);
+	int i;
+
+	if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+		return -EINVAL;
+	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+		return -EINVAL;
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		if (qopt->priomap[i] >= qopt->bands)
+			return -EINVAL;
+	}
+
+	sch_tree_lock(sch);
+	q->bands = qopt->bands;
+	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
+
+	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+		if (child != &noop_qdisc)
+			qdisc_destroy(child);
+	}
+	sch_tree_unlock(sch);
+
+	for (i=0; i<=TC_PRIO_MAX; i++) {
+		int band = q->prio2band[i];
+		if (q->queues[band] == &noop_qdisc) {
+			struct Qdisc *child;
+			child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+			if (child) {
+				sch_tree_lock(sch);
+				child = xchg(&q->queues[band], child);
+
+				if (child != &noop_qdisc)
+					qdisc_destroy(child);
+				sch_tree_unlock(sch);
+			}
+		}
+	}
+	return 0;
+}
+
+static int prio_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	for (i=0; i<TCQ_PRIO_BANDS; i++)
+		q->queues[i] = &noop_qdisc;
+
+	if (opt == NULL) {
+		return -EINVAL;
+	} else {
+		int err;
+
+		if ((err= prio_tune(sch, opt)) != 0)
+			return err;
+	}
+	return 0;
+}
+
+static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_prio_qopt opt;
+
+	opt.bands = q->bands;
+	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		      struct Qdisc **old)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->queues[band];
+	q->queues[band] = new;
+	sch->q.qlen -= (*old)->q.qlen;
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *
+prio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	if (band >= q->bands)
+		return NULL;
+
+	return q->queues[band];
+}
+
+static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	unsigned long band = TC_H_MIN(classid);
+
+	if (band - 1 >= q->bands)
+		return 0;
+	return band;
+}
+
+static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+	return prio_get(sch, classid);
+}
+
+
+static void prio_put(struct Qdisc *q, unsigned long cl)
+{
+	return;
+}
+
+static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+static int prio_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
+			   struct tcmsg *tcm)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 > q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	if (arg->stop)
+		return;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, prio+1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static struct Qdisc_class_ops prio_class_ops = {
+	.graft		=	prio_graft,
+	.leaf		=	prio_leaf,
+	.get		=	prio_get,
+	.put		=	prio_put,
+	.change		=	prio_change,
+	.delete		=	prio_delete,
+	.walk		=	prio_walk,
+	.tcf_chain	=	prio_find_tcf,
+	.bind_tcf	=	prio_bind,
+	.unbind_tcf	=	prio_put,
+	.dump		=	prio_dump_class,
+};
+
+static struct Qdisc_ops prio_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&prio_class_ops,
+	.id		=	"prio",
+	.priv_size	=	sizeof(struct prio_sched_data),
+	.enqueue	=	prio_enqueue,
+	.dequeue	=	prio_dequeue,
+	.requeue	=	prio_requeue,
+	.drop		=	prio_drop,
+	.init		=	prio_init,
+	.reset		=	prio_reset,
+	.destroy	=	prio_destroy,
+	.change		=	prio_tune,
+	.dump		=	prio_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init prio_module_init(void)
+{
+	return register_qdisc(&prio_qdisc_ops);
+}
+
+static void __exit prio_module_exit(void) 
+{
+	unregister_qdisc(&prio_qdisc_ops);
+}
+
+module_init(prio_module_init)
+module_exit(prio_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
new file mode 100644
index 00000000000..664d0e47374
--- /dev/null
+++ b/net/sched/sch_red.c
@@ -0,0 +1,459 @@
+/*
+ * net/sched/sch_red.c	Random Early Detection queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * J Hadi Salim <hadi@nortel.com> 980914:	computation fixes
+ * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
+ * J Hadi Salim <hadi@nortelnetworks.com> 980816:  ECN support	
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/dsfield.h>
+
+
+/*	Random Early Detection (RED) algorithm.
+	=======================================
+
+	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
+	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
+
+	This file codes a "divisionless" version of RED algorithm
+	as written down in Fig.17 of the paper.
+
+Short description.
+------------------
+
+	When a new packet arrives we calculate the average queue length:
+
+	avg = (1-W)*avg + W*current_queue_len,
+
+	W is the filter time constant (chosen as 2^(-Wlog)), it controls
+	the inertia of the algorithm. To allow larger bursts, W should be
+	decreased.
+
+	if (avg > th_max) -> packet marked (dropped).
+	if (avg < th_min) -> packet passes.
+	if (th_min < avg < th_max) we calculate probability:
+
+	Pb = max_P * (avg - th_min)/(th_max-th_min)
+
+	and mark (drop) packet with this probability.
+	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
+	max_P should be small (not 1), usually 0.01..0.02 is good value.
+
+	max_P is chosen as a number, so that max_P/(th_max-th_min)
+	is a negative power of two in order arithmetics to contain
+	only shifts.
+
+
+	Parameters, settable by user:
+	-----------------------------
+
+	limit		- bytes (must be > qth_max + burst)
+
+	Hard limit on queue length, should be chosen >qth_max
+	to allow packet bursts. This parameter does not
+	affect the algorithms behaviour and can be chosen
+	arbitrarily high (well, less than ram size)
+	Really, this limit will never be reached
+	if RED works correctly.
+
+	qth_min		- bytes (should be < qth_max/2)
+	qth_max		- bytes (should be at least 2*qth_min and less limit)
+	Wlog	       	- bits (<32) log(1/W).
+	Plog	       	- bits (<32)
+
+	Plog is related to max_P by formula:
+
+	max_P = (qth_max-qth_min)/2^Plog;
+
+	F.e. if qth_max=128K and qth_min=32K, then Plog=22
+	corresponds to max_P=0.02
+
+	Scell_log
+	Stab
+
+	Lookup table for log((1-W)^(t/t_ave).
+
+
+NOTES:
+
+Upper bound on W.
+-----------------
+
+	If you want to allow bursts of L packets of size S,
+	you should choose W:
+
+	L + 1 - th_min/S < (1-(1-W)^L)/W
+
+	th_min/S = 32         th_min/S = 4
+			                       
+	log(W)	L
+	-1	33
+	-2	35
+	-3	39
+	-4	46
+	-5	57
+	-6	75
+	-7	101
+	-8	135
+	-9	190
+	etc.
+ */
+
+struct red_sched_data
+{
+/* Parameters */
+	u32		limit;		/* HARD maximal queue length	*/
+	u32		qth_min;	/* Min average length threshold: A scaled */
+	u32		qth_max;	/* Max average length threshold: A scaled */
+	u32		Rmask;
+	u32		Scell_max;
+	unsigned char	flags;
+	char		Wlog;		/* log(W)		*/
+	char		Plog;		/* random number bits	*/
+	char		Scell_log;
+	u8		Stab[256];
+
+/* Variables */
+	unsigned long	qave;		/* Average queue length: A scaled */
+	int		qcount;		/* Packets since last random number generation */
+	u32		qR;		/* Cached random number */
+
+	psched_time_t	qidlestart;	/* Start of idle period		*/
+	struct tc_red_xstats st;
+};
+
+static int red_ecn_mark(struct sk_buff *skb)
+{
+	if (skb->nh.raw + 20 > skb->tail)
+		return 0;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (INET_ECN_is_not_ect(skb->nh.iph->tos))
+			return 0;
+		IP_ECN_set_ce(skb->nh.iph);
+		return 1;
+	case __constant_htons(ETH_P_IPV6):
+		if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
+			return 0;
+		IP6_ECN_set_ce(skb->nh.ipv6h);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int
+red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	psched_time_t now;
+
+	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+		long us_idle;
+		int  shift;
+
+		PSCHED_GET_TIME(now);
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+/*
+   The problem: ideally, average length queue recalcultion should
+   be done over constant clock intervals. This is too expensive, so that
+   the calculation is driven by outgoing packets.
+   When the queue is idle we have to model this clock by hand.
+
+   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
+   dummy packets as a burst after idle time, i.e.
+
+          q->qave *= (1-W)^m
+
+   This is an apparently overcomplicated solution (f.e. we have to precompute
+   a table to make this calculation in reasonable time)
+   I believe that a simpler model may be used here,
+   but it is field for experiments.
+*/
+		shift = q->Stab[us_idle>>q->Scell_log];
+
+		if (shift) {
+			q->qave >>= shift;
+		} else {
+			/* Approximate initial part of exponent
+			   with linear function:
+			   (1-W)^m ~= 1-mW + ...
+
+			   Seems, it is the best solution to
+			   problem of too coarce exponent tabulation.
+			 */
+
+			us_idle = (q->qave * us_idle)>>q->Scell_log;
+			if (us_idle < q->qave/2)
+				q->qave -= us_idle;
+			else
+				q->qave >>= 1;
+		}
+	} else {
+		q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
+		/* NOTE:
+		   q->qave is fixed point number with point at Wlog.
+		   The formulae above is equvalent to floating point
+		   version:
+
+		   qave = qave*(1-W) + sch->qstats.backlog*W;
+		                                           --ANK (980924)
+		 */
+	}
+
+	if (q->qave < q->qth_min) {
+		q->qcount = -1;
+enqueue:
+		if (sch->qstats.backlog + skb->len <= q->limit) {
+			__skb_queue_tail(&sch->q, skb);
+			sch->qstats.backlog += skb->len;
+			sch->bstats.bytes += skb->len;
+			sch->bstats.packets++;
+			return NET_XMIT_SUCCESS;
+		} else {
+			q->st.pdrop++;
+		}
+		kfree_skb(skb);
+		sch->qstats.drops++;
+		return NET_XMIT_DROP;
+	}
+	if (q->qave >= q->qth_max) {
+		q->qcount = -1;
+		sch->qstats.overlimits++;
+mark:
+		if  (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
+			q->st.early++;
+			goto drop;
+		}
+		q->st.marked++;
+		goto enqueue;
+	}
+
+	if (++q->qcount) {
+		/* The formula used below causes questions.
+
+		   OK. qR is random number in the interval 0..Rmask
+		   i.e. 0..(2^Plog). If we used floating point
+		   arithmetics, it would be: (2^Plog)*rnd_num,
+		   where rnd_num is less 1.
+
+		   Taking into account, that qave have fixed
+		   point at Wlog, and Plog is related to max_P by
+		   max_P = (qth_max-qth_min)/2^Plog; two lines
+		   below have the following floating point equivalent:
+		   
+		   max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
+
+		   Any questions? --ANK (980924)
+		 */
+		if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
+			goto enqueue;
+		q->qcount = 0;
+		q->qR = net_random()&q->Rmask;
+		sch->qstats.overlimits++;
+		goto mark;
+	}
+	q->qR = net_random()&q->Rmask;
+	goto enqueue;
+
+drop:
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_CN;
+}
+
+static int
+red_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+	__skb_queue_head(&sch->q, skb);
+	sch->qstats.backlog += skb->len;
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+red_dequeue(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	skb = __skb_dequeue(&sch->q);
+	if (skb) {
+		sch->qstats.backlog -= skb->len;
+		return skb;
+	}
+	PSCHED_GET_TIME(q->qidlestart);
+	return NULL;
+}
+
+static unsigned int red_drop(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		unsigned int len = skb->len;
+		sch->qstats.backlog -= len;
+		sch->qstats.drops++;
+		q->st.other++;
+		kfree_skb(skb);
+		return len;
+	}
+	PSCHED_GET_TIME(q->qidlestart);
+	return 0;
+}
+
+static void red_reset(struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_purge(&sch->q);
+	sch->qstats.backlog = 0;
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	q->qave = 0;
+	q->qcount = -1;
+}
+
+static int red_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_RED_STAB];
+	struct tc_red_qopt *ctl;
+
+	if (opt == NULL ||
+	    rtattr_parse_nested(tb, TCA_RED_STAB, opt) ||
+	    tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 ||
+	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
+	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256)
+		return -EINVAL;
+
+	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
+
+	sch_tree_lock(sch);
+	q->flags = ctl->flags;
+	q->Wlog = ctl->Wlog;
+	q->Plog = ctl->Plog;
+	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+	q->Scell_log = ctl->Scell_log;
+	q->Scell_max = (255<<q->Scell_log);
+	q->qth_min = ctl->qth_min<<ctl->Wlog;
+	q->qth_max = ctl->qth_max<<ctl->Wlog;
+	q->limit = ctl->limit;
+	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
+
+	q->qcount = -1;
+	if (skb_queue_len(&sch->q) == 0)
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int red_init(struct Qdisc* sch, struct rtattr *opt)
+{
+	return red_change(sch, opt);
+}
+
+static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_red_qopt opt;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	opt.limit = q->limit;
+	opt.qth_min = q->qth_min>>q->Wlog;
+	opt.qth_max = q->qth_max>>q->Wlog;
+	opt.Wlog = q->Wlog;
+	opt.Plog = q->Plog;
+	opt.Scell_log = q->Scell_log;
+	opt.flags = q->flags;
+	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+
+	return gnet_stats_copy_app(d, &q->st, sizeof(q->st));
+}
+
+static struct Qdisc_ops red_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"red",
+	.priv_size	=	sizeof(struct red_sched_data),
+	.enqueue	=	red_enqueue,
+	.dequeue	=	red_dequeue,
+	.requeue	=	red_requeue,
+	.drop		=	red_drop,
+	.init		=	red_init,
+	.reset		=	red_reset,
+	.change		=	red_change,
+	.dump		=	red_dump,
+	.dump_stats	=	red_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init red_module_init(void)
+{
+	return register_qdisc(&red_qdisc_ops);
+}
+static void __exit red_module_exit(void) 
+{
+	unregister_qdisc(&red_qdisc_ops);
+}
+module_init(red_module_init)
+module_exit(red_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
new file mode 100644
index 00000000000..8734bb7280e
--- /dev/null
+++ b/net/sched/sch_sfq.c
@@ -0,0 +1,497 @@
+/*
+ * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/*	Stochastic Fairness Queuing algorithm.
+	=======================================
+
+	Source:
+	Paul E. McKenney "Stochastic Fairness Queuing",
+	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
+
+	Paul E. McKenney "Stochastic Fairness Queuing",
+	"Interworking: Research and Experience", v.2, 1991, p.113-131.
+
+
+	See also:
+	M. Shreedhar and George Varghese "Efficient Fair
+	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
+
+
+	This is not the thing that is usually called (W)FQ nowadays. 
+	It does not use any timestamp mechanism, but instead
+	processes queues in round-robin order.
+
+	ADVANTAGE:
+
+	- It is very cheap. Both CPU and memory requirements are minimal.
+
+	DRAWBACKS:
+
+	- "Stochastic" -> It is not 100% fair. 
+	When hash collisions occur, several flows are considered as one.
+
+	- "Round-robin" -> It introduces larger delays than virtual clock
+	based schemes, and should not be used for isolating interactive
+	traffic	from non-interactive. It means, that this scheduler
+	should be used as leaf of CBQ or P3, which put interactive traffic
+	to higher priority band.
+
+	We still need true WFQ for top level CSZ, but using WFQ
+	for the best effort traffic is absolutely pointless:
+	SFQ is superior for this purpose.
+
+	IMPLEMENTATION:
+	This implementation limits maximal queue length to 128;
+	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	The only goal of this restrictions was that all data
+	fit into one 4K page :-). Struct sfq_sched_data is
+	organized in anti-cache manner: all the data for a bucket
+	are scattered over different locations. This is not good,
+	but it allowed me to put it into 4K.
+
+	It is easy to increase these values, but not in flight.  */
+
+#define SFQ_DEPTH		128
+#define SFQ_HASH_DIVISOR	1024
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned char sfq_index;
+
+struct sfq_head
+{
+	sfq_index	next;
+	sfq_index	prev;
+};
+
+struct sfq_sched_data
+{
+/* Parameters */
+	int		perturb_period;
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;
+
+/* Variables */
+	struct timer_list perturb_timer;
+	int		perturbation;
+	sfq_index	tail;		/* Index of current slot in round */
+	sfq_index	max_depth;	/* Maximal depth */
+
+	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
+	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
+	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
+	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
+	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+};
+
+static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+{
+	int pert = q->perturbation;
+
+	/* Have we any rotation primitives? If not, WHY? */
+	h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
+	h ^= h>>10;
+	return h & 0x3FF;
+}
+
+static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+{
+	u32 h, h2;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+	{
+		struct iphdr *iph = skb->nh.iph;
+		h = iph->daddr;
+		h2 = iph->saddr^iph->protocol;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_ESP))
+			h2 ^= *(((u32*)iph) + iph->ihl);
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6):
+	{
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+		h = iph->daddr.s6_addr32[3];
+		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
+		if (iph->nexthdr == IPPROTO_TCP ||
+		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_ESP)
+			h2 ^= *(u32*)&iph[1];
+		break;
+	}
+	default:
+		h = (u32)(unsigned long)skb->dst^skb->protocol;
+		h2 = (u32)(unsigned long)skb->sk;
+	}
+	return sfq_fold_hash(q, h, h2);
+}
+
+static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+	int d = q->qs[x].qlen + SFQ_DEPTH;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	sfq_link(q, x);
+}
+
+static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
+{
+	sfq_index p, n;
+	int d;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
+
+	sfq_link(q, x);
+}
+
+static unsigned int sfq_drop(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_index d = q->max_depth;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
+	if (d > 1) {
+		sfq_index x = q->dep[d+SFQ_DEPTH].next;
+		skb = q->qs[x].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[x]);
+		kfree_skb(skb);
+		sfq_dec(q, x);
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		return len;
+	}
+
+	if (d == 1) {
+		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+		d = q->next[q->tail];
+		q->next[q->tail] = q->next[d];
+		q->allot[q->next[d]] += q->quantum;
+		skb = q->qs[d].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		sfq_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = SFQ_DEPTH;
+		sch->qstats.drops++;
+		return len;
+	}
+
+	return 0;
+}
+
+static int
+sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned hash = sfq_hash(q, skb);
+	sfq_index x;
+
+	x = q->ht[hash];
+	if (x == SFQ_DEPTH) {
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
+	}
+	__skb_queue_tail(&q->qs[x], skb);
+	sfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen < q->limit-1) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	sfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+static int
+sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned hash = sfq_hash(q, skb);
+	sfq_index x;
+
+	x = q->ht[hash];
+	if (x == SFQ_DEPTH) {
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
+	}
+	__skb_queue_head(&q->qs[x], skb);
+	sfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen < q->limit - 1) {
+		sch->qstats.requeues++;
+		return 0;
+	}
+
+	sch->qstats.drops++;
+	sfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+
+
+
+static struct sk_buff *
+sfq_dequeue(struct Qdisc* sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	sfq_index a, old_a;
+
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
+
+	a = old_a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
+	sfq_dec(q, a);
+	sch->q.qlen--;
+
+	/* Is the slot empty? */
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = SFQ_DEPTH;
+		a = q->next[a];
+		if (a == old_a) {
+			q->tail = SFQ_DEPTH;
+			return skb;
+		}
+		q->next[q->tail] = a;
+		q->allot[a] += q->quantum;
+	} else if ((q->allot[a] -= skb->len) <= 0) {
+		q->tail = a;
+		a = q->next[a];
+		q->allot[a] += q->quantum;
+	}
+	return skb;
+}
+
+static void
+sfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = sfq_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+
+static void sfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct sfq_sched_data *q = qdisc_priv(sch);
+
+	q->perturbation = net_random()&0x1F;
+
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+}
+
+static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+
+	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	sch_tree_lock(sch);
+	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+	q->perturb_period = ctl->perturb_period*HZ;
+	if (ctl->limit)
+		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
+
+	while (sch->q.qlen >= q->limit-1)
+		sfq_drop(sch);
+
+	del_timer(&q->perturb_timer);
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	init_timer(&q->perturb_timer);
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = sfq_perturbation;
+
+	for (i=0; i<SFQ_HASH_DIVISOR; i++)
+		q->ht[i] = SFQ_DEPTH;
+	for (i=0; i<SFQ_DEPTH; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH;
+		q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH;
+	}
+	q->limit = SFQ_DEPTH;
+	q->max_depth = 0;
+	q->tail = SFQ_DEPTH;
+	if (opt == NULL) {
+		q->quantum = psched_mtu(sch->dev);
+		q->perturb_period = 0;
+	} else {
+		int err = sfq_change(sch, opt);
+		if (err)
+			return err;
+	}
+	for (i=0; i<SFQ_DEPTH; i++)
+		sfq_link(q, i);
+	return 0;
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	del_timer(&q->perturb_timer);
+}
+
+static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct tc_sfq_qopt opt;
+
+	opt.quantum = q->quantum;
+	opt.perturb_period = q->perturb_period/HZ;
+
+	opt.limit = q->limit;
+	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.flows = q->limit;
+
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct Qdisc_ops sfq_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	NULL,
+	.id		=	"sfq",
+	.priv_size	=	sizeof(struct sfq_sched_data),
+	.enqueue	=	sfq_enqueue,
+	.dequeue	=	sfq_dequeue,
+	.requeue	=	sfq_requeue,
+	.drop		=	sfq_drop,
+	.init		=	sfq_init,
+	.reset		=	sfq_reset,
+	.destroy	=	sfq_destroy,
+	.change		=	NULL,
+	.dump		=	sfq_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init sfq_module_init(void)
+{
+	return register_qdisc(&sfq_qdisc_ops);
+}
+static void __exit sfq_module_exit(void) 
+{
+	unregister_qdisc(&sfq_qdisc_ops);
+}
+module_init(sfq_module_init)
+module_exit(sfq_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
new file mode 100644
index 00000000000..cb9711ea8c6
--- /dev/null
+++ b/net/sched/sch_tbf.c
@@ -0,0 +1,543 @@
+/*
+ * net/sched/sch_tbf.c	Token Bucket Filter queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
+ *						 original idea by Martin Devera
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/*	Simple Token Bucket Filter.
+	=======================================
+
+	SOURCE.
+	-------
+
+	None.
+
+	Description.
+	------------
+
+	A data flow obeys TBF with rate R and depth B, if for any
+	time interval t_i...t_f the number of transmitted bits
+	does not exceed B + R*(t_f-t_i).
+
+	Packetized version of this definition:
+	The sequence of packets of sizes s_i served at moments t_i
+	obeys TBF, if for any i<=k:
+
+	s_i+....+s_k <= B + R*(t_k - t_i)
+
+	Algorithm.
+	----------
+
+	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
+
+	N(t+delta) = min{B/R, N(t) + delta}
+
+	If the first packet in queue has length S, it may be
+	transmitted only at the time t_* when S/R <= N(t_*),
+	and in this case N(t) jumps:
+
+	N(t_* + 0) = N(t_* - 0) - S/R.
+
+
+
+	Actually, QoS requires two TBF to be applied to a data stream.
+	One of them controls steady state burst size, another
+	one with rate P (peak rate) and depth M (equal to link MTU)
+	limits bursts at a smaller time scale.
+
+	It is easy to see that P>R, and B>M. If P is infinity, this double
+	TBF is equivalent to a single one.
+
+	When TBF works in reshaping mode, latency is estimated as:
+
+	lat = max ((L-B)/R, (L-M)/P)
+
+
+	NOTES.
+	------
+
+	If TBF throttles, it starts a watchdog timer, which will wake it up
+	when it is ready to transmit.
+	Note that the minimal timer resolution is 1/HZ.
+	If no new packets arrive during this period,
+	or if the device is not awaken by EOI for some previous packet,
+	TBF can stop its activity for 1/HZ.
+
+
+	This means, that with depth B, the maximal rate is
+
+	R_crit = B*HZ
+
+	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
+
+	Note that the peak rate TBF is much more tough: with MTU 1500
+	P_crit = 150Kbytes/sec. So, if you need greater peak
+	rates, use alpha with HZ=1000 :-)
+
+	With classful TBF, limit is just kept for backwards compatibility.
+	It is passed to the default bfifo qdisc - if the inner qdisc is
+	changed the limit is not effective anymore.
+*/
+
+struct tbf_sched_data
+{
+/* Parameters */
+	u32		limit;		/* Maximal length of backlog: bytes */
+	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
+	u32		mtu;
+	u32		max_size;
+	struct qdisc_rate_table	*R_tab;
+	struct qdisc_rate_table	*P_tab;
+
+/* Variables */
+	long	tokens;			/* Current number of B tokens */
+	long	ptokens;		/* Current number of P tokens */
+	psched_time_t	t_c;		/* Time check-point */
+	struct timer_list wd_timer;	/* Watchdog timer */
+	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
+};
+
+#define L2T(q,L)   ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
+#define L2T_P(q,L) ((q)->P_tab->data[(L)>>(q)->P_tab->rate.cell_log])
+
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if (skb->len > q->max_size) {
+		sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_POLICE
+		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
+#endif
+			kfree_skb(skb);
+
+		return NET_XMIT_DROP;
+	}
+
+	if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
+		sch->qstats.drops++;
+		return ret;
+	}
+
+	sch->q.qlen++;
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	return 0;
+}
+
+static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+		sch->q.qlen++;
+		sch->qstats.requeues++;
+	}
+
+	return ret;
+}
+
+static unsigned int tbf_drop(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+		sch->q.qlen--;
+		sch->qstats.drops++;
+	}
+	return len;
+}
+
+static void tbf_watchdog(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+
+	sch->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(sch->dev);
+}
+
+static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = q->qdisc->dequeue(q->qdisc);
+
+	if (skb) {
+		psched_time_t now;
+		long toks, delay;
+		long ptoks = 0;
+		unsigned int len = skb->len;
+
+		PSCHED_GET_TIME(now);
+
+		toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+
+		if (q->P_tab) {
+			ptoks = toks + q->ptokens;
+			if (ptoks > (long)q->mtu)
+				ptoks = q->mtu;
+			ptoks -= L2T_P(q, len);
+		}
+		toks += q->tokens;
+		if (toks > (long)q->buffer)
+			toks = q->buffer;
+		toks -= L2T(q, len);
+
+		if ((toks|ptoks) >= 0) {
+			q->t_c = now;
+			q->tokens = toks;
+			q->ptokens = ptoks;
+			sch->q.qlen--;
+			sch->flags &= ~TCQ_F_THROTTLED;
+			return skb;
+		}
+
+		delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
+
+		if (delay == 0)
+			delay = 1;
+
+		mod_timer(&q->wd_timer, jiffies+delay);
+
+		/* Maybe we have a shorter packet in the queue,
+		   which can be sent now. It sounds cool,
+		   but, however, this is wrong in principle.
+		   We MUST NOT reorder packets under these circumstances.
+
+		   Really, if we split the flow into independent
+		   subflows, it would be a very good solution.
+		   This is the main idea of all FQ algorithms
+		   (cf. CSZ, HPFQ, HFSC)
+		 */
+
+		if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
+			/* When requeue fails skb is dropped */
+			sch->q.qlen--;
+			sch->qstats.drops++;
+		}
+
+		sch->flags |= TCQ_F_THROTTLED;
+		sch->qstats.overlimits++;
+	}
+	return NULL;
+}
+
+static void tbf_reset(struct Qdisc* sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset(q->qdisc);
+	sch->q.qlen = 0;
+	PSCHED_GET_TIME(q->t_c);
+	q->tokens = q->buffer;
+	q->ptokens = q->mtu;
+	sch->flags &= ~TCQ_F_THROTTLED;
+	del_timer(&q->wd_timer);
+}
+
+static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit)
+{
+	struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
+        struct rtattr *rta;
+	int ret;
+
+	if (q) {
+		rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+		if (rta) {
+			rta->rta_type = RTM_NEWQDISC;
+			rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 
+			((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
+
+			ret = q->ops->change(q, rta);
+			kfree(rta);
+
+			if (ret == 0)
+				return q;
+		}
+		qdisc_destroy(q);
+	}
+
+	return NULL;
+}
+
+static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
+{
+	int err = -EINVAL;
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	struct rtattr *tb[TCA_TBF_PTAB];
+	struct tc_tbf_qopt *qopt;
+	struct qdisc_rate_table *rtab = NULL;
+	struct qdisc_rate_table *ptab = NULL;
+	struct Qdisc *child = NULL;
+	int max_size,n;
+
+	if (rtattr_parse_nested(tb, TCA_TBF_PTAB, opt) ||
+	    tb[TCA_TBF_PARMS-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt))
+		goto done;
+
+	qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]);
+	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]);
+	if (rtab == NULL)
+		goto done;
+
+	if (qopt->peakrate.rate) {
+		if (qopt->peakrate.rate > qopt->rate.rate)
+			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]);
+		if (ptab == NULL)
+			goto done;
+	}
+
+	for (n = 0; n < 256; n++)
+		if (rtab->data[n] > qopt->buffer) break;
+	max_size = (n << qopt->rate.cell_log)-1;
+	if (ptab) {
+		int size;
+
+		for (n = 0; n < 256; n++)
+			if (ptab->data[n] > qopt->mtu) break;
+		size = (n << qopt->peakrate.cell_log)-1;
+		if (size < max_size) max_size = size;
+	}
+	if (max_size < 0)
+		goto done;
+
+	if (q->qdisc == &noop_qdisc) {
+		if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL)
+			goto done;
+	}
+
+	sch_tree_lock(sch);
+	if (child) q->qdisc = child;
+	q->limit = qopt->limit;
+	q->mtu = qopt->mtu;
+	q->max_size = max_size;
+	q->buffer = qopt->buffer;
+	q->tokens = q->buffer;
+	q->ptokens = q->mtu;
+	rtab = xchg(&q->R_tab, rtab);
+	ptab = xchg(&q->P_tab, ptab);
+	sch_tree_unlock(sch);
+	err = 0;
+done:
+	if (rtab)
+		qdisc_put_rtab(rtab);
+	if (ptab)
+		qdisc_put_rtab(ptab);
+	return err;
+}
+
+static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	PSCHED_GET_TIME(q->t_c);
+	init_timer(&q->wd_timer);
+	q->wd_timer.function = tbf_watchdog;
+	q->wd_timer.data = (unsigned long)sch;
+
+	q->qdisc = &noop_qdisc;
+
+	return tbf_change(sch, opt);
+}
+
+static void tbf_destroy(struct Qdisc *sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	del_timer(&q->wd_timer);
+
+	if (q->P_tab)
+		qdisc_put_rtab(q->P_tab);
+	if (q->R_tab)
+		qdisc_put_rtab(q->R_tab);
+
+	qdisc_destroy(q->qdisc);
+}
+
+static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_tbf_qopt opt;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	opt.limit = q->limit;
+	opt.rate = q->R_tab->rate;
+	if (q->P_tab)
+		opt.peakrate = q->P_tab->rate;
+	else
+		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+	opt.mtu = q->mtu;
+	opt.buffer = q->buffer;
+	RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (cl != 1) 	/* only one class */
+		return -ENOENT;
+
+	tcm->tcm_handle |= TC_H_MIN(1);
+	tcm->tcm_info = q->qdisc->handle;
+
+	return 0;
+}
+
+static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = xchg(&q->qdisc, new);
+	qdisc_reset(*old);
+	sch->q.qlen = 0;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	return q->qdisc;
+}
+
+static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void tbf_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 
+			    struct rtattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static int tbf_delete(struct Qdisc *sch, unsigned long arg)
+{
+	return -ENOSYS;
+}
+
+static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	return NULL;
+}
+
+static struct Qdisc_class_ops tbf_class_ops =
+{
+	.graft		=	tbf_graft,
+	.leaf		=	tbf_leaf,
+	.get		=	tbf_get,
+	.put		=	tbf_put,
+	.change		=	tbf_change_class,
+	.delete		=	tbf_delete,
+	.walk		=	tbf_walk,
+	.tcf_chain	=	tbf_find_tcf,
+	.dump		=	tbf_dump_class,
+};
+
+static struct Qdisc_ops tbf_qdisc_ops = {
+	.next		=	NULL,
+	.cl_ops		=	&tbf_class_ops,
+	.id		=	"tbf",
+	.priv_size	=	sizeof(struct tbf_sched_data),
+	.enqueue	=	tbf_enqueue,
+	.dequeue	=	tbf_dequeue,
+	.requeue	=	tbf_requeue,
+	.drop		=	tbf_drop,
+	.init		=	tbf_init,
+	.reset		=	tbf_reset,
+	.destroy	=	tbf_destroy,
+	.change		=	tbf_change,
+	.dump		=	tbf_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init tbf_module_init(void)
+{
+	return register_qdisc(&tbf_qdisc_ops);
+}
+
+static void __exit tbf_module_exit(void)
+{
+	unregister_qdisc(&tbf_qdisc_ops);
+}
+module_init(tbf_module_init)
+module_exit(tbf_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
new file mode 100644
index 00000000000..6cf0342706b
--- /dev/null
+++ b/net/sched/sch_teql.c
@@ -0,0 +1,511 @@
+/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <linux/moduleparam.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/*
+   How to setup it.
+   ----------------
+
+   After loading this module you will find a new device teqlN
+   and new qdisc with the same name. To join a slave to the equalizer
+   you should just set this qdisc on a device f.e.
+
+   # tc qdisc add dev eth0 root teql0
+   # tc qdisc add dev eth1 root teql0
+
+   That's all. Full PnP 8)
+
+   Applicability.
+   --------------
+
+   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
+      signal and generate EOI events. If you want to equalize virtual devices
+      like tunnels, use a normal eql device.
+   2. This device puts no limitations on physical slave characteristics
+      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
+      Certainly, large difference in link speeds will make the resulting
+      eqalized link unusable, because of huge packet reordering.
+      I estimate an upper useful difference as ~10 times.
+   3. If the slave requires address resolution, only protocols using
+      neighbour cache (IPv4/IPv6) will work over the equalized link.
+      Other protocols are still allowed to use the slave device directly,
+      which will not break load balancing, though native slave
+      traffic will have the highest priority.  */
+
+struct teql_master
+{
+	struct Qdisc_ops qops;
+	struct net_device *dev;
+	struct Qdisc *slaves;
+	struct list_head master_list;
+	struct net_device_stats stats;
+};
+
+struct teql_sched_data
+{
+	struct Qdisc *next;
+	struct teql_master *m;
+	struct neighbour *ncache;
+	struct sk_buff_head q;
+};
+
+#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+
+#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST)
+
+/* "teql*" qdisc routines */
+
+static int
+teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct net_device *dev = sch->dev;
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_tail(&q->q, skb);
+	if (q->q.qlen <= dev->tx_queue_len) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	__skb_unlink(skb, &q->q);
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_DROP;
+}
+
+static int
+teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	__skb_queue_head(&q->q, skb);
+	sch->qstats.requeues++;
+	return 0;
+}
+
+static struct sk_buff *
+teql_dequeue(struct Qdisc* sch)
+{
+	struct teql_sched_data *dat = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = __skb_dequeue(&dat->q);
+	if (skb == NULL) {
+		struct net_device *m = dat->m->dev->qdisc->dev;
+		if (m) {
+			dat->m->slaves = sch;
+			netif_wake_queue(m);
+		}
+	}
+	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
+	return skb;
+}
+
+static __inline__ void
+teql_neigh_release(struct neighbour *n)
+{
+	if (n)
+		neigh_release(n);
+}
+
+static void
+teql_reset(struct Qdisc* sch)
+{
+	struct teql_sched_data *dat = qdisc_priv(sch);
+
+	skb_queue_purge(&dat->q);
+	sch->q.qlen = 0;
+	teql_neigh_release(xchg(&dat->ncache, NULL));
+}
+
+static void
+teql_destroy(struct Qdisc* sch)
+{
+	struct Qdisc *q, *prev;
+	struct teql_sched_data *dat = qdisc_priv(sch);
+	struct teql_master *master = dat->m;
+
+	if ((prev = master->slaves) != NULL) {
+		do {
+			q = NEXT_SLAVE(prev);
+			if (q == sch) {
+				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
+				if (q == master->slaves) {
+					master->slaves = NEXT_SLAVE(q);
+					if (q == master->slaves) {
+						master->slaves = NULL;
+						spin_lock_bh(&master->dev->queue_lock);
+						qdisc_reset(master->dev->qdisc);
+						spin_unlock_bh(&master->dev->queue_lock);
+					}
+				}
+				skb_queue_purge(&dat->q);
+				teql_neigh_release(xchg(&dat->ncache, NULL));
+				break;
+			}
+				
+		} while ((prev = q) != master->slaves);
+	}
+}
+
+static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct net_device *dev = sch->dev;
+	struct teql_master *m = (struct teql_master*)sch->ops;
+	struct teql_sched_data *q = qdisc_priv(sch);
+
+	if (dev->hard_header_len > m->dev->hard_header_len)
+		return -EINVAL;
+
+	if (m->dev == dev)
+		return -ELOOP;
+
+	q->m = m;
+
+	skb_queue_head_init(&q->q);
+
+	if (m->slaves) {
+		if (m->dev->flags & IFF_UP) {
+			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
+			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
+			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
+			    || dev->mtu < m->dev->mtu)
+				return -EINVAL;
+		} else {
+			if (!(dev->flags&IFF_POINTOPOINT))
+				m->dev->flags &= ~IFF_POINTOPOINT;
+			if (!(dev->flags&IFF_BROADCAST))
+				m->dev->flags &= ~IFF_BROADCAST;
+			if (!(dev->flags&IFF_MULTICAST))
+				m->dev->flags &= ~IFF_MULTICAST;
+			if (dev->mtu < m->dev->mtu)
+				m->dev->mtu = dev->mtu;
+		}
+		q->next = NEXT_SLAVE(m->slaves);
+		NEXT_SLAVE(m->slaves) = sch;
+	} else {
+		q->next = sch;
+		m->slaves = sch;
+		m->dev->mtu = dev->mtu;
+		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
+	}
+	return 0;
+}
+
+/* "teql*" netdevice routines */
+
+static int
+__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+{
+	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
+	struct neighbour *mn = skb->dst->neighbour;
+	struct neighbour *n = q->ncache;
+
+	if (mn->tbl == NULL)
+		return -EINVAL;
+	if (n && n->tbl == mn->tbl &&
+	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
+		atomic_inc(&n->refcnt);
+	} else {
+		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
+		if (IS_ERR(n))
+			return PTR_ERR(n);
+	}
+	if (neigh_event_send(n, skb_res) == 0) {
+		int err;
+		read_lock(&n->lock);
+		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
+		read_unlock(&n->lock);
+		if (err < 0) {
+			neigh_release(n);
+			return -EINVAL;
+		}
+		teql_neigh_release(xchg(&q->ncache, n));
+		return 0;
+	}
+	neigh_release(n);
+	return (skb_res == NULL) ? -EAGAIN : 1;
+}
+
+static __inline__ int
+teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+{
+	if (dev->hard_header == NULL ||
+	    skb->dst == NULL ||
+	    skb->dst->neighbour == NULL)
+		return 0;
+	return __teql_resolve(skb, skb_res, dev);
+}
+
+static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct teql_master *master = (void*)dev->priv;
+	struct Qdisc *start, *q;
+	int busy;
+	int nores;
+	int len = skb->len;
+	struct sk_buff *skb_res = NULL;
+
+	start = master->slaves;
+
+restart:
+	nores = 0;
+	busy = 0;
+
+	if ((q = start) == NULL)
+		goto drop;
+
+	do {
+		struct net_device *slave = q->dev;
+		
+		if (slave->qdisc_sleeping != q)
+			continue;
+		if (netif_queue_stopped(slave) || ! netif_running(slave)) {
+			busy = 1;
+			continue;
+		}
+
+		switch (teql_resolve(skb, skb_res, slave)) {
+		case 0:
+			if (spin_trylock(&slave->xmit_lock)) {
+				slave->xmit_lock_owner = smp_processor_id();
+				if (!netif_queue_stopped(slave) &&
+				    slave->hard_start_xmit(skb, slave) == 0) {
+					slave->xmit_lock_owner = -1;
+					spin_unlock(&slave->xmit_lock);
+					master->slaves = NEXT_SLAVE(q);
+					netif_wake_queue(dev);
+					master->stats.tx_packets++;
+					master->stats.tx_bytes += len;
+					return 0;
+				}
+				slave->xmit_lock_owner = -1;
+				spin_unlock(&slave->xmit_lock);
+			}
+			if (netif_queue_stopped(dev))
+				busy = 1;
+			break;
+		case 1:
+			master->slaves = NEXT_SLAVE(q);
+			return 0;
+		default:
+			nores = 1;
+			break;
+		}
+		__skb_pull(skb, skb->nh.raw - skb->data);
+	} while ((q = NEXT_SLAVE(q)) != start);
+
+	if (nores && skb_res == NULL) {
+		skb_res = skb;
+		goto restart;
+	}
+
+	if (busy) {
+		netif_stop_queue(dev);
+		return 1;
+	}
+	master->stats.tx_errors++;
+
+drop:
+	master->stats.tx_dropped++;
+	dev_kfree_skb(skb);
+	return 0;
+}
+
+static int teql_master_open(struct net_device *dev)
+{
+	struct Qdisc * q;
+	struct teql_master *m = (void*)dev->priv;
+	int mtu = 0xFFFE;
+	unsigned flags = IFF_NOARP|IFF_MULTICAST;
+
+	if (m->slaves == NULL)
+		return -EUNATCH;
+
+	flags = FMASK;
+
+	q = m->slaves;
+	do {
+		struct net_device *slave = q->dev;
+
+		if (slave == NULL)
+			return -EUNATCH;
+
+		if (slave->mtu < mtu)
+			mtu = slave->mtu;
+		if (slave->hard_header_len > LL_MAX_HEADER)
+			return -EINVAL;
+
+		/* If all the slaves are BROADCAST, master is BROADCAST
+		   If all the slaves are PtP, master is PtP
+		   Otherwise, master is NBMA.
+		 */
+		if (!(slave->flags&IFF_POINTOPOINT))
+			flags &= ~IFF_POINTOPOINT;
+		if (!(slave->flags&IFF_BROADCAST))
+			flags &= ~IFF_BROADCAST;
+		if (!(slave->flags&IFF_MULTICAST))
+			flags &= ~IFF_MULTICAST;
+	} while ((q = NEXT_SLAVE(q)) != m->slaves);
+
+	m->dev->mtu = mtu;
+	m->dev->flags = (m->dev->flags&~FMASK) | flags;
+	netif_start_queue(m->dev);
+	return 0;
+}
+
+static int teql_master_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static struct net_device_stats *teql_master_stats(struct net_device *dev)
+{
+	struct teql_master *m = (void*)dev->priv;
+	return &m->stats;
+}
+
+static int teql_master_mtu(struct net_device *dev, int new_mtu)
+{
+	struct teql_master *m = (void*)dev->priv;
+	struct Qdisc *q;
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	q = m->slaves;
+	if (q) {
+		do {
+			if (new_mtu > q->dev->mtu)
+				return -EINVAL;
+		} while ((q=NEXT_SLAVE(q)) != m->slaves);
+	}
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static __init void teql_master_setup(struct net_device *dev)
+{
+	struct teql_master *master = dev->priv;
+	struct Qdisc_ops *ops = &master->qops;
+
+	master->dev	= dev;
+	ops->priv_size  = sizeof(struct teql_sched_data);
+	
+	ops->enqueue	=	teql_enqueue;
+	ops->dequeue	=	teql_dequeue;
+	ops->requeue	=	teql_requeue;
+	ops->init	=	teql_qdisc_init;
+	ops->reset	=	teql_reset;
+	ops->destroy	=	teql_destroy;
+	ops->owner	=	THIS_MODULE;
+
+	dev->open		= teql_master_open;
+	dev->hard_start_xmit	= teql_master_xmit;
+	dev->stop		= teql_master_close;
+	dev->get_stats		= teql_master_stats;
+	dev->change_mtu		= teql_master_mtu;
+	dev->type		= ARPHRD_VOID;
+	dev->mtu		= 1500;
+	dev->tx_queue_len	= 100;
+	dev->flags		= IFF_NOARP;
+	dev->hard_header_len	= LL_MAX_HEADER;
+	SET_MODULE_OWNER(dev);
+}
+
+static LIST_HEAD(master_dev_list);
+static int max_equalizers = 1;
+module_param(max_equalizers, int, 0);
+MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
+
+static int __init teql_init(void)
+{
+	int i;
+	int err = -ENODEV;
+
+	for (i = 0; i < max_equalizers; i++) {
+		struct net_device *dev;
+		struct teql_master *master;
+
+		dev = alloc_netdev(sizeof(struct teql_master),
+				  "teql%d", teql_master_setup);
+		if (!dev) {
+			err = -ENOMEM;
+			break;
+		}
+
+		if ((err = register_netdev(dev))) {
+			free_netdev(dev);
+			break;
+		}
+
+		master = dev->priv;
+
+		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
+		err = register_qdisc(&master->qops);
+
+		if (err) {
+			unregister_netdev(dev);
+			free_netdev(dev);
+			break;
+		}
+
+		list_add_tail(&master->master_list, &master_dev_list);
+	}
+	return i ? 0 : err;
+}
+
+static void __exit teql_exit(void) 
+{
+	struct teql_master *master, *nxt;
+
+	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
+
+		list_del(&master->master_list);
+
+		unregister_qdisc(&master->qops);
+		unregister_netdev(master->dev);
+		free_netdev(master->dev);
+	}
+}
+
+module_init(teql_init);
+module_exit(teql_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
new file mode 100644
index 00000000000..9cba49e2ad4
--- /dev/null
+++ b/net/sctp/Kconfig
@@ -0,0 +1,89 @@
+#
+# SCTP configuration
+#
+
+menu "SCTP Configuration (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+
+config IP_SCTP
+	tristate "The SCTP Protocol (EXPERIMENTAL)"
+	depends on IPV6 || IPV6=n
+	select CRYPTO if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5
+	select CRYPTO_HMAC if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5
+	select CRYPTO_SHA1 if SCTP_HMAC_SHA1
+	select CRYPTO_MD5 if SCTP_HMAC_MD5
+	---help---
+	  Stream Control Transmission Protocol
+
+	  From RFC 2960 <http://www.ietf.org/rfc/rfc2960.txt>.
+
+	  "SCTP is a reliable transport protocol operating on top of a
+	  connectionless packet network such as IP.  It offers the following
+	  services to its users:
+
+	  -- acknowledged error-free non-duplicated transfer of user data,
+	  -- data fragmentation to conform to discovered path MTU size,
+	  -- sequenced delivery of user messages within multiple streams,
+	  with an option for order-of-arrival delivery of individual user
+	  messages,
+	  -- optional bundling of multiple user messages into a single SCTP
+	  packet, and
+	  -- network-level fault tolerance through supporting of multi-
+	  homing at either or both ends of an association."
+
+	  To compile this protocol support as a module, choose M here: the
+	  module will be called sctp.
+
+	  If in doubt, say N.
+
+config SCTP_DBG_MSG
+	bool "SCTP: Debug messages"
+	depends on IP_SCTP
+	help
+	  If you say Y, this will enable verbose debugging messages. 
+
+	  If unsure, say N.  However, if you are running into problems, use 
+	  this option to gather detailed trace information
+
+config SCTP_DBG_OBJCNT
+	bool "SCTP: Debug object counts"
+	depends on IP_SCTP
+	help
+	  If you say Y, this will enable debugging support for counting the 
+	  type of objects that are currently allocated.  This is useful for 
+	  identifying memory leaks.   If the /proc filesystem is enabled this 
+	  debug information can be viewed by 
+	  'cat /proc/net/sctp/sctp_dbg_objcnt'
+
+	  If unsure, say N
+
+choice
+	prompt "SCTP: Cookie HMAC Algorithm"
+	depends on IP_SCTP
+	default SCTP_HMAC_MD5
+	help
+	  HMAC algorithm to be used during association initialization.  It
+	  is strongly recommended to use HMAC-SHA1 or HMAC-MD5.  See 
+	  configuration for Cryptographic API and enable those algorithms
+          to make usable by SCTP. 
+
+config SCTP_HMAC_NONE
+	bool "None"
+	help 
+	  Choosing this disables the use of an HMAC during association 
+	  establishment.  It is advised to use either HMAC-MD5 or HMAC-SHA1.
+
+config SCTP_HMAC_SHA1
+	bool "HMAC-SHA1"
+	help 
+	  Enable the use of HMAC-SHA1 during association establishment.  It 
+	  is advised to use either HMAC-MD5 or HMAC-SHA1.
+
+config SCTP_HMAC_MD5
+	bool "HMAC-MD5"
+	help
+	  Enable the use of HMAC-MD5 during association establishment.  It is 
+	  advised to use either HMAC-MD5 or HMAC-SHA1.
+
+endchoice
+endmenu
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
new file mode 100644
index 00000000000..70c828bbe44
--- /dev/null
+++ b/net/sctp/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for SCTP support code.
+#
+
+obj-$(CONFIG_IP_SCTP) += sctp.o
+
+sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
+	  protocol.o endpointola.o associola.o \
+	  transport.o chunk.o sm_make_chunk.o ulpevent.o \
+	  inqueue.o outqueue.o ulpqueue.o command.o \
+	  tsnmap.o bind_addr.o socket.o primitive.o \
+	  output.o input.o debug.o ssnmap.o proc.o crc32c.o
+
+sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
+sctp-$(CONFIG_SYSCTL) += sysctl.o
+
+sctp-$(subst m,y,$(CONFIG_IPV6))	+= ipv6.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
new file mode 100644
index 00000000000..663843d97a9
--- /dev/null
+++ b/net/sctp/associola.c
@@ -0,0 +1,1205 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * This module provides the abstraction for an SCTP association.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Hui Huang             <hui.huang@nokia.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Ryan Layer	    <rmlayer@us.ibm.com>
+ *    Kevin Gao             <kevin.gao@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <net/ipv6.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for internal functions. */
+static void sctp_assoc_bh_rcv(struct sctp_association *asoc);
+
+
+/* 1st Level Abstractions. */
+
+/* Initialize a new association from provided memory. */
+static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
+					  const struct sctp_endpoint *ep,
+					  const struct sock *sk,
+					  sctp_scope_t scope,
+					  int gfp)
+{
+	struct sctp_sock *sp;
+	int i;
+
+	/* Retrieve the SCTP per socket area.  */
+	sp = sctp_sk((struct sock *)sk);
+
+	/* Init all variables to a known value.  */
+	memset(asoc, 0, sizeof(struct sctp_association));
+
+	/* Discarding const is appropriate here.  */
+	asoc->ep = (struct sctp_endpoint *)ep;
+	sctp_endpoint_hold(asoc->ep);
+
+	/* Hold the sock.  */
+	asoc->base.sk = (struct sock *)sk;
+	sock_hold(asoc->base.sk);
+
+	/* Initialize the common base substructure.  */
+	asoc->base.type = SCTP_EP_TYPE_ASSOCIATION;
+
+	/* Initialize the object handling fields.  */
+	atomic_set(&asoc->base.refcnt, 1);
+	asoc->base.dead = 0;
+	asoc->base.malloced = 0;
+
+	/* Initialize the bind addr area.  */
+	sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
+	rwlock_init(&asoc->base.addr_lock);
+
+	asoc->state = SCTP_STATE_CLOSED;
+
+	/* Set these values from the socket values, a conversion between
+	 * millsecons to seconds/microseconds must also be done.
+	 */
+	asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000;
+	asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000)
+					* 1000;
+	asoc->pmtu = 0;
+	asoc->frag_point = 0;
+
+	/* Set the association max_retrans and RTO values from the
+	 * socket values.
+	 */
+	asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
+	asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
+	asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
+	asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min);
+
+	asoc->overall_error_count = 0;
+
+	/* Initialize the maximum mumber of new data packets that can be sent
+	 * in a burst.
+	 */
+	asoc->max_burst = sctp_max_burst;
+
+	/* Copy things from the endpoint.  */
+	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) {
+		asoc->timeouts[i] = ep->timeouts[i];
+		init_timer(&asoc->timers[i]);
+		asoc->timers[i].function = sctp_timer_events[i];
+		asoc->timers[i].data = (unsigned long) asoc;
+	}
+
+	/* Pull default initialization values from the sock options.
+	 * Note: This assumes that the values have already been
+	 * validated in the sock.
+	 */
+	asoc->c.sinit_max_instreams = sp->initmsg.sinit_max_instreams;
+	asoc->c.sinit_num_ostreams  = sp->initmsg.sinit_num_ostreams;
+	asoc->max_init_attempts	= sp->initmsg.sinit_max_attempts;
+
+	asoc->max_init_timeo =
+		 msecs_to_jiffies(sp->initmsg.sinit_max_init_timeo);
+
+	/* Allocate storage for the ssnmap after the inbound and outbound
+	 * streams have been negotiated during Init.
+	 */
+	asoc->ssnmap = NULL;
+
+	/* Set the local window size for receive.
+	 * This is also the rcvbuf space per association.
+	 * RFC 6 - A SCTP receiver MUST be able to receive a minimum of
+	 * 1500 bytes in one SCTP packet.
+	 */
+	if (sk->sk_rcvbuf < SCTP_DEFAULT_MINWINDOW)
+		asoc->rwnd = SCTP_DEFAULT_MINWINDOW;
+	else
+		asoc->rwnd = sk->sk_rcvbuf;
+
+	asoc->a_rwnd = asoc->rwnd;
+
+	asoc->rwnd_over = 0;
+
+	/* Use my own max window until I learn something better.  */
+	asoc->peer.rwnd = SCTP_DEFAULT_MAXWINDOW;
+
+	/* Set the sndbuf size for transmit.  */
+	asoc->sndbuf_used = 0;
+
+	init_waitqueue_head(&asoc->wait);
+
+	asoc->c.my_vtag = sctp_generate_tag(ep);
+	asoc->peer.i.init_tag = 0;     /* INIT needs a vtag of 0. */
+	asoc->c.peer_vtag = 0;
+	asoc->c.my_ttag   = 0;
+	asoc->c.peer_ttag = 0;
+	asoc->c.my_port = ep->base.bind_addr.port;
+
+	asoc->c.initial_tsn = sctp_generate_tsn(ep);
+
+	asoc->next_tsn = asoc->c.initial_tsn;
+
+	asoc->ctsn_ack_point = asoc->next_tsn - 1;
+	asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+	asoc->highest_sacked = asoc->ctsn_ack_point;
+	asoc->last_cwr_tsn = asoc->ctsn_ack_point;
+	asoc->unack_data = 0;
+
+	SCTP_DEBUG_PRINTK("myctsnap for %s INIT as 0x%x.\n",
+			  asoc->ep->debug_name,
+			  asoc->ctsn_ack_point);
+
+	/* ADDIP Section 4.1 Asconf Chunk Procedures
+	 *
+	 * When an endpoint has an ASCONF signaled change to be sent to the
+	 * remote endpoint it should do the following:
+	 * ...
+	 * A2) a serial number should be assigned to the chunk. The serial
+	 * number SHOULD be a monotonically increasing number. The serial
+	 * numbers SHOULD be initialized at the start of the
+	 * association to the same value as the initial TSN.
+	 */
+	asoc->addip_serial = asoc->c.initial_tsn;
+
+	skb_queue_head_init(&asoc->addip_chunks);
+
+	/* Make an empty list of remote transport addresses.  */
+	INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
+
+	/* RFC 2960 5.1 Normal Establishment of an Association
+	 *
+	 * After the reception of the first data chunk in an
+	 * association the endpoint must immediately respond with a
+	 * sack to acknowledge the data chunk.  Subsequent
+	 * acknowledgements should be done as described in Section
+	 * 6.2.
+	 *
+	 * [We implement this by telling a new association that it
+	 * already received one packet.]
+	 */
+	asoc->peer.sack_needed = 1;
+
+	/* Assume that the peer recongizes ASCONF until reported otherwise
+	 * via an ERROR chunk.
+	 */
+	asoc->peer.asconf_capable = 1;
+
+	/* Create an input queue.  */
+	sctp_inq_init(&asoc->base.inqueue);
+	sctp_inq_set_th_handler(&asoc->base.inqueue,
+				    (void (*)(void *))sctp_assoc_bh_rcv,
+				    asoc);
+
+	/* Create an output queue.  */
+	sctp_outq_init(asoc, &asoc->outqueue);
+
+	if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+		goto fail_init;
+
+	/* Set up the tsn tracking. */
+	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, 0);
+
+	asoc->need_ecne = 0;
+
+	asoc->assoc_id = 0;
+
+	/* Assume that peer would support both address types unless we are
+	 * told otherwise.
+	 */
+	asoc->peer.ipv4_address = 1;
+	asoc->peer.ipv6_address = 1;
+	INIT_LIST_HEAD(&asoc->asocs);
+
+	asoc->autoclose = sp->autoclose;
+
+	asoc->default_stream = sp->default_stream;
+	asoc->default_ppid = sp->default_ppid;
+	asoc->default_flags = sp->default_flags;
+	asoc->default_context = sp->default_context;
+	asoc->default_timetolive = sp->default_timetolive;
+
+	return asoc;
+
+fail_init:
+	sctp_endpoint_put(asoc->ep);
+	sock_put(asoc->base.sk);
+	return NULL;
+}
+
+/* Allocate and initialize a new association */
+struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
+					 const struct sock *sk,
+					 sctp_scope_t scope, int gfp)
+{
+	struct sctp_association *asoc;
+
+	asoc = t_new(struct sctp_association, gfp);
+	if (!asoc)
+		goto fail;
+
+	if (!sctp_association_init(asoc, ep, sk, scope, gfp))
+		goto fail_init;
+
+	asoc->base.malloced = 1;
+	SCTP_DBG_OBJCNT_INC(assoc);
+
+	return asoc;
+
+fail_init:
+	kfree(asoc);
+fail:
+	return NULL;
+}
+
+/* Free this association if possible.  There may still be users, so
+ * the actual deallocation may be delayed.
+ */
+void sctp_association_free(struct sctp_association *asoc)
+{
+	struct sock *sk = asoc->base.sk;
+	struct sctp_transport *transport;
+	struct list_head *pos, *temp;
+	int i;
+
+	list_del(&asoc->asocs);
+
+	/* Decrement the backlog value for a TCP-style listening socket. */
+	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
+		sk->sk_ack_backlog--;
+
+	/* Mark as dead, so other users can know this structure is
+	 * going away.
+	 */
+	asoc->base.dead = 1;
+
+	/* Dispose of any data lying around in the outqueue. */
+	sctp_outq_free(&asoc->outqueue);
+
+	/* Dispose of any pending messages for the upper layer. */
+	sctp_ulpq_free(&asoc->ulpq);
+
+	/* Dispose of any pending chunks on the inqueue. */
+	sctp_inq_free(&asoc->base.inqueue);
+
+	/* Free ssnmap storage. */
+	sctp_ssnmap_free(asoc->ssnmap);
+
+	/* Clean up the bound address list. */
+	sctp_bind_addr_free(&asoc->base.bind_addr);
+
+	/* Do we need to go through all of our timers and
+	 * delete them?   To be safe we will try to delete all, but we
+	 * should be able to go through and make a guess based
+	 * on our state.
+	 */
+	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) {
+		if (timer_pending(&asoc->timers[i]) &&
+		    del_timer(&asoc->timers[i]))
+			sctp_association_put(asoc);
+	}
+
+	/* Free peer's cached cookie. */
+	if (asoc->peer.cookie) {
+		kfree(asoc->peer.cookie);
+	}
+
+	/* Release the transport structures. */
+	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		list_del(pos);
+		sctp_transport_free(transport);
+	}
+
+	/* Free any cached ASCONF_ACK chunk. */
+	if (asoc->addip_last_asconf_ack)
+		sctp_chunk_free(asoc->addip_last_asconf_ack);
+
+	/* Free any cached ASCONF chunk. */
+	if (asoc->addip_last_asconf)
+		sctp_chunk_free(asoc->addip_last_asconf);
+
+	sctp_association_put(asoc);
+}
+
+/* Cleanup and free up an association. */
+static void sctp_association_destroy(struct sctp_association *asoc)
+{
+	SCTP_ASSERT(asoc->base.dead, "Assoc is not dead", return);
+
+	sctp_endpoint_put(asoc->ep);
+	sock_put(asoc->base.sk);
+
+	if (asoc->assoc_id != 0) {
+		spin_lock_bh(&sctp_assocs_id_lock);
+		idr_remove(&sctp_assocs_id, asoc->assoc_id);
+		spin_unlock_bh(&sctp_assocs_id_lock);
+	}
+
+	if (asoc->base.malloced) {
+		kfree(asoc);
+		SCTP_DBG_OBJCNT_DEC(assoc);
+	}
+}
+
+/* Change the primary destination address for the peer. */
+void sctp_assoc_set_primary(struct sctp_association *asoc,
+			    struct sctp_transport *transport)
+{
+	asoc->peer.primary_path = transport;
+
+	/* Set a default msg_name for events. */
+	memcpy(&asoc->peer.primary_addr, &transport->ipaddr,
+	       sizeof(union sctp_addr));
+
+	/* If the primary path is changing, assume that the
+	 * user wants to use this new path.
+	 */
+	if (transport->active)
+		asoc->peer.active_path = transport;
+
+	/*
+	 * SFR-CACC algorithm:
+	 * Upon the receipt of a request to change the primary
+	 * destination address, on the data structure for the new
+	 * primary destination, the sender MUST do the following:
+	 *
+	 * 1) If CHANGEOVER_ACTIVE is set, then there was a switch
+	 * to this destination address earlier. The sender MUST set
+	 * CYCLING_CHANGEOVER to indicate that this switch is a
+	 * double switch to the same destination address.
+	 */
+	if (transport->cacc.changeover_active)
+		transport->cacc.cycling_changeover = 1;
+
+	/* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that
+	 * a changeover has occurred.
+	 */
+	transport->cacc.changeover_active = 1;
+
+	/* 3) The sender MUST store the next TSN to be sent in
+	 * next_tsn_at_change.
+	 */
+	transport->cacc.next_tsn_at_change = asoc->next_tsn;
+}
+
+/* Add a transport address to an association.  */
+struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
+					   const union sctp_addr *addr,
+					   int gfp)
+{
+	struct sctp_transport *peer;
+	struct sctp_sock *sp;
+	unsigned short port;
+
+	sp = sctp_sk(asoc->base.sk);
+
+	/* AF_INET and AF_INET6 share common port field. */
+	port = addr->v4.sin_port;
+
+	/* Set the port if it has not been set yet.  */
+	if (0 == asoc->peer.port)
+		asoc->peer.port = port;
+
+	/* Check to see if this is a duplicate. */
+	peer = sctp_assoc_lookup_paddr(asoc, addr);
+	if (peer)
+		return peer;
+
+	peer = sctp_transport_new(addr, gfp);
+	if (!peer)
+		return NULL;
+
+	sctp_transport_set_owner(peer, asoc);
+
+	/* Initialize the pmtu of the transport. */
+	sctp_transport_pmtu(peer);
+
+	/* If this is the first transport addr on this association,
+	 * initialize the association PMTU to the peer's PMTU.
+	 * If not and the current association PMTU is higher than the new
+	 * peer's PMTU, reset the association PMTU to the new peer's PMTU.
+	 */
+	if (asoc->pmtu)
+		asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu);
+	else
+		asoc->pmtu = peer->pmtu;
+
+	SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to "
+			  "%d\n", asoc, asoc->pmtu);
+
+	asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
+
+	/* The asoc->peer.port might not be meaningful yet, but
+	 * initialize the packet structure anyway.
+	 */
+	sctp_packet_init(&peer->packet, peer, asoc->base.bind_addr.port,
+			 asoc->peer.port);
+
+	/* 7.2.1 Slow-Start
+	 *
+	 * o The initial cwnd before DATA transmission or after a sufficiently
+	 *   long idle period MUST be set to
+	 *      min(4*MTU, max(2*MTU, 4380 bytes))
+	 *
+	 * o The initial value of ssthresh MAY be arbitrarily high
+	 *   (for example, implementations MAY use the size of the
+	 *   receiver advertised window).
+	 */
+	peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380));
+
+	/* At this point, we may not have the receiver's advertised window,
+	 * so initialize ssthresh to the default value and it will be set
+	 * later when we process the INIT.
+	 */
+	peer->ssthresh = SCTP_DEFAULT_MAXWINDOW;
+
+	peer->partial_bytes_acked = 0;
+	peer->flight_size = 0;
+
+	/* By default, enable heartbeat for peer address. */
+	peer->hb_allowed = 1;
+
+	/* Initialize the peer's heartbeat interval based on the
+	 * sock configured value.
+	 */
+	peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval);
+
+	/* Set the path max_retrans.  */
+	peer->max_retrans = sp->paddrparam.spp_pathmaxrxt;
+
+	/* Set the transport's RTO.initial value */
+	peer->rto = asoc->rto_initial;
+
+	/* Attach the remote transport to our asoc.  */
+	list_add_tail(&peer->transports, &asoc->peer.transport_addr_list);
+
+	/* If we do not yet have a primary path, set one.  */
+	if (!asoc->peer.primary_path) {
+		sctp_assoc_set_primary(asoc, peer);
+		asoc->peer.retran_path = peer;
+	}
+
+	if (asoc->peer.active_path == asoc->peer.retran_path)
+		asoc->peer.retran_path = peer;
+
+	return peer;
+}
+
+/* Delete a transport address from an association.  */
+void sctp_assoc_del_peer(struct sctp_association *asoc,
+			 const union sctp_addr *addr)
+{
+	struct list_head	*pos;
+	struct list_head	*temp;
+	struct sctp_transport	*peer = NULL;
+	struct sctp_transport	*transport;
+
+	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) {
+			peer = transport;
+			list_del(pos);
+			break;
+		}
+	}
+
+	/* The address we want delete is not in the association. */
+	if (!peer)
+		return;
+
+	/* Get the first transport of asoc. */ 
+	pos = asoc->peer.transport_addr_list.next;
+	transport = list_entry(pos, struct sctp_transport, transports);
+
+	/* Update any entries that match the peer to be deleted. */  
+	if (asoc->peer.primary_path == peer)
+		sctp_assoc_set_primary(asoc, transport);
+	if (asoc->peer.active_path == peer)
+		asoc->peer.active_path = transport;
+	if (asoc->peer.retran_path == peer)
+		asoc->peer.retran_path = transport;
+	if (asoc->peer.last_data_from == peer)
+		asoc->peer.last_data_from = transport;
+
+	sctp_transport_free(peer);
+}
+
+/* Lookup a transport by address. */
+struct sctp_transport *sctp_assoc_lookup_paddr(
+					const struct sctp_association *asoc,
+					const union sctp_addr *address)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+
+	/* Cycle through all transports searching for a peer address. */
+
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+		if (sctp_cmp_addr_exact(address, &t->ipaddr))
+			return t;
+	}
+
+	return NULL;
+}
+
+/* Engage in transport control operations.
+ * Mark the transport up or down and send a notification to the user.
+ * Select and update the new active and retran paths.
+ */
+void sctp_assoc_control_transport(struct sctp_association *asoc,
+				  struct sctp_transport *transport,
+				  sctp_transport_cmd_t command,
+				  sctp_sn_error_t error)
+{
+	struct sctp_transport *t = NULL;
+	struct sctp_transport *first;
+	struct sctp_transport *second;
+	struct sctp_ulpevent *event;
+	struct list_head *pos;
+	int spc_state = 0;
+
+	/* Record the transition on the transport.  */
+	switch (command) {
+	case SCTP_TRANSPORT_UP:
+		transport->active = SCTP_ACTIVE;
+		spc_state = SCTP_ADDR_AVAILABLE;
+		break;
+
+	case SCTP_TRANSPORT_DOWN:
+		transport->active = SCTP_INACTIVE;
+		spc_state = SCTP_ADDR_UNREACHABLE;
+		break;
+
+	default:
+		return;
+	};
+
+	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
+	 * user.
+	 */
+	event = sctp_ulpevent_make_peer_addr_change(asoc,
+				(struct sockaddr_storage *) &transport->ipaddr,
+				0, spc_state, error, GFP_ATOMIC);
+	if (event)
+		sctp_ulpq_tail_event(&asoc->ulpq, event);
+
+	/* Select new active and retran paths. */
+
+	/* Look for the two most recently used active transports.
+	 *
+	 * This code produces the wrong ordering whenever jiffies
+	 * rolls over, but we still get usable transports, so we don't
+	 * worry about it.
+	 */
+	first = NULL; second = NULL;
+
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+
+		if (!t->active)
+			continue;
+		if (!first || t->last_time_heard > first->last_time_heard) {
+			second = first;
+			first = t;
+		}
+		if (!second || t->last_time_heard > second->last_time_heard)
+			second = t;
+	}
+
+	/* RFC 2960 6.4 Multi-Homed SCTP Endpoints
+	 *
+	 * By default, an endpoint should always transmit to the
+	 * primary path, unless the SCTP user explicitly specifies the
+	 * destination transport address (and possibly source
+	 * transport address) to use.
+	 *
+	 * [If the primary is active but not most recent, bump the most
+	 * recently used transport.]
+	 */
+	if (asoc->peer.primary_path->active &&
+	    first != asoc->peer.primary_path) {
+		second = first;
+		first = asoc->peer.primary_path;
+	}
+
+	/* If we failed to find a usable transport, just camp on the
+	 * primary, even if it is inactive.
+	 */
+	if (!first) {
+		first = asoc->peer.primary_path;
+		second = asoc->peer.primary_path;
+	}
+
+	/* Set the active and retran transports.  */
+	asoc->peer.active_path = first;
+	asoc->peer.retran_path = second;
+}
+
+/* Hold a reference to an association. */
+void sctp_association_hold(struct sctp_association *asoc)
+{
+	atomic_inc(&asoc->base.refcnt);
+}
+
+/* Release a reference to an association and cleanup
+ * if there are no more references.
+ */
+void sctp_association_put(struct sctp_association *asoc)
+{
+	if (atomic_dec_and_test(&asoc->base.refcnt))
+		sctp_association_destroy(asoc);
+}
+
+/* Allocate the next TSN, Transmission Sequence Number, for the given
+ * association.
+ */
+__u32 sctp_association_get_next_tsn(struct sctp_association *asoc)
+{
+	/* From Section 1.6 Serial Number Arithmetic:
+	 * Transmission Sequence Numbers wrap around when they reach
+	 * 2**32 - 1.  That is, the next TSN a DATA chunk MUST use
+	 * after transmitting TSN = 2*32 - 1 is TSN = 0.
+	 */
+	__u32 retval = asoc->next_tsn;
+	asoc->next_tsn++;
+	asoc->unack_data++;
+
+	return retval;
+}
+
+/* Compare two addresses to see if they match.  Wildcard addresses
+ * only match themselves.
+ */
+int sctp_cmp_addr_exact(const union sctp_addr *ss1,
+			const union sctp_addr *ss2)
+{
+	struct sctp_af *af;
+
+	af = sctp_get_af_specific(ss1->sa.sa_family);
+	if (unlikely(!af))
+		return 0;
+
+	return af->cmp_addr(ss1, ss2);
+}
+
+/* Return an ecne chunk to get prepended to a packet.
+ * Note:  We are sly and return a shared, prealloced chunk.  FIXME:
+ * No we don't, but we could/should.
+ */
+struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc)
+{
+	struct sctp_chunk *chunk;
+
+	/* Send ECNE if needed.
+	 * Not being able to allocate a chunk here is not deadly.
+	 */
+	if (asoc->need_ecne)
+		chunk = sctp_make_ecne(asoc, asoc->last_ecne_tsn);
+	else
+		chunk = NULL;
+
+	return chunk;
+}
+
+/*
+ * Find which transport this TSN was sent on.
+ */
+struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc,
+					     __u32 tsn)
+{
+	struct sctp_transport *active;
+	struct sctp_transport *match;
+	struct list_head *entry, *pos;
+	struct sctp_transport *transport;
+	struct sctp_chunk *chunk;
+	__u32 key = htonl(tsn);
+
+	match = NULL;
+
+	/*
+	 * FIXME: In general, find a more efficient data structure for
+	 * searching.
+	 */
+
+	/*
+	 * The general strategy is to search each transport's transmitted
+	 * list.   Return which transport this TSN lives on.
+	 *
+	 * Let's be hopeful and check the active_path first.
+	 * Another optimization would be to know if there is only one
+	 * outbound path and not have to look for the TSN at all.
+	 *
+	 */
+
+	active = asoc->peer.active_path;
+
+	list_for_each(entry, &active->transmitted) {
+		chunk = list_entry(entry, struct sctp_chunk, transmitted_list);
+
+		if (key == chunk->subh.data_hdr->tsn) {
+			match = active;
+			goto out;
+		}
+	}
+
+	/* If not found, go search all the other transports. */
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+
+		if (transport == active)
+			break;
+		list_for_each(entry, &transport->transmitted) {
+			chunk = list_entry(entry, struct sctp_chunk,
+					   transmitted_list);
+			if (key == chunk->subh.data_hdr->tsn) {
+				match = transport;
+				goto out;
+			}
+		}
+	}
+out:
+	return match;
+}
+
+/* Is this the association we are looking for? */
+struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
+					   const union sctp_addr *laddr,
+					   const union sctp_addr *paddr)
+{
+	struct sctp_transport *transport;
+
+	sctp_read_lock(&asoc->base.addr_lock);
+
+	if ((asoc->base.bind_addr.port == laddr->v4.sin_port) &&
+	    (asoc->peer.port == paddr->v4.sin_port)) {
+		transport = sctp_assoc_lookup_paddr(asoc, paddr);
+		if (!transport)
+			goto out;
+
+		if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
+					 sctp_sk(asoc->base.sk)))
+			goto out;
+	}
+	transport = NULL;
+
+out:
+	sctp_read_unlock(&asoc->base.addr_lock);
+	return transport;
+}
+
+/* Do delayed input processing.  This is scheduled by sctp_rcv(). */
+static void sctp_assoc_bh_rcv(struct sctp_association *asoc)
+{
+	struct sctp_endpoint *ep;
+	struct sctp_chunk *chunk;
+	struct sock *sk;
+	struct sctp_inq *inqueue;
+	int state;
+	sctp_subtype_t subtype;
+	int error = 0;
+
+	/* The association should be held so we should be safe. */
+	ep = asoc->ep;
+	sk = asoc->base.sk;
+
+	inqueue = &asoc->base.inqueue;
+	sctp_association_hold(asoc);
+	while (NULL != (chunk = sctp_inq_pop(inqueue))) {
+		state = asoc->state;
+		subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
+
+		/* Remember where the last DATA chunk came from so we
+		 * know where to send the SACK.
+		 */
+		if (sctp_chunk_is_data(chunk))
+			asoc->peer.last_data_from = chunk->transport;
+		else
+			SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+
+		if (chunk->transport)
+			chunk->transport->last_time_heard = jiffies;
+
+		/* Run through the state machine. */
+		error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype,
+				   state, ep, asoc, chunk, GFP_ATOMIC);
+
+		/* Check to see if the association is freed in response to
+		 * the incoming chunk.  If so, get out of the while loop.
+		 */
+		if (asoc->base.dead)
+			break;
+
+		/* If there is an error on chunk, discard this packet. */
+		if (error && chunk)
+			chunk->pdiscard = 1;
+	}
+	sctp_association_put(asoc);
+}
+
+/* This routine moves an association from its old sk to a new sk.  */
+void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)
+{
+	struct sctp_sock *newsp = sctp_sk(newsk);
+	struct sock *oldsk = assoc->base.sk;
+
+	/* Delete the association from the old endpoint's list of
+	 * associations.
+	 */
+	list_del_init(&assoc->asocs);
+
+	/* Decrement the backlog value for a TCP-style socket. */
+	if (sctp_style(oldsk, TCP))
+		oldsk->sk_ack_backlog--;
+
+	/* Release references to the old endpoint and the sock.  */
+	sctp_endpoint_put(assoc->ep);
+	sock_put(assoc->base.sk);
+
+	/* Get a reference to the new endpoint.  */
+	assoc->ep = newsp->ep;
+	sctp_endpoint_hold(assoc->ep);
+
+	/* Get a reference to the new sock.  */
+	assoc->base.sk = newsk;
+	sock_hold(assoc->base.sk);
+
+	/* Add the association to the new endpoint's list of associations.  */
+	sctp_endpoint_add_asoc(newsp->ep, assoc);
+}
+
+/* Update an association (possibly from unexpected COOKIE-ECHO processing).  */
+void sctp_assoc_update(struct sctp_association *asoc,
+		       struct sctp_association *new)
+{
+	struct sctp_transport *trans;
+	struct list_head *pos, *temp;
+
+	/* Copy in new parameters of peer. */
+	asoc->c = new->c;
+	asoc->peer.rwnd = new->peer.rwnd;
+	asoc->peer.sack_needed = new->peer.sack_needed;
+	asoc->peer.i = new->peer.i;
+	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE,
+			 asoc->peer.i.initial_tsn);
+
+	/* Remove any peer addresses not present in the new association. */
+	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
+		trans = list_entry(pos, struct sctp_transport, transports);
+		if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr))
+			sctp_assoc_del_peer(asoc, &trans->ipaddr);
+	}
+
+	/* If the case is A (association restart), use
+	 * initial_tsn as next_tsn. If the case is B, use
+	 * current next_tsn in case data sent to peer
+	 * has been discarded and needs retransmission.
+	 */
+	if (asoc->state >= SCTP_STATE_ESTABLISHED) {
+		asoc->next_tsn = new->next_tsn;
+		asoc->ctsn_ack_point = new->ctsn_ack_point;
+		asoc->adv_peer_ack_point = new->adv_peer_ack_point;
+
+		/* Reinitialize SSN for both local streams
+		 * and peer's streams.
+		 */
+		sctp_ssnmap_clear(asoc->ssnmap);
+
+	} else {
+		/* Add any peer addresses from the new association. */
+		list_for_each(pos, &new->peer.transport_addr_list) {
+			trans = list_entry(pos, struct sctp_transport,
+					   transports);
+			if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr))
+				sctp_assoc_add_peer(asoc, &trans->ipaddr,
+						    GFP_ATOMIC);
+		}
+
+		asoc->ctsn_ack_point = asoc->next_tsn - 1;
+		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+		if (!asoc->ssnmap) {
+			/* Move the ssnmap. */
+			asoc->ssnmap = new->ssnmap;
+			new->ssnmap = NULL;
+		}
+	}
+}
+
+/* Update the retran path for sending a retransmitted packet.
+ * Round-robin through the active transports, else round-robin
+ * through the inactive transports as this is the next best thing
+ * we can try.
+ */
+void sctp_assoc_update_retran_path(struct sctp_association *asoc)
+{
+	struct sctp_transport *t, *next;
+	struct list_head *head = &asoc->peer.transport_addr_list;
+	struct list_head *pos;
+
+	/* Find the next transport in a round-robin fashion. */
+	t = asoc->peer.retran_path;
+	pos = &t->transports;
+	next = NULL;
+
+	while (1) {
+		/* Skip the head. */
+		if (pos->next == head)
+			pos = head->next;
+		else
+			pos = pos->next;
+
+		t = list_entry(pos, struct sctp_transport, transports);
+
+		/* Try to find an active transport. */
+
+		if (t->active) {
+			break;
+		} else {
+			/* Keep track of the next transport in case
+			 * we don't find any active transport.
+			 */
+			if (!next)
+				next = t;
+		}
+
+		/* We have exhausted the list, but didn't find any
+		 * other active transports.  If so, use the next
+		 * transport.
+		 */
+		if (t == asoc->peer.retran_path) {
+			t = next;
+			break;
+		}
+	}
+
+	asoc->peer.retran_path = t;
+}
+
+/* Choose the transport for sending a SHUTDOWN packet.  */
+struct sctp_transport *sctp_assoc_choose_shutdown_transport(
+	struct sctp_association *asoc)
+{
+	/* If this is the first time SHUTDOWN is sent, use the active path,
+	 * else use the retran path. If the last SHUTDOWN was sent over the
+	 * retran path, update the retran path and use it.
+	 */
+	if (!asoc->shutdown_last_sent_to)
+		return asoc->peer.active_path;
+	else {
+		if (asoc->shutdown_last_sent_to == asoc->peer.retran_path)
+			sctp_assoc_update_retran_path(asoc);
+		return asoc->peer.retran_path;
+	}
+
+}
+
+/* Update the association's pmtu and frag_point by going through all the
+ * transports. This routine is called when a transport's PMTU has changed.
+ */
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+	__u32 pmtu = 0;
+
+	if (!asoc)
+		return;
+
+	/* Get the lowest pmtu of all the transports. */
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+		if (!pmtu || (t->pmtu < pmtu))
+			pmtu = t->pmtu;
+	}
+
+	if (pmtu) {
+		struct sctp_sock *sp = sctp_sk(asoc->base.sk);
+		asoc->pmtu = pmtu;
+		asoc->frag_point = sctp_frag_point(sp, pmtu);
+	}
+
+	SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n",
+			  __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point);
+}
+
+/* Should we send a SACK to update our peer? */
+static inline int sctp_peer_needs_update(struct sctp_association *asoc)
+{
+	switch (asoc->state) {
+	case SCTP_STATE_ESTABLISHED:
+	case SCTP_STATE_SHUTDOWN_PENDING:
+	case SCTP_STATE_SHUTDOWN_RECEIVED:
+	case SCTP_STATE_SHUTDOWN_SENT:
+		if ((asoc->rwnd > asoc->a_rwnd) &&
+		    ((asoc->rwnd - asoc->a_rwnd) >=
+		     min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu)))
+			return 1;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Increase asoc's rwnd by len and send any window update SACK if needed. */
+void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned len)
+{
+	struct sctp_chunk *sack;
+	struct timer_list *timer;
+
+	if (asoc->rwnd_over) {
+		if (asoc->rwnd_over >= len) {
+			asoc->rwnd_over -= len;
+		} else {
+			asoc->rwnd += (len - asoc->rwnd_over);
+			asoc->rwnd_over = 0;
+		}
+	} else {
+		asoc->rwnd += len;
+	}
+
+	SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) "
+			  "- %u\n", __FUNCTION__, asoc, len, asoc->rwnd,
+			  asoc->rwnd_over, asoc->a_rwnd);
+
+	/* Send a window update SACK if the rwnd has increased by at least the
+	 * minimum of the association's PMTU and half of the receive buffer.
+	 * The algorithm used is similar to the one described in
+	 * Section 4.2.3.3 of RFC 1122.
+	 */
+	if (sctp_peer_needs_update(asoc)) {
+		asoc->a_rwnd = asoc->rwnd;
+		SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p "
+				  "rwnd: %u a_rwnd: %u\n", __FUNCTION__,
+				  asoc, asoc->rwnd, asoc->a_rwnd);
+		sack = sctp_make_sack(asoc);
+		if (!sack)
+			return;
+
+		asoc->peer.sack_needed = 0;
+
+		sctp_outq_tail(&asoc->outqueue, sack);
+
+		/* Stop the SACK timer.  */
+		timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
+		if (timer_pending(timer) && del_timer(timer))
+			sctp_association_put(asoc);
+	}
+}
+
+/* Decrease asoc's rwnd by len. */
+void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
+{
+	SCTP_ASSERT(asoc->rwnd, "rwnd zero", return);
+	SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return);
+	if (asoc->rwnd >= len) {
+		asoc->rwnd -= len;
+	} else {
+		asoc->rwnd_over = len - asoc->rwnd;
+		asoc->rwnd = 0;
+	}
+	SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u)\n",
+			  __FUNCTION__, asoc, len, asoc->rwnd,
+			  asoc->rwnd_over);
+}
+
+/* Build the bind address list for the association based on info from the
+ * local endpoint and the remote peer.
+ */
+int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, int gfp)
+{
+	sctp_scope_t scope;
+	int flags;
+
+	/* Use scoping rules to determine the subset of addresses from
+	 * the endpoint.
+	 */
+	scope = sctp_scope(&asoc->peer.active_path->ipaddr);
+	flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+	if (asoc->peer.ipv4_address)
+		flags |= SCTP_ADDR4_PEERSUPP;
+	if (asoc->peer.ipv6_address)
+		flags |= SCTP_ADDR6_PEERSUPP;
+
+	return sctp_bind_addr_copy(&asoc->base.bind_addr,
+				   &asoc->ep->base.bind_addr,
+				   scope, gfp, flags);
+}
+
+/* Build the association's bind address list from the cookie.  */
+int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
+					 struct sctp_cookie *cookie, int gfp)
+{
+	int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length);
+	int var_size3 = cookie->raw_addr_list_len;
+	__u8 *raw = (__u8 *)cookie->peer_init + var_size2;
+
+	return sctp_raw_to_bind_addrs(&asoc->base.bind_addr, raw, var_size3,
+				      asoc->ep->base.bind_addr.port, gfp);
+}
+
+/* Lookup laddr in the bind address list of an association. */ 
+int sctp_assoc_lookup_laddr(struct sctp_association *asoc, 
+			    const union sctp_addr *laddr)
+{
+	int found;
+
+	sctp_read_lock(&asoc->base.addr_lock);
+	if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
+	    sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
+			         sctp_sk(asoc->base.sk))) {
+		found = 1;
+		goto out;
+	}
+
+	found = 0;
+out:
+	sctp_read_unlock(&asoc->base.addr_lock);
+	return found;
+}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
new file mode 100644
index 00000000000..f90eadfb60a
--- /dev/null
+++ b/net/sctp/bind_addr.c
@@ -0,0 +1,417 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2003
+ * Copyright (c) Cisco 1999,2000
+ * Copyright (c) Motorola 1999,2000,2001
+ * Copyright (c) La Monte H.P. Yarroll 2001
+ *
+ * This file is part of the SCTP kernel reference implementation.
+ *
+ * A collection class to handle the storage of transport addresses.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Daisy Chang           <daisyc@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/if_inet6.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for internal helpers. */
+static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
+			      sctp_scope_t scope, int gfp, int flags);
+static void sctp_bind_addr_clean(struct sctp_bind_addr *);
+
+/* First Level Abstractions. */
+
+/* Copy 'src' to 'dest' taking 'scope' into account.  Omit addresses
+ * in 'src' which have a broader scope than 'scope'.
+ */
+int sctp_bind_addr_copy(struct sctp_bind_addr *dest, 
+			const struct sctp_bind_addr *src,
+			sctp_scope_t scope, int gfp, int flags)
+{
+	struct sctp_sockaddr_entry *addr;
+	struct list_head *pos;
+	int error = 0;
+
+	/* All addresses share the same port.  */
+	dest->port = src->port;
+
+	/* Extract the addresses which are relevant for this scope.  */
+	list_for_each(pos, &src->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		error = sctp_copy_one_addr(dest, &addr->a, scope,
+					   gfp, flags);
+		if (error < 0)
+			goto out;
+	}
+
+	/* If there are no addresses matching the scope and
+	 * this is global scope, try to get a link scope address, with
+	 * the assumption that we must be sitting behind a NAT.
+	 */
+	if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) {
+		list_for_each(pos, &src->address_list) {
+			addr = list_entry(pos, struct sctp_sockaddr_entry,
+					  list);
+			error = sctp_copy_one_addr(dest, &addr->a,
+						   SCTP_SCOPE_LINK, gfp,
+						   flags);
+			if (error < 0)
+				goto out;
+		}
+	}
+
+out:
+	if (error)
+		sctp_bind_addr_clean(dest);
+
+	return error;
+}
+
+/* Initialize the SCTP_bind_addr structure for either an endpoint or
+ * an association.
+ */
+void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
+{
+	bp->malloced = 0;
+
+	INIT_LIST_HEAD(&bp->address_list);
+	bp->port = port;
+}
+
+/* Dispose of the address list. */
+static void sctp_bind_addr_clean(struct sctp_bind_addr *bp)
+{
+	struct sctp_sockaddr_entry *addr;
+	struct list_head *pos, *temp;
+
+	/* Empty the bind address list. */
+	list_for_each_safe(pos, temp, &bp->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		list_del(pos);
+		kfree(addr);
+		SCTP_DBG_OBJCNT_DEC(addr);
+	}
+}
+
+/* Dispose of an SCTP_bind_addr structure  */
+void sctp_bind_addr_free(struct sctp_bind_addr *bp)
+{
+	/* Empty the bind address list. */
+	sctp_bind_addr_clean(bp);
+
+	if (bp->malloced) {
+		kfree(bp);
+		SCTP_DBG_OBJCNT_DEC(bind_addr);
+	}
+}
+
+/* Add an address to the bind address list in the SCTP_bind_addr structure. */
+int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
+		       int gfp)
+{
+	struct sctp_sockaddr_entry *addr;
+
+	/* Add the address to the bind address list.  */
+	addr = t_new(struct sctp_sockaddr_entry, gfp);
+	if (!addr)
+		return -ENOMEM;
+
+	memcpy(&addr->a, new, sizeof(*new));
+
+	/* Fix up the port if it has not yet been set.
+	 * Both v4 and v6 have the port at the same offset.
+	 */
+	if (!addr->a.v4.sin_port)
+		addr->a.v4.sin_port = bp->port;
+
+	INIT_LIST_HEAD(&addr->list);
+	list_add_tail(&addr->list, &bp->address_list);
+	SCTP_DBG_OBJCNT_INC(addr);
+
+	return 0;
+}
+
+/* Delete an address from the bind address list in the SCTP_bind_addr
+ * structure.
+ */
+int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
+{
+	struct list_head *pos, *temp;
+	struct sctp_sockaddr_entry *addr;
+
+	list_for_each_safe(pos, temp, &bp->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
+			/* Found the exact match. */
+			list_del(pos);
+			kfree(addr);
+			SCTP_DBG_OBJCNT_DEC(addr);
+
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/* Create a network byte-order representation of all the addresses
+ * formated as SCTP parameters.
+ *
+ * The second argument is the return value for the length.
+ */
+union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
+					 int *addrs_len, int gfp)
+{
+	union sctp_params addrparms;
+	union sctp_params retval;
+	int addrparms_len;
+	union sctp_addr_param rawaddr;
+	int len;
+	struct sctp_sockaddr_entry *addr;
+	struct list_head *pos;
+	struct sctp_af *af;
+
+	addrparms_len = 0;
+	len = 0;
+
+	/* Allocate enough memory at once. */
+	list_for_each(pos, &bp->address_list) {
+		len += sizeof(union sctp_addr_param);
+	}
+
+	/* Don't even bother embedding an address if there
+	 * is only one.
+	 */
+	if (len == sizeof(union sctp_addr_param)) {
+		retval.v = NULL;
+		goto end_raw;
+	}
+
+	retval.v = kmalloc(len, gfp);
+	if (!retval.v)
+		goto end_raw;
+
+	addrparms = retval;
+
+	list_for_each(pos, &bp->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		af = sctp_get_af_specific(addr->a.v4.sin_family);
+		len = af->to_addr_param(&addr->a, &rawaddr);
+		memcpy(addrparms.v, &rawaddr, len);
+		addrparms.v += len;
+		addrparms_len += len;
+	}
+
+end_raw:
+	*addrs_len = addrparms_len;
+	return retval;
+}
+
+/*
+ * Create an address list out of the raw address list format (IPv4 and IPv6
+ * address parameters).
+ */
+int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
+			   int addrs_len, __u16 port, int gfp)
+{
+	union sctp_addr_param *rawaddr;
+	struct sctp_paramhdr *param;
+	union sctp_addr addr;
+	int retval = 0;
+	int len;
+	struct sctp_af *af;
+
+	/* Convert the raw address to standard address format */
+	while (addrs_len) {
+		param = (struct sctp_paramhdr *)raw_addr_list;
+		rawaddr = (union sctp_addr_param *)raw_addr_list;
+
+		af = sctp_get_af_specific(param_type2af(param->type));
+		if (unlikely(!af)) {
+			retval = -EINVAL;
+			sctp_bind_addr_clean(bp);
+			break;
+		}
+
+		af->from_addr_param(&addr, rawaddr, port, 0);
+		retval = sctp_add_bind_addr(bp, &addr, gfp);
+		if (retval) {
+			/* Can't finish building the list, clean up. */
+			sctp_bind_addr_clean(bp);
+			break;
+		}
+
+		len = ntohs(param->length);
+		addrs_len -= len;
+		raw_addr_list += len;
+	}
+
+	return retval;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* Does this contain a specified address?  Allow wildcarding. */
+int sctp_bind_addr_match(struct sctp_bind_addr *bp, 
+			 const union sctp_addr *addr,
+			 struct sctp_sock *opt)
+{
+	struct sctp_sockaddr_entry *laddr;
+	struct list_head *pos;
+
+	list_for_each(pos, &bp->address_list) {
+		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if (opt->pf->cmp_addr(&laddr->a, addr, opt))
+ 			return 1;
+	}
+
+	return 0;
+}
+
+/* Find the first address in the bind address list that is not present in
+ * the addrs packed array.
+ */
+union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
+					const union sctp_addr	*addrs,
+					int			addrcnt,
+					struct sctp_sock	*opt)
+{
+	struct sctp_sockaddr_entry	*laddr;
+	union sctp_addr			*addr;
+	void 				*addr_buf;
+	struct sctp_af			*af;
+	struct list_head		*pos;
+	int				i;
+
+	list_for_each(pos, &bp->address_list) {
+		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		
+		addr_buf = (union sctp_addr *)addrs;
+		for (i = 0; i < addrcnt; i++) {
+			addr = (union sctp_addr *)addr_buf;
+			af = sctp_get_af_specific(addr->v4.sin_family);
+			if (!af) 
+				return NULL;
+
+			if (opt->pf->cmp_addr(&laddr->a, addr, opt))
+				break;
+
+			addr_buf += af->sockaddr_len;
+		}
+		if (i == addrcnt)
+			return &laddr->a;
+	}
+
+	return NULL;
+}
+
+/* Copy out addresses from the global local address list. */
+static int sctp_copy_one_addr(struct sctp_bind_addr *dest, 
+			      union sctp_addr *addr,
+			      sctp_scope_t scope, int gfp, int flags)
+{
+	int error = 0;
+
+	if (sctp_is_any(addr)) {
+		error = sctp_copy_local_addr_list(dest, scope, gfp, flags);
+	} else if (sctp_in_scope(addr, scope)) {
+		/* Now that the address is in scope, check to see if
+		 * the address type is supported by local sock as
+		 * well as the remote peer.
+		 */
+		if ((((AF_INET == addr->sa.sa_family) &&
+		      (flags & SCTP_ADDR4_PEERSUPP))) ||
+		    (((AF_INET6 == addr->sa.sa_family) &&
+		      (flags & SCTP_ADDR6_ALLOWED) &&
+		      (flags & SCTP_ADDR6_PEERSUPP))))
+			error = sctp_add_bind_addr(dest, addr, gfp);
+	}
+
+	return error;
+}
+
+/* Is this a wildcard address?  */
+int sctp_is_any(const union sctp_addr *addr)
+{
+	struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family);
+	if (!af)
+		return 0;
+	return af->is_any(addr);
+}
+
+/* Is 'addr' valid for 'scope'?  */
+int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
+{
+	sctp_scope_t addr_scope = sctp_scope(addr);
+
+	/* The unusable SCTP addresses will not be considered with
+	 * any defined scopes.
+	 */
+	if (SCTP_SCOPE_UNUSABLE == addr_scope)
+		return 0;
+	/*
+	 * For INIT and INIT-ACK address list, let L be the level of
+	 * of requested destination address, sender and receiver
+	 * SHOULD include all of its addresses with level greater
+	 * than or equal to L.
+	 */
+	if (addr_scope <= scope)
+		return 1;
+
+	return 0;
+}
+
+/********************************************************************
+ * 3rd Level Abstractions
+ ********************************************************************/
+
+/* What is the scope of 'addr'?  */
+sctp_scope_t sctp_scope(const union sctp_addr *addr)
+{
+	struct sctp_af *af;
+
+	af = sctp_get_af_specific(addr->sa.sa_family);
+	if (!af)
+		return SCTP_SCOPE_UNUSABLE;
+
+	return af->scope((union sctp_addr *)addr);
+}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
new file mode 100644
index 00000000000..0c2ab788505
--- /dev/null
+++ b/net/sctp/chunk.c
@@ -0,0 +1,309 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2003, 2004
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * This file contains the code relating the the chunk abstraction.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* This file is mostly in anticipation of future work, but initially
+ * populate with fragment tracking for an outbound message.
+ */
+
+/* Initialize datamsg from memory. */
+static void sctp_datamsg_init(struct sctp_datamsg *msg)
+{
+	atomic_set(&msg->refcnt, 1);
+	msg->send_failed = 0;
+	msg->send_error = 0;
+	msg->can_abandon = 0;
+	msg->expires_at = 0;
+	INIT_LIST_HEAD(&msg->chunks);
+}
+
+/* Allocate and initialize datamsg. */
+SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(int gfp)
+{
+	struct sctp_datamsg *msg;
+	msg = kmalloc(sizeof(struct sctp_datamsg), gfp);
+	if (msg)
+		sctp_datamsg_init(msg);
+	SCTP_DBG_OBJCNT_INC(datamsg);
+	return msg;
+}
+
+/* Final destructruction of datamsg memory. */
+static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
+{
+	struct list_head *pos, *temp;
+	struct sctp_chunk *chunk;
+	struct sctp_sock *sp;
+	struct sctp_ulpevent *ev;
+	struct sctp_association *asoc = NULL;
+	int error = 0, notify;
+
+	/* If we failed, we may need to notify. */
+	notify = msg->send_failed ? -1 : 0;
+
+	/* Release all references. */
+	list_for_each_safe(pos, temp, &msg->chunks) {
+		list_del_init(pos);
+		chunk = list_entry(pos, struct sctp_chunk, frag_list);
+		/* Check whether we _really_ need to notify. */
+		if (notify < 0) {
+			asoc = chunk->asoc;
+			if (msg->send_error)
+				error = msg->send_error;
+			else
+				error = asoc->outqueue.error;
+
+			sp = sctp_sk(asoc->base.sk);
+			notify = sctp_ulpevent_type_enabled(SCTP_SEND_FAILED,
+							    &sp->subscribe);
+		}
+
+		/* Generate a SEND FAILED event only if enabled. */
+		if (notify > 0) {
+			int sent;
+			if (chunk->has_tsn)
+				sent = SCTP_DATA_SENT;
+			else
+				sent = SCTP_DATA_UNSENT;
+
+			ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
+							    error, GFP_ATOMIC);
+			if (ev)
+				sctp_ulpq_tail_event(&asoc->ulpq, ev);
+		}
+
+		sctp_chunk_put(chunk);
+	}
+
+	SCTP_DBG_OBJCNT_DEC(datamsg);
+	kfree(msg);
+}
+
+/* Hold a reference. */
+static void sctp_datamsg_hold(struct sctp_datamsg *msg)
+{
+	atomic_inc(&msg->refcnt);
+}
+
+/* Release a reference. */
+void sctp_datamsg_put(struct sctp_datamsg *msg)
+{
+	if (atomic_dec_and_test(&msg->refcnt))
+		sctp_datamsg_destroy(msg);
+}
+
+/* Free a message.  Really just give up a reference, the
+ * really free happens in sctp_datamsg_destroy().
+ */
+void sctp_datamsg_free(struct sctp_datamsg *msg)
+{
+	sctp_datamsg_put(msg);
+}
+
+/* Hold on to all the fragments until all chunks have been sent. */
+void sctp_datamsg_track(struct sctp_chunk *chunk)
+{
+	sctp_chunk_hold(chunk);
+}
+
+/* Assign a chunk to this datamsg. */
+static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chunk)
+{
+	sctp_datamsg_hold(msg);
+	chunk->msg = msg;
+}
+
+
+/* A data chunk can have a maximum payload of (2^16 - 20).  Break
+ * down any such message into smaller chunks.  Opportunistically, fragment
+ * the chunks down to the current MTU constraints.  We may get refragmented
+ * later if the PMTU changes, but it is _much better_ to fragment immediately
+ * with a reasonable guess than always doing our fragmentation on the
+ * soft-interrupt.
+ */
+struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
+					    struct sctp_sndrcvinfo *sinfo,
+					    struct msghdr *msgh, int msg_len)
+{
+	int max, whole, i, offset, over, err;
+	int len, first_len;
+	struct sctp_chunk *chunk;
+	struct sctp_datamsg *msg;
+	struct list_head *pos, *temp;
+	__u8 frag;
+
+	msg = sctp_datamsg_new(GFP_KERNEL);
+	if (!msg)
+		return NULL;
+
+	/* Note: Calculate this outside of the loop, so that all fragments
+	 * have the same expiration.
+	 */
+	if (sinfo->sinfo_timetolive) {
+		/* sinfo_timetolive is in milliseconds */
+		msg->expires_at = jiffies +
+				    msecs_to_jiffies(sinfo->sinfo_timetolive);
+		msg->can_abandon = 1;
+		SCTP_DEBUG_PRINTK("%s: msg:%p expires_at: %ld jiffies:%ld\n",
+				  __FUNCTION__, msg, msg->expires_at, jiffies);
+	}
+
+	max = asoc->frag_point;
+
+	whole = 0;
+	first_len = max;
+
+	/* Encourage Cookie-ECHO bundling. */
+	if (asoc->state < SCTP_STATE_COOKIE_ECHOED) {
+		whole = msg_len / (max - SCTP_ARBITRARY_COOKIE_ECHO_LEN);
+
+		/* Account for the DATA to be bundled with the COOKIE-ECHO. */
+		if (whole) {
+			first_len = max - SCTP_ARBITRARY_COOKIE_ECHO_LEN;
+			msg_len -= first_len;
+			whole = 1;
+		}
+	}
+
+	/* How many full sized?  How many bytes leftover? */
+	whole += msg_len / max;
+	over = msg_len % max;
+	offset = 0;
+
+	if ((whole > 1) || (whole && over))
+		SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS);
+
+	/* Create chunks for all the full sized DATA chunks. */
+	for (i=0, len=first_len; i < whole; i++) {
+		frag = SCTP_DATA_MIDDLE_FRAG;
+
+		if (0 == i)
+			frag |= SCTP_DATA_FIRST_FRAG;
+
+		if ((i == (whole - 1)) && !over)
+			frag |= SCTP_DATA_LAST_FRAG;
+
+		chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0);
+
+		if (!chunk)
+			goto errout;
+		err = sctp_user_addto_chunk(chunk, offset, len, msgh->msg_iov);
+		if (err < 0)
+			goto errout;
+
+		offset += len;
+
+		/* Put the chunk->skb back into the form expected by send.  */
+		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
+			   - (__u8 *)chunk->skb->data);
+
+		sctp_datamsg_assign(msg, chunk);
+		list_add_tail(&chunk->frag_list, &msg->chunks);
+
+		/* The first chunk, the first chunk was likely short
+		 * to allow bundling, so reset to full size.
+		 */
+		if (0 == i)
+			len = max;
+	}
+
+	/* .. now the leftover bytes. */
+	if (over) {
+		if (!whole)
+			frag = SCTP_DATA_NOT_FRAG;
+		else
+			frag = SCTP_DATA_LAST_FRAG;
+
+		chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0);
+
+		if (!chunk)
+			goto errout;
+
+		err = sctp_user_addto_chunk(chunk, offset, over,msgh->msg_iov);
+
+		/* Put the chunk->skb back into the form expected by send.  */
+		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
+			   - (__u8 *)chunk->skb->data);
+		if (err < 0)
+			goto errout;
+
+		sctp_datamsg_assign(msg, chunk);
+		list_add_tail(&chunk->frag_list, &msg->chunks);
+	}
+
+	return msg;
+
+errout:
+	list_for_each_safe(pos, temp, &msg->chunks) {
+		list_del_init(pos);
+		chunk = list_entry(pos, struct sctp_chunk, frag_list);
+		sctp_chunk_free(chunk);
+	}
+	sctp_datamsg_free(msg);
+	return NULL;
+}
+
+/* Check whether this message has expired. */
+int sctp_chunk_abandoned(struct sctp_chunk *chunk)
+{
+	struct sctp_datamsg *msg = chunk->msg;
+
+	if (!msg->can_abandon)
+		return 0;
+
+	if (time_after(jiffies, msg->expires_at))
+		return 1;
+
+	return 0;
+}
+
+/* This chunk (and consequently entire message) has failed in its sending. */
+void sctp_chunk_fail(struct sctp_chunk *chunk, int error)
+{
+	chunk->msg->send_failed = 1;
+	chunk->msg->send_error = error;
+}
diff --git a/net/sctp/command.c b/net/sctp/command.c
new file mode 100644
index 00000000000..3ff804757f4
--- /dev/null
+++ b/net/sctp/command.c
@@ -0,0 +1,81 @@
+/* SCTP kernel reference Implementation Copyright (C) 1999-2001
+ * Cisco, Motorola, and IBM
+ * Copyright 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions manipulate sctp command sequences.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson <karl@athena.chicago.il.us>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Initialize a block of memory as a command sequence. */
+int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
+{
+	memset(seq, 0, sizeof(sctp_cmd_seq_t));
+	return 1;		/* We always succeed.  */
+}
+
+/* Add a command to a sctp_cmd_seq_t.
+ * Return 0 if the command sequence is full.
+ */
+int sctp_add_cmd(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj)
+{
+	if (seq->next_free_slot >= SCTP_MAX_NUM_COMMANDS)
+		goto fail;
+
+	seq->cmds[seq->next_free_slot].verb = verb;
+	seq->cmds[seq->next_free_slot++].obj = obj;
+
+	return 1;
+
+fail:
+	return 0;
+}
+
+/* Return the next command structure in a sctp_cmd_seq.
+ * Returns NULL at the end of the sequence.
+ */
+sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq)
+{
+	sctp_cmd_t *retval = NULL;
+
+	if (seq->next_cmd < seq->next_free_slot)
+		retval = &seq->cmds[seq->next_cmd++];
+
+	return retval;
+}
+
diff --git a/net/sctp/crc32c.c b/net/sctp/crc32c.c
new file mode 100644
index 00000000000..31f05ec8e1d
--- /dev/null
+++ b/net/sctp/crc32c.c
@@ -0,0 +1,220 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2003 International Business Machines, Corp.
+ * 
+ * This file is part of the SCTP kernel reference Implementation
+ * 
+ * SCTP Checksum functions
+ * 
+ * The SCTP reference implementation is free software; 
+ * you can redistribute it and/or modify it under the terms of 
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ * 
+ * The SCTP reference implementation is distributed in the hope that it 
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.  
+ * 
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ * 
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by: 
+ *    Dinakaran Joseph 
+ *    Jon Grimm <jgrimm@us.ibm.com>
+ *    Sridhar Samudrala <sri@us.ibm.com>
+ * 
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+/* The following code has been taken directly from
+ * draft-ietf-tsvwg-sctpcsum-03.txt
+ *
+ * The code has now been modified specifically for SCTP knowledge.
+ */
+
+#include <linux/types.h>
+#include <net/sctp/sctp.h>
+
+#define CRC32C_POLY 0x1EDC6F41
+#define CRC32C(c,d) (c=(c>>8)^crc_c[(c^(d))&0xFF])
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/* Copyright 2001, D. Otis.  Use this program, code or tables    */
+/* extracted from it, as desired without restriction.            */
+/*                                                               */
+/* 32 Bit Reflected CRC table generation for SCTP.               */
+/* To accommodate serial byte data being shifted out least       */
+/* significant bit first, the table's 32 bit words are reflected */
+/* which flips both byte and bit MS and LS positions.  The CRC   */
+/* is calculated MS bits first from the perspective of the serial*/
+/* stream.  The x^32 term is implied and the x^0 term may also   */
+/* be shown as +1.  The polynomial code used is 0x1EDC6F41.      */
+/* Castagnoli93                                                  */
+/* x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+  */
+/* x^11+x^10+x^9+x^8+x^6+x^0                                     */
+/* Guy Castagnoli Stefan Braeuer and Martin Herrman              */
+/* "Optimization of Cyclic Redundancy-Check Codes                */
+/* with 24 and 32 Parity Bits",                                  */
+/* IEEE Transactions on Communications, Vol.41, No.6, June 1993  */
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+static const __u32 crc_c[256] = {
+	0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+	0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+	0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+	0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+	0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+	0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+	0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+	0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+	0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+	0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+	0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+	0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+	0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+	0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+	0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+	0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+	0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+	0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+	0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+	0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+	0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+	0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+	0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+	0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+	0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+	0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+	0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+	0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+	0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+	0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+	0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+	0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+	0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+	0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+	0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+	0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+	0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+	0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+	0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+	0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+	0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+	0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+	0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+	0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+	0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+	0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+	0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+	0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+	0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+	0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+	0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+	0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+	0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+	0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+	0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+	0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+	0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+	0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+	0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+	0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+	0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+	0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+	0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+	0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
+};
+     
+__u32 sctp_start_cksum(__u8 *buffer, __u16 length)
+{
+    	__u32 crc32 = ~(__u32) 0;
+	__u32 i;
+
+	/* Optimize this routine to be SCTP specific, knowing how
+	 * to skip the checksum field of the SCTP header.
+	 */
+
+	/* Calculate CRC up to the checksum. */
+	for (i = 0; i < (sizeof(struct sctphdr) - sizeof(__u32)); i++)
+      		CRC32C(crc32, buffer[i]);
+
+	/* Skip checksum field of the header. */
+	for (i = 0; i < sizeof(__u32); i++)
+		CRC32C(crc32, 0);
+
+	/* Calculate the rest of the CRC. */
+	for (i = sizeof(struct sctphdr); i < length ; i++)
+		CRC32C(crc32, buffer[i]);
+
+	return crc32;
+}
+
+__u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
+{
+	__u32 i;
+
+	for (i = 0; i < length ; i++)
+		CRC32C(crc32, buffer[i]);
+
+	return crc32;
+}
+
+__u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32)
+{
+	__u32 i;
+	__u32 *_to = (__u32 *)to;
+	__u32 *_from = (__u32 *)from;
+	
+	for (i = 0; i < (length/4); i++) {
+		_to[i] = _from[i];
+		CRC32C(crc32, from[i*4]);
+		CRC32C(crc32, from[i*4+1]);
+		CRC32C(crc32, from[i*4+2]);
+		CRC32C(crc32, from[i*4+3]);	
+	}
+
+	return crc32;
+}
+
+__u32 sctp_end_cksum(__u32 crc32)
+{
+	__u32 result;
+	__u8 byte0, byte1, byte2, byte3;
+
+	result = ~crc32;
+
+	/*  result  now holds the negated polynomial remainder;
+	 *  since the table and algorithm is "reflected" [williams95].
+	 *  That is,  result has the same value as if we mapped the message
+	 *  to a polyomial, computed the host-bit-order polynomial
+	 *  remainder, performed final negation, then did an end-for-end
+	 *  bit-reversal.
+	 *  Note that a 32-bit bit-reversal is identical to four inplace
+	 *  8-bit reversals followed by an end-for-end byteswap.
+	 *  In other words, the bytes of each bit are in the right order,
+	 *  but the bytes have been byteswapped.  So we now do an explicit
+	 *  byteswap.  On a little-endian machine, this byteswap and
+	 *  the final ntohl cancel out and could be elided.
+	 */
+	byte0 = result & 0xff;
+	byte1 = (result>>8) & 0xff;
+	byte2 = (result>>16) & 0xff;
+	byte3 = (result>>24) & 0xff;
+
+	crc32 = ((byte0 << 24) |
+		 (byte1 << 16) |
+		 (byte2 << 8)  |
+		 byte3);
+	return crc32;
+}
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
new file mode 100644
index 00000000000..aa8340373af
--- /dev/null
+++ b/net/sctp/debug.c
@@ -0,0 +1,191 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * 
+ * This file is part of the SCTP kernel reference Implementation
+ * 
+ * This file is part of the implementation of the add-IP extension,
+ * based on <draft-ietf-tsvwg-addip-sctp-02.txt> June 29, 2001,
+ * for the SCTP kernel reference Implementation.
+ * 
+ * This file converts numerical ID value to alphabetical names for SCTP
+ * terms such as chunk type, parameter time, event type, etc.
+ * 
+ * The SCTP reference implementation is free software; 
+ * you can redistribute it and/or modify it under the terms of 
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ * 
+ * The SCTP reference implementation is distributed in the hope that it 
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.  
+ * 
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ * 
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by: 
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ * 
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <net/sctp/sctp.h>
+
+#if SCTP_DEBUG
+int sctp_debug_flag = 1;	/* Initially enable DEBUG */
+#endif	/* SCTP_DEBUG */
+
+/* These are printable forms of Chunk ID's from section 3.1.  */
+static const char *sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
+	"DATA",
+	"INIT",
+	"INIT_ACK",
+	"SACK",
+	"HEARTBEAT",
+	"HEARTBEAT_ACK",
+	"ABORT",
+	"SHUTDOWN",
+	"SHUTDOWN_ACK",
+	"ERROR",
+	"COOKIE_ECHO",
+	"COOKIE_ACK",
+	"ECN_ECNE",
+	"ECN_CWR",
+	"SHUTDOWN_COMPLETE",
+};
+
+/* Lookup "chunk type" debug name. */
+const char *sctp_cname(const sctp_subtype_t cid)
+{
+	if (cid.chunk < 0)
+		return "illegal chunk id";
+	if (cid.chunk <= SCTP_CID_BASE_MAX)
+		return sctp_cid_tbl[cid.chunk];
+	
+	switch (cid.chunk) {
+	case SCTP_CID_ASCONF:
+		return "ASCONF";
+
+	case SCTP_CID_ASCONF_ACK:
+		return "ASCONF_ACK";
+
+	case SCTP_CID_FWD_TSN:
+		return "FWD_TSN";
+
+	default:
+		return "unknown chunk";
+	};
+	return "unknown chunk";
+}
+
+/* These are printable forms of the states.  */
+const char *sctp_state_tbl[SCTP_STATE_NUM_STATES] = {
+	"STATE_EMPTY",
+	"STATE_CLOSED",
+	"STATE_COOKIE_WAIT",
+	"STATE_COOKIE_ECHOED",
+	"STATE_ESTABLISHED",
+	"STATE_SHUTDOWN_PENDING",
+	"STATE_SHUTDOWN_SENT",
+	"STATE_SHUTDOWN_RECEIVED",
+	"STATE_SHUTDOWN_ACK_SENT",
+};
+
+/* Events that could change the state of an association.  */
+const char *sctp_evttype_tbl[] = {
+	"EVENT_T_unknown",
+	"EVENT_T_CHUNK",
+	"EVENT_T_TIMEOUT",
+	"EVENT_T_OTHER",
+	"EVENT_T_PRIMITIVE"
+};
+
+/* Return value of a state function */
+const char *sctp_status_tbl[] = {
+	"DISPOSITION_DISCARD",
+	"DISPOSITION_CONSUME",
+	"DISPOSITION_NOMEM",
+	"DISPOSITION_DELETE_TCB",
+	"DISPOSITION_ABORT",
+	"DISPOSITION_VIOLATION",
+	"DISPOSITION_NOT_IMPL",
+	"DISPOSITION_ERROR",
+	"DISPOSITION_BUG"
+};
+
+/* Printable forms of primitives */
+static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
+	"PRIMITIVE_ASSOCIATE",
+	"PRIMITIVE_SHUTDOWN",
+	"PRIMITIVE_ABORT",
+	"PRIMITIVE_SEND",
+	"PRIMITIVE_REQUESTHEARTBEAT",
+};
+
+/* Lookup primitive debug name. */
+const char *sctp_pname(const sctp_subtype_t id)
+{
+	if (id.primitive < 0)
+		return "illegal primitive";
+	if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
+		return sctp_primitive_tbl[id.primitive];
+	return "unknown_primitive";
+}
+
+static const char *sctp_other_tbl[] = {
+	"NO_PENDING_TSN",
+        "ICMP_PROTO_UNREACH",
+};
+
+/* Lookup "other" debug name. */
+const char *sctp_oname(const sctp_subtype_t id)
+{
+	if (id.other < 0)
+		return "illegal 'other' event";
+	if (id.other <= SCTP_EVENT_OTHER_MAX)
+		return sctp_other_tbl[id.other];
+	return "unknown 'other' event";
+}
+
+static const char *sctp_timer_tbl[] = {
+	"TIMEOUT_NONE",
+	"TIMEOUT_T1_COOKIE",
+	"TIMEOUT_T1_INIT",
+	"TIMEOUT_T2_SHUTDOWN",
+	"TIMEOUT_T3_RTX",
+	"TIMEOUT_T4_RTO",
+	"TIMEOUT_T5_SHUTDOWN_GUARD",
+	"TIMEOUT_HEARTBEAT",
+	"TIMEOUT_SACK",
+	"TIMEOUT_AUTOCLOSE",
+};
+
+/* Lookup timer debug name. */
+const char *sctp_tname(const sctp_subtype_t id)
+{
+	if (id.timeout < 0)
+		return "illegal 'timer' event";
+	if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX)
+		return sctp_timer_tbl[id.timeout];
+	return "unknown_timer";
+}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
new file mode 100644
index 00000000000..544b75077db
--- /dev/null
+++ b/net/sctp/endpointola.c
@@ -0,0 +1,389 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2002 International Business Machines, Corp.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * This abstraction represents an SCTP endpoint.
+ *
+ * This file is part of the implementation of the add-IP extension,
+ * based on <draft-ietf-tsvwg-addip-sctp-02.txt> June 29, 2001,
+ * for the SCTP kernel reference Implementation.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson <karl@athena.chicago.il.us>
+ *    Jon Grimm <jgrimm@austin.ibm.com>
+ *    Daisy Chang <daisyc@us.ibm.com>
+ *    Dajiang Zhang <dajiang.zhang@nokia.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/random.h>	/* get_random_bytes() */
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for internal helpers. */
+static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
+
+/*
+ * Initialize the base fields of the endpoint structure.
+ */
+static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
+						struct sock *sk, int gfp)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	memset(ep, 0, sizeof(struct sctp_endpoint));
+
+	/* Initialize the base structure. */
+	/* What type of endpoint are we?  */
+	ep->base.type = SCTP_EP_TYPE_SOCKET;
+
+	/* Initialize the basic object fields. */
+	atomic_set(&ep->base.refcnt, 1);
+	ep->base.dead = 0;
+	ep->base.malloced = 1;
+
+	/* Create an input queue.  */
+	sctp_inq_init(&ep->base.inqueue);
+
+	/* Set its top-half handler */
+	sctp_inq_set_th_handler(&ep->base.inqueue,
+				(void (*)(void *))sctp_endpoint_bh_rcv, ep);
+
+	/* Initialize the bind addr area */
+	sctp_bind_addr_init(&ep->base.bind_addr, 0);
+	rwlock_init(&ep->base.addr_lock);
+
+	/* Remember who we are attached to.  */
+	ep->base.sk = sk;
+	sock_hold(ep->base.sk);
+
+	/* Create the lists of associations.  */
+	INIT_LIST_HEAD(&ep->asocs);
+
+	/* Set up the base timeout information.  */
+	ep->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
+		SCTP_DEFAULT_TIMEOUT_T1_COOKIE;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
+		SCTP_DEFAULT_TIMEOUT_T1_INIT;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] =
+		msecs_to_jiffies(sp->rtoinfo.srto_initial);
+	ep->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] = 0;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = 0;
+
+	/* sctpimpguide-05 Section 2.12.2
+	 * If the 'T5-shutdown-guard' timer is used, it SHOULD be set to the
+	 * recommended value of 5 times 'RTO.Max'.
+	 */
+        ep->timeouts[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]
+		= 5 * msecs_to_jiffies(sp->rtoinfo.srto_max);
+
+	ep->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] =
+		SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
+		SCTP_DEFAULT_TIMEOUT_SACK;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
+		sp->autoclose * HZ;
+
+	/* Use SCTP specific send buffer space queues.  */
+	sk->sk_write_space = sctp_write_space;
+	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+	/* Initialize the secret key used with cookie. */
+	get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE);
+	ep->last_key = ep->current_key = 0;
+	ep->key_changed_at = jiffies;
+
+	ep->debug_name = "unnamedEndpoint";
+	return ep;
+}
+
+/* Create a sctp_endpoint with all that boring stuff initialized.
+ * Returns NULL if there isn't enough memory.
+ */
+struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, int gfp)
+{
+	struct sctp_endpoint *ep;
+
+	/* Build a local endpoint. */
+	ep = t_new(struct sctp_endpoint, gfp);
+	if (!ep)
+		goto fail;
+	if (!sctp_endpoint_init(ep, sk, gfp))
+		goto fail_init;
+	ep->base.malloced = 1;
+	SCTP_DBG_OBJCNT_INC(ep);
+	return ep;
+
+fail_init:
+	kfree(ep);
+fail:
+	return NULL;
+}
+
+/* Add an association to an endpoint.  */
+void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
+			    struct sctp_association *asoc)
+{
+	struct sock *sk = ep->base.sk;
+
+	/* Now just add it to our list of asocs */
+	list_add_tail(&asoc->asocs, &ep->asocs);
+
+	/* Increment the backlog value for a TCP-style listening socket. */
+	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
+		sk->sk_ack_backlog++;
+}
+
+/* Free the endpoint structure.  Delay cleanup until
+ * all users have released their reference count on this structure.
+ */
+void sctp_endpoint_free(struct sctp_endpoint *ep)
+{
+	ep->base.dead = 1;
+	sctp_endpoint_put(ep);
+}
+
+/* Final destructor for endpoint.  */
+static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
+{
+	SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
+
+	ep->base.sk->sk_state = SCTP_SS_CLOSED;
+
+	/* Unlink this endpoint, so we can't find it again! */
+	sctp_unhash_endpoint(ep);
+
+	/* Free up the HMAC transform. */
+	if (sctp_sk(ep->base.sk)->hmac)
+		sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
+
+	/* Cleanup. */
+	sctp_inq_free(&ep->base.inqueue);
+	sctp_bind_addr_free(&ep->base.bind_addr);
+
+	/* Remove and free the port */
+	if (sctp_sk(ep->base.sk)->bind_hash)
+		sctp_put_port(ep->base.sk);
+
+	/* Give up our hold on the sock. */
+	if (ep->base.sk)
+		sock_put(ep->base.sk);
+
+	/* Finally, free up our memory. */
+	if (ep->base.malloced) {
+		kfree(ep);
+		SCTP_DBG_OBJCNT_DEC(ep);
+	}
+}
+
+/* Hold a reference to an endpoint. */
+void sctp_endpoint_hold(struct sctp_endpoint *ep)
+{
+	atomic_inc(&ep->base.refcnt);
+}
+
+/* Release a reference to an endpoint and clean up if there are
+ * no more references.
+ */
+void sctp_endpoint_put(struct sctp_endpoint *ep)
+{
+	if (atomic_dec_and_test(&ep->base.refcnt))
+		sctp_endpoint_destroy(ep);
+}
+
+/* Is this the endpoint we are looking for?  */
+struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
+					       const union sctp_addr *laddr)
+{
+	struct sctp_endpoint *retval;
+
+	sctp_read_lock(&ep->base.addr_lock);
+	if (ep->base.bind_addr.port == laddr->v4.sin_port) {
+		if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
+					 sctp_sk(ep->base.sk))) {
+			retval = ep;
+			goto out;
+		}
+	}
+
+	retval = NULL;
+
+out:
+	sctp_read_unlock(&ep->base.addr_lock);
+	return retval;
+}
+
+/* Find the association that goes with this chunk.
+ * We do a linear search of the associations for this endpoint.
+ * We return the matching transport address too.
+ */
+static struct sctp_association *__sctp_endpoint_lookup_assoc(
+	const struct sctp_endpoint *ep,
+	const union sctp_addr *paddr,
+	struct sctp_transport **transport)
+{
+	int rport;
+	struct sctp_association *asoc;
+	struct list_head *pos;
+
+	rport = paddr->v4.sin_port;
+
+	list_for_each(pos, &ep->asocs) {
+		asoc = list_entry(pos, struct sctp_association, asocs);
+		if (rport == asoc->peer.port) {
+			sctp_read_lock(&asoc->base.addr_lock);
+			*transport = sctp_assoc_lookup_paddr(asoc, paddr);
+			sctp_read_unlock(&asoc->base.addr_lock);
+
+			if (*transport)
+				return asoc;
+		}
+	}
+
+	*transport = NULL;
+	return NULL;
+}
+
+/* Lookup association on an endpoint based on a peer address.  BH-safe.  */
+struct sctp_association *sctp_endpoint_lookup_assoc(
+	const struct sctp_endpoint *ep,
+	const union sctp_addr *paddr,
+	struct sctp_transport **transport)
+{
+	struct sctp_association *asoc;
+
+	sctp_local_bh_disable();
+	asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport);
+	sctp_local_bh_enable();
+
+	return asoc;
+}
+
+/* Look for any peeled off association from the endpoint that matches the
+ * given peer address.
+ */
+int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+				const union sctp_addr *paddr)
+{
+	struct list_head *pos;
+	struct sctp_sockaddr_entry *addr;
+	struct sctp_bind_addr *bp;
+
+	sctp_read_lock(&ep->base.addr_lock);
+	bp = &ep->base.bind_addr;
+	list_for_each(pos, &bp->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if (sctp_has_association(&addr->a, paddr)) {
+			sctp_read_unlock(&ep->base.addr_lock);
+			return 1;
+		}
+	}
+	sctp_read_unlock(&ep->base.addr_lock);
+
+	return 0;
+}
+
+/* Do delayed input processing.  This is scheduled by sctp_rcv().
+ * This may be called on BH or task time.
+ */
+static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep)
+{
+	struct sctp_association *asoc;
+	struct sock *sk;
+	struct sctp_transport *transport;
+	struct sctp_chunk *chunk;
+	struct sctp_inq *inqueue;
+	sctp_subtype_t subtype;
+	sctp_state_t state;
+	int error = 0;
+
+	if (ep->base.dead)
+		return;
+
+	asoc = NULL;
+	inqueue = &ep->base.inqueue;
+	sk = ep->base.sk;
+
+	while (NULL != (chunk = sctp_inq_pop(inqueue))) {
+		subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
+
+		/* We might have grown an association since last we
+		 * looked, so try again.
+		 *
+		 * This happens when we've just processed our
+		 * COOKIE-ECHO chunk.
+		 */
+		if (NULL == chunk->asoc) {
+			asoc = sctp_endpoint_lookup_assoc(ep,
+							  sctp_source(chunk),
+							  &transport);
+			chunk->asoc = asoc;
+			chunk->transport = transport;
+		}
+
+		state = asoc ? asoc->state : SCTP_STATE_CLOSED;
+
+		/* Remember where the last DATA chunk came from so we
+		 * know where to send the SACK.
+		 */
+		if (asoc && sctp_chunk_is_data(chunk))
+			asoc->peer.last_data_from = chunk->transport;
+		else
+			SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+
+		if (chunk->transport)
+			chunk->transport->last_time_heard = jiffies;
+
+		error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state,
+                                   ep, asoc, chunk, GFP_ATOMIC);
+
+		if (error && chunk)
+			chunk->pdiscard = 1;
+
+		/* Check to see if the endpoint is freed in response to
+		 * the incoming chunk. If so, get out of the while loop.
+		 */
+		if (!sctp_sk(sk)->ep)
+			break;
+	}
+}
diff --git a/net/sctp/input.c b/net/sctp/input.c
new file mode 100644
index 00000000000..b719a77d66b
--- /dev/null
+++ b/net/sctp/input.c
@@ -0,0 +1,913 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2003 International Business Machines, Corp.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions handle all input from the IP layer into SCTP.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson <karl@athena.chicago.il.us>
+ *    Xingang Guo <xingang.guo@intel.com>
+ *    Jon Grimm <jgrimm@us.ibm.com>
+ *    Hui Huang <hui.huang@nokia.com>
+ *    Daisy Chang <daisyc@us.ibm.com>
+ *    Sridhar Samudrala <sri@us.ibm.com>
+ *    Ardelle Fan <ardelle.fan@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h> /* For struct list_head */
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/time.h> /* For struct timeval */
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/snmp.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for internal helpers. */
+static int sctp_rcv_ootb(struct sk_buff *);
+static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+				      const union sctp_addr *laddr,
+				      const union sctp_addr *paddr,
+				      struct sctp_transport **transportp);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr);
+static struct sctp_association *__sctp_lookup_association(
+					const union sctp_addr *local,
+					const union sctp_addr *peer,
+					struct sctp_transport **pt);
+
+
+/* Calculate the SCTP checksum of an SCTP packet.  */
+static inline int sctp_rcv_checksum(struct sk_buff *skb)
+{
+	struct sctphdr *sh;
+	__u32 cmp, val;
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+	sh = (struct sctphdr *) skb->h.raw;
+	cmp = ntohl(sh->checksum);
+
+	val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+
+	for (; list; list = list->next)
+		val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
+					val);
+
+	val = sctp_end_cksum(val);
+
+	if (val != cmp) {
+		/* CRC failure, dump it. */
+		SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS);
+		return -1;
+	}
+	return 0;
+}
+
+/* The free routine for skbuffs that sctp receives */
+static void sctp_rfree(struct sk_buff *skb)
+{
+	atomic_sub(sizeof(struct sctp_chunk),&skb->sk->sk_rmem_alloc);
+	sock_rfree(skb);
+}
+
+/* The ownership wrapper routine to do receive buffer accounting */
+static void sctp_rcv_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+	skb_set_owner_r(skb,sk);
+	skb->destructor = sctp_rfree;
+	atomic_add(sizeof(struct sctp_chunk),&sk->sk_rmem_alloc);
+}
+
+/*
+ * This is the routine which IP calls when receiving an SCTP packet.
+ */
+int sctp_rcv(struct sk_buff *skb)
+{
+	struct sock *sk;
+	struct sctp_association *asoc;
+	struct sctp_endpoint *ep = NULL;
+	struct sctp_ep_common *rcvr;
+	struct sctp_transport *transport = NULL;
+	struct sctp_chunk *chunk;
+	struct sctphdr *sh;
+	union sctp_addr src;
+	union sctp_addr dest;
+	int family;
+	struct sctp_af *af;
+	int ret = 0;
+
+	if (skb->pkt_type!=PACKET_HOST)
+		goto discard_it;
+
+	SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS);
+
+	sh = (struct sctphdr *) skb->h.raw;
+
+	/* Pull up the IP and SCTP headers. */
+	__skb_pull(skb, skb->h.raw - skb->data);
+	if (skb->len < sizeof(struct sctphdr))
+		goto discard_it;
+	if (sctp_rcv_checksum(skb) < 0)
+		goto discard_it;
+
+	skb_pull(skb, sizeof(struct sctphdr));
+
+	/* Make sure we at least have chunk headers worth of data left. */
+	if (skb->len < sizeof(struct sctp_chunkhdr))
+		goto discard_it;
+
+	family = ipver2af(skb->nh.iph->version);
+	af = sctp_get_af_specific(family);
+	if (unlikely(!af))
+		goto discard_it;
+
+	/* Initialize local addresses for lookups. */
+	af->from_skb(&src, skb, 1);
+	af->from_skb(&dest, skb, 0);
+
+	/* If the packet is to or from a non-unicast address,
+	 * silently discard the packet.
+	 *
+	 * This is not clearly defined in the RFC except in section
+	 * 8.4 - OOTB handling.  However, based on the book "Stream Control
+	 * Transmission Protocol" 2.1, "It is important to note that the
+	 * IP address of an SCTP transport address must be a routable
+	 * unicast address.  In other words, IP multicast addresses and
+	 * IP broadcast addresses cannot be used in an SCTP transport
+	 * address."
+	 */
+	if (!af->addr_valid(&src, NULL) || !af->addr_valid(&dest, NULL))
+		goto discard_it;
+
+	asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
+
+	/*
+	 * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
+	 * An SCTP packet is called an "out of the blue" (OOTB)
+	 * packet if it is correctly formed, i.e., passed the
+	 * receiver's checksum check, but the receiver is not
+	 * able to identify the association to which this
+	 * packet belongs.
+	 */
+	if (!asoc) {
+		ep = __sctp_rcv_lookup_endpoint(&dest);
+		if (sctp_rcv_ootb(skb)) {
+			SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
+			goto discard_release;
+		}
+	}
+
+	/* Retrieve the common input handling substructure. */
+	rcvr = asoc ? &asoc->base : &ep->base;
+	sk = rcvr->sk;
+
+	if ((sk) && (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)) {
+		goto discard_release;
+	}
+
+
+	/* SCTP seems to always need a timestamp right now (FIXME) */
+	if (skb->stamp.tv_sec == 0) {
+		do_gettimeofday(&skb->stamp);
+		sock_enable_timestamp(sk); 
+	}
+
+	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
+		goto discard_release;
+
+	ret = sk_filter(sk, skb, 1);
+	if (ret)
+                goto discard_release;
+
+	/* Create an SCTP packet structure. */
+	chunk = sctp_chunkify(skb, asoc, sk);
+	if (!chunk) {
+		ret = -ENOMEM;
+		goto discard_release;
+	}
+
+	sctp_rcv_set_owner_r(skb,sk);
+
+	/* Remember what endpoint is to handle this packet. */
+	chunk->rcvr = rcvr;
+
+	/* Remember the SCTP header. */
+	chunk->sctp_hdr = sh;
+
+	/* Set the source and destination addresses of the incoming chunk.  */
+	sctp_init_addrs(chunk, &src, &dest);
+
+	/* Remember where we came from.  */
+	chunk->transport = transport;
+
+	/* Acquire access to the sock lock. Note: We are safe from other
+	 * bottom halves on this lock, but a user may be in the lock too,
+	 * so check if it is busy.
+	 */
+	sctp_bh_lock_sock(sk);
+
+	if (sock_owned_by_user(sk))
+		sk_add_backlog(sk, (struct sk_buff *) chunk);
+	else
+		sctp_backlog_rcv(sk, (struct sk_buff *) chunk);
+
+	/* Release the sock and any reference counts we took in the
+	 * lookup calls.
+	 */
+	sctp_bh_unlock_sock(sk);
+	if (asoc)
+		sctp_association_put(asoc);
+	else
+		sctp_endpoint_put(ep);
+	sock_put(sk);
+	return ret;
+
+discard_it:
+	kfree_skb(skb);
+	return ret;
+
+discard_release:
+	/* Release any structures we may be holding. */
+	if (asoc) {
+		sock_put(asoc->base.sk);
+		sctp_association_put(asoc);
+	} else {
+		sock_put(ep->base.sk);
+		sctp_endpoint_put(ep);
+	}
+
+	goto discard_it;
+}
+
+/* Handle second half of inbound skb processing.  If the sock was busy,
+ * we may have need to delay processing until later when the sock is
+ * released (on the backlog).   If not busy, we call this routine
+ * directly from the bottom half.
+ */
+int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct sctp_chunk *chunk;
+	struct sctp_inq *inqueue;
+
+	/* One day chunk will live inside the skb, but for
+	 * now this works.
+	 */
+	chunk = (struct sctp_chunk *) skb;
+	inqueue = &chunk->rcvr->inqueue;
+
+	sctp_inq_push(inqueue, chunk);
+        return 0;
+}
+
+/* Handle icmp frag needed error. */
+void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
+			   struct sctp_transport *t, __u32 pmtu)
+{
+	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
+		printk(KERN_WARNING "%s: Reported pmtu %d too low, "
+		       "using default minimum of %d\n", __FUNCTION__, pmtu,
+		       SCTP_DEFAULT_MINSEGMENT);
+		pmtu = SCTP_DEFAULT_MINSEGMENT;
+	}
+
+	if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) {
+		t->pmtu = pmtu;
+		sctp_assoc_sync_pmtu(asoc);
+		sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
+	}
+}
+
+/*
+ * SCTP Implementer's Guide, 2.37 ICMP handling procedures
+ *
+ * ICMP8) If the ICMP code is a "Unrecognized next header type encountered"
+ *        or a "Protocol Unreachable" treat this message as an abort
+ *        with the T bit set.
+ *
+ * This function sends an event to the state machine, which will abort the
+ * association.
+ *
+ */
+void sctp_icmp_proto_unreachable(struct sock *sk,
+                           struct sctp_endpoint *ep,
+                           struct sctp_association *asoc,
+                           struct sctp_transport *t)
+{
+	SCTP_DEBUG_PRINTK("%s\n",  __FUNCTION__);
+
+	sctp_do_sm(SCTP_EVENT_T_OTHER,
+		   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
+		   asoc->state, asoc->ep, asoc, NULL,
+		   GFP_ATOMIC);
+
+}
+
+/* Common lookup code for icmp/icmpv6 error handler. */
+struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
+			     struct sctphdr *sctphdr,
+			     struct sctp_endpoint **epp,
+			     struct sctp_association **app,
+			     struct sctp_transport **tpp)
+{
+	union sctp_addr saddr;
+	union sctp_addr daddr;
+	struct sctp_af *af;
+	struct sock *sk = NULL;
+	struct sctp_endpoint *ep = NULL;
+	struct sctp_association *asoc = NULL;
+	struct sctp_transport *transport = NULL;
+
+	*app = NULL; *epp = NULL; *tpp = NULL;
+
+	af = sctp_get_af_specific(family);
+	if (unlikely(!af)) {
+		return NULL;
+	}
+
+	/* Initialize local addresses for lookups. */
+	af->from_skb(&saddr, skb, 1);
+	af->from_skb(&daddr, skb, 0);
+
+	/* Look for an association that matches the incoming ICMP error
+	 * packet.
+	 */
+	asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
+	if (!asoc) {
+		/* If there is no matching association, see if it matches any
+		 * endpoint. This may happen for an ICMP error generated in
+		 * response to an INIT_ACK.
+		 */
+		ep = __sctp_rcv_lookup_endpoint(&daddr);
+		if (!ep) {
+			return NULL;
+		}
+	}
+
+	if (asoc) {
+		sk = asoc->base.sk;
+
+		if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) {
+			ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+			goto out;
+		}
+	} else
+		sk = ep->base.sk;
+
+	sctp_bh_lock_sock(sk);
+
+	/* If too many ICMPs get dropped on busy
+	 * servers this needs to be solved differently.
+	 */
+	if (sock_owned_by_user(sk))
+		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+	*epp = ep;
+	*app = asoc;
+	*tpp = transport;
+	return sk;
+
+out:
+	sock_put(sk);
+	if (asoc)
+		sctp_association_put(asoc);
+	if (ep)
+		sctp_endpoint_put(ep);
+	return NULL;
+}
+
+/* Common cleanup code for icmp/icmpv6 error handler. */
+void sctp_err_finish(struct sock *sk, struct sctp_endpoint *ep,
+		     struct sctp_association *asoc)
+{
+	sctp_bh_unlock_sock(sk);
+	sock_put(sk);
+	if (asoc)
+		sctp_association_put(asoc);
+	if (ep)
+		sctp_endpoint_put(ep);
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.  After adjustment
+ * header points to the first 8 bytes of the sctp header.  We need
+ * to find the appropriate port.
+ *
+ * The locking strategy used here is very "optimistic". When
+ * someone else accesses the socket the ICMP is just dropped
+ * and for some paths there is no check at all.
+ * A more general error queue to queue errors for later handling
+ * is probably better.
+ *
+ */
+void sctp_v4_err(struct sk_buff *skb, __u32 info)
+{
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
+	int type = skb->h.icmph->type;
+	int code = skb->h.icmph->code;
+	struct sock *sk;
+	struct sctp_endpoint *ep;
+	struct sctp_association *asoc;
+	struct sctp_transport *transport;
+	struct inet_sock *inet;
+	char *saveip, *savesctp;
+	int err;
+
+	if (skb->len < ((iph->ihl << 2) + 8)) {
+		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		return;
+	}
+
+	/* Fix up skb to look at the embedded net header. */
+	saveip = skb->nh.raw;
+	savesctp  = skb->h.raw;
+	skb->nh.iph = iph;
+	skb->h.raw = (char *)sh;
+	sk = sctp_err_lookup(AF_INET, skb, sh, &ep, &asoc, &transport);
+	/* Put back, the original pointers. */
+	skb->nh.raw = saveip;
+	skb->h.raw = savesctp;
+	if (!sk) {
+		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+		return;
+	}
+	/* Warning:  The sock lock is held.  Remember to call
+	 * sctp_err_finish!
+	 */
+
+	switch (type) {
+	case ICMP_PARAMETERPROB:
+		err = EPROTO;
+		break;
+	case ICMP_DEST_UNREACH:
+		if (code > NR_ICMP_UNREACH)
+			goto out_unlock;
+
+		/* PMTU discovery (RFC1191) */
+		if (ICMP_FRAG_NEEDED == code) {
+			sctp_icmp_frag_needed(sk, asoc, transport, info);
+			goto out_unlock;
+		}
+		else {
+			if (ICMP_PROT_UNREACH == code) {
+				sctp_icmp_proto_unreachable(sk, ep, asoc,
+							    transport);
+				goto out_unlock;
+			}
+		}
+		err = icmp_err_convert[code].errno;
+		break;
+	case ICMP_TIME_EXCEEDED:
+		/* Ignore any time exceeded errors due to fragment reassembly
+		 * timeouts.
+		 */
+		if (ICMP_EXC_FRAGTIME == code)
+			goto out_unlock;
+
+		err = EHOSTUNREACH;
+		break;
+	default:
+		goto out_unlock;
+	}
+
+	inet = inet_sk(sk);
+	if (!sock_owned_by_user(sk) && inet->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else {  /* Only an error on timeout */
+		sk->sk_err_soft = err;
+	}
+
+out_unlock:
+	sctp_err_finish(sk, ep, asoc);
+}
+
+/*
+ * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
+ *
+ * This function scans all the chunks in the OOTB packet to determine if
+ * the packet should be discarded right away.  If a response might be needed
+ * for this packet, or, if further processing is possible, the packet will
+ * be queued to a proper inqueue for the next phase of handling.
+ *
+ * Output:
+ * Return 0 - If further processing is needed.
+ * Return 1 - If the packet can be discarded right away.
+ */
+int sctp_rcv_ootb(struct sk_buff *skb)
+{
+	sctp_chunkhdr_t *ch;
+	__u8 *ch_end;
+	sctp_errhdr_t *err;
+
+	ch = (sctp_chunkhdr_t *) skb->data;
+	ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length));
+
+	/* Scan through all the chunks in the packet.  */
+	while (ch_end > (__u8 *)ch && ch_end < skb->tail) {
+
+		/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
+		 * receiver MUST silently discard the OOTB packet and take no
+		 * further action.
+		 */
+		if (SCTP_CID_ABORT == ch->type)
+			goto discard;
+
+		/* RFC 8.4, 6) If the packet contains a SHUTDOWN COMPLETE
+		 * chunk, the receiver should silently discard the packet
+		 * and take no further action.
+		 */
+		if (SCTP_CID_SHUTDOWN_COMPLETE == ch->type)
+			goto discard;
+
+		/* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
+		 * or a COOKIE ACK the SCTP Packet should be silently
+		 * discarded.
+		 */
+		if (SCTP_CID_COOKIE_ACK == ch->type)
+			goto discard;
+
+		if (SCTP_CID_ERROR == ch->type) {
+			sctp_walk_errors(err, ch) {
+				if (SCTP_ERROR_STALE_COOKIE == err->cause)
+					goto discard;
+			}
+		}
+
+		ch = (sctp_chunkhdr_t *) ch_end;
+	        ch_end = ((__u8 *) ch) + WORD_ROUND(ntohs(ch->length));
+	}
+
+	return 0;
+
+discard:
+	return 1;
+}
+
+/* Insert endpoint into the hash table.  */
+static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
+{
+	struct sctp_ep_common **epp;
+	struct sctp_ep_common *epb;
+	struct sctp_hashbucket *head;
+
+	epb = &ep->base;
+
+	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+	head = &sctp_ep_hashtable[epb->hashent];
+
+	sctp_write_lock(&head->lock);
+	epp = &head->chain;
+	epb->next = *epp;
+	if (epb->next)
+		(*epp)->pprev = &epb->next;
+	*epp = epb;
+	epb->pprev = epp;
+	sctp_write_unlock(&head->lock);
+}
+
+/* Add an endpoint to the hash. Local BH-safe. */
+void sctp_hash_endpoint(struct sctp_endpoint *ep)
+{
+	sctp_local_bh_disable();
+	__sctp_hash_endpoint(ep);
+	sctp_local_bh_enable();
+}
+
+/* Remove endpoint from the hash table.  */
+static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+
+	epb = &ep->base;
+
+	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+
+	head = &sctp_ep_hashtable[epb->hashent];
+
+	sctp_write_lock(&head->lock);
+
+	if (epb->pprev) {
+		if (epb->next)
+			epb->next->pprev = epb->pprev;
+		*epb->pprev = epb->next;
+		epb->pprev = NULL;
+	}
+
+	sctp_write_unlock(&head->lock);
+}
+
+/* Remove endpoint from the hash.  Local BH-safe. */
+void sctp_unhash_endpoint(struct sctp_endpoint *ep)
+{
+	sctp_local_bh_disable();
+	__sctp_unhash_endpoint(ep);
+	sctp_local_bh_enable();
+}
+
+/* Look up an endpoint. */
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+	struct sctp_endpoint *ep;
+	int hash;
+
+	hash = sctp_ep_hashfn(laddr->v4.sin_port);
+	head = &sctp_ep_hashtable[hash];
+	read_lock(&head->lock);
+	for (epb = head->chain; epb; epb = epb->next) {
+		ep = sctp_ep(epb);
+		if (sctp_endpoint_is_match(ep, laddr))
+			goto hit;
+	}
+
+	ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+	epb = &ep->base;
+
+hit:
+	sctp_endpoint_hold(ep);
+	sock_hold(epb->sk);
+	read_unlock(&head->lock);
+	return ep;
+}
+
+/* Insert association into the hash table.  */
+static void __sctp_hash_established(struct sctp_association *asoc)
+{
+	struct sctp_ep_common **epp;
+	struct sctp_ep_common *epb;
+	struct sctp_hashbucket *head;
+
+	epb = &asoc->base;
+
+	/* Calculate which chain this entry will belong to. */
+	epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port);
+
+	head = &sctp_assoc_hashtable[epb->hashent];
+
+	sctp_write_lock(&head->lock);
+	epp = &head->chain;
+	epb->next = *epp;
+	if (epb->next)
+		(*epp)->pprev = &epb->next;
+	*epp = epb;
+	epb->pprev = epp;
+	sctp_write_unlock(&head->lock);
+}
+
+/* Add an association to the hash. Local BH-safe. */
+void sctp_hash_established(struct sctp_association *asoc)
+{
+	sctp_local_bh_disable();
+	__sctp_hash_established(asoc);
+	sctp_local_bh_enable();
+}
+
+/* Remove association from the hash table.  */
+static void __sctp_unhash_established(struct sctp_association *asoc)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+
+	epb = &asoc->base;
+
+	epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port,
+					 asoc->peer.port);
+
+	head = &sctp_assoc_hashtable[epb->hashent];
+
+	sctp_write_lock(&head->lock);
+
+	if (epb->pprev) {
+		if (epb->next)
+			epb->next->pprev = epb->pprev;
+		*epb->pprev = epb->next;
+		epb->pprev = NULL;
+	}
+
+	sctp_write_unlock(&head->lock);
+}
+
+/* Remove association from the hash table.  Local BH-safe. */
+void sctp_unhash_established(struct sctp_association *asoc)
+{
+	sctp_local_bh_disable();
+	__sctp_unhash_established(asoc);
+	sctp_local_bh_enable();
+}
+
+/* Look up an association. */
+static struct sctp_association *__sctp_lookup_association(
+					const union sctp_addr *local,
+					const union sctp_addr *peer,
+					struct sctp_transport **pt)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+	struct sctp_association *asoc;
+	struct sctp_transport *transport;
+	int hash;
+
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	hash = sctp_assoc_hashfn(local->v4.sin_port, peer->v4.sin_port);
+	head = &sctp_assoc_hashtable[hash];
+	read_lock(&head->lock);
+	for (epb = head->chain; epb; epb = epb->next) {
+		asoc = sctp_assoc(epb);
+		transport = sctp_assoc_is_match(asoc, local, peer);
+		if (transport)
+			goto hit;
+	}
+
+	read_unlock(&head->lock);
+
+	return NULL;
+
+hit:
+	*pt = transport;
+	sctp_association_hold(asoc);
+	sock_hold(epb->sk);
+	read_unlock(&head->lock);
+	return asoc;
+}
+
+/* Look up an association. BH-safe. */
+SCTP_STATIC
+struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr,
+						 const union sctp_addr *paddr,
+					    struct sctp_transport **transportp)
+{
+	struct sctp_association *asoc;
+
+	sctp_local_bh_disable();
+	asoc = __sctp_lookup_association(laddr, paddr, transportp);
+	sctp_local_bh_enable();
+
+	return asoc;
+}
+
+/* Is there an association matching the given local and peer addresses? */
+int sctp_has_association(const union sctp_addr *laddr,
+			 const union sctp_addr *paddr)
+{
+	struct sctp_association *asoc;
+	struct sctp_transport *transport;
+
+	if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) {
+		sock_put(asoc->base.sk);
+		sctp_association_put(asoc);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * SCTP Implementors Guide, 2.18 Handling of address
+ * parameters within the INIT or INIT-ACK.
+ *
+ * D) When searching for a matching TCB upon reception of an INIT
+ *    or INIT-ACK chunk the receiver SHOULD use not only the
+ *    source address of the packet (containing the INIT or
+ *    INIT-ACK) but the receiver SHOULD also use all valid
+ *    address parameters contained within the chunk.
+ *
+ * 2.18.3 Solution description
+ *
+ * This new text clearly specifies to an implementor the need
+ * to look within the INIT or INIT-ACK. Any implementation that
+ * does not do this, may not be able to establish associations
+ * in certain circumstances.
+ *
+ */
+static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
+	const union sctp_addr *laddr, struct sctp_transport **transportp)
+{
+	struct sctp_association *asoc;
+	union sctp_addr addr;
+	union sctp_addr *paddr = &addr;
+	struct sctphdr *sh = (struct sctphdr *) skb->h.raw;
+	sctp_chunkhdr_t *ch;
+	union sctp_params params;
+	sctp_init_chunk_t *init;
+	struct sctp_transport *transport;
+	struct sctp_af *af;
+
+	ch = (sctp_chunkhdr_t *) skb->data;
+
+	/* If this is INIT/INIT-ACK look inside the chunk too. */
+	switch (ch->type) {
+	case SCTP_CID_INIT:
+	case SCTP_CID_INIT_ACK:
+		break;
+	default:
+		return NULL;
+	}
+
+	/* The code below will attempt to walk the chunk and extract
+	 * parameter information.  Before we do that, we need to verify
+	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
+	 * walk off the end.
+	 */
+	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+		return NULL;
+
+	/*
+	 * This code will NOT touch anything inside the chunk--it is
+	 * strictly READ-ONLY.
+	 *
+	 * RFC 2960 3  SCTP packet Format
+	 *
+	 * Multiple chunks can be bundled into one SCTP packet up to
+	 * the MTU size, except for the INIT, INIT ACK, and SHUTDOWN
+	 * COMPLETE chunks.  These chunks MUST NOT be bundled with any
+	 * other chunk in a packet.  See Section 6.10 for more details
+	 * on chunk bundling.
+	 */
+
+	/* Find the start of the TLVs and the end of the chunk.  This is
+	 * the region we search for address parameters.
+	 */
+	init = (sctp_init_chunk_t *)skb->data;
+
+	/* Walk the parameters looking for embedded addresses. */
+	sctp_walk_params(params, init, init_hdr.params) {
+
+		/* Note: Ignoring hostname addresses. */
+		af = sctp_get_af_specific(param_type2af(params.p->type));
+		if (!af)
+			continue;
+
+		af->from_addr_param(paddr, params.addr, ntohs(sh->source), 0);
+
+		asoc = __sctp_lookup_association(laddr, paddr, &transport);
+		if (asoc)
+			return asoc;
+	}
+
+	return NULL;
+}
+
+/* Lookup an association for an inbound skb. */
+static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+				      const union sctp_addr *paddr,
+				      const union sctp_addr *laddr,
+				      struct sctp_transport **transportp)
+{
+	struct sctp_association *asoc;
+
+	asoc = __sctp_lookup_association(laddr, paddr, transportp);
+
+	/* Further lookup for INIT/INIT-ACK packets.
+	 * SCTP Implementors Guide, 2.18 Handling of address
+	 * parameters within the INIT or INIT-ACK.
+	 */
+	if (!asoc)
+		asoc = __sctp_rcv_init_lookup(skb, laddr, transportp);
+
+	return asoc;
+}
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
new file mode 100644
index 00000000000..cedf4351556
--- /dev/null
+++ b/net/sctp/inqueue.c
@@ -0,0 +1,204 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2002 International Business Machines, Corp.
+ * 
+ * This file is part of the SCTP kernel reference Implementation
+ * 
+ * These functions are the methods for accessing the SCTP inqueue.
+ *
+ * An SCTP inqueue is a queue into which you push SCTP packets
+ * (which might be bundles or fragments of chunks) and out of which you
+ * pop SCTP whole chunks.
+ * 
+ * The SCTP reference implementation is free software; 
+ * you can redistribute it and/or modify it under the terms of 
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ * 
+ * The SCTP reference implementation is distributed in the hope that it 
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.  
+ * 
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ * 
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by: 
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson <karl@athena.chicago.il.us>
+ * 
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <linux/interrupt.h>
+
+/* Initialize an SCTP inqueue.  */
+void sctp_inq_init(struct sctp_inq *queue)
+{
+	skb_queue_head_init(&queue->in);
+	queue->in_progress = NULL;
+
+	/* Create a task for delivering data.  */
+	INIT_WORK(&queue->immediate, NULL, NULL);
+
+	queue->malloced = 0;
+}
+
+/* Release the memory associated with an SCTP inqueue.  */
+void sctp_inq_free(struct sctp_inq *queue)
+{
+	struct sctp_chunk *chunk;
+
+	/* Empty the queue.  */
+	while ((chunk = (struct sctp_chunk *) skb_dequeue(&queue->in)) != NULL)
+		sctp_chunk_free(chunk);
+
+	/* If there is a packet which is currently being worked on,
+	 * free it as well.
+	 */
+	if (queue->in_progress)
+		sctp_chunk_free(queue->in_progress);
+
+	if (queue->malloced) {
+		/* Dump the master memory segment.  */
+		kfree(queue);
+	}
+}
+
+/* Put a new packet in an SCTP inqueue.
+ * We assume that packet->sctp_hdr is set and in host byte order.
+ */
+void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
+{
+	/* Directly call the packet handling routine. */
+
+	/* We are now calling this either from the soft interrupt
+	 * or from the backlog processing.
+	 * Eventually, we should clean up inqueue to not rely
+	 * on the BH related data structures.
+	 */
+	skb_queue_tail(&(q->in), (struct sk_buff *) packet);
+	q->immediate.func(q->immediate.data);
+}
+
+/* Extract a chunk from an SCTP inqueue.
+ *
+ * WARNING:  If you need to put the chunk on another queue, you need to
+ * make a shallow copy (clone) of it.
+ */
+struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
+{
+	struct sctp_chunk *chunk;
+	sctp_chunkhdr_t *ch = NULL;
+
+	/* The assumption is that we are safe to process the chunks
+	 * at this time.
+	 */
+
+	if ((chunk = queue->in_progress)) {
+		/* There is a packet that we have been working on.
+		 * Any post processing work to do before we move on?
+		 */
+		if (chunk->singleton ||
+		    chunk->end_of_packet ||
+		    chunk->pdiscard) {
+			sctp_chunk_free(chunk);
+			chunk = queue->in_progress = NULL;
+		} else {
+			/* Nothing to do. Next chunk in the packet, please. */
+			ch = (sctp_chunkhdr_t *) chunk->chunk_end;
+
+			/* Force chunk->skb->data to chunk->chunk_end.  */
+			skb_pull(chunk->skb,
+				 chunk->chunk_end - chunk->skb->data);
+		}
+	}
+
+	/* Do we need to take the next packet out of the queue to process? */
+	if (!chunk) {
+		/* Is the queue empty?  */
+        	if (skb_queue_empty(&queue->in))
+			return NULL;
+
+		chunk = queue->in_progress =
+			(struct sctp_chunk *) skb_dequeue(&queue->in);
+
+		/* This is the first chunk in the packet.  */
+		chunk->singleton = 1;
+		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+	}
+
+        chunk->chunk_hdr = ch;
+        chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+	/* In the unlikely case of an IP reassembly, the skb could be
+	 * non-linear. If so, update chunk_end so that it doesn't go past
+	 * the skb->tail.
+	 */
+	if (unlikely(skb_is_nonlinear(chunk->skb))) {
+		if (chunk->chunk_end > chunk->skb->tail)
+			chunk->chunk_end = chunk->skb->tail;
+	}
+	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
+
+	if (chunk->chunk_end < chunk->skb->tail) {
+		/* This is not a singleton */
+		chunk->singleton = 0;
+	} else if (chunk->chunk_end > chunk->skb->tail) {
+                /* RFC 2960, Section 6.10  Bundling
+		 *
+		 * Partial chunks MUST NOT be placed in an SCTP packet.
+		 * If the receiver detects a partial chunk, it MUST drop
+		 * the chunk.  
+		 *
+		 * Since the end of the chunk is past the end of our buffer
+		 * (which contains the whole packet, we can freely discard
+		 * the whole packet.
+		 */
+		sctp_chunk_free(chunk);
+		chunk = queue->in_progress = NULL;
+
+		return NULL;
+	} else {
+		/* We are at the end of the packet, so mark the chunk
+		 * in case we need to send a SACK.
+		 */
+		chunk->end_of_packet = 1;
+	}
+
+	SCTP_DEBUG_PRINTK("+++sctp_inq_pop+++ chunk %p[%s],"
+			  " length %d, skb->len %d\n",chunk,
+			  sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
+			  ntohs(chunk->chunk_hdr->length), chunk->skb->len);
+	return chunk;
+}
+
+/* Set a top-half handler.
+ *
+ * Originally, we the top-half handler was scheduled as a BH.  We now
+ * call the handler directly in sctp_inq_push() at a time that
+ * we know we are lock safe.
+ * The intent is that this routine will pull stuff out of the
+ * inqueue and process it.
+ */
+void sctp_inq_set_th_handler(struct sctp_inq *q,
+				 void (*callback)(void *), void *arg)
+{
+	INIT_WORK(&q->immediate, callback, arg);
+}
+
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
new file mode 100644
index 00000000000..e42c74e3ec1
--- /dev/null
+++ b/net/sctp/ipv6.c
@@ -0,0 +1,1013 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2002, 2004
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ * Copyright (c) 2002-2003 Intel Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * SCTP over IPv6.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *		   ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Le Yanqun		    <yanqun.le@nokia.com>
+ *    Hui Huang		    <hui.huang@nokia.com>
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *    Jon Grimm		    <jgrimm@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *
+ * Based on:
+ *	linux/net/ipv6/tcp_ipv6.c
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/ipsec.h>
+
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/inet_common.h>
+#include <net/inet_ecn.h>
+#include <net/sctp/sctp.h>
+
+#include <asm/uaccess.h>
+
+extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *);
+static struct notifier_block sctp_inet6addr_notifier = {
+	.notifier_call = sctp_inetaddr_event,
+};
+
+/* ICMP error handler. */
+SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			     int type, int code, int offset, __u32 info)
+{
+	struct inet6_dev *idev;
+	struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
+	struct sock *sk;
+	struct sctp_endpoint *ep;
+	struct sctp_association *asoc;
+	struct sctp_transport *transport;
+	struct ipv6_pinfo *np;
+	char *saveip, *savesctp;
+	int err;
+
+	idev = in6_dev_get(skb->dev);
+
+	/* Fix up skb to look at the embedded net header. */
+	saveip = skb->nh.raw;
+	savesctp  = skb->h.raw;
+	skb->nh.ipv6h = iph;
+	skb->h.raw = (char *)sh;
+	sk = sctp_err_lookup(AF_INET6, skb, sh, &ep, &asoc, &transport);
+	/* Put back, the original pointers. */
+	skb->nh.raw = saveip;
+	skb->h.raw = savesctp;
+	if (!sk) {
+		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+		goto out;
+	}
+
+	/* Warning:  The sock lock is held.  Remember to call
+	 * sctp_err_finish!
+	 */
+
+	switch (type) {
+	case ICMPV6_PKT_TOOBIG:
+		sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info));
+		goto out_unlock;
+	case ICMPV6_PARAMPROB:
+		if (ICMPV6_UNK_NEXTHDR == code) {
+			sctp_icmp_proto_unreachable(sk, ep, asoc, transport);
+			goto out_unlock;
+		}
+		break;
+	default:
+		break;
+	}
+
+	np = inet6_sk(sk);
+	icmpv6_err_convert(type, code, &err);
+	if (!sock_owned_by_user(sk) && np->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else {  /* Only an error on timeout */
+		sk->sk_err_soft = err;
+	}
+
+out_unlock:
+	sctp_err_finish(sk, ep, asoc);
+out:
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+}
+
+/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
+static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport,
+			int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+
+	fl.proto = sk->sk_protocol;
+
+	/* Fill in the dest address from the route entry passed with the skb
+	 * and the source address from the transport.
+	 */
+	ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr);
+	ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr);
+
+	fl.fl6_flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+	if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL)
+		fl.oif = transport->saddr.v6.sin6_scope_id;
+	else
+		fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_sport = inet_sk(sk)->sport;
+	fl.fl_ip_dport = transport->ipaddr.v6.sin6_port;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+	}
+
+	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
+			  "src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+			  "dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			  __FUNCTION__, skb, skb->len,
+			  NIP6(fl.fl6_src), NIP6(fl.fl6_dst));
+
+	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+
+	return ip6_xmit(sk, skb, &fl, np->opt, ipfragok);
+}
+
+/* Returns the dst cache entry for the given source and destination ip
+ * addresses.
+ */
+static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
+					 union sctp_addr *daddr,
+					 union sctp_addr *saddr)
+{
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr);
+	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		fl.oif = daddr->v6.sin6_scope_id;
+	
+
+	SCTP_DEBUG_PRINTK("%s: DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+			  __FUNCTION__, NIP6(fl.fl6_dst));
+
+	if (saddr) {
+		ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
+		SCTP_DEBUG_PRINTK(
+			"SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x - ",
+			NIP6(fl.fl6_src));
+	}
+
+	dst = ip6_route_output(NULL, &fl);
+	if (dst) {
+		struct rt6_info *rt;
+		rt = (struct rt6_info *)dst;
+		SCTP_DEBUG_PRINTK(
+			"rt6_dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+			"rt6_src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+			NIP6(rt->rt6i_dst.addr), NIP6(rt->rt6i_src.addr));
+	} else {
+		SCTP_DEBUG_PRINTK("NO ROUTE\n");
+	}
+
+	return dst;
+}
+
+/* Returns the number of consecutive initial bits that match in the 2 ipv6
+ * addresses.
+ */
+static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
+					 union sctp_addr *s2)
+{
+	struct in6_addr *a1 = &s1->v6.sin6_addr;
+	struct in6_addr *a2 = &s2->v6.sin6_addr;
+	int i, j;
+
+	for (i = 0; i < 4 ; i++) {
+		__u32 a1xora2;
+
+		a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
+
+		if ((j = fls(ntohl(a1xora2))))
+			return (i * 32 + 32 - j);
+	}
+
+	return (i*32);
+}
+
+/* Fills in the source address(saddr) based on the destination address(daddr)
+ * and asoc's bind address list.
+ */
+static void sctp_v6_get_saddr(struct sctp_association *asoc,
+			      struct dst_entry *dst,
+			      union sctp_addr *daddr,
+			      union sctp_addr *saddr)
+{
+	struct sctp_bind_addr *bp;
+	rwlock_t *addr_lock;
+	struct sctp_sockaddr_entry *laddr;
+	struct list_head *pos;
+	sctp_scope_t scope;
+	union sctp_addr *baddr = NULL;
+	__u8 matchlen = 0;
+	__u8 bmatchlen;
+
+	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p "
+			  "daddr:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+			  __FUNCTION__, asoc, dst, NIP6(daddr->v6.sin6_addr));
+
+	if (!asoc) {
+		ipv6_get_saddr(dst, &daddr->v6.sin6_addr,&saddr->v6.sin6_addr);
+		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: "
+				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+				  NIP6(saddr->v6.sin6_addr));
+		return;
+	}
+
+	scope = sctp_scope(daddr);
+
+	bp = &asoc->base.bind_addr;
+	addr_lock = &asoc->base.addr_lock;
+
+	/* Go through the bind address list and find the best source address
+	 * that matches the scope of the destination address.
+	 */
+	sctp_read_lock(addr_lock);
+	list_for_each(pos, &bp->address_list) {
+		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if ((laddr->a.sa.sa_family == AF_INET6) &&
+		    (scope <= sctp_scope(&laddr->a))) {
+			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+			if (!baddr || (matchlen < bmatchlen)) {
+				baddr = &laddr->a;
+				matchlen = bmatchlen;
+			}
+		}
+	}
+
+	if (baddr) {
+		memcpy(saddr, baddr, sizeof(union sctp_addr));
+		SCTP_DEBUG_PRINTK("saddr: "
+				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+				  NIP6(saddr->v6.sin6_addr));
+	} else {
+		printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
+		       "address for the "
+		       "dest:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		       __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
+	}
+
+	sctp_read_unlock(addr_lock);
+}
+
+/* Make a copy of all potential local addresses. */
+static void sctp_v6_copy_addrlist(struct list_head *addrlist,
+				  struct net_device *dev)
+{
+	struct inet6_dev *in6_dev;
+	struct inet6_ifaddr *ifp;
+	struct sctp_sockaddr_entry *addr;
+
+	read_lock(&addrconf_lock);
+	if ((in6_dev = __in6_dev_get(dev)) == NULL) {
+		read_unlock(&addrconf_lock);
+		return;
+	}
+
+	read_lock(&in6_dev->lock);
+	for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) {
+		/* Add the address to the local list.  */
+		addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
+		if (addr) {
+			addr->a.v6.sin6_family = AF_INET6;
+			addr->a.v6.sin6_port = 0;
+			addr->a.v6.sin6_addr = ifp->addr;
+			addr->a.v6.sin6_scope_id = dev->ifindex;
+			INIT_LIST_HEAD(&addr->list);
+			list_add_tail(&addr->list, addrlist);
+		}
+	}
+
+	read_unlock(&in6_dev->lock);
+	read_unlock(&addrconf_lock);
+}
+
+/* Initialize a sockaddr_storage from in incoming skb. */
+static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
+			     int is_saddr)
+{
+	void *from;
+	__u16 *port;
+	struct sctphdr *sh;
+
+	port = &addr->v6.sin6_port;
+	addr->v6.sin6_family = AF_INET6;
+	addr->v6.sin6_flowinfo = 0; /* FIXME */
+	addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
+
+	sh = (struct sctphdr *) skb->h.raw;
+	if (is_saddr) {
+		*port  = ntohs(sh->source);
+		from = &skb->nh.ipv6h->saddr;
+	} else {
+		*port = ntohs(sh->dest);
+		from = &skb->nh.ipv6h->daddr;
+	}
+	ipv6_addr_copy(&addr->v6.sin6_addr, from);
+}
+
+/* Initialize an sctp_addr from a socket. */
+static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
+{
+	addr->v6.sin6_family = AF_INET6;
+	addr->v6.sin6_port = inet_sk(sk)->num;
+	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+}
+
+/* Initialize sk->sk_rcv_saddr from sctp_addr. */
+static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
+{
+	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
+		inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0;
+		inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0;
+		inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
+		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
+			addr->v4.sin_addr.s_addr;
+	} else {
+		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+	}
+}
+
+/* Initialize sk->sk_daddr from sctp_addr. */
+static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
+{
+	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
+		inet6_sk(sk)->daddr.s6_addr32[0] = 0;
+		inet6_sk(sk)->daddr.s6_addr32[1] = 0;
+		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
+		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+	} else {
+		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+	}
+}
+
+/* Initialize a sctp_addr from an address parameter. */
+static void sctp_v6_from_addr_param(union sctp_addr *addr,
+				    union sctp_addr_param *param,
+				    __u16 port, int iif)
+{
+	addr->v6.sin6_family = AF_INET6;
+	addr->v6.sin6_port = port;
+	addr->v6.sin6_flowinfo = 0; /* BUG */
+	ipv6_addr_copy(&addr->v6.sin6_addr, &param->v6.addr);
+	addr->v6.sin6_scope_id = iif;
+}
+
+/* Initialize an address parameter from a sctp_addr and return the length
+ * of the address parameter.
+ */
+static int sctp_v6_to_addr_param(const union sctp_addr *addr,
+				 union sctp_addr_param *param)
+{
+	int length = sizeof(sctp_ipv6addr_param_t);
+
+	param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
+	param->v6.param_hdr.length = ntohs(length);
+	ipv6_addr_copy(&param->v6.addr, &addr->v6.sin6_addr);
+
+	return length;
+}
+
+/* Initialize a sctp_addr from a dst_entry. */
+static void sctp_v6_dst_saddr(union sctp_addr *addr, struct dst_entry *dst,
+			      unsigned short port)
+{
+	struct rt6_info *rt = (struct rt6_info *)dst;
+	addr->sa.sa_family = AF_INET6;
+	addr->v6.sin6_port = port;
+	ipv6_addr_copy(&addr->v6.sin6_addr, &rt->rt6i_src.addr);
+}
+
+/* Compare addresses exactly.
+ * v4-mapped-v6 is also in consideration.
+ */
+static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
+			    const union sctp_addr *addr2)
+{
+	if (addr1->sa.sa_family != addr2->sa.sa_family) {
+		if (addr1->sa.sa_family == AF_INET &&
+		    addr2->sa.sa_family == AF_INET6 &&
+		    IPV6_ADDR_MAPPED == ipv6_addr_type(&addr2->v6.sin6_addr)) {
+			if (addr2->v6.sin6_port == addr1->v4.sin_port &&
+			    addr2->v6.sin6_addr.s6_addr32[3] ==
+			    addr1->v4.sin_addr.s_addr)
+				return 1;
+		}
+		if (addr2->sa.sa_family == AF_INET &&
+		    addr1->sa.sa_family == AF_INET6 &&
+		    IPV6_ADDR_MAPPED == ipv6_addr_type(&addr1->v6.sin6_addr)) {
+			if (addr1->v6.sin6_port == addr2->v4.sin_port &&
+			    addr1->v6.sin6_addr.s6_addr32[3] ==
+			    addr2->v4.sin_addr.s_addr)
+				return 1;
+		}
+		return 0;
+	}
+	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
+		return 0;
+	/* If this is a linklocal address, compare the scope_id. */
+	if (ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+		if (addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id &&
+		    (addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)) {
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/* Initialize addr struct to INADDR_ANY. */
+static void sctp_v6_inaddr_any(union sctp_addr *addr, unsigned short port)
+{
+	memset(addr, 0x00, sizeof(union sctp_addr));
+	addr->v6.sin6_family = AF_INET6;
+	addr->v6.sin6_port = port;
+}
+
+/* Is this a wildcard address? */
+static int sctp_v6_is_any(const union sctp_addr *addr)
+{
+	int type;
+	type = ipv6_addr_type((struct in6_addr *)&addr->v6.sin6_addr);
+	return IPV6_ADDR_ANY == type;
+}
+
+/* Should this be available for binding?   */
+static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
+{
+	int type;
+	struct in6_addr *in6 = (struct in6_addr *)&addr->v6.sin6_addr;
+
+	type = ipv6_addr_type(in6);
+	if (IPV6_ADDR_ANY == type)
+		return 1;
+	if (type == IPV6_ADDR_MAPPED) {
+		if (sp && !sp->v4mapped)
+			return 0;
+		if (sp && ipv6_only_sock(sctp_opt2sk(sp)))
+			return 0;
+		sctp_v6_map_v4(addr);
+		return sctp_get_af_specific(AF_INET)->available(addr, sp);
+	}
+	if (!(type & IPV6_ADDR_UNICAST))
+		return 0;
+
+	return ipv6_chk_addr(in6, NULL, 0);
+}
+
+/* This function checks if the address is a valid address to be used for
+ * SCTP.
+ *
+ * Output:
+ * Return 0 - If the address is a non-unicast or an illegal address.
+ * Return 1 - If the address is a unicast.
+ */
+static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp)
+{
+	int ret = ipv6_addr_type(&addr->v6.sin6_addr);
+
+	/* Support v4-mapped-v6 address. */
+	if (ret == IPV6_ADDR_MAPPED) {
+		/* Note: This routine is used in input, so v4-mapped-v6
+		 * are disallowed here when there is no sctp_sock.
+		 */
+		if (!sp || !sp->v4mapped)
+			return 0;
+		if (sp && ipv6_only_sock(sctp_opt2sk(sp)))
+			return 0;
+		sctp_v6_map_v4(addr);
+		return sctp_get_af_specific(AF_INET)->addr_valid(addr, sp);
+	}
+
+	/* Is this a non-unicast address */
+	if (!(ret & IPV6_ADDR_UNICAST))
+		return 0;
+
+	return 1;
+}
+
+/* What is the scope of 'addr'?  */
+static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+{
+	int v6scope;
+	sctp_scope_t retval;
+
+	/* The IPv6 scope is really a set of bit fields.
+	 * See IFA_* in <net/if_inet6.h>.  Map to a generic SCTP scope.
+	 */
+
+	v6scope = ipv6_addr_scope(&addr->v6.sin6_addr);
+	switch (v6scope) {
+	case IFA_HOST:
+		retval = SCTP_SCOPE_LOOPBACK;
+		break;
+	case IFA_LINK:
+		retval = SCTP_SCOPE_LINK;
+		break;
+	case IFA_SITE:
+		retval = SCTP_SCOPE_PRIVATE;
+		break;
+	default:
+		retval = SCTP_SCOPE_GLOBAL;
+		break;
+	};
+
+	return retval;
+}
+
+/* Create and initialize a new sk for the socket to be returned by accept(). */
+static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
+					     struct sctp_association *asoc)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sock *newsk;
+	struct inet_sock *newinet;
+	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+	struct sctp6_sock *newsctp6sk;
+
+	newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1);
+	if (!newsk)
+		goto out;
+
+	sock_init_data(NULL, newsk);
+
+	newsk->sk_type = SOCK_STREAM;
+
+	newsk->sk_prot = sk->sk_prot;
+	newsk->sk_no_check = sk->sk_no_check;
+	newsk->sk_reuse = sk->sk_reuse;
+
+	newsk->sk_destruct = inet_sock_destruct;
+	newsk->sk_family = PF_INET6;
+	newsk->sk_protocol = IPPROTO_SCTP;
+	newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+	newsk->sk_shutdown = sk->sk_shutdown;
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	newsctp6sk = (struct sctp6_sock *)newsk;
+	inet_sk(newsk)->pinet6 = &newsctp6sk->inet6;
+
+	newinet = inet_sk(newsk);
+	newnp = inet6_sk(newsk);
+
+	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+
+	/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
+	 * and getpeername().
+	 */
+	newinet->sport = inet->sport;
+	newnp->saddr = np->saddr;
+	newnp->rcv_saddr = np->rcv_saddr;
+	newinet->dport = htons(asoc->peer.port);
+	sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
+
+	/* Init the ipv4 part of the socket since we can have sockets
+	 * using v6 API for ipv4.
+	 */
+	newinet->uc_ttl = -1;
+	newinet->mc_loop = 1;
+	newinet->mc_ttl = 1;
+	newinet->mc_index = 0;
+	newinet->mc_list = NULL;
+
+	if (ipv4_config.no_pmtu_disc)
+		newinet->pmtudisc = IP_PMTUDISC_DONT;
+	else
+		newinet->pmtudisc = IP_PMTUDISC_WANT;
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet6_sock_nr);
+	atomic_inc(&inet_sock_nr);
+#endif
+
+	if (newsk->sk_prot->init(newsk)) {
+		sk_common_release(newsk);
+		newsk = NULL;
+	}
+
+out:
+	return newsk;
+}
+
+/* Map v4 address to mapped v6 address */
+static void sctp_v6_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
+{
+	if (sp->v4mapped && AF_INET == addr->sa.sa_family)
+		sctp_v4_map_v6(addr);
+}
+
+/* Where did this skb come from?  */
+static int sctp_v6_skb_iif(const struct sk_buff *skb)
+{
+	struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
+	return opt->iif;
+}
+
+/* Was this packet marked by Explicit Congestion Notification? */
+static int sctp_v6_is_ce(const struct sk_buff *skb)
+{
+	return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20);
+}
+
+/* Dump the v6 addr to the seq file. */
+static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
+{
+	seq_printf(seq, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+		   NIP6(addr->v6.sin6_addr));
+}
+
+/* Initialize a PF_INET6 socket msg_name. */
+static void sctp_inet6_msgname(char *msgname, int *addr_len)
+{
+	struct sockaddr_in6 *sin6;
+
+	sin6 = (struct sockaddr_in6 *)msgname;
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = 0; /*FIXME */
+	*addr_len = sizeof(struct sockaddr_in6);
+}
+
+/* Initialize a PF_INET msgname from a ulpevent. */
+static void sctp_inet6_event_msgname(struct sctp_ulpevent *event,
+				     char *msgname, int *addrlen)
+{
+	struct sockaddr_in6 *sin6, *sin6from;
+
+	if (msgname) {
+		union sctp_addr *addr;
+		struct sctp_association *asoc;
+
+		asoc = event->asoc;
+		sctp_inet6_msgname(msgname, addrlen);
+		sin6 = (struct sockaddr_in6 *)msgname;
+		sin6->sin6_port = htons(asoc->peer.port);
+		addr = &asoc->peer.primary_addr;
+
+		/* Note: If we go to a common v6 format, this code
+		 * will change.
+		 */
+
+		/* Map ipv4 address into v4-mapped-on-v6 address.  */
+		if (sctp_sk(asoc->base.sk)->v4mapped &&
+		    AF_INET == addr->sa.sa_family) {
+			sctp_v4_map_v6((union sctp_addr *)sin6);
+			sin6->sin6_addr.s6_addr32[3] =
+				addr->v4.sin_addr.s_addr;
+			return;
+		}
+
+		sin6from = &asoc->peer.primary_addr.v6;
+		ipv6_addr_copy(&sin6->sin6_addr, &sin6from->sin6_addr);
+		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+			sin6->sin6_scope_id = sin6from->sin6_scope_id;
+	}
+}
+
+/* Initialize a msg_name from an inbound skb. */
+static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
+				   int *addr_len)
+{
+	struct sctphdr *sh;
+	struct sockaddr_in6 *sin6;
+
+	if (msgname) {
+		sctp_inet6_msgname(msgname, addr_len);
+		sin6 = (struct sockaddr_in6 *)msgname;
+		sh = (struct sctphdr *)skb->h.raw;
+		sin6->sin6_port = sh->source;
+
+		/* Map ipv4 address into v4-mapped-on-v6 address. */
+		if (sctp_sk(skb->sk)->v4mapped &&
+		    skb->nh.iph->version == 4) {
+			sctp_v4_map_v6((union sctp_addr *)sin6);
+			sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr;
+			return;
+		}
+
+		/* Otherwise, just copy the v6 address. */
+		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+			struct sctp_ulpevent *ev = sctp_skb2event(skb);
+			sin6->sin6_scope_id = ev->iif;
+		}
+	}
+}
+
+/* Do we support this AF? */
+static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
+{
+	switch (family) {
+	case AF_INET6:
+		return 1;
+	/* v4-mapped-v6 addresses */
+	case AF_INET:
+		if (!__ipv6_only_sock(sctp_opt2sk(sp)) && sp->v4mapped)
+			return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Address matching with wildcards allowed.  This extra level
+ * of indirection lets us choose whether a PF_INET6 should
+ * disallow any v4 addresses if we so choose.
+ */
+static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
+			       const union sctp_addr *addr2,
+			       struct sctp_sock *opt)
+{
+	struct sctp_af *af1, *af2;
+
+	af1 = sctp_get_af_specific(addr1->sa.sa_family);
+	af2 = sctp_get_af_specific(addr2->sa.sa_family);
+
+	if (!af1 || !af2)
+		return 0;
+	/* Today, wildcard AF_INET/AF_INET6. */
+	if (sctp_is_any(addr1) || sctp_is_any(addr2))
+		return 1;
+
+	if (addr1->sa.sa_family != addr2->sa.sa_family)
+		return 0;
+
+	return af1->cmp_addr(addr1, addr2);
+}
+
+/* Verify that the provided sockaddr looks bindable.   Common verification,
+ * has already been taken care of.
+ */
+static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
+{
+	struct sctp_af *af;
+
+	/* ASSERT: address family has already been verified. */
+	if (addr->sa.sa_family != AF_INET6)
+		af = sctp_get_af_specific(addr->sa.sa_family);
+	else {
+		struct sock *sk;
+		int type = ipv6_addr_type(&addr->v6.sin6_addr);
+		sk = sctp_opt2sk(opt);
+		if (type & IPV6_ADDR_LINKLOCAL) {
+			/* Note: Behavior similar to af_inet6.c:
+			 *  1) Overrides previous bound_dev_if
+			 *  2) Destructive even if bind isn't successful.
+			 */
+
+			if (addr->v6.sin6_scope_id)
+				sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
+			if (!sk->sk_bound_dev_if)
+				return 0;
+		}
+		af = opt->pf->af;
+	}
+	return af->available(addr, opt);
+}
+
+/* Verify that the provided sockaddr looks bindable.   Common verification,
+ * has already been taken care of.
+ */
+static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
+{
+	struct sctp_af *af = NULL;
+
+	/* ASSERT: address family has already been verified. */
+	if (addr->sa.sa_family != AF_INET6)
+		af = sctp_get_af_specific(addr->sa.sa_family);
+	else {
+		struct sock *sk;
+		int type = ipv6_addr_type(&addr->v6.sin6_addr);
+		sk = sctp_opt2sk(opt);
+		if (type & IPV6_ADDR_LINKLOCAL) {
+			/* Note: Behavior similar to af_inet6.c:
+			 *  1) Overrides previous bound_dev_if
+			 *  2) Destructive even if bind isn't successful.
+			 */
+
+			if (addr->v6.sin6_scope_id)
+				sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
+			if (!sk->sk_bound_dev_if)
+				return 0;
+		}
+		af = opt->pf->af;
+	}
+
+	return af != NULL;
+}
+
+/* Fill in Supported Address Type information for INIT and INIT-ACK
+ * chunks.   Note: In the future, we may want to look at sock options
+ * to determine whether a PF_INET6 socket really wants to have IPV4
+ * addresses.
+ * Returns number of addresses supported.
+ */
+static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
+				      __u16 *types)
+{
+	types[0] = SCTP_PARAM_IPV4_ADDRESS;
+	types[1] = SCTP_PARAM_IPV6_ADDRESS;
+	return 2;
+}
+
+static struct proto_ops inet6_seqpacket_ops = {
+	.family     = PF_INET6,
+	.owner      = THIS_MODULE,
+	.release    = inet6_release,
+	.bind       = inet6_bind,
+	.connect    = inet_dgram_connect,
+	.socketpair = sock_no_socketpair,
+	.accept     = inet_accept,
+	.getname    = inet6_getname,
+	.poll       = sctp_poll,
+	.ioctl      = inet6_ioctl,
+	.listen     = sctp_inet_listen,
+	.shutdown   = inet_shutdown,
+	.setsockopt = sock_common_setsockopt,
+	.getsockopt = sock_common_getsockopt,
+	.sendmsg    = inet_sendmsg,
+	.recvmsg    = sock_common_recvmsg,
+	.mmap       = sock_no_mmap,
+};
+
+static struct inet_protosw sctpv6_seqpacket_protosw = {
+	.type          = SOCK_SEQPACKET,
+	.protocol      = IPPROTO_SCTP,
+	.prot 	       = &sctpv6_prot,
+	.ops           = &inet6_seqpacket_ops,
+	.capability    = -1,
+	.no_check      = 0,
+	.flags         = SCTP_PROTOSW_FLAG
+};
+static struct inet_protosw sctpv6_stream_protosw = {
+	.type          = SOCK_STREAM,
+	.protocol      = IPPROTO_SCTP,
+	.prot 	       = &sctpv6_prot,
+	.ops           = &inet6_seqpacket_ops,
+	.capability    = -1,
+	.no_check      = 0,
+	.flags         = SCTP_PROTOSW_FLAG,
+};
+
+static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	return sctp_rcv(*pskb) ? -1 : 0;
+}
+
+static struct inet6_protocol sctpv6_protocol = {
+	.handler      = sctp6_rcv,
+	.err_handler  = sctp_v6_err,
+	.flags        = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+};
+
+static struct sctp_af sctp_ipv6_specific = {
+	.sctp_xmit       = sctp_v6_xmit,
+	.setsockopt      = ipv6_setsockopt,
+	.getsockopt      = ipv6_getsockopt,
+	.get_dst	 = sctp_v6_get_dst,
+	.get_saddr	 = sctp_v6_get_saddr,
+	.copy_addrlist   = sctp_v6_copy_addrlist,
+	.from_skb        = sctp_v6_from_skb,
+	.from_sk         = sctp_v6_from_sk,
+	.to_sk_saddr     = sctp_v6_to_sk_saddr,
+	.to_sk_daddr     = sctp_v6_to_sk_daddr,
+	.from_addr_param = sctp_v6_from_addr_param,
+	.to_addr_param   = sctp_v6_to_addr_param,
+	.dst_saddr       = sctp_v6_dst_saddr,
+	.cmp_addr        = sctp_v6_cmp_addr,
+	.scope           = sctp_v6_scope,
+	.addr_valid      = sctp_v6_addr_valid,
+	.inaddr_any      = sctp_v6_inaddr_any,
+	.is_any          = sctp_v6_is_any,
+	.available       = sctp_v6_available,
+	.skb_iif         = sctp_v6_skb_iif,
+	.is_ce           = sctp_v6_is_ce,
+	.seq_dump_addr   = sctp_v6_seq_dump_addr,
+	.net_header_len  = sizeof(struct ipv6hdr),
+	.sockaddr_len    = sizeof(struct sockaddr_in6),
+	.sa_family       = AF_INET6,
+};
+
+static struct sctp_pf sctp_pf_inet6_specific = {
+	.event_msgname = sctp_inet6_event_msgname,
+	.skb_msgname   = sctp_inet6_skb_msgname,
+	.af_supported  = sctp_inet6_af_supported,
+	.cmp_addr      = sctp_inet6_cmp_addr,
+	.bind_verify   = sctp_inet6_bind_verify,
+	.send_verify   = sctp_inet6_send_verify,
+	.supported_addrs = sctp_inet6_supported_addrs,
+	.create_accept_sk = sctp_v6_create_accept_sk,
+	.addr_v4map    = sctp_v6_addr_v4map,
+	.af            = &sctp_ipv6_specific,
+};
+
+/* Initialize IPv6 support and register with inet6 stack.  */
+int sctp_v6_init(void)
+{
+	int rc = proto_register(&sctpv6_prot, 1);
+
+	if (rc)
+		goto out;
+	/* Register inet6 protocol. */
+	rc = -EAGAIN;
+	if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
+		goto out_unregister_sctp_proto;
+
+	/* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */
+	inet6_register_protosw(&sctpv6_seqpacket_protosw);
+	inet6_register_protosw(&sctpv6_stream_protosw);
+
+	/* Register the SCTP specific PF_INET6 functions. */
+	sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6);
+
+	/* Register the SCTP specific AF_INET6 functions. */
+	sctp_register_af(&sctp_ipv6_specific);
+
+	/* Register notifier for inet6 address additions/deletions. */
+	register_inet6addr_notifier(&sctp_inet6addr_notifier);
+	rc = 0;
+out:
+	return rc;
+out_unregister_sctp_proto:
+	proto_unregister(&sctpv6_prot);
+	goto out;
+}
+
+/* IPv6 specific exit support. */
+void sctp_v6_exit(void)
+{
+	list_del(&sctp_ipv6_specific.list);
+	inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
+	inet6_unregister_protosw(&sctpv6_seqpacket_protosw);
+	inet6_unregister_protosw(&sctpv6_stream_protosw);
+	unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
+	proto_unregister(&sctpv6_prot);
+}
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
new file mode 100644
index 00000000000..0781e5d509f
--- /dev/null
+++ b/net/sctp/objcnt.c
@@ -0,0 +1,140 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * 
+ * This file is part of the SCTP kernel reference Implementation
+ * 
+ * Support for memory object debugging.  This allows one to monitor the
+ * object allocations/deallocations for types instrumented for this 
+ * via the proc fs. 
+ * 
+ * The SCTP reference implementation is free software; 
+ * you can redistribute it and/or modify it under the terms of 
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ * 
+ * The SCTP reference implementation is distributed in the hope that it 
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.  
+ * 
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ * 
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by: 
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ * 
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/kernel.h>
+#include <net/sctp/sctp.h>
+
+/*
+ * Global counters to count raw object allocation counts.
+ * To add new counters, choose a unique suffix for the variable
+ * name as the helper macros key off this suffix to make
+ * life easier for the programmer.
+ */
+
+SCTP_DBG_OBJCNT(sock);
+SCTP_DBG_OBJCNT(ep);
+SCTP_DBG_OBJCNT(transport);
+SCTP_DBG_OBJCNT(assoc);
+SCTP_DBG_OBJCNT(bind_addr);
+SCTP_DBG_OBJCNT(bind_bucket);
+SCTP_DBG_OBJCNT(chunk);
+SCTP_DBG_OBJCNT(addr);
+SCTP_DBG_OBJCNT(ssnmap);
+SCTP_DBG_OBJCNT(datamsg);
+
+/* An array to make it easy to pretty print the debug information
+ * to the proc fs.
+ */
+static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = {
+	SCTP_DBG_OBJCNT_ENTRY(sock),
+	SCTP_DBG_OBJCNT_ENTRY(ep),
+	SCTP_DBG_OBJCNT_ENTRY(assoc),
+	SCTP_DBG_OBJCNT_ENTRY(transport),
+	SCTP_DBG_OBJCNT_ENTRY(chunk),
+	SCTP_DBG_OBJCNT_ENTRY(bind_addr),
+	SCTP_DBG_OBJCNT_ENTRY(bind_bucket),
+	SCTP_DBG_OBJCNT_ENTRY(addr),
+	SCTP_DBG_OBJCNT_ENTRY(ssnmap),
+	SCTP_DBG_OBJCNT_ENTRY(datamsg),
+};
+
+/* Callback from procfs to read out objcount information.
+ * Walk through the entries in the sctp_dbg_objcnt array, dumping
+ * the raw object counts for each monitored type.
+ *
+ * This code was modified from similar code in route.c
+ */
+static int sctp_dbg_objcnt_read(char *buffer, char **start, off_t offset,
+				int length, int *eof, void *data)
+{
+	int len = 0;
+	off_t pos = 0;
+	int entries;
+	int i;
+	char temp[128];
+
+	/* How many entries? */
+	entries = ARRAY_SIZE(sctp_dbg_objcnt);
+
+	/* Walk the entries and print out the debug information
+	 * for proc fs.
+	 */
+	for (i = 0; i < entries; i++) {
+		pos += 128;
+
+		/* Skip ahead. */
+		if (pos <= offset) {
+			len = 0;
+			continue;
+		}
+		/* Print out each entry. */
+		sprintf(temp, "%s: %d",
+			sctp_dbg_objcnt[i].label,
+			atomic_read(sctp_dbg_objcnt[i].counter));
+
+		sprintf(buffer + len, "%-127s\n", temp);
+		len += 128;
+		if (pos >= offset+length)
+			goto done;
+	}
+
+done:
+	*start = buffer + len - (pos - offset);
+	len = pos - offset;
+	if (len > length)
+		len = length;
+
+  	return len;
+}
+
+/* Initialize the objcount in the proc filesystem.  */
+void sctp_dbg_objcnt_init(void)
+{
+	create_proc_read_entry("sctp_dbg_objcnt", 0, proc_net_sctp,
+			       sctp_dbg_objcnt_read, NULL);
+}
+
+/* Cleanup the objcount entry in the proc filesystem.  */
+void sctp_dbg_objcnt_exit(void)
+{
+	remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp);
+}
+
+
diff --git a/net/sctp/output.c b/net/sctp/output.c
new file mode 100644
index 00000000000..9013f64f521
--- /dev/null
+++ b/net/sctp/output.c
@@ -0,0 +1,646 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions handle output processing.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@austin.ibm.com>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/init.h>
+#include <net/inet_ecn.h>
+#include <net/icmp.h>
+
+#ifndef TEST_FRAME
+#include <net/tcp.h>
+#endif /* TEST_FRAME (not defined) */
+
+#include <linux/socket.h> /* for sa_family_t */
+#include <net/sock.h>
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for private helpers. */
+static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk);
+
+/* Config a packet.
+ * This appears to be a followup set of initializations.
+ */
+struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
+				       __u32 vtag, int ecn_capable)
+{
+	struct sctp_chunk *chunk = NULL;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __FUNCTION__,
+			  packet, vtag);
+
+	packet->vtag = vtag;
+	packet->has_cookie_echo = 0;
+	packet->has_sack = 0;
+	packet->ipfragok = 0;
+
+	if (ecn_capable && sctp_packet_empty(packet)) {
+		chunk = sctp_get_ecne_prepend(packet->transport->asoc);
+
+		/* If there a is a prepend chunk stick it on the list before
+	 	 * any other chunks get appended.
+	 	 */
+		if (chunk)
+			sctp_packet_append_chunk(packet, chunk);
+	}
+
+	return packet;
+}
+
+/* Initialize the packet structure. */
+struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
+				     struct sctp_transport *transport,
+				     __u16 sport, __u16 dport)
+{
+	struct sctp_association *asoc = transport->asoc;
+	size_t overhead;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p transport:%p\n", __FUNCTION__,
+			  packet, transport);
+
+	packet->transport = transport;
+	packet->source_port = sport;
+	packet->destination_port = dport;
+	skb_queue_head_init(&packet->chunks);
+	if (asoc) {
+		struct sctp_sock *sp = sctp_sk(asoc->base.sk);	
+		overhead = sp->pf->af->net_header_len; 
+	} else {
+		overhead = sizeof(struct ipv6hdr);
+	}
+	overhead += sizeof(struct sctphdr);
+	packet->overhead = overhead;
+	packet->size = overhead;
+	packet->vtag = 0;
+	packet->has_cookie_echo = 0;
+	packet->has_sack = 0;
+	packet->ipfragok = 0;
+	packet->malloced = 0;
+	return packet;
+}
+
+/* Free a packet.  */
+void sctp_packet_free(struct sctp_packet *packet)
+{
+	struct sctp_chunk *chunk;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet);
+
+        while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL)
+		sctp_chunk_free(chunk);
+
+	if (packet->malloced)
+		kfree(packet);
+}
+
+/* This routine tries to append the chunk to the offered packet. If adding
+ * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk
+ * is not present in the packet, it transmits the input packet.
+ * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long
+ * as it can fit in the packet, but any more data that does not fit in this
+ * packet can be sent only after receiving the COOKIE_ACK.
+ */
+sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
+				       struct sctp_chunk *chunk)
+{
+	sctp_xmit_t retval;
+	int error = 0;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __FUNCTION__,
+			  packet, chunk);
+
+	switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
+	case SCTP_XMIT_PMTU_FULL:
+		if (!packet->has_cookie_echo) {
+			error = sctp_packet_transmit(packet);
+			if (error < 0)
+				chunk->skb->sk->sk_err = -error;
+
+			/* If we have an empty packet, then we can NOT ever
+			 * return PMTU_FULL.
+			 */
+			retval = sctp_packet_append_chunk(packet, chunk);
+		}
+		break;
+
+	case SCTP_XMIT_RWND_FULL:
+	case SCTP_XMIT_OK:
+	case SCTP_XMIT_NAGLE_DELAY:
+		break;
+	};
+
+	return retval;
+}
+
+/* Try to bundle a SACK with the packet. */
+static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
+					   struct sctp_chunk *chunk)
+{
+	sctp_xmit_t retval = SCTP_XMIT_OK;
+
+	/* If sending DATA and haven't aleady bundled a SACK, try to
+	 * bundle one in to the packet.
+	 */
+	if (sctp_chunk_is_data(chunk) && !pkt->has_sack &&
+	    !pkt->has_cookie_echo) {
+		struct sctp_association *asoc;
+		asoc = pkt->transport->asoc;
+
+		if (asoc->a_rwnd > asoc->rwnd) {
+			struct sctp_chunk *sack;
+			asoc->a_rwnd = asoc->rwnd;
+			sack = sctp_make_sack(asoc);
+			if (sack) {
+				struct timer_list *timer;
+				retval = sctp_packet_append_chunk(pkt, sack);
+				asoc->peer.sack_needed = 0;
+				timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
+				if (timer_pending(timer) && del_timer(timer))
+					sctp_association_put(asoc);
+			}
+		}
+	}
+	return retval;
+}
+
+/* Append a chunk to the offered packet reporting back any inability to do
+ * so.
+ */
+sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
+				     struct sctp_chunk *chunk)
+{
+	sctp_xmit_t retval = SCTP_XMIT_OK;
+	__u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
+	size_t psize;
+	size_t pmtu;
+	int too_big;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __FUNCTION__, packet,
+			  chunk);
+
+	retval = sctp_packet_bundle_sack(packet, chunk);
+	psize = packet->size;
+
+	if (retval != SCTP_XMIT_OK)
+		goto finish;
+
+	pmtu  = ((packet->transport->asoc) ?
+		 (packet->transport->asoc->pmtu) :
+		 (packet->transport->pmtu));
+
+	too_big = (psize + chunk_len > pmtu);
+
+	/* Decide if we need to fragment or resubmit later. */
+	if (too_big) {
+		/* Both control chunks and data chunks with TSNs are
+		 * non-fragmentable.
+		 */
+		if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk)) {
+			/* We no longer do re-fragmentation.
+			 * Just fragment at the IP layer, if we
+			 * actually hit this condition
+			 */
+			packet->ipfragok = 1;
+			goto append;
+
+		} else {
+			retval = SCTP_XMIT_PMTU_FULL;
+			goto finish;
+		}
+	}
+
+append:
+	/* We believe that this chunk is OK to add to the packet (as
+	 * long as we have the cwnd for it).
+	 */
+
+	/* DATA is a special case since we must examine both rwnd and cwnd
+	 * before we send DATA.
+	 */
+	if (sctp_chunk_is_data(chunk)) {
+		retval = sctp_packet_append_data(packet, chunk);
+		/* Disallow SACK bundling after DATA. */
+		packet->has_sack = 1;
+		if (SCTP_XMIT_OK != retval)
+			goto finish;
+	} else if (SCTP_CID_COOKIE_ECHO == chunk->chunk_hdr->type)
+		packet->has_cookie_echo = 1;
+	else if (SCTP_CID_SACK == chunk->chunk_hdr->type)
+		packet->has_sack = 1;
+
+	/* It is OK to send this chunk.  */
+	__skb_queue_tail(&packet->chunks, (struct sk_buff *)chunk);
+	packet->size += chunk_len;
+	chunk->transport = packet->transport;
+finish:
+	return retval;
+}
+
+/* All packets are sent to the network through this function from
+ * sctp_outq_tail().
+ *
+ * The return value is a normal kernel error return value.
+ */
+int sctp_packet_transmit(struct sctp_packet *packet)
+{
+	struct sctp_transport *tp = packet->transport;
+	struct sctp_association *asoc = tp->asoc;
+	struct sctphdr *sh;
+	__u32 crc32;
+	struct sk_buff *nskb;
+	struct sctp_chunk *chunk;
+	struct sock *sk;
+	int err = 0;
+	int padding;		/* How much padding do we need?  */
+	__u8 has_data = 0;
+	struct dst_entry *dst;
+
+	SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet);
+
+	/* Do NOT generate a chunkless packet. */
+	chunk = (struct sctp_chunk *)skb_peek(&packet->chunks);
+	if (unlikely(!chunk))
+		return err;
+
+	/* Set up convenience variables... */
+	sk = chunk->skb->sk;
+
+	/* Allocate the new skb.  */
+	nskb = dev_alloc_skb(packet->size);
+	if (!nskb)
+		goto nomem;
+
+	/* Make sure the outbound skb has enough header room reserved. */
+	skb_reserve(nskb, packet->overhead);
+
+	/* Set the owning socket so that we know where to get the
+	 * destination IP address.
+	 */
+	skb_set_owner_w(nskb, sk);
+
+	/* Build the SCTP header.  */
+	sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
+	sh->source = htons(packet->source_port);
+	sh->dest   = htons(packet->destination_port);
+
+	/* From 6.8 Adler-32 Checksum Calculation:
+	 * After the packet is constructed (containing the SCTP common
+	 * header and one or more control or DATA chunks), the
+	 * transmitter shall:
+	 *
+	 * 1) Fill in the proper Verification Tag in the SCTP common
+	 *    header and initialize the checksum field to 0's.
+	 */
+	sh->vtag     = htonl(packet->vtag);
+	sh->checksum = 0;
+
+	/* 2) Calculate the Adler-32 checksum of the whole packet,
+	 *    including the SCTP common header and all the
+	 *    chunks.
+	 *
+	 * Note: Adler-32 is no longer applicable, as has been replaced
+	 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
+	 */
+	crc32 = sctp_start_cksum((__u8 *)sh, sizeof(struct sctphdr));
+
+	/**
+	 * 6.10 Bundling
+	 *
+	 *    An endpoint bundles chunks by simply including multiple
+	 *    chunks in one outbound SCTP packet.  ...
+	 */
+
+	/**
+	 * 3.2  Chunk Field Descriptions
+	 *
+	 * The total length of a chunk (including Type, Length and
+	 * Value fields) MUST be a multiple of 4 bytes.  If the length
+	 * of the chunk is not a multiple of 4 bytes, the sender MUST
+	 * pad the chunk with all zero bytes and this padding is not
+	 * included in the chunk length field.  The sender should
+	 * never pad with more than 3 bytes.
+	 *
+	 * [This whole comment explains WORD_ROUND() below.]
+	 */
+	SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n");
+	while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL) {
+		if (sctp_chunk_is_data(chunk)) {
+
+			if (!chunk->has_tsn) {
+				sctp_chunk_assign_ssn(chunk);
+				sctp_chunk_assign_tsn(chunk);
+
+			/* 6.3.1 C4) When data is in flight and when allowed
+			 * by rule C5, a new RTT measurement MUST be made each
+			 * round trip.  Furthermore, new RTT measurements
+			 * SHOULD be made no more than once per round-trip
+			 * for a given destination transport address.
+			 */
+
+				if (!tp->rto_pending) {
+					chunk->rtt_in_progress = 1;
+					tp->rto_pending = 1;
+				}
+			} else
+				chunk->resent = 1;
+
+			chunk->sent_at = jiffies;
+			has_data = 1;
+		}
+
+		padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+		if (padding)
+			memset(skb_put(chunk->skb, padding), 0, padding);
+
+		crc32 = sctp_update_copy_cksum(skb_put(nskb, chunk->skb->len),
+					       chunk->skb->data,
+					       chunk->skb->len, crc32);
+
+		SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n",
+				  "*** Chunk", chunk,
+				  sctp_cname(SCTP_ST_CHUNK(
+					  chunk->chunk_hdr->type)),
+				  chunk->has_tsn ? "TSN" : "No TSN",
+				  chunk->has_tsn ?
+				  ntohl(chunk->subh.data_hdr->tsn) : 0,
+				  "length", ntohs(chunk->chunk_hdr->length),
+				  "chunk->skb->len", chunk->skb->len,
+				  "rtt_in_progress", chunk->rtt_in_progress);
+
+		/*
+		 * If this is a control chunk, this is our last
+		 * reference. Free data chunks after they've been
+		 * acknowledged or have failed.
+		 */
+		if (!sctp_chunk_is_data(chunk))
+    			sctp_chunk_free(chunk);
+	}
+
+	/* Perform final transformation on checksum. */
+	crc32 = sctp_end_cksum(crc32);
+
+	/* 3) Put the resultant value into the checksum field in the
+	 *    common header, and leave the rest of the bits unchanged.
+	 */
+	sh->checksum = htonl(crc32);
+
+	/* IP layer ECN support
+	 * From RFC 2481
+	 *  "The ECN-Capable Transport (ECT) bit would be set by the
+	 *   data sender to indicate that the end-points of the
+	 *   transport protocol are ECN-capable."
+	 *
+	 * Now setting the ECT bit all the time, as it should not cause
+	 * any problems protocol-wise even if our peer ignores it.
+	 *
+	 * Note: The works for IPv6 layer checks this bit too later
+	 * in transmission.  See IP6_ECN_flow_xmit().
+	 */
+	INET_ECN_xmit(nskb->sk);
+
+	/* Set up the IP options.  */
+	/* BUG: not implemented
+	 * For v4 this all lives somewhere in sk->sk_opt...
+	 */
+
+	/* Dump that on IP!  */
+	if (asoc && asoc->peer.last_sent_to != tp) {
+		/* Considering the multiple CPU scenario, this is a
+		 * "correcter" place for last_sent_to.  --xguo
+		 */
+		asoc->peer.last_sent_to = tp;
+	}
+
+	if (has_data) {
+		struct timer_list *timer;
+		unsigned long timeout;
+
+		tp->last_time_used = jiffies;
+
+		/* Restart the AUTOCLOSE timer when sending data. */
+		if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) {
+			timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+			timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+
+			if (!mod_timer(timer, jiffies + timeout))
+				sctp_association_hold(asoc);
+		}
+	}
+
+	dst = tp->dst;
+	/* The 'obsolete' field of dst is set to 2 when a dst is freed. */
+	if (!dst || (dst->obsolete > 1)) {
+		dst_release(dst);
+		sctp_transport_route(tp, NULL, sctp_sk(sk));
+		sctp_assoc_sync_pmtu(asoc);
+	}
+
+	nskb->dst = dst_clone(tp->dst);
+	if (!nskb->dst)
+		goto no_route;
+
+	SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n",
+			  nskb->len);
+
+	(*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
+
+out:
+	packet->size = packet->overhead;
+	return err;
+no_route:
+	kfree_skb(nskb);
+	IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+
+	/* FIXME: Returning the 'err' will effect all the associations
+	 * associated with a socket, although only one of the paths of the
+	 * association is unreachable.
+	 * The real failure of a transport or association can be passed on
+	 * to the user via notifications. So setting this error may not be
+	 * required.
+	 */
+	 /* err = -EHOSTUNREACH; */
+err:
+	/* Control chunks are unreliable so just drop them.  DATA chunks
+	 * will get resent or dropped later.
+	 */
+
+	while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL) {
+		if (!sctp_chunk_is_data(chunk))
+    			sctp_chunk_free(chunk);
+	}
+	goto out;
+nomem:
+	err = -ENOMEM;
+	goto err;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* This private function handles the specifics of appending DATA chunks.  */
+static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk)
+{
+	sctp_xmit_t retval = SCTP_XMIT_OK;
+	size_t datasize, rwnd, inflight;
+	struct sctp_transport *transport = packet->transport;
+	__u32 max_burst_bytes;
+	struct sctp_association *asoc = transport->asoc;
+	struct sctp_sock *sp = sctp_sk(asoc->base.sk);
+	struct sctp_outq *q = &asoc->outqueue;
+
+	/* RFC 2960 6.1  Transmission of DATA Chunks
+	 *
+	 * A) At any given time, the data sender MUST NOT transmit new data to
+	 * any destination transport address if its peer's rwnd indicates
+	 * that the peer has no buffer space (i.e. rwnd is 0, see Section
+	 * 6.2.1).  However, regardless of the value of rwnd (including if it
+	 * is 0), the data sender can always have one DATA chunk in flight to
+	 * the receiver if allowed by cwnd (see rule B below).  This rule
+	 * allows the sender to probe for a change in rwnd that the sender
+	 * missed due to the SACK having been lost in transit from the data
+	 * receiver to the data sender.
+	 */
+
+	rwnd = asoc->peer.rwnd;
+	inflight = asoc->outqueue.outstanding_bytes;
+
+	datasize = sctp_data_size(chunk);
+
+	if (datasize > rwnd) {
+		if (inflight > 0) {
+			/* We have (at least) one data chunk in flight,
+			 * so we can't fall back to rule 6.1 B).
+			 */
+			retval = SCTP_XMIT_RWND_FULL;
+			goto finish;
+		}
+	}
+
+	/* sctpimpguide-05 2.14.2
+	 * D) When the time comes for the sender to
+	 * transmit new DATA chunks, the protocol parameter Max.Burst MUST
+	 * first be applied to limit how many new DATA chunks may be sent.
+	 * The limit is applied by adjusting cwnd as follows:
+	 * 	if ((flightsize + Max.Burst * MTU) < cwnd)
+	 *		cwnd = flightsize + Max.Burst * MTU
+	 */
+	max_burst_bytes = asoc->max_burst * asoc->pmtu;
+	if ((transport->flight_size + max_burst_bytes) < transport->cwnd) {
+		transport->cwnd = transport->flight_size + max_burst_bytes;
+		SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: "
+				  "transport: %p, cwnd: %d, "
+				  "ssthresh: %d, flight_size: %d, "
+				  "pba: %d\n",
+				  __FUNCTION__, transport,
+				  transport->cwnd,
+				  transport->ssthresh,
+				  transport->flight_size,
+				  transport->partial_bytes_acked);
+	}
+
+	/* RFC 2960 6.1  Transmission of DATA Chunks
+	 *
+	 * B) At any given time, the sender MUST NOT transmit new data
+	 * to a given transport address if it has cwnd or more bytes
+	 * of data outstanding to that transport address.
+	 */
+	/* RFC 7.2.4 & the Implementers Guide 2.8.
+	 *
+	 * 3) ...
+	 *    When a Fast Retransmit is being performed the sender SHOULD
+	 *    ignore the value of cwnd and SHOULD NOT delay retransmission.
+	 */
+	if (!chunk->fast_retransmit)
+		if (transport->flight_size >= transport->cwnd) {
+			retval = SCTP_XMIT_RWND_FULL;
+			goto finish;
+		}
+
+	/* Nagle's algorithm to solve small-packet problem:
+	 * Inhibit the sending of new chunks when new outgoing data arrives
+	 * if any previously transmitted data on the connection remains
+	 * unacknowledged.
+	 */
+	if (!sp->nodelay && sctp_packet_empty(packet) &&
+	    q->outstanding_bytes && sctp_state(asoc, ESTABLISHED)) {
+		unsigned len = datasize + q->out_qlen;
+
+		/* Check whether this chunk and all the rest of pending
+		 * data will fit or delay in hopes of bundling a full
+		 * sized packet.
+		 */
+		if (len < asoc->pmtu - packet->overhead) {
+			retval = SCTP_XMIT_NAGLE_DELAY;
+			goto finish;
+		}
+	}
+
+	/* Keep track of how many bytes are in flight over this transport. */
+	transport->flight_size += datasize;
+
+	/* Keep track of how many bytes are in flight to the receiver. */
+	asoc->outqueue.outstanding_bytes += datasize;
+
+	/* Update our view of the receiver's rwnd. */
+	if (datasize < rwnd)
+		rwnd -= datasize;
+	else
+		rwnd = 0;
+
+	asoc->peer.rwnd = rwnd;
+	/* Has been accepted for transmission. */
+	if (!asoc->peer.prsctp_capable)
+		chunk->msg->can_abandon = 0;
+
+finish:
+	return retval;
+}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
new file mode 100644
index 00000000000..1b2d4adc4dd
--- /dev/null
+++ b/net/sctp/outqueue.c
@@ -0,0 +1,1734 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2003 Intel Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions implement the sctp_outq class.   The outqueue handles
+ * bundling and queueing of outgoing SCTP chunks.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Perry Melange         <pmelange@null.cc.uic.edu>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Hui Huang 	    <hui.huang@nokia.com>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h>   /* For struct list_head */
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <net/sock.h>	  /* For skb_set_owner_w */
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Declare internal functions here.  */
+static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
+static void sctp_check_transmitted(struct sctp_outq *q,
+				   struct list_head *transmitted_queue,
+				   struct sctp_transport *transport,
+				   struct sctp_sackhdr *sack,
+				   __u32 highest_new_tsn);
+
+static void sctp_mark_missing(struct sctp_outq *q,
+			      struct list_head *transmitted_queue,
+			      struct sctp_transport *transport,
+			      __u32 highest_new_tsn,
+			      int count_of_newacks);
+
+static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
+
+/* Add data to the front of the queue. */
+static inline void sctp_outq_head_data(struct sctp_outq *q,
+					struct sctp_chunk *ch)
+{
+	__skb_queue_head(&q->out, (struct sk_buff *)ch);
+	q->out_qlen += ch->skb->len;
+	return;
+}
+
+/* Take data from the front of the queue. */
+static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
+{
+	struct sctp_chunk *ch;
+	ch = (struct sctp_chunk *)__skb_dequeue(&q->out);
+	if (ch)
+		q->out_qlen -= ch->skb->len;
+	return ch;
+}
+/* Add data chunk to the end of the queue. */
+static inline void sctp_outq_tail_data(struct sctp_outq *q,
+				       struct sctp_chunk *ch)
+{
+	__skb_queue_tail(&q->out, (struct sk_buff *)ch);
+	q->out_qlen += ch->skb->len;
+	return;
+}
+
+/*
+ * SFR-CACC algorithm:
+ * D) If count_of_newacks is greater than or equal to 2
+ * and t was not sent to the current primary then the
+ * sender MUST NOT increment missing report count for t.
+ */
+static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary,
+				       struct sctp_transport *transport,
+				       int count_of_newacks)
+{
+	if (count_of_newacks >=2 && transport != primary)
+		return 1;
+	return 0;
+}
+
+/*
+ * SFR-CACC algorithm:
+ * F) If count_of_newacks is less than 2, let d be the
+ * destination to which t was sent. If cacc_saw_newack
+ * is 0 for destination d, then the sender MUST NOT
+ * increment missing report count for t.
+ */
+static inline int sctp_cacc_skip_3_1_f(struct sctp_transport *transport,
+				       int count_of_newacks)
+{
+	if (count_of_newacks < 2 && !transport->cacc.cacc_saw_newack)
+		return 1;
+	return 0;
+}
+
+/*
+ * SFR-CACC algorithm:
+ * 3.1) If CYCLING_CHANGEOVER is 0, the sender SHOULD
+ * execute steps C, D, F.
+ *
+ * C has been implemented in sctp_outq_sack
+ */
+static inline int sctp_cacc_skip_3_1(struct sctp_transport *primary,
+				     struct sctp_transport *transport,
+				     int count_of_newacks)
+{
+	if (!primary->cacc.cycling_changeover) {
+		if (sctp_cacc_skip_3_1_d(primary, transport, count_of_newacks))
+			return 1;
+		if (sctp_cacc_skip_3_1_f(transport, count_of_newacks))
+			return 1;
+		return 0;
+	}
+	return 0;
+}
+
+/*
+ * SFR-CACC algorithm:
+ * 3.2) Else if CYCLING_CHANGEOVER is 1, and t is less
+ * than next_tsn_at_change of the current primary, then
+ * the sender MUST NOT increment missing report count
+ * for t.
+ */
+static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn)
+{
+	if (primary->cacc.cycling_changeover &&
+	    TSN_lt(tsn, primary->cacc.next_tsn_at_change))
+		return 1;
+	return 0;
+}
+
+/*
+ * SFR-CACC algorithm:
+ * 3) If the missing report count for TSN t is to be
+ * incremented according to [RFC2960] and
+ * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set,
+ * then the sender MUST futher execute steps 3.1 and
+ * 3.2 to determine if the missing report count for
+ * TSN t SHOULD NOT be incremented.
+ *
+ * 3.3) If 3.1 and 3.2 do not dictate that the missing
+ * report count for t should not be incremented, then
+ * the sender SOULD increment missing report count for
+ * t (according to [RFC2960] and [SCTP_STEWART_2002]).
+ */
+static inline int sctp_cacc_skip(struct sctp_transport *primary,
+				 struct sctp_transport *transport,
+				 int count_of_newacks,
+				 __u32 tsn)
+{
+	if (primary->cacc.changeover_active &&
+	    (sctp_cacc_skip_3_1(primary, transport, count_of_newacks)
+	     || sctp_cacc_skip_3_2(primary, tsn)))
+		return 1;
+	return 0;
+}
+
+/* Initialize an existing sctp_outq.  This does the boring stuff.
+ * You still need to define handlers if you really want to DO
+ * something with this structure...
+ */
+void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
+{
+	q->asoc = asoc;
+	skb_queue_head_init(&q->out);
+	skb_queue_head_init(&q->control);
+	INIT_LIST_HEAD(&q->retransmit);
+	INIT_LIST_HEAD(&q->sacked);
+	INIT_LIST_HEAD(&q->abandoned);
+
+	q->outstanding_bytes = 0;
+	q->empty = 1;
+	q->cork  = 0;
+
+	q->malloced = 0;
+	q->out_qlen = 0;
+}
+
+/* Free the outqueue structure and any related pending chunks.
+ */
+void sctp_outq_teardown(struct sctp_outq *q)
+{
+	struct sctp_transport *transport;
+	struct list_head *lchunk, *pos, *temp;
+	struct sctp_chunk *chunk;
+
+	/* Throw away unacknowledged chunks. */
+	list_for_each(pos, &q->asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		while ((lchunk = sctp_list_dequeue(&transport->transmitted)) != NULL) {
+			chunk = list_entry(lchunk, struct sctp_chunk,
+					   transmitted_list);
+			/* Mark as part of a failed message. */
+			sctp_chunk_fail(chunk, q->error);
+			sctp_chunk_free(chunk);
+		}
+	}
+
+	/* Throw away chunks that have been gap ACKed.  */
+	list_for_each_safe(lchunk, temp, &q->sacked) {
+		list_del_init(lchunk);
+		chunk = list_entry(lchunk, struct sctp_chunk,
+				   transmitted_list);
+		sctp_chunk_fail(chunk, q->error);
+		sctp_chunk_free(chunk);
+	}
+
+	/* Throw away any chunks in the retransmit queue. */
+	list_for_each_safe(lchunk, temp, &q->retransmit) {
+		list_del_init(lchunk);
+		chunk = list_entry(lchunk, struct sctp_chunk,
+				   transmitted_list);
+		sctp_chunk_fail(chunk, q->error);
+		sctp_chunk_free(chunk);
+	}
+
+	/* Throw away any chunks that are in the abandoned queue. */
+	list_for_each_safe(lchunk, temp, &q->abandoned) {
+		list_del_init(lchunk);
+		chunk = list_entry(lchunk, struct sctp_chunk,
+				   transmitted_list);
+		sctp_chunk_fail(chunk, q->error);
+		sctp_chunk_free(chunk);
+	}
+
+	/* Throw away any leftover data chunks. */
+	while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+
+		/* Mark as send failure. */
+		sctp_chunk_fail(chunk, q->error);
+		sctp_chunk_free(chunk);
+	}
+
+	q->error = 0;
+
+	/* Throw away any leftover control chunks. */
+	while ((chunk = (struct sctp_chunk *) skb_dequeue(&q->control)) != NULL)
+		sctp_chunk_free(chunk);
+}
+
+/* Free the outqueue structure and any related pending chunks.  */
+void sctp_outq_free(struct sctp_outq *q)
+{
+	/* Throw away leftover chunks. */
+	sctp_outq_teardown(q);
+
+	/* If we were kmalloc()'d, free the memory.  */
+	if (q->malloced)
+		kfree(q);
+}
+
+/* Put a new chunk in an sctp_outq.  */
+int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
+{
+	int error = 0;
+
+	SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n",
+			  q, chunk, chunk && chunk->chunk_hdr ?
+			  sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type))
+			  : "Illegal Chunk");
+
+	/* If it is data, queue it up, otherwise, send it
+	 * immediately.
+	 */
+	if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
+		/* Is it OK to queue data chunks?  */
+		/* From 9. Termination of Association
+		 *
+		 * When either endpoint performs a shutdown, the
+		 * association on each peer will stop accepting new
+		 * data from its user and only deliver data in queue
+		 * at the time of sending or receiving the SHUTDOWN
+		 * chunk.
+		 */
+		switch (q->asoc->state) {
+		case SCTP_STATE_EMPTY:
+		case SCTP_STATE_CLOSED:
+		case SCTP_STATE_SHUTDOWN_PENDING:
+		case SCTP_STATE_SHUTDOWN_SENT:
+		case SCTP_STATE_SHUTDOWN_RECEIVED:
+		case SCTP_STATE_SHUTDOWN_ACK_SENT:
+			/* Cannot send after transport endpoint shutdown */
+			error = -ESHUTDOWN;
+			break;
+
+		default:
+			SCTP_DEBUG_PRINTK("outqueueing (%p, %p[%s])\n",
+			  q, chunk, chunk && chunk->chunk_hdr ?
+			  sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type))
+			  : "Illegal Chunk");
+
+			sctp_outq_tail_data(q, chunk);
+			if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+				SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS);
+			else
+				SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
+			q->empty = 0;
+			break;
+		};
+	} else {
+		__skb_queue_tail(&q->control, (struct sk_buff *) chunk);
+		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+	}
+
+	if (error < 0)
+		return error;
+
+	if (!q->cork)
+		error = sctp_outq_flush(q, 0);
+
+	return error;
+}
+
+/* Insert a chunk into the sorted list based on the TSNs.  The retransmit list
+ * and the abandoned list are in ascending order.
+ */
+static void sctp_insert_list(struct list_head *head, struct list_head *new)
+{
+	struct list_head *pos;
+	struct sctp_chunk *nchunk, *lchunk;
+	__u32 ntsn, ltsn;
+	int done = 0;
+
+	nchunk = list_entry(new, struct sctp_chunk, transmitted_list);
+	ntsn = ntohl(nchunk->subh.data_hdr->tsn);
+
+	list_for_each(pos, head) {
+		lchunk = list_entry(pos, struct sctp_chunk, transmitted_list);
+		ltsn = ntohl(lchunk->subh.data_hdr->tsn);
+		if (TSN_lt(ntsn, ltsn)) {
+			list_add(new, pos->prev);
+			done = 1;
+			break;
+		}
+	}
+	if (!done)
+		list_add_tail(new, head); 
+}
+
+/* Mark all the eligible packets on a transport for retransmission.  */
+void sctp_retransmit_mark(struct sctp_outq *q,
+			  struct sctp_transport *transport,
+			  __u8 fast_retransmit)
+{
+	struct list_head *lchunk, *ltemp;
+	struct sctp_chunk *chunk;
+
+	/* Walk through the specified transmitted queue.  */
+	list_for_each_safe(lchunk, ltemp, &transport->transmitted) {
+		chunk = list_entry(lchunk, struct sctp_chunk,
+				   transmitted_list);
+
+		/* If the chunk is abandoned, move it to abandoned list. */
+		if (sctp_chunk_abandoned(chunk)) {
+			list_del_init(lchunk);
+			sctp_insert_list(&q->abandoned, lchunk);
+			continue;
+		}
+
+		/* If we are doing retransmission due to a fast retransmit,
+		 * only the chunk's that are marked for fast retransmit
+		 * should be added to the retransmit queue.  If we are doing
+		 * retransmission due to a timeout or pmtu discovery, only the
+		 * chunks that are not yet acked should be added to the
+		 * retransmit queue.
+		 */
+		if ((fast_retransmit && chunk->fast_retransmit) ||
+		   (!fast_retransmit && !chunk->tsn_gap_acked)) {
+			/* RFC 2960 6.2.1 Processing a Received SACK
+			 *
+			 * C) Any time a DATA chunk is marked for
+			 * retransmission (via either T3-rtx timer expiration
+			 * (Section 6.3.3) or via fast retransmit
+			 * (Section 7.2.4)), add the data size of those
+			 * chunks to the rwnd.
+			 */
+			q->asoc->peer.rwnd += sctp_data_size(chunk);
+			q->outstanding_bytes -= sctp_data_size(chunk);
+			transport->flight_size -= sctp_data_size(chunk);
+
+			/* sctpimpguide-05 Section 2.8.2
+			 * M5) If a T3-rtx timer expires, the
+			 * 'TSN.Missing.Report' of all affected TSNs is set
+			 * to 0.
+			 */
+			chunk->tsn_missing_report = 0;
+
+			/* If a chunk that is being used for RTT measurement
+			 * has to be retransmitted, we cannot use this chunk
+			 * anymore for RTT measurements. Reset rto_pending so
+			 * that a new RTT measurement is started when a new
+			 * data chunk is sent.
+			 */
+			if (chunk->rtt_in_progress) {
+				chunk->rtt_in_progress = 0;
+				transport->rto_pending = 0;
+			}
+
+			/* Move the chunk to the retransmit queue. The chunks
+			 * on the retransmit queue are always kept in order.
+			 */
+			list_del_init(lchunk);
+			sctp_insert_list(&q->retransmit, lchunk);
+		}
+	}
+
+	SCTP_DEBUG_PRINTK("%s: transport: %p, fast_retransmit: %d, "
+			  "cwnd: %d, ssthresh: %d, flight_size: %d, "
+			  "pba: %d\n", __FUNCTION__,
+			  transport, fast_retransmit,
+			  transport->cwnd, transport->ssthresh,
+			  transport->flight_size,
+			  transport->partial_bytes_acked);
+
+}
+
+/* Mark all the eligible packets on a transport for retransmission and force
+ * one packet out.
+ */
+void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
+		     sctp_retransmit_reason_t reason)
+{
+	int error = 0;
+	__u8 fast_retransmit = 0;
+
+	switch(reason) {
+	case SCTP_RTXR_T3_RTX:
+		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
+		/* Update the retran path if the T3-rtx timer has expired for
+		 * the current retran path.
+		 */
+		if (transport == transport->asoc->peer.retran_path)
+			sctp_assoc_update_retran_path(transport->asoc);
+		break;
+	case SCTP_RTXR_FAST_RTX:
+		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
+		fast_retransmit = 1;
+		break;
+	case SCTP_RTXR_PMTUD:
+	default:
+		break;
+	}
+
+	sctp_retransmit_mark(q, transport, fast_retransmit);
+
+	/* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination,
+	 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
+	 * following the procedures outlined in C1 - C5.
+	 */
+	sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
+
+	error = sctp_outq_flush(q, /* rtx_timeout */ 1);
+
+	if (error)
+		q->asoc->base.sk->sk_err = -error;
+}
+
+/*
+ * Transmit DATA chunks on the retransmit queue.  Upon return from
+ * sctp_outq_flush_rtx() the packet 'pkt' may contain chunks which
+ * need to be transmitted by the caller.
+ * We assume that pkt->transport has already been set.
+ *
+ * The return value is a normal kernel error return value.
+ */
+static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
+			       int rtx_timeout, int *start_timer)
+{
+	struct list_head *lqueue;
+	struct list_head *lchunk, *lchunk1;
+	struct sctp_transport *transport = pkt->transport;
+	sctp_xmit_t status;
+	struct sctp_chunk *chunk, *chunk1;
+	struct sctp_association *asoc;
+	int error = 0;
+
+	asoc = q->asoc;
+	lqueue = &q->retransmit;
+
+	/* RFC 2960 6.3.3 Handle T3-rtx Expiration
+	 *
+	 * E3) Determine how many of the earliest (i.e., lowest TSN)
+	 * outstanding DATA chunks for the address for which the
+	 * T3-rtx has expired will fit into a single packet, subject
+	 * to the MTU constraint for the path corresponding to the
+	 * destination transport address to which the retransmission
+	 * is being sent (this may be different from the address for
+	 * which the timer expires [see Section 6.4]). Call this value
+	 * K. Bundle and retransmit those K DATA chunks in a single
+	 * packet to the destination endpoint.
+	 *
+	 * [Just to be painfully clear, if we are retransmitting
+	 * because a timeout just happened, we should send only ONE
+	 * packet of retransmitted data.]
+	 */
+	lchunk = sctp_list_dequeue(lqueue);
+
+	while (lchunk) {
+		chunk = list_entry(lchunk, struct sctp_chunk,
+				   transmitted_list);
+
+		/* Make sure that Gap Acked TSNs are not retransmitted.  A
+		 * simple approach is just to move such TSNs out of the
+		 * way and into a 'transmitted' queue and skip to the
+		 * next chunk.
+		 */
+		if (chunk->tsn_gap_acked) {
+			list_add_tail(lchunk, &transport->transmitted);
+			lchunk = sctp_list_dequeue(lqueue);
+			continue;
+		}
+
+		/* Attempt to append this chunk to the packet. */
+		status = sctp_packet_append_chunk(pkt, chunk);
+
+		switch (status) {
+		case SCTP_XMIT_PMTU_FULL:
+			/* Send this packet.  */
+			if ((error = sctp_packet_transmit(pkt)) == 0)
+				*start_timer = 1;
+
+			/* If we are retransmitting, we should only
+			 * send a single packet.
+			 */
+			if (rtx_timeout) {
+				list_add(lchunk, lqueue);
+				lchunk = NULL;
+			}
+
+			/* Bundle lchunk in the next round.  */
+			break;
+
+		case SCTP_XMIT_RWND_FULL:
+		        /* Send this packet. */
+			if ((error = sctp_packet_transmit(pkt)) == 0)
+				*start_timer = 1;
+
+			/* Stop sending DATA as there is no more room
+			 * at the receiver.
+			 */
+			list_add(lchunk, lqueue);
+			lchunk = NULL;
+			break;
+
+		case SCTP_XMIT_NAGLE_DELAY:
+		        /* Send this packet. */
+			if ((error = sctp_packet_transmit(pkt)) == 0)
+				*start_timer = 1;
+
+			/* Stop sending DATA because of nagle delay. */
+			list_add(lchunk, lqueue);
+			lchunk = NULL;
+			break;
+
+		default:
+			/* The append was successful, so add this chunk to
+			 * the transmitted list.
+			 */
+			list_add_tail(lchunk, &transport->transmitted);
+
+			/* Mark the chunk as ineligible for fast retransmit 
+			 * after it is retransmitted.
+			 */
+			chunk->fast_retransmit = 0;
+
+			*start_timer = 1;
+			q->empty = 0;
+
+			/* Retrieve a new chunk to bundle. */
+			lchunk = sctp_list_dequeue(lqueue);
+			break;
+		};
+
+		/* If we are here due to a retransmit timeout or a fast
+		 * retransmit and if there are any chunks left in the retransmit
+		 * queue that could not fit in the PMTU sized packet, they need			 * to be marked as ineligible for a subsequent fast retransmit.
+		 */
+		if (rtx_timeout && !lchunk) {
+			list_for_each(lchunk1, lqueue) {
+				chunk1 = list_entry(lchunk1, struct sctp_chunk,
+						    transmitted_list);
+				chunk1->fast_retransmit = 0;
+			}
+		}
+	}
+
+	return error;
+}
+
+/* Cork the outqueue so queued chunks are really queued. */
+int sctp_outq_uncork(struct sctp_outq *q)
+{
+	int error = 0;
+	if (q->cork) {
+		q->cork = 0;
+		error = sctp_outq_flush(q, 0);
+	}
+	return error;
+}
+
+/*
+ * Try to flush an outqueue.
+ *
+ * Description: Send everything in q which we legally can, subject to
+ * congestion limitations.
+ * * Note: This function can be called from multiple contexts so appropriate
+ * locking concerns must be made.  Today we use the sock lock to protect
+ * this function.
+ */
+int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
+{
+	struct sctp_packet *packet;
+	struct sctp_packet singleton;
+	struct sctp_association *asoc = q->asoc;
+	__u16 sport = asoc->base.bind_addr.port;
+	__u16 dport = asoc->peer.port;
+	__u32 vtag = asoc->peer.i.init_tag;
+	struct sk_buff_head *queue;
+	struct sctp_transport *transport = NULL;
+	struct sctp_transport *new_transport;
+	struct sctp_chunk *chunk;
+	sctp_xmit_t status;
+	int error = 0;
+	int start_timer = 0;
+
+	/* These transports have chunks to send. */
+	struct list_head transport_list;
+	struct list_head *ltransport;
+
+	INIT_LIST_HEAD(&transport_list);
+	packet = NULL;
+
+	/*
+	 * 6.10 Bundling
+	 *   ...
+	 *   When bundling control chunks with DATA chunks, an
+	 *   endpoint MUST place control chunks first in the outbound
+	 *   SCTP packet.  The transmitter MUST transmit DATA chunks
+	 *   within a SCTP packet in increasing order of TSN.
+	 *   ...
+	 */
+
+	queue = &q->control;
+	while ((chunk = (struct sctp_chunk *)skb_dequeue(queue)) != NULL) {
+		/* Pick the right transport to use. */
+		new_transport = chunk->transport;
+
+		if (!new_transport) {
+			new_transport = asoc->peer.active_path;
+		} else if (!new_transport->active) {
+			/* If the chunk is Heartbeat or Heartbeat Ack, 
+			 * send it to chunk->transport, even if it's 
+			 * inactive.
+			 *
+			 * 3.3.6 Heartbeat Acknowledgement:
+			 * ...  
+			 * A HEARTBEAT ACK is always sent to the source IP
+			 * address of the IP datagram containing the
+			 * HEARTBEAT chunk to which this ack is responding.
+			 * ...  
+			 */
+			if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
+			    chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK)
+				new_transport = asoc->peer.active_path;
+		}
+
+		/* Are we switching transports?
+		 * Take care of transport locks.
+		 */
+		if (new_transport != transport) {
+			transport = new_transport;
+			if (list_empty(&transport->send_ready)) {
+				list_add_tail(&transport->send_ready,
+					      &transport_list);
+			}
+			packet = &transport->packet;
+			sctp_packet_config(packet, vtag,
+					   asoc->peer.ecn_capable);
+		}
+
+		switch (chunk->chunk_hdr->type) {
+		/*
+		 * 6.10 Bundling
+		 *   ...
+		 *   An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN
+		 *   COMPLETE with any other chunks.  [Send them immediately.]
+		 */
+		case SCTP_CID_INIT:
+		case SCTP_CID_INIT_ACK:
+		case SCTP_CID_SHUTDOWN_COMPLETE:
+			sctp_packet_init(&singleton, transport, sport, dport);
+			sctp_packet_config(&singleton, vtag, 0);
+			sctp_packet_append_chunk(&singleton, chunk);
+			error = sctp_packet_transmit(&singleton);
+			if (error < 0)
+				return error;
+			break;
+
+		case SCTP_CID_ABORT:
+		case SCTP_CID_SACK:
+		case SCTP_CID_HEARTBEAT:
+		case SCTP_CID_HEARTBEAT_ACK:
+		case SCTP_CID_SHUTDOWN:
+		case SCTP_CID_SHUTDOWN_ACK:
+		case SCTP_CID_ERROR:
+		case SCTP_CID_COOKIE_ECHO:
+		case SCTP_CID_COOKIE_ACK:
+		case SCTP_CID_ECN_ECNE:
+		case SCTP_CID_ECN_CWR:
+		case SCTP_CID_ASCONF:
+		case SCTP_CID_ASCONF_ACK:
+		case SCTP_CID_FWD_TSN:
+			sctp_packet_transmit_chunk(packet, chunk);
+			break;
+
+		default:
+			/* We built a chunk with an illegal type! */
+			BUG();
+		};
+	}
+
+	/* Is it OK to send data chunks?  */
+	switch (asoc->state) {
+	case SCTP_STATE_COOKIE_ECHOED:
+		/* Only allow bundling when this packet has a COOKIE-ECHO
+		 * chunk.
+		 */
+		if (!packet || !packet->has_cookie_echo)
+			break;
+
+		/* fallthru */
+	case SCTP_STATE_ESTABLISHED:
+	case SCTP_STATE_SHUTDOWN_PENDING:
+	case SCTP_STATE_SHUTDOWN_RECEIVED:
+		/*
+		 * RFC 2960 6.1  Transmission of DATA Chunks
+		 *
+		 * C) When the time comes for the sender to transmit,
+		 * before sending new DATA chunks, the sender MUST
+		 * first transmit any outstanding DATA chunks which
+		 * are marked for retransmission (limited by the
+		 * current cwnd).
+		 */
+		if (!list_empty(&q->retransmit)) {
+			if (transport == asoc->peer.retran_path)
+				goto retran;
+
+			/* Switch transports & prepare the packet.  */
+
+			transport = asoc->peer.retran_path;
+
+			if (list_empty(&transport->send_ready)) {
+				list_add_tail(&transport->send_ready,
+					      &transport_list);
+			}
+
+			packet = &transport->packet;
+			sctp_packet_config(packet, vtag,
+					   asoc->peer.ecn_capable);
+		retran:
+			error = sctp_outq_flush_rtx(q, packet,
+						    rtx_timeout, &start_timer);
+
+			if (start_timer)
+				sctp_transport_reset_timers(transport);
+
+			/* This can happen on COOKIE-ECHO resend.  Only
+			 * one chunk can get bundled with a COOKIE-ECHO.
+			 */
+			if (packet->has_cookie_echo)
+				goto sctp_flush_out;
+
+			/* Don't send new data if there is still data
+			 * waiting to retransmit.
+			 */
+			if (!list_empty(&q->retransmit))
+				goto sctp_flush_out;
+		}
+
+		/* Finally, transmit new packets.  */
+		start_timer = 0;
+		queue = &q->out;
+
+		while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
+			 * stream identifier.
+			 */
+			if (chunk->sinfo.sinfo_stream >=
+			    asoc->c.sinit_num_ostreams) {
+
+				/* Mark as failed send. */
+				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
+				sctp_chunk_free(chunk);
+				continue;
+			}
+
+			/* Has this chunk expired? */
+			if (sctp_chunk_abandoned(chunk)) {
+				sctp_chunk_fail(chunk, 0);
+				sctp_chunk_free(chunk);
+				continue;
+			}
+
+			/* If there is a specified transport, use it.
+			 * Otherwise, we want to use the active path.
+			 */
+			new_transport = chunk->transport;
+			if (!new_transport || !new_transport->active)
+				new_transport = asoc->peer.active_path;
+
+			/* Change packets if necessary.  */
+			if (new_transport != transport) {
+				transport = new_transport;
+
+				/* Schedule to have this transport's
+				 * packet flushed.
+				 */
+				if (list_empty(&transport->send_ready)) {
+					list_add_tail(&transport->send_ready,
+						      &transport_list);
+				}
+
+				packet = &transport->packet;
+				sctp_packet_config(packet, vtag,
+						   asoc->peer.ecn_capable);
+			}
+
+			SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ",
+					  q, chunk,
+					  chunk && chunk->chunk_hdr ?
+					  sctp_cname(SCTP_ST_CHUNK(
+						  chunk->chunk_hdr->type))
+					  : "Illegal Chunk");
+
+			SCTP_DEBUG_PRINTK("TX TSN 0x%x skb->head "
+					"%p skb->users %d.\n",
+					ntohl(chunk->subh.data_hdr->tsn),
+					chunk->skb ?chunk->skb->head : NULL,
+					chunk->skb ?
+					atomic_read(&chunk->skb->users) : -1);
+
+			/* Add the chunk to the packet.  */
+			status = sctp_packet_transmit_chunk(packet, chunk);
+
+			switch (status) {
+			case SCTP_XMIT_PMTU_FULL:
+			case SCTP_XMIT_RWND_FULL:
+			case SCTP_XMIT_NAGLE_DELAY:
+				/* We could not append this chunk, so put
+				 * the chunk back on the output queue.
+				 */
+				SCTP_DEBUG_PRINTK("sctp_outq_flush: could "
+					"not transmit TSN: 0x%x, status: %d\n",
+					ntohl(chunk->subh.data_hdr->tsn),
+					status);
+				sctp_outq_head_data(q, chunk);
+				goto sctp_flush_out;
+				break;
+
+			case SCTP_XMIT_OK:
+				break;
+
+			default:
+				BUG();
+			}
+
+			/* BUG: We assume that the sctp_packet_transmit() 
+			 * call below will succeed all the time and add the
+			 * chunk to the transmitted list and restart the
+			 * timers.
+			 * It is possible that the call can fail under OOM
+			 * conditions.
+			 *
+			 * Is this really a problem?  Won't this behave
+			 * like a lost TSN?
+			 */
+			list_add_tail(&chunk->transmitted_list,
+				      &transport->transmitted);
+
+			sctp_transport_reset_timers(transport);
+
+			q->empty = 0;
+
+			/* Only let one DATA chunk get bundled with a
+			 * COOKIE-ECHO chunk.
+			 */
+			if (packet->has_cookie_echo)
+				goto sctp_flush_out;
+		}
+		break;
+
+	default:
+		/* Do nothing.  */
+		break;
+	}
+
+sctp_flush_out:
+
+	/* Before returning, examine all the transports touched in
+	 * this call.  Right now, we bluntly force clear all the
+	 * transports.  Things might change after we implement Nagle.
+	 * But such an examination is still required.
+	 *
+	 * --xguo
+	 */
+	while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL ) {
+		struct sctp_transport *t = list_entry(ltransport,
+						      struct sctp_transport,
+						      send_ready);
+		packet = &t->packet;
+		if (!sctp_packet_empty(packet))
+			error = sctp_packet_transmit(packet);
+	}
+
+	return error;
+}
+
+/* Update unack_data based on the incoming SACK chunk */
+static void sctp_sack_update_unack_data(struct sctp_association *assoc,
+					struct sctp_sackhdr *sack)
+{
+	sctp_sack_variable_t *frags;
+	__u16 unack_data;
+	int i;
+
+	unack_data = assoc->next_tsn - assoc->ctsn_ack_point - 1;
+
+	frags = sack->variable;
+	for (i = 0; i < ntohs(sack->num_gap_ack_blocks); i++) {
+		unack_data -= ((ntohs(frags[i].gab.end) -
+				ntohs(frags[i].gab.start) + 1));
+	}
+
+	assoc->unack_data = unack_data;
+}
+
+/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
+static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
+				  struct sctp_association *asoc)
+{
+	struct list_head *ltransport, *lchunk;
+	struct sctp_transport *transport;
+	struct sctp_chunk *chunk;
+	__u32 highest_new_tsn, tsn;
+	struct list_head *transport_list = &asoc->peer.transport_addr_list;
+
+	highest_new_tsn = ntohl(sack->cum_tsn_ack);
+
+	list_for_each(ltransport, transport_list) {
+		transport = list_entry(ltransport, struct sctp_transport,
+				       transports);
+		list_for_each(lchunk, &transport->transmitted) {
+			chunk = list_entry(lchunk, struct sctp_chunk,
+					   transmitted_list);
+			tsn = ntohl(chunk->subh.data_hdr->tsn);
+
+			if (!chunk->tsn_gap_acked &&
+			    TSN_lt(highest_new_tsn, tsn) &&
+			    sctp_acked(sack, tsn))
+				highest_new_tsn = tsn;
+		}
+	}
+
+	return highest_new_tsn;
+}
+
+/* This is where we REALLY process a SACK.
+ *
+ * Process the SACK against the outqueue.  Mostly, this just frees
+ * things off the transmitted queue.
+ */
+int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
+{
+	struct sctp_association *asoc = q->asoc;
+	struct sctp_transport *transport;
+	struct sctp_chunk *tchunk = NULL;
+	struct list_head *lchunk, *transport_list, *pos, *temp;
+	sctp_sack_variable_t *frags = sack->variable;
+	__u32 sack_ctsn, ctsn, tsn;
+	__u32 highest_tsn, highest_new_tsn;
+	__u32 sack_a_rwnd;
+	unsigned outstanding;
+	struct sctp_transport *primary = asoc->peer.primary_path;
+	int count_of_newacks = 0;
+
+	/* Grab the association's destination address list. */
+	transport_list = &asoc->peer.transport_addr_list;
+
+	sack_ctsn = ntohl(sack->cum_tsn_ack);
+
+	/*
+	 * SFR-CACC algorithm:
+	 * On receipt of a SACK the sender SHOULD execute the
+	 * following statements.
+	 *
+	 * 1) If the cumulative ack in the SACK passes next tsn_at_change
+	 * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be
+	 * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for
+	 * all destinations.
+	 */
+	if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) {
+		primary->cacc.changeover_active = 0;
+		list_for_each(pos, transport_list) {
+			transport = list_entry(pos, struct sctp_transport,
+					transports);
+			transport->cacc.cycling_changeover = 0;
+		}
+	}
+
+	/*
+	 * SFR-CACC algorithm:
+	 * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE
+	 * is set the receiver of the SACK MUST take the following actions:
+	 *
+	 * A) Initialize the cacc_saw_newack to 0 for all destination
+	 * addresses.
+	 */
+	if (sack->num_gap_ack_blocks > 0 &&
+	    primary->cacc.changeover_active) {
+		list_for_each(pos, transport_list) {
+			transport = list_entry(pos, struct sctp_transport,
+					transports);
+			transport->cacc.cacc_saw_newack = 0;
+		}
+	}
+
+	/* Get the highest TSN in the sack. */
+	highest_tsn = sack_ctsn;
+	if (sack->num_gap_ack_blocks)
+		highest_tsn +=
+		    ntohs(frags[ntohs(sack->num_gap_ack_blocks) - 1].gab.end);
+
+	if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
+		highest_new_tsn = highest_tsn;
+		asoc->highest_sacked = highest_tsn;
+	} else {
+		highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
+	}
+
+	/* Run through the retransmit queue.  Credit bytes received
+	 * and free those chunks that we can.
+	 */
+	sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
+	sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0);
+
+	/* Run through the transmitted queue.
+	 * Credit bytes received and free those chunks which we can.
+	 *
+	 * This is a MASSIVE candidate for optimization.
+	 */
+	list_for_each(pos, transport_list) {
+		transport  = list_entry(pos, struct sctp_transport,
+					transports);
+		sctp_check_transmitted(q, &transport->transmitted,
+				       transport, sack, highest_new_tsn);
+		/*
+		 * SFR-CACC algorithm:
+		 * C) Let count_of_newacks be the number of
+		 * destinations for which cacc_saw_newack is set.
+		 */
+		if (transport->cacc.cacc_saw_newack)
+			count_of_newacks ++;
+	}
+
+	list_for_each(pos, transport_list) {
+		transport  = list_entry(pos, struct sctp_transport,
+					transports);
+		sctp_mark_missing(q, &transport->transmitted, transport,
+				  highest_new_tsn, count_of_newacks);
+	}
+
+	/* Move the Cumulative TSN Ack Point if appropriate.  */
+	if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
+		asoc->ctsn_ack_point = sack_ctsn;
+
+	/* Update unack_data field in the assoc. */
+	sctp_sack_update_unack_data(asoc, sack);
+
+	ctsn = asoc->ctsn_ack_point;
+
+	/* Throw away stuff rotting on the sack queue.  */
+	list_for_each_safe(lchunk, temp, &q->sacked) {
+		tchunk = list_entry(lchunk, struct sctp_chunk,
+				    transmitted_list);
+		tsn = ntohl(tchunk->subh.data_hdr->tsn);
+		if (TSN_lte(tsn, ctsn))
+			sctp_chunk_free(tchunk);
+	}
+
+	/* ii) Set rwnd equal to the newly received a_rwnd minus the
+	 *     number of bytes still outstanding after processing the
+	 *     Cumulative TSN Ack and the Gap Ack Blocks.
+	 */
+
+	sack_a_rwnd = ntohl(sack->a_rwnd);
+	outstanding = q->outstanding_bytes;
+
+	if (outstanding < sack_a_rwnd)
+		sack_a_rwnd -= outstanding;
+	else
+		sack_a_rwnd = 0;
+
+	asoc->peer.rwnd = sack_a_rwnd;
+
+	sctp_generate_fwdtsn(q, sack_ctsn);
+
+	SCTP_DEBUG_PRINTK("%s: sack Cumulative TSN Ack is 0x%x.\n",
+			  __FUNCTION__, sack_ctsn);
+	SCTP_DEBUG_PRINTK("%s: Cumulative TSN Ack of association, "
+			  "%p is 0x%x. Adv peer ack point: 0x%x\n",
+			  __FUNCTION__, asoc, ctsn, asoc->adv_peer_ack_point);
+
+	/* See if all chunks are acked.
+	 * Make sure the empty queue handler will get run later.
+	 */
+	q->empty = skb_queue_empty(&q->out) && skb_queue_empty(&q->control) &&
+			list_empty(&q->retransmit);
+	if (!q->empty)
+		goto finish;
+
+	list_for_each(pos, transport_list) {
+		transport  = list_entry(pos, struct sctp_transport,
+					transports);
+		q->empty = q->empty && list_empty(&transport->transmitted);
+		if (!q->empty)
+			goto finish;
+	}
+
+	SCTP_DEBUG_PRINTK("sack queue is empty.\n");
+finish:
+	return q->empty;
+}
+
+/* Is the outqueue empty?  */
+int sctp_outq_is_empty(const struct sctp_outq *q)
+{
+	return q->empty;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* Go through a transport's transmitted list or the association's retransmit
+ * list and move chunks that are acked by the Cumulative TSN Ack to q->sacked.
+ * The retransmit list will not have an associated transport.
+ *
+ * I added coherent debug information output.	--xguo
+ *
+ * Instead of printing 'sacked' or 'kept' for each TSN on the
+ * transmitted_queue, we print a range: SACKED: TSN1-TSN2, TSN3, TSN4-TSN5.
+ * KEPT TSN6-TSN7, etc.
+ */
+static void sctp_check_transmitted(struct sctp_outq *q,
+				   struct list_head *transmitted_queue,
+				   struct sctp_transport *transport,
+				   struct sctp_sackhdr *sack,
+				   __u32 highest_new_tsn_in_sack)
+{
+	struct list_head *lchunk;
+	struct sctp_chunk *tchunk;
+	struct list_head tlist;
+	__u32 tsn;
+	__u32 sack_ctsn;
+	__u32 rtt;
+	__u8 restart_timer = 0;
+	int bytes_acked = 0;
+
+	/* These state variables are for coherent debug output. --xguo */
+
+#if SCTP_DEBUG
+	__u32 dbg_ack_tsn = 0;	/* An ACKed TSN range starts here... */
+	__u32 dbg_last_ack_tsn = 0;  /* ...and finishes here.	     */
+	__u32 dbg_kept_tsn = 0;	/* An un-ACKed range starts here...  */
+	__u32 dbg_last_kept_tsn = 0; /* ...and finishes here.	     */
+
+	/* 0 : The last TSN was ACKed.
+	 * 1 : The last TSN was NOT ACKed (i.e. KEPT).
+	 * -1: We need to initialize.
+	 */
+	int dbg_prt_state = -1;
+#endif /* SCTP_DEBUG */
+
+	sack_ctsn = ntohl(sack->cum_tsn_ack);
+
+	INIT_LIST_HEAD(&tlist);
+
+	/* The while loop will skip empty transmitted queues. */
+	while (NULL != (lchunk = sctp_list_dequeue(transmitted_queue))) {
+		tchunk = list_entry(lchunk, struct sctp_chunk,
+				    transmitted_list);
+
+		if (sctp_chunk_abandoned(tchunk)) {
+			/* Move the chunk to abandoned list. */
+			sctp_insert_list(&q->abandoned, lchunk);
+			continue;
+		}
+
+		tsn = ntohl(tchunk->subh.data_hdr->tsn);
+		if (sctp_acked(sack, tsn)) {
+			/* If this queue is the retransmit queue, the
+			 * retransmit timer has already reclaimed
+			 * the outstanding bytes for this chunk, so only
+			 * count bytes associated with a transport.
+			 */
+			if (transport) {
+				/* If this chunk is being used for RTT
+				 * measurement, calculate the RTT and update
+				 * the RTO using this value.
+				 *
+				 * 6.3.1 C5) Karn's algorithm: RTT measurements
+				 * MUST NOT be made using packets that were
+				 * retransmitted (and thus for which it is
+				 * ambiguous whether the reply was for the
+				 * first instance of the packet or a later
+				 * instance).
+				 */
+			   	if (!tchunk->tsn_gap_acked &&
+				    !tchunk->resent &&
+				    tchunk->rtt_in_progress) {
+					rtt = jiffies - tchunk->sent_at;
+					sctp_transport_update_rto(transport,
+								  rtt);
+				}
+			}
+                        if (TSN_lte(tsn, sack_ctsn)) {
+				/* RFC 2960  6.3.2 Retransmission Timer Rules
+				 *
+				 * R3) Whenever a SACK is received
+				 * that acknowledges the DATA chunk
+				 * with the earliest outstanding TSN
+				 * for that address, restart T3-rtx
+				 * timer for that address with its
+				 * current RTO.
+				 */
+				restart_timer = 1;
+
+				if (!tchunk->tsn_gap_acked) {
+					tchunk->tsn_gap_acked = 1;
+					bytes_acked += sctp_data_size(tchunk);
+					/*
+					 * SFR-CACC algorithm:
+					 * 2) If the SACK contains gap acks
+					 * and the flag CHANGEOVER_ACTIVE is
+					 * set the receiver of the SACK MUST
+					 * take the following action:
+					 *
+					 * B) For each TSN t being acked that
+					 * has not been acked in any SACK so
+					 * far, set cacc_saw_newack to 1 for
+					 * the destination that the TSN was
+					 * sent to.
+					 */
+					if (transport &&
+					    sack->num_gap_ack_blocks &&
+					    q->asoc->peer.primary_path->cacc.
+					    changeover_active)
+						transport->cacc.cacc_saw_newack
+							= 1;
+				}
+
+				list_add_tail(&tchunk->transmitted_list,
+					      &q->sacked);
+			} else {
+				/* RFC2960 7.2.4, sctpimpguide-05 2.8.2
+				 * M2) Each time a SACK arrives reporting
+				 * 'Stray DATA chunk(s)' record the highest TSN
+				 * reported as newly acknowledged, call this
+				 * value 'HighestTSNinSack'. A newly
+				 * acknowledged DATA chunk is one not
+				 * previously acknowledged in a SACK.
+				 *
+				 * When the SCTP sender of data receives a SACK
+				 * chunk that acknowledges, for the first time,
+				 * the receipt of a DATA chunk, all the still
+				 * unacknowledged DATA chunks whose TSN is
+				 * older than that newly acknowledged DATA
+				 * chunk, are qualified as 'Stray DATA chunks'.
+				 */
+				if (!tchunk->tsn_gap_acked) {
+					tchunk->tsn_gap_acked = 1;
+					bytes_acked += sctp_data_size(tchunk);
+				}
+				list_add_tail(lchunk, &tlist);
+			}
+
+#if SCTP_DEBUG
+			switch (dbg_prt_state) {
+			case 0:	/* last TSN was ACKed */
+				if (dbg_last_ack_tsn + 1 == tsn) {
+					/* This TSN belongs to the
+					 * current ACK range.
+					 */
+					break;
+				}
+
+				if (dbg_last_ack_tsn != dbg_ack_tsn) {
+					/* Display the end of the
+					 * current range.
+					 */
+					SCTP_DEBUG_PRINTK("-%08x",
+							  dbg_last_ack_tsn);
+				}
+
+				/* Start a new range.  */
+				SCTP_DEBUG_PRINTK(",%08x", tsn);
+				dbg_ack_tsn = tsn;
+				break;
+
+			case 1:	/* The last TSN was NOT ACKed. */
+				if (dbg_last_kept_tsn != dbg_kept_tsn) {
+					/* Display the end of current range. */
+					SCTP_DEBUG_PRINTK("-%08x",
+							  dbg_last_kept_tsn);
+				}
+
+				SCTP_DEBUG_PRINTK("\n");
+
+				/* FALL THROUGH... */
+			default:
+				/* This is the first-ever TSN we examined.  */
+				/* Start a new range of ACK-ed TSNs.  */
+				SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
+				dbg_prt_state = 0;
+				dbg_ack_tsn = tsn;
+			};
+
+			dbg_last_ack_tsn = tsn;
+#endif /* SCTP_DEBUG */
+
+		} else {
+			if (tchunk->tsn_gap_acked) {
+				SCTP_DEBUG_PRINTK("%s: Receiver reneged on "
+						  "data TSN: 0x%x\n",
+						  __FUNCTION__,
+						  tsn);
+				tchunk->tsn_gap_acked = 0;
+
+				bytes_acked -= sctp_data_size(tchunk);
+
+				/* RFC 2960 6.3.2 Retransmission Timer Rules
+				 *
+				 * R4) Whenever a SACK is received missing a
+				 * TSN that was previously acknowledged via a
+				 * Gap Ack Block, start T3-rtx for the
+				 * destination address to which the DATA
+				 * chunk was originally
+				 * transmitted if it is not already running.
+				 */
+				restart_timer = 1;
+			}
+
+			list_add_tail(lchunk, &tlist);
+
+#if SCTP_DEBUG
+			/* See the above comments on ACK-ed TSNs. */
+			switch (dbg_prt_state) {
+			case 1:
+				if (dbg_last_kept_tsn + 1 == tsn)
+					break;
+
+				if (dbg_last_kept_tsn != dbg_kept_tsn)
+					SCTP_DEBUG_PRINTK("-%08x",
+							  dbg_last_kept_tsn);
+
+				SCTP_DEBUG_PRINTK(",%08x", tsn);
+				dbg_kept_tsn = tsn;
+				break;
+
+			case 0:
+				if (dbg_last_ack_tsn != dbg_ack_tsn)
+					SCTP_DEBUG_PRINTK("-%08x",
+							  dbg_last_ack_tsn);
+				SCTP_DEBUG_PRINTK("\n");
+
+				/* FALL THROUGH... */
+			default:
+				SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
+				dbg_prt_state = 1;
+				dbg_kept_tsn = tsn;
+			};
+
+			dbg_last_kept_tsn = tsn;
+#endif /* SCTP_DEBUG */
+		}
+	}
+
+#if SCTP_DEBUG
+	/* Finish off the last range, displaying its ending TSN.  */
+	switch (dbg_prt_state) {
+	case 0:
+		if (dbg_last_ack_tsn != dbg_ack_tsn) {
+			SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn);
+		} else {
+			SCTP_DEBUG_PRINTK("\n");
+		}
+	break;
+
+	case 1:
+		if (dbg_last_kept_tsn != dbg_kept_tsn) {
+			SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn);
+		} else {
+			SCTP_DEBUG_PRINTK("\n");
+		}
+	};
+#endif /* SCTP_DEBUG */
+	if (transport) {
+		if (bytes_acked) {
+			/* 8.2. When an outstanding TSN is acknowledged,
+			 * the endpoint shall clear the error counter of
+			 * the destination transport address to which the
+			 * DATA chunk was last sent.
+			 * The association's overall error counter is
+			 * also cleared.
+			 */
+			transport->error_count = 0;
+			transport->asoc->overall_error_count = 0;
+
+			/* Mark the destination transport address as
+			 * active if it is not so marked.
+			 */
+			if (!transport->active) {
+				sctp_assoc_control_transport(
+					transport->asoc,
+					transport,
+					SCTP_TRANSPORT_UP,
+					SCTP_RECEIVED_SACK);
+			}
+
+			sctp_transport_raise_cwnd(transport, sack_ctsn,
+						  bytes_acked);
+
+			transport->flight_size -= bytes_acked;
+			q->outstanding_bytes -= bytes_acked;
+		} else {
+			/* RFC 2960 6.1, sctpimpguide-06 2.15.2
+			 * When a sender is doing zero window probing, it
+			 * should not timeout the association if it continues
+			 * to receive new packets from the receiver. The
+			 * reason is that the receiver MAY keep its window
+			 * closed for an indefinite time.
+			 * A sender is doing zero window probing when the
+			 * receiver's advertised window is zero, and there is
+			 * only one data chunk in flight to the receiver.
+			 */
+			if (!q->asoc->peer.rwnd &&
+			    !list_empty(&tlist) &&
+			    (sack_ctsn+2 == q->asoc->next_tsn)) {
+				SCTP_DEBUG_PRINTK("%s: SACK received for zero "
+						  "window probe: %u\n",
+						  __FUNCTION__, sack_ctsn);
+				q->asoc->overall_error_count = 0;
+				transport->error_count = 0;
+			}
+		}
+
+		/* RFC 2960 6.3.2 Retransmission Timer Rules
+		 *
+		 * R2) Whenever all outstanding data sent to an address have
+		 * been acknowledged, turn off the T3-rtx timer of that
+		 * address.
+		 */
+		if (!transport->flight_size) {
+			if (timer_pending(&transport->T3_rtx_timer) &&
+			    del_timer(&transport->T3_rtx_timer)) {
+				sctp_transport_put(transport);
+			}
+		} else if (restart_timer) {
+			if (!mod_timer(&transport->T3_rtx_timer,
+				       jiffies + transport->rto))
+				sctp_transport_hold(transport);
+		}
+	}
+
+	list_splice(&tlist, transmitted_queue);
+}
+
+/* Mark chunks as missing and consequently may get retransmitted. */
+static void sctp_mark_missing(struct sctp_outq *q,
+			      struct list_head *transmitted_queue,
+			      struct sctp_transport *transport,
+			      __u32 highest_new_tsn_in_sack,
+			      int count_of_newacks)
+{
+	struct sctp_chunk *chunk;
+	struct list_head *pos;
+	__u32 tsn;
+	char do_fast_retransmit = 0;
+	struct sctp_transport *primary = q->asoc->peer.primary_path;
+
+	list_for_each(pos, transmitted_queue) {
+
+		chunk = list_entry(pos, struct sctp_chunk, transmitted_list);
+		tsn = ntohl(chunk->subh.data_hdr->tsn);
+
+		/* RFC 2960 7.2.4, sctpimpguide-05 2.8.2 M3) Examine all
+		 * 'Unacknowledged TSN's', if the TSN number of an
+		 * 'Unacknowledged TSN' is smaller than the 'HighestTSNinSack'
+		 * value, increment the 'TSN.Missing.Report' count on that
+		 * chunk if it has NOT been fast retransmitted or marked for
+		 * fast retransmit already.
+		 */
+		if (!chunk->fast_retransmit &&
+		    !chunk->tsn_gap_acked &&
+		    TSN_lt(tsn, highest_new_tsn_in_sack)) {
+
+			/* SFR-CACC may require us to skip marking
+			 * this chunk as missing.
+			 */
+			if (!transport || !sctp_cacc_skip(primary, transport,
+					    count_of_newacks, tsn)) {
+				chunk->tsn_missing_report++;
+
+				SCTP_DEBUG_PRINTK(
+					"%s: TSN 0x%x missing counter: %d\n",
+					__FUNCTION__, tsn,
+					chunk->tsn_missing_report);
+			}
+		}
+		/*
+		 * M4) If any DATA chunk is found to have a
+		 * 'TSN.Missing.Report'
+		 * value larger than or equal to 4, mark that chunk for
+		 * retransmission and start the fast retransmit procedure.
+		 */
+
+		if (chunk->tsn_missing_report >= 4) {
+			chunk->fast_retransmit = 1;
+			do_fast_retransmit = 1;
+		}
+	}
+
+	if (transport) {
+		if (do_fast_retransmit)
+			sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX);
+
+		SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, "
+				  "ssthresh: %d, flight_size: %d, pba: %d\n",
+				  __FUNCTION__, transport, transport->cwnd,
+			  	  transport->ssthresh, transport->flight_size,
+				  transport->partial_bytes_acked);
+	}
+}
+
+/* Is the given TSN acked by this packet?  */
+static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
+{
+	int i;
+	sctp_sack_variable_t *frags;
+	__u16 gap;
+	__u32 ctsn = ntohl(sack->cum_tsn_ack);
+
+        if (TSN_lte(tsn, ctsn))
+		goto pass;
+
+	/* 3.3.4 Selective Acknowledgement (SACK) (3):
+	 *
+	 * Gap Ack Blocks:
+	 *  These fields contain the Gap Ack Blocks. They are repeated
+	 *  for each Gap Ack Block up to the number of Gap Ack Blocks
+	 *  defined in the Number of Gap Ack Blocks field. All DATA
+	 *  chunks with TSNs greater than or equal to (Cumulative TSN
+	 *  Ack + Gap Ack Block Start) and less than or equal to
+	 *  (Cumulative TSN Ack + Gap Ack Block End) of each Gap Ack
+	 *  Block are assumed to have been received correctly.
+	 */
+
+	frags = sack->variable;
+	gap = tsn - ctsn;
+	for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
+		if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
+		    TSN_lte(gap, ntohs(frags[i].gab.end)))
+			goto pass;
+	}
+
+	return 0;
+pass:
+	return 1;
+}
+
+static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist,
+				    int nskips, __u16 stream)
+{
+	int i;
+
+	for (i = 0; i < nskips; i++) {
+		if (skiplist[i].stream == stream)
+			return i;
+	}
+	return i;
+}
+
+/* Create and add a fwdtsn chunk to the outq's control queue if needed. */
+static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
+{
+	struct sctp_association *asoc = q->asoc;
+	struct sctp_chunk *ftsn_chunk = NULL;
+	struct sctp_fwdtsn_skip ftsn_skip_arr[10];
+	int nskips = 0;
+	int skip_pos = 0;
+	__u32 tsn;
+	struct sctp_chunk *chunk;
+	struct list_head *lchunk, *temp;
+
+	/* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the
+	 * received SACK.
+	 * 
+	 * If (Advanced.Peer.Ack.Point < SackCumAck), then update
+	 * Advanced.Peer.Ack.Point to be equal to SackCumAck.
+	 */
+	if (TSN_lt(asoc->adv_peer_ack_point, ctsn))
+		asoc->adv_peer_ack_point = ctsn;
+
+	/* PR-SCTP C2) Try to further advance the "Advanced.Peer.Ack.Point"
+	 * locally, that is, to move "Advanced.Peer.Ack.Point" up as long as
+	 * the chunk next in the out-queue space is marked as "abandoned" as
+	 * shown in the following example:
+	 *
+	 * Assuming that a SACK arrived with the Cumulative TSN ACK 102
+	 * and the Advanced.Peer.Ack.Point is updated to this value:
+	 * 
+	 *   out-queue at the end of  ==>   out-queue after Adv.Ack.Point
+	 *   normal SACK processing           local advancement
+	 *                ...                           ...
+	 *   Adv.Ack.Pt-> 102 acked                     102 acked
+	 *                103 abandoned                 103 abandoned
+	 *                104 abandoned     Adv.Ack.P-> 104 abandoned
+	 *                105                           105
+	 *                106 acked                     106 acked
+	 *                ...                           ...
+	 *
+	 * In this example, the data sender successfully advanced the
+	 * "Advanced.Peer.Ack.Point" from 102 to 104 locally.
+	 */
+	list_for_each_safe(lchunk, temp, &q->abandoned) {
+		chunk = list_entry(lchunk, struct sctp_chunk,
+					transmitted_list);
+		tsn = ntohl(chunk->subh.data_hdr->tsn);
+
+		/* Remove any chunks in the abandoned queue that are acked by
+		 * the ctsn.
+		 */ 
+		if (TSN_lte(tsn, ctsn)) {
+			list_del_init(lchunk);
+			if (!chunk->tsn_gap_acked) {
+			chunk->transport->flight_size -=
+						 sctp_data_size(chunk);
+			q->outstanding_bytes -= sctp_data_size(chunk);
+			}
+			sctp_chunk_free(chunk);
+		} else {
+			if (TSN_lte(tsn, asoc->adv_peer_ack_point+1)) {
+				asoc->adv_peer_ack_point = tsn;
+				if (chunk->chunk_hdr->flags &
+					 SCTP_DATA_UNORDERED)
+					continue;
+				skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0],
+						nskips,
+						chunk->subh.data_hdr->stream);
+				ftsn_skip_arr[skip_pos].stream =
+					chunk->subh.data_hdr->stream;
+				ftsn_skip_arr[skip_pos].ssn =
+					 chunk->subh.data_hdr->ssn;
+				if (skip_pos == nskips)
+					nskips++;
+				if (nskips == 10)
+					break;
+			} else
+				break;
+		}
+	}
+
+	/* PR-SCTP C3) If, after step C1 and C2, the "Advanced.Peer.Ack.Point"
+	 * is greater than the Cumulative TSN ACK carried in the received
+	 * SACK, the data sender MUST send the data receiver a FORWARD TSN
+	 * chunk containing the latest value of the
+	 * "Advanced.Peer.Ack.Point".
+	 *
+	 * C4) For each "abandoned" TSN the sender of the FORWARD TSN SHOULD
+	 * list each stream and sequence number in the forwarded TSN. This
+	 * information will enable the receiver to easily find any
+	 * stranded TSN's waiting on stream reorder queues. Each stream
+	 * SHOULD only be reported once; this means that if multiple
+	 * abandoned messages occur in the same stream then only the
+	 * highest abandoned stream sequence number is reported. If the
+	 * total size of the FORWARD TSN does NOT fit in a single MTU then
+	 * the sender of the FORWARD TSN SHOULD lower the
+	 * Advanced.Peer.Ack.Point to the last TSN that will fit in a
+	 * single MTU.
+	 */
+	if (asoc->adv_peer_ack_point > ctsn)
+		ftsn_chunk = sctp_make_fwdtsn(asoc, asoc->adv_peer_ack_point,
+					      nskips, &ftsn_skip_arr[0]); 
+
+	if (ftsn_chunk) {
+		__skb_queue_tail(&q->control, (struct sk_buff *)ftsn_chunk);
+		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+	}
+}
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
new file mode 100644
index 00000000000..3a7ebfcc1fd
--- /dev/null
+++ b/net/sctp/primitive.c
@@ -0,0 +1,219 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions implement the SCTP primitive functions from Section 10.
+ *
+ * Note that the descriptions from the specification are USER level
+ * functions--this file is the functions which populate the struct proto
+ * for SCTP which is the BOTTOM of the sockets interface.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Narasimha Budihal     <narasimha@refcode.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Kevin Gao             <kevin.gao@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h> /* For struct list_head */
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/time.h> /* For struct timeval */
+#include <net/sock.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+#define DECLARE_PRIMITIVE(name) \
+/* This is called in the code as sctp_primitive_ ## name.  */ \
+int sctp_primitive_ ## name(struct sctp_association *asoc, \
+			    void *arg) { \
+	int error = 0; \
+	sctp_event_t event_type; sctp_subtype_t subtype; \
+	sctp_state_t state; \
+	struct sctp_endpoint *ep; \
+	\
+	event_type = SCTP_EVENT_T_PRIMITIVE; \
+	subtype = SCTP_ST_PRIMITIVE(SCTP_PRIMITIVE_ ## name); \
+	state = asoc ? asoc->state : SCTP_STATE_CLOSED; \
+	ep = asoc ? asoc->ep : NULL; \
+	\
+	error = sctp_do_sm(event_type, subtype, state, ep, asoc, \
+			   arg, GFP_KERNEL); \
+ 	return error; \
+}
+
+/* 10.1 ULP-to-SCTP
+ * B) Associate
+ *
+ * Format: ASSOCIATE(local SCTP instance name, destination transport addr,
+ *         outbound stream count)
+ * -> association id [,destination transport addr list] [,outbound stream
+ *    count]
+ *
+ * This primitive allows the upper layer to initiate an association to a
+ * specific peer endpoint.
+ *
+ * This version assumes that asoc is fully populated with the initial
+ * parameters.  We then return a traditional kernel indicator of
+ * success or failure.
+ */
+
+/* This is called in the code as sctp_primitive_ASSOCIATE.  */
+
+DECLARE_PRIMITIVE(ASSOCIATE)
+
+/* 10.1 ULP-to-SCTP
+ * C) Shutdown
+ *
+ * Format: SHUTDOWN(association id)
+ * -> result
+ *
+ * Gracefully closes an association. Any locally queued user data
+ * will be delivered to the peer. The association will be terminated only
+ * after the peer acknowledges all the SCTP packets sent.  A success code
+ * will be returned on successful termination of the association. If
+ * attempting to terminate the association results in a failure, an error
+ * code shall be returned.
+ */
+
+DECLARE_PRIMITIVE(SHUTDOWN);
+
+/* 10.1 ULP-to-SCTP
+ * C) Abort
+ *
+ * Format: Abort(association id [, cause code])
+ * -> result
+ *
+ * Ungracefully closes an association. Any locally queued user data
+ * will be discarded and an ABORT chunk is sent to the peer. A success
+ * code will be returned on successful abortion of the association. If
+ * attempting to abort the association results in a failure, an error
+ * code shall be returned.
+ */
+
+DECLARE_PRIMITIVE(ABORT);
+
+/* 10.1 ULP-to-SCTP
+ * E) Send
+ *
+ * Format: SEND(association id, buffer address, byte count [,context]
+ *         [,stream id] [,life time] [,destination transport address]
+ *         [,unorder flag] [,no-bundle flag] [,payload protocol-id] )
+ * -> result
+ *
+ * This is the main method to send user data via SCTP.
+ *
+ * Mandatory attributes:
+ *
+ *  o association id - local handle to the SCTP association
+ *
+ *  o buffer address - the location where the user message to be
+ *    transmitted is stored;
+ *
+ *  o byte count - The size of the user data in number of bytes;
+ *
+ * Optional attributes:
+ *
+ *  o context - an optional 32 bit integer that will be carried in the
+ *    sending failure notification to the ULP if the transportation of
+ *    this User Message fails.
+ *
+ *  o stream id - to indicate which stream to send the data on. If not
+ *    specified, stream 0 will be used.
+ *
+ *  o life time - specifies the life time of the user data. The user data
+ *    will not be sent by SCTP after the life time expires. This
+ *    parameter can be used to avoid efforts to transmit stale
+ *    user messages. SCTP notifies the ULP if the data cannot be
+ *    initiated to transport (i.e. sent to the destination via SCTP's
+ *    send primitive) within the life time variable. However, the
+ *    user data will be transmitted if SCTP has attempted to transmit a
+ *    chunk before the life time expired.
+ *
+ *  o destination transport address - specified as one of the destination
+ *    transport addresses of the peer endpoint to which this packet
+ *    should be sent. Whenever possible, SCTP should use this destination
+ *    transport address for sending the packets, instead of the current
+ *    primary path.
+ *
+ *  o unorder flag - this flag, if present, indicates that the user
+ *    would like the data delivered in an unordered fashion to the peer
+ *    (i.e., the U flag is set to 1 on all DATA chunks carrying this
+ *    message).
+ *
+ *  o no-bundle flag - instructs SCTP not to bundle this user data with
+ *    other outbound DATA chunks. SCTP MAY still bundle even when
+ *    this flag is present, when faced with network congestion.
+ *
+ *  o payload protocol-id - A 32 bit unsigned integer that is to be
+ *    passed to the peer indicating the type of payload protocol data
+ *    being transmitted. This value is passed as opaque data by SCTP.
+ */
+
+DECLARE_PRIMITIVE(SEND);
+
+/* 10.1 ULP-to-SCTP
+ * J) Request Heartbeat
+ *
+ * Format: REQUESTHEARTBEAT(association id, destination transport address)
+ *
+ * -> result
+ *
+ * Instructs the local endpoint to perform a HeartBeat on the specified
+ * destination transport address of the given association. The returned
+ * result should indicate whether the transmission of the HEARTBEAT
+ * chunk to the destination address is successful.
+ *
+ * Mandatory attributes:
+ *
+ * o association id - local handle to the SCTP association
+ *
+ * o destination transport address - the transport address of the
+ *   association on which a heartbeat should be issued.
+ */
+
+DECLARE_PRIMITIVE(REQUESTHEARTBEAT);
+
+/* ADDIP
+* 3.1.1 Address Configuration Change Chunk (ASCONF)
+* 
+* This chunk is used to communicate to the remote endpoint one of the
+* configuration change requests that MUST be acknowledged.  The
+* information carried in the ASCONF Chunk uses the form of a
+* Type-Length-Value (TLV), as described in "3.2.1 Optional/
+* Variable-length Parameter Format" in RFC2960 [5], forall variable
+* parameters.
+*/
+
+DECLARE_PRIMITIVE(ASCONF);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
new file mode 100644
index 00000000000..e42fd8c2916
--- /dev/null
+++ b/net/sctp/proc.c
@@ -0,0 +1,288 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 2003 International Business Machines, Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Sridhar Samudrala <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/sctp/sctp.h>
+
+static struct snmp_mib sctp_snmp_list[] = {
+	SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB),
+	SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS),
+	SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS),
+	SNMP_MIB_ITEM("SctpAborteds", SCTP_MIB_ABORTEDS),
+	SNMP_MIB_ITEM("SctpShutdowns", SCTP_MIB_SHUTDOWNS),
+	SNMP_MIB_ITEM("SctpOutOfBlues", SCTP_MIB_OUTOFBLUES),
+	SNMP_MIB_ITEM("SctpChecksumErrors", SCTP_MIB_CHECKSUMERRORS),
+	SNMP_MIB_ITEM("SctpOutCtrlChunks", SCTP_MIB_OUTCTRLCHUNKS),
+	SNMP_MIB_ITEM("SctpOutOrderChunks", SCTP_MIB_OUTORDERCHUNKS),
+	SNMP_MIB_ITEM("SctpOutUnorderChunks", SCTP_MIB_OUTUNORDERCHUNKS),
+	SNMP_MIB_ITEM("SctpInCtrlChunks", SCTP_MIB_INCTRLCHUNKS),
+	SNMP_MIB_ITEM("SctpInOrderChunks", SCTP_MIB_INORDERCHUNKS),
+	SNMP_MIB_ITEM("SctpInUnorderChunks", SCTP_MIB_INUNORDERCHUNKS),
+	SNMP_MIB_ITEM("SctpFragUsrMsgs", SCTP_MIB_FRAGUSRMSGS),
+	SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
+	SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
+	SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+};
+
+/* Return the current value of a particular entry in the mib by adding its
+ * per cpu counters.
+ */ 
+static unsigned long
+fold_field(void *mib[], int nr)
+{
+	unsigned long res = 0;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		res +=
+		    *((unsigned long *) (((void *) per_cpu_ptr(mib[0], i)) +
+					 sizeof (unsigned long) * nr));
+		res +=
+		    *((unsigned long *) (((void *) per_cpu_ptr(mib[1], i)) +
+					 sizeof (unsigned long) * nr));
+	}
+	return res;
+}
+
+/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
+static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
+		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
+			   fold_field((void **)sctp_statistics, 
+				      sctp_snmp_list[i].entry));
+
+	return 0;
+}
+
+/* Initialize the seq file operations for 'snmp' object. */
+static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sctp_snmp_seq_show, NULL);
+}
+
+static struct file_operations sctp_snmp_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = sctp_snmp_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+/* Set up the proc fs entry for 'snmp' object. */
+int __init sctp_snmp_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("snmp", S_IRUGO, proc_net_sctp);
+	if (!p)
+		return -ENOMEM;
+
+	p->proc_fops = &sctp_snmp_seq_fops;
+
+	return 0;
+}
+
+/* Cleanup the proc fs entry for 'snmp' object. */
+void sctp_snmp_proc_exit(void)
+{
+	remove_proc_entry("snmp", proc_net_sctp);
+}
+
+/* Dump local addresses of an association/endpoint. */
+static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
+{
+	struct list_head *pos;
+	struct sctp_sockaddr_entry *laddr;
+	union sctp_addr *addr;
+	struct sctp_af *af;
+
+	list_for_each(pos, &epb->bind_addr.address_list) {
+		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		addr = (union sctp_addr *)&laddr->a;
+		af = sctp_get_af_specific(addr->sa.sa_family);
+		af->seq_dump_addr(seq, addr);
+	}
+}
+
+/* Dump remote addresses of an association. */
+static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_association *assoc)
+{
+	struct list_head *pos;
+	struct sctp_transport *transport;
+	union sctp_addr *addr;
+	struct sctp_af *af;
+
+	list_for_each(pos, &assoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		addr = (union sctp_addr *)&transport->ipaddr;
+		af = sctp_get_af_specific(addr->sa.sa_family);
+		af->seq_dump_addr(seq, addr);
+	}
+}
+
+/* Display sctp endpoints (/proc/net/sctp/eps). */
+static int sctp_eps_seq_show(struct seq_file *seq, void *v)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+	struct sctp_endpoint *ep;
+	struct sock *sk;
+	int hash;
+
+	seq_printf(seq, " ENDPT     SOCK   STY SST HBKT LPORT LADDRS\n");
+	for (hash = 0; hash < sctp_ep_hashsize; hash++) {
+		head = &sctp_ep_hashtable[hash];
+		read_lock(&head->lock);
+		for (epb = head->chain; epb; epb = epb->next) {
+			ep = sctp_ep(epb);
+			sk = epb->sk;
+			seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d ", ep, sk,
+				   sctp_sk(sk)->type, sk->sk_state, hash,
+				   epb->bind_addr.port);
+			sctp_seq_dump_local_addrs(seq, epb);
+			seq_printf(seq, "\n");
+		}
+		read_unlock(&head->lock);
+	}
+
+	return 0;
+}
+
+/* Initialize the seq file operations for 'eps' object. */
+static int sctp_eps_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sctp_eps_seq_show, NULL);
+}
+
+static struct file_operations sctp_eps_seq_fops = {
+	.open	 = sctp_eps_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+/* Set up the proc fs entry for 'eps' object. */
+int __init sctp_eps_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("eps", S_IRUGO, proc_net_sctp);
+	if (!p)
+		return -ENOMEM;
+
+	p->proc_fops = &sctp_eps_seq_fops;
+
+	return 0;
+}
+
+/* Cleanup the proc fs entry for 'eps' object. */
+void sctp_eps_proc_exit(void)
+{
+	remove_proc_entry("eps", proc_net_sctp);
+}
+
+/* Display sctp associations (/proc/net/sctp/assocs). */
+static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
+{
+	struct sctp_hashbucket *head;
+	struct sctp_ep_common *epb;
+	struct sctp_association *assoc;
+	struct sock *sk;
+	int hash;
+
+	seq_printf(seq, " ASSOC     SOCK   STY SST ST HBKT LPORT RPORT "
+			"LADDRS <-> RADDRS\n");
+	for (hash = 0; hash < sctp_assoc_hashsize; hash++) {
+		head = &sctp_assoc_hashtable[hash];
+		read_lock(&head->lock);
+		for (epb = head->chain; epb; epb = epb->next) {
+			assoc = sctp_assoc(epb);
+			sk = epb->sk;
+			seq_printf(seq,
+				   "%8p %8p %-3d %-3d %-2d %-4d %-5d %-5d ",
+				   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
+				   assoc->state, hash, epb->bind_addr.port,
+				   assoc->peer.port);
+			sctp_seq_dump_local_addrs(seq, epb);
+			seq_printf(seq, "<-> ");
+			sctp_seq_dump_remote_addrs(seq, assoc);
+			seq_printf(seq, "\n");
+		}
+		read_unlock(&head->lock);
+	}
+
+	return 0;
+}
+
+/* Initialize the seq file operations for 'assocs' object. */
+static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sctp_assocs_seq_show, NULL);
+}
+
+static struct file_operations sctp_assocs_seq_fops = {
+	.open	 = sctp_assocs_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+/* Set up the proc fs entry for 'assocs' object. */
+int __init sctp_assocs_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("assocs", S_IRUGO, proc_net_sctp);
+	if (!p)
+		return -ENOMEM;
+
+	p->proc_fops = &sctp_assocs_seq_fops;
+
+	return 0;
+}
+
+/* Cleanup the proc fs entry for 'assocs' object. */
+void sctp_assocs_proc_exit(void)
+{
+	remove_proc_entry("assocs", proc_net_sctp);
+}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
new file mode 100644
index 00000000000..b9813cf3d91
--- /dev/null
+++ b/net/sctp/protocol.c
@@ -0,0 +1,1240 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * Initialization/cleanup for SCTP protocol support.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson <karl@athena.chicago.il.us>
+ *    Jon Grimm <jgrimm@us.ibm.com>
+ *    Sridhar Samudrala <sri@us.ibm.com>
+ *    Daisy Chang <daisyc@us.ibm.com>
+ *    Ardelle Fan <ardelle.fan@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/seq_file.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/sctp/sctp.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/inet_ecn.h>
+
+/* Global data structures. */
+struct sctp_globals sctp_globals;
+struct proc_dir_entry	*proc_net_sctp;
+DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics);
+
+struct idr sctp_assocs_id;
+DEFINE_SPINLOCK(sctp_assocs_id_lock);
+
+/* This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets.  A control sock will be created
+ * for this socket at the initialization time.
+ */
+static struct socket *sctp_ctl_socket;
+
+static struct sctp_pf *sctp_pf_inet6_specific;
+static struct sctp_pf *sctp_pf_inet_specific;
+static struct sctp_af *sctp_af_v4_specific;
+static struct sctp_af *sctp_af_v6_specific;
+
+kmem_cache_t *sctp_chunk_cachep;
+kmem_cache_t *sctp_bucket_cachep;
+
+extern int sctp_snmp_proc_init(void);
+extern int sctp_snmp_proc_exit(void);
+extern int sctp_eps_proc_init(void);
+extern int sctp_eps_proc_exit(void);
+extern int sctp_assocs_proc_init(void);
+extern int sctp_assocs_proc_exit(void);
+
+/* Return the address of the control sock. */
+struct sock *sctp_get_ctl_sock(void)
+{
+	return sctp_ctl_socket->sk;
+}
+
+/* Set up the proc fs entry for the SCTP protocol. */
+static __init int sctp_proc_init(void)
+{
+	if (!proc_net_sctp) {
+		struct proc_dir_entry *ent;
+		ent = proc_mkdir("net/sctp", NULL);
+		if (ent) {
+			ent->owner = THIS_MODULE;
+			proc_net_sctp = ent;
+		} else
+			goto out_nomem;
+	}
+
+	if (sctp_snmp_proc_init())
+		goto out_nomem;	
+	if (sctp_eps_proc_init())
+		goto out_nomem;	
+	if (sctp_assocs_proc_init())
+		goto out_nomem;	
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+/* Clean up the proc fs entry for the SCTP protocol. 
+ * Note: Do not make this __exit as it is used in the init error
+ * path.
+ */
+static void sctp_proc_exit(void)
+{
+	sctp_snmp_proc_exit();
+	sctp_eps_proc_exit();
+	sctp_assocs_proc_exit();
+
+	if (proc_net_sctp) {
+		proc_net_sctp = NULL;
+		remove_proc_entry("net/sctp", NULL);
+	}
+}
+
+/* Private helper to extract ipv4 address and stash them in
+ * the protocol structure.
+ */
+static void sctp_v4_copy_addrlist(struct list_head *addrlist,
+				  struct net_device *dev)
+{
+	struct in_device *in_dev;
+	struct in_ifaddr *ifa;
+	struct sctp_sockaddr_entry *addr;
+
+	rcu_read_lock();
+	if ((in_dev = __in_dev_get(dev)) == NULL) {
+		rcu_read_unlock();
+		return;
+	}
+
+	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+		/* Add the address to the local list.  */
+		addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
+		if (addr) {
+			addr->a.v4.sin_family = AF_INET;
+			addr->a.v4.sin_port = 0;
+			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
+			list_add_tail(&addr->list, addrlist);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+/* Extract our IP addresses from the system and stash them in the
+ * protocol structure.
+ */
+static void __sctp_get_local_addr_list(void)
+{
+	struct net_device *dev;
+	struct list_head *pos;
+	struct sctp_af *af;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		__list_for_each(pos, &sctp_address_families) {
+			af = list_entry(pos, struct sctp_af, list);
+			af->copy_addrlist(&sctp_local_addr_list, dev);
+		}
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static void sctp_get_local_addr_list(void)
+{
+	unsigned long flags;
+
+	sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+	__sctp_get_local_addr_list();
+	sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+}
+
+/* Free the existing local addresses.  */
+static void __sctp_free_local_addr_list(void)
+{
+	struct sctp_sockaddr_entry *addr;
+	struct list_head *pos, *temp;
+
+	list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		list_del(pos);
+		kfree(addr);
+	}
+}
+
+/* Free the existing local addresses.  */
+static void sctp_free_local_addr_list(void)
+{
+	unsigned long flags;
+
+	sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+	__sctp_free_local_addr_list();
+	sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+}
+
+/* Copy the local addresses which are valid for 'scope' into 'bp'.  */
+int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
+			      int gfp, int copy_flags)
+{
+	struct sctp_sockaddr_entry *addr;
+	int error = 0;
+	struct list_head *pos;
+	unsigned long flags;
+
+	sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+	list_for_each(pos, &sctp_local_addr_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if (sctp_in_scope(&addr->a, scope)) {
+			/* Now that the address is in scope, check to see if
+			 * the address type is really supported by the local
+			 * sock as well as the remote peer.
+			 */
+			if ((((AF_INET == addr->a.sa.sa_family) &&
+			      (copy_flags & SCTP_ADDR4_PEERSUPP))) ||
+			    (((AF_INET6 == addr->a.sa.sa_family) &&
+			      (copy_flags & SCTP_ADDR6_ALLOWED) &&
+			      (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
+				error = sctp_add_bind_addr(bp, &addr->a, 
+							   GFP_ATOMIC);
+				if (error)
+					goto end_copy;
+			}
+		}
+	}
+
+end_copy:
+	sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+	return error;
+}
+
+/* Initialize a sctp_addr from in incoming skb.  */
+static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
+			     int is_saddr)
+{
+	void *from;
+	__u16 *port;
+	struct sctphdr *sh;
+
+	port = &addr->v4.sin_port;
+	addr->v4.sin_family = AF_INET;
+
+	sh = (struct sctphdr *) skb->h.raw;
+	if (is_saddr) {
+		*port  = ntohs(sh->source);
+		from = &skb->nh.iph->saddr;
+	} else {
+		*port = ntohs(sh->dest);
+		from = &skb->nh.iph->daddr;
+	}
+	memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
+}
+
+/* Initialize an sctp_addr from a socket. */
+static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
+{
+	addr->v4.sin_family = AF_INET;
+	addr->v4.sin_port = inet_sk(sk)->num;
+	addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr;
+}
+
+/* Initialize sk->sk_rcv_saddr from sctp_addr. */
+static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
+{
+	inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr;
+}
+
+/* Initialize sk->sk_daddr from sctp_addr. */
+static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
+{
+	inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr;
+}
+
+/* Initialize a sctp_addr from an address parameter. */
+static void sctp_v4_from_addr_param(union sctp_addr *addr,
+				    union sctp_addr_param *param,
+				    __u16 port, int iif)
+{
+	addr->v4.sin_family = AF_INET;
+	addr->v4.sin_port = port;
+	addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
+}
+
+/* Initialize an address parameter from a sctp_addr and return the length
+ * of the address parameter.
+ */
+static int sctp_v4_to_addr_param(const union sctp_addr *addr,
+				 union sctp_addr_param *param)
+{
+	int length = sizeof(sctp_ipv4addr_param_t);
+
+	param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
+	param->v4.param_hdr.length = ntohs(length);
+	param->v4.addr.s_addr = addr->v4.sin_addr.s_addr;	
+
+	return length;
+}
+
+/* Initialize a sctp_addr from a dst_entry. */
+static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct dst_entry *dst,
+			      unsigned short port)
+{
+	struct rtable *rt = (struct rtable *)dst;
+	saddr->v4.sin_family = AF_INET;
+	saddr->v4.sin_port = port;
+	saddr->v4.sin_addr.s_addr = rt->rt_src;
+}
+
+/* Compare two addresses exactly. */
+static int sctp_v4_cmp_addr(const union sctp_addr *addr1,
+			    const union sctp_addr *addr2)
+{
+	if (addr1->sa.sa_family != addr2->sa.sa_family)
+		return 0;
+	if (addr1->v4.sin_port != addr2->v4.sin_port)
+		return 0;
+	if (addr1->v4.sin_addr.s_addr != addr2->v4.sin_addr.s_addr)
+		return 0;
+
+	return 1;
+}
+
+/* Initialize addr struct to INADDR_ANY. */
+static void sctp_v4_inaddr_any(union sctp_addr *addr, unsigned short port)
+{
+	addr->v4.sin_family = AF_INET;
+	addr->v4.sin_addr.s_addr = INADDR_ANY;
+	addr->v4.sin_port = port;
+}
+
+/* Is this a wildcard address? */
+static int sctp_v4_is_any(const union sctp_addr *addr)
+{
+	return INADDR_ANY == addr->v4.sin_addr.s_addr;
+}
+
+/* This function checks if the address is a valid address to be used for
+ * SCTP binding.
+ *
+ * Output:
+ * Return 0 - If the address is a non-unicast or an illegal address.
+ * Return 1 - If the address is a unicast.
+ */
+static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp)
+{
+	/* Is this a non-unicast address or a unusable SCTP address? */
+	if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr))
+		return 0;
+
+	return 1;
+}
+
+/* Should this be available for binding?   */
+static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
+{
+	int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
+
+	/* FIXME: ip_nonlocal_bind sysctl support. */
+
+	if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL)
+		return 0;
+	return 1;
+}
+
+/* Checking the loopback, private and other address scopes as defined in
+ * RFC 1918.   The IPv4 scoping is based on the draft for SCTP IPv4
+ * scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>.
+ *
+ * Level 0 - unusable SCTP addresses
+ * Level 1 - loopback address
+ * Level 2 - link-local addresses
+ * Level 3 - private addresses.
+ * Level 4 - global addresses
+ * For INIT and INIT-ACK address list, let L be the level of
+ * of requested destination address, sender and receiver
+ * SHOULD include all of its addresses with level greater
+ * than or equal to L.
+ */
+static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+{
+	sctp_scope_t retval;
+
+	/* Should IPv4 scoping be a sysctl configurable option
+	 * so users can turn it off (default on) for certain
+	 * unconventional networking environments?
+	 */
+
+	/* Check for unusable SCTP addresses. */
+	if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+		retval =  SCTP_SCOPE_UNUSABLE;
+	} else if (LOOPBACK(addr->v4.sin_addr.s_addr)) {
+		retval = SCTP_SCOPE_LOOPBACK;
+	} else if (IS_IPV4_LINK_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+		retval = SCTP_SCOPE_LINK;
+	} else if (IS_IPV4_PRIVATE_ADDRESS(&addr->v4.sin_addr.s_addr)) {
+		retval = SCTP_SCOPE_PRIVATE;
+	} else {
+		retval = SCTP_SCOPE_GLOBAL;
+	}
+
+	return retval;
+}
+
+/* Returns a valid dst cache entry for the given source and destination ip
+ * addresses. If an association is passed, trys to get a dst entry with a
+ * source address that matches an address in the bind address list.
+ */
+static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
+					 union sctp_addr *daddr,
+					 union sctp_addr *saddr)
+{
+	struct rtable *rt;
+	struct flowi fl;
+	struct sctp_bind_addr *bp;
+	rwlock_t *addr_lock;
+	struct sctp_sockaddr_entry *laddr;
+	struct list_head *pos;
+	struct dst_entry *dst = NULL;
+	union sctp_addr dst_saddr;
+
+	memset(&fl, 0x0, sizeof(struct flowi));
+	fl.fl4_dst  = daddr->v4.sin_addr.s_addr;
+	fl.proto = IPPROTO_SCTP;
+	if (asoc) {
+		fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl.oif = asoc->base.sk->sk_bound_dev_if;
+	}
+	if (saddr)
+		fl.fl4_src = saddr->v4.sin_addr.s_addr;
+
+	SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ",
+			  __FUNCTION__, NIPQUAD(fl.fl4_dst),
+			  NIPQUAD(fl.fl4_src));
+
+	if (!ip_route_output_key(&rt, &fl)) {
+		dst = &rt->u.dst;
+	}
+
+	/* If there is no association or if a source address is passed, no
+	 * more validation is required.
+	 */
+	if (!asoc || saddr)
+		goto out;
+
+	bp = &asoc->base.bind_addr;
+	addr_lock = &asoc->base.addr_lock;
+
+	if (dst) {
+		/* Walk through the bind address list and look for a bind
+		 * address that matches the source address of the returned dst.
+		 */
+		sctp_read_lock(addr_lock);
+		list_for_each(pos, &bp->address_list) {
+			laddr = list_entry(pos, struct sctp_sockaddr_entry,
+					   list);
+			sctp_v4_dst_saddr(&dst_saddr, dst, bp->port);
+			if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
+				goto out_unlock;
+		}
+		sctp_read_unlock(addr_lock);
+
+		/* None of the bound addresses match the source address of the
+		 * dst. So release it.
+		 */
+		dst_release(dst);
+		dst = NULL;
+	}
+
+	/* Walk through the bind address list and try to get a dst that
+	 * matches a bind address as the source address.
+	 */
+	sctp_read_lock(addr_lock);
+	list_for_each(pos, &bp->address_list) {
+		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+
+		if (AF_INET == laddr->a.sa.sa_family) {
+			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+			if (!ip_route_output_key(&rt, &fl)) {
+				dst = &rt->u.dst;
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	sctp_read_unlock(addr_lock);
+out:
+	if (dst)
+		SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
+			  	  NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_src));
+	else
+		SCTP_DEBUG_PRINTK("NO ROUTE\n");
+
+	return dst;
+}
+
+/* For v4, the source address is cached in the route entry(dst). So no need
+ * to cache it separately and hence this is an empty routine.
+ */
+static void sctp_v4_get_saddr(struct sctp_association *asoc,
+			      struct dst_entry *dst,
+			      union sctp_addr *daddr,
+			      union sctp_addr *saddr)
+{
+	struct rtable *rt = (struct rtable *)dst;
+
+	if (rt) {
+		saddr->v4.sin_family = AF_INET;
+		saddr->v4.sin_port = asoc->base.bind_addr.port;  
+		saddr->v4.sin_addr.s_addr = rt->rt_src; 
+	}
+}
+
+/* What interface did this skb arrive on? */
+static int sctp_v4_skb_iif(const struct sk_buff *skb)
+{
+     	return ((struct rtable *)skb->dst)->rt_iif;
+}
+
+/* Was this packet marked by Explicit Congestion Notification? */
+static int sctp_v4_is_ce(const struct sk_buff *skb)
+{
+	return INET_ECN_is_ce(skb->nh.iph->tos);
+}
+
+/* Create and initialize a new sk for the socket returned by accept(). */
+static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
+					     struct sctp_association *asoc)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_sock *newinet;
+	struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1);
+
+	if (!newsk)
+		goto out;
+
+	sock_init_data(NULL, newsk);
+
+	newsk->sk_type = SOCK_STREAM;
+
+	newsk->sk_no_check = sk->sk_no_check;
+	newsk->sk_reuse = sk->sk_reuse;
+	newsk->sk_shutdown = sk->sk_shutdown;
+
+	newsk->sk_destruct = inet_sock_destruct;
+	newsk->sk_family = PF_INET;
+	newsk->sk_protocol = IPPROTO_SCTP;
+	newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+	sock_reset_flag(newsk, SOCK_ZAPPED);
+
+	newinet = inet_sk(newsk);
+
+	/* Initialize sk's sport, dport, rcv_saddr and daddr for
+	 * getsockname() and getpeername()
+	 */
+	newinet->sport = inet->sport;
+	newinet->saddr = inet->saddr;
+	newinet->rcv_saddr = inet->rcv_saddr;
+	newinet->dport = htons(asoc->peer.port);
+	newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
+	newinet->pmtudisc = inet->pmtudisc;
+      	newinet->id = 0;
+
+	newinet->uc_ttl = -1;
+	newinet->mc_loop = 1;
+	newinet->mc_ttl = 1;
+	newinet->mc_index = 0;
+	newinet->mc_list = NULL;
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet_sock_nr);
+#endif
+
+	if (newsk->sk_prot->init(newsk)) {
+		sk_common_release(newsk);
+		newsk = NULL;
+	}
+
+out:
+	return newsk;
+}
+
+/* Map address, empty for v4 family */
+static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
+{
+	/* Empty */
+}
+
+/* Dump the v4 addr to the seq file. */
+static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
+{
+	seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
+}
+
+/* Event handler for inet address addition/deletion events.
+ * Basically, whenever there is an event, we re-build our local address list.
+ */
+int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
+                        void *ptr)
+{
+	unsigned long flags;
+
+	sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+	__sctp_free_local_addr_list();
+	__sctp_get_local_addr_list();
+	sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Initialize the control inode/socket with a control endpoint data
+ * structure.  This endpoint is reserved exclusively for the OOTB processing.
+ */
+static int sctp_ctl_sock_init(void)
+{
+	int err;
+	sa_family_t family;
+
+	if (sctp_get_pf_specific(PF_INET6))
+		family = PF_INET6;
+	else
+		family = PF_INET;
+
+	err = sock_create_kern(family, SOCK_SEQPACKET, IPPROTO_SCTP,
+			       &sctp_ctl_socket);
+	if (err < 0) {
+		printk(KERN_ERR
+		       "SCTP: Failed to create the SCTP control socket.\n");
+		return err;
+	}
+	sctp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
+	inet_sk(sctp_ctl_socket->sk)->uc_ttl = -1;
+
+	return 0;
+}
+
+/* Register address family specific functions. */
+int sctp_register_af(struct sctp_af *af)
+{
+	switch (af->sa_family) {
+	case AF_INET:
+		if (sctp_af_v4_specific)
+			return 0;
+		sctp_af_v4_specific = af;
+		break;
+	case AF_INET6:
+		if (sctp_af_v6_specific)
+			return 0;
+		sctp_af_v6_specific = af;
+		break;
+	default:
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&af->list);
+	list_add_tail(&af->list, &sctp_address_families);
+	return 1;
+}
+
+/* Get the table of functions for manipulating a particular address
+ * family.
+ */
+struct sctp_af *sctp_get_af_specific(sa_family_t family)
+{
+	switch (family) {
+	case AF_INET:
+		return sctp_af_v4_specific;
+	case AF_INET6:
+		return sctp_af_v6_specific;
+	default:
+		return NULL;
+	}
+}
+
+/* Common code to initialize a AF_INET msg_name. */
+static void sctp_inet_msgname(char *msgname, int *addr_len)
+{
+	struct sockaddr_in *sin;
+
+	sin = (struct sockaddr_in *)msgname;
+	*addr_len = sizeof(struct sockaddr_in);
+	sin->sin_family = AF_INET;
+	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+}
+
+/* Copy the primary address of the peer primary address as the msg_name. */
+static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
+				    int *addr_len)
+{
+	struct sockaddr_in *sin, *sinfrom;
+
+	if (msgname) {
+		struct sctp_association *asoc;
+
+		asoc = event->asoc;
+		sctp_inet_msgname(msgname, addr_len);
+		sin = (struct sockaddr_in *)msgname;
+		sinfrom = &asoc->peer.primary_addr.v4;
+		sin->sin_port = htons(asoc->peer.port);
+		sin->sin_addr.s_addr = sinfrom->sin_addr.s_addr;
+	}
+}
+
+/* Initialize and copy out a msgname from an inbound skb. */
+static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
+{
+	struct sctphdr *sh;
+	struct sockaddr_in *sin;
+
+	if (msgname) {
+		sctp_inet_msgname(msgname, len);
+		sin = (struct sockaddr_in *)msgname;
+		sh = (struct sctphdr *)skb->h.raw;
+		sin->sin_port = sh->source;
+		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+	}
+}
+
+/* Do we support this AF? */
+static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
+{
+	/* PF_INET only supports AF_INET addresses. */
+	return (AF_INET == family);
+}
+
+/* Address matching with wildcards allowed. */
+static int sctp_inet_cmp_addr(const union sctp_addr *addr1,
+			      const union sctp_addr *addr2,
+			      struct sctp_sock *opt)
+{
+	/* PF_INET only supports AF_INET addresses. */
+	if (addr1->sa.sa_family != addr2->sa.sa_family)
+		return 0;
+	if (INADDR_ANY == addr1->v4.sin_addr.s_addr ||
+	    INADDR_ANY == addr2->v4.sin_addr.s_addr)
+		return 1;
+	if (addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr)
+		return 1;
+
+	return 0;
+}
+
+/* Verify that provided sockaddr looks bindable.  Common verification has
+ * already been taken care of.
+ */
+static int sctp_inet_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
+{
+	return sctp_v4_available(addr, opt);
+}
+
+/* Verify that sockaddr looks sendable.  Common verification has already
+ * been taken care of.
+ */
+static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
+{
+	return 1;
+}
+
+/* Fill in Supported Address Type information for INIT and INIT-ACK
+ * chunks.  Returns number of addresses supported.
+ */
+static int sctp_inet_supported_addrs(const struct sctp_sock *opt,
+				     __u16 *types)
+{
+	types[0] = SCTP_PARAM_IPV4_ADDRESS;
+	return 1;
+}
+
+/* Wrapper routine that calls the ip transmit routine. */
+static inline int sctp_v4_xmit(struct sk_buff *skb,
+			       struct sctp_transport *transport, int ipfragok)
+{
+	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
+			  "src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n",
+			  __FUNCTION__, skb, skb->len,
+			  NIPQUAD(((struct rtable *)skb->dst)->rt_src),
+			  NIPQUAD(((struct rtable *)skb->dst)->rt_dst));
+
+	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+	return ip_queue_xmit(skb, ipfragok);
+}
+
+static struct sctp_af sctp_ipv4_specific;
+
+static struct sctp_pf sctp_pf_inet = {
+	.event_msgname = sctp_inet_event_msgname,
+	.skb_msgname   = sctp_inet_skb_msgname,
+	.af_supported  = sctp_inet_af_supported,
+	.cmp_addr      = sctp_inet_cmp_addr,
+	.bind_verify   = sctp_inet_bind_verify,
+	.send_verify   = sctp_inet_send_verify,
+	.supported_addrs = sctp_inet_supported_addrs,
+	.create_accept_sk = sctp_v4_create_accept_sk,
+	.addr_v4map	= sctp_v4_addr_v4map,
+	.af            = &sctp_ipv4_specific,
+};
+
+/* Notifier for inetaddr addition/deletion events.  */
+static struct notifier_block sctp_inetaddr_notifier = {
+	.notifier_call = sctp_inetaddr_event,
+};
+
+/* Socket operations.  */
+static struct proto_ops inet_seqpacket_ops = {
+	.family      = PF_INET,
+	.owner       = THIS_MODULE,
+	.release     = inet_release,       /* Needs to be wrapped... */
+	.bind        = inet_bind,
+	.connect     = inet_dgram_connect,
+	.socketpair  = sock_no_socketpair,
+	.accept      = inet_accept,
+	.getname     = inet_getname,      /* Semantics are different.  */
+	.poll        = sctp_poll,
+	.ioctl       = inet_ioctl,
+	.listen      = sctp_inet_listen,
+	.shutdown    = inet_shutdown,     /* Looks harmless.  */
+	.setsockopt  = sock_common_setsockopt,   /* IP_SOL IP_OPTION is a problem. */
+	.getsockopt  = sock_common_getsockopt,
+	.sendmsg     = inet_sendmsg,
+	.recvmsg     = sock_common_recvmsg,
+	.mmap        = sock_no_mmap,
+	.sendpage    = sock_no_sendpage,
+};
+
+/* Registration with AF_INET family.  */
+static struct inet_protosw sctp_seqpacket_protosw = {
+	.type       = SOCK_SEQPACKET,
+	.protocol   = IPPROTO_SCTP,
+	.prot       = &sctp_prot,
+	.ops        = &inet_seqpacket_ops,
+	.capability = -1,
+	.no_check   = 0,
+	.flags      = SCTP_PROTOSW_FLAG
+};
+static struct inet_protosw sctp_stream_protosw = {
+	.type       = SOCK_STREAM,
+	.protocol   = IPPROTO_SCTP,
+	.prot       = &sctp_prot,
+	.ops        = &inet_seqpacket_ops,
+	.capability = -1,
+	.no_check   = 0,
+	.flags      = SCTP_PROTOSW_FLAG
+};
+
+/* Register with IP layer.  */
+static struct net_protocol sctp_protocol = {
+	.handler     = sctp_rcv,
+	.err_handler = sctp_v4_err,
+	.no_policy   = 1,
+};
+
+/* IPv4 address related functions.  */
+static struct sctp_af sctp_ipv4_specific = {
+	.sctp_xmit      = sctp_v4_xmit,
+	.setsockopt     = ip_setsockopt,
+	.getsockopt     = ip_getsockopt,
+	.get_dst	= sctp_v4_get_dst,
+	.get_saddr	= sctp_v4_get_saddr,
+	.copy_addrlist  = sctp_v4_copy_addrlist,
+	.from_skb       = sctp_v4_from_skb,
+	.from_sk        = sctp_v4_from_sk,
+	.to_sk_saddr    = sctp_v4_to_sk_saddr,
+	.to_sk_daddr    = sctp_v4_to_sk_daddr,
+	.from_addr_param= sctp_v4_from_addr_param,
+	.to_addr_param  = sctp_v4_to_addr_param,	
+	.dst_saddr      = sctp_v4_dst_saddr,
+	.cmp_addr       = sctp_v4_cmp_addr,
+	.addr_valid     = sctp_v4_addr_valid,
+	.inaddr_any     = sctp_v4_inaddr_any,
+	.is_any         = sctp_v4_is_any,
+	.available      = sctp_v4_available,
+	.scope          = sctp_v4_scope,
+	.skb_iif        = sctp_v4_skb_iif,
+	.is_ce          = sctp_v4_is_ce,
+	.seq_dump_addr  = sctp_v4_seq_dump_addr,
+	.net_header_len = sizeof(struct iphdr),
+	.sockaddr_len   = sizeof(struct sockaddr_in),
+	.sa_family      = AF_INET,
+};
+
+struct sctp_pf *sctp_get_pf_specific(sa_family_t family) {
+
+	switch (family) {
+	case PF_INET:
+		return sctp_pf_inet_specific;
+	case PF_INET6:
+		return sctp_pf_inet6_specific;
+	default:
+		return NULL;
+	}
+}
+
+/* Register the PF specific function table.  */
+int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
+{
+	switch (family) {
+	case PF_INET:
+		if (sctp_pf_inet_specific)
+			return 0;
+		sctp_pf_inet_specific = pf;
+		break;
+	case PF_INET6:
+		if (sctp_pf_inet6_specific)
+			return 0;
+		sctp_pf_inet6_specific = pf;
+		break;
+	default:
+		return 0;
+	}
+	return 1;
+}
+
+static int __init init_sctp_mibs(void)
+{
+	sctp_statistics[0] = alloc_percpu(struct sctp_mib);
+	if (!sctp_statistics[0])
+		return -ENOMEM;
+	sctp_statistics[1] = alloc_percpu(struct sctp_mib);
+	if (!sctp_statistics[1]) {
+		free_percpu(sctp_statistics[0]);
+		return -ENOMEM;
+	}
+	return 0;
+
+}
+
+static void cleanup_sctp_mibs(void)
+{
+	free_percpu(sctp_statistics[0]);
+	free_percpu(sctp_statistics[1]);
+}
+
+/* Initialize the universe into something sensible.  */
+SCTP_STATIC __init int sctp_init(void)
+{
+	int i;
+	int status = -EINVAL;
+	unsigned long goal;
+	int order;
+
+	/* SCTP_DEBUG sanity check. */
+	if (!sctp_sanity_check())
+		goto out;
+
+	status = proto_register(&sctp_prot, 1);
+	if (status)
+		goto out;
+
+	/* Add SCTP to inet_protos hash table.  */
+	status = -EAGAIN;
+	if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0)
+		goto err_add_protocol;
+
+	/* Add SCTP(TCP and UDP style) to inetsw linked list.  */
+	inet_register_protosw(&sctp_seqpacket_protosw);
+	inet_register_protosw(&sctp_stream_protosw);
+
+	/* Allocate a cache pools. */
+	status = -ENOBUFS;
+	sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
+					       sizeof(struct sctp_bind_bucket),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+
+	if (!sctp_bucket_cachep)
+		goto err_bucket_cachep;
+
+	sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
+					       sizeof(struct sctp_chunk),
+					       0, SLAB_HWCACHE_ALIGN,
+					       NULL, NULL);
+	if (!sctp_chunk_cachep)
+		goto err_chunk_cachep;
+
+	/* Allocate and initialise sctp mibs.  */
+	status = init_sctp_mibs();
+	if (status)
+		goto err_init_mibs;
+
+	/* Initialize proc fs directory.  */
+	status = sctp_proc_init();
+	if (status)
+		goto err_init_proc;
+
+	/* Initialize object count debugging.  */
+	sctp_dbg_objcnt_init();
+
+	/* Initialize the SCTP specific PF functions. */
+	sctp_register_pf(&sctp_pf_inet, PF_INET);
+	/*
+	 * 14. Suggested SCTP Protocol Parameter Values
+	 */
+	/* The following protocol parameters are RECOMMENDED:  */
+	/* RTO.Initial              - 3  seconds */
+	sctp_rto_initial		= SCTP_RTO_INITIAL;
+	/* RTO.Min                  - 1  second */
+	sctp_rto_min	 		= SCTP_RTO_MIN;
+	/* RTO.Max                 -  60 seconds */
+	sctp_rto_max 			= SCTP_RTO_MAX;
+	/* RTO.Alpha                - 1/8 */
+	sctp_rto_alpha	        	= SCTP_RTO_ALPHA;
+	/* RTO.Beta                 - 1/4 */
+	sctp_rto_beta			= SCTP_RTO_BETA;
+
+	/* Valid.Cookie.Life        - 60  seconds */
+	sctp_valid_cookie_life		= 60 * HZ;
+
+	/* Whether Cookie Preservative is enabled(1) or not(0) */
+	sctp_cookie_preserve_enable 	= 1;
+
+	/* Max.Burst		    - 4 */
+	sctp_max_burst 			= SCTP_MAX_BURST;
+
+	/* Association.Max.Retrans  - 10 attempts
+	 * Path.Max.Retrans         - 5  attempts (per destination address)
+	 * Max.Init.Retransmits     - 8  attempts
+	 */
+	sctp_max_retrans_association 	= 10;
+	sctp_max_retrans_path		= 5;
+	sctp_max_retrans_init		= 8;
+
+	/* HB.interval              - 30 seconds */
+	sctp_hb_interval		= 30 * HZ;
+
+	/* Implementation specific variables. */
+
+	/* Initialize default stream count setup information. */
+	sctp_max_instreams    		= SCTP_DEFAULT_INSTREAMS;
+	sctp_max_outstreams   		= SCTP_DEFAULT_OUTSTREAMS;
+
+	/* Initialize handle used for association ids. */
+	idr_init(&sctp_assocs_id);
+
+	/* Size and allocate the association hash table.
+	 * The methodology is similar to that of the tcp hash tables.
+	 */
+	if (num_physpages >= (128 * 1024))
+		goal = num_physpages >> (22 - PAGE_SHIFT);
+	else
+		goal = num_physpages >> (24 - PAGE_SHIFT);
+
+	for (order = 0; (1UL << order) < goal; order++)
+		;
+
+	do {
+		sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
+					sizeof(struct sctp_hashbucket);
+		if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
+			continue;
+		sctp_assoc_hashtable = (struct sctp_hashbucket *)
+					__get_free_pages(GFP_ATOMIC, order);
+	} while (!sctp_assoc_hashtable && --order > 0);
+	if (!sctp_assoc_hashtable) {
+		printk(KERN_ERR "SCTP: Failed association hash alloc.\n");
+		status = -ENOMEM;
+		goto err_ahash_alloc;
+	}
+	for (i = 0; i < sctp_assoc_hashsize; i++) {
+		rwlock_init(&sctp_assoc_hashtable[i].lock);
+		sctp_assoc_hashtable[i].chain = NULL;
+	}
+
+	/* Allocate and initialize the endpoint hash table.  */
+	sctp_ep_hashsize = 64;
+	sctp_ep_hashtable = (struct sctp_hashbucket *)
+		kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
+	if (!sctp_ep_hashtable) {
+		printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n");
+		status = -ENOMEM;
+		goto err_ehash_alloc;
+	}
+	for (i = 0; i < sctp_ep_hashsize; i++) {
+		rwlock_init(&sctp_ep_hashtable[i].lock);
+		sctp_ep_hashtable[i].chain = NULL;
+	}
+
+	/* Allocate and initialize the SCTP port hash table.  */
+	do {
+		sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
+					sizeof(struct sctp_bind_hashbucket);
+		if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
+			continue;
+		sctp_port_hashtable = (struct sctp_bind_hashbucket *)
+					__get_free_pages(GFP_ATOMIC, order);
+	} while (!sctp_port_hashtable && --order > 0);
+	if (!sctp_port_hashtable) {
+		printk(KERN_ERR "SCTP: Failed bind hash alloc.");
+		status = -ENOMEM;
+		goto err_bhash_alloc;
+	}
+	for (i = 0; i < sctp_port_hashsize; i++) {
+		spin_lock_init(&sctp_port_hashtable[i].lock);
+		sctp_port_hashtable[i].chain = NULL;
+	}
+
+	spin_lock_init(&sctp_port_alloc_lock);
+	sctp_port_rover = sysctl_local_port_range[0] - 1;
+
+	printk(KERN_INFO "SCTP: Hash tables configured "
+			 "(established %d bind %d)\n",
+		sctp_assoc_hashsize, sctp_port_hashsize);
+
+	/* Disable ADDIP by default. */
+	sctp_addip_enable = 0;
+
+	/* Enable PR-SCTP by default. */
+	sctp_prsctp_enable = 1;
+
+	sctp_sysctl_register();
+
+	INIT_LIST_HEAD(&sctp_address_families);
+	sctp_register_af(&sctp_ipv4_specific);
+
+	status = sctp_v6_init();
+	if (status)
+		goto err_v6_init;
+
+	/* Initialize the control inode/socket for handling OOTB packets.  */
+	if ((status = sctp_ctl_sock_init())) {
+		printk (KERN_ERR
+			"SCTP: Failed to initialize the SCTP control sock.\n");
+		goto err_ctl_sock_init;
+	}
+
+	/* Initialize the local address list. */
+	INIT_LIST_HEAD(&sctp_local_addr_list);
+	spin_lock_init(&sctp_local_addr_lock);
+
+	/* Register notifier for inet address additions/deletions. */
+	register_inetaddr_notifier(&sctp_inetaddr_notifier);
+
+	sctp_get_local_addr_list();
+
+	__unsafe(THIS_MODULE);
+	status = 0;
+out:
+	return status;
+err_add_protocol:
+	proto_unregister(&sctp_prot);
+err_ctl_sock_init:
+	sctp_v6_exit();
+err_v6_init:
+	sctp_sysctl_unregister();
+	list_del(&sctp_ipv4_specific.list);
+	free_pages((unsigned long)sctp_port_hashtable,
+		   get_order(sctp_port_hashsize *
+			     sizeof(struct sctp_bind_hashbucket)));
+err_bhash_alloc:
+	kfree(sctp_ep_hashtable);
+err_ehash_alloc:
+	free_pages((unsigned long)sctp_assoc_hashtable,
+		   get_order(sctp_assoc_hashsize *
+			     sizeof(struct sctp_hashbucket)));
+err_ahash_alloc:
+	sctp_dbg_objcnt_exit();
+err_init_proc:
+	sctp_proc_exit();
+	cleanup_sctp_mibs();
+err_init_mibs:
+	kmem_cache_destroy(sctp_chunk_cachep);
+err_chunk_cachep:
+	kmem_cache_destroy(sctp_bucket_cachep);
+err_bucket_cachep:
+	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+	inet_unregister_protosw(&sctp_seqpacket_protosw);
+	inet_unregister_protosw(&sctp_stream_protosw);
+	goto out;
+}
+
+/* Exit handler for the SCTP protocol.  */
+SCTP_STATIC __exit void sctp_exit(void)
+{
+	/* BUG.  This should probably do something useful like clean
+	 * up all the remaining associations and all that memory.
+	 */
+
+	/* Unregister notifier for inet address additions/deletions. */
+	unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
+	/* Free the local address list.  */
+	sctp_free_local_addr_list();
+
+	/* Free the control endpoint.  */
+	sock_release(sctp_ctl_socket);
+
+	sctp_v6_exit();
+	sctp_sysctl_unregister();
+	list_del(&sctp_ipv4_specific.list);
+
+	free_pages((unsigned long)sctp_assoc_hashtable,
+		   get_order(sctp_assoc_hashsize *
+			     sizeof(struct sctp_hashbucket)));
+	kfree(sctp_ep_hashtable);
+	free_pages((unsigned long)sctp_port_hashtable,
+		   get_order(sctp_port_hashsize *
+			     sizeof(struct sctp_bind_hashbucket)));
+
+	kmem_cache_destroy(sctp_chunk_cachep);
+	kmem_cache_destroy(sctp_bucket_cachep);
+
+	sctp_dbg_objcnt_exit();
+	sctp_proc_exit();
+	cleanup_sctp_mibs();
+
+	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+	inet_unregister_protosw(&sctp_seqpacket_protosw);
+	inet_unregister_protosw(&sctp_stream_protosw);
+	proto_unregister(&sctp_prot);
+}
+
+module_init(sctp_init);
+module_exit(sctp_exit);
+
+MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
+MODULE_LICENSE("GPL");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
new file mode 100644
index 00000000000..1db12cc18cf
--- /dev/null
+++ b/net/sctp/sm_make_chunk.c
@@ -0,0 +1,2766 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2002 Intel Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions work with the state functions in sctp_sm_statefuns.c
+ * to implement the state operations.  These functions implement the
+ * steps which require modifying existing data structures.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    C. Robin              <chris@hundredacre.ac.uk>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Dajiang Zhang	    <dajiang.zhang@nokia.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Kevin Gao             <kevin.gao@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+
+#include <linux/skbuff.h>
+#include <linux/random.h>	/* for get_random_bytes */
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+extern kmem_cache_t *sctp_chunk_cachep;
+
+SCTP_STATIC
+struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
+				   __u8 type, __u8 flags, int paylen);
+static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *init_chunk,
+					int *cookie_len,
+					const __u8 *raw_addrs, int addrs_len);
+static int sctp_process_param(struct sctp_association *asoc,
+			      union sctp_params param,
+			      const union sctp_addr *peer_addr,
+			      int gfp);
+
+/* What was the inbound interface for this chunk? */
+int sctp_chunk_iif(const struct sctp_chunk *chunk)
+{
+	struct sctp_af *af;
+	int iif = 0;
+
+	af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version));
+	if (af)
+		iif = af->skb_iif(chunk->skb);
+
+	return iif;
+}
+
+/* RFC 2960 3.3.2 Initiation (INIT) (1)
+ *
+ * Note 2: The ECN capable field is reserved for future use of
+ * Explicit Congestion Notification.
+ */
+static const struct sctp_paramhdr ecap_param = {
+	SCTP_PARAM_ECN_CAPABLE,
+	__constant_htons(sizeof(struct sctp_paramhdr)),
+};
+static const struct sctp_paramhdr prsctp_param = {
+	SCTP_PARAM_FWD_TSN_SUPPORT,
+	__constant_htons(sizeof(struct sctp_paramhdr)),
+};
+
+/* A helper to initialize to initialize an op error inside a
+ * provided chunk, as most cause codes will be embedded inside an
+ * abort chunk.
+ */
+void  sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code,
+		      const void *payload, size_t paylen)
+{
+	sctp_errhdr_t err;
+	int padlen;
+	__u16 len;
+
+        /* Cause code constants are now defined in network order.  */
+	err.cause = cause_code;
+	len = sizeof(sctp_errhdr_t) + paylen;
+	padlen = len % 4;
+	err.length  = htons(len);
+	len += padlen;
+	sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+	chunk->subh.err_hdr = sctp_addto_chunk(chunk, paylen, payload);
+}
+
+/* 3.3.2 Initiation (INIT) (1)
+ *
+ * This chunk is used to initiate a SCTP association between two
+ * endpoints. The format of the INIT chunk is shown below:
+ *
+ *     0                   1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |   Type = 1    |  Chunk Flags  |      Chunk Length             |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                         Initiate Tag                          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |           Advertised Receiver Window Credit (a_rwnd)          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |  Number of Outbound Streams   |  Number of Inbound Streams    |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                          Initial TSN                          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    \                                                               \
+ *    /              Optional/Variable-Length Parameters              /
+ *    \                                                               \
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *
+ * The INIT chunk contains the following parameters. Unless otherwise
+ * noted, each parameter MUST only be included once in the INIT chunk.
+ *
+ * Fixed Parameters                     Status
+ * ----------------------------------------------
+ * Initiate Tag                        Mandatory
+ * Advertised Receiver Window Credit   Mandatory
+ * Number of Outbound Streams          Mandatory
+ * Number of Inbound Streams           Mandatory
+ * Initial TSN                         Mandatory
+ *
+ * Variable Parameters                  Status     Type Value
+ * -------------------------------------------------------------
+ * IPv4 Address (Note 1)               Optional    5
+ * IPv6 Address (Note 1)               Optional    6
+ * Cookie Preservative                 Optional    9
+ * Reserved for ECN Capable (Note 2)   Optional    32768 (0x8000)
+ * Host Name Address (Note 3)          Optional    11
+ * Supported Address Types (Note 4)    Optional    12
+ */
+struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
+			     const struct sctp_bind_addr *bp,
+			     int gfp, int vparam_len)
+{
+	sctp_inithdr_t init;
+	union sctp_params addrs;
+	size_t chunksize;
+	struct sctp_chunk *retval = NULL;
+	int num_types, addrs_len = 0;
+	struct sctp_sock *sp;
+	sctp_supported_addrs_param_t sat;
+	__u16 types[2];
+	sctp_adaption_ind_param_t aiparam;
+
+	/* RFC 2960 3.3.2 Initiation (INIT) (1)
+	 *
+	 * Note 1: The INIT chunks can contain multiple addresses that
+	 * can be IPv4 and/or IPv6 in any combination.
+	 */
+	retval = NULL;
+
+	/* Convert the provided bind address list to raw format. */
+	addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp);
+
+	init.init_tag		   = htonl(asoc->c.my_vtag);
+	init.a_rwnd		   = htonl(asoc->rwnd);
+	init.num_outbound_streams  = htons(asoc->c.sinit_num_ostreams);
+	init.num_inbound_streams   = htons(asoc->c.sinit_max_instreams);
+	init.initial_tsn	   = htonl(asoc->c.initial_tsn);
+
+	/* How many address types are needed? */
+	sp = sctp_sk(asoc->base.sk);
+	num_types = sp->pf->supported_addrs(sp, types);
+
+	chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types);
+	chunksize += sizeof(ecap_param);
+	if (sctp_prsctp_enable)
+		chunksize += sizeof(prsctp_param);
+	chunksize += sizeof(aiparam);
+	chunksize += vparam_len;
+
+	/* RFC 2960 3.3.2 Initiation (INIT) (1)
+	 *
+	 * Note 3: An INIT chunk MUST NOT contain more than one Host
+	 * Name address parameter. Moreover, the sender of the INIT
+	 * MUST NOT combine any other address types with the Host Name
+	 * address in the INIT. The receiver of INIT MUST ignore any
+	 * other address types if the Host Name address parameter is
+	 * present in the received INIT chunk.
+	 *
+	 * PLEASE DO NOT FIXME [This version does not support Host Name.]
+	 */
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_INIT, 0, chunksize);
+	if (!retval)
+		goto nodata;
+
+	retval->subh.init_hdr =
+		sctp_addto_chunk(retval, sizeof(init), &init);
+	retval->param_hdr.v =
+		sctp_addto_chunk(retval, addrs_len, addrs.v);
+
+	/* RFC 2960 3.3.2 Initiation (INIT) (1)
+	 *
+	 * Note 4: This parameter, when present, specifies all the
+	 * address types the sending endpoint can support. The absence
+	 * of this parameter indicates that the sending endpoint can
+	 * support any address type.
+	 */
+	sat.param_hdr.type = SCTP_PARAM_SUPPORTED_ADDRESS_TYPES;
+	sat.param_hdr.length = htons(SCTP_SAT_LEN(num_types));
+	sctp_addto_chunk(retval, sizeof(sat), &sat);
+	sctp_addto_chunk(retval, num_types * sizeof(__u16), &types);
+
+	sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
+	if (sctp_prsctp_enable)
+		sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
+	aiparam.param_hdr.type = SCTP_PARAM_ADAPTION_LAYER_IND;
+	aiparam.param_hdr.length = htons(sizeof(aiparam));
+	aiparam.adaption_ind = htonl(sp->adaption_ind);
+	sctp_addto_chunk(retval, sizeof(aiparam), &aiparam);
+nodata:
+	if (addrs.v)
+		kfree(addrs.v);
+	return retval;
+}
+
+struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
+				 const struct sctp_chunk *chunk,
+				 int gfp, int unkparam_len)
+{
+	sctp_inithdr_t initack;
+	struct sctp_chunk *retval;
+	union sctp_params addrs;
+	int addrs_len;
+	sctp_cookie_param_t *cookie;
+	int cookie_len;
+	size_t chunksize;
+	sctp_adaption_ind_param_t aiparam;
+
+	retval = NULL;
+
+	/* Note: there may be no addresses to embed. */
+	addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp);
+
+	initack.init_tag	        = htonl(asoc->c.my_vtag);
+	initack.a_rwnd			= htonl(asoc->rwnd);
+	initack.num_outbound_streams	= htons(asoc->c.sinit_num_ostreams);
+	initack.num_inbound_streams	= htons(asoc->c.sinit_max_instreams);
+	initack.initial_tsn		= htonl(asoc->c.initial_tsn);
+
+	/* FIXME:  We really ought to build the cookie right
+	 * into the packet instead of allocating more fresh memory.
+	 */
+	cookie = sctp_pack_cookie(asoc->ep, asoc, chunk, &cookie_len,
+				  addrs.v, addrs_len);
+	if (!cookie)
+		goto nomem_cookie;
+
+	/* Calculate the total size of allocation, include the reserved
+	 * space for reporting unknown parameters if it is specified.
+	 */
+	chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len;
+
+        /* Tell peer that we'll do ECN only if peer advertised such cap.  */
+	if (asoc->peer.ecn_capable)
+		chunksize += sizeof(ecap_param);
+
+        /* Tell peer that we'll do PR-SCTP only if peer advertised.  */
+	if (asoc->peer.prsctp_capable)
+		chunksize += sizeof(prsctp_param);
+
+	chunksize += sizeof(aiparam);
+
+	/* Now allocate and fill out the chunk.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
+	if (!retval)
+		goto nomem_chunk;
+
+	/* Per the advice in RFC 2960 6.4, send this reply to
+	 * the source of the INIT packet.
+	 */
+	retval->transport = chunk->transport;
+	retval->subh.init_hdr =
+		sctp_addto_chunk(retval, sizeof(initack), &initack);
+	retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
+	sctp_addto_chunk(retval, cookie_len, cookie);
+	if (asoc->peer.ecn_capable)
+		sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
+	if (asoc->peer.prsctp_capable)
+		sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
+
+	aiparam.param_hdr.type = SCTP_PARAM_ADAPTION_LAYER_IND;
+	aiparam.param_hdr.length = htons(sizeof(aiparam));
+	aiparam.adaption_ind = htonl(sctp_sk(asoc->base.sk)->adaption_ind);
+	sctp_addto_chunk(retval, sizeof(aiparam), &aiparam);
+
+	/* We need to remove the const qualifier at this point.  */
+	retval->asoc = (struct sctp_association *) asoc;
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [INIT ACK back to where the INIT came from.]
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+nomem_chunk:
+	kfree(cookie);
+nomem_cookie:
+	if (addrs.v)
+		kfree(addrs.v);
+	return retval;
+}
+
+/* 3.3.11 Cookie Echo (COOKIE ECHO) (10):
+ *
+ * This chunk is used only during the initialization of an association.
+ * It is sent by the initiator of an association to its peer to complete
+ * the initialization process. This chunk MUST precede any DATA chunk
+ * sent within the association, but MAY be bundled with one or more DATA
+ * chunks in the same packet.
+ *
+ *      0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |   Type = 10   |Chunk  Flags   |         Length                |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     /                     Cookie                                    /
+ *     \                                                               \
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Chunk Flags: 8 bit
+ *
+ *   Set to zero on transmit and ignored on receipt.
+ *
+ * Length: 16 bits (unsigned integer)
+ *
+ *   Set to the size of the chunk in bytes, including the 4 bytes of
+ *   the chunk header and the size of the Cookie.
+ *
+ * Cookie: variable size
+ *
+ *   This field must contain the exact cookie received in the
+ *   State Cookie parameter from the previous INIT ACK.
+ *
+ *   An implementation SHOULD make the cookie as small as possible
+ *   to insure interoperability.
+ */
+struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
+				    const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+	void *cookie;
+	int cookie_len;
+
+	cookie = asoc->peer.cookie;
+	cookie_len = asoc->peer.cookie_len;
+
+	/* Build a cookie echo chunk.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
+	if (!retval)
+		goto nodata;
+	retval->subh.cookie_hdr =
+		sctp_addto_chunk(retval, cookie_len, cookie);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [COOKIE ECHO back to where the INIT ACK came from.]
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+nodata:
+	return retval;
+}
+
+/* 3.3.12 Cookie Acknowledgement (COOKIE ACK) (11):
+ *
+ * This chunk is used only during the initialization of an
+ * association.  It is used to acknowledge the receipt of a COOKIE
+ * ECHO chunk.  This chunk MUST precede any DATA or SACK chunk sent
+ * within the association, but MAY be bundled with one or more DATA
+ * chunks or SACK chunk in the same SCTP packet.
+ *
+ *      0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |   Type = 11   |Chunk  Flags   |     Length = 4                |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Chunk Flags: 8 bits
+ *
+ *   Set to zero on transmit and ignored on receipt.
+ */
+struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
+				   const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [COOKIE ACK back to where the COOKIE ECHO came from.]
+	 */
+	if (retval && chunk)
+		retval->transport = chunk->transport;
+
+	return retval;
+}
+
+/*
+ *  Appendix A: Explicit Congestion Notification:
+ *  CWR:
+ *
+ *  RFC 2481 details a specific bit for a sender to send in the header of
+ *  its next outbound TCP segment to indicate to its peer that it has
+ *  reduced its congestion window.  This is termed the CWR bit.  For
+ *  SCTP the same indication is made by including the CWR chunk.
+ *  This chunk contains one data element, i.e. the TSN number that
+ *  was sent in the ECNE chunk.  This element represents the lowest
+ *  TSN number in the datagram that was originally marked with the
+ *  CE bit.
+ *
+ *     0                   1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    | Chunk Type=13 | Flags=00000000|    Chunk Length = 8           |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                      Lowest TSN Number                        |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *     Note: The CWR is considered a Control chunk.
+ */
+struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
+			    const __u32 lowest_tsn,
+			    const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+	sctp_cwrhdr_t cwr;
+
+	cwr.lowest_tsn = htonl(lowest_tsn);
+	retval = sctp_make_chunk(asoc, SCTP_CID_ECN_CWR, 0,
+				 sizeof(sctp_cwrhdr_t));
+
+	if (!retval)
+		goto nodata;
+
+	retval->subh.ecn_cwr_hdr =
+		sctp_addto_chunk(retval, sizeof(cwr), &cwr);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [Report a reduced congestion window back to where the ECNE
+	 * came from.]
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+nodata:
+	return retval;
+}
+
+/* Make an ECNE chunk.  This is a congestion experienced report.  */
+struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
+			     const __u32 lowest_tsn)
+{
+	struct sctp_chunk *retval;
+	sctp_ecnehdr_t ecne;
+
+	ecne.lowest_tsn = htonl(lowest_tsn);
+	retval = sctp_make_chunk(asoc, SCTP_CID_ECN_ECNE, 0,
+				 sizeof(sctp_ecnehdr_t));
+	if (!retval)
+		goto nodata;
+	retval->subh.ecne_hdr =
+		sctp_addto_chunk(retval, sizeof(ecne), &ecne);
+
+nodata:
+	return retval;
+}
+
+/* Make a DATA chunk for the given association from the provided
+ * parameters.  However, do not populate the data payload.
+ */
+struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+				       const struct sctp_sndrcvinfo *sinfo,
+				       int data_len, __u8 flags, __u16 ssn)
+{
+	struct sctp_chunk *retval;
+	struct sctp_datahdr dp;
+	int chunk_len;
+
+	/* We assign the TSN as LATE as possible, not here when
+	 * creating the chunk.
+	 */
+	dp.tsn = 0;
+	dp.stream = htons(sinfo->sinfo_stream);
+	dp.ppid   = sinfo->sinfo_ppid;
+
+	/* Set the flags for an unordered send.  */
+	if (sinfo->sinfo_flags & MSG_UNORDERED) {
+		flags |= SCTP_DATA_UNORDERED;
+		dp.ssn = 0;
+	} else
+		dp.ssn = htons(ssn);
+
+	chunk_len = sizeof(dp) + data_len;
+	retval = sctp_make_chunk(asoc, SCTP_CID_DATA, flags, chunk_len);
+	if (!retval)
+		goto nodata;
+
+	retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
+	memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+
+nodata:
+	return retval;
+}
+
+/* Create a selective ackowledgement (SACK) for the given
+ * association.  This reports on which TSN's we've seen to date,
+ * including duplicates and gaps.
+ */
+struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
+{
+	struct sctp_chunk *retval;
+	struct sctp_sackhdr sack;
+	int len;
+	__u32 ctsn;
+	__u16 num_gabs, num_dup_tsns;
+	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+
+	ctsn = sctp_tsnmap_get_ctsn(map);
+	SCTP_DEBUG_PRINTK("sackCTSNAck sent:  0x%x.\n", ctsn);
+
+	/* How much room is needed in the chunk? */
+	num_gabs = sctp_tsnmap_num_gabs(map);
+	num_dup_tsns = sctp_tsnmap_num_dups(map);
+
+	/* Initialize the SACK header.  */
+	sack.cum_tsn_ack	    = htonl(ctsn);
+	sack.a_rwnd 		    = htonl(asoc->a_rwnd);
+	sack.num_gap_ack_blocks     = htons(num_gabs);
+	sack.num_dup_tsns           = htons(num_dup_tsns);
+
+	len = sizeof(sack)
+		+ sizeof(struct sctp_gap_ack_block) * num_gabs
+		+ sizeof(__u32) * num_dup_tsns;
+
+	/* Create the chunk.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_SACK, 0, len);
+	if (!retval)
+		goto nodata;
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, etc.) to the same destination transport
+	 * address from which it received the DATA or control chunk to
+	 * which it is replying.  This rule should also be followed if
+	 * the endpoint is bundling DATA chunks together with the
+	 * reply chunk.
+	 *
+	 * However, when acknowledging multiple DATA chunks received
+	 * in packets from different source addresses in a single
+	 * SACK, the SACK chunk may be transmitted to one of the
+	 * destination transport addresses from which the DATA or
+	 * control chunks being acknowledged were received.
+	 *
+	 * [BUG:  We do not implement the following paragraph.
+	 * Perhaps we should remember the last transport we used for a
+	 * SACK and avoid that (if possible) if we have seen any
+	 * duplicates. --piggy]
+	 *
+	 * When a receiver of a duplicate DATA chunk sends a SACK to a
+	 * multi- homed endpoint it MAY be beneficial to vary the
+	 * destination address and not use the source address of the
+	 * DATA chunk.  The reason being that receiving a duplicate
+	 * from a multi-homed endpoint might indicate that the return
+	 * path (as specified in the source address of the DATA chunk)
+	 * for the SACK is broken.
+	 *
+	 * [Send to the address from which we last received a DATA chunk.]
+	 */
+	retval->transport = asoc->peer.last_data_from;
+
+	retval->subh.sack_hdr =
+		sctp_addto_chunk(retval, sizeof(sack), &sack);
+
+	/* Add the gap ack block information.   */
+	if (num_gabs)
+		sctp_addto_chunk(retval, sizeof(__u32) * num_gabs,
+				 sctp_tsnmap_get_gabs(map));
+
+	/* Add the duplicate TSN information.  */
+	if (num_dup_tsns)
+		sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
+				 sctp_tsnmap_get_dups(map));
+
+nodata:
+	return retval;
+}
+
+/* Make a SHUTDOWN chunk. */
+struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
+				      const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+	sctp_shutdownhdr_t shut;
+	__u32 ctsn;
+
+	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+	shut.cum_tsn_ack = htonl(ctsn);
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN, 0,
+				 sizeof(sctp_shutdownhdr_t));
+	if (!retval)
+		goto nodata;
+
+	retval->subh.shutdown_hdr =
+		sctp_addto_chunk(retval, sizeof(shut), &shut);
+
+	if (chunk)
+		retval->transport = chunk->transport;
+nodata:
+	return retval;
+}
+
+struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
+				     const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [ACK back to where the SHUTDOWN came from.]
+	 */
+	if (retval && chunk)
+		retval->transport = chunk->transport;
+
+	return retval;
+}
+
+struct sctp_chunk *sctp_make_shutdown_complete(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *retval;
+	__u8 flags = 0;
+
+	/* Maybe set the T-bit if we have no association. */
+	flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [Report SHUTDOWN COMPLETE back to where the SHUTDOWN ACK
+	 * came from.]
+	 */
+	if (retval && chunk)
+		retval->transport = chunk->transport;
+
+        return retval;
+}
+
+/* Create an ABORT.  Note that we set the T bit if we have no
+ * association.
+ */
+struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
+			      const struct sctp_chunk *chunk,
+			      const size_t hint)
+{
+	struct sctp_chunk *retval;
+	__u8 flags = 0;
+
+	/* Maybe set the T-bit if we have no association.  */
+	flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_ABORT, flags, hint);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [ABORT back to where the offender came from.]
+	 */
+	if (retval && chunk)
+		retval->transport = chunk->transport;
+
+	return retval;
+}
+
+/* Helper to create ABORT with a NO_USER_DATA error.  */
+struct sctp_chunk *sctp_make_abort_no_data(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk, __u32 tsn)
+{
+	struct sctp_chunk *retval;
+	__u32 payload;
+
+	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
+				 + sizeof(tsn));
+
+	if (!retval)
+		goto no_mem;
+
+	/* Put the tsn back into network byte order.  */
+	payload = htonl(tsn);
+	sctp_init_cause(retval, SCTP_ERROR_NO_DATA, (const void *)&payload,
+			sizeof(payload));
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [ABORT back to where the offender came from.]
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+no_mem:
+	return retval;
+}
+
+/* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error.  */
+struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
+				   const struct sctp_chunk *chunk,
+				   const struct msghdr *msg)
+{
+	struct sctp_chunk *retval;
+	void *payload = NULL, *payoff;
+	size_t paylen = 0;
+	struct iovec *iov = NULL;
+	int iovlen = 0;
+
+	if (msg) {
+		iov = msg->msg_iov;
+		iovlen = msg->msg_iovlen;
+		paylen = get_user_iov_size(iov, iovlen);
+	}
+
+	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen);
+	if (!retval)
+		goto err_chunk;
+
+	if (paylen) {
+		/* Put the msg_iov together into payload.  */
+		payload = kmalloc(paylen, GFP_ATOMIC);
+		if (!payload)
+			goto err_payload;
+		payoff = payload;
+
+		for (; iovlen > 0; --iovlen) {
+			if (copy_from_user(payoff, iov->iov_base,iov->iov_len))
+				goto err_copy;
+			payoff += iov->iov_len;
+			iov++;
+		}
+	}
+
+	sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen);
+
+	if (paylen)
+		kfree(payload);
+
+	return retval;
+
+err_copy:
+	kfree(payload);
+err_payload:
+	sctp_chunk_free(retval);
+	retval = NULL;
+err_chunk:
+	return retval;
+}
+
+/* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */ 
+struct sctp_chunk *sctp_make_abort_violation(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk,
+	const __u8   *payload,
+	const size_t paylen)
+{
+	struct sctp_chunk  *retval;
+	struct sctp_paramhdr phdr;
+
+	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen
+					+ sizeof(sctp_chunkhdr_t));
+	if (!retval)
+		goto end;
+
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, payload, paylen);
+
+	phdr.type = htons(chunk->chunk_hdr->type);
+	phdr.length = chunk->chunk_hdr->length;
+	sctp_addto_chunk(retval, sizeof(sctp_paramhdr_t), &phdr);
+
+end:
+	return retval;
+}
+
+/* Make a HEARTBEAT chunk.  */
+struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
+				  const struct sctp_transport *transport,
+				  const void *payload, const size_t paylen)
+{
+	struct sctp_chunk *retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT,
+						    0, paylen);
+
+	if (!retval)
+		goto nodata;
+
+	/* Cast away the 'const', as this is just telling the chunk
+	 * what transport it belongs to.
+	 */
+	retval->transport = (struct sctp_transport *) transport;
+	retval->subh.hbs_hdr = sctp_addto_chunk(retval, paylen, payload);
+
+nodata:
+	return retval;
+}
+
+struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
+				      const struct sctp_chunk *chunk,
+				      const void *payload, const size_t paylen)
+{
+	struct sctp_chunk *retval;
+
+	retval  = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
+	if (!retval)
+		goto nodata;
+
+	retval->subh.hbs_hdr = sctp_addto_chunk(retval, paylen, payload);
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it * received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [HBACK back to where the HEARTBEAT came from.]
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+nodata:
+	return retval;
+}
+
+/* Create an Operation Error chunk with the specified space reserved.
+ * This routine can be used for containing multiple causes in the chunk.
+ */
+static struct sctp_chunk *sctp_make_op_error_space(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk,
+	size_t size)
+{
+	struct sctp_chunk *retval;
+
+	retval = sctp_make_chunk(asoc, SCTP_CID_ERROR, 0,
+				 sizeof(sctp_errhdr_t) + size);
+	if (!retval)
+		goto nodata;
+
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, etc.) to the same destination transport
+	 * address from which it received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 */
+	if (chunk)
+		retval->transport = chunk->transport;
+
+nodata:
+	return retval;
+}
+
+/* Create an Operation Error chunk.  */
+struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+				 const struct sctp_chunk *chunk,
+				 __u16 cause_code, const void *payload,
+				 size_t paylen)
+{
+	struct sctp_chunk *retval;
+
+	retval = sctp_make_op_error_space(asoc, chunk, paylen);
+	if (!retval)
+		goto nodata;
+
+	sctp_init_cause(retval, cause_code, payload, paylen);
+
+nodata:
+	return retval;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* Turn an skb into a chunk.
+ * FIXME: Eventually move the structure directly inside the skb->cb[].
+ */
+struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
+			    const struct sctp_association *asoc,
+			    struct sock *sk)
+{
+	struct sctp_chunk *retval;
+
+	retval = kmem_cache_alloc(sctp_chunk_cachep, SLAB_ATOMIC);
+
+	if (!retval)
+		goto nodata;
+	memset(retval, 0, sizeof(struct sctp_chunk));
+
+	if (!sk) {
+		SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb);
+	}
+
+	retval->skb		= skb;
+	retval->asoc		= (struct sctp_association *)asoc;
+	retval->resent  	= 0;
+	retval->has_tsn		= 0;
+	retval->has_ssn         = 0;
+	retval->rtt_in_progress	= 0;
+	retval->sent_at		= 0;
+	retval->singleton	= 1;
+	retval->end_of_packet	= 0;
+	retval->ecn_ce_done	= 0;
+	retval->pdiscard	= 0;
+
+	/* sctpimpguide-05.txt Section 2.8.2
+	 * M1) Each time a new DATA chunk is transmitted
+	 * set the 'TSN.Missing.Report' count for that TSN to 0. The
+	 * 'TSN.Missing.Report' count will be used to determine missing chunks
+	 * and when to fast retransmit.
+	 */
+	retval->tsn_missing_report = 0;
+	retval->tsn_gap_acked = 0;
+	retval->fast_retransmit = 0;
+
+	/* If this is a fragmented message, track all fragments
+	 * of the message (for SEND_FAILED).
+	 */
+	retval->msg = NULL;
+
+	/* Polish the bead hole.  */
+	INIT_LIST_HEAD(&retval->transmitted_list);
+	INIT_LIST_HEAD(&retval->frag_list);
+	SCTP_DBG_OBJCNT_INC(chunk);
+	atomic_set(&retval->refcnt, 1);
+
+nodata:
+	return retval;
+}
+
+/* Set chunk->source and dest based on the IP header in chunk->skb.  */
+void sctp_init_addrs(struct sctp_chunk *chunk, union sctp_addr *src,
+		     union sctp_addr *dest)
+{
+	memcpy(&chunk->source, src, sizeof(union sctp_addr));
+	memcpy(&chunk->dest, dest, sizeof(union sctp_addr));
+}
+
+/* Extract the source address from a chunk.  */
+const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
+{
+	/* If we have a known transport, use that.  */
+	if (chunk->transport) {
+		return &chunk->transport->ipaddr;
+	} else {
+		/* Otherwise, extract it from the IP header.  */
+		return &chunk->source;
+	}
+}
+
+/* Create a new chunk, setting the type and flags headers from the
+ * arguments, reserving enough space for a 'paylen' byte payload.
+ */
+SCTP_STATIC
+struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
+				   __u8 type, __u8 flags, int paylen)
+{
+	struct sctp_chunk *retval;
+	sctp_chunkhdr_t *chunk_hdr;
+	struct sk_buff *skb;
+	struct sock *sk;
+
+	/* No need to allocate LL here, as this is only a chunk. */
+	skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen),
+			GFP_ATOMIC);
+	if (!skb)
+		goto nodata;
+
+	/* Make room for the chunk header.  */
+	chunk_hdr = (sctp_chunkhdr_t *)skb_put(skb, sizeof(sctp_chunkhdr_t));
+	chunk_hdr->type	  = type;
+	chunk_hdr->flags  = flags;
+	chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
+
+	sk = asoc ? asoc->base.sk : NULL;
+	retval = sctp_chunkify(skb, asoc, sk);
+	if (!retval) {
+		kfree_skb(skb);
+		goto nodata;
+	}
+
+	retval->chunk_hdr = chunk_hdr;
+	retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr);
+
+	/* Set the skb to the belonging sock for accounting.  */
+	skb->sk = sk;
+
+	return retval;
+nodata:
+	return NULL;
+}
+
+
+/* Release the memory occupied by a chunk.  */
+static void sctp_chunk_destroy(struct sctp_chunk *chunk)
+{
+	/* Free the chunk skb data and the SCTP_chunk stub itself. */
+	dev_kfree_skb(chunk->skb);
+
+	SCTP_DBG_OBJCNT_DEC(chunk);
+	kmem_cache_free(sctp_chunk_cachep, chunk);
+}
+
+/* Possibly, free the chunk.  */
+void sctp_chunk_free(struct sctp_chunk *chunk)
+{
+	/* Make sure that we are not on any list.  */
+	skb_unlink((struct sk_buff *) chunk);
+	list_del_init(&chunk->transmitted_list);
+
+	/* Release our reference on the message tracker. */
+	if (chunk->msg)
+		sctp_datamsg_put(chunk->msg);
+
+	sctp_chunk_put(chunk);
+}
+
+/* Grab a reference to the chunk. */
+void sctp_chunk_hold(struct sctp_chunk *ch)
+{
+	atomic_inc(&ch->refcnt);
+}
+
+/* Release a reference to the chunk. */
+void sctp_chunk_put(struct sctp_chunk *ch)
+{
+	if (atomic_dec_and_test(&ch->refcnt))
+		sctp_chunk_destroy(ch);
+}
+
+/* Append bytes to the end of a chunk.  Will panic if chunk is not big
+ * enough.
+ */
+void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
+{
+	void *target;
+	void *padding;
+	int chunklen = ntohs(chunk->chunk_hdr->length);
+	int padlen = chunklen % 4;
+
+	padding = skb_put(chunk->skb, padlen);
+	target = skb_put(chunk->skb, len);
+
+	memset(padding, 0, padlen);
+	memcpy(target, data, len);
+
+	/* Adjust the chunk length field.  */
+	chunk->chunk_hdr->length = htons(chunklen + padlen + len);
+	chunk->chunk_end = chunk->skb->tail;
+
+	return target;
+}
+
+/* Append bytes from user space to the end of a chunk.  Will panic if
+ * chunk is not big enough.
+ * Returns a kernel err value.
+ */
+int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
+			  struct iovec *data)
+{
+	__u8 *target;
+	int err = 0;
+
+	/* Make room in chunk for data.  */
+	target = skb_put(chunk->skb, len);
+
+	/* Copy data (whole iovec) into chunk */
+	if ((err = memcpy_fromiovecend(target, data, off, len)))
+		goto out;
+
+	/* Adjust the chunk length field.  */
+	chunk->chunk_hdr->length =
+		htons(ntohs(chunk->chunk_hdr->length) + len);
+	chunk->chunk_end = chunk->skb->tail;
+
+out:
+	return err;
+}
+
+/* Helper function to assign a TSN if needed.  This assumes that both
+ * the data_hdr and association have already been assigned.
+ */
+void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
+{
+	__u16 ssn;
+	__u16 sid;
+
+	if (chunk->has_ssn)
+		return;
+
+	/* This is the last possible instant to assign a SSN. */
+	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
+		ssn = 0;
+	} else {
+		sid = htons(chunk->subh.data_hdr->stream);
+		if (chunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG)
+			ssn = sctp_ssn_next(&chunk->asoc->ssnmap->out, sid);
+		else
+			ssn = sctp_ssn_peek(&chunk->asoc->ssnmap->out, sid);
+		ssn = htons(ssn);
+	}
+
+	chunk->subh.data_hdr->ssn = ssn;
+	chunk->has_ssn = 1;
+}
+
+/* Helper function to assign a TSN if needed.  This assumes that both
+ * the data_hdr and association have already been assigned.
+ */
+void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
+{
+	if (!chunk->has_tsn) {
+		/* This is the last possible instant to
+		 * assign a TSN.
+		 */
+		chunk->subh.data_hdr->tsn =
+			htonl(sctp_association_get_next_tsn(chunk->asoc));
+		chunk->has_tsn = 1;
+	}
+}
+
+/* Create a CLOSED association to use with an incoming packet.  */
+struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
+					struct sctp_chunk *chunk, int gfp)
+{
+	struct sctp_association *asoc;
+	struct sk_buff *skb;
+	sctp_scope_t scope;
+	struct sctp_af *af;
+
+	/* Create the bare association.  */
+	scope = sctp_scope(sctp_source(chunk));
+	asoc = sctp_association_new(ep, ep->base.sk, scope, gfp);
+	if (!asoc)
+		goto nodata;
+	asoc->temp = 1;
+	skb = chunk->skb;
+	/* Create an entry for the source address of the packet.  */
+	af = sctp_get_af_specific(ipver2af(skb->nh.iph->version));
+	if (unlikely(!af))
+		goto fail;
+	af->from_skb(&asoc->c.peer_addr, skb, 1);
+nodata:
+	return asoc;
+
+fail:
+	sctp_association_free(asoc);
+	return NULL;
+}
+
+/* Build a cookie representing asoc.
+ * This INCLUDES the param header needed to put the cookie in the INIT ACK.
+ */
+static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const struct sctp_chunk *init_chunk,
+				      int *cookie_len,
+				      const __u8 *raw_addrs, int addrs_len)
+{
+	sctp_cookie_param_t *retval;
+	struct sctp_signed_cookie *cookie;
+	struct scatterlist sg;
+	int headersize, bodysize;
+	unsigned int keylen;
+	char *key;
+
+	headersize = sizeof(sctp_paramhdr_t) + SCTP_SECRET_SIZE;
+	bodysize = sizeof(struct sctp_cookie)
+		+ ntohs(init_chunk->chunk_hdr->length) + addrs_len;
+
+	/* Pad out the cookie to a multiple to make the signature
+	 * functions simpler to write.
+	 */
+	if (bodysize % SCTP_COOKIE_MULTIPLE)
+		bodysize += SCTP_COOKIE_MULTIPLE
+			- (bodysize % SCTP_COOKIE_MULTIPLE);
+	*cookie_len = headersize + bodysize;
+
+	retval = (sctp_cookie_param_t *)kmalloc(*cookie_len, GFP_ATOMIC);
+
+	if (!retval) {
+		*cookie_len = 0;
+		goto nodata;
+	}
+
+	/* Clear this memory since we are sending this data structure
+	 * out on the network.
+	 */
+	memset(retval, 0x00, *cookie_len);
+	cookie = (struct sctp_signed_cookie *) retval->body;
+
+	/* Set up the parameter header.  */
+	retval->p.type = SCTP_PARAM_STATE_COOKIE;
+	retval->p.length = htons(*cookie_len);
+
+	/* Copy the cookie part of the association itself.  */
+	cookie->c = asoc->c;
+	/* Save the raw address list length in the cookie. */
+	cookie->c.raw_addr_list_len = addrs_len;
+
+	/* Remember PR-SCTP capability. */
+	cookie->c.prsctp_capable = asoc->peer.prsctp_capable;
+
+	/* Save adaption indication in the cookie. */
+	cookie->c.adaption_ind = asoc->peer.adaption_ind;
+
+	/* Set an expiration time for the cookie.  */
+	do_gettimeofday(&cookie->c.expiration);
+	TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration);
+
+	/* Copy the peer's init packet.  */
+	memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
+	       ntohs(init_chunk->chunk_hdr->length));
+
+	/* Copy the raw local address list of the association. */
+	memcpy((__u8 *)&cookie->c.peer_init[0] +
+	       ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
+
+  	if (sctp_sk(ep->base.sk)->hmac) {
+		/* Sign the message.  */
+		sg.page = virt_to_page(&cookie->c);
+		sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE;
+		sg.length = bodysize;
+		keylen = SCTP_SECRET_SIZE;
+		key = (char *)ep->secret_key[ep->current_key];
+
+		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
+				 &sg, 1, cookie->signature);
+	}
+
+nodata:
+	return retval;
+}
+
+/* Unpack the cookie from COOKIE ECHO chunk, recreating the association.  */
+struct sctp_association *sctp_unpack_cookie(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	struct sctp_chunk *chunk, int gfp,
+	int *error, struct sctp_chunk **errp)
+{
+	struct sctp_association *retval = NULL;
+	struct sctp_signed_cookie *cookie;
+	struct sctp_cookie *bear_cookie;
+	int headersize, bodysize, fixed_size;
+	__u8 digest[SCTP_SIGNATURE_SIZE];
+	struct scatterlist sg;
+	unsigned int keylen, len;
+	char *key;
+	sctp_scope_t scope;
+	struct sk_buff *skb = chunk->skb;
+
+	headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE;
+	bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
+	fixed_size = headersize + sizeof(struct sctp_cookie);
+
+	/* Verify that the chunk looks like it even has a cookie.
+	 * There must be enough room for our cookie and our peer's
+	 * INIT chunk.
+	 */
+	len = ntohs(chunk->chunk_hdr->length);
+	if (len < fixed_size + sizeof(struct sctp_chunkhdr))
+		goto malformed;
+
+	/* Verify that the cookie has been padded out. */
+	if (bodysize % SCTP_COOKIE_MULTIPLE)
+		goto malformed;
+
+	/* Process the cookie.  */
+	cookie = chunk->subh.cookie_hdr;
+	bear_cookie = &cookie->c;
+
+	if (!sctp_sk(ep->base.sk)->hmac)
+		goto no_hmac;
+
+	/* Check the signature.  */
+	keylen = SCTP_SECRET_SIZE;
+	sg.page = virt_to_page(bear_cookie);
+	sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE;
+	sg.length = bodysize;
+	key = (char *)ep->secret_key[ep->current_key];
+
+	memset(digest, 0x00, sizeof(digest));
+	sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg,
+			 1, digest);
+
+	if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
+		/* Try the previous key. */
+		key = (char *)ep->secret_key[ep->last_key];
+		memset(digest, 0x00, sizeof(digest));
+		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
+				 &sg, 1, digest);
+
+		if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
+			/* Yikes!  Still bad signature! */
+			*error = -SCTP_IERROR_BAD_SIG;
+			goto fail;
+		}
+	}
+
+no_hmac:
+	/* IG Section 2.35.2:
+	 *  3) Compare the port numbers and the verification tag contained
+	 *     within the COOKIE ECHO chunk to the actual port numbers and the
+	 *     verification tag within the SCTP common header of the received
+	 *     packet. If these values do not match the packet MUST be silently
+	 *     discarded,
+	 */
+	if (ntohl(chunk->sctp_hdr->vtag) != bear_cookie->my_vtag) {
+		*error = -SCTP_IERROR_BAD_TAG;
+		goto fail;
+	}
+
+	if (ntohs(chunk->sctp_hdr->source) != bear_cookie->peer_addr.v4.sin_port ||
+	    ntohs(chunk->sctp_hdr->dest) != bear_cookie->my_port) {
+		*error = -SCTP_IERROR_BAD_PORTS;
+		goto fail;
+	}
+
+	/* Check to see if the cookie is stale.  If there is already
+	 * an association, there is no need to check cookie's expiration
+	 * for init collision case of lost COOKIE ACK.
+	 */
+	if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) {
+		__u16 len;
+		/*
+		 * Section 3.3.10.3 Stale Cookie Error (3)
+		 *
+		 * Cause of error
+		 * ---------------
+		 * Stale Cookie Error:  Indicates the receipt of a valid State
+		 * Cookie that has expired.
+		 */
+		len = ntohs(chunk->chunk_hdr->length);
+		*errp = sctp_make_op_error_space(asoc, chunk, len);
+		if (*errp) {
+			suseconds_t usecs = (skb->stamp.tv_sec -
+				bear_cookie->expiration.tv_sec) * 1000000L +
+				skb->stamp.tv_usec -
+				bear_cookie->expiration.tv_usec;
+
+			usecs = htonl(usecs);
+			sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
+					&usecs, sizeof(usecs));
+			*error = -SCTP_IERROR_STALE_COOKIE;
+		} else
+			*error = -SCTP_IERROR_NOMEM;
+
+		goto fail;
+	}
+
+	/* Make a new base association.  */
+	scope = sctp_scope(sctp_source(chunk));
+	retval = sctp_association_new(ep, ep->base.sk, scope, gfp);
+	if (!retval) {
+		*error = -SCTP_IERROR_NOMEM;
+		goto fail;
+	}
+
+	/* Set up our peer's port number.  */
+	retval->peer.port = ntohs(chunk->sctp_hdr->source);
+
+	/* Populate the association from the cookie.  */
+	memcpy(&retval->c, bear_cookie, sizeof(*bear_cookie));
+
+	if (sctp_assoc_set_bind_addr_from_cookie(retval, bear_cookie,
+						 GFP_ATOMIC) < 0) {
+		*error = -SCTP_IERROR_NOMEM;
+		goto fail;
+	}
+
+	/* Also, add the destination address. */
+	if (list_empty(&retval->base.bind_addr.address_list)) {
+		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
+				   GFP_ATOMIC);
+	}
+
+	retval->next_tsn = retval->c.initial_tsn;
+	retval->ctsn_ack_point = retval->next_tsn - 1;
+	retval->addip_serial = retval->c.initial_tsn;
+	retval->adv_peer_ack_point = retval->ctsn_ack_point;
+	retval->peer.prsctp_capable = retval->c.prsctp_capable;
+	retval->peer.adaption_ind = retval->c.adaption_ind;
+
+	/* The INIT stuff will be done by the side effects.  */
+	return retval;
+
+fail:
+	if (retval)
+		sctp_association_free(retval);
+
+	return NULL;
+
+malformed:
+	/* Yikes!  The packet is either corrupt or deliberately
+	 * malformed.
+	 */
+	*error = -SCTP_IERROR_MALFORMED;
+	goto fail;
+}
+
+/********************************************************************
+ * 3rd Level Abstractions
+ ********************************************************************/
+
+struct __sctp_missing {
+	__u32 num_missing;
+	__u16 type;
+}  __attribute__((packed));
+
+/*
+ * Report a missing mandatory parameter.
+ */
+static int sctp_process_missing_param(const struct sctp_association *asoc,
+				      sctp_param_t paramtype,
+				      struct sctp_chunk *chunk,
+				      struct sctp_chunk **errp)
+{
+	struct __sctp_missing report;
+	__u16 len;
+
+	len = WORD_ROUND(sizeof(report));
+
+	/* Make an ERROR chunk, preparing enough room for
+	 * returning multiple unknown parameters.
+	 */
+	if (!*errp)
+		*errp = sctp_make_op_error_space(asoc, chunk, len);
+
+	if (*errp) {
+		report.num_missing = htonl(1);
+		report.type = paramtype;
+		sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM,
+				&report, sizeof(report));
+	}
+
+	/* Stop processing this chunk. */
+	return 0;
+}
+
+/* Report an Invalid Mandatory Parameter.  */
+static int sctp_process_inv_mandatory(const struct sctp_association *asoc,
+				      struct sctp_chunk *chunk,
+				      struct sctp_chunk **errp)
+{
+	/* Invalid Mandatory Parameter Error has no payload. */
+
+	if (!*errp)
+		*errp = sctp_make_op_error_space(asoc, chunk, 0);
+
+	if (*errp)
+		sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM, NULL, 0);
+
+	/* Stop processing this chunk. */
+	return 0;
+}
+
+static int sctp_process_inv_paramlength(const struct sctp_association *asoc,
+					struct sctp_paramhdr *param,
+					const struct sctp_chunk *chunk,
+					struct sctp_chunk **errp)
+{
+	char		error[] = "The following parameter had invalid length:";
+	size_t		payload_len = WORD_ROUND(sizeof(error)) + 
+						sizeof(sctp_paramhdr_t);
+
+
+	/* Create an error chunk and fill it in with our payload. */
+	if (!*errp)
+		*errp = sctp_make_op_error_space(asoc, chunk, payload_len);
+
+	if (*errp) {
+		sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, error,
+				sizeof(error));
+		sctp_addto_chunk(*errp, sizeof(sctp_paramhdr_t), param);
+	}
+
+	return 0;
+}
+
+
+/* Do not attempt to handle the HOST_NAME parm.  However, do
+ * send back an indicator to the peer.
+ */
+static int sctp_process_hn_param(const struct sctp_association *asoc,
+				 union sctp_params param,
+				 struct sctp_chunk *chunk,
+				 struct sctp_chunk **errp)
+{
+	__u16 len = ntohs(param.p->length);
+
+	/* Make an ERROR chunk. */
+	if (!*errp)
+		*errp = sctp_make_op_error_space(asoc, chunk, len);
+
+	if (*errp)
+		sctp_init_cause(*errp, SCTP_ERROR_DNS_FAILED,
+				param.v, len);
+
+	/* Stop processing this chunk. */
+	return 0;
+}
+
+/* RFC 3.2.1 & the Implementers Guide 2.2.
+ *
+ * The Parameter Types are encoded such that the
+ * highest-order two bits specify the action that must be
+ * taken if the processing endpoint does not recognize the
+ * Parameter Type.
+ *
+ * 00 - Stop processing this SCTP chunk and discard it,
+ *	do not process any further chunks within it.
+ *
+ * 01 - Stop processing this SCTP chunk and discard it,
+ *	do not process any further chunks within it, and report
+ *	the unrecognized parameter in an 'Unrecognized
+ *	Parameter Type' (in either an ERROR or in the INIT ACK).
+ *
+ * 10 - Skip this parameter and continue processing.
+ *
+ * 11 - Skip this parameter and continue processing but
+ *	report the unrecognized parameter in an
+ *	'Unrecognized Parameter Type' (in either an ERROR or in
+ *	the INIT ACK).
+ *
+ * Return value:
+ * 	0 - discard the chunk
+ * 	1 - continue with the chunk
+ */
+static int sctp_process_unk_param(const struct sctp_association *asoc,
+				  union sctp_params param,
+				  struct sctp_chunk *chunk,
+				  struct sctp_chunk **errp)
+{
+	int retval = 1;
+
+	switch (param.p->type & SCTP_PARAM_ACTION_MASK) {
+	case SCTP_PARAM_ACTION_DISCARD:
+		retval =  0;
+		break;
+	case SCTP_PARAM_ACTION_DISCARD_ERR:
+		retval =  0;
+		/* Make an ERROR chunk, preparing enough room for
+		 * returning multiple unknown parameters.
+		 */
+		if (NULL == *errp)
+			*errp = sctp_make_op_error_space(asoc, chunk,
+					ntohs(chunk->chunk_hdr->length));
+
+		if (*errp)
+			sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+					param.v,
+					WORD_ROUND(ntohs(param.p->length)));
+
+		break;
+	case SCTP_PARAM_ACTION_SKIP:
+		break;
+	case SCTP_PARAM_ACTION_SKIP_ERR:
+		/* Make an ERROR chunk, preparing enough room for
+		 * returning multiple unknown parameters.
+		 */
+		if (NULL == *errp)
+			*errp = sctp_make_op_error_space(asoc, chunk,
+					ntohs(chunk->chunk_hdr->length));
+
+		if (*errp) {
+			sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+					param.v,
+					WORD_ROUND(ntohs(param.p->length)));
+		} else {
+			/* If there is no memory for generating the ERROR
+			 * report as specified, an ABORT will be triggered
+			 * to the peer and the association won't be
+			 * established.
+			 */
+			retval = 0;
+		}
+
+		break;
+	default:
+		break;
+	}
+
+	return retval;
+}
+
+/* Find unrecognized parameters in the chunk.
+ * Return values:
+ * 	0 - discard the chunk
+ * 	1 - continue with the chunk
+ */
+static int sctp_verify_param(const struct sctp_association *asoc,
+			     union sctp_params param,
+			     sctp_cid_t cid,
+			     struct sctp_chunk *chunk,
+			     struct sctp_chunk **err_chunk)
+{
+	int retval = 1;
+
+	/* FIXME - This routine is not looking at each parameter per the
+	 * chunk type, i.e., unrecognized parameters should be further
+	 * identified based on the chunk id.
+	 */
+
+	switch (param.p->type) {
+	case SCTP_PARAM_IPV4_ADDRESS:
+	case SCTP_PARAM_IPV6_ADDRESS:
+	case SCTP_PARAM_COOKIE_PRESERVATIVE:
+	case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES:
+	case SCTP_PARAM_STATE_COOKIE:
+	case SCTP_PARAM_HEARTBEAT_INFO:
+	case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
+	case SCTP_PARAM_ECN_CAPABLE:
+	case SCTP_PARAM_ADAPTION_LAYER_IND:
+		break;
+
+	case SCTP_PARAM_HOST_NAME_ADDRESS:
+		/* Tell the peer, we won't support this param.  */
+		return sctp_process_hn_param(asoc, param, chunk, err_chunk);
+	case SCTP_PARAM_FWD_TSN_SUPPORT:
+		if (sctp_prsctp_enable)
+			break;
+		/* Fall Through */ 
+	default:
+		SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n",
+				ntohs(param.p->type), cid);
+		return sctp_process_unk_param(asoc, param, chunk, err_chunk);
+
+		break;
+	}
+	return retval;
+}
+
+/* Verify the INIT packet before we process it.  */
+int sctp_verify_init(const struct sctp_association *asoc,
+		     sctp_cid_t cid,
+		     sctp_init_chunk_t *peer_init,
+		     struct sctp_chunk *chunk,
+		     struct sctp_chunk **errp)
+{
+	union sctp_params param;
+	int has_cookie = 0;
+
+	/* Verify stream values are non-zero. */
+	if ((0 == peer_init->init_hdr.num_outbound_streams) ||
+	    (0 == peer_init->init_hdr.num_inbound_streams)) {
+
+		sctp_process_inv_mandatory(asoc, chunk, errp);
+		return 0;
+	}
+
+	/* Check for missing mandatory parameters.  */
+	sctp_walk_params(param, peer_init, init_hdr.params) {
+
+		if (SCTP_PARAM_STATE_COOKIE == param.p->type)
+			has_cookie = 1;
+
+	} /* for (loop through all parameters) */
+
+	/* There is a possibility that a parameter length was bad and
+	 * in that case we would have stoped walking the parameters.
+	 * The current param.p would point at the bad one.
+	 * Current consensus on the mailing list is to generate a PROTOCOL
+	 * VIOLATION error.  We build the ERROR chunk here and let the normal
+	 * error handling code build and send the packet.
+	 */
+	if (param.v < (void*)chunk->chunk_end - sizeof(sctp_paramhdr_t)) {
+		sctp_process_inv_paramlength(asoc, param.p, chunk, errp);
+		return 0;
+	}
+
+	/* The only missing mandatory param possible today is
+	 * the state cookie for an INIT-ACK chunk.
+	 */
+	if ((SCTP_CID_INIT_ACK == cid) && !has_cookie) {
+		sctp_process_missing_param(asoc, SCTP_PARAM_STATE_COOKIE,
+					   chunk, errp);
+		return 0;
+	}
+
+	/* Find unrecognized parameters. */
+
+	sctp_walk_params(param, peer_init, init_hdr.params) {
+
+		if (!sctp_verify_param(asoc, param, cid, chunk, errp)) {
+			if (SCTP_PARAM_HOST_NAME_ADDRESS == param.p->type)
+				return 0;
+			else
+				return 1;
+		}
+
+	} /* for (loop through all parameters) */
+
+	return 1;
+}
+
+/* Unpack the parameters in an INIT packet into an association.
+ * Returns 0 on failure, else success.
+ * FIXME:  This is an association method.
+ */
+int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
+		      const union sctp_addr *peer_addr,
+		      sctp_init_chunk_t *peer_init, int gfp)
+{
+	union sctp_params param;
+	struct sctp_transport *transport;
+	struct list_head *pos, *temp;
+	char *cookie;
+
+	/* We must include the address that the INIT packet came from.
+	 * This is the only address that matters for an INIT packet.
+	 * When processing a COOKIE ECHO, we retrieve the from address
+	 * of the INIT from the cookie.
+	 */
+
+	/* This implementation defaults to making the first transport
+	 * added as the primary transport.  The source address seems to
+	 * be a a better choice than any of the embedded addresses.
+	 */
+	if (peer_addr)
+		if(!sctp_assoc_add_peer(asoc, peer_addr, gfp))
+			goto nomem;
+
+	/* Process the initialization parameters.  */
+
+	sctp_walk_params(param, peer_init, init_hdr.params) {
+
+		if (!sctp_process_param(asoc, param, peer_addr, gfp))
+                        goto clean_up;
+	}
+
+	/* The fixed INIT headers are always in network byte
+	 * order.
+	 */
+	asoc->peer.i.init_tag =
+		ntohl(peer_init->init_hdr.init_tag);
+	asoc->peer.i.a_rwnd =
+		ntohl(peer_init->init_hdr.a_rwnd);
+	asoc->peer.i.num_outbound_streams =
+		ntohs(peer_init->init_hdr.num_outbound_streams);
+	asoc->peer.i.num_inbound_streams =
+		ntohs(peer_init->init_hdr.num_inbound_streams);
+	asoc->peer.i.initial_tsn =
+		ntohl(peer_init->init_hdr.initial_tsn);
+
+	/* Apply the upper bounds for output streams based on peer's
+	 * number of inbound streams.
+	 */
+	if (asoc->c.sinit_num_ostreams  >
+	    ntohs(peer_init->init_hdr.num_inbound_streams)) {
+		asoc->c.sinit_num_ostreams =
+			ntohs(peer_init->init_hdr.num_inbound_streams);
+	}
+
+	if (asoc->c.sinit_max_instreams >
+	    ntohs(peer_init->init_hdr.num_outbound_streams)) {
+		asoc->c.sinit_max_instreams =
+			ntohs(peer_init->init_hdr.num_outbound_streams);
+	}
+
+	/* Copy Initiation tag from INIT to VT_peer in cookie.   */
+	asoc->c.peer_vtag = asoc->peer.i.init_tag;
+
+	/* Peer Rwnd   : Current calculated value of the peer's rwnd.  */
+	asoc->peer.rwnd = asoc->peer.i.a_rwnd;
+
+	/* Copy cookie in case we need to resend COOKIE-ECHO. */
+	cookie = asoc->peer.cookie;
+	if (cookie) {
+		asoc->peer.cookie = kmalloc(asoc->peer.cookie_len, gfp);
+		if (!asoc->peer.cookie)
+			goto clean_up;
+		memcpy(asoc->peer.cookie, cookie, asoc->peer.cookie_len);
+	}
+
+	/* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily
+	 * high (for example, implementations MAY use the size of the receiver
+	 * advertised window).
+	 */
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		transport->ssthresh = asoc->peer.i.a_rwnd;
+	}
+
+	/* Set up the TSN tracking pieces.  */
+	sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE,
+			 asoc->peer.i.initial_tsn);
+
+	/* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
+	 *
+	 * The stream sequence number in all the streams shall start
+	 * from 0 when the association is established.  Also, when the
+	 * stream sequence number reaches the value 65535 the next
+	 * stream sequence number shall be set to 0.
+	 */
+
+	/* Allocate storage for the negotiated streams if it is not a temporary 	 * association.
+	 */
+	if (!asoc->temp) {
+		int assoc_id;
+		int error;
+
+		asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams,
+					       asoc->c.sinit_num_ostreams, gfp);
+		if (!asoc->ssnmap)
+			goto clean_up;
+
+	retry:
+		if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
+			goto clean_up;
+		spin_lock_bh(&sctp_assocs_id_lock);
+		error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, 1,
+					  &assoc_id);
+		spin_unlock_bh(&sctp_assocs_id_lock);
+		if (error == -EAGAIN)
+			goto retry;
+		else if (error)
+			goto clean_up;
+
+		asoc->assoc_id = (sctp_assoc_t) assoc_id;
+	}
+
+	/* ADDIP Section 4.1 ASCONF Chunk Procedures
+	 *
+	 * When an endpoint has an ASCONF signaled change to be sent to the
+	 * remote endpoint it should do the following:
+	 * ...
+	 * A2) A serial number should be assigned to the Chunk. The serial
+	 * number should be a monotonically increasing number. All serial
+	 * numbers are defined to be initialized at the start of the
+	 * association to the same value as the Initial TSN.
+	 */
+	asoc->peer.addip_serial = asoc->peer.i.initial_tsn - 1;
+	return 1;
+
+clean_up:
+	/* Release the transport structures. */
+	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
+		transport = list_entry(pos, struct sctp_transport, transports);
+		list_del_init(pos);
+		sctp_transport_free(transport);
+	}
+nomem:
+	return 0;
+}
+
+
+/* Update asoc with the option described in param.
+ *
+ * RFC2960 3.3.2.1 Optional/Variable Length Parameters in INIT
+ *
+ * asoc is the association to update.
+ * param is the variable length parameter to use for update.
+ * cid tells us if this is an INIT, INIT ACK or COOKIE ECHO.
+ * If the current packet is an INIT we want to minimize the amount of
+ * work we do.  In particular, we should not build transport
+ * structures for the addresses.
+ */
+static int sctp_process_param(struct sctp_association *asoc,
+			      union sctp_params param,
+			      const union sctp_addr *peer_addr,
+			      int gfp)
+{
+	union sctp_addr addr;
+	int i;
+	__u16 sat;
+	int retval = 1;
+	sctp_scope_t scope;
+	time_t stale;
+	struct sctp_af *af;
+
+	/* We maintain all INIT parameters in network byte order all the
+	 * time.  This allows us to not worry about whether the parameters
+	 * came from a fresh INIT, and INIT ACK, or were stored in a cookie.
+	 */
+	switch (param.p->type) {
+	case SCTP_PARAM_IPV6_ADDRESS:
+		if (PF_INET6 != asoc->base.sk->sk_family)
+			break;
+		/* Fall through. */
+	case SCTP_PARAM_IPV4_ADDRESS:
+		af = sctp_get_af_specific(param_type2af(param.p->type));
+		af->from_addr_param(&addr, param.addr, asoc->peer.port, 0);
+		scope = sctp_scope(peer_addr);
+		if (sctp_in_scope(&addr, scope))
+			if (!sctp_assoc_add_peer(asoc, &addr, gfp))
+				return 0;
+		break;
+
+	case SCTP_PARAM_COOKIE_PRESERVATIVE:
+		if (!sctp_cookie_preserve_enable)
+			break;
+
+		stale = ntohl(param.life->lifespan_increment);
+
+		/* Suggested Cookie Life span increment's unit is msec,
+		 * (1/1000sec).
+		 */
+		asoc->cookie_life.tv_sec += stale / 1000;
+		asoc->cookie_life.tv_usec += (stale % 1000) * 1000;
+		break;
+
+	case SCTP_PARAM_HOST_NAME_ADDRESS:
+		SCTP_DEBUG_PRINTK("unimplemented SCTP_HOST_NAME_ADDRESS\n");
+		break;
+
+	case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES:
+		/* Turn off the default values first so we'll know which
+		 * ones are really set by the peer.
+		 */
+		asoc->peer.ipv4_address = 0;
+		asoc->peer.ipv6_address = 0;
+
+		/* Cycle through address types; avoid divide by 0. */
+		sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+		if (sat)
+			sat /= sizeof(__u16);
+
+		for (i = 0; i < sat; ++i) {
+			switch (param.sat->types[i]) {
+			case SCTP_PARAM_IPV4_ADDRESS:
+				asoc->peer.ipv4_address = 1;
+				break;
+
+			case SCTP_PARAM_IPV6_ADDRESS:
+				asoc->peer.ipv6_address = 1;
+				break;
+
+			case SCTP_PARAM_HOST_NAME_ADDRESS:
+				asoc->peer.hostname_address = 1;
+				break;
+
+			default: /* Just ignore anything else.  */
+				break;
+			};
+		}
+		break;
+
+	case SCTP_PARAM_STATE_COOKIE:
+		asoc->peer.cookie_len =
+			ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+		asoc->peer.cookie = param.cookie->body;
+		break;
+
+	case SCTP_PARAM_HEARTBEAT_INFO:
+		/* Would be odd to receive, but it causes no problems. */
+		break;
+
+	case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
+		/* Rejected during verify stage. */
+		break;
+
+	case SCTP_PARAM_ECN_CAPABLE:
+		asoc->peer.ecn_capable = 1;
+		break;
+
+	case SCTP_PARAM_ADAPTION_LAYER_IND:
+		asoc->peer.adaption_ind = param.aind->adaption_ind;
+		break;
+
+	case SCTP_PARAM_FWD_TSN_SUPPORT:
+		if (sctp_prsctp_enable) {
+			asoc->peer.prsctp_capable = 1;
+			break;
+		}
+		/* Fall Through */ 
+	default:
+		/* Any unrecognized parameters should have been caught
+		 * and handled by sctp_verify_param() which should be
+		 * called prior to this routine.  Simply log the error
+		 * here.
+		 */
+		SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
+				  ntohs(param.p->type), asoc);
+		break;
+	};
+
+	return retval;
+}
+
+/* Select a new verification tag.  */
+__u32 sctp_generate_tag(const struct sctp_endpoint *ep)
+{
+	/* I believe that this random number generator complies with RFC1750.
+	 * A tag of 0 is reserved for special cases (e.g. INIT).
+	 */
+	__u32 x;
+
+	do {
+		get_random_bytes(&x, sizeof(__u32));
+	} while (x == 0);
+
+	return x;
+}
+
+/* Select an initial TSN to send during startup.  */
+__u32 sctp_generate_tsn(const struct sctp_endpoint *ep)
+{
+	__u32 retval;
+
+	get_random_bytes(&retval, sizeof(__u32));
+	return retval;
+}
+
+/*
+ * ADDIP 3.1.1 Address Configuration Change Chunk (ASCONF)
+ *      0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     | Type = 0xC1   |  Chunk Flags  |      Chunk Length             |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                       Serial Number                           |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                    Address Parameter                          |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                     ASCONF Parameter #1                       |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     \                                                               \
+ *     /                             ....                              /
+ *     \                                                               \
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                     ASCONF Parameter #N                       |
+ *      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Address Parameter and other parameter will not be wrapped in this function 
+ */
+static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
+					   union sctp_addr *addr,
+					   int vparam_len)
+{
+	sctp_addiphdr_t asconf;
+	struct sctp_chunk *retval;
+	int length = sizeof(asconf) + vparam_len;
+	union sctp_addr_param addrparam;
+	int addrlen;
+	struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+
+	addrlen = af->to_addr_param(addr, &addrparam);
+	if (!addrlen)
+		return NULL;
+	length += addrlen;
+
+	/* Create the chunk.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF, 0, length);
+	if (!retval)
+		return NULL;
+
+	asconf.serial = htonl(asoc->addip_serial++);
+
+	retval->subh.addip_hdr =
+		sctp_addto_chunk(retval, sizeof(asconf), &asconf);
+	retval->param_hdr.v =
+		sctp_addto_chunk(retval, addrlen, &addrparam);
+
+	return retval;
+}
+
+/* ADDIP
+ * 3.2.1 Add IP Address
+ * 	0                   1                   2                   3
+ * 	0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |        Type = 0xC001          |    Length = Variable          |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |               ASCONF-Request Correlation ID                   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                       Address Parameter                       |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * 3.2.2 Delete IP Address
+ * 	0                   1                   2                   3
+ * 	0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |        Type = 0xC002          |    Length = Variable          |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |               ASCONF-Request Correlation ID                   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                       Address Parameter                       |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
+					      union sctp_addr	      *laddr,
+					      struct sockaddr	      *addrs,
+					      int		      addrcnt,
+					      __u16		      flags)
+{
+	sctp_addip_param_t	param;
+	struct sctp_chunk	*retval;
+	union sctp_addr_param	addr_param;
+	union sctp_addr		*addr;
+	void			*addr_buf;
+	struct sctp_af		*af;
+	int			paramlen = sizeof(param);
+	int			addr_param_len = 0;
+	int 			totallen = 0;
+	int 			i;
+
+	/* Get total length of all the address parameters. */
+	addr_buf = addrs;
+	for (i = 0; i < addrcnt; i++) {
+		addr = (union sctp_addr *)addr_buf;
+		af = sctp_get_af_specific(addr->v4.sin_family);
+		addr_param_len = af->to_addr_param(addr, &addr_param);
+
+		totallen += paramlen;
+		totallen += addr_param_len;
+
+		addr_buf += af->sockaddr_len;
+	}
+
+	/* Create an asconf chunk with the required length. */
+	retval = sctp_make_asconf(asoc, laddr, totallen);
+	if (!retval)
+		return NULL;
+
+	/* Add the address parameters to the asconf chunk. */
+	addr_buf = addrs;
+	for (i = 0; i < addrcnt; i++) {
+		addr = (union sctp_addr *)addr_buf;
+		af = sctp_get_af_specific(addr->v4.sin_family);
+		addr_param_len = af->to_addr_param(addr, &addr_param);
+		param.param_hdr.type = flags;
+		param.param_hdr.length = htons(paramlen + addr_param_len);
+		param.crr_id = i;
+
+		sctp_addto_chunk(retval, paramlen, &param);
+		sctp_addto_chunk(retval, addr_param_len, &addr_param);
+
+		addr_buf += af->sockaddr_len;
+	}
+	return retval;
+}
+
+/* ADDIP
+ * 3.2.4 Set Primary IP Address
+ *	0                   1                   2                   3
+ *	0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |        Type =0xC004           |    Length = Variable          |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |               ASCONF-Request Correlation ID                   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                       Address Parameter                       |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Create an ASCONF chunk with Set Primary IP address parameter. 
+ */
+struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
+					     union sctp_addr *addr)
+{
+	sctp_addip_param_t	param;
+	struct sctp_chunk 	*retval;
+	int 			len = sizeof(param);
+	union sctp_addr_param	addrparam;
+	int			addrlen;
+	struct sctp_af		*af = sctp_get_af_specific(addr->v4.sin_family);
+
+	addrlen = af->to_addr_param(addr, &addrparam);
+	if (!addrlen)
+		return NULL;
+	len += addrlen;
+
+	/* Create the chunk and make asconf header. */
+	retval = sctp_make_asconf(asoc, addr, len);
+	if (!retval)
+		return NULL;
+
+	param.param_hdr.type = SCTP_PARAM_SET_PRIMARY;
+	param.param_hdr.length = htons(len);
+	param.crr_id = 0;
+
+	sctp_addto_chunk(retval, sizeof(param), &param);
+	sctp_addto_chunk(retval, addrlen, &addrparam);
+
+	return retval;
+}
+
+/* ADDIP 3.1.2 Address Configuration Acknowledgement Chunk (ASCONF-ACK)
+ *      0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     | Type = 0x80   |  Chunk Flags  |      Chunk Length             |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                       Serial Number                           |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                 ASCONF Parameter Response#1                   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     \                                                               \
+ *     /                             ....                              /
+ *     \                                                               \
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                 ASCONF Parameter Response#N                   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Create an ASCONF_ACK chunk with enough space for the parameter responses. 
+ */
+static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
+					       __u32 serial, int vparam_len)
+{
+	sctp_addiphdr_t		asconf;
+	struct sctp_chunk	*retval;
+	int			length = sizeof(asconf) + vparam_len;
+
+	/* Create the chunk.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF_ACK, 0, length);
+	if (!retval)
+		return NULL;
+
+	asconf.serial = htonl(serial);
+
+	retval->subh.addip_hdr =
+		sctp_addto_chunk(retval, sizeof(asconf), &asconf);
+
+	return retval;
+}
+
+/* Add response parameters to an ASCONF_ACK chunk. */
+static void sctp_add_asconf_response(struct sctp_chunk *chunk, __u32 crr_id,
+			      __u16 err_code, sctp_addip_param_t *asconf_param)
+{
+	sctp_addip_param_t 	ack_param;
+	sctp_errhdr_t		err_param;
+	int			asconf_param_len = 0;
+	int			err_param_len = 0;
+	__u16			response_type;
+
+	if (SCTP_ERROR_NO_ERROR == err_code) {
+		response_type = SCTP_PARAM_SUCCESS_REPORT;
+	} else {
+		response_type = SCTP_PARAM_ERR_CAUSE;
+		err_param_len = sizeof(err_param);
+		if (asconf_param)
+			asconf_param_len =
+				 ntohs(asconf_param->param_hdr.length);
+	}
+
+	/* Add Success Indication or Error Cause Indication parameter. */ 
+	ack_param.param_hdr.type = response_type;
+	ack_param.param_hdr.length = htons(sizeof(ack_param) +
+					   err_param_len +
+					   asconf_param_len);
+	ack_param.crr_id = crr_id;
+	sctp_addto_chunk(chunk, sizeof(ack_param), &ack_param);
+
+	if (SCTP_ERROR_NO_ERROR == err_code)
+		return;
+
+	/* Add Error Cause parameter. */
+	err_param.cause = err_code;
+	err_param.length = htons(err_param_len + asconf_param_len);
+	sctp_addto_chunk(chunk, err_param_len, &err_param);
+
+	/* Add the failed TLV copied from ASCONF chunk. */
+	if (asconf_param)
+		sctp_addto_chunk(chunk, asconf_param_len, asconf_param);
+}
+
+/* Process a asconf parameter. */
+static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
+				       struct sctp_chunk *asconf,
+				       sctp_addip_param_t *asconf_param)
+{
+	struct sctp_transport *peer;
+	struct sctp_af *af;
+	union sctp_addr	addr;
+	struct list_head *pos;
+	union sctp_addr_param *addr_param;
+				 
+	addr_param = (union sctp_addr_param *)
+			((void *)asconf_param + sizeof(sctp_addip_param_t));
+
+	af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type));
+	if (unlikely(!af))
+		return SCTP_ERROR_INV_PARAM;
+
+	af->from_addr_param(&addr, addr_param, asoc->peer.port, 0);
+	switch (asconf_param->param_hdr.type) {
+	case SCTP_PARAM_ADD_IP:
+		/* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
+	 	 * request and does not have the local resources to add this
+	 	 * new address to the association, it MUST return an Error
+	 	 * Cause TLV set to the new error code 'Operation Refused
+	 	 * Due to Resource Shortage'.
+	 	 */
+
+		peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC);
+		if (!peer)
+			return SCTP_ERROR_RSRC_LOW;
+
+		/* Start the heartbeat timer. */
+		if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer)))
+			sctp_transport_hold(peer);
+		break;
+	case SCTP_PARAM_DEL_IP:
+		/* ADDIP 4.3 D7) If a request is received to delete the
+	 	 * last remaining IP address of a peer endpoint, the receiver
+	 	 * MUST send an Error Cause TLV with the error cause set to the
+	 	 * new error code 'Request to Delete Last Remaining IP Address'.
+	 	 */
+		pos = asoc->peer.transport_addr_list.next;
+		if (pos->next == &asoc->peer.transport_addr_list)
+			return SCTP_ERROR_DEL_LAST_IP;
+
+		/* ADDIP 4.3 D8) If a request is received to delete an IP
+		 * address which is also the source address of the IP packet
+		 * which contained the ASCONF chunk, the receiver MUST reject
+		 * this request. To reject the request the receiver MUST send
+		 * an Error Cause TLV set to the new error code 'Request to
+		 * Delete Source IP Address'
+		 */
+		if (sctp_cmp_addr_exact(sctp_source(asconf), &addr))
+			return SCTP_ERROR_DEL_SRC_IP;
+
+		sctp_assoc_del_peer(asoc, &addr);
+		break;
+	case SCTP_PARAM_SET_PRIMARY:
+		peer = sctp_assoc_lookup_paddr(asoc, &addr);
+		if (!peer)
+			return SCTP_ERROR_INV_PARAM;
+
+		sctp_assoc_set_primary(asoc, peer);
+		break;
+	default:
+		return SCTP_ERROR_INV_PARAM;
+		break;
+	}
+
+	return SCTP_ERROR_NO_ERROR;
+}
+
+/* Process an incoming ASCONF chunk with the next expected serial no. and 
+ * return an ASCONF_ACK chunk to be sent in response.
+ */
+struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
+				       struct sctp_chunk *asconf)
+{
+	sctp_addiphdr_t		*hdr;
+	union sctp_addr_param	*addr_param;
+	sctp_addip_param_t	*asconf_param;
+	struct sctp_chunk	*asconf_ack;
+
+	__u16	err_code;
+	int	length = 0;
+	int	chunk_len = asconf->skb->len;
+	__u32	serial;
+	int	all_param_pass = 1;
+
+	hdr = (sctp_addiphdr_t *)asconf->skb->data;
+	serial = ntohl(hdr->serial);
+
+	/* Skip the addiphdr and store a pointer to address parameter.  */ 
+	length = sizeof(sctp_addiphdr_t);
+	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
+	chunk_len -= length;
+
+	/* Skip the address parameter and store a pointer to the first
+	 * asconf paramter.
+	 */ 
+	length = ntohs(addr_param->v4.param_hdr.length);
+	asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
+	chunk_len -= length;
+
+	/* create an ASCONF_ACK chunk. 
+	 * Based on the definitions of parameters, we know that the size of
+	 * ASCONF_ACK parameters are less than or equal to the twice of ASCONF
+	 * paramters.
+	 */
+	asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2);
+	if (!asconf_ack)
+		goto done;
+
+	/* Process the TLVs contained within the ASCONF chunk. */
+	while (chunk_len > 0) {
+		err_code = sctp_process_asconf_param(asoc, asconf,
+						     asconf_param);
+		/* ADDIP 4.1 A7)
+		 * If an error response is received for a TLV parameter,
+		 * all TLVs with no response before the failed TLV are
+		 * considered successful if not reported.  All TLVs after
+		 * the failed response are considered unsuccessful unless
+		 * a specific success indication is present for the parameter.
+		 */
+		if (SCTP_ERROR_NO_ERROR != err_code)
+			all_param_pass = 0;
+
+		if (!all_param_pass)
+			sctp_add_asconf_response(asconf_ack,
+						 asconf_param->crr_id, err_code,
+						 asconf_param);
+
+		/* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add
+		 * an IP address sends an 'Out of Resource' in its response, it
+		 * MUST also fail any subsequent add or delete requests bundled
+		 * in the ASCONF. 
+		 */
+		if (SCTP_ERROR_RSRC_LOW == err_code)
+			goto done;
+
+		/* Move to the next ASCONF param. */
+		length = ntohs(asconf_param->param_hdr.length);
+		asconf_param = (sctp_addip_param_t *)((void *)asconf_param +
+						      length);
+		chunk_len -= length;
+	}
+	
+done:
+	asoc->peer.addip_serial++;
+
+	/* If we are sending a new ASCONF_ACK hold a reference to it in assoc
+	 * after freeing the reference to old asconf ack if any. 
+	 */
+	if (asconf_ack) {
+		if (asoc->addip_last_asconf_ack)
+			sctp_chunk_free(asoc->addip_last_asconf_ack);
+
+		sctp_chunk_hold(asconf_ack);
+		asoc->addip_last_asconf_ack = asconf_ack;
+	}
+
+	return asconf_ack;
+}
+
+/* Process a asconf parameter that is successfully acked. */
+static int sctp_asconf_param_success(struct sctp_association *asoc,
+				     sctp_addip_param_t *asconf_param)
+{
+	struct sctp_af *af;
+	union sctp_addr	addr;
+	struct sctp_bind_addr *bp = &asoc->base.bind_addr;
+	union sctp_addr_param *addr_param;
+	struct list_head *pos;
+	struct sctp_transport *transport;
+	int retval = 0;
+
+	addr_param = (union sctp_addr_param *)
+			((void *)asconf_param + sizeof(sctp_addip_param_t));
+
+	/* We have checked the packet before, so we do not check again.	*/
+	af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type));
+	af->from_addr_param(&addr, addr_param, bp->port, 0);
+
+	switch (asconf_param->param_hdr.type) {
+	case SCTP_PARAM_ADD_IP:
+		sctp_local_bh_disable();
+		sctp_write_lock(&asoc->base.addr_lock);
+		retval = sctp_add_bind_addr(bp, &addr, GFP_ATOMIC);
+		sctp_write_unlock(&asoc->base.addr_lock);
+		sctp_local_bh_enable();
+		break;
+	case SCTP_PARAM_DEL_IP:
+		sctp_local_bh_disable();
+		sctp_write_lock(&asoc->base.addr_lock);
+		retval = sctp_del_bind_addr(bp, &addr);
+		sctp_write_unlock(&asoc->base.addr_lock);
+		sctp_local_bh_enable();
+		list_for_each(pos, &asoc->peer.transport_addr_list) {
+			transport = list_entry(pos, struct sctp_transport,
+						 transports);
+			sctp_transport_route(transport, NULL,
+					     sctp_sk(asoc->base.sk));
+		}
+		break;
+	default:
+		break;
+	}
+
+	return retval;
+}
+
+/* Get the corresponding ASCONF response error code from the ASCONF_ACK chunk
+ * for the given asconf parameter.  If there is no response for this parameter,
+ * return the error code based on the third argument 'no_err'. 
+ * ADDIP 4.1
+ * A7) If an error response is received for a TLV parameter, all TLVs with no
+ * response before the failed TLV are considered successful if not reported.
+ * All TLVs after the failed response are considered unsuccessful unless a
+ * specific success indication is present for the parameter.
+ */
+static __u16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
+				      sctp_addip_param_t *asconf_param,
+				      int no_err)
+{
+	sctp_addip_param_t	*asconf_ack_param;
+	sctp_errhdr_t		*err_param;
+	int			length;
+	int			asconf_ack_len = asconf_ack->skb->len;
+	__u16			err_code;
+
+	if (no_err)
+		err_code = SCTP_ERROR_NO_ERROR;
+	else
+		err_code = SCTP_ERROR_REQ_REFUSED;
+
+	/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
+	 * the first asconf_ack parameter.
+	 */ 
+	length = sizeof(sctp_addiphdr_t);
+	asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data +
+						  length);
+	asconf_ack_len -= length;
+
+	while (asconf_ack_len > 0) {
+		if (asconf_ack_param->crr_id == asconf_param->crr_id) {
+			switch(asconf_ack_param->param_hdr.type) {
+			case SCTP_PARAM_SUCCESS_REPORT:
+				return SCTP_ERROR_NO_ERROR;
+			case SCTP_PARAM_ERR_CAUSE:
+				length = sizeof(sctp_addip_param_t);
+				err_param = (sctp_errhdr_t *)
+					   ((void *)asconf_ack_param + length);
+				asconf_ack_len -= length;
+				if (asconf_ack_len > 0)
+					return err_param->cause;
+				else
+					return SCTP_ERROR_INV_PARAM;
+				break;
+			default:
+				return SCTP_ERROR_INV_PARAM;
+			}
+		}
+
+		length = ntohs(asconf_ack_param->param_hdr.length);
+		asconf_ack_param = (sctp_addip_param_t *)
+					((void *)asconf_ack_param + length);
+		asconf_ack_len -= length;
+	}
+
+	return err_code;
+}
+
+/* Process an incoming ASCONF_ACK chunk against the cached last ASCONF chunk. */
+int sctp_process_asconf_ack(struct sctp_association *asoc,
+			    struct sctp_chunk *asconf_ack)
+{
+	struct sctp_chunk	*asconf = asoc->addip_last_asconf;
+	union sctp_addr_param	*addr_param;
+	sctp_addip_param_t	*asconf_param;
+	int	length = 0;
+	int	asconf_len = asconf->skb->len;
+	int	all_param_pass = 0;
+	int	no_err = 1;
+	int	retval = 0;
+	__u16	err_code = SCTP_ERROR_NO_ERROR;
+
+	/* Skip the chunkhdr and addiphdr from the last asconf sent and store
+	 * a pointer to address parameter.
+	 */ 
+	length = sizeof(sctp_addip_chunk_t);
+	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
+	asconf_len -= length;
+
+	/* Skip the address parameter in the last asconf sent and store a
+	 * pointer to the first asconf paramter.
+	 */ 
+	length = ntohs(addr_param->v4.param_hdr.length);
+	asconf_param = (sctp_addip_param_t *)((void *)addr_param + length);
+	asconf_len -= length;
+
+	/* ADDIP 4.1
+	 * A8) If there is no response(s) to specific TLV parameter(s), and no
+	 * failures are indicated, then all request(s) are considered
+	 * successful.
+	 */
+	if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t))
+		all_param_pass = 1;
+
+	/* Process the TLVs contained in the last sent ASCONF chunk. */
+	while (asconf_len > 0) {
+		if (all_param_pass)
+			err_code = SCTP_ERROR_NO_ERROR;
+		else {
+			err_code = sctp_get_asconf_response(asconf_ack,
+							    asconf_param,
+							    no_err);
+			if (no_err && (SCTP_ERROR_NO_ERROR != err_code))
+				no_err = 0;
+		}
+
+		switch (err_code) {
+		case SCTP_ERROR_NO_ERROR:
+			retval = sctp_asconf_param_success(asoc, asconf_param);
+			break;
+
+		case SCTP_ERROR_RSRC_LOW:
+			retval = 1;
+			break;
+
+		case SCTP_ERROR_INV_PARAM:
+			/* Disable sending this type of asconf parameter in
+			 * future.
+			 */	
+			asoc->peer.addip_disabled_mask |=
+				asconf_param->param_hdr.type;
+			break;
+
+		case SCTP_ERROR_REQ_REFUSED:
+		case SCTP_ERROR_DEL_LAST_IP:
+		case SCTP_ERROR_DEL_SRC_IP:
+		default:
+			 break;
+		}
+
+		/* Skip the processed asconf parameter and move to the next
+		 * one.
+	 	 */ 
+		length = ntohs(asconf_param->param_hdr.length);
+		asconf_param = (sctp_addip_param_t *)((void *)asconf_param +
+						      length);
+		asconf_len -= length;
+	}
+
+	/* Free the cached last sent asconf chunk. */
+	sctp_chunk_free(asconf);
+	asoc->addip_last_asconf = NULL;
+
+	/* Send the next asconf chunk from the addip chunk queue. */
+	asconf = (struct sctp_chunk *)__skb_dequeue(&asoc->addip_chunks);
+	if (asconf) {
+		/* Hold the chunk until an ASCONF_ACK is received. */
+		sctp_chunk_hold(asconf);
+		if (sctp_primitive_ASCONF(asoc, asconf))
+			sctp_chunk_free(asconf);
+		else
+			asoc->addip_last_asconf = asconf;
+	}
+
+	return retval;
+}
+
+/* Make a FWD TSN chunk. */ 
+struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
+				    __u32 new_cum_tsn, size_t nstreams,
+				    struct sctp_fwdtsn_skip *skiplist)
+{
+	struct sctp_chunk *retval = NULL;
+	struct sctp_fwdtsn_chunk *ftsn_chunk;
+	struct sctp_fwdtsn_hdr ftsn_hdr; 
+	struct sctp_fwdtsn_skip skip;
+	size_t hint;
+	int i;
+
+	hint = (nstreams + 1) * sizeof(__u32);
+
+	/* Maybe set the T-bit if we have no association.  */
+	retval = sctp_make_chunk(asoc, SCTP_CID_FWD_TSN, 0, hint);
+
+	if (!retval)
+		return NULL;
+
+	ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr;
+
+	ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
+	retval->subh.fwdtsn_hdr =
+		sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
+
+	for (i = 0; i < nstreams; i++) {
+		skip.stream = skiplist[i].stream;
+		skip.ssn = skiplist[i].ssn;
+		sctp_addto_chunk(retval, sizeof(skip), &skip);
+	}
+
+	return retval;
+}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
new file mode 100644
index 00000000000..f65fa441952
--- /dev/null
+++ b/net/sctp/sm_sideeffect.c
@@ -0,0 +1,1395 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions work with the state functions in sctp_sm_statefuns.c
+ * to implement that state operations.  These functions implement the
+ * steps which require modifying existing data structures.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@austin.ibm.com>
+ *    Hui Huang		    <hui.huang@nokia.com>
+ *    Dajiang Zhang	    <dajiang.zhang@nokia.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+static int sctp_cmd_interpreter(sctp_event_t event_type,
+				sctp_subtype_t subtype,
+				sctp_state_t state,
+				struct sctp_endpoint *ep,
+				struct sctp_association *asoc,
+				void *event_arg,
+			 	sctp_disposition_t status,
+				sctp_cmd_seq_t *commands,
+				int gfp);
+static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
+			     sctp_state_t state,
+			     struct sctp_endpoint *ep,
+			     struct sctp_association *asoc,
+			     void *event_arg,
+			     sctp_disposition_t status,
+			     sctp_cmd_seq_t *commands,
+			     int gfp);
+
+/********************************************************************
+ * Helper functions
+ ********************************************************************/
+
+/* A helper function for delayed processing of INET ECN CE bit. */
+static void sctp_do_ecn_ce_work(struct sctp_association *asoc, 
+				__u32 lowest_tsn)
+{
+	/* Save the TSN away for comparison when we receive CWR */
+
+	asoc->last_ecne_tsn = lowest_tsn;
+	asoc->need_ecne = 1;
+}
+
+/* Helper function for delayed processing of SCTP ECNE chunk.  */
+/* RFC 2960 Appendix A
+ *
+ * RFC 2481 details a specific bit for a sender to send in
+ * the header of its next outbound TCP segment to indicate to
+ * its peer that it has reduced its congestion window.  This
+ * is termed the CWR bit.  For SCTP the same indication is made
+ * by including the CWR chunk.  This chunk contains one data
+ * element, i.e. the TSN number that was sent in the ECNE chunk.
+ * This element represents the lowest TSN number in the datagram
+ * that was originally marked with the CE bit.
+ */
+static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc,
+					   __u32 lowest_tsn,
+					   struct sctp_chunk *chunk)
+{
+	struct sctp_chunk *repl;
+
+	/* Our previously transmitted packet ran into some congestion
+	 * so we should take action by reducing cwnd and ssthresh
+	 * and then ACK our peer that we we've done so by
+	 * sending a CWR.
+	 */
+
+	/* First, try to determine if we want to actually lower
+	 * our cwnd variables.  Only lower them if the ECNE looks more
+	 * recent than the last response.
+	 */
+	if (TSN_lt(asoc->last_cwr_tsn, lowest_tsn)) {
+		struct sctp_transport *transport;
+
+		/* Find which transport's congestion variables
+		 * need to be adjusted.
+		 */
+		transport = sctp_assoc_lookup_tsn(asoc, lowest_tsn);
+
+		/* Update the congestion variables. */
+		if (transport)
+			sctp_transport_lower_cwnd(transport,
+						  SCTP_LOWER_CWND_ECNE);
+		asoc->last_cwr_tsn = lowest_tsn;
+	}
+
+	/* Always try to quiet the other end.  In case of lost CWR,
+	 * resend last_cwr_tsn.
+	 */
+	repl = sctp_make_cwr(asoc, asoc->last_cwr_tsn, chunk);
+
+	/* If we run out of memory, it will look like a lost CWR.  We'll
+	 * get back in sync eventually.
+	 */
+	return repl;
+}
+
+/* Helper function to do delayed processing of ECN CWR chunk.  */
+static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
+				 __u32 lowest_tsn)
+{
+	/* Turn off ECNE getting auto-prepended to every outgoing
+	 * packet
+	 */
+	asoc->need_ecne = 0;
+}
+
+/* Generate SACK if necessary.  We call this at the end of a packet.  */
+static int sctp_gen_sack(struct sctp_association *asoc, int force,
+			 sctp_cmd_seq_t *commands)
+{
+	__u32 ctsn, max_tsn_seen;
+	struct sctp_chunk *sack;
+	int error = 0;
+
+	if (force)
+		asoc->peer.sack_needed = 1;
+
+	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+	max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+
+	/* From 12.2 Parameters necessary per association (i.e. the TCB):
+	 *
+	 * Ack State : This flag indicates if the next received packet
+	 * 	     : is to be responded to with a SACK. ...
+	 *	     : When DATA chunks are out of order, SACK's
+	 *           : are not delayed (see Section 6).
+	 *
+	 * [This is actually not mentioned in Section 6, but we
+	 * implement it here anyway. --piggy]
+	 */
+        if (max_tsn_seen != ctsn)
+		asoc->peer.sack_needed = 1;
+
+	/* From 6.2  Acknowledgement on Reception of DATA Chunks:
+	 *
+	 * Section 4.2 of [RFC2581] SHOULD be followed. Specifically,
+	 * an acknowledgement SHOULD be generated for at least every
+	 * second packet (not every second DATA chunk) received, and
+	 * SHOULD be generated within 200 ms of the arrival of any
+	 * unacknowledged DATA chunk. ...
+	 */
+	if (!asoc->peer.sack_needed) {
+		/* We will need a SACK for the next packet.  */
+		asoc->peer.sack_needed = 1;
+		goto out;
+	} else {
+		if (asoc->a_rwnd > asoc->rwnd)
+			asoc->a_rwnd = asoc->rwnd;
+		sack = sctp_make_sack(asoc);
+		if (!sack)
+			goto nomem;
+
+		asoc->peer.sack_needed = 0;
+
+		error = sctp_outq_tail(&asoc->outqueue, sack);
+
+		/* Stop the SACK timer.  */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
+	}
+out:
+	return error;
+nomem:
+	error = -ENOMEM;
+	return error;
+}
+
+/* When the T3-RTX timer expires, it calls this function to create the
+ * relevant state machine event.
+ */
+void sctp_generate_t3_rtx_event(unsigned long peer)
+{
+	int error;
+	struct sctp_transport *transport = (struct sctp_transport *) peer;
+	struct sctp_association *asoc = transport->asoc;
+
+	/* Check whether a task is in the sock.  */
+
+	sctp_bh_lock_sock(asoc->base.sk);
+	if (sock_owned_by_user(asoc->base.sk)) {
+		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __FUNCTION__);
+
+		/* Try again later.  */
+		if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20)))
+			sctp_transport_hold(transport);
+		goto out_unlock;
+	}
+
+	/* Is this transport really dead and just waiting around for
+	 * the timer to let go of the reference?
+	 */
+	if (transport->dead)
+		goto out_unlock;
+
+	/* Run through the state machine.  */
+	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
+			   asoc->state,
+			   asoc->ep, asoc,
+			   transport, GFP_ATOMIC);
+
+	if (error)
+		asoc->base.sk->sk_err = -error;
+
+out_unlock:
+	sctp_bh_unlock_sock(asoc->base.sk);
+	sctp_transport_put(transport);
+}
+
+/* This is a sa interface for producing timeout events.  It works
+ * for timeouts which use the association as their parameter.
+ */
+static void sctp_generate_timeout_event(struct sctp_association *asoc,
+					sctp_event_timeout_t timeout_type)
+{
+	int error = 0;
+
+	sctp_bh_lock_sock(asoc->base.sk);
+	if (sock_owned_by_user(asoc->base.sk)) {
+		SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n",
+				  __FUNCTION__,
+				  timeout_type);
+
+		/* Try again later.  */
+		if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20)))
+			sctp_association_hold(asoc);
+		goto out_unlock;
+	}
+
+	/* Is this association really dead and just waiting around for
+	 * the timer to let go of the reference?
+	 */
+	if (asoc->base.dead)
+		goto out_unlock;
+
+	/* Run through the state machine.  */
+	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+			   SCTP_ST_TIMEOUT(timeout_type),
+			   asoc->state, asoc->ep, asoc,
+			   (void *)timeout_type, GFP_ATOMIC);
+
+	if (error)
+		asoc->base.sk->sk_err = -error;
+
+out_unlock:
+	sctp_bh_unlock_sock(asoc->base.sk);
+	sctp_association_put(asoc);
+}
+
+static void sctp_generate_t1_cookie_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
+}
+
+static void sctp_generate_t1_init_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
+}
+
+static void sctp_generate_t2_shutdown_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
+}
+
+static void sctp_generate_t4_rto_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
+}
+
+static void sctp_generate_t5_shutdown_guard_event(unsigned long data)
+{
+        struct sctp_association *asoc = (struct sctp_association *)data;
+        sctp_generate_timeout_event(asoc,
+				    SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
+
+} /* sctp_generate_t5_shutdown_guard_event() */
+
+static void sctp_generate_autoclose_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
+}
+
+/* Generate a heart beat event.  If the sock is busy, reschedule.   Make
+ * sure that the transport is still valid.
+ */
+void sctp_generate_heartbeat_event(unsigned long data)
+{
+	int error = 0;
+	struct sctp_transport *transport = (struct sctp_transport *) data;
+	struct sctp_association *asoc = transport->asoc;
+
+	sctp_bh_lock_sock(asoc->base.sk);
+	if (sock_owned_by_user(asoc->base.sk)) {
+		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __FUNCTION__);
+
+		/* Try again later.  */
+		if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20)))
+			sctp_transport_hold(transport);
+		goto out_unlock;
+	}
+
+	/* Is this structure just waiting around for us to actually
+	 * get destroyed?
+	 */
+	if (transport->dead)
+		goto out_unlock;
+
+	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
+			   asoc->state, asoc->ep, asoc,
+			   transport, GFP_ATOMIC);
+
+         if (error)
+		 asoc->base.sk->sk_err = -error;
+
+out_unlock:
+	sctp_bh_unlock_sock(asoc->base.sk);
+	sctp_transport_put(transport);
+}
+
+/* Inject a SACK Timeout event into the state machine.  */
+static void sctp_generate_sack_event(unsigned long data)
+{
+	struct sctp_association *asoc = (struct sctp_association *) data;
+	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
+}
+
+sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
+	NULL,
+	sctp_generate_t1_cookie_event,
+	sctp_generate_t1_init_event,
+	sctp_generate_t2_shutdown_event,
+	NULL,
+	sctp_generate_t4_rto_event,
+	sctp_generate_t5_shutdown_guard_event,
+	sctp_generate_heartbeat_event,
+	sctp_generate_sack_event,
+	sctp_generate_autoclose_event,
+};
+
+
+/* RFC 2960 8.2 Path Failure Detection
+ *
+ * When its peer endpoint is multi-homed, an endpoint should keep a
+ * error counter for each of the destination transport addresses of the
+ * peer endpoint.
+ *
+ * Each time the T3-rtx timer expires on any address, or when a
+ * HEARTBEAT sent to an idle address is not acknowledged within a RTO,
+ * the error counter of that destination address will be incremented.
+ * When the value in the error counter exceeds the protocol parameter
+ * 'Path.Max.Retrans' of that destination address, the endpoint should
+ * mark the destination transport address as inactive, and a
+ * notification SHOULD be sent to the upper layer.
+ *
+ */
+static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
+					 struct sctp_transport *transport)
+{
+	/* The check for association's overall error counter exceeding the
+	 * threshold is done in the state function.
+	 */
+	asoc->overall_error_count++;
+
+	if (transport->active &&
+	    (transport->error_count++ >= transport->max_retrans)) {
+		SCTP_DEBUG_PRINTK("transport_strike: transport "
+				  "IP:%d.%d.%d.%d failed.\n",
+				  NIPQUAD(transport->ipaddr.v4.sin_addr));
+		sctp_assoc_control_transport(asoc, transport,
+					     SCTP_TRANSPORT_DOWN,
+					     SCTP_FAILED_THRESHOLD);
+	}
+
+	/* E2) For the destination address for which the timer
+	 * expires, set RTO <- RTO * 2 ("back off the timer").  The
+	 * maximum value discussed in rule C7 above (RTO.max) may be
+	 * used to provide an upper bound to this doubling operation.
+	 */
+	transport->rto = min((transport->rto * 2), transport->asoc->rto_max);
+}
+
+/* Worker routine to handle INIT command failure.  */
+static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
+				 struct sctp_association *asoc,
+				 unsigned error)
+{
+	struct sctp_ulpevent *event;
+
+	event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
+						(__u16)error, 0, 0,
+						GFP_ATOMIC);
+
+	if (event)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(event));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+
+	/* SEND_FAILED sent later when cleaning up the association. */
+	asoc->outqueue.error = error;
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+}
+
+/* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
+static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
+				  struct sctp_association *asoc,
+				  sctp_event_t event_type,
+				  sctp_subtype_t subtype,
+				  struct sctp_chunk *chunk,
+				  unsigned error)
+{
+	struct sctp_ulpevent *event;
+
+	/* Cancel any partial delivery in progress. */
+	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+	event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+						(__u16)error, 0, 0,
+						GFP_ATOMIC);
+	if (event)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(event));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+
+	/* Set sk_err to ECONNRESET on a 1-1 style socket. */
+	if (!sctp_style(asoc->base.sk, UDP))
+		asoc->base.sk->sk_err = ECONNRESET; 
+
+	/* SEND_FAILED sent later when cleaning up the association. */
+	asoc->outqueue.error = error;
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+}
+
+/* Process an init chunk (may be real INIT/INIT-ACK or an embedded INIT
+ * inside the cookie.  In reality, this is only used for INIT-ACK processing
+ * since all other cases use "temporary" associations and can do all
+ * their work in statefuns directly.
+ */
+static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
+				 struct sctp_association *asoc,
+				 struct sctp_chunk *chunk,
+				 sctp_init_chunk_t *peer_init, int gfp)
+{
+	int error;
+
+	/* We only process the init as a sideeffect in a single
+	 * case.   This is when we process the INIT-ACK.   If we
+	 * fail during INIT processing (due to malloc problems),
+	 * just return the error and stop processing the stack.
+	 */
+	if (!sctp_process_init(asoc, chunk->chunk_hdr->type,
+			       sctp_source(chunk), peer_init, gfp))
+		error = -ENOMEM;
+	else
+		error = 0;
+
+	return error;
+}
+
+/* Helper function to break out starting up of heartbeat timers.  */
+static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
+				     struct sctp_association *asoc)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+
+	/* Start a heartbeat timer for each transport on the association.
+	 * hold a reference on the transport to make sure none of
+	 * the needed data structures go away.
+	 */
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+
+		if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
+			sctp_transport_hold(t);
+	}
+}
+
+static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
+				    struct sctp_association *asoc)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+
+	/* Stop all heartbeat timers. */
+
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+		if (del_timer(&t->hb_timer))
+			sctp_transport_put(t);
+	}
+}
+
+/* Helper function to stop any pending T3-RTX timers */
+static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
+				        struct sctp_association *asoc)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+		if (timer_pending(&t->T3_rtx_timer) &&
+		    del_timer(&t->T3_rtx_timer)) {
+			sctp_transport_put(t);
+		}
+	}
+}
+
+
+/* Helper function to update the heartbeat timer. */
+static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds,
+				     struct sctp_association *asoc,
+				     struct sctp_transport *t)
+{
+	/* Update the heartbeat timer.  */
+	if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
+		sctp_transport_hold(t);
+}
+
+/* Helper function to handle the reception of an HEARTBEAT ACK.  */
+static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
+				  struct sctp_association *asoc,
+				  struct sctp_transport *t,
+				  struct sctp_chunk *chunk)
+{
+	sctp_sender_hb_info_t *hbinfo;
+
+	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
+	 * HEARTBEAT should clear the error counter of the destination
+	 * transport address to which the HEARTBEAT was sent.
+	 * The association's overall error count is also cleared.
+	 */
+	t->error_count = 0;
+	t->asoc->overall_error_count = 0;
+
+	/* Mark the destination transport address as active if it is not so
+	 * marked.
+	 */
+	if (!t->active)
+		sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
+					     SCTP_HEARTBEAT_SUCCESS);
+
+	/* The receiver of the HEARTBEAT ACK should also perform an
+	 * RTT measurement for that destination transport address
+	 * using the time value carried in the HEARTBEAT ACK chunk.
+	 */
+	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
+}
+
+/* Helper function to do a transport reset at the expiry of the hearbeat
+ * timer.
+ */
+static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds,
+				     struct sctp_association *asoc,
+				     struct sctp_transport *t)
+{
+	sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE);
+
+	/* Mark one strike against a transport.  */
+	sctp_do_8_2_transport_strike(asoc, t);
+}
+
+/* Helper function to process the process SACK command.  */
+static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
+				 struct sctp_association *asoc,
+				 struct sctp_sackhdr *sackh)
+{
+	int err;
+
+	if (sctp_outq_sack(&asoc->outqueue, sackh)) {
+		/* There are no more TSNs awaiting SACK.  */
+		err = sctp_do_sm(SCTP_EVENT_T_OTHER,
+				 SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
+				 asoc->state, asoc->ep, asoc, NULL,
+				 GFP_ATOMIC);
+	} else {
+		/* Windows may have opened, so we need
+		 * to check if we have DATA to transmit
+		 */
+		err = sctp_outq_flush(&asoc->outqueue, 0);
+	}
+
+	return err;
+}
+
+/* Helper function to set the timeout value for T2-SHUTDOWN timer and to set
+ * the transport for a shutdown chunk.
+ */
+static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, 
+			      struct sctp_association *asoc,
+			      struct sctp_chunk *chunk)
+{
+	struct sctp_transport *t;
+
+	t = sctp_assoc_choose_shutdown_transport(asoc);
+	asoc->shutdown_last_sent_to = t;
+	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
+	chunk->transport = t;
+}
+
+/* Helper function to change the state of an association. */
+static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, 
+			       struct sctp_association *asoc,
+			       sctp_state_t state)
+{
+	struct sock *sk = asoc->base.sk;
+
+	asoc->state = state;
+
+	if (sctp_style(sk, TCP)) {
+		/* Change the sk->sk_state of a TCP-style socket that has 
+		 * sucessfully completed a connect() call.
+		 */
+		if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
+			sk->sk_state = SCTP_SS_ESTABLISHED;
+
+		/* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
+		if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
+		    sctp_sstate(sk, ESTABLISHED))
+			sk->sk_shutdown |= RCV_SHUTDOWN;
+	}
+
+	if (sctp_state(asoc, ESTABLISHED) ||
+	    sctp_state(asoc, CLOSED) ||
+	    sctp_state(asoc, SHUTDOWN_RECEIVED)) {
+		/* Wake up any processes waiting in the asoc's wait queue in
+		 * sctp_wait_for_connect() or sctp_wait_for_sndbuf().
+	 	 */
+		if (waitqueue_active(&asoc->wait))
+			wake_up_interruptible(&asoc->wait);
+
+		/* Wake up any processes waiting in the sk's sleep queue of
+		 * a TCP-style or UDP-style peeled-off socket in
+		 * sctp_wait_for_accept() or sctp_wait_for_packet().
+		 * For a UDP-style socket, the waiters are woken up by the
+		 * notifications.
+		 */
+		if (!sctp_style(sk, UDP))
+			sk->sk_state_change(sk);
+	}
+}
+
+/* Helper function to delete an association. */
+static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
+				struct sctp_association *asoc)
+{
+	struct sock *sk = asoc->base.sk;
+
+	/* If it is a non-temporary association belonging to a TCP-style
+	 * listening socket that is not closed, do not free it so that accept() 
+	 * can pick it up later.
+	 */ 
+	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING) &&
+	    (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
+		return;
+
+	sctp_unhash_established(asoc);
+	sctp_association_free(asoc);
+}
+
+/*
+ * ADDIP Section 4.1 ASCONF Chunk Procedures
+ * A4) Start a T-4 RTO timer, using the RTO value of the selected
+ * destination address (we use active path instead of primary path just
+ * because primary path may be inactive. 
+ */
+static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
+				struct sctp_association *asoc,
+				struct sctp_chunk *chunk)
+{
+	struct sctp_transport *t;
+
+	t = asoc->peer.active_path;
+	asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = t->rto;
+	chunk->transport = t;
+}
+
+/* Process an incoming Operation Error Chunk. */ 
+static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
+				   struct sctp_association *asoc,
+				   struct sctp_chunk *chunk)
+{
+	struct sctp_operr_chunk *operr_chunk;
+	struct sctp_errhdr *err_hdr;
+
+	operr_chunk = (struct sctp_operr_chunk *)chunk->chunk_hdr;
+	err_hdr = &operr_chunk->err_hdr;
+
+	switch (err_hdr->cause) {
+	case SCTP_ERROR_UNKNOWN_CHUNK:
+	{
+		struct sctp_chunkhdr *unk_chunk_hdr;
+
+		unk_chunk_hdr = (struct sctp_chunkhdr *)err_hdr->variable;
+		switch (unk_chunk_hdr->type) {
+		/* ADDIP 4.1 A9) If the peer responds to an ASCONF with an
+		 * ERROR chunk reporting that it did not recognized the ASCONF
+		 * chunk type, the sender of the ASCONF MUST NOT send any
+		 * further ASCONF chunks and MUST stop its T-4 timer.
+		 */
+		case SCTP_CID_ASCONF:
+			asoc->peer.asconf_capable = 0;
+			sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP,
+					SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+/* Process variable FWDTSN chunk information. */
+static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq, 
+				    struct sctp_chunk *chunk)
+{
+	struct sctp_fwdtsn_skip *skip;
+	/* Walk through all the skipped SSNs */
+	sctp_walk_fwdtsn(skip, chunk) {
+		sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
+	}
+
+	return;
+}
+
+/* Helper function to remove the association non-primary peer 
+ * transports.
+ */ 
+static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
+{
+	struct sctp_transport *t;
+	struct list_head *pos;
+	struct list_head *temp;
+
+	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
+		t = list_entry(pos, struct sctp_transport, transports);
+		if (!sctp_cmp_addr_exact(&t->ipaddr,
+		                         &asoc->peer.primary_addr)) {
+			sctp_assoc_del_peer(asoc, &t->ipaddr);
+		}
+	}
+
+	return;
+}
+
+/* These three macros allow us to pull the debugging code out of the
+ * main flow of sctp_do_sm() to keep attention focused on the real
+ * functionality there.
+ */
+#define DEBUG_PRE \
+	SCTP_DEBUG_PRINTK("sctp_do_sm prefn: " \
+			  "ep %p, %s, %s, asoc %p[%s], %s\n", \
+			  ep, sctp_evttype_tbl[event_type], \
+			  (*debug_fn)(subtype), asoc, \
+			  sctp_state_tbl[state], state_fn->name)
+
+#define DEBUG_POST \
+	SCTP_DEBUG_PRINTK("sctp_do_sm postfn: " \
+			  "asoc %p, status: %s\n", \
+			  asoc, sctp_status_tbl[status])
+
+#define DEBUG_POST_SFX \
+	SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \
+			  error, asoc, \
+			  sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \
+			  sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED])
+
+/*
+ * This is the master state machine processing function.
+ *
+ * If you want to understand all of lksctp, this is a
+ * good place to start.
+ */
+int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
+	       sctp_state_t state,
+	       struct sctp_endpoint *ep,
+	       struct sctp_association *asoc,
+	       void *event_arg,
+	       int gfp)
+{
+	sctp_cmd_seq_t commands;
+	const sctp_sm_table_entry_t *state_fn;
+	sctp_disposition_t status;
+	int error = 0;
+	typedef const char *(printfn_t)(sctp_subtype_t);
+
+	static printfn_t *table[] = {
+		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
+	};
+	printfn_t *debug_fn  __attribute__ ((unused)) = table[event_type];
+
+	/* Look up the state function, run it, and then process the
+	 * side effects.  These three steps are the heart of lksctp.
+	 */
+	state_fn = sctp_sm_lookup_event(event_type, state, subtype);
+
+	sctp_init_cmd_seq(&commands);
+
+	DEBUG_PRE;
+	status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands);
+	DEBUG_POST;
+
+	error = sctp_side_effects(event_type, subtype, state,
+				  ep, asoc, event_arg, status, 
+				  &commands, gfp);
+	DEBUG_POST_SFX;
+
+	return error;
+}
+
+#undef DEBUG_PRE
+#undef DEBUG_POST
+
+/*****************************************************************
+ * This the master state function side effect processing function.
+ *****************************************************************/
+static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
+			     sctp_state_t state,
+			     struct sctp_endpoint *ep,
+			     struct sctp_association *asoc,
+			     void *event_arg,
+			     sctp_disposition_t status,
+			     sctp_cmd_seq_t *commands,
+			     int gfp)
+{
+	int error;
+
+	/* FIXME - Most of the dispositions left today would be categorized
+	 * as "exceptional" dispositions.  For those dispositions, it
+	 * may not be proper to run through any of the commands at all.
+	 * For example, the command interpreter might be run only with
+	 * disposition SCTP_DISPOSITION_CONSUME.
+	 */
+	if (0 != (error = sctp_cmd_interpreter(event_type, subtype, state,
+					       ep, asoc,
+					       event_arg, status,
+					       commands, gfp)))
+		goto bail;
+
+	switch (status) {
+	case SCTP_DISPOSITION_DISCARD:
+		SCTP_DEBUG_PRINTK("Ignored sctp protocol event - state %d, "
+				  "event_type %d, event_id %d\n",
+				  state, event_type, subtype.chunk);
+		break;
+
+	case SCTP_DISPOSITION_NOMEM:
+		/* We ran out of memory, so we need to discard this
+		 * packet.
+		 */
+		/* BUG--we should now recover some memory, probably by
+		 * reneging...
+		 */
+		error = -ENOMEM;
+		break;
+
+        case SCTP_DISPOSITION_DELETE_TCB:
+		/* This should now be a command. */
+		break;
+
+	case SCTP_DISPOSITION_CONSUME:
+	case SCTP_DISPOSITION_ABORT:
+		/*
+		 * We should no longer have much work to do here as the
+		 * real work has been done as explicit commands above.
+		 */
+		break;
+
+	case SCTP_DISPOSITION_VIOLATION:
+		printk(KERN_ERR "sctp protocol violation state %d "
+		       "chunkid %d\n", state, subtype.chunk);
+		break;
+
+	case SCTP_DISPOSITION_NOT_IMPL:
+		printk(KERN_WARNING "sctp unimplemented feature in state %d, "
+		       "event_type %d, event_id %d\n",
+		       state, event_type, subtype.chunk);
+		break;
+
+	case SCTP_DISPOSITION_BUG:
+		printk(KERN_ERR "sctp bug in state %d, "
+		       "event_type %d, event_id %d\n",
+		       state, event_type, subtype.chunk);
+		BUG();
+		break;
+
+	default:
+		printk(KERN_ERR "sctp impossible disposition %d "
+		       "in state %d, event_type %d, event_id %d\n",
+		       status, state, event_type, subtype.chunk);
+		BUG();
+		break;
+	};
+
+bail:
+	return error;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* This is the side-effect interpreter.  */
+static int sctp_cmd_interpreter(sctp_event_t event_type,
+				sctp_subtype_t subtype,
+				sctp_state_t state,
+				struct sctp_endpoint *ep,
+				struct sctp_association *asoc,
+				void *event_arg,
+			 	sctp_disposition_t status,
+				sctp_cmd_seq_t *commands,
+				int gfp)
+{
+	int error = 0;
+	int force;
+	sctp_cmd_t *cmd;
+	struct sctp_chunk *new_obj;
+	struct sctp_chunk *chunk = NULL;
+	struct sctp_packet *packet;
+	struct list_head *pos;
+	struct timer_list *timer;
+	unsigned long timeout;
+	struct sctp_transport *t;
+	struct sctp_sackhdr sackh;
+	int local_cork = 0;
+
+	if (SCTP_EVENT_T_TIMEOUT != event_type)
+		chunk = (struct sctp_chunk *) event_arg;
+
+	/* Note:  This whole file is a huge candidate for rework.
+	 * For example, each command could either have its own handler, so
+	 * the loop would look like:
+	 *     while (cmds)
+	 *         cmd->handle(x, y, z)
+	 * --jgrimm
+	 */
+	while (NULL != (cmd = sctp_next_cmd(commands))) {
+		switch (cmd->verb) {
+		case SCTP_CMD_NOP:
+			/* Do nothing. */
+			break;
+
+		case SCTP_CMD_NEW_ASOC:
+			/* Register a new association.  */
+			if (local_cork) {
+				sctp_outq_uncork(&asoc->outqueue); 
+				local_cork = 0;
+			}
+			asoc = cmd->obj.ptr;
+			/* Register with the endpoint.  */
+			sctp_endpoint_add_asoc(ep, asoc);
+			sctp_hash_established(asoc);
+			break;
+
+		case SCTP_CMD_UPDATE_ASSOC:
+		       sctp_assoc_update(asoc, cmd->obj.ptr);
+		       break;
+
+		case SCTP_CMD_PURGE_OUTQUEUE:
+		       sctp_outq_teardown(&asoc->outqueue);
+		       break;
+
+		case SCTP_CMD_DELETE_TCB:			
+			if (local_cork) {
+				sctp_outq_uncork(&asoc->outqueue);
+				local_cork = 0;
+			}
+			/* Delete the current association.  */
+			sctp_cmd_delete_tcb(commands, asoc);
+			asoc = NULL;
+			break;
+
+		case SCTP_CMD_NEW_STATE:
+			/* Enter a new state.  */
+			sctp_cmd_new_state(commands, asoc, cmd->obj.state);
+			break;
+
+		case SCTP_CMD_REPORT_TSN:
+			/* Record the arrival of a TSN.  */
+			sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_REPORT_FWDTSN:
+			/* Move the Cumulattive TSN Ack ahead. */
+			sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32);
+
+			/* Abort any in progress partial delivery. */
+			sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+			break;
+
+		case SCTP_CMD_PROCESS_FWDTSN:
+			sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.ptr);
+                        break;
+
+		case SCTP_CMD_GEN_SACK:
+			/* Generate a Selective ACK.
+			 * The argument tells us whether to just count
+			 * the packet and MAYBE generate a SACK, or
+			 * force a SACK out.
+			 */
+			force = cmd->obj.i32;
+			error = sctp_gen_sack(asoc, force, commands);
+			break;
+
+		case SCTP_CMD_PROCESS_SACK:
+			/* Process an inbound SACK.  */
+			error = sctp_cmd_process_sack(commands, asoc,
+						      cmd->obj.ptr);
+			break;
+
+		case SCTP_CMD_GEN_INIT_ACK:
+			/* Generate an INIT ACK chunk.  */
+			new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC,
+						     0);
+			if (!new_obj)
+				goto nomem;
+
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(new_obj));
+			break;
+
+		case SCTP_CMD_PEER_INIT:
+			/* Process a unified INIT from the peer.
+			 * Note: Only used during INIT-ACK processing.  If
+			 * there is an error just return to the outter
+			 * layer which will bail.
+			 */
+			error = sctp_cmd_process_init(commands, asoc, chunk,
+						      cmd->obj.ptr, gfp);
+			break;
+
+		case SCTP_CMD_GEN_COOKIE_ECHO:
+			/* Generate a COOKIE ECHO chunk.  */
+			new_obj = sctp_make_cookie_echo(asoc, chunk);
+			if (!new_obj) {
+				if (cmd->obj.ptr)
+					sctp_chunk_free(cmd->obj.ptr);
+				goto nomem;
+			}
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(new_obj));
+
+			/* If there is an ERROR chunk to be sent along with
+			 * the COOKIE_ECHO, send it, too.
+			 */
+			if (cmd->obj.ptr)
+				sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+						SCTP_CHUNK(cmd->obj.ptr));
+
+			/* FIXME - Eventually come up with a cleaner way to
+			 * enabling COOKIE-ECHO + DATA bundling during 
+			 * multihoming stale cookie scenarios, the following 
+			 * command plays with asoc->peer.retran_path to 
+			 * avoid the problem of sending the COOKIE-ECHO and 
+			 * DATA in different paths, which could result 
+			 * in the association being ABORTed if the DATA chunk 
+			 * is processed first by the server.  Checking the
+			 * init error counter simply causes this command
+			 * to be executed only during failed attempts of
+			 * association establishment.
+			 */
+			if ((asoc->peer.retran_path != 
+			     asoc->peer.primary_path) && 
+			    (asoc->counters[SCTP_COUNTER_INIT_ERROR] > 0)) {
+				sctp_add_cmd_sf(commands, 
+				                SCTP_CMD_FORCE_PRIM_RETRAN,
+						SCTP_NULL());
+			}
+
+			break;
+
+		case SCTP_CMD_GEN_SHUTDOWN:
+			/* Generate SHUTDOWN when in SHUTDOWN_SENT state.
+			 * Reset error counts.
+			 */
+			asoc->overall_error_count = 0;
+
+			/* Generate a SHUTDOWN chunk.  */
+			new_obj = sctp_make_shutdown(asoc, chunk);
+			if (!new_obj)
+				goto nomem;
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(new_obj));
+			break;
+
+		case SCTP_CMD_CHUNK_ULP:
+			/* Send a chunk to the sockets layer.  */
+			SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
+					  "chunk_up:", cmd->obj.ptr,
+					  "ulpq:", &asoc->ulpq);
+			sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.ptr,
+					    GFP_ATOMIC);
+			break;
+
+		case SCTP_CMD_EVENT_ULP:
+			/* Send a notification to the sockets layer.  */
+			SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
+					  "event_up:",cmd->obj.ptr,
+					  "ulpq:",&asoc->ulpq);
+			sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ptr);
+			break;
+
+		case SCTP_CMD_REPLY:
+			/* If an caller has not already corked, do cork. */
+			if (!asoc->outqueue.cork) {
+				sctp_outq_cork(&asoc->outqueue);
+				local_cork = 1;
+			}
+			/* Send a chunk to our peer.  */
+			error = sctp_outq_tail(&asoc->outqueue, cmd->obj.ptr);
+			break;
+
+		case SCTP_CMD_SEND_PKT:
+			/* Send a full packet to our peer.  */
+			packet = cmd->obj.ptr;
+			sctp_packet_transmit(packet);
+			sctp_ootb_pkt_free(packet);
+			break;
+
+		case SCTP_CMD_RETRAN:
+			/* Mark a transport for retransmission.  */
+			sctp_retransmit(&asoc->outqueue, cmd->obj.transport,
+					SCTP_RTXR_T3_RTX);
+			break;
+
+		case SCTP_CMD_TRANSMIT:
+			/* Kick start transmission. */
+			error = sctp_outq_uncork(&asoc->outqueue);
+			local_cork = 0;
+			break;
+
+		case SCTP_CMD_ECN_CE:
+			/* Do delayed CE processing.   */
+			sctp_do_ecn_ce_work(asoc, cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_ECN_ECNE:
+			/* Do delayed ECNE processing. */
+			new_obj = sctp_do_ecn_ecne_work(asoc, cmd->obj.u32,
+							chunk);
+			if (new_obj)
+				sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+						SCTP_CHUNK(new_obj));
+			break;
+
+		case SCTP_CMD_ECN_CWR:
+			/* Do delayed CWR processing.  */
+			sctp_do_ecn_cwr_work(asoc, cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_SETUP_T2:
+			sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
+			break;
+
+		case SCTP_CMD_TIMER_START:
+			timer = &asoc->timers[cmd->obj.to];
+			timeout = asoc->timeouts[cmd->obj.to];
+			if (!timeout)
+				BUG();
+
+			timer->expires = jiffies + timeout;
+			sctp_association_hold(asoc);
+			add_timer(timer);
+			break;
+
+		case SCTP_CMD_TIMER_RESTART:
+			timer = &asoc->timers[cmd->obj.to];
+			timeout = asoc->timeouts[cmd->obj.to];
+			if (!mod_timer(timer, jiffies + timeout))
+				sctp_association_hold(asoc);
+			break;
+
+		case SCTP_CMD_TIMER_STOP:
+			timer = &asoc->timers[cmd->obj.to];
+			if (timer_pending(timer) && del_timer(timer))
+				sctp_association_put(asoc);
+			break;
+
+		case SCTP_CMD_INIT_RESTART:
+			/* Do the needed accounting and updates
+			 * associated with restarting an initialization
+			 * timer.
+			 */
+			asoc->counters[SCTP_COUNTER_INIT_ERROR]++;
+			asoc->timeouts[cmd->obj.to] *= 2;
+			if (asoc->timeouts[cmd->obj.to] >
+			    asoc->max_init_timeo) {
+				asoc->timeouts[cmd->obj.to] =
+					asoc->max_init_timeo;
+			}
+
+			/* If we've sent any data bundled with
+			 * COOKIE-ECHO we need to resend.
+			 */
+			list_for_each(pos, &asoc->peer.transport_addr_list) {
+				t = list_entry(pos, struct sctp_transport,
+					       transports);
+				sctp_retransmit_mark(&asoc->outqueue, t, 0);
+			}
+
+			sctp_add_cmd_sf(commands,
+					SCTP_CMD_TIMER_RESTART,
+					SCTP_TO(cmd->obj.to));
+			break;
+
+		case SCTP_CMD_INIT_FAILED:
+			sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_ASSOC_FAILED:
+			sctp_cmd_assoc_failed(commands, asoc, event_type,
+					      subtype, chunk, cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_COUNTER_INC:
+			asoc->counters[cmd->obj.counter]++;
+			break;
+
+		case SCTP_CMD_COUNTER_RESET:
+			asoc->counters[cmd->obj.counter] = 0;
+			break;
+
+		case SCTP_CMD_REPORT_DUP:
+			sctp_tsnmap_mark_dup(&asoc->peer.tsn_map,
+					     cmd->obj.u32);
+			break;
+
+		case SCTP_CMD_REPORT_BAD_TAG:
+			SCTP_DEBUG_PRINTK("vtag mismatch!\n");
+			break;
+
+		case SCTP_CMD_STRIKE:
+			/* Mark one strike against a transport.  */
+			sctp_do_8_2_transport_strike(asoc, cmd->obj.transport);
+			break;
+
+		case SCTP_CMD_TRANSPORT_RESET:
+			t = cmd->obj.transport;
+			sctp_cmd_transport_reset(commands, asoc, t);
+			break;
+
+		case SCTP_CMD_TRANSPORT_ON:
+			t = cmd->obj.transport;
+			sctp_cmd_transport_on(commands, asoc, t, chunk);
+			break;
+
+		case SCTP_CMD_HB_TIMERS_START:
+			sctp_cmd_hb_timers_start(commands, asoc);
+			break;
+
+		case SCTP_CMD_HB_TIMER_UPDATE:
+			t = cmd->obj.transport;
+			sctp_cmd_hb_timer_update(commands, asoc, t);
+			break;
+
+		case SCTP_CMD_HB_TIMERS_STOP:
+			sctp_cmd_hb_timers_stop(commands, asoc);
+			break;
+
+		case SCTP_CMD_REPORT_ERROR:
+			error = cmd->obj.error;
+			break;
+
+		case SCTP_CMD_PROCESS_CTSN:
+			/* Dummy up a SACK for processing. */
+			sackh.cum_tsn_ack = cmd->obj.u32;
+			sackh.a_rwnd = 0;
+			sackh.num_gap_ack_blocks = 0;
+			sackh.num_dup_tsns = 0;
+			sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK,
+					SCTP_SACKH(&sackh));
+			break;
+
+		case SCTP_CMD_DISCARD_PACKET:
+			/* We need to discard the whole packet.  */
+			chunk->pdiscard = 1;
+			break;
+
+		case SCTP_CMD_RTO_PENDING:
+			t = cmd->obj.transport;
+			t->rto_pending = 1;
+			break;
+
+		case SCTP_CMD_PART_DELIVER:
+			sctp_ulpq_partial_delivery(&asoc->ulpq, cmd->obj.ptr,
+						   GFP_ATOMIC);
+			break;
+
+		case SCTP_CMD_RENEGE:
+			sctp_ulpq_renege(&asoc->ulpq, cmd->obj.ptr,
+					 GFP_ATOMIC);
+			break;
+
+		case SCTP_CMD_SETUP_T4:
+			sctp_cmd_setup_t4(commands, asoc, cmd->obj.ptr);
+			break;
+
+		case SCTP_CMD_PROCESS_OPERR:
+			sctp_cmd_process_operr(commands, asoc, chunk);
+			break;
+		case SCTP_CMD_CLEAR_INIT_TAG:
+			asoc->peer.i.init_tag = 0;
+			break;
+		case SCTP_CMD_DEL_NON_PRIMARY:
+			sctp_cmd_del_non_primary(asoc);
+			break;
+		case SCTP_CMD_T3_RTX_TIMERS_STOP:
+			sctp_cmd_t3_rtx_timers_stop(commands, asoc);
+			break;
+		case SCTP_CMD_FORCE_PRIM_RETRAN:
+			t = asoc->peer.retran_path;
+			asoc->peer.retran_path = asoc->peer.primary_path;
+			error = sctp_outq_uncork(&asoc->outqueue);
+			local_cork = 0;
+			asoc->peer.retran_path = t;
+			break;
+		default:
+			printk(KERN_WARNING "Impossible command: %u, %p\n",
+			       cmd->verb, cmd->obj.ptr);
+			break;
+		};
+		if (error)
+			break;
+	}
+
+out:
+	if (local_cork)
+		sctp_outq_uncork(&asoc->outqueue);
+	return error;
+nomem:
+	error = -ENOMEM;
+	goto out;
+}
+
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
new file mode 100644
index 00000000000..278c56a2d07
--- /dev/null
+++ b/net/sctp/sm_statefuns.c
@@ -0,0 +1,5238 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2002 Intel Corp.
+ * Copyright (c) 2002      Nokia Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * This is part of the SCTP Linux Kernel Reference Implementation.
+ *
+ * These are the state functions for the state machine.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Mathew Kotowsky       <kotowsky@sctp.org>
+ *    Sridhar Samudrala     <samudrala@us.ibm.com>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Hui Huang 	    <hui.huang@nokia.com>
+ *    Dajiang Zhang 	    <dajiang.zhang@nokia.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Ryan Layer	    <rmlayer@us.ibm.com>
+ *    Kevin Gao		    <kevin.gao@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/net.h>
+#include <linux/inet.h>
+#include <net/sock.h>
+#include <net/inet_ecn.h>
+#include <linux/skbuff.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/structs.h>
+
+static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+				  const struct sctp_association *asoc,
+				  struct sctp_chunk *chunk,
+				  const void *payload,
+				  size_t paylen);
+static int sctp_eat_data(const struct sctp_association *asoc,
+			 struct sctp_chunk *chunk,
+			 sctp_cmd_seq_t *commands);
+static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+					     const struct sctp_chunk *chunk);
+static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const struct sctp_chunk *chunk,
+				       sctp_cmd_seq_t *commands,
+				       struct sctp_chunk *err_chunk);
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+						 const struct sctp_association *asoc,
+						 const sctp_subtype_t type,
+						 void *arg,
+						 sctp_cmd_seq_t *commands);
+static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+					     const struct sctp_association *asoc,
+					     const sctp_subtype_t type,
+					     void *arg,
+					     sctp_cmd_seq_t *commands);
+static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
+
+
+/* Small helper function that checks if the chunk length
+ * is of the appropriate length.  The 'required_length' argument
+ * is set to be the size of a specific chunk we are testing.
+ * Return Values:  1 = Valid length
+ * 		   0 = Invalid length
+ *
+ */
+static inline int
+sctp_chunk_length_valid(struct sctp_chunk *chunk,
+			   __u16 required_length)
+{
+	__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
+
+	if (unlikely(chunk_length < required_length))
+		return 0;
+
+	return 1;
+}
+
+/**********************************************************
+ * These are the state functions for handling chunk events.
+ **********************************************************/
+
+/*
+ * Process the final SHUTDOWN COMPLETE.
+ *
+ * Section: 4 (C) (diagram), 9.2
+ * Upon reception of the SHUTDOWN COMPLETE chunk the endpoint will verify
+ * that it is in SHUTDOWN-ACK-SENT state, if it is not the chunk should be
+ * discarded. If the endpoint is in the SHUTDOWN-ACK-SENT state the endpoint
+ * should stop the T2-shutdown timer and remove all knowledge of the
+ * association (and thus the association enters the CLOSED state).
+ *
+ * Verification Tag: 8.5.1(C)
+ * C) Rules for packet carrying SHUTDOWN COMPLETE:
+ * ...
+ * - The receiver of a SHUTDOWN COMPLETE shall accept the packet if the
+ *   Verification Tag field of the packet matches its own tag OR it is
+ *   set to its peer's tag and the T bit is set in the Chunk Flags.
+ *   Otherwise, the receiver MUST silently discard the packet and take
+ *   no further action. An endpoint MUST ignore the SHUTDOWN COMPLETE if
+ *   it is not in the SHUTDOWN-ACK-SENT state.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
+				  const struct sctp_association *asoc,
+				  const sctp_subtype_t type,
+				  void *arg,
+				  sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_ulpevent *ev;
+
+	/* RFC 2960 6.10 Bundling
+	 *
+	 * An endpoint MUST NOT bundle INIT, INIT ACK or
+	 * SHUTDOWN COMPLETE with any other chunks.
+	 */
+	if (!chunk->singleton)
+		return SCTP_DISPOSITION_VIOLATION;
+
+	if (!sctp_vtag_verify_either(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* RFC 2960 10.2 SCTP-to-ULP
+	 *
+	 * H) SHUTDOWN COMPLETE notification
+	 *
+	 * When SCTP completes the shutdown procedures (section 9.2) this
+	 * notification is passed to the upper layer.
+	 */
+	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
+					     0, 0, 0, GFP_ATOMIC);
+	if (!ev)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint
+	 * will verify that it is in SHUTDOWN-ACK-SENT state, if it is
+	 * not the chunk should be discarded. If the endpoint is in
+	 * the SHUTDOWN-ACK-SENT state the endpoint should stop the
+	 * T2-shutdown timer and remove all knowledge of the
+	 * association (and thus the association enters the CLOSED
+	 * state).
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+
+	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+
+	return SCTP_DISPOSITION_DELETE_TCB;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Respond to a normal INIT chunk.
+ * We are the side that is being asked for an association.
+ *
+ * Section: 5.1 Normal Establishment of an Association, B
+ * B) "Z" shall respond immediately with an INIT ACK chunk.  The
+ *    destination IP address of the INIT ACK MUST be set to the source
+ *    IP address of the INIT to which this INIT ACK is responding.  In
+ *    the response, besides filling in other parameters, "Z" must set the
+ *    Verification Tag field to Tag_A, and also provide its own
+ *    Verification Tag (Tag_Z) in the Initiate Tag field.
+ *
+ * Verification Tag: Must be 0. 
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *repl;
+	struct sctp_association *new_asoc;
+	struct sctp_chunk *err_chunk;
+	struct sctp_packet *packet;
+	sctp_unrecognized_param_t *unk_param;
+	struct sock *sk;
+	int len;
+
+	/* 6.10 Bundling
+	 * An endpoint MUST NOT bundle INIT, INIT ACK or
+	 * SHUTDOWN COMPLETE with any other chunks.
+	 * 
+	 * IG Section 2.11.2
+	 * Furthermore, we require that the receiver of an INIT chunk MUST
+	 * enforce these rules by silently discarding an arriving packet
+	 * with an INIT chunk that is bundled with other chunks.
+	 */
+	if (!chunk->singleton)
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* If the packet is an OOTB packet which is temporarily on the
+	 * control endpoint, respond with an ABORT.
+	 */
+	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
+		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+
+	sk = ep->base.sk;
+	/* If the endpoint is not listening or if the number of associations
+	 * on the TCP-style socket exceed the max backlog, respond with an
+	 * ABORT.
+	 */
+	if (!sctp_sstate(sk, LISTENING) ||
+	    (sctp_style(sk, TCP) &&
+	     sk_acceptq_is_full(sk)))
+		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+
+	/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
+	 * Tag. 
+	 */
+	if (chunk->sctp_hdr->vtag != 0)
+		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+
+	/* Make sure that the INIT chunk has a valid length.
+	 * Normally, this would cause an ABORT with a Protocol Violation
+	 * error, but since we don't have an association, we'll
+	 * just discard the packet.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Verify the INIT chunk before processing it. */
+	err_chunk = NULL;
+	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      &err_chunk)) {
+		/* This chunk contains fatal error. It is to be discarded.
+		 * Send an ABORT, with causes if there is any.
+		 */
+		if (err_chunk) {
+			packet = sctp_abort_pkt_new(ep, asoc, arg,
+					(__u8 *)(err_chunk->chunk_hdr) +
+					sizeof(sctp_chunkhdr_t),
+					ntohs(err_chunk->chunk_hdr->length) -
+					sizeof(sctp_chunkhdr_t));
+
+			sctp_chunk_free(err_chunk);
+
+			if (packet) {
+				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+						SCTP_PACKET(packet));
+				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				return SCTP_DISPOSITION_CONSUME;
+			} else {
+				return SCTP_DISPOSITION_NOMEM;
+			}
+		} else {
+			return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+						    commands);
+		}
+	}
+
+        /* Grab the INIT header.  */
+	chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data;
+
+	/* Tag the variable length parameters.  */
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+
+	new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
+	if (!new_asoc)
+		goto nomem;
+
+	/* The call, sctp_process_init(), can fail on memory allocation.  */
+	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
+			       sctp_source(chunk),
+			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       GFP_ATOMIC))
+		goto nomem_init;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+
+	/* B) "Z" shall respond immediately with an INIT ACK chunk.  */
+
+	/* If there are errors need to be reported for unknown parameters,
+	 * make sure to reserve enough room in the INIT ACK for them.
+	 */
+	len = 0;
+	if (err_chunk)
+		len = ntohs(err_chunk->chunk_hdr->length) -
+			sizeof(sctp_chunkhdr_t);
+
+	if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
+		goto nomem_ack;
+
+	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
+	if (!repl)
+		goto nomem_ack;
+
+	/* If there are errors need to be reported for unknown parameters,
+	 * include them in the outgoing INIT ACK as "Unrecognized parameter"
+	 * parameter.
+	 */
+	if (err_chunk) {
+		/* Get the "Unrecognized parameter" parameter(s) out of the
+		 * ERROR chunk generated by sctp_verify_init(). Since the
+		 * error cause code for "unknown parameter" and the
+		 * "Unrecognized parameter" type is the same, we can
+		 * construct the parameters in INIT ACK by copying the
+		 * ERROR causes over.
+		 */
+		unk_param = (sctp_unrecognized_param_t *)
+			    ((__u8 *)(err_chunk->chunk_hdr) +
+			    sizeof(sctp_chunkhdr_t));
+		/* Replace the cause code with the "Unrecognized parameter"
+		 * parameter type.
+		 */
+		sctp_addto_chunk(repl, len, unk_param);
+		sctp_chunk_free(err_chunk);
+	}
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/*
+	 * Note:  After sending out INIT ACK with the State Cookie parameter,
+	 * "Z" MUST NOT allocate any resources, nor keep any states for the
+	 * new association.  Otherwise, "Z" will be vulnerable to resource
+	 * attacks.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+
+	return SCTP_DISPOSITION_DELETE_TCB;
+
+nomem_ack:
+	if (err_chunk)
+		sctp_chunk_free(err_chunk);
+nomem_init:
+	sctp_association_free(new_asoc);
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Respond to a normal INIT ACK chunk.
+ * We are the side that is initiating the association.
+ *
+ * Section: 5.1 Normal Establishment of an Association, C
+ * C) Upon reception of the INIT ACK from "Z", "A" shall stop the T1-init
+ *    timer and leave COOKIE-WAIT state. "A" shall then send the State
+ *    Cookie received in the INIT ACK chunk in a COOKIE ECHO chunk, start
+ *    the T1-cookie timer, and enter the COOKIE-ECHOED state.
+ *
+ *    Note: The COOKIE ECHO chunk can be bundled with any pending outbound
+ *    DATA chunks, but it MUST be the first chunk in the packet and
+ *    until the COOKIE ACK is returned the sender MUST NOT send any
+ *    other packets to the peer.
+ *
+ * Verification Tag: 3.3.3
+ *   If the value of the Initiate Tag in a received INIT ACK chunk is
+ *   found to be 0, the receiver MUST treat it as an error and close the
+ *   association by transmitting an ABORT.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const sctp_subtype_t type,
+				       void *arg,
+				       sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	sctp_init_chunk_t *initchunk;
+	__u32 init_tag;
+	struct sctp_chunk *err_chunk;
+	struct sctp_packet *packet;
+	sctp_disposition_t ret;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the INIT-ACK chunk has a valid length */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+	/* 6.10 Bundling
+	 * An endpoint MUST NOT bundle INIT, INIT ACK or
+	 * SHUTDOWN COMPLETE with any other chunks.
+	 */
+	if (!chunk->singleton)
+		return SCTP_DISPOSITION_VIOLATION;
+
+	/* Grab the INIT header.  */
+	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+
+	init_tag = ntohl(chunk->subh.init_hdr->init_tag);
+
+	/* Verification Tag: 3.3.3
+	 *   If the value of the Initiate Tag in a received INIT ACK
+	 *   chunk is found to be 0, the receiver MUST treat it as an
+	 *   error and close the association by transmitting an ABORT.
+	 */
+	if (!init_tag) {
+		struct sctp_chunk *reply = sctp_make_abort(asoc, chunk, 0);
+		if (!reply)
+			goto nomem;
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+				SCTP_STATE(SCTP_STATE_CLOSED));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	/* Verify the INIT chunk before processing it. */
+	err_chunk = NULL;
+	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      &err_chunk)) {
+
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+
+		/* This chunk contains fatal error. It is to be discarded.
+		 * Send an ABORT, with causes if there is any.
+		 */
+		if (err_chunk) {
+			packet = sctp_abort_pkt_new(ep, asoc, arg,
+					(__u8 *)(err_chunk->chunk_hdr) +
+					sizeof(sctp_chunkhdr_t),
+					ntohs(err_chunk->chunk_hdr->length) -
+					sizeof(sctp_chunkhdr_t));
+
+			sctp_chunk_free(err_chunk);
+
+			if (packet) {
+				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+						SCTP_PACKET(packet));
+				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+						SCTP_STATE(SCTP_STATE_CLOSED));
+				sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB,
+						SCTP_NULL());
+				return SCTP_DISPOSITION_CONSUME;
+			} else {
+				sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+						SCTP_STATE(SCTP_STATE_CLOSED));
+				sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB,
+						SCTP_NULL());
+				return SCTP_DISPOSITION_NOMEM;
+			}
+		} else {
+			ret = sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+						   commands);
+			sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+					SCTP_STATE(SCTP_STATE_CLOSED));
+			sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB,
+					SCTP_NULL());
+			return ret;
+		}
+	}
+
+	/* Tag the variable length parameters.  Note that we never
+	 * convert the parameters in an INIT chunk.
+	 */
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+
+	initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
+			SCTP_PEER_INIT(initchunk));
+
+	/* 5.1 C) "A" shall stop the T1-init timer and leave
+	 * COOKIE-WAIT state.  "A" shall then ... start the T1-cookie
+	 * timer, and enter the COOKIE-ECHOED state.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_COOKIE_ECHOED));
+
+	/* 5.1 C) "A" shall then send the State Cookie received in the
+	 * INIT ACK chunk in a COOKIE ECHO chunk, ...
+	 */
+	/* If there is any errors to report, send the ERROR chunk generated
+	 * for unknown parameters as well.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_COOKIE_ECHO,
+			SCTP_CHUNK(err_chunk));
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Respond to a normal COOKIE ECHO chunk.
+ * We are the side that is being asked for an association.
+ *
+ * Section: 5.1 Normal Establishment of an Association, D
+ * D) Upon reception of the COOKIE ECHO chunk, Endpoint "Z" will reply
+ *    with a COOKIE ACK chunk after building a TCB and moving to
+ *    the ESTABLISHED state. A COOKIE ACK chunk may be bundled with
+ *    any pending DATA chunks (and/or SACK chunks), but the COOKIE ACK
+ *    chunk MUST be the first chunk in the packet.
+ *
+ *   IMPLEMENTATION NOTE: An implementation may choose to send the
+ *   Communication Up notification to the SCTP user upon reception
+ *   of a valid COOKIE ECHO chunk.
+ *
+ * Verification Tag: 8.5.1 Exceptions in Verification Tag Rules
+ * D) Rules for packet carrying a COOKIE ECHO
+ *
+ * - When sending a COOKIE ECHO, the endpoint MUST use the value of the
+ *   Initial Tag received in the INIT ACK.
+ *
+ * - The receiver of a COOKIE ECHO follows the procedures in Section 5.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const sctp_subtype_t type, void *arg,
+				      sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_association *new_asoc;
+	sctp_init_chunk_t *peer_init;
+	struct sctp_chunk *repl;
+	struct sctp_ulpevent *ev;
+	int error = 0;
+	struct sctp_chunk *err_chk_p;
+
+	/* If the packet is an OOTB packet which is temporarily on the
+	 * control endpoint, respond with an ABORT.
+	 */
+	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
+		return sctp_sf_ootb(ep, asoc, type, arg, commands);
+
+	/* Make sure that the COOKIE_ECHO chunk has a valid length.
+	 * In this case, we check that we have enough for at least a
+	 * chunk header.  More detailed verification is done
+	 * in sctp_unpack_cookie().
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* "Decode" the chunk.  We have no optional parameters so we
+	 * are in good shape.
+	 */
+        chunk->subh.cookie_hdr =
+		(struct sctp_signed_cookie *)chunk->skb->data;
+	skb_pull(chunk->skb,
+		 ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t));
+
+	/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
+	 * "Z" will reply with a COOKIE ACK chunk after building a TCB
+	 * and moving to the ESTABLISHED state.
+	 */
+	new_asoc = sctp_unpack_cookie(ep, asoc, chunk, GFP_ATOMIC, &error,
+				      &err_chk_p);
+
+	/* FIXME:
+	 * If the re-build failed, what is the proper error path
+	 * from here?
+	 *
+	 * [We should abort the association. --piggy]
+	 */
+	if (!new_asoc) {
+		/* FIXME: Several errors are possible.  A bad cookie should
+		 * be silently discarded, but think about logging it too.
+		 */
+		switch (error) {
+		case -SCTP_IERROR_NOMEM:
+			goto nomem;
+
+		case -SCTP_IERROR_STALE_COOKIE:
+			sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+						   err_chk_p);
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+		case -SCTP_IERROR_BAD_SIG:
+		default:
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		};
+	}
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+
+	if (new_asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	/* Re-build the bind address for the association is done in
+	 * the sctp_unpack_cookie() already.
+	 */
+	/* This is a brand-new association, so these are not yet side
+	 * effects--it is safe to run them here.
+	 */
+	peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
+
+	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
+			       &chunk->subh.cookie_hdr->c.peer_addr,
+			       peer_init, GFP_ATOMIC))
+		goto nomem_init;
+
+	repl = sctp_make_cookie_ack(new_asoc, chunk);
+	if (!repl)
+		goto nomem_repl;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/* RFC 2960 5.1 Normal Establishment of an Association
+	 *
+	 * D) IMPLEMENTATION NOTE: An implementation may choose to
+	 * send the Communication Up notification to the SCTP user
+	 * upon reception of a valid COOKIE ECHO chunk.
+	 */
+	ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
+					     new_asoc->c.sinit_num_ostreams,
+					     new_asoc->c.sinit_max_instreams,
+					     GFP_ATOMIC);
+	if (!ev)
+		goto nomem_ev;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Sockets API Draft Section 5.3.1.6 	
+	 * When a peer sends a Adaption Layer Indication parameter , SCTP
+	 * delivers this notification to inform the application that of the
+	 * peers requested adaption layer.
+	 */
+	if (new_asoc->peer.adaption_ind) {
+		ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+							    GFP_ATOMIC);
+		if (!ev)
+			goto nomem_ev;
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	}
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem_ev:
+	sctp_chunk_free(repl);
+nomem_repl:
+nomem_init:
+	sctp_association_free(new_asoc);
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Respond to a normal COOKIE ACK chunk.
+ * We are the side that is being asked for an association.
+ *
+ * RFC 2960 5.1 Normal Establishment of an Association
+ *
+ * E) Upon reception of the COOKIE ACK, endpoint "A" will move from the
+ *    COOKIE-ECHOED state to the ESTABLISHED state, stopping the T1-cookie
+ *    timer. It may also notify its ULP about the successful
+ *    establishment of the association with a Communication Up
+ *    notification (see Section 10).
+ *
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const sctp_subtype_t type, void *arg,
+				      sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_ulpevent *ev;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Verify that the chunk length for the COOKIE-ACK is OK.
+	 * If we don't do this, any bundled chunks may be junked.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* Reset init error count upon receipt of COOKIE-ACK,
+	 * to avoid problems with the managemement of this
+	 * counter in stale cookie situations when a transition back
+	 * from the COOKIE-ECHOED state to the COOKIE-WAIT
+	 * state is performed.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_RESET,
+	                SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
+
+	/* RFC 2960 5.1 Normal Establishment of an Association
+	 *
+	 * E) Upon reception of the COOKIE ACK, endpoint "A" will move
+	 * from the COOKIE-ECHOED state to the ESTABLISHED state,
+	 * stopping the T1-cookie timer.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS);
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+	if (asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	/* It may also notify its ULP about the successful
+	 * establishment of the association with a Communication Up
+	 * notification (see Section 10).
+	 */
+	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
+					     0, asoc->c.sinit_num_ostreams,
+					     asoc->c.sinit_max_instreams,
+					     GFP_ATOMIC);
+
+	if (!ev)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Sockets API Draft Section 5.3.1.6
+	 * When a peer sends a Adaption Layer Indication parameter , SCTP
+	 * delivers this notification to inform the application that of the
+	 * peers requested adaption layer.
+	 */
+	if (asoc->peer.adaption_ind) {
+		ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC);
+		if (!ev)
+			goto nomem;
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	}
+
+	return SCTP_DISPOSITION_CONSUME;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/* Generate and sendout a heartbeat packet.  */
+static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const sctp_subtype_t type,
+					    void *arg,
+					    sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *transport = (struct sctp_transport *) arg;
+	struct sctp_chunk *reply;
+	sctp_sender_hb_info_t hbinfo;
+	size_t paylen = 0;
+
+	hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
+	hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
+	hbinfo.daddr = transport->ipaddr;
+	hbinfo.sent_at = jiffies;
+
+	/* Send a heartbeat to our peer.  */
+	paylen = sizeof(sctp_sender_hb_info_t);
+	reply = sctp_make_heartbeat(asoc, transport, &hbinfo, paylen);
+	if (!reply)
+		return SCTP_DISPOSITION_NOMEM;
+
+	/* Set rto_pending indicating that an RTT measurement
+	 * is started with this heartbeat chunk.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_RTO_PENDING,
+			SCTP_TRANSPORT(transport));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/* Generate a HEARTBEAT packet on the given transport.  */
+sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *transport = (struct sctp_transport *) arg;
+
+	if (asoc->overall_error_count > asoc->max_retrans) {
+		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_ERROR));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	/* Section 3.3.5.
+	 * The Sender-specific Heartbeat Info field should normally include
+	 * information about the sender's current time when this HEARTBEAT
+	 * chunk is sent and the destination transport address to which this
+	 * HEARTBEAT is sent (see Section 8.3).
+	 */
+
+	if (transport->hb_allowed) {
+		if (SCTP_DISPOSITION_NOMEM ==
+				sctp_sf_heartbeat(ep, asoc, type, arg,
+						  commands))
+			return SCTP_DISPOSITION_NOMEM;
+		/* Set transport error counter and association error counter
+		 * when sending heartbeat.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET,
+				SCTP_TRANSPORT(transport));
+	}
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE,
+			SCTP_TRANSPORT(transport));
+
+        return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Process an heartbeat request.
+ *
+ * Section: 8.3 Path Heartbeat
+ * The receiver of the HEARTBEAT should immediately respond with a
+ * HEARTBEAT ACK that contains the Heartbeat Information field copied
+ * from the received HEARTBEAT chunk.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ * When receiving an SCTP packet, the endpoint MUST ensure that the
+ * value in the Verification Tag field of the received SCTP packet
+ * matches its own Tag. If the received Verification Tag value does not
+ * match the receiver's own tag value, the receiver shall silently
+ * discard the packet and shall not process it any further except for
+ * those cases listed in Section 8.5.1 below.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
+				    const struct sctp_association *asoc,
+				    const sctp_subtype_t type,
+				    void *arg,
+				    sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *reply;
+	size_t paylen = 0;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the HEARTBEAT chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* 8.3 The receiver of the HEARTBEAT should immediately
+	 * respond with a HEARTBEAT ACK that contains the Heartbeat
+	 * Information field copied from the received HEARTBEAT chunk.
+	 */
+	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
+	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+	skb_pull(chunk->skb, paylen);
+
+	reply = sctp_make_heartbeat_ack(asoc, chunk,
+					chunk->subh.hb_hdr, paylen);
+	if (!reply)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Process the returning HEARTBEAT ACK.
+ *
+ * Section: 8.3 Path Heartbeat
+ * Upon the receipt of the HEARTBEAT ACK, the sender of the HEARTBEAT
+ * should clear the error counter of the destination transport
+ * address to which the HEARTBEAT was sent, and mark the destination
+ * transport address as active if it is not so marked. The endpoint may
+ * optionally report to the upper layer when an inactive destination
+ * address is marked as active due to the reception of the latest
+ * HEARTBEAT ACK. The receiver of the HEARTBEAT ACK must also
+ * clear the association overall error count as well (as defined
+ * in section 8.1).
+ *
+ * The receiver of the HEARTBEAT ACK should also perform an RTT
+ * measurement for that destination transport address using the time
+ * value carried in the HEARTBEAT ACK chunk.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	union sctp_addr from_addr;
+	struct sctp_transport *link;
+	sctp_sender_hb_info_t *hbinfo;
+	unsigned long max_interval;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	from_addr = hbinfo->daddr;
+	link = sctp_assoc_lookup_paddr(asoc, &from_addr);
+
+	/* This should never happen, but lets log it if so.  */
+	if (!link) {
+		printk(KERN_WARNING
+		       "%s: Could not find address %d.%d.%d.%d\n",
+		       __FUNCTION__, NIPQUAD(from_addr.v4.sin_addr));
+		return SCTP_DISPOSITION_DISCARD;
+	}
+
+	max_interval = link->hb_interval + link->rto;
+
+	/* Check if the timestamp looks valid.  */
+	if (time_after(hbinfo->sent_at, jiffies) ||
+	    time_after(jiffies, hbinfo->sent_at + max_interval)) {
+		SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp"
+				  "received for transport: %p\n",
+				   __FUNCTION__, link);
+		return SCTP_DISPOSITION_DISCARD;
+	}
+
+	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of
+	 * the HEARTBEAT should clear the error counter of the
+	 * destination transport address to which the HEARTBEAT was
+	 * sent and mark the destination transport address as active if
+	 * it is not so marked.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_ON, SCTP_TRANSPORT(link));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/* Helper function to send out an abort for the restart
+ * condition.
+ */
+static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
+				      struct sctp_chunk *init,
+				      sctp_cmd_seq_t *commands)
+{
+	int len;
+	struct sctp_packet *pkt;
+	union sctp_addr_param *addrparm;
+	struct sctp_errhdr *errhdr;
+	struct sctp_endpoint *ep;
+	char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
+	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
+
+	/* Build the error on the stack.   We are way to malloc crazy
+	 * throughout the code today.
+	 */
+	errhdr = (struct sctp_errhdr *)buffer;
+	addrparm = (union sctp_addr_param *)errhdr->variable;
+
+	/* Copy into a parm format. */
+	len = af->to_addr_param(ssa, addrparm);
+	len += sizeof(sctp_errhdr_t);
+
+	errhdr->cause = SCTP_ERROR_RESTART;
+	errhdr->length = htons(len);
+
+	/* Assign to the control socket. */
+	ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+
+	/* Association is NULL since this may be a restart attack and we
+	 * want to send back the attacker's vtag.
+	 */
+	pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len);
+
+	if (!pkt)
+		goto out;
+	sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt));
+
+	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+	/* Discard the rest of the inbound packet. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
+
+out:
+	/* Even if there is no memory, treat as a failure so
+	 * the packet will get dropped.
+	 */
+	return 0;
+}
+
+/* A restart is occurring, check to make sure no new addresses
+ * are being added as we may be under a takeover attack.
+ */
+static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
+				       const struct sctp_association *asoc,
+				       struct sctp_chunk *init,
+				       sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *new_addr, *addr;
+	struct list_head *pos, *pos2;
+	int found;
+
+	/* Implementor's Guide - Sectin 5.2.2
+	 * ...
+	 * Before responding the endpoint MUST check to see if the
+	 * unexpected INIT adds new addresses to the association. If new
+	 * addresses are added to the association, the endpoint MUST respond
+	 * with an ABORT..
+	 */
+
+	/* Search through all current addresses and make sure
+	 * we aren't adding any new ones.
+	 */
+	new_addr = NULL;
+	found = 0;
+
+	list_for_each(pos, &new_asoc->peer.transport_addr_list) {
+		new_addr = list_entry(pos, struct sctp_transport, transports);
+		found = 0;
+		list_for_each(pos2, &asoc->peer.transport_addr_list) {
+			addr = list_entry(pos2, struct sctp_transport,
+					  transports);
+			if (sctp_cmp_addr_exact(&new_addr->ipaddr,
+						&addr->ipaddr)) {
+				found = 1;
+				break;
+			}
+		}
+		if (!found)
+			break;
+	}
+
+	/* If a new address was added, ABORT the sender. */
+	if (!found && new_addr) {
+		sctp_sf_send_restart_abort(&new_addr->ipaddr, init, commands);
+	}
+
+	/* Return success if all addresses were found. */
+	return found;
+}
+
+/* Populate the verification/tie tags based on overlapping INIT
+ * scenario.
+ *
+ * Note: Do not use in CLOSED or SHUTDOWN-ACK-SENT state.
+ */
+static void sctp_tietags_populate(struct sctp_association *new_asoc,
+				  const struct sctp_association *asoc)
+{
+	switch (asoc->state) {
+
+	/* 5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State */
+
+	case SCTP_STATE_COOKIE_WAIT:
+		new_asoc->c.my_vtag     = asoc->c.my_vtag;
+		new_asoc->c.my_ttag     = asoc->c.my_vtag;
+		new_asoc->c.peer_ttag   = 0;
+		break;
+
+	case SCTP_STATE_COOKIE_ECHOED:
+		new_asoc->c.my_vtag     = asoc->c.my_vtag;
+		new_asoc->c.my_ttag     = asoc->c.my_vtag;
+		new_asoc->c.peer_ttag   = asoc->c.peer_vtag;
+		break;
+
+	/* 5.2.2 Unexpected INIT in States Other than CLOSED, COOKIE-ECHOED,
+	 * COOKIE-WAIT and SHUTDOWN-ACK-SENT
+	 */
+	default:
+		new_asoc->c.my_ttag   = asoc->c.my_vtag;
+		new_asoc->c.peer_ttag = asoc->c.peer_vtag;
+		break;
+	};
+
+	/* Other parameters for the endpoint SHOULD be copied from the
+	 * existing parameters of the association (e.g. number of
+	 * outbound streams) into the INIT ACK and cookie.
+	 */
+	new_asoc->rwnd                  = asoc->rwnd;
+	new_asoc->c.sinit_num_ostreams  = asoc->c.sinit_num_ostreams;
+	new_asoc->c.sinit_max_instreams = asoc->c.sinit_max_instreams;
+	new_asoc->c.initial_tsn         = asoc->c.initial_tsn;
+}
+
+/*
+ * Compare vtag/tietag values to determine unexpected COOKIE-ECHO
+ * handling action.
+ *
+ * RFC 2960 5.2.4 Handle a COOKIE ECHO when a TCB exists.
+ *
+ * Returns value representing action to be taken.   These action values
+ * correspond to Action/Description values in RFC 2960, Table 2.
+ */
+static char sctp_tietags_compare(struct sctp_association *new_asoc,
+				 const struct sctp_association *asoc)
+{
+	/* In this case, the peer may have restarted.  */
+	if ((asoc->c.my_vtag != new_asoc->c.my_vtag) &&
+	    (asoc->c.peer_vtag != new_asoc->c.peer_vtag) &&
+	    (asoc->c.my_vtag == new_asoc->c.my_ttag) &&
+	    (asoc->c.peer_vtag == new_asoc->c.peer_ttag))
+		return 'A';
+
+	/* Collision case B. */
+	if ((asoc->c.my_vtag == new_asoc->c.my_vtag) &&
+	    ((asoc->c.peer_vtag != new_asoc->c.peer_vtag) ||
+	     (0 == asoc->c.peer_vtag))) {
+		return 'B';
+	}
+
+	/* Collision case D. */
+	if ((asoc->c.my_vtag == new_asoc->c.my_vtag) &&
+	    (asoc->c.peer_vtag == new_asoc->c.peer_vtag))
+		return 'D';
+
+	/* Collision case C. */
+	if ((asoc->c.my_vtag != new_asoc->c.my_vtag) &&
+	    (asoc->c.peer_vtag == new_asoc->c.peer_vtag) &&
+	    (0 == new_asoc->c.my_ttag) &&
+	    (0 == new_asoc->c.peer_ttag))
+		return 'C';
+
+	/* No match to any of the special cases; discard this packet. */
+	return 'E';
+}
+
+/* Common helper routine for both duplicate and simulataneous INIT
+ * chunk handling.
+ */
+static sctp_disposition_t sctp_sf_do_unexpected_init(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg, sctp_cmd_seq_t *commands)
+{
+	sctp_disposition_t retval;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *repl;
+	struct sctp_association *new_asoc;
+	struct sctp_chunk *err_chunk;
+	struct sctp_packet *packet;
+	sctp_unrecognized_param_t *unk_param;
+	int len;
+
+	/* 6.10 Bundling
+	 * An endpoint MUST NOT bundle INIT, INIT ACK or
+	 * SHUTDOWN COMPLETE with any other chunks.
+	 *
+	 * IG Section 2.11.2
+	 * Furthermore, we require that the receiver of an INIT chunk MUST
+	 * enforce these rules by silently discarding an arriving packet
+	 * with an INIT chunk that is bundled with other chunks.
+	 */
+	if (!chunk->singleton)
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
+	 * Tag. 
+	 */
+	if (chunk->sctp_hdr->vtag != 0)
+		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+
+	/* Make sure that the INIT chunk has a valid length.
+	 * In this case, we generate a protocol violation since we have
+	 * an association established.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+	/* Grab the INIT header.  */
+	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+
+	/* Tag the variable length parameters.  */
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+
+	/* Verify the INIT chunk before processing it. */
+	err_chunk = NULL;
+	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      &err_chunk)) {
+		/* This chunk contains fatal error. It is to be discarded.
+		 * Send an ABORT, with causes if there is any.
+		 */
+		if (err_chunk) {
+			packet = sctp_abort_pkt_new(ep, asoc, arg,
+					(__u8 *)(err_chunk->chunk_hdr) +
+					sizeof(sctp_chunkhdr_t),
+					ntohs(err_chunk->chunk_hdr->length) -
+					sizeof(sctp_chunkhdr_t));
+
+			if (packet) {
+				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+						SCTP_PACKET(packet));
+				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				retval = SCTP_DISPOSITION_CONSUME;
+			} else {
+				retval = SCTP_DISPOSITION_NOMEM;
+			}
+			goto cleanup;
+		} else {
+			return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+						    commands);
+		}
+	}
+
+	/*
+	 * Other parameters for the endpoint SHOULD be copied from the
+	 * existing parameters of the association (e.g. number of
+	 * outbound streams) into the INIT ACK and cookie.
+	 * FIXME:  We are copying parameters from the endpoint not the
+	 * association.
+	 */
+	new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
+	if (!new_asoc)
+		goto nomem;
+
+	/* In the outbound INIT ACK the endpoint MUST copy its current
+	 * Verification Tag and Peers Verification tag into a reserved
+	 * place (local tie-tag and per tie-tag) within the state cookie.
+	 */
+	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
+			       sctp_source(chunk),
+			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       GFP_ATOMIC)) {
+		retval = SCTP_DISPOSITION_NOMEM;
+		goto nomem_init;
+	}
+
+	/* Make sure no new addresses are being added during the
+	 * restart.   Do not do this check for COOKIE-WAIT state,
+	 * since there are no peer addresses to check against.
+	 * Upon return an ABORT will have been sent if needed.
+	 */
+	if (!sctp_state(asoc, COOKIE_WAIT)) {
+		if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk,
+						 commands)) {
+			retval = SCTP_DISPOSITION_CONSUME;
+			goto cleanup_asoc;
+		}
+	}
+
+	sctp_tietags_populate(new_asoc, asoc);
+
+	/* B) "Z" shall respond immediately with an INIT ACK chunk.  */
+
+	/* If there are errors need to be reported for unknown parameters,
+	 * make sure to reserve enough room in the INIT ACK for them.
+	 */
+	len = 0;
+	if (err_chunk) {
+		len = ntohs(err_chunk->chunk_hdr->length) -
+			sizeof(sctp_chunkhdr_t);
+	}
+
+	if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
+		goto nomem;
+
+	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
+	if (!repl)
+		goto nomem;
+
+	/* If there are errors need to be reported for unknown parameters,
+	 * include them in the outgoing INIT ACK as "Unrecognized parameter"
+	 * parameter.
+	 */
+	if (err_chunk) {
+		/* Get the "Unrecognized parameter" parameter(s) out of the
+		 * ERROR chunk generated by sctp_verify_init(). Since the
+		 * error cause code for "unknown parameter" and the
+		 * "Unrecognized parameter" type is the same, we can
+		 * construct the parameters in INIT ACK by copying the
+		 * ERROR causes over.
+		 */
+		unk_param = (sctp_unrecognized_param_t *)
+			    ((__u8 *)(err_chunk->chunk_hdr) +
+			    sizeof(sctp_chunkhdr_t));
+		/* Replace the cause code with the "Unrecognized parameter"
+		 * parameter type.
+		 */
+		sctp_addto_chunk(repl, len, unk_param);
+	}
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/*
+	 * Note: After sending out INIT ACK with the State Cookie parameter,
+	 * "Z" MUST NOT allocate any resources for this new association.
+	 * Otherwise, "Z" will be vulnerable to resource attacks.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+	retval = SCTP_DISPOSITION_CONSUME;
+
+cleanup:
+	if (err_chunk)
+		sctp_chunk_free(err_chunk);
+	return retval;
+nomem:
+	retval = SCTP_DISPOSITION_NOMEM;
+	goto cleanup;
+nomem_init:
+cleanup_asoc:
+	sctp_association_free(new_asoc);
+	goto cleanup;
+}
+
+/*
+ * Handle simultanous INIT.
+ * This means we started an INIT and then we got an INIT request from
+ * our peer.
+ *
+ * Section: 5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State (Item B)
+ * This usually indicates an initialization collision, i.e., each
+ * endpoint is attempting, at about the same time, to establish an
+ * association with the other endpoint.
+ *
+ * Upon receipt of an INIT in the COOKIE-WAIT or COOKIE-ECHOED state, an
+ * endpoint MUST respond with an INIT ACK using the same parameters it
+ * sent in its original INIT chunk (including its Verification Tag,
+ * unchanged). These original parameters are combined with those from the
+ * newly received INIT chunk. The endpoint shall also generate a State
+ * Cookie with the INIT ACK. The endpoint uses the parameters sent in its
+ * INIT to calculate the State Cookie.
+ *
+ * After that, the endpoint MUST NOT change its state, the T1-init
+ * timer shall be left running and the corresponding TCB MUST NOT be
+ * destroyed. The normal procedures for handling State Cookies when
+ * a TCB exists will resolve the duplicate INITs to a single association.
+ *
+ * For an endpoint that is in the COOKIE-ECHOED state it MUST populate
+ * its Tie-Tags with the Tag information of itself and its peer (see
+ * section 5.2.2 for a description of the Tie-Tags).
+ *
+ * Verification Tag: Not explicit, but an INIT can not have a valid
+ * verification tag, so we skip the check.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
+				    const struct sctp_association *asoc,
+				    const sctp_subtype_t type,
+				    void *arg,
+				    sctp_cmd_seq_t *commands)
+{
+	/* Call helper to do the real work for both simulataneous and
+	 * duplicate INIT chunk handling.
+	 */
+	return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Handle duplicated INIT messages.  These are usually delayed
+ * restransmissions.
+ *
+ * Section: 5.2.2 Unexpected INIT in States Other than CLOSED,
+ * COOKIE-ECHOED and COOKIE-WAIT
+ *
+ * Unless otherwise stated, upon reception of an unexpected INIT for
+ * this association, the endpoint shall generate an INIT ACK with a
+ * State Cookie.  In the outbound INIT ACK the endpoint MUST copy its
+ * current Verification Tag and peer's Verification Tag into a reserved
+ * place within the state cookie.  We shall refer to these locations as
+ * the Peer's-Tie-Tag and the Local-Tie-Tag.  The outbound SCTP packet
+ * containing this INIT ACK MUST carry a Verification Tag value equal to
+ * the Initiation Tag found in the unexpected INIT.  And the INIT ACK
+ * MUST contain a new Initiation Tag (randomly generated see Section
+ * 5.3.1).  Other parameters for the endpoint SHOULD be copied from the
+ * existing parameters of the association (e.g. number of outbound
+ * streams) into the INIT ACK and cookie.
+ *
+ * After sending out the INIT ACK, the endpoint shall take no further
+ * actions, i.e., the existing association, including its current state,
+ * and the corresponding TCB MUST NOT be changed.
+ *
+ * Note: Only when a TCB exists and the association is not in a COOKIE-
+ * WAIT state are the Tie-Tags populated.  For a normal association INIT
+ * (i.e. the endpoint is in a COOKIE-WAIT state), the Tie-Tags MUST be
+ * set to 0 (indicating that no previous TCB existed).  The INIT ACK and
+ * State Cookie are populated as specified in section 5.2.1.
+ *
+ * Verification Tag: Not specified, but an INIT has no way of knowing
+ * what the verification tag could be, so we ignore it.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	/* Call helper to do the real work for both simulataneous and
+	 * duplicate INIT chunk handling.
+	 */
+	return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+}
+
+
+
+/* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
+ *
+ * Section 5.2.4
+ *  A)  In this case, the peer may have restarted.
+ */
+static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					sctp_cmd_seq_t *commands,
+					struct sctp_association *new_asoc)
+{
+	sctp_init_chunk_t *peer_init;
+	struct sctp_ulpevent *ev;
+	struct sctp_chunk *repl;
+	struct sctp_chunk *err;
+	sctp_disposition_t disposition;
+
+	/* new_asoc is a brand-new association, so these are not yet
+	 * side effects--it is safe to run them here.
+	 */
+	peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
+
+	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
+			       sctp_source(chunk), peer_init,
+			       GFP_ATOMIC))
+		goto nomem;
+
+	/* Make sure no new addresses are being added during the
+	 * restart.  Though this is a pretty complicated attack
+	 * since you'd have to get inside the cookie.
+	 */
+	if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) {
+		return SCTP_DISPOSITION_CONSUME;
+	}
+
+	/* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes
+	 * the peer has restarted (Action A), it MUST NOT setup a new
+	 * association but instead resend the SHUTDOWN ACK and send an ERROR
+	 * chunk with a "Cookie Received while Shutting Down" error cause to
+	 * its peer.
+	*/
+	if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
+		disposition = sctp_sf_do_9_2_reshutack(ep, asoc,
+				SCTP_ST_CHUNK(chunk->chunk_hdr->type),
+				chunk, commands);
+		if (SCTP_DISPOSITION_NOMEM == disposition)
+			goto nomem;
+
+		err = sctp_make_op_error(asoc, chunk,
+					 SCTP_ERROR_COOKIE_IN_SHUTDOWN,
+					 NULL, 0);
+		if (err)
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(err));
+
+		return SCTP_DISPOSITION_CONSUME;
+	}
+
+	/* For now, fail any unsent/unacked data.  Consider the optional
+	 * choice of resending of this data.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
+
+	/* Update the content of current association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
+
+	repl = sctp_make_cookie_ack(new_asoc, chunk);
+	if (!repl)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/* Report association restart to upper layer. */
+	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
+					     new_asoc->c.sinit_num_ostreams,
+					     new_asoc->c.sinit_max_instreams,
+					     GFP_ATOMIC);
+	if (!ev)
+		goto nomem_ev;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem_ev:
+	sctp_chunk_free(repl);
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/* Unexpected COOKIE-ECHO handler for setup collision (Table 2, action 'B')
+ *
+ * Section 5.2.4
+ *   B) In this case, both sides may be attempting to start an association
+ *      at about the same time but the peer endpoint started its INIT
+ *      after responding to the local endpoint's INIT
+ */
+/* This case represents an initialization collision.  */
+static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					sctp_cmd_seq_t *commands,
+					struct sctp_association *new_asoc)
+{
+	sctp_init_chunk_t *peer_init;
+	struct sctp_ulpevent *ev;
+	struct sctp_chunk *repl;
+
+	/* new_asoc is a brand-new association, so these are not yet
+	 * side effects--it is safe to run them here.
+	 */
+	peer_init = &chunk->subh.cookie_hdr->c.peer_init[0];
+	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
+			       sctp_source(chunk), peer_init,
+			       GFP_ATOMIC))
+		goto nomem;
+
+	/* Update the content of current association.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+
+	repl = sctp_make_cookie_ack(new_asoc, chunk);
+	if (!repl)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	/* RFC 2960 5.1 Normal Establishment of an Association
+	 *
+	 * D) IMPLEMENTATION NOTE: An implementation may choose to
+	 * send the Communication Up notification to the SCTP user
+	 * upon reception of a valid COOKIE ECHO chunk.
+	 */
+	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
+					     new_asoc->c.sinit_num_ostreams,
+					     new_asoc->c.sinit_max_instreams,
+					     GFP_ATOMIC);
+	if (!ev)
+		goto nomem_ev;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Sockets API Draft Section 5.3.1.6
+	 * When a peer sends a Adaption Layer Indication parameter , SCTP
+	 * delivers this notification to inform the application that of the
+	 * peers requested adaption layer.
+	 */
+	if (asoc->peer.adaption_ind) {
+		ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC);
+		if (!ev)
+			goto nomem_ev;
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	}
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem_ev:
+	sctp_chunk_free(repl);
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/* Unexpected COOKIE-ECHO handler for setup collision (Table 2, action 'C')
+ *
+ * Section 5.2.4
+ *  C) In this case, the local endpoint's cookie has arrived late.
+ *     Before it arrived, the local endpoint sent an INIT and received an
+ *     INIT-ACK and finally sent a COOKIE ECHO with the peer's same tag
+ *     but a new tag of its own.
+ */
+/* This case represents an initialization collision.  */
+static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					sctp_cmd_seq_t *commands,
+					struct sctp_association *new_asoc)
+{
+	/* The cookie should be silently discarded.
+	 * The endpoint SHOULD NOT change states and should leave
+	 * any timers running.
+	 */
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/* Unexpected COOKIE-ECHO handler lost chunk (Table 2, action 'D')
+ *
+ * Section 5.2.4
+ *
+ * D) When both local and remote tags match the endpoint should always
+ *    enter the ESTABLISHED state, if it has not already done so.
+ */
+/* This case represents an initialization collision.  */
+static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					sctp_cmd_seq_t *commands,
+					struct sctp_association *new_asoc)
+{
+	struct sctp_ulpevent *ev = NULL;
+	struct sctp_chunk *repl;
+
+	/* Clarification from Implementor's Guide:
+	 * D) When both local and remote tags match the endpoint should
+         * enter the ESTABLISHED state, if it is in the COOKIE-ECHOED state.
+         * It should stop any cookie timer that may be running and send
+         * a COOKIE ACK.
+	 */
+
+	/* Don't accidentally move back into established state. */
+	if (asoc->state < SCTP_STATE_ESTABLISHED) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
+		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+				SCTP_STATE(SCTP_STATE_ESTABLISHED));
+		SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+		sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START,
+				SCTP_NULL());
+
+		/* RFC 2960 5.1 Normal Establishment of an Association
+		 *
+		 * D) IMPLEMENTATION NOTE: An implementation may choose
+		 * to send the Communication Up notification to the
+		 * SCTP user upon reception of a valid COOKIE
+		 * ECHO chunk.
+		 */
+		ev = sctp_ulpevent_make_assoc_change(new_asoc, 0,
+					     SCTP_COMM_UP, 0,
+					     new_asoc->c.sinit_num_ostreams,
+					     new_asoc->c.sinit_max_instreams,
+                                             GFP_ATOMIC);
+		if (!ev)
+			goto nomem;
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+
+		/* Sockets API Draft Section 5.3.1.6
+		 * When a peer sends a Adaption Layer Indication parameter,
+		 * SCTP delivers this notification to inform the application
+		 * that of the peers requested adaption layer.
+		 */
+		if (new_asoc->peer.adaption_ind) {
+			ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+								 GFP_ATOMIC);
+			if (!ev)
+				goto nomem;
+
+			sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+					SCTP_ULPEVENT(ev));
+		}
+	}
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	repl = sctp_make_cookie_ack(new_asoc, chunk);
+	if (!repl)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	if (ev)
+		sctp_ulpevent_free(ev);
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Handle a duplicate COOKIE-ECHO.  This usually means a cookie-carrying
+ * chunk was retransmitted and then delayed in the network.
+ *
+ * Section: 5.2.4 Handle a COOKIE ECHO when a TCB exists
+ *
+ * Verification Tag: None.  Do cookie validation.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	sctp_disposition_t retval;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_association *new_asoc;
+	int error = 0;
+	char action;
+	struct sctp_chunk *err_chk_p;
+
+	/* Make sure that the chunk has a valid length from the protocol
+	 * perspective.  In this case check to make sure we have at least
+	 * enough for the chunk header.  Cookie length verification is
+	 * done later.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* "Decode" the chunk.  We have no optional parameters so we
+	 * are in good shape.
+	 */
+        chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
+	skb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
+		 sizeof(sctp_chunkhdr_t));
+
+	/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
+	 * of a duplicate COOKIE ECHO match the Verification Tags of the
+	 * current association, consider the State Cookie valid even if
+	 * the lifespan is exceeded.
+	 */
+	new_asoc = sctp_unpack_cookie(ep, asoc, chunk, GFP_ATOMIC, &error,
+				      &err_chk_p);
+
+	/* FIXME:
+	 * If the re-build failed, what is the proper error path
+	 * from here?
+	 *
+	 * [We should abort the association. --piggy]
+	 */
+	if (!new_asoc) {
+		/* FIXME: Several errors are possible.  A bad cookie should
+		 * be silently discarded, but think about logging it too.
+		 */
+		switch (error) {
+		case -SCTP_IERROR_NOMEM:
+			goto nomem;
+
+		case -SCTP_IERROR_STALE_COOKIE:
+			sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+						   err_chk_p);
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		case -SCTP_IERROR_BAD_SIG:
+		default:
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		};
+	}
+
+	/* Compare the tie_tag in cookie with the verification tag of
+	 * current association.
+	 */
+	action = sctp_tietags_compare(new_asoc, asoc);
+
+	switch (action) {
+	case 'A': /* Association restart. */
+		retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands,
+					      new_asoc);
+		break;
+
+	case 'B': /* Collision case B. */
+		retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands,
+					      new_asoc);
+		break;
+
+	case 'C': /* Collision case C. */
+		retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands,
+					      new_asoc);
+		break;
+
+	case 'D': /* Collision case D. */
+		retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands,
+					      new_asoc);
+		break;
+
+	default: /* Discard packet for all others. */
+		retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		break;
+        };
+
+	/* Delete the tempory new association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+
+	return retval;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Process an ABORT.  (SHUTDOWN-PENDING state)
+ *
+ * See sctp_sf_do_9_1_abort().
+ */
+sctp_disposition_t sctp_sf_shutdown_pending_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+
+	if (!sctp_vtag_verify_either(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ABORT chunk has a valid length.
+	 * Since this is an ABORT chunk, we have to discard it
+	 * because of the following text:
+	 * RFC 2960, Section 3.3.7
+	 *    If an endpoint receives an ABORT with a format error or for an
+	 *    association that doesn't exist, it MUST silently discard it.
+	 * Becasue the length is "invalid", we can't really discard just
+	 * as we do not know its true length.  So, to be safe, discard the
+	 * packet.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Stop the T5-shutdown guard timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Process an ABORT.  (SHUTDOWN-SENT state)
+ *
+ * See sctp_sf_do_9_1_abort().
+ */
+sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+
+	if (!sctp_vtag_verify_either(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ABORT chunk has a valid length.
+	 * Since this is an ABORT chunk, we have to discard it
+	 * because of the following text:
+	 * RFC 2960, Section 3.3.7
+	 *    If an endpoint receives an ABORT with a format error or for an
+	 *    association that doesn't exist, it MUST silently discard it.
+	 * Becasue the length is "invalid", we can't really discard just
+	 * as we do not know its true length.  So, to be safe, discard the
+	 * packet.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Stop the T2-shutdown timer. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	/* Stop the T5-shutdown guard timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Process an ABORT.  (SHUTDOWN-ACK-SENT state)
+ *
+ * See sctp_sf_do_9_1_abort().
+ */
+sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* The same T2 timer, so we should be able to use
+	 * common function with the SHUTDOWN-SENT state.
+	 */
+	return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Handle an Error received in COOKIE_ECHOED state.
+ *
+ * Only handle the error type of stale COOKIE Error, the other errors will
+ * be ignored.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	sctp_errhdr_t *err;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ERROR chunk has a valid length.
+	 * The parameter walking depends on this as well.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* Process the error here */
+	/* FUTURE FIXME:  When PR-SCTP related and other optional
+	 * parms are emitted, this will have to change to handle multiple
+	 * errors.
+	 */
+	sctp_walk_errors(err, chunk->chunk_hdr) {
+		if (SCTP_ERROR_STALE_COOKIE == err->cause)
+			return sctp_sf_do_5_2_6_stale(ep, asoc, type, 
+							arg, commands);
+	}
+
+	/* It is possible to have malformed error causes, and that
+	 * will cause us to end the walk early.  However, since
+	 * we are discarding the packet, there should be no adverse
+	 * affects.
+	 */
+	return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Handle a Stale COOKIE Error
+ *
+ * Section: 5.2.6 Handle Stale COOKIE Error
+ * If the association is in the COOKIE-ECHOED state, the endpoint may elect
+ * one of the following three alternatives.
+ * ...
+ * 3) Send a new INIT chunk to the endpoint, adding a Cookie
+ *    Preservative parameter requesting an extension to the lifetime of
+ *    the State Cookie. When calculating the time extension, an
+ *    implementation SHOULD use the RTT information measured based on the
+ *    previous COOKIE ECHO / ERROR exchange, and should add no more
+ *    than 1 second beyond the measured RTT, due to long State Cookie
+ *    lifetimes making the endpoint more subject to a replay attack.
+ *
+ * Verification Tag:  Not explicit, but safe to ignore.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+						 const struct sctp_association *asoc,
+						 const sctp_subtype_t type,
+						 void *arg,
+						 sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	time_t stale;
+	sctp_cookie_preserve_param_t bht;
+	sctp_errhdr_t *err;
+	struct sctp_chunk *reply;
+	struct sctp_bind_addr *bp;
+	int attempts;
+
+	attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
+
+	if (attempts >= asoc->max_init_attempts) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
+				SCTP_U32(SCTP_ERROR_STALE_COOKIE));
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	err = (sctp_errhdr_t *)(chunk->skb->data);
+
+	/* When calculating the time extension, an implementation
+	 * SHOULD use the RTT information measured based on the
+	 * previous COOKIE ECHO / ERROR exchange, and should add no
+	 * more than 1 second beyond the measured RTT, due to long
+	 * State Cookie lifetimes making the endpoint more subject to
+	 * a replay attack.
+	 * Measure of Staleness's unit is usec. (1/1000000 sec)
+	 * Suggested Cookie Life-span Increment's unit is msec.
+	 * (1/1000 sec)
+	 * In general, if you use the suggested cookie life, the value
+	 * found in the field of measure of staleness should be doubled
+	 * to give ample time to retransmit the new cookie and thus
+	 * yield a higher probability of success on the reattempt.
+	 */
+	stale = ntohl(*(suseconds_t *)((u8 *)err + sizeof(sctp_errhdr_t)));
+	stale = (stale * 2) / 1000;
+
+	bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
+	bht.param_hdr.length = htons(sizeof(bht));
+	bht.lifespan_increment = htonl(stale);
+
+	/* Build that new INIT chunk.  */
+	bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
+	reply = sctp_make_init(asoc, bp, GFP_ATOMIC, sizeof(bht));
+	if (!reply)
+		goto nomem;
+
+	sctp_addto_chunk(reply, sizeof(bht), &bht);
+
+	/* Clear peer's init_tag cached in assoc as we are sending a new INIT */
+	sctp_add_cmd_sf(commands, SCTP_CMD_CLEAR_INIT_TAG, SCTP_NULL());
+
+	/* Stop pending T3-rtx and heartbeat timers */
+	sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL());
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_STOP, SCTP_NULL());
+
+	/* Delete non-primary peer ip addresses since we are transitioning
+	 * back to the COOKIE-WAIT state
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_DEL_NON_PRIMARY, SCTP_NULL());
+
+	/* If we've sent any data bundled with COOKIE-ECHO we will need to 
+	 * resend 
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_RETRAN, 
+			SCTP_TRANSPORT(asoc->peer.primary_path));
+
+	/* Cast away the const modifier, as we want to just
+	 * rerun it through as a sideffect.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_INC,
+			SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_COOKIE_WAIT));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Process an ABORT.
+ *
+ * Section: 9.1
+ * After checking the Verification Tag, the receiving endpoint shall
+ * remove the association from its record, and shall report the
+ * termination to its upper layer.
+ *
+ * Verification Tag: 8.5.1 Exceptions in Verification Tag Rules
+ * B) Rules for packet carrying ABORT:
+ *
+ *  - The endpoint shall always fill in the Verification Tag field of the
+ *    outbound packet with the destination endpoint's tag value if it
+ *    is known.
+ *
+ *  - If the ABORT is sent in response to an OOTB packet, the endpoint
+ *    MUST follow the procedure described in Section 8.4.
+ *
+ *  - The receiver MUST accept the packet if the Verification Tag
+ *    matches either its own tag, OR the tag of its peer. Otherwise, the
+ *    receiver MUST silently discard the packet and take no further
+ *    action.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	unsigned len;
+	__u16 error = SCTP_ERROR_NO_ERROR;
+
+	if (!sctp_vtag_verify_either(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ABORT chunk has a valid length.
+	 * Since this is an ABORT chunk, we have to discard it
+	 * because of the following text:
+	 * RFC 2960, Section 3.3.7
+	 *    If an endpoint receives an ABORT with a format error or for an
+	 *    association that doesn't exist, it MUST silently discard it.
+	 * Becasue the length is "invalid", we can't really discard just
+	 * as we do not know its true length.  So, to be safe, discard the
+	 * packet.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* See if we have an error cause code in the chunk.  */
+	len = ntohs(chunk->chunk_hdr->length);
+	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
+		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+
+ 	/* ASSOC_FAILED will DELETE_TCB. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_U32(error));
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+	return SCTP_DISPOSITION_ABORT;
+}
+
+/*
+ * Process an ABORT.  (COOKIE-WAIT state)
+ *
+ * See sctp_sf_do_9_1_abort() above.
+ */
+sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	unsigned len;
+	__u16 error = SCTP_ERROR_NO_ERROR;
+
+	if (!sctp_vtag_verify_either(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ABORT chunk has a valid length.
+	 * Since this is an ABORT chunk, we have to discard it
+	 * because of the following text:
+	 * RFC 2960, Section 3.3.7
+	 *    If an endpoint receives an ABORT with a format error or for an
+	 *    association that doesn't exist, it MUST silently discard it.
+	 * Becasue the length is "invalid", we can't really discard just
+	 * as we do not know its true length.  So, to be safe, discard the
+	 * packet.
+	 */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* See if we have an error cause code in the chunk.  */
+	len = ntohs(chunk->chunk_hdr->length);
+	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
+		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+
+ 	sctp_stop_t1_and_abort(commands, error);
+	return SCTP_DISPOSITION_ABORT;
+}
+
+/*
+ * Process an incoming ICMP as an ABORT.  (COOKIE-WAIT state)
+ */
+sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR);
+ 	return SCTP_DISPOSITION_ABORT;
+}
+
+/*
+ * Process an ABORT.  (COOKIE-ECHOED state)
+ */
+sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
+					       const struct sctp_association *asoc,
+					       const sctp_subtype_t type,
+					       void *arg,
+					       sctp_cmd_seq_t *commands)
+{
+	/* There is a single T1 timer, so we should be able to use
+	 * common function with the COOKIE-WAIT state.
+	 */
+	return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Stop T1 timer and abort association with "INIT failed".
+ *
+ * This is common code called by several sctp_sf_*_abort() functions above.
+ */
+void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+	/* CMD_INIT_FAILED will DELETE_TCB. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
+			SCTP_U32(error));
+}
+
+/*
+ * sctp_sf_do_9_2_shut
+ *
+ * Section: 9.2
+ * Upon the reception of the SHUTDOWN, the peer endpoint shall
+ *  - enter the SHUTDOWN-RECEIVED state,
+ *
+ *  - stop accepting new data from its SCTP user
+ *
+ *  - verify, by checking the Cumulative TSN Ack field of the chunk,
+ *    that all its outstanding DATA chunks have been received by the
+ *    SHUTDOWN sender.
+ *
+ * Once an endpoint as reached the SHUTDOWN-RECEIVED state it MUST NOT
+ * send a SHUTDOWN in response to a ULP request. And should discard
+ * subsequent SHUTDOWN chunks.
+ *
+ * If there are still outstanding DATA chunks left, the SHUTDOWN
+ * receiver shall continue to follow normal data transmission
+ * procedures defined in Section 6 until all outstanding DATA chunks
+ * are acknowledged; however, the SHUTDOWN receiver MUST NOT accept
+ * new data from its SCTP user.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const sctp_subtype_t type,
+					   void *arg,
+					   sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	sctp_shutdownhdr_t *sdh;
+	sctp_disposition_t disposition;
+	struct sctp_ulpevent *ev;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the SHUTDOWN chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk,
+				      sizeof(struct sctp_shutdown_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* Convert the elaborate header.  */
+	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
+	chunk->subh.shutdown_hdr = sdh;
+
+	/* Upon the reception of the SHUTDOWN, the peer endpoint shall
+	 *  - enter the SHUTDOWN-RECEIVED state,
+	 *  - stop accepting new data from its SCTP user
+	 *
+	 * [This is implicit in the new state.]
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_SHUTDOWN_RECEIVED));
+	disposition = SCTP_DISPOSITION_CONSUME;
+
+	if (sctp_outq_is_empty(&asoc->outqueue)) {
+		disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type,
+							  arg, commands);
+	}
+
+	if (SCTP_DISPOSITION_NOMEM == disposition)
+		goto out;
+
+	/*  - verify, by checking the Cumulative TSN Ack field of the
+	 *    chunk, that all its outstanding DATA chunks have been
+	 *    received by the SHUTDOWN sender.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN,
+			SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack));
+
+	/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 * When a peer sends a SHUTDOWN, SCTP delivers this notification to
+	 * inform the application that it should cease sending data.
+	 */
+	ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC);
+	if (!ev) {
+		disposition = SCTP_DISPOSITION_NOMEM;
+		goto out;	
+	}
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+out:
+	return disposition;
+}
+
+/* RFC 2960 9.2
+ * If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk
+ * (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination
+ * transport addresses (either in the IP addresses or in the INIT chunk)
+ * that belong to this association, it should discard the INIT chunk and
+ * retransmit the SHUTDOWN ACK chunk.
+ */
+sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
+				    const struct sctp_association *asoc,
+				    const sctp_subtype_t type,
+				    void *arg,
+				    sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *reply;
+
+	/* Since we are not going to really process this INIT, there
+	 * is no point in verifying chunk boundries.  Just generate
+	 * the SHUTDOWN ACK.
+	 */
+	reply = sctp_make_shutdown_ack(asoc, chunk);
+	if (NULL == reply)
+		goto nomem;
+
+	/* Set the transport for the SHUTDOWN ACK chunk and the timeout for
+	 * the T2-SHUTDOWN timer.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T2, SCTP_CHUNK(reply));
+
+	/* and restart the T2-shutdown timer. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+
+	return SCTP_DISPOSITION_CONSUME;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * sctp_sf_do_ecn_cwr
+ *
+ * Section:  Appendix A: Explicit Congestion Notification
+ *
+ * CWR:
+ *
+ * RFC 2481 details a specific bit for a sender to send in the header of
+ * its next outbound TCP segment to indicate to its peer that it has
+ * reduced its congestion window.  This is termed the CWR bit.  For
+ * SCTP the same indication is made by including the CWR chunk.
+ * This chunk contains one data element, i.e. the TSN number that
+ * was sent in the ECNE chunk.  This element represents the lowest
+ * TSN number in the datagram that was originally marked with the
+ * CE bit.
+ *
+ * Verification Tag: 8.5 Verification Tag [Normal verification]
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const sctp_subtype_t type,
+				      void *arg,
+				      sctp_cmd_seq_t *commands)
+{
+	sctp_cwrhdr_t *cwr;
+	struct sctp_chunk *chunk = arg;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+		
+	cwr = (sctp_cwrhdr_t *) chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
+
+	cwr->lowest_tsn = ntohl(cwr->lowest_tsn);
+
+	/* Does this CWR ack the last sent congestion notification? */
+	if (TSN_lte(asoc->last_ecne_tsn, cwr->lowest_tsn)) {
+		/* Stop sending ECNE. */
+		sctp_add_cmd_sf(commands,
+				SCTP_CMD_ECN_CWR,
+				SCTP_U32(cwr->lowest_tsn));
+	}
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * sctp_sf_do_ecne
+ *
+ * Section:  Appendix A: Explicit Congestion Notification
+ *
+ * ECN-Echo
+ *
+ * RFC 2481 details a specific bit for a receiver to send back in its
+ * TCP acknowledgements to notify the sender of the Congestion
+ * Experienced (CE) bit having arrived from the network.  For SCTP this
+ * same indication is made by including the ECNE chunk.  This chunk
+ * contains one data element, i.e. the lowest TSN associated with the IP
+ * datagram marked with the CE bit.....
+ *
+ * Verification Tag: 8.5 Verification Tag [Normal verification]
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
+				   const struct sctp_association *asoc,
+				   const sctp_subtype_t type,
+				   void *arg,
+				   sctp_cmd_seq_t *commands)
+{
+	sctp_ecnehdr_t *ecne;
+	struct sctp_chunk *chunk = arg;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	ecne = (sctp_ecnehdr_t *) chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t));
+
+	/* If this is a newer ECNE than the last CWR packet we sent out */
+	sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE,
+			SCTP_U32(ntohl(ecne->lowest_tsn)));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Section: 6.2  Acknowledgement on Reception of DATA Chunks
+ *
+ * The SCTP endpoint MUST always acknowledge the reception of each valid
+ * DATA chunk.
+ *
+ * The guidelines on delayed acknowledgement algorithm specified in
+ * Section 4.2 of [RFC2581] SHOULD be followed. Specifically, an
+ * acknowledgement SHOULD be generated for at least every second packet
+ * (not every second DATA chunk) received, and SHOULD be generated within
+ * 200 ms of the arrival of any unacknowledged DATA chunk. In some
+ * situations it may be beneficial for an SCTP transmitter to be more
+ * conservative than the algorithms detailed in this document allow.
+ * However, an SCTP transmitter MUST NOT be more aggressive than the
+ * following algorithms allow.
+ *
+ * A SCTP receiver MUST NOT generate more than one SACK for every
+ * incoming packet, other than to update the offered window as the
+ * receiving application consumes new data.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	int error;
+
+	if (!sctp_vtag_verify(chunk, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+        }
+
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	error = sctp_eat_data(asoc, chunk, commands );
+	switch (error) {
+	case SCTP_IERROR_NO_ERROR:
+		break;
+	case SCTP_IERROR_HIGH_TSN:
+	case SCTP_IERROR_BAD_STREAM:
+		goto discard_noforce;
+	case SCTP_IERROR_DUP_TSN:
+	case SCTP_IERROR_IGNORE_TSN:
+		goto discard_force;
+	case SCTP_IERROR_NO_DATA:
+		goto consume;
+	default:
+		BUG();
+	}
+
+	if (asoc->autoclose) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+	}
+
+	/* If this is the last chunk in a packet, we need to count it
+	 * toward sack generation.  Note that we need to SACK every
+	 * OTHER packet containing data chunks, EVEN IF WE DISCARD
+	 * THEM.  We elect to NOT generate SACK's if the chunk fails
+	 * the verification tag test.
+	 *
+	 * RFC 2960 6.2 Acknowledgement on Reception of DATA Chunks
+	 *
+	 * The SCTP endpoint MUST always acknowledge the reception of
+	 * each valid DATA chunk.
+	 *
+	 * The guidelines on delayed acknowledgement algorithm
+	 * specified in  Section 4.2 of [RFC2581] SHOULD be followed.
+	 * Specifically, an acknowledgement SHOULD be generated for at
+	 * least every second packet (not every second DATA chunk)
+	 * received, and SHOULD be generated within 200 ms of the
+	 * arrival of any unacknowledged DATA chunk.  In some
+	 * situations it may be beneficial for an SCTP transmitter to
+	 * be more conservative than the algorithms detailed in this
+	 * document allow. However, an SCTP transmitter MUST NOT be
+	 * more aggressive than the following algorithms allow.
+	 */
+	if (chunk->end_of_packet) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
+
+		/* Start the SACK timer.  */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
+	}
+
+	return SCTP_DISPOSITION_CONSUME;
+
+discard_force:
+	/* RFC 2960 6.2 Acknowledgement on Reception of DATA Chunks
+	 *
+	 * When a packet arrives with duplicate DATA chunk(s) and with
+	 * no new DATA chunk(s), the endpoint MUST immediately send a
+	 * SACK with no delay.  If a packet arrives with duplicate
+	 * DATA chunk(s) bundled with new DATA chunks, the endpoint
+	 * MAY immediately send a SACK.  Normally receipt of duplicate
+	 * DATA chunks will occur when the original SACK chunk was lost
+	 * and the peer's RTO has expired.  The duplicate TSN number(s)
+	 * SHOULD be reported in the SACK as duplicate.
+	 */
+	/* In our case, we split the MAY SACK advice up whether or not
+	 * the last chunk is a duplicate.'
+	 */
+	if (chunk->end_of_packet)
+		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
+	return SCTP_DISPOSITION_DISCARD;
+
+discard_noforce:
+	if (chunk->end_of_packet) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
+
+		/* Start the SACK timer.  */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
+	}
+	return SCTP_DISPOSITION_DISCARD;
+consume:
+	return SCTP_DISPOSITION_CONSUME;
+	
+}
+
+/*
+ * sctp_sf_eat_data_fast_4_4
+ *
+ * Section: 4 (4)
+ * (4) In SHUTDOWN-SENT state the endpoint MUST acknowledge any received
+ *    DATA chunks without delay.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	int error;
+
+	if (!sctp_vtag_verify(chunk, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	}
+
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	error = sctp_eat_data(asoc, chunk, commands );
+	switch (error) {
+	case SCTP_IERROR_NO_ERROR:
+	case SCTP_IERROR_HIGH_TSN:
+	case SCTP_IERROR_DUP_TSN:
+	case SCTP_IERROR_IGNORE_TSN:
+	case SCTP_IERROR_BAD_STREAM:
+		break;
+	case SCTP_IERROR_NO_DATA:
+		goto consume;
+	default:
+		BUG();
+	}
+
+	/* Go a head and force a SACK, since we are shutting down. */
+
+	/* Implementor's Guide.
+	 *
+	 * While in SHUTDOWN-SENT state, the SHUTDOWN sender MUST immediately
+	 * respond to each received packet containing one or more DATA chunk(s)
+	 * with a SACK, a SHUTDOWN chunk, and restart the T2-shutdown timer
+	 */
+	if (chunk->end_of_packet) {
+		/* We must delay the chunk creation since the cumulative
+		 * TSN has not been updated yet.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SHUTDOWN, SCTP_NULL());
+		sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+	}
+
+consume:
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Section: 6.2  Processing a Received SACK
+ * D) Any time a SACK arrives, the endpoint performs the following:
+ *
+ *     i) If Cumulative TSN Ack is less than the Cumulative TSN Ack Point,
+ *     then drop the SACK.   Since Cumulative TSN Ack is monotonically
+ *     increasing, a SACK whose Cumulative TSN Ack is less than the
+ *     Cumulative TSN Ack Point indicates an out-of-order SACK.
+ *
+ *     ii) Set rwnd equal to the newly received a_rwnd minus the number
+ *     of bytes still outstanding after processing the Cumulative TSN Ack
+ *     and the Gap Ack Blocks.
+ *
+ *     iii) If the SACK is missing a TSN that was previously
+ *     acknowledged via a Gap Ack Block (e.g., the data receiver
+ *     reneged on the data), then mark the corresponding DATA chunk
+ *     as available for retransmit:  Mark it as missing for fast
+ *     retransmit as described in Section 7.2.4 and if no retransmit
+ *     timer is running for the destination address to which the DATA
+ *     chunk was originally transmitted, then T3-rtx is started for
+ *     that destination address.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	sctp_sackhdr_t *sackh;
+	__u32 ctsn;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the SACK chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* Pull the SACK chunk from the data buffer */
+	sackh = sctp_sm_pull_sack(chunk);
+	/* Was this a bogus SACK? */
+	if (!sackh)
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	chunk->subh.sack_hdr = sackh;
+	ctsn = ntohl(sackh->cum_tsn_ack);
+
+	/* i) If Cumulative TSN Ack is less than the Cumulative TSN
+	 *     Ack Point, then drop the SACK.  Since Cumulative TSN
+	 *     Ack is monotonically increasing, a SACK whose
+	 *     Cumulative TSN Ack is less than the Cumulative TSN Ack
+	 *     Point indicates an out-of-order SACK.
+	 */
+	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
+		SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn);
+		SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point);
+		return SCTP_DISPOSITION_DISCARD;
+	}
+
+	/* Return this SACK for further processing.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh));
+
+	/* Note: We do the rest of the work on the PROCESS_SACK
+	 * sideeffect.
+	 */
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Generate an ABORT in response to a packet.
+ *
+ * Section: 8.4 Handle "Out of the blue" Packets
+ *
+ * 8) The receiver should respond to the sender of the OOTB packet
+ *    with an ABORT.  When sending the ABORT, the receiver of the
+ *    OOTB packet MUST fill in the Verification Tag field of the
+ *    outbound packet with the value found in the Verification Tag
+ *    field of the OOTB packet and set the T-bit in the Chunk Flags
+ *    to indicate that no TCB was found.  After sending this ABORT,
+ *    the receiver of the OOTB packet shall discard the OOTB packet
+ *    and take no further action.
+ *
+ * Verification Tag:
+ *
+ * The return value is the disposition of the chunk.
+*/
+sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_packet *packet = NULL;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *abort;
+
+	packet = sctp_ootb_pkt_new(asoc, chunk);
+
+	if (packet) {
+		/* Make an ABORT. The T bit will be set if the asoc
+		 * is NULL.
+		 */
+        	abort = sctp_make_abort(asoc, chunk, 0);
+		if (!abort) {
+			sctp_ootb_pkt_free(packet);
+			return SCTP_DISPOSITION_NOMEM;
+		}
+
+		/* Set the skb to the belonging sock for accounting.  */
+		abort->skb->sk = ep->base.sk;
+
+		sctp_packet_append_chunk(packet, abort);
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+				SCTP_PACKET(packet));
+
+		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+		return SCTP_DISPOSITION_CONSUME;
+	}
+
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Received an ERROR chunk from peer.  Generate SCTP_REMOTE_ERROR
+ * event as ULP notification for each cause included in the chunk.
+ *
+ * API 5.3.1.3 - SCTP_REMOTE_ERROR
+ *
+ * The return value is the disposition of the chunk.
+*/
+sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_ulpevent *ev;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the ERROR chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	while (chunk->chunk_end > chunk->skb->data) {
+		ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0,
+						     GFP_ATOMIC);
+		if (!ev)
+			goto nomem;
+
+		if (!sctp_add_cmd(commands, SCTP_CMD_EVENT_ULP,
+				  SCTP_ULPEVENT(ev))) {
+			sctp_ulpevent_free(ev);
+			goto nomem;
+		}
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
+				SCTP_CHUNK(chunk));	
+	}
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Process an inbound SHUTDOWN ACK.
+ *
+ * From Section 9.2:
+ * Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
+ * stop the T2-shutdown timer, send a SHUTDOWN COMPLETE chunk to its
+ * peer, and remove all record of the association.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *reply;
+	struct sctp_ulpevent *ev;
+
+	if (!sctp_vtag_verify(chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	/* 10.2 H) SHUTDOWN COMPLETE notification
+	 *
+	 * When SCTP completes the shutdown procedures (section 9.2) this
+	 * notification is passed to the upper layer.
+	 */
+	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
+					     0, 0, 0, GFP_ATOMIC);
+	if (!ev)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
+	 * stop the T2-shutdown timer,
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	/* ...send a SHUTDOWN COMPLETE chunk to its peer, */
+	reply = sctp_make_shutdown_complete(asoc, chunk);
+	if (!reply)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+
+	/* ...and remove all record of the association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+	return SCTP_DISPOSITION_DELETE_TCB;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * RFC 2960, 8.4 - Handle "Out of the blue" Packets
+ * 5) If the packet contains a SHUTDOWN ACK chunk, the receiver should
+ *    respond to the sender of the OOTB packet with a SHUTDOWN COMPLETE.
+ *    When sending the SHUTDOWN COMPLETE, the receiver of the OOTB
+ *    packet must fill in the Verification Tag field of the outbound
+ *    packet with the Verification Tag received in the SHUTDOWN ACK and
+ *    set the T-bit in the Chunk Flags to indicate that no TCB was
+ *    found. Otherwise,
+ *
+ * 8) The receiver should respond to the sender of the OOTB packet with
+ *    an ABORT.  When sending the ABORT, the receiver of the OOTB packet
+ *    MUST fill in the Verification Tag field of the outbound packet
+ *    with the value found in the Verification Tag field of the OOTB
+ *    packet and set the T-bit in the Chunk Flags to indicate that no
+ *    TCB was found.  After sending this ABORT, the receiver of the OOTB
+ *    packet shall discard the OOTB packet and take no further action.
+ */
+sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
+				const struct sctp_association *asoc,
+				const sctp_subtype_t type,
+				void *arg,
+				sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sk_buff *skb = chunk->skb;
+	sctp_chunkhdr_t *ch;
+	__u8 *ch_end;
+	int ootb_shut_ack = 0;
+
+	SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+
+	ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
+	do {
+		/* Break out if chunk length is less then minimal. */
+		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+			break;
+
+		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+
+		if (SCTP_CID_SHUTDOWN_ACK == ch->type)
+			ootb_shut_ack = 1;
+
+		/* RFC 2960, Section 3.3.7
+		 *   Moreover, under any circumstances, an endpoint that
+		 *   receives an ABORT  MUST NOT respond to that ABORT by
+		 *   sending an ABORT of its own.
+		 */
+		if (SCTP_CID_ABORT == ch->type)
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			
+		ch = (sctp_chunkhdr_t *) ch_end;
+	} while (ch_end < skb->tail);
+
+	if (ootb_shut_ack)
+		sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
+	else
+		sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+
+	return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Handle an "Out of the blue" SHUTDOWN ACK.
+ *
+ * Section: 8.4 5)
+ * 5) If the packet contains a SHUTDOWN ACK chunk, the receiver should
+ *   respond to the sender of the OOTB packet with a SHUTDOWN COMPLETE.
+ *   When sending the SHUTDOWN COMPLETE, the receiver of the OOTB packet
+ *   must fill in the Verification Tag field of the outbound packet with
+ *   the Verification Tag received in the SHUTDOWN ACK and set the
+ *   T-bit in the Chunk Flags to indicate that no TCB was found.
+ *
+ * Inputs
+ * (endpoint, asoc, type, arg, commands)
+ *
+ * Outputs
+ * (sctp_disposition_t)
+ *
+ * The return value is the disposition of the chunk.
+ */
+static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+					     const struct sctp_association *asoc,
+					     const sctp_subtype_t type,
+					     void *arg,
+					     sctp_cmd_seq_t *commands)
+{
+	struct sctp_packet *packet = NULL;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_chunk *shut;
+
+	packet = sctp_ootb_pkt_new(asoc, chunk);
+
+	if (packet) {
+		/* Make an SHUTDOWN_COMPLETE.
+         	 * The T bit will be set if the asoc is NULL.
+         	 */
+		shut = sctp_make_shutdown_complete(asoc, chunk);
+		if (!shut) {
+			sctp_ootb_pkt_free(packet);
+			return SCTP_DISPOSITION_NOMEM;
+		}
+
+		/* Set the skb to the belonging sock for accounting.  */
+		shut->skb->sk = ep->base.sk;
+
+		sctp_packet_append_chunk(packet, shut);
+
+		sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+				SCTP_PACKET(packet));
+
+		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+		/* If the chunk length is invalid, we don't want to process
+		 * the reset of the packet.
+		 */
+		if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+		return SCTP_DISPOSITION_CONSUME;
+	}
+
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Handle SHUTDOWN ACK in COOKIE_ECHOED or COOKIE_WAIT state.
+ *
+ * Verification Tag:  8.5.1 E) Rules for packet carrying a SHUTDOWN ACK
+ *   If the receiver is in COOKIE-ECHOED or COOKIE-WAIT state the
+ *   procedures in section 8.4 SHOULD be followed, in other words it
+ *   should be treated as an Out Of The Blue packet.
+ *   [This means that we do NOT check the Verification Tag on these
+ *   chunks. --piggy ]
+ *
+ */
+sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const sctp_subtype_t type,
+				      void *arg,
+				      sctp_cmd_seq_t *commands)
+{
+	/* Although we do have an association in this case, it corresponds
+	 * to a restarted association. So the packet is treated as an OOTB
+	 * packet and the state function that handles OOTB SHUTDOWN_ACK is
+	 * called with a NULL association.
+	 */
+	return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
+}
+
+/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk.  */
+sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type, void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk	*chunk = arg;
+	struct sctp_chunk	*asconf_ack = NULL;
+	sctp_addiphdr_t		*hdr;
+	__u32			serial;
+
+	if (!sctp_vtag_verify(chunk, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	}
+
+	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
+	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	hdr = (sctp_addiphdr_t *)chunk->skb->data;
+	serial = ntohl(hdr->serial);
+
+	/* ADDIP 4.2 C1) Compare the value of the serial number to the value
+	 * the endpoint stored in a new association variable
+	 * 'Peer-Serial-Number'. 
+	 */
+	if (serial == asoc->peer.addip_serial + 1) {
+   		/* ADDIP 4.2 C2) If the value found in the serial number is
+		 * equal to the ('Peer-Serial-Number' + 1), the endpoint MUST
+		 * do V1-V5.
+		 */
+		asconf_ack = sctp_process_asconf((struct sctp_association *)
+						 asoc, chunk);
+		if (!asconf_ack)
+			return SCTP_DISPOSITION_NOMEM;
+	} else if (serial == asoc->peer.addip_serial) {
+		/* ADDIP 4.2 C3) If the value found in the serial number is
+		 * equal to the value stored in the 'Peer-Serial-Number'
+		 * IMPLEMENTATION NOTE: As an optimization a receiver may wish
+		 * to save the last ASCONF-ACK for some predetermined period of
+		 * time and instead of re-processing the ASCONF (with the same
+		 * serial number) it may just re-transmit the ASCONF-ACK.
+		 */
+		if (asoc->addip_last_asconf_ack)
+			asconf_ack = asoc->addip_last_asconf_ack;
+		else
+			return SCTP_DISPOSITION_DISCARD;
+	} else {
+		/* ADDIP 4.2 C4) Otherwise, the ASCONF Chunk is discarded since 
+		 * it must be either a stale packet or from an attacker.
+		 */	
+		return SCTP_DISPOSITION_DISCARD;
+	}
+
+	/* ADDIP 4.2 C5) In both cases C2 and C3 the ASCONF-ACK MUST be sent
+	 * back to the source address contained in the IP header of the ASCONF
+	 * being responded to.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
+	
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * ADDIP Section 4.3 General rules for address manipulation
+ * When building TLV parameters for the ASCONF Chunk that will add or
+ * delete IP addresses the D0 to D13 rules should be applied:
+ */
+sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+	 				 const sctp_subtype_t type, void *arg,
+					 sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk	*asconf_ack = arg;
+	struct sctp_chunk	*last_asconf = asoc->addip_last_asconf;
+	struct sctp_chunk	*abort;
+	sctp_addiphdr_t		*addip_hdr;
+	__u32			sent_serial, rcvd_serial;
+
+	if (!sctp_vtag_verify(asconf_ack, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	}
+
+	/* Make sure that the ADDIP chunk has a valid length.  */
+	if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
+	rcvd_serial = ntohl(addip_hdr->serial);
+
+	if (last_asconf) {
+		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
+		sent_serial = ntohl(addip_hdr->serial);
+	} else {
+		sent_serial = asoc->addip_serial - 1;
+	}
+
+	/* D0) If an endpoint receives an ASCONF-ACK that is greater than or
+	 * equal to the next serial number to be used but no ASCONF chunk is
+	 * outstanding the endpoint MUST ABORT the association. Note that a
+	 * sequence number is greater than if it is no more than 2^^31-1
+	 * larger than the current sequence number (using serial arithmetic).
+	 */
+	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
+	    !(asoc->addip_last_asconf)) {
+		abort = sctp_make_abort(asoc, asconf_ack,
+					sizeof(sctp_errhdr_t));
+		if (abort) {
+			sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, NULL, 0);
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(abort));
+		}
+		/* We are going to ABORT, so we might as well stop
+		 * processing the rest of the chunks in the packet.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+		sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL());
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_ASCONF_ACK));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_ABORT;
+	}
+
+	if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+
+		if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
+					     asconf_ack))
+			return SCTP_DISPOSITION_CONSUME;
+
+		abort = sctp_make_abort(asoc, asconf_ack,
+					sizeof(sctp_errhdr_t));
+		if (abort) {
+			sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, NULL, 0);
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(abort));
+		}
+		/* We are going to ABORT, so we might as well stop
+		 * processing the rest of the chunks in the packet.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL());
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_ASCONF_ACK));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_ABORT;
+	}
+
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/*
+ * PR-SCTP Section 3.6 Receiver Side Implementation of PR-SCTP
+ *
+ * When a FORWARD TSN chunk arrives, the data receiver MUST first update
+ * its cumulative TSN point to the value carried in the FORWARD TSN
+ * chunk, and then MUST further advance its cumulative TSN point locally
+ * if possible.
+ * After the above processing, the data receiver MUST stop reporting any
+ * missing TSNs earlier than or equal to the new cumulative TSN point.
+ *
+ * Verification Tag:  8.5 Verification Tag [Normal verification]
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const sctp_subtype_t type,
+				       void *arg,
+				       sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	__u16 len;
+	__u32 tsn;
+
+	if (!sctp_vtag_verify(chunk, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	}
+
+	/* Make sure that the FORWARD_TSN chunk has valid length.  */
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
+	chunk->subh.fwdtsn_hdr = fwdtsn_hdr;
+	len = ntohs(chunk->chunk_hdr->length);
+	len -= sizeof(struct sctp_chunkhdr);
+	skb_pull(chunk->skb, len);
+
+	tsn = ntohl(fwdtsn_hdr->new_cum_tsn);
+	SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __FUNCTION__, tsn);
+
+	/* The TSN is too high--silently discard the chunk and count on it
+	 * getting retransmitted later.
+	 */
+	if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
+		goto discard_noforce;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
+	if (len > sizeof(struct sctp_fwdtsn_hdr))
+		sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, 
+				SCTP_CHUNK(chunk));
+	
+	/* Count this as receiving DATA. */
+	if (asoc->autoclose) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+	}
+	
+	/* FIXME: For now send a SACK, but DATA processing may
+	 * send another. 
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
+	/* Start the SACK timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
+
+	return SCTP_DISPOSITION_CONSUME;
+
+discard_noforce:
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	__u16 len;
+	__u32 tsn;
+
+	if (!sctp_vtag_verify(chunk, asoc)) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
+				SCTP_NULL());
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	}
+
+	/* Make sure that the FORWARD_TSN chunk has a valid length.  */
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
+	chunk->subh.fwdtsn_hdr = fwdtsn_hdr;
+	len = ntohs(chunk->chunk_hdr->length);
+	len -= sizeof(struct sctp_chunkhdr);
+	skb_pull(chunk->skb, len);
+
+	tsn = ntohl(fwdtsn_hdr->new_cum_tsn);
+	SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __FUNCTION__, tsn);
+
+	/* The TSN is too high--silently discard the chunk and count on it
+	 * getting retransmitted later.
+	 */
+	if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
+		goto gen_shutdown;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
+	if (len > sizeof(struct sctp_fwdtsn_hdr))
+		sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, 
+				SCTP_CHUNK(chunk));
+	
+	/* Go a head and force a SACK, since we are shutting down. */
+gen_shutdown:
+	/* Implementor's Guide.
+	 *
+	 * While in SHUTDOWN-SENT state, the SHUTDOWN sender MUST immediately
+	 * respond to each received packet containing one or more DATA chunk(s)
+	 * with a SACK, a SHUTDOWN chunk, and restart the T2-shutdown timer
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SHUTDOWN, SCTP_NULL());
+	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+        return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Process an unknown chunk.
+ *
+ * Section: 3.2. Also, 2.1 in the implementor's guide.
+ *
+ * Chunk Types are encoded such that the highest-order two bits specify
+ * the action that must be taken if the processing endpoint does not
+ * recognize the Chunk Type.
+ *
+ * 00 - Stop processing this SCTP packet and discard it, do not process
+ *      any further chunks within it.
+ *
+ * 01 - Stop processing this SCTP packet and discard it, do not process
+ *      any further chunks within it, and report the unrecognized
+ *      chunk in an 'Unrecognized Chunk Type'.
+ *
+ * 10 - Skip this chunk and continue processing.
+ *
+ * 11 - Skip this chunk and continue processing, but report in an ERROR
+ *      Chunk using the 'Unrecognized Chunk Type' cause of error.
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *unk_chunk = arg;
+	struct sctp_chunk *err_chunk;
+	sctp_chunkhdr_t *hdr;
+
+	SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk);
+
+	if (!sctp_vtag_verify(unk_chunk, asoc))
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+	/* Make sure that the chunk has a valid length.
+	 * Since we don't know the chunk type, we use a general
+	 * chunkhdr structure to make a comparison.
+	 */
+	if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
+		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+						  commands);
+
+	switch (type.chunk & SCTP_CID_ACTION_MASK) {
+	case SCTP_CID_ACTION_DISCARD:
+		/* Discard the packet.  */
+		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		break;
+	case SCTP_CID_ACTION_DISCARD_ERR:
+		/* Discard the packet.  */
+		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+		/* Generate an ERROR chunk as response. */
+		hdr = unk_chunk->chunk_hdr;
+		err_chunk = sctp_make_op_error(asoc, unk_chunk,
+					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
+					       WORD_ROUND(ntohs(hdr->length)));
+		if (err_chunk) {
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(err_chunk));
+		}
+		return SCTP_DISPOSITION_CONSUME;
+		break;
+	case SCTP_CID_ACTION_SKIP:
+		/* Skip the chunk.  */
+		return SCTP_DISPOSITION_DISCARD;
+		break;
+	case SCTP_CID_ACTION_SKIP_ERR:
+		/* Generate an ERROR chunk as response. */
+		hdr = unk_chunk->chunk_hdr;
+		err_chunk = sctp_make_op_error(asoc, unk_chunk,
+					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
+					       WORD_ROUND(ntohs(hdr->length)));
+		if (err_chunk) {
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(err_chunk));
+		}
+		/* Skip the chunk.  */
+		return SCTP_DISPOSITION_CONSUME;
+		break;
+	default:
+		break;
+	}
+
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/*
+ * Discard the chunk.
+ *
+ * Section: 0.2, 5.2.3, 5.2.5, 5.2.6, 6.0, 8.4.6, 8.5.1c, 9.2
+ * [Too numerous to mention...]
+ * Verification Tag: No verification needed.
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const sctp_subtype_t type,
+					 void *arg,
+					 sctp_cmd_seq_t *commands)
+{
+	SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk);
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/*
+ * Discard the whole packet.
+ *
+ * Section: 8.4 2)
+ *
+ * 2) If the OOTB packet contains an ABORT chunk, the receiver MUST
+ *    silently discard the OOTB packet and take no further action.
+ *    Otherwise,
+ *
+ * Verification Tag: No verification necessary
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
+				    const struct sctp_association *asoc,
+				    const sctp_subtype_t type,
+				    void *arg,
+				    sctp_cmd_seq_t *commands)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+
+/*
+ * The other end is violating protocol.
+ *
+ * Section: Not specified
+ * Verification Tag: Not specified
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (asoc, reply_msg, msg_up, timers, counters)
+ *
+ * We simply tag the chunk as a violation.  The state machine will log
+ * the violation and continue.
+ */
+sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	return SCTP_DISPOSITION_VIOLATION;
+}
+
+
+/*
+ * Handle a protocol violation when the chunk length is invalid.
+ * "Invalid" length is identified as smaller then the minimal length a
+ * given chunk can be.  For example, a SACK chunk has invalid length
+ * if it's length is set to be smaller then the size of sctp_sack_chunk_t.
+ *
+ * We inform the other end by sending an ABORT with a Protocol Violation
+ * error code. 
+ *
+ * Section: Not specified
+ * Verification Tag:  Nothing to do
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * Outputs
+ * (reply_msg, msg_up, counters)
+ *
+ * Generate an  ABORT chunk and terminate the association.
+ */
+sctp_disposition_t sctp_sf_violation_chunklen(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk =  arg;
+	struct sctp_chunk *abort = NULL;
+	char 		   err_str[]="The following chunk had invalid length:";
+
+	/* Make the abort chunk. */
+	abort = sctp_make_abort_violation(asoc, chunk, err_str,
+					  sizeof(err_str));
+	if (!abort)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+
+	if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+		sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
+				SCTP_U32(SCTP_ERROR_PROTO_VIOLATION));
+	} else {
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_PROTO_VIOLATION));
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	}
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	
+	return SCTP_DISPOSITION_ABORT;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/***************************************************************************
+ * These are the state functions for handling primitive (Section 10) events.
+ ***************************************************************************/
+/*
+ * sctp_sf_do_prm_asoc
+ *
+ * Section: 10.1 ULP-to-SCTP
+ * B) Associate
+ *
+ * Format: ASSOCIATE(local SCTP instance name, destination transport addr,
+ * outbound stream count)
+ * -> association id [,destination transport addr list] [,outbound stream
+ * count]
+ *
+ * This primitive allows the upper layer to initiate an association to a
+ * specific peer endpoint.
+ *
+ * The peer endpoint shall be specified by one of the transport addresses
+ * which defines the endpoint (see Section 1.4).  If the local SCTP
+ * instance has not been initialized, the ASSOCIATE is considered an
+ * error.
+ * [This is not relevant for the kernel implementation since we do all
+ * initialization at boot time.  It we hadn't initialized we wouldn't
+ * get anywhere near this code.]
+ *
+ * An association id, which is a local handle to the SCTP association,
+ * will be returned on successful establishment of the association. If
+ * SCTP is not able to open an SCTP association with the peer endpoint,
+ * an error is returned.
+ * [In the kernel implementation, the struct sctp_association needs to
+ * be created BEFORE causing this primitive to run.]
+ *
+ * Other association parameters may be returned, including the
+ * complete destination transport addresses of the peer as well as the
+ * outbound stream count of the local endpoint. One of the transport
+ * address from the returned destination addresses will be selected by
+ * the local endpoint as default primary path for sending SCTP packets
+ * to this peer.  The returned "destination transport addr list" can
+ * be used by the ULP to change the default primary path or to force
+ * sending a packet to a specific transport address.  [All of this
+ * stuff happens when the INIT ACK arrives.  This is a NON-BLOCKING
+ * function.]
+ *
+ * Mandatory attributes:
+ *
+ * o local SCTP instance name - obtained from the INITIALIZE operation.
+ *   [This is the argument asoc.]
+ * o destination transport addr - specified as one of the transport
+ * addresses of the peer endpoint with which the association is to be
+ * established.
+ *  [This is asoc->peer.active_path.]
+ * o outbound stream count - the number of outbound streams the ULP
+ * would like to open towards this peer endpoint.
+ * [BUG: This is not currently implemented.]
+ * Optional attributes:
+ *
+ * None.
+ *
+ * The return value is a disposition.
+ */
+sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const sctp_subtype_t type,
+				       void *arg,
+				       sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *repl;
+
+	/* The comment below says that we enter COOKIE-WAIT AFTER
+	 * sending the INIT, but that doesn't actually work in our
+	 * implementation...
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_COOKIE_WAIT));
+
+	/* RFC 2960 5.1 Normal Establishment of an Association
+	 *
+	 * A) "A" first sends an INIT chunk to "Z".  In the INIT, "A"
+	 * must provide its Verification Tag (Tag_A) in the Initiate
+	 * Tag field.  Tag_A SHOULD be a random number in the range of
+	 * 1 to 4294967295 (see 5.3.1 for Tag value selection). ...
+	 */
+
+	repl = sctp_make_init(asoc, &asoc->base.bind_addr, GFP_ATOMIC, 0);
+	if (!repl)
+		goto nomem;
+
+	/* Cast away the const modifier, as we want to just
+	 * rerun it through as a sideffect.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC,
+			SCTP_ASOC((struct sctp_association *) asoc));
+
+	/* After sending the INIT, "A" starts the T1-init timer and
+	 * enters the COOKIE-WAIT state.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Process the SEND primitive.
+ *
+ * Section: 10.1 ULP-to-SCTP
+ * E) Send
+ *
+ * Format: SEND(association id, buffer address, byte count [,context]
+ *         [,stream id] [,life time] [,destination transport address]
+ *         [,unorder flag] [,no-bundle flag] [,payload protocol-id] )
+ * -> result
+ *
+ * This is the main method to send user data via SCTP.
+ *
+ * Mandatory attributes:
+ *
+ *  o association id - local handle to the SCTP association
+ *
+ *  o buffer address - the location where the user message to be
+ *    transmitted is stored;
+ *
+ *  o byte count - The size of the user data in number of bytes;
+ *
+ * Optional attributes:
+ *
+ *  o context - an optional 32 bit integer that will be carried in the
+ *    sending failure notification to the ULP if the transportation of
+ *    this User Message fails.
+ *
+ *  o stream id - to indicate which stream to send the data on. If not
+ *    specified, stream 0 will be used.
+ *
+ *  o life time - specifies the life time of the user data. The user data
+ *    will not be sent by SCTP after the life time expires. This
+ *    parameter can be used to avoid efforts to transmit stale
+ *    user messages. SCTP notifies the ULP if the data cannot be
+ *    initiated to transport (i.e. sent to the destination via SCTP's
+ *    send primitive) within the life time variable. However, the
+ *    user data will be transmitted if SCTP has attempted to transmit a
+ *    chunk before the life time expired.
+ *
+ *  o destination transport address - specified as one of the destination
+ *    transport addresses of the peer endpoint to which this packet
+ *    should be sent. Whenever possible, SCTP should use this destination
+ *    transport address for sending the packets, instead of the current
+ *    primary path.
+ *
+ *  o unorder flag - this flag, if present, indicates that the user
+ *    would like the data delivered in an unordered fashion to the peer
+ *    (i.e., the U flag is set to 1 on all DATA chunks carrying this
+ *    message).
+ *
+ *  o no-bundle flag - instructs SCTP not to bundle this user data with
+ *    other outbound DATA chunks. SCTP MAY still bundle even when
+ *    this flag is present, when faced with network congestion.
+ *
+ *  o payload protocol-id - A 32 bit unsigned integer that is to be
+ *    passed to the peer indicating the type of payload protocol data
+ *    being transmitted. This value is passed as opaque data by SCTP.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const sctp_subtype_t type,
+				       void *arg,
+				       sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(chunk));
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Process the SHUTDOWN primitive.
+ *
+ * Section: 10.1:
+ * C) Shutdown
+ *
+ * Format: SHUTDOWN(association id)
+ * -> result
+ *
+ * Gracefully closes an association. Any locally queued user data
+ * will be delivered to the peer. The association will be terminated only
+ * after the peer acknowledges all the SCTP packets sent.  A success code
+ * will be returned on successful termination of the association. If
+ * attempting to terminate the association results in a failure, an error
+ * code shall be returned.
+ *
+ * Mandatory attributes:
+ *
+ *  o association id - local handle to the SCTP association
+ *
+ * Optional attributes:
+ *
+ * None.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	int disposition;
+
+	/* From 9.2 Shutdown of an Association
+	 * Upon receipt of the SHUTDOWN primitive from its upper
+	 * layer, the endpoint enters SHUTDOWN-PENDING state and
+	 * remains there until all outstanding data has been
+	 * acknowledged by its peer. The endpoint accepts no new data
+	 * from its upper layer, but retransmits data to the far end
+	 * if necessary to fill gaps.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING));
+
+	/* sctpimpguide-05 Section 2.12.2
+	 * The sender of the SHUTDOWN MAY also start an overall guard timer
+	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	disposition = SCTP_DISPOSITION_CONSUME;
+	if (sctp_outq_is_empty(&asoc->outqueue)) {
+		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+							    arg, commands);
+	}
+	return disposition;
+}
+
+/*
+ * Process the ABORT primitive.
+ *
+ * Section: 10.1:
+ * C) Abort
+ *
+ * Format: Abort(association id [, cause code])
+ * -> result
+ *
+ * Ungracefully closes an association. Any locally queued user data
+ * will be discarded and an ABORT chunk is sent to the peer.  A success code
+ * will be returned on successful abortion of the association. If
+ * attempting to abort the association results in a failure, an error
+ * code shall be returned.
+ *
+ * Mandatory attributes:
+ *
+ *  o association id - local handle to the SCTP association
+ *
+ * Optional attributes:
+ *
+ *  o cause code - reason of the abort to be passed to the peer
+ *
+ * None.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_9_1_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* From 9.1 Abort of an Association
+	 * Upon receipt of the ABORT primitive from its upper
+	 * layer, the endpoint enters CLOSED state and
+	 * discard all outstanding data has been
+	 * acknowledged by its peer. The endpoint accepts no new data
+	 * from its upper layer, but retransmits data to the far end
+	 * if necessary to fill gaps.
+	 */
+	struct msghdr *msg = arg;
+	struct sctp_chunk *abort;
+	sctp_disposition_t retval;
+
+	retval = SCTP_DISPOSITION_CONSUME;
+
+	/* Generate ABORT chunk to send the peer.  */
+	abort = sctp_make_abort_user(asoc, NULL, msg);
+	if (!abort)
+		retval = SCTP_DISPOSITION_NOMEM;
+	else
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+
+	/* Even if we can't send the ABORT due to low memory delete the
+	 * TCB.  This is a departure from our typical NOMEM handling.
+	 */
+
+	/* Delete the established association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+			SCTP_U32(SCTP_ERROR_USER_ABORT));
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+
+	return retval;
+}
+
+/* We tried an illegal operation on an association which is closed.  */
+sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/* We tried an illegal operation on an association which is shutting
+ * down.
+ */
+sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const sctp_subtype_t type,
+					  void *arg,
+					  sctp_cmd_seq_t *commands)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
+			SCTP_ERROR(-ESHUTDOWN));
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * sctp_cookie_wait_prm_shutdown
+ *
+ * Section: 4 Note: 2
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explicitly address this issue, but is the route through the
+ * state table when someone issues a shutdown while in COOKIE_WAIT state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+
+	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+
+	return SCTP_DISPOSITION_DELETE_TCB;
+}
+
+/*
+ * sctp_cookie_echoed_prm_shutdown
+ *
+ * Section: 4 Note: 2
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explcitly address this issue, but is the route through the
+ * state table when someone issues a shutdown while in COOKIE_ECHOED state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg, sctp_cmd_seq_t *commands)
+{
+	/* There is a single T1 timer, so we should be able to use
+	 * common function with the COOKIE-WAIT state.
+	 */
+	return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands);
+}
+
+/*
+ * sctp_sf_cookie_wait_prm_abort
+ *
+ * Section: 4 Note: 2
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explicitly address this issue, but is the route through the
+ * state table when someone issues an abort while in COOKIE_WAIT state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct msghdr *msg = arg;
+	struct sctp_chunk *abort;
+	sctp_disposition_t retval;
+
+	/* Stop T1-init timer */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+	retval = SCTP_DISPOSITION_CONSUME;
+
+	/* Generate ABORT chunk to send the peer */
+	abort = sctp_make_abort_user(asoc, NULL, msg);
+	if (!abort)
+		retval = SCTP_DISPOSITION_NOMEM;
+	else
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_CLOSED));
+
+	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+
+	/* Even if we can't send the ABORT due to low memory delete the
+	 * TCB.  This is a departure from our typical NOMEM handling.
+	 */
+
+	/* Delete the established association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
+			SCTP_U32(SCTP_ERROR_USER_ABORT));
+
+	return retval;
+}
+
+/*
+ * sctp_sf_cookie_echoed_prm_abort
+ *
+ * Section: 4 Note: 3
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explcitly address this issue, but is the route through the
+ * state table when someone issues an abort while in COOKIE_ECHOED state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* There is a single T1 timer, so we should be able to use
+	 * common function with the COOKIE-WAIT state.
+	 */
+	return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * sctp_sf_shutdown_pending_prm_abort
+ *
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explicitly address this issue, but is the route through the
+ * state table when someone issues an abort while in SHUTDOWN-PENDING state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* Stop the T5-shutdown guard timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * sctp_sf_shutdown_sent_prm_abort
+ *
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explicitly address this issue, but is the route through the
+ * state table when someone issues an abort while in SHUTDOWN-SENT state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* Stop the T2-shutdown timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	/* Stop the T5-shutdown guard timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+
+	return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * sctp_sf_cookie_echoed_prm_abort
+ *
+ * Inputs
+ * (endpoint, asoc)
+ *
+ * The RFC does not explcitly address this issue, but is the route through the
+ * state table when someone issues an abort while in COOKIE_ECHOED state.
+ *
+ * Outputs
+ * (timers)
+ */
+sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	/* The same T2 timer, so we should be able to use
+	 * common function with the SHUTDOWN-SENT state.
+	 */
+	return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands);
+}
+
+/*
+ * Process the REQUESTHEARTBEAT primitive
+ *
+ * 10.1 ULP-to-SCTP
+ * J) Request Heartbeat
+ *
+ * Format: REQUESTHEARTBEAT(association id, destination transport address)
+ *
+ * -> result
+ *
+ * Instructs the local endpoint to perform a HeartBeat on the specified
+ * destination transport address of the given association. The returned
+ * result should indicate whether the transmission of the HEARTBEAT
+ * chunk to the destination address is successful.
+ *
+ * Mandatory attributes:
+ *
+ * o association id - local handle to the SCTP association
+ *
+ * o destination transport address - the transport address of the
+ *   association on which a heartbeat should be issued.
+ */
+sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	return sctp_sf_heartbeat(ep, asoc, type, (struct sctp_transport *)arg,
+				 commands);
+}
+
+/*
+ * ADDIP Section 4.1 ASCONF Chunk Procedures
+ * When an endpoint has an ASCONF signaled change to be sent to the
+ * remote endpoint it should do A1 to A9
+ */
+sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = arg;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T4, SCTP_CHUNK(chunk));
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(chunk));
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Ignore the primitive event
+ *
+ * The return value is the disposition of the primitive.
+ */
+sctp_disposition_t sctp_sf_ignore_primitive(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	SCTP_DEBUG_PRINTK("Primitive type %d is ignored.\n", type.primitive);
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/***************************************************************************
+ * These are the state functions for the OTHER events.
+ ***************************************************************************/
+
+/*
+ * Start the shutdown negotiation.
+ *
+ * From Section 9.2:
+ * Once all its outstanding data has been acknowledged, the endpoint
+ * shall send a SHUTDOWN chunk to its peer including in the Cumulative
+ * TSN Ack field the last sequential TSN it has received from the peer.
+ * It shall then start the T2-shutdown timer and enter the SHUTDOWN-SENT
+ * state. If the timer expires, the endpoint must re-send the SHUTDOWN
+ * with the updated last sequential TSN received from its peer.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *reply;
+
+	/* Once all its outstanding data has been acknowledged, the
+	 * endpoint shall send a SHUTDOWN chunk to its peer including
+	 * in the Cumulative TSN Ack field the last sequential TSN it
+	 * has received from the peer.
+	 */
+	reply = sctp_make_shutdown(asoc, NULL);
+	if (!reply)
+		goto nomem;
+
+	/* Set the transport for the SHUTDOWN chunk and the timeout for the
+	 * T2-shutdown timer.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T2, SCTP_CHUNK(reply));
+
+	/* It shall then start the T2-shutdown timer */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	if (asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+
+	/* and enter the SHUTDOWN-SENT state.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_SHUTDOWN_SENT));
+
+	/* sctp-implguide 2.10 Issues with Heartbeating and failover
+	 *
+	 * HEARTBEAT ... is discontinued after sending either SHUTDOWN
+         * or SHUTDOWN-ACK.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_STOP, SCTP_NULL());
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Generate a SHUTDOWN ACK now that everything is SACK'd.
+ *
+ * From Section 9.2:
+ *
+ * If it has no more outstanding DATA chunks, the SHUTDOWN receiver
+ * shall send a SHUTDOWN ACK and start a T2-shutdown timer of its own,
+ * entering the SHUTDOWN-ACK-SENT state. If the timer expires, the
+ * endpoint must re-send the SHUTDOWN ACK.
+ *
+ * The return value is the disposition.
+ */
+sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *reply;
+
+	/* There are 2 ways of getting here:
+	 *    1) called in response to a SHUTDOWN chunk
+	 *    2) called when SCTP_EVENT_NO_PENDING_TSN event is issued.
+	 *
+	 * For the case (2), the arg parameter is set to NULL.  We need
+	 * to check that we have a chunk before accessing it's fields.
+	 */
+	if (chunk) {
+		if (!sctp_vtag_verify(chunk, asoc))
+			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+		/* Make sure that the SHUTDOWN chunk has a valid length. */
+		if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
+			return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+							  commands);
+	}
+
+	/* If it has no more outstanding DATA chunks, the SHUTDOWN receiver
+	 * shall send a SHUTDOWN ACK ...
+	 */
+	reply = sctp_make_shutdown_ack(asoc, chunk);
+	if (!reply)
+		goto nomem;
+
+	/* Set the transport for the SHUTDOWN ACK chunk and the timeout for
+	 * the T2-shutdown timer.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T2, SCTP_CHUNK(reply));
+
+	/* and start/restart a T2-shutdown timer of its own, */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+
+	if (asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+
+	/* Enter the SHUTDOWN-ACK-SENT state.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_SHUTDOWN_ACK_SENT));
+
+	/* sctp-implguide 2.10 Issues with Heartbeating and failover
+	 *
+	 * HEARTBEAT ... is discontinued after sending either SHUTDOWN
+         * or SHUTDOWN-ACK.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_STOP, SCTP_NULL());
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * Ignore the event defined as other
+ *
+ * The return value is the disposition of the event.
+ */
+sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	SCTP_DEBUG_PRINTK("The event other type %d is ignored\n", type.other);
+	return SCTP_DISPOSITION_DISCARD;
+}
+
+/************************************************************
+ * These are the state functions for handling timeout events.
+ ************************************************************/
+
+/*
+ * RTX Timeout
+ *
+ * Section: 6.3.3 Handle T3-rtx Expiration
+ *
+ * Whenever the retransmission timer T3-rtx expires for a destination
+ * address, do the following:
+ * [See below]
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *transport = arg;
+
+	if (asoc->overall_error_count >= asoc->max_retrans) {
+		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_ERROR));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	/* E1) For the destination address for which the timer
+	 * expires, adjust its ssthresh with rules defined in Section
+	 * 7.2.3 and set the cwnd <- MTU.
+	 */
+
+	/* E2) For the destination address for which the timer
+	 * expires, set RTO <- RTO * 2 ("back off the timer").  The
+	 * maximum value discussed in rule C7 above (RTO.max) may be
+	 * used to provide an upper bound to this doubling operation.
+	 */
+
+	/* E3) Determine how many of the earliest (i.e., lowest TSN)
+	 * outstanding DATA chunks for the address for which the
+	 * T3-rtx has expired will fit into a single packet, subject
+	 * to the MTU constraint for the path corresponding to the
+	 * destination transport address to which the retransmission
+	 * is being sent (this may be different from the address for
+	 * which the timer expires [see Section 6.4]).  Call this
+	 * value K. Bundle and retransmit those K DATA chunks in a
+	 * single packet to the destination endpoint.
+	 *
+	 * Note: Any DATA chunks that were sent to the address for
+	 * which the T3-rtx timer expired but did not fit in one MTU
+	 * (rule E3 above), should be marked for retransmission and
+	 * sent as soon as cwnd allows (normally when a SACK arrives).
+	 */
+
+	/* NB: Rules E4 and F1 are implicit in R1.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_RETRAN, SCTP_TRANSPORT(transport));
+
+	/* Do some failure management (Section 8.2). */
+	sctp_add_cmd_sf(commands, SCTP_CMD_STRIKE, SCTP_TRANSPORT(transport));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * Generate delayed SACK on timeout
+ *
+ * Section: 6.2  Acknowledgement on Reception of DATA Chunks
+ *
+ * The guidelines on delayed acknowledgement algorithm specified in
+ * Section 4.2 of [RFC2581] SHOULD be followed.  Specifically, an
+ * acknowledgement SHOULD be generated for at least every second packet
+ * (not every second DATA chunk) received, and SHOULD be generated
+ * within 200 ms of the arrival of any unacknowledged DATA chunk.  In
+ * some situations it may be beneficial for an SCTP transmitter to be
+ * more conservative than the algorithms detailed in this document
+ * allow. However, an SCTP transmitter MUST NOT be more aggressive than
+ * the following algorithms allow.
+ */
+sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const sctp_subtype_t type,
+				       void *arg,
+				       sctp_cmd_seq_t *commands)
+{
+	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/*
+ * sctp_sf_t1_timer_expire
+ *
+ * Section: 4 Note: 2
+ * Verification Tag:
+ * Inputs
+ * (endpoint, asoc)
+ *
+ *  RFC 2960 Section 4 Notes
+ *  2) If the T1-init timer expires, the endpoint MUST retransmit INIT
+ *     and re-start the T1-init timer without changing state.  This MUST
+ *     be repeated up to 'Max.Init.Retransmits' times.  After that, the
+ *     endpoint MUST abort the initialization process and report the
+ *     error to SCTP user.
+ *
+ *   3) If the T1-cookie timer expires, the endpoint MUST retransmit
+ *     COOKIE ECHO and re-start the T1-cookie timer without changing
+ *     state.  This MUST be repeated up to 'Max.Init.Retransmits' times.
+ *     After that, the endpoint MUST abort the initialization process and
+ *     report the error to SCTP user.
+ *
+ * Outputs
+ * (timers, events)
+ *
+ */
+sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const sctp_subtype_t type,
+					   void *arg,
+					   sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *repl;
+	struct sctp_bind_addr *bp;
+	sctp_event_timeout_t timer = (sctp_event_timeout_t) arg;
+	int timeout;
+	int attempts;
+
+	timeout = asoc->timeouts[timer];
+	attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
+	repl = NULL;
+
+	SCTP_DEBUG_PRINTK("Timer T1 expired.\n");
+
+	if (attempts < asoc->max_init_attempts) {
+		switch (timer) {
+		case SCTP_EVENT_TIMEOUT_T1_INIT:
+			bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
+			repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0);
+			break;
+
+		case SCTP_EVENT_TIMEOUT_T1_COOKIE:
+			repl = sctp_make_cookie_echo(asoc, NULL);
+			break;
+
+		default:
+			BUG();
+			break;
+		};
+
+		if (!repl)
+			goto nomem;
+
+		/* Issue a sideeffect to do the needed accounting. */
+		sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART,
+				SCTP_TO(timer));
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+	} else {
+		sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_ERROR));
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN
+ * with the updated last sequential TSN received from its peer.
+ *
+ * An endpoint should limit the number of retransmissions of the
+ * SHUTDOWN chunk to the protocol parameter 'Association.Max.Retrans'.
+ * If this threshold is exceeded the endpoint should destroy the TCB and
+ * MUST report the peer endpoint unreachable to the upper layer (and
+ * thus the association enters the CLOSED state).  The reception of any
+ * packet from its peer (i.e. as the peer sends all of its queued DATA
+ * chunks) should clear the endpoint's retransmission count and restart
+ * the T2-Shutdown timer,  giving its peer ample opportunity to transmit
+ * all of its queued DATA chunks that have not yet been sent.
+ */
+sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const sctp_subtype_t type,
+					   void *arg,
+					   sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *reply = NULL;
+
+	SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
+	if (asoc->overall_error_count >= asoc->max_retrans) {
+		/* Note:  CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_ERROR));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_DELETE_TCB;
+	}
+
+	switch (asoc->state) {
+	case SCTP_STATE_SHUTDOWN_SENT:
+		reply = sctp_make_shutdown(asoc, NULL);
+		break;
+
+	case SCTP_STATE_SHUTDOWN_ACK_SENT:
+		reply = sctp_make_shutdown_ack(asoc, NULL);
+		break;
+
+	default:
+		BUG();
+		break;
+	};
+
+	if (!reply)
+		goto nomem;
+
+	/* Do some failure management (Section 8.2). */
+	sctp_add_cmd_sf(commands, SCTP_CMD_STRIKE,
+			SCTP_TRANSPORT(asoc->shutdown_last_sent_to));
+
+	/* Set the transport for the SHUTDOWN/ACK chunk and the timeout for
+	 * the T2-shutdown timer.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T2, SCTP_CHUNK(reply));
+
+	/* Restart the T2-shutdown timer.  */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	return SCTP_DISPOSITION_CONSUME;
+
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/*
+ * ADDIP Section 4.1 ASCONF CHunk Procedures
+ * If the T4 RTO timer expires the endpoint should do B1 to B5
+ */
+sctp_disposition_t sctp_sf_t4_timer_expire(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *chunk = asoc->addip_last_asconf;
+	struct sctp_transport *transport = chunk->transport;
+
+	/* ADDIP 4.1 B1) Increment the error counters and perform path failure
+	 * detection on the appropriate destination address as defined in
+	 * RFC2960 [5] section 8.1 and 8.2.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_STRIKE, SCTP_TRANSPORT(transport));
+
+	/* Reconfig T4 timer and transport. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_SETUP_T4, SCTP_CHUNK(chunk));
+
+	/* ADDIP 4.1 B2) Increment the association error counters and perform
+	 * endpoint failure detection on the association as defined in
+	 * RFC2960 [5] section 8.1 and 8.2.
+	 * association error counter is incremented in SCTP_CMD_STRIKE.
+	 */
+	if (asoc->overall_error_count >= asoc->max_retrans) {
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_ERROR));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_DISPOSITION_ABORT;
+	}
+
+	/* ADDIP 4.1 B3) Back-off the destination address RTO value to which
+	 * the ASCONF chunk was sent by doubling the RTO timer value.
+	 * This is done in SCTP_CMD_STRIKE.
+	 */
+
+	/* ADDIP 4.1 B4) Re-transmit the ASCONF Chunk last sent and if possible
+	 * choose an alternate destination address (please refer to RFC2960
+	 * [5] section 6.4.1). An endpoint MUST NOT add new parameters to this
+	 * chunk, it MUST be the same (including its serial number) as the last 
+	 * ASCONF sent.
+	 */
+	sctp_chunk_hold(asoc->addip_last_asconf);
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+			SCTP_CHUNK(asoc->addip_last_asconf));
+
+	/* ADDIP 4.1 B5) Restart the T-4 RTO timer. Note that if a different
+	 * destination is selected, then the RTO used will be that of the new
+	 * destination address.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/* sctpimpguide-05 Section 2.12.2
+ * The sender of the SHUTDOWN MAY also start an overall guard timer
+ * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
+ * At the expiration of this timer the sender SHOULD abort the association
+ * by sending an ABORT chunk.
+ */
+sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const sctp_subtype_t type,
+					   void *arg,
+					   sctp_cmd_seq_t *commands)
+{
+	struct sctp_chunk *reply = NULL;
+
+	SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
+
+	reply = sctp_make_abort(asoc, NULL, 0);
+	if (!reply)
+		goto nomem;
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+			SCTP_U32(SCTP_ERROR_NO_ERROR));
+
+	return SCTP_DISPOSITION_DELETE_TCB;
+nomem:
+	return SCTP_DISPOSITION_NOMEM;
+}
+
+/* Handle expiration of AUTOCLOSE timer.  When the autoclose timer expires,
+ * the association is automatically closed by starting the shutdown process.
+ * The work that needs to be done is same as when SHUTDOWN is initiated by
+ * the user.  So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
+ */
+sctp_disposition_t sctp_sf_autoclose_timer_expire(
+	const struct sctp_endpoint *ep,
+	const struct sctp_association *asoc,
+	const sctp_subtype_t type,
+	void *arg,
+	sctp_cmd_seq_t *commands)
+{
+	int disposition;
+
+	/* From 9.2 Shutdown of an Association
+	 * Upon receipt of the SHUTDOWN primitive from its upper
+	 * layer, the endpoint enters SHUTDOWN-PENDING state and
+	 * remains there until all outstanding data has been
+	 * acknowledged by its peer. The endpoint accepts no new data
+	 * from its upper layer, but retransmits data to the far end
+	 * if necessary to fill gaps.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING));
+
+	/* sctpimpguide-05 Section 2.12.2
+	 * The sender of the SHUTDOWN MAY also start an overall guard timer
+	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
+ 	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+	disposition = SCTP_DISPOSITION_CONSUME;
+	if (sctp_outq_is_empty(&asoc->outqueue)) {
+		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+							    arg, commands);
+	}
+	return disposition;
+}
+
+/*****************************************************************************
+ * These are sa state functions which could apply to all types of events.
+ ****************************************************************************/
+
+/*
+ * This table entry is not implemented.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
+				    const struct sctp_association *asoc,
+				    const sctp_subtype_t type,
+				    void *arg,
+				    sctp_cmd_seq_t *commands)
+{
+	return SCTP_DISPOSITION_NOT_IMPL;
+}
+
+/*
+ * This table entry represents a bug.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
+			       const struct sctp_association *asoc,
+			       const sctp_subtype_t type,
+			       void *arg,
+			       sctp_cmd_seq_t *commands)
+{
+	return SCTP_DISPOSITION_BUG;
+}
+
+/*
+ * This table entry represents the firing of a timer in the wrong state.
+ * Since timer deletion cannot be guaranteed a timer 'may' end up firing
+ * when the association is in the wrong state.   This event should
+ * be ignored, so as to prevent any rearming of the timer.
+ *
+ * Inputs
+ * (endpoint, asoc, chunk)
+ *
+ * The return value is the disposition of the chunk.
+ */
+sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const sctp_subtype_t type,
+					void *arg,
+					sctp_cmd_seq_t *commands)
+{
+	SCTP_DEBUG_PRINTK("Timer %d ignored.\n", type.chunk);
+	return SCTP_DISPOSITION_CONSUME;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* Pull the SACK chunk based on the SACK header. */
+static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
+{
+	struct sctp_sackhdr *sack;
+	unsigned int len;
+	__u16 num_blocks;
+	__u16 num_dup_tsns;
+
+	/* Protect ourselves from reading too far into
+	 * the skb from a bogus sender.
+	 */
+	sack = (struct sctp_sackhdr *) chunk->skb->data;
+
+	num_blocks = ntohs(sack->num_gap_ack_blocks);
+	num_dup_tsns = ntohs(sack->num_dup_tsns);
+	len = sizeof(struct sctp_sackhdr);
+	len += (num_blocks + num_dup_tsns) * sizeof(__u32);
+	if (len > chunk->skb->len)
+		return NULL;
+
+	skb_pull(chunk->skb, len);
+
+	return sack;
+}
+
+/* Create an ABORT packet to be sent as a response, with the specified
+ * error causes.
+ */
+static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+				  const struct sctp_association *asoc,
+				  struct sctp_chunk *chunk,
+				  const void *payload,
+				  size_t paylen)
+{
+	struct sctp_packet *packet;
+	struct sctp_chunk *abort;
+
+	packet = sctp_ootb_pkt_new(asoc, chunk);
+
+	if (packet) {
+		/* Make an ABORT.
+		 * The T bit will be set if the asoc is NULL.
+		 */
+		abort = sctp_make_abort(asoc, chunk, paylen);
+		if (!abort) {
+			sctp_ootb_pkt_free(packet);
+			return NULL;
+		}
+		/* Add specified error causes, i.e., payload, to the
+		 * end of the chunk.
+		 */
+		sctp_addto_chunk(abort, paylen, payload);
+
+		/* Set the skb to the belonging sock for accounting.  */
+		abort->skb->sk = ep->base.sk;
+
+		sctp_packet_append_chunk(packet, abort);
+
+	}
+
+	return packet;
+}
+
+/* Allocate a packet for responding in the OOTB conditions.  */
+static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+					     const struct sctp_chunk *chunk)
+{
+	struct sctp_packet *packet;
+	struct sctp_transport *transport;
+	__u16 sport;
+	__u16 dport;
+	__u32 vtag;
+
+	/* Get the source and destination port from the inbound packet.  */
+	sport = ntohs(chunk->sctp_hdr->dest);
+	dport = ntohs(chunk->sctp_hdr->source);
+
+	/* The V-tag is going to be the same as the inbound packet if no
+	 * association exists, otherwise, use the peer's vtag.
+	 */
+	if (asoc) {
+		vtag = asoc->peer.i.init_tag;
+	} else {
+		/* Special case the INIT and stale COOKIE_ECHO as there is no
+		 * vtag yet.
+		 */
+		switch(chunk->chunk_hdr->type) {
+		case SCTP_CID_INIT:
+		{
+			sctp_init_chunk_t *init;
+
+			init = (sctp_init_chunk_t *)chunk->chunk_hdr;
+			vtag = ntohl(init->init_hdr.init_tag);
+			break;
+		}
+		default:	
+			vtag = ntohl(chunk->sctp_hdr->vtag);
+			break;
+		}
+	}
+
+	/* Make a transport for the bucket, Eliza... */
+	transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC);
+	if (!transport)
+		goto nomem;
+
+	/* Cache a route for the transport with the chunk's destination as
+	 * the source address.
+	 */
+	sctp_transport_route(transport, (union sctp_addr *)&chunk->dest,
+			     sctp_sk(sctp_get_ctl_sock()));
+
+	packet = sctp_packet_init(&transport->packet, transport, sport, dport);
+	packet = sctp_packet_config(packet, vtag, 0);
+
+	return packet;
+
+nomem:
+	return NULL;
+}
+
+/* Free the packet allocated earlier for responding in the OOTB condition.  */
+void sctp_ootb_pkt_free(struct sctp_packet *packet)
+{
+	sctp_transport_free(packet->transport);
+}
+
+/* Send a stale cookie error when a invalid COOKIE ECHO chunk is found  */
+static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const struct sctp_chunk *chunk,
+				       sctp_cmd_seq_t *commands,
+				       struct sctp_chunk *err_chunk)
+{
+	struct sctp_packet *packet;
+
+	if (err_chunk) {
+		packet = sctp_ootb_pkt_new(asoc, chunk);
+		if (packet) {
+			struct sctp_signed_cookie *cookie;
+
+			/* Override the OOTB vtag from the cookie. */
+			cookie = chunk->subh.cookie_hdr;
+			packet->vtag = cookie->c.peer_vtag;
+			
+			/* Set the skb to the belonging sock for accounting. */
+			err_chunk->skb->sk = ep->base.sk;
+			sctp_packet_append_chunk(packet, err_chunk);
+			sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
+					SCTP_PACKET(packet));
+			SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		} else
+			sctp_chunk_free (err_chunk);
+	}
+}
+
+
+/* Process a data chunk */
+static int sctp_eat_data(const struct sctp_association *asoc,
+			 struct sctp_chunk *chunk,
+			 sctp_cmd_seq_t *commands)
+{
+	sctp_datahdr_t *data_hdr;
+	struct sctp_chunk *err;
+	size_t datalen;
+	sctp_verb_t deliver;
+	int tmp;
+	__u32 tsn;
+
+	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
+
+	tsn = ntohl(data_hdr->tsn);
+	SCTP_DEBUG_PRINTK("eat_data: TSN 0x%x.\n", tsn);
+
+	/* ASSERT:  Now skb->data is really the user data.  */
+
+	/* Process ECN based congestion.
+	 *
+	 * Since the chunk structure is reused for all chunks within
+	 * a packet, we use ecn_ce_done to track if we've already
+	 * done CE processing for this packet.
+	 *
+	 * We need to do ECN processing even if we plan to discard the
+	 * chunk later.
+	 */
+
+	if (!chunk->ecn_ce_done) {
+		struct sctp_af *af;
+		chunk->ecn_ce_done = 1;
+
+		af = sctp_get_af_specific(
+			ipver2af(chunk->skb->nh.iph->version));
+
+		if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
+			/* Do real work as sideffect. */
+			sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE,
+					SCTP_U32(tsn));
+		}
+	}
+
+	tmp = sctp_tsnmap_check(&asoc->peer.tsn_map, tsn);
+	if (tmp < 0) {
+		/* The TSN is too high--silently discard the chunk and
+		 * count on it getting retransmitted later.
+		 */
+		return SCTP_IERROR_HIGH_TSN;
+	} else if (tmp > 0) {
+		/* This is a duplicate.  Record it.  */
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_DUP, SCTP_U32(tsn));
+		return SCTP_IERROR_DUP_TSN;
+	}
+
+	/* This is a new TSN.  */
+
+	/* Discard if there is no room in the receive window.
+	 * Actually, allow a little bit of overflow (up to a MTU).
+	 */
+	datalen = ntohs(chunk->chunk_hdr->length);
+	datalen -= sizeof(sctp_data_chunk_t);
+
+	deliver = SCTP_CMD_CHUNK_ULP;
+
+	/* Think about partial delivery. */
+	if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) {
+
+		/* Even if we don't accept this chunk there is
+		 * memory pressure.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_PART_DELIVER, SCTP_NULL());
+	}
+
+        /* Spill over rwnd a little bit.  Note: While allowed, this spill over
+	 * seems a bit troublesome in that frag_point varies based on
+	 * PMTU.  In cases, such as loopback, this might be a rather
+	 * large spill over.
+	 */
+	if (!asoc->rwnd || asoc->rwnd_over ||
+	    (datalen > asoc->rwnd + asoc->frag_point)) {
+
+		/* If this is the next TSN, consider reneging to make
+		 * room.   Note: Playing nice with a confused sender.  A
+		 * malicious sender can still eat up all our buffer
+		 * space and in the future we may want to detect and
+		 * do more drastic reneging.
+		 */
+		if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) &&
+		    (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) {
+			SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn);
+			deliver = SCTP_CMD_RENEGE;
+		} else {
+			SCTP_DEBUG_PRINTK("Discard tsn: %u len: %Zd, "
+					  "rwnd: %d\n", tsn, datalen,
+					  asoc->rwnd);
+			return SCTP_IERROR_IGNORE_TSN;
+		}
+	}
+
+	/*
+	 * Section 3.3.10.9 No User Data (9)
+	 *
+	 * Cause of error
+	 * ---------------
+	 * No User Data:  This error cause is returned to the originator of a
+	 * DATA chunk if a received DATA chunk has no user data.
+	 */
+	if (unlikely(0 == datalen)) {
+		err = sctp_make_abort_no_data(asoc, chunk, tsn);
+		if (err) {
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(err));
+		}
+		/* We are going to ABORT, so we might as well stop
+		 * processing the rest of the chunks in the packet.
+		 */
+		sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL());
+		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+				SCTP_U32(SCTP_ERROR_NO_DATA));
+		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		return SCTP_IERROR_NO_DATA;
+	}
+
+	/* If definately accepting the DATA chunk, record its TSN, otherwise
+	 * wait for renege processing.
+	 */
+	if (SCTP_CMD_CHUNK_ULP == deliver)
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
+
+	/* Note: Some chunks may get overcounted (if we drop) or overcounted
+	 * if we renege and the chunk arrives again.
+	 */
+	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+		SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS);
+	else
+		SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS);
+
+	/* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
+	 *
+	 * If an endpoint receive a DATA chunk with an invalid stream
+	 * identifier, it shall acknowledge the reception of the DATA chunk
+	 * following the normal procedure, immediately send an ERROR chunk
+	 * with cause set to "Invalid Stream Identifier" (See Section 3.3.10)
+	 * and discard the DATA chunk.
+	 */
+	if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) {
+		err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM,
+					 &data_hdr->stream,
+					 sizeof(data_hdr->stream));
+		if (err)
+			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+					SCTP_CHUNK(err));
+		return SCTP_IERROR_BAD_STREAM;
+	}
+
+	/* Send the data up to the user.  Note:  Schedule  the
+	 * SCTP_CMD_CHUNK_ULP cmd before the SCTP_CMD_GEN_SACK, as the SACK
+	 * chunk needs the updated rwnd.
+	 */
+	sctp_add_cmd_sf(commands, deliver, SCTP_CHUNK(chunk));
+
+	return SCTP_IERROR_NO_ERROR;
+}
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
new file mode 100644
index 00000000000..8967846f69e
--- /dev/null
+++ b/net/sctp/sm_statetable.c
@@ -0,0 +1,1004 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These are the state tables for the SCTP state machine.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Hui Huang		    <hui.huang@nokia.com>
+ *    Daisy Chang	    <daisyc@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/skbuff.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+static const sctp_sm_table_entry_t
+primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES];
+static const sctp_sm_table_entry_t
+other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
+static const sctp_sm_table_entry_t
+timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
+
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+							    sctp_state_t state);
+
+
+static const sctp_sm_table_entry_t bug = {
+	.fn = sctp_sf_bug,
+	.name = "sctp_sf_bug"
+};
+
+#define DO_LOOKUP(_max, _type, _table) \
+	if ((event_subtype._type > (_max))) { \
+		printk(KERN_WARNING \
+		       "sctp table %p possible attack:" \
+		       " event %d exceeds max %d\n", \
+		       _table, event_subtype._type, _max); \
+		return &bug; \
+	} \
+	return &_table[event_subtype._type][(int)state];
+
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
+						  sctp_state_t state,
+						  sctp_subtype_t event_subtype)
+{
+	switch (event_type) {
+	case SCTP_EVENT_T_CHUNK:
+		return sctp_chunk_event_lookup(event_subtype.chunk, state);
+		break;
+	case SCTP_EVENT_T_TIMEOUT:
+		DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
+			  timeout_event_table);
+		break;
+
+	case SCTP_EVENT_T_OTHER:
+		DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table);
+		break;
+
+	case SCTP_EVENT_T_PRIMITIVE:
+		DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
+			  primitive_event_table);
+		break;
+
+	default:
+		/* Yikes!  We got an illegal event type.  */
+		return &bug;
+	};
+}
+
+#define TYPE_SCTP_DATA { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_eat_data_6_2, .name = "sctp_sf_eat_data_6_2"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_eat_data_fast_4_4, .name = "sctp_sf_eat_data_fast_4_4"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_DATA */
+
+#define TYPE_SCTP_INIT { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_do_5_1B_init, .name = "sctp_sf_do_5_1B_init"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_5_2_1_siminit, .name = "sctp_sf_do_5_2_1_siminit"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_5_2_2_dupinit, .name = "sctp_sf_do_5_2_2_dupinit"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_do_9_2_reshutack, .name = "sctp_sf_do_9_2_reshutack"}, \
+} /* TYPE_SCTP_INIT */
+
+#define TYPE_SCTP_INIT_ACK { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_5_1C_ack, .name = "sctp_sf_do_5_1C_ack"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_INIT_ACK */
+
+#define TYPE_SCTP_SACK { \
+	/*  SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_eat_sack_6_2, .name = "sctp_sf_eat_sack_6_2"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_SACK */
+
+#define TYPE_SCTP_HEARTBEAT { \
+	/*  SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	/* This should not happen, but we are nice.  */ \
+	{.fn = sctp_sf_beat_8_3, .name = "sctp_sf_beat_8_3"}, \
+} /* TYPE_SCTP_HEARTBEAT */
+
+#define TYPE_SCTP_HEARTBEAT_ACK { \
+	/*  SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_backbeat_8_3, .name = "sctp_sf_backbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_HEARTBEAT_ACK */
+
+#define TYPE_SCTP_ABORT { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_pdiscard, .name = "sctp_sf_pdiscard"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_cookie_wait_abort, .name = "sctp_sf_cookie_wait_abort"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_cookie_echoed_abort, \
+	 .name = "sctp_sf_cookie_echoed_abort"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_shutdown_pending_abort, \
+	.name = "sctp_sf_shutdown_pending_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_shutdown_sent_abort, \
+	.name = "sctp_sf_shutdown_sent_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_9_1_abort, .name = "sctp_sf_do_9_1_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_shutdown_ack_sent_abort, \
+	.name = "sctp_sf_shutdown_ack_sent_abort"}, \
+} /* TYPE_SCTP_ABORT */
+
+#define TYPE_SCTP_SHUTDOWN { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_9_2_shutdown, .name = "sctp_sf_do_9_2_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_9_2_shutdown_ack, \
+	 .name = "sctp_sf_do_9_2_shutdown_ack"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_SHUTDOWN */
+
+#define TYPE_SCTP_SHUTDOWN_ACK { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_8_5_1_E_sa, .name = "sctp_sf_do_8_5_1_E_sa"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_violation, .name = "sctp_sf_violation"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_do_9_2_final, .name = "sctp_sf_do_9_2_final"}, \
+} /* TYPE_SCTP_SHUTDOWN_ACK */
+
+#define TYPE_SCTP_ERROR { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_cookie_echoed_err, .name = "sctp_sf_cookie_echoed_err"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_operr_notify, .name = "sctp_sf_operr_notify"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_ERROR */
+
+#define TYPE_SCTP_COOKIE_ECHO { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_do_5_1D_ce, .name = "sctp_sf_do_5_1D_ce"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_do_5_2_4_dupcook, .name = "sctp_sf_do_5_2_4_dupcook"}, \
+} /* TYPE_SCTP_COOKIE_ECHO */
+
+#define TYPE_SCTP_COOKIE_ACK { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_5_1E_ca, .name = "sctp_sf_do_5_1E_ca"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_COOKIE_ACK */
+
+#define TYPE_SCTP_ECN_ECNE { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+} /* TYPE_SCTP_ECN_ECNE */
+
+#define TYPE_SCTP_ECN_CWR { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_ecn_cwr, .name = "sctp_sf_do_ecn_cwr"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+} /* TYPE_SCTP_ECN_CWR */
+
+#define TYPE_SCTP_SHUTDOWN_COMPLETE { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_do_4_C, .name = "sctp_sf_do_4_C"}, \
+} /* TYPE_SCTP_SHUTDOWN_COMPLETE */
+
+/* The primary index for this table is the chunk type.
+ * The secondary index for this table is the state.
+ *
+ * For base protocol (RFC 2960).
+ */
+static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_DATA,
+	TYPE_SCTP_INIT,
+	TYPE_SCTP_INIT_ACK,
+	TYPE_SCTP_SACK,
+	TYPE_SCTP_HEARTBEAT,
+	TYPE_SCTP_HEARTBEAT_ACK,
+	TYPE_SCTP_ABORT,
+	TYPE_SCTP_SHUTDOWN,
+	TYPE_SCTP_SHUTDOWN_ACK,
+	TYPE_SCTP_ERROR,
+	TYPE_SCTP_COOKIE_ECHO,
+	TYPE_SCTP_COOKIE_ACK,
+	TYPE_SCTP_ECN_ECNE,
+	TYPE_SCTP_ECN_CWR,
+	TYPE_SCTP_SHUTDOWN_COMPLETE,
+}; /* state_fn_t chunk_event_table[][] */
+
+#define TYPE_SCTP_ASCONF { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_asconf, .name = "sctp_sf_do_asconf"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_ASCONF */
+
+#define TYPE_SCTP_ASCONF_ACK { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_asconf_ack, .name = "sctp_sf_do_asconf_ack"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_ASCONF_ACK */
+
+/* The primary index for this table is the chunk type.
+ * The secondary index for this table is the state.
+ */
+static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_ASCONF,
+	TYPE_SCTP_ASCONF_ACK,
+}; /*state_fn_t addip_chunk_event_table[][] */
+
+#define TYPE_SCTP_FWD_TSN { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_eat_fwd_tsn, .name = "sctp_sf_eat_fwd_tsn"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_eat_fwd_tsn_fast, .name = "sctp_sf_eat_fwd_tsn_fast"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
+} /* TYPE_SCTP_FWD_TSN */
+
+/* The primary index for this table is the chunk type.
+ * The secondary index for this table is the state.
+ */
+static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_FWD_TSN,
+}; /*state_fn_t prsctp_chunk_event_table[][] */
+
+static const sctp_sm_table_entry_t
+chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
+	/* SCTP_STATE_EMPTY */
+	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"},
+	/* SCTP_STATE_CLOSED */
+	{.fn = sctp_sf_tabort_8_4_8, .name = "sctp_sf_tabort_8_4_8"},
+	/* SCTP_STATE_COOKIE_WAIT */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_COOKIE_ECHOED */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_ESTABLISHED */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_SHUTDOWN_PENDING */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_SHUTDOWN_SENT */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */
+	{.fn = sctp_sf_unk_chunk, .name = "sctp_sf_unk_chunk"},
+};	/* chunk unknown */
+
+
+#define TYPE_SCTP_PRIMITIVE_ASSOCIATE  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_do_prm_asoc, .name = "sctp_sf_do_prm_asoc"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_not_impl, .name = "sctp_sf_not_impl"}, \
+} /* TYPE_SCTP_PRIMITIVE_ASSOCIATE */
+
+#define TYPE_SCTP_PRIMITIVE_SHUTDOWN  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_cookie_wait_prm_shutdown, \
+	 .name = "sctp_sf_cookie_wait_prm_shutdown"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_cookie_echoed_prm_shutdown, \
+	 .name = "sctp_sf_cookie_echoed_prm_shutdown"},\
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_9_2_prm_shutdown, \
+	 .name = "sctp_sf_do_9_2_prm_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_ignore_primitive, .name = "sctp_sf_ignore_primitive"}, \
+} /* TYPE_SCTP_PRIMITIVE_SHUTDOWN */
+
+#define TYPE_SCTP_PRIMITIVE_ABORT  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_cookie_wait_prm_abort, \
+	.name = "sctp_sf_cookie_wait_prm_abort"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_cookie_echoed_prm_abort, \
+	.name = "sctp_sf_cookie_echoed_prm_abort"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_9_1_prm_abort, \
+	.name = "sctp_sf_do_9_1_prm_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_shutdown_pending_prm_abort, \
+	.name = "sctp_sf_shutdown_pending_prm_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_shutdown_sent_prm_abort, \
+	.name = "sctp_sf_shutdown_sent_prm_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_9_1_prm_abort, \
+	.name = "sctp_sf_do_9_1_prm_abort"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_shutdown_ack_sent_prm_abort, \
+	.name = "sctp_sf_shutdown_ack_sent_prm_abort"}, \
+} /* TYPE_SCTP_PRIMITIVE_ABORT */
+
+#define TYPE_SCTP_PRIMITIVE_SEND  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_prm_send, .name = "sctp_sf_do_prm_send"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+} /* TYPE_SCTP_PRIMITIVE_SEND */
+
+#define TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
+	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
+} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
+
+#define TYPE_SCTP_PRIMITIVE_ASCONF { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_prm_asconf, .name = "sctp_sf_do_prm_asconf"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_error_shutdown, .name = "sctp_sf_error_shutdown"}, \
+} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
+
+/* The primary index for this table is the primitive type.
+ * The secondary index for this table is the state.
+ */
+static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_PRIMITIVE_ASSOCIATE,
+	TYPE_SCTP_PRIMITIVE_SHUTDOWN,
+	TYPE_SCTP_PRIMITIVE_ABORT,
+	TYPE_SCTP_PRIMITIVE_SEND,
+	TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT,
+	TYPE_SCTP_PRIMITIVE_ASCONF,
+};
+
+#define TYPE_SCTP_OTHER_NO_PENDING_TSN  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_9_2_start_shutdown, \
+	 .name = "sctp_do_9_2_start_shutdown"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_9_2_shutdown_ack, \
+	 .name = "sctp_sf_do_9_2_shutdown_ack"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+}
+
+#define TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH  { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_cookie_wait_icmp_abort, \
+	 .name = "sctp_sf_cookie_wait_icmp_abort"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_ignore_other, .name = "sctp_sf_ignore_other"}, \
+}
+
+static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_OTHER_NO_PENDING_TSN,
+	TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH,
+};
+
+#define TYPE_SCTP_EVENT_TIMEOUT_NONE { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T1_INIT { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_t2_timer_expire, .name = "sctp_sf_t2_timer_expire"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T3_RTX { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_do_6_3_3_rtx, .name = "sctp_sf_do_6_3_3_rtx"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T4_RTO { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_t4_timer_expire, .name = "sctp_sf_t4_timer_expire"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_t5_timer_expire, .name = "sctp_sf_t5_timer_expire"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_sendbeat_8_3, .name = "sctp_sf_sendbeat_8_3"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_SACK { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_do_6_2_sack, .name = "sctp_sf_do_6_2_sack"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+#define TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE { \
+	/* SCTP_STATE_EMPTY */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_CLOSED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_ESTABLISHED */ \
+	{.fn = sctp_sf_autoclose_timer_expire, \
+	 .name = "sctp_sf_autoclose_timer_expire"}, \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	{.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
+}
+
+static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
+	TYPE_SCTP_EVENT_TIMEOUT_NONE,
+	TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
+	TYPE_SCTP_EVENT_TIMEOUT_T1_INIT,
+	TYPE_SCTP_EVENT_TIMEOUT_T2_SHUTDOWN,
+	TYPE_SCTP_EVENT_TIMEOUT_T3_RTX,
+	TYPE_SCTP_EVENT_TIMEOUT_T4_RTO,
+	TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
+	TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
+	TYPE_SCTP_EVENT_TIMEOUT_SACK,
+	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
+};
+
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, 
+							    sctp_state_t state)
+{
+	if (state > SCTP_STATE_MAX)
+		return &bug;
+
+	if (cid >= 0 && cid <= SCTP_CID_BASE_MAX)
+		return &chunk_event_table[cid][state];
+
+	if (sctp_prsctp_enable) {
+		if (cid == SCTP_CID_FWD_TSN)
+			return &prsctp_chunk_event_table[0][state];
+	}
+
+	if (sctp_addip_enable) {
+		if (cid == SCTP_CID_ASCONF)
+			return &addip_chunk_event_table[0][state];
+
+		if (cid == SCTP_CID_ASCONF_ACK)
+			return &addip_chunk_event_table[1][state];
+	}
+
+	return &chunk_event_table_unknown[state];
+}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
new file mode 100644
index 00000000000..e8c21018257
--- /dev/null
+++ b/net/sctp/socket.c
@@ -0,0 +1,4797 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2003 Intel Corp.
+ * Copyright (c) 2001-2002 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions interface with the sockets layer to implement the
+ * SCTP Extensions for the Sockets API.
+ *
+ * Note that the descriptions from the specification are USER level
+ * functions--this file is the functions which populate the struct proto
+ * for SCTP which is the BOTTOM of the sockets interface.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Narasimha Budihal     <narsi@refcode.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Daisy Chang           <daisyc@us.ibm.com>
+ *    Sridhar Samudrala     <samudrala@us.ibm.com>
+ *    Inaky Perez-Gonzalez  <inaky.gonzalez@intel.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Ryan Layer	    <rmlayer@us.ibm.com>
+ *    Anup Pemmaiah         <pemmaiah@cc.usu.edu>
+ *    Kevin Gao             <kevin.gao@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/ip.h>
+#include <linux/fcntl.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/crypto.h>
+
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/ipv6.h>
+#include <net/inet_common.h>
+
+#include <linux/socket.h> /* for sa_family_t */
+#include <net/sock.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* WARNING:  Please do not remove the SCTP_STATIC attribute to
+ * any of the functions below as they are used to export functions
+ * used by a project regression testsuite.
+ */
+
+/* Forward declarations for internal helper functions. */
+static int sctp_writeable(struct sock *sk);
+static void sctp_wfree(struct sk_buff *skb);
+static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
+				size_t msg_len);
+static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p);
+static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
+static int sctp_wait_for_accept(struct sock *sk, long timeo);
+static void sctp_wait_for_close(struct sock *sk, long timeo);
+static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
+					union sctp_addr *addr, int len);
+static int sctp_bindx_add(struct sock *, struct sockaddr *, int);
+static int sctp_bindx_rem(struct sock *, struct sockaddr *, int);
+static int sctp_send_asconf_add_ip(struct sock *, struct sockaddr *, int);
+static int sctp_send_asconf_del_ip(struct sock *, struct sockaddr *, int);
+static int sctp_send_asconf(struct sctp_association *asoc,
+			    struct sctp_chunk *chunk);
+static int sctp_do_bind(struct sock *, union sctp_addr *, int);
+static int sctp_autobind(struct sock *sk);
+static void sctp_sock_migrate(struct sock *, struct sock *,
+			      struct sctp_association *, sctp_socket_type_t);
+static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
+
+extern kmem_cache_t *sctp_bucket_cachep;
+
+/* Get the sndbuf space available at the time on the association.  */
+static inline int sctp_wspace(struct sctp_association *asoc)
+{
+	struct sock *sk = asoc->base.sk;
+	int amt = 0;
+
+	amt = sk->sk_sndbuf - asoc->sndbuf_used;
+	if (amt < 0)
+		amt = 0;
+	return amt;
+}
+
+/* Increment the used sndbuf space count of the corresponding association by
+ * the size of the outgoing data chunk.
+ * Also, set the skb destructor for sndbuf accounting later.
+ *
+ * Since it is always 1-1 between chunk and skb, and also a new skb is always
+ * allocated for chunk bundling in sctp_packet_transmit(), we can use the
+ * destructor in the data chunk skb for the purpose of the sndbuf space
+ * tracking.
+ */
+static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
+{
+	struct sctp_association *asoc = chunk->asoc;
+	struct sock *sk = asoc->base.sk;
+
+	/* The sndbuf space is tracked per association.  */
+	sctp_association_hold(asoc);
+
+	chunk->skb->destructor = sctp_wfree;
+	/* Save the chunk pointer in skb for sctp_wfree to use later.  */
+	*((struct sctp_chunk **)(chunk->skb->cb)) = chunk;
+
+	asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk);
+	sk->sk_wmem_queued += SCTP_DATA_SNDSIZE(chunk);
+}
+
+/* Verify that this is a valid address. */
+static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
+				   int len)
+{
+	struct sctp_af *af;
+
+	/* Verify basic sockaddr. */
+	af = sctp_sockaddr_af(sctp_sk(sk), addr, len);
+	if (!af)
+		return -EINVAL;
+
+	/* Is this a valid SCTP address?  */
+	if (!af->addr_valid(addr, sctp_sk(sk)))
+		return -EINVAL;
+
+	if (!sctp_sk(sk)->pf->send_verify(sctp_sk(sk), (addr)))
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Look up the association by its id.  If this is not a UDP-style
+ * socket, the ID field is always ignored.
+ */
+struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
+{
+	struct sctp_association *asoc = NULL;
+
+	/* If this is not a UDP-style socket, assoc id should be ignored. */
+	if (!sctp_style(sk, UDP)) {
+		/* Return NULL if the socket state is not ESTABLISHED. It
+		 * could be a TCP-style listening socket or a socket which
+		 * hasn't yet called connect() to establish an association.
+		 */
+		if (!sctp_sstate(sk, ESTABLISHED))
+			return NULL;
+
+		/* Get the first and the only association from the list. */
+		if (!list_empty(&sctp_sk(sk)->ep->asocs))
+			asoc = list_entry(sctp_sk(sk)->ep->asocs.next,
+					  struct sctp_association, asocs);
+		return asoc;
+	}
+
+	/* Otherwise this is a UDP-style socket. */
+	if (!id || (id == (sctp_assoc_t)-1))
+		return NULL;
+
+	spin_lock_bh(&sctp_assocs_id_lock);
+	asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id);
+	spin_unlock_bh(&sctp_assocs_id_lock);
+
+	if (!asoc || (asoc->base.sk != sk) || asoc->base.dead)
+		return NULL;
+
+	return asoc;
+}
+
+/* Look up the transport from an address and an assoc id. If both address and
+ * id are specified, the associations matching the address and the id should be
+ * the same.
+ */
+static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
+					      struct sockaddr_storage *addr,
+					      sctp_assoc_t id)
+{
+	struct sctp_association *addr_asoc = NULL, *id_asoc = NULL;
+	struct sctp_transport *transport;
+	union sctp_addr *laddr = (union sctp_addr *)addr;
+
+	laddr->v4.sin_port = ntohs(laddr->v4.sin_port);
+	addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
+					       (union sctp_addr *)addr,
+					       &transport);
+	laddr->v4.sin_port = htons(laddr->v4.sin_port);
+
+	if (!addr_asoc)
+		return NULL;
+
+	id_asoc = sctp_id2assoc(sk, id);
+	if (id_asoc && (id_asoc != addr_asoc))
+		return NULL;
+
+	sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
+						(union sctp_addr *)addr);
+
+	return transport;
+}
+
+/* API 3.1.2 bind() - UDP Style Syntax
+ * The syntax of bind() is,
+ *
+ *   ret = bind(int sd, struct sockaddr *addr, int addrlen);
+ *
+ *   sd      - the socket descriptor returned by socket().
+ *   addr    - the address structure (struct sockaddr_in or struct
+ *             sockaddr_in6 [RFC 2553]),
+ *   addr_len - the size of the address structure.
+ */
+SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int retval = 0;
+
+	sctp_lock_sock(sk);
+
+	SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, uaddr: %p, addr_len: %d)\n",
+			  sk, uaddr, addr_len);
+
+	/* Disallow binding twice. */
+	if (!sctp_sk(sk)->ep->base.bind_addr.port)
+		retval = sctp_do_bind(sk, (union sctp_addr *)uaddr,
+				      addr_len);
+	else
+		retval = -EINVAL;
+
+	sctp_release_sock(sk);
+
+	return retval;
+}
+
+static long sctp_get_port_local(struct sock *, union sctp_addr *);
+
+/* Verify this is a valid sockaddr. */
+static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
+					union sctp_addr *addr, int len)
+{
+	struct sctp_af *af;
+
+	/* Check minimum size.  */
+	if (len < sizeof (struct sockaddr))
+		return NULL;
+
+	/* Does this PF support this AF? */
+	if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+		return NULL;
+
+	/* If we get this far, af is valid. */
+	af = sctp_get_af_specific(addr->sa.sa_family);
+
+	if (len < af->sockaddr_len)
+		return NULL;
+
+	return af;
+}
+
+/* Bind a local address either to an endpoint or to an association.  */
+SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
+	struct sctp_bind_addr *bp = &ep->base.bind_addr;
+	struct sctp_af *af;
+	unsigned short snum;
+	int ret = 0;
+
+	SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d)\n",
+			  sk, addr, len);
+
+	/* Common sockaddr verification. */
+	af = sctp_sockaddr_af(sp, addr, len);
+	if (!af)
+		return -EINVAL;
+
+	/* PF specific bind() address verification. */
+	if (!sp->pf->bind_verify(sp, addr))
+		return -EADDRNOTAVAIL;
+
+	snum= ntohs(addr->v4.sin_port);
+
+	SCTP_DEBUG_PRINTK("sctp_do_bind: port: %d, new port: %d\n",
+			  bp->port, snum);
+
+	/* We must either be unbound, or bind to the same port.  */
+	if (bp->port && (snum != bp->port)) {
+		SCTP_DEBUG_PRINTK("sctp_do_bind:"
+				  " New port %d does not match existing port "
+				  "%d.\n", snum, bp->port);
+		return -EINVAL;
+	}
+
+	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	/* Make sure we are allowed to bind here.
+	 * The function sctp_get_port_local() does duplicate address
+	 * detection.
+	 */
+	if ((ret = sctp_get_port_local(sk, addr))) {
+		if (ret == (long) sk) {
+			/* This endpoint has a conflicting address. */
+			return -EINVAL;
+		} else {
+			return -EADDRINUSE;
+		}
+	}
+
+	/* Refresh ephemeral port.  */
+	if (!bp->port)
+		bp->port = inet_sk(sk)->num;
+
+	/* Add the address to the bind address list.  */
+	sctp_local_bh_disable();
+	sctp_write_lock(&ep->base.addr_lock);
+
+	/* Use GFP_ATOMIC since BHs are disabled.  */
+	addr->v4.sin_port = ntohs(addr->v4.sin_port);
+	ret = sctp_add_bind_addr(bp, addr, GFP_ATOMIC);
+	addr->v4.sin_port = htons(addr->v4.sin_port);
+	sctp_write_unlock(&ep->base.addr_lock);
+	sctp_local_bh_enable();
+
+	/* Copy back into socket for getsockname() use. */
+	if (!ret) {
+		inet_sk(sk)->sport = htons(inet_sk(sk)->num);
+		af->to_sk_saddr(addr, sk);
+	}
+
+	return ret;
+}
+
+ /* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks
+ *
+ * R1) One and only one ASCONF Chunk MAY be in transit and unacknowledged 
+ * at any one time.  If a sender, after sending an ASCONF chunk, decides
+ * it needs to transfer another ASCONF Chunk, it MUST wait until the 
+ * ASCONF-ACK Chunk returns from the previous ASCONF Chunk before sending a
+ * subsequent ASCONF. Note this restriction binds each side, so at any 
+ * time two ASCONF may be in-transit on any given association (one sent 
+ * from each endpoint).
+ */
+static int sctp_send_asconf(struct sctp_association *asoc,
+			    struct sctp_chunk *chunk)
+{
+	int		retval = 0;
+
+	/* If there is an outstanding ASCONF chunk, queue it for later
+	 * transmission.
+	 */	
+	if (asoc->addip_last_asconf) {
+		__skb_queue_tail(&asoc->addip_chunks, (struct sk_buff *)chunk);
+		goto out;	
+	}
+
+	/* Hold the chunk until an ASCONF_ACK is received. */
+	sctp_chunk_hold(chunk);
+	retval = sctp_primitive_ASCONF(asoc, chunk);
+	if (retval)
+		sctp_chunk_free(chunk);
+	else
+		asoc->addip_last_asconf = chunk;
+
+out:
+	return retval;
+}
+
+/* Add a list of addresses as bind addresses to local endpoint or
+ * association.
+ *
+ * Basically run through each address specified in the addrs/addrcnt
+ * array/length pair, determine if it is IPv6 or IPv4 and call
+ * sctp_do_bind() on it.
+ *
+ * If any of them fails, then the operation will be reversed and the
+ * ones that were added will be removed.
+ *
+ * Only sctp_setsockopt_bindx() is supposed to call this function.
+ */
+int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt)
+{
+	int cnt;
+	int retval = 0;
+	void *addr_buf;
+	struct sockaddr *sa_addr;
+	struct sctp_af *af;
+
+	SCTP_DEBUG_PRINTK("sctp_bindx_add (sk: %p, addrs: %p, addrcnt: %d)\n",
+			  sk, addrs, addrcnt);
+
+	addr_buf = addrs;
+	for (cnt = 0; cnt < addrcnt; cnt++) {
+		/* The list may contain either IPv4 or IPv6 address;
+		 * determine the address length for walking thru the list.
+		 */
+		sa_addr = (struct sockaddr *)addr_buf;
+		af = sctp_get_af_specific(sa_addr->sa_family);
+		if (!af) {
+			retval = -EINVAL;
+			goto err_bindx_add;
+		}
+
+		retval = sctp_do_bind(sk, (union sctp_addr *)sa_addr, 
+				      af->sockaddr_len);
+
+		addr_buf += af->sockaddr_len;
+
+err_bindx_add:
+		if (retval < 0) {
+			/* Failed. Cleanup the ones that have been added */
+			if (cnt > 0)
+				sctp_bindx_rem(sk, addrs, cnt);
+			return retval;
+		}
+	}
+
+	return retval;
+}
+
+/* Send an ASCONF chunk with Add IP address parameters to all the peers of the
+ * associations that are part of the endpoint indicating that a list of local
+ * addresses are added to the endpoint.
+ *
+ * If any of the addresses is already in the bind address list of the 
+ * association, we do not send the chunk for that association.  But it will not
+ * affect other associations.
+ *
+ * Only sctp_setsockopt_bindx() is supposed to call this function.
+ */
+static int sctp_send_asconf_add_ip(struct sock		*sk, 
+				   struct sockaddr	*addrs,
+				   int 			addrcnt)
+{
+	struct sctp_sock		*sp;
+	struct sctp_endpoint		*ep;
+	struct sctp_association		*asoc;
+	struct sctp_bind_addr		*bp;
+	struct sctp_chunk		*chunk;
+	struct sctp_sockaddr_entry	*laddr;
+	union sctp_addr			*addr;
+	void				*addr_buf;
+	struct sctp_af			*af;
+	struct list_head		*pos;
+	struct list_head		*p;
+	int 				i;
+	int 				retval = 0;
+
+	if (!sctp_addip_enable)
+		return retval;
+
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n",
+			  __FUNCTION__, sk, addrs, addrcnt);
+
+	list_for_each(pos, &ep->asocs) {
+		asoc = list_entry(pos, struct sctp_association, asocs);
+
+		if (!asoc->peer.asconf_capable)
+			continue;
+
+		if (asoc->peer.addip_disabled_mask & SCTP_PARAM_ADD_IP)
+			continue;
+
+		if (!sctp_state(asoc, ESTABLISHED))
+			continue;
+
+		/* Check if any address in the packed array of addresses is
+	         * in the bind address list of the association. If so, 
+		 * do not send the asconf chunk to its peer, but continue with 
+		 * other associations.
+		 */
+		addr_buf = addrs;
+		for (i = 0; i < addrcnt; i++) {
+			addr = (union sctp_addr *)addr_buf;
+			af = sctp_get_af_specific(addr->v4.sin_family);
+			if (!af) {
+				retval = -EINVAL;
+				goto out;
+			}
+
+			if (sctp_assoc_lookup_laddr(asoc, addr))
+				break;
+
+			addr_buf += af->sockaddr_len;
+		}
+		if (i < addrcnt)
+			continue;
+
+		/* Use the first address in bind addr list of association as
+		 * Address Parameter of ASCONF CHUNK.
+		 */
+		sctp_read_lock(&asoc->base.addr_lock);
+		bp = &asoc->base.bind_addr;
+		p = bp->address_list.next;
+		laddr = list_entry(p, struct sctp_sockaddr_entry, list);
+		sctp_read_unlock(&asoc->base.addr_lock);
+
+		chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
+						   addrcnt, SCTP_PARAM_ADD_IP);
+		if (!chunk) {
+			retval = -ENOMEM;
+			goto out;
+		}
+
+		retval = sctp_send_asconf(asoc, chunk);
+
+		/* FIXME: After sending the add address ASCONF chunk, we
+		 * cannot append the address to the association's binding
+		 * address list, because the new address may be used as the
+		 * source of a message sent to the peer before the ASCONF
+		 * chunk is received by the peer.  So we should wait until
+		 * ASCONF_ACK is received.
+		 */
+	}
+
+out:
+	return retval;
+}
+
+/* Remove a list of addresses from bind addresses list.  Do not remove the
+ * last address.
+ *
+ * Basically run through each address specified in the addrs/addrcnt
+ * array/length pair, determine if it is IPv6 or IPv4 and call
+ * sctp_del_bind() on it.
+ *
+ * If any of them fails, then the operation will be reversed and the
+ * ones that were removed will be added back.
+ *
+ * At least one address has to be left; if only one address is
+ * available, the operation will return -EBUSY.
+ *
+ * Only sctp_setsockopt_bindx() is supposed to call this function.
+ */
+int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
+	int cnt;
+	struct sctp_bind_addr *bp = &ep->base.bind_addr;
+	int retval = 0;
+	union sctp_addr saveaddr;
+	void *addr_buf;
+	struct sockaddr *sa_addr;
+	struct sctp_af *af;
+
+	SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n",
+			  sk, addrs, addrcnt);
+
+	addr_buf = addrs;
+	for (cnt = 0; cnt < addrcnt; cnt++) {
+		/* If the bind address list is empty or if there is only one
+		 * bind address, there is nothing more to be removed (we need
+		 * at least one address here).
+		 */
+		if (list_empty(&bp->address_list) ||
+		    (sctp_list_single_entry(&bp->address_list))) {
+			retval = -EBUSY;
+			goto err_bindx_rem;
+		}
+
+		/* The list may contain either IPv4 or IPv6 address;
+		 * determine the address length to copy the address to
+		 * saveaddr. 
+		 */
+		sa_addr = (struct sockaddr *)addr_buf;
+		af = sctp_get_af_specific(sa_addr->sa_family);
+		if (!af) {
+			retval = -EINVAL;
+			goto err_bindx_rem;
+		}
+		memcpy(&saveaddr, sa_addr, af->sockaddr_len); 
+		saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port);
+		if (saveaddr.v4.sin_port != bp->port) {
+			retval = -EINVAL;
+			goto err_bindx_rem;
+		}
+
+		/* FIXME - There is probably a need to check if sk->sk_saddr and
+		 * sk->sk_rcv_addr are currently set to one of the addresses to
+		 * be removed. This is something which needs to be looked into
+		 * when we are fixing the outstanding issues with multi-homing
+		 * socket routing and failover schemes. Refer to comments in
+		 * sctp_do_bind(). -daisy
+		 */
+		sctp_local_bh_disable();
+		sctp_write_lock(&ep->base.addr_lock);
+
+		retval = sctp_del_bind_addr(bp, &saveaddr);
+
+		sctp_write_unlock(&ep->base.addr_lock);
+		sctp_local_bh_enable();
+
+		addr_buf += af->sockaddr_len;
+err_bindx_rem:
+		if (retval < 0) {
+			/* Failed. Add the ones that has been removed back */
+			if (cnt > 0)
+				sctp_bindx_add(sk, addrs, cnt);
+			return retval;
+		}
+	}
+
+	return retval;
+}
+
+/* Send an ASCONF chunk with Delete IP address parameters to all the peers of
+ * the associations that are part of the endpoint indicating that a list of
+ * local addresses are removed from the endpoint.
+ *
+ * If any of the addresses is already in the bind address list of the 
+ * association, we do not send the chunk for that association.  But it will not
+ * affect other associations.
+ *
+ * Only sctp_setsockopt_bindx() is supposed to call this function.
+ */
+static int sctp_send_asconf_del_ip(struct sock		*sk,
+				   struct sockaddr	*addrs,
+				   int			addrcnt)
+{
+	struct sctp_sock	*sp;
+	struct sctp_endpoint	*ep;
+	struct sctp_association	*asoc;
+	struct sctp_bind_addr	*bp;
+	struct sctp_chunk	*chunk;
+	union sctp_addr		*laddr;
+	void			*addr_buf;
+	struct sctp_af		*af;
+	struct list_head	*pos;
+	int 			i;
+	int 			retval = 0;
+
+	if (!sctp_addip_enable)
+		return retval;
+
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n",
+			  __FUNCTION__, sk, addrs, addrcnt);
+
+	list_for_each(pos, &ep->asocs) {
+		asoc = list_entry(pos, struct sctp_association, asocs);
+
+		if (!asoc->peer.asconf_capable)
+			continue;
+
+		if (asoc->peer.addip_disabled_mask & SCTP_PARAM_DEL_IP)
+			continue;
+
+		if (!sctp_state(asoc, ESTABLISHED))
+			continue;
+
+		/* Check if any address in the packed array of addresses is
+	         * not present in the bind address list of the association.
+		 * If so, do not send the asconf chunk to its peer, but
+		 * continue with other associations.
+		 */
+		addr_buf = addrs;
+		for (i = 0; i < addrcnt; i++) {
+			laddr = (union sctp_addr *)addr_buf;
+			af = sctp_get_af_specific(laddr->v4.sin_family);
+			if (!af) {
+				retval = -EINVAL;
+				goto out;
+			}
+
+			if (!sctp_assoc_lookup_laddr(asoc, laddr))
+				break;
+
+			addr_buf += af->sockaddr_len;
+		}
+		if (i < addrcnt)
+			continue;
+
+		/* Find one address in the association's bind address list
+		 * that is not in the packed array of addresses. This is to
+		 * make sure that we do not delete all the addresses in the
+		 * association.
+		 */
+		sctp_read_lock(&asoc->base.addr_lock);
+		bp = &asoc->base.bind_addr;
+		laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
+					       addrcnt, sp);
+		sctp_read_unlock(&asoc->base.addr_lock);
+		if (!laddr)
+			continue;
+
+		chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
+						   SCTP_PARAM_DEL_IP);
+		if (!chunk) {
+			retval = -ENOMEM;
+			goto out;
+		}
+
+		retval = sctp_send_asconf(asoc, chunk);
+
+		/* FIXME: After sending the delete address ASCONF chunk, we
+		 * cannot remove the addresses from the association's bind
+		 * address list, because there maybe some packet send to
+		 * the delete addresses, so we should wait until ASCONF_ACK
+		 * packet is received.
+		 */
+	}
+out:
+	return retval;
+}
+
+/* Helper for tunneling sctp_bindx() requests through sctp_setsockopt()
+ *
+ * API 8.1
+ * int sctp_bindx(int sd, struct sockaddr *addrs, int addrcnt,
+ *                int flags);
+ *
+ * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
+ * If the sd is an IPv6 socket, the addresses passed can either be IPv4
+ * or IPv6 addresses.
+ *
+ * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see
+ * Section 3.1.2 for this usage.
+ *
+ * addrs is a pointer to an array of one or more socket addresses. Each
+ * address is contained in its appropriate structure (i.e. struct
+ * sockaddr_in or struct sockaddr_in6) the family of the address type
+ * must be used to distengish the address length (note that this
+ * representation is termed a "packed array" of addresses). The caller
+ * specifies the number of addresses in the array with addrcnt.
+ *
+ * On success, sctp_bindx() returns 0. On failure, sctp_bindx() returns
+ * -1, and sets errno to the appropriate error code.
+ *
+ * For SCTP, the port given in each socket address must be the same, or
+ * sctp_bindx() will fail, setting errno to EINVAL.
+ *
+ * The flags parameter is formed from the bitwise OR of zero or more of
+ * the following currently defined flags:
+ *
+ * SCTP_BINDX_ADD_ADDR
+ *
+ * SCTP_BINDX_REM_ADDR
+ *
+ * SCTP_BINDX_ADD_ADDR directs SCTP to add the given addresses to the
+ * association, and SCTP_BINDX_REM_ADDR directs SCTP to remove the given
+ * addresses from the association. The two flags are mutually exclusive;
+ * if both are given, sctp_bindx() will fail with EINVAL. A caller may
+ * not remove all addresses from an association; sctp_bindx() will
+ * reject such an attempt with EINVAL.
+ *
+ * An application can use sctp_bindx(SCTP_BINDX_ADD_ADDR) to associate
+ * additional addresses with an endpoint after calling bind().  Or use
+ * sctp_bindx(SCTP_BINDX_REM_ADDR) to remove some addresses a listening
+ * socket is associated with so that no new association accepted will be
+ * associated with those addresses. If the endpoint supports dynamic
+ * address a SCTP_BINDX_REM_ADDR or SCTP_BINDX_ADD_ADDR may cause a
+ * endpoint to send the appropriate message to the peer to change the
+ * peers address lists.
+ *
+ * Adding and removing addresses from a connected association is
+ * optional functionality. Implementations that do not support this
+ * functionality should return EOPNOTSUPP.
+ *
+ * Basically do nothing but copying the addresses from user to kernel
+ * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk.
+ * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace.
+ *
+ * We don't use copy_from_user() for optimization: we first do the
+ * sanity checks (buffer size -fast- and access check-healthy
+ * pointer); if all of those succeed, then we can alloc the memory
+ * (expensive operation) needed to copy the data to kernel. Then we do
+ * the copying without checking the user space area
+ * (__copy_from_user()).
+ *
+ * On exit there is no need to do sockfd_put(), sys_setsockopt() does
+ * it.
+ *
+ * sk        The sk of the socket
+ * addrs     The pointer to the addresses in user land
+ * addrssize Size of the addrs buffer
+ * op        Operation to perform (add or remove, see the flags of
+ *           sctp_bindx)
+ *
+ * Returns 0 if ok, <0 errno code on error.
+ */
+SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
+				      struct sockaddr __user *addrs,
+				      int addrs_size, int op)
+{
+	struct sockaddr *kaddrs;
+	int err;
+	int addrcnt = 0;
+	int walk_size = 0;
+	struct sockaddr *sa_addr;
+	void *addr_buf;
+	struct sctp_af *af;
+
+	SCTP_DEBUG_PRINTK("sctp_setsocktopt_bindx: sk %p addrs %p"
+			  " addrs_size %d opt %d\n", sk, addrs, addrs_size, op);
+
+	if (unlikely(addrs_size <= 0))
+		return -EINVAL;
+
+	/* Check the user passed a healthy pointer.  */
+	if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
+		return -EFAULT;
+
+	/* Alloc space for the address array in kernel memory.  */
+	kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL);
+	if (unlikely(!kaddrs))
+		return -ENOMEM;
+
+	if (__copy_from_user(kaddrs, addrs, addrs_size)) {
+		kfree(kaddrs);
+		return -EFAULT;
+	}
+
+	/* Walk through the addrs buffer and count the number of addresses. */ 
+	addr_buf = kaddrs;
+	while (walk_size < addrs_size) {
+		sa_addr = (struct sockaddr *)addr_buf;
+		af = sctp_get_af_specific(sa_addr->sa_family);
+
+		/* If the address family is not supported or if this address
+		 * causes the address buffer to overflow return EINVAL.
+		 */ 
+		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
+			kfree(kaddrs);
+			return -EINVAL;
+		}
+		addrcnt++;
+		addr_buf += af->sockaddr_len;
+		walk_size += af->sockaddr_len;
+	}
+
+	/* Do the work. */
+	switch (op) {
+	case SCTP_BINDX_ADD_ADDR:
+		err = sctp_bindx_add(sk, kaddrs, addrcnt);
+		if (err)
+			goto out;
+		err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt);
+		break;
+
+	case SCTP_BINDX_REM_ADDR:
+		err = sctp_bindx_rem(sk, kaddrs, addrcnt);
+		if (err)
+			goto out;
+		err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt);
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+        };
+
+out:
+	kfree(kaddrs);
+
+	return err;
+}
+
+/* API 3.1.4 close() - UDP Style Syntax
+ * Applications use close() to perform graceful shutdown (as described in
+ * Section 10.1 of [SCTP]) on ALL the associations currently represented
+ * by a UDP-style socket.
+ *
+ * The syntax is
+ *
+ *   ret = close(int sd);
+ *
+ *   sd      - the socket descriptor of the associations to be closed.
+ *
+ * To gracefully shutdown a specific association represented by the
+ * UDP-style socket, an application should use the sendmsg() call,
+ * passing no user data, but including the appropriate flag in the
+ * ancillary data (see Section xxxx).
+ *
+ * If sd in the close() call is a branched-off socket representing only
+ * one association, the shutdown is performed on that association only.
+ *
+ * 4.1.6 close() - TCP Style Syntax
+ *
+ * Applications use close() to gracefully close down an association.
+ *
+ * The syntax is:
+ *
+ *    int close(int sd);
+ *
+ *      sd      - the socket descriptor of the association to be closed.
+ *
+ * After an application calls close() on a socket descriptor, no further
+ * socket operations will succeed on that descriptor.
+ *
+ * API 7.1.4 SO_LINGER
+ *
+ * An application using the TCP-style socket can use this option to
+ * perform the SCTP ABORT primitive.  The linger option structure is:
+ *
+ *  struct  linger {
+ *     int     l_onoff;                // option on/off
+ *     int     l_linger;               // linger time
+ * };
+ *
+ * To enable the option, set l_onoff to 1.  If the l_linger value is set
+ * to 0, calling close() is the same as the ABORT primitive.  If the
+ * value is set to a negative value, the setsockopt() call will return
+ * an error.  If the value is set to a positive value linger_time, the
+ * close() can be blocked for at most linger_time ms.  If the graceful
+ * shutdown phase does not finish during this period, close() will
+ * return but the graceful shutdown phase continues in the system.
+ */
+SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
+{
+	struct sctp_endpoint *ep;
+	struct sctp_association *asoc;
+	struct list_head *pos, *temp;
+
+	SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout);
+
+	sctp_lock_sock(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	ep = sctp_sk(sk)->ep;
+
+	/* Walk all associations on a socket, not on an endpoint.  */
+	list_for_each_safe(pos, temp, &ep->asocs) {
+		asoc = list_entry(pos, struct sctp_association, asocs);
+
+		if (sctp_style(sk, TCP)) {
+			/* A closed association can still be in the list if
+			 * it belongs to a TCP-style listening socket that is
+			 * not yet accepted. If so, free it. If not, send an
+			 * ABORT or SHUTDOWN based on the linger options.
+			 */
+			if (sctp_state(asoc, CLOSED)) {
+				sctp_unhash_established(asoc);
+				sctp_association_free(asoc);
+
+			} else if (sock_flag(sk, SOCK_LINGER) &&
+				   !sk->sk_lingertime)
+				sctp_primitive_ABORT(asoc, NULL);
+			else
+				sctp_primitive_SHUTDOWN(asoc, NULL);
+		} else
+			sctp_primitive_SHUTDOWN(asoc, NULL);
+	}
+
+	/* Clean up any skbs sitting on the receive queue.  */
+	sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
+	sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
+
+	/* On a TCP-style socket, block for at most linger_time if set. */
+	if (sctp_style(sk, TCP) && timeout)
+		sctp_wait_for_close(sk, timeout);
+
+	/* This will run the backlog queue.  */
+	sctp_release_sock(sk);
+
+	/* Supposedly, no process has access to the socket, but
+	 * the net layers still may.
+	 */
+	sctp_local_bh_disable();
+	sctp_bh_lock_sock(sk);
+
+	/* Hold the sock, since sk_common_release() will put sock_put()
+	 * and we have just a little more cleanup.
+	 */
+	sock_hold(sk);
+	sk_common_release(sk);
+
+	sctp_bh_unlock_sock(sk);
+	sctp_local_bh_enable();
+
+	sock_put(sk);
+
+	SCTP_DBG_OBJCNT_DEC(sock);
+}
+
+/* Handle EPIPE error. */
+static int sctp_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE)
+		err = sock_error(sk) ? : -EPIPE;
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+
+/* API 3.1.3 sendmsg() - UDP Style Syntax
+ *
+ * An application uses sendmsg() and recvmsg() calls to transmit data to
+ * and receive data from its peer.
+ *
+ *  ssize_t sendmsg(int socket, const struct msghdr *message,
+ *                  int flags);
+ *
+ *  socket  - the socket descriptor of the endpoint.
+ *  message - pointer to the msghdr structure which contains a single
+ *            user message and possibly some ancillary data.
+ *
+ *            See Section 5 for complete description of the data
+ *            structures.
+ *
+ *  flags   - flags sent or received with the user message, see Section
+ *            5 for complete description of the flags.
+ *
+ * Note:  This function could use a rewrite especially when explicit
+ * connect support comes in.
+ */
+/* BUG:  We do not implement the equivalent of sk_stream_wait_memory(). */
+
+SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
+
+SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
+			     struct msghdr *msg, size_t msg_len)
+{
+	struct sctp_sock *sp;
+	struct sctp_endpoint *ep;
+	struct sctp_association *new_asoc=NULL, *asoc=NULL;
+	struct sctp_transport *transport, *chunk_tp;
+	struct sctp_chunk *chunk;
+	union sctp_addr to;
+	struct sockaddr *msg_name = NULL;
+	struct sctp_sndrcvinfo default_sinfo = { 0 };
+	struct sctp_sndrcvinfo *sinfo;
+	struct sctp_initmsg *sinit;
+	sctp_assoc_t associd = 0;
+	sctp_cmsgs_t cmsgs = { NULL };
+	int err;
+	sctp_scope_t scope;
+	long timeo;
+	__u16 sinfo_flags = 0;
+	struct sctp_datamsg *datamsg;
+	struct list_head *pos;
+	int msg_flags = msg->msg_flags;
+
+	SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n",
+			  sk, msg, msg_len);
+
+	err = 0;
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	SCTP_DEBUG_PRINTK("Using endpoint: %s.\n", ep->debug_name);
+
+	/* We cannot send a message over a TCP-style listening socket. */
+	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
+		err = -EPIPE;
+		goto out_nounlock;
+	}
+
+	/* Parse out the SCTP CMSGs.  */
+	err = sctp_msghdr_parse(msg, &cmsgs);
+
+	if (err) {
+		SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err);
+		goto out_nounlock;
+	}
+
+	/* Fetch the destination address for this packet.  This
+	 * address only selects the association--it is not necessarily
+	 * the address we will send to.
+	 * For a peeled-off socket, msg_name is ignored.
+	 */
+	if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+		int msg_namelen = msg->msg_namelen;
+
+		err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
+				       msg_namelen);
+		if (err)
+			return err;
+
+		if (msg_namelen > sizeof(to))
+			msg_namelen = sizeof(to);
+		memcpy(&to, msg->msg_name, msg_namelen);
+		SCTP_DEBUG_PRINTK("Just memcpy'd. msg_name is "
+				  "0x%x:%u.\n",
+				  to.v4.sin_addr.s_addr, to.v4.sin_port);
+
+		to.v4.sin_port = ntohs(to.v4.sin_port);
+		msg_name = msg->msg_name;
+	}
+
+	sinfo = cmsgs.info;
+	sinit = cmsgs.init;
+
+	/* Did the user specify SNDRCVINFO?  */
+	if (sinfo) {
+		sinfo_flags = sinfo->sinfo_flags;
+		associd = sinfo->sinfo_assoc_id;
+	}
+
+	SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n",
+			  msg_len, sinfo_flags);
+
+	/* MSG_EOF or MSG_ABORT cannot be set on a TCP-style socket. */
+	if (sctp_style(sk, TCP) && (sinfo_flags & (MSG_EOF | MSG_ABORT))) {
+		err = -EINVAL;
+		goto out_nounlock;
+	}
+
+	/* If MSG_EOF is set, no data can be sent. Disallow sending zero
+	 * length messages when MSG_EOF|MSG_ABORT is not set.
+	 * If MSG_ABORT is set, the message length could be non zero with
+	 * the msg_iov set to the user abort reason.
+ 	 */
+	if (((sinfo_flags & MSG_EOF) && (msg_len > 0)) ||
+	    (!(sinfo_flags & (MSG_EOF|MSG_ABORT)) && (msg_len == 0))) {
+		err = -EINVAL;
+		goto out_nounlock;
+	}
+
+	/* If MSG_ADDR_OVER is set, there must be an address
+	 * specified in msg_name.
+	 */
+	if ((sinfo_flags & MSG_ADDR_OVER) && (!msg->msg_name)) {
+		err = -EINVAL;
+		goto out_nounlock;
+	}
+
+	transport = NULL;
+
+	SCTP_DEBUG_PRINTK("About to look up association.\n");
+
+	sctp_lock_sock(sk);
+
+	/* If a msg_name has been specified, assume this is to be used.  */
+	if (msg_name) {
+		/* Look for a matching association on the endpoint. */
+		asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+		if (!asoc) {
+			/* If we could not find a matching association on the
+			 * endpoint, make sure that it is not a TCP-style
+			 * socket that already has an association or there is
+			 * no peeled-off association on another socket.
+			 */
+			if ((sctp_style(sk, TCP) &&
+			     sctp_sstate(sk, ESTABLISHED)) ||
+			    sctp_endpoint_is_peeled_off(ep, &to)) {
+				err = -EADDRNOTAVAIL;
+				goto out_unlock;
+			}
+		}
+	} else {
+		asoc = sctp_id2assoc(sk, associd);
+		if (!asoc) {
+			err = -EPIPE;
+			goto out_unlock;
+		}
+	}
+
+	if (asoc) {
+		SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc);
+
+		/* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
+		 * socket that has an association in CLOSED state. This can
+		 * happen when an accepted socket has an association that is
+		 * already CLOSED.
+		 */
+		if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
+			err = -EPIPE;
+			goto out_unlock;
+		}
+
+		if (sinfo_flags & MSG_EOF) {
+			SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
+					  asoc);
+			sctp_primitive_SHUTDOWN(asoc, NULL);
+			err = 0;
+			goto out_unlock;
+		}
+		if (sinfo_flags & MSG_ABORT) {
+			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
+			sctp_primitive_ABORT(asoc, msg);
+			err = 0;
+			goto out_unlock;
+		}
+	}
+
+	/* Do we need to create the association?  */
+	if (!asoc) {
+		SCTP_DEBUG_PRINTK("There is no association yet.\n");
+
+		if (sinfo_flags & (MSG_EOF | MSG_ABORT)) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* Check for invalid stream against the stream counts,
+		 * either the default or the user specified stream counts.
+		 */
+		if (sinfo) {
+			if (!sinit || (sinit && !sinit->sinit_num_ostreams)) {
+				/* Check against the defaults. */
+				if (sinfo->sinfo_stream >=
+				    sp->initmsg.sinit_num_ostreams) {
+					err = -EINVAL;
+					goto out_unlock;
+				}
+			} else {
+				/* Check against the requested.  */
+				if (sinfo->sinfo_stream >=
+				    sinit->sinit_num_ostreams) {
+					err = -EINVAL;
+					goto out_unlock;
+				}
+			}
+		}
+
+		/*
+		 * API 3.1.2 bind() - UDP Style Syntax
+		 * If a bind() or sctp_bindx() is not called prior to a
+		 * sendmsg() call that initiates a new association, the
+		 * system picks an ephemeral port and will choose an address
+		 * set equivalent to binding with a wildcard address.
+		 */
+		if (!ep->base.bind_addr.port) {
+			if (sctp_autobind(sk)) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+		}
+
+		scope = sctp_scope(&to);
+		new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+		if (!new_asoc) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+		asoc = new_asoc;
+
+		/* If the SCTP_INIT ancillary data is specified, set all
+		 * the association init values accordingly.
+		 */
+		if (sinit) {
+			if (sinit->sinit_num_ostreams) {
+				asoc->c.sinit_num_ostreams =
+					sinit->sinit_num_ostreams;
+			}
+			if (sinit->sinit_max_instreams) {
+				asoc->c.sinit_max_instreams =
+					sinit->sinit_max_instreams;
+			}
+			if (sinit->sinit_max_attempts) {
+				asoc->max_init_attempts
+					= sinit->sinit_max_attempts;
+			}
+			if (sinit->sinit_max_init_timeo) {
+				asoc->max_init_timeo = 
+				 msecs_to_jiffies(sinit->sinit_max_init_timeo);
+			}
+		}
+
+		/* Prime the peer's transport structures.  */
+		transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL);
+		if (!transport) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
+		if (err < 0) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+	}
+
+	/* ASSERT: we have a valid association at this point.  */
+	SCTP_DEBUG_PRINTK("We have a valid association.\n");
+
+	if (!sinfo) {
+		/* If the user didn't specify SNDRCVINFO, make up one with
+		 * some defaults.
+		 */
+		default_sinfo.sinfo_stream = asoc->default_stream;
+		default_sinfo.sinfo_flags = asoc->default_flags;
+		default_sinfo.sinfo_ppid = asoc->default_ppid;
+		default_sinfo.sinfo_context = asoc->default_context;
+		default_sinfo.sinfo_timetolive = asoc->default_timetolive;
+		default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
+		sinfo = &default_sinfo;
+	}
+
+	/* API 7.1.7, the sndbuf size per association bounds the
+	 * maximum size of data that can be sent in a single send call.
+	 */
+	if (msg_len > sk->sk_sndbuf) {
+		err = -EMSGSIZE;
+		goto out_free;
+	}
+
+	/* If fragmentation is disabled and the message length exceeds the
+	 * association fragmentation point, return EMSGSIZE.  The I-D
+	 * does not specify what this error is, but this looks like
+	 * a great fit.
+	 */
+	if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
+		err = -EMSGSIZE;
+		goto out_free;
+	}
+
+	if (sinfo) {
+		/* Check for invalid stream. */
+		if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
+			err = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	if (!sctp_wspace(asoc)) {
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+		if (err)
+			goto out_free;
+	}
+
+	/* If an address is passed with the sendto/sendmsg call, it is used
+	 * to override the primary destination address in the TCP model, or
+	 * when MSG_ADDR_OVER flag is set in the UDP model.
+	 */
+	if ((sctp_style(sk, TCP) && msg_name) ||
+	    (sinfo_flags & MSG_ADDR_OVER)) {
+		chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
+		if (!chunk_tp) {
+			err = -EINVAL;
+			goto out_free;
+		}
+	} else
+		chunk_tp = NULL;
+
+	/* Auto-connect, if we aren't connected already. */
+	if (sctp_state(asoc, CLOSED)) {
+		err = sctp_primitive_ASSOCIATE(asoc, NULL);
+		if (err < 0)
+			goto out_free;
+		SCTP_DEBUG_PRINTK("We associated primitively.\n");
+	}
+
+	/* Break the message into multiple chunks of maximum size. */
+	datamsg = sctp_datamsg_from_user(asoc, sinfo, msg, msg_len);
+	if (!datamsg) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	/* Now send the (possibly) fragmented message. */
+	list_for_each(pos, &datamsg->chunks) {
+		chunk = list_entry(pos, struct sctp_chunk, frag_list);
+		sctp_datamsg_track(chunk);
+
+		/* Do accounting for the write space.  */
+		sctp_set_owner_w(chunk);
+
+		chunk->transport = chunk_tp;
+
+		/* Send it to the lower layers.  Note:  all chunks
+		 * must either fail or succeed.   The lower layer
+		 * works that way today.  Keep it that way or this
+		 * breaks.
+		 */
+		err = sctp_primitive_SEND(asoc, chunk);
+		/* Did the lower layer accept the chunk? */
+		if (err)
+			sctp_chunk_free(chunk);
+		SCTP_DEBUG_PRINTK("We sent primitively.\n");
+	}
+
+	sctp_datamsg_free(datamsg);
+	if (err)
+		goto out_free;
+	else
+		err = msg_len;
+
+	/* If we are already past ASSOCIATE, the lower
+	 * layers are responsible for association cleanup.
+	 */
+	goto out_unlock;
+
+out_free:
+	if (new_asoc)
+		sctp_association_free(asoc);
+out_unlock:
+	sctp_release_sock(sk);
+
+out_nounlock:
+	return sctp_error(sk, msg_flags, err);
+
+#if 0
+do_sock_err:
+	if (msg_len)
+		err = msg_len;
+	else
+		err = sock_error(sk);
+	goto out;
+
+do_interrupted:
+	if (msg_len)
+		err = msg_len;
+	goto out;
+#endif /* 0 */
+}
+
+/* This is an extended version of skb_pull() that removes the data from the
+ * start of a skb even when data is spread across the list of skb's in the
+ * frag_list. len specifies the total amount of data that needs to be removed.
+ * when 'len' bytes could be removed from the skb, it returns 0.
+ * If 'len' exceeds the total skb length,  it returns the no. of bytes that
+ * could not be removed.
+ */
+static int sctp_skb_pull(struct sk_buff *skb, int len)
+{
+	struct sk_buff *list;
+	int skb_len = skb_headlen(skb);
+	int rlen;
+
+	if (len <= skb_len) {
+		__skb_pull(skb, len);
+		return 0;
+	}
+	len -= skb_len;
+	__skb_pull(skb, skb_len);
+
+	for (list = skb_shinfo(skb)->frag_list; list; list = list->next) {
+		rlen = sctp_skb_pull(list, len);
+		skb->len -= (len-rlen);
+		skb->data_len -= (len-rlen);
+
+		if (!rlen)
+			return 0;
+
+		len = rlen;
+	}
+
+	return len;
+}
+
+/* API 3.1.3  recvmsg() - UDP Style Syntax
+ *
+ *  ssize_t recvmsg(int socket, struct msghdr *message,
+ *                    int flags);
+ *
+ *  socket  - the socket descriptor of the endpoint.
+ *  message - pointer to the msghdr structure which contains a single
+ *            user message and possibly some ancillary data.
+ *
+ *            See Section 5 for complete description of the data
+ *            structures.
+ *
+ *  flags   - flags sent or received with the user message, see Section
+ *            5 for complete description of the flags.
+ */
+static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+
+SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
+			     struct msghdr *msg, size_t len, int noblock,
+			     int flags, int *addr_len)
+{
+	struct sctp_ulpevent *event = NULL;
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sk_buff *skb;
+	int copied;
+	int err = 0;
+	int skb_len;
+
+	SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: "
+			  "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg,
+			  "len", len, "knoblauch", noblock,
+			  "flags", flags, "addr_len", addr_len);
+
+	sctp_lock_sock(sk);
+
+	if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED)) {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	skb = sctp_skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	/* Get the total length of the skb including any skb's in the
+	 * frag_list.
+	 */
+	skb_len = skb->len;
+
+	copied = skb_len;
+	if (copied > len)
+		copied = len;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+
+	event = sctp_skb2event(skb);
+
+	if (err)
+		goto out_free;
+
+	sock_recv_timestamp(msg, sk, skb);
+	if (sctp_ulpevent_is_notification(event)) {
+		msg->msg_flags |= MSG_NOTIFICATION;
+		sp->pf->event_msgname(event, msg->msg_name, addr_len);
+	} else {
+		sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
+	}
+
+	/* Check if we allow SCTP_SNDRCVINFO. */
+	if (sp->subscribe.sctp_data_io_event)
+		sctp_ulpevent_read_sndrcvinfo(event, msg);
+#if 0
+	/* FIXME: we should be calling IP/IPv6 layers.  */
+	if (sk->sk_protinfo.af_inet.cmsg_flags)
+		ip_cmsg_recv(msg, skb);
+#endif
+
+	err = copied;
+
+	/* If skb's length exceeds the user's buffer, update the skb and
+	 * push it back to the receive_queue so that the next call to
+	 * recvmsg() will return the remaining data. Don't set MSG_EOR.
+	 */
+	if (skb_len > copied) {
+		msg->msg_flags &= ~MSG_EOR;
+		if (flags & MSG_PEEK)
+			goto out_free;
+		sctp_skb_pull(skb, copied);
+		skb_queue_head(&sk->sk_receive_queue, skb);
+
+		/* When only partial message is copied to the user, increase
+		 * rwnd by that amount. If all the data in the skb is read,
+		 * rwnd is updated when the event is freed.
+		 */
+		sctp_assoc_rwnd_increase(event->asoc, copied);
+		goto out;
+	} else if ((event->msg_flags & MSG_NOTIFICATION) ||
+		   (event->msg_flags & MSG_EOR))
+		msg->msg_flags |= MSG_EOR;
+	else
+		msg->msg_flags &= ~MSG_EOR;
+
+out_free:
+	if (flags & MSG_PEEK) {
+		/* Release the skb reference acquired after peeking the skb in
+		 * sctp_skb_recv_datagram().
+		 */
+		kfree_skb(skb);
+	} else {
+		/* Free the event which includes releasing the reference to
+		 * the owner of the skb, freeing the skb and updating the
+		 * rwnd.
+		 */
+		sctp_ulpevent_free(event);
+	}
+out:
+	sctp_release_sock(sk);
+	return err;
+}
+
+/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
+ *
+ * This option is a on/off flag.  If enabled no SCTP message
+ * fragmentation will be performed.  Instead if a message being sent
+ * exceeds the current PMTU size, the message will NOT be sent and
+ * instead a error will be indicated to the user.
+ */
+static int sctp_setsockopt_disable_fragments(struct sock *sk,
+					    char __user *optval, int optlen)
+{
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->disable_fragments = (val == 0) ? 0 : 1;
+
+	return 0;
+}
+
+static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
+					int optlen)
+{
+	if (optlen != sizeof(struct sctp_event_subscribe))
+		return -EINVAL;
+	if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
+		return -EFAULT;
+	return 0;
+}
+
+/* 7.1.8 Automatic Close of associations (SCTP_AUTOCLOSE)
+ *
+ * This socket option is applicable to the UDP-style socket only.  When
+ * set it will cause associations that are idle for more than the
+ * specified number of seconds to automatically close.  An association
+ * being idle is defined an association that has NOT sent or received
+ * user data.  The special value of '0' indicates that no automatic
+ * close of any associations should be performed.  The option expects an
+ * integer defining the number of seconds of idle time before an
+ * association is closed.
+ */
+static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
+					    int optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	/* Applicable to UDP-style socket only */
+	if (sctp_style(sk, TCP))
+		return -EOPNOTSUPP;
+	if (optlen != sizeof(int))
+		return -EINVAL;
+	if (copy_from_user(&sp->autoclose, optval, optlen))
+		return -EFAULT;
+
+	sp->ep->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ;
+	return 0;
+}
+
+/* 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS)
+ *
+ * Applications can enable or disable heartbeats for any peer address of
+ * an association, modify an address's heartbeat interval, force a
+ * heartbeat to be sent immediately, and adjust the address's maximum
+ * number of retransmissions sent before an address is considered
+ * unreachable.  The following structure is used to access and modify an
+ * address's parameters:
+ *
+ *  struct sctp_paddrparams {
+ *      sctp_assoc_t            spp_assoc_id;
+ *      struct sockaddr_storage spp_address;
+ *      uint32_t                spp_hbinterval;
+ *      uint16_t                spp_pathmaxrxt;
+ *  };
+ *
+ *   spp_assoc_id    - (UDP style socket) This is filled in the application,
+ *                     and identifies the association for this query.
+ *   spp_address     - This specifies which address is of interest.
+ *   spp_hbinterval  - This contains the value of the heartbeat interval,
+ *                     in milliseconds.  A value of 0, when modifying the
+ *                     parameter, specifies that the heartbeat on this
+ *                     address should be disabled. A value of UINT32_MAX
+ *                     (4294967295), when modifying the parameter,
+ *                     specifies that a heartbeat should be sent
+ *                     immediately to the peer address, and the current
+ *                     interval should remain unchanged.
+ *   spp_pathmaxrxt  - This contains the maximum number of
+ *                     retransmissions before this address shall be
+ *                     considered unreachable.
+ */
+static int sctp_setsockopt_peer_addr_params(struct sock *sk,
+					    char __user *optval, int optlen)
+{
+	struct sctp_paddrparams params;
+	struct sctp_transport *trans;
+	int error;
+
+	if (optlen != sizeof(struct sctp_paddrparams))
+		return -EINVAL;
+	if (copy_from_user(&params, optval, optlen))
+		return -EFAULT;
+
+	/*
+	 * API 7. Socket Options (setting the default value for the endpoint)
+	 * All options that support specific settings on an association by
+	 * filling in either an association id variable or a sockaddr_storage
+	 * SHOULD also support setting of the same value for the entire endpoint
+	 * (i.e. future associations). To accomplish this the following logic is
+	 * used when setting one of these options:
+
+	 * c) If neither the sockaddr_storage or association identification is
+	 *    set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and
+	 *    the association identification is 0, the settings are a default
+	 *    and to be applied to the endpoint (all future associations).
+	 */
+
+	/* update default value for endpoint (all future associations) */
+	if (!params.spp_assoc_id && 
+	    sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+		/* Manual heartbeat on an endpoint is invalid. */
+		if (0xffffffff == params.spp_hbinterval)
+			return -EINVAL;
+		else if (params.spp_hbinterval)
+			sctp_sk(sk)->paddrparam.spp_hbinterval =
+						params.spp_hbinterval;
+		if (params.spp_pathmaxrxt)
+			sctp_sk(sk)->paddrparam.spp_pathmaxrxt =
+						params.spp_pathmaxrxt;
+		return 0;
+	}
+
+	trans = sctp_addr_id2transport(sk, &params.spp_address,
+				       params.spp_assoc_id);
+	if (!trans)
+		return -EINVAL;
+
+	/* Applications can enable or disable heartbeats for any peer address
+	 * of an association, modify an address's heartbeat interval, force a
+	 * heartbeat to be sent immediately, and adjust the address's maximum
+	 * number of retransmissions sent before an address is considered
+	 * unreachable.
+	 *
+	 * The value of the heartbeat interval, in milliseconds. A value of
+	 * UINT32_MAX (4294967295), when modifying the parameter, specifies
+	 * that a heartbeat should be sent immediately to the peer address,
+	 * and the current interval should remain unchanged.
+	 */
+	if (0xffffffff == params.spp_hbinterval) {
+		error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
+		if (error)
+			return error;
+	} else {
+	/* The value of the heartbeat interval, in milliseconds. A value of 0,
+	 * when modifying the parameter, specifies that the heartbeat on this
+	 * address should be disabled.
+	 */
+		if (params.spp_hbinterval) {
+			trans->hb_allowed = 1;
+			trans->hb_interval = 
+				msecs_to_jiffies(params.spp_hbinterval);
+		} else
+			trans->hb_allowed = 0;
+	}
+
+	/* spp_pathmaxrxt contains the maximum number of retransmissions
+	 * before this address shall be considered unreachable.
+	 */
+	if (params.spp_pathmaxrxt)
+		trans->max_retrans = params.spp_pathmaxrxt;
+
+	return 0;
+}
+
+/* 7.1.3 Initialization Parameters (SCTP_INITMSG)
+ *
+ * Applications can specify protocol parameters for the default association
+ * initialization.  The option name argument to setsockopt() and getsockopt()
+ * is SCTP_INITMSG.
+ *
+ * Setting initialization parameters is effective only on an unconnected
+ * socket (for UDP-style sockets only future associations are effected
+ * by the change).  With TCP-style sockets, this option is inherited by
+ * sockets derived from a listener socket.
+ */
+static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, int optlen)
+{
+	struct sctp_initmsg sinit;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (optlen != sizeof(struct sctp_initmsg))
+		return -EINVAL;
+	if (copy_from_user(&sinit, optval, optlen))
+		return -EFAULT;
+
+	if (sinit.sinit_num_ostreams)
+		sp->initmsg.sinit_num_ostreams = sinit.sinit_num_ostreams;	
+	if (sinit.sinit_max_instreams)
+		sp->initmsg.sinit_max_instreams = sinit.sinit_max_instreams;	
+	if (sinit.sinit_max_attempts)
+		sp->initmsg.sinit_max_attempts = sinit.sinit_max_attempts;	
+	if (sinit.sinit_max_init_timeo)
+		sp->initmsg.sinit_max_init_timeo = sinit.sinit_max_init_timeo;	
+
+	return 0;
+}
+
+/*
+ * 7.1.14 Set default send parameters (SCTP_DEFAULT_SEND_PARAM)
+ *
+ *   Applications that wish to use the sendto() system call may wish to
+ *   specify a default set of parameters that would normally be supplied
+ *   through the inclusion of ancillary data.  This socket option allows
+ *   such an application to set the default sctp_sndrcvinfo structure.
+ *   The application that wishes to use this socket option simply passes
+ *   in to this call the sctp_sndrcvinfo structure defined in Section
+ *   5.2.2) The input parameters accepted by this call include
+ *   sinfo_stream, sinfo_flags, sinfo_ppid, sinfo_context,
+ *   sinfo_timetolive.  The user must provide the sinfo_assoc_id field in
+ *   to this call if the caller is using the UDP model.
+ */
+static int sctp_setsockopt_default_send_param(struct sock *sk,
+						char __user *optval, int optlen)
+{
+	struct sctp_sndrcvinfo info;
+	struct sctp_association *asoc;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (optlen != sizeof(struct sctp_sndrcvinfo))
+		return -EINVAL;
+	if (copy_from_user(&info, optval, optlen))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
+	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		asoc->default_stream = info.sinfo_stream;
+		asoc->default_flags = info.sinfo_flags;
+		asoc->default_ppid = info.sinfo_ppid;
+		asoc->default_context = info.sinfo_context;
+		asoc->default_timetolive = info.sinfo_timetolive;
+	} else {
+		sp->default_stream = info.sinfo_stream;
+		sp->default_flags = info.sinfo_flags;
+		sp->default_ppid = info.sinfo_ppid;
+		sp->default_context = info.sinfo_context;
+		sp->default_timetolive = info.sinfo_timetolive;
+	}
+
+	return 0;
+}
+
+/* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
+ *
+ * Requests that the local SCTP stack use the enclosed peer address as
+ * the association primary.  The enclosed address must be one of the
+ * association peer's addresses.
+ */
+static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
+					int optlen)
+{
+	struct sctp_prim prim;
+	struct sctp_transport *trans;
+
+	if (optlen != sizeof(struct sctp_prim))
+		return -EINVAL;
+
+	if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
+		return -EFAULT;
+
+	trans = sctp_addr_id2transport(sk, &prim.ssp_addr, prim.ssp_assoc_id);
+	if (!trans)
+		return -EINVAL;
+
+	sctp_assoc_set_primary(trans->asoc, trans);
+
+	return 0;
+}
+
+/*
+ * 7.1.5 SCTP_NODELAY
+ *
+ * Turn on/off any Nagle-like algorithm.  This means that packets are
+ * generally sent as soon as possible and no unnecessary delays are
+ * introduced, at the cost of more packets in the network.  Expects an
+ *  integer boolean flag.
+ */
+static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
+					int optlen)
+{
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1;
+	return 0;
+}
+
+/*
+ *
+ * 7.1.1 SCTP_RTOINFO
+ *
+ * The protocol parameters used to initialize and bound retransmission
+ * timeout (RTO) are tunable. sctp_rtoinfo structure is used to access
+ * and modify these parameters.
+ * All parameters are time values, in milliseconds.  A value of 0, when
+ * modifying the parameters, indicates that the current value should not
+ * be changed.
+ *
+ */
+static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, int optlen) {
+	struct sctp_rtoinfo rtoinfo;
+	struct sctp_association *asoc;
+
+	if (optlen != sizeof (struct sctp_rtoinfo))
+		return -EINVAL;
+
+	if (copy_from_user(&rtoinfo, optval, optlen))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
+
+	/* Set the values to the specific association */
+	if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		if (rtoinfo.srto_initial != 0)
+			asoc->rto_initial = 
+				msecs_to_jiffies(rtoinfo.srto_initial);
+		if (rtoinfo.srto_max != 0)
+			asoc->rto_max = msecs_to_jiffies(rtoinfo.srto_max);
+		if (rtoinfo.srto_min != 0)
+			asoc->rto_min = msecs_to_jiffies(rtoinfo.srto_min);
+	} else {
+		/* If there is no association or the association-id = 0
+		 * set the values to the endpoint.
+		 */
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		if (rtoinfo.srto_initial != 0)
+			sp->rtoinfo.srto_initial = rtoinfo.srto_initial;
+		if (rtoinfo.srto_max != 0)
+			sp->rtoinfo.srto_max = rtoinfo.srto_max;
+		if (rtoinfo.srto_min != 0)
+			sp->rtoinfo.srto_min = rtoinfo.srto_min;
+	}
+
+	return 0;
+}
+
+/*
+ *
+ * 7.1.2 SCTP_ASSOCINFO
+ *
+ * This option is used to tune the the maximum retransmission attempts
+ * of the association.
+ * Returns an error if the new association retransmission value is
+ * greater than the sum of the retransmission value  of the peer.
+ * See [SCTP] for more information.
+ *
+ */
+static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int optlen)
+{
+
+	struct sctp_assocparams assocparams;
+	struct sctp_association *asoc;
+
+	if (optlen != sizeof(struct sctp_assocparams))
+		return -EINVAL;
+	if (copy_from_user(&assocparams, optval, optlen))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
+
+	if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* Set the values to the specific association */
+	if (asoc) {
+		if (assocparams.sasoc_asocmaxrxt != 0)
+			asoc->max_retrans = assocparams.sasoc_asocmaxrxt;
+		if (assocparams.sasoc_cookie_life != 0) {
+			asoc->cookie_life.tv_sec =
+					assocparams.sasoc_cookie_life / 1000;
+			asoc->cookie_life.tv_usec =
+					(assocparams.sasoc_cookie_life % 1000)
+					* 1000;
+		}
+	} else {
+		/* Set the values to the endpoint */
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		if (assocparams.sasoc_asocmaxrxt != 0)
+			sp->assocparams.sasoc_asocmaxrxt =
+						assocparams.sasoc_asocmaxrxt;
+		if (assocparams.sasoc_cookie_life != 0)
+			sp->assocparams.sasoc_cookie_life =
+						assocparams.sasoc_cookie_life;
+	}
+	return 0;
+}
+
+/*
+ * 7.1.16 Set/clear IPv4 mapped addresses (SCTP_I_WANT_MAPPED_V4_ADDR)
+ *
+ * This socket option is a boolean flag which turns on or off mapped V4
+ * addresses.  If this option is turned on and the socket is type
+ * PF_INET6, then IPv4 addresses will be mapped to V6 representation.
+ * If this option is turned off, then no mapping will be done of V4
+ * addresses and a user will receive both PF_INET6 and PF_INET type
+ * addresses on the socket.
+ */
+static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, int optlen)
+{
+	int val;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+	if (val)
+		sp->v4mapped = 1;
+	else
+		sp->v4mapped = 0;
+
+	return 0;
+}
+
+/*
+ * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG)
+ *
+ * This socket option specifies the maximum size to put in any outgoing
+ * SCTP chunk.  If a message is larger than this size it will be
+ * fragmented by SCTP into the specified size.  Note that the underlying
+ * SCTP implementation may fragment into smaller sized chunks when the
+ * PMTU of the underlying association is smaller than the value set by
+ * the user.
+ */
+static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optlen)
+{
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	struct sctp_sock *sp = sctp_sk(sk);
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+	if ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))
+		return -EINVAL;
+	sp->user_frag = val;
+
+	if (val) {
+		/* Update the frag_point of the existing associations. */
+		list_for_each(pos, &(sp->ep->asocs)) {
+			asoc = list_entry(pos, struct sctp_association, asocs);
+			asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); 
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ *  7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR)
+ *
+ *   Requests that the peer mark the enclosed address as the association
+ *   primary. The enclosed address must be one of the association's
+ *   locally bound addresses. The following structure is used to make a
+ *   set primary request:
+ */
+static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
+					     int optlen)
+{
+	struct sctp_sock	*sp;
+	struct sctp_endpoint	*ep;
+	struct sctp_association	*asoc = NULL;
+	struct sctp_setpeerprim	prim;
+	struct sctp_chunk	*chunk;
+	int 			err;
+
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	if (!sctp_addip_enable)
+		return -EPERM;
+
+	if (optlen != sizeof(struct sctp_setpeerprim))
+		return -EINVAL;
+
+	if (copy_from_user(&prim, optval, optlen))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, prim.sspp_assoc_id);
+	if (!asoc) 
+		return -EINVAL;
+
+	if (!asoc->peer.asconf_capable)
+		return -EPERM;
+
+	if (asoc->peer.addip_disabled_mask & SCTP_PARAM_SET_PRIMARY)
+		return -EPERM;
+
+	if (!sctp_state(asoc, ESTABLISHED))
+		return -ENOTCONN;
+
+	if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
+		return -EADDRNOTAVAIL;
+
+	/* Create an ASCONF chunk with SET_PRIMARY parameter	*/
+	chunk = sctp_make_asconf_set_prim(asoc,
+					  (union sctp_addr *)&prim.sspp_addr);
+	if (!chunk)
+		return -ENOMEM;
+
+	err = sctp_send_asconf(asoc, chunk);
+
+	SCTP_DEBUG_PRINTK("We set peer primary addr primitively.\n");
+
+	return err;
+}
+
+static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval,
+					  int optlen)
+{
+	__u32 val;
+
+	if (optlen < sizeof(__u32))
+		return -EINVAL;
+	if (copy_from_user(&val, optval, sizeof(__u32)))
+		return -EFAULT;
+
+	sctp_sk(sk)->adaption_ind = val;
+
+	return 0;
+}
+
+/* API 6.2 setsockopt(), getsockopt()
+ *
+ * Applications use setsockopt() and getsockopt() to set or retrieve
+ * socket options.  Socket options are used to change the default
+ * behavior of sockets calls.  They are described in Section 7.
+ *
+ * The syntax is:
+ *
+ *   ret = getsockopt(int sd, int level, int optname, void __user *optval,
+ *                    int __user *optlen);
+ *   ret = setsockopt(int sd, int level, int optname, const void __user *optval,
+ *                    int optlen);
+ *
+ *   sd      - the socket descript.
+ *   level   - set to IPPROTO_SCTP for all SCTP options.
+ *   optname - the option name.
+ *   optval  - the buffer to store the value of the option.
+ *   optlen  - the size of the buffer.
+ */
+SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
+				char __user *optval, int optlen)
+{
+	int retval = 0;
+
+	SCTP_DEBUG_PRINTK("sctp_setsockopt(sk: %p... optname: %d)\n",
+			  sk, optname);
+
+	/* I can hardly begin to describe how wrong this is.  This is
+	 * so broken as to be worse than useless.  The API draft
+	 * REALLY is NOT helpful here...  I am not convinced that the
+	 * semantics of setsockopt() with a level OTHER THAN SOL_SCTP
+	 * are at all well-founded.
+	 */
+	if (level != SOL_SCTP) {
+		struct sctp_af *af = sctp_sk(sk)->pf->af;
+		retval = af->setsockopt(sk, level, optname, optval, optlen);
+		goto out_nounlock;
+	}
+
+	sctp_lock_sock(sk);
+
+	switch (optname) {
+	case SCTP_SOCKOPT_BINDX_ADD:
+		/* 'optlen' is the size of the addresses buffer. */
+		retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval,
+					       optlen, SCTP_BINDX_ADD_ADDR);
+		break;
+
+	case SCTP_SOCKOPT_BINDX_REM:
+		/* 'optlen' is the size of the addresses buffer. */
+		retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval,
+					       optlen, SCTP_BINDX_REM_ADDR);
+		break;
+
+	case SCTP_DISABLE_FRAGMENTS:
+		retval = sctp_setsockopt_disable_fragments(sk, optval, optlen);
+		break;
+
+	case SCTP_EVENTS:
+		retval = sctp_setsockopt_events(sk, optval, optlen);
+		break;
+
+	case SCTP_AUTOCLOSE:
+		retval = sctp_setsockopt_autoclose(sk, optval, optlen);
+		break;
+
+	case SCTP_PEER_ADDR_PARAMS:
+		retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
+		break;
+
+	case SCTP_INITMSG:
+		retval = sctp_setsockopt_initmsg(sk, optval, optlen);
+		break;
+	case SCTP_DEFAULT_SEND_PARAM:
+		retval = sctp_setsockopt_default_send_param(sk, optval,
+							    optlen);
+		break;
+	case SCTP_PRIMARY_ADDR:
+		retval = sctp_setsockopt_primary_addr(sk, optval, optlen);
+		break;
+	case SCTP_SET_PEER_PRIMARY_ADDR:
+		retval = sctp_setsockopt_peer_primary_addr(sk, optval, optlen);
+		break;
+	case SCTP_NODELAY:
+		retval = sctp_setsockopt_nodelay(sk, optval, optlen);
+		break;
+	case SCTP_RTOINFO:
+		retval = sctp_setsockopt_rtoinfo(sk, optval, optlen);
+		break;
+	case SCTP_ASSOCINFO:
+		retval = sctp_setsockopt_associnfo(sk, optval, optlen);
+		break;
+	case SCTP_I_WANT_MAPPED_V4_ADDR:
+		retval = sctp_setsockopt_mappedv4(sk, optval, optlen);
+		break;
+	case SCTP_MAXSEG:
+		retval = sctp_setsockopt_maxseg(sk, optval, optlen);
+		break;
+	case SCTP_ADAPTION_LAYER:
+		retval = sctp_setsockopt_adaption_layer(sk, optval, optlen);
+		break;
+
+	default:
+		retval = -ENOPROTOOPT;
+		break;
+	};
+
+	sctp_release_sock(sk);
+
+out_nounlock:
+	return retval;
+}
+
+/* API 3.1.6 connect() - UDP Style Syntax
+ *
+ * An application may use the connect() call in the UDP model to initiate an
+ * association without sending data.
+ *
+ * The syntax is:
+ *
+ * ret = connect(int sd, const struct sockaddr *nam, socklen_t len);
+ *
+ * sd: the socket descriptor to have a new association added to.
+ *
+ * nam: the address structure (either struct sockaddr_in or struct
+ *    sockaddr_in6 defined in RFC2553 [7]).
+ *
+ * len: the size of the address.
+ */
+SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr,
+			     int addr_len)
+{
+	struct sctp_sock *sp;
+	struct sctp_endpoint *ep;
+	struct sctp_association *asoc;
+	struct sctp_transport *transport;
+	union sctp_addr to;
+	struct sctp_af *af;
+	sctp_scope_t scope;
+	long timeo;
+	int err = 0;
+
+	sctp_lock_sock(sk);
+
+	SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d)\n",
+			  __FUNCTION__, sk, uaddr, addr_len);
+
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	/* connect() cannot be done on a socket that is already in ESTABLISHED
+	 * state - UDP-style peeled off socket or a TCP-style socket that
+	 * is already connected.
+	 * It cannot be done even on a TCP-style listening socket.
+	 */
+	if (sctp_sstate(sk, ESTABLISHED) ||
+	    (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
+		err = -EISCONN;
+		goto out_unlock;
+	}
+
+	err = sctp_verify_addr(sk, (union sctp_addr *)uaddr, addr_len);
+	if (err)
+		goto out_unlock;
+
+	if (addr_len > sizeof(to))
+		addr_len = sizeof(to);
+	memcpy(&to, uaddr, addr_len);
+	to.v4.sin_port = ntohs(to.v4.sin_port);
+
+	asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+	if (asoc) {
+		if (asoc->state >= SCTP_STATE_ESTABLISHED)
+			err = -EISCONN;
+		else
+			err = -EALREADY;
+		goto out_unlock;
+	}
+
+	/* If we could not find a matching association on the endpoint,
+	 * make sure that there is no peeled-off association matching the
+	 * peer address even on another socket.
+	 */
+	if (sctp_endpoint_is_peeled_off(ep, &to)) {
+		err = -EADDRNOTAVAIL;
+		goto out_unlock;
+	}
+
+	/* If a bind() or sctp_bindx() is not called prior to a connect()
+	 * call, the system picks an ephemeral port and will choose an address
+	 * set equivalent to binding with a wildcard address.
+	 */
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk)) {
+			err = -EAGAIN;
+			goto out_unlock;
+		}
+	}
+
+	scope = sctp_scope(&to);
+	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+	if (!asoc) {
+		err = -ENOMEM;
+		goto out_unlock;
+  	}
+
+	/* Prime the peer's transport structures.  */
+	transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL);
+	if (!transport) {
+		sctp_association_free(asoc);
+		goto out_unlock;
+	}
+	err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
+	if (err < 0) {
+		sctp_association_free(asoc);
+		goto out_unlock;
+	}
+
+	err = sctp_primitive_ASSOCIATE(asoc, NULL);
+	if (err < 0) {
+		sctp_association_free(asoc);
+		goto out_unlock;
+	}
+
+	/* Initialize sk's dport and daddr for getpeername() */
+	inet_sk(sk)->dport = htons(asoc->peer.port);
+	af = sctp_get_af_specific(to.sa.sa_family);
+	af->to_sk_daddr(&to, sk);
+
+	timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
+	err = sctp_wait_for_connect(asoc, &timeo);
+
+out_unlock:
+	sctp_release_sock(sk);
+
+	return err;
+}
+
+/* FIXME: Write comments. */
+SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags)
+{
+	return -EOPNOTSUPP; /* STUB */
+}
+
+/* 4.1.4 accept() - TCP Style Syntax
+ *
+ * Applications use accept() call to remove an established SCTP
+ * association from the accept queue of the endpoint.  A new socket
+ * descriptor will be returned from accept() to represent the newly
+ * formed association.
+ */
+SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+{
+	struct sctp_sock *sp;
+	struct sctp_endpoint *ep;
+	struct sock *newsk = NULL;
+	struct sctp_association *asoc;
+	long timeo;
+	int error = 0;
+
+	sctp_lock_sock(sk);
+
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+
+	if (!sctp_style(sk, TCP)) {
+		error = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!sctp_sstate(sk, LISTENING)) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
+
+	error = sctp_wait_for_accept(sk, timeo);
+	if (error)
+		goto out;
+
+	/* We treat the list of associations on the endpoint as the accept
+	 * queue and pick the first association on the list.
+	 */
+	asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+	newsk = sp->pf->create_accept_sk(sk, asoc);
+	if (!newsk) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	/* Populate the fields of the newsk from the oldsk and migrate the
+	 * asoc to the newsk.
+	 */
+	sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP);
+
+out:
+	sctp_release_sock(sk);
+ 	*err = error;
+	return newsk;
+}
+
+/* The SCTP ioctl handler. */
+SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	return -ENOIOCTLCMD;
+}
+
+/* This is the function which gets called during socket creation to
+ * initialized the SCTP-specific portion of the sock.
+ * The sock structure should already be zero-filled memory.
+ */
+SCTP_STATIC int sctp_init_sock(struct sock *sk)
+{
+	struct sctp_endpoint *ep;
+	struct sctp_sock *sp;
+
+	SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk);
+
+	sp = sctp_sk(sk);
+
+	/* Initialize the SCTP per socket area.  */
+	switch (sk->sk_type) {
+	case SOCK_SEQPACKET:
+		sp->type = SCTP_SOCKET_UDP;
+		break;
+	case SOCK_STREAM:
+		sp->type = SCTP_SOCKET_TCP;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	/* Initialize default send parameters. These parameters can be
+	 * modified with the SCTP_DEFAULT_SEND_PARAM socket option.
+	 */
+	sp->default_stream = 0;
+	sp->default_ppid = 0;
+	sp->default_flags = 0;
+	sp->default_context = 0;
+	sp->default_timetolive = 0;
+
+	/* Initialize default setup parameters. These parameters
+	 * can be modified with the SCTP_INITMSG socket option or
+	 * overridden by the SCTP_INIT CMSG.
+	 */
+	sp->initmsg.sinit_num_ostreams   = sctp_max_outstreams;
+	sp->initmsg.sinit_max_instreams  = sctp_max_instreams;
+	sp->initmsg.sinit_max_attempts   = sctp_max_retrans_init;
+	sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max);
+
+	/* Initialize default RTO related parameters.  These parameters can
+	 * be modified for with the SCTP_RTOINFO socket option.
+	 */
+	sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial);
+	sp->rtoinfo.srto_max     = jiffies_to_msecs(sctp_rto_max);
+	sp->rtoinfo.srto_min     = jiffies_to_msecs(sctp_rto_min);
+
+	/* Initialize default association related parameters. These parameters
+	 * can be modified with the SCTP_ASSOCINFO socket option.
+	 */
+	sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association;
+	sp->assocparams.sasoc_number_peer_destinations = 0;
+	sp->assocparams.sasoc_peer_rwnd = 0;
+	sp->assocparams.sasoc_local_rwnd = 0;
+	sp->assocparams.sasoc_cookie_life = 
+		jiffies_to_msecs(sctp_valid_cookie_life);
+
+	/* Initialize default event subscriptions. By default, all the
+	 * options are off. 
+	 */
+	memset(&sp->subscribe, 0, sizeof(struct sctp_event_subscribe));
+
+	/* Default Peer Address Parameters.  These defaults can
+	 * be modified via SCTP_PEER_ADDR_PARAMS
+	 */
+	sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval);
+	sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path;
+
+	/* If enabled no SCTP message fragmentation will be performed.
+	 * Configure through SCTP_DISABLE_FRAGMENTS socket option.
+	 */
+	sp->disable_fragments = 0;
+
+	/* Turn on/off any Nagle-like algorithm.  */
+	sp->nodelay           = 1;
+
+	/* Enable by default. */
+	sp->v4mapped          = 1;
+
+	/* Auto-close idle associations after the configured
+	 * number of seconds.  A value of 0 disables this
+	 * feature.  Configure through the SCTP_AUTOCLOSE socket option,
+	 * for UDP-style sockets only.
+	 */
+	sp->autoclose         = 0;
+
+	/* User specified fragmentation limit. */
+	sp->user_frag         = 0;
+
+	sp->adaption_ind = 0;
+
+	sp->pf = sctp_get_pf_specific(sk->sk_family);
+
+	/* Control variables for partial data delivery. */
+	sp->pd_mode           = 0;
+	skb_queue_head_init(&sp->pd_lobby);
+
+	/* Create a per socket endpoint structure.  Even if we
+	 * change the data structure relationships, this may still
+	 * be useful for storing pre-connect address information.
+	 */
+	ep = sctp_endpoint_new(sk, GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	sp->ep = ep;
+	sp->hmac = NULL;
+
+	SCTP_DBG_OBJCNT_INC(sock);
+	return 0;
+}
+
+/* Cleanup any SCTP per socket resources.  */
+SCTP_STATIC int sctp_destroy_sock(struct sock *sk)
+{
+	struct sctp_endpoint *ep;
+
+	SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk);
+
+	/* Release our hold on the endpoint. */
+	ep = sctp_sk(sk)->ep;
+	sctp_endpoint_free(ep);
+
+	return 0;
+}
+
+/* API 4.1.7 shutdown() - TCP Style Syntax
+ *     int shutdown(int socket, int how);
+ *
+ *     sd      - the socket descriptor of the association to be closed.
+ *     how     - Specifies the type of shutdown.  The  values  are
+ *               as follows:
+ *               SHUT_RD
+ *                     Disables further receive operations. No SCTP
+ *                     protocol action is taken.
+ *               SHUT_WR
+ *                     Disables further send operations, and initiates
+ *                     the SCTP shutdown sequence.
+ *               SHUT_RDWR
+ *                     Disables further send  and  receive  operations
+ *                     and initiates the SCTP shutdown sequence.
+ */
+SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
+{
+	struct sctp_endpoint *ep;
+	struct sctp_association *asoc;
+
+	if (!sctp_style(sk, TCP))
+		return;
+
+	if (how & SEND_SHUTDOWN) {
+		ep = sctp_sk(sk)->ep;
+		if (!list_empty(&ep->asocs)) {
+			asoc = list_entry(ep->asocs.next,
+					  struct sctp_association, asocs);
+			sctp_primitive_SHUTDOWN(asoc, NULL);
+		}
+	}
+}
+
+/* 7.2.1 Association Status (SCTP_STATUS)
+
+ * Applications can retrieve current status information about an
+ * association, including association state, peer receiver window size,
+ * number of unacked data chunks, and number of data chunks pending
+ * receipt.  This information is read-only.
+ */
+static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
+				       char __user *optval,
+				       int __user *optlen)
+{
+	struct sctp_status status;
+	struct sctp_association *asoc = NULL;
+	struct sctp_transport *transport;
+	sctp_assoc_t associd;
+	int retval = 0;
+
+	if (len != sizeof(status)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(&status, optval, sizeof(status))) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	associd = status.sstat_assoc_id;
+	asoc = sctp_id2assoc(sk, associd);
+	if (!asoc) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	transport = asoc->peer.primary_path;
+
+	status.sstat_assoc_id = sctp_assoc2id(asoc);
+	status.sstat_state = asoc->state;
+	status.sstat_rwnd =  asoc->peer.rwnd;
+	status.sstat_unackdata = asoc->unack_data;
+
+	status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
+	status.sstat_instrms = asoc->c.sinit_max_instreams;
+	status.sstat_outstrms = asoc->c.sinit_num_ostreams;
+	status.sstat_fragmentation_point = asoc->frag_point;
+	status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
+	memcpy(&status.sstat_primary.spinfo_address,
+	       &(transport->ipaddr), sizeof(union sctp_addr));
+	/* Map ipv4 address into v4-mapped-on-v6 address.  */
+	sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
+		(union sctp_addr *)&status.sstat_primary.spinfo_address);
+	status.sstat_primary.spinfo_state = transport->active;
+	status.sstat_primary.spinfo_cwnd = transport->cwnd;
+	status.sstat_primary.spinfo_srtt = transport->srtt;
+	status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto);
+	status.sstat_primary.spinfo_mtu = transport->pmtu;
+
+	if (put_user(len, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	SCTP_DEBUG_PRINTK("sctp_getsockopt_sctp_status(%d): %d %d %d\n",
+			  len, status.sstat_state, status.sstat_rwnd,
+			  status.sstat_assoc_id);
+
+	if (copy_to_user(optval, &status, len)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+out:
+	return (retval);
+}
+
+
+/* 7.2.2 Peer Address Information (SCTP_GET_PEER_ADDR_INFO)
+ *
+ * Applications can retrieve information about a specific peer address
+ * of an association, including its reachability state, congestion
+ * window, and retransmission timer values.  This information is
+ * read-only.
+ */
+static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_paddrinfo pinfo;
+	struct sctp_transport *transport;
+	int retval = 0;
+
+	if (len != sizeof(pinfo)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(&pinfo, optval, sizeof(pinfo))) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address,
+					   pinfo.spinfo_assoc_id);
+	if (!transport)
+		return -EINVAL;
+
+	pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
+	pinfo.spinfo_state = transport->active;
+	pinfo.spinfo_cwnd = transport->cwnd;
+	pinfo.spinfo_srtt = transport->srtt;
+	pinfo.spinfo_rto = jiffies_to_msecs(transport->rto);
+	pinfo.spinfo_mtu = transport->pmtu;
+
+	if (put_user(len, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	if (copy_to_user(optval, &pinfo, len)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+out:
+	return (retval);
+}
+
+/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
+ *
+ * This option is a on/off flag.  If enabled no SCTP message
+ * fragmentation will be performed.  Instead if a message being sent
+ * exceeds the current PMTU size, the message will NOT be sent and
+ * instead a error will be indicated to the user.
+ */
+static int sctp_getsockopt_disable_fragments(struct sock *sk, int len,
+					char __user *optval, int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = (sctp_sk(sk)->disable_fragments == 1);
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+}
+
+/* 7.1.15 Set notification and ancillary events (SCTP_EVENTS)
+ *
+ * This socket option is used to specify various notifications and
+ * ancillary data the user wishes to receive.
+ */
+static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
+				  int __user *optlen)
+{
+	if (len != sizeof(struct sctp_event_subscribe))
+		return -EINVAL;
+	if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len))
+		return -EFAULT;
+	return 0;
+}
+
+/* 7.1.8 Automatic Close of associations (SCTP_AUTOCLOSE)
+ *
+ * This socket option is applicable to the UDP-style socket only.  When
+ * set it will cause associations that are idle for more than the
+ * specified number of seconds to automatically close.  An association
+ * being idle is defined an association that has NOT sent or received
+ * user data.  The special value of '0' indicates that no automatic
+ * close of any associations should be performed.  The option expects an
+ * integer defining the number of seconds of idle time before an
+ * association is closed.
+ */
+static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optval, int __user *optlen)
+{
+	/* Applicable to UDP-style socket only */
+	if (sctp_style(sk, TCP))
+		return -EOPNOTSUPP;
+	if (len != sizeof(int))
+		return -EINVAL;
+	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
+		return -EFAULT;
+	return 0;
+}
+
+/* Helper routine to branch off an association to a new socket.  */
+SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
+				struct socket **sockp)
+{
+	struct sock *sk = asoc->base.sk;
+	struct socket *sock;
+	int err = 0;
+
+	/* An association cannot be branched off from an already peeled-off
+	 * socket, nor is this supported for tcp style sockets.
+	 */
+	if (!sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* Create a new socket.  */
+	err = sock_create(sk->sk_family, SOCK_SEQPACKET, IPPROTO_SCTP, &sock);
+	if (err < 0)
+		return err;
+
+	/* Populate the fields of the newsk from the oldsk and migrate the
+	 * asoc to the newsk.
+	 */
+	sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
+	*sockp = sock;
+
+	return err;
+}
+
+static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen)
+{
+	sctp_peeloff_arg_t peeloff;
+	struct socket *newsock;
+	int retval = 0;
+	struct sctp_association *asoc;
+
+	if (len != sizeof(sctp_peeloff_arg_t))
+		return -EINVAL;
+	if (copy_from_user(&peeloff, optval, len))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, peeloff.associd);
+	if (!asoc) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	SCTP_DEBUG_PRINTK("%s: sk: %p asoc: %p\n", __FUNCTION__, sk, asoc);
+
+	retval = sctp_do_peeloff(asoc, &newsock);
+	if (retval < 0)
+		goto out;
+
+	/* Map the socket to an unused fd that can be returned to the user.  */
+	retval = sock_map_fd(newsock);
+	if (retval < 0) {
+		sock_release(newsock);
+		goto out;
+	}
+
+	SCTP_DEBUG_PRINTK("%s: sk: %p asoc: %p newsk: %p sd: %d\n",
+			  __FUNCTION__, sk, asoc, newsock->sk, retval);
+
+	/* Return the fd mapped to the new socket.  */
+	peeloff.sd = retval;
+	if (copy_to_user(optval, &peeloff, len))
+		retval = -EFAULT;
+
+out:
+	return retval;
+}
+
+/* 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS)
+ *
+ * Applications can enable or disable heartbeats for any peer address of
+ * an association, modify an address's heartbeat interval, force a
+ * heartbeat to be sent immediately, and adjust the address's maximum
+ * number of retransmissions sent before an address is considered
+ * unreachable.  The following structure is used to access and modify an
+ * address's parameters:
+ *
+ *  struct sctp_paddrparams {
+ *      sctp_assoc_t            spp_assoc_id;
+ *      struct sockaddr_storage spp_address;
+ *      uint32_t                spp_hbinterval;
+ *      uint16_t                spp_pathmaxrxt;
+ *  };
+ *
+ *   spp_assoc_id    - (UDP style socket) This is filled in the application,
+ *                     and identifies the association for this query.
+ *   spp_address     - This specifies which address is of interest.
+ *   spp_hbinterval  - This contains the value of the heartbeat interval,
+ *                     in milliseconds.  A value of 0, when modifying the
+ *                     parameter, specifies that the heartbeat on this
+ *                     address should be disabled. A value of UINT32_MAX
+ *                     (4294967295), when modifying the parameter,
+ *                     specifies that a heartbeat should be sent
+ *                     immediately to the peer address, and the current
+ *                     interval should remain unchanged.
+ *   spp_pathmaxrxt  - This contains the maximum number of
+ *                     retransmissions before this address shall be
+ *                     considered unreachable.
+ */
+static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
+						char __user *optval, int __user *optlen)
+{
+	struct sctp_paddrparams params;
+	struct sctp_transport *trans;
+
+	if (len != sizeof(struct sctp_paddrparams))
+		return -EINVAL;
+	if (copy_from_user(&params, optval, len))
+		return -EFAULT;
+
+	/* If no association id is specified retrieve the default value
+	 * for the endpoint that will be used for all future associations
+	 */
+	if (!params.spp_assoc_id &&
+	    sctp_is_any(( union sctp_addr *)&params.spp_address)) {
+		params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval;
+		params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt;
+
+		goto done;
+	}
+
+	trans = sctp_addr_id2transport(sk, &params.spp_address,
+				       params.spp_assoc_id);
+	if (!trans)
+		return -EINVAL;
+
+	/* The value of the heartbeat interval, in milliseconds. A value of 0,
+	 * when modifying the parameter, specifies that the heartbeat on this
+	 * address should be disabled.
+	 */
+	if (!trans->hb_allowed)
+		params.spp_hbinterval = 0;
+	else
+		params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval);
+
+	/* spp_pathmaxrxt contains the maximum number of retransmissions
+	 * before this address shall be considered unreachable.
+	 */
+	params.spp_pathmaxrxt = trans->max_retrans;
+
+done:
+	if (copy_to_user(optval, &params, len))
+		return -EFAULT;
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* 7.1.3 Initialization Parameters (SCTP_INITMSG)
+ *
+ * Applications can specify protocol parameters for the default association
+ * initialization.  The option name argument to setsockopt() and getsockopt()
+ * is SCTP_INITMSG.
+ *
+ * Setting initialization parameters is effective only on an unconnected
+ * socket (for UDP-style sockets only future associations are effected
+ * by the change).  With TCP-style sockets, this option is inherited by
+ * sockets derived from a listener socket.
+ */
+static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval, int __user *optlen)
+{
+	if (len != sizeof(struct sctp_initmsg))
+		return -EINVAL;
+	if (copy_to_user(optval, &sctp_sk(sk)->initmsg, len))
+		return -EFAULT;
+	return 0;
+}
+
+static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len,
+					  char __user *optval, int __user *optlen)
+{
+	sctp_assoc_t id;
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	int cnt = 0;
+
+	if (len != sizeof(sctp_assoc_t))
+		return -EINVAL;
+
+	if (copy_from_user(&id, optval, sizeof(sctp_assoc_t)))
+		return -EFAULT;
+
+	/* For UDP-style sockets, id specifies the association to query.  */
+	asoc = sctp_id2assoc(sk, id);
+	if (!asoc)
+		return -EINVAL;
+
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		cnt ++;
+	}
+
+	return cnt;
+}
+
+static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
+				      char __user *optval, int __user *optlen)
+{
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	int cnt = 0;
+	struct sctp_getaddrs getaddrs;
+	struct sctp_transport *from;
+	void __user *to;
+	union sctp_addr temp;
+	struct sctp_sock *sp = sctp_sk(sk);
+	int addrlen;
+
+	if (len != sizeof(struct sctp_getaddrs))
+		return -EINVAL;
+
+	if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs)))
+		return -EFAULT;
+
+	if (getaddrs.addr_num <= 0) return -EINVAL;
+
+	/* For UDP-style sockets, id specifies the association to query.  */
+	asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	to = (void __user *)getaddrs.addrs;
+	list_for_each(pos, &asoc->peer.transport_addr_list) {
+		from = list_entry(pos, struct sctp_transport, transports);
+		memcpy(&temp, &from->ipaddr, sizeof(temp));
+		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
+		addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
+		temp.v4.sin_port = htons(temp.v4.sin_port);
+		if (copy_to_user(to, &temp, addrlen))
+			return -EFAULT;
+		to += addrlen ;
+		cnt ++;
+		if (cnt >= getaddrs.addr_num) break;
+	}
+	getaddrs.addr_num = cnt;
+	if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int sctp_getsockopt_local_addrs_num(struct sock *sk, int len,
+						char __user *optval,
+						int __user *optlen)
+{
+	sctp_assoc_t id;
+	struct sctp_bind_addr *bp;
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	struct sctp_sockaddr_entry *addr;
+	rwlock_t *addr_lock;
+	unsigned long flags;
+	int cnt = 0;
+
+	if (len != sizeof(sctp_assoc_t))
+		return -EINVAL;
+
+	if (copy_from_user(&id, optval, sizeof(sctp_assoc_t)))
+		return -EFAULT;
+
+	/*
+	 *  For UDP-style sockets, id specifies the association to query.
+	 *  If the id field is set to the value '0' then the locally bound
+	 *  addresses are returned without regard to any particular
+	 *  association.
+	 */
+	if (0 == id) {
+		bp = &sctp_sk(sk)->ep->base.bind_addr;
+		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
+	} else {
+		asoc = sctp_id2assoc(sk, id);
+		if (!asoc)
+			return -EINVAL;
+		bp = &asoc->base.bind_addr;
+		addr_lock = &asoc->base.addr_lock;
+	}
+
+	sctp_read_lock(addr_lock);
+
+	/* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
+	 * addresses from the global local address list.
+	 */
+	if (sctp_list_single_entry(&bp->address_list)) {
+		addr = list_entry(bp->address_list.next,
+				  struct sctp_sockaddr_entry, list);
+		if (sctp_is_any(&addr->a)) {
+			sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+			list_for_each(pos, &sctp_local_addr_list) {
+				addr = list_entry(pos,
+						  struct sctp_sockaddr_entry,
+						  list);
+				if ((PF_INET == sk->sk_family) && 
+				    (AF_INET6 == addr->a.sa.sa_family))	
+					continue;
+				cnt++;
+			}
+			sctp_spin_unlock_irqrestore(&sctp_local_addr_lock,
+						    flags);
+		} else {
+			cnt = 1;
+		}
+		goto done;
+	}
+
+	list_for_each(pos, &bp->address_list) {
+		cnt ++;
+	}
+
+done:
+	sctp_read_unlock(addr_lock);
+	return cnt;
+}
+
+/* Helper function that copies local addresses to user and returns the number
+ * of addresses copied.
+ */
+static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, int max_addrs,
+				    void __user *to)
+{
+	struct list_head *pos;
+	struct sctp_sockaddr_entry *addr;
+	unsigned long flags;
+	union sctp_addr temp;
+	int cnt = 0;
+	int addrlen;
+
+	sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags);
+	list_for_each(pos, &sctp_local_addr_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		if ((PF_INET == sk->sk_family) && 
+		    (AF_INET6 == addr->a.sa.sa_family))
+			continue;
+		memcpy(&temp, &addr->a, sizeof(temp));
+		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
+								&temp);
+		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
+		temp.v4.sin_port = htons(port);
+		if (copy_to_user(to, &temp, addrlen)) {
+			sctp_spin_unlock_irqrestore(&sctp_local_addr_lock,
+						    flags);
+			return -EFAULT;
+		}
+		to += addrlen;
+		cnt ++;
+		if (cnt >= max_addrs) break;
+	}
+	sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags);
+
+	return cnt;
+}
+
+static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
+				       char __user *optval, int __user *optlen)
+{
+	struct sctp_bind_addr *bp;
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	int cnt = 0;
+	struct sctp_getaddrs getaddrs;
+	struct sctp_sockaddr_entry *addr;
+	void __user *to;
+	union sctp_addr temp;
+	struct sctp_sock *sp = sctp_sk(sk);
+	int addrlen;
+	rwlock_t *addr_lock;
+	int err = 0;
+
+	if (len != sizeof(struct sctp_getaddrs))
+		return -EINVAL;
+
+	if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs)))
+		return -EFAULT;
+
+	if (getaddrs.addr_num <= 0) return -EINVAL;
+	/*
+	 *  For UDP-style sockets, id specifies the association to query.
+	 *  If the id field is set to the value '0' then the locally bound
+	 *  addresses are returned without regard to any particular
+	 *  association.
+	 */
+	if (0 == getaddrs.assoc_id) {
+		bp = &sctp_sk(sk)->ep->base.bind_addr;
+		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
+	} else {
+		asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
+		if (!asoc)
+			return -EINVAL;
+		bp = &asoc->base.bind_addr;
+		addr_lock = &asoc->base.addr_lock;
+	}
+
+	to = getaddrs.addrs;
+
+	sctp_read_lock(addr_lock);
+
+	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
+	 * addresses from the global local address list.
+	 */
+	if (sctp_list_single_entry(&bp->address_list)) {
+		addr = list_entry(bp->address_list.next,
+				  struct sctp_sockaddr_entry, list);
+		if (sctp_is_any(&addr->a)) {
+			cnt = sctp_copy_laddrs_to_user(sk, bp->port,
+						       getaddrs.addr_num, to);
+			if (cnt < 0) {
+				err = cnt;
+				goto unlock;
+			}
+			goto copy_getaddrs;		
+		}
+	}
+
+	list_for_each(pos, &bp->address_list) {
+		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		memcpy(&temp, &addr->a, sizeof(temp));
+		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
+		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
+		temp.v4.sin_port = htons(temp.v4.sin_port);
+		if (copy_to_user(to, &temp, addrlen)) {
+			err = -EFAULT;
+			goto unlock;
+		}
+		to += addrlen;
+		cnt ++;
+		if (cnt >= getaddrs.addr_num) break;
+	}
+
+copy_getaddrs:
+	getaddrs.addr_num = cnt;
+	if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs)))
+		err = -EFAULT;
+
+unlock:
+	sctp_read_unlock(addr_lock);
+	return err;
+}
+
+/* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
+ *
+ * Requests that the local SCTP stack use the enclosed peer address as
+ * the association primary.  The enclosed address must be one of the
+ * association peer's addresses.
+ */
+static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
+					char __user *optval, int __user *optlen)
+{
+	struct sctp_prim prim;
+	struct sctp_association *asoc;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (len != sizeof(struct sctp_prim))
+		return -EINVAL;
+
+	if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, prim.ssp_assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	if (!asoc->peer.primary_path)
+		return -ENOTCONN;
+	
+	asoc->peer.primary_path->ipaddr.v4.sin_port =
+		htons(asoc->peer.primary_path->ipaddr.v4.sin_port);
+	memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
+	       sizeof(union sctp_addr));
+	asoc->peer.primary_path->ipaddr.v4.sin_port =
+		ntohs(asoc->peer.primary_path->ipaddr.v4.sin_port);
+
+	sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp,
+			(union sctp_addr *)&prim.ssp_addr);
+
+	if (copy_to_user(optval, &prim, sizeof(struct sctp_prim)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * 7.1.11  Set Adaption Layer Indicator (SCTP_ADAPTION_LAYER)
+ *
+ * Requests that the local endpoint set the specified Adaption Layer
+ * Indication parameter for all future INIT and INIT-ACK exchanges.
+ */
+static int sctp_getsockopt_adaption_layer(struct sock *sk, int len,
+				  char __user *optval, int __user *optlen)
+{
+	__u32 val;
+
+	if (len < sizeof(__u32))
+		return -EINVAL;
+
+	len = sizeof(__u32);
+	val = sctp_sk(sk)->adaption_ind;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *
+ * 7.1.14 Set default send parameters (SCTP_DEFAULT_SEND_PARAM)
+ *
+ *   Applications that wish to use the sendto() system call may wish to
+ *   specify a default set of parameters that would normally be supplied
+ *   through the inclusion of ancillary data.  This socket option allows
+ *   such an application to set the default sctp_sndrcvinfo structure.
+
+
+ *   The application that wishes to use this socket option simply passes
+ *   in to this call the sctp_sndrcvinfo structure defined in Section
+ *   5.2.2) The input parameters accepted by this call include
+ *   sinfo_stream, sinfo_flags, sinfo_ppid, sinfo_context,
+ *   sinfo_timetolive.  The user must provide the sinfo_assoc_id field in
+ *   to this call if the caller is using the UDP model.
+ *
+ *   For getsockopt, it get the default sctp_sndrcvinfo structure.
+ */
+static int sctp_getsockopt_default_send_param(struct sock *sk,
+					int len, char __user *optval,
+					int __user *optlen)
+{
+	struct sctp_sndrcvinfo info;
+	struct sctp_association *asoc;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (len != sizeof(struct sctp_sndrcvinfo))
+		return -EINVAL;
+	if (copy_from_user(&info, optval, sizeof(struct sctp_sndrcvinfo)))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
+	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		info.sinfo_stream = asoc->default_stream;
+		info.sinfo_flags = asoc->default_flags;
+		info.sinfo_ppid = asoc->default_ppid;
+		info.sinfo_context = asoc->default_context;
+		info.sinfo_timetolive = asoc->default_timetolive;
+	} else {
+		info.sinfo_stream = sp->default_stream;
+		info.sinfo_flags = sp->default_flags;
+		info.sinfo_ppid = sp->default_ppid;
+		info.sinfo_context = sp->default_context;
+		info.sinfo_timetolive = sp->default_timetolive;
+	}
+
+	if (copy_to_user(optval, &info, sizeof(struct sctp_sndrcvinfo)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ *
+ * 7.1.5 SCTP_NODELAY
+ *
+ * Turn on/off any Nagle-like algorithm.  This means that packets are
+ * generally sent as soon as possible and no unnecessary delays are
+ * introduced, at the cost of more packets in the network.  Expects an
+ * integer boolean flag.
+ */
+
+static int sctp_getsockopt_nodelay(struct sock *sk, int len,
+				   char __user *optval, int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = (sctp_sk(sk)->nodelay == 1);
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ *
+ * 7.1.1 SCTP_RTOINFO
+ *
+ * The protocol parameters used to initialize and bound retransmission
+ * timeout (RTO) are tunable. sctp_rtoinfo structure is used to access
+ * and modify these parameters.
+ * All parameters are time values, in milliseconds.  A value of 0, when
+ * modifying the parameters, indicates that the current value should not
+ * be changed.
+ *
+ */
+static int sctp_getsockopt_rtoinfo(struct sock *sk, int len,
+				char __user *optval,
+				int __user *optlen) {
+	struct sctp_rtoinfo rtoinfo;
+	struct sctp_association *asoc;
+
+	if (len != sizeof (struct sctp_rtoinfo))
+		return -EINVAL;
+
+	if (copy_from_user(&rtoinfo, optval, sizeof (struct sctp_rtoinfo)))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
+
+	if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* Values corresponding to the specific association. */
+	if (asoc) {
+		rtoinfo.srto_initial = jiffies_to_msecs(asoc->rto_initial);
+		rtoinfo.srto_max = jiffies_to_msecs(asoc->rto_max);
+		rtoinfo.srto_min = jiffies_to_msecs(asoc->rto_min);
+	} else {
+		/* Values corresponding to the endpoint. */
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		rtoinfo.srto_initial = sp->rtoinfo.srto_initial;
+		rtoinfo.srto_max = sp->rtoinfo.srto_max;
+		rtoinfo.srto_min = sp->rtoinfo.srto_min;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &rtoinfo, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ *
+ * 7.1.2 SCTP_ASSOCINFO
+ *
+ * This option is used to tune the the maximum retransmission attempts
+ * of the association.
+ * Returns an error if the new association retransmission value is
+ * greater than the sum of the retransmission value  of the peer.
+ * See [SCTP] for more information.
+ *
+ */
+static int sctp_getsockopt_associnfo(struct sock *sk, int len,
+				     char __user *optval,
+				     int __user *optlen)
+{
+
+	struct sctp_assocparams assocparams;
+	struct sctp_association *asoc;
+	struct list_head *pos;
+	int cnt = 0;
+
+	if (len != sizeof (struct sctp_assocparams))
+		return -EINVAL;
+
+	if (copy_from_user(&assocparams, optval,
+			sizeof (struct sctp_assocparams)))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
+
+	if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* Values correspoinding to the specific association */
+	if (assocparams.sasoc_assoc_id != 0) {
+		assocparams.sasoc_asocmaxrxt = asoc->max_retrans;
+		assocparams.sasoc_peer_rwnd = asoc->peer.rwnd;
+		assocparams.sasoc_local_rwnd = asoc->a_rwnd;
+		assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec
+						* 1000) +
+						(asoc->cookie_life.tv_usec
+						/ 1000);
+
+		list_for_each(pos, &asoc->peer.transport_addr_list) {
+			cnt ++;
+		}
+
+		assocparams.sasoc_number_peer_destinations = cnt;
+	} else {
+		/* Values corresponding to the endpoint */
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		assocparams.sasoc_asocmaxrxt = sp->assocparams.sasoc_asocmaxrxt;
+		assocparams.sasoc_peer_rwnd = sp->assocparams.sasoc_peer_rwnd;
+		assocparams.sasoc_local_rwnd = sp->assocparams.sasoc_local_rwnd;
+		assocparams.sasoc_cookie_life =
+					sp->assocparams.sasoc_cookie_life;
+		assocparams.sasoc_number_peer_destinations =
+					sp->assocparams.
+					sasoc_number_peer_destinations;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &assocparams, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * 7.1.16 Set/clear IPv4 mapped addresses (SCTP_I_WANT_MAPPED_V4_ADDR)
+ *
+ * This socket option is a boolean flag which turns on or off mapped V4
+ * addresses.  If this option is turned on and the socket is type
+ * PF_INET6, then IPv4 addresses will be mapped to V6 representation.
+ * If this option is turned off, then no mapping will be done of V4
+ * addresses and a user will receive both PF_INET6 and PF_INET type
+ * addresses on the socket.
+ */
+static int sctp_getsockopt_mappedv4(struct sock *sk, int len,
+				    char __user *optval, int __user *optlen)
+{
+	int val;
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = sp->v4mapped;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG)
+ *
+ * This socket option specifies the maximum size to put in any outgoing
+ * SCTP chunk.  If a message is larger than this size it will be
+ * fragmented by SCTP into the specified size.  Note that the underlying
+ * SCTP implementation may fragment into smaller sized chunks when the
+ * PMTU of the underlying association is smaller than the value set by
+ * the user.
+ */
+static int sctp_getsockopt_maxseg(struct sock *sk, int len,
+				  char __user *optval, int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+
+	val = sctp_sk(sk)->user_frag;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
+				char __user *optval, int __user *optlen)
+{
+	int retval = 0;
+	int len;
+
+	SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p, ...)\n", sk);
+
+	/* I can hardly begin to describe how wrong this is.  This is
+	 * so broken as to be worse than useless.  The API draft
+	 * REALLY is NOT helpful here...  I am not convinced that the
+	 * semantics of getsockopt() with a level OTHER THAN SOL_SCTP
+	 * are at all well-founded.
+	 */
+	if (level != SOL_SCTP) {
+		struct sctp_af *af = sctp_sk(sk)->pf->af;
+
+		retval = af->getsockopt(sk, level, optname, optval, optlen);
+		return retval;
+	}
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	sctp_lock_sock(sk);
+
+	switch (optname) {
+	case SCTP_STATUS:
+		retval = sctp_getsockopt_sctp_status(sk, len, optval, optlen);
+		break;
+	case SCTP_DISABLE_FRAGMENTS:
+		retval = sctp_getsockopt_disable_fragments(sk, len, optval,
+							   optlen);
+		break;
+	case SCTP_EVENTS:
+		retval = sctp_getsockopt_events(sk, len, optval, optlen);
+		break;
+	case SCTP_AUTOCLOSE:
+		retval = sctp_getsockopt_autoclose(sk, len, optval, optlen);
+		break;
+	case SCTP_SOCKOPT_PEELOFF:
+		retval = sctp_getsockopt_peeloff(sk, len, optval, optlen);
+		break;
+	case SCTP_PEER_ADDR_PARAMS:
+		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
+							  optlen);
+		break;
+	case SCTP_INITMSG:
+		retval = sctp_getsockopt_initmsg(sk, len, optval, optlen);
+		break;
+	case SCTP_GET_PEER_ADDRS_NUM:
+		retval = sctp_getsockopt_peer_addrs_num(sk, len, optval,
+							optlen);
+		break;
+	case SCTP_GET_LOCAL_ADDRS_NUM:
+		retval = sctp_getsockopt_local_addrs_num(sk, len, optval,
+							 optlen);
+		break;
+	case SCTP_GET_PEER_ADDRS:
+		retval = sctp_getsockopt_peer_addrs(sk, len, optval,
+						    optlen);
+		break;
+	case SCTP_GET_LOCAL_ADDRS:
+		retval = sctp_getsockopt_local_addrs(sk, len, optval,
+						     optlen);
+		break;
+	case SCTP_DEFAULT_SEND_PARAM:
+		retval = sctp_getsockopt_default_send_param(sk, len,
+							    optval, optlen);
+		break;
+	case SCTP_PRIMARY_ADDR:
+		retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen);
+		break;
+	case SCTP_NODELAY:
+		retval = sctp_getsockopt_nodelay(sk, len, optval, optlen);
+		break;
+	case SCTP_RTOINFO:
+		retval = sctp_getsockopt_rtoinfo(sk, len, optval, optlen);
+		break;
+	case SCTP_ASSOCINFO:
+		retval = sctp_getsockopt_associnfo(sk, len, optval, optlen);
+		break;
+	case SCTP_I_WANT_MAPPED_V4_ADDR:
+		retval = sctp_getsockopt_mappedv4(sk, len, optval, optlen);
+		break;
+	case SCTP_MAXSEG:
+		retval = sctp_getsockopt_maxseg(sk, len, optval, optlen);
+		break;
+	case SCTP_GET_PEER_ADDR_INFO:
+		retval = sctp_getsockopt_peer_addr_info(sk, len, optval,
+							optlen);
+		break;
+	case SCTP_ADAPTION_LAYER:
+		retval = sctp_getsockopt_adaption_layer(sk, len, optval,
+							optlen);
+		break;
+	default:
+		retval = -ENOPROTOOPT;
+		break;
+	};
+
+	sctp_release_sock(sk);
+	return retval;
+}
+
+static void sctp_hash(struct sock *sk)
+{
+	/* STUB */
+}
+
+static void sctp_unhash(struct sock *sk)
+{
+	/* STUB */
+}
+
+/* Check if port is acceptable.  Possibly find first available port.
+ *
+ * The port hash table (contained in the 'global' SCTP protocol storage
+ * returned by struct sctp_protocol *sctp_get_protocol()). The hash
+ * table is an array of 4096 lists (sctp_bind_hashbucket). Each
+ * list (the list number is the port number hashed out, so as you
+ * would expect from a hash function, all the ports in a given list have
+ * such a number that hashes out to the same list number; you were
+ * expecting that, right?); so each list has a set of ports, with a
+ * link to the socket (struct sock) that uses it, the port number and
+ * a fastreuse flag (FIXME: NPI ipg).
+ */
+static struct sctp_bind_bucket *sctp_bucket_create(
+	struct sctp_bind_hashbucket *head, unsigned short snum);
+
+static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
+{
+	struct sctp_bind_hashbucket *head; /* hash list */
+	struct sctp_bind_bucket *pp; /* hash list port iterator */
+	unsigned short snum;
+	int ret;
+
+	/* NOTE:  Remember to put this back to net order. */
+	addr->v4.sin_port = ntohs(addr->v4.sin_port);
+	snum = addr->v4.sin_port;
+
+	SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum);
+	sctp_local_bh_disable();
+
+	if (snum == 0) {
+		/* Search for an available port.
+		 *
+		 * 'sctp_port_rover' was the last port assigned, so
+		 * we start to search from 'sctp_port_rover +
+		 * 1'. What we do is first check if port 'rover' is
+		 * already in the hash table; if not, we use that; if
+		 * it is, we try next.
+		 */
+		int low = sysctl_local_port_range[0];
+		int high = sysctl_local_port_range[1];
+		int remaining = (high - low) + 1;
+		int rover;
+		int index;
+
+		sctp_spin_lock(&sctp_port_alloc_lock);
+		rover = sctp_port_rover;
+		do {
+			rover++;
+			if ((rover < low) || (rover > high))
+				rover = low;
+			index = sctp_phashfn(rover);
+			head = &sctp_port_hashtable[index];
+			sctp_spin_lock(&head->lock);
+			for (pp = head->chain; pp; pp = pp->next)
+				if (pp->port == rover)
+					goto next;
+			break;
+		next:
+			sctp_spin_unlock(&head->lock);
+		} while (--remaining > 0);
+		sctp_port_rover = rover;
+		sctp_spin_unlock(&sctp_port_alloc_lock);
+
+		/* Exhausted local port range during search? */
+		ret = 1;
+		if (remaining <= 0)
+			goto fail;
+
+		/* OK, here is the one we will use.  HEAD (the port
+		 * hash table list entry) is non-NULL and we hold it's
+		 * mutex.
+		 */
+		snum = rover;
+	} else {
+		/* We are given an specific port number; we verify
+		 * that it is not being used. If it is used, we will
+		 * exahust the search in the hash list corresponding
+		 * to the port number (snum) - we detect that with the
+		 * port iterator, pp being NULL.
+		 */
+		head = &sctp_port_hashtable[sctp_phashfn(snum)];
+		sctp_spin_lock(&head->lock);
+		for (pp = head->chain; pp; pp = pp->next) {
+			if (pp->port == snum)
+				goto pp_found;
+		}
+	}
+	pp = NULL;
+	goto pp_not_found;
+pp_found:
+	if (!hlist_empty(&pp->owner)) {
+		/* We had a port hash table hit - there is an
+		 * available port (pp != NULL) and it is being
+		 * used by other socket (pp->owner not empty); that other
+		 * socket is going to be sk2.
+		 */
+		int reuse = sk->sk_reuse;
+		struct sock *sk2;
+		struct hlist_node *node;
+
+		SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
+		if (pp->fastreuse && sk->sk_reuse)
+			goto success;
+
+		/* Run through the list of sockets bound to the port
+		 * (pp->port) [via the pointers bind_next and
+		 * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one,
+		 * we get the endpoint they describe and run through
+		 * the endpoint's list of IP (v4 or v6) addresses,
+		 * comparing each of the addresses with the address of
+		 * the socket sk. If we find a match, then that means
+		 * that this port/socket (sk) combination are already
+		 * in an endpoint.
+		 */
+		sk_for_each_bound(sk2, node, &pp->owner) {
+			struct sctp_endpoint *ep2;
+			ep2 = sctp_sk(sk2)->ep;
+
+			if (reuse && sk2->sk_reuse)
+				continue;
+
+			if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
+						 sctp_sk(sk))) {
+				ret = (long)sk2;
+				goto fail_unlock;
+			}
+		}
+		SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n");
+	}
+pp_not_found:
+	/* If there was a hash table miss, create a new port.  */
+	ret = 1;
+	if (!pp && !(pp = sctp_bucket_create(head, snum)))
+		goto fail_unlock;
+
+	/* In either case (hit or miss), make sure fastreuse is 1 only
+	 * if sk->sk_reuse is too (that is, if the caller requested
+	 * SO_REUSEADDR on this socket -sk-).
+	 */
+	if (hlist_empty(&pp->owner))
+		pp->fastreuse = sk->sk_reuse ? 1 : 0;
+	else if (pp->fastreuse && !sk->sk_reuse)
+		pp->fastreuse = 0;
+
+	/* We are set, so fill up all the data in the hash table
+	 * entry, tie the socket list information with the rest of the
+	 * sockets FIXME: Blurry, NPI (ipg).
+	 */
+success:
+	inet_sk(sk)->num = snum;
+	if (!sctp_sk(sk)->bind_hash) {
+		sk_add_bind_node(sk, &pp->owner);
+		sctp_sk(sk)->bind_hash = pp;
+	}
+	ret = 0;
+
+fail_unlock:
+	sctp_spin_unlock(&head->lock);
+
+fail:
+	sctp_local_bh_enable();
+	addr->v4.sin_port = htons(addr->v4.sin_port);
+	return ret;
+}
+
+/* Assign a 'snum' port to the socket.  If snum == 0, an ephemeral
+ * port is requested.
+ */
+static int sctp_get_port(struct sock *sk, unsigned short snum)
+{
+	long ret;
+	union sctp_addr addr;
+	struct sctp_af *af = sctp_sk(sk)->pf->af;
+
+	/* Set up a dummy address struct from the sk. */
+	af->from_sk(&addr, sk);
+	addr.v4.sin_port = htons(snum);
+
+	/* Note: sk->sk_num gets filled in if ephemeral port request. */
+	ret = sctp_get_port_local(sk, &addr);
+
+	return (ret ? 1 : 0);
+}
+
+/*
+ * 3.1.3 listen() - UDP Style Syntax
+ *
+ *   By default, new associations are not accepted for UDP style sockets.
+ *   An application uses listen() to mark a socket as being able to
+ *   accept new associations.
+ */
+SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
+
+	/* Only UDP style sockets that are not peeled off are allowed to
+	 * listen().
+	 */
+	if (!sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* If backlog is zero, disable listening. */
+	if (!backlog) {
+		if (sctp_sstate(sk, CLOSED))
+			return 0;
+		
+		sctp_unhash_endpoint(ep);
+		sk->sk_state = SCTP_SS_CLOSED;
+	}
+
+	/* Return if we are already listening. */
+	if (sctp_sstate(sk, LISTENING))
+		return 0;
+		
+	/*
+	 * If a bind() or sctp_bindx() is not called prior to a listen()
+	 * call that allows new associations to be accepted, the system
+	 * picks an ephemeral port and will choose an address set equivalent
+	 * to binding with a wildcard address.
+	 *
+	 * This is not currently spelled out in the SCTP sockets
+	 * extensions draft, but follows the practice as seen in TCP
+	 * sockets.
+	 */
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	}
+	sk->sk_state = SCTP_SS_LISTENING;
+	sctp_hash_endpoint(ep);
+	return 0;
+}
+
+/*
+ * 4.1.3 listen() - TCP Style Syntax
+ *
+ *   Applications uses listen() to ready the SCTP endpoint for accepting
+ *   inbound associations.
+ */
+SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_endpoint *ep = sp->ep;
+
+	/* If backlog is zero, disable listening. */
+	if (!backlog) {
+		if (sctp_sstate(sk, CLOSED))
+			return 0;
+		
+		sctp_unhash_endpoint(ep);
+		sk->sk_state = SCTP_SS_CLOSED;
+	}
+
+	if (sctp_sstate(sk, LISTENING))
+		return 0;
+
+	/*
+	 * If a bind() or sctp_bindx() is not called prior to a listen()
+	 * call that allows new associations to be accepted, the system
+	 * picks an ephemeral port and will choose an address set equivalent
+	 * to binding with a wildcard address.
+	 *
+	 * This is not currently spelled out in the SCTP sockets
+	 * extensions draft, but follows the practice as seen in TCP
+	 * sockets.
+	 */
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	}
+	sk->sk_state = SCTP_SS_LISTENING;
+	sk->sk_max_ack_backlog = backlog;
+	sctp_hash_endpoint(ep);
+	return 0;
+}
+
+/*
+ *  Move a socket to LISTENING state.
+ */
+int sctp_inet_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	struct crypto_tfm *tfm=NULL;
+	int err = -EINVAL;
+
+	if (unlikely(backlog < 0))
+		goto out;
+
+	sctp_lock_sock(sk);
+
+	if (sock->state != SS_UNCONNECTED)
+		goto out;
+
+	/* Allocate HMAC for generating cookie. */
+	if (sctp_hmac_alg) {
+		tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0);
+		if (!tfm) {
+			err = -ENOSYS;
+			goto out;
+		}
+	}
+
+	switch (sock->type) {
+	case SOCK_SEQPACKET:
+		err = sctp_seqpacket_listen(sk, backlog);
+		break;
+	case SOCK_STREAM:
+		err = sctp_stream_listen(sk, backlog);
+		break;
+	default:
+		break;
+	};
+	if (err)
+		goto cleanup;
+
+	/* Store away the transform reference. */
+	sctp_sk(sk)->hmac = tfm;
+out:
+	sctp_release_sock(sk);
+	return err;
+cleanup:
+	if (tfm)
+		sctp_crypto_free_tfm(tfm);
+	goto out;
+}
+
+/*
+ * This function is done by modeling the current datagram_poll() and the
+ * tcp_poll().  Note that, based on these implementations, we don't
+ * lock the socket in this function, even though it seems that,
+ * ideally, locking or some other mechanisms can be used to ensure
+ * the integrity of the counters (sndbuf and wmem_queued) used
+ * in this place.  We assume that we don't need locks either until proven
+ * otherwise.
+ *
+ * Another thing to note is that we include the Async I/O support
+ * here, again, by modeling the current TCP/UDP code.  We don't have
+ * a good way to test with it yet.
+ */
+unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct sctp_sock *sp = sctp_sk(sk);
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	/* A TCP-style listening socket becomes readable when the accept queue
+	 * is not empty.
+	 */
+	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
+		return (!list_empty(&sp->ep->asocs)) ?
+		       	(POLLIN | POLLRDNORM) : 0;
+
+	mask = 0;
+
+	/* Is there any exceptional events?  */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* Is it readable?  Reconsider this code with TCP-style support.  */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* The association is either gone or not ready.  */
+	if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
+		return mask;
+
+	/* Is it writable?  */
+	if (sctp_writeable(sk)) {
+		mask |= POLLOUT | POLLWRNORM;
+	} else {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		/*
+		 * Since the socket is not locked, the buffer
+		 * might be made available after the writeable check and
+		 * before the bit is set.  This could cause a lost I/O
+		 * signal.  tcp_poll() has a race breaker for this race
+		 * condition.  Based on their implementation, we put
+		 * in the following code to cover it as well.
+		 */
+		if (sctp_writeable(sk))
+			mask |= POLLOUT | POLLWRNORM;
+	}
+	return mask;
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+static struct sctp_bind_bucket *sctp_bucket_create(
+	struct sctp_bind_hashbucket *head, unsigned short snum)
+{
+	struct sctp_bind_bucket *pp;
+
+	pp = kmem_cache_alloc(sctp_bucket_cachep, SLAB_ATOMIC);
+	SCTP_DBG_OBJCNT_INC(bind_bucket);
+	if (pp) {
+		pp->port = snum;
+		pp->fastreuse = 0;
+		INIT_HLIST_HEAD(&pp->owner);
+		if ((pp->next = head->chain) != NULL)
+			pp->next->pprev = &pp->next;
+		head->chain = pp;
+		pp->pprev = &head->chain;
+	}
+	return pp;
+}
+
+/* Caller must hold hashbucket lock for this tb with local BH disabled */
+static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
+{
+	if (hlist_empty(&pp->owner)) {
+		if (pp->next)
+			pp->next->pprev = pp->pprev;
+		*(pp->pprev) = pp->next;
+		kmem_cache_free(sctp_bucket_cachep, pp);
+		SCTP_DBG_OBJCNT_DEC(bind_bucket);
+	}
+}
+
+/* Release this socket's reference to a local port.  */
+static inline void __sctp_put_port(struct sock *sk)
+{
+	struct sctp_bind_hashbucket *head =
+		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)];
+	struct sctp_bind_bucket *pp;
+
+	sctp_spin_lock(&head->lock);
+	pp = sctp_sk(sk)->bind_hash;
+	__sk_del_bind_node(sk);
+	sctp_sk(sk)->bind_hash = NULL;
+	inet_sk(sk)->num = 0;
+	sctp_bucket_destroy(pp);
+	sctp_spin_unlock(&head->lock);
+}
+
+void sctp_put_port(struct sock *sk)
+{
+	sctp_local_bh_disable();
+	__sctp_put_port(sk);
+	sctp_local_bh_enable();
+}
+
+/*
+ * The system picks an ephemeral port and choose an address set equivalent
+ * to binding with a wildcard address.
+ * One of those addresses will be the primary address for the association.
+ * This automatically enables the multihoming capability of SCTP.
+ */
+static int sctp_autobind(struct sock *sk)
+{
+	union sctp_addr autoaddr;
+	struct sctp_af *af;
+	unsigned short port;
+
+	/* Initialize a local sockaddr structure to INADDR_ANY. */
+	af = sctp_sk(sk)->pf->af;
+
+	port = htons(inet_sk(sk)->num);
+	af->inaddr_any(&autoaddr, port);
+
+	return sctp_do_bind(sk, &autoaddr, af->sockaddr_len);
+}
+
+/* Parse out IPPROTO_SCTP CMSG headers.  Perform only minimal validation.
+ *
+ * From RFC 2292
+ * 4.2 The cmsghdr Structure *
+ *
+ * When ancillary data is sent or received, any number of ancillary data
+ * objects can be specified by the msg_control and msg_controllen members of
+ * the msghdr structure, because each object is preceded by
+ * a cmsghdr structure defining the object's length (the cmsg_len member).
+ * Historically Berkeley-derived implementations have passed only one object
+ * at a time, but this API allows multiple objects to be
+ * passed in a single call to sendmsg() or recvmsg(). The following example
+ * shows two ancillary data objects in a control buffer.
+ *
+ *   |<--------------------------- msg_controllen -------------------------->|
+ *   |                                                                       |
+ *
+ *   |<----- ancillary data object ----->|<----- ancillary data object ----->|
+ *
+ *   |<---------- CMSG_SPACE() --------->|<---------- CMSG_SPACE() --------->|
+ *   |                                   |                                   |
+ *
+ *   |<---------- cmsg_len ---------->|  |<--------- cmsg_len ----------->|  |
+ *
+ *   |<--------- CMSG_LEN() --------->|  |<-------- CMSG_LEN() ---------->|  |
+ *   |                                |  |                                |  |
+ *
+ *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
+ *   |cmsg_|cmsg_|cmsg_|XX|           |XX|cmsg_|cmsg_|cmsg_|XX|           |XX|
+ *
+ *   |len  |level|type |XX|cmsg_data[]|XX|len  |level|type |XX|cmsg_data[]|XX|
+ *
+ *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
+ *    ^
+ *    |
+ *
+ * msg_control
+ * points here
+ */
+SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
+				  sctp_cmsgs_t *cmsgs)
+{
+	struct cmsghdr *cmsg;
+
+	for (cmsg = CMSG_FIRSTHDR(msg);
+	     cmsg != NULL;
+	     cmsg = CMSG_NXTHDR((struct msghdr*)msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		/* Should we parse this header or ignore?  */
+		if (cmsg->cmsg_level != IPPROTO_SCTP)
+			continue;
+
+		/* Strictly check lengths following example in SCM code.  */
+		switch (cmsg->cmsg_type) {
+		case SCTP_INIT:
+			/* SCTP Socket API Extension
+			 * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
+			 *
+			 * This cmsghdr structure provides information for
+			 * initializing new SCTP associations with sendmsg().
+			 * The SCTP_INITMSG socket option uses this same data
+			 * structure.  This structure is not used for
+			 * recvmsg().
+			 *
+			 * cmsg_level    cmsg_type      cmsg_data[]
+			 * ------------  ------------   ----------------------
+			 * IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
+			 */
+			if (cmsg->cmsg_len !=
+			    CMSG_LEN(sizeof(struct sctp_initmsg)))
+				return -EINVAL;
+			cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg);
+			break;
+
+		case SCTP_SNDRCV:
+			/* SCTP Socket API Extension
+			 * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV)
+			 *
+			 * This cmsghdr structure specifies SCTP options for
+			 * sendmsg() and describes SCTP header information
+			 * about a received message through recvmsg().
+			 *
+			 * cmsg_level    cmsg_type      cmsg_data[]
+			 * ------------  ------------   ----------------------
+			 * IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
+			 */
+			if (cmsg->cmsg_len !=
+			    CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
+				return -EINVAL;
+
+			cmsgs->info =
+				(struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+
+			/* Minimally, validate the sinfo_flags. */
+			if (cmsgs->info->sinfo_flags &
+			    ~(MSG_UNORDERED | MSG_ADDR_OVER |
+			      MSG_ABORT | MSG_EOF))
+				return -EINVAL;
+			break;
+
+		default:
+			return -EINVAL;
+		};
+	}
+	return 0;
+}
+
+/*
+ * Wait for a packet..
+ * Note: This function is the same function as in core/datagram.c
+ * with a few modifications to make lksctp work.
+ */
+static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
+{
+	int error;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+	/* Socket errors? */
+	error = sock_error(sk);
+	if (error)
+		goto out;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto ready;
+
+	/* Socket shut down?  */
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		goto out;
+
+	/* Sequenced packets can come disconnected.  If so we report the
+	 * problem.
+	 */
+	error = -ENOTCONN;
+
+	/* Is there a good reason to think that we may receive some data?  */
+	if (list_empty(&sctp_sk(sk)->ep->asocs) && !sctp_sstate(sk, LISTENING))
+		goto out;
+
+	/* Handle signals.  */
+	if (signal_pending(current))
+		goto interrupted;
+
+	/* Let another process have a go.  Since we are going to sleep
+	 * anyway.  Note: This may cause odd behaviors if the message
+	 * does not fit in the user's buffer, but this seems to be the
+	 * only way to honor MSG_DONTWAIT realistically.
+	 */
+	sctp_release_sock(sk);
+	*timeo_p = schedule_timeout(*timeo_p);
+	sctp_lock_sock(sk);
+
+ready:
+	finish_wait(sk->sk_sleep, &wait);
+	return 0;
+
+interrupted:
+	error = sock_intr_errno(*timeo_p);
+
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	*err = error;
+	return error;
+}
+
+/* Receive a datagram.
+ * Note: This is pretty much the same routine as in core/datagram.c
+ * with a few changes to make lksctp work.
+ */
+static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
+					      int noblock, int *err)
+{
+	int error;
+	struct sk_buff *skb;
+	long timeo;
+
+	/* Caller is allowed not to check sk->sk_err before calling.  */
+	error = sock_error(sk);
+	if (error)
+		goto no_packet;
+
+	timeo = sock_rcvtimeo(sk, noblock);
+
+	SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n",
+			  timeo, MAX_SCHEDULE_TIMEOUT);
+
+	do {
+		/* Again only user level code calls this function,
+		 * so nothing interrupt level
+		 * will suddenly eat the receive_queue.
+		 *
+		 *  Look at current nfs client by the way...
+		 *  However, this function was corrent in any case. 8)
+		 */
+		if (flags & MSG_PEEK) {
+			unsigned long cpu_flags;
+
+			sctp_spin_lock_irqsave(&sk->sk_receive_queue.lock,
+					       cpu_flags);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb)
+				atomic_inc(&skb->users);
+			sctp_spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+						    cpu_flags);
+		} else {
+			skb = skb_dequeue(&sk->sk_receive_queue);
+		}
+
+		if (skb)
+			return skb;
+
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+
+		/* User doesn't want to wait.  */
+		error = -EAGAIN;
+		if (!timeo)
+			goto no_packet;
+	} while (sctp_wait_for_packet(sk, err, &timeo) == 0);
+
+	return NULL;
+
+no_packet:
+	*err = error;
+	return NULL;
+}
+
+/* If sndbuf has changed, wake up per association sndbuf waiters.  */
+static void __sctp_write_space(struct sctp_association *asoc)
+{
+	struct sock *sk = asoc->base.sk;
+	struct socket *sock = sk->sk_socket;
+
+	if ((sctp_wspace(asoc) > 0) && sock) {
+		if (waitqueue_active(&asoc->wait))
+			wake_up_interruptible(&asoc->wait);
+
+		if (sctp_writeable(sk)) {
+			if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+				wake_up_interruptible(sk->sk_sleep);
+
+			/* Note that we try to include the Async I/O support
+			 * here by modeling from the current TCP/UDP code.
+			 * We have not tested with it yet.
+			 */
+			if (sock->fasync_list &&
+			    !(sk->sk_shutdown & SEND_SHUTDOWN))
+				sock_wake_async(sock, 2, POLL_OUT);
+		}
+	}
+}
+
+/* Do accounting for the sndbuf space.
+ * Decrement the used sndbuf space of the corresponding association by the
+ * data size which was just transmitted(freed).
+ */
+static void sctp_wfree(struct sk_buff *skb)
+{
+	struct sctp_association *asoc;
+	struct sctp_chunk *chunk;
+	struct sock *sk;
+
+	/* Get the saved chunk pointer.  */
+	chunk = *((struct sctp_chunk **)(skb->cb));
+	asoc = chunk->asoc;
+	sk = asoc->base.sk;
+	asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk);
+	sk->sk_wmem_queued -= SCTP_DATA_SNDSIZE(chunk);
+	__sctp_write_space(asoc);
+
+	sctp_association_put(asoc);
+}
+
+/* Helper function to wait for space in the sndbuf.  */
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+				size_t msg_len)
+{
+	struct sock *sk = asoc->base.sk;
+	int err = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n",
+	                  asoc, (long)(*timeo_p), msg_len);
+
+	/* Increment the association's refcnt.  */
+	sctp_association_hold(asoc);
+
+	/* Wait on the association specific sndbuf space. */
+	for (;;) {
+		prepare_to_wait_exclusive(&asoc->wait, &wait,
+					  TASK_INTERRUPTIBLE);
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
+		    asoc->base.dead)
+			goto do_error;
+		if (signal_pending(current))
+			goto do_interrupted;
+		if (msg_len <= sctp_wspace(asoc))
+			break;
+
+		/* Let another process have a go.  Since we are going
+		 * to sleep anyway.
+		 */
+		sctp_release_sock(sk);
+		current_timeo = schedule_timeout(current_timeo);
+		sctp_lock_sock(sk);
+
+		*timeo_p = current_timeo;
+	}
+
+out:
+	finish_wait(&asoc->wait, &wait);
+
+	/* Release the association's refcnt.  */
+	sctp_association_put(asoc);
+
+	return err;
+
+do_error:
+	err = -EPIPE;
+	goto out;
+
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+
+do_nonblock:
+	err = -EAGAIN;
+	goto out;
+}
+
+/* If socket sndbuf has changed, wake up all per association waiters.  */
+void sctp_write_space(struct sock *sk)
+{
+	struct sctp_association *asoc;
+	struct list_head *pos;
+
+	/* Wake up the tasks in each wait queue.  */
+	list_for_each(pos, &((sctp_sk(sk))->ep->asocs)) {
+		asoc = list_entry(pos, struct sctp_association, asocs);
+		__sctp_write_space(asoc);
+	}
+}
+
+/* Is there any sndbuf space available on the socket?
+ *
+ * Note that wmem_queued is the sum of the send buffers on all of the
+ * associations on the same socket.  For a UDP-style socket with
+ * multiple associations, it is possible for it to be "unwriteable"
+ * prematurely.  I assume that this is acceptable because
+ * a premature "unwriteable" is better than an accidental "writeable" which
+ * would cause an unwanted block under certain circumstances.  For the 1-1
+ * UDP-style sockets or TCP-style sockets, this code should work.
+ *  - Daisy
+ */
+static int sctp_writeable(struct sock *sk)
+{
+	int amt = 0;
+
+	amt = sk->sk_sndbuf - sk->sk_wmem_queued;
+	if (amt < 0)
+		amt = 0;
+	return amt;
+}
+
+/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
+ * returns immediately with EINPROGRESS.
+ */
+static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p)
+{
+	struct sock *sk = asoc->base.sk;
+	int err = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __FUNCTION__, asoc,
+			  (long)(*timeo_p));
+
+	/* Increment the association's refcnt.  */
+	sctp_association_hold(asoc);
+
+	for (;;) {
+		prepare_to_wait_exclusive(&asoc->wait, &wait,
+					  TASK_INTERRUPTIBLE);
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
+		    asoc->base.dead)
+			goto do_error;
+		if (signal_pending(current))
+			goto do_interrupted;
+
+		if (sctp_state(asoc, ESTABLISHED))
+			break;
+
+		/* Let another process have a go.  Since we are going
+		 * to sleep anyway.
+		 */
+		sctp_release_sock(sk);
+		current_timeo = schedule_timeout(current_timeo);
+		sctp_lock_sock(sk);
+
+		*timeo_p = current_timeo;
+	}
+
+out:
+	finish_wait(&asoc->wait, &wait);
+
+	/* Release the association's refcnt.  */
+	sctp_association_put(asoc);
+
+	return err;
+
+do_error:
+	if (asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1 >=
+					 	asoc->max_init_attempts)
+		err = -ETIMEDOUT;
+	else
+		err = -ECONNREFUSED;
+	goto out;
+
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+
+do_nonblock:
+	err = -EINPROGRESS;
+	goto out;
+}
+
+static int sctp_wait_for_accept(struct sock *sk, long timeo)
+{
+	struct sctp_endpoint *ep;
+	int err = 0;
+	DEFINE_WAIT(wait);
+
+	ep = sctp_sk(sk)->ep;
+
+
+	for (;;) {
+		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+					  TASK_INTERRUPTIBLE);
+
+		if (list_empty(&ep->asocs)) {
+			sctp_release_sock(sk);
+			timeo = schedule_timeout(timeo);
+			sctp_lock_sock(sk);
+		}
+
+		err = -EINVAL;
+		if (!sctp_sstate(sk, LISTENING))
+			break;
+
+		err = 0;
+		if (!list_empty(&ep->asocs))
+			break;
+
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			break;
+
+		err = -EAGAIN;
+		if (!timeo)
+			break;
+	}
+
+	finish_wait(sk->sk_sleep, &wait);
+
+	return err;
+}
+
+void sctp_wait_for_close(struct sock *sk, long timeout)
+{
+	DEFINE_WAIT(wait);
+
+	do {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		if (list_empty(&sctp_sk(sk)->ep->asocs))
+			break;
+		sctp_release_sock(sk);
+		timeout = schedule_timeout(timeout);
+		sctp_lock_sock(sk);
+	} while (!signal_pending(current) && timeout);
+
+	finish_wait(sk->sk_sleep, &wait);
+}
+
+/* Populate the fields of the newsk from the oldsk and migrate the assoc
+ * and its messages to the newsk.
+ */
+static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			      struct sctp_association *assoc,
+			      sctp_socket_type_t type)
+{
+	struct sctp_sock *oldsp = sctp_sk(oldsk);
+	struct sctp_sock *newsp = sctp_sk(newsk);
+	struct sctp_bind_bucket *pp; /* hash list port iterator */
+	struct sctp_endpoint *newep = newsp->ep;
+	struct sk_buff *skb, *tmp;
+	struct sctp_ulpevent *event;
+
+	/* Migrate socket buffer sizes and all the socket level options to the
+	 * new socket.
+	 */
+	newsk->sk_sndbuf = oldsk->sk_sndbuf;
+	newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
+	/* Brute force copy old sctp opt. */
+	inet_sk_copy_descendant(newsk, oldsk);
+
+	/* Restore the ep value that was overwritten with the above structure
+	 * copy.
+	 */
+	newsp->ep = newep;
+	newsp->hmac = NULL;
+
+	/* Hook this new socket in to the bind_hash list. */
+	pp = sctp_sk(oldsk)->bind_hash;
+	sk_add_bind_node(newsk, &pp->owner);
+	sctp_sk(newsk)->bind_hash = pp;
+	inet_sk(newsk)->num = inet_sk(oldsk)->num;
+
+	/* Move any messages in the old socket's receive queue that are for the
+	 * peeled off association to the new socket's receive queue.
+	 */
+	sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
+		event = sctp_skb2event(skb);
+		if (event->asoc == assoc) {
+			__skb_unlink(skb, skb->list);
+			__skb_queue_tail(&newsk->sk_receive_queue, skb);
+		}
+	}
+
+	/* Clean up any messages pending delivery due to partial
+	 * delivery.   Three cases:
+	 * 1) No partial deliver;  no work.
+	 * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby.
+	 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
+	 */
+	skb_queue_head_init(&newsp->pd_lobby);
+	sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;
+
+	if (sctp_sk(oldsk)->pd_mode) {
+		struct sk_buff_head *queue;
+
+		/* Decide which queue to move pd_lobby skbs to. */
+		if (assoc->ulpq.pd_mode) {
+			queue = &newsp->pd_lobby;
+		} else
+			queue = &newsk->sk_receive_queue;
+
+		/* Walk through the pd_lobby, looking for skbs that
+		 * need moved to the new socket.
+		 */
+		sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
+			event = sctp_skb2event(skb);
+			if (event->asoc == assoc) {
+				__skb_unlink(skb, skb->list);
+				__skb_queue_tail(queue, skb);
+			}
+		}
+
+		/* Clear up any skbs waiting for the partial
+		 * delivery to finish.
+		 */
+		if (assoc->ulpq.pd_mode)
+			sctp_clear_pd(oldsk);
+
+	}
+
+	/* Set the type of socket to indicate that it is peeled off from the
+	 * original UDP-style socket or created with the accept() call on a
+	 * TCP-style socket..
+	 */
+	newsp->type = type;
+
+	/* Migrate the association to the new socket. */
+	sctp_assoc_migrate(assoc, newsk);
+
+	/* If the association on the newsk is already closed before accept()
+	 * is called, set RCV_SHUTDOWN flag.
+	 */
+	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP))
+		newsk->sk_shutdown |= RCV_SHUTDOWN;
+
+	newsk->sk_state = SCTP_SS_ESTABLISHED;
+}
+
+/* This proto struct describes the ULP interface for SCTP.  */
+struct proto sctp_prot = {
+	.name        =	"SCTP",
+	.owner       =	THIS_MODULE,
+	.close       =	sctp_close,
+	.connect     =	sctp_connect,
+	.disconnect  =	sctp_disconnect,
+	.accept      =	sctp_accept,
+	.ioctl       =	sctp_ioctl,
+	.init        =	sctp_init_sock,
+	.destroy     =	sctp_destroy_sock,
+	.shutdown    =	sctp_shutdown,
+	.setsockopt  =	sctp_setsockopt,
+	.getsockopt  =	sctp_getsockopt,
+	.sendmsg     =	sctp_sendmsg,
+	.recvmsg     =	sctp_recvmsg,
+	.bind        =	sctp_bind,
+	.backlog_rcv =	sctp_backlog_rcv,
+	.hash        =	sctp_hash,
+	.unhash      =	sctp_unhash,
+	.get_port    =	sctp_get_port,
+	.obj_size    =  sizeof(struct sctp_sock),
+};
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+struct proto sctpv6_prot = {
+	.name		= "SCTPv6",
+	.owner		= THIS_MODULE,
+	.close		= sctp_close,
+	.connect	= sctp_connect,
+	.disconnect	= sctp_disconnect,
+	.accept		= sctp_accept,
+	.ioctl		= sctp_ioctl,
+	.init		= sctp_init_sock,
+	.destroy	= sctp_destroy_sock,
+	.shutdown	= sctp_shutdown,
+	.setsockopt	= sctp_setsockopt,
+	.getsockopt	= sctp_getsockopt,
+	.sendmsg	= sctp_sendmsg,
+	.recvmsg	= sctp_recvmsg,
+	.bind		= sctp_bind,
+	.backlog_rcv	= sctp_backlog_rcv,
+	.hash		= sctp_hash,
+	.unhash		= sctp_unhash,
+	.get_port	= sctp_get_port,
+	.obj_size	= sizeof(struct sctp6_sock),
+};
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
new file mode 100644
index 00000000000..e627d2b451b
--- /dev/null
+++ b/net/sctp/ssnmap.c
@@ -0,0 +1,131 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 2003 International Business Machines, Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions manipulate sctp SSN tracker.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+#define MAX_KMALLOC_SIZE	131072
+
+static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in,
+					    __u16 out);
+
+/* Storage size needed for map includes 2 headers and then the
+ * specific needs of in or out streams.
+ */
+static inline size_t sctp_ssnmap_size(__u16 in, __u16 out)
+{
+	return sizeof(struct sctp_ssnmap) + (in + out) * sizeof(__u16);
+}
+
+
+/* Create a new sctp_ssnmap.
+ * Allocate room to store at least 'len' contiguous TSNs.
+ */
+struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int gfp)
+{
+	struct sctp_ssnmap *retval;
+	int size;
+
+	size = sctp_ssnmap_size(in, out);
+	if (size <= MAX_KMALLOC_SIZE)
+		retval = kmalloc(size, gfp);
+	else
+		retval = (struct sctp_ssnmap *)
+			  __get_free_pages(gfp, get_order(size));
+	if (!retval)
+		goto fail;
+
+	if (!sctp_ssnmap_init(retval, in, out))
+		goto fail_map;
+
+	retval->malloced = 1;
+	SCTP_DBG_OBJCNT_INC(ssnmap);
+
+	return retval;
+
+fail_map:
+	if (size <= MAX_KMALLOC_SIZE)
+		kfree(retval);
+	else
+		free_pages((unsigned long)retval, get_order(size));
+fail:
+	return NULL;
+}
+
+
+/* Initialize a block of memory as a ssnmap.  */
+static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in,
+					    __u16 out)
+{
+	memset(map, 0x00, sctp_ssnmap_size(in, out));
+
+	/* Start 'in' stream just after the map header. */
+	map->in.ssn = (__u16 *)&map[1];
+	map->in.len = in;
+
+	/* Start 'out' stream just after 'in'. */
+	map->out.ssn = &map->in.ssn[in];
+	map->out.len = out;
+
+	return map;
+}
+
+/* Clear out the ssnmap streams.  */
+void sctp_ssnmap_clear(struct sctp_ssnmap *map)
+{
+	size_t size;
+
+	size = (map->in.len + map->out.len) * sizeof(__u16);
+	memset(map->in.ssn, 0x00, size);
+}
+
+/* Dispose of a ssnmap.  */
+void sctp_ssnmap_free(struct sctp_ssnmap *map)
+{
+	if (map && map->malloced) {
+		int size;
+
+		size = sctp_ssnmap_size(map->in.len, map->out.len);
+		if (size <= MAX_KMALLOC_SIZE)
+			kfree(map);
+		else
+			free_pages((unsigned long)map, get_order(size));
+		SCTP_DBG_OBJCNT_DEC(ssnmap);
+	}
+}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
new file mode 100644
index 00000000000..89fa20c73a5
--- /dev/null
+++ b/net/sctp/sysctl.c
@@ -0,0 +1,251 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2002, 2004
+ * Copyright (c) 2002 Intel Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * Sysctl related interfaces for SCTP.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Mingqin Liu           <liuming@us.ibm.com>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Ardelle Fan           <ardelle.fan@intel.com>
+ *    Ryan Layer            <rmlayer@us.ibm.com>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <net/sctp/structs.h>
+#include <linux/sysctl.h>
+
+static ctl_handler sctp_sysctl_jiffies_ms;
+static long rto_timer_min = 1;
+static long rto_timer_max = 86400000; /* One day */
+
+static ctl_table sctp_table[] = {
+	{
+		.ctl_name	= NET_SCTP_RTO_INITIAL,
+		.procname	= "rto_initial",
+		.data		= &sctp_rto_initial,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_RTO_MIN,
+		.procname	= "rto_min",
+		.data		= &sctp_rto_min,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_RTO_MAX,
+		.procname	= "rto_max",
+		.data		= &sctp_rto_max,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_VALID_COOKIE_LIFE,
+		.procname	= "valid_cookie_life",
+		.data		= &sctp_valid_cookie_life,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_MAX_BURST,
+		.procname	= "max_burst",
+		.data		= &sctp_max_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_ASSOCIATION_MAX_RETRANS,
+		.procname	= "association_max_retrans",
+		.data		= &sctp_max_retrans_association,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_PATH_MAX_RETRANS,
+		.procname	= "path_max_retrans",
+		.data		= &sctp_max_retrans_path,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_MAX_INIT_RETRANSMITS,
+		.procname	= "max_init_retransmits",
+		.data		= &sctp_max_retrans_init,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_HB_INTERVAL,
+		.procname	= "hb_interval",
+		.data		= &sctp_hb_interval,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_PRESERVE_ENABLE,
+		.procname	= "cookie_preserve_enable",
+		.data		= &sctp_cookie_preserve_enable,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &rto_timer_min,
+		.extra2         = &rto_timer_max
+	},
+	{
+		.ctl_name	= NET_SCTP_RTO_ALPHA,
+		.procname	= "rto_alpha_exp_divisor",
+		.data		= &sctp_rto_alpha,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_RTO_BETA,
+		.procname	= "rto_beta_exp_divisor",
+		.data		= &sctp_rto_beta,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_ADDIP_ENABLE,
+		.procname	= "addip_enable",
+		.data		= &sctp_addip_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_SCTP_PRSCTP_ENABLE,
+		.procname	= "prsctp_enable",
+		.data		= &sctp_prsctp_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table sctp_net_table[] = {
+	{
+		.ctl_name	= NET_SCTP,
+		.procname	= "sctp",
+		.mode		= 0555,
+		.child		= sctp_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table sctp_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= sctp_net_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * sctp_sysctl_header;
+
+/* Sysctl registration.  */
+void sctp_sysctl_register(void)
+{
+	sctp_sysctl_header = register_sysctl_table(sctp_root_table, 0);
+}
+
+/* Sysctl deregistration.  */
+void sctp_sysctl_unregister(void)
+{
+	unregister_sysctl_table(sctp_sysctl_header);
+}
+
+/* Strategy function to convert jiffies to milliseconds.  */
+static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
+		void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen, void **context) {
+
+	if (oldval) {
+		size_t olen;
+
+		if (oldlenp) {
+			if (get_user(olen, oldlenp))
+				return -EFAULT;
+
+			if (olen != sizeof (int))
+				return -EINVAL;
+		}
+		if (put_user((*(int *)(table->data) * 1000) / HZ,
+			(int __user *)oldval) ||
+		    (oldlenp && put_user(sizeof (int), oldlenp)))
+			return -EFAULT;
+	}
+	if (newval && newlen) {
+		int new;
+
+		if (newlen != sizeof (int))
+			return -EINVAL;
+
+		if (get_user(new, (int __user *)newval))
+			return -EFAULT;
+
+		*(int *)(table->data) = (new * HZ) / 1000;
+	}
+	return 1;
+}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
new file mode 100644
index 00000000000..f30882e1e96
--- /dev/null
+++ b/net/sctp/transport.c
@@ -0,0 +1,514 @@
+/* SCTP kernel reference Implementation
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001-2003 International Business Machines Corp.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * This module provides the abstraction for an SCTP tranport representing
+ * a remote transport address.  For local transport addresses, we just use
+ * union sctp_addr.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Xingang Guo           <xingang.guo@intel.com>
+ *    Hui Huang             <hui.huang@nokia.com>
+ *    Sridhar Samudrala	    <sri@us.ibm.com>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* 1st Level Abstractions.  */
+
+/* Initialize a new transport from provided memory.  */
+static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
+						  const union sctp_addr *addr,
+						  int gfp)
+{
+	/* Copy in the address.  */
+	peer->ipaddr = *addr;
+	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
+	peer->asoc = NULL;
+
+	peer->dst = NULL;
+	memset(&peer->saddr, 0, sizeof(union sctp_addr));
+
+	/* From 6.3.1 RTO Calculation:
+	 *
+	 * C1) Until an RTT measurement has been made for a packet sent to the
+	 * given destination transport address, set RTO to the protocol
+	 * parameter 'RTO.Initial'.
+	 */
+	peer->rtt = 0;
+	peer->rto = sctp_rto_initial;
+	peer->rttvar = 0;
+	peer->srtt = 0;
+	peer->rto_pending = 0;
+
+	peer->last_time_heard = jiffies;
+	peer->last_time_used = jiffies;
+	peer->last_time_ecne_reduced = jiffies;
+
+	peer->active = SCTP_ACTIVE;
+	peer->hb_allowed = 0;
+
+	/* Initialize the default path max_retrans.  */
+	peer->max_retrans = sctp_max_retrans_path;
+	peer->error_count = 0;
+
+	INIT_LIST_HEAD(&peer->transmitted);
+	INIT_LIST_HEAD(&peer->send_ready);
+	INIT_LIST_HEAD(&peer->transports);
+
+	/* Set up the retransmission timer.  */
+	init_timer(&peer->T3_rtx_timer);
+	peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event;
+	peer->T3_rtx_timer.data = (unsigned long)peer;
+
+	/* Set up the heartbeat timer. */
+	init_timer(&peer->hb_timer);
+	peer->hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+	peer->hb_timer.function = sctp_generate_heartbeat_event;
+	peer->hb_timer.data = (unsigned long)peer;
+
+	atomic_set(&peer->refcnt, 1);
+	peer->dead = 0;
+
+	peer->malloced = 0;
+
+	/* Initialize the state information for SFR-CACC */
+	peer->cacc.changeover_active = 0;
+	peer->cacc.cycling_changeover = 0;
+	peer->cacc.next_tsn_at_change = 0;
+	peer->cacc.cacc_saw_newack = 0;
+
+	return peer;
+}
+
+/* Allocate and initialize a new transport.  */
+struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, int gfp)
+{
+        struct sctp_transport *transport;
+
+        transport = t_new(struct sctp_transport, gfp);
+	if (!transport)
+		goto fail;
+
+	if (!sctp_transport_init(transport, addr, gfp))
+		goto fail_init;
+
+	transport->malloced = 1;
+	SCTP_DBG_OBJCNT_INC(transport);
+
+	return transport;
+
+fail_init:
+	kfree(transport);
+
+fail:
+	return NULL;
+}
+
+/* This transport is no longer needed.  Free up if possible, or
+ * delay until it last reference count.
+ */
+void sctp_transport_free(struct sctp_transport *transport)
+{
+	transport->dead = 1;
+
+	/* Try to delete the heartbeat timer.  */
+	if (del_timer(&transport->hb_timer))
+		sctp_transport_put(transport);
+
+	/* Delete the T3_rtx timer if it's active.
+	 * There is no point in not doing this now and letting
+	 * structure hang around in memory since we know
+	 * the tranport is going away.
+	 */
+	if (timer_pending(&transport->T3_rtx_timer) &&
+	    del_timer(&transport->T3_rtx_timer))
+		sctp_transport_put(transport);
+
+
+	sctp_transport_put(transport);
+}
+
+/* Destroy the transport data structure.
+ * Assumes there are no more users of this structure.
+ */
+static void sctp_transport_destroy(struct sctp_transport *transport)
+{
+	SCTP_ASSERT(transport->dead, "Transport is not dead", return);
+
+	if (transport->asoc)
+		sctp_association_put(transport->asoc);
+
+        sctp_packet_free(&transport->packet);
+
+	dst_release(transport->dst);
+	kfree(transport);
+	SCTP_DBG_OBJCNT_DEC(transport);
+}
+
+/* Start T3_rtx timer if it is not already running and update the heartbeat
+ * timer.  This routine is called every time a DATA chunk is sent.
+ */
+void sctp_transport_reset_timers(struct sctp_transport *transport)
+{
+	/* RFC 2960 6.3.2 Retransmission Timer Rules
+	 *
+	 * R1) Every time a DATA chunk is sent to any address(including a
+	 * retransmission), if the T3-rtx timer of that address is not running
+	 * start it running so that it will expire after the RTO of that
+	 * address.
+	 */
+
+	if (!timer_pending(&transport->T3_rtx_timer))
+		if (!mod_timer(&transport->T3_rtx_timer,
+			       jiffies + transport->rto))
+			sctp_transport_hold(transport);
+
+	/* When a data chunk is sent, reset the heartbeat interval.  */
+	if (!mod_timer(&transport->hb_timer,
+		       sctp_transport_timeout(transport)))
+	    sctp_transport_hold(transport);
+}
+
+/* This transport has been assigned to an association.
+ * Initialize fields from the association or from the sock itself.
+ * Register the reference count in the association.
+ */
+void sctp_transport_set_owner(struct sctp_transport *transport,
+			      struct sctp_association *asoc)
+{
+	transport->asoc = asoc;
+	sctp_association_hold(asoc);
+}
+
+/* Initialize the pmtu of a transport. */
+void sctp_transport_pmtu(struct sctp_transport *transport)
+{
+	struct dst_entry *dst;
+
+	dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
+
+	if (dst) {
+		transport->pmtu = dst_mtu(dst);
+		dst_release(dst);
+	} else
+		transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
+}
+
+/* Caches the dst entry and source address for a transport's destination
+ * address.
+ */
+void sctp_transport_route(struct sctp_transport *transport,
+			  union sctp_addr *saddr, struct sctp_sock *opt)
+{
+	struct sctp_association *asoc = transport->asoc;
+	struct sctp_af *af = transport->af_specific;
+	union sctp_addr *daddr = &transport->ipaddr;
+	struct dst_entry *dst;
+
+	dst = af->get_dst(asoc, daddr, saddr);
+
+	if (saddr)
+		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
+	else
+		af->get_saddr(asoc, dst, daddr, &transport->saddr);
+
+	transport->dst = dst;
+	if (dst) {
+		transport->pmtu = dst_mtu(dst);
+
+		/* Initialize sk->sk_rcv_saddr, if the transport is the
+		 * association's active path for getsockname().
+		 */ 
+		if (asoc && (transport == asoc->peer.active_path))
+			af->to_sk_saddr(&transport->saddr, asoc->base.sk);
+	} else
+		transport->pmtu = SCTP_DEFAULT_MAXSEGMENT;
+}
+
+/* Hold a reference to a transport.  */
+void sctp_transport_hold(struct sctp_transport *transport)
+{
+	atomic_inc(&transport->refcnt);
+}
+
+/* Release a reference to a transport and clean up
+ * if there are no more references.
+ */
+void sctp_transport_put(struct sctp_transport *transport)
+{
+	if (atomic_dec_and_test(&transport->refcnt))
+		sctp_transport_destroy(transport);
+}
+
+/* Update transport's RTO based on the newly calculated RTT. */
+void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
+{
+	/* Check for valid transport.  */
+	SCTP_ASSERT(tp, "NULL transport", return);
+
+	/* We should not be doing any RTO updates unless rto_pending is set.  */
+	SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
+
+	if (tp->rttvar || tp->srtt) {
+		/* 6.3.1 C3) When a new RTT measurement R' is made, set
+		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
+		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
+		 */
+
+		/* Note:  The above algorithm has been rewritten to
+		 * express rto_beta and rto_alpha as inverse powers
+		 * of two.
+		 * For example, assuming the default value of RTO.Alpha of
+		 * 1/8, rto_alpha would be expressed as 3.
+		 */
+		tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
+			+ ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
+		tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
+			+ (rtt >> sctp_rto_alpha);
+	} else {
+		/* 6.3.1 C2) When the first RTT measurement R is made, set
+		 * SRTT <- R, RTTVAR <- R/2.
+		 */
+		tp->srtt = rtt;
+		tp->rttvar = rtt >> 1;
+	}
+
+	/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
+	 * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
+	 */
+	if (tp->rttvar == 0)
+		tp->rttvar = SCTP_CLOCK_GRANULARITY;
+
+	/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
+	tp->rto = tp->srtt + (tp->rttvar << 2);
+
+	/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
+	 * seconds then it is rounded up to RTO.Min seconds.
+	 */
+	if (tp->rto < tp->asoc->rto_min)
+		tp->rto = tp->asoc->rto_min;
+
+	/* 6.3.1 C7) A maximum value may be placed on RTO provided it is
+	 * at least RTO.max seconds.
+	 */
+	if (tp->rto > tp->asoc->rto_max)
+		tp->rto = tp->asoc->rto_max;
+
+	tp->rtt = rtt;
+
+	/* Reset rto_pending so that a new RTT measurement is started when a
+	 * new data chunk is sent.
+	 */
+	tp->rto_pending = 0;
+
+	SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
+			  "rttvar: %d, rto: %d\n", __FUNCTION__,
+			  tp, rtt, tp->srtt, tp->rttvar, tp->rto);
+}
+
+/* This routine updates the transport's cwnd and partial_bytes_acked
+ * parameters based on the bytes acked in the received SACK.
+ */
+void sctp_transport_raise_cwnd(struct sctp_transport *transport,
+			       __u32 sack_ctsn, __u32 bytes_acked)
+{
+	__u32 cwnd, ssthresh, flight_size, pba, pmtu;
+
+	cwnd = transport->cwnd;
+	flight_size = transport->flight_size;
+
+	/* The appropriate cwnd increase algorithm is performed if, and only
+	 * if the cumulative TSN has advanced and the congestion window is
+	 * being fully utilized.
+	 */
+	if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
+	    (flight_size < cwnd))
+		return;
+
+	ssthresh = transport->ssthresh;
+	pba = transport->partial_bytes_acked;
+	pmtu = transport->asoc->pmtu;
+
+	if (cwnd <= ssthresh) {
+		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
+		 * than or equal to ssthresh an SCTP endpoint MUST use the
+		 * slow start algorithm to increase cwnd only if the current
+		 * congestion window is being fully utilized and an incoming
+		 * SACK advances the Cumulative TSN Ack Point. Only when these
+		 * two conditions are met can the cwnd be increased otherwise
+		 * the cwnd MUST not be increased. If these conditions are met
+		 * then cwnd MUST be increased by at most the lesser of
+		 * 1) the total size of the previously outstanding DATA
+		 * chunk(s) acknowledged, and 2) the destination's path MTU.
+		 */
+		if (bytes_acked > pmtu)
+			cwnd += pmtu;
+		else
+			cwnd += bytes_acked;
+		SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "
+				  "bytes_acked: %d, cwnd: %d, ssthresh: %d, "
+				  "flight_size: %d, pba: %d\n",
+				  __FUNCTION__,
+				  transport, bytes_acked, cwnd,
+				  ssthresh, flight_size, pba);
+	} else {
+		/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
+		 * upon each SACK arrival that advances the Cumulative TSN Ack
+		 * Point, increase partial_bytes_acked by the total number of
+		 * bytes of all new chunks acknowledged in that SACK including
+		 * chunks acknowledged by the new Cumulative TSN Ack and by
+		 * Gap Ack Blocks.
+		 *
+		 * When partial_bytes_acked is equal to or greater than cwnd
+		 * and before the arrival of the SACK the sender had cwnd or
+		 * more bytes of data outstanding (i.e., before arrival of the
+		 * SACK, flightsize was greater than or equal to cwnd),
+		 * increase cwnd by MTU, and reset partial_bytes_acked to
+		 * (partial_bytes_acked - cwnd).
+		 */
+		pba += bytes_acked;
+		if (pba >= cwnd) {
+			cwnd += pmtu;
+			pba = ((cwnd < pba) ? (pba - cwnd) : 0);
+		}
+		SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "
+				  "transport: %p, bytes_acked: %d, cwnd: %d, "
+				  "ssthresh: %d, flight_size: %d, pba: %d\n",
+				  __FUNCTION__,
+				  transport, bytes_acked, cwnd,
+				  ssthresh, flight_size, pba);
+	}
+
+	transport->cwnd = cwnd;
+	transport->partial_bytes_acked = pba;
+}
+
+/* This routine is used to lower the transport's cwnd when congestion is
+ * detected.
+ */
+void sctp_transport_lower_cwnd(struct sctp_transport *transport,
+			       sctp_lower_cwnd_t reason)
+{
+	switch (reason) {
+	case SCTP_LOWER_CWND_T3_RTX:
+		/* RFC 2960 Section 7.2.3, sctpimpguide
+		 * When the T3-rtx timer expires on an address, SCTP should
+		 * perform slow start by:
+		 *      ssthresh = max(cwnd/2, 4*MTU)
+		 *      cwnd = 1*MTU
+		 *      partial_bytes_acked = 0
+		 */
+		transport->ssthresh = max(transport->cwnd/2,
+					  4*transport->asoc->pmtu);
+		transport->cwnd = transport->asoc->pmtu;
+		break;
+
+	case SCTP_LOWER_CWND_FAST_RTX:
+		/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
+		 * destination address(es) to which the missing DATA chunks
+		 * were last sent, according to the formula described in
+		 * Section 7.2.3.
+	 	 *
+	 	 * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
+		 * losses from SACK (see Section 7.2.4), An endpoint
+		 * should do the following:
+		 *      ssthresh = max(cwnd/2, 4*MTU)
+		 *      cwnd = ssthresh
+		 *      partial_bytes_acked = 0
+		 */
+		transport->ssthresh = max(transport->cwnd/2,
+					  4*transport->asoc->pmtu);
+		transport->cwnd = transport->ssthresh;
+		break;
+
+	case SCTP_LOWER_CWND_ECNE:
+		/* RFC 2481 Section 6.1.2.
+		 * If the sender receives an ECN-Echo ACK packet
+		 * then the sender knows that congestion was encountered in the
+		 * network on the path from the sender to the receiver. The
+		 * indication of congestion should be treated just as a
+		 * congestion loss in non-ECN Capable TCP. That is, the TCP
+		 * source halves the congestion window "cwnd" and reduces the
+		 * slow start threshold "ssthresh".
+		 * A critical condition is that TCP does not react to
+		 * congestion indications more than once every window of
+		 * data (or more loosely more than once every round-trip time).
+		 */
+		if ((jiffies - transport->last_time_ecne_reduced) >
+		    transport->rtt) {
+			transport->ssthresh = max(transport->cwnd/2,
+					  	  4*transport->asoc->pmtu);
+			transport->cwnd = transport->ssthresh;
+			transport->last_time_ecne_reduced = jiffies;
+		}
+		break;
+
+	case SCTP_LOWER_CWND_INACTIVE:
+		/* RFC 2960 Section 7.2.1, sctpimpguide
+		 * When the endpoint does not transmit data on a given
+		 * transport address, the cwnd of the transport address
+		 * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
+		 * NOTE: Although the draft recommends that this check needs
+		 * to be done every RTO interval, we do it every hearbeat
+		 * interval.
+		 */
+		if ((jiffies - transport->last_time_used) > transport->rto)
+			transport->cwnd = max(transport->cwnd/2,
+						 4*transport->asoc->pmtu);
+		break;
+	};
+
+	transport->partial_bytes_acked = 0;
+	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
+			  "%d ssthresh: %d\n", __FUNCTION__,
+			  transport, reason,
+			  transport->cwnd, transport->ssthresh);
+}
+
+/* What is the next timeout value for this transport? */
+unsigned long sctp_transport_timeout(struct sctp_transport *t)
+{
+	unsigned long timeout;
+	timeout = t->hb_interval + t->rto + sctp_jitter(t->rto);
+	timeout += jiffies;
+	return timeout;
+}
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
new file mode 100644
index 00000000000..ac4fae161bc
--- /dev/null
+++ b/net/sctp/tsnmap.c
@@ -0,0 +1,417 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ *
+ * This file is part of the SCTP kernel reference Implementation
+ *
+ * These functions manipulate sctp tsn mapping array.
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    Karl Knutson          <karl@athena.chicago.il.us>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+static void sctp_tsnmap_update(struct sctp_tsnmap *map);
+static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off,
+				     __u16 len, __u16 base,
+				     int *started, __u16 *start,
+				     int *ended, __u16 *end);
+
+/* Initialize a block of memory as a tsnmap.  */
+struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len,
+				     __u32 initial_tsn)
+{
+	map->tsn_map = map->raw_map;
+	map->overflow_map = map->tsn_map + len;
+	map->len = len;
+
+	/* Clear out a TSN ack status.  */
+	memset(map->tsn_map, 0x00, map->len + map->len);
+
+	/* Keep track of TSNs represented by tsn_map.  */
+	map->base_tsn = initial_tsn;
+	map->overflow_tsn = initial_tsn + map->len;
+	map->cumulative_tsn_ack_point = initial_tsn - 1;
+	map->max_tsn_seen = map->cumulative_tsn_ack_point;
+	map->malloced = 0;
+	map->num_dup_tsns = 0;
+
+	return map;
+}
+
+/* Test the tracking state of this TSN.
+ * Returns:
+ *   0 if the TSN has not yet been seen
+ *  >0 if the TSN has been seen (duplicate)
+ *  <0 if the TSN is invalid (too large to track)
+ */
+int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
+{
+	__s32 gap;
+	int dup;
+
+	/* Calculate the index into the mapping arrays.  */
+	gap = tsn - map->base_tsn;
+
+	/* Verify that we can hold this TSN.  */
+	if (gap >= (/* base */ map->len + /* overflow */ map->len)) {
+		dup = -1;
+		goto out;
+	}
+
+	/* Honk if we've already seen this TSN.
+	 * We have three cases:
+	 *	1. The TSN is ancient or belongs to a previous tsn_map.
+	 *	2. The TSN is already marked in the tsn_map.
+	 *	3. The TSN is already marked in the tsn_map_overflow.
+	 */
+	if (gap < 0 ||
+	    (gap < map->len && map->tsn_map[gap]) ||
+	    (gap >= map->len && map->overflow_map[gap - map->len]))
+		dup = 1;
+	else
+		dup = 0;
+
+out:
+	return dup;
+}
+
+
+/* Mark this TSN as seen.  */
+void sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
+{
+	__s32 gap;
+
+	/* Vacuously mark any TSN which precedes the map base or
+	 * exceeds the end of the map.
+	 */
+	if (TSN_lt(tsn, map->base_tsn))
+		return;
+	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
+		return;
+
+	/* Bump the max.  */
+	if (TSN_lt(map->max_tsn_seen, tsn))
+		map->max_tsn_seen = tsn;
+
+	/* Assert: TSN is in range.  */
+	gap = tsn - map->base_tsn;
+
+	/* Mark the TSN as received.  */
+	if (gap < map->len)
+		map->tsn_map[gap]++;
+	else
+		map->overflow_map[gap - map->len]++;
+
+	/* Go fixup any internal TSN mapping variables including
+	 * cumulative_tsn_ack_point.
+	 */
+	sctp_tsnmap_update(map);
+}
+
+
+/* Initialize a Gap Ack Block iterator from memory being provided.  */
+SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map,
+				       struct sctp_tsnmap_iter *iter)
+{
+	/* Only start looking one past the Cumulative TSN Ack Point.  */
+	iter->start = map->cumulative_tsn_ack_point + 1;
+}
+
+/* Get the next Gap Ack Blocks. Returns 0 if there was not another block
+ * to get.
+ */
+SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map,
+					 struct sctp_tsnmap_iter *iter,
+					 __u16 *start, __u16 *end)
+{
+	int started, ended;
+	__u16 _start, _end, offset;
+
+	/* We haven't found a gap yet.  */
+	started = ended = 0;
+
+	/* If there are no more gap acks possible, get out fast.  */
+	if (TSN_lte(map->max_tsn_seen, iter->start))
+		return 0;
+
+	/* Search the first mapping array.  */
+	if (iter->start - map->base_tsn < map->len) {
+
+		offset = iter->start - map->base_tsn;
+		sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0,
+					 &started, &_start, &ended, &_end);
+	}
+
+	/* Do we need to check the overflow map? */
+	if (!ended) {
+		/* Fix up where we'd like to start searching in the
+		 * overflow map.
+		 */
+		if (iter->start - map->base_tsn < map->len)
+			offset = 0;
+		else
+			offset = iter->start - map->base_tsn - map->len;
+
+		/* Search the overflow map.  */
+		sctp_tsnmap_find_gap_ack(map->overflow_map,
+					 offset,
+					 map->len,
+					 map->len,
+					 &started, &_start,
+					 &ended, &_end);
+	}
+
+	/* The Gap Ack Block happens to end at the end of the
+	 * overflow map.
+	 */
+	if (started && !ended) {
+		ended++;
+		_end = map->len + map->len - 1;
+	}
+
+	/* If we found a Gap Ack Block, return the start and end and
+	 * bump the iterator forward.
+	 */
+	if (ended) {
+		/* Fix up the start and end based on the
+		 * Cumulative TSN Ack offset into the map.
+		 */
+		int gap = map->cumulative_tsn_ack_point -
+			map->base_tsn;
+
+		*start = _start - gap;
+		*end = _end - gap;
+
+		/* Move the iterator forward.  */
+		iter->start = map->cumulative_tsn_ack_point + *end + 1;
+	}
+
+	return ended;
+}
+
+/* Mark this and any lower TSN as seen.  */
+void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn)
+{
+	__s32 gap;
+
+	/* Vacuously mark any TSN which precedes the map base or
+	 * exceeds the end of the map.
+	 */
+	if (TSN_lt(tsn, map->base_tsn))
+		return;
+	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
+		return;
+
+	/* Bump the max.  */
+	if (TSN_lt(map->max_tsn_seen, tsn))
+		map->max_tsn_seen = tsn;
+
+	/* Assert: TSN is in range.  */
+	gap = tsn - map->base_tsn + 1;
+
+	/* Mark the TSNs as received.  */
+	if (gap <= map->len)
+		memset(map->tsn_map, 0x01, gap);
+	else {
+		memset(map->tsn_map, 0x01, map->len);
+		memset(map->overflow_map, 0x01, (gap - map->len));
+	}
+
+	/* Go fixup any internal TSN mapping variables including
+	 * cumulative_tsn_ack_point.
+	 */
+	sctp_tsnmap_update(map);
+}
+
+/********************************************************************
+ * 2nd Level Abstractions
+ ********************************************************************/
+
+/* This private helper function updates the tsnmap buffers and
+ * the Cumulative TSN Ack Point.
+ */
+static void sctp_tsnmap_update(struct sctp_tsnmap *map)
+{
+	__u32 ctsn;
+
+	ctsn = map->cumulative_tsn_ack_point;
+	do {
+		ctsn++;
+		if (ctsn == map->overflow_tsn) {
+			/* Now tsn_map must have been all '1's,
+			 * so we swap the map and check the overflow table
+			 */
+        		__u8 *tmp = map->tsn_map;
+			memset(tmp, 0, map->len);
+			map->tsn_map = map->overflow_map;
+			map->overflow_map = tmp;
+
+			/* Update the tsn_map boundaries.  */
+			map->base_tsn += map->len;
+			map->overflow_tsn += map->len;
+		}
+	} while (map->tsn_map[ctsn - map->base_tsn]);
+
+	map->cumulative_tsn_ack_point = ctsn - 1; /* Back up one. */
+}
+
+/* How many data chunks  are we missing from our peer?
+ */
+__u16 sctp_tsnmap_pending(struct sctp_tsnmap *map)
+{
+	__u32 cum_tsn = map->cumulative_tsn_ack_point;
+	__u32 max_tsn = map->max_tsn_seen;
+	__u32 base_tsn = map->base_tsn;
+	__u16 pending_data;
+	__s32 gap, start, end, i;
+
+	pending_data = max_tsn - cum_tsn;
+	gap = max_tsn - base_tsn;
+
+	if (gap <= 0 || gap >= (map->len + map->len))
+		goto out;
+
+	start = ((cum_tsn >= base_tsn) ? (cum_tsn - base_tsn + 1) : 0);
+	end = ((gap > map->len ) ? map->len : gap + 1);
+
+	for (i = start; i < end; i++) {
+		if (map->tsn_map[i])
+			pending_data--;
+	}
+
+	if (gap >= map->len) {
+		start = 0;
+		end = gap - map->len + 1;
+		for (i = start; i < end; i++) {
+			if (map->overflow_map[i])
+				pending_data--;
+		}
+	}
+
+out:
+	return pending_data;
+}
+
+/* This is a private helper for finding Gap Ack Blocks.  It searches a
+ * single array for the start and end of a Gap Ack Block.
+ *
+ * The flags "started" and "ended" tell is if we found the beginning
+ * or (respectively) the end of a Gap Ack Block.
+ */
+static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off,
+				     __u16 len, __u16 base,
+				     int *started, __u16 *start,
+				     int *ended, __u16 *end)
+{
+	int i = off;
+
+	/* Look through the entire array, but break out
+	 * early if we have found the end of the Gap Ack Block.
+	 */
+
+	/* Also, stop looking past the maximum TSN seen. */
+
+	/* Look for the start. */
+	if (!(*started)) {
+		for (; i < len; i++) {
+			if (map[i]) {
+				(*started)++;
+				*start = base + i;
+				break;
+			}
+		}
+	}
+
+	/* Look for the end.  */
+	if (*started) {
+		/* We have found the start, let's find the
+		 * end.  If we find the end, break out.
+		 */
+		for (; i < len; i++) {
+			if (!map[i]) {
+				(*ended)++;
+				*end = base + i - 1;
+				break;
+			}
+		}
+	}
+}
+
+/* Renege that we have seen a TSN.  */
+void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn)
+{
+	__s32 gap;
+
+	if (TSN_lt(tsn, map->base_tsn))
+		return;
+	if (!TSN_lt(tsn, map->base_tsn + map->len + map->len))
+		return;
+
+	/* Assert: TSN is in range.  */
+	gap = tsn - map->base_tsn;
+
+	/* Pretend we never saw the TSN.  */
+	if (gap < map->len)
+		map->tsn_map[gap] = 0;
+	else
+		map->overflow_map[gap - map->len] = 0;
+}
+
+/* How many gap ack blocks do we have recorded? */
+__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map)
+{
+	struct sctp_tsnmap_iter iter;
+	int gabs = 0;
+
+	/* Refresh the gap ack information. */
+	if (sctp_tsnmap_has_gap(map)) {
+		sctp_tsnmap_iter_init(map, &iter);
+		while (sctp_tsnmap_next_gap_ack(map, &iter,
+						&map->gabs[gabs].start,
+						&map->gabs[gabs].end)) {
+
+			map->gabs[gabs].start = htons(map->gabs[gabs].start);
+			map->gabs[gabs].end = htons(map->gabs[gabs].end);
+			gabs++;
+			if (gabs >= SCTP_MAX_GABS)
+				break;
+		}
+	}
+	return gabs;
+}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
new file mode 100644
index 00000000000..17d0ff53473
--- /dev/null
+++ b/net/sctp/ulpevent.c
@@ -0,0 +1,942 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * These functions manipulate an sctp event.   The struct ulpevent is used
+ * to carry notifications and data to the ULP (sockets).
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Ardelle Fan	    <ardelle.fan@intel.com>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <net/sctp/structs.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
+				       struct sctp_association *asoc);
+static void sctp_ulpevent_release_data(struct sctp_ulpevent *event);
+
+/* Stub skb destructor.  */
+static void sctp_stub_rfree(struct sk_buff *skb)
+{
+/* WARNING:  This function is just a warning not to use the
+ * skb destructor.  If the skb is shared, we may get the destructor
+ * callback on some processor that does not own the sock_lock.  This
+ * was occuring with PACKET socket applications that were monitoring
+ * our skbs.   We can't take the sock_lock, because we can't risk
+ * recursing if we do really own the sock lock.  Instead, do all
+ * of our rwnd manipulation while we own the sock_lock outright.
+ */
+}
+
+/* Initialize an ULP event from an given skb.  */
+SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags)
+{
+	memset(event, 0, sizeof(struct sctp_ulpevent));
+	event->msg_flags = msg_flags;
+}
+
+/* Create a new sctp_ulpevent.  */
+SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
+						    int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(size, gfp);
+	if (!skb)
+		goto fail;
+
+	event = sctp_skb2event(skb);
+	sctp_ulpevent_init(event, msg_flags);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Is this a MSG_NOTIFICATION?  */
+int sctp_ulpevent_is_notification(const struct sctp_ulpevent *event)
+{
+	return MSG_NOTIFICATION == (event->msg_flags & MSG_NOTIFICATION);
+}
+
+/* Hold the association in case the msg_name needs read out of
+ * the association.
+ */
+static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event,
+					   const struct sctp_association *asoc)
+{
+	struct sk_buff *skb;
+
+	/* Cast away the const, as we are just wanting to
+	 * bump the reference count.
+	 */
+	sctp_association_hold((struct sctp_association *)asoc);
+	skb = sctp_event2skb(event);
+	skb->sk = asoc->base.sk;
+	event->asoc = (struct sctp_association *)asoc;
+	skb->destructor = sctp_stub_rfree;
+}
+
+/* A simple destructor to give up the reference to the association. */
+static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
+{
+	sctp_association_put(event->asoc);
+}
+
+/* Create and initialize an SCTP_ASSOC_CHANGE event.
+ *
+ * 5.3.1.1 SCTP_ASSOC_CHANGE
+ *
+ * Communication notifications inform the ULP that an SCTP association
+ * has either begun or ended. The identifier for a new association is
+ * provided by this notification.
+ *
+ * Note: There is no field checking here.  If a field is unused it will be
+ * zero'd out.
+ */
+struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
+	const struct sctp_association *asoc,
+	__u16 flags, __u16 state, __u16 error, __u16 outbound,
+	__u16 inbound, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_assoc_change *sac;
+	struct sk_buff *skb;
+
+	event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
+				  MSG_NOTIFICATION, gfp);
+	if (!event)
+		goto fail;
+	skb = sctp_event2skb(event);
+	sac = (struct sctp_assoc_change *)
+		skb_put(skb, sizeof(struct sctp_assoc_change));
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_type:
+	 * It should be SCTP_ASSOC_CHANGE.
+	 */
+	sac->sac_type = SCTP_ASSOC_CHANGE;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_state: 32 bits (signed integer)
+	 * This field holds one of a number of values that communicate the
+	 * event that happened to the association.
+	 */
+	sac->sac_state = state;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_flags: 16 bits (unsigned integer)
+	 * Currently unused.
+	 */
+	sac->sac_flags = 0;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_length: sizeof (__u32)
+	 * This field is the total length of the notification data, including
+	 * the notification header.
+	 */
+	sac->sac_length = sizeof(struct sctp_assoc_change);
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_error:  32 bits (signed integer)
+	 *
+	 * If the state was reached due to a error condition (e.g.
+	 * COMMUNICATION_LOST) any relevant error information is available in
+	 * this field. This corresponds to the protocol error codes defined in
+	 * [SCTP].
+	 */
+	sac->sac_error = error;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_outbound_streams:  16 bits (unsigned integer)
+	 * sac_inbound_streams:  16 bits (unsigned integer)
+	 *
+	 * The maximum number of streams allowed in each direction are
+	 * available in sac_outbound_streams and sac_inbound streams.
+	 */
+	sac->sac_outbound_streams = outbound;
+	sac->sac_inbound_streams = inbound;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * sac_assoc_id: sizeof (sctp_assoc_t)
+	 *
+	 * The association id field, holds the identifier for the association.
+	 * All notifications for a given association have the same association
+	 * identifier.  For TCP style socket, this field is ignored.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	sac->sac_assoc_id = sctp_assoc2id(asoc);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Create and initialize an SCTP_PEER_ADDR_CHANGE event.
+ *
+ * Socket Extensions for SCTP - draft-01
+ * 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+ *
+ * When a destination address on a multi-homed peer encounters a change
+ * an interface details event is sent.
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
+	const struct sctp_association *asoc,
+	const struct sockaddr_storage *aaddr,
+	int flags, int state, int error, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_paddr_change  *spc;
+	struct sk_buff *skb;
+
+	event = sctp_ulpevent_new(sizeof(struct sctp_paddr_change),
+				  MSG_NOTIFICATION, gfp);
+	if (!event)
+		goto fail;
+
+	skb = sctp_event2skb(event);
+	spc = (struct sctp_paddr_change *)
+		skb_put(skb, sizeof(struct sctp_paddr_change));
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_type:
+	 *
+	 *    It should be SCTP_PEER_ADDR_CHANGE.
+	 */
+	spc->spc_type = SCTP_PEER_ADDR_CHANGE;
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_length: sizeof (__u32)
+	 *
+	 * This field is the total length of the notification data, including
+	 * the notification header.
+	 */
+	spc->spc_length = sizeof(struct sctp_paddr_change);
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_flags: 16 bits (unsigned integer)
+	 * Currently unused.
+	 */
+	spc->spc_flags = 0;
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_state:  32 bits (signed integer)
+	 *
+	 * This field holds one of a number of values that communicate the
+	 * event that happened to the address.
+	 */
+	spc->spc_state = state;
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_error:  32 bits (signed integer)
+	 *
+	 * If the state was reached due to any error condition (e.g.
+	 * ADDRESS_UNREACHABLE) any relevant error information is available in
+	 * this field.
+	 */
+	spc->spc_error = error;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.1 SCTP_ASSOC_CHANGE
+	 *
+	 * spc_assoc_id: sizeof (sctp_assoc_t)
+	 *
+	 * The association id field, holds the identifier for the association.
+	 * All notifications for a given association have the same association
+	 * identifier.  For TCP style socket, this field is ignored.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	spc->spc_assoc_id = sctp_assoc2id(asoc);
+
+	/* Sockets API Extensions for SCTP
+	 * Section 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+	 *
+	 * spc_aaddr: sizeof (struct sockaddr_storage)
+	 *
+	 * The affected address field, holds the remote peer's address that is
+	 * encountering the change of state.
+	 */
+	memcpy(&spc->spc_aaddr, aaddr, sizeof(struct sockaddr_storage));
+
+	/* Map ipv4 address into v4-mapped-on-v6 address.  */
+	sctp_get_pf_specific(asoc->base.sk->sk_family)->addr_v4map(
+					sctp_sk(asoc->base.sk),
+					(union sctp_addr *)&spc->spc_aaddr);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Create and initialize an SCTP_REMOTE_ERROR notification.
+ *
+ * Note: This assumes that the chunk->skb->data already points to the
+ * operation error payload.
+ *
+ * Socket Extensions for SCTP - draft-01
+ * 5.3.1.3 SCTP_REMOTE_ERROR
+ *
+ * A remote peer may send an Operational Error message to its peer.
+ * This message indicates a variety of error conditions on an
+ * association. The entire error TLV as it appears on the wire is
+ * included in a SCTP_REMOTE_ERROR event.  Please refer to the SCTP
+ * specification [SCTP] and any extensions for a list of possible
+ * error formats.
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
+	const struct sctp_association *asoc, struct sctp_chunk *chunk,
+	__u16 flags, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_remote_error *sre;
+	struct sk_buff *skb;
+	sctp_errhdr_t *ch;
+	__u16 cause;
+	int elen;
+
+	ch = (sctp_errhdr_t *)(chunk->skb->data);
+	cause = ch->cause;
+	elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+
+	/* Pull off the ERROR header.  */
+	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+
+	/* Copy the skb to a new skb with room for us to prepend
+	 * notification with.
+	 */
+	skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
+			      0, gfp);
+
+	/* Pull off the rest of the cause TLV from the chunk.  */
+	skb_pull(chunk->skb, elen);
+	if (!skb)
+		goto fail;
+
+	/* Embed the event fields inside the cloned skb.  */
+	event = sctp_skb2event(skb);
+	sctp_ulpevent_init(event, MSG_NOTIFICATION);
+
+	sre = (struct sctp_remote_error *)
+		skb_push(skb, sizeof(struct sctp_remote_error));
+
+	/* Trim the buffer to the right length.  */
+	skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.3 SCTP_REMOTE_ERROR
+	 *
+	 * sre_type:
+	 *   It should be SCTP_REMOTE_ERROR.
+	 */
+	sre->sre_type = SCTP_REMOTE_ERROR;
+
+	/*
+	 * Socket Extensions for SCTP
+	 * 5.3.1.3 SCTP_REMOTE_ERROR
+	 *
+	 * sre_flags: 16 bits (unsigned integer)
+	 *   Currently unused.
+	 */
+	sre->sre_flags = 0;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.3 SCTP_REMOTE_ERROR
+	 *
+	 * sre_length: sizeof (__u32)
+	 *
+	 * This field is the total length of the notification data,
+	 * including the notification header.
+	 */
+	sre->sre_length = skb->len;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.3 SCTP_REMOTE_ERROR
+	 *
+	 * sre_error: 16 bits (unsigned integer)
+	 * This value represents one of the Operational Error causes defined in
+	 * the SCTP specification, in network byte order.
+	 */
+	sre->sre_error = cause;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.3 SCTP_REMOTE_ERROR
+	 *
+	 * sre_assoc_id: sizeof (sctp_assoc_t)
+	 *
+	 * The association id field, holds the identifier for the association.
+	 * All notifications for a given association have the same association
+	 * identifier.  For TCP style socket, this field is ignored.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	sre->sre_assoc_id = sctp_assoc2id(asoc);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Create and initialize a SCTP_SEND_FAILED notification.
+ *
+ * Socket Extensions for SCTP - draft-01
+ * 5.3.1.4 SCTP_SEND_FAILED
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
+	const struct sctp_association *asoc, struct sctp_chunk *chunk,
+	__u16 flags, __u32 error, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_send_failed *ssf;
+	struct sk_buff *skb;
+
+	/* Pull off any padding. */
+	int len = ntohs(chunk->chunk_hdr->length);
+
+	/* Make skb with more room so we can prepend notification.  */
+	skb = skb_copy_expand(chunk->skb,
+			      sizeof(struct sctp_send_failed), /* headroom */
+			      0,                               /* tailroom */
+			      gfp);
+	if (!skb)
+		goto fail;
+
+	/* Pull off the common chunk header and DATA header.  */
+	skb_pull(skb, sizeof(struct sctp_data_chunk));
+	len -= sizeof(struct sctp_data_chunk);
+
+	/* Embed the event fields inside the cloned skb.  */
+	event = sctp_skb2event(skb);
+	sctp_ulpevent_init(event, MSG_NOTIFICATION);
+
+	ssf = (struct sctp_send_failed *)
+		skb_push(skb, sizeof(struct sctp_send_failed));
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_type:
+	 * It should be SCTP_SEND_FAILED.
+	 */
+	ssf->ssf_type = SCTP_SEND_FAILED;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_flags: 16 bits (unsigned integer)
+	 * The flag value will take one of the following values
+	 *
+	 * SCTP_DATA_UNSENT - Indicates that the data was never put on
+	 *                    the wire.
+	 *
+	 * SCTP_DATA_SENT   - Indicates that the data was put on the wire.
+	 *                    Note that this does not necessarily mean that the
+	 *                    data was (or was not) successfully delivered.
+	 */
+	ssf->ssf_flags = flags;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_length: sizeof (__u32)
+	 * This field is the total length of the notification data, including
+	 * the notification header.
+	 */
+	ssf->ssf_length = sizeof(struct sctp_send_failed) + len;
+	skb_trim(skb, ssf->ssf_length);
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_error: 16 bits (unsigned integer)
+	 * This value represents the reason why the send failed, and if set,
+	 * will be a SCTP protocol error code as defined in [SCTP] section
+	 * 3.3.10.
+	 */
+	ssf->ssf_error = error;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_info: sizeof (struct sctp_sndrcvinfo)
+	 * The original send information associated with the undelivered
+	 * message.
+	 */
+	memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
+
+	/* Per TSVWG discussion with Randy. Allow the application to
+	 * ressemble a fragmented message.
+	 */
+	ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.4 SCTP_SEND_FAILED
+	 *
+	 * ssf_assoc_id: sizeof (sctp_assoc_t)
+	 * The association id field, sf_assoc_id, holds the identifier for the
+	 * association.  All notifications for a given association have the
+	 * same association identifier.  For TCP style socket, this field is
+	 * ignored.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	ssf->ssf_assoc_id = sctp_assoc2id(asoc);
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Create and initialize a SCTP_SHUTDOWN_EVENT notification.
+ *
+ * Socket Extensions for SCTP - draft-01
+ * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
+	const struct sctp_association *asoc,
+	__u16 flags, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_shutdown_event *sse;
+	struct sk_buff *skb;
+
+	event = sctp_ulpevent_new(sizeof(struct sctp_shutdown_event),
+				  MSG_NOTIFICATION, gfp);
+	if (!event)
+		goto fail;
+
+	skb = sctp_event2skb(event);
+	sse = (struct sctp_shutdown_event *)
+		skb_put(skb, sizeof(struct sctp_shutdown_event));
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 *
+	 * sse_type
+	 * It should be SCTP_SHUTDOWN_EVENT
+	 */
+	sse->sse_type = SCTP_SHUTDOWN_EVENT;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 *
+	 * sse_flags: 16 bits (unsigned integer)
+	 * Currently unused.
+	 */
+	sse->sse_flags = 0;
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 *
+	 * sse_length: sizeof (__u32)
+	 * This field is the total length of the notification data, including
+	 * the notification header.
+	 */
+	sse->sse_length = sizeof(struct sctp_shutdown_event);
+
+	/* Socket Extensions for SCTP
+	 * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 *
+	 * sse_assoc_id: sizeof (sctp_assoc_t)
+	 * The association id field, holds the identifier for the association.
+	 * All notifications for a given association have the same association
+	 * identifier.  For TCP style socket, this field is ignored.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	sse->sse_assoc_id = sctp_assoc2id(asoc);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* Create and initialize a SCTP_ADAPTION_INDICATION notification.
+ *
+ * Socket Extensions for SCTP
+ * 5.3.1.6 SCTP_ADAPTION_INDICATION
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication(
+	const struct sctp_association *asoc, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_adaption_event *sai;
+	struct sk_buff *skb;
+
+	event = sctp_ulpevent_new(sizeof(struct sctp_adaption_event),
+				  MSG_NOTIFICATION, gfp);
+	if (!event)
+		goto fail;
+
+	skb = sctp_event2skb(event);
+	sai = (struct sctp_adaption_event *)
+		skb_put(skb, sizeof(struct sctp_adaption_event));
+
+	sai->sai_type = SCTP_ADAPTION_INDICATION;
+	sai->sai_flags = 0;
+	sai->sai_length = sizeof(struct sctp_adaption_event);
+	sai->sai_adaption_ind = asoc->peer.adaption_ind;
+	sctp_ulpevent_set_owner(event, asoc);
+	sai->sai_assoc_id = sctp_assoc2id(asoc);
+
+	return event;
+
+fail:
+	return NULL;
+}
+
+/* A message has been received.  Package this message as a notification
+ * to pass it to the upper layers.  Go ahead and calculate the sndrcvinfo
+ * even if filtered out later.
+ *
+ * Socket Extensions for SCTP
+ * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
+						struct sctp_chunk *chunk,
+						int gfp)
+{
+	struct sctp_ulpevent *event = NULL;
+	struct sk_buff *skb;
+	size_t padding, len;
+
+	/* Clone the original skb, sharing the data.  */
+	skb = skb_clone(chunk->skb, gfp);
+	if (!skb)
+		goto fail;
+
+	/* First calculate the padding, so we don't inadvertently
+	 * pass up the wrong length to the user.
+	 *
+	 * RFC 2960 - Section 3.2  Chunk Field Descriptions
+	 *
+	 * The total length of a chunk(including Type, Length and Value fields)
+	 * MUST be a multiple of 4 bytes.  If the length of the chunk is not a
+	 * multiple of 4 bytes, the sender MUST pad the chunk with all zero
+	 * bytes and this padding is not included in the chunk length field.
+	 * The sender should never pad with more than 3 bytes.  The receiver
+	 * MUST ignore the padding bytes.
+	 */
+	len = ntohs(chunk->chunk_hdr->length);
+	padding = WORD_ROUND(len) - len;
+
+	/* Fixup cloned skb with just this chunks data.  */
+	skb_trim(skb, chunk->chunk_end - padding - skb->data);
+
+	/* Embed the event fields inside the cloned skb.  */
+	event = sctp_skb2event(skb);
+
+	/* Initialize event with flags 0.  */
+	sctp_ulpevent_init(event, 0);
+
+	sctp_ulpevent_receive_data(event, asoc);
+
+	event->stream = ntohs(chunk->subh.data_hdr->stream);
+	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
+	event->ppid = chunk->subh.data_hdr->ppid;
+	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
+		event->flags |= MSG_UNORDERED;
+		event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
+	}
+	event->tsn = ntohl(chunk->subh.data_hdr->tsn);
+	event->msg_flags |= chunk->chunk_hdr->flags;
+	event->iif = sctp_chunk_iif(chunk);
+
+fail:
+	return event;
+}
+
+/* Create a partial delivery related event.
+ *
+ * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT
+ *
+ *   When a receiver is engaged in a partial delivery of a
+ *   message this notification will be used to indicate
+ *   various events.
+ */
+struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
+	const struct sctp_association *asoc, __u32 indication, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_pdapi_event *pd;
+	struct sk_buff *skb;
+
+	event = sctp_ulpevent_new(sizeof(struct sctp_pdapi_event),
+				  MSG_NOTIFICATION, gfp);
+	if (!event)
+		goto fail;
+
+	skb = sctp_event2skb(event);
+	pd = (struct sctp_pdapi_event *)
+		skb_put(skb, sizeof(struct sctp_pdapi_event));
+
+	/* pdapi_type
+	 *   It should be SCTP_PARTIAL_DELIVERY_EVENT
+	 *
+	 * pdapi_flags: 16 bits (unsigned integer)
+	 *   Currently unused.
+	 */
+	pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
+	pd->pdapi_flags = 0;
+
+	/* pdapi_length: 32 bits (unsigned integer)
+	 *
+	 * This field is the total length of the notification data, including
+	 * the notification header.  It will generally be sizeof (struct
+	 * sctp_pdapi_event).
+	 */
+	pd->pdapi_length = sizeof(struct sctp_pdapi_event);
+
+        /*  pdapi_indication: 32 bits (unsigned integer)
+	 *
+	 * This field holds the indication being sent to the application.
+	 */
+	pd->pdapi_indication = indication;
+
+	/*  pdapi_assoc_id: sizeof (sctp_assoc_t)
+	 *
+	 * The association id field, holds the identifier for the association.
+	 */
+	sctp_ulpevent_set_owner(event, asoc);
+	pd->pdapi_assoc_id = sctp_assoc2id(asoc);
+
+	return event;
+fail:
+	return NULL;
+}
+
+/* Return the notification type, assuming this is a notification
+ * event.
+ */
+__u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
+{
+	union sctp_notification *notification;
+	struct sk_buff *skb;
+
+	skb = sctp_event2skb((struct sctp_ulpevent *)event);
+	notification = (union sctp_notification *) skb->data;
+	return notification->sn_header.sn_type;
+}
+
+/* Copy out the sndrcvinfo into a msghdr.  */
+void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
+				   struct msghdr *msghdr)
+{
+	struct sctp_sndrcvinfo sinfo;
+
+	if (sctp_ulpevent_is_notification(event))
+		return;
+
+	/* Sockets API Extensions for SCTP
+ 	 * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
+ 	 *
+ 	 * sinfo_stream: 16 bits (unsigned integer)
+ 	 *
+ 	 * For recvmsg() the SCTP stack places the message's stream number in
+ 	 * this value.
+ 	*/
+	sinfo.sinfo_stream = event->stream;
+	/* sinfo_ssn: 16 bits (unsigned integer)
+	 *
+	 * For recvmsg() this value contains the stream sequence number that
+	 * the remote endpoint placed in the DATA chunk.  For fragmented
+	 * messages this is the same number for all deliveries of the message
+	 * (if more than one recvmsg() is needed to read the message).
+	 */
+	sinfo.sinfo_ssn = event->ssn;
+	/* sinfo_ppid: 32 bits (unsigned integer)
+	 *
+	 * In recvmsg() this value is
+	 * the same information that was passed by the upper layer in the peer
+	 * application.  Please note that byte order issues are NOT accounted
+	 * for and this information is passed opaquely by the SCTP stack from
+	 * one end to the other.
+	 */
+	sinfo.sinfo_ppid = event->ppid;
+	/* sinfo_flags: 16 bits (unsigned integer)
+	 *
+	 * This field may contain any of the following flags and is composed of
+	 * a bitwise OR of these values.
+	 *
+	 * recvmsg() flags:
+	 *
+	 * MSG_UNORDERED - This flag is present when the message was sent
+	 *                 non-ordered.
+	 */
+	sinfo.sinfo_flags = event->flags;
+	/* sinfo_tsn: 32 bit (unsigned integer)
+	 *
+	 * For the receiving side, this field holds a TSN that was 
+	 * assigned to one of the SCTP Data Chunks.
+	 */
+	sinfo.sinfo_tsn = event->tsn;
+	/* sinfo_cumtsn: 32 bit (unsigned integer)
+	 *
+	 * This field will hold the current cumulative TSN as
+	 * known by the underlying SCTP layer.  Note this field is
+	 * ignored when sending and only valid for a receive
+	 * operation when sinfo_flags are set to MSG_UNORDERED.
+	 */
+	sinfo.sinfo_cumtsn = event->cumtsn;
+	/* sinfo_assoc_id: sizeof (sctp_assoc_t)
+	 *
+	 * The association handle field, sinfo_assoc_id, holds the identifier
+	 * for the association announced in the COMMUNICATION_UP notification.
+	 * All notifications for a given association have the same identifier.
+	 * Ignored for one-to-one style sockets.
+	 */
+	sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
+
+	/* These fields are not used while receiving. */
+	sinfo.sinfo_context = 0;
+	sinfo.sinfo_timetolive = 0;
+
+	put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
+		 sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+}
+
+/* Do accounting for bytes received and hold a reference to the association
+ * for each skb.
+ */
+static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
+				       struct sctp_association *asoc)
+{
+	struct sk_buff *skb, *frag;
+
+	skb = sctp_event2skb(event);
+	/* Set the owner and charge rwnd for bytes received.  */
+	sctp_ulpevent_set_owner(event, asoc);
+	sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
+
+	if (!skb->data_len)
+		return;
+
+	/* Note:  Not clearing the entire event struct as this is just a
+	 * fragment of the real event.  However, we still need to do rwnd
+	 * accounting.
+	 * In general, the skb passed from IP can have only 1 level of
+	 * fragments. But we allow multiple levels of fragments. 
+	 */
+	for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+		sctp_ulpevent_receive_data(sctp_skb2event(frag), asoc);
+	}
+}
+
+/* Do accounting for bytes just read by user and release the references to
+ * the association.
+ */ 
+static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
+{
+	struct sk_buff *skb, *frag;
+
+	/* Current stack structures assume that the rcv buffer is
+	 * per socket.   For UDP style sockets this is not true as
+	 * multiple associations may be on a single UDP-style socket.
+	 * Use the local private area of the skb to track the owning
+	 * association.
+	 */
+
+	skb = sctp_event2skb(event);
+	sctp_assoc_rwnd_increase(event->asoc, skb_headlen(skb));
+
+	if (!skb->data_len)
+		goto done;
+
+	/* Don't forget the fragments. */
+	for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+		/* NOTE:  skb_shinfos are recursive. Although IP returns
+		 * skb's with only 1 level of fragments, SCTP reassembly can
+		 * increase the levels.
+		 */
+		sctp_ulpevent_release_data(sctp_skb2event(frag));
+	}
+
+done:
+	sctp_ulpevent_release_owner(event);
+}
+
+/* Free a ulpevent that has an owner.  It includes releasing the reference
+ * to the owner, updating the rwnd in case of a DATA event and freeing the
+ * skb.
+ * See comments in sctp_stub_rfree().
+ */
+void sctp_ulpevent_free(struct sctp_ulpevent *event)
+{
+	if (sctp_ulpevent_is_notification(event))
+		sctp_ulpevent_release_owner(event);
+	else
+		sctp_ulpevent_release_data(event);
+
+	kfree_skb(sctp_event2skb(event));
+}
+
+/* Purge the skb lists holding ulpevents. */
+void sctp_queue_purge_ulpevents(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(list)) != NULL)
+		sctp_ulpevent_free(sctp_skb2event(skb));
+}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
new file mode 100644
index 00000000000..d5dd2cf7ac4
--- /dev/null
+++ b/net/sctp/ulpqueue.c
@@ -0,0 +1,864 @@
+/* SCTP kernel reference Implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2001 Intel Corp.
+ * Copyright (c) 2001 Nokia, Inc.
+ * Copyright (c) 2001 La Monte H.P. Yarroll
+ *
+ * This abstraction carries sctp events to the ULP (sockets).
+ *
+ * The SCTP reference implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * The SCTP reference implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    Jon Grimm             <jgrimm@us.ibm.com>
+ *    La Monte H.P. Yarroll <piggy@acm.org>
+ *    Sridhar Samudrala     <sri@us.ibm.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/sctp/structs.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+/* Forward declarations for internal helpers.  */
+static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
+						struct sctp_ulpevent *);
+static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *,
+						struct sctp_ulpevent *);
+
+/* 1st Level Abstractions */
+
+/* Initialize a ULP queue from a block of memory.  */
+struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
+				 struct sctp_association *asoc)
+{
+	memset(ulpq, 0, sizeof(struct sctp_ulpq));
+
+	ulpq->asoc = asoc;
+	skb_queue_head_init(&ulpq->reasm);
+	skb_queue_head_init(&ulpq->lobby);
+	ulpq->pd_mode  = 0;
+	ulpq->malloced = 0;
+
+	return ulpq;
+}
+
+
+/* Flush the reassembly and ordering queues.  */
+static void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
+{
+	struct sk_buff *skb;
+	struct sctp_ulpevent *event;
+
+	while ((skb = __skb_dequeue(&ulpq->lobby)) != NULL) {
+		event = sctp_skb2event(skb);
+		sctp_ulpevent_free(event);
+	}
+
+	while ((skb = __skb_dequeue(&ulpq->reasm)) != NULL) {
+		event = sctp_skb2event(skb);
+		sctp_ulpevent_free(event);
+	}
+
+}
+
+/* Dispose of a ulpqueue.  */
+void sctp_ulpq_free(struct sctp_ulpq *ulpq)
+{
+	sctp_ulpq_flush(ulpq);
+	if (ulpq->malloced)
+		kfree(ulpq);
+}
+
+/* Process an incoming DATA chunk.  */
+int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
+			int gfp)
+{
+	struct sk_buff_head temp;
+	sctp_data_chunk_t *hdr;
+	struct sctp_ulpevent *event;
+
+	hdr = (sctp_data_chunk_t *) chunk->chunk_hdr;
+
+	/* Create an event from the incoming chunk. */
+	event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
+	if (!event)
+		return -ENOMEM;
+
+	/* Do reassembly if needed.  */
+	event = sctp_ulpq_reasm(ulpq, event);
+
+	/* Do ordering if needed.  */
+	if ((event) && (event->msg_flags & MSG_EOR)){
+		/* Create a temporary list to collect chunks on.  */
+		skb_queue_head_init(&temp);
+		__skb_queue_tail(&temp, sctp_event2skb(event));
+
+		event = sctp_ulpq_order(ulpq, event);
+	}
+
+	/* Send event to the ULP.  */
+	if (event)
+		sctp_ulpq_tail_event(ulpq, event);
+
+	return 0;
+}
+
+/* Add a new event for propagation to the ULP.  */
+/* Clear the partial delivery mode for this socket.   Note: This
+ * assumes that no association is currently in partial delivery mode.
+ */
+int sctp_clear_pd(struct sock *sk)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+
+	sp->pd_mode = 0;
+	if (!skb_queue_empty(&sp->pd_lobby)) {
+		struct list_head *list;
+		sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
+		list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
+		INIT_LIST_HEAD(list);
+		return 1;
+	}
+	return 0;
+}
+
+/* Clear the pd_mode and restart any pending messages waiting for delivery. */
+static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
+{
+	ulpq->pd_mode = 0;
+	return sctp_clear_pd(ulpq->asoc->base.sk);
+}
+
+
+
+int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+{
+	struct sock *sk = ulpq->asoc->base.sk;
+	struct sk_buff_head *queue;
+	int clear_pd = 0;
+
+	/* If the socket is just going to throw this away, do not
+	 * even try to deliver it.
+	 */
+	if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
+		goto out_free;
+
+	/* Check if the user wishes to receive this event.  */
+	if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
+		goto out_free;
+
+	/* If we are in partial delivery mode, post to the lobby until
+	 * partial delivery is cleared, unless, of course _this_ is
+	 * the association the cause of the partial delivery.
+	 */
+
+	if (!sctp_sk(sk)->pd_mode) {
+		queue = &sk->sk_receive_queue;
+	} else if (ulpq->pd_mode) {
+		if (event->msg_flags & MSG_NOTIFICATION)
+		       	queue = &sctp_sk(sk)->pd_lobby;
+		else {
+			clear_pd = event->msg_flags & MSG_EOR;
+			queue = &sk->sk_receive_queue;
+		}
+	} else
+		queue = &sctp_sk(sk)->pd_lobby;
+
+
+	/* If we are harvesting multiple skbs they will be
+	 * collected on a list.
+	 */
+	if (sctp_event2skb(event)->list)
+		sctp_skb_list_tail(sctp_event2skb(event)->list, queue);
+	else
+		__skb_queue_tail(queue, sctp_event2skb(event));
+
+	/* Did we just complete partial delivery and need to get
+	 * rolling again?  Move pending data to the receive
+	 * queue.
+	 */
+	if (clear_pd)
+		sctp_ulpq_clear_pd(ulpq);
+
+	if (queue == &sk->sk_receive_queue)
+		sk->sk_data_ready(sk, 0);
+	return 1;
+
+out_free:
+	if (sctp_event2skb(event)->list)
+		sctp_queue_purge_ulpevents(sctp_event2skb(event)->list);
+	else
+		sctp_ulpevent_free(event);
+	return 0;
+}
+
+/* 2nd Level Abstractions */
+
+/* Helper function to store chunks that need to be reassembled.  */
+static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
+					 struct sctp_ulpevent *event)
+{
+	struct sk_buff *pos;
+	struct sctp_ulpevent *cevent;
+	__u32 tsn, ctsn;
+
+	tsn = event->tsn;
+
+	/* See if it belongs at the end. */
+	pos = skb_peek_tail(&ulpq->reasm);
+	if (!pos) {
+		__skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+		return;
+	}
+
+	/* Short circuit just dropping it at the end. */
+	cevent = sctp_skb2event(pos);
+	ctsn = cevent->tsn;
+	if (TSN_lt(ctsn, tsn)) {
+		__skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+		return;
+	}
+
+	/* Find the right place in this list. We store them by TSN.  */
+	skb_queue_walk(&ulpq->reasm, pos) {
+		cevent = sctp_skb2event(pos);
+		ctsn = cevent->tsn;
+
+		if (TSN_lt(tsn, ctsn))
+			break;
+	}
+
+	/* Insert before pos. */
+	__skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm);
+
+}
+
+/* Helper function to return an event corresponding to the reassembled
+ * datagram.
+ * This routine creates a re-assembled skb given the first and last skb's
+ * as stored in the reassembly queue. The skb's may be non-linear if the sctp
+ * payload was fragmented on the way and ip had to reassemble them.
+ * We add the rest of skb's to the first skb's fraglist.
+ */
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag)
+{
+	struct sk_buff *pos;
+	struct sctp_ulpevent *event;
+	struct sk_buff *pnext, *last;
+	struct sk_buff *list = skb_shinfo(f_frag)->frag_list;
+
+	/* Store the pointer to the 2nd skb */
+	if (f_frag == l_frag)
+		pos = NULL;
+	else
+		pos = f_frag->next;
+
+	/* Get the last skb in the f_frag's frag_list if present. */
+	for (last = list; list; last = list, list = list->next);
+
+	/* Add the list of remaining fragments to the first fragments
+	 * frag_list.
+	 */
+	if (last)
+		last->next = pos;
+	else
+		skb_shinfo(f_frag)->frag_list = pos;
+
+	/* Remove the first fragment from the reassembly queue.  */
+	__skb_unlink(f_frag, f_frag->list);
+	while (pos) {
+
+		pnext = pos->next;
+
+		/* Update the len and data_len fields of the first fragment. */
+		f_frag->len += pos->len;
+		f_frag->data_len += pos->len;
+
+		/* Remove the fragment from the reassembly queue.  */
+		__skb_unlink(pos, pos->list);
+	
+		/* Break if we have reached the last fragment.  */
+		if (pos == l_frag)
+			break;
+		pos->next = pnext;
+		pos = pnext;
+	};
+
+	event = sctp_skb2event(f_frag);
+	SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
+
+	return event;
+}
+
+
+/* Helper function to check if an incoming chunk has filled up the last
+ * missing fragment in a SCTP datagram and return the corresponding event.
+ */
+static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ulpq)
+{
+	struct sk_buff *pos;
+	struct sctp_ulpevent *cevent;
+	struct sk_buff *first_frag = NULL;
+	__u32 ctsn, next_tsn;
+	struct sctp_ulpevent *retval = NULL;
+
+	/* Initialized to 0 just to avoid compiler warning message.  Will
+	 * never be used with this value. It is referenced only after it
+	 * is set when we find the first fragment of a message.
+	 */
+	next_tsn = 0;
+
+	/* The chunks are held in the reasm queue sorted by TSN.
+	 * Walk through the queue sequentially and look for a sequence of
+	 * fragmented chunks that complete a datagram.
+	 * 'first_frag' and next_tsn are reset when we find a chunk which
+	 * is the first fragment of a datagram. Once these 2 fields are set
+	 * we expect to find the remaining middle fragments and the last
+	 * fragment in order. If not, first_frag is reset to NULL and we
+	 * start the next pass when we find another first fragment.
+	 */
+	skb_queue_walk(&ulpq->reasm, pos) {
+		cevent = sctp_skb2event(pos);
+		ctsn = cevent->tsn;
+
+		switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+		case SCTP_DATA_FIRST_FRAG:
+			first_frag = pos;
+			next_tsn = ctsn + 1;
+			break;
+
+		case SCTP_DATA_MIDDLE_FRAG:
+			if ((first_frag) && (ctsn == next_tsn))
+				next_tsn++;
+			else
+				first_frag = NULL;
+			break;
+
+		case SCTP_DATA_LAST_FRAG:
+			if (first_frag && (ctsn == next_tsn))
+				goto found;
+			else
+				first_frag = NULL;
+			break;
+		};
+
+	}
+done:
+	return retval;
+found:
+	retval = sctp_make_reassembled_event(first_frag, pos);
+	if (retval)
+		retval->msg_flags |= MSG_EOR;
+	goto done;
+}
+
+/* Retrieve the next set of fragments of a partial message. */
+static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq)
+{
+	struct sk_buff *pos, *last_frag, *first_frag;
+	struct sctp_ulpevent *cevent;
+	__u32 ctsn, next_tsn;
+	int is_last;
+	struct sctp_ulpevent *retval;
+
+	/* The chunks are held in the reasm queue sorted by TSN.
+	 * Walk through the queue sequentially and look for the first
+	 * sequence of fragmented chunks.
+	 */
+
+	if (skb_queue_empty(&ulpq->reasm))
+		return NULL;
+
+	last_frag = first_frag = NULL;
+	retval = NULL;
+	next_tsn = 0;
+	is_last = 0;
+
+	skb_queue_walk(&ulpq->reasm, pos) {
+		cevent = sctp_skb2event(pos);
+		ctsn = cevent->tsn;
+
+		switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+		case SCTP_DATA_MIDDLE_FRAG:
+			if (!first_frag) {
+				first_frag = pos;
+				next_tsn = ctsn + 1;
+				last_frag = pos;
+			} else if (next_tsn == ctsn)
+				next_tsn++;
+			else
+				goto done;
+			break;
+		case SCTP_DATA_LAST_FRAG:
+			if (!first_frag)
+				first_frag = pos;
+			else if (ctsn != next_tsn)
+				goto done;
+			last_frag = pos;
+			is_last = 1;
+			goto done;
+		default:
+			return NULL;
+		};
+	}
+
+	/* We have the reassembled event. There is no need to look
+	 * further.
+	 */
+done:
+	retval = sctp_make_reassembled_event(first_frag, last_frag);
+	if (retval && is_last)
+		retval->msg_flags |= MSG_EOR;
+
+	return retval;
+}
+
+
+/* Helper function to reassemble chunks.  Hold chunks on the reasm queue that
+ * need reassembling.
+ */
+static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
+						struct sctp_ulpevent *event)
+{
+	struct sctp_ulpevent *retval = NULL;
+
+	/* Check if this is part of a fragmented message.  */
+	if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
+		event->msg_flags |= MSG_EOR;
+		return event;
+	}
+
+	sctp_ulpq_store_reasm(ulpq, event);
+	if (!ulpq->pd_mode)
+		retval = sctp_ulpq_retrieve_reassembled(ulpq);
+	else {
+		__u32 ctsn, ctsnap;
+
+		/* Do not even bother unless this is the next tsn to
+		 * be delivered.
+		 */
+		ctsn = event->tsn;
+		ctsnap = sctp_tsnmap_get_ctsn(&ulpq->asoc->peer.tsn_map);
+		if (TSN_lte(ctsn, ctsnap))
+			retval = sctp_ulpq_retrieve_partial(ulpq);
+	}
+
+	return retval;
+}
+
+/* Retrieve the first part (sequential fragments) for partial delivery.  */
+static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq)
+{
+	struct sk_buff *pos, *last_frag, *first_frag;
+	struct sctp_ulpevent *cevent;
+	__u32 ctsn, next_tsn;
+	struct sctp_ulpevent *retval;
+
+	/* The chunks are held in the reasm queue sorted by TSN.
+	 * Walk through the queue sequentially and look for a sequence of
+	 * fragmented chunks that start a datagram.
+	 */
+
+	if (skb_queue_empty(&ulpq->reasm))
+		return NULL;
+
+	last_frag = first_frag = NULL;
+	retval = NULL;
+	next_tsn = 0;
+
+	skb_queue_walk(&ulpq->reasm, pos) {
+		cevent = sctp_skb2event(pos);
+		ctsn = cevent->tsn;
+
+		switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+		case SCTP_DATA_FIRST_FRAG:
+			if (!first_frag) {
+				first_frag = pos;
+				next_tsn = ctsn + 1;
+				last_frag = pos;
+			} else
+				goto done;
+			break;
+
+		case SCTP_DATA_MIDDLE_FRAG:
+			if (!first_frag)
+				return NULL;
+			if (ctsn == next_tsn) {
+				next_tsn++;
+				last_frag = pos;
+			} else
+				goto done;
+			break;
+		default:
+			return NULL;
+		};
+	}
+
+	/* We have the reassembled event. There is no need to look
+	 * further.
+	 */
+done:
+	retval = sctp_make_reassembled_event(first_frag, last_frag);
+	return retval;
+}
+
+/* Helper function to gather skbs that have possibly become
+ * ordered by an an incoming chunk.
+ */
+static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
+					      struct sctp_ulpevent *event)
+{
+	struct sk_buff *pos, *tmp;
+	struct sctp_ulpevent *cevent;
+	struct sctp_stream *in;
+	__u16 sid, csid;
+	__u16 ssn, cssn;
+
+	sid = event->stream;
+	ssn = event->ssn;
+	in  = &ulpq->asoc->ssnmap->in;
+
+	/* We are holding the chunks by stream, by SSN.  */
+	sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
+		cevent = (struct sctp_ulpevent *) pos->cb;
+		csid = cevent->stream;
+		cssn = cevent->ssn;
+
+		/* Have we gone too far?  */
+		if (csid > sid)
+			break;
+
+		/* Have we not gone far enough?  */
+		if (csid < sid)
+			continue;
+
+		if (cssn != sctp_ssn_peek(in, sid))
+			break;
+
+		/* Found it, so mark in the ssnmap. */
+		sctp_ssn_next(in, sid);
+
+		__skb_unlink(pos, pos->list);
+
+		/* Attach all gathered skbs to the event.  */
+		__skb_queue_tail(sctp_event2skb(event)->list, pos);
+	}
+}
+
+/* Helper function to store chunks needing ordering.  */
+static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq,
+					   struct sctp_ulpevent *event)
+{
+	struct sk_buff *pos;
+	struct sctp_ulpevent *cevent;
+	__u16 sid, csid;
+	__u16 ssn, cssn;
+
+	pos = skb_peek_tail(&ulpq->lobby);
+	if (!pos) {
+		__skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+		return;
+	}
+
+	sid = event->stream;
+	ssn = event->ssn;
+	
+	cevent = (struct sctp_ulpevent *) pos->cb;
+	csid = cevent->stream;
+	cssn = cevent->ssn;
+	if (sid > csid) {
+		__skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+		return;
+	}
+
+	if ((sid == csid) && SSN_lt(cssn, ssn)) {
+		__skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+		return;
+	}
+
+	/* Find the right place in this list.  We store them by
+	 * stream ID and then by SSN.
+	 */
+	skb_queue_walk(&ulpq->lobby, pos) {
+		cevent = (struct sctp_ulpevent *) pos->cb;
+		csid = cevent->stream;
+		cssn = cevent->ssn;
+
+		if (csid > sid)
+			break;
+		if (csid == sid && SSN_lt(ssn, cssn))
+			break;
+	}
+
+
+	/* Insert before pos. */
+	__skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby);
+
+}
+
+static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
+						struct sctp_ulpevent *event)
+{
+	__u16 sid, ssn;
+	struct sctp_stream *in;
+
+	/* Check if this message needs ordering.  */
+	if (SCTP_DATA_UNORDERED & event->msg_flags)
+		return event;
+
+	/* Note: The stream ID must be verified before this routine.  */
+	sid = event->stream;
+	ssn = event->ssn;
+	in  = &ulpq->asoc->ssnmap->in;
+
+	/* Is this the expected SSN for this stream ID?  */
+	if (ssn != sctp_ssn_peek(in, sid)) {
+		/* We've received something out of order, so find where it
+		 * needs to be placed.  We order by stream and then by SSN.
+		 */
+		sctp_ulpq_store_ordered(ulpq, event);
+		return NULL;
+	}
+
+	/* Mark that the next chunk has been found.  */
+	sctp_ssn_next(in, sid);
+
+	/* Go find any other chunks that were waiting for
+	 * ordering.
+	 */
+	sctp_ulpq_retrieve_ordered(ulpq, event);
+
+	return event;
+}
+
+/* Helper function to gather skbs that have possibly become
+ * ordered by forward tsn skipping their dependencies.
+ */
+static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
+{
+	struct sk_buff *pos, *tmp;
+	struct sctp_ulpevent *cevent;
+	struct sctp_ulpevent *event = NULL;
+	struct sctp_stream *in;
+	struct sk_buff_head temp;
+	__u16 csid, cssn;
+
+	in  = &ulpq->asoc->ssnmap->in;
+
+	/* We are holding the chunks by stream, by SSN.  */
+	sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
+		cevent = (struct sctp_ulpevent *) pos->cb;
+		csid = cevent->stream;
+		cssn = cevent->ssn;
+
+		if (cssn != sctp_ssn_peek(in, csid))
+			break;
+
+		/* Found it, so mark in the ssnmap. */	       
+		sctp_ssn_next(in, csid);
+
+		__skb_unlink(pos, pos->list);
+		if (!event) {						
+			/* Create a temporary list to collect chunks on.  */
+			event = sctp_skb2event(pos);
+			skb_queue_head_init(&temp);
+			__skb_queue_tail(&temp, sctp_event2skb(event));
+		} else {
+			/* Attach all gathered skbs to the event.  */
+			__skb_queue_tail(sctp_event2skb(event)->list, pos);
+		}
+	}
+
+	/* Send event to the ULP.  */
+	if (event)
+		sctp_ulpq_tail_event(ulpq, event);
+}
+
+/* Skip over an SSN. */
+void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
+{
+	struct sctp_stream *in;
+
+	/* Note: The stream ID must be verified before this routine.  */
+	in  = &ulpq->asoc->ssnmap->in;
+
+	/* Is this an old SSN?  If so ignore. */
+	if (SSN_lt(ssn, sctp_ssn_peek(in, sid)))
+		return;
+
+	/* Mark that we are no longer expecting this SSN or lower. */
+	sctp_ssn_skip(in, sid, ssn);
+
+	/* Go find any other chunks that were waiting for
+	 * ordering and deliver them if needed. 
+	 */
+	sctp_ulpq_reap_ordered(ulpq);
+	return;
+}
+
+/* Renege 'needed' bytes from the ordering queue. */
+static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed)
+{
+	__u16 freed = 0;
+	__u32 tsn;
+	struct sk_buff *skb;
+	struct sctp_ulpevent *event;
+	struct sctp_tsnmap *tsnmap;
+
+	tsnmap = &ulpq->asoc->peer.tsn_map;
+
+	while ((skb = __skb_dequeue_tail(&ulpq->lobby)) != NULL) {
+		freed += skb_headlen(skb);
+		event = sctp_skb2event(skb);
+		tsn = event->tsn;
+
+		sctp_ulpevent_free(event);
+		sctp_tsnmap_renege(tsnmap, tsn);
+		if (freed >= needed)
+			return freed;
+	}
+
+	return freed;
+}
+
+/* Renege 'needed' bytes from the reassembly queue. */
+static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
+{
+	__u16 freed = 0;
+	__u32 tsn;
+	struct sk_buff *skb;
+	struct sctp_ulpevent *event;
+	struct sctp_tsnmap *tsnmap;
+
+	tsnmap = &ulpq->asoc->peer.tsn_map;
+
+	/* Walk backwards through the list, reneges the newest tsns. */
+	while ((skb = __skb_dequeue_tail(&ulpq->reasm)) != NULL) {
+		freed += skb_headlen(skb);
+		event = sctp_skb2event(skb);
+		tsn = event->tsn;
+
+		sctp_ulpevent_free(event);
+		sctp_tsnmap_renege(tsnmap, tsn);
+		if (freed >= needed)
+			return freed;
+	}
+
+	return freed;
+}
+
+/* Partial deliver the first message as there is pressure on rwnd. */
+void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
+				struct sctp_chunk *chunk, int gfp)
+{
+	struct sctp_ulpevent *event;
+	struct sctp_association *asoc;
+
+	asoc = ulpq->asoc;
+
+	/* Are we already in partial delivery mode?  */
+	if (!sctp_sk(asoc->base.sk)->pd_mode) {
+
+		/* Is partial delivery possible?  */
+		event = sctp_ulpq_retrieve_first(ulpq);
+		/* Send event to the ULP.   */
+		if (event) {
+			sctp_ulpq_tail_event(ulpq, event);
+			sctp_sk(asoc->base.sk)->pd_mode = 1;
+			ulpq->pd_mode = 1;
+			return;
+		}
+	}
+}
+
+/* Renege some packets to make room for an incoming chunk.  */
+void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
+		      int gfp)
+{
+	struct sctp_association *asoc;
+	__u16 needed, freed;
+
+	asoc = ulpq->asoc;
+
+	if (chunk) {
+		needed = ntohs(chunk->chunk_hdr->length);
+		needed -= sizeof(sctp_data_chunk_t);
+	} else 
+		needed = SCTP_DEFAULT_MAXWINDOW;
+
+	freed = 0;
+
+	if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
+		freed = sctp_ulpq_renege_order(ulpq, needed);
+		if (freed < needed) {
+			freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
+		}
+	}
+	/* If able to free enough room, accept this chunk. */
+	if (chunk && (freed >= needed)) {
+		__u32 tsn;
+		tsn = ntohl(chunk->subh.data_hdr->tsn);
+		sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn);
+		sctp_ulpq_tail_data(ulpq, chunk, gfp);
+		
+		sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
+	}
+
+	return;
+}
+
+
+
+/* Notify the application if an association is aborted and in
+ * partial delivery mode.  Send up any pending received messages.
+ */
+void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, int gfp)
+{
+	struct sctp_ulpevent *ev = NULL;
+	struct sock *sk;
+
+	if (!ulpq->pd_mode)
+		return;
+
+	sk = ulpq->asoc->base.sk;
+	if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
+				       &sctp_sk(sk)->subscribe))
+		ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
+					      SCTP_PARTIAL_DELIVERY_ABORTED,
+					      gfp);
+	if (ev)
+		__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
+
+	/* If there is data waiting, send it up the socket now. */
+	if (sctp_ulpq_clear_pd(ulpq) || ev)
+		sk->sk_data_ready(sk, 0);
+}
diff --git a/net/socket.c b/net/socket.c
new file mode 100644
index 00000000000..2cd44990d8d
--- /dev/null
+++ b/net/socket.c
@@ -0,0 +1,2088 @@
+/*
+ * NET		An implementation of the SOCKET network access protocol.
+ *
+ * Version:	@(#)socket.c	1.1.93	18/02/95
+ *
+ * Authors:	Orest Zborowski, <obz@Kodak.COM>
+ *		Ross Biro, <bir7@leland.Stanford.Edu>
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * Fixes:
+ *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
+ *					shutdown()
+ *		Alan Cox	:	verify_area() fixes
+ *		Alan Cox	:	Removed DDI
+ *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
+ *		Alan Cox	:	Moved a load of checks to the very
+ *					top level.
+ *		Alan Cox	:	Move address structures to/from user
+ *					mode above the protocol layers.
+ *		Rob Janssen	:	Allow 0 length sends.
+ *		Alan Cox	:	Asynchronous I/O support (cribbed from the
+ *					tty drivers).
+ *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
+ *		Jeff Uphoff	:	Made max number of sockets command-line
+ *					configurable.
+ *		Matti Aarnio	:	Made the number of sockets dynamic,
+ *					to be allocated when needed, and mr.
+ *					Uphoff's max is used as max to be
+ *					allowed to allocate.
+ *		Linus		:	Argh. removed all the socket allocation
+ *					altogether: it's in the inode now.
+ *		Alan Cox	:	Made sock_alloc()/sock_release() public
+ *					for NetROM and future kernel nfsd type
+ *					stuff.
+ *		Alan Cox	:	sendmsg/recvmsg basics.
+ *		Tom Dyas	:	Export net symbols.
+ *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
+ *		Alan Cox	:	Added thread locking to sys_* calls
+ *					for sockets. May have errors at the
+ *					moment.
+ *		Kevin Buhr	:	Fixed the dumb errors in the above.
+ *		Andi Kleen	:	Some small cleanups, optimizations,
+ *					and fixed a copy_from_user() bug.
+ *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
+ *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
+ *					protocol-independent
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *
+ *	This module is effectively the top level interface to the BSD socket
+ *	paradigm. 
+ *
+ *	Based upon Swansea University Computer Society NET3.039
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/wanrouter.h>
+#include <linux/if_bridge.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/divert.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kmod.h>
+
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
+#endif	/* CONFIG_NET_RADIO */
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+#include <net/compat.h>
+
+#include <net/sock.h>
+#include <linux/netfilter.h>
+
+static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
+static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
+			 size_t size, loff_t pos);
+static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
+			  size_t size, loff_t pos);
+static int sock_mmap(struct file *file, struct vm_area_struct * vma);
+
+static int sock_close(struct inode *inode, struct file *file);
+static unsigned int sock_poll(struct file *file,
+			      struct poll_table_struct *wait);
+static long sock_ioctl(struct file *file,
+		      unsigned int cmd, unsigned long arg);
+static int sock_fasync(int fd, struct file *filp, int on);
+static ssize_t sock_readv(struct file *file, const struct iovec *vector,
+			  unsigned long count, loff_t *ppos);
+static ssize_t sock_writev(struct file *file, const struct iovec *vector,
+			  unsigned long count, loff_t *ppos);
+static ssize_t sock_sendpage(struct file *file, struct page *page,
+			     int offset, size_t size, loff_t *ppos, int more);
+
+
+/*
+ *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
+ *	in the operation structures but are done directly via the socketcall() multiplexor.
+ */
+
+static struct file_operations socket_file_ops = {
+	.owner =	THIS_MODULE,
+	.llseek =	no_llseek,
+	.aio_read =	sock_aio_read,
+	.aio_write =	sock_aio_write,
+	.poll =		sock_poll,
+	.unlocked_ioctl = sock_ioctl,
+	.mmap =		sock_mmap,
+	.open =		sock_no_open,	/* special open code to disallow open via /proc */
+	.release =	sock_close,
+	.fasync =	sock_fasync,
+	.readv =	sock_readv,
+	.writev =	sock_writev,
+	.sendpage =	sock_sendpage
+};
+
+/*
+ *	The protocol list. Each protocol is registered in here.
+ */
+
+static struct net_proto_family *net_families[NPROTO];
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+static atomic_t net_family_lockct = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(net_family_lock);
+
+/* The strategy is: modifications net_family vector are short, do not
+   sleep and veeery rare, but read access should be free of any exclusive
+   locks.
+ */
+
+static void net_family_write_lock(void)
+{
+	spin_lock(&net_family_lock);
+	while (atomic_read(&net_family_lockct) != 0) {
+		spin_unlock(&net_family_lock);
+
+		yield();
+
+		spin_lock(&net_family_lock);
+	}
+}
+
+static __inline__ void net_family_write_unlock(void)
+{
+	spin_unlock(&net_family_lock);
+}
+
+static __inline__ void net_family_read_lock(void)
+{
+	atomic_inc(&net_family_lockct);
+	spin_unlock_wait(&net_family_lock);
+}
+
+static __inline__ void net_family_read_unlock(void)
+{
+	atomic_dec(&net_family_lockct);
+}
+
+#else
+#define net_family_write_lock() do { } while(0)
+#define net_family_write_unlock() do { } while(0)
+#define net_family_read_lock() do { } while(0)
+#define net_family_read_unlock() do { } while(0)
+#endif
+
+
+/*
+ *	Statistics counters of the socket lists
+ */
+
+static DEFINE_PER_CPU(int, sockets_in_use) = 0;
+
+/*
+ *	Support routines. Move socket addresses back and forth across the kernel/user
+ *	divide and look after the messy bits.
+ */
+
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
+					   16 for IP, 16 for IPX,
+					   24 for IPv6,
+					   about 80 for AX.25 
+					   must be at least one bigger than
+					   the AF_UNIX size (see net/unix/af_unix.c
+					   :unix_mkname()).  
+					 */
+					 
+/**
+ *	move_addr_to_kernel	-	copy a socket address into kernel space
+ *	@uaddr: Address in user space
+ *	@kaddr: Address in kernel space
+ *	@ulen: Length in user space
+ *
+ *	The address is copied into kernel space. If the provided address is
+ *	too long an error code of -EINVAL is returned. If the copy gives
+ *	invalid addresses -EFAULT is returned. On a success 0 is returned.
+ */
+
+int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
+{
+	if(ulen<0||ulen>MAX_SOCK_ADDR)
+		return -EINVAL;
+	if(ulen==0)
+		return 0;
+	if(copy_from_user(kaddr,uaddr,ulen))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ *	move_addr_to_user	-	copy an address to user space
+ *	@kaddr: kernel space address
+ *	@klen: length of address in kernel
+ *	@uaddr: user space address
+ *	@ulen: pointer to user length field
+ *
+ *	The value pointed to by ulen on entry is the buffer length available.
+ *	This is overwritten with the buffer space used. -EINVAL is returned
+ *	if an overlong buffer is specified or a negative buffer size. -EFAULT
+ *	is returned if either the buffer or the length field are not
+ *	accessible.
+ *	After copying the data up to the limit the user specifies, the true
+ *	length of the data is written over the length limit the user
+ *	specified. Zero is returned for a success.
+ */
+ 
+int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
+{
+	int err;
+	int len;
+
+	if((err=get_user(len, ulen)))
+		return err;
+	if(len>klen)
+		len=klen;
+	if(len<0 || len> MAX_SOCK_ADDR)
+		return -EINVAL;
+	if(len)
+	{
+		if(copy_to_user(uaddr,kaddr,len))
+			return -EFAULT;
+	}
+	/*
+	 *	"fromlen shall refer to the value before truncation.."
+	 *			1003.1g
+	 */
+	return __put_user(klen, ulen);
+}
+
+#define SOCKFS_MAGIC 0x534F434B
+
+static kmem_cache_t * sock_inode_cachep;
+
+static struct inode *sock_alloc_inode(struct super_block *sb)
+{
+	struct socket_alloc *ei;
+	ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+	if (!ei)
+		return NULL;
+	init_waitqueue_head(&ei->socket.wait);
+	
+	ei->socket.fasync_list = NULL;
+	ei->socket.state = SS_UNCONNECTED;
+	ei->socket.flags = 0;
+	ei->socket.ops = NULL;
+	ei->socket.sk = NULL;
+	ei->socket.file = NULL;
+	ei->socket.flags = 0;
+
+	return &ei->vfs_inode;
+}
+
+static void sock_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(sock_inode_cachep,
+			container_of(inode, struct socket_alloc, vfs_inode));
+}
+
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct socket_alloc *ei = (struct socket_alloc *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(&ei->vfs_inode);
+}
+ 
+static int init_inodecache(void)
+{
+	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
+				sizeof(struct socket_alloc),
+				0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+				init_once, NULL);
+	if (sock_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static struct super_operations sockfs_ops = {
+	.alloc_inode =	sock_alloc_inode,
+	.destroy_inode =sock_destroy_inode,
+	.statfs =	simple_statfs,
+};
+
+static struct super_block *sockfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
+}
+
+static struct vfsmount *sock_mnt;
+
+static struct file_system_type sock_fs_type = {
+	.name =		"sockfs",
+	.get_sb =	sockfs_get_sb,
+	.kill_sb =	kill_anon_super,
+};
+static int sockfs_delete_dentry(struct dentry *dentry)
+{
+	return 1;
+}
+static struct dentry_operations sockfs_dentry_operations = {
+	.d_delete =	sockfs_delete_dentry,
+};
+
+/*
+ *	Obtains the first available file descriptor and sets it up for use.
+ *
+ *	This function creates file structure and maps it to fd space
+ *	of current process. On success it returns file descriptor
+ *	and file struct implicitly stored in sock->file.
+ *	Note that another thread may close file descriptor before we return
+ *	from this function. We use the fact that now we do not refer
+ *	to socket after mapping. If one day we will need it, this
+ *	function will increment ref. count on file by 1.
+ *
+ *	In any case returned fd MAY BE not valid!
+ *	This race condition is unavoidable
+ *	with shared fd spaces, we cannot solve it inside kernel,
+ *	but we take care of internal coherence yet.
+ */
+
+int sock_map_fd(struct socket *sock)
+{
+	int fd;
+	struct qstr this;
+	char name[32];
+
+	/*
+	 *	Find a file descriptor suitable for return to the user. 
+	 */
+
+	fd = get_unused_fd();
+	if (fd >= 0) {
+		struct file *file = get_empty_filp();
+
+		if (!file) {
+			put_unused_fd(fd);
+			fd = -ENFILE;
+			goto out;
+		}
+
+		sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
+		this.name = name;
+		this.len = strlen(name);
+		this.hash = SOCK_INODE(sock)->i_ino;
+
+		file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
+		if (!file->f_dentry) {
+			put_filp(file);
+			put_unused_fd(fd);
+			fd = -ENOMEM;
+			goto out;
+		}
+		file->f_dentry->d_op = &sockfs_dentry_operations;
+		d_add(file->f_dentry, SOCK_INODE(sock));
+		file->f_vfsmnt = mntget(sock_mnt);
+		file->f_mapping = file->f_dentry->d_inode->i_mapping;
+
+		sock->file = file;
+		file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
+		file->f_mode = FMODE_READ | FMODE_WRITE;
+		file->f_flags = O_RDWR;
+		file->f_pos = 0;
+		fd_install(fd, file);
+	}
+
+out:
+	return fd;
+}
+
+/**
+ *	sockfd_lookup	- 	Go from a file number to its socket slot
+ *	@fd: file handle
+ *	@err: pointer to an error code return
+ *
+ *	The file handle passed in is locked and the socket it is bound
+ *	too is returned. If an error occurs the err pointer is overwritten
+ *	with a negative errno code and NULL is returned. The function checks
+ *	for both invalid handles and passing a handle which is not a socket.
+ *
+ *	On a success the socket object pointer is returned.
+ */
+
+struct socket *sockfd_lookup(int fd, int *err)
+{
+	struct file *file;
+	struct inode *inode;
+	struct socket *sock;
+
+	if (!(file = fget(fd)))
+	{
+		*err = -EBADF;
+		return NULL;
+	}
+
+	inode = file->f_dentry->d_inode;
+	if (!S_ISSOCK(inode->i_mode)) {
+		*err = -ENOTSOCK;
+		fput(file);
+		return NULL;
+	}
+
+	sock = SOCKET_I(inode);
+	if (sock->file != file) {
+		printk(KERN_ERR "socki_lookup: socket file changed!\n");
+		sock->file = file;
+	}
+	return sock;
+}
+
+/**
+ *	sock_alloc	-	allocate a socket
+ *	
+ *	Allocate a new inode and socket object. The two are bound together
+ *	and initialised. The socket is then returned. If we are out of inodes
+ *	NULL is returned.
+ */
+
+static struct socket *sock_alloc(void)
+{
+	struct inode * inode;
+	struct socket * sock;
+
+	inode = new_inode(sock_mnt->mnt_sb);
+	if (!inode)
+		return NULL;
+
+	sock = SOCKET_I(inode);
+
+	inode->i_mode = S_IFSOCK|S_IRWXUGO;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+
+	get_cpu_var(sockets_in_use)++;
+	put_cpu_var(sockets_in_use);
+	return sock;
+}
+
+/*
+ *	In theory you can't get an open on this inode, but /proc provides
+ *	a back door. Remember to keep it shut otherwise you'll let the
+ *	creepy crawlies in.
+ */
+  
+static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
+{
+	return -ENXIO;
+}
+
+struct file_operations bad_sock_fops = {
+	.owner = THIS_MODULE,
+	.open = sock_no_open,
+};
+
+/**
+ *	sock_release	-	close a socket
+ *	@sock: socket to close
+ *
+ *	The socket is released from the protocol stack if it has a release
+ *	callback, and the inode is then released if the socket is bound to
+ *	an inode not a file. 
+ */
+ 
+void sock_release(struct socket *sock)
+{
+	if (sock->ops) {
+		struct module *owner = sock->ops->owner;
+
+		sock->ops->release(sock);
+		sock->ops = NULL;
+		module_put(owner);
+	}
+
+	if (sock->fasync_list)
+		printk(KERN_ERR "sock_release: fasync list not empty!\n");
+
+	get_cpu_var(sockets_in_use)--;
+	put_cpu_var(sockets_in_use);
+	if (!sock->file) {
+		iput(SOCK_INODE(sock));
+		return;
+	}
+	sock->file=NULL;
+}
+
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+				 struct msghdr *msg, size_t size)
+{
+	struct sock_iocb *si = kiocb_to_siocb(iocb);
+	int err;
+
+	si->sock = sock;
+	si->scm = NULL;
+	si->msg = msg;
+	si->size = size;
+
+	err = security_socket_sendmsg(sock, msg, size);
+	if (err)
+		return err;
+
+	return sock->ops->sendmsg(iocb, sock, msg, size);
+}
+
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	int ret;
+
+	init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+	ret = __sock_sendmsg(&iocb, sock, msg, size);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
+
+int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
+		   struct kvec *vec, size_t num, size_t size)
+{
+	mm_segment_t oldfs = get_fs();
+	int result;
+
+	set_fs(KERNEL_DS);
+	/*
+	 * the following is safe, since for compiler definitions of kvec and
+	 * iovec are identical, yielding the same in-core layout and alignment
+	 */
+	msg->msg_iov = (struct iovec *)vec,
+	msg->msg_iovlen = num;
+	result = sock_sendmsg(sock, msg, size);
+	set_fs(oldfs);
+	return result;
+}
+
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
+				 struct msghdr *msg, size_t size, int flags)
+{
+	int err;
+	struct sock_iocb *si = kiocb_to_siocb(iocb);
+
+	si->sock = sock;
+	si->scm = NULL;
+	si->msg = msg;
+	si->size = size;
+	si->flags = flags;
+
+	err = security_socket_recvmsg(sock, msg, size, flags);
+	if (err)
+		return err;
+
+	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+}
+
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
+		 size_t size, int flags)
+{
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	int ret;
+
+        init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
+
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 
+		   struct kvec *vec, size_t num,
+		   size_t size, int flags)
+{
+	mm_segment_t oldfs = get_fs();
+	int result;
+
+	set_fs(KERNEL_DS);
+	/*
+	 * the following is safe, since for compiler definitions of kvec and
+	 * iovec are identical, yielding the same in-core layout and alignment
+	 */
+	msg->msg_iov = (struct iovec *)vec,
+	msg->msg_iovlen = num;
+	result = sock_recvmsg(sock, msg, size, flags);
+	set_fs(oldfs);
+	return result;
+}
+
+static void sock_aio_dtor(struct kiocb *iocb)
+{
+	kfree(iocb->private);
+}
+
+/*
+ *	Read data from a socket. ubuf is a user mode pointer. We make sure the user
+ *	area ubuf...ubuf+size-1 is writable before asking the protocol.
+ */
+
+static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
+			 size_t size, loff_t pos)
+{
+	struct sock_iocb *x, siocb;
+	struct socket *sock;
+	int flags;
+
+	if (pos != 0)
+		return -ESPIPE;
+	if (size==0)		/* Match SYS5 behaviour */
+		return 0;
+
+	if (is_sync_kiocb(iocb))
+		x = &siocb;
+	else {
+		x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
+		if (!x)
+			return -ENOMEM;
+		iocb->ki_dtor = sock_aio_dtor;
+	}
+	iocb->private = x;
+	x->kiocb = iocb;
+	sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode); 
+
+	x->async_msg.msg_name = NULL;
+	x->async_msg.msg_namelen = 0;
+	x->async_msg.msg_iov = &x->async_iov;
+	x->async_msg.msg_iovlen = 1;
+	x->async_msg.msg_control = NULL;
+	x->async_msg.msg_controllen = 0;
+	x->async_iov.iov_base = ubuf;
+	x->async_iov.iov_len = size;
+	flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+
+	return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
+}
+
+
+/*
+ *	Write data to a socket. We verify that the user area ubuf..ubuf+size-1
+ *	is readable by the user process.
+ */
+
+static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
+			  size_t size, loff_t pos)
+{
+	struct sock_iocb *x, siocb;
+	struct socket *sock;
+	
+	if (pos != 0)
+		return -ESPIPE;
+	if(size==0)		/* Match SYS5 behaviour */
+		return 0;
+
+	if (is_sync_kiocb(iocb))
+		x = &siocb;
+	else {
+		x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
+		if (!x)
+			return -ENOMEM;
+		iocb->ki_dtor = sock_aio_dtor;
+	}
+	iocb->private = x;
+	x->kiocb = iocb;
+	sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode); 
+
+	x->async_msg.msg_name = NULL;
+	x->async_msg.msg_namelen = 0;
+	x->async_msg.msg_iov = &x->async_iov;
+	x->async_msg.msg_iovlen = 1;
+	x->async_msg.msg_control = NULL;
+	x->async_msg.msg_controllen = 0;
+	x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+	if (sock->type == SOCK_SEQPACKET)
+		x->async_msg.msg_flags |= MSG_EOR;
+	x->async_iov.iov_base = (void __user *)ubuf;
+	x->async_iov.iov_len = size;
+	
+	return __sock_sendmsg(iocb, sock, &x->async_msg, size);
+}
+
+ssize_t sock_sendpage(struct file *file, struct page *page,
+		      int offset, size_t size, loff_t *ppos, int more)
+{
+	struct socket *sock;
+	int flags;
+
+	sock = SOCKET_I(file->f_dentry->d_inode);
+
+	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+	if (more)
+		flags |= MSG_MORE;
+
+	return sock->ops->sendpage(sock, page, offset, size, flags);
+}
+
+static int sock_readv_writev(int type, struct inode * inode,
+			     struct file * file, const struct iovec * iov,
+			     long count, size_t size)
+{
+	struct msghdr msg;
+	struct socket *sock;
+
+	sock = SOCKET_I(inode);
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iov = (struct iovec *) iov;
+	msg.msg_iovlen = count;
+	msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+
+	/* read() does a VERIFY_WRITE */
+	if (type == VERIFY_WRITE)
+		return sock_recvmsg(sock, &msg, size, msg.msg_flags);
+
+	if (sock->type == SOCK_SEQPACKET)
+		msg.msg_flags |= MSG_EOR;
+
+	return sock_sendmsg(sock, &msg, size);
+}
+
+static ssize_t sock_readv(struct file *file, const struct iovec *vector,
+			  unsigned long count, loff_t *ppos)
+{
+	size_t tot_len = 0;
+	int i;
+        for (i = 0 ; i < count ; i++)
+                tot_len += vector[i].iov_len;
+	return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
+				 file, vector, count, tot_len);
+}
+	
+static ssize_t sock_writev(struct file *file, const struct iovec *vector,
+			   unsigned long count, loff_t *ppos)
+{
+	size_t tot_len = 0;
+	int i;
+        for (i = 0 ; i < count ; i++)
+                tot_len += vector[i].iov_len;
+	return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
+				 file, vector, count, tot_len);
+}
+
+
+/*
+ * Atomic setting of ioctl hooks to avoid race
+ * with module unload.
+ */
+
+static DECLARE_MUTEX(br_ioctl_mutex);
+static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
+
+void brioctl_set(int (*hook)(unsigned int, void __user *))
+{
+	down(&br_ioctl_mutex);
+	br_ioctl_hook = hook;
+	up(&br_ioctl_mutex);
+}
+EXPORT_SYMBOL(brioctl_set);
+
+static DECLARE_MUTEX(vlan_ioctl_mutex);
+static int (*vlan_ioctl_hook)(void __user *arg);
+
+void vlan_ioctl_set(int (*hook)(void __user *))
+{
+	down(&vlan_ioctl_mutex);
+	vlan_ioctl_hook = hook;
+	up(&vlan_ioctl_mutex);
+}
+EXPORT_SYMBOL(vlan_ioctl_set);
+
+static DECLARE_MUTEX(dlci_ioctl_mutex);
+static int (*dlci_ioctl_hook)(unsigned int, void __user *);
+
+void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
+{
+	down(&dlci_ioctl_mutex);
+	dlci_ioctl_hook = hook;
+	up(&dlci_ioctl_mutex);
+}
+EXPORT_SYMBOL(dlci_ioctl_set);
+
+/*
+ *	With an ioctl, arg may well be a user mode pointer, but we don't know
+ *	what to do with it - that's up to the protocol still.
+ */
+
+static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	struct socket *sock;
+	void __user *argp = (void __user *)arg;
+	int pid, err;
+
+	sock = SOCKET_I(file->f_dentry->d_inode);
+	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
+		err = dev_ioctl(cmd, argp);
+	} else
+#ifdef WIRELESS_EXT
+	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+		err = dev_ioctl(cmd, argp);
+	} else
+#endif	/* WIRELESS_EXT */
+	switch (cmd) {
+		case FIOSETOWN:
+		case SIOCSPGRP:
+			err = -EFAULT;
+			if (get_user(pid, (int __user *)argp))
+				break;
+			err = f_setown(sock->file, pid, 1);
+			break;
+		case FIOGETOWN:
+		case SIOCGPGRP:
+			err = put_user(sock->file->f_owner.pid, (int __user *)argp);
+			break;
+		case SIOCGIFBR:
+		case SIOCSIFBR:
+		case SIOCBRADDBR:
+		case SIOCBRDELBR:
+			err = -ENOPKG;
+			if (!br_ioctl_hook)
+				request_module("bridge");
+
+			down(&br_ioctl_mutex);
+			if (br_ioctl_hook) 
+				err = br_ioctl_hook(cmd, argp);
+			up(&br_ioctl_mutex);
+			break;
+		case SIOCGIFVLAN:
+		case SIOCSIFVLAN:
+			err = -ENOPKG;
+			if (!vlan_ioctl_hook)
+				request_module("8021q");
+
+			down(&vlan_ioctl_mutex);
+			if (vlan_ioctl_hook)
+				err = vlan_ioctl_hook(argp);
+			up(&vlan_ioctl_mutex);
+			break;
+		case SIOCGIFDIVERT:
+		case SIOCSIFDIVERT:
+		/* Convert this to call through a hook */
+			err = divert_ioctl(cmd, argp);
+			break;
+		case SIOCADDDLCI:
+		case SIOCDELDLCI:
+			err = -ENOPKG;
+			if (!dlci_ioctl_hook)
+				request_module("dlci");
+
+			if (dlci_ioctl_hook) {
+				down(&dlci_ioctl_mutex);
+				err = dlci_ioctl_hook(cmd, argp);
+				up(&dlci_ioctl_mutex);
+			}
+			break;
+		default:
+			err = sock->ops->ioctl(sock, cmd, arg);
+			break;
+	}
+	return err;
+}
+
+int sock_create_lite(int family, int type, int protocol, struct socket **res)
+{
+	int err;
+	struct socket *sock = NULL;
+	
+	err = security_socket_create(family, type, protocol, 1);
+	if (err)
+		goto out;
+
+	sock = sock_alloc();
+	if (!sock) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	security_socket_post_create(sock, family, type, protocol, 1);
+	sock->type = type;
+out:
+	*res = sock;
+	return err;
+}
+
+/* No kernel lock held - perfect */
+static unsigned int sock_poll(struct file *file, poll_table * wait)
+{
+	struct socket *sock;
+
+	/*
+	 *	We can't return errors to poll, so it's either yes or no. 
+	 */
+	sock = SOCKET_I(file->f_dentry->d_inode);
+	return sock->ops->poll(file, sock, wait);
+}
+
+static int sock_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct socket *sock = SOCKET_I(file->f_dentry->d_inode);
+
+	return sock->ops->mmap(file, sock, vma);
+}
+
+int sock_close(struct inode *inode, struct file *filp)
+{
+	/*
+	 *	It was possible the inode is NULL we were 
+	 *	closing an unfinished socket. 
+	 */
+
+	if (!inode)
+	{
+		printk(KERN_DEBUG "sock_close: NULL inode\n");
+		return 0;
+	}
+	sock_fasync(-1, filp, 0);
+	sock_release(SOCKET_I(inode));
+	return 0;
+}
+
+/*
+ *	Update the socket async list
+ *
+ *	Fasync_list locking strategy.
+ *
+ *	1. fasync_list is modified only under process context socket lock
+ *	   i.e. under semaphore.
+ *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
+ *	   or under socket lock.
+ *	3. fasync_list can be used from softirq context, so that
+ *	   modification under socket lock have to be enhanced with
+ *	   write_lock_bh(&sk->sk_callback_lock).
+ *							--ANK (990710)
+ */
+
+static int sock_fasync(int fd, struct file *filp, int on)
+{
+	struct fasync_struct *fa, *fna=NULL, **prev;
+	struct socket *sock;
+	struct sock *sk;
+
+	if (on)
+	{
+		fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
+		if(fna==NULL)
+			return -ENOMEM;
+	}
+
+	sock = SOCKET_I(filp->f_dentry->d_inode);
+
+	if ((sk=sock->sk) == NULL) {
+		kfree(fna);
+		return -EINVAL;
+	}
+
+	lock_sock(sk);
+
+	prev=&(sock->fasync_list);
+
+	for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
+		if (fa->fa_file==filp)
+			break;
+
+	if(on)
+	{
+		if(fa!=NULL)
+		{
+			write_lock_bh(&sk->sk_callback_lock);
+			fa->fa_fd=fd;
+			write_unlock_bh(&sk->sk_callback_lock);
+
+			kfree(fna);
+			goto out;
+		}
+		fna->fa_file=filp;
+		fna->fa_fd=fd;
+		fna->magic=FASYNC_MAGIC;
+		fna->fa_next=sock->fasync_list;
+		write_lock_bh(&sk->sk_callback_lock);
+		sock->fasync_list=fna;
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	else
+	{
+		if (fa!=NULL)
+		{
+			write_lock_bh(&sk->sk_callback_lock);
+			*prev=fa->fa_next;
+			write_unlock_bh(&sk->sk_callback_lock);
+			kfree(fa);
+		}
+	}
+
+out:
+	release_sock(sock->sk);
+	return 0;
+}
+
+/* This function may be called only under socket lock or callback_lock */
+
+int sock_wake_async(struct socket *sock, int how, int band)
+{
+	if (!sock || !sock->fasync_list)
+		return -1;
+	switch (how)
+	{
+	case 1:
+		
+		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+			break;
+		goto call_kill;
+	case 2:
+		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+			break;
+		/* fall through */
+	case 0:
+	call_kill:
+		__kill_fasync(sock->fasync_list, SIGIO, band);
+		break;
+	case 3:
+		__kill_fasync(sock->fasync_list, SIGURG, band);
+	}
+	return 0;
+}
+
+static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+{
+	int err;
+	struct socket *sock;
+
+	/*
+	 *	Check protocol is in range
+	 */
+	if (family < 0 || family >= NPROTO)
+		return -EAFNOSUPPORT;
+	if (type < 0 || type >= SOCK_MAX)
+		return -EINVAL;
+
+	/* Compatibility.
+
+	   This uglymoron is moved from INET layer to here to avoid
+	   deadlock in module load.
+	 */
+	if (family == PF_INET && type == SOCK_PACKET) {
+		static int warned; 
+		if (!warned) {
+			warned = 1;
+			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
+		}
+		family = PF_PACKET;
+	}
+
+	err = security_socket_create(family, type, protocol, kern);
+	if (err)
+		return err;
+		
+#if defined(CONFIG_KMOD)
+	/* Attempt to load a protocol module if the find failed. 
+	 * 
+	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
+	 * requested real, full-featured networking support upon configuration.
+	 * Otherwise module support will break!
+	 */
+	if (net_families[family]==NULL)
+	{
+		request_module("net-pf-%d",family);
+	}
+#endif
+
+	net_family_read_lock();
+	if (net_families[family] == NULL) {
+		err = -EAFNOSUPPORT;
+		goto out;
+	}
+
+/*
+ *	Allocate the socket and allow the family to set things up. if
+ *	the protocol is 0, the family is instructed to select an appropriate
+ *	default.
+ */
+
+	if (!(sock = sock_alloc())) {
+		printk(KERN_WARNING "socket: no more sockets\n");
+		err = -ENFILE;		/* Not exactly a match, but its the
+					   closest posix thing */
+		goto out;
+	}
+
+	sock->type  = type;
+
+	/*
+	 * We will call the ->create function, that possibly is in a loadable
+	 * module, so we have to bump that loadable module refcnt first.
+	 */
+	err = -EAFNOSUPPORT;
+	if (!try_module_get(net_families[family]->owner))
+		goto out_release;
+
+	if ((err = net_families[family]->create(sock, protocol)) < 0)
+		goto out_module_put;
+	/*
+	 * Now to bump the refcnt of the [loadable] module that owns this
+	 * socket at sock_release time we decrement its refcnt.
+	 */
+	if (!try_module_get(sock->ops->owner)) {
+		sock->ops = NULL;
+		goto out_module_put;
+	}
+	/*
+	 * Now that we're done with the ->create function, the [loadable]
+	 * module can have its refcnt decremented
+	 */
+	module_put(net_families[family]->owner);
+	*res = sock;
+	security_socket_post_create(sock, family, type, protocol, kern);
+
+out:
+	net_family_read_unlock();
+	return err;
+out_module_put:
+	module_put(net_families[family]->owner);
+out_release:
+	sock_release(sock);
+	goto out;
+}
+
+int sock_create(int family, int type, int protocol, struct socket **res)
+{
+	return __sock_create(family, type, protocol, res, 0);
+}
+
+int sock_create_kern(int family, int type, int protocol, struct socket **res)
+{
+	return __sock_create(family, type, protocol, res, 1);
+}
+
+asmlinkage long sys_socket(int family, int type, int protocol)
+{
+	int retval;
+	struct socket *sock;
+
+	retval = sock_create(family, type, protocol, &sock);
+	if (retval < 0)
+		goto out;
+
+	retval = sock_map_fd(sock);
+	if (retval < 0)
+		goto out_release;
+
+out:
+	/* It may be already another descriptor 8) Not kernel problem. */
+	return retval;
+
+out_release:
+	sock_release(sock);
+	return retval;
+}
+
+/*
+ *	Create a pair of connected sockets.
+ */
+
+asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
+{
+	struct socket *sock1, *sock2;
+	int fd1, fd2, err;
+
+	/*
+	 * Obtain the first socket and check if the underlying protocol
+	 * supports the socketpair call.
+	 */
+
+	err = sock_create(family, type, protocol, &sock1);
+	if (err < 0)
+		goto out;
+
+	err = sock_create(family, type, protocol, &sock2);
+	if (err < 0)
+		goto out_release_1;
+
+	err = sock1->ops->socketpair(sock1, sock2);
+	if (err < 0) 
+		goto out_release_both;
+
+	fd1 = fd2 = -1;
+
+	err = sock_map_fd(sock1);
+	if (err < 0)
+		goto out_release_both;
+	fd1 = err;
+
+	err = sock_map_fd(sock2);
+	if (err < 0)
+		goto out_close_1;
+	fd2 = err;
+
+	/* fd1 and fd2 may be already another descriptors.
+	 * Not kernel problem.
+	 */
+
+	err = put_user(fd1, &usockvec[0]); 
+	if (!err)
+		err = put_user(fd2, &usockvec[1]);
+	if (!err)
+		return 0;
+
+	sys_close(fd2);
+	sys_close(fd1);
+	return err;
+
+out_close_1:
+        sock_release(sock2);
+	sys_close(fd1);
+	return err;
+
+out_release_both:
+        sock_release(sock2);
+out_release_1:
+        sock_release(sock1);
+out:
+	return err;
+}
+
+
+/*
+ *	Bind a name to a socket. Nothing much to do here since it's
+ *	the protocol's responsibility to handle the local address.
+ *
+ *	We move the socket address to kernel space before we call
+ *	the protocol layer (having also checked the address is ok).
+ */
+
+asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	int err;
+
+	if((sock = sockfd_lookup(fd,&err))!=NULL)
+	{
+		if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
+			err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+			if (err) {
+				sockfd_put(sock);
+				return err;
+			}
+			err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
+		}
+		sockfd_put(sock);
+	}			
+	return err;
+}
+
+
+/*
+ *	Perform a listen. Basically, we allow the protocol to do anything
+ *	necessary for a listen, and if that works, we mark the socket as
+ *	ready for listening.
+ */
+
+int sysctl_somaxconn = SOMAXCONN;
+
+asmlinkage long sys_listen(int fd, int backlog)
+{
+	struct socket *sock;
+	int err;
+	
+	if ((sock = sockfd_lookup(fd, &err)) != NULL) {
+		if ((unsigned) backlog > sysctl_somaxconn)
+			backlog = sysctl_somaxconn;
+
+		err = security_socket_listen(sock, backlog);
+		if (err) {
+			sockfd_put(sock);
+			return err;
+		}
+
+		err=sock->ops->listen(sock, backlog);
+		sockfd_put(sock);
+	}
+	return err;
+}
+
+
+/*
+ *	For accept, we attempt to create a new socket, set up the link
+ *	with the client, wake up the client, then return the new
+ *	connected fd. We collect the address of the connector in kernel
+ *	space and move it to user at the very end. This is unclean because
+ *	we open the socket then return an error.
+ *
+ *	1003.1g adds the ability to recvmsg() to query connection pending
+ *	status to recvmsg. We need to add that support in a way thats
+ *	clean when we restucture accept also.
+ */
+
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+{
+	struct socket *sock, *newsock;
+	int err, len;
+	char address[MAX_SOCK_ADDR];
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+
+	err = -ENFILE;
+	if (!(newsock = sock_alloc())) 
+		goto out_put;
+
+	newsock->type = sock->type;
+	newsock->ops = sock->ops;
+
+	err = security_socket_accept(sock, newsock);
+	if (err)
+		goto out_release;
+
+	/*
+	 * We don't need try_module_get here, as the listening socket (sock)
+	 * has the protocol module (sock->ops->owner) held.
+	 */
+	__module_get(newsock->ops->owner);
+
+	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+	if (err < 0)
+		goto out_release;
+
+	if (upeer_sockaddr) {
+		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
+			err = -ECONNABORTED;
+			goto out_release;
+		}
+		err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+		if (err < 0)
+			goto out_release;
+	}
+
+	/* File flags are not inherited via accept() unlike another OSes. */
+
+	if ((err = sock_map_fd(newsock)) < 0)
+		goto out_release;
+
+	security_socket_post_accept(sock, newsock);
+
+out_put:
+	sockfd_put(sock);
+out:
+	return err;
+out_release:
+	sock_release(newsock);
+	goto out_put;
+}
+
+
+/*
+ *	Attempt to connect to a socket with the server address.  The address
+ *	is in user space so we verify it is OK and move it to kernel space.
+ *
+ *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
+ *	break bindings
+ *
+ *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
+ *	other SEQPACKET protocols that take time to connect() as it doesn't
+ *	include the -EINPROGRESS status for such sockets.
+ */
+
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	int err;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+	err = move_addr_to_kernel(uservaddr, addrlen, address);
+	if (err < 0)
+		goto out_put;
+
+	err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+	if (err)
+		goto out_put;
+
+	err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+				 sock->file->f_flags);
+out_put:
+	sockfd_put(sock);
+out:
+	return err;
+}
+
+/*
+ *	Get the local address ('name') of a socket object. Move the obtained
+ *	name to user space.
+ */
+
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	int len, err;
+	
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+
+	err = security_socket_getsockname(sock);
+	if (err)
+		goto out_put;
+
+	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+	if (err)
+		goto out_put;
+	err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+	sockfd_put(sock);
+out:
+	return err;
+}
+
+/*
+ *	Get the remote address ('name') of a socket object. Move the obtained
+ *	name to user space.
+ */
+
+asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	int len, err;
+
+	if ((sock = sockfd_lookup(fd, &err))!=NULL)
+	{
+		err = security_socket_getpeername(sock);
+		if (err) {
+			sockfd_put(sock);
+			return err;
+		}
+
+		err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
+		if (!err)
+			err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
+		sockfd_put(sock);
+	}
+	return err;
+}
+
+/*
+ *	Send a datagram to a given address. We move the address into kernel
+ *	space and check the user space data area is readable before invoking
+ *	the protocol.
+ */
+
+asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
+			   struct sockaddr __user *addr, int addr_len)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	int err;
+	struct msghdr msg;
+	struct iovec iov;
+	
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+	iov.iov_base=buff;
+	iov.iov_len=len;
+	msg.msg_name=NULL;
+	msg.msg_iov=&iov;
+	msg.msg_iovlen=1;
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_namelen=0;
+	if(addr)
+	{
+		err = move_addr_to_kernel(addr, addr_len, address);
+		if (err < 0)
+			goto out_put;
+		msg.msg_name=address;
+		msg.msg_namelen=addr_len;
+	}
+	if (sock->file->f_flags & O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	msg.msg_flags = flags;
+	err = sock_sendmsg(sock, &msg, len);
+
+out_put:		
+	sockfd_put(sock);
+out:
+	return err;
+}
+
+/*
+ *	Send a datagram down a socket. 
+ */
+
+asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
+{
+	return sys_sendto(fd, buff, len, flags, NULL, 0);
+}
+
+/*
+ *	Receive a frame from the socket and optionally record the address of the 
+ *	sender. We verify the buffers are writable and if needed move the
+ *	sender address from kernel to user space.
+ */
+
+asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
+			     struct sockaddr __user *addr, int __user *addr_len)
+{
+	struct socket *sock;
+	struct iovec iov;
+	struct msghdr msg;
+	char address[MAX_SOCK_ADDR];
+	int err,err2;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+
+	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_iovlen=1;
+	msg.msg_iov=&iov;
+	iov.iov_len=size;
+	iov.iov_base=ubuf;
+	msg.msg_name=address;
+	msg.msg_namelen=MAX_SOCK_ADDR;
+	if (sock->file->f_flags & O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	err=sock_recvmsg(sock, &msg, size, flags);
+
+	if(err >= 0 && addr != NULL)
+	{
+		err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+		if(err2<0)
+			err=err2;
+	}
+	sockfd_put(sock);			
+out:
+	return err;
+}
+
+/*
+ *	Receive a datagram from a socket. 
+ */
+
+asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
+{
+	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
+}
+
+/*
+ *	Set a socket option. Because we don't know the option lengths we have
+ *	to pass the user mode parameter for the protocols to sort out.
+ */
+
+asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
+{
+	int err;
+	struct socket *sock;
+
+	if (optlen < 0)
+		return -EINVAL;
+			
+	if ((sock = sockfd_lookup(fd, &err))!=NULL)
+	{
+		err = security_socket_setsockopt(sock,level,optname);
+		if (err) {
+			sockfd_put(sock);
+			return err;
+		}
+
+		if (level == SOL_SOCKET)
+			err=sock_setsockopt(sock,level,optname,optval,optlen);
+		else
+			err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+		sockfd_put(sock);
+	}
+	return err;
+}
+
+/*
+ *	Get a socket option. Because we don't know the option lengths we have
+ *	to pass a user mode parameter for the protocols to sort out.
+ */
+
+asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
+{
+	int err;
+	struct socket *sock;
+
+	if ((sock = sockfd_lookup(fd, &err))!=NULL)
+	{
+		err = security_socket_getsockopt(sock, level, 
+							   optname);
+		if (err) {
+			sockfd_put(sock);
+			return err;
+		}
+
+		if (level == SOL_SOCKET)
+			err=sock_getsockopt(sock,level,optname,optval,optlen);
+		else
+			err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+		sockfd_put(sock);
+	}
+	return err;
+}
+
+
+/*
+ *	Shutdown a socket.
+ */
+
+asmlinkage long sys_shutdown(int fd, int how)
+{
+	int err;
+	struct socket *sock;
+
+	if ((sock = sockfd_lookup(fd, &err))!=NULL)
+	{
+		err = security_socket_shutdown(sock, how);
+		if (err) {
+			sockfd_put(sock);
+			return err;
+		}
+				
+		err=sock->ops->shutdown(sock, how);
+		sockfd_put(sock);
+	}
+	return err;
+}
+
+/* A couple of helpful macros for getting the address of the 32/64 bit 
+ * fields which are the same type (int / unsigned) on our platforms.
+ */
+#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
+#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
+#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
+
+
+/*
+ *	BSD sendmsg interface
+ */
+
+asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
+{
+	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	unsigned char ctl[sizeof(struct cmsghdr) + 20];	/* 20 is size of ipv6_pktinfo */
+	unsigned char *ctl_buf = ctl;
+	struct msghdr msg_sys;
+	int err, ctl_len, iov_size, total_len;
+	
+	err = -EFAULT;
+	if (MSG_CMSG_COMPAT & flags) {
+		if (get_compat_msghdr(&msg_sys, msg_compat))
+			return -EFAULT;
+	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+		return -EFAULT;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) 
+		goto out;
+
+	/* do not move before msg_sys is valid */
+	err = -EMSGSIZE;
+	if (msg_sys.msg_iovlen > UIO_MAXIOV)
+		goto out_put;
+
+	/* Check whether to allocate the iovec area*/
+	err = -ENOMEM;
+	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+		if (!iov)
+			goto out_put;
+	}
+
+	/* This will also move the address data into kernel space */
+	if (MSG_CMSG_COMPAT & flags) {
+		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
+	} else
+		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
+	if (err < 0) 
+		goto out_freeiov;
+	total_len = err;
+
+	err = -ENOBUFS;
+
+	if (msg_sys.msg_controllen > INT_MAX)
+		goto out_freeiov;
+	ctl_len = msg_sys.msg_controllen; 
+	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
+		err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl));
+		if (err)
+			goto out_freeiov;
+		ctl_buf = msg_sys.msg_control;
+	} else if (ctl_len) {
+		if (ctl_len > sizeof(ctl))
+		{
+			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
+			if (ctl_buf == NULL) 
+				goto out_freeiov;
+		}
+		err = -EFAULT;
+		/*
+		 * Careful! Before this, msg_sys.msg_control contains a user pointer.
+		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
+		 * checking falls down on this.
+		 */
+		if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
+			goto out_freectl;
+		msg_sys.msg_control = ctl_buf;
+	}
+	msg_sys.msg_flags = flags;
+
+	if (sock->file->f_flags & O_NONBLOCK)
+		msg_sys.msg_flags |= MSG_DONTWAIT;
+	err = sock_sendmsg(sock, &msg_sys, total_len);
+
+out_freectl:
+	if (ctl_buf != ctl)    
+		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
+out_freeiov:
+	if (iov != iovstack)
+		sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+	sockfd_put(sock);
+out:       
+	return err;
+}
+
+/*
+ *	BSD recvmsg interface
+ */
+
+asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
+{
+	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct socket *sock;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov=iovstack;
+	struct msghdr msg_sys;
+	unsigned long cmsg_ptr;
+	int err, iov_size, total_len, len;
+
+	/* kernel mode address */
+	char addr[MAX_SOCK_ADDR];
+
+	/* user mode address pointers */
+	struct sockaddr __user *uaddr;
+	int __user *uaddr_len;
+	
+	if (MSG_CMSG_COMPAT & flags) {
+		if (get_compat_msghdr(&msg_sys, msg_compat))
+			return -EFAULT;
+	} else
+		if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
+			return -EFAULT;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		goto out;
+
+	err = -EMSGSIZE;
+	if (msg_sys.msg_iovlen > UIO_MAXIOV)
+		goto out_put;
+	
+	/* Check whether to allocate the iovec area*/
+	err = -ENOMEM;
+	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+		if (!iov)
+			goto out_put;
+	}
+
+	/*
+	 *	Save the user-mode address (verify_iovec will change the
+	 *	kernel msghdr to use the kernel address space)
+	 */
+	 
+	uaddr = (void __user *) msg_sys.msg_name;
+	uaddr_len = COMPAT_NAMELEN(msg);
+	if (MSG_CMSG_COMPAT & flags) {
+		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+	} else
+		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+	if (err < 0)
+		goto out_freeiov;
+	total_len=err;
+
+	cmsg_ptr = (unsigned long)msg_sys.msg_control;
+	msg_sys.msg_flags = 0;
+	if (MSG_CMSG_COMPAT & flags)
+		msg_sys.msg_flags = MSG_CMSG_COMPAT;
+	
+	if (sock->file->f_flags & O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
+	if (err < 0)
+		goto out_freeiov;
+	len = err;
+
+	if (uaddr != NULL) {
+		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+		if (err < 0)
+			goto out_freeiov;
+	}
+	err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
+	if (err)
+		goto out_freeiov;
+	if (MSG_CMSG_COMPAT & flags)
+		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+				 &msg_compat->msg_controllen);
+	else
+		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+				 &msg->msg_controllen);
+	if (err)
+		goto out_freeiov;
+	err = len;
+
+out_freeiov:
+	if (iov != iovstack)
+		sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+	sockfd_put(sock);
+out:
+	return err;
+}
+
+#ifdef __ARCH_WANT_SYS_SOCKETCALL
+
+/* Argument list sizes for sys_socketcall */
+#define AL(x) ((x) * sizeof(unsigned long))
+static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+#undef AL
+
+/*
+ *	System call vectors. 
+ *
+ *	Argument checking cleaned up. Saved 20% in size.
+ *  This function doesn't need to set the kernel lock because
+ *  it is set by the callees. 
+ */
+
+asmlinkage long sys_socketcall(int call, unsigned long __user *args)
+{
+	unsigned long a[6];
+	unsigned long a0,a1;
+	int err;
+
+	if(call<1||call>SYS_RECVMSG)
+		return -EINVAL;
+
+	/* copy_from_user should be SMP safe. */
+	if (copy_from_user(a, args, nargs[call]))
+		return -EFAULT;
+		
+	a0=a[0];
+	a1=a[1];
+	
+	switch(call) 
+	{
+		case SYS_SOCKET:
+			err = sys_socket(a0,a1,a[2]);
+			break;
+		case SYS_BIND:
+			err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
+			break;
+		case SYS_CONNECT:
+			err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+			break;
+		case SYS_LISTEN:
+			err = sys_listen(a0,a1);
+			break;
+		case SYS_ACCEPT:
+			err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
+			break;
+		case SYS_GETSOCKNAME:
+			err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
+			break;
+		case SYS_GETPEERNAME:
+			err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
+			break;
+		case SYS_SOCKETPAIR:
+			err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
+			break;
+		case SYS_SEND:
+			err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+			break;
+		case SYS_SENDTO:
+			err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
+					 (struct sockaddr __user *)a[4], a[5]);
+			break;
+		case SYS_RECV:
+			err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+			break;
+		case SYS_RECVFROM:
+			err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+					   (struct sockaddr __user *)a[4], (int __user *)a[5]);
+			break;
+		case SYS_SHUTDOWN:
+			err = sys_shutdown(a0,a1);
+			break;
+		case SYS_SETSOCKOPT:
+			err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+			break;
+		case SYS_GETSOCKOPT:
+			err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
+			break;
+		case SYS_SENDMSG:
+			err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
+			break;
+		case SYS_RECVMSG:
+			err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
+			break;
+		default:
+			err = -EINVAL;
+			break;
+	}
+	return err;
+}
+
+#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+
+/*
+ *	This function is called by a protocol handler that wants to
+ *	advertise its address family, and have it linked into the
+ *	SOCKET module.
+ */
+
+int sock_register(struct net_proto_family *ops)
+{
+	int err;
+
+	if (ops->family >= NPROTO) {
+		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
+		return -ENOBUFS;
+	}
+	net_family_write_lock();
+	err = -EEXIST;
+	if (net_families[ops->family] == NULL) {
+		net_families[ops->family]=ops;
+		err = 0;
+	}
+	net_family_write_unlock();
+	printk(KERN_INFO "NET: Registered protocol family %d\n",
+	       ops->family);
+	return err;
+}
+
+/*
+ *	This function is called by a protocol handler that wants to
+ *	remove its address family, and have it unlinked from the
+ *	SOCKET module.
+ */
+
+int sock_unregister(int family)
+{
+	if (family < 0 || family >= NPROTO)
+		return -1;
+
+	net_family_write_lock();
+	net_families[family]=NULL;
+	net_family_write_unlock();
+	printk(KERN_INFO "NET: Unregistered protocol family %d\n",
+	       family);
+	return 0;
+}
+
+
+extern void sk_init(void);
+
+void __init sock_init(void)
+{
+	/*
+	 *	Initialize sock SLAB cache.
+	 */
+	 
+	sk_init();
+
+#ifdef SLAB_SKB
+	/*
+	 *	Initialize skbuff SLAB cache 
+	 */
+	skb_init();
+#endif
+
+	/*
+	 *	Initialize the protocols module. 
+	 */
+
+	init_inodecache();
+	register_filesystem(&sock_fs_type);
+	sock_mnt = kern_mount(&sock_fs_type);
+	/* The real protocol initialization is performed when
+	 *  do_initcalls is run.  
+	 */
+
+#ifdef CONFIG_NETFILTER
+	netfilter_init();
+#endif
+}
+
+#ifdef CONFIG_PROC_FS
+void socket_seq_show(struct seq_file *seq)
+{
+	int cpu;
+	int counter = 0;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		counter += per_cpu(sockets_in_use, cpu);
+
+	/* It can be negative, by the way. 8) */
+	if (counter < 0)
+		counter = 0;
+
+	seq_printf(seq, "sockets: used %d\n", counter);
+}
+#endif /* CONFIG_PROC_FS */
+
+/* ABI emulation layers need these two */
+EXPORT_SYMBOL(move_addr_to_kernel);
+EXPORT_SYMBOL(move_addr_to_user);
+EXPORT_SYMBOL(sock_create);
+EXPORT_SYMBOL(sock_create_kern);
+EXPORT_SYMBOL(sock_create_lite);
+EXPORT_SYMBOL(sock_map_fd);
+EXPORT_SYMBOL(sock_recvmsg);
+EXPORT_SYMBOL(sock_register);
+EXPORT_SYMBOL(sock_release);
+EXPORT_SYMBOL(sock_sendmsg);
+EXPORT_SYMBOL(sock_unregister);
+EXPORT_SYMBOL(sock_wake_async);
+EXPORT_SYMBOL(sockfd_lookup);
+EXPORT_SYMBOL(kernel_sendmsg);
+EXPORT_SYMBOL(kernel_recvmsg);
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
new file mode 100644
index 00000000000..46a2ce00a29
--- /dev/null
+++ b/net/sunrpc/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for Linux kernel SUN RPC
+#
+
+
+obj-$(CONFIG_SUNRPC) += sunrpc.o
+obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+
+sunrpc-y := clnt.o xprt.o sched.o \
+	    auth.o auth_null.o auth_unix.o \
+	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+	    pmap_clnt.o timer.o xdr.o \
+	    sunrpc_syms.o cache.o rpc_pipe.o
+sunrpc-$(CONFIG_PROC_FS) += stats.o
+sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
new file mode 100644
index 00000000000..9bcec9b927b
--- /dev/null
+++ b/net/sunrpc/auth.c
@@ -0,0 +1,395 @@
+/*
+ * linux/net/sunrpc/auth.c
+ *
+ * Generic RPC client authentication API.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/spinlock.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static struct rpc_authops *	auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+	&authnull_ops,		/* AUTH_NULL */
+	&authunix_ops,		/* AUTH_UNIX */
+	NULL,			/* others can be loadable modules */
+};
+
+static u32
+pseudoflavor_to_flavor(u32 flavor) {
+	if (flavor >= RPC_AUTH_MAXFLAVOR)
+		return RPC_AUTH_GSS;
+	return flavor;
+}
+
+int
+rpcauth_register(struct rpc_authops *ops)
+{
+	rpc_authflavor_t flavor;
+
+	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+		return -EINVAL;
+	if (auth_flavors[flavor] != NULL)
+		return -EPERM;		/* what else? */
+	auth_flavors[flavor] = ops;
+	return 0;
+}
+
+int
+rpcauth_unregister(struct rpc_authops *ops)
+{
+	rpc_authflavor_t flavor;
+
+	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+		return -EINVAL;
+	if (auth_flavors[flavor] != ops)
+		return -EPERM;		/* what else? */
+	auth_flavors[flavor] = NULL;
+	return 0;
+}
+
+struct rpc_auth *
+rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+{
+	struct rpc_auth		*auth;
+	struct rpc_authops	*ops;
+	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
+
+	if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
+		return NULL;
+	auth = ops->create(clnt, pseudoflavor);
+	if (!auth)
+		return NULL;
+	if (clnt->cl_auth)
+		rpcauth_destroy(clnt->cl_auth);
+	clnt->cl_auth = auth;
+	return auth;
+}
+
+void
+rpcauth_destroy(struct rpc_auth *auth)
+{
+	if (!atomic_dec_and_test(&auth->au_count))
+		return;
+	auth->au_ops->destroy(auth);
+}
+
+static DEFINE_SPINLOCK(rpc_credcache_lock);
+
+/*
+ * Initialize RPC credential cache
+ */
+int
+rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
+{
+	struct rpc_cred_cache *new;
+	int i;
+
+	new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+	for (i = 0; i < RPC_CREDCACHE_NR; i++)
+		INIT_HLIST_HEAD(&new->hashtable[i]);
+	new->expire = expire;
+	new->nextgc = jiffies + (expire >> 1);
+	auth->au_credcache = new;
+	return 0;
+}
+
+/*
+ * Destroy a list of credentials
+ */
+static inline
+void rpcauth_destroy_credlist(struct hlist_head *head)
+{
+	struct rpc_cred *cred;
+
+	while (!hlist_empty(head)) {
+		cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
+		hlist_del_init(&cred->cr_hash);
+		put_rpccred(cred);
+	}
+}
+
+/*
+ * Clear the RPC credential cache, and delete those credentials
+ * that are not referenced.
+ */
+void
+rpcauth_free_credcache(struct rpc_auth *auth)
+{
+	struct rpc_cred_cache *cache = auth->au_credcache;
+	HLIST_HEAD(free);
+	struct hlist_node *pos, *next;
+	struct rpc_cred	*cred;
+	int		i;
+
+	spin_lock(&rpc_credcache_lock);
+	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+		hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+			cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+			__hlist_del(&cred->cr_hash);
+			hlist_add_head(&cred->cr_hash, &free);
+		}
+	}
+	spin_unlock(&rpc_credcache_lock);
+	rpcauth_destroy_credlist(&free);
+}
+
+static void
+rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
+{
+	if (atomic_read(&cred->cr_count) != 1)
+	       return;
+	if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
+		__hlist_del(&cred->cr_hash);
+		hlist_add_head(&cred->cr_hash, free);
+	}
+}
+
+/*
+ * Remove stale credentials. Avoid sleeping inside the loop.
+ */
+static void
+rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
+{
+	struct rpc_cred_cache *cache = auth->au_credcache;
+	struct hlist_node *pos, *next;
+	struct rpc_cred	*cred;
+	int		i;
+
+	dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
+	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+		hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+			cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+			rpcauth_prune_expired(auth, cred, free);
+		}
+	}
+	cache->nextgc = jiffies + cache->expire;
+}
+
+/*
+ * Look up a process' credentials in the authentication cache
+ */
+struct rpc_cred *
+rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
+		int taskflags)
+{
+	struct rpc_cred_cache *cache = auth->au_credcache;
+	HLIST_HEAD(free);
+	struct hlist_node *pos, *next;
+	struct rpc_cred	*new = NULL,
+			*cred = NULL;
+	int		nr = 0;
+
+	if (!(taskflags & RPC_TASK_ROOTCREDS))
+		nr = acred->uid & RPC_CREDCACHE_MASK;
+retry:
+	spin_lock(&rpc_credcache_lock);
+	if (time_before(cache->nextgc, jiffies))
+		rpcauth_gc_credcache(auth, &free);
+	hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
+		struct rpc_cred *entry;
+	       	entry = hlist_entry(pos, struct rpc_cred, cr_hash);
+		if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
+			hlist_del(&entry->cr_hash);
+			cred = entry;
+			break;
+		}
+		rpcauth_prune_expired(auth, entry, &free);
+	}
+	if (new) {
+		if (cred)
+			hlist_add_head(&new->cr_hash, &free);
+		else
+			cred = new;
+	}
+	if (cred) {
+		hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
+		get_rpccred(cred);
+	}
+	spin_unlock(&rpc_credcache_lock);
+
+	rpcauth_destroy_credlist(&free);
+
+	if (!cred) {
+		new = auth->au_ops->crcreate(auth, acred, taskflags);
+		if (!IS_ERR(new)) {
+#ifdef RPC_DEBUG
+			new->cr_magic = RPCAUTH_CRED_MAGIC;
+#endif
+			goto retry;
+		} else
+			cred = new;
+	}
+
+	return (struct rpc_cred *) cred;
+}
+
+struct rpc_cred *
+rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
+{
+	struct auth_cred acred = {
+		.uid = current->fsuid,
+		.gid = current->fsgid,
+		.group_info = current->group_info,
+	};
+	struct rpc_cred *ret;
+
+	dprintk("RPC:     looking up %s cred\n",
+		auth->au_ops->au_name);
+	get_group_info(acred.group_info);
+	ret = auth->au_ops->lookup_cred(auth, &acred, taskflags);
+	put_group_info(acred.group_info);
+	return ret;
+}
+
+struct rpc_cred *
+rpcauth_bindcred(struct rpc_task *task)
+{
+	struct rpc_auth *auth = task->tk_auth;
+	struct auth_cred acred = {
+		.uid = current->fsuid,
+		.gid = current->fsgid,
+		.group_info = current->group_info,
+	};
+	struct rpc_cred *ret;
+
+	dprintk("RPC: %4d looking up %s cred\n",
+		task->tk_pid, task->tk_auth->au_ops->au_name);
+	get_group_info(acred.group_info);
+	ret = auth->au_ops->lookup_cred(auth, &acred, task->tk_flags);
+	if (!IS_ERR(ret))
+		task->tk_msg.rpc_cred = ret;
+	else
+		task->tk_status = PTR_ERR(ret);
+	put_group_info(acred.group_info);
+	return ret;
+}
+
+void
+rpcauth_holdcred(struct rpc_task *task)
+{
+	dprintk("RPC: %4d holding %s cred %p\n",
+		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
+	if (task->tk_msg.rpc_cred)
+		get_rpccred(task->tk_msg.rpc_cred);
+}
+
+void
+put_rpccred(struct rpc_cred *cred)
+{
+	cred->cr_expire = jiffies;
+	if (!atomic_dec_and_test(&cred->cr_count))
+		return;
+	cred->cr_ops->crdestroy(cred);
+}
+
+void
+rpcauth_unbindcred(struct rpc_task *task)
+{
+	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+
+	dprintk("RPC: %4d releasing %s cred %p\n",
+		task->tk_pid, auth->au_ops->au_name, cred);
+
+	put_rpccred(cred);
+	task->tk_msg.rpc_cred = NULL;
+}
+
+u32 *
+rpcauth_marshcred(struct rpc_task *task, u32 *p)
+{
+	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+
+	dprintk("RPC: %4d marshaling %s cred %p\n",
+		task->tk_pid, auth->au_ops->au_name, cred);
+	return cred->cr_ops->crmarshal(task, p);
+}
+
+u32 *
+rpcauth_checkverf(struct rpc_task *task, u32 *p)
+{
+	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+
+	dprintk("RPC: %4d validating %s cred %p\n",
+		task->tk_pid, auth->au_ops->au_name, cred);
+	return cred->cr_ops->crvalidate(task, p);
+}
+
+int
+rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+		u32 *data, void *obj)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+	dprintk("RPC: %4d using %s cred %p to wrap rpc data\n",
+			task->tk_pid, cred->cr_ops->cr_name, cred);
+	if (cred->cr_ops->crwrap_req)
+		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
+	/* By default, we encode the arguments normally. */
+	return encode(rqstp, data, obj);
+}
+
+int
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+		u32 *data, void *obj)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+	dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n",
+			task->tk_pid, cred->cr_ops->cr_name, cred);
+	if (cred->cr_ops->crunwrap_resp)
+		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
+						   data, obj);
+	/* By default, we decode the arguments normally. */
+	return decode(rqstp, data, obj);
+}
+
+int
+rpcauth_refreshcred(struct rpc_task *task)
+{
+	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
+	int err;
+
+	dprintk("RPC: %4d refreshing %s cred %p\n",
+		task->tk_pid, auth->au_ops->au_name, cred);
+	err = cred->cr_ops->crrefresh(task);
+	if (err < 0)
+		task->tk_status = err;
+	return err;
+}
+
+void
+rpcauth_invalcred(struct rpc_task *task)
+{
+	dprintk("RPC: %4d invalidating %s cred %p\n",
+		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
+	spin_lock(&rpc_credcache_lock);
+	if (task->tk_msg.rpc_cred)
+		task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	spin_unlock(&rpc_credcache_lock);
+}
+
+int
+rpcauth_uptodatecred(struct rpc_task *task)
+{
+	return !(task->tk_msg.rpc_cred) ||
+		(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
+}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
new file mode 100644
index 00000000000..fe1b874084b
--- /dev/null
+++ b/net/sunrpc/auth_gss/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for Linux kernel rpcsec_gss implementation
+#
+
+obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
+
+auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
+	gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
+
+obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
+
+rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+	gss_krb5_seqnum.o
+
+obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
+
+rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
+	gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
new file mode 100644
index 00000000000..a33b627cbef
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -0,0 +1,1152 @@
+/*
+ * linux/net/sunrpc/auth_gss.c
+ *
+ * RPCSEC_GSS client authentication.
+ * 
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Dug Song       <dugsong@monkey.org>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/auth_gss.h>
+#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/gss_err.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/gss_api.h>
+#include <asm/uaccess.h>
+
+static struct rpc_authops authgss_ops;
+
+static struct rpc_credops gss_credops;
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+#define NFS_NGROUPS	16
+
+#define GSS_CRED_EXPIRE		(60 * HZ)	/* XXX: reasonable? */
+#define GSS_CRED_SLACK		1024		/* XXX: unused */
+/* length of a krb5 verifier (48), plus data added before arguments when
+ * using integrity (two 4-byte integers): */
+#define GSS_VERF_SLACK		56
+
+/* XXX this define must match the gssd define
+* as it is passed to gssd to signal the use of
+* machine creds should be part of the shared rpc interface */
+
+#define CA_RUN_AS_MACHINE  0x00000200 
+
+/* dump the buffer in `emacs-hexl' style */
+#define isprint(c)      ((c > 0x1f) && (c < 0x7f))
+
+static DEFINE_RWLOCK(gss_ctx_lock);
+
+struct gss_auth {
+	struct rpc_auth rpc_auth;
+	struct gss_api_mech *mech;
+	enum rpc_gss_svc service;
+	struct list_head upcalls;
+	struct rpc_clnt *client;
+	struct dentry *dentry;
+	char path[48];
+	spinlock_t lock;
+};
+
+static void gss_destroy_ctx(struct gss_cl_ctx *);
+static struct rpc_pipe_ops gss_upcall_ops;
+
+void
+print_hexl(u32 *p, u_int length, u_int offset)
+{
+	u_int i, j, jm;
+	u8 c, *cp;
+	
+	dprintk("RPC: print_hexl: length %d\n",length);
+	dprintk("\n");
+	cp = (u8 *) p;
+	
+	for (i = 0; i < length; i += 0x10) {
+		dprintk("  %04x: ", (u_int)(i + offset));
+		jm = length - i;
+		jm = jm > 16 ? 16 : jm;
+		
+		for (j = 0; j < jm; j++) {
+			if ((j % 2) == 1)
+				dprintk("%02x ", (u_int)cp[i+j]);
+			else
+				dprintk("%02x", (u_int)cp[i+j]);
+		}
+		for (; j < 16; j++) {
+			if ((j % 2) == 1)
+				dprintk("   ");
+			else
+				dprintk("  ");
+		}
+		dprintk(" ");
+		
+		for (j = 0; j < jm; j++) {
+			c = cp[i+j];
+			c = isprint(c) ? c : '.';
+			dprintk("%c", c);
+		}
+		dprintk("\n");
+	}
+}
+
+EXPORT_SYMBOL(print_hexl);
+
+static inline struct gss_cl_ctx *
+gss_get_ctx(struct gss_cl_ctx *ctx)
+{
+	atomic_inc(&ctx->count);
+	return ctx;
+}
+
+static inline void
+gss_put_ctx(struct gss_cl_ctx *ctx)
+{
+	if (atomic_dec_and_test(&ctx->count))
+		gss_destroy_ctx(ctx);
+}
+
+static void
+gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
+{
+	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+	struct gss_cl_ctx *old;
+	write_lock(&gss_ctx_lock);
+	old = gss_cred->gc_ctx;
+	gss_cred->gc_ctx = ctx;
+	cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+	write_unlock(&gss_ctx_lock);
+	if (old)
+		gss_put_ctx(old);
+}
+
+static int
+gss_cred_is_uptodate_ctx(struct rpc_cred *cred)
+{
+	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+	int res = 0;
+
+	read_lock(&gss_ctx_lock);
+	if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
+		res = 1;
+	read_unlock(&gss_ctx_lock);
+	return res;
+}
+
+static const void *
+simple_get_bytes(const void *p, const void *end, void *res, size_t len)
+{
+	const void *q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	memcpy(res, p, len);
+	return q;
+}
+
+static inline const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+{
+	const void *q;
+	unsigned int len;
+
+	p = simple_get_bytes(p, end, &len, sizeof(len));
+	if (IS_ERR(p))
+		return p;
+	q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	dest->data = kmalloc(len, GFP_KERNEL);
+	if (unlikely(dest->data == NULL))
+		return ERR_PTR(-ENOMEM);
+	dest->len = len;
+	memcpy(dest->data, p, len);
+	return q;
+}
+
+static struct gss_cl_ctx *
+gss_cred_get_ctx(struct rpc_cred *cred)
+{
+	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+	struct gss_cl_ctx *ctx = NULL;
+
+	read_lock(&gss_ctx_lock);
+	if (gss_cred->gc_ctx)
+		ctx = gss_get_ctx(gss_cred->gc_ctx);
+	read_unlock(&gss_ctx_lock);
+	return ctx;
+}
+
+static struct gss_cl_ctx *
+gss_alloc_context(void)
+{
+	struct gss_cl_ctx *ctx;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx != NULL) {
+		memset(ctx, 0, sizeof(*ctx));
+		ctx->gc_proc = RPC_GSS_PROC_DATA;
+		ctx->gc_seq = 1;	/* NetApp 6.4R1 doesn't accept seq. no. 0 */
+		spin_lock_init(&ctx->gc_seq_lock);
+		atomic_set(&ctx->count,1);
+	}
+	return ctx;
+}
+
+#define GSSD_MIN_TIMEOUT (60 * 60)
+static const void *
+gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm)
+{
+	const void *q;
+	unsigned int seclen;
+	unsigned int timeout;
+	u32 window_size;
+	int ret;
+
+	/* First unsigned int gives the lifetime (in seconds) of the cred */
+	p = simple_get_bytes(p, end, &timeout, sizeof(timeout));
+	if (IS_ERR(p))
+		goto err;
+	if (timeout == 0)
+		timeout = GSSD_MIN_TIMEOUT;
+	ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4;
+	/* Sequence number window. Determines the maximum number of simultaneous requests */
+	p = simple_get_bytes(p, end, &window_size, sizeof(window_size));
+	if (IS_ERR(p))
+		goto err;
+	ctx->gc_win = window_size;
+	/* gssd signals an error by passing ctx->gc_win = 0: */
+	if (ctx->gc_win == 0) {
+		/* in which case, p points to  an error code which we ignore */
+		p = ERR_PTR(-EACCES);
+		goto err;
+	}
+	/* copy the opaque wire context */
+	p = simple_get_netobj(p, end, &ctx->gc_wire_ctx);
+	if (IS_ERR(p))
+		goto err;
+	/* import the opaque security context */
+	p  = simple_get_bytes(p, end, &seclen, sizeof(seclen));
+	if (IS_ERR(p))
+		goto err;
+	q = (const void *)((const char *)p + seclen);
+	if (unlikely(q > end || q < p)) {
+		p = ERR_PTR(-EFAULT);
+		goto err;
+	}
+	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
+	if (ret < 0) {
+		p = ERR_PTR(ret);
+		goto err;
+	}
+	return q;
+err:
+	dprintk("RPC:      gss_fill_context returning %ld\n", -PTR_ERR(p));
+	return p;
+}
+
+
+struct gss_upcall_msg {
+	atomic_t count;
+	uid_t	uid;
+	struct rpc_pipe_msg msg;
+	struct list_head list;
+	struct gss_auth *auth;
+	struct rpc_wait_queue rpc_waitqueue;
+	wait_queue_head_t waitqueue;
+	struct gss_cl_ctx *ctx;
+};
+
+static void
+gss_release_msg(struct gss_upcall_msg *gss_msg)
+{
+	if (!atomic_dec_and_test(&gss_msg->count))
+		return;
+	BUG_ON(!list_empty(&gss_msg->list));
+	if (gss_msg->ctx != NULL)
+		gss_put_ctx(gss_msg->ctx);
+	kfree(gss_msg);
+}
+
+static struct gss_upcall_msg *
+__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
+{
+	struct gss_upcall_msg *pos;
+	list_for_each_entry(pos, &gss_auth->upcalls, list) {
+		if (pos->uid != uid)
+			continue;
+		atomic_inc(&pos->count);
+		dprintk("RPC:      gss_find_upcall found msg %p\n", pos);
+		return pos;
+	}
+	dprintk("RPC:      gss_find_upcall found nothing\n");
+	return NULL;
+}
+
+/* Try to add a upcall to the pipefs queue.
+ * If an upcall owned by our uid already exists, then we return a reference
+ * to that upcall instead of adding the new upcall.
+ */
+static inline struct gss_upcall_msg *
+gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
+{
+	struct gss_upcall_msg *old;
+
+	spin_lock(&gss_auth->lock);
+	old = __gss_find_upcall(gss_auth, gss_msg->uid);
+	if (old == NULL) {
+		atomic_inc(&gss_msg->count);
+		list_add(&gss_msg->list, &gss_auth->upcalls);
+	} else
+		gss_msg = old;
+	spin_unlock(&gss_auth->lock);
+	return gss_msg;
+}
+
+static void
+__gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+{
+	if (list_empty(&gss_msg->list))
+		return;
+	list_del_init(&gss_msg->list);
+	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+	wake_up_all(&gss_msg->waitqueue);
+	atomic_dec(&gss_msg->count);
+}
+
+static void
+gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+{
+	struct gss_auth *gss_auth = gss_msg->auth;
+
+	spin_lock(&gss_auth->lock);
+	__gss_unhash_msg(gss_msg);
+	spin_unlock(&gss_auth->lock);
+}
+
+static void
+gss_upcall_callback(struct rpc_task *task)
+{
+	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+			struct gss_cred, gc_base);
+	struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
+
+	BUG_ON(gss_msg == NULL);
+	if (gss_msg->ctx)
+		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
+	else
+		task->tk_status = gss_msg->msg.errno;
+	spin_lock(&gss_msg->auth->lock);
+	gss_cred->gc_upcall = NULL;
+	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+	spin_unlock(&gss_msg->auth->lock);
+	gss_release_msg(gss_msg);
+}
+
+static inline struct gss_upcall_msg *
+gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
+{
+	struct gss_upcall_msg *gss_msg;
+
+	gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL);
+	if (gss_msg != NULL) {
+		memset(gss_msg, 0, sizeof(*gss_msg));
+		INIT_LIST_HEAD(&gss_msg->list);
+		rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
+		init_waitqueue_head(&gss_msg->waitqueue);
+		atomic_set(&gss_msg->count, 1);
+		gss_msg->msg.data = &gss_msg->uid;
+		gss_msg->msg.len = sizeof(gss_msg->uid);
+		gss_msg->uid = uid;
+		gss_msg->auth = gss_auth;
+	}
+	return gss_msg;
+}
+
+static struct gss_upcall_msg *
+gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
+{
+	struct gss_upcall_msg *gss_new, *gss_msg;
+
+	gss_new = gss_alloc_msg(gss_auth, cred->cr_uid);
+	if (gss_new == NULL)
+		return ERR_PTR(-ENOMEM);
+	gss_msg = gss_add_msg(gss_auth, gss_new);
+	if (gss_msg == gss_new) {
+		int res = rpc_queue_upcall(gss_auth->dentry->d_inode, &gss_new->msg);
+		if (res) {
+			gss_unhash_msg(gss_new);
+			gss_msg = ERR_PTR(res);
+		}
+	} else
+		gss_release_msg(gss_new);
+	return gss_msg;
+}
+
+static inline int
+gss_refresh_upcall(struct rpc_task *task)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
+			struct gss_auth, rpc_auth);
+	struct gss_cred *gss_cred = container_of(cred,
+			struct gss_cred, gc_base);
+	struct gss_upcall_msg *gss_msg;
+	int err = 0;
+
+	dprintk("RPC: %4u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid);
+	gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
+	if (IS_ERR(gss_msg)) {
+		err = PTR_ERR(gss_msg);
+		goto out;
+	}
+	spin_lock(&gss_auth->lock);
+	if (gss_cred->gc_upcall != NULL)
+		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
+	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+		task->tk_timeout = 0;
+		gss_cred->gc_upcall = gss_msg;
+		/* gss_upcall_callback will release the reference to gss_upcall_msg */
+		atomic_inc(&gss_msg->count);
+		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
+	} else
+		err = gss_msg->msg.errno;
+	spin_unlock(&gss_auth->lock);
+	gss_release_msg(gss_msg);
+out:
+	dprintk("RPC: %4u gss_refresh_upcall for uid %u result %d\n", task->tk_pid,
+			cred->cr_uid, err);
+	return err;
+}
+
+static inline int
+gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+	struct rpc_cred *cred = &gss_cred->gc_base;
+	struct gss_upcall_msg *gss_msg;
+	DEFINE_WAIT(wait);
+	int err = 0;
+
+	dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid);
+	gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
+	if (IS_ERR(gss_msg)) {
+		err = PTR_ERR(gss_msg);
+		goto out;
+	}
+	for (;;) {
+		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+		spin_lock(&gss_auth->lock);
+		if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
+			spin_unlock(&gss_auth->lock);
+			break;
+		}
+		spin_unlock(&gss_auth->lock);
+		if (signalled()) {
+			err = -ERESTARTSYS;
+			goto out_intr;
+		}
+		schedule();
+	}
+	if (gss_msg->ctx)
+		gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx));
+	else
+		err = gss_msg->msg.errno;
+out_intr:
+	finish_wait(&gss_msg->waitqueue, &wait);
+	gss_release_msg(gss_msg);
+out:
+	dprintk("RPC: gss_create_upcall for uid %u result %d\n", cred->cr_uid, err);
+	return err;
+}
+
+static ssize_t
+gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+		char __user *dst, size_t buflen)
+{
+	char *data = (char *)msg->data + msg->copied;
+	ssize_t mlen = msg->len;
+	ssize_t left;
+
+	if (mlen > buflen)
+		mlen = buflen;
+	left = copy_to_user(dst, data, mlen);
+	if (left < 0) {
+		msg->errno = left;
+		return left;
+	}
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+	return mlen;
+}
+
+#define MSG_BUF_MAXSIZE 1024
+
+static ssize_t
+gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+	const void *p, *end;
+	void *buf;
+	struct rpc_clnt *clnt;
+	struct gss_auth *gss_auth;
+	struct rpc_cred *cred;
+	struct gss_upcall_msg *gss_msg;
+	struct gss_cl_ctx *ctx;
+	uid_t uid;
+	int err = -EFBIG;
+
+	if (mlen > MSG_BUF_MAXSIZE)
+		goto out;
+	err = -ENOMEM;
+	buf = kmalloc(mlen, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	clnt = RPC_I(filp->f_dentry->d_inode)->private;
+	err = -EFAULT;
+	if (copy_from_user(buf, src, mlen))
+		goto err;
+
+	end = (const void *)((char *)buf + mlen);
+	p = simple_get_bytes(buf, end, &uid, sizeof(uid));
+	if (IS_ERR(p)) {
+		err = PTR_ERR(p);
+		goto err;
+	}
+
+	err = -ENOMEM;
+	ctx = gss_alloc_context();
+	if (ctx == NULL)
+		goto err;
+	err = 0;
+	gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
+	p = gss_fill_context(p, end, ctx, gss_auth->mech);
+	if (IS_ERR(p)) {
+		err = PTR_ERR(p);
+		if (err != -EACCES)
+			goto err_put_ctx;
+	}
+	spin_lock(&gss_auth->lock);
+	gss_msg = __gss_find_upcall(gss_auth, uid);
+	if (gss_msg) {
+		if (err == 0 && gss_msg->ctx == NULL)
+			gss_msg->ctx = gss_get_ctx(ctx);
+		gss_msg->msg.errno = err;
+		__gss_unhash_msg(gss_msg);
+		spin_unlock(&gss_auth->lock);
+		gss_release_msg(gss_msg);
+	} else {
+		struct auth_cred acred = { .uid = uid };
+		spin_unlock(&gss_auth->lock);
+		cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, 0);
+		if (IS_ERR(cred)) {
+			err = PTR_ERR(cred);
+			goto err_put_ctx;
+		}
+		gss_cred_set_ctx(cred, gss_get_ctx(ctx));
+	}
+	gss_put_ctx(ctx);
+	kfree(buf);
+	dprintk("RPC:      gss_pipe_downcall returning length %Zu\n", mlen);
+	return mlen;
+err_put_ctx:
+	gss_put_ctx(ctx);
+err:
+	kfree(buf);
+out:
+	dprintk("RPC:      gss_pipe_downcall returning %d\n", err);
+	return err;
+}
+
+static void
+gss_pipe_release(struct inode *inode)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+	struct rpc_clnt *clnt;
+	struct rpc_auth *auth;
+	struct gss_auth *gss_auth;
+
+	clnt = rpci->private;
+	auth = clnt->cl_auth;
+	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+	spin_lock(&gss_auth->lock);
+	while (!list_empty(&gss_auth->upcalls)) {
+		struct gss_upcall_msg *gss_msg;
+
+		gss_msg = list_entry(gss_auth->upcalls.next,
+				struct gss_upcall_msg, list);
+		gss_msg->msg.errno = -EPIPE;
+		atomic_inc(&gss_msg->count);
+		__gss_unhash_msg(gss_msg);
+		spin_unlock(&gss_auth->lock);
+		gss_release_msg(gss_msg);
+		spin_lock(&gss_auth->lock);
+	}
+	spin_unlock(&gss_auth->lock);
+}
+
+static void
+gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+	struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
+	static unsigned long ratelimit;
+
+	if (msg->errno < 0) {
+		dprintk("RPC:      gss_pipe_destroy_msg releasing msg %p\n",
+				gss_msg);
+		atomic_inc(&gss_msg->count);
+		gss_unhash_msg(gss_msg);
+		if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+			unsigned long now = jiffies;
+			if (time_after(now, ratelimit)) {
+				printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
+						    "Please check user daemon is running!\n");
+				ratelimit = now + 15*HZ;
+			}
+		}
+		gss_release_msg(gss_msg);
+	}
+}
+
+/* 
+ * NOTE: we have the opportunity to use different 
+ * parameters based on the input flavor (which must be a pseudoflavor)
+ */
+static struct rpc_auth *
+gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+{
+	struct gss_auth *gss_auth;
+	struct rpc_auth * auth;
+
+	dprintk("RPC:      creating GSS authenticator for client %p\n",clnt);
+
+	if (!try_module_get(THIS_MODULE))
+		return NULL;
+	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
+		goto out_dec;
+	gss_auth->client = clnt;
+	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
+	if (!gss_auth->mech) {
+		printk(KERN_WARNING "%s: Pseudoflavor %d not found!",
+				__FUNCTION__, flavor);
+		goto err_free;
+	}
+	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
+	/* FIXME: Will go away once privacy support is merged in */
+	if (gss_auth->service == RPC_GSS_SVC_PRIVACY)
+		gss_auth->service = RPC_GSS_SVC_INTEGRITY;
+	INIT_LIST_HEAD(&gss_auth->upcalls);
+	spin_lock_init(&gss_auth->lock);
+	auth = &gss_auth->rpc_auth;
+	auth->au_cslack = GSS_CRED_SLACK >> 2;
+	auth->au_rslack = GSS_VERF_SLACK >> 2;
+	auth->au_ops = &authgss_ops;
+	auth->au_flavor = flavor;
+	atomic_set(&auth->au_count, 1);
+
+	if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0)
+		goto err_put_mech;
+
+	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
+			clnt->cl_pathname,
+			gss_auth->mech->gm_name);
+	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	if (IS_ERR(gss_auth->dentry))
+		goto err_put_mech;
+
+	return auth;
+err_put_mech:
+	gss_mech_put(gss_auth->mech);
+err_free:
+	kfree(gss_auth);
+out_dec:
+	module_put(THIS_MODULE);
+	return NULL;
+}
+
+static void
+gss_destroy(struct rpc_auth *auth)
+{
+	struct gss_auth *gss_auth;
+
+	dprintk("RPC:      destroying GSS authenticator %p flavor %d\n",
+		auth, auth->au_flavor);
+
+	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+	rpc_unlink(gss_auth->path);
+	gss_mech_put(gss_auth->mech);
+
+	rpcauth_free_credcache(auth);
+	kfree(gss_auth);
+	module_put(THIS_MODULE);
+}
+
+/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure
+ * to create a new cred or context, so they check that things have been
+ * allocated before freeing them. */
+static void
+gss_destroy_ctx(struct gss_cl_ctx *ctx)
+{
+	dprintk("RPC:      gss_destroy_ctx\n");
+
+	if (ctx->gc_gss_ctx)
+		gss_delete_sec_context(&ctx->gc_gss_ctx);
+
+	kfree(ctx->gc_wire_ctx.data);
+	kfree(ctx);
+}
+
+static void
+gss_destroy_cred(struct rpc_cred *rc)
+{
+	struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
+
+	dprintk("RPC:      gss_destroy_cred \n");
+
+	if (cred->gc_ctx)
+		gss_put_ctx(cred->gc_ctx);
+	kfree(cred);
+}
+
+/*
+ * Lookup RPCSEC_GSS cred for the current process
+ */
+static struct rpc_cred *
+gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
+{
+	return rpcauth_lookup_credcache(auth, acred, taskflags);
+}
+
+static struct rpc_cred *
+gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
+{
+	struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+	struct gss_cred	*cred = NULL;
+	int err = -ENOMEM;
+
+	dprintk("RPC:      gss_create_cred for uid %d, flavor %d\n",
+		acred->uid, auth->au_flavor);
+
+	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+		goto out_err;
+
+	memset(cred, 0, sizeof(*cred));
+	atomic_set(&cred->gc_count, 1);
+	cred->gc_uid = acred->uid;
+	/*
+	 * Note: in order to force a call to call_refresh(), we deliberately
+	 * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
+	 */
+	cred->gc_flags = 0;
+	cred->gc_base.cr_ops = &gss_credops;
+	cred->gc_service = gss_auth->service;
+	err = gss_create_upcall(gss_auth, cred);
+	if (err < 0)
+		goto out_err;
+
+	return &cred->gc_base;
+
+out_err:
+	dprintk("RPC:      gss_create_cred failed with error %d\n", err);
+	if (cred) gss_destroy_cred(&cred->gc_base);
+	return ERR_PTR(err);
+}
+
+static int
+gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags)
+{
+	struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+
+	/* Don't match with creds that have expired. */
+	if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+		return 0;
+	return (rc->cr_uid == acred->uid);
+}
+
+/*
+* Marshal credentials.
+* Maybe we should keep a cached credential for performance reasons.
+*/
+static u32 *
+gss_marshal(struct rpc_task *task, u32 *p)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
+						 gc_base);
+	struct gss_cl_ctx	*ctx = gss_cred_get_ctx(cred);
+	u32		*cred_len;
+	struct rpc_rqst *req = task->tk_rqstp;
+	u32             maj_stat = 0;
+	struct xdr_netobj mic;
+	struct kvec	iov;
+	struct xdr_buf	verf_buf;
+
+	dprintk("RPC: %4u gss_marshal\n", task->tk_pid);
+
+	*p++ = htonl(RPC_AUTH_GSS);
+	cred_len = p++;
+
+	spin_lock(&ctx->gc_seq_lock);
+	req->rq_seqno = ctx->gc_seq++;
+	spin_unlock(&ctx->gc_seq_lock);
+
+	*p++ = htonl((u32) RPC_GSS_VERSION);
+	*p++ = htonl((u32) ctx->gc_proc);
+	*p++ = htonl((u32) req->rq_seqno);
+	*p++ = htonl((u32) gss_cred->gc_service);
+	p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
+	*cred_len = htonl((p - (cred_len + 1)) << 2);
+
+	/* We compute the checksum for the verifier over the xdr-encoded bytes
+	 * starting with the xid and ending at the end of the credential: */
+	iov.iov_base = req->rq_snd_buf.head[0].iov_base;
+	if (task->tk_client->cl_xprt->stream)
+		/* See clnt.c:call_header() */
+		iov.iov_base += 4;
+	iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
+	xdr_buf_from_iov(&iov, &verf_buf);
+
+	/* set verifier flavor*/
+	*p++ = htonl(RPC_AUTH_GSS);
+
+	mic.data = (u8 *)(p + 1);
+	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
+			       GSS_C_QOP_DEFAULT, 
+			       &verf_buf, &mic);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	} else if (maj_stat != 0) {
+		printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
+		goto out_put_ctx;
+	}
+	p = xdr_encode_opaque(p, NULL, mic.len);
+	gss_put_ctx(ctx);
+	return p;
+out_put_ctx:
+	gss_put_ctx(ctx);
+	return NULL;
+}
+
+/*
+* Refresh credentials. XXX - finish
+*/
+static int
+gss_refresh(struct rpc_task *task)
+{
+
+	if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred))
+		return gss_refresh_upcall(task);
+	return 0;
+}
+
+static u32 *
+gss_validate(struct rpc_task *task, u32 *p)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
+						gc_base);
+	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	u32		seq, qop_state;
+	struct kvec	iov;
+	struct xdr_buf	verf_buf;
+	struct xdr_netobj mic;
+	u32		flav,len;
+	u32		maj_stat;
+
+	dprintk("RPC: %4u gss_validate\n", task->tk_pid);
+
+	flav = ntohl(*p++);
+	if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
+                goto out_bad;
+	if (flav != RPC_AUTH_GSS)
+		goto out_bad;
+	seq = htonl(task->tk_rqstp->rq_seqno);
+	iov.iov_base = &seq;
+	iov.iov_len = sizeof(seq);
+	xdr_buf_from_iov(&iov, &verf_buf);
+	mic.data = (u8 *)p;
+	mic.len = len;
+
+	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (maj_stat)
+		goto out_bad;
+       switch (gss_cred->gc_service) {
+       case RPC_GSS_SVC_NONE:
+	       /* verifier data, flavor, length: */
+	       task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2;
+	       break;
+       case RPC_GSS_SVC_INTEGRITY:
+	       /* verifier data, flavor, length, length, sequence number: */
+	       task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
+	       break;
+       case RPC_GSS_SVC_PRIVACY:
+	       goto out_bad;
+       }
+	gss_put_ctx(ctx);
+	dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n",
+			task->tk_pid);
+	return p + XDR_QUADLEN(len);
+out_bad:
+	gss_put_ctx(ctx);
+	dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid);
+	return NULL;
+}
+
+static inline int
+gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+{
+	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
+	struct xdr_buf	integ_buf;
+	u32             *integ_len = NULL;
+	struct xdr_netobj mic;
+	u32		offset, *q;
+	struct kvec	*iov;
+	u32             maj_stat = 0;
+	int		status = -EIO;
+
+	integ_len = p++;
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
+	*p++ = htonl(rqstp->rq_seqno);
+
+	status = encode(rqstp, p, obj);
+	if (status)
+		return status;
+
+	if (xdr_buf_subsegment(snd_buf, &integ_buf,
+				offset, snd_buf->len - offset))
+		return status;
+	*integ_len = htonl(integ_buf.len);
+
+	/* guess whether we're in the head or the tail: */
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len) 
+		iov = snd_buf->tail;
+	else
+		iov = snd_buf->head;
+	p = iov->iov_base + iov->iov_len;
+	mic.data = (u8 *)(p + 1);
+
+	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
+			GSS_C_QOP_DEFAULT, &integ_buf, &mic);
+	status = -EIO; /* XXX? */
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	else if (maj_stat)
+		return status;
+	q = xdr_encode_opaque(p, NULL, mic.len);
+
+	offset = (u8 *)q - (u8 *)p;
+	iov->iov_len += offset;
+	snd_buf->len += offset;
+	return 0;
+}
+
+static int
+gss_wrap_req(struct rpc_task *task,
+	     kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
+			gc_base);
+	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	int             status = -EIO;
+
+	dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid);
+	if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
+		/* The spec seems a little ambiguous here, but I think that not
+		 * wrapping context destruction requests makes the most sense.
+		 */
+		status = encode(rqstp, p, obj);
+		goto out;
+	}
+	switch (gss_cred->gc_service) {
+		case RPC_GSS_SVC_NONE:
+			status = encode(rqstp, p, obj);
+			break;
+		case RPC_GSS_SVC_INTEGRITY:
+			status = gss_wrap_req_integ(cred, ctx, encode,
+								rqstp, p, obj);
+			break;
+       		case RPC_GSS_SVC_PRIVACY:
+			break;
+	}
+out:
+	gss_put_ctx(ctx);
+	dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status);
+	return status;
+}
+
+static inline int
+gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		struct rpc_rqst *rqstp, u32 **p)
+{
+	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
+	struct xdr_buf integ_buf;
+	struct xdr_netobj mic;
+	u32 data_offset, mic_offset;
+	u32 integ_len;
+	u32 maj_stat;
+	int status = -EIO;
+
+	integ_len = ntohl(*(*p)++);
+	if (integ_len & 3)
+		return status;
+	data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+	mic_offset = integ_len + data_offset;
+	if (mic_offset > rcv_buf->len)
+		return status;
+	if (ntohl(*(*p)++) != rqstp->rq_seqno)
+		return status;
+
+	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
+				mic_offset - data_offset))
+		return status;
+
+	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
+		return status;
+
+	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
+			&mic, NULL);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (maj_stat != GSS_S_COMPLETE)
+		return status;
+	return 0;
+}
+
+static int
+gss_unwrap_resp(struct rpc_task *task,
+		kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
+{
+	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
+			gc_base);
+	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	int             status = -EIO;
+
+	if (ctx->gc_proc != RPC_GSS_PROC_DATA)
+		goto out_decode;
+	switch (gss_cred->gc_service) {
+		case RPC_GSS_SVC_NONE:
+			break;
+		case RPC_GSS_SVC_INTEGRITY:
+			status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
+			if (status)
+				goto out;
+			break;
+       		case RPC_GSS_SVC_PRIVACY:
+			break;
+	}
+out_decode:
+	status = decode(rqstp, p, obj);
+out:
+	gss_put_ctx(ctx);
+	dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid,
+			status);
+	return status;
+}
+  
+static struct rpc_authops authgss_ops = {
+	.owner		= THIS_MODULE,
+	.au_flavor	= RPC_AUTH_GSS,
+#ifdef RPC_DEBUG
+	.au_name	= "RPCSEC_GSS",
+#endif
+	.create		= gss_create,
+	.destroy	= gss_destroy,
+	.lookup_cred	= gss_lookup_cred,
+	.crcreate	= gss_create_cred
+};
+
+static struct rpc_credops gss_credops = {
+	.cr_name	= "AUTH_GSS",
+	.crdestroy	= gss_destroy_cred,
+	.crmatch	= gss_match,
+	.crmarshal	= gss_marshal,
+	.crrefresh	= gss_refresh,
+	.crvalidate	= gss_validate,
+	.crwrap_req	= gss_wrap_req,
+	.crunwrap_resp	= gss_unwrap_resp,
+};
+
+static struct rpc_pipe_ops gss_upcall_ops = {
+	.upcall		= gss_pipe_upcall,
+	.downcall	= gss_pipe_downcall,
+	.destroy_msg	= gss_pipe_destroy_msg,
+	.release_pipe	= gss_pipe_release,
+};
+
+/*
+ * Initialize RPCSEC_GSS module
+ */
+static int __init init_rpcsec_gss(void)
+{
+	int err = 0;
+
+	err = rpcauth_register(&authgss_ops);
+	if (err)
+		goto out;
+	err = gss_svc_init();
+	if (err)
+		goto out_unregister;
+	return 0;
+out_unregister:
+	rpcauth_unregister(&authgss_ops);
+out:
+	return err;
+}
+
+static void __exit exit_rpcsec_gss(void)
+{
+	gss_svc_shutdown();
+	rpcauth_unregister(&authgss_ops);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_rpcsec_gss)
+module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
new file mode 100644
index 00000000000..826df44e7fc
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -0,0 +1,235 @@
+/*
+ *  linux/net/sunrpc/gss_generic_token.c
+ *
+ *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson   <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ * 
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ * 
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_asn1.h>
+
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+
+/* TWRITE_STR from gssapiP_generic.h */
+#define TWRITE_STR(ptr, str, len) \
+	memcpy((ptr), (char *) (str), (len)); \
+	(ptr) += (len);
+
+/* XXXX this code currently makes the assumption that a mech oid will
+   never be longer than 127 bytes.  This assumption is not inherent in
+   the interfaces, so the code can be fixed if the OSI namespace
+   balloons unexpectedly. */
+
+/* Each token looks like this:
+
+0x60				tag for APPLICATION 0, SEQUENCE
+					(constructed, definite-length)
+	<length>		possible multiple bytes, need to parse/generate
+	0x06			tag for OBJECT IDENTIFIER
+		<moid_length>	compile-time constant string (assume 1 byte)
+		<moid_bytes>	compile-time constant string
+	<inner_bytes>		the ANY containing the application token
+					bytes 0,1 are the token type
+					bytes 2,n are the token data
+
+For the purposes of this abstraction, the token "header" consists of
+the sequence tag and length octets, the mech OID DER encoding, and the
+first two inner bytes, which indicate the token type.  The token
+"body" consists of everything else.
+
+*/
+
+static int
+der_length_size( int length)
+{
+	if (length < (1<<7))
+		return(1);
+	else if (length < (1<<8))
+		return(2);
+#if (SIZEOF_INT == 2)
+	else
+		return(3);
+#else
+	else if (length < (1<<16))
+		return(3);
+	else if (length < (1<<24))
+		return(4);
+	else
+		return(5);
+#endif
+}
+
+static void
+der_write_length(unsigned char **buf, int length)
+{
+	if (length < (1<<7)) {
+		*(*buf)++ = (unsigned char) length;
+	} else {
+		*(*buf)++ = (unsigned char) (der_length_size(length)+127);
+#if (SIZEOF_INT > 2)
+		if (length >= (1<<24))
+			*(*buf)++ = (unsigned char) (length>>24);
+		if (length >= (1<<16))
+			*(*buf)++ = (unsigned char) ((length>>16)&0xff);
+#endif
+		if (length >= (1<<8))
+			*(*buf)++ = (unsigned char) ((length>>8)&0xff);
+		*(*buf)++ = (unsigned char) (length&0xff);
+	}
+}
+
+/* returns decoded length, or < 0 on failure.  Advances buf and
+   decrements bufsize */
+
+static int
+der_read_length(unsigned char **buf, int *bufsize)
+{
+	unsigned char sf;
+	int ret;
+
+	if (*bufsize < 1)
+		return(-1);
+	sf = *(*buf)++;
+	(*bufsize)--;
+	if (sf & 0x80) {
+		if ((sf &= 0x7f) > ((*bufsize)-1))
+			return(-1);
+		if (sf > SIZEOF_INT)
+			return (-1);
+		ret = 0;
+		for (; sf; sf--) {
+			ret = (ret<<8) + (*(*buf)++);
+			(*bufsize)--;
+		}
+	} else {
+		ret = sf;
+	}
+
+	return(ret);
+}
+
+/* returns the length of a token, given the mech oid and the body size */
+
+int
+g_token_size(struct xdr_netobj *mech, unsigned int body_size)
+{
+	/* set body_size to sequence contents size */
+	body_size += 4 + (int) mech->len;         /* NEED overflow check */
+	return(1 + der_length_size(body_size) + body_size);
+}
+
+EXPORT_SYMBOL(g_token_size);
+
+/* fills in a buffer with the token header.  The buffer is assumed to
+   be the right size.  buf is advanced past the token header */
+
+void
+g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
+{
+	*(*buf)++ = 0x60;
+	der_write_length(buf, 4 + mech->len + body_size);
+	*(*buf)++ = 0x06;
+	*(*buf)++ = (unsigned char) mech->len;
+	TWRITE_STR(*buf, mech->data, ((int) mech->len));
+}
+
+EXPORT_SYMBOL(g_make_token_header);
+
+/*
+ * Given a buffer containing a token, reads and verifies the token,
+ * leaving buf advanced past the token header, and setting body_size
+ * to the number of remaining bytes.  Returns 0 on success,
+ * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
+ * mechanism in the token does not match the mech argument.  buf and
+ * *body_size are left unmodified on error.
+ */
+u32
+g_verify_token_header(struct xdr_netobj *mech, int *body_size,
+		      unsigned char **buf_in, int toksize)
+{
+	unsigned char *buf = *buf_in;
+	int seqsize;
+	struct xdr_netobj toid;
+	int ret = 0;
+
+	if ((toksize-=1) < 0)
+		return(G_BAD_TOK_HEADER);
+	if (*buf++ != 0x60)
+		return(G_BAD_TOK_HEADER);
+
+	if ((seqsize = der_read_length(&buf, &toksize)) < 0)
+		return(G_BAD_TOK_HEADER);
+
+	if (seqsize != toksize)
+		return(G_BAD_TOK_HEADER);
+
+	if ((toksize-=1) < 0)
+		return(G_BAD_TOK_HEADER);
+	if (*buf++ != 0x06)
+		return(G_BAD_TOK_HEADER);
+ 
+	if ((toksize-=1) < 0)
+		return(G_BAD_TOK_HEADER);
+	toid.len = *buf++;
+
+	if ((toksize-=toid.len) < 0)
+		return(G_BAD_TOK_HEADER);
+	toid.data = buf;
+	buf+=toid.len;
+
+	if (! g_OID_equal(&toid, mech)) 
+		ret = G_WRONG_MECH;
+ 
+   /* G_WRONG_MECH is not returned immediately because it's more important
+      to return G_BAD_TOK_HEADER if the token header is in fact bad */
+
+	if ((toksize-=2) < 0)
+		return(G_BAD_TOK_HEADER);
+
+	if (ret)
+		return(ret);
+
+	if (!ret) {
+		*buf_in = buf;
+		*body_size = toksize;
+	}
+
+	return(ret);
+}
+
+EXPORT_SYMBOL(g_verify_token_header);
+
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
new file mode 100644
index 00000000000..24c21f2a33a
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -0,0 +1,209 @@
+/*
+ *  linux/net/sunrpc/gss_krb5_crypto.c
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson   <andros@umich.edu>
+ *  Bruce Fields   <bfields@umich.edu>
+ */
+
+/*
+ * Copyright (C) 1998 by the FundsXpress, INC.
+ *
+ * All rights reserved.
+ *
+ * Export of this software from the United States of America may require
+ * a specific license from the United States Government.  It is the
+ * responsibility of any person or organization contemplating export to
+ * obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of FundsXpress. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.  FundsXpress makes no representations about the suitability of
+ * this software for any purpose.  It is provided "as is" without express
+ * or implied warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/gss_krb5.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+u32
+krb5_encrypt(
+	struct crypto_tfm *tfm,
+	void * iv,
+	void * in,
+	void * out,
+	int length)
+{
+	u32 ret = -EINVAL;
+        struct scatterlist sg[1];
+	u8 local_iv[16] = {0};
+
+	dprintk("RPC:      krb5_encrypt: input data:\n");
+	print_hexl((u32 *)in, length, 0);
+
+	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+		goto out;
+
+	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+		dprintk("RPC:      gss_k5encrypt: tfm iv size to large %d\n",
+		         crypto_tfm_alg_ivsize(tfm));
+		goto out;
+	}
+
+	if (iv)
+		memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm));
+
+	memcpy(out, in, length);
+	sg[0].page = virt_to_page(out);
+	sg[0].offset = offset_in_page(out);
+	sg[0].length = length;
+
+	ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
+
+	dprintk("RPC:      krb5_encrypt: output data:\n");
+	print_hexl((u32 *)out, length, 0);
+out:
+	dprintk("RPC:      krb5_encrypt returns %d\n",ret);
+	return(ret);
+}
+
+EXPORT_SYMBOL(krb5_encrypt);
+
+u32
+krb5_decrypt(
+     struct crypto_tfm *tfm,
+     void * iv,
+     void * in,
+     void * out,
+     int length)
+{
+	u32 ret = -EINVAL;
+	struct scatterlist sg[1];
+	u8 local_iv[16] = {0};
+
+	dprintk("RPC:      krb5_decrypt: input data:\n");
+	print_hexl((u32 *)in, length, 0);
+
+	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+		goto out;
+
+	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+		dprintk("RPC:      gss_k5decrypt: tfm iv size to large %d\n",
+			crypto_tfm_alg_ivsize(tfm));
+		goto out;
+	}
+	if (iv)
+		memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm));
+
+	memcpy(out, in, length);
+	sg[0].page = virt_to_page(out);
+	sg[0].offset = offset_in_page(out);
+	sg[0].length = length;
+
+	ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
+
+	dprintk("RPC:      krb5_decrypt: output_data:\n");
+	print_hexl((u32 *)out, length, 0);
+out:
+	dprintk("RPC:      gss_k5decrypt returns %d\n",ret);
+	return(ret);
+}
+
+EXPORT_SYMBOL(krb5_decrypt);
+
+static void
+buf_to_sg(struct scatterlist *sg, char *ptr, int len) {
+	sg->page = virt_to_page(ptr);
+	sg->offset = offset_in_page(ptr);
+	sg->length = len;
+}
+
+/* checksum the plaintext data and hdrlen bytes of the token header */
+s32
+make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
+		   struct xdr_netobj *cksum)
+{
+	char                            *cksumname;
+	struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
+	struct scatterlist              sg[1];
+	u32                             code = GSS_S_FAILURE;
+	int				len, thislen, offset;
+	int				i;
+
+	switch (cksumtype) {
+		case CKSUMTYPE_RSA_MD5:
+			cksumname = "md5";
+			break;
+		default:
+			dprintk("RPC:      krb5_make_checksum:"
+				" unsupported checksum %d", cksumtype);
+			goto out;
+	}
+	if (!(tfm = crypto_alloc_tfm(cksumname, 0)))
+		goto out;
+	cksum->len = crypto_tfm_alg_digestsize(tfm);
+	if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL)
+		goto out;
+
+	crypto_digest_init(tfm);
+	buf_to_sg(sg, header, hdrlen);
+	crypto_digest_update(tfm, sg, 1);
+	if (body->head[0].iov_len) {
+		buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
+		crypto_digest_update(tfm, sg, 1);
+	}
+
+	len = body->page_len;
+	if (len != 0) {
+		offset = body->page_base & (PAGE_CACHE_SIZE - 1);
+		i = body->page_base >> PAGE_CACHE_SHIFT;
+		thislen = PAGE_CACHE_SIZE - offset;
+		do {
+			if (thislen > len)
+				thislen = len;
+			sg->page = body->pages[i];
+			sg->offset = offset;
+			sg->length = thislen;
+			kmap(sg->page); /* XXX kmap_atomic? */
+			crypto_digest_update(tfm, sg, 1);
+			kunmap(sg->page);
+			len -= thislen;
+			i++;
+			offset = 0;
+			thislen = PAGE_CACHE_SIZE;
+		} while(len != 0);
+	}
+	if (body->tail[0].iov_len) {
+		buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
+		crypto_digest_update(tfm, sg, 1);
+	}
+	crypto_digest_final(tfm, cksum->data);
+	code = 0;
+out:
+	if (tfm)
+		crypto_free_tfm(tfm);
+	return code;
+}
+
+EXPORT_SYMBOL(make_checksum);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
new file mode 100644
index 00000000000..cf726510df8
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -0,0 +1,275 @@
+/*
+ *  linux/net/sunrpc/gss_krb5_mech.c
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *  J. Bruce Fields <bfields@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/in.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static const void *
+simple_get_bytes(const void *p, const void *end, void *res, int len)
+{
+	const void *q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	memcpy(res, p, len);
+	return q;
+}
+
+static const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
+{
+	const void *q;
+	unsigned int len;
+
+	p = simple_get_bytes(p, end, &len, sizeof(len));
+	if (IS_ERR(p))
+		return p;
+	q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	res->data = kmalloc(len, GFP_KERNEL);
+	if (unlikely(res->data == NULL))
+		return ERR_PTR(-ENOMEM);
+	memcpy(res->data, p, len);
+	res->len = len;
+	return q;
+}
+
+static inline const void *
+get_key(const void *p, const void *end, struct crypto_tfm **res)
+{
+	struct xdr_netobj	key;
+	int			alg, alg_mode;
+	char			*alg_name;
+
+	p = simple_get_bytes(p, end, &alg, sizeof(alg));
+	if (IS_ERR(p))
+		goto out_err;
+	p = simple_get_netobj(p, end, &key);
+	if (IS_ERR(p))
+		goto out_err;
+
+	switch (alg) {
+		case ENCTYPE_DES_CBC_RAW:
+			alg_name = "des";
+			alg_mode = CRYPTO_TFM_MODE_CBC;
+			break;
+		default:
+			dprintk("RPC:      get_key: unsupported algorithm %d\n", alg);
+			goto out_err_free_key;
+	}
+	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
+		goto out_err_free_key;
+	if (crypto_cipher_setkey(*res, key.data, key.len))
+		goto out_err_free_tfm;
+
+	kfree(key.data);
+	return p;
+
+out_err_free_tfm:
+	crypto_free_tfm(*res);
+out_err_free_key:
+	kfree(key.data);
+	p = ERR_PTR(-EINVAL);
+out_err:
+	return p;
+}
+
+static int
+gss_import_sec_context_kerberos(const void *p,
+				size_t len,
+				struct gss_ctx *ctx_id)
+{
+	const void *end = (const void *)((const char *)p + len);
+	struct	krb5_ctx *ctx;
+
+	if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+		goto out_err;
+	memset(ctx, 0, sizeof(*ctx));
+
+	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = simple_get_netobj(p, end, &ctx->mech_used);
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+	p = get_key(p, end, &ctx->enc);
+	if (IS_ERR(p))
+		goto out_err_free_mech;
+	p = get_key(p, end, &ctx->seq);
+	if (IS_ERR(p))
+		goto out_err_free_key1;
+	if (p != end) {
+		p = ERR_PTR(-EFAULT);
+		goto out_err_free_key2;
+	}
+
+	ctx_id->internal_ctx_id = ctx;
+	dprintk("RPC:      Succesfully imported new context.\n");
+	return 0;
+
+out_err_free_key2:
+	crypto_free_tfm(ctx->seq);
+out_err_free_key1:
+	crypto_free_tfm(ctx->enc);
+out_err_free_mech:
+	kfree(ctx->mech_used.data);
+out_err_free_ctx:
+	kfree(ctx);
+out_err:
+	return PTR_ERR(p);
+}
+
+static void
+gss_delete_sec_context_kerberos(void *internal_ctx) {
+	struct krb5_ctx *kctx = internal_ctx;
+
+	if (kctx->seq)
+		crypto_free_tfm(kctx->seq);
+	if (kctx->enc)
+		crypto_free_tfm(kctx->enc);
+	if (kctx->mech_used.data)
+		kfree(kctx->mech_used.data);
+	kfree(kctx);
+}
+
+static u32
+gss_verify_mic_kerberos(struct gss_ctx		*ctx,
+			struct xdr_buf		*message,
+			struct xdr_netobj	*mic_token,
+			u32			*qstate) {
+	u32 maj_stat = 0;
+	int qop_state;
+	struct krb5_ctx *kctx = ctx->internal_ctx_id;
+
+	maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
+				   KG_TOK_MIC_MSG);
+	if (!maj_stat && qop_state)
+	    *qstate = qop_state;
+
+	dprintk("RPC:      gss_verify_mic_kerberos returning %d\n", maj_stat);
+	return maj_stat;
+}
+
+static u32
+gss_get_mic_kerberos(struct gss_ctx	*ctx,
+		     u32		qop,
+		     struct xdr_buf 	*message,
+		     struct xdr_netobj	*mic_token) {
+	u32 err = 0;
+	struct krb5_ctx *kctx = ctx->internal_ctx_id;
+
+	err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
+
+	dprintk("RPC:      gss_get_mic_kerberos returning %d\n",err);
+
+	return err;
+}
+
+static struct gss_api_ops gss_kerberos_ops = {
+	.gss_import_sec_context	= gss_import_sec_context_kerberos,
+	.gss_get_mic		= gss_get_mic_kerberos,
+	.gss_verify_mic		= gss_verify_mic_kerberos,
+	.gss_delete_sec_context	= gss_delete_sec_context_kerberos,
+};
+
+static struct pf_desc gss_kerberos_pfs[] = {
+	[0] = {
+		.pseudoflavor = RPC_AUTH_GSS_KRB5,
+		.service = RPC_GSS_SVC_NONE,
+		.name = "krb5",
+	},
+	[1] = {
+		.pseudoflavor = RPC_AUTH_GSS_KRB5I,
+		.service = RPC_GSS_SVC_INTEGRITY,
+		.name = "krb5i",
+	},
+};
+
+static struct gss_api_mech gss_kerberos_mech = {
+	.gm_name	= "krb5",
+	.gm_owner	= THIS_MODULE,
+	.gm_ops		= &gss_kerberos_ops,
+	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
+	.gm_pfs		= gss_kerberos_pfs,
+};
+
+static int __init init_kerberos_module(void)
+{
+	int status;
+
+	status = gss_mech_register(&gss_kerberos_mech);
+	if (status)
+		printk("Failed to register kerberos gss mechanism!\n");
+	return status;
+}
+
+static void __exit cleanup_kerberos_module(void)
+{
+	gss_mech_unregister(&gss_kerberos_mech);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_kerberos_module);
+module_exit(cleanup_kerberos_module);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
new file mode 100644
index 00000000000..afeeb8715a7
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -0,0 +1,176 @@
+/*
+ *  linux/net/sunrpc/gss_krb5_seal.c
+ *
+ *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson	<andros@umich.edu>
+ *  J. Bruce Fields	<bfields@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ *
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (C) 1998 by the FundsXpress, INC.
+ *
+ * All rights reserved.
+ *
+ * Export of this software from the United States of America may require
+ * a specific license from the United States Government.  It is the
+ * responsibility of any person or organization contemplating export to
+ * obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of FundsXpress. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.  FundsXpress makes no representations about the suitability of
+ * this software for any purpose.  It is provided "as is" without express
+ * or implied warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/random.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+static inline int
+gss_krb5_padding(int blocksize, int length) {
+	/* Most of the code is block-size independent but in practice we
+	 * use only 8: */
+	BUG_ON(blocksize != 8);
+	return 8 - (length & 7);
+}
+
+u32
+krb5_make_token(struct krb5_ctx *ctx, int qop_req,
+		   struct xdr_buf *text, struct xdr_netobj *token,
+		   int toktype)
+{
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	int			blocksize = 0, tmsglen;
+	unsigned char		*ptr, *krb5_hdr, *msg_start;
+	s32			now;
+
+	dprintk("RPC:     gss_krb5_seal\n");
+
+	now = get_seconds();
+
+	if (qop_req != 0)
+		goto out_err;
+
+	switch (ctx->signalg) {
+		case SGN_ALG_DES_MAC_MD5:
+			checksum_type = CKSUMTYPE_RSA_MD5;
+			break;
+		default:
+			dprintk("RPC:      gss_krb5_seal: ctx->signalg %d not"
+				" supported\n", ctx->signalg);
+			goto out_err;
+	}
+	if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
+		dprintk("RPC:      gss_krb5_seal: ctx->sealalg %d not supported\n",
+			ctx->sealalg);
+		goto out_err;
+	}
+
+	if (toktype == KG_TOK_WRAP_MSG) {
+		blocksize = crypto_tfm_alg_blocksize(ctx->enc);
+		tmsglen = blocksize + text->len
+			+ gss_krb5_padding(blocksize, blocksize + text->len);
+	} else {
+		tmsglen = 0;
+	}
+
+	token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
+
+	ptr = token->data;
+	g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
+
+	*ptr++ = (unsigned char) ((toktype>>8)&0xff);
+	*ptr++ = (unsigned char) (toktype&0xff);
+
+	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
+	krb5_hdr = ptr - 2;
+	msg_start = krb5_hdr + 24;
+
+	*(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
+	memset(krb5_hdr + 4, 0xff, 4);
+	if (toktype == KG_TOK_WRAP_MSG)
+		*(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
+
+	if (toktype == KG_TOK_WRAP_MSG) {
+		/* XXX removing support for now */
+		goto out_err;
+	} else { /* Sign only.  */
+		if (make_checksum(checksum_type, krb5_hdr, 8, text,
+				       &md5cksum))
+			goto out_err;
+	}
+
+	switch (ctx->signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+				  md5cksum.data, md5cksum.len))
+			goto out_err;
+		memcpy(krb5_hdr + 16,
+		       md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+		       KRB5_CKSUM_LENGTH);
+
+		dprintk("RPC:      make_seal_token: cksum data: \n");
+		print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
+		break;
+	default:
+		BUG();
+	}
+
+	kfree(md5cksum.data);
+
+	if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+			       ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+		goto out_err;
+
+	ctx->seq_send++;
+
+	return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
+out_err:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return GSS_S_FAILURE;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
new file mode 100644
index 00000000000..c53ead39118
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -0,0 +1,88 @@
+/*
+ *  linux/net/sunrpc/gss_krb5_seqnum.c
+ *
+ *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson   <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ * 
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ * 
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+s32
+krb5_make_seq_num(struct crypto_tfm *key,
+		int direction,
+		s32 seqnum,
+		unsigned char *cksum, unsigned char *buf)
+{
+	unsigned char plain[8];
+
+	plain[0] = (unsigned char) (seqnum & 0xff);
+	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
+	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
+	plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
+
+	plain[4] = direction;
+	plain[5] = direction;
+	plain[6] = direction;
+	plain[7] = direction;
+
+	return krb5_encrypt(key, cksum, plain, buf, 8);
+}
+
+s32
+krb5_get_seq_num(struct crypto_tfm *key,
+	       unsigned char *cksum,
+	       unsigned char *buf,
+	       int *direction, s32 * seqnum)
+{
+	s32 code;
+	unsigned char plain[8];
+
+	dprintk("RPC:      krb5_get_seq_num:\n");
+
+	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
+		return code;
+
+	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
+				   || (plain[4] != plain[7]))
+		return (s32)KG_BAD_SEQ;
+
+	*direction = plain[4];
+
+	*seqnum = ((plain[0]) |
+		   (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
+
+	return (0);
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
new file mode 100644
index 00000000000..8767fc53183
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -0,0 +1,202 @@
+/*
+ *  linux/net/sunrpc/gss_krb5_unseal.c
+ *
+ *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson   <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ *
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (C) 1998 by the FundsXpress, INC.
+ *
+ * All rights reserved.
+ *
+ * Export of this software from the United States of America may require
+ * a specific license from the United States Government.  It is the
+ * responsibility of any person or organization contemplating export to
+ * obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of FundsXpress. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.  FundsXpress makes no representations about the suitability of
+ * this software for any purpose.  It is provided "as is" without express
+ * or implied warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+
+/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
+ * If toktype is MIC: read_token is a mic token, and message_buffer is the
+ *   data that the mic was supposedly taken over.
+ * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
+ *   to return the decrypted data.
+ */
+
+/* XXX will need to change prototype and/or just split into a separate function
+ * when we add privacy (because read_token will be in pages too). */
+u32
+krb5_read_token(struct krb5_ctx *ctx,
+		struct xdr_netobj *read_token,
+		struct xdr_buf *message_buffer,
+		int *qop_state, int toktype)
+{
+	int			signalg;
+	int			sealalg;
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	s32			now;
+	int			direction;
+	s32			seqnum;
+	unsigned char		*ptr = (unsigned char *)read_token->data;
+	int			bodysize;
+	u32			ret = GSS_S_DEFECTIVE_TOKEN;
+
+	dprintk("RPC:      krb5_read_token\n");
+
+	if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
+					read_token->len))
+		goto out;
+
+	if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
+		goto out;
+
+	/* XXX sanity-check bodysize?? */
+
+	if (toktype == KG_TOK_WRAP_MSG) {
+		/* XXX gone */
+		goto out;
+	}
+
+	/* get the sign and seal algorithms */
+
+	signalg = ptr[0] + (ptr[1] << 8);
+	sealalg = ptr[2] + (ptr[3] << 8);
+
+	/* Sanity checks */
+
+	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+		goto out;
+
+	if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
+	    ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
+		goto out;
+
+	/* in the current spec, there is only one valid seal algorithm per
+	   key type, so a simple comparison is ok */
+
+	if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
+		goto out;
+
+	/* there are several mappings of seal algorithms to sign algorithms,
+	   but few enough that we can try them all. */
+
+	if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
+	    (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
+	    (ctx->sealalg == SEAL_ALG_DES3KD &&
+	     signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
+		goto out;
+
+	/* compute the checksum of the message */
+
+	/* initialize the the cksum */
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		checksum_type = CKSUMTYPE_RSA_MD5;
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		ret = make_checksum(checksum_type, ptr - 2, 8,
+					 message_buffer, &md5cksum);
+		if (ret)
+			goto out;
+
+		ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+				   md5cksum.data, 16);
+		if (ret)
+			goto out;
+
+		if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
+			ret = GSS_S_BAD_SIG;
+			goto out;
+		}
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	/* it got through unscathed.  Make sure the context is unexpired */
+
+	if (qop_state)
+		*qop_state = GSS_C_QOP_DEFAULT;
+
+	now = get_seconds();
+
+	ret = GSS_S_CONTEXT_EXPIRED;
+	if (now > ctx->endtime)
+		goto out;
+
+	/* do sequencing checks */
+
+	ret = GSS_S_BAD_SIG;
+	if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction,
+				    &seqnum)))
+		goto out;
+
+	if ((ctx->initiate && direction != 0xff) ||
+	    (!ctx->initiate && direction != 0))
+		goto out;
+
+	ret = GSS_S_COMPLETE;
+out:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return ret;
+}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
new file mode 100644
index 00000000000..9dfb68377d6
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -0,0 +1,301 @@
+/*
+ *  linux/net/sunrpc/gss_mech_switch.c
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  J. Bruce Fields   <bfields@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without 
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the 
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/gss_asn1.h>
+#include <linux/sunrpc/auth_gss.h>
+#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/gss_err.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/clnt.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+static LIST_HEAD(registered_mechs);
+static DEFINE_SPINLOCK(registered_mechs_lock);
+
+static void
+gss_mech_free(struct gss_api_mech *gm)
+{
+	struct pf_desc *pf;
+	int i;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		pf = &gm->gm_pfs[i];
+		if (pf->auth_domain_name)
+			kfree(pf->auth_domain_name);
+		pf->auth_domain_name = NULL;
+	}
+}
+
+static inline char *
+make_auth_domain_name(char *name)
+{
+	static char	*prefix = "gss/";
+	char		*new;
+
+	new = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL);
+	if (new) {
+		strcpy(new, prefix);
+		strcat(new, name);
+	}
+	return new;
+}
+
+static int
+gss_mech_svc_setup(struct gss_api_mech *gm)
+{
+	struct pf_desc *pf;
+	int i, status;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		pf = &gm->gm_pfs[i];
+		pf->auth_domain_name = make_auth_domain_name(pf->name);
+		status = -ENOMEM;
+		if (pf->auth_domain_name == NULL)
+			goto out;
+		status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
+							pf->auth_domain_name);
+		if (status)
+			goto out;
+	}
+	return 0;
+out:
+	gss_mech_free(gm);
+	return status;
+}
+
+int
+gss_mech_register(struct gss_api_mech *gm)
+{
+	int status;
+
+	status = gss_mech_svc_setup(gm);
+	if (status)
+		return status;
+	spin_lock(&registered_mechs_lock);
+	list_add(&gm->gm_list, &registered_mechs);
+	spin_unlock(&registered_mechs_lock);
+	dprintk("RPC:      registered gss mechanism %s\n", gm->gm_name);
+	return 0;
+}
+
+EXPORT_SYMBOL(gss_mech_register);
+
+void
+gss_mech_unregister(struct gss_api_mech *gm)
+{
+	spin_lock(&registered_mechs_lock);
+	list_del(&gm->gm_list);
+	spin_unlock(&registered_mechs_lock);
+	dprintk("RPC:      unregistered gss mechanism %s\n", gm->gm_name);
+	gss_mech_free(gm);
+}
+
+EXPORT_SYMBOL(gss_mech_unregister);
+
+struct gss_api_mech *
+gss_mech_get(struct gss_api_mech *gm)
+{
+	__module_get(gm->gm_owner);
+	return gm;
+}
+
+EXPORT_SYMBOL(gss_mech_get);
+
+struct gss_api_mech *
+gss_mech_get_by_name(const char *name)
+{
+	struct gss_api_mech	*pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (0 == strcmp(name, pos->gm_name)) {
+			if (try_module_get(pos->gm_owner))
+				gm = pos;
+			break;
+		}
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+
+}
+
+EXPORT_SYMBOL(gss_mech_get_by_name);
+
+static inline int
+mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
+{
+	int i;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
+			return 1;
+	}
+	return 0;
+}
+
+struct gss_api_mech *
+gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
+{
+	struct gss_api_mech *pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
+			module_put(pos->gm_owner);
+			continue;
+		}
+		if (try_module_get(pos->gm_owner))
+			gm = pos;
+		break;
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+}
+
+EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
+
+u32
+gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
+{
+	int i;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
+			return gm->gm_pfs[i].service;
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(gss_pseudoflavor_to_service);
+
+char *
+gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
+{
+	int i;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		if (gm->gm_pfs[i].service == service)
+			return gm->gm_pfs[i].auth_domain_name;
+	}
+	return NULL;
+}
+
+EXPORT_SYMBOL(gss_service_to_auth_domain_name);
+
+void
+gss_mech_put(struct gss_api_mech * gm)
+{
+	module_put(gm->gm_owner);
+}
+
+EXPORT_SYMBOL(gss_mech_put);
+
+/* The mech could probably be determined from the token instead, but it's just
+ * as easy for now to pass it in. */
+int
+gss_import_sec_context(const void *input_token, size_t bufsize,
+		       struct gss_api_mech	*mech,
+		       struct gss_ctx		**ctx_id)
+{
+	if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL)))
+		return GSS_S_FAILURE;
+	memset(*ctx_id, 0, sizeof(**ctx_id));
+	(*ctx_id)->mech_type = gss_mech_get(mech);
+
+	return mech->gm_ops
+		->gss_import_sec_context(input_token, bufsize, *ctx_id);
+}
+
+/* gss_get_mic: compute a mic over message and return mic_token. */
+
+u32
+gss_get_mic(struct gss_ctx	*context_handle,
+	    u32			qop,
+	    struct xdr_buf	*message,
+	    struct xdr_netobj	*mic_token)
+{
+	 return context_handle->mech_type->gm_ops
+		->gss_get_mic(context_handle,
+			      qop,
+			      message,
+			      mic_token);
+}
+
+/* gss_verify_mic: check whether the provided mic_token verifies message. */
+
+u32
+gss_verify_mic(struct gss_ctx		*context_handle,
+	       struct xdr_buf		*message,
+	       struct xdr_netobj	*mic_token,
+	       u32			*qstate)
+{
+	return context_handle->mech_type->gm_ops
+		->gss_verify_mic(context_handle,
+				 message,
+				 mic_token,
+				 qstate);
+}
+
+/* gss_delete_sec_context: free all resources associated with context_handle.
+ * Note this differs from the RFC 2744-specified prototype in that we don't
+ * bother returning an output token, since it would never be used anyway. */
+
+u32
+gss_delete_sec_context(struct gss_ctx	**context_handle)
+{
+	dprintk("RPC:      gss_delete_sec_context deleting %p\n",
+			*context_handle);
+
+	if (!*context_handle)
+		return(GSS_S_NO_CONTEXT);
+	if ((*context_handle)->internal_ctx_id != 0)
+		(*context_handle)->mech_type->gm_ops
+			->gss_delete_sec_context((*context_handle)
+							->internal_ctx_id);
+	if ((*context_handle)->mech_type)
+		gss_mech_put((*context_handle)->mech_type);
+	kfree(*context_handle);
+	*context_handle=NULL;
+	return GSS_S_COMPLETE;
+}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
new file mode 100644
index 00000000000..dad05994c3e
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -0,0 +1,300 @@
+/*
+ *  linux/net/sunrpc/gss_spkm3_mech.c
+ *
+ *  Copyright (c) 2003 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *  J. Bruce Fields <bfields@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/in.h>
+#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/gss_spkm3.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static const void *
+simple_get_bytes(const void *p, const void *end, void *res, int len)
+{
+	const void *q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	memcpy(res, p, len);
+	return q;
+}
+
+static const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
+{
+	const void *q;
+	unsigned int len;
+	p = simple_get_bytes(p, end, &len, sizeof(len));
+	if (IS_ERR(p))
+		return p;
+	res->len = len;
+	if (len == 0) {
+		res->data = NULL;
+		return p;
+	}
+	q = (const void *)((const char *)p + len);
+	if (unlikely(q > end || q < p))
+		return ERR_PTR(-EFAULT);
+	res->data = kmalloc(len, GFP_KERNEL);
+	if (unlikely(res->data == NULL))
+		return ERR_PTR(-ENOMEM);
+	memcpy(res->data, p, len);
+	return q;
+}
+
+static inline const void *
+get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
+{
+	struct xdr_netobj	key = { 0 };
+	int			alg_mode,setkey = 0;
+	char			*alg_name;
+
+	p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
+	if (IS_ERR(p))
+		goto out_err;
+	p = simple_get_netobj(p, end, &key);
+	if (IS_ERR(p))
+		goto out_err;
+
+	switch (*resalg) {
+		case NID_des_cbc:
+			alg_name = "des";
+			alg_mode = CRYPTO_TFM_MODE_CBC;
+			setkey = 1;
+			break;
+		case NID_md5:
+			if (key.len == 0) {
+				dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
+			}
+			alg_name = "md5";
+			alg_mode = 0;
+			setkey = 0;
+			break;
+		default:
+			dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg);
+			goto out_err_free_key;
+	}
+	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
+		goto out_err_free_key;
+	if (setkey) {
+		if (crypto_cipher_setkey(*res, key.data, key.len))
+			goto out_err_free_tfm;
+	}
+
+	if(key.len > 0)
+		kfree(key.data);
+	return p;
+
+out_err_free_tfm:
+	crypto_free_tfm(*res);
+out_err_free_key:
+	if(key.len > 0)
+		kfree(key.data);
+	p = ERR_PTR(-EINVAL);
+out_err:
+	return p;
+}
+
+static int
+gss_import_sec_context_spkm3(const void *p, size_t len,
+				struct gss_ctx *ctx_id)
+{
+	const void *end = (const void *)((const char *)p + len);
+	struct	spkm3_ctx *ctx;
+
+	if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
+		goto out_err;
+	memset(ctx, 0, sizeof(*ctx));
+
+	p = simple_get_netobj(p, end, &ctx->ctx_id);
+	if (IS_ERR(p))
+		goto out_err_free_ctx;
+
+	p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop));
+	if (IS_ERR(p))
+		goto out_err_free_ctx_id;
+
+	p = simple_get_netobj(p, end, &ctx->mech_used);
+	if (IS_ERR(p))
+		goto out_err_free_mech;
+
+	p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
+	if (IS_ERR(p))
+		goto out_err_free_mech;
+
+	p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags));
+	if (IS_ERR(p))
+		goto out_err_free_mech;
+
+	p = simple_get_netobj(p, end, &ctx->share_key);
+	if (IS_ERR(p))
+		goto out_err_free_s_key;
+
+	p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg);
+	if (IS_ERR(p))
+		goto out_err_free_s_key;
+
+	p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg);
+	if (IS_ERR(p))
+		goto out_err_free_key1;
+
+	p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg));
+	if (IS_ERR(p))
+		goto out_err_free_key2;
+
+	p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg));
+	if (IS_ERR(p))
+		goto out_err_free_key2;
+
+	if (p != end)
+		goto out_err_free_key2;
+
+	ctx_id->internal_ctx_id = ctx;
+
+	dprintk("Succesfully imported new spkm context.\n");
+	return 0;
+
+out_err_free_key2:
+	crypto_free_tfm(ctx->derived_integ_key);
+out_err_free_key1:
+	crypto_free_tfm(ctx->derived_conf_key);
+out_err_free_s_key:
+	kfree(ctx->share_key.data);
+out_err_free_mech:
+	kfree(ctx->mech_used.data);
+out_err_free_ctx_id:
+	kfree(ctx->ctx_id.data);
+out_err_free_ctx:
+	kfree(ctx);
+out_err:
+	return PTR_ERR(p);
+}
+
+static void
+gss_delete_sec_context_spkm3(void *internal_ctx) {
+	struct spkm3_ctx *sctx = internal_ctx;
+
+	if(sctx->derived_integ_key)
+		crypto_free_tfm(sctx->derived_integ_key);
+	if(sctx->derived_conf_key)
+		crypto_free_tfm(sctx->derived_conf_key);
+	if(sctx->share_key.data)
+		kfree(sctx->share_key.data);
+	if(sctx->mech_used.data)
+		kfree(sctx->mech_used.data);
+	kfree(sctx);
+}
+
+static u32
+gss_verify_mic_spkm3(struct gss_ctx		*ctx,
+			struct xdr_buf		*signbuf,
+			struct xdr_netobj	*checksum,
+			u32		*qstate) {
+	u32 maj_stat = 0;
+	int qop_state = 0;
+	struct spkm3_ctx *sctx = ctx->internal_ctx_id;
+
+	dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
+	maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state,
+				   SPKM_MIC_TOK);
+
+	if (!maj_stat && qop_state)
+	    *qstate = qop_state;
+
+	dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
+	return maj_stat;
+}
+
+static u32
+gss_get_mic_spkm3(struct gss_ctx	*ctx,
+		     u32		qop,
+		     struct xdr_buf	*message_buffer,
+		     struct xdr_netobj	*message_token) {
+	u32 err = 0;
+	struct spkm3_ctx *sctx = ctx->internal_ctx_id;
+
+	dprintk("RPC: gss_get_mic_spkm3\n");
+
+	err = spkm3_make_token(sctx, qop, message_buffer,
+			      message_token, SPKM_MIC_TOK);
+	return err;
+}
+
+static struct gss_api_ops gss_spkm3_ops = {
+	.gss_import_sec_context	= gss_import_sec_context_spkm3,
+	.gss_get_mic		= gss_get_mic_spkm3,
+	.gss_verify_mic		= gss_verify_mic_spkm3,
+	.gss_delete_sec_context	= gss_delete_sec_context_spkm3,
+};
+
+static struct pf_desc gss_spkm3_pfs[] = {
+	{RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"},
+	{RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
+};
+
+static struct gss_api_mech gss_spkm3_mech = {
+	.gm_name	= "spkm3",
+	.gm_owner	= THIS_MODULE,
+	.gm_ops		= &gss_spkm3_ops,
+	.gm_pf_num	= ARRAY_SIZE(gss_spkm3_pfs),
+	.gm_pfs		= gss_spkm3_pfs,
+};
+
+static int __init init_spkm3_module(void)
+{
+	int status;
+
+	status = gss_mech_register(&gss_spkm3_mech);
+	if (status)
+		printk("Failed to register spkm3 gss mechanism!\n");
+	return 0;
+}
+
+static void __exit cleanup_spkm3_module(void)
+{
+	gss_mech_unregister(&gss_spkm3_mech);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_spkm3_module);
+module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
new file mode 100644
index 00000000000..25339868d46
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -0,0 +1,132 @@
+/*
+ *  linux/net/sunrpc/gss_spkm3_seal.c
+ *
+ *  Copyright (c) 2003 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_spkm3.h>
+#include <linux/random.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+/*
+ * spkm3_make_token()
+ *
+ * Only SPKM_MIC_TOK with md5 intg-alg is supported
+ */
+
+u32
+spkm3_make_token(struct spkm3_ctx *ctx, int qop_req,
+		   struct xdr_buf * text, struct xdr_netobj * token,
+		   int toktype)
+{
+	s32			checksum_type;
+	char			tokhdrbuf[25];
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	struct xdr_netobj	mic_hdr = {.len = 0, .data = tokhdrbuf};
+	int			tmsglen, tokenlen = 0;
+	unsigned char		*ptr;
+	s32			now;
+	int			ctxelen = 0, ctxzbit = 0;
+	int			md5elen = 0, md5zbit = 0;
+
+	dprintk("RPC: spkm3_make_token\n");
+
+	now = jiffies;
+	if (qop_req != 0)
+		goto out_err;
+
+	if (ctx->ctx_id.len != 16) {
+		dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
+			ctx->ctx_id.len);
+		goto out_err;
+	}
+		
+	switch (ctx->intg_alg) {
+		case NID_md5:
+			checksum_type = CKSUMTYPE_RSA_MD5;
+			break;
+		default:
+			dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not"
+				" supported\n", ctx->intg_alg);
+			goto out_err;
+	}
+	/* XXX since we don't support WRAP, perhaps we don't care... */
+	if (ctx->conf_alg != NID_cast5_cbc) {
+		dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n",
+			ctx->conf_alg);
+		goto out_err;
+	}
+
+	if (toktype == SPKM_MIC_TOK) {
+		tmsglen = 0;
+		/* Calculate checksum over the mic-header */
+		asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
+		spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
+		                         ctxelen, ctxzbit);
+
+		if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 
+		                             text, &md5cksum))
+			goto out_err;
+
+		asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
+		tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1;
+
+		/* Create token header using generic routines */
+		token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen);
+
+		ptr = token->data;
+		g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr);
+
+		spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
+	} else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
+		dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
+		goto out_err;
+	}
+	kfree(md5cksum.data);
+
+	/* XXX need to implement sequence numbers, and ctx->expired */
+
+	return  GSS_S_COMPLETE;
+out_err:
+	if (md5cksum.data) 
+		kfree(md5cksum.data);
+	token->data = NULL;
+	token->len = 0;
+	return GSS_S_FAILURE;
+}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
new file mode 100644
index 00000000000..46c08a0710f
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -0,0 +1,266 @@
+/*
+ *  linux/net/sunrpc/gss_spkm3_token.c
+ *
+ *  Copyright (c) 2003 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_spkm3.h>
+#include <linux/random.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+/*
+ * asn1_bitstring_len()
+ *
+ * calculate the asn1 bitstring length of the xdr_netobject
+ */
+void
+asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
+{
+	int i, zbit = 0,elen = in->len;
+	char *ptr;
+
+	ptr = &in->data[in->len -1];
+
+	/* count trailing 0's */
+	for(i = in->len; i > 0; i--) {
+		if (*ptr == 0) { 
+			ptr--;
+			elen--;
+		} else
+			break;
+	}
+
+	/* count number of 0 bits in final octet */
+	ptr = &in->data[elen - 1];
+	for(i = 0; i < 8; i++) {
+		short mask = 0x01;
+
+		if (!((mask << i) & *ptr))
+			zbit++;
+		else
+			break;
+	}
+	*enclen = elen;
+	*zerobits = zbit;
+}
+
+/*
+ * decode_asn1_bitstring()
+ * 
+ * decode a bitstring into a buffer of the expected length.
+ * enclen = bit string length
+ * explen = expected length (define in rfc)
+ */
+int
+decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
+{
+	if (!(out->data = kmalloc(explen,GFP_KERNEL)))
+		return 0;
+	out->len = explen;
+	memset(out->data, 0, explen);
+	memcpy(out->data, in, enclen);
+	return 1;
+}
+
+/* 
+ * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
+ * 
+ * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
+ *
+ * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
+ *
+ * pos  value
+ * ----------
+ * [0]	a4  SPKM-MIC tag
+ * [1]	??  innertoken length  (max 44) 
+ * 
+ * 
+ * tok_hdr piece of checksum data starts here 
+ *
+ * the maximum mic-header len = 9 + 17 = 26 
+ *	mic-header
+ *	----------
+ * [2]	30      SEQUENCE tag  
+ * [3]	??	mic-header length: (max 23) = TokenID + ContextID 
+ *
+ *		TokenID  - all fields constant and can be hardcoded
+ *		-------
+ * [4]	  02	Type 2
+ * [5]	  02	Length 2 
+ * [6][7] 01 01	TokenID (SPKM_MIC_TOK)
+ *
+ *		ContextID  - encoded length not constant, calculated
+ *		---------
+ * [8]	03	Type 3
+ * [9]	??	encoded length
+ * [10]	??	ctxzbit
+ * [11]	 	contextid
+ *
+ * mic_header piece of checksum data ends here. 
+ *
+ *	int-cksum - encoded length not constant, calculated
+ *	---------
+ * [??]	03	Type 3
+ * [??]	??	encoded length 
+ * [??]	??	md5zbit		
+ * [??]	 	int-cksum (NID_md5 = 16)
+ *
+ * maximum SPKM-MIC innercontext token length = 
+ *	 10 + encoded contextid_size(17 max) + 2 + encoded  
+ *       cksum_size (17 maxfor NID_md5) = 46
+ */
+
+/*
+ * spkm3_mic_header()
+ *
+ * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
+ * elen: 16 byte context id asn1 bitstring encoded length
+ */
+void
+spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
+{
+	char *hptr = *hdrbuf;
+	char *top = *hdrbuf;
+
+	*(u8 *)hptr++ = 0x30;
+	*(u8 *)hptr++ = elen + 7;  /* on the wire header length */
+
+	/* tokenid */
+	*(u8 *)hptr++ = 0x02;
+	*(u8 *)hptr++ = 0x02;
+	*(u8 *)hptr++ = 0x01;
+	*(u8 *)hptr++ = 0x01;
+
+	/* coniextid */
+	*(u8 *)hptr++ = 0x03;
+	*(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
+	*(u8 *)hptr++ = zbit;
+	memcpy(hptr, ctxdata, elen);
+	hptr += elen;
+	*hdrlen = hptr - top; 
+}
+		
+/* 
+ * spkm3_mic_innercontext_token()
+ *
+ * *tokp points to the beginning of the SPKM_MIC token  described 
+ * in rfc 2025, section 3.2.1: 
+ *
+ */
+void
+spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
+{
+	unsigned char *ict = *tokp;
+
+	*(u8 *)ict++ = 0xa4;
+	*(u8 *)ict++ = toklen - 2; 
+	memcpy(ict, mic_hdr->data, mic_hdr->len);
+	ict += mic_hdr->len;
+
+	*(u8 *)ict++ = 0x03;
+	*(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
+	*(u8 *)ict++ = md5zbit;
+	memcpy(ict, md5cksum->data, md5elen);
+}
+
+u32
+spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
+{
+	struct xdr_netobj       spkm3_ctx_id = {.len =0, .data = NULL};
+	unsigned char 		*ptr = *tokp;
+	int 			ctxelen;
+	u32     		ret = GSS_S_DEFECTIVE_TOKEN;
+
+	/* spkm3 innercontext token preamble */
+	if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
+		dprintk("RPC: BAD SPKM ictoken preamble\n"); 
+		goto out;
+	}
+
+	*mic_hdrlen = ptr[3];
+
+	/* token type */
+	if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
+		dprintk("RPC: BAD asn1 SPKM3 token type\n");
+		goto out;
+	}
+
+	/* only support SPKM_MIC_TOK */
+	if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
+		dprintk("RPC: ERROR unsupported SPKM3 token \n");
+		goto out;
+	}
+
+	/* contextid */
+	if (ptr[8] != 0x03) {
+		dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
+		goto out;
+	}
+
+	ctxelen = ptr[9];
+	if (ctxelen > 17) {  /* length includes asn1 zbit octet */
+		dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
+		goto out;
+	}
+
+	/* ignore ptr[10] */
+
+	if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
+		goto out;
+
+	/*
+	* in the current implementation: the optional int-alg is not present 
+	* so the default int-alg (md5) is used the optional snd-seq field is 
+	* also not present 
+	*/
+
+	if (*mic_hdrlen != 6 + ctxelen) {
+		dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only support default int-alg (should be absent) and do not support snd-seq\n", *mic_hdrlen);
+		goto out;
+	}
+	/* checksum */
+        *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
+
+	ret = GSS_S_COMPLETE;
+out:
+	if (spkm3_ctx_id.data)
+		kfree(spkm3_ctx_id.data);
+	return ret;
+}
+
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
new file mode 100644
index 00000000000..65ce81bf0bc
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -0,0 +1,128 @@
+/*
+ *  linux/net/sunrpc/gss_spkm3_unseal.c
+ *
+ *  Copyright (c) 2003 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_spkm3.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+/*
+ * spkm3_read_token()
+ * 
+ * only SPKM_MIC_TOK with md5 intg-alg is supported
+ */
+u32
+spkm3_read_token(struct spkm3_ctx *ctx,
+		struct xdr_netobj *read_token,    /* checksum */
+		struct xdr_buf *message_buffer, /* signbuf */
+		int *qop_state, int toktype)
+{
+	s32			code;
+	struct xdr_netobj	wire_cksum = {.len =0, .data = NULL};
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	unsigned char		*ptr = (unsigned char *)read_token->data;
+	unsigned char           *cksum;
+	int			bodysize, md5elen;
+	int			mic_hdrlen;
+	u32			ret = GSS_S_DEFECTIVE_TOKEN;
+
+	dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len);
+
+	if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
+					&bodysize, &ptr, read_token->len))
+		goto out;
+
+	/* decode the token */
+
+	if (toktype == SPKM_MIC_TOK) {
+
+		if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) 
+			goto out;
+
+		if (*cksum++ != 0x03) {
+			dprintk("RPC: spkm3_read_token BAD checksum type\n");
+			goto out;
+		}
+		md5elen = *cksum++; 
+		cksum++; 	/* move past the zbit */
+	
+		if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
+			goto out;
+
+		/* HARD CODED FOR MD5 */
+
+		/* compute the checksum of the message.
+		*  ptr + 2 = start of header piece of checksum
+		*  mic_hdrlen + 2 = length of header piece of checksum
+		*/
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 
+					mic_hdrlen + 2, 
+		                        message_buffer, &md5cksum);
+
+		if (code)
+			goto out;
+
+		dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", 
+			wire_cksum.len);
+		dprintk("          md5cksum.data\n");
+		print_hexl((u32 *) md5cksum.data, 16, 0);
+		dprintk("          cksum.data:\n");
+		print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0);
+
+		ret = GSS_S_BAD_SIG;
+		code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
+		if (code)
+			goto out;
+
+	} else { 
+		dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype);
+		goto out;
+	}
+
+	/* XXX: need to add expiration and sequencing */
+	ret = GSS_S_COMPLETE;
+out:
+	if (md5cksum.data) 
+		kfree(md5cksum.data);
+	if (wire_cksum.data) 
+		kfree(wire_cksum.data);
+	return ret;
+}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
new file mode 100644
index 00000000000..5c8fe3bfc49
--- /dev/null
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -0,0 +1,1080 @@
+/*
+ * Neil Brown <neilb@cse.unsw.edu.au>
+ * J. Bruce Fields <bfields@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ * Dug Song <dugsong@monkey.org>
+ *
+ * RPCSEC_GSS server authentication.
+ * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
+ * (gssapi)
+ *
+ * The RPCSEC_GSS involves three stages:
+ *  1/ context creation
+ *  2/ data exchange
+ *  3/ context destruction
+ *
+ * Context creation is handled largely by upcalls to user-space.
+ *  In particular, GSS_Accept_sec_context is handled by an upcall
+ * Data exchange is handled entirely within the kernel
+ *  In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
+ * Context destruction is handled in-kernel
+ *  GSS_Delete_sec_context is in-kernel
+ *
+ * Context creation is initiated by a RPCSEC_GSS_INIT request arriving.
+ * The context handle and gss_token are used as a key into the rpcsec_init cache.
+ * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
+ * being major_status, minor_status, context_handle, reply_token.
+ * These are sent back to the client.
+ * Sequence window management is handled by the kernel.  The window size if currently
+ * a compile time constant.
+ *
+ * When user-space is happy that a context is established, it places an entry
+ * in the rpcsec_context cache. The key for this cache is the context_handle.
+ * The content includes:
+ *   uid/gidlist - for determining access rights
+ *   mechanism type
+ *   mechanism specific information, such as a key
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+
+#include <linux/sunrpc/auth_gss.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/gss_err.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/cache.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests
+ * into replies.
+ *
+ * Key is context handle (\x if empty) and gss_token.
+ * Content is major_status minor_status (integers) context_handle, reply_token.
+ *
+ */
+
+static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b)
+{
+	return a->len == b->len && 0 == memcmp(a->data, b->data, a->len);
+}
+
+#define	RSI_HASHBITS	6
+#define	RSI_HASHMAX	(1<<RSI_HASHBITS)
+#define	RSI_HASHMASK	(RSI_HASHMAX-1)
+
+struct rsi {
+	struct cache_head	h;
+	struct xdr_netobj	in_handle, in_token;
+	struct xdr_netobj	out_handle, out_token;
+	int			major_status, minor_status;
+};
+
+static struct cache_head *rsi_table[RSI_HASHMAX];
+static struct cache_detail rsi_cache;
+static struct rsi *rsi_lookup(struct rsi *item, int set);
+
+static void rsi_free(struct rsi *rsii)
+{
+	kfree(rsii->in_handle.data);
+	kfree(rsii->in_token.data);
+	kfree(rsii->out_handle.data);
+	kfree(rsii->out_token.data);
+}
+
+static void rsi_put(struct cache_head *item, struct cache_detail *cd)
+{
+	struct rsi *rsii = container_of(item, struct rsi, h);
+	if (cache_put(item, cd)) {
+		rsi_free(rsii);
+		kfree(rsii);
+	}
+}
+
+static inline int rsi_hash(struct rsi *item)
+{
+	return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS)
+	     ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS);
+}
+
+static inline int rsi_match(struct rsi *item, struct rsi *tmp)
+{
+	return netobj_equal(&item->in_handle, &tmp->in_handle)
+		&& netobj_equal(&item->in_token, &tmp->in_token);
+}
+
+static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len)
+{
+	dst->len = len;
+	dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL);
+	if (dst->data)
+		memcpy(dst->data, src, len);
+	if (len && !dst->data)
+		return -ENOMEM;
+	return 0;
+}
+
+static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src)
+{
+	return dup_to_netobj(dst, src->data, src->len);
+}
+
+static inline void rsi_init(struct rsi *new, struct rsi *item)
+{
+	new->out_handle.data = NULL;
+	new->out_handle.len = 0;
+	new->out_token.data = NULL;
+	new->out_token.len = 0;
+	new->in_handle.len = item->in_handle.len;
+	item->in_handle.len = 0;
+	new->in_token.len = item->in_token.len;
+	item->in_token.len = 0;
+	new->in_handle.data = item->in_handle.data;
+	item->in_handle.data = NULL;
+	new->in_token.data = item->in_token.data;
+	item->in_token.data = NULL;
+}
+
+static inline void rsi_update(struct rsi *new, struct rsi *item)
+{
+	BUG_ON(new->out_handle.data || new->out_token.data);
+	new->out_handle.len = item->out_handle.len;
+	item->out_handle.len = 0;
+	new->out_token.len = item->out_token.len;
+	item->out_token.len = 0;
+	new->out_handle.data = item->out_handle.data;
+	item->out_handle.data = NULL;
+	new->out_token.data = item->out_token.data;
+	item->out_token.data = NULL;
+
+	new->major_status = item->major_status;
+	new->minor_status = item->minor_status;
+}
+
+static void rsi_request(struct cache_detail *cd,
+                       struct cache_head *h,
+                       char **bpp, int *blen)
+{
+	struct rsi *rsii = container_of(h, struct rsi, h);
+
+	qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
+	qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
+	(*bpp)[-1] = '\n';
+}
+
+
+static int rsi_parse(struct cache_detail *cd,
+                    char *mesg, int mlen)
+{
+	/* context token expiry major minor context token */
+	char *buf = mesg;
+	char *ep;
+	int len;
+	struct rsi rsii, *rsip = NULL;
+	time_t expiry;
+	int status = -EINVAL;
+
+	memset(&rsii, 0, sizeof(rsii));
+	/* handle */
+	len = qword_get(&mesg, buf, mlen);
+	if (len < 0)
+		goto out;
+	status = -ENOMEM;
+	if (dup_to_netobj(&rsii.in_handle, buf, len))
+		goto out;
+
+	/* token */
+	len = qword_get(&mesg, buf, mlen);
+	status = -EINVAL;
+	if (len < 0)
+		goto out;
+	status = -ENOMEM;
+	if (dup_to_netobj(&rsii.in_token, buf, len))
+		goto out;
+
+	rsii.h.flags = 0;
+	/* expiry */
+	expiry = get_expiry(&mesg);
+	status = -EINVAL;
+	if (expiry == 0)
+		goto out;
+
+	/* major/minor */
+	len = qword_get(&mesg, buf, mlen);
+	if (len < 0)
+		goto out;
+	if (len == 0) {
+		goto out;
+	} else {
+		rsii.major_status = simple_strtoul(buf, &ep, 10);
+		if (*ep)
+			goto out;
+		len = qword_get(&mesg, buf, mlen);
+		if (len <= 0)
+			goto out;
+		rsii.minor_status = simple_strtoul(buf, &ep, 10);
+		if (*ep)
+			goto out;
+
+		/* out_handle */
+		len = qword_get(&mesg, buf, mlen);
+		if (len < 0)
+			goto out;
+		status = -ENOMEM;
+		if (dup_to_netobj(&rsii.out_handle, buf, len))
+			goto out;
+
+		/* out_token */
+		len = qword_get(&mesg, buf, mlen);
+		status = -EINVAL;
+		if (len < 0)
+			goto out;
+		status = -ENOMEM;
+		if (dup_to_netobj(&rsii.out_token, buf, len))
+			goto out;
+	}
+	rsii.h.expiry_time = expiry;
+	rsip = rsi_lookup(&rsii, 1);
+	status = 0;
+out:
+	rsi_free(&rsii);
+	if (rsip)
+		rsi_put(&rsip->h, &rsi_cache);
+	return status;
+}
+
+static struct cache_detail rsi_cache = {
+	.hash_size	= RSI_HASHMAX,
+	.hash_table     = rsi_table,
+	.name           = "auth.rpcsec.init",
+	.cache_put      = rsi_put,
+	.cache_request  = rsi_request,
+	.cache_parse    = rsi_parse,
+};
+
+static DefineSimpleCacheLookup(rsi, 0)
+
+/*
+ * The rpcsec_context cache is used to store a context that is
+ * used in data exchange.
+ * The key is a context handle. The content is:
+ *  uid, gidlist, mechanism, service-set, mech-specific-data
+ */
+
+#define	RSC_HASHBITS	10
+#define	RSC_HASHMAX	(1<<RSC_HASHBITS)
+#define	RSC_HASHMASK	(RSC_HASHMAX-1)
+
+#define GSS_SEQ_WIN	128
+
+struct gss_svc_seq_data {
+	/* highest seq number seen so far: */
+	int			sd_max;
+	/* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of
+	 * sd_win is nonzero iff sequence number i has been seen already: */
+	unsigned long		sd_win[GSS_SEQ_WIN/BITS_PER_LONG];
+	spinlock_t		sd_lock;
+};
+
+struct rsc {
+	struct cache_head	h;
+	struct xdr_netobj	handle;
+	struct svc_cred		cred;
+	struct gss_svc_seq_data	seqdata;
+	struct gss_ctx		*mechctx;
+};
+
+static struct cache_head *rsc_table[RSC_HASHMAX];
+static struct cache_detail rsc_cache;
+static struct rsc *rsc_lookup(struct rsc *item, int set);
+
+static void rsc_free(struct rsc *rsci)
+{
+	kfree(rsci->handle.data);
+	if (rsci->mechctx)
+		gss_delete_sec_context(&rsci->mechctx);
+	if (rsci->cred.cr_group_info)
+		put_group_info(rsci->cred.cr_group_info);
+}
+
+static void rsc_put(struct cache_head *item, struct cache_detail *cd)
+{
+	struct rsc *rsci = container_of(item, struct rsc, h);
+
+	if (cache_put(item, cd)) {
+		rsc_free(rsci);
+		kfree(rsci);
+	}
+}
+
+static inline int
+rsc_hash(struct rsc *rsci)
+{
+	return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS);
+}
+
+static inline int
+rsc_match(struct rsc *new, struct rsc *tmp)
+{
+	return netobj_equal(&new->handle, &tmp->handle);
+}
+
+static inline void
+rsc_init(struct rsc *new, struct rsc *tmp)
+{
+	new->handle.len = tmp->handle.len;
+	tmp->handle.len = 0;
+	new->handle.data = tmp->handle.data;
+	tmp->handle.data = NULL;
+	new->mechctx = NULL;
+	new->cred.cr_group_info = NULL;
+}
+
+static inline void
+rsc_update(struct rsc *new, struct rsc *tmp)
+{
+	new->mechctx = tmp->mechctx;
+	tmp->mechctx = NULL;
+	memset(&new->seqdata, 0, sizeof(new->seqdata));
+	spin_lock_init(&new->seqdata.sd_lock);
+	new->cred = tmp->cred;
+	tmp->cred.cr_group_info = NULL;
+}
+
+static int rsc_parse(struct cache_detail *cd,
+		     char *mesg, int mlen)
+{
+	/* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
+	char *buf = mesg;
+	int len, rv;
+	struct rsc rsci, *rscp = NULL;
+	time_t expiry;
+	int status = -EINVAL;
+
+	memset(&rsci, 0, sizeof(rsci));
+	/* context handle */
+	len = qword_get(&mesg, buf, mlen);
+	if (len < 0) goto out;
+	status = -ENOMEM;
+	if (dup_to_netobj(&rsci.handle, buf, len))
+		goto out;
+
+	rsci.h.flags = 0;
+	/* expiry */
+	expiry = get_expiry(&mesg);
+	status = -EINVAL;
+	if (expiry == 0)
+		goto out;
+
+	/* uid, or NEGATIVE */
+	rv = get_int(&mesg, &rsci.cred.cr_uid);
+	if (rv == -EINVAL)
+		goto out;
+	if (rv == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &rsci.h.flags);
+	else {
+		int N, i;
+		struct gss_api_mech *gm;
+
+		/* gid */
+		if (get_int(&mesg, &rsci.cred.cr_gid))
+			goto out;
+
+		/* number of additional gid's */
+		if (get_int(&mesg, &N))
+			goto out;
+		status = -ENOMEM;
+		rsci.cred.cr_group_info = groups_alloc(N);
+		if (rsci.cred.cr_group_info == NULL)
+			goto out;
+
+		/* gid's */
+		status = -EINVAL;
+		for (i=0; i<N; i++) {
+			gid_t gid;
+			if (get_int(&mesg, &gid))
+				goto out;
+			GROUP_AT(rsci.cred.cr_group_info, i) = gid;
+		}
+
+		/* mech name */
+		len = qword_get(&mesg, buf, mlen);
+		if (len < 0)
+			goto out;
+		gm = gss_mech_get_by_name(buf);
+		status = -EOPNOTSUPP;
+		if (!gm)
+			goto out;
+
+		status = -EINVAL;
+		/* mech-specific data: */
+		len = qword_get(&mesg, buf, mlen);
+		if (len < 0) {
+			gss_mech_put(gm);
+			goto out;
+		}
+		if (gss_import_sec_context(buf, len, gm, &rsci.mechctx)) {
+			gss_mech_put(gm);
+			goto out;
+		}
+		gss_mech_put(gm);
+	}
+	rsci.h.expiry_time = expiry;
+	rscp = rsc_lookup(&rsci, 1);
+	status = 0;
+out:
+	rsc_free(&rsci);
+	if (rscp)
+		rsc_put(&rscp->h, &rsc_cache);
+	return status;
+}
+
+static struct cache_detail rsc_cache = {
+	.hash_size	= RSC_HASHMAX,
+	.hash_table	= rsc_table,
+	.name		= "auth.rpcsec.context",
+	.cache_put	= rsc_put,
+	.cache_parse	= rsc_parse,
+};
+
+static DefineSimpleCacheLookup(rsc, 0);
+
+static struct rsc *
+gss_svc_searchbyctx(struct xdr_netobj *handle)
+{
+	struct rsc rsci;
+	struct rsc *found;
+
+	memset(&rsci, 0, sizeof(rsci));
+	if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
+		return NULL;
+	found = rsc_lookup(&rsci, 0);
+	rsc_free(&rsci);
+	if (!found)
+		return NULL;
+	if (cache_check(&rsc_cache, &found->h, NULL))
+		return NULL;
+	return found;
+}
+
+/* Implements sequence number algorithm as specified in RFC 2203. */
+static int
+gss_check_seq_num(struct rsc *rsci, int seq_num)
+{
+	struct gss_svc_seq_data *sd = &rsci->seqdata;
+
+	spin_lock(&sd->sd_lock);
+	if (seq_num > sd->sd_max) {
+		if (seq_num >= sd->sd_max + GSS_SEQ_WIN) {
+			memset(sd->sd_win,0,sizeof(sd->sd_win));
+			sd->sd_max = seq_num;
+		} else while (sd->sd_max < seq_num) {
+			sd->sd_max++;
+			__clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win);
+		}
+		__set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
+		goto ok;
+	} else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
+		goto drop;
+	}
+	/* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */
+	if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
+		goto drop;
+ok:
+	spin_unlock(&sd->sd_lock);
+	return 1;
+drop:
+	spin_unlock(&sd->sd_lock);
+	return 0;
+}
+
+static inline u32 round_up_to_quad(u32 i)
+{
+	return (i + 3 ) & ~3;
+}
+
+static inline int
+svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
+{
+	int l;
+
+	if (argv->iov_len < 4)
+		return -1;
+	o->len = ntohl(svc_getu32(argv));
+	l = round_up_to_quad(o->len);
+	if (argv->iov_len < l)
+		return -1;
+	o->data = argv->iov_base;
+	argv->iov_base += l;
+	argv->iov_len -= l;
+	return 0;
+}
+
+static inline int
+svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
+{
+	u32 *p;
+
+	if (resv->iov_len + 4 > PAGE_SIZE)
+		return -1;
+	svc_putu32(resv, htonl(o->len));
+	p = resv->iov_base + resv->iov_len;
+	resv->iov_len += round_up_to_quad(o->len);
+	if (resv->iov_len > PAGE_SIZE)
+		return -1;
+	memcpy(p, o->data, o->len);
+	memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len);
+	return 0;
+}
+
+/* Verify the checksum on the header and return SVC_OK on success.
+ * Otherwise, return SVC_DROP (in the case of a bad sequence number)
+ * or return SVC_DENIED and indicate error in authp.
+ */
+static int
+gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
+		  u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp)
+{
+	struct gss_ctx		*ctx_id = rsci->mechctx;
+	struct xdr_buf		rpchdr;
+	struct xdr_netobj	checksum;
+	u32			flavor = 0;
+	struct kvec		*argv = &rqstp->rq_arg.head[0];
+	struct kvec		iov;
+
+	/* data to compute the checksum over: */
+	iov.iov_base = rpcstart;
+	iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
+	xdr_buf_from_iov(&iov, &rpchdr);
+
+	*authp = rpc_autherr_badverf;
+	if (argv->iov_len < 4)
+		return SVC_DENIED;
+	flavor = ntohl(svc_getu32(argv));
+	if (flavor != RPC_AUTH_GSS)
+		return SVC_DENIED;
+	if (svc_safe_getnetobj(argv, &checksum))
+		return SVC_DENIED;
+
+	if (rqstp->rq_deferred) /* skip verification of revisited request */
+		return SVC_OK;
+	if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL)
+							!= GSS_S_COMPLETE) {
+		*authp = rpcsec_gsserr_credproblem;
+		return SVC_DENIED;
+	}
+
+	if (gc->gc_seq > MAXSEQ) {
+		dprintk("RPC:      svcauth_gss: discarding request with large sequence number %d\n",
+				gc->gc_seq);
+		*authp = rpcsec_gsserr_ctxproblem;
+		return SVC_DENIED;
+	}
+	if (!gss_check_seq_num(rsci, gc->gc_seq)) {
+		dprintk("RPC:      svcauth_gss: discarding request with old sequence number %d\n",
+				gc->gc_seq);
+		return SVC_DROP;
+	}
+	return SVC_OK;
+}
+
+static int
+gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
+{
+	u32			xdr_seq;
+	u32			maj_stat;
+	struct xdr_buf		verf_data;
+	struct xdr_netobj	mic;
+	u32			*p;
+	struct kvec		iov;
+
+	svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS));
+	xdr_seq = htonl(seq);
+
+	iov.iov_base = &xdr_seq;
+	iov.iov_len = sizeof(xdr_seq);
+	xdr_buf_from_iov(&iov, &verf_data);
+	p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
+	mic.data = (u8 *)(p + 1);
+	maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic);
+	if (maj_stat != GSS_S_COMPLETE)
+		return -1;
+	*p++ = htonl(mic.len);
+	memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
+	p += XDR_QUADLEN(mic.len);
+	if (!xdr_ressize_check(rqstp, p))
+		return -1;
+	return 0;
+}
+
+struct gss_domain {
+	struct auth_domain	h;
+	u32			pseudoflavor;
+};
+
+static struct auth_domain *
+find_gss_auth_domain(struct gss_ctx *ctx, u32 svc)
+{
+	char *name;
+
+	name = gss_service_to_auth_domain_name(ctx->mech_type, svc);
+	if (!name)
+		return NULL;
+	return auth_domain_find(name);
+}
+
+int
+svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
+{
+	struct gss_domain	*new;
+	struct auth_domain	*test;
+	int			stat = -ENOMEM;
+
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		goto out;
+	cache_init(&new->h.h);
+	new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!new->h.name)
+		goto out_free_dom;
+	strcpy(new->h.name, name);
+	new->h.flavour = RPC_AUTH_GSS;
+	new->pseudoflavor = pseudoflavor;
+	new->h.h.expiry_time = NEVER;
+
+	test = auth_domain_lookup(&new->h, 1);
+	if (test == &new->h) {
+		BUG_ON(atomic_dec_and_test(&new->h.h.refcnt));
+	} else { /* XXX Duplicate registration? */
+		auth_domain_put(&new->h);
+		goto out;
+	}
+	return 0;
+
+out_free_dom:
+	kfree(new);
+out:
+	return stat;
+}
+
+EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
+
+static inline int
+read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
+{
+	u32     raw;
+	int     status;
+
+	status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
+	if (status)
+		return status;
+	*obj = ntohl(raw);
+	return 0;
+}
+
+/* It would be nice if this bit of code could be shared with the client.
+ * Obstacles:
+ *	The client shouldn't malloc(), would have to pass in own memory.
+ *	The server uses base of head iovec as read pointer, while the
+ *	client uses separate pointer. */
+static int
+unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+{
+	int stat = -EINVAL;
+	u32 integ_len, maj_stat;
+	struct xdr_netobj mic;
+	struct xdr_buf integ_buf;
+
+	integ_len = ntohl(svc_getu32(&buf->head[0]));
+	if (integ_len & 3)
+		goto out;
+	if (integ_len > buf->len)
+		goto out;
+	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
+		BUG();
+	/* copy out mic... */
+	if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
+		BUG();
+	if (mic.len > RPC_MAX_AUTH_SIZE)
+		goto out;
+	mic.data = kmalloc(mic.len, GFP_KERNEL);
+	if (!mic.data)
+		goto out;
+	if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
+		goto out;
+	maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL);
+	if (maj_stat != GSS_S_COMPLETE)
+		goto out;
+	if (ntohl(svc_getu32(&buf->head[0])) != seq)
+		goto out;
+	stat = 0;
+out:
+	return stat;
+}
+
+struct gss_svc_data {
+	/* decoded gss client cred: */
+	struct rpc_gss_wire_cred	clcred;
+	/* pointer to the beginning of the procedure-specific results,
+	 * which may be encrypted/checksummed in svcauth_gss_release: */
+	u32				*body_start;
+	struct rsc			*rsci;
+};
+
+static int
+svcauth_gss_set_client(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+	struct rsc *rsci = svcdata->rsci;
+	struct rpc_gss_wire_cred *gc = &svcdata->clcred;
+
+	rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
+	if (rqstp->rq_client == NULL)
+		return SVC_DENIED;
+	return SVC_OK;
+}
+
+/*
+ * Accept an rpcsec packet.
+ * If context establishment, punt to user space
+ * If data exchange, verify/decrypt
+ * If context destruction, handle here
+ * In the context establishment and destruction case we encode
+ * response here and return SVC_COMPLETE.
+ */
+static int
+svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
+{
+	struct kvec	*argv = &rqstp->rq_arg.head[0];
+	struct kvec	*resv = &rqstp->rq_res.head[0];
+	u32		crlen;
+	struct xdr_netobj tmpobj;
+	struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+	struct rpc_gss_wire_cred *gc;
+	struct rsc	*rsci = NULL;
+	struct rsi	*rsip, rsikey;
+	u32		*rpcstart;
+	u32		*reject_stat = resv->iov_base + resv->iov_len;
+	int		ret;
+
+	dprintk("RPC:      svcauth_gss: argv->iov_len = %zd\n",argv->iov_len);
+
+	*authp = rpc_autherr_badcred;
+	if (!svcdata)
+		svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
+	if (!svcdata)
+		goto auth_err;
+	rqstp->rq_auth_data = svcdata;
+	svcdata->body_start = NULL;
+	svcdata->rsci = NULL;
+	gc = &svcdata->clcred;
+
+	/* start of rpc packet is 7 u32's back from here:
+	 * xid direction rpcversion prog vers proc flavour
+	 */
+	rpcstart = argv->iov_base;
+	rpcstart -= 7;
+
+	/* credential is:
+	 *   version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
+	 * at least 5 u32s, and is preceeded by length, so that makes 6.
+	 */
+
+	if (argv->iov_len < 5 * 4)
+		goto auth_err;
+	crlen = ntohl(svc_getu32(argv));
+	if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION)
+		goto auth_err;
+	gc->gc_proc = ntohl(svc_getu32(argv));
+	gc->gc_seq = ntohl(svc_getu32(argv));
+	gc->gc_svc = ntohl(svc_getu32(argv));
+	if (svc_safe_getnetobj(argv, &gc->gc_ctx))
+		goto auth_err;
+	if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
+		goto auth_err;
+
+	if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
+		goto auth_err;
+
+	/*
+	 * We've successfully parsed the credential. Let's check out the
+	 * verifier.  An AUTH_NULL verifier is allowed (and required) for
+	 * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for
+	 * PROC_DATA and PROC_DESTROY.
+	 *
+	 * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length).
+	 * AUTH_RPCSEC_GSS verifier is:
+	 *   6 (AUTH_RPCSEC_GSS), length, checksum.
+	 * checksum is calculated over rpcheader from xid up to here.
+	 */
+	*authp = rpc_autherr_badverf;
+	switch (gc->gc_proc) {
+	case RPC_GSS_PROC_INIT:
+	case RPC_GSS_PROC_CONTINUE_INIT:
+		if (argv->iov_len < 2 * 4)
+			goto auth_err;
+		if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL)
+			goto auth_err;
+		if (ntohl(svc_getu32(argv)) != 0)
+			goto auth_err;
+		break;
+	case RPC_GSS_PROC_DATA:
+	case RPC_GSS_PROC_DESTROY:
+		*authp = rpcsec_gsserr_credproblem;
+		rsci = gss_svc_searchbyctx(&gc->gc_ctx);
+		if (!rsci)
+			goto auth_err;
+		switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
+		case SVC_OK:
+			break;
+		case SVC_DENIED:
+			goto auth_err;
+		case SVC_DROP:
+			goto drop;
+		}
+		break;
+	default:
+		*authp = rpc_autherr_rejectedcred;
+		goto auth_err;
+	}
+
+	/* now act upon the command: */
+	switch (gc->gc_proc) {
+	case RPC_GSS_PROC_INIT:
+	case RPC_GSS_PROC_CONTINUE_INIT:
+		*authp = rpc_autherr_badcred;
+		if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
+			goto auth_err;
+		memset(&rsikey, 0, sizeof(rsikey));
+		if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+			goto drop;
+		*authp = rpc_autherr_badverf;
+		if (svc_safe_getnetobj(argv, &tmpobj)) {
+			kfree(rsikey.in_handle.data);
+			goto auth_err;
+		}
+		if (dup_netobj(&rsikey.in_token, &tmpobj)) {
+			kfree(rsikey.in_handle.data);
+			goto drop;
+		}
+
+		rsip = rsi_lookup(&rsikey, 0);
+		rsi_free(&rsikey);
+		if (!rsip) {
+			goto drop;
+		}
+		switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
+		case -EAGAIN:
+			goto drop;
+		case -ENOENT:
+			goto drop;
+		case 0:
+			rsci = gss_svc_searchbyctx(&rsip->out_handle);
+			if (!rsci) {
+				goto drop;
+			}
+			if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN))
+				goto drop;
+			if (resv->iov_len + 4 > PAGE_SIZE)
+				goto drop;
+			svc_putu32(resv, rpc_success);
+			if (svc_safe_putnetobj(resv, &rsip->out_handle))
+				goto drop;
+			if (resv->iov_len + 3 * 4 > PAGE_SIZE)
+				goto drop;
+			svc_putu32(resv, htonl(rsip->major_status));
+			svc_putu32(resv, htonl(rsip->minor_status));
+			svc_putu32(resv, htonl(GSS_SEQ_WIN));
+			if (svc_safe_putnetobj(resv, &rsip->out_token))
+				goto drop;
+			rqstp->rq_client = NULL;
+		}
+		goto complete;
+	case RPC_GSS_PROC_DESTROY:
+		set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+		if (resv->iov_len + 4 > PAGE_SIZE)
+			goto drop;
+		svc_putu32(resv, rpc_success);
+		goto complete;
+	case RPC_GSS_PROC_DATA:
+		*authp = rpcsec_gsserr_ctxproblem;
+		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
+			goto auth_err;
+		rqstp->rq_cred = rsci->cred;
+		get_group_info(rsci->cred.cr_group_info);
+		*authp = rpc_autherr_badcred;
+		switch (gc->gc_svc) {
+		case RPC_GSS_SVC_NONE:
+			break;
+		case RPC_GSS_SVC_INTEGRITY:
+			if (unwrap_integ_data(&rqstp->rq_arg,
+					gc->gc_seq, rsci->mechctx))
+				goto auth_err;
+			/* placeholders for length and seq. number: */
+			svcdata->body_start = resv->iov_base + resv->iov_len;
+			svc_putu32(resv, 0);
+			svc_putu32(resv, 0);
+			break;
+		case RPC_GSS_SVC_PRIVACY:
+			/* currently unsupported */
+		default:
+			goto auth_err;
+		}
+		svcdata->rsci = rsci;
+		cache_get(&rsci->h);
+		ret = SVC_OK;
+		goto out;
+	}
+auth_err:
+	/* Restore write pointer to original value: */
+	xdr_ressize_check(rqstp, reject_stat);
+	ret = SVC_DENIED;
+	goto out;
+complete:
+	ret = SVC_COMPLETE;
+	goto out;
+drop:
+	ret = SVC_DROP;
+out:
+	if (rsci)
+		rsc_put(&rsci->h, &rsc_cache);
+	return ret;
+}
+
+static int
+svcauth_gss_release(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+	struct rpc_gss_wire_cred *gc = &gsd->clcred;
+	struct xdr_buf *resbuf = &rqstp->rq_res;
+	struct xdr_buf integ_buf;
+	struct xdr_netobj mic;
+	struct kvec *resv;
+	u32 *p;
+	int integ_offset, integ_len;
+	int stat = -EINVAL;
+
+	if (gc->gc_proc != RPC_GSS_PROC_DATA)
+		goto out;
+	/* Release can be called twice, but we only wrap once. */
+	if (gsd->body_start == NULL)
+		goto out;
+	/* normally not set till svc_send, but we need it here: */
+	resbuf->len = resbuf->head[0].iov_len
+		+ resbuf->page_len + resbuf->tail[0].iov_len;
+	switch (gc->gc_svc) {
+	case RPC_GSS_SVC_NONE:
+		break;
+	case RPC_GSS_SVC_INTEGRITY:
+		p = gsd->body_start;
+		gsd->body_start = NULL;
+		/* move accept_stat to right place: */
+		memcpy(p, p + 2, 4);
+		/* don't wrap in failure case: */
+		/* Note: counting on not getting here if call was not even
+		 * accepted! */
+		if (*p != rpc_success) {
+			resbuf->head[0].iov_len -= 2 * 4;
+			goto out;
+		}
+		p++;
+		integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
+		integ_len = resbuf->len - integ_offset;
+		BUG_ON(integ_len % 4);
+		*p++ = htonl(integ_len);
+		*p++ = htonl(gc->gc_seq);
+		if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
+					integ_len))
+			BUG();
+		if (resbuf->page_len == 0
+			&& resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
+				< PAGE_SIZE) {
+			BUG_ON(resbuf->tail[0].iov_len);
+			/* Use head for everything */
+			resv = &resbuf->head[0];
+		} else if (resbuf->tail[0].iov_base == NULL) {
+			/* copied from nfsd4_encode_read */
+			svc_take_page(rqstp);
+			resbuf->tail[0].iov_base = page_address(rqstp
+					->rq_respages[rqstp->rq_resused-1]);
+			rqstp->rq_restailpage = rqstp->rq_resused-1;
+			resbuf->tail[0].iov_len = 0;
+			resv = &resbuf->tail[0];
+		} else {
+			resv = &resbuf->tail[0];
+		}
+		mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
+		if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
+			goto out_err;
+		svc_putu32(resv, htonl(mic.len));
+		memset(mic.data + mic.len, 0,
+				round_up_to_quad(mic.len) - mic.len);
+		resv->iov_len += XDR_QUADLEN(mic.len) << 2;
+		/* not strictly required: */
+		resbuf->len += XDR_QUADLEN(mic.len) << 2;
+		BUG_ON(resv->iov_len > PAGE_SIZE);
+		break;
+	case RPC_GSS_SVC_PRIVACY:
+	default:
+		goto out_err;
+	}
+
+out:
+	stat = 0;
+out_err:
+	if (rqstp->rq_client)
+		auth_domain_put(rqstp->rq_client);
+	rqstp->rq_client = NULL;
+	if (rqstp->rq_cred.cr_group_info)
+		put_group_info(rqstp->rq_cred.cr_group_info);
+	rqstp->rq_cred.cr_group_info = NULL;
+	if (gsd->rsci)
+		rsc_put(&gsd->rsci->h, &rsc_cache);
+	gsd->rsci = NULL;
+
+	return stat;
+}
+
+static void
+svcauth_gss_domain_release(struct auth_domain *dom)
+{
+	struct gss_domain *gd = container_of(dom, struct gss_domain, h);
+
+	kfree(dom->name);
+	kfree(gd);
+}
+
+static struct auth_ops svcauthops_gss = {
+	.name		= "rpcsec_gss",
+	.owner		= THIS_MODULE,
+	.flavour	= RPC_AUTH_GSS,
+	.accept		= svcauth_gss_accept,
+	.release	= svcauth_gss_release,
+	.domain_release = svcauth_gss_domain_release,
+	.set_client	= svcauth_gss_set_client,
+};
+
+int
+gss_svc_init(void)
+{
+	int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
+	if (rv == 0) {
+		cache_register(&rsc_cache);
+		cache_register(&rsi_cache);
+	}
+	return rv;
+}
+
+void
+gss_svc_shutdown(void)
+{
+	cache_unregister(&rsc_cache);
+	cache_unregister(&rsi_cache);
+	svc_auth_unregister(RPC_AUTH_GSS);
+}
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
new file mode 100644
index 00000000000..9b72d3abf82
--- /dev/null
+++ b/net/sunrpc/auth_null.c
@@ -0,0 +1,143 @@
+/*
+ * linux/net/sunrpc/auth_null.c
+ *
+ * AUTH_NULL authentication. Really :-)
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/utsname.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sched.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static struct rpc_auth null_auth;
+static struct rpc_cred null_cred;
+
+static struct rpc_auth *
+nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+{
+	atomic_inc(&null_auth.au_count);
+	return &null_auth;
+}
+
+static void
+nul_destroy(struct rpc_auth *auth)
+{
+}
+
+/*
+ * Lookup NULL creds for current process
+ */
+static struct rpc_cred *
+nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+{
+	return get_rpccred(&null_cred);
+}
+
+/*
+ * Destroy cred handle.
+ */
+static void
+nul_destroy_cred(struct rpc_cred *cred)
+{
+}
+
+/*
+ * Match cred handle against current process
+ */
+static int
+nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
+{
+	return 1;
+}
+
+/*
+ * Marshal credential.
+ */
+static u32 *
+nul_marshal(struct rpc_task *task, u32 *p)
+{
+	*p++ = htonl(RPC_AUTH_NULL);
+	*p++ = 0;
+	*p++ = htonl(RPC_AUTH_NULL);
+	*p++ = 0;
+
+	return p;
+}
+
+/*
+ * Refresh credential. This is a no-op for AUTH_NULL
+ */
+static int
+nul_refresh(struct rpc_task *task)
+{
+	task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+	return 0;
+}
+
+static u32 *
+nul_validate(struct rpc_task *task, u32 *p)
+{
+	rpc_authflavor_t	flavor;
+	u32			size;
+
+	flavor = ntohl(*p++);
+	if (flavor != RPC_AUTH_NULL) {
+		printk("RPC: bad verf flavor: %u\n", flavor);
+		return NULL;
+	}
+
+	size = ntohl(*p++);
+	if (size != 0) {
+		printk("RPC: bad verf size: %u\n", size);
+		return NULL;
+	}
+
+	return p;
+}
+
+struct rpc_authops authnull_ops = {
+	.owner		= THIS_MODULE,
+	.au_flavor	= RPC_AUTH_NULL,
+#ifdef RPC_DEBUG
+	.au_name	= "NULL",
+#endif
+	.create		= nul_create,
+	.destroy	= nul_destroy,
+	.lookup_cred	= nul_lookup_cred,
+};
+
+static
+struct rpc_auth null_auth = {
+	.au_cslack	= 4,
+	.au_rslack	= 2,
+	.au_ops		= &authnull_ops,
+};
+
+static
+struct rpc_credops	null_credops = {
+	.cr_name	= "AUTH_NULL",
+	.crdestroy	= nul_destroy_cred,
+	.crmatch	= nul_match,
+	.crmarshal	= nul_marshal,
+	.crrefresh	= nul_refresh,
+	.crvalidate	= nul_validate,
+};
+
+static
+struct rpc_cred null_cred = {
+	.cr_ops		= &null_credops,
+	.cr_count	= ATOMIC_INIT(1),
+	.cr_flags	= RPCAUTH_CRED_UPTODATE,
+#ifdef RPC_DEBUG
+	.cr_magic	= RPCAUTH_CRED_MAGIC,
+#endif
+};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
new file mode 100644
index 00000000000..4ff297a9b15
--- /dev/null
+++ b/net/sunrpc/auth_unix.c
@@ -0,0 +1,242 @@
+/*
+ * linux/net/sunrpc/auth_unix.c
+ *
+ * UNIX-style authentication; no AUTH_SHORT support
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/auth.h>
+
+#define NFS_NGROUPS	16
+
+struct unx_cred {
+	struct rpc_cred		uc_base;
+	gid_t			uc_gid;
+	gid_t			uc_gids[NFS_NGROUPS];
+};
+#define uc_uid			uc_base.cr_uid
+#define uc_count		uc_base.cr_count
+#define uc_flags		uc_base.cr_flags
+#define uc_expire		uc_base.cr_expire
+
+#define UNX_CRED_EXPIRE		(60 * HZ)
+
+#define UNX_WRITESLACK		(21 + (UNX_MAXNODENAME >> 2))
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static struct rpc_auth		unix_auth;
+static struct rpc_cred_cache	unix_cred_cache;
+static struct rpc_credops	unix_credops;
+
+static struct rpc_auth *
+unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+{
+	dprintk("RPC: creating UNIX authenticator for client %p\n", clnt);
+	if (atomic_inc_return(&unix_auth.au_count) == 0)
+		unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
+	return &unix_auth;
+}
+
+static void
+unx_destroy(struct rpc_auth *auth)
+{
+	dprintk("RPC: destroying UNIX authenticator %p\n", auth);
+	rpcauth_free_credcache(auth);
+}
+
+/*
+ * Lookup AUTH_UNIX creds for current process
+ */
+static struct rpc_cred *
+unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+{
+	return rpcauth_lookup_credcache(auth, acred, flags);
+}
+
+static struct rpc_cred *
+unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+{
+	struct unx_cred	*cred;
+	int		i;
+
+	dprintk("RPC:      allocating UNIX cred for uid %d gid %d\n",
+				acred->uid, acred->gid);
+
+	if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&cred->uc_count, 1);
+	cred->uc_flags = RPCAUTH_CRED_UPTODATE;
+	if (flags & RPC_TASK_ROOTCREDS) {
+		cred->uc_uid = 0;
+		cred->uc_gid = 0;
+		cred->uc_gids[0] = NOGROUP;
+	} else {
+		int groups = acred->group_info->ngroups;
+		if (groups > NFS_NGROUPS)
+			groups = NFS_NGROUPS;
+
+		cred->uc_uid = acred->uid;
+		cred->uc_gid = acred->gid;
+		for (i = 0; i < groups; i++)
+			cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+		if (i < NFS_NGROUPS)
+		  cred->uc_gids[i] = NOGROUP;
+	}
+	cred->uc_base.cr_ops = &unix_credops;
+
+	return (struct rpc_cred *) cred;
+}
+
+static void
+unx_destroy_cred(struct rpc_cred *cred)
+{
+	kfree(cred);
+}
+
+/*
+ * Match credentials against current process creds.
+ * The root_override argument takes care of cases where the caller may
+ * request root creds (e.g. for NFS swapping).
+ */
+static int
+unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags)
+{
+	struct unx_cred	*cred = (struct unx_cred *) rcred;
+	int		i;
+
+	if (!(taskflags & RPC_TASK_ROOTCREDS)) {
+		int groups;
+
+		if (cred->uc_uid != acred->uid
+		 || cred->uc_gid != acred->gid)
+			return 0;
+
+		groups = acred->group_info->ngroups;
+		if (groups > NFS_NGROUPS)
+			groups = NFS_NGROUPS;
+		for (i = 0; i < groups ; i++)
+			if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
+				return 0;
+		return 1;
+	}
+	return (cred->uc_uid == 0
+	     && cred->uc_gid == 0
+	     && cred->uc_gids[0] == (gid_t) NOGROUP);
+}
+
+/*
+ * Marshal credentials.
+ * Maybe we should keep a cached credential for performance reasons.
+ */
+static u32 *
+unx_marshal(struct rpc_task *task, u32 *p)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct unx_cred	*cred = (struct unx_cred *) task->tk_msg.rpc_cred;
+	u32		*base, *hold;
+	int		i;
+
+	*p++ = htonl(RPC_AUTH_UNIX);
+	base = p++;
+	*p++ = htonl(jiffies/HZ);
+
+	/*
+	 * Copy the UTS nodename captured when the client was created.
+	 */
+	p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
+
+	*p++ = htonl((u32) cred->uc_uid);
+	*p++ = htonl((u32) cred->uc_gid);
+	hold = p++;
+	for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
+		*p++ = htonl((u32) cred->uc_gids[i]);
+	*hold = htonl(p - hold - 1);		/* gid array length */
+	*base = htonl((p - base - 1) << 2);	/* cred length */
+
+	*p++ = htonl(RPC_AUTH_NULL);
+	*p++ = htonl(0);
+
+	return p;
+}
+
+/*
+ * Refresh credentials. This is a no-op for AUTH_UNIX
+ */
+static int
+unx_refresh(struct rpc_task *task)
+{
+	task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+	return 0;
+}
+
+static u32 *
+unx_validate(struct rpc_task *task, u32 *p)
+{
+	rpc_authflavor_t	flavor;
+	u32			size;
+
+	flavor = ntohl(*p++);
+	if (flavor != RPC_AUTH_NULL &&
+	    flavor != RPC_AUTH_UNIX &&
+	    flavor != RPC_AUTH_SHORT) {
+		printk("RPC: bad verf flavor: %u\n", flavor);
+		return NULL;
+	}
+
+	size = ntohl(*p++);
+	if (size > RPC_MAX_AUTH_SIZE) {
+		printk("RPC: giant verf size: %u\n", size);
+		return NULL;
+	}
+	task->tk_auth->au_rslack = (size >> 2) + 2;
+	p += (size >> 2);
+
+	return p;
+}
+
+struct rpc_authops	authunix_ops = {
+	.owner		= THIS_MODULE,
+	.au_flavor	= RPC_AUTH_UNIX,
+#ifdef RPC_DEBUG
+	.au_name	= "UNIX",
+#endif
+	.create		= unx_create,
+	.destroy	= unx_destroy,
+	.lookup_cred	= unx_lookup_cred,
+	.crcreate	= unx_create_cred,
+};
+
+static
+struct rpc_cred_cache	unix_cred_cache = {
+	.expire		= UNX_CRED_EXPIRE,
+};
+
+static
+struct rpc_auth		unix_auth = {
+	.au_cslack	= UNX_WRITESLACK,
+	.au_rslack	= 2,			/* assume AUTH_NULL verf */
+	.au_ops		= &authunix_ops,
+	.au_count	= ATOMIC_INIT(0),
+	.au_credcache	= &unix_cred_cache,
+};
+
+static
+struct rpc_credops	unix_credops = {
+	.cr_name	= "AUTH_UNIX",
+	.crdestroy	= unx_destroy_cred,
+	.crmatch	= unx_match,
+	.crmarshal	= unx_marshal,
+	.crrefresh	= unx_refresh,
+	.crvalidate	= unx_validate,
+};
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
new file mode 100644
index 00000000000..900f5bc7e33
--- /dev/null
+++ b/net/sunrpc/cache.c
@@ -0,0 +1,1189 @@
+/*
+ * net/sunrpc/cache.c
+ *
+ * Generic code for various authentication-related caches
+ * used by sunrpc clients and servers.
+ *
+ * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
+ *
+ * Released under terms in GPL version 2.  See COPYING.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/net.h>
+#include <linux/workqueue.h>
+#include <asm/ioctls.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/sunrpc/stats.h>
+
+#define	 RPCDBG_FACILITY RPCDBG_CACHE
+
+static void cache_defer_req(struct cache_req *req, struct cache_head *item);
+static void cache_revisit_request(struct cache_head *item);
+
+void cache_init(struct cache_head *h)
+{
+	time_t now = get_seconds();
+	h->next = NULL;
+	h->flags = 0;
+	atomic_set(&h->refcnt, 1);
+	h->expiry_time = now + CACHE_NEW_EXPIRY;
+	h->last_refresh = now;
+}
+
+
+static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
+/*
+ * This is the generic cache management routine for all
+ * the authentication caches.
+ * It checks the currency of a cache item and will (later)
+ * initiate an upcall to fill it if needed.
+ *
+ *
+ * Returns 0 if the cache_head can be used, or cache_puts it and returns
+ * -EAGAIN if upcall is pending,
+ * -ENOENT if cache entry was negative
+ */
+int cache_check(struct cache_detail *detail,
+		    struct cache_head *h, struct cache_req *rqstp)
+{
+	int rv;
+	long refresh_age, age;
+
+	/* First decide return status as best we can */
+	if (!test_bit(CACHE_VALID, &h->flags) ||
+	    h->expiry_time < get_seconds())
+		rv = -EAGAIN;
+	else if (detail->flush_time > h->last_refresh)
+		rv = -EAGAIN;
+	else {
+		/* entry is valid */
+		if (test_bit(CACHE_NEGATIVE, &h->flags))
+			rv = -ENOENT;
+		else rv = 0;
+	}
+
+	/* now see if we want to start an upcall */
+	refresh_age = (h->expiry_time - h->last_refresh);
+	age = get_seconds() - h->last_refresh;
+
+	if (rqstp == NULL) {
+		if (rv == -EAGAIN)
+			rv = -ENOENT;
+	} else if (rv == -EAGAIN || age > refresh_age/2) {
+		dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age);
+		if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
+			switch (cache_make_upcall(detail, h)) {
+			case -EINVAL:
+				clear_bit(CACHE_PENDING, &h->flags);
+				if (rv == -EAGAIN) {
+					set_bit(CACHE_NEGATIVE, &h->flags);
+					cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY);
+					rv = -ENOENT;
+				}
+				break;
+
+			case -EAGAIN:
+				clear_bit(CACHE_PENDING, &h->flags);
+				cache_revisit_request(h);
+				break;
+			}
+		}
+	}
+
+	if (rv == -EAGAIN)
+		cache_defer_req(rqstp, h);
+
+	if (rv && h)
+		detail->cache_put(h, detail);
+	return rv;
+}
+
+static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
+
+void cache_fresh(struct cache_detail *detail,
+		 struct cache_head *head, time_t expiry)
+{
+
+	head->expiry_time = expiry;
+	head->last_refresh = get_seconds();
+	if (!test_and_set_bit(CACHE_VALID, &head->flags))
+		cache_revisit_request(head);
+	if (test_and_clear_bit(CACHE_PENDING, &head->flags))
+		queue_loose(detail, head);
+}
+
+/*
+ * caches need to be periodically cleaned.
+ * For this we maintain a list of cache_detail and
+ * a current pointer into that list and into the table
+ * for that entry.
+ *
+ * Each time clean_cache is called it finds the next non-empty entry
+ * in the current table and walks the list in that entry
+ * looking for entries that can be removed.
+ *
+ * An entry gets removed if:
+ * - The expiry is before current time
+ * - The last_refresh time is before the flush_time for that cache
+ *
+ * later we might drop old entries with non-NEVER expiry if that table
+ * is getting 'full' for some definition of 'full'
+ *
+ * The question of "how often to scan a table" is an interesting one
+ * and is answered in part by the use of the "nextcheck" field in the
+ * cache_detail.
+ * When a scan of a table begins, the nextcheck field is set to a time
+ * that is well into the future.
+ * While scanning, if an expiry time is found that is earlier than the
+ * current nextcheck time, nextcheck is set to that expiry time.
+ * If the flush_time is ever set to a time earlier than the nextcheck
+ * time, the nextcheck time is then set to that flush_time.
+ *
+ * A table is then only scanned if the current time is at least
+ * the nextcheck time.
+ * 
+ */
+
+static LIST_HEAD(cache_list);
+static DEFINE_SPINLOCK(cache_list_lock);
+static struct cache_detail *current_detail;
+static int current_index;
+
+static struct file_operations cache_file_operations;
+static struct file_operations content_file_operations;
+static struct file_operations cache_flush_operations;
+
+static void do_cache_clean(void *data);
+static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
+
+void cache_register(struct cache_detail *cd)
+{
+	cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
+	if (cd->proc_ent) {
+		struct proc_dir_entry *p;
+		cd->proc_ent->owner = THIS_MODULE;
+		cd->channel_ent = cd->content_ent = NULL;
+		
+ 		p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
+ 				      cd->proc_ent);
+		cd->flush_ent =  p;
+ 		if (p) {
+ 			p->proc_fops = &cache_flush_operations;
+ 			p->owner = THIS_MODULE;
+ 			p->data = cd;
+ 		}
+ 
+		if (cd->cache_request || cd->cache_parse) {
+			p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
+					      cd->proc_ent);
+			cd->channel_ent = p;
+			if (p) {
+				p->proc_fops = &cache_file_operations;
+				p->owner = THIS_MODULE;
+				p->data = cd;
+			}
+		}
+ 		if (cd->cache_show) {
+ 			p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
+ 					      cd->proc_ent);
+			cd->content_ent = p;
+ 			if (p) {
+ 				p->proc_fops = &content_file_operations;
+ 				p->owner = THIS_MODULE;
+ 				p->data = cd;
+ 			}
+ 		}
+	}
+	rwlock_init(&cd->hash_lock);
+	INIT_LIST_HEAD(&cd->queue);
+	spin_lock(&cache_list_lock);
+	cd->nextcheck = 0;
+	cd->entries = 0;
+	atomic_set(&cd->readers, 0);
+	cd->last_close = 0;
+	cd->last_warn = -1;
+	list_add(&cd->others, &cache_list);
+	spin_unlock(&cache_list_lock);
+
+	/* start the cleaning process */
+	schedule_work(&cache_cleaner);
+}
+
+int cache_unregister(struct cache_detail *cd)
+{
+	cache_purge(cd);
+	spin_lock(&cache_list_lock);
+	write_lock(&cd->hash_lock);
+	if (cd->entries || atomic_read(&cd->inuse)) {
+		write_unlock(&cd->hash_lock);
+		spin_unlock(&cache_list_lock);
+		return -EBUSY;
+	}
+	if (current_detail == cd)
+		current_detail = NULL;
+	list_del_init(&cd->others);
+	write_unlock(&cd->hash_lock);
+	spin_unlock(&cache_list_lock);
+	if (cd->proc_ent) {
+		if (cd->flush_ent)
+			remove_proc_entry("flush", cd->proc_ent);
+		if (cd->channel_ent)
+			remove_proc_entry("channel", cd->proc_ent);
+		if (cd->content_ent)
+			remove_proc_entry("content", cd->proc_ent);
+
+		cd->proc_ent = NULL;
+		remove_proc_entry(cd->name, proc_net_rpc);
+	}
+	if (list_empty(&cache_list)) {
+		/* module must be being unloaded so its safe to kill the worker */
+		cancel_delayed_work(&cache_cleaner);
+		flush_scheduled_work();
+	}
+	return 0;
+}
+
+/* clean cache tries to find something to clean
+ * and cleans it.
+ * It returns 1 if it cleaned something,
+ *            0 if it didn't find anything this time
+ *           -1 if it fell off the end of the list.
+ */
+static int cache_clean(void)
+{
+	int rv = 0;
+	struct list_head *next;
+
+	spin_lock(&cache_list_lock);
+
+	/* find a suitable table if we don't already have one */
+	while (current_detail == NULL ||
+	    current_index >= current_detail->hash_size) {
+		if (current_detail)
+			next = current_detail->others.next;
+		else
+			next = cache_list.next;
+		if (next == &cache_list) {
+			current_detail = NULL;
+			spin_unlock(&cache_list_lock);
+			return -1;
+		}
+		current_detail = list_entry(next, struct cache_detail, others);
+		if (current_detail->nextcheck > get_seconds())
+			current_index = current_detail->hash_size;
+		else {
+			current_index = 0;
+			current_detail->nextcheck = get_seconds()+30*60;
+		}
+	}
+
+	/* find a non-empty bucket in the table */
+	while (current_detail &&
+	       current_index < current_detail->hash_size &&
+	       current_detail->hash_table[current_index] == NULL)
+		current_index++;
+
+	/* find a cleanable entry in the bucket and clean it, or set to next bucket */
+	
+	if (current_detail && current_index < current_detail->hash_size) {
+		struct cache_head *ch, **cp;
+		struct cache_detail *d;
+		
+		write_lock(&current_detail->hash_lock);
+
+		/* Ok, now to clean this strand */
+			
+		cp = & current_detail->hash_table[current_index];
+		ch = *cp;
+		for (; ch; cp= & ch->next, ch= *cp) {
+			if (current_detail->nextcheck > ch->expiry_time)
+				current_detail->nextcheck = ch->expiry_time+1;
+			if (ch->expiry_time >= get_seconds()
+			    && ch->last_refresh >= current_detail->flush_time
+				)
+				continue;
+			if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
+				queue_loose(current_detail, ch);
+
+			if (atomic_read(&ch->refcnt) == 1)
+				break;
+		}
+		if (ch) {
+			*cp = ch->next;
+			ch->next = NULL;
+			current_detail->entries--;
+			rv = 1;
+		}
+		write_unlock(&current_detail->hash_lock);
+		d = current_detail;
+		if (!ch)
+			current_index ++;
+		spin_unlock(&cache_list_lock);
+		if (ch)
+			d->cache_put(ch, d);
+	} else
+		spin_unlock(&cache_list_lock);
+
+	return rv;
+}
+
+/*
+ * We want to regularly clean the cache, so we need to schedule some work ...
+ */
+static void do_cache_clean(void *data)
+{
+	int delay = 5;
+	if (cache_clean() == -1)
+		delay = 30*HZ;
+
+	if (list_empty(&cache_list))
+		delay = 0;
+
+	if (delay)
+		schedule_delayed_work(&cache_cleaner, delay);
+}
+
+
+/* 
+ * Clean all caches promptly.  This just calls cache_clean
+ * repeatedly until we are sure that every cache has had a chance to 
+ * be fully cleaned
+ */
+void cache_flush(void)
+{
+	while (cache_clean() != -1)
+		cond_resched();
+	while (cache_clean() != -1)
+		cond_resched();
+}
+
+void cache_purge(struct cache_detail *detail)
+{
+	detail->flush_time = LONG_MAX;
+	detail->nextcheck = get_seconds();
+	cache_flush();
+	detail->flush_time = 1;
+}
+
+
+
+/*
+ * Deferral and Revisiting of Requests.
+ *
+ * If a cache lookup finds a pending entry, we
+ * need to defer the request and revisit it later.
+ * All deferred requests are stored in a hash table,
+ * indexed by "struct cache_head *".
+ * As it may be wasteful to store a whole request
+ * structure, we allow the request to provide a 
+ * deferred form, which must contain a
+ * 'struct cache_deferred_req'
+ * This cache_deferred_req contains a method to allow
+ * it to be revisited when cache info is available
+ */
+
+#define	DFR_HASHSIZE	(PAGE_SIZE/sizeof(struct list_head))
+#define	DFR_HASH(item)	((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE)
+
+#define	DFR_MAX	300	/* ??? */
+
+static DEFINE_SPINLOCK(cache_defer_lock);
+static LIST_HEAD(cache_defer_list);
+static struct list_head cache_defer_hash[DFR_HASHSIZE];
+static int cache_defer_cnt;
+
+static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+	struct cache_deferred_req *dreq;
+	int hash = DFR_HASH(item);
+
+	dreq = req->defer(req);
+	if (dreq == NULL)
+		return;
+
+	dreq->item = item;
+	dreq->recv_time = get_seconds();
+
+	spin_lock(&cache_defer_lock);
+
+	list_add(&dreq->recent, &cache_defer_list);
+
+	if (cache_defer_hash[hash].next == NULL)
+		INIT_LIST_HEAD(&cache_defer_hash[hash]);
+	list_add(&dreq->hash, &cache_defer_hash[hash]);
+
+	/* it is in, now maybe clean up */
+	dreq = NULL;
+	if (++cache_defer_cnt > DFR_MAX) {
+		/* too much in the cache, randomly drop
+		 * first or last
+		 */
+		if (net_random()&1) 
+			dreq = list_entry(cache_defer_list.next,
+					  struct cache_deferred_req,
+					  recent);
+		else
+			dreq = list_entry(cache_defer_list.prev,
+					  struct cache_deferred_req,
+					  recent);
+		list_del(&dreq->recent);
+		list_del(&dreq->hash);
+		cache_defer_cnt--;
+	}
+	spin_unlock(&cache_defer_lock);
+
+	if (dreq) {
+		/* there was one too many */
+		dreq->revisit(dreq, 1);
+	}
+	if (test_bit(CACHE_VALID, &item->flags)) {
+		/* must have just been validated... */
+		cache_revisit_request(item);
+	}
+}
+
+static void cache_revisit_request(struct cache_head *item)
+{
+	struct cache_deferred_req *dreq;
+	struct list_head pending;
+
+	struct list_head *lp;
+	int hash = DFR_HASH(item);
+
+	INIT_LIST_HEAD(&pending);
+	spin_lock(&cache_defer_lock);
+	
+	lp = cache_defer_hash[hash].next;
+	if (lp) {
+		while (lp != &cache_defer_hash[hash]) {
+			dreq = list_entry(lp, struct cache_deferred_req, hash);
+			lp = lp->next;
+			if (dreq->item == item) {
+				list_del(&dreq->hash);
+				list_move(&dreq->recent, &pending);
+				cache_defer_cnt--;
+			}
+		}
+	}
+	spin_unlock(&cache_defer_lock);
+
+	while (!list_empty(&pending)) {
+		dreq = list_entry(pending.next, struct cache_deferred_req, recent);
+		list_del_init(&dreq->recent);
+		dreq->revisit(dreq, 0);
+	}
+}
+
+void cache_clean_deferred(void *owner)
+{
+	struct cache_deferred_req *dreq, *tmp;
+	struct list_head pending;
+
+
+	INIT_LIST_HEAD(&pending);
+	spin_lock(&cache_defer_lock);
+	
+	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
+		if (dreq->owner == owner) {
+			list_del(&dreq->hash);
+			list_move(&dreq->recent, &pending);
+			cache_defer_cnt--;
+		}
+	}
+	spin_unlock(&cache_defer_lock);
+
+	while (!list_empty(&pending)) {
+		dreq = list_entry(pending.next, struct cache_deferred_req, recent);
+		list_del_init(&dreq->recent);
+		dreq->revisit(dreq, 1);
+	}
+}
+
+/*
+ * communicate with user-space
+ *
+ * We have a magic /proc file - /proc/sunrpc/cache
+ * On read, you get a full request, or block
+ * On write, an update request is processed
+ * Poll works if anything to read, and always allows write
+ *
+ * Implemented by linked list of requests.  Each open file has 
+ * a ->private that also exists in this list.  New request are added
+ * to the end and may wakeup and preceding readers.
+ * New readers are added to the head.  If, on read, an item is found with
+ * CACHE_UPCALLING clear, we free it from the list.
+ *
+ */
+
+static DEFINE_SPINLOCK(queue_lock);
+static DECLARE_MUTEX(queue_io_sem);
+
+struct cache_queue {
+	struct list_head	list;
+	int			reader;	/* if 0, then request */
+};
+struct cache_request {
+	struct cache_queue	q;
+	struct cache_head	*item;
+	char			* buf;
+	int			len;
+	int			readers;
+};
+struct cache_reader {
+	struct cache_queue	q;
+	int			offset;	/* if non-0, we have a refcnt on next request */
+};
+
+static ssize_t
+cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct cache_reader *rp = filp->private_data;
+	struct cache_request *rq;
+	struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+	int err;
+
+	if (count == 0)
+		return 0;
+
+	down(&queue_io_sem); /* protect against multiple concurrent
+			      * readers on this file */
+ again:
+	spin_lock(&queue_lock);
+	/* need to find next request */
+	while (rp->q.list.next != &cd->queue &&
+	       list_entry(rp->q.list.next, struct cache_queue, list)
+	       ->reader) {
+		struct list_head *next = rp->q.list.next;
+		list_move(&rp->q.list, next);
+	}
+	if (rp->q.list.next == &cd->queue) {
+		spin_unlock(&queue_lock);
+		up(&queue_io_sem);
+		if (rp->offset)
+			BUG();
+		return 0;
+	}
+	rq = container_of(rp->q.list.next, struct cache_request, q.list);
+	if (rq->q.reader) BUG();
+	if (rp->offset == 0)
+		rq->readers++;
+	spin_unlock(&queue_lock);
+
+	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
+		err = -EAGAIN;
+		spin_lock(&queue_lock);
+		list_move(&rp->q.list, &rq->q.list);
+		spin_unlock(&queue_lock);
+	} else {
+		if (rp->offset + count > rq->len)
+			count = rq->len - rp->offset;
+		err = -EFAULT;
+		if (copy_to_user(buf, rq->buf + rp->offset, count))
+			goto out;
+		rp->offset += count;
+		if (rp->offset >= rq->len) {
+			rp->offset = 0;
+			spin_lock(&queue_lock);
+			list_move(&rp->q.list, &rq->q.list);
+			spin_unlock(&queue_lock);
+		}
+		err = 0;
+	}
+ out:
+	if (rp->offset == 0) {
+		/* need to release rq */
+		spin_lock(&queue_lock);
+		rq->readers--;
+		if (rq->readers == 0 &&
+		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
+			list_del(&rq->q.list);
+			spin_unlock(&queue_lock);
+			cd->cache_put(rq->item, cd);
+			kfree(rq->buf);
+			kfree(rq);
+		} else
+			spin_unlock(&queue_lock);
+	}
+	if (err == -EAGAIN)
+		goto again;
+	up(&queue_io_sem);
+	return err ? err :  count;
+}
+
+static char write_buf[8192]; /* protected by queue_io_sem */
+
+static ssize_t
+cache_write(struct file *filp, const char __user *buf, size_t count,
+	    loff_t *ppos)
+{
+	int err;
+	struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+
+	if (count == 0)
+		return 0;
+	if (count >= sizeof(write_buf))
+		return -EINVAL;
+
+	down(&queue_io_sem);
+
+	if (copy_from_user(write_buf, buf, count)) {
+		up(&queue_io_sem);
+		return -EFAULT;
+	}
+	write_buf[count] = '\0';
+	if (cd->cache_parse)
+		err = cd->cache_parse(cd, write_buf, count);
+	else
+		err = -EINVAL;
+
+	up(&queue_io_sem);
+	return err ? err : count;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
+
+static unsigned int
+cache_poll(struct file *filp, poll_table *wait)
+{
+	unsigned int mask;
+	struct cache_reader *rp = filp->private_data;
+	struct cache_queue *cq;
+	struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
+
+	poll_wait(filp, &queue_wait, wait);
+
+	/* alway allow write */
+	mask = POLL_OUT | POLLWRNORM;
+
+	if (!rp)
+		return mask;
+
+	spin_lock(&queue_lock);
+
+	for (cq= &rp->q; &cq->list != &cd->queue;
+	     cq = list_entry(cq->list.next, struct cache_queue, list))
+		if (!cq->reader) {
+			mask |= POLLIN | POLLRDNORM;
+			break;
+		}
+	spin_unlock(&queue_lock);
+	return mask;
+}
+
+static int
+cache_ioctl(struct inode *ino, struct file *filp,
+	    unsigned int cmd, unsigned long arg)
+{
+	int len = 0;
+	struct cache_reader *rp = filp->private_data;
+	struct cache_queue *cq;
+	struct cache_detail *cd = PDE(ino)->data;
+
+	if (cmd != FIONREAD || !rp)
+		return -EINVAL;
+
+	spin_lock(&queue_lock);
+
+	/* only find the length remaining in current request,
+	 * or the length of the next request
+	 */
+	for (cq= &rp->q; &cq->list != &cd->queue;
+	     cq = list_entry(cq->list.next, struct cache_queue, list))
+		if (!cq->reader) {
+			struct cache_request *cr =
+				container_of(cq, struct cache_request, q);
+			len = cr->len - rp->offset;
+			break;
+		}
+	spin_unlock(&queue_lock);
+
+	return put_user(len, (int __user *)arg);
+}
+
+static int
+cache_open(struct inode *inode, struct file *filp)
+{
+	struct cache_reader *rp = NULL;
+
+	nonseekable_open(inode, filp);
+	if (filp->f_mode & FMODE_READ) {
+		struct cache_detail *cd = PDE(inode)->data;
+
+		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+		if (!rp)
+			return -ENOMEM;
+		rp->offset = 0;
+		rp->q.reader = 1;
+		atomic_inc(&cd->readers);
+		spin_lock(&queue_lock);
+		list_add(&rp->q.list, &cd->queue);
+		spin_unlock(&queue_lock);
+	}
+	filp->private_data = rp;
+	return 0;
+}
+
+static int
+cache_release(struct inode *inode, struct file *filp)
+{
+	struct cache_reader *rp = filp->private_data;
+	struct cache_detail *cd = PDE(inode)->data;
+
+	if (rp) {
+		spin_lock(&queue_lock);
+		if (rp->offset) {
+			struct cache_queue *cq;
+			for (cq= &rp->q; &cq->list != &cd->queue;
+			     cq = list_entry(cq->list.next, struct cache_queue, list))
+				if (!cq->reader) {
+					container_of(cq, struct cache_request, q)
+						->readers--;
+					break;
+				}
+			rp->offset = 0;
+		}
+		list_del(&rp->q.list);
+		spin_unlock(&queue_lock);
+
+		filp->private_data = NULL;
+		kfree(rp);
+
+		cd->last_close = get_seconds();
+		atomic_dec(&cd->readers);
+	}
+	return 0;
+}
+
+
+
+static struct file_operations cache_file_operations = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= cache_read,
+	.write		= cache_write,
+	.poll		= cache_poll,
+	.ioctl		= cache_ioctl, /* for FIONREAD */
+	.open		= cache_open,
+	.release	= cache_release,
+};
+
+
+static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
+{
+	struct cache_queue *cq;
+	spin_lock(&queue_lock);
+	list_for_each_entry(cq, &detail->queue, list)
+		if (!cq->reader) {
+			struct cache_request *cr = container_of(cq, struct cache_request, q);
+			if (cr->item != ch)
+				continue;
+			if (cr->readers != 0)
+				break;
+			list_del(&cr->q.list);
+			spin_unlock(&queue_lock);
+			detail->cache_put(cr->item, detail);
+			kfree(cr->buf);
+			kfree(cr);
+			return;
+		}
+	spin_unlock(&queue_lock);
+}
+
+/*
+ * Support routines for text-based upcalls.
+ * Fields are separated by spaces.
+ * Fields are either mangled to quote space tab newline slosh with slosh
+ * or a hexified with a leading \x
+ * Record is terminated with newline.
+ *
+ */
+
+void qword_add(char **bpp, int *lp, char *str)
+{
+	char *bp = *bpp;
+	int len = *lp;
+	char c;
+
+	if (len < 0) return;
+
+	while ((c=*str++) && len)
+		switch(c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\\':
+			if (len >= 4) {
+				*bp++ = '\\';
+				*bp++ = '0' + ((c & 0300)>>6);
+				*bp++ = '0' + ((c & 0070)>>3);
+				*bp++ = '0' + ((c & 0007)>>0);
+			}
+			len -= 4;
+			break;
+		default:
+			*bp++ = c;
+			len--;
+		}
+	if (c || len <1) len = -1;
+	else {
+		*bp++ = ' ';
+		len--;
+	}
+	*bpp = bp;
+	*lp = len;
+}
+
+void qword_addhex(char **bpp, int *lp, char *buf, int blen)
+{
+	char *bp = *bpp;
+	int len = *lp;
+
+	if (len < 0) return;
+
+	if (len > 2) {
+		*bp++ = '\\';
+		*bp++ = 'x';
+		len -= 2;
+		while (blen && len >= 2) {
+			unsigned char c = *buf++;
+			*bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+			*bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+			len -= 2;
+			blen--;
+		}
+	}
+	if (blen || len<1) len = -1;
+	else {
+		*bp++ = ' ';
+		len--;
+	}
+	*bpp = bp;
+	*lp = len;
+}
+
+static void warn_no_listener(struct cache_detail *detail)
+{
+	if (detail->last_warn != detail->last_close) {
+		detail->last_warn = detail->last_close;
+		if (detail->warn_no_listener)
+			detail->warn_no_listener(detail);
+	}
+}
+
+/*
+ * register an upcall request to user-space.
+ * Each request is at most one page long.
+ */
+static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
+{
+
+	char *buf;
+	struct cache_request *crq;
+	char *bp;
+	int len;
+
+	if (detail->cache_request == NULL)
+		return -EINVAL;
+
+	if (atomic_read(&detail->readers) == 0 &&
+	    detail->last_close < get_seconds() - 30) {
+			warn_no_listener(detail);
+			return -EINVAL;
+	}
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -EAGAIN;
+
+	crq = kmalloc(sizeof (*crq), GFP_KERNEL);
+	if (!crq) {
+		kfree(buf);
+		return -EAGAIN;
+	}
+
+	bp = buf; len = PAGE_SIZE;
+
+	detail->cache_request(detail, h, &bp, &len);
+
+	if (len < 0) {
+		kfree(buf);
+		kfree(crq);
+		return -EAGAIN;
+	}
+	crq->q.reader = 0;
+	crq->item = cache_get(h);
+	crq->buf = buf;
+	crq->len = PAGE_SIZE - len;
+	crq->readers = 0;
+	spin_lock(&queue_lock);
+	list_add_tail(&crq->q.list, &detail->queue);
+	spin_unlock(&queue_lock);
+	wake_up(&queue_wait);
+	return 0;
+}
+
+/*
+ * parse a message from user-space and pass it
+ * to an appropriate cache
+ * Messages are, like requests, separated into fields by
+ * spaces and dequotes as \xHEXSTRING or embedded \nnn octal
+ *
+ * Message is 
+ *   reply cachename expiry key ... content....
+ *
+ * key and content are both parsed by cache 
+ */
+
+#define isodigit(c) (isdigit(c) && c <= '7')
+int qword_get(char **bpp, char *dest, int bufsize)
+{
+	/* return bytes copied, or -1 on error */
+	char *bp = *bpp;
+	int len = 0;
+
+	while (*bp == ' ') bp++;
+
+	if (bp[0] == '\\' && bp[1] == 'x') {
+		/* HEX STRING */
+		bp += 2;
+		while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
+			int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
+			bp++;
+			byte <<= 4;
+			byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
+			*dest++ = byte;
+			bp++;
+			len++;
+		}
+	} else {
+		/* text with \nnn octal quoting */
+		while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
+			if (*bp == '\\' &&
+			    isodigit(bp[1]) && (bp[1] <= '3') &&
+			    isodigit(bp[2]) &&
+			    isodigit(bp[3])) {
+				int byte = (*++bp -'0');
+				bp++;
+				byte = (byte << 3) | (*bp++ - '0');
+				byte = (byte << 3) | (*bp++ - '0');
+				*dest++ = byte;
+				len++;
+			} else {
+				*dest++ = *bp++;
+				len++;
+			}
+		}
+	}
+
+	if (*bp != ' ' && *bp != '\n' && *bp != '\0')
+		return -1;
+	while (*bp == ' ') bp++;
+	*bpp = bp;
+	*dest = '\0';
+	return len;
+}
+
+
+/*
+ * support /proc/sunrpc/cache/$CACHENAME/content
+ * as a seqfile.
+ * We call ->cache_show passing NULL for the item to
+ * get a header, then pass each real item in the cache
+ */
+
+struct handle {
+	struct cache_detail *cd;
+};
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+	unsigned hash, entry;
+	struct cache_head *ch;
+	struct cache_detail *cd = ((struct handle*)m->private)->cd;
+	
+
+	read_lock(&cd->hash_lock);
+	if (!n--)
+		return SEQ_START_TOKEN;
+	hash = n >> 32;
+	entry = n & ((1LL<<32) - 1);
+
+	for (ch=cd->hash_table[hash]; ch; ch=ch->next)
+		if (!entry--)
+			return ch;
+	n &= ~((1LL<<32) - 1);
+	do {
+		hash++;
+		n += 1LL<<32;
+	} while(hash < cd->hash_size && 
+		cd->hash_table[hash]==NULL);
+	if (hash >= cd->hash_size)
+		return NULL;
+	*pos = n+1;
+	return cd->hash_table[hash];
+}
+
+static void *c_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct cache_head *ch = p;
+	int hash = (*pos >> 32);
+	struct cache_detail *cd = ((struct handle*)m->private)->cd;
+
+	if (p == SEQ_START_TOKEN)
+		hash = 0;
+	else if (ch->next == NULL) {
+		hash++;
+		*pos += 1LL<<32;
+	} else {
+		++*pos;
+		return ch->next;
+	}
+	*pos &= ~((1LL<<32) - 1);
+	while (hash < cd->hash_size &&
+	       cd->hash_table[hash] == NULL) {
+		hash++;
+		*pos += 1LL<<32;
+	}
+	if (hash >= cd->hash_size)
+		return NULL;
+	++*pos;
+	return cd->hash_table[hash];
+}
+
+static void c_stop(struct seq_file *m, void *p)
+{
+	struct cache_detail *cd = ((struct handle*)m->private)->cd;
+	read_unlock(&cd->hash_lock);
+}
+
+static int c_show(struct seq_file *m, void *p)
+{
+	struct cache_head *cp = p;
+	struct cache_detail *cd = ((struct handle*)m->private)->cd;
+
+	if (p == SEQ_START_TOKEN)
+		return cd->cache_show(m, cd, NULL);
+
+	ifdebug(CACHE)
+		seq_printf(m, "# expiry=%ld refcnt=%d\n",
+			   cp->expiry_time, atomic_read(&cp->refcnt));
+	cache_get(cp);
+	if (cache_check(cd, cp, NULL))
+		/* cache_check does a cache_put on failure */
+		seq_printf(m, "# ");
+	else
+		cache_put(cp, cd);
+
+	return cd->cache_show(m, cd, cp);
+}
+
+static struct seq_operations cache_content_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show,
+};
+
+static int content_open(struct inode *inode, struct file *file)
+{
+	int res;
+	struct handle *han;
+	struct cache_detail *cd = PDE(inode)->data;
+
+	han = kmalloc(sizeof(*han), GFP_KERNEL);
+	if (han == NULL)
+		return -ENOMEM;
+
+	han->cd = cd;
+
+	res = seq_open(file, &cache_content_op);
+	if (res)
+		kfree(han);
+	else
+		((struct seq_file *)file->private_data)->private = han;
+
+	return res;
+}
+static int content_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = (struct seq_file *)file->private_data;
+	struct handle *han = m->private;
+	kfree(han);
+	m->private = NULL;
+	return seq_release(inode, file);
+}
+
+static struct file_operations content_file_operations = {
+	.open		= content_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= content_release,
+};
+
+static ssize_t read_flush(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
+	char tbuf[20];
+	unsigned long p = *ppos;
+	int len;
+
+	sprintf(tbuf, "%lu\n", cd->flush_time);
+	len = strlen(tbuf);
+	if (p >= len)
+		return 0;
+	len -= p;
+	if (len > count) len = count;
+	if (copy_to_user(buf, (void*)(tbuf+p), len))
+		len = -EFAULT;
+	else
+		*ppos += len;
+	return len;
+}
+
+static ssize_t write_flush(struct file * file, const char __user * buf,
+			     size_t count, loff_t *ppos)
+{
+	struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
+	char tbuf[20];
+	char *ep;
+	long flushtime;
+	if (*ppos || count > sizeof(tbuf)-1)
+		return -EINVAL;
+	if (copy_from_user(tbuf, buf, count))
+		return -EFAULT;
+	tbuf[count] = 0;
+	flushtime = simple_strtoul(tbuf, &ep, 0);
+	if (*ep && *ep != '\n')
+		return -EINVAL;
+
+	cd->flush_time = flushtime;
+	cd->nextcheck = get_seconds();
+	cache_flush();
+
+	*ppos += count;
+	return count;
+}
+
+static struct file_operations cache_flush_operations = {
+	.open		= nonseekable_open,
+	.read		= read_flush,
+	.write		= write_flush,
+};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
new file mode 100644
index 00000000000..02bc029d46f
--- /dev/null
+++ b/net/sunrpc/clnt.c
@@ -0,0 +1,1085 @@
+/*
+ *  linux/net/sunrpc/rpcclnt.c
+ *
+ *  This file contains the high-level RPC interface.
+ *  It is modeled as a finite state machine to support both synchronous
+ *  and asynchronous requests.
+ *
+ *  -	RPC header generation and argument serialization.
+ *  -	Credential refresh.
+ *  -	TCP connect handling.
+ *  -	Retry of operation when it is suspected the operation failed because
+ *	of uid squashing on the server, or when the credentials were stale
+ *	and need to be refreshed, or when a packet was damaged in transit.
+ *	This may be have to be moved to the VFS layer.
+ *
+ *  NB: BSD uses a more intelligent approach to guessing when a request
+ *  or reply has been lost by keeping the RTO estimate for each procedure.
+ *  We currently make do with a constant timeout value.
+ *
+ *  Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
+ *  Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <asm/system.h>
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/utsname.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include <linux/nfs.h>
+
+
+#define RPC_SLACK_SPACE		(1024)	/* total overkill */
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_CALL
+#endif
+
+static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
+
+
+static void	call_start(struct rpc_task *task);
+static void	call_reserve(struct rpc_task *task);
+static void	call_reserveresult(struct rpc_task *task);
+static void	call_allocate(struct rpc_task *task);
+static void	call_encode(struct rpc_task *task);
+static void	call_decode(struct rpc_task *task);
+static void	call_bind(struct rpc_task *task);
+static void	call_transmit(struct rpc_task *task);
+static void	call_status(struct rpc_task *task);
+static void	call_refresh(struct rpc_task *task);
+static void	call_refreshresult(struct rpc_task *task);
+static void	call_timeout(struct rpc_task *task);
+static void	call_connect(struct rpc_task *task);
+static void	call_connect_status(struct rpc_task *task);
+static u32 *	call_header(struct rpc_task *task);
+static u32 *	call_verify(struct rpc_task *task);
+
+
+static int
+rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
+{
+	static uint32_t clntid;
+	int error;
+
+	if (dir_name == NULL)
+		return 0;
+	for (;;) {
+		snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
+				"%s/clnt%x", dir_name,
+				(unsigned int)clntid++);
+		clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0';
+		clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
+		if (!IS_ERR(clnt->cl_dentry))
+			return 0;
+		error = PTR_ERR(clnt->cl_dentry);
+		if (error != -EEXIST) {
+			printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
+					clnt->cl_pathname, error);
+			return error;
+		}
+	}
+}
+
+/*
+ * Create an RPC client
+ * FIXME: This should also take a flags argument (as in task->tk_flags).
+ * It's called (among others) from pmap_create_client, which may in
+ * turn be called by an async task. In this case, rpciod should not be
+ * made to sleep too long.
+ */
+struct rpc_clnt *
+rpc_create_client(struct rpc_xprt *xprt, char *servname,
+		  struct rpc_program *program, u32 vers,
+		  rpc_authflavor_t flavor)
+{
+	struct rpc_version	*version;
+	struct rpc_clnt		*clnt = NULL;
+	int err;
+	int len;
+
+	dprintk("RPC: creating %s client for %s (xprt %p)\n",
+		program->name, servname, xprt);
+
+	err = -EINVAL;
+	if (!xprt)
+		goto out_err;
+	if (vers >= program->nrvers || !(version = program->version[vers]))
+		goto out_err;
+
+	err = -ENOMEM;
+	clnt = (struct rpc_clnt *) kmalloc(sizeof(*clnt), GFP_KERNEL);
+	if (!clnt)
+		goto out_err;
+	memset(clnt, 0, sizeof(*clnt));
+	atomic_set(&clnt->cl_users, 0);
+	atomic_set(&clnt->cl_count, 1);
+	clnt->cl_parent = clnt;
+
+	clnt->cl_server = clnt->cl_inline_name;
+	len = strlen(servname) + 1;
+	if (len > sizeof(clnt->cl_inline_name)) {
+		char *buf = kmalloc(len, GFP_KERNEL);
+		if (buf != 0)
+			clnt->cl_server = buf;
+		else
+			len = sizeof(clnt->cl_inline_name);
+	}
+	strlcpy(clnt->cl_server, servname, len);
+
+	clnt->cl_xprt     = xprt;
+	clnt->cl_procinfo = version->procs;
+	clnt->cl_maxproc  = version->nrprocs;
+	clnt->cl_protname = program->name;
+	clnt->cl_pmap	  = &clnt->cl_pmap_default;
+	clnt->cl_port     = xprt->addr.sin_port;
+	clnt->cl_prog     = program->number;
+	clnt->cl_vers     = version->number;
+	clnt->cl_prot     = xprt->prot;
+	clnt->cl_stats    = program->stats;
+	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
+
+	if (!clnt->cl_port)
+		clnt->cl_autobind = 1;
+
+	clnt->cl_rtt = &clnt->cl_rtt_default;
+	rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+
+	err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
+	if (err < 0)
+		goto out_no_path;
+
+	err = -ENOMEM;
+	if (!rpcauth_create(flavor, clnt)) {
+		printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
+				flavor);
+		goto out_no_auth;
+	}
+
+	/* save the nodename */
+	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	if (clnt->cl_nodelen > UNX_MAXNODENAME)
+		clnt->cl_nodelen = UNX_MAXNODENAME;
+	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	return clnt;
+
+out_no_auth:
+	rpc_rmdir(clnt->cl_pathname);
+out_no_path:
+	if (clnt->cl_server != clnt->cl_inline_name)
+		kfree(clnt->cl_server);
+	kfree(clnt);
+out_err:
+	return ERR_PTR(err);
+}
+
+/*
+ * This function clones the RPC client structure. It allows us to share the
+ * same transport while varying parameters such as the authentication
+ * flavour.
+ */
+struct rpc_clnt *
+rpc_clone_client(struct rpc_clnt *clnt)
+{
+	struct rpc_clnt *new;
+
+	new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		goto out_no_clnt;
+	memcpy(new, clnt, sizeof(*new));
+	atomic_set(&new->cl_count, 1);
+	atomic_set(&new->cl_users, 0);
+	new->cl_parent = clnt;
+	atomic_inc(&clnt->cl_count);
+	/* Duplicate portmapper */
+	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+	/* Turn off autobind on clones */
+	new->cl_autobind = 0;
+	new->cl_oneshot = 0;
+	new->cl_dead = 0;
+	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+	if (new->cl_auth)
+		atomic_inc(&new->cl_auth->au_count);
+	return new;
+out_no_clnt:
+	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Properly shut down an RPC client, terminating all outstanding
+ * requests. Note that we must be certain that cl_oneshot and
+ * cl_dead are cleared, or else the client would be destroyed
+ * when the last task releases it.
+ */
+int
+rpc_shutdown_client(struct rpc_clnt *clnt)
+{
+	dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
+			clnt->cl_protname, clnt->cl_server,
+			atomic_read(&clnt->cl_users));
+
+	while (atomic_read(&clnt->cl_users) > 0) {
+		/* Don't let rpc_release_client destroy us */
+		clnt->cl_oneshot = 0;
+		clnt->cl_dead = 0;
+		rpc_killall_tasks(clnt);
+		sleep_on_timeout(&destroy_wait, 1*HZ);
+	}
+
+	if (atomic_read(&clnt->cl_users) < 0) {
+		printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
+				clnt, atomic_read(&clnt->cl_users));
+#ifdef RPC_DEBUG
+		rpc_show_tasks();
+#endif
+		BUG();
+	}
+
+	return rpc_destroy_client(clnt);
+}
+
+/*
+ * Delete an RPC client
+ */
+int
+rpc_destroy_client(struct rpc_clnt *clnt)
+{
+	if (!atomic_dec_and_test(&clnt->cl_count))
+		return 1;
+	BUG_ON(atomic_read(&clnt->cl_users) != 0);
+
+	dprintk("RPC: destroying %s client for %s\n",
+			clnt->cl_protname, clnt->cl_server);
+	if (clnt->cl_auth) {
+		rpcauth_destroy(clnt->cl_auth);
+		clnt->cl_auth = NULL;
+	}
+	if (clnt->cl_parent != clnt) {
+		rpc_destroy_client(clnt->cl_parent);
+		goto out_free;
+	}
+	if (clnt->cl_pathname[0])
+		rpc_rmdir(clnt->cl_pathname);
+	if (clnt->cl_xprt) {
+		xprt_destroy(clnt->cl_xprt);
+		clnt->cl_xprt = NULL;
+	}
+	if (clnt->cl_server != clnt->cl_inline_name)
+		kfree(clnt->cl_server);
+out_free:
+	kfree(clnt);
+	return 0;
+}
+
+/*
+ * Release an RPC client
+ */
+void
+rpc_release_client(struct rpc_clnt *clnt)
+{
+	dprintk("RPC:      rpc_release_client(%p, %d)\n",
+				clnt, atomic_read(&clnt->cl_users));
+
+	if (!atomic_dec_and_test(&clnt->cl_users))
+		return;
+	wake_up(&destroy_wait);
+	if (clnt->cl_oneshot || clnt->cl_dead)
+		rpc_destroy_client(clnt);
+}
+
+/*
+ * Default callback for async RPC calls
+ */
+static void
+rpc_default_callback(struct rpc_task *task)
+{
+}
+
+/*
+ *	Export the signal mask handling for aysnchronous code that
+ *	sleeps on RPC calls
+ */
+ 
+void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
+{
+	unsigned long	sigallow = sigmask(SIGKILL);
+	unsigned long	irqflags;
+	
+	/* Turn off various signals */
+	if (clnt->cl_intr) {
+		struct k_sigaction *action = current->sighand->action;
+		if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
+			sigallow |= sigmask(SIGINT);
+		if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
+			sigallow |= sigmask(SIGQUIT);
+	}
+	spin_lock_irqsave(&current->sighand->siglock, irqflags);
+	*oldset = current->blocked;
+	siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
+}
+
+void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
+{
+	unsigned long	irqflags;
+	
+	spin_lock_irqsave(&current->sighand->siglock, irqflags);
+	current->blocked = *oldset;
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
+}
+
+/*
+ * New rpc_call implementation
+ */
+int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+{
+	struct rpc_task	*task;
+	sigset_t	oldset;
+	int		status;
+
+	/* If this client is slain all further I/O fails */
+	if (clnt->cl_dead) 
+		return -EIO;
+
+	BUG_ON(flags & RPC_TASK_ASYNC);
+
+	rpc_clnt_sigmask(clnt, &oldset);		
+
+	status = -ENOMEM;
+	task = rpc_new_task(clnt, NULL, flags);
+	if (task == NULL)
+		goto out;
+
+	rpc_call_setup(task, msg, 0);
+
+	/* Set up the call info struct and execute the task */
+	if (task->tk_status == 0)
+		status = rpc_execute(task);
+	else {
+		status = task->tk_status;
+		rpc_release_task(task);
+	}
+
+out:
+	rpc_clnt_sigunmask(clnt, &oldset);		
+
+	return status;
+}
+
+/*
+ * New rpc_call implementation
+ */
+int
+rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+	       rpc_action callback, void *data)
+{
+	struct rpc_task	*task;
+	sigset_t	oldset;
+	int		status;
+
+	/* If this client is slain all further I/O fails */
+	if (clnt->cl_dead) 
+		return -EIO;
+
+	flags |= RPC_TASK_ASYNC;
+
+	rpc_clnt_sigmask(clnt, &oldset);		
+
+	/* Create/initialize a new RPC task */
+	if (!callback)
+		callback = rpc_default_callback;
+	status = -ENOMEM;
+	if (!(task = rpc_new_task(clnt, callback, flags)))
+		goto out;
+	task->tk_calldata = data;
+
+	rpc_call_setup(task, msg, 0);
+
+	/* Set up the call info struct and execute the task */
+	status = task->tk_status;
+	if (status == 0)
+		rpc_execute(task);
+	else
+		rpc_release_task(task);
+
+out:
+	rpc_clnt_sigunmask(clnt, &oldset);		
+
+	return status;
+}
+
+
+void
+rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+{
+	task->tk_msg   = *msg;
+	task->tk_flags |= flags;
+	/* Bind the user cred */
+	if (task->tk_msg.rpc_cred != NULL)
+		rpcauth_holdcred(task);
+	else
+		rpcauth_bindcred(task);
+
+	if (task->tk_status == 0)
+		task->tk_action = call_start;
+	else
+		task->tk_action = NULL;
+}
+
+void
+rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
+{
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+
+	xprt->sndsize = 0;
+	if (sndsize)
+		xprt->sndsize = sndsize + RPC_SLACK_SPACE;
+	xprt->rcvsize = 0;
+	if (rcvsize)
+		xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
+	if (xprt_connected(xprt))
+		xprt_sock_setbufsize(xprt);
+}
+
+/*
+ * Return size of largest payload RPC client can support, in bytes
+ *
+ * For stream transports, this is one RPC record fragment (see RFC
+ * 1831), as we don't support multi-record requests yet.  For datagram
+ * transports, this is the size of an IP packet minus the IP, UDP, and
+ * RPC header sizes.
+ */
+size_t rpc_max_payload(struct rpc_clnt *clnt)
+{
+	return clnt->cl_xprt->max_payload;
+}
+EXPORT_SYMBOL(rpc_max_payload);
+
+/*
+ * Restart an (async) RPC call. Usually called from within the
+ * exit handler.
+ */
+void
+rpc_restart_call(struct rpc_task *task)
+{
+	if (RPC_ASSASSINATED(task))
+		return;
+
+	task->tk_action = call_start;
+}
+
+/*
+ * 0.  Initial state
+ *
+ *     Other FSM states can be visited zero or more times, but
+ *     this state is visited exactly once for each RPC.
+ */
+static void
+call_start(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+
+	dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid,
+		clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc->p_proc,
+		(RPC_IS_ASYNC(task) ? "async" : "sync"));
+
+	/* Increment call count */
+	task->tk_msg.rpc_proc->p_count++;
+	clnt->cl_stats->rpccnt++;
+	task->tk_action = call_reserve;
+}
+
+/*
+ * 1.	Reserve an RPC call slot
+ */
+static void
+call_reserve(struct rpc_task *task)
+{
+	dprintk("RPC: %4d call_reserve\n", task->tk_pid);
+
+	if (!rpcauth_uptodatecred(task)) {
+		task->tk_action = call_refresh;
+		return;
+	}
+
+	task->tk_status  = 0;
+	task->tk_action  = call_reserveresult;
+	xprt_reserve(task);
+}
+
+/*
+ * 1b.	Grok the result of xprt_reserve()
+ */
+static void
+call_reserveresult(struct rpc_task *task)
+{
+	int status = task->tk_status;
+
+	dprintk("RPC: %4d call_reserveresult (status %d)\n",
+				task->tk_pid, task->tk_status);
+
+	/*
+	 * After a call to xprt_reserve(), we must have either
+	 * a request slot or else an error status.
+	 */
+	task->tk_status = 0;
+	if (status >= 0) {
+		if (task->tk_rqstp) {
+			task->tk_action = call_allocate;
+			return;
+		}
+
+		printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
+				__FUNCTION__, status);
+		rpc_exit(task, -EIO);
+		return;
+	}
+
+	/*
+	 * Even though there was an error, we may have acquired
+	 * a request slot somehow.  Make sure not to leak it.
+	 */
+	if (task->tk_rqstp) {
+		printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
+				__FUNCTION__, status);
+		xprt_release(task);
+	}
+
+	switch (status) {
+	case -EAGAIN:	/* woken up; retry */
+		task->tk_action = call_reserve;
+		return;
+	case -EIO:	/* probably a shutdown */
+		break;
+	default:
+		printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
+				__FUNCTION__, status);
+		break;
+	}
+	rpc_exit(task, status);
+}
+
+/*
+ * 2.	Allocate the buffer. For details, see sched.c:rpc_malloc.
+ *	(Note: buffer memory is freed in rpc_task_release).
+ */
+static void
+call_allocate(struct rpc_task *task)
+{
+	unsigned int	bufsiz;
+
+	dprintk("RPC: %4d call_allocate (status %d)\n", 
+				task->tk_pid, task->tk_status);
+	task->tk_action = call_bind;
+	if (task->tk_buffer)
+		return;
+
+	/* FIXME: compute buffer requirements more exactly using
+	 * auth->au_wslack */
+	bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
+
+	if (rpc_malloc(task, bufsiz << 1) != NULL)
+		return;
+	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 
+
+	if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
+		xprt_release(task);
+		task->tk_action = call_reserve;
+		rpc_delay(task, HZ>>4);
+		return;
+	}
+
+	rpc_exit(task, -ERESTARTSYS);
+}
+
+/*
+ * 3.	Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	unsigned int	bufsiz;
+	kxdrproc_t	encode;
+	int		status;
+	u32		*p;
+
+	dprintk("RPC: %4d call_encode (status %d)\n", 
+				task->tk_pid, task->tk_status);
+
+	/* Default buffer setup */
+	bufsiz = task->tk_bufsize >> 1;
+	sndbuf->head[0].iov_base = (void *)task->tk_buffer;
+	sndbuf->head[0].iov_len  = bufsiz;
+	sndbuf->tail[0].iov_len  = 0;
+	sndbuf->page_len	 = 0;
+	sndbuf->len		 = 0;
+	sndbuf->buflen		 = bufsiz;
+	rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
+	rcvbuf->head[0].iov_len  = bufsiz;
+	rcvbuf->tail[0].iov_len  = 0;
+	rcvbuf->page_len	 = 0;
+	rcvbuf->len		 = 0;
+	rcvbuf->buflen		 = bufsiz;
+
+	/* Encode header and provided arguments */
+	encode = task->tk_msg.rpc_proc->p_encode;
+	if (!(p = call_header(task))) {
+		printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
+		rpc_exit(task, -EIO);
+		return;
+	}
+	if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
+						 task->tk_msg.rpc_argp)) < 0) {
+		printk(KERN_WARNING "%s: can't encode arguments: %d\n",
+				clnt->cl_protname, -status);
+		rpc_exit(task, status);
+	}
+}
+
+/*
+ * 4.	Get the server port number if not yet set
+ */
+static void
+call_bind(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+
+	dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid,
+			xprt, (xprt_connected(xprt) ? "is" : "is not"));
+
+	task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect;
+
+	if (!clnt->cl_port) {
+		task->tk_action = call_connect;
+		task->tk_timeout = RPC_CONNECT_TIMEOUT;
+		rpc_getport(task, clnt);
+	}
+}
+
+/*
+ * 4a.	Connect to the RPC server (TCP case)
+ */
+static void
+call_connect(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+
+	dprintk("RPC: %4d call_connect status %d\n",
+				task->tk_pid, task->tk_status);
+
+	if (xprt_connected(clnt->cl_xprt)) {
+		task->tk_action = call_transmit;
+		return;
+	}
+	task->tk_action = call_connect_status;
+	if (task->tk_status < 0)
+		return;
+	xprt_connect(task);
+}
+
+/*
+ * 4b. Sort out connect result
+ */
+static void
+call_connect_status(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+	int status = task->tk_status;
+
+	task->tk_status = 0;
+	if (status >= 0) {
+		clnt->cl_stats->netreconn++;
+		task->tk_action = call_transmit;
+		return;
+	}
+
+	/* Something failed: we may have to rebind */
+	if (clnt->cl_autobind)
+		clnt->cl_port = 0;
+	switch (status) {
+	case -ENOTCONN:
+	case -ETIMEDOUT:
+	case -EAGAIN:
+		task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect;
+		break;
+	default:
+		rpc_exit(task, -EIO);
+	}
+}
+
+/*
+ * 5.	Transmit the RPC request, and wait for reply
+ */
+static void
+call_transmit(struct rpc_task *task)
+{
+	dprintk("RPC: %4d call_transmit (status %d)\n", 
+				task->tk_pid, task->tk_status);
+
+	task->tk_action = call_status;
+	if (task->tk_status < 0)
+		return;
+	task->tk_status = xprt_prepare_transmit(task);
+	if (task->tk_status != 0)
+		return;
+	/* Encode here so that rpcsec_gss can use correct sequence number. */
+	if (!task->tk_rqstp->rq_bytes_sent)
+		call_encode(task);
+	if (task->tk_status < 0)
+		return;
+	xprt_transmit(task);
+	if (task->tk_status < 0)
+		return;
+	if (!task->tk_msg.rpc_proc->p_decode) {
+		task->tk_action = NULL;
+		rpc_wake_up_task(task);
+	}
+}
+
+/*
+ * 6.	Sort out the RPC call status
+ */
+static void
+call_status(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_rqst	*req = task->tk_rqstp;
+	int		status;
+
+	if (req->rq_received > 0 && !req->rq_bytes_sent)
+		task->tk_status = req->rq_received;
+
+	dprintk("RPC: %4d call_status (status %d)\n", 
+				task->tk_pid, task->tk_status);
+
+	status = task->tk_status;
+	if (status >= 0) {
+		task->tk_action = call_decode;
+		return;
+	}
+
+	task->tk_status = 0;
+	switch(status) {
+	case -ETIMEDOUT:
+		task->tk_action = call_timeout;
+		break;
+	case -ECONNREFUSED:
+	case -ENOTCONN:
+		req->rq_bytes_sent = 0;
+		if (clnt->cl_autobind)
+			clnt->cl_port = 0;
+		task->tk_action = call_bind;
+		break;
+	case -EAGAIN:
+		task->tk_action = call_transmit;
+		break;
+	case -EIO:
+		/* shutdown or soft timeout */
+		rpc_exit(task, status);
+		break;
+	default:
+		if (clnt->cl_chatty)
+			printk("%s: RPC call returned error %d\n",
+			       clnt->cl_protname, -status);
+		rpc_exit(task, status);
+		break;
+	}
+}
+
+/*
+ * 6a.	Handle RPC timeout
+ * 	We do not release the request slot, so we keep using the
+ *	same XID for all retransmits.
+ */
+static void
+call_timeout(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+
+	if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
+		dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
+		goto retry;
+	}
+
+	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
+	if (RPC_IS_SOFT(task)) {
+		if (clnt->cl_chatty)
+			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+				clnt->cl_protname, clnt->cl_server);
+		rpc_exit(task, -EIO);
+		return;
+	}
+
+	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
+		task->tk_flags |= RPC_CALL_MAJORSEEN;
+		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
+			clnt->cl_protname, clnt->cl_server);
+	}
+	if (clnt->cl_autobind)
+		clnt->cl_port = 0;
+
+retry:
+	clnt->cl_stats->rpcretrans++;
+	task->tk_action = call_bind;
+	task->tk_status = 0;
+}
+
+/*
+ * 7.	Decode the RPC reply
+ */
+static void
+call_decode(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_rqst	*req = task->tk_rqstp;
+	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
+	u32		*p;
+
+	dprintk("RPC: %4d call_decode (status %d)\n", 
+				task->tk_pid, task->tk_status);
+
+	if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
+		printk(KERN_NOTICE "%s: server %s OK\n",
+			clnt->cl_protname, clnt->cl_server);
+		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
+	}
+
+	if (task->tk_status < 12) {
+		if (!RPC_IS_SOFT(task)) {
+			task->tk_action = call_bind;
+			clnt->cl_stats->rpcretrans++;
+			goto out_retry;
+		}
+		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
+			clnt->cl_protname, task->tk_status);
+		rpc_exit(task, -EIO);
+		return;
+	}
+
+	req->rq_rcv_buf.len = req->rq_private_buf.len;
+
+	/* Check that the softirq receive buffer is valid */
+	WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
+				sizeof(req->rq_rcv_buf)) != 0);
+
+	/* Verify the RPC header */
+	if (!(p = call_verify(task))) {
+		if (task->tk_action == NULL)
+			return;
+		goto out_retry;
+	}
+
+	task->tk_action = NULL;
+
+	if (decode)
+		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
+						      task->tk_msg.rpc_resp);
+	dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
+					task->tk_status);
+	return;
+out_retry:
+	req->rq_received = req->rq_private_buf.len = 0;
+	task->tk_status = 0;
+}
+
+/*
+ * 8.	Refresh the credentials if rejected by the server
+ */
+static void
+call_refresh(struct rpc_task *task)
+{
+	dprintk("RPC: %4d call_refresh\n", task->tk_pid);
+
+	xprt_release(task);	/* Must do to obtain new XID */
+	task->tk_action = call_refreshresult;
+	task->tk_status = 0;
+	task->tk_client->cl_stats->rpcauthrefresh++;
+	rpcauth_refreshcred(task);
+}
+
+/*
+ * 8a.	Process the results of a credential refresh
+ */
+static void
+call_refreshresult(struct rpc_task *task)
+{
+	int status = task->tk_status;
+	dprintk("RPC: %4d call_refreshresult (status %d)\n", 
+				task->tk_pid, task->tk_status);
+
+	task->tk_status = 0;
+	task->tk_action = call_reserve;
+	if (status >= 0 && rpcauth_uptodatecred(task))
+		return;
+	if (status == -EACCES) {
+		rpc_exit(task, -EACCES);
+		return;
+	}
+	task->tk_action = call_refresh;
+	if (status != -ETIMEDOUT)
+		rpc_delay(task, 3*HZ);
+	return;
+}
+
+/*
+ * Call header serialization
+ */
+static u32 *
+call_header(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+	struct rpc_rqst	*req = task->tk_rqstp;
+	u32		*p = req->rq_svec[0].iov_base;
+
+	/* FIXME: check buffer size? */
+	if (xprt->stream)
+		*p++ = 0;		/* fill in later */
+	*p++ = req->rq_xid;		/* XID */
+	*p++ = htonl(RPC_CALL);		/* CALL */
+	*p++ = htonl(RPC_VERSION);	/* RPC version */
+	*p++ = htonl(clnt->cl_prog);	/* program number */
+	*p++ = htonl(clnt->cl_vers);	/* program version */
+	*p++ = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */
+	return rpcauth_marshcred(task, p);
+}
+
+/*
+ * Reply header verification
+ */
+static u32 *
+call_verify(struct rpc_task *task)
+{
+	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
+	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
+	u32	*p = iov->iov_base, n;
+	int error = -EACCES;
+
+	if ((len -= 3) < 0)
+		goto out_overflow;
+	p += 1;	/* skip XID */
+
+	if ((n = ntohl(*p++)) != RPC_REPLY) {
+		printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
+		goto out_retry;
+	}
+	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
+		if (--len < 0)
+			goto out_overflow;
+		switch ((n = ntohl(*p++))) {
+			case RPC_AUTH_ERROR:
+				break;
+			case RPC_MISMATCH:
+				printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
+				goto out_eio;
+			default:
+				printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
+				goto out_eio;
+		}
+		if (--len < 0)
+			goto out_overflow;
+		switch ((n = ntohl(*p++))) {
+		case RPC_AUTH_REJECTEDCRED:
+		case RPC_AUTH_REJECTEDVERF:
+		case RPCSEC_GSS_CREDPROBLEM:
+		case RPCSEC_GSS_CTXPROBLEM:
+			if (!task->tk_cred_retry)
+				break;
+			task->tk_cred_retry--;
+			dprintk("RPC: %4d call_verify: retry stale creds\n",
+							task->tk_pid);
+			rpcauth_invalcred(task);
+			task->tk_action = call_refresh;
+			return NULL;
+		case RPC_AUTH_BADCRED:
+		case RPC_AUTH_BADVERF:
+			/* possibly garbled cred/verf? */
+			if (!task->tk_garb_retry)
+				break;
+			task->tk_garb_retry--;
+			dprintk("RPC: %4d call_verify: retry garbled creds\n",
+							task->tk_pid);
+			task->tk_action = call_bind;
+			return NULL;
+		case RPC_AUTH_TOOWEAK:
+			printk(KERN_NOTICE "call_verify: server requires stronger "
+			       "authentication.\n");
+			break;
+		default:
+			printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
+			error = -EIO;
+		}
+		dprintk("RPC: %4d call_verify: call rejected %d\n",
+						task->tk_pid, n);
+		goto out_err;
+	}
+	if (!(p = rpcauth_checkverf(task, p))) {
+		printk(KERN_WARNING "call_verify: auth check failed\n");
+		goto out_retry;		/* bad verifier, retry */
+	}
+	len = p - (u32 *)iov->iov_base - 1;
+	if (len < 0)
+		goto out_overflow;
+	switch ((n = ntohl(*p++))) {
+	case RPC_SUCCESS:
+		return p;
+	case RPC_PROG_UNAVAIL:
+		printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n",
+				(unsigned int)task->tk_client->cl_prog,
+				task->tk_client->cl_server);
+		goto out_eio;
+	case RPC_PROG_MISMATCH:
+		printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n",
+				(unsigned int)task->tk_client->cl_prog,
+				(unsigned int)task->tk_client->cl_vers,
+				task->tk_client->cl_server);
+		goto out_eio;
+	case RPC_PROC_UNAVAIL:
+		printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
+				task->tk_msg.rpc_proc,
+				task->tk_client->cl_prog,
+				task->tk_client->cl_vers,
+				task->tk_client->cl_server);
+		goto out_eio;
+	case RPC_GARBAGE_ARGS:
+		dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
+		break;			/* retry */
+	default:
+		printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
+		/* Also retry */
+	}
+
+out_retry:
+	task->tk_client->cl_stats->rpcgarbage++;
+	if (task->tk_garb_retry) {
+		task->tk_garb_retry--;
+		dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
+		task->tk_action = call_bind;
+		return NULL;
+	}
+	printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
+out_eio:
+	error = -EIO;
+out_err:
+	rpc_exit(task, error);
+	return NULL;
+out_overflow:
+	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
+	goto out_retry;
+}
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
new file mode 100644
index 00000000000..d0b1d2c34a4
--- /dev/null
+++ b/net/sunrpc/pmap_clnt.c
@@ -0,0 +1,298 @@
+/*
+ * linux/net/sunrpc/pmap.c
+ *
+ * Portmapper client.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/in.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_PMAP
+#endif
+
+#define PMAP_SET		1
+#define PMAP_UNSET		2
+#define PMAP_GETPORT		3
+
+static struct rpc_procinfo	pmap_procedures[];
+static struct rpc_clnt *	pmap_create(char *, struct sockaddr_in *, int);
+static void			pmap_getport_done(struct rpc_task *);
+static struct rpc_program	pmap_program;
+static DEFINE_SPINLOCK(pmap_lock);
+
+/*
+ * Obtain the port for a given RPC service on a given host. This one can
+ * be called for an ongoing RPC request.
+ */
+void
+rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+	struct rpc_portmap *map = clnt->cl_pmap;
+	struct sockaddr_in *sap = &clnt->cl_xprt->addr;
+	struct rpc_message msg = {
+		.rpc_proc	= &pmap_procedures[PMAP_GETPORT],
+		.rpc_argp	= map,
+		.rpc_resp	= &clnt->cl_port,
+		.rpc_cred	= NULL
+	};
+	struct rpc_clnt	*pmap_clnt;
+	struct rpc_task	*child;
+
+	dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
+			task->tk_pid, clnt->cl_server,
+			map->pm_prog, map->pm_vers, map->pm_prot);
+
+	spin_lock(&pmap_lock);
+	if (map->pm_binding) {
+		rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
+		spin_unlock(&pmap_lock);
+		return;
+	}
+	map->pm_binding = 1;
+	spin_unlock(&pmap_lock);
+
+	pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot);
+	if (IS_ERR(pmap_clnt)) {
+		task->tk_status = PTR_ERR(pmap_clnt);
+		goto bailout;
+	}
+	task->tk_status = 0;
+
+	/*
+	 * Note: rpc_new_child will release client after a failure.
+	 */
+	if (!(child = rpc_new_child(pmap_clnt, task)))
+		goto bailout;
+
+	/* Setup the call info struct */
+	rpc_call_setup(child, &msg, 0);
+
+	/* ... and run the child task */
+	rpc_run_child(task, child, pmap_getport_done);
+	return;
+
+bailout:
+	spin_lock(&pmap_lock);
+	map->pm_binding = 0;
+	rpc_wake_up(&map->pm_bindwait);
+	spin_unlock(&pmap_lock);
+	task->tk_status = -EIO;
+	task->tk_action = NULL;
+}
+
+#ifdef CONFIG_ROOT_NFS
+int
+rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
+{
+	struct rpc_portmap map = {
+		.pm_prog	= prog,
+		.pm_vers	= vers,
+		.pm_prot	= prot,
+		.pm_port	= 0
+	};
+	struct rpc_clnt	*pmap_clnt;
+	char		hostname[32];
+	int		status;
+
+	dprintk("RPC:      rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
+			NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
+	pmap_clnt = pmap_create(hostname, sin, prot);
+	if (IS_ERR(pmap_clnt))
+		return PTR_ERR(pmap_clnt);
+
+	/* Setup the call info struct */
+	status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
+
+	if (status >= 0) {
+		if (map.pm_port != 0)
+			return map.pm_port;
+		status = -EACCES;
+	}
+	return status;
+}
+#endif
+
+static void
+pmap_getport_done(struct rpc_task *task)
+{
+	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_portmap *map = clnt->cl_pmap;
+
+	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
+			task->tk_pid, task->tk_status, clnt->cl_port);
+	if (task->tk_status < 0) {
+		/* Make the calling task exit with an error */
+		task->tk_action = NULL;
+	} else if (clnt->cl_port == 0) {
+		/* Program not registered */
+		task->tk_status = -EACCES;
+		task->tk_action = NULL;
+	} else {
+		/* byte-swap port number first */
+		clnt->cl_port = htons(clnt->cl_port);
+		clnt->cl_xprt->addr.sin_port = clnt->cl_port;
+	}
+	spin_lock(&pmap_lock);
+	map->pm_binding = 0;
+	rpc_wake_up(&map->pm_bindwait);
+	spin_unlock(&pmap_lock);
+}
+
+/*
+ * Set or unset a port registration with the local portmapper.
+ * port == 0 means unregister, port != 0 means register.
+ */
+int
+rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+{
+	struct sockaddr_in	sin;
+	struct rpc_portmap	map;
+	struct rpc_clnt		*pmap_clnt;
+	int error = 0;
+
+	dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
+			prog, vers, prot, port);
+
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP);
+	if (IS_ERR(pmap_clnt)) {
+		error = PTR_ERR(pmap_clnt);
+		dprintk("RPC: couldn't create pmap client. Error = %d\n", error);
+		return error;
+	}
+
+	map.pm_prog = prog;
+	map.pm_vers = vers;
+	map.pm_prot = prot;
+	map.pm_port = port;
+
+	error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
+					&map, okay, 0);
+
+	if (error < 0) {
+		printk(KERN_WARNING
+			"RPC: failed to contact portmap (errno %d).\n",
+			error);
+	}
+	dprintk("RPC: registration status %d/%d\n", error, *okay);
+
+	/* Client deleted automatically because cl_oneshot == 1 */
+	return error;
+}
+
+static struct rpc_clnt *
+pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
+{
+	struct rpc_xprt	*xprt;
+	struct rpc_clnt	*clnt;
+
+	/* printk("pmap: create xprt\n"); */
+	xprt = xprt_create_proto(proto, srvaddr, NULL);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+	xprt->addr.sin_port = htons(RPC_PMAP_PORT);
+
+	/* printk("pmap: create clnt\n"); */
+	clnt = rpc_create_client(xprt, hostname,
+				&pmap_program, RPC_PMAP_VERSION,
+				RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		xprt_destroy(xprt);
+	} else {
+		clnt->cl_softrtry = 1;
+		clnt->cl_chatty   = 1;
+		clnt->cl_oneshot  = 1;
+	}
+	return clnt;
+}
+
+/*
+ * XDR encode/decode functions for PMAP
+ */
+static int
+xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
+{
+	dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
+		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
+	*p++ = htonl(map->pm_prog);
+	*p++ = htonl(map->pm_vers);
+	*p++ = htonl(map->pm_prot);
+	*p++ = htonl(map->pm_port);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+static int
+xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
+{
+	*portp = (unsigned short) ntohl(*p++);
+	return 0;
+}
+
+static int
+xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
+{
+	*boolp = (unsigned int) ntohl(*p++);
+	return 0;
+}
+
+static struct rpc_procinfo	pmap_procedures[] = {
+[PMAP_SET] = {
+	  .p_proc		= PMAP_SET,
+	  .p_encode		= (kxdrproc_t) xdr_encode_mapping,	
+	  .p_decode		= (kxdrproc_t) xdr_decode_bool,
+	  .p_bufsiz		= 4,
+	  .p_count		= 1,
+	},
+[PMAP_UNSET] = {
+	  .p_proc		= PMAP_UNSET,
+	  .p_encode		= (kxdrproc_t) xdr_encode_mapping,	
+	  .p_decode		= (kxdrproc_t) xdr_decode_bool,
+	  .p_bufsiz		= 4,
+	  .p_count		= 1,
+	},
+[PMAP_GETPORT] = {
+	  .p_proc		= PMAP_GETPORT,
+	  .p_encode		= (kxdrproc_t) xdr_encode_mapping,
+	  .p_decode		= (kxdrproc_t) xdr_decode_port,
+	  .p_bufsiz		= 4,
+	  .p_count		= 1,
+	},
+};
+
+static struct rpc_version	pmap_version2 = {
+	.number		= 2,
+	.nrprocs	= 4,
+	.procs		= pmap_procedures
+};
+
+static struct rpc_version *	pmap_version[] = {
+	NULL,
+	NULL,
+	&pmap_version2
+};
+
+static struct rpc_stat		pmap_stats;
+
+static struct rpc_program	pmap_program = {
+	.name		= "portmap",
+	.number		= RPC_PMAP_PROGRAM,
+	.nrvers		= ARRAY_SIZE(pmap_version),
+	.version	= pmap_version,
+	.stats		= &pmap_stats,
+};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
new file mode 100644
index 00000000000..554f224c044
--- /dev/null
+++ b/net/sunrpc/rpc_pipe.c
@@ -0,0 +1,838 @@
+/*
+ * net/sunrpc/rpc_pipe.c
+ *
+ * Userland/kernel interface for rpcauth_gss.
+ * Code shamelessly plagiarized from fs/nfsd/nfsctl.c
+ * and fs/driverfs/inode.c
+ *
+ * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/dnotify.h>
+#include <linux/kernel.h>
+
+#include <asm/ioctls.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
+#include <linux/seq_file.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+static struct vfsmount *rpc_mount;
+static int rpc_mount_count;
+
+static struct file_system_type rpc_pipe_fs_type;
+
+
+static kmem_cache_t *rpc_inode_cachep;
+
+#define RPC_UPCALL_TIMEOUT (30*HZ)
+
+static void
+__rpc_purge_upcall(struct inode *inode, int err)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+	struct rpc_pipe_msg *msg;
+
+	while (!list_empty(&rpci->pipe)) {
+		msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list);
+		list_del_init(&msg->list);
+		msg->errno = err;
+		rpci->ops->destroy_msg(msg);
+	}
+	while (!list_empty(&rpci->in_upcall)) {
+		msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list);
+		list_del_init(&msg->list);
+		msg->errno = err;
+		rpci->ops->destroy_msg(msg);
+	}
+	rpci->pipelen = 0;
+	wake_up(&rpci->waitq);
+}
+
+static void
+rpc_timeout_upcall_queue(void *data)
+{
+	struct rpc_inode *rpci = (struct rpc_inode *)data;
+	struct inode *inode = &rpci->vfs_inode;
+
+	down(&inode->i_sem);
+	if (rpci->nreaders == 0 && !list_empty(&rpci->pipe))
+		__rpc_purge_upcall(inode, -ETIMEDOUT);
+	up(&inode->i_sem);
+}
+
+int
+rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+	int res = 0;
+
+	down(&inode->i_sem);
+	if (rpci->nreaders) {
+		list_add_tail(&msg->list, &rpci->pipe);
+		rpci->pipelen += msg->len;
+	} else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
+		if (list_empty(&rpci->pipe))
+			schedule_delayed_work(&rpci->queue_timeout,
+					RPC_UPCALL_TIMEOUT);
+		list_add_tail(&msg->list, &rpci->pipe);
+		rpci->pipelen += msg->len;
+	} else
+		res = -EPIPE;
+	up(&inode->i_sem);
+	wake_up(&rpci->waitq);
+	return res;
+}
+
+static void
+rpc_close_pipes(struct inode *inode)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+
+	cancel_delayed_work(&rpci->queue_timeout);
+	flush_scheduled_work();
+	down(&inode->i_sem);
+	if (rpci->ops != NULL) {
+		rpci->nreaders = 0;
+		__rpc_purge_upcall(inode, -EPIPE);
+		rpci->nwriters = 0;
+		if (rpci->ops->release_pipe)
+			rpci->ops->release_pipe(inode);
+		rpci->ops = NULL;
+	}
+	up(&inode->i_sem);
+}
+
+static inline void
+rpc_inode_setowner(struct inode *inode, void *private)
+{
+	RPC_I(inode)->private = private;
+}
+
+static struct inode *
+rpc_alloc_inode(struct super_block *sb)
+{
+	struct rpc_inode *rpci;
+	rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL);
+	if (!rpci)
+		return NULL;
+	return &rpci->vfs_inode;
+}
+
+static void
+rpc_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
+}
+
+static int
+rpc_pipe_open(struct inode *inode, struct file *filp)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+	int res = -ENXIO;
+
+	down(&inode->i_sem);
+	if (rpci->ops != NULL) {
+		if (filp->f_mode & FMODE_READ)
+			rpci->nreaders ++;
+		if (filp->f_mode & FMODE_WRITE)
+			rpci->nwriters ++;
+		res = 0;
+	}
+	up(&inode->i_sem);
+	return res;
+}
+
+static int
+rpc_pipe_release(struct inode *inode, struct file *filp)
+{
+	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+	struct rpc_pipe_msg *msg;
+
+	down(&inode->i_sem);
+	if (rpci->ops == NULL)
+		goto out;
+	msg = (struct rpc_pipe_msg *)filp->private_data;
+	if (msg != NULL) {
+		msg->errno = -EPIPE;
+		list_del_init(&msg->list);
+		rpci->ops->destroy_msg(msg);
+	}
+	if (filp->f_mode & FMODE_WRITE)
+		rpci->nwriters --;
+	if (filp->f_mode & FMODE_READ)
+		rpci->nreaders --;
+	if (!rpci->nreaders)
+		__rpc_purge_upcall(inode, -EPIPE);
+	if (rpci->ops->release_pipe)
+		rpci->ops->release_pipe(inode);
+out:
+	up(&inode->i_sem);
+	return 0;
+}
+
+static ssize_t
+rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct rpc_inode *rpci = RPC_I(inode);
+	struct rpc_pipe_msg *msg;
+	int res = 0;
+
+	down(&inode->i_sem);
+	if (rpci->ops == NULL) {
+		res = -EPIPE;
+		goto out_unlock;
+	}
+	msg = filp->private_data;
+	if (msg == NULL) {
+		if (!list_empty(&rpci->pipe)) {
+			msg = list_entry(rpci->pipe.next,
+					struct rpc_pipe_msg,
+					list);
+			list_move(&msg->list, &rpci->in_upcall);
+			rpci->pipelen -= msg->len;
+			filp->private_data = msg;
+			msg->copied = 0;
+		}
+		if (msg == NULL)
+			goto out_unlock;
+	}
+	/* NOTE: it is up to the callback to update msg->copied */
+	res = rpci->ops->upcall(filp, msg, buf, len);
+	if (res < 0 || msg->len == msg->copied) {
+		filp->private_data = NULL;
+		list_del_init(&msg->list);
+		rpci->ops->destroy_msg(msg);
+	}
+out_unlock:
+	up(&inode->i_sem);
+	return res;
+}
+
+static ssize_t
+rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct rpc_inode *rpci = RPC_I(inode);
+	int res;
+
+	down(&inode->i_sem);
+	res = -EPIPE;
+	if (rpci->ops != NULL)
+		res = rpci->ops->downcall(filp, buf, len);
+	up(&inode->i_sem);
+	return res;
+}
+
+static unsigned int
+rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct rpc_inode *rpci;
+	unsigned int mask = 0;
+
+	rpci = RPC_I(filp->f_dentry->d_inode);
+	poll_wait(filp, &rpci->waitq, wait);
+
+	mask = POLLOUT | POLLWRNORM;
+	if (rpci->ops == NULL)
+		mask |= POLLERR | POLLHUP;
+	if (!list_empty(&rpci->pipe))
+		mask |= POLLIN | POLLRDNORM;
+	return mask;
+}
+
+static int
+rpc_pipe_ioctl(struct inode *ino, struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+	int len;
+
+	switch (cmd) {
+	case FIONREAD:
+		if (rpci->ops == NULL)
+			return -EPIPE;
+		len = rpci->pipelen;
+		if (filp->private_data) {
+			struct rpc_pipe_msg *msg;
+			msg = (struct rpc_pipe_msg *)filp->private_data;
+			len += msg->len - msg->copied;
+		}
+		return put_user(len, (int __user *)arg);
+	default:
+		return -EINVAL;
+	}
+}
+
+static struct file_operations rpc_pipe_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= rpc_pipe_read,
+	.write		= rpc_pipe_write,
+	.poll		= rpc_pipe_poll,
+	.ioctl		= rpc_pipe_ioctl,
+	.open		= rpc_pipe_open,
+	.release	= rpc_pipe_release,
+};
+
+static int
+rpc_show_info(struct seq_file *m, void *v)
+{
+	struct rpc_clnt *clnt = m->private;
+
+	seq_printf(m, "RPC server: %s\n", clnt->cl_server);
+	seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
+			clnt->cl_prog, clnt->cl_vers);
+	seq_printf(m, "address: %u.%u.%u.%u\n",
+			NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
+	seq_printf(m, "protocol: %s\n",
+			clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	return 0;
+}
+
+static int
+rpc_info_open(struct inode *inode, struct file *file)
+{
+	struct rpc_clnt *clnt;
+	int ret = single_open(file, rpc_show_info, NULL);
+
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		down(&inode->i_sem);
+		clnt = RPC_I(inode)->private;
+		if (clnt) {
+			atomic_inc(&clnt->cl_users);
+			m->private = clnt;
+		} else {
+			single_release(inode, file);
+			ret = -EINVAL;
+		}
+		up(&inode->i_sem);
+	}
+	return ret;
+}
+
+static int
+rpc_info_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct rpc_clnt *clnt = (struct rpc_clnt *)m->private;
+
+	if (clnt)
+		rpc_release_client(clnt);
+	return single_release(inode, file);
+}
+
+static struct file_operations rpc_info_operations = {
+	.owner		= THIS_MODULE,
+	.open		= rpc_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= rpc_info_release,
+};
+
+
+/*
+ * We have a single directory with 1 node in it.
+ */
+enum {
+	RPCAUTH_Root = 1,
+	RPCAUTH_lockd,
+	RPCAUTH_mount,
+	RPCAUTH_nfs,
+	RPCAUTH_portmap,
+	RPCAUTH_statd,
+	RPCAUTH_RootEOF
+};
+
+/*
+ * Description of fs contents.
+ */
+struct rpc_filelist {
+	char *name;
+	struct file_operations *i_fop;
+	int mode;
+};
+
+static struct rpc_filelist files[] = {
+	[RPCAUTH_lockd] = {
+		.name = "lockd",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
+	[RPCAUTH_mount] = {
+		.name = "mount",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
+	[RPCAUTH_nfs] = {
+		.name = "nfs",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
+	[RPCAUTH_portmap] = {
+		.name = "portmap",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
+	[RPCAUTH_statd] = {
+		.name = "statd",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
+};
+
+enum {
+	RPCAUTH_info = 2,
+	RPCAUTH_EOF
+};
+
+static struct rpc_filelist authfiles[] = {
+	[RPCAUTH_info] = {
+		.name = "info",
+		.i_fop = &rpc_info_operations,
+		.mode = S_IFREG | S_IRUSR,
+	},
+};
+
+static int
+rpc_get_mount(void)
+{
+	return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+}
+
+static void
+rpc_put_mount(void)
+{
+	simple_release_fs(&rpc_mount, &rpc_mount_count);
+}
+
+static int
+rpc_lookup_parent(char *path, struct nameidata *nd)
+{
+	if (path[0] == '\0')
+		return -ENOENT;
+	if (rpc_get_mount()) {
+		printk(KERN_WARNING "%s: %s failed to mount "
+			       "pseudofilesystem \n", __FILE__, __FUNCTION__);
+		return -ENODEV;
+	}
+	nd->mnt = mntget(rpc_mount);
+	nd->dentry = dget(rpc_mount->mnt_root);
+	nd->last_type = LAST_ROOT;
+	nd->flags = LOOKUP_PARENT;
+	nd->depth = 0;
+
+	if (path_walk(path, nd)) {
+		printk(KERN_WARNING "%s: %s failed to find path %s\n",
+				__FILE__, __FUNCTION__, path);
+		rpc_put_mount();
+		return -ENOENT;
+	}
+	return 0;
+}
+
+static void
+rpc_release_path(struct nameidata *nd)
+{
+	path_release(nd);
+	rpc_put_mount();
+}
+
+static struct inode *
+rpc_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = new_inode(sb);
+	if (!inode)
+		return NULL;
+	inode->i_mode = mode;
+	inode->i_uid = inode->i_gid = 0;
+	inode->i_blksize = PAGE_CACHE_SIZE;
+	inode->i_blocks = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	switch(mode & S_IFMT) {
+		case S_IFDIR:
+			inode->i_fop = &simple_dir_operations;
+			inode->i_op = &simple_dir_inode_operations;
+			inode->i_nlink++;
+		default:
+			break;
+	}
+	return inode;
+}
+
+/*
+ * FIXME: This probably has races.
+ */
+static void
+rpc_depopulate(struct dentry *parent)
+{
+	struct inode *dir = parent->d_inode;
+	struct list_head *pos, *next;
+	struct dentry *dentry, *dvec[10];
+	int n = 0;
+
+	down(&dir->i_sem);
+repeat:
+	spin_lock(&dcache_lock);
+	list_for_each_safe(pos, next, &parent->d_subdirs) {
+		dentry = list_entry(pos, struct dentry, d_child);
+		spin_lock(&dentry->d_lock);
+		if (!d_unhashed(dentry)) {
+			dget_locked(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
+			dvec[n++] = dentry;
+			if (n == ARRAY_SIZE(dvec))
+				break;
+		} else
+			spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&dcache_lock);
+	if (n) {
+		do {
+			dentry = dvec[--n];
+			if (dentry->d_inode) {
+				rpc_close_pipes(dentry->d_inode);
+				rpc_inode_setowner(dentry->d_inode, NULL);
+				simple_unlink(dir, dentry);
+			}
+			dput(dentry);
+		} while (n);
+		goto repeat;
+	}
+	up(&dir->i_sem);
+}
+
+static int
+rpc_populate(struct dentry *parent,
+		struct rpc_filelist *files,
+		int start, int eof)
+{
+	struct inode *inode, *dir = parent->d_inode;
+	void *private = RPC_I(dir)->private;
+	struct dentry *dentry;
+	int mode, i;
+
+	down(&dir->i_sem);
+	for (i = start; i < eof; i++) {
+		dentry = d_alloc_name(parent, files[i].name);
+		if (!dentry)
+			goto out_bad;
+		mode = files[i].mode;
+		inode = rpc_get_inode(dir->i_sb, mode);
+		if (!inode) {
+			dput(dentry);
+			goto out_bad;
+		}
+		inode->i_ino = i;
+		if (files[i].i_fop)
+			inode->i_fop = files[i].i_fop;
+		if (private)
+			rpc_inode_setowner(inode, private);
+		if (S_ISDIR(mode))
+			dir->i_nlink++;
+		d_add(dentry, inode);
+	}
+	up(&dir->i_sem);
+	return 0;
+out_bad:
+	up(&dir->i_sem);
+	printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
+			__FILE__, __FUNCTION__, parent->d_name.name);
+	return -ENOMEM;
+}
+
+static int
+__rpc_mkdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode;
+
+	inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
+	if (!inode)
+		goto out_err;
+	inode->i_ino = iunique(dir->i_sb, 100);
+	d_instantiate(dentry, inode);
+	dir->i_nlink++;
+	inode_dir_notify(dir, DN_CREATE);
+	rpc_get_mount();
+	return 0;
+out_err:
+	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+			__FILE__, __FUNCTION__, dentry->d_name.name);
+	return -ENOMEM;
+}
+
+static int
+__rpc_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int error;
+
+	shrink_dcache_parent(dentry);
+	if (dentry->d_inode) {
+		rpc_close_pipes(dentry->d_inode);
+		rpc_inode_setowner(dentry->d_inode, NULL);
+	}
+	if ((error = simple_rmdir(dir, dentry)) != 0)
+		return error;
+	if (!error) {
+		inode_dir_notify(dir, DN_DELETE);
+		d_drop(dentry);
+		rpc_put_mount();
+	}
+	return 0;
+}
+
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
+
+	if ((error = rpc_lookup_parent(path, nd)) != 0)
+		return ERR_PTR(error);
+	dir = nd->dentry->d_inode;
+	down(&dir->i_sem);
+	dentry = lookup_hash(&nd->last, nd->dentry);
+	if (IS_ERR(dentry))
+		goto out_err;
+	if (dentry->d_inode) {
+		dput(dentry);
+		dentry = ERR_PTR(-EEXIST);
+		goto out_err;
+	}
+	return dentry;
+out_err:
+	up(&dir->i_sem);
+	rpc_release_path(nd);
+	return dentry;
+}
+
+
+struct dentry *
+rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
+{
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
+
+	dentry = rpc_lookup_negative(path, &nd);
+	if (IS_ERR(dentry))
+		return dentry;
+	dir = nd.dentry->d_inode;
+	if ((error = __rpc_mkdir(dir, dentry)) != 0)
+		goto err_dput;
+	RPC_I(dentry->d_inode)->private = rpc_client;
+	error = rpc_populate(dentry, authfiles,
+			RPCAUTH_info, RPCAUTH_EOF);
+	if (error)
+		goto err_depopulate;
+out:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return dentry;
+err_depopulate:
+	rpc_depopulate(dentry);
+	__rpc_rmdir(dir, dentry);
+err_dput:
+	dput(dentry);
+	printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n",
+			__FILE__, __FUNCTION__, path, error);
+	dentry = ERR_PTR(error);
+	goto out;
+}
+
+int
+rpc_rmdir(char *path)
+{
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
+
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
+		return error;
+	dir = nd.dentry->d_inode;
+	down(&dir->i_sem);
+	dentry = lookup_hash(&nd.last, nd.dentry);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out_release;
+	}
+	rpc_depopulate(dentry);
+	error = __rpc_rmdir(dir, dentry);
+	dput(dentry);
+out_release:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return error;
+}
+
+struct dentry *
+rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
+{
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir, *inode;
+	struct rpc_inode *rpci;
+
+	dentry = rpc_lookup_negative(path, &nd);
+	if (IS_ERR(dentry))
+		return dentry;
+	dir = nd.dentry->d_inode;
+	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
+	if (!inode)
+		goto err_dput;
+	inode->i_ino = iunique(dir->i_sb, 100);
+	inode->i_fop = &rpc_pipe_fops;
+	d_instantiate(dentry, inode);
+	rpci = RPC_I(inode);
+	rpci->private = private;
+	rpci->flags = flags;
+	rpci->ops = ops;
+	inode_dir_notify(dir, DN_CREATE);
+out:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return dentry;
+err_dput:
+	dput(dentry);
+	dentry = ERR_PTR(-ENOMEM);
+	printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
+			__FILE__, __FUNCTION__, path, -ENOMEM);
+	goto out;
+}
+
+int
+rpc_unlink(char *path)
+{
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
+
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
+		return error;
+	dir = nd.dentry->d_inode;
+	down(&dir->i_sem);
+	dentry = lookup_hash(&nd.last, nd.dentry);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out_release;
+	}
+	d_drop(dentry);
+	if (dentry->d_inode) {
+		rpc_close_pipes(dentry->d_inode);
+		rpc_inode_setowner(dentry->d_inode, NULL);
+		error = simple_unlink(dir, dentry);
+	}
+	dput(dentry);
+	inode_dir_notify(dir, DN_DELETE);
+out_release:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return error;
+}
+
+/*
+ * populate the filesystem
+ */
+static struct super_operations s_ops = {
+	.alloc_inode	= rpc_alloc_inode,
+	.destroy_inode	= rpc_destroy_inode,
+	.statfs		= simple_statfs,
+};
+
+#define RPCAUTH_GSSMAGIC 0x67596969
+
+static int
+rpc_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = RPCAUTH_GSSMAGIC;
+	sb->s_op = &s_ops;
+	sb->s_time_gran = 1;
+
+	inode = rpc_get_inode(sb, S_IFDIR | 0755);
+	if (!inode)
+		return -ENOMEM;
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF))
+		goto out;
+	sb->s_root = root;
+	return 0;
+out:
+	d_genocide(root);
+	dput(root);
+	return -ENOMEM;
+}
+
+static struct super_block *
+rpc_get_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *data)
+{
+	return get_sb_single(fs_type, flags, data, rpc_fill_super);
+}
+
+static struct file_system_type rpc_pipe_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "rpc_pipefs",
+	.get_sb		= rpc_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+static void
+init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct rpc_inode *rpci = (struct rpc_inode *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		inode_init_once(&rpci->vfs_inode);
+		rpci->private = NULL;
+		rpci->nreaders = 0;
+		rpci->nwriters = 0;
+		INIT_LIST_HEAD(&rpci->in_upcall);
+		INIT_LIST_HEAD(&rpci->pipe);
+		rpci->pipelen = 0;
+		init_waitqueue_head(&rpci->waitq);
+		INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci);
+		rpci->ops = NULL;
+	}
+}
+
+int register_rpc_pipefs(void)
+{
+	rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
+                                             sizeof(struct rpc_inode),
+                                             0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+                                             init_once, NULL);
+	if (!rpc_inode_cachep)
+		return -ENOMEM;
+	register_filesystem(&rpc_pipe_fs_type);
+	return 0;
+}
+
+void unregister_rpc_pipefs(void)
+{
+	if (kmem_cache_destroy(rpc_inode_cachep))
+		printk(KERN_WARNING "RPC: unable to free inode cache\n");
+	unregister_filesystem(&rpc_pipe_fs_type);
+}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
new file mode 100644
index 00000000000..c06614d0e31
--- /dev/null
+++ b/net/sunrpc/sched.c
@@ -0,0 +1,1119 @@
+/*
+ * linux/net/sunrpc/sched.c
+ *
+ * Scheduling for synchronous and asynchronous RPC requests.
+ *
+ * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
+ * 
+ * TCP NFS related read + write fixes
+ * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
+ */
+
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+
+#ifdef RPC_DEBUG
+#define RPCDBG_FACILITY		RPCDBG_SCHED
+#define RPC_TASK_MAGIC_ID	0xf00baa
+static int			rpc_task_id;
+#endif
+
+/*
+ * RPC slabs and memory pools
+ */
+#define RPC_BUFFER_MAXSIZE	(2048)
+#define RPC_BUFFER_POOLSIZE	(8)
+#define RPC_TASK_POOLSIZE	(8)
+static kmem_cache_t	*rpc_task_slabp;
+static kmem_cache_t	*rpc_buffer_slabp;
+static mempool_t	*rpc_task_mempool;
+static mempool_t	*rpc_buffer_mempool;
+
+static void			__rpc_default_timer(struct rpc_task *task);
+static void			rpciod_killall(void);
+static void			rpc_free(struct rpc_task *task);
+
+static void			rpc_async_schedule(void *);
+
+/*
+ * RPC tasks that create another task (e.g. for contacting the portmapper)
+ * will wait on this queue for their child's completion
+ */
+static RPC_WAITQ(childq, "childq");
+
+/*
+ * RPC tasks sit here while waiting for conditions to improve.
+ */
+static RPC_WAITQ(delay_queue, "delayq");
+
+/*
+ * All RPC tasks are linked into this list
+ */
+static LIST_HEAD(all_tasks);
+
+/*
+ * rpciod-related stuff
+ */
+static DECLARE_MUTEX(rpciod_sema);
+static unsigned int		rpciod_users;
+static struct workqueue_struct *rpciod_workqueue;
+
+/*
+ * Spinlock for other critical sections of code.
+ */
+static DEFINE_SPINLOCK(rpc_sched_lock);
+
+/*
+ * Disable the timer for a given RPC task. Should be called with
+ * queue->lock and bh_disabled in order to avoid races within
+ * rpc_run_timer().
+ */
+static inline void
+__rpc_disable_timer(struct rpc_task *task)
+{
+	dprintk("RPC: %4d disabling timer\n", task->tk_pid);
+	task->tk_timeout_fn = NULL;
+	task->tk_timeout = 0;
+}
+
+/*
+ * Run a timeout function.
+ * We use the callback in order to allow __rpc_wake_up_task()
+ * and friends to disable the timer synchronously on SMP systems
+ * without calling del_timer_sync(). The latter could cause a
+ * deadlock if called while we're holding spinlocks...
+ */
+static void rpc_run_timer(struct rpc_task *task)
+{
+	void (*callback)(struct rpc_task *);
+
+	callback = task->tk_timeout_fn;
+	task->tk_timeout_fn = NULL;
+	if (callback && RPC_IS_QUEUED(task)) {
+		dprintk("RPC: %4d running timer\n", task->tk_pid);
+		callback(task);
+	}
+	smp_mb__before_clear_bit();
+	clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
+	smp_mb__after_clear_bit();
+}
+
+/*
+ * Set up a timer for the current task.
+ */
+static inline void
+__rpc_add_timer(struct rpc_task *task, rpc_action timer)
+{
+	if (!task->tk_timeout)
+		return;
+
+	dprintk("RPC: %4d setting alarm for %lu ms\n",
+			task->tk_pid, task->tk_timeout * 1000 / HZ);
+
+	if (timer)
+		task->tk_timeout_fn = timer;
+	else
+		task->tk_timeout_fn = __rpc_default_timer;
+	set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
+	mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
+}
+
+/*
+ * Delete any timer for the current task. Because we use del_timer_sync(),
+ * this function should never be called while holding queue->lock.
+ */
+static void
+rpc_delete_timer(struct rpc_task *task)
+{
+	if (RPC_IS_QUEUED(task))
+		return;
+	if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
+		del_singleshot_timer_sync(&task->tk_timer);
+		dprintk("RPC: %4d deleting timer\n", task->tk_pid);
+	}
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
+	q = &queue->tasks[task->tk_priority];
+	if (unlikely(task->tk_priority > queue->maxpriority))
+		q = &queue->tasks[queue->maxpriority];
+	list_for_each_entry(t, q, u.tk_wait.list) {
+		if (t->tk_cookie == task->tk_cookie) {
+			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+			return;
+		}
+	}
+	list_add_tail(&task->u.tk_wait.list, q);
+}
+
+/*
+ * Add new request to wait queue.
+ *
+ * Swapper tasks always get inserted at the head of the queue.
+ * This should avoid many nasty memory deadlocks and hopefully
+ * improve overall performance.
+ * Everyone else gets appended to the queue to ensure proper FIFO behavior.
+ */
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+	BUG_ON (RPC_IS_QUEUED(task));
+
+	if (RPC_IS_PRIORITY(queue))
+		__rpc_add_wait_queue_priority(queue, task);
+	else if (RPC_IS_SWAPPER(task))
+		list_add(&task->u.tk_wait.list, &queue->tasks[0]);
+	else
+		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
+	task->u.tk_wait.rpc_waitq = queue;
+	rpc_set_queued(task);
+
+	dprintk("RPC: %4d added to queue %p \"%s\"\n",
+				task->tk_pid, queue, rpc_qname(queue));
+}
+
+/*
+ * Remove request from a priority queue.
+ */
+static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
+{
+	struct rpc_task *t;
+
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
+		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
+		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Remove request from queue.
+ * Note: must be called with spin lock held.
+ */
+static void __rpc_remove_wait_queue(struct rpc_task *task)
+{
+	struct rpc_wait_queue *queue;
+	queue = task->u.tk_wait.rpc_waitq;
+
+	if (RPC_IS_PRIORITY(queue))
+		__rpc_remove_wait_queue_priority(task);
+	else
+		list_del(&task->u.tk_wait.list);
+	dprintk("RPC: %4d removed from queue %p \"%s\"\n",
+				task->tk_pid, queue, rpc_qname(queue));
+}
+
+static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+	queue->priority = priority;
+	queue->count = 1 << (priority * 2);
+}
+
+static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
+{
+	queue->cookie = cookie;
+	queue->nr = RPC_BATCH_COUNT;
+}
+
+static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+	rpc_set_waitqueue_priority(queue, queue->maxpriority);
+	rpc_set_waitqueue_cookie(queue, 0);
+}
+
+static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
+{
+	int i;
+
+	spin_lock_init(&queue->lock);
+	for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
+		INIT_LIST_HEAD(&queue->tasks[i]);
+	queue->maxpriority = maxprio;
+	rpc_reset_waitqueue_priority(queue);
+#ifdef RPC_DEBUG
+	queue->name = qname;
+#endif
+}
+
+void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
+{
+	__rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
+}
+
+void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
+{
+	__rpc_init_priority_wait_queue(queue, qname, 0);
+}
+EXPORT_SYMBOL(rpc_init_wait_queue);
+
+/*
+ * Make an RPC task runnable.
+ *
+ * Note: If the task is ASYNC, this must be called with 
+ * the spinlock held to protect the wait queue operation.
+ */
+static void rpc_make_runnable(struct rpc_task *task)
+{
+	int do_ret;
+
+	BUG_ON(task->tk_timeout_fn);
+	do_ret = rpc_test_and_set_running(task);
+	rpc_clear_queued(task);
+	if (do_ret)
+		return;
+	if (RPC_IS_ASYNC(task)) {
+		int status;
+
+		INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
+		status = queue_work(task->tk_workqueue, &task->u.tk_work);
+		if (status < 0) {
+			printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+			task->tk_status = status;
+			return;
+		}
+	} else
+		wake_up(&task->u.tk_wait.waitq);
+}
+
+/*
+ * Place a newly initialized task on the workqueue.
+ */
+static inline void
+rpc_schedule_run(struct rpc_task *task)
+{
+	/* Don't run a child twice! */
+	if (RPC_IS_ACTIVATED(task))
+		return;
+	task->tk_active = 1;
+	rpc_make_runnable(task);
+}
+
+/*
+ * Prepare for sleeping on a wait queue.
+ * By always appending tasks to the list we ensure FIFO behavior.
+ * NB: An RPC task will only receive interrupt-driven events as long
+ * as it's on a wait queue.
+ */
+static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+			rpc_action action, rpc_action timer)
+{
+	dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
+				rpc_qname(q), jiffies);
+
+	if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
+		printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
+		return;
+	}
+
+	/* Mark the task as being activated if so needed */
+	if (!RPC_IS_ACTIVATED(task))
+		task->tk_active = 1;
+
+	__rpc_add_wait_queue(q, task);
+
+	BUG_ON(task->tk_callback != NULL);
+	task->tk_callback = action;
+	__rpc_add_timer(task, timer);
+}
+
+void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+				rpc_action action, rpc_action timer)
+{
+	/*
+	 * Protect the queue operations.
+	 */
+	spin_lock_bh(&q->lock);
+	__rpc_sleep_on(q, task, action, timer);
+	spin_unlock_bh(&q->lock);
+}
+
+/**
+ * __rpc_do_wake_up_task - wake up a single rpc_task
+ * @task: task to be woken up
+ *
+ * Caller must hold queue->lock, and have cleared the task queued flag.
+ */
+static void __rpc_do_wake_up_task(struct rpc_task *task)
+{
+	dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
+
+#ifdef RPC_DEBUG
+	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+#endif
+	/* Has the task been executed yet? If not, we cannot wake it up! */
+	if (!RPC_IS_ACTIVATED(task)) {
+		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
+		return;
+	}
+
+	__rpc_disable_timer(task);
+	__rpc_remove_wait_queue(task);
+
+	rpc_make_runnable(task);
+
+	dprintk("RPC:      __rpc_wake_up_task done\n");
+}
+
+/*
+ * Wake up the specified task
+ */
+static void __rpc_wake_up_task(struct rpc_task *task)
+{
+	if (rpc_start_wakeup(task)) {
+		if (RPC_IS_QUEUED(task))
+			__rpc_do_wake_up_task(task);
+		rpc_finish_wakeup(task);
+	}
+}
+
+/*
+ * Default timeout handler if none specified by user
+ */
+static void
+__rpc_default_timer(struct rpc_task *task)
+{
+	dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
+	task->tk_status = -ETIMEDOUT;
+	rpc_wake_up_task(task);
+}
+
+/*
+ * Wake up the specified task
+ */
+void rpc_wake_up_task(struct rpc_task *task)
+{
+	if (rpc_start_wakeup(task)) {
+		if (RPC_IS_QUEUED(task)) {
+			struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
+
+			spin_lock_bh(&queue->lock);
+			__rpc_do_wake_up_task(task);
+			spin_unlock_bh(&queue->lock);
+		}
+		rpc_finish_wakeup(task);
+	}
+}
+
+/*
+ * Wake up the next task on a priority queue.
+ */
+static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
+{
+	struct list_head *q;
+	struct rpc_task *task;
+
+	/*
+	 * Service a batch of tasks from a single cookie.
+	 */
+	q = &queue->tasks[queue->priority];
+	if (!list_empty(q)) {
+		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+		if (queue->cookie == task->tk_cookie) {
+			if (--queue->nr)
+				goto out;
+			list_move_tail(&task->u.tk_wait.list, q);
+		}
+		/*
+		 * Check if we need to switch queues.
+		 */
+		if (--queue->count)
+			goto new_cookie;
+	}
+
+	/*
+	 * Service the next queue.
+	 */
+	do {
+		if (q == &queue->tasks[0])
+			q = &queue->tasks[queue->maxpriority];
+		else
+			q = q - 1;
+		if (!list_empty(q)) {
+			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			goto new_queue;
+		}
+	} while (q != &queue->tasks[queue->priority]);
+
+	rpc_reset_waitqueue_priority(queue);
+	return NULL;
+
+new_queue:
+	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
+new_cookie:
+	rpc_set_waitqueue_cookie(queue, task->tk_cookie);
+out:
+	__rpc_wake_up_task(task);
+	return task;
+}
+
+/*
+ * Wake up the next task on the wait queue.
+ */
+struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
+{
+	struct rpc_task	*task = NULL;
+
+	dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
+	spin_lock_bh(&queue->lock);
+	if (RPC_IS_PRIORITY(queue))
+		task = __rpc_wake_up_next_priority(queue);
+	else {
+		task_for_first(task, &queue->tasks[0])
+			__rpc_wake_up_task(task);
+	}
+	spin_unlock_bh(&queue->lock);
+
+	return task;
+}
+
+/**
+ * rpc_wake_up - wake up all rpc_tasks
+ * @queue: rpc_wait_queue on which the tasks are sleeping
+ *
+ * Grabs queue->lock
+ */
+void rpc_wake_up(struct rpc_wait_queue *queue)
+{
+	struct rpc_task *task;
+
+	struct list_head *head;
+	spin_lock_bh(&queue->lock);
+	head = &queue->tasks[queue->maxpriority];
+	for (;;) {
+		while (!list_empty(head)) {
+			task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
+			__rpc_wake_up_task(task);
+		}
+		if (head == &queue->tasks[0])
+			break;
+		head--;
+	}
+	spin_unlock_bh(&queue->lock);
+}
+
+/**
+ * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
+ * @queue: rpc_wait_queue on which the tasks are sleeping
+ * @status: status value to set
+ *
+ * Grabs queue->lock
+ */
+void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
+{
+	struct list_head *head;
+	struct rpc_task *task;
+
+	spin_lock_bh(&queue->lock);
+	head = &queue->tasks[queue->maxpriority];
+	for (;;) {
+		while (!list_empty(head)) {
+			task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
+			task->tk_status = status;
+			__rpc_wake_up_task(task);
+		}
+		if (head == &queue->tasks[0])
+			break;
+		head--;
+	}
+	spin_unlock_bh(&queue->lock);
+}
+
+/*
+ * Run a task at a later time
+ */
+static void	__rpc_atrun(struct rpc_task *);
+void
+rpc_delay(struct rpc_task *task, unsigned long delay)
+{
+	task->tk_timeout = delay;
+	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
+}
+
+static void
+__rpc_atrun(struct rpc_task *task)
+{
+	task->tk_status = 0;
+	rpc_wake_up_task(task);
+}
+
+/*
+ * This is the RPC `scheduler' (or rather, the finite state machine).
+ */
+static int __rpc_execute(struct rpc_task *task)
+{
+	int		status = 0;
+
+	dprintk("RPC: %4d rpc_execute flgs %x\n",
+				task->tk_pid, task->tk_flags);
+
+	BUG_ON(RPC_IS_QUEUED(task));
+
+ restarted:
+	while (1) {
+		/*
+		 * Garbage collection of pending timers...
+		 */
+		rpc_delete_timer(task);
+
+		/*
+		 * Execute any pending callback.
+		 */
+		if (RPC_DO_CALLBACK(task)) {
+			/* Define a callback save pointer */
+			void (*save_callback)(struct rpc_task *);
+	
+			/* 
+			 * If a callback exists, save it, reset it,
+			 * call it.
+			 * The save is needed to stop from resetting
+			 * another callback set within the callback handler
+			 * - Dave
+			 */
+			save_callback=task->tk_callback;
+			task->tk_callback=NULL;
+			lock_kernel();
+			save_callback(task);
+			unlock_kernel();
+		}
+
+		/*
+		 * Perform the next FSM step.
+		 * tk_action may be NULL when the task has been killed
+		 * by someone else.
+		 */
+		if (!RPC_IS_QUEUED(task)) {
+			if (!task->tk_action)
+				break;
+			lock_kernel();
+			task->tk_action(task);
+			unlock_kernel();
+		}
+
+		/*
+		 * Lockless check for whether task is sleeping or not.
+		 */
+		if (!RPC_IS_QUEUED(task))
+			continue;
+		rpc_clear_running(task);
+		if (RPC_IS_ASYNC(task)) {
+			/* Careful! we may have raced... */
+			if (RPC_IS_QUEUED(task))
+				return 0;
+			if (rpc_test_and_set_running(task))
+				return 0;
+			continue;
+		}
+
+		/* sync task: sleep here */
+		dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
+		if (RPC_TASK_UNINTERRUPTIBLE(task)) {
+			__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
+		} else {
+			__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
+			/*
+			 * When a sync task receives a signal, it exits with
+			 * -ERESTARTSYS. In order to catch any callbacks that
+			 * clean up after sleeping on some queue, we don't
+			 * break the loop here, but go around once more.
+			 */
+			if (status == -ERESTARTSYS) {
+				dprintk("RPC: %4d got signal\n", task->tk_pid);
+				task->tk_flags |= RPC_TASK_KILLED;
+				rpc_exit(task, -ERESTARTSYS);
+				rpc_wake_up_task(task);
+			}
+		}
+		rpc_set_running(task);
+		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
+	}
+
+	if (task->tk_exit) {
+		lock_kernel();
+		task->tk_exit(task);
+		unlock_kernel();
+		/* If tk_action is non-null, the user wants us to restart */
+		if (task->tk_action) {
+			if (!RPC_ASSASSINATED(task)) {
+				/* Release RPC slot and buffer memory */
+				if (task->tk_rqstp)
+					xprt_release(task);
+				rpc_free(task);
+				goto restarted;
+			}
+			printk(KERN_ERR "RPC: dead task tries to walk away.\n");
+		}
+	}
+
+	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
+	status = task->tk_status;
+
+	/* Release all resources associated with the task */
+	rpc_release_task(task);
+	return status;
+}
+
+/*
+ * User-visible entry point to the scheduler.
+ *
+ * This may be called recursively if e.g. an async NFS task updates
+ * the attributes and finds that dirty pages must be flushed.
+ * NOTE: Upon exit of this function the task is guaranteed to be
+ *	 released. In particular note that tk_release() will have
+ *	 been called, so your task memory may have been freed.
+ */
+int
+rpc_execute(struct rpc_task *task)
+{
+	BUG_ON(task->tk_active);
+
+	task->tk_active = 1;
+	rpc_set_running(task);
+	return __rpc_execute(task);
+}
+
+static void rpc_async_schedule(void *arg)
+{
+	__rpc_execute((struct rpc_task *)arg);
+}
+
+/*
+ * Allocate memory for RPC purposes.
+ *
+ * We try to ensure that some NFS reads and writes can always proceed
+ * by using a mempool when allocating 'small' buffers.
+ * In order to avoid memory starvation triggering more writebacks of
+ * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
+ */
+void *
+rpc_malloc(struct rpc_task *task, size_t size)
+{
+	int	gfp;
+
+	if (task->tk_flags & RPC_TASK_SWAPPER)
+		gfp = GFP_ATOMIC;
+	else
+		gfp = GFP_NOFS;
+
+	if (size > RPC_BUFFER_MAXSIZE) {
+		task->tk_buffer =  kmalloc(size, gfp);
+		if (task->tk_buffer)
+			task->tk_bufsize = size;
+	} else {
+		task->tk_buffer =  mempool_alloc(rpc_buffer_mempool, gfp);
+		if (task->tk_buffer)
+			task->tk_bufsize = RPC_BUFFER_MAXSIZE;
+	}
+	return task->tk_buffer;
+}
+
+static void
+rpc_free(struct rpc_task *task)
+{
+	if (task->tk_buffer) {
+		if (task->tk_bufsize == RPC_BUFFER_MAXSIZE)
+			mempool_free(task->tk_buffer, rpc_buffer_mempool);
+		else
+			kfree(task->tk_buffer);
+		task->tk_buffer = NULL;
+		task->tk_bufsize = 0;
+	}
+}
+
+/*
+ * Creation and deletion of RPC task structures
+ */
+void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags)
+{
+	memset(task, 0, sizeof(*task));
+	init_timer(&task->tk_timer);
+	task->tk_timer.data     = (unsigned long) task;
+	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+	task->tk_client = clnt;
+	task->tk_flags  = flags;
+	task->tk_exit   = callback;
+
+	/* Initialize retry counters */
+	task->tk_garb_retry = 2;
+	task->tk_cred_retry = 2;
+
+	task->tk_priority = RPC_PRIORITY_NORMAL;
+	task->tk_cookie = (unsigned long)current;
+
+	/* Initialize workqueue for async tasks */
+	task->tk_workqueue = rpciod_workqueue;
+	if (!RPC_IS_ASYNC(task))
+		init_waitqueue_head(&task->u.tk_wait.waitq);
+
+	if (clnt) {
+		atomic_inc(&clnt->cl_users);
+		if (clnt->cl_softrtry)
+			task->tk_flags |= RPC_TASK_SOFT;
+		if (!clnt->cl_intr)
+			task->tk_flags |= RPC_TASK_NOINTR;
+	}
+
+#ifdef RPC_DEBUG
+	task->tk_magic = RPC_TASK_MAGIC_ID;
+	task->tk_pid = rpc_task_id++;
+#endif
+	/* Add to global list of all tasks */
+	spin_lock(&rpc_sched_lock);
+	list_add_tail(&task->tk_task, &all_tasks);
+	spin_unlock(&rpc_sched_lock);
+
+	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
+				current->pid);
+}
+
+static struct rpc_task *
+rpc_alloc_task(void)
+{
+	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+}
+
+static void
+rpc_default_free_task(struct rpc_task *task)
+{
+	dprintk("RPC: %4d freeing task\n", task->tk_pid);
+	mempool_free(task, rpc_task_mempool);
+}
+
+/*
+ * Create a new task for the specified client.  We have to
+ * clean up after an allocation failure, as the client may
+ * have specified "oneshot".
+ */
+struct rpc_task *
+rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
+{
+	struct rpc_task	*task;
+
+	task = rpc_alloc_task();
+	if (!task)
+		goto cleanup;
+
+	rpc_init_task(task, clnt, callback, flags);
+
+	/* Replace tk_release */
+	task->tk_release = rpc_default_free_task;
+
+	dprintk("RPC: %4d allocated task\n", task->tk_pid);
+	task->tk_flags |= RPC_TASK_DYNAMIC;
+out:
+	return task;
+
+cleanup:
+	/* Check whether to release the client */
+	if (clnt) {
+		printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
+			atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+		atomic_inc(&clnt->cl_users); /* pretend we were used ... */
+		rpc_release_client(clnt);
+	}
+	goto out;
+}
+
+void rpc_release_task(struct rpc_task *task)
+{
+	dprintk("RPC: %4d release task\n", task->tk_pid);
+
+#ifdef RPC_DEBUG
+	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+#endif
+
+	/* Remove from global task list */
+	spin_lock(&rpc_sched_lock);
+	list_del(&task->tk_task);
+	spin_unlock(&rpc_sched_lock);
+
+	BUG_ON (RPC_IS_QUEUED(task));
+	task->tk_active = 0;
+
+	/* Synchronously delete any running timer */
+	rpc_delete_timer(task);
+
+	/* Release resources */
+	if (task->tk_rqstp)
+		xprt_release(task);
+	if (task->tk_msg.rpc_cred)
+		rpcauth_unbindcred(task);
+	rpc_free(task);
+	if (task->tk_client) {
+		rpc_release_client(task->tk_client);
+		task->tk_client = NULL;
+	}
+
+#ifdef RPC_DEBUG
+	task->tk_magic = 0;
+#endif
+	if (task->tk_release)
+		task->tk_release(task);
+}
+
+/**
+ * rpc_find_parent - find the parent of a child task.
+ * @child: child task
+ *
+ * Checks that the parent task is still sleeping on the
+ * queue 'childq'. If so returns a pointer to the parent.
+ * Upon failure returns NULL.
+ *
+ * Caller must hold childq.lock
+ */
+static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
+{
+	struct rpc_task	*task, *parent;
+	struct list_head *le;
+
+	parent = (struct rpc_task *) child->tk_calldata;
+	task_for_each(task, le, &childq.tasks[0])
+		if (task == parent)
+			return parent;
+
+	return NULL;
+}
+
+static void rpc_child_exit(struct rpc_task *child)
+{
+	struct rpc_task	*parent;
+
+	spin_lock_bh(&childq.lock);
+	if ((parent = rpc_find_parent(child)) != NULL) {
+		parent->tk_status = child->tk_status;
+		__rpc_wake_up_task(parent);
+	}
+	spin_unlock_bh(&childq.lock);
+}
+
+/*
+ * Note: rpc_new_task releases the client after a failure.
+ */
+struct rpc_task *
+rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
+{
+	struct rpc_task	*task;
+
+	task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
+	if (!task)
+		goto fail;
+	task->tk_exit = rpc_child_exit;
+	task->tk_calldata = parent;
+	return task;
+
+fail:
+	parent->tk_status = -ENOMEM;
+	return NULL;
+}
+
+void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
+{
+	spin_lock_bh(&childq.lock);
+	/* N.B. Is it possible for the child to have already finished? */
+	__rpc_sleep_on(&childq, task, func, NULL);
+	rpc_schedule_run(child);
+	spin_unlock_bh(&childq.lock);
+}
+
+/*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+ */
+void rpc_killall_tasks(struct rpc_clnt *clnt)
+{
+	struct rpc_task	*rovr;
+	struct list_head *le;
+
+	dprintk("RPC:      killing all tasks for client %p\n", clnt);
+
+	/*
+	 * Spin lock all_tasks to prevent changes...
+	 */
+	spin_lock(&rpc_sched_lock);
+	alltask_for_each(rovr, le, &all_tasks) {
+		if (! RPC_IS_ACTIVATED(rovr))
+			continue;
+		if (!clnt || rovr->tk_client == clnt) {
+			rovr->tk_flags |= RPC_TASK_KILLED;
+			rpc_exit(rovr, -EIO);
+			rpc_wake_up_task(rovr);
+		}
+	}
+	spin_unlock(&rpc_sched_lock);
+}
+
+static DECLARE_MUTEX_LOCKED(rpciod_running);
+
+static void rpciod_killall(void)
+{
+	unsigned long flags;
+
+	while (!list_empty(&all_tasks)) {
+		clear_thread_flag(TIF_SIGPENDING);
+		rpc_killall_tasks(NULL);
+		flush_workqueue(rpciod_workqueue);
+		if (!list_empty(&all_tasks)) {
+			dprintk("rpciod_killall: waiting for tasks to exit\n");
+			yield();
+		}
+	}
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+}
+
+/*
+ * Start up the rpciod process if it's not already running.
+ */
+int
+rpciod_up(void)
+{
+	struct workqueue_struct *wq;
+	int error = 0;
+
+	down(&rpciod_sema);
+	dprintk("rpciod_up: users %d\n", rpciod_users);
+	rpciod_users++;
+	if (rpciod_workqueue)
+		goto out;
+	/*
+	 * If there's no pid, we should be the first user.
+	 */
+	if (rpciod_users > 1)
+		printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
+	/*
+	 * Create the rpciod thread and wait for it to start.
+	 */
+	error = -ENOMEM;
+	wq = create_workqueue("rpciod");
+	if (wq == NULL) {
+		printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
+		rpciod_users--;
+		goto out;
+	}
+	rpciod_workqueue = wq;
+	error = 0;
+out:
+	up(&rpciod_sema);
+	return error;
+}
+
+void
+rpciod_down(void)
+{
+	down(&rpciod_sema);
+	dprintk("rpciod_down sema %d\n", rpciod_users);
+	if (rpciod_users) {
+		if (--rpciod_users)
+			goto out;
+	} else
+		printk(KERN_WARNING "rpciod_down: no users??\n");
+
+	if (!rpciod_workqueue) {
+		dprintk("rpciod_down: Nothing to do!\n");
+		goto out;
+	}
+	rpciod_killall();
+
+	destroy_workqueue(rpciod_workqueue);
+	rpciod_workqueue = NULL;
+ out:
+	up(&rpciod_sema);
+}
+
+#ifdef RPC_DEBUG
+void rpc_show_tasks(void)
+{
+	struct list_head *le;
+	struct rpc_task *t;
+
+	spin_lock(&rpc_sched_lock);
+	if (list_empty(&all_tasks)) {
+		spin_unlock(&rpc_sched_lock);
+		return;
+	}
+	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+		"-rpcwait -action- --exit--\n");
+	alltask_for_each(t, le, &all_tasks) {
+		const char *rpc_waitq = "none";
+
+		if (RPC_IS_QUEUED(t))
+			rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+
+		printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
+			t->tk_pid,
+			(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+			t->tk_flags, t->tk_status,
+			t->tk_client,
+			(t->tk_client ? t->tk_client->cl_prog : 0),
+			t->tk_rqstp, t->tk_timeout,
+			rpc_waitq,
+			t->tk_action, t->tk_exit);
+	}
+	spin_unlock(&rpc_sched_lock);
+}
+#endif
+
+void
+rpc_destroy_mempool(void)
+{
+	if (rpc_buffer_mempool)
+		mempool_destroy(rpc_buffer_mempool);
+	if (rpc_task_mempool)
+		mempool_destroy(rpc_task_mempool);
+	if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp))
+		printk(KERN_INFO "rpc_task: not all structures were freed\n");
+	if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp))
+		printk(KERN_INFO "rpc_buffers: not all structures were freed\n");
+}
+
+int
+rpc_init_mempool(void)
+{
+	rpc_task_slabp = kmem_cache_create("rpc_tasks",
+					     sizeof(struct rpc_task),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (!rpc_task_slabp)
+		goto err_nomem;
+	rpc_buffer_slabp = kmem_cache_create("rpc_buffers",
+					     RPC_BUFFER_MAXSIZE,
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (!rpc_buffer_slabp)
+		goto err_nomem;
+	rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE,
+					    mempool_alloc_slab,
+					    mempool_free_slab,
+					    rpc_task_slabp);
+	if (!rpc_task_mempool)
+		goto err_nomem;
+	rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE,
+					    mempool_alloc_slab,
+					    mempool_free_slab,
+					    rpc_buffer_slabp);
+	if (!rpc_buffer_mempool)
+		goto err_nomem;
+	return 0;
+err_nomem:
+	rpc_destroy_mempool();
+	return -ENOMEM;
+}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
new file mode 100644
index 00000000000..9b67dc19944
--- /dev/null
+++ b/net/sunrpc/stats.c
@@ -0,0 +1,175 @@
+/*
+ * linux/net/sunrpc/stats.c
+ *
+ * procfs-based user access to generic RPC statistics. The stats files
+ * reside in /proc/net/rpc.
+ *
+ * The read routines assume that the buffer passed in is just big enough.
+ * If you implement an RPC service that has its own stats routine which
+ * appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
+ * limit.
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/module.h>
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svcsock.h>
+
+#define RPCDBG_FACILITY	RPCDBG_MISC
+
+struct proc_dir_entry	*proc_net_rpc = NULL;
+
+/*
+ * Get RPC client stats
+ */
+static int rpc_proc_show(struct seq_file *seq, void *v) {
+	const struct rpc_stat	*statp = seq->private;
+	const struct rpc_program *prog = statp->program;
+	int		i, j;
+
+	seq_printf(seq,
+		"net %d %d %d %d\n",
+			statp->netcnt,
+			statp->netudpcnt,
+			statp->nettcpcnt,
+			statp->nettcpconn);
+	seq_printf(seq,
+		"rpc %d %d %d\n",
+			statp->rpccnt,
+			statp->rpcretrans,
+			statp->rpcauthrefresh);
+
+	for (i = 0; i < prog->nrvers; i++) {
+		const struct rpc_version *vers = prog->version[i];
+		if (!vers)
+			continue;
+		seq_printf(seq, "proc%d %d",
+					vers->number, vers->nrprocs);
+		for (j = 0; j < vers->nrprocs; j++)
+			seq_printf(seq, " %d",
+					vers->procs[j].p_count);
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+static int rpc_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rpc_proc_show, PDE(inode)->data);
+}
+
+static struct file_operations rpc_proc_fops = {
+	.owner = THIS_MODULE,
+	.open = rpc_proc_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Get RPC server stats
+ */
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+	const struct svc_program *prog = statp->program;
+	const struct svc_procedure *proc;
+	const struct svc_version *vers;
+	int		i, j;
+
+	seq_printf(seq,
+		"net %d %d %d %d\n",
+			statp->netcnt,
+			statp->netudpcnt,
+			statp->nettcpcnt,
+			statp->nettcpconn);
+	seq_printf(seq,
+		"rpc %d %d %d %d %d\n",
+			statp->rpccnt,
+			statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
+			statp->rpcbadfmt,
+			statp->rpcbadauth,
+			statp->rpcbadclnt);
+
+	for (i = 0; i < prog->pg_nvers; i++) {
+		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+			continue;
+		seq_printf(seq, "proc%d %d", i, vers->vs_nproc);
+		for (j = 0; j < vers->vs_nproc; j++, proc++)
+			seq_printf(seq, " %d", proc->pc_count);
+		seq_putc(seq, '\n');
+	}
+}
+
+/*
+ * Register/unregister RPC proc files
+ */
+static inline struct proc_dir_entry *
+do_register(const char *name, void *data, struct file_operations *fops)
+{
+	struct proc_dir_entry *ent;
+
+	rpc_proc_init();
+	dprintk("RPC: registering /proc/net/rpc/%s\n", name);
+
+	ent = create_proc_entry(name, 0, proc_net_rpc);
+	if (ent) {
+		ent->proc_fops = fops;
+		ent->data = data;
+	}
+	return ent;
+}
+
+struct proc_dir_entry *
+rpc_proc_register(struct rpc_stat *statp)
+{
+	return do_register(statp->program->name, statp, &rpc_proc_fops);
+}
+
+void
+rpc_proc_unregister(const char *name)
+{
+	remove_proc_entry(name, proc_net_rpc);
+}
+
+struct proc_dir_entry *
+svc_proc_register(struct svc_stat *statp, struct file_operations *fops)
+{
+	return do_register(statp->program->pg_name, statp, fops);
+}
+
+void
+svc_proc_unregister(const char *name)
+{
+	remove_proc_entry(name, proc_net_rpc);
+}
+
+void
+rpc_proc_init(void)
+{
+	dprintk("RPC: registering /proc/net/rpc\n");
+	if (!proc_net_rpc) {
+		struct proc_dir_entry *ent;
+		ent = proc_mkdir("rpc", proc_net);
+		if (ent) {
+			ent->owner = THIS_MODULE;
+			proc_net_rpc = ent;
+		}
+	}
+}
+
+void
+rpc_proc_exit(void)
+{
+	dprintk("RPC: unregistering /proc/net/rpc\n");
+	if (proc_net_rpc) {
+		proc_net_rpc = NULL;
+		remove_proc_entry("net/rpc", NULL);
+	}
+}
+
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
new file mode 100644
index 00000000000..d4f26bf9e73
--- /dev/null
+++ b/net/sunrpc/sunrpc_syms.c
@@ -0,0 +1,185 @@
+/*
+ * linux/net/sunrpc/sunrpc_syms.c
+ *
+ * Symbols exported by the sunrpc module.
+ *
+ * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sched.h>
+#include <linux/uio.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+
+/* RPC scheduler */
+EXPORT_SYMBOL(rpc_execute);
+EXPORT_SYMBOL(rpc_init_task);
+EXPORT_SYMBOL(rpc_sleep_on);
+EXPORT_SYMBOL(rpc_wake_up_next);
+EXPORT_SYMBOL(rpc_wake_up_task);
+EXPORT_SYMBOL(rpc_new_child);
+EXPORT_SYMBOL(rpc_run_child);
+EXPORT_SYMBOL(rpciod_down);
+EXPORT_SYMBOL(rpciod_up);
+EXPORT_SYMBOL(rpc_new_task);
+EXPORT_SYMBOL(rpc_wake_up_status);
+EXPORT_SYMBOL(rpc_release_task);
+
+/* RPC client functions */
+EXPORT_SYMBOL(rpc_create_client);
+EXPORT_SYMBOL(rpc_clone_client);
+EXPORT_SYMBOL(rpc_destroy_client);
+EXPORT_SYMBOL(rpc_shutdown_client);
+EXPORT_SYMBOL(rpc_release_client);
+EXPORT_SYMBOL(rpc_killall_tasks);
+EXPORT_SYMBOL(rpc_call_sync);
+EXPORT_SYMBOL(rpc_call_async);
+EXPORT_SYMBOL(rpc_call_setup);
+EXPORT_SYMBOL(rpc_clnt_sigmask);
+EXPORT_SYMBOL(rpc_clnt_sigunmask);
+EXPORT_SYMBOL(rpc_delay);
+EXPORT_SYMBOL(rpc_restart_call);
+EXPORT_SYMBOL(rpc_setbufsize);
+EXPORT_SYMBOL(rpc_unlink);
+EXPORT_SYMBOL(rpc_wake_up);
+EXPORT_SYMBOL(rpc_queue_upcall);
+EXPORT_SYMBOL(rpc_mkpipe);
+
+/* Client transport */
+EXPORT_SYMBOL(xprt_create_proto);
+EXPORT_SYMBOL(xprt_destroy);
+EXPORT_SYMBOL(xprt_set_timeout);
+EXPORT_SYMBOL(xprt_udp_slot_table_entries);
+EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
+
+/* Client credential cache */
+EXPORT_SYMBOL(rpcauth_register);
+EXPORT_SYMBOL(rpcauth_unregister);
+EXPORT_SYMBOL(rpcauth_create);
+EXPORT_SYMBOL(rpcauth_lookupcred);
+EXPORT_SYMBOL(rpcauth_lookup_credcache);
+EXPORT_SYMBOL(rpcauth_free_credcache);
+EXPORT_SYMBOL(rpcauth_init_credcache);
+EXPORT_SYMBOL(put_rpccred);
+
+/* RPC server stuff */
+EXPORT_SYMBOL(svc_create);
+EXPORT_SYMBOL(svc_create_thread);
+EXPORT_SYMBOL(svc_exit_thread);
+EXPORT_SYMBOL(svc_destroy);
+EXPORT_SYMBOL(svc_drop);
+EXPORT_SYMBOL(svc_process);
+EXPORT_SYMBOL(svc_recv);
+EXPORT_SYMBOL(svc_wake_up);
+EXPORT_SYMBOL(svc_makesock);
+EXPORT_SYMBOL(svc_reserve);
+EXPORT_SYMBOL(svc_auth_register);
+EXPORT_SYMBOL(auth_domain_lookup);
+EXPORT_SYMBOL(svc_authenticate);
+EXPORT_SYMBOL(svc_set_client);
+
+/* RPC statistics */
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(rpc_proc_register);
+EXPORT_SYMBOL(rpc_proc_unregister);
+EXPORT_SYMBOL(svc_proc_register);
+EXPORT_SYMBOL(svc_proc_unregister);
+EXPORT_SYMBOL(svc_seq_show);
+#endif
+
+/* caching... */
+EXPORT_SYMBOL(auth_domain_find);
+EXPORT_SYMBOL(auth_domain_put);
+EXPORT_SYMBOL(auth_unix_add_addr);
+EXPORT_SYMBOL(auth_unix_forget_old);
+EXPORT_SYMBOL(auth_unix_lookup);
+EXPORT_SYMBOL(cache_check);
+EXPORT_SYMBOL(cache_flush);
+EXPORT_SYMBOL(cache_purge);
+EXPORT_SYMBOL(cache_fresh);
+EXPORT_SYMBOL(cache_init);
+EXPORT_SYMBOL(cache_register);
+EXPORT_SYMBOL(cache_unregister);
+EXPORT_SYMBOL(qword_add);
+EXPORT_SYMBOL(qword_addhex);
+EXPORT_SYMBOL(qword_get);
+EXPORT_SYMBOL(svcauth_unix_purge);
+EXPORT_SYMBOL(unix_domain_find);
+
+/* Generic XDR */
+EXPORT_SYMBOL(xdr_encode_string);
+EXPORT_SYMBOL(xdr_decode_string);
+EXPORT_SYMBOL(xdr_decode_string_inplace);
+EXPORT_SYMBOL(xdr_decode_netobj);
+EXPORT_SYMBOL(xdr_encode_netobj);
+EXPORT_SYMBOL(xdr_encode_pages);
+EXPORT_SYMBOL(xdr_inline_pages);
+EXPORT_SYMBOL(xdr_shift_buf);
+EXPORT_SYMBOL(xdr_buf_from_iov);
+EXPORT_SYMBOL(xdr_buf_subsegment);
+EXPORT_SYMBOL(xdr_buf_read_netobj);
+EXPORT_SYMBOL(read_bytes_from_xdr_buf);
+
+/* Debugging symbols */
+#ifdef RPC_DEBUG
+EXPORT_SYMBOL(rpc_debug);
+EXPORT_SYMBOL(nfs_debug);
+EXPORT_SYMBOL(nfsd_debug);
+EXPORT_SYMBOL(nlm_debug);
+#endif
+
+extern int register_rpc_pipefs(void);
+extern void unregister_rpc_pipefs(void);
+
+static int __init
+init_sunrpc(void)
+{
+	int err = register_rpc_pipefs();
+	if (err)
+		goto out;
+	err = rpc_init_mempool() != 0;
+	if (err)
+		goto out;
+#ifdef RPC_DEBUG
+	rpc_register_sysctl();
+#endif
+#ifdef CONFIG_PROC_FS
+	rpc_proc_init();
+#endif
+	cache_register(&auth_domain_cache);
+	cache_register(&ip_map_cache);
+out:
+	return err;
+}
+
+static void __exit
+cleanup_sunrpc(void)
+{
+	unregister_rpc_pipefs();
+	rpc_destroy_mempool();
+	cache_unregister(&auth_domain_cache);
+	cache_unregister(&ip_map_cache);
+#ifdef RPC_DEBUG
+	rpc_unregister_sysctl();
+#endif
+#ifdef CONFIG_PROC_FS
+	rpc_proc_exit();
+#endif
+}
+MODULE_LICENSE("GPL");
+module_init(init_sunrpc);
+module_exit(cleanup_sunrpc);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
new file mode 100644
index 00000000000..bb2d99f3331
--- /dev/null
+++ b/net/sunrpc/svc.c
@@ -0,0 +1,490 @@
+/*
+ * linux/net/sunrpc/svc.c
+ *
+ * High-level RPC service routines
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/linkage.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/mm.h>
+
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/clnt.h>
+
+#define RPCDBG_FACILITY	RPCDBG_SVCDSP
+#define RPC_PARANOIA 1
+
+/*
+ * Create an RPC service
+ */
+struct svc_serv *
+svc_create(struct svc_program *prog, unsigned int bufsize)
+{
+	struct svc_serv	*serv;
+	int vers;
+	unsigned int xdrsize;
+
+	if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
+		return NULL;
+	memset(serv, 0, sizeof(*serv));
+	serv->sv_program   = prog;
+	serv->sv_nrthreads = 1;
+	serv->sv_stats     = prog->pg_stats;
+	serv->sv_bufsz	   = bufsize? bufsize : 4096;
+	prog->pg_lovers = prog->pg_nvers-1;
+	xdrsize = 0;
+	for (vers=0; vers<prog->pg_nvers ; vers++)
+		if (prog->pg_vers[vers]) {
+			prog->pg_hivers = vers;
+			if (prog->pg_lovers > vers)
+				prog->pg_lovers = vers;
+			if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
+				xdrsize = prog->pg_vers[vers]->vs_xdrsize;
+		}
+	serv->sv_xdrsize   = xdrsize;
+	INIT_LIST_HEAD(&serv->sv_threads);
+	INIT_LIST_HEAD(&serv->sv_sockets);
+	INIT_LIST_HEAD(&serv->sv_tempsocks);
+	INIT_LIST_HEAD(&serv->sv_permsocks);
+	spin_lock_init(&serv->sv_lock);
+
+	serv->sv_name      = prog->pg_name;
+
+	/* Remove any stale portmap registrations */
+	svc_register(serv, 0, 0);
+
+	return serv;
+}
+
+/*
+ * Destroy an RPC service
+ */
+void
+svc_destroy(struct svc_serv *serv)
+{
+	struct svc_sock	*svsk;
+
+	dprintk("RPC: svc_destroy(%s, %d)\n",
+				serv->sv_program->pg_name,
+				serv->sv_nrthreads);
+
+	if (serv->sv_nrthreads) {
+		if (--(serv->sv_nrthreads) != 0) {
+			svc_sock_update_bufs(serv);
+			return;
+		}
+	} else
+		printk("svc_destroy: no threads for serv=%p!\n", serv);
+
+	while (!list_empty(&serv->sv_tempsocks)) {
+		svsk = list_entry(serv->sv_tempsocks.next,
+				  struct svc_sock,
+				  sk_list);
+		svc_delete_socket(svsk);
+	}
+	while (!list_empty(&serv->sv_permsocks)) {
+		svsk = list_entry(serv->sv_permsocks.next,
+				  struct svc_sock,
+				  sk_list);
+		svc_delete_socket(svsk);
+	}
+	
+	cache_clean_deferred(serv);
+
+	/* Unregister service with the portmapper */
+	svc_register(serv, 0, 0);
+	kfree(serv);
+}
+
+/*
+ * Allocate an RPC server's buffer space.
+ * We allocate pages and place them in rq_argpages.
+ */
+static int
+svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
+{
+	int pages;
+	int arghi;
+	
+	if (size > RPCSVC_MAXPAYLOAD)
+		size = RPCSVC_MAXPAYLOAD;
+	pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
+	rqstp->rq_argused = 0;
+	rqstp->rq_resused = 0;
+	arghi = 0;
+	if (pages > RPCSVC_MAXPAGES)
+		BUG();
+	while (pages) {
+		struct page *p = alloc_page(GFP_KERNEL);
+		if (!p)
+			break;
+		rqstp->rq_argpages[arghi++] = p;
+		pages--;
+	}
+	rqstp->rq_arghi = arghi;
+	return ! pages;
+}
+
+/*
+ * Release an RPC server buffer
+ */
+static void
+svc_release_buffer(struct svc_rqst *rqstp)
+{
+	while (rqstp->rq_arghi)
+		put_page(rqstp->rq_argpages[--rqstp->rq_arghi]);
+	while (rqstp->rq_resused) {
+		if (rqstp->rq_respages[--rqstp->rq_resused] == NULL)
+			continue;
+		put_page(rqstp->rq_respages[rqstp->rq_resused]);
+	}
+	rqstp->rq_argused = 0;
+}
+
+/*
+ * Create a server thread
+ */
+int
+svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+{
+	struct svc_rqst	*rqstp;
+	int		error = -ENOMEM;
+
+	rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
+	if (!rqstp)
+		goto out;
+
+	memset(rqstp, 0, sizeof(*rqstp));
+	init_waitqueue_head(&rqstp->rq_wait);
+
+	if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
+	 || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
+	 || !svc_init_buffer(rqstp, serv->sv_bufsz))
+		goto out_thread;
+
+	serv->sv_nrthreads++;
+	rqstp->rq_server = serv;
+	error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+	if (error < 0)
+		goto out_thread;
+	svc_sock_update_bufs(serv);
+	error = 0;
+out:
+	return error;
+
+out_thread:
+	svc_exit_thread(rqstp);
+	goto out;
+}
+
+/*
+ * Destroy an RPC server thread
+ */
+void
+svc_exit_thread(struct svc_rqst *rqstp)
+{
+	struct svc_serv	*serv = rqstp->rq_server;
+
+	svc_release_buffer(rqstp);
+	if (rqstp->rq_resp)
+		kfree(rqstp->rq_resp);
+	if (rqstp->rq_argp)
+		kfree(rqstp->rq_argp);
+	if (rqstp->rq_auth_data)
+		kfree(rqstp->rq_auth_data);
+	kfree(rqstp);
+
+	/* Release the server */
+	if (serv)
+		svc_destroy(serv);
+}
+
+/*
+ * Register an RPC service with the local portmapper.
+ * To unregister a service, call this routine with 
+ * proto and port == 0.
+ */
+int
+svc_register(struct svc_serv *serv, int proto, unsigned short port)
+{
+	struct svc_program	*progp;
+	unsigned long		flags;
+	int			i, error = 0, dummy;
+
+	progp = serv->sv_program;
+
+	dprintk("RPC: svc_register(%s, %s, %d)\n",
+		progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
+
+	if (!port)
+		clear_thread_flag(TIF_SIGPENDING);
+
+	for (i = 0; i < progp->pg_nvers; i++) {
+		if (progp->pg_vers[i] == NULL)
+			continue;
+		error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
+		if (error < 0)
+			break;
+		if (port && !dummy) {
+			error = -EACCES;
+			break;
+		}
+	}
+
+	if (!port) {
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+
+	return error;
+}
+
+/*
+ * Process the RPC request.
+ */
+int
+svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
+{
+	struct svc_program	*progp;
+	struct svc_version	*versp = NULL;	/* compiler food */
+	struct svc_procedure	*procp = NULL;
+	struct kvec *		argv = &rqstp->rq_arg.head[0];
+	struct kvec *		resv = &rqstp->rq_res.head[0];
+	kxdrproc_t		xdr;
+	u32			*statp;
+	u32			dir, prog, vers, proc,
+				auth_stat, rpc_stat;
+	int			auth_res;
+	u32			*accept_statp;
+
+	rpc_stat = rpc_success;
+
+	if (argv->iov_len < 6*4)
+		goto err_short_len;
+
+	/* setup response xdr_buf.
+	 * Initially it has just one page 
+	 */
+	svc_take_page(rqstp); /* must succeed */
+	resv->iov_base = page_address(rqstp->rq_respages[0]);
+	resv->iov_len = 0;
+	rqstp->rq_res.pages = rqstp->rq_respages+1;
+	rqstp->rq_res.len = 0;
+	rqstp->rq_res.page_base = 0;
+	rqstp->rq_res.page_len = 0;
+	rqstp->rq_res.tail[0].iov_len = 0;
+	/* tcp needs a space for the record length... */
+	if (rqstp->rq_prot == IPPROTO_TCP)
+		svc_putu32(resv, 0);
+
+	rqstp->rq_xid = svc_getu32(argv);
+	svc_putu32(resv, rqstp->rq_xid);
+
+	dir  = ntohl(svc_getu32(argv));
+	vers = ntohl(svc_getu32(argv));
+
+	/* First words of reply: */
+	svc_putu32(resv, xdr_one);		/* REPLY */
+
+	if (dir != 0)		/* direction != CALL */
+		goto err_bad_dir;
+	if (vers != 2)		/* RPC version number */
+		goto err_bad_rpc;
+
+	/* Save position in case we later decide to reject: */
+	accept_statp = resv->iov_base + resv->iov_len;
+
+	svc_putu32(resv, xdr_zero);		/* ACCEPT */
+
+	rqstp->rq_prog = prog = ntohl(svc_getu32(argv));	/* program number */
+	rqstp->rq_vers = vers = ntohl(svc_getu32(argv));	/* version number */
+	rqstp->rq_proc = proc = ntohl(svc_getu32(argv));	/* procedure number */
+
+	progp = serv->sv_program;
+	/*
+	 * Decode auth data, and add verifier to reply buffer.
+	 * We do this before anything else in order to get a decent
+	 * auth verifier.
+	 */
+	auth_res = svc_authenticate(rqstp, &auth_stat);
+	/* Also give the program a chance to reject this call: */
+	if (auth_res == SVC_OK) {
+		auth_stat = rpc_autherr_badcred;
+		auth_res = progp->pg_authenticate(rqstp);
+	}
+	switch (auth_res) {
+	case SVC_OK:
+		break;
+	case SVC_GARBAGE:
+		rpc_stat = rpc_garbage_args;
+		goto err_bad;
+	case SVC_SYSERR:
+		rpc_stat = rpc_system_err;
+		goto err_bad;
+	case SVC_DENIED:
+		goto err_bad_auth;
+	case SVC_DROP:
+		goto dropit;
+	case SVC_COMPLETE:
+		goto sendit;
+	}
+		
+	if (prog != progp->pg_prog)
+		goto err_bad_prog;
+
+	if (vers >= progp->pg_nvers ||
+	  !(versp = progp->pg_vers[vers]))
+		goto err_bad_vers;
+
+	procp = versp->vs_proc + proc;
+	if (proc >= versp->vs_nproc || !procp->pc_func)
+		goto err_bad_proc;
+	rqstp->rq_server   = serv;
+	rqstp->rq_procinfo = procp;
+
+	/* Syntactic check complete */
+	serv->sv_stats->rpccnt++;
+
+	/* Build the reply header. */
+	statp = resv->iov_base +resv->iov_len;
+	svc_putu32(resv, rpc_success);		/* RPC_SUCCESS */
+
+	/* Bump per-procedure stats counter */
+	procp->pc_count++;
+
+	/* Initialize storage for argp and resp */
+	memset(rqstp->rq_argp, 0, procp->pc_argsize);
+	memset(rqstp->rq_resp, 0, procp->pc_ressize);
+
+	/* un-reserve some of the out-queue now that we have a 
+	 * better idea of reply size
+	 */
+	if (procp->pc_xdrressize)
+		svc_reserve(rqstp, procp->pc_xdrressize<<2);
+
+	/* Call the function that processes the request. */
+	if (!versp->vs_dispatch) {
+		/* Decode arguments */
+		xdr = procp->pc_decode;
+		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+			goto err_garbage;
+
+		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+
+		/* Encode reply */
+		if (*statp == rpc_success && (xdr = procp->pc_encode)
+		 && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+			dprintk("svc: failed to encode reply\n");
+			/* serv->sv_stats->rpcsystemerr++; */
+			*statp = rpc_system_err;
+		}
+	} else {
+		dprintk("svc: calling dispatcher\n");
+		if (!versp->vs_dispatch(rqstp, statp)) {
+			/* Release reply info */
+			if (procp->pc_release)
+				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+			goto dropit;
+		}
+	}
+
+	/* Check RPC status result */
+	if (*statp != rpc_success)
+		resv->iov_len = ((void*)statp)  - resv->iov_base + 4;
+
+	/* Release reply info */
+	if (procp->pc_release)
+		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+
+	if (procp->pc_encode == NULL)
+		goto dropit;
+
+ sendit:
+	if (svc_authorise(rqstp))
+		goto dropit;
+	return svc_send(rqstp);
+
+ dropit:
+	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
+	dprintk("svc: svc_process dropit\n");
+	svc_drop(rqstp);
+	return 0;
+
+err_short_len:
+#ifdef RPC_PARANOIA
+	printk("svc: short len %Zd, dropping request\n", argv->iov_len);
+#endif
+	goto dropit;			/* drop request */
+
+err_bad_dir:
+#ifdef RPC_PARANOIA
+	printk("svc: bad direction %d, dropping request\n", dir);
+#endif
+	serv->sv_stats->rpcbadfmt++;
+	goto dropit;			/* drop request */
+
+err_bad_rpc:
+	serv->sv_stats->rpcbadfmt++;
+	svc_putu32(resv, xdr_one);	/* REJECT */
+	svc_putu32(resv, xdr_zero);	/* RPC_MISMATCH */
+	svc_putu32(resv, xdr_two);	/* Only RPCv2 supported */
+	svc_putu32(resv, xdr_two);
+	goto sendit;
+
+err_bad_auth:
+	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
+	serv->sv_stats->rpcbadauth++;
+	/* Restore write pointer to location of accept status: */
+	xdr_ressize_check(rqstp, accept_statp);
+	svc_putu32(resv, xdr_one);	/* REJECT */
+	svc_putu32(resv, xdr_one);	/* AUTH_ERROR */
+	svc_putu32(resv, auth_stat);	/* status */
+	goto sendit;
+
+err_bad_prog:
+#ifdef RPC_PARANOIA
+	if (prog != 100227 || progp->pg_prog != 100003)
+		printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
+	/* else it is just a Solaris client seeing if ACLs are supported */
+#endif
+	serv->sv_stats->rpcbadfmt++;
+	svc_putu32(resv, rpc_prog_unavail);
+	goto sendit;
+
+err_bad_vers:
+#ifdef RPC_PARANOIA
+	printk("svc: unknown version (%d)\n", vers);
+#endif
+	serv->sv_stats->rpcbadfmt++;
+	svc_putu32(resv, rpc_prog_mismatch);
+	svc_putu32(resv, htonl(progp->pg_lovers));
+	svc_putu32(resv, htonl(progp->pg_hivers));
+	goto sendit;
+
+err_bad_proc:
+#ifdef RPC_PARANOIA
+	printk("svc: unknown procedure (%d)\n", proc);
+#endif
+	serv->sv_stats->rpcbadfmt++;
+	svc_putu32(resv, rpc_proc_unavail);
+	goto sendit;
+
+err_garbage:
+#ifdef RPC_PARANOIA
+	printk("svc: failed to decode args\n");
+#endif
+	rpc_stat = rpc_garbage_args;
+err_bad:
+	serv->sv_stats->rpcbadfmt++;
+	svc_putu32(resv, rpc_stat);
+	goto sendit;
+}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
new file mode 100644
index 00000000000..bde8147ef2d
--- /dev/null
+++ b/net/sunrpc/svcauth.c
@@ -0,0 +1,216 @@
+/*
+ * linux/net/sunrpc/svcauth.c
+ *
+ * The generic interface for RPC authentication on the server side.
+ * 
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * CHANGES
+ * 19-Apr-2000 Chris Evans      - Security fix
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/err.h>
+#include <linux/hash.h>
+
+#define RPCDBG_FACILITY	RPCDBG_AUTH
+
+
+/*
+ * Table of authenticators
+ */
+extern struct auth_ops svcauth_null;
+extern struct auth_ops svcauth_unix;
+
+static DEFINE_SPINLOCK(authtab_lock);
+static struct auth_ops	*authtab[RPC_AUTH_MAXFLAVOR] = {
+	[0] = &svcauth_null,
+	[1] = &svcauth_unix,
+};
+
+int
+svc_authenticate(struct svc_rqst *rqstp, u32 *authp)
+{
+	rpc_authflavor_t	flavor;
+	struct auth_ops		*aops;
+
+	*authp = rpc_auth_ok;
+
+	flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
+
+	dprintk("svc: svc_authenticate (%d)\n", flavor);
+
+	spin_lock(&authtab_lock);
+	if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])
+			|| !try_module_get(aops->owner)) {
+		spin_unlock(&authtab_lock);
+		*authp = rpc_autherr_badcred;
+		return SVC_DENIED;
+	}
+	spin_unlock(&authtab_lock);
+
+	rqstp->rq_authop = aops;
+	return aops->accept(rqstp, authp);
+}
+
+int svc_set_client(struct svc_rqst *rqstp)
+{
+	return rqstp->rq_authop->set_client(rqstp);
+}
+
+/* A request, which was authenticated, has now executed.
+ * Time to finalise the the credentials and verifier
+ * and release and resources
+ */
+int svc_authorise(struct svc_rqst *rqstp)
+{
+	struct auth_ops *aops = rqstp->rq_authop;
+	int rv = 0;
+
+	rqstp->rq_authop = NULL;
+	
+	if (aops) {
+		rv = aops->release(rqstp);
+		module_put(aops->owner);
+	}
+	return rv;
+}
+
+int
+svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
+{
+	int rv = -EINVAL;
+	spin_lock(&authtab_lock);
+	if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
+		authtab[flavor] = aops;
+		rv = 0;
+	}
+	spin_unlock(&authtab_lock);
+	return rv;
+}
+
+void
+svc_auth_unregister(rpc_authflavor_t flavor)
+{
+	spin_lock(&authtab_lock);
+	if (flavor < RPC_AUTH_MAXFLAVOR)
+		authtab[flavor] = NULL;
+	spin_unlock(&authtab_lock);
+}
+EXPORT_SYMBOL(svc_auth_unregister);
+
+/**************************************************
+ * cache for domain name to auth_domain
+ * Entries are only added by flavours which will normally
+ * have a structure that 'inherits' from auth_domain.
+ * e.g. when an IP -> domainname is given to  auth_unix,
+ * and the domain name doesn't exist, it will create a
+ * auth_unix_domain and add it to this hash table.
+ * If it finds the name does exist, but isn't AUTH_UNIX,
+ * it will complain.
+ */
+
+/*
+ * Auth auth_domain cache is somewhat different to other caches,
+ * largely because the entries are possibly of different types:
+ * each auth flavour has it's own type.
+ * One consequence of this that DefineCacheLookup cannot
+ * allocate a new structure as it cannot know the size.
+ * Notice that the "INIT" code fragment is quite different
+ * from other caches.  When auth_domain_lookup might be
+ * creating a new domain, the new domain is passed in
+ * complete and it is used as-is rather than being copied into
+ * another structure.
+ */
+#define	DN_HASHBITS	6
+#define	DN_HASHMAX	(1<<DN_HASHBITS)
+#define	DN_HASHMASK	(DN_HASHMAX-1)
+
+static struct cache_head	*auth_domain_table[DN_HASHMAX];
+
+static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd)
+{
+	struct auth_domain *dom = container_of(item, struct auth_domain, h);
+	if (cache_put(item,cd))
+		authtab[dom->flavour]->domain_release(dom);
+}
+
+
+struct cache_detail auth_domain_cache = {
+	.hash_size	= DN_HASHMAX,
+	.hash_table	= auth_domain_table,
+	.name		= "auth.domain",
+	.cache_put	= auth_domain_drop,
+};
+
+void auth_domain_put(struct auth_domain *dom)
+{
+	auth_domain_drop(&dom->h, &auth_domain_cache);
+}
+
+static inline int auth_domain_hash(struct auth_domain *item)
+{
+	return hash_str(item->name, DN_HASHBITS);
+}
+static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item)
+{
+	return strcmp(tmp->name, item->name) == 0;
+}
+
+struct auth_domain *
+auth_domain_lookup(struct auth_domain *item, int set)
+{
+	struct auth_domain *tmp = NULL;
+	struct cache_head **hp, **head;
+	head = &auth_domain_cache.hash_table[auth_domain_hash(item)];
+
+	if (set)
+		write_lock(&auth_domain_cache.hash_lock);
+	else
+		read_lock(&auth_domain_cache.hash_lock);
+	for (hp=head; *hp != NULL; hp = &tmp->h.next) {
+		tmp = container_of(*hp, struct auth_domain, h);
+		if (!auth_domain_match(tmp, item))
+			continue;
+		if (!set) {
+			cache_get(&tmp->h);
+			goto out_noset;
+		}
+		*hp = tmp->h.next;
+		tmp->h.next = NULL;
+		auth_domain_drop(&tmp->h, &auth_domain_cache);
+		goto out_set;
+	}
+	/* Didn't find anything */
+	if (!set)
+		goto out_nada;
+	auth_domain_cache.entries++;
+out_set:
+	item->h.next = *head;
+	*head = &item->h;
+	cache_get(&item->h);
+	write_unlock(&auth_domain_cache.hash_lock);
+	cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time);
+	cache_get(&item->h);
+	return item;
+out_nada:
+	tmp = NULL;
+out_noset:
+	read_unlock(&auth_domain_cache.hash_lock);
+	return tmp;
+}
+
+struct auth_domain *auth_domain_find(char *name)
+{
+	struct auth_domain *rv, ad;
+
+	ad.name = name;
+	rv = auth_domain_lookup(&ad, 0);
+	return rv;
+}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
new file mode 100644
index 00000000000..2b99b4028d3
--- /dev/null
+++ b/net/sunrpc/svcauth_unix.c
@@ -0,0 +1,502 @@
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/err.h>
+#include <linux/seq_file.h>
+#include <linux/hash.h>
+
+#define RPCDBG_FACILITY	RPCDBG_AUTH
+
+
+/*
+ * AUTHUNIX and AUTHNULL credentials are both handled here.
+ * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
+ * are always nobody (-2).  i.e. we do the same IP address checks for
+ * AUTHNULL as for AUTHUNIX, and that is done here.
+ */
+
+
+static char *strdup(char *s)
+{
+	char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
+	if (rv)
+		strcpy(rv, s);
+	return rv;
+}
+
+struct unix_domain {
+	struct auth_domain	h;
+	int	addr_changes;
+	/* other stuff later */
+};
+
+struct auth_domain *unix_domain_find(char *name)
+{
+	struct auth_domain *rv, ud;
+	struct unix_domain *new;
+
+	ud.name = name;
+	
+	rv = auth_domain_lookup(&ud, 0);
+
+ foundit:
+	if (rv && rv->flavour != RPC_AUTH_UNIX) {
+		auth_domain_put(rv);
+		return NULL;
+	}
+	if (rv)
+		return rv;
+
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (new == NULL)
+		return NULL;
+	cache_init(&new->h.h);
+	new->h.name = strdup(name);
+	new->h.flavour = RPC_AUTH_UNIX;
+	new->addr_changes = 0;
+	new->h.h.expiry_time = NEVER;
+
+	rv = auth_domain_lookup(&new->h, 2);
+	if (rv == &new->h) {
+		if (atomic_dec_and_test(&new->h.h.refcnt)) BUG();
+	} else {
+		auth_domain_put(&new->h);
+		goto foundit;
+	}
+
+	return rv;
+}
+
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+	struct unix_domain *ud = container_of(dom, struct unix_domain, h);
+
+	kfree(dom->name);
+	kfree(ud);
+}
+
+
+/**************************************************
+ * cache for IP address to unix_domain
+ * as needed by AUTH_UNIX
+ */
+#define	IP_HASHBITS	8
+#define	IP_HASHMAX	(1<<IP_HASHBITS)
+#define	IP_HASHMASK	(IP_HASHMAX-1)
+
+struct ip_map {
+	struct cache_head	h;
+	char			m_class[8]; /* e.g. "nfsd" */
+	struct in_addr		m_addr;
+	struct unix_domain	*m_client;
+	int			m_add_change;
+};
+static struct cache_head	*ip_table[IP_HASHMAX];
+
+static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
+{
+	struct ip_map *im = container_of(item, struct ip_map,h);
+	if (cache_put(item, cd)) {
+		if (test_bit(CACHE_VALID, &item->flags) &&
+		    !test_bit(CACHE_NEGATIVE, &item->flags))
+			auth_domain_put(&im->m_client->h);
+		kfree(im);
+	}
+}
+
+static inline int ip_map_hash(struct ip_map *item)
+{
+	return hash_str(item->m_class, IP_HASHBITS) ^ 
+		hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS);
+}
+static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp)
+{
+	return strcmp(tmp->m_class, item->m_class) == 0
+		&& tmp->m_addr.s_addr == item->m_addr.s_addr;
+}
+static inline void ip_map_init(struct ip_map *new, struct ip_map *item)
+{
+	strcpy(new->m_class, item->m_class);
+	new->m_addr.s_addr = item->m_addr.s_addr;
+}
+static inline void ip_map_update(struct ip_map *new, struct ip_map *item)
+{
+	cache_get(&item->m_client->h.h);
+	new->m_client = item->m_client;
+	new->m_add_change = item->m_add_change;
+}
+
+static void ip_map_request(struct cache_detail *cd,
+				  struct cache_head *h,
+				  char **bpp, int *blen)
+{
+	char text_addr[20];
+	struct ip_map *im = container_of(h, struct ip_map, h);
+	__u32 addr = im->m_addr.s_addr;
+	
+	snprintf(text_addr, 20, "%u.%u.%u.%u",
+		 ntohl(addr) >> 24 & 0xff,
+		 ntohl(addr) >> 16 & 0xff,
+		 ntohl(addr) >>  8 & 0xff,
+		 ntohl(addr) >>  0 & 0xff);
+
+	qword_add(bpp, blen, im->m_class);
+	qword_add(bpp, blen, text_addr);
+	(*bpp)[-1] = '\n';
+}
+
+static struct ip_map *ip_map_lookup(struct ip_map *, int);
+
+static int ip_map_parse(struct cache_detail *cd,
+			  char *mesg, int mlen)
+{
+	/* class ipaddress [domainname] */
+	/* should be safe just to use the start of the input buffer
+	 * for scratch: */
+	char *buf = mesg;
+	int len;
+	int b1,b2,b3,b4;
+	char c;
+	struct ip_map ipm, *ipmp;
+	struct auth_domain *dom;
+	time_t expiry;
+
+	if (mesg[mlen-1] != '\n')
+		return -EINVAL;
+	mesg[mlen-1] = 0;
+
+	/* class */
+	len = qword_get(&mesg, ipm.m_class, sizeof(ipm.m_class));
+	if (len <= 0) return -EINVAL;
+
+	/* ip address */
+	len = qword_get(&mesg, buf, mlen);
+	if (len <= 0) return -EINVAL;
+
+	if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
+		return -EINVAL;
+	
+	expiry = get_expiry(&mesg);
+	if (expiry ==0)
+		return -EINVAL;
+
+	/* domainname, or empty for NEGATIVE */
+	len = qword_get(&mesg, buf, mlen);
+	if (len < 0) return -EINVAL;
+
+	if (len) {
+		dom = unix_domain_find(buf);
+		if (dom == NULL)
+			return -ENOENT;
+	} else
+		dom = NULL;
+
+	ipm.m_addr.s_addr =
+		htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+	ipm.h.flags = 0;
+	if (dom) {
+		ipm.m_client = container_of(dom, struct unix_domain, h);
+		ipm.m_add_change = ipm.m_client->addr_changes;
+	} else
+		set_bit(CACHE_NEGATIVE, &ipm.h.flags);
+	ipm.h.expiry_time = expiry;
+
+	ipmp = ip_map_lookup(&ipm, 1);
+	if (ipmp)
+		ip_map_put(&ipmp->h, &ip_map_cache);
+	if (dom)
+		auth_domain_put(dom);
+	if (!ipmp)
+		return -ENOMEM;
+	cache_flush();
+	return 0;
+}
+
+static int ip_map_show(struct seq_file *m,
+		       struct cache_detail *cd,
+		       struct cache_head *h)
+{
+	struct ip_map *im;
+	struct in_addr addr;
+	char *dom = "-no-domain-";
+
+	if (h == NULL) {
+		seq_puts(m, "#class IP domain\n");
+		return 0;
+	}
+	im = container_of(h, struct ip_map, h);
+	/* class addr domain */
+	addr = im->m_addr;
+
+	if (test_bit(CACHE_VALID, &h->flags) && 
+	    !test_bit(CACHE_NEGATIVE, &h->flags))
+		dom = im->m_client->h.name;
+
+	seq_printf(m, "%s %d.%d.%d.%d %s\n",
+		   im->m_class,
+		   htonl(addr.s_addr) >> 24 & 0xff,
+		   htonl(addr.s_addr) >> 16 & 0xff,
+		   htonl(addr.s_addr) >>  8 & 0xff,
+		   htonl(addr.s_addr) >>  0 & 0xff,
+		   dom
+		   );
+	return 0;
+}
+	
+
+struct cache_detail ip_map_cache = {
+	.hash_size	= IP_HASHMAX,
+	.hash_table	= ip_table,
+	.name		= "auth.unix.ip",
+	.cache_put	= ip_map_put,
+	.cache_request	= ip_map_request,
+	.cache_parse	= ip_map_parse,
+	.cache_show	= ip_map_show,
+};
+
+static DefineSimpleCacheLookup(ip_map, 0)
+
+
+int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
+{
+	struct unix_domain *udom;
+	struct ip_map ip, *ipmp;
+
+	if (dom->flavour != RPC_AUTH_UNIX)
+		return -EINVAL;
+	udom = container_of(dom, struct unix_domain, h);
+	strcpy(ip.m_class, "nfsd");
+	ip.m_addr = addr;
+	ip.m_client = udom;
+	ip.m_add_change = udom->addr_changes+1;
+	ip.h.flags = 0;
+	ip.h.expiry_time = NEVER;
+	
+	ipmp = ip_map_lookup(&ip, 1);
+
+	if (ipmp) {
+		ip_map_put(&ipmp->h, &ip_map_cache);
+		return 0;
+	} else
+		return -ENOMEM;
+}
+
+int auth_unix_forget_old(struct auth_domain *dom)
+{
+	struct unix_domain *udom;
+	
+	if (dom->flavour != RPC_AUTH_UNIX)
+		return -EINVAL;
+	udom = container_of(dom, struct unix_domain, h);
+	udom->addr_changes++;
+	return 0;
+}
+
+struct auth_domain *auth_unix_lookup(struct in_addr addr)
+{
+	struct ip_map key, *ipm;
+	struct auth_domain *rv;
+
+	strcpy(key.m_class, "nfsd");
+	key.m_addr = addr;
+
+	ipm = ip_map_lookup(&key, 0);
+
+	if (!ipm)
+		return NULL;
+	if (cache_check(&ip_map_cache, &ipm->h, NULL))
+		return NULL;
+
+	if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
+		if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0)
+			auth_domain_put(&ipm->m_client->h);
+		rv = NULL;
+	} else {
+		rv = &ipm->m_client->h;
+		cache_get(&rv->h);
+	}
+	ip_map_put(&ipm->h, &ip_map_cache);
+	return rv;
+}
+
+void svcauth_unix_purge(void)
+{
+	cache_purge(&ip_map_cache);
+	cache_purge(&auth_domain_cache);
+}
+
+static int
+svcauth_unix_set_client(struct svc_rqst *rqstp)
+{
+	struct ip_map key, *ipm;
+
+	rqstp->rq_client = NULL;
+	if (rqstp->rq_proc == 0)
+		return SVC_OK;
+
+	strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
+	key.m_addr = rqstp->rq_addr.sin_addr;
+
+	ipm = ip_map_lookup(&key, 0);
+
+	if (ipm == NULL)
+		return SVC_DENIED;
+
+	switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
+		default:
+			BUG();
+		case -EAGAIN:
+			return SVC_DROP;
+		case -ENOENT:
+			return SVC_DENIED;
+		case 0:
+			rqstp->rq_client = &ipm->m_client->h;
+			cache_get(&rqstp->rq_client->h);
+			ip_map_put(&ipm->h, &ip_map_cache);
+			break;
+	}
+	return SVC_OK;
+}
+
+static int
+svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
+{
+	struct kvec	*argv = &rqstp->rq_arg.head[0];
+	struct kvec	*resv = &rqstp->rq_res.head[0];
+	struct svc_cred	*cred = &rqstp->rq_cred;
+
+	cred->cr_group_info = NULL;
+	rqstp->rq_client = NULL;
+
+	if (argv->iov_len < 3*4)
+		return SVC_GARBAGE;
+
+	if (svc_getu32(argv) != 0) { 
+		dprintk("svc: bad null cred\n");
+		*authp = rpc_autherr_badcred;
+		return SVC_DENIED;
+	}
+	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+		dprintk("svc: bad null verf\n");
+		*authp = rpc_autherr_badverf;
+		return SVC_DENIED;
+	}
+
+	/* Signal that mapping to nobody uid/gid is required */
+	cred->cr_uid = (uid_t) -1;
+	cred->cr_gid = (gid_t) -1;
+	cred->cr_group_info = groups_alloc(0);
+	if (cred->cr_group_info == NULL)
+		return SVC_DROP; /* kmalloc failure - client must retry */
+
+	/* Put NULL verifier */
+	svc_putu32(resv, RPC_AUTH_NULL);
+	svc_putu32(resv, 0);
+
+	return SVC_OK;
+}
+
+static int
+svcauth_null_release(struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_client)
+		auth_domain_put(rqstp->rq_client);
+	rqstp->rq_client = NULL;
+	if (rqstp->rq_cred.cr_group_info)
+		put_group_info(rqstp->rq_cred.cr_group_info);
+	rqstp->rq_cred.cr_group_info = NULL;
+
+	return 0; /* don't drop */
+}
+
+
+struct auth_ops svcauth_null = {
+	.name		= "null",
+	.owner		= THIS_MODULE,
+	.flavour	= RPC_AUTH_NULL,
+	.accept 	= svcauth_null_accept,
+	.release	= svcauth_null_release,
+	.set_client	= svcauth_unix_set_client,
+};
+
+
+static int
+svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
+{
+	struct kvec	*argv = &rqstp->rq_arg.head[0];
+	struct kvec	*resv = &rqstp->rq_res.head[0];
+	struct svc_cred	*cred = &rqstp->rq_cred;
+	u32		slen, i;
+	int		len   = argv->iov_len;
+
+	cred->cr_group_info = NULL;
+	rqstp->rq_client = NULL;
+
+	if ((len -= 3*4) < 0)
+		return SVC_GARBAGE;
+
+	svc_getu32(argv);			/* length */
+	svc_getu32(argv);			/* time stamp */
+	slen = XDR_QUADLEN(ntohl(svc_getu32(argv)));	/* machname length */
+	if (slen > 64 || (len -= (slen + 3)*4) < 0)
+		goto badcred;
+	argv->iov_base = (void*)((u32*)argv->iov_base + slen);	/* skip machname */
+	argv->iov_len -= slen*4;
+
+	cred->cr_uid = ntohl(svc_getu32(argv));		/* uid */
+	cred->cr_gid = ntohl(svc_getu32(argv));		/* gid */
+	slen = ntohl(svc_getu32(argv));			/* gids length */
+	if (slen > 16 || (len -= (slen + 2)*4) < 0)
+		goto badcred;
+	cred->cr_group_info = groups_alloc(slen);
+	if (cred->cr_group_info == NULL)
+		return SVC_DROP;
+	for (i = 0; i < slen; i++)
+		GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
+
+	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+		*authp = rpc_autherr_badverf;
+		return SVC_DENIED;
+	}
+
+	/* Put NULL verifier */
+	svc_putu32(resv, RPC_AUTH_NULL);
+	svc_putu32(resv, 0);
+
+	return SVC_OK;
+
+badcred:
+	*authp = rpc_autherr_badcred;
+	return SVC_DENIED;
+}
+
+static int
+svcauth_unix_release(struct svc_rqst *rqstp)
+{
+	/* Verifier (such as it is) is already in place.
+	 */
+	if (rqstp->rq_client)
+		auth_domain_put(rqstp->rq_client);
+	rqstp->rq_client = NULL;
+	if (rqstp->rq_cred.cr_group_info)
+		put_group_info(rqstp->rq_cred.cr_group_info);
+	rqstp->rq_cred.cr_group_info = NULL;
+
+	return 0;
+}
+
+
+struct auth_ops svcauth_unix = {
+	.name		= "unix",
+	.owner		= THIS_MODULE,
+	.flavour	= RPC_AUTH_UNIX,
+	.accept 	= svcauth_unix_accept,
+	.release	= svcauth_unix_release,
+	.domain_release	= svcauth_unix_domain_release,
+	.set_client	= svcauth_unix_set_client,
+};
+
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
new file mode 100644
index 00000000000..05907035bc9
--- /dev/null
+++ b/net/sunrpc/svcsock.c
@@ -0,0 +1,1585 @@
+/*
+ * linux/net/sunrpc/svcsock.c
+ *
+ * These are the RPC server socket internals.
+ *
+ * The server scheduling algorithm does not always distribute the load
+ * evenly when servicing a single client. May need to modify the
+ * svc_sock_enqueue procedure...
+ *
+ * TCP support is largely untested and may be a little slow. The problem
+ * is that we currently do two separate recvfrom's, one for the 4-byte
+ * record length, and the second for the actual record. This could possibly
+ * be improved by always reading a minimum size of around 100 bytes and
+ * tucking any superfluous bytes away in a temporary store. Still, that
+ * leaves write requests out in the rain. An alternative may be to peek at
+ * the first skb in the queue, and if it matches the next TCP sequence
+ * number, to extract the record marker. Yuck.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <asm/ioctls.h>
+
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/stats.h>
+
+/* SMP locking strategy:
+ *
+ * 	svc_serv->sv_lock protects most stuff for that service.
+ *
+ *	Some flags can be set to certain values at any time
+ *	providing that certain rules are followed:
+ *
+ *	SK_BUSY  can be set to 0 at any time.  
+ *		svc_sock_enqueue must be called afterwards
+ *	SK_CONN, SK_DATA, can be set or cleared at any time.
+ *		after a set, svc_sock_enqueue must be called.	
+ *		after a clear, the socket must be read/accepted
+ *		 if this succeeds, it must be set again.
+ *	SK_CLOSE can set at any time. It is never cleared.
+ *
+ */
+
+#define RPCDBG_FACILITY	RPCDBG_SVCSOCK
+
+
+static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
+					 int *errp, int pmap_reg);
+static void		svc_udp_data_ready(struct sock *, int);
+static int		svc_udp_recvfrom(struct svc_rqst *);
+static int		svc_udp_sendto(struct svc_rqst *);
+
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
+static int svc_deferred_recv(struct svc_rqst *rqstp);
+static struct cache_deferred_req *svc_defer(struct cache_req *req);
+
+/*
+ * Queue up an idle server thread.  Must have serv->sv_lock held.
+ * Note: this is really a stack rather than a queue, so that we only
+ * use as many different threads as we need, and the rest don't polute
+ * the cache.
+ */
+static inline void
+svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
+{
+	list_add(&rqstp->rq_list, &serv->sv_threads);
+}
+
+/*
+ * Dequeue an nfsd thread.  Must have serv->sv_lock held.
+ */
+static inline void
+svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
+{
+	list_del(&rqstp->rq_list);
+}
+
+/*
+ * Release an skbuff after use
+ */
+static inline void
+svc_release_skb(struct svc_rqst *rqstp)
+{
+	struct sk_buff *skb = rqstp->rq_skbuff;
+	struct svc_deferred_req *dr = rqstp->rq_deferred;
+
+	if (skb) {
+		rqstp->rq_skbuff = NULL;
+
+		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
+		skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
+	}
+	if (dr) {
+		rqstp->rq_deferred = NULL;
+		kfree(dr);
+	}
+}
+
+/*
+ * Any space to write?
+ */
+static inline unsigned long
+svc_sock_wspace(struct svc_sock *svsk)
+{
+	int wspace;
+
+	if (svsk->sk_sock->type == SOCK_STREAM)
+		wspace = sk_stream_wspace(svsk->sk_sk);
+	else
+		wspace = sock_wspace(svsk->sk_sk);
+
+	return wspace;
+}
+
+/*
+ * Queue up a socket with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+static void
+svc_sock_enqueue(struct svc_sock *svsk)
+{
+	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_rqst	*rqstp;
+
+	if (!(svsk->sk_flags &
+	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
+		return;
+	if (test_bit(SK_DEAD, &svsk->sk_flags))
+		return;
+
+	spin_lock_bh(&serv->sv_lock);
+
+	if (!list_empty(&serv->sv_threads) && 
+	    !list_empty(&serv->sv_sockets))
+		printk(KERN_ERR
+			"svc_sock_enqueue: threads and sockets both waiting??\n");
+
+	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
+		/* Don't enqueue dead sockets */
+		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
+		goto out_unlock;
+	}
+
+	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+		/* Don't enqueue socket while daemon is receiving */
+		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
+		goto out_unlock;
+	}
+
+	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	if (((svsk->sk_reserved + serv->sv_bufsz)*2
+	     > svc_sock_wspace(svsk))
+	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
+	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
+		/* Don't enqueue while not enough space for reply */
+		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
+			svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
+			svc_sock_wspace(svsk));
+		goto out_unlock;
+	}
+	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+
+	/* Mark socket as busy. It will remain in this state until the
+	 * server has processed all pending data and put the socket back
+	 * on the idle list.
+	 */
+	set_bit(SK_BUSY, &svsk->sk_flags);
+
+	if (!list_empty(&serv->sv_threads)) {
+		rqstp = list_entry(serv->sv_threads.next,
+				   struct svc_rqst,
+				   rq_list);
+		dprintk("svc: socket %p served by daemon %p\n",
+			svsk->sk_sk, rqstp);
+		svc_serv_dequeue(serv, rqstp);
+		if (rqstp->rq_sock)
+			printk(KERN_ERR 
+				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
+				rqstp, rqstp->rq_sock);
+		rqstp->rq_sock = svsk;
+		svsk->sk_inuse++;
+		rqstp->rq_reserved = serv->sv_bufsz;
+		svsk->sk_reserved += rqstp->rq_reserved;
+		wake_up(&rqstp->rq_wait);
+	} else {
+		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
+		list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+	}
+
+out_unlock:
+	spin_unlock_bh(&serv->sv_lock);
+}
+
+/*
+ * Dequeue the first socket.  Must be called with the serv->sv_lock held.
+ */
+static inline struct svc_sock *
+svc_sock_dequeue(struct svc_serv *serv)
+{
+	struct svc_sock	*svsk;
+
+	if (list_empty(&serv->sv_sockets))
+		return NULL;
+
+	svsk = list_entry(serv->sv_sockets.next,
+			  struct svc_sock, sk_ready);
+	list_del_init(&svsk->sk_ready);
+
+	dprintk("svc: socket %p dequeued, inuse=%d\n",
+		svsk->sk_sk, svsk->sk_inuse);
+
+	return svsk;
+}
+
+/*
+ * Having read something from a socket, check whether it
+ * needs to be re-enqueued.
+ * Note: SK_DATA only gets cleared when a read-attempt finds
+ * no (or insufficient) data.
+ */
+static inline void
+svc_sock_received(struct svc_sock *svsk)
+{
+	clear_bit(SK_BUSY, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+}
+
+
+/**
+ * svc_reserve - change the space reserved for the reply to a request.
+ * @rqstp:  The request in question
+ * @space: new max space to reserve
+ *
+ * Each request reserves some space on the output queue of the socket
+ * to make sure the reply fits.  This function reduces that reserved
+ * space to be the amount of space used already, plus @space.
+ *
+ */
+void svc_reserve(struct svc_rqst *rqstp, int space)
+{
+	space += rqstp->rq_res.head[0].iov_len;
+
+	if (space < rqstp->rq_reserved) {
+		struct svc_sock *svsk = rqstp->rq_sock;
+		spin_lock_bh(&svsk->sk_server->sv_lock);
+		svsk->sk_reserved -= (rqstp->rq_reserved - space);
+		rqstp->rq_reserved = space;
+		spin_unlock_bh(&svsk->sk_server->sv_lock);
+
+		svc_sock_enqueue(svsk);
+	}
+}
+
+/*
+ * Release a socket after use.
+ */
+static inline void
+svc_sock_put(struct svc_sock *svsk)
+{
+	struct svc_serv *serv = svsk->sk_server;
+
+	spin_lock_bh(&serv->sv_lock);
+	if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
+		spin_unlock_bh(&serv->sv_lock);
+		dprintk("svc: releasing dead socket\n");
+		sock_release(svsk->sk_sock);
+		kfree(svsk);
+	}
+	else
+		spin_unlock_bh(&serv->sv_lock);
+}
+
+static void
+svc_sock_release(struct svc_rqst *rqstp)
+{
+	struct svc_sock	*svsk = rqstp->rq_sock;
+
+	svc_release_skb(rqstp);
+
+	svc_free_allpages(rqstp);
+	rqstp->rq_res.page_len = 0;
+	rqstp->rq_res.page_base = 0;
+
+
+	/* Reset response buffer and release
+	 * the reservation.
+	 * But first, check that enough space was reserved
+	 * for the reply, otherwise we have a bug!
+	 */
+	if ((rqstp->rq_res.len) >  rqstp->rq_reserved)
+		printk(KERN_ERR "RPC request reserved %d but used %d\n",
+		       rqstp->rq_reserved,
+		       rqstp->rq_res.len);
+
+	rqstp->rq_res.head[0].iov_len = 0;
+	svc_reserve(rqstp, 0);
+	rqstp->rq_sock = NULL;
+
+	svc_sock_put(svsk);
+}
+
+/*
+ * External function to wake up a server waiting for data
+ */
+void
+svc_wake_up(struct svc_serv *serv)
+{
+	struct svc_rqst	*rqstp;
+
+	spin_lock_bh(&serv->sv_lock);
+	if (!list_empty(&serv->sv_threads)) {
+		rqstp = list_entry(serv->sv_threads.next,
+				   struct svc_rqst,
+				   rq_list);
+		dprintk("svc: daemon %p woken up.\n", rqstp);
+		/*
+		svc_serv_dequeue(serv, rqstp);
+		rqstp->rq_sock = NULL;
+		 */
+		wake_up(&rqstp->rq_wait);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+}
+
+/*
+ * Generic sendto routine
+ */
+static int
+svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
+{
+	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct socket	*sock = svsk->sk_sock;
+	int		slen;
+	char 		buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
+	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
+	struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
+	int		len = 0;
+	int		result;
+	int		size;
+	struct page	**ppage = xdr->pages;
+	size_t		base = xdr->page_base;
+	unsigned int	pglen = xdr->page_len;
+	unsigned int	flags = MSG_MORE;
+
+	slen = xdr->len;
+
+	if (rqstp->rq_prot == IPPROTO_UDP) {
+		/* set the source and destination */
+		struct msghdr	msg;
+		msg.msg_name    = &rqstp->rq_addr;
+		msg.msg_namelen = sizeof(rqstp->rq_addr);
+		msg.msg_iov     = NULL;
+		msg.msg_iovlen  = 0;
+		msg.msg_flags	= MSG_MORE;
+
+		msg.msg_control = cmh;
+		msg.msg_controllen = sizeof(buffer);
+		cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
+		cmh->cmsg_level = SOL_IP;
+		cmh->cmsg_type = IP_PKTINFO;
+		pki->ipi_ifindex = 0;
+		pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
+
+		if (sock_sendmsg(sock, &msg, 0) < 0)
+			goto out;
+	}
+
+	/* send head */
+	if (slen == xdr->head[0].iov_len)
+		flags = 0;
+	len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+	if (len != xdr->head[0].iov_len)
+		goto out;
+	slen -= xdr->head[0].iov_len;
+	if (slen == 0)
+		goto out;
+
+	/* send page data */
+	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
+	while (pglen > 0) {
+		if (slen == size)
+			flags = 0;
+		result = sock->ops->sendpage(sock, *ppage, base, size, flags);
+		if (result > 0)
+			len += result;
+		if (result != size)
+			goto out;
+		slen -= size;
+		pglen -= size;
+		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
+		base = 0;
+		ppage++;
+	}
+	/* send tail */
+	if (xdr->tail[0].iov_len) {
+		result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 
+					     ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
+					     xdr->tail[0].iov_len, 0);
+
+		if (result > 0)
+			len += result;
+	}
+out:
+	dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n",
+			rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
+		rqstp->rq_addr.sin_addr.s_addr);
+
+	return len;
+}
+
+/*
+ * Check input queue length
+ */
+static int
+svc_recv_available(struct svc_sock *svsk)
+{
+	mm_segment_t	oldfs;
+	struct socket	*sock = svsk->sk_sock;
+	int		avail, err;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
+	set_fs(oldfs);
+
+	return (err >= 0)? avail : err;
+}
+
+/*
+ * Generic recvfrom routine.
+ */
+static int
+svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
+{
+	struct msghdr	msg;
+	struct socket	*sock;
+	int		len, alen;
+
+	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+	sock = rqstp->rq_sock->sk_sock;
+
+	msg.msg_name    = &rqstp->rq_addr;
+	msg.msg_namelen = sizeof(rqstp->rq_addr);
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+
+	msg.msg_flags	= MSG_DONTWAIT;
+
+	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+
+	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
+	 * possibly we should cache this in the svc_sock structure
+	 * at accept time. FIXME
+	 */
+	alen = sizeof(rqstp->rq_addr);
+	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+
+	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
+		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
+
+	return len;
+}
+
+/*
+ * Set socket snd and rcv buffer lengths
+ */
+static inline void
+svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
+{
+#if 0
+	mm_segment_t	oldfs;
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+			(char*)&snd, sizeof(snd));
+	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
+			(char*)&rcv, sizeof(rcv));
+#else
+	/* sock_setsockopt limits use to sysctl_?mem_max,
+	 * which isn't acceptable.  Until that is made conditional
+	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
+	 * DaveM said I could!
+	 */
+	lock_sock(sock->sk);
+	sock->sk->sk_sndbuf = snd * 2;
+	sock->sk->sk_rcvbuf = rcv * 2;
+	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
+	release_sock(sock->sk);
+#endif
+}
+/*
+ * INET callback when data has been received on the socket.
+ */
+static void
+svc_udp_data_ready(struct sock *sk, int count)
+{
+	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);
+
+	if (!svsk)
+		goto out;
+	dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
+		svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
+	set_bit(SK_DATA, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+ out:
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+}
+
+/*
+ * INET callback when space is newly available on the socket.
+ */
+static void
+svc_write_space(struct sock *sk)
+{
+	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);
+
+	if (svsk) {
+		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
+			svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
+		svc_sock_enqueue(svsk);
+	}
+
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
+		printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n",
+		       svsk);
+		wake_up_interruptible(sk->sk_sleep);
+	}
+}
+
+/*
+ * Receive a datagram from a UDP socket.
+ */
+extern int
+csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
+
+static int
+svc_udp_recvfrom(struct svc_rqst *rqstp)
+{
+	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_serv	*serv = svsk->sk_server;
+	struct sk_buff	*skb;
+	int		err, len;
+
+	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
+	    /* udp sockets need large rcvbuf as all pending
+	     * requests are still in that buffer.  sndbuf must
+	     * also be large enough that there is enough space
+	     * for one reply per thread.
+	     */
+	    svc_sock_setbufsize(svsk->sk_sock,
+				(serv->sv_nrthreads+3) * serv->sv_bufsz,
+				(serv->sv_nrthreads+3) * serv->sv_bufsz);
+
+	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
+		svc_sock_received(svsk);
+		return svc_deferred_recv(rqstp);
+	}
+
+	clear_bit(SK_DATA, &svsk->sk_flags);
+	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+		if (err == -EAGAIN) {
+			svc_sock_received(svsk);
+			return err;
+		}
+		/* possibly an icmp error */
+		dprintk("svc: recvfrom returned error %d\n", -err);
+	}
+	if (skb->stamp.tv_sec == 0) {
+		skb->stamp.tv_sec = xtime.tv_sec; 
+		skb->stamp.tv_usec = xtime.tv_nsec * 1000; 
+		/* Don't enable netstamp, sunrpc doesn't 
+		   need that much accuracy */
+	}
+	svsk->sk_sk->sk_stamp = skb->stamp;
+	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
+
+	/*
+	 * Maybe more packets - kick another thread ASAP.
+	 */
+	svc_sock_received(svsk);
+
+	len  = skb->len - sizeof(struct udphdr);
+	rqstp->rq_arg.len = len;
+
+	rqstp->rq_prot        = IPPROTO_UDP;
+
+	/* Get sender address */
+	rqstp->rq_addr.sin_family = AF_INET;
+	rqstp->rq_addr.sin_port = skb->h.uh->source;
+	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
+	rqstp->rq_daddr = skb->nh.iph->daddr;
+
+	if (skb_is_nonlinear(skb)) {
+		/* we have to copy */
+		local_bh_disable();
+		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
+			local_bh_enable();
+			/* checksum error */
+			skb_free_datagram(svsk->sk_sk, skb);
+			return 0;
+		}
+		local_bh_enable();
+		skb_free_datagram(svsk->sk_sk, skb); 
+	} else {
+		/* we can use it in-place */
+		rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
+		rqstp->rq_arg.head[0].iov_len = len;
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
+			if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
+				skb_free_datagram(svsk->sk_sk, skb);
+				return 0;
+			}
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+		rqstp->rq_skbuff = skb;
+	}
+
+	rqstp->rq_arg.page_base = 0;
+	if (len <= rqstp->rq_arg.head[0].iov_len) {
+		rqstp->rq_arg.head[0].iov_len = len;
+		rqstp->rq_arg.page_len = 0;
+	} else {
+		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
+		rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
+	}
+
+	if (serv->sv_stats)
+		serv->sv_stats->netudpcnt++;
+
+	return len;
+}
+
+static int
+svc_udp_sendto(struct svc_rqst *rqstp)
+{
+	int		error;
+
+	error = svc_sendto(rqstp, &rqstp->rq_res);
+	if (error == -ECONNREFUSED)
+		/* ICMP error on earlier request. */
+		error = svc_sendto(rqstp, &rqstp->rq_res);
+
+	return error;
+}
+
+static void
+svc_udp_init(struct svc_sock *svsk)
+{
+	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
+	svsk->sk_sk->sk_write_space = svc_write_space;
+	svsk->sk_recvfrom = svc_udp_recvfrom;
+	svsk->sk_sendto = svc_udp_sendto;
+
+	/* initialise setting must have enough space to
+	 * receive and respond to one request.  
+	 * svc_udp_recvfrom will re-adjust if necessary
+	 */
+	svc_sock_setbufsize(svsk->sk_sock,
+			    3 * svsk->sk_server->sv_bufsz,
+			    3 * svsk->sk_server->sv_bufsz);
+
+	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
+	set_bit(SK_CHNGBUF, &svsk->sk_flags);
+}
+
+/*
+ * A data_ready event on a listening socket means there's a connection
+ * pending. Do not use state_change as a substitute for it.
+ */
+static void
+svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
+{
+	struct svc_sock	*svsk;
+
+	dprintk("svc: socket %p TCP (listen) state change %d\n",
+			sk, sk->sk_state);
+
+	if  (sk->sk_state != TCP_LISTEN) {
+		/*
+		 * This callback may called twice when a new connection
+		 * is established as a child socket inherits everything
+		 * from a parent LISTEN socket.
+		 * 1) data_ready method of the parent socket will be called
+		 *    when one of child sockets become ESTABLISHED.
+		 * 2) data_ready method of the child socket may be called
+		 *    when it receives data before the socket is accepted.
+		 * In case of 2, we should ignore it silently.
+		 */
+		goto out;
+	}
+	if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {
+		printk("svc: socket %p: no user data\n", sk);
+		goto out;
+	}
+	set_bit(SK_CONN, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+ out:
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_all(sk->sk_sleep);
+}
+
+/*
+ * A state change on a connected socket means it's dying or dead.
+ */
+static void
+svc_tcp_state_change(struct sock *sk)
+{
+	struct svc_sock	*svsk;
+
+	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
+			sk, sk->sk_state, sk->sk_user_data);
+
+	if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {
+		printk("svc: socket %p: no user data\n", sk);
+		goto out;
+	}
+	set_bit(SK_CLOSE, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+ out:
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_all(sk->sk_sleep);
+}
+
+static void
+svc_tcp_data_ready(struct sock *sk, int count)
+{
+	struct svc_sock *	svsk;
+
+	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
+			sk, sk->sk_user_data);
+	if (!(svsk = (struct svc_sock *)(sk->sk_user_data)))
+		goto out;
+	set_bit(SK_DATA, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+ out:
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible(sk->sk_sleep);
+}
+
+/*
+ * Accept a TCP connection
+ */
+static void
+svc_tcp_accept(struct svc_sock *svsk)
+{
+	struct sockaddr_in sin;
+	struct svc_serv	*serv = svsk->sk_server;
+	struct socket	*sock = svsk->sk_sock;
+	struct socket	*newsock;
+	struct proto_ops *ops;
+	struct svc_sock	*newsvsk;
+	int		err, slen;
+
+	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
+	if (!sock)
+		return;
+
+	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
+	if (err) {
+		if (err == -ENOMEM)
+			printk(KERN_WARNING "%s: no more sockets!\n",
+			       serv->sv_name);
+		return;
+	}
+
+	dprintk("svc: tcp_accept %p allocated\n", newsock);
+	newsock->ops = ops = sock->ops;
+
+	clear_bit(SK_CONN, &svsk->sk_flags);
+	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
+		if (err != -EAGAIN && net_ratelimit())
+			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
+				   serv->sv_name, -err);
+		goto failed;		/* aborted connection or whatever */
+	}
+	set_bit(SK_CONN, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+
+	slen = sizeof(sin);
+	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
+	if (err < 0) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
+				   serv->sv_name, -err);
+		goto failed;		/* aborted connection or whatever */
+	}
+
+	/* Ideally, we would want to reject connections from unauthorized
+	 * hosts here, but when we get encription, the IP of the host won't
+	 * tell us anything. For now just warn about unpriv connections.
+	 */
+	if (ntohs(sin.sin_port) >= 1024) {
+		dprintk(KERN_WARNING
+			"%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
+			serv->sv_name, 
+			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+	}
+
+	dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
+			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+
+	/* make sure that a write doesn't block forever when
+	 * low on memory
+	 */
+	newsock->sk->sk_sndtimeo = HZ*30;
+
+	if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
+		goto failed;
+
+
+	/* make sure that we don't have too many active connections.
+	 * If we have, something must be dropped.
+	 *
+	 * There's no point in trying to do random drop here for
+	 * DoS prevention. The NFS clients does 1 reconnect in 15
+	 * seconds. An attacker can easily beat that.
+	 *
+	 * The only somewhat efficient mechanism would be if drop
+	 * old connections from the same IP first. But right now
+	 * we don't even record the client IP in svc_sock.
+	 */
+	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+		struct svc_sock *svsk = NULL;
+		spin_lock_bh(&serv->sv_lock);
+		if (!list_empty(&serv->sv_tempsocks)) {
+			if (net_ratelimit()) {
+				/* Try to help the admin */
+				printk(KERN_NOTICE "%s: too many open TCP "
+					"sockets, consider increasing the "
+					"number of nfsd threads\n",
+						   serv->sv_name);
+				printk(KERN_NOTICE "%s: last TCP connect from "
+					"%u.%u.%u.%u:%d\n",
+					serv->sv_name,
+					NIPQUAD(sin.sin_addr.s_addr),
+					ntohs(sin.sin_port));
+			}
+			/*
+			 * Always select the oldest socket. It's not fair,
+			 * but so is life
+			 */
+			svsk = list_entry(serv->sv_tempsocks.prev,
+					  struct svc_sock,
+					  sk_list);
+			set_bit(SK_CLOSE, &svsk->sk_flags);
+			svsk->sk_inuse ++;
+		}
+		spin_unlock_bh(&serv->sv_lock);
+
+		if (svsk) {
+			svc_sock_enqueue(svsk);
+			svc_sock_put(svsk);
+		}
+
+	}
+
+	if (serv->sv_stats)
+		serv->sv_stats->nettcpconn++;
+
+	return;
+
+failed:
+	sock_release(newsock);
+	return;
+}
+
+/*
+ * Receive data from a TCP socket.
+ */
+static int
+svc_tcp_recvfrom(struct svc_rqst *rqstp)
+{
+	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_serv	*serv = svsk->sk_server;
+	int		len;
+	struct kvec vec[RPCSVC_MAXPAGES];
+	int pnum, vlen;
+
+	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
+		svsk, test_bit(SK_DATA, &svsk->sk_flags),
+		test_bit(SK_CONN, &svsk->sk_flags),
+		test_bit(SK_CLOSE, &svsk->sk_flags));
+
+	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
+		svc_sock_received(svsk);
+		return svc_deferred_recv(rqstp);
+	}
+
+	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+		svc_delete_socket(svsk);
+		return 0;
+	}
+
+	if (test_bit(SK_CONN, &svsk->sk_flags)) {
+		svc_tcp_accept(svsk);
+		svc_sock_received(svsk);
+		return 0;
+	}
+
+	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
+		/* sndbuf needs to have room for one request
+		 * per thread, otherwise we can stall even when the
+		 * network isn't a bottleneck.
+		 * rcvbuf just needs to be able to hold a few requests.
+		 * Normally they will be removed from the queue 
+		 * as soon a a complete request arrives.
+		 */
+		svc_sock_setbufsize(svsk->sk_sock,
+				    (serv->sv_nrthreads+3) * serv->sv_bufsz,
+				    3 * serv->sv_bufsz);
+
+	clear_bit(SK_DATA, &svsk->sk_flags);
+
+	/* Receive data. If we haven't got the record length yet, get
+	 * the next four bytes. Otherwise try to gobble up as much as
+	 * possible up to the complete record length.
+	 */
+	if (svsk->sk_tcplen < 4) {
+		unsigned long	want = 4 - svsk->sk_tcplen;
+		struct kvec	iov;
+
+		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
+		iov.iov_len  = want;
+		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
+			goto error;
+		svsk->sk_tcplen += len;
+
+		if (len < want) {
+			dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
+			        len, want);
+			svc_sock_received(svsk);
+			return -EAGAIN; /* record header not complete */
+		}
+
+		svsk->sk_reclen = ntohl(svsk->sk_reclen);
+		if (!(svsk->sk_reclen & 0x80000000)) {
+			/* FIXME: technically, a record can be fragmented,
+			 *  and non-terminal fragments will not have the top
+			 *  bit set in the fragment length header.
+			 *  But apparently no known nfs clients send fragmented
+			 *  records. */
+			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
+			       (unsigned long) svsk->sk_reclen);
+			goto err_delete;
+		}
+		svsk->sk_reclen &= 0x7fffffff;
+		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
+		if (svsk->sk_reclen > serv->sv_bufsz) {
+			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
+			       (unsigned long) svsk->sk_reclen);
+			goto err_delete;
+		}
+	}
+
+	/* Check whether enough data is available */
+	len = svc_recv_available(svsk);
+	if (len < 0)
+		goto error;
+
+	if (len < svsk->sk_reclen) {
+		dprintk("svc: incomplete TCP record (%d of %d)\n",
+			len, svsk->sk_reclen);
+		svc_sock_received(svsk);
+		return -EAGAIN;	/* record not complete */
+	}
+	len = svsk->sk_reclen;
+	set_bit(SK_DATA, &svsk->sk_flags);
+
+	vec[0] = rqstp->rq_arg.head[0];
+	vlen = PAGE_SIZE;
+	pnum = 1;
+	while (vlen < len) {
+		vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
+		vec[pnum].iov_len = PAGE_SIZE;
+		pnum++;
+		vlen += PAGE_SIZE;
+	}
+
+	/* Now receive data */
+	len = svc_recvfrom(rqstp, vec, pnum, len);
+	if (len < 0)
+		goto error;
+
+	dprintk("svc: TCP complete record (%d bytes)\n", len);
+	rqstp->rq_arg.len = len;
+	rqstp->rq_arg.page_base = 0;
+	if (len <= rqstp->rq_arg.head[0].iov_len) {
+		rqstp->rq_arg.head[0].iov_len = len;
+		rqstp->rq_arg.page_len = 0;
+	} else {
+		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
+	}
+
+	rqstp->rq_skbuff      = NULL;
+	rqstp->rq_prot	      = IPPROTO_TCP;
+
+	/* Reset TCP read info */
+	svsk->sk_reclen = 0;
+	svsk->sk_tcplen = 0;
+
+	svc_sock_received(svsk);
+	if (serv->sv_stats)
+		serv->sv_stats->nettcpcnt++;
+
+	return len;
+
+ err_delete:
+	svc_delete_socket(svsk);
+	return -EAGAIN;
+
+ error:
+	if (len == -EAGAIN) {
+		dprintk("RPC: TCP recvfrom got EAGAIN\n");
+		svc_sock_received(svsk);
+	} else {
+		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
+					svsk->sk_server->sv_name, -len);
+		svc_sock_received(svsk);
+	}
+
+	return len;
+}
+
+/*
+ * Send out data on TCP socket.
+ */
+static int
+svc_tcp_sendto(struct svc_rqst *rqstp)
+{
+	struct xdr_buf	*xbufp = &rqstp->rq_res;
+	int sent;
+	u32 reclen;
+
+	/* Set up the first element of the reply kvec.
+	 * Any other kvecs that may be in use have been taken
+	 * care of by the server implementation itself.
+	 */
+	reclen = htonl(0x80000000|((xbufp->len ) - 4));
+	memcpy(xbufp->head[0].iov_base, &reclen, 4);
+
+	if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
+		return -ENOTCONN;
+
+	sent = svc_sendto(rqstp, &rqstp->rq_res);
+	if (sent != xbufp->len) {
+		printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
+		       rqstp->rq_sock->sk_server->sv_name,
+		       (sent<0)?"got error":"sent only",
+		       sent, xbufp->len);
+		svc_delete_socket(rqstp->rq_sock);
+		sent = -EAGAIN;
+	}
+	return sent;
+}
+
+static void
+svc_tcp_init(struct svc_sock *svsk)
+{
+	struct sock	*sk = svsk->sk_sk;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	svsk->sk_recvfrom = svc_tcp_recvfrom;
+	svsk->sk_sendto = svc_tcp_sendto;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		dprintk("setting up TCP socket for listening\n");
+		sk->sk_data_ready = svc_tcp_listen_data_ready;
+		set_bit(SK_CONN, &svsk->sk_flags);
+	} else {
+		dprintk("setting up TCP socket for reading\n");
+		sk->sk_state_change = svc_tcp_state_change;
+		sk->sk_data_ready = svc_tcp_data_ready;
+		sk->sk_write_space = svc_write_space;
+
+		svsk->sk_reclen = 0;
+		svsk->sk_tcplen = 0;
+
+		tp->nonagle = 1;        /* disable Nagle's algorithm */
+
+		/* initialise setting must have enough space to
+		 * receive and respond to one request.  
+		 * svc_tcp_recvfrom will re-adjust if necessary
+		 */
+		svc_sock_setbufsize(svsk->sk_sock,
+				    3 * svsk->sk_server->sv_bufsz,
+				    3 * svsk->sk_server->sv_bufsz);
+
+		set_bit(SK_CHNGBUF, &svsk->sk_flags);
+		set_bit(SK_DATA, &svsk->sk_flags);
+		if (sk->sk_state != TCP_ESTABLISHED) 
+			set_bit(SK_CLOSE, &svsk->sk_flags);
+	}
+}
+
+void
+svc_sock_update_bufs(struct svc_serv *serv)
+{
+	/*
+	 * The number of server threads has changed. Update
+	 * rcvbuf and sndbuf accordingly on all sockets
+	 */
+	struct list_head *le;
+
+	spin_lock_bh(&serv->sv_lock);
+	list_for_each(le, &serv->sv_permsocks) {
+		struct svc_sock *svsk = 
+			list_entry(le, struct svc_sock, sk_list);
+		set_bit(SK_CHNGBUF, &svsk->sk_flags);
+	}
+	list_for_each(le, &serv->sv_tempsocks) {
+		struct svc_sock *svsk =
+			list_entry(le, struct svc_sock, sk_list);
+		set_bit(SK_CHNGBUF, &svsk->sk_flags);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+}
+
+/*
+ * Receive the next request on any socket.
+ */
+int
+svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_sock		*svsk =NULL;
+	int			len;
+	int 			pages;
+	struct xdr_buf		*arg;
+	DECLARE_WAITQUEUE(wait, current);
+
+	dprintk("svc: server %p waiting for data (to = %ld)\n",
+		rqstp, timeout);
+
+	if (rqstp->rq_sock)
+		printk(KERN_ERR 
+			"svc_recv: service %p, socket not NULL!\n",
+			 rqstp);
+	if (waitqueue_active(&rqstp->rq_wait))
+		printk(KERN_ERR 
+			"svc_recv: service %p, wait queue active!\n",
+			 rqstp);
+
+	/* Initialize the buffers */
+	/* first reclaim pages that were moved to response list */
+	svc_pushback_allpages(rqstp);
+
+	/* now allocate needed pages.  If we get a failure, sleep briefly */
+	pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
+	while (rqstp->rq_arghi < pages) {
+		struct page *p = alloc_page(GFP_KERNEL);
+		if (!p) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ/2);
+			continue;
+		}
+		rqstp->rq_argpages[rqstp->rq_arghi++] = p;
+	}
+
+	/* Make arg->head point to first page and arg->pages point to rest */
+	arg = &rqstp->rq_arg;
+	arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
+	arg->head[0].iov_len = PAGE_SIZE;
+	rqstp->rq_argused = 1;
+	arg->pages = rqstp->rq_argpages + 1;
+	arg->page_base = 0;
+	/* save at least one page for response */
+	arg->page_len = (pages-2)*PAGE_SIZE;
+	arg->len = (pages-1)*PAGE_SIZE;
+	arg->tail[0].iov_len = 0;
+	
+	try_to_freeze(PF_FREEZE);
+	if (signalled())
+		return -EINTR;
+
+	spin_lock_bh(&serv->sv_lock);
+	if (!list_empty(&serv->sv_tempsocks)) {
+		svsk = list_entry(serv->sv_tempsocks.next,
+				  struct svc_sock, sk_list);
+		/* apparently the "standard" is that clients close
+		 * idle connections after 5 minutes, servers after
+		 * 6 minutes
+		 *   http://www.connectathon.org/talks96/nfstcp.pdf 
+		 */
+		if (get_seconds() - svsk->sk_lastrecv < 6*60
+		    || test_bit(SK_BUSY, &svsk->sk_flags))
+			svsk = NULL;
+	}
+	if (svsk) {
+		set_bit(SK_BUSY, &svsk->sk_flags);
+		set_bit(SK_CLOSE, &svsk->sk_flags);
+		rqstp->rq_sock = svsk;
+		svsk->sk_inuse++;
+	} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
+		rqstp->rq_sock = svsk;
+		svsk->sk_inuse++;
+		rqstp->rq_reserved = serv->sv_bufsz;	
+		svsk->sk_reserved += rqstp->rq_reserved;
+	} else {
+		/* No data pending. Go to sleep */
+		svc_serv_enqueue(serv, rqstp);
+
+		/*
+		 * We have to be able to interrupt this wait
+		 * to bring down the daemons ...
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&rqstp->rq_wait, &wait);
+		spin_unlock_bh(&serv->sv_lock);
+
+		schedule_timeout(timeout);
+
+		try_to_freeze(PF_FREEZE);
+
+		spin_lock_bh(&serv->sv_lock);
+		remove_wait_queue(&rqstp->rq_wait, &wait);
+
+		if (!(svsk = rqstp->rq_sock)) {
+			svc_serv_dequeue(serv, rqstp);
+			spin_unlock_bh(&serv->sv_lock);
+			dprintk("svc: server %p, no data yet\n", rqstp);
+			return signalled()? -EINTR : -EAGAIN;
+		}
+	}
+	spin_unlock_bh(&serv->sv_lock);
+
+	dprintk("svc: server %p, socket %p, inuse=%d\n",
+		 rqstp, svsk, svsk->sk_inuse);
+	len = svsk->sk_recvfrom(rqstp);
+	dprintk("svc: got len=%d\n", len);
+
+	/* No data, incomplete (TCP) read, or accept() */
+	if (len == 0 || len == -EAGAIN) {
+		rqstp->rq_res.len = 0;
+		svc_sock_release(rqstp);
+		return -EAGAIN;
+	}
+	svsk->sk_lastrecv = get_seconds();
+	if (test_bit(SK_TEMP, &svsk->sk_flags)) {
+		/* push active sockets to end of list */
+		spin_lock_bh(&serv->sv_lock);
+		if (!list_empty(&svsk->sk_list))
+			list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
+		spin_unlock_bh(&serv->sv_lock);
+	}
+
+	rqstp->rq_secure  = ntohs(rqstp->rq_addr.sin_port) < 1024;
+	rqstp->rq_chandle.defer = svc_defer;
+
+	if (serv->sv_stats)
+		serv->sv_stats->netcnt++;
+	return len;
+}
+
+/* 
+ * Drop request
+ */
+void
+svc_drop(struct svc_rqst *rqstp)
+{
+	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
+	svc_sock_release(rqstp);
+}
+
+/*
+ * Return reply to client.
+ */
+int
+svc_send(struct svc_rqst *rqstp)
+{
+	struct svc_sock	*svsk;
+	int		len;
+	struct xdr_buf	*xb;
+
+	if ((svsk = rqstp->rq_sock) == NULL) {
+		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
+				__FILE__, __LINE__);
+		return -EFAULT;
+	}
+
+	/* release the receive skb before sending the reply */
+	svc_release_skb(rqstp);
+
+	/* calculate over-all length */
+	xb = & rqstp->rq_res;
+	xb->len = xb->head[0].iov_len +
+		xb->page_len +
+		xb->tail[0].iov_len;
+
+	/* Grab svsk->sk_sem to serialize outgoing data. */
+	down(&svsk->sk_sem);
+	if (test_bit(SK_DEAD, &svsk->sk_flags))
+		len = -ENOTCONN;
+	else
+		len = svsk->sk_sendto(rqstp);
+	up(&svsk->sk_sem);
+	svc_sock_release(rqstp);
+
+	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
+		return 0;
+	return len;
+}
+
+/*
+ * Initialize socket for RPC use and create svc_sock struct
+ * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
+ */
+static struct svc_sock *
+svc_setup_socket(struct svc_serv *serv, struct socket *sock,
+					int *errp, int pmap_register)
+{
+	struct svc_sock	*svsk;
+	struct sock	*inet;
+
+	dprintk("svc: svc_setup_socket %p\n", sock);
+	if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
+		*errp = -ENOMEM;
+		return NULL;
+	}
+	memset(svsk, 0, sizeof(*svsk));
+
+	inet = sock->sk;
+
+	/* Register socket with portmapper */
+	if (*errp >= 0 && pmap_register)
+		*errp = svc_register(serv, inet->sk_protocol,
+				     ntohs(inet_sk(inet)->sport));
+
+	if (*errp < 0) {
+		kfree(svsk);
+		return NULL;
+	}
+
+	set_bit(SK_BUSY, &svsk->sk_flags);
+	inet->sk_user_data = svsk;
+	svsk->sk_sock = sock;
+	svsk->sk_sk = inet;
+	svsk->sk_ostate = inet->sk_state_change;
+	svsk->sk_odata = inet->sk_data_ready;
+	svsk->sk_owspace = inet->sk_write_space;
+	svsk->sk_server = serv;
+	svsk->sk_lastrecv = get_seconds();
+	INIT_LIST_HEAD(&svsk->sk_deferred);
+	INIT_LIST_HEAD(&svsk->sk_ready);
+	sema_init(&svsk->sk_sem, 1);
+
+	/* Initialize the socket */
+	if (sock->type == SOCK_DGRAM)
+		svc_udp_init(svsk);
+	else
+		svc_tcp_init(svsk);
+
+	spin_lock_bh(&serv->sv_lock);
+	if (!pmap_register) {
+		set_bit(SK_TEMP, &svsk->sk_flags);
+		list_add(&svsk->sk_list, &serv->sv_tempsocks);
+		serv->sv_tmpcnt++;
+	} else {
+		clear_bit(SK_TEMP, &svsk->sk_flags);
+		list_add(&svsk->sk_list, &serv->sv_permsocks);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+
+	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
+				svsk, svsk->sk_sk);
+
+	clear_bit(SK_BUSY, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+	return svsk;
+}
+
+/*
+ * Create socket for RPC service.
+ */
+static int
+svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
+{
+	struct svc_sock	*svsk;
+	struct socket	*sock;
+	int		error;
+	int		type;
+
+	dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
+				serv->sv_program->pg_name, protocol,
+				NIPQUAD(sin->sin_addr.s_addr),
+				ntohs(sin->sin_port));
+
+	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
+		printk(KERN_WARNING "svc: only UDP and TCP "
+				"sockets supported\n");
+		return -EINVAL;
+	}
+	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
+	if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
+		return error;
+
+	if (sin != NULL) {
+		if (type == SOCK_STREAM)
+			sock->sk->sk_reuse = 1; /* allow address reuse */
+		error = sock->ops->bind(sock, (struct sockaddr *) sin,
+						sizeof(*sin));
+		if (error < 0)
+			goto bummer;
+	}
+
+	if (protocol == IPPROTO_TCP) {
+		if ((error = sock->ops->listen(sock, 64)) < 0)
+			goto bummer;
+	}
+
+	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
+		return 0;
+
+bummer:
+	dprintk("svc: svc_create_socket error = %d\n", -error);
+	sock_release(sock);
+	return error;
+}
+
+/*
+ * Remove a dead socket
+ */
+void
+svc_delete_socket(struct svc_sock *svsk)
+{
+	struct svc_serv	*serv;
+	struct sock	*sk;
+
+	dprintk("svc: svc_delete_socket(%p)\n", svsk);
+
+	serv = svsk->sk_server;
+	sk = svsk->sk_sk;
+
+	sk->sk_state_change = svsk->sk_ostate;
+	sk->sk_data_ready = svsk->sk_odata;
+	sk->sk_write_space = svsk->sk_owspace;
+
+	spin_lock_bh(&serv->sv_lock);
+
+	list_del_init(&svsk->sk_list);
+	list_del_init(&svsk->sk_ready);
+	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+		if (test_bit(SK_TEMP, &svsk->sk_flags))
+			serv->sv_tmpcnt--;
+
+	if (!svsk->sk_inuse) {
+		spin_unlock_bh(&serv->sv_lock);
+		sock_release(svsk->sk_sock);
+		kfree(svsk);
+	} else {
+		spin_unlock_bh(&serv->sv_lock);
+		dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
+		/* svsk->sk_server = NULL; */
+	}
+}
+
+/*
+ * Make a socket for nfsd and lockd
+ */
+int
+svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
+{
+	struct sockaddr_in	sin;
+
+	dprintk("svc: creating socket proto = %d\n", protocol);
+	sin.sin_family      = AF_INET;
+	sin.sin_addr.s_addr = INADDR_ANY;
+	sin.sin_port        = htons(port);
+	return svc_create_socket(serv, protocol, &sin);
+}
+
+/*
+ * Handle defer and revisit of requests 
+ */
+
+static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
+{
+	struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
+	struct svc_serv *serv = dreq->owner;
+	struct svc_sock *svsk;
+
+	if (too_many) {
+		svc_sock_put(dr->svsk);
+		kfree(dr);
+		return;
+	}
+	dprintk("revisit queued\n");
+	svsk = dr->svsk;
+	dr->svsk = NULL;
+	spin_lock_bh(&serv->sv_lock);
+	list_add(&dr->handle.recent, &svsk->sk_deferred);
+	spin_unlock_bh(&serv->sv_lock);
+	set_bit(SK_DEFERRED, &svsk->sk_flags);
+	svc_sock_enqueue(svsk);
+	svc_sock_put(svsk);
+}
+
+static struct cache_deferred_req *
+svc_defer(struct cache_req *req)
+{
+	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
+	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
+	struct svc_deferred_req *dr;
+
+	if (rqstp->rq_arg.page_len)
+		return NULL; /* if more than a page, give up FIXME */
+	if (rqstp->rq_deferred) {
+		dr = rqstp->rq_deferred;
+		rqstp->rq_deferred = NULL;
+	} else {
+		int skip  = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
+		/* FIXME maybe discard if size too large */
+		dr = kmalloc(size, GFP_KERNEL);
+		if (dr == NULL)
+			return NULL;
+
+		dr->handle.owner = rqstp->rq_server;
+		dr->prot = rqstp->rq_prot;
+		dr->addr = rqstp->rq_addr;
+		dr->argslen = rqstp->rq_arg.len >> 2;
+		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
+	}
+	spin_lock_bh(&rqstp->rq_server->sv_lock);
+	rqstp->rq_sock->sk_inuse++;
+	dr->svsk = rqstp->rq_sock;
+	spin_unlock_bh(&rqstp->rq_server->sv_lock);
+
+	dr->handle.revisit = svc_revisit;
+	return &dr->handle;
+}
+
+/*
+ * recv data from a deferred request into an active one
+ */
+static int svc_deferred_recv(struct svc_rqst *rqstp)
+{
+	struct svc_deferred_req *dr = rqstp->rq_deferred;
+
+	rqstp->rq_arg.head[0].iov_base = dr->args;
+	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
+	rqstp->rq_arg.page_len = 0;
+	rqstp->rq_arg.len = dr->argslen<<2;
+	rqstp->rq_prot        = dr->prot;
+	rqstp->rq_addr        = dr->addr;
+	return dr->argslen<<2;
+}
+
+
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
+{
+	struct svc_deferred_req *dr = NULL;
+	struct svc_serv	*serv = svsk->sk_server;
+	
+	if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
+		return NULL;
+	spin_lock_bh(&serv->sv_lock);
+	clear_bit(SK_DEFERRED, &svsk->sk_flags);
+	if (!list_empty(&svsk->sk_deferred)) {
+		dr = list_entry(svsk->sk_deferred.next,
+				struct svc_deferred_req,
+				handle.recent);
+		list_del_init(&dr->handle.recent);
+		set_bit(SK_DEFERRED, &svsk->sk_flags);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+	return dr;
+}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
new file mode 100644
index 00000000000..1b9616a12e2
--- /dev/null
+++ b/net/sunrpc/sysctl.c
@@ -0,0 +1,193 @@
+/*
+ * linux/net/sunrpc/sysctl.c
+ *
+ * Sysctl interface to sunrpc module.
+ *
+ * I would prefer to register the sunrpc table below sys/net, but that's
+ * impossible at the moment.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/xprt.h>
+
+/*
+ * Declare the debug flags here
+ */
+unsigned int	rpc_debug;
+unsigned int	nfs_debug;
+unsigned int	nfsd_debug;
+unsigned int	nlm_debug;
+
+#ifdef RPC_DEBUG
+
+static struct ctl_table_header *sunrpc_table_header;
+static ctl_table		sunrpc_table[];
+
+void
+rpc_register_sysctl(void)
+{
+	if (!sunrpc_table_header) {
+		sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
+#ifdef CONFIG_PROC_FS
+		if (sunrpc_table[0].de)
+			sunrpc_table[0].de->owner = THIS_MODULE;
+#endif
+	}
+			
+}
+
+void
+rpc_unregister_sysctl(void)
+{
+	if (sunrpc_table_header) {
+		unregister_sysctl_table(sunrpc_table_header);
+		sunrpc_table_header = NULL;
+	}
+}
+
+static int
+proc_dodebug(ctl_table *table, int write, struct file *file,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	char		tmpbuf[20], c, *s;
+	char __user *p;
+	unsigned int	value;
+	size_t		left, len;
+
+	if ((*ppos && !write) || !*lenp) {
+		*lenp = 0;
+		return 0;
+	}
+
+	left = *lenp;
+
+	if (write) {
+		if (!access_ok(VERIFY_READ, buffer, left))
+			return -EFAULT;
+		p = buffer;
+		while (left && __get_user(c, p) >= 0 && isspace(c))
+			left--, p++;
+		if (!left)
+			goto done;
+
+		if (left > sizeof(tmpbuf) - 1)
+			return -EINVAL;
+		if (copy_from_user(tmpbuf, p, left))
+			return -EFAULT;
+		tmpbuf[left] = '\0';
+
+		for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
+			value = 10 * value + (*s - '0');
+		if (*s && !isspace(*s))
+			return -EINVAL;
+		while (left && isspace(*s))
+			left--, s++;
+		*(unsigned int *) table->data = value;
+		/* Display the RPC tasks on writing to rpc_debug */
+		if (table->ctl_name == CTL_RPCDEBUG) {
+			rpc_show_tasks();
+		}
+	} else {
+		if (!access_ok(VERIFY_WRITE, buffer, left))
+			return -EFAULT;
+		len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
+		if (len > left)
+			len = left;
+		if (__copy_to_user(buffer, tmpbuf, len))
+			return -EFAULT;
+		if ((left -= len) > 0) {
+			if (put_user('\n', (char __user *)buffer + len))
+				return -EFAULT;
+			left--;
+		}
+	}
+
+done:
+	*lenp -= left;
+	*ppos += *lenp;
+	return 0;
+}
+
+static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+
+static ctl_table debug_table[] = {
+	{
+		.ctl_name	= CTL_RPCDEBUG,
+		.procname	= "rpc_debug",
+		.data		= &rpc_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dodebug
+	}, 
+	{
+		.ctl_name	= CTL_NFSDEBUG,
+		.procname	= "nfs_debug",
+		.data		= &nfs_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dodebug
+	}, 
+	{
+		.ctl_name	= CTL_NFSDDEBUG,
+		.procname	= "nfsd_debug",
+		.data		= &nfsd_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dodebug
+	}, 
+	{
+		.ctl_name	= CTL_NLMDEBUG,
+		.procname	= "nlm_debug",
+		.data		= &nlm_debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dodebug
+	}, 
+	{
+		.ctl_name	= CTL_SLOTTABLE_UDP,
+		.procname	= "udp_slot_table_entries",
+		.data		= &xprt_udp_slot_table_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_slot_table_size,
+		.extra2		= &max_slot_table_size
+	},
+	{
+		.ctl_name	= CTL_SLOTTABLE_TCP,
+		.procname	= "tcp_slot_table_entries",
+		.data		= &xprt_tcp_slot_table_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_slot_table_size,
+		.extra2		= &max_slot_table_size
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table sunrpc_table[] = {
+	{
+		.ctl_name	= CTL_SUNRPC,
+		.procname	= "sunrpc",
+		.mode		= 0555,
+		.child		= debug_table
+	},
+	{ .ctl_name = 0 }
+};
+
+#endif
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
new file mode 100644
index 00000000000..bcbdf6430d5
--- /dev/null
+++ b/net/sunrpc/timer.c
@@ -0,0 +1,107 @@
+/*
+ * linux/net/sunrpc/timer.c
+ *
+ * Estimate RPC request round trip time.
+ *
+ * Based on packet round-trip and variance estimator algorithms described
+ * in appendix A of "Congestion Avoidance and Control" by Van Jacobson
+ * and Michael J. Karels (ACM Computer Communication Review; Proceedings
+ * of the Sigcomm '88 Symposium in Stanford, CA, August, 1988).
+ *
+ * This RTT estimator is used only for RPC over datagram protocols.
+ *
+ * Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <asm/param.h>
+
+#include <linux/types.h>
+#include <linux/unistd.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/timer.h>
+
+#define RPC_RTO_MAX (60*HZ)
+#define RPC_RTO_INIT (HZ/5)
+#define RPC_RTO_MIN (HZ/10)
+
+void
+rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
+{
+	unsigned long init = 0;
+	unsigned i;
+
+	rt->timeo = timeo;
+
+	if (timeo > RPC_RTO_INIT)
+		init = (timeo - RPC_RTO_INIT) << 3;
+	for (i = 0; i < 5; i++) {
+		rt->srtt[i] = init;
+		rt->sdrtt[i] = RPC_RTO_INIT;
+		rt->ntimeouts[i] = 0;
+	}
+}
+
+/*
+ * NB: When computing the smoothed RTT and standard deviation,
+ *     be careful not to produce negative intermediate results.
+ */
+void
+rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m)
+{
+	long *srtt, *sdrtt;
+
+	if (timer-- == 0)
+		return;
+
+	/* jiffies wrapped; ignore this one */
+	if (m < 0)
+		return;
+
+	if (m == 0)
+		m = 1L;
+
+	srtt = (long *)&rt->srtt[timer];
+	m -= *srtt >> 3;
+	*srtt += m;
+
+	if (m < 0)
+		m = -m;
+
+	sdrtt = (long *)&rt->sdrtt[timer];
+	m -= *sdrtt >> 2;
+	*sdrtt += m;
+
+	/* Set lower bound on the variance */
+	if (*sdrtt < RPC_RTO_MIN)
+		*sdrtt = RPC_RTO_MIN;
+}
+
+/*
+ * Estimate rto for an nfs rpc sent via. an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup,
+ * read, write, commit     - A+4D
+ * other                   - timeo
+ */
+
+unsigned long
+rpc_calc_rto(struct rpc_rtt *rt, unsigned timer)
+{
+	unsigned long res;
+
+	if (timer-- == 0)
+		return rt->timeo;
+
+	res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer];
+	if (res > RPC_RTO_MAX)
+		res = RPC_RTO_MAX;
+
+	return res;
+}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
new file mode 100644
index 00000000000..4484931018e
--- /dev/null
+++ b/net/sunrpc/xdr.c
@@ -0,0 +1,917 @@
+/*
+ * linux/net/sunrpc/xdr.c
+ *
+ * Generic XDR support.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <net/sock.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/msg_prot.h>
+
+/*
+ * XDR functions for basic NFS types
+ */
+u32 *
+xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
+{
+	unsigned int	quadlen = XDR_QUADLEN(obj->len);
+
+	p[quadlen] = 0;		/* zero trailing bytes */
+	*p++ = htonl(obj->len);
+	memcpy(p, obj->data, obj->len);
+	return p + XDR_QUADLEN(obj->len);
+}
+
+u32 *
+xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
+{
+	unsigned int	len;
+
+	if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ)
+		return NULL;
+	obj->len  = len;
+	obj->data = (u8 *) p;
+	return p + XDR_QUADLEN(len);
+}
+
+/**
+ * xdr_encode_opaque_fixed - Encode fixed length opaque data
+ * @p - pointer to current position in XDR buffer.
+ * @ptr - pointer to data to encode (or NULL)
+ * @nbytes - size of data.
+ *
+ * Copy the array of data of length nbytes at ptr to the XDR buffer
+ * at position p, then align to the next 32-bit boundary by padding
+ * with zero bytes (see RFC1832).
+ * Note: if ptr is NULL, only the padding is performed.
+ *
+ * Returns the updated current XDR buffer position
+ *
+ */
+u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes)
+{
+	if (likely(nbytes != 0)) {
+		unsigned int quadlen = XDR_QUADLEN(nbytes);
+		unsigned int padding = (quadlen << 2) - nbytes;
+
+		if (ptr != NULL)
+			memcpy(p, ptr, nbytes);
+		if (padding != 0)
+			memset((char *)p + nbytes, 0, padding);
+		p += quadlen;
+	}
+	return p;
+}
+EXPORT_SYMBOL(xdr_encode_opaque_fixed);
+
+/**
+ * xdr_encode_opaque - Encode variable length opaque data
+ * @p - pointer to current position in XDR buffer.
+ * @ptr - pointer to data to encode (or NULL)
+ * @nbytes - size of data.
+ *
+ * Returns the updated current XDR buffer position
+ */
+u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes)
+{
+	*p++ = htonl(nbytes);
+	return xdr_encode_opaque_fixed(p, ptr, nbytes);
+}
+EXPORT_SYMBOL(xdr_encode_opaque);
+
+u32 *
+xdr_encode_string(u32 *p, const char *string)
+{
+	return xdr_encode_array(p, string, strlen(string));
+}
+
+u32 *
+xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
+{
+	unsigned int	len;
+	char		*string;
+
+	if ((len = ntohl(*p++)) > maxlen)
+		return NULL;
+	if (lenp)
+		*lenp = len;
+	if ((len % 4) != 0) {
+		string = (char *) p;
+	} else {
+		string = (char *) (p - 1);
+		memmove(string, p, len);
+	}
+	string[len] = '\0';
+	*sp = string;
+	return p + XDR_QUADLEN(len);
+}
+
+u32 *
+xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
+{
+	unsigned int	len;
+
+	if ((len = ntohl(*p++)) > maxlen)
+		return NULL;
+	*lenp = len;
+	*sp = (char *) p;
+	return p + XDR_QUADLEN(len);
+}
+
+void
+xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
+		 unsigned int len)
+{
+	struct kvec *tail = xdr->tail;
+	u32 *p;
+
+	xdr->pages = pages;
+	xdr->page_base = base;
+	xdr->page_len = len;
+
+	p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len);
+	tail->iov_base = p;
+	tail->iov_len = 0;
+
+	if (len & 3) {
+		unsigned int pad = 4 - (len & 3);
+
+		*p = 0;
+		tail->iov_base = (char *)p + (len & 3);
+		tail->iov_len  = pad;
+		len += pad;
+	}
+	xdr->buflen += len;
+	xdr->len += len;
+}
+
+void
+xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
+		 struct page **pages, unsigned int base, unsigned int len)
+{
+	struct kvec *head = xdr->head;
+	struct kvec *tail = xdr->tail;
+	char *buf = (char *)head->iov_base;
+	unsigned int buflen = head->iov_len;
+
+	head->iov_len  = offset;
+
+	xdr->pages = pages;
+	xdr->page_base = base;
+	xdr->page_len = len;
+
+	tail->iov_base = buf + offset;
+	tail->iov_len = buflen - offset;
+
+	xdr->buflen += len;
+}
+
+void
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
+			  skb_reader_t *desc,
+			  skb_read_actor_t copy_actor)
+{
+	struct page	**ppage = xdr->pages;
+	unsigned int	len, pglen = xdr->page_len;
+	int		ret;
+
+	len = xdr->head[0].iov_len;
+	if (base < len) {
+		len -= base;
+		ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
+		if (ret != len || !desc->count)
+			return;
+		base = 0;
+	} else
+		base -= len;
+
+	if (pglen == 0)
+		goto copy_tail;
+	if (base >= pglen) {
+		base -= pglen;
+		goto copy_tail;
+	}
+	if (base || xdr->page_base) {
+		pglen -= base;
+		base  += xdr->page_base;
+		ppage += base >> PAGE_CACHE_SHIFT;
+		base &= ~PAGE_CACHE_MASK;
+	}
+	do {
+		char *kaddr;
+
+		len = PAGE_CACHE_SIZE;
+		kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
+		if (base) {
+			len -= base;
+			if (pglen < len)
+				len = pglen;
+			ret = copy_actor(desc, kaddr + base, len);
+			base = 0;
+		} else {
+			if (pglen < len)
+				len = pglen;
+			ret = copy_actor(desc, kaddr, len);
+		}
+		flush_dcache_page(*ppage);
+		kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
+		if (ret != len || !desc->count)
+			return;
+		ppage++;
+	} while ((pglen -= len) != 0);
+copy_tail:
+	len = xdr->tail[0].iov_len;
+	if (base < len)
+		copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
+}
+
+
+int
+xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
+		struct xdr_buf *xdr, unsigned int base, int msgflags)
+{
+	struct page **ppage = xdr->pages;
+	unsigned int len, pglen = xdr->page_len;
+	int err, ret = 0;
+	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
+
+	len = xdr->head[0].iov_len;
+	if (base < len || (addr != NULL && base == 0)) {
+		struct kvec iov = {
+			.iov_base = xdr->head[0].iov_base + base,
+			.iov_len  = len - base,
+		};
+		struct msghdr msg = {
+			.msg_name    = addr,
+			.msg_namelen = addrlen,
+			.msg_flags   = msgflags,
+		};
+		if (xdr->len > len)
+			msg.msg_flags |= MSG_MORE;
+
+		if (iov.iov_len != 0)
+			err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
+		else
+			err = kernel_sendmsg(sock, &msg, NULL, 0, 0);
+		if (ret == 0)
+			ret = err;
+		else if (err > 0)
+			ret += err;
+		if (err != iov.iov_len)
+			goto out;
+		base = 0;
+	} else
+		base -= len;
+
+	if (pglen == 0)
+		goto copy_tail;
+	if (base >= pglen) {
+		base -= pglen;
+		goto copy_tail;
+	}
+	if (base || xdr->page_base) {
+		pglen -= base;
+		base  += xdr->page_base;
+		ppage += base >> PAGE_CACHE_SHIFT;
+		base &= ~PAGE_CACHE_MASK;
+	}
+
+	sendpage = sock->ops->sendpage ? : sock_no_sendpage;
+	do {
+		int flags = msgflags;
+
+		len = PAGE_CACHE_SIZE;
+		if (base)
+			len -= base;
+		if (pglen < len)
+			len = pglen;
+
+		if (pglen != len || xdr->tail[0].iov_len != 0)
+			flags |= MSG_MORE;
+
+		/* Hmm... We might be dealing with highmem pages */
+		if (PageHighMem(*ppage))
+			sendpage = sock_no_sendpage;
+		err = sendpage(sock, *ppage, base, len, flags);
+		if (ret == 0)
+			ret = err;
+		else if (err > 0)
+			ret += err;
+		if (err != len)
+			goto out;
+		base = 0;
+		ppage++;
+	} while ((pglen -= len) != 0);
+copy_tail:
+	len = xdr->tail[0].iov_len;
+	if (base < len) {
+		struct kvec iov = {
+			.iov_base = xdr->tail[0].iov_base + base,
+			.iov_len  = len - base,
+		};
+		struct msghdr msg = {
+			.msg_flags   = msgflags,
+		};
+		err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
+		if (ret == 0)
+			ret = err;
+		else if (err > 0)
+			ret += err;
+	}
+out:
+	return ret;
+}
+
+
+/*
+ * Helper routines for doing 'memmove' like operations on a struct xdr_buf
+ *
+ * _shift_data_right_pages
+ * @pages: vector of pages containing both the source and dest memory area.
+ * @pgto_base: page vector address of destination
+ * @pgfrom_base: page vector address of source
+ * @len: number of bytes to copy
+ *
+ * Note: the addresses pgto_base and pgfrom_base are both calculated in
+ *       the same way:
+ *            if a memory area starts at byte 'base' in page 'pages[i]',
+ *            then its address is given as (i << PAGE_CACHE_SHIFT) + base
+ * Also note: pgfrom_base must be < pgto_base, but the memory areas
+ * 	they point to may overlap.
+ */
+static void
+_shift_data_right_pages(struct page **pages, size_t pgto_base,
+		size_t pgfrom_base, size_t len)
+{
+	struct page **pgfrom, **pgto;
+	char *vfrom, *vto;
+	size_t copy;
+
+	BUG_ON(pgto_base <= pgfrom_base);
+
+	pgto_base += len;
+	pgfrom_base += len;
+
+	pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
+	pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);
+
+	pgto_base &= ~PAGE_CACHE_MASK;
+	pgfrom_base &= ~PAGE_CACHE_MASK;
+
+	do {
+		/* Are any pointers crossing a page boundary? */
+		if (pgto_base == 0) {
+			flush_dcache_page(*pgto);
+			pgto_base = PAGE_CACHE_SIZE;
+			pgto--;
+		}
+		if (pgfrom_base == 0) {
+			pgfrom_base = PAGE_CACHE_SIZE;
+			pgfrom--;
+		}
+
+		copy = len;
+		if (copy > pgto_base)
+			copy = pgto_base;
+		if (copy > pgfrom_base)
+			copy = pgfrom_base;
+		pgto_base -= copy;
+		pgfrom_base -= copy;
+
+		vto = kmap_atomic(*pgto, KM_USER0);
+		vfrom = kmap_atomic(*pgfrom, KM_USER1);
+		memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
+		kunmap_atomic(vfrom, KM_USER1);
+		kunmap_atomic(vto, KM_USER0);
+
+	} while ((len -= copy) != 0);
+	flush_dcache_page(*pgto);
+}
+
+/*
+ * _copy_to_pages
+ * @pages: array of pages
+ * @pgbase: page vector address of destination
+ * @p: pointer to source data
+ * @len: length
+ *
+ * Copies data from an arbitrary memory location into an array of pages
+ * The copy is assumed to be non-overlapping.
+ */
+static void
+_copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
+{
+	struct page **pgto;
+	char *vto;
+	size_t copy;
+
+	pgto = pages + (pgbase >> PAGE_CACHE_SHIFT);
+	pgbase &= ~PAGE_CACHE_MASK;
+
+	do {
+		copy = PAGE_CACHE_SIZE - pgbase;
+		if (copy > len)
+			copy = len;
+
+		vto = kmap_atomic(*pgto, KM_USER0);
+		memcpy(vto + pgbase, p, copy);
+		kunmap_atomic(vto, KM_USER0);
+
+		pgbase += copy;
+		if (pgbase == PAGE_CACHE_SIZE) {
+			flush_dcache_page(*pgto);
+			pgbase = 0;
+			pgto++;
+		}
+		p += copy;
+
+	} while ((len -= copy) != 0);
+	flush_dcache_page(*pgto);
+}
+
+/*
+ * _copy_from_pages
+ * @p: pointer to destination
+ * @pages: array of pages
+ * @pgbase: offset of source data
+ * @len: length
+ *
+ * Copies data into an arbitrary memory location from an array of pages
+ * The copy is assumed to be non-overlapping.
+ */
+static void
+_copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
+{
+	struct page **pgfrom;
+	char *vfrom;
+	size_t copy;
+
+	pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT);
+	pgbase &= ~PAGE_CACHE_MASK;
+
+	do {
+		copy = PAGE_CACHE_SIZE - pgbase;
+		if (copy > len)
+			copy = len;
+
+		vfrom = kmap_atomic(*pgfrom, KM_USER0);
+		memcpy(p, vfrom + pgbase, copy);
+		kunmap_atomic(vfrom, KM_USER0);
+
+		pgbase += copy;
+		if (pgbase == PAGE_CACHE_SIZE) {
+			pgbase = 0;
+			pgfrom++;
+		}
+		p += copy;
+
+	} while ((len -= copy) != 0);
+}
+
+/*
+ * xdr_shrink_bufhead
+ * @buf: xdr_buf
+ * @len: bytes to remove from buf->head[0]
+ *
+ * Shrinks XDR buffer's header kvec buf->head[0] by 
+ * 'len' bytes. The extra data is not lost, but is instead
+ * moved into the inlined pages and/or the tail.
+ */
+static void
+xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
+{
+	struct kvec *head, *tail;
+	size_t copy, offs;
+	unsigned int pglen = buf->page_len;
+
+	tail = buf->tail;
+	head = buf->head;
+	BUG_ON (len > head->iov_len);
+
+	/* Shift the tail first */
+	if (tail->iov_len != 0) {
+		if (tail->iov_len > len) {
+			copy = tail->iov_len - len;
+			memmove((char *)tail->iov_base + len,
+					tail->iov_base, copy);
+		}
+		/* Copy from the inlined pages into the tail */
+		copy = len;
+		if (copy > pglen)
+			copy = pglen;
+		offs = len - copy;
+		if (offs >= tail->iov_len)
+			copy = 0;
+		else if (copy > tail->iov_len - offs)
+			copy = tail->iov_len - offs;
+		if (copy != 0)
+			_copy_from_pages((char *)tail->iov_base + offs,
+					buf->pages,
+					buf->page_base + pglen + offs - len,
+					copy);
+		/* Do we also need to copy data from the head into the tail ? */
+		if (len > pglen) {
+			offs = copy = len - pglen;
+			if (copy > tail->iov_len)
+				copy = tail->iov_len;
+			memcpy(tail->iov_base,
+					(char *)head->iov_base +
+					head->iov_len - offs,
+					copy);
+		}
+	}
+	/* Now handle pages */
+	if (pglen != 0) {
+		if (pglen > len)
+			_shift_data_right_pages(buf->pages,
+					buf->page_base + len,
+					buf->page_base,
+					pglen - len);
+		copy = len;
+		if (len > pglen)
+			copy = pglen;
+		_copy_to_pages(buf->pages, buf->page_base,
+				(char *)head->iov_base + head->iov_len - len,
+				copy);
+	}
+	head->iov_len -= len;
+	buf->buflen -= len;
+	/* Have we truncated the message? */
+	if (buf->len > buf->buflen)
+		buf->len = buf->buflen;
+}
+
+/*
+ * xdr_shrink_pagelen
+ * @buf: xdr_buf
+ * @len: bytes to remove from buf->pages
+ *
+ * Shrinks XDR buffer's page array buf->pages by 
+ * 'len' bytes. The extra data is not lost, but is instead
+ * moved into the tail.
+ */
+static void
+xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
+{
+	struct kvec *tail;
+	size_t copy;
+	char *p;
+	unsigned int pglen = buf->page_len;
+
+	tail = buf->tail;
+	BUG_ON (len > pglen);
+
+	/* Shift the tail first */
+	if (tail->iov_len != 0) {
+		p = (char *)tail->iov_base + len;
+		if (tail->iov_len > len) {
+			copy = tail->iov_len - len;
+			memmove(p, tail->iov_base, copy);
+		} else
+			buf->buflen -= len;
+		/* Copy from the inlined pages into the tail */
+		copy = len;
+		if (copy > tail->iov_len)
+			copy = tail->iov_len;
+		_copy_from_pages((char *)tail->iov_base,
+				buf->pages, buf->page_base + pglen - len,
+				copy);
+	}
+	buf->page_len -= len;
+	buf->buflen -= len;
+	/* Have we truncated the message? */
+	if (buf->len > buf->buflen)
+		buf->len = buf->buflen;
+}
+
+void
+xdr_shift_buf(struct xdr_buf *buf, size_t len)
+{
+	xdr_shrink_bufhead(buf, len);
+}
+
+/**
+ * xdr_init_encode - Initialize a struct xdr_stream for sending data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer in which to encode data
+ * @p: current pointer inside XDR buffer
+ *
+ * Note: at the moment the RPC client only passes the length of our
+ *	 scratch buffer in the xdr_buf's header kvec. Previously this
+ *	 meant we needed to call xdr_adjust_iovec() after encoding the
+ *	 data. With the new scheme, the xdr_stream manages the details
+ *	 of the buffer length, and takes care of adjusting the kvec
+ *	 length for us.
+ */
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+{
+	struct kvec *iov = buf->head;
+
+	xdr->buf = buf;
+	xdr->iov = iov;
+	xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
+	buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base;
+	xdr->p = p;
+}
+EXPORT_SYMBOL(xdr_init_encode);
+
+/**
+ * xdr_reserve_space - Reserve buffer space for sending
+ * @xdr: pointer to xdr_stream
+ * @nbytes: number of bytes to reserve
+ *
+ * Checks that we have enough buffer space to encode 'nbytes' more
+ * bytes of data. If so, update the total xdr_buf length, and
+ * adjust the length of the current kvec.
+ */
+uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
+{
+	uint32_t *p = xdr->p;
+	uint32_t *q;
+
+	/* align nbytes on the next 32-bit boundary */
+	nbytes += 3;
+	nbytes &= ~3;
+	q = p + (nbytes >> 2);
+	if (unlikely(q > xdr->end || q < p))
+		return NULL;
+	xdr->p = q;
+	xdr->iov->iov_len += nbytes;
+	xdr->buf->len += nbytes;
+	return p;
+}
+EXPORT_SYMBOL(xdr_reserve_space);
+
+/**
+ * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+ * @xdr: pointer to xdr_stream
+ * @pages: list of pages
+ * @base: offset of first byte
+ * @len: length of data in bytes
+ *
+ */
+void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base,
+		 unsigned int len)
+{
+	struct xdr_buf *buf = xdr->buf;
+	struct kvec *iov = buf->tail;
+	buf->pages = pages;
+	buf->page_base = base;
+	buf->page_len = len;
+
+	iov->iov_base = (char *)xdr->p;
+	iov->iov_len  = 0;
+	xdr->iov = iov;
+
+	if (len & 3) {
+		unsigned int pad = 4 - (len & 3);
+
+		BUG_ON(xdr->p >= xdr->end);
+		iov->iov_base = (char *)xdr->p + (len & 3);
+		iov->iov_len  += pad;
+		len += pad;
+		*xdr->p++ = 0;
+	}
+	buf->buflen += len;
+	buf->len += len;
+}
+EXPORT_SYMBOL(xdr_write_pages);
+
+/**
+ * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer from which to decode data
+ * @p: current pointer inside XDR buffer
+ */
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+{
+	struct kvec *iov = buf->head;
+	unsigned int len = iov->iov_len;
+
+	if (len > buf->len)
+		len = buf->len;
+	xdr->buf = buf;
+	xdr->iov = iov;
+	xdr->p = p;
+	xdr->end = (uint32_t *)((char *)iov->iov_base + len);
+}
+EXPORT_SYMBOL(xdr_init_decode);
+
+/**
+ * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * @xdr: pointer to xdr_stream struct
+ * @nbytes: number of bytes of data to decode
+ *
+ * Check if the input buffer is long enough to enable us to decode
+ * 'nbytes' more bytes of data starting at the current position.
+ * If so return the current pointer, then update the current
+ * pointer position.
+ */
+uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
+{
+	uint32_t *p = xdr->p;
+	uint32_t *q = p + XDR_QUADLEN(nbytes);
+
+	if (unlikely(q > xdr->end || q < p))
+		return NULL;
+	xdr->p = q;
+	return p;
+}
+EXPORT_SYMBOL(xdr_inline_decode);
+
+/**
+ * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
+ * @xdr: pointer to xdr_stream struct
+ * @len: number of bytes of page data
+ *
+ * Moves data beyond the current pointer position from the XDR head[] buffer
+ * into the page list. Any data that lies beyond current position + "len"
+ * bytes is moved into the XDR tail[]. The current pointer is then
+ * repositioned at the beginning of the XDR tail.
+ */
+void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
+{
+	struct xdr_buf *buf = xdr->buf;
+	struct kvec *iov;
+	ssize_t shift;
+	unsigned int end;
+	int padding;
+
+	/* Realign pages to current pointer position */
+	iov  = buf->head;
+	shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p;
+	if (shift > 0)
+		xdr_shrink_bufhead(buf, shift);
+
+	/* Truncate page data and move it into the tail */
+	if (buf->page_len > len)
+		xdr_shrink_pagelen(buf, buf->page_len - len);
+	padding = (XDR_QUADLEN(len) << 2) - len;
+	xdr->iov = iov = buf->tail;
+	/* Compute remaining message length.  */
+	end = iov->iov_len;
+	shift = buf->buflen - buf->len;
+	if (shift < end)
+		end -= shift;
+	else if (shift > 0)
+		end = 0;
+	/*
+	 * Position current pointer at beginning of tail, and
+	 * set remaining message length.
+	 */
+	xdr->p = (uint32_t *)((char *)iov->iov_base + padding);
+	xdr->end = (uint32_t *)((char *)iov->iov_base + end);
+}
+EXPORT_SYMBOL(xdr_read_pages);
+
+static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
+
+void
+xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
+{
+	buf->head[0] = *iov;
+	buf->tail[0] = empty_iov;
+	buf->page_len = 0;
+	buf->buflen = buf->len = iov->iov_len;
+}
+
+/* Sets subiov to the intersection of iov with the buffer of length len
+ * starting base bytes after iov.  Indicates empty intersection by setting
+ * length of subiov to zero.  Decrements len by length of subiov, sets base
+ * to zero (or decrements it by length of iov if subiov is empty). */
+static void
+iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len)
+{
+	if (*base > iov->iov_len) {
+		subiov->iov_base = NULL;
+		subiov->iov_len = 0;
+		*base -= iov->iov_len;
+	} else {
+		subiov->iov_base = iov->iov_base + *base;
+		subiov->iov_len = min(*len, (int)iov->iov_len - *base);
+		*base = 0;
+	}
+	*len -= subiov->iov_len; 
+}
+
+/* Sets subbuf to the portion of buf of length len beginning base bytes
+ * from the start of buf. Returns -1 if base of length are out of bounds. */
+int
+xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
+			int base, int len)
+{
+	int i;
+
+	subbuf->buflen = subbuf->len = len;
+	iov_subsegment(buf->head, subbuf->head, &base, &len);
+
+	if (base < buf->page_len) {
+		i = (base + buf->page_base) >> PAGE_CACHE_SHIFT;
+		subbuf->pages = &buf->pages[i];
+		subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK;
+		subbuf->page_len = min((int)buf->page_len - base, len);
+		len -= subbuf->page_len;
+		base = 0;
+	} else {
+		base -= buf->page_len;
+		subbuf->page_len = 0;
+	}
+
+	iov_subsegment(buf->tail, subbuf->tail, &base, &len);
+	if (base || len)
+		return -1;
+	return 0;
+}
+
+/* obj is assumed to point to allocated memory of size at least len: */
+int
+read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+{
+	struct xdr_buf subbuf;
+	int this_len;
+	int status;
+
+	status = xdr_buf_subsegment(buf, &subbuf, base, len);
+	if (status)
+		goto out;
+	this_len = min(len, (int)subbuf.head[0].iov_len);
+	memcpy(obj, subbuf.head[0].iov_base, this_len);
+	len -= this_len;
+	obj += this_len;
+	this_len = min(len, (int)subbuf.page_len);
+	if (this_len)
+		_copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len);
+	len -= this_len;
+	obj += this_len;
+	this_len = min(len, (int)subbuf.tail[0].iov_len);
+	memcpy(obj, subbuf.tail[0].iov_base, this_len);
+out:
+	return status;
+}
+
+static int
+read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
+{
+	u32	raw;
+	int	status;
+
+	status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
+	if (status)
+		return status;
+	*obj = ntohl(raw);
+	return 0;
+}
+
+/* If the netobj starting offset bytes from the start of xdr_buf is contained
+ * entirely in the head or the tail, set object to point to it; otherwise
+ * try to find space for it at the end of the tail, copy it there, and
+ * set obj to point to it. */
+int
+xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
+{
+	u32	tail_offset = buf->head[0].iov_len + buf->page_len;
+	u32	obj_end_offset;
+
+	if (read_u32_from_xdr_buf(buf, offset, &obj->len))
+		goto out;
+	obj_end_offset = offset + 4 + obj->len;
+
+	if (obj_end_offset <= buf->head[0].iov_len) {
+		/* The obj is contained entirely in the head: */
+		obj->data = buf->head[0].iov_base + offset + 4;
+	} else if (offset + 4 >= tail_offset) {
+		if (obj_end_offset - tail_offset
+				> buf->tail[0].iov_len)
+			goto out;
+		/* The obj is contained entirely in the tail: */
+		obj->data = buf->tail[0].iov_base
+			+ offset - tail_offset + 4;
+	} else {
+		/* use end of tail as storage for obj:
+		 * (We don't copy to the beginning because then we'd have
+		 * to worry about doing a potentially overlapping copy.
+		 * This assumes the object is at most half the length of the
+		 * tail.) */
+		if (obj->len > buf->tail[0].iov_len)
+			goto out;
+		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - 
+				obj->len;
+		if (read_bytes_from_xdr_buf(buf, offset + 4,
+					obj->data, obj->len))
+			goto out;
+
+	}
+	return 0;
+out:
+	return -1;
+}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
new file mode 100644
index 00000000000..c74a6bb9407
--- /dev/null
+++ b/net/sunrpc/xprt.c
@@ -0,0 +1,1678 @@
+/*
+ *  linux/net/sunrpc/xprt.c
+ *
+ *  This is a generic RPC call interface supporting congestion avoidance,
+ *  and asynchronous calls.
+ *
+ *  The interface works like this:
+ *
+ *  -	When a process places a call, it allocates a request slot if
+ *	one is available. Otherwise, it sleeps on the backlog queue
+ *	(xprt_reserve).
+ *  -	Next, the caller puts together the RPC message, stuffs it into
+ *	the request struct, and calls xprt_call().
+ *  -	xprt_call transmits the message and installs the caller on the
+ *	socket's wait list. At the same time, it installs a timer that
+ *	is run after the packet's timeout has expired.
+ *  -	When a packet arrives, the data_ready handler walks the list of
+ *	pending requests for that socket. If a matching XID is found, the
+ *	caller is woken up, and the timer removed.
+ *  -	When no reply arrives within the timeout interval, the timer is
+ *	fired by the kernel and runs xprt_timer(). It either adjusts the
+ *	timeout values (minor timeout) or wakes up the caller with a status
+ *	of -ETIMEDOUT.
+ *  -	When the caller receives a notification from RPC that a reply arrived,
+ *	it should release the RPC slot, and process the reply.
+ *	If the call timed out, it may choose to retry the operation by
+ *	adjusting the initial timeout value, and simply calling rpc_call
+ *	again.
+ *
+ *  Support for async RPC is done through a set of RPC-specific scheduling
+ *  primitives that `transparently' work for processes as well as async
+ *  tasks that rely on callbacks.
+ *
+ *  Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ *  TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
+ *  TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
+ *  TCP NFS related read + write fixes
+ *   (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
+ *
+ *  Rewrite of larges part of the code in order to stabilize TCP stuff.
+ *  Fix behaviour when socket buffer is full.
+ *   (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/mm.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/file.h>
+#include <linux/workqueue.h>
+#include <linux/random.h>
+
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+
+/*
+ * Local variables
+ */
+
+#ifdef RPC_DEBUG
+# undef  RPC_DEBUG_DATA
+# define RPCDBG_FACILITY	RPCDBG_XPRT
+#endif
+
+#define XPRT_MAX_BACKOFF	(8)
+#define XPRT_IDLE_TIMEOUT	(5*60*HZ)
+#define XPRT_MAX_RESVPORT	(800)
+
+/*
+ * Local functions
+ */
+static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+static inline void	do_xprt_reserve(struct rpc_task *);
+static void	xprt_disconnect(struct rpc_xprt *);
+static void	xprt_connect_status(struct rpc_task *task);
+static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
+						struct rpc_timeout *to);
+static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
+static void	xprt_bind_socket(struct rpc_xprt *, struct socket *);
+static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+
+static int	xprt_clear_backlog(struct rpc_xprt *xprt);
+
+#ifdef RPC_DEBUG_DATA
+/*
+ * Print the buffer contents (first 128 bytes only--just enough for
+ * diropres return).
+ */
+static void
+xprt_pktdump(char *msg, u32 *packet, unsigned int count)
+{
+	u8	*buf = (u8 *) packet;
+	int	j;
+
+	dprintk("RPC:      %s\n", msg);
+	for (j = 0; j < count && j < 128; j += 4) {
+		if (!(j & 31)) {
+			if (j)
+				dprintk("\n");
+			dprintk("0x%04x ", j);
+		}
+		dprintk("%02x%02x%02x%02x ",
+			buf[j], buf[j+1], buf[j+2], buf[j+3]);
+	}
+	dprintk("\n");
+}
+#else
+static inline void
+xprt_pktdump(char *msg, u32 *packet, unsigned int count)
+{
+	/* NOP */
+}
+#endif
+
+/*
+ * Look up RPC transport given an INET socket
+ */
+static inline struct rpc_xprt *
+xprt_from_sock(struct sock *sk)
+{
+	return (struct rpc_xprt *) sk->sk_user_data;
+}
+
+/*
+ * Serialize write access to sockets, in order to prevent different
+ * requests from interfering with each other.
+ * Also prevents TCP socket connects from colliding with writes.
+ */
+static int
+__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) {
+		if (task == xprt->snd_task)
+			return 1;
+		if (task == NULL)
+			return 0;
+		goto out_sleep;
+	}
+	if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+		xprt->snd_task = task;
+		if (req) {
+			req->rq_bytes_sent = 0;
+			req->rq_ntrans++;
+		}
+		return 1;
+	}
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_LOCKED, &xprt->sockstate);
+	smp_mb__after_clear_bit();
+out_sleep:
+	dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt);
+	task->tk_timeout = 0;
+	task->tk_status = -EAGAIN;
+	if (req && req->rq_ntrans)
+		rpc_sleep_on(&xprt->resend, task, NULL, NULL);
+	else
+		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+	return 0;
+}
+
+static inline int
+xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	int retval;
+
+	spin_lock_bh(&xprt->sock_lock);
+	retval = __xprt_lock_write(xprt, task);
+	spin_unlock_bh(&xprt->sock_lock);
+	return retval;
+}
+
+
+static void
+__xprt_lock_write_next(struct rpc_xprt *xprt)
+{
+	struct rpc_task *task;
+
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
+		return;
+	if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
+		goto out_unlock;
+	task = rpc_wake_up_next(&xprt->resend);
+	if (!task) {
+		task = rpc_wake_up_next(&xprt->sending);
+		if (!task)
+			goto out_unlock;
+	}
+	if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+		struct rpc_rqst *req = task->tk_rqstp;
+		xprt->snd_task = task;
+		if (req) {
+			req->rq_bytes_sent = 0;
+			req->rq_ntrans++;
+		}
+		return;
+	}
+out_unlock:
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_LOCKED, &xprt->sockstate);
+	smp_mb__after_clear_bit();
+}
+
+/*
+ * Releases the socket for use by other requests.
+ */
+static void
+__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	if (xprt->snd_task == task) {
+		xprt->snd_task = NULL;
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_LOCKED, &xprt->sockstate);
+		smp_mb__after_clear_bit();
+		__xprt_lock_write_next(xprt);
+	}
+}
+
+static inline void
+xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	spin_lock_bh(&xprt->sock_lock);
+	__xprt_release_write(xprt, task);
+	spin_unlock_bh(&xprt->sock_lock);
+}
+
+/*
+ * Write data to socket.
+ */
+static inline int
+xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	struct socket	*sock = xprt->sock;
+	struct xdr_buf	*xdr = &req->rq_snd_buf;
+	struct sockaddr *addr = NULL;
+	int addrlen = 0;
+	unsigned int	skip;
+	int		result;
+
+	if (!sock)
+		return -ENOTCONN;
+
+	xprt_pktdump("packet data:",
+				req->rq_svec->iov_base,
+				req->rq_svec->iov_len);
+
+	/* For UDP, we need to provide an address */
+	if (!xprt->stream) {
+		addr = (struct sockaddr *) &xprt->addr;
+		addrlen = sizeof(xprt->addr);
+	}
+	/* Dont repeat bytes */
+	skip = req->rq_bytes_sent;
+
+	clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+	result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
+
+	dprintk("RPC:      xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
+
+	if (result >= 0)
+		return result;
+
+	switch (result) {
+	case -ECONNREFUSED:
+		/* When the server has died, an ICMP port unreachable message
+		 * prompts ECONNREFUSED.
+		 */
+	case -EAGAIN:
+		break;
+	case -ECONNRESET:
+	case -ENOTCONN:
+	case -EPIPE:
+		/* connection broken */
+		if (xprt->stream)
+			result = -ENOTCONN;
+		break;
+	default:
+		printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
+	}
+	return result;
+}
+
+/*
+ * Van Jacobson congestion avoidance. Check if the congestion window
+ * overflowed. Put the task to sleep if this is the case.
+ */
+static int
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (req->rq_cong)
+		return 1;
+	dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
+			task->tk_pid, xprt->cong, xprt->cwnd);
+	if (RPCXPRT_CONGESTED(xprt))
+		return 0;
+	req->rq_cong = 1;
+	xprt->cong += RPC_CWNDSCALE;
+	return 1;
+}
+
+/*
+ * Adjust the congestion window, and wake up the next task
+ * that has been sleeping due to congestion
+ */
+static void
+__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	if (!req->rq_cong)
+		return;
+	req->rq_cong = 0;
+	xprt->cong -= RPC_CWNDSCALE;
+	__xprt_lock_write_next(xprt);
+}
+
+/*
+ * Adjust RPC congestion window
+ * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ */
+static void
+xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
+{
+	unsigned long	cwnd;
+
+	cwnd = xprt->cwnd;
+	if (result >= 0 && cwnd <= xprt->cong) {
+		/* The (cwnd >> 1) term makes sure
+		 * the result gets rounded properly. */
+		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
+		if (cwnd > RPC_MAXCWND(xprt))
+			cwnd = RPC_MAXCWND(xprt);
+		__xprt_lock_write_next(xprt);
+	} else if (result == -ETIMEDOUT) {
+		cwnd >>= 1;
+		if (cwnd < RPC_CWNDSCALE)
+			cwnd = RPC_CWNDSCALE;
+	}
+	dprintk("RPC:      cong %ld, cwnd was %ld, now %ld\n",
+			xprt->cong, xprt->cwnd, cwnd);
+	xprt->cwnd = cwnd;
+}
+
+/*
+ * Reset the major timeout value
+ */
+static void xprt_reset_majortimeo(struct rpc_rqst *req)
+{
+	struct rpc_timeout *to = &req->rq_xprt->timeout;
+
+	req->rq_majortimeo = req->rq_timeout;
+	if (to->to_exponential)
+		req->rq_majortimeo <<= to->to_retries;
+	else
+		req->rq_majortimeo += to->to_increment * to->to_retries;
+	if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
+		req->rq_majortimeo = to->to_maxval;
+	req->rq_majortimeo += jiffies;
+}
+
+/*
+ * Adjust timeout values etc for next retransmit
+ */
+int xprt_adjust_timeout(struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+	struct rpc_timeout *to = &xprt->timeout;
+	int status = 0;
+
+	if (time_before(jiffies, req->rq_majortimeo)) {
+		if (to->to_exponential)
+			req->rq_timeout <<= 1;
+		else
+			req->rq_timeout += to->to_increment;
+		if (to->to_maxval && req->rq_timeout >= to->to_maxval)
+			req->rq_timeout = to->to_maxval;
+		req->rq_retries++;
+		pprintk("RPC: %lu retrans\n", jiffies);
+	} else {
+		req->rq_timeout = to->to_initval;
+		req->rq_retries = 0;
+		xprt_reset_majortimeo(req);
+		/* Reset the RTT counters == "slow start" */
+		spin_lock_bh(&xprt->sock_lock);
+		rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
+		spin_unlock_bh(&xprt->sock_lock);
+		pprintk("RPC: %lu timeout\n", jiffies);
+		status = -ETIMEDOUT;
+	}
+
+	if (req->rq_timeout == 0) {
+		printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");
+		req->rq_timeout = 5 * HZ;
+	}
+	return status;
+}
+
+/*
+ * Close down a transport socket
+ */
+static void
+xprt_close(struct rpc_xprt *xprt)
+{
+	struct socket	*sock = xprt->sock;
+	struct sock	*sk = xprt->inet;
+
+	if (!sk)
+		return;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	xprt->inet = NULL;
+	xprt->sock = NULL;
+
+	sk->sk_user_data    = NULL;
+	sk->sk_data_ready   = xprt->old_data_ready;
+	sk->sk_state_change = xprt->old_state_change;
+	sk->sk_write_space  = xprt->old_write_space;
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	sk->sk_no_check	 = 0;
+
+	sock_release(sock);
+}
+
+static void
+xprt_socket_autoclose(void *args)
+{
+	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+
+	xprt_disconnect(xprt);
+	xprt_close(xprt);
+	xprt_release_write(xprt, NULL);
+}
+
+/*
+ * Mark a transport as disconnected
+ */
+static void
+xprt_disconnect(struct rpc_xprt *xprt)
+{
+	dprintk("RPC:      disconnected transport %p\n", xprt);
+	spin_lock_bh(&xprt->sock_lock);
+	xprt_clear_connected(xprt);
+	rpc_wake_up_status(&xprt->pending, -ENOTCONN);
+	spin_unlock_bh(&xprt->sock_lock);
+}
+
+/*
+ * Used to allow disconnection when we've been idle
+ */
+static void
+xprt_init_autodisconnect(unsigned long data)
+{
+	struct rpc_xprt *xprt = (struct rpc_xprt *)data;
+
+	spin_lock(&xprt->sock_lock);
+	if (!list_empty(&xprt->recv) || xprt->shutdown)
+		goto out_abort;
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
+		goto out_abort;
+	spin_unlock(&xprt->sock_lock);
+	/* Let keventd close the socket */
+	if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0)
+		xprt_release_write(xprt, NULL);
+	else
+		schedule_work(&xprt->task_cleanup);
+	return;
+out_abort:
+	spin_unlock(&xprt->sock_lock);
+}
+
+static void xprt_socket_connect(void *args)
+{
+	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct socket *sock = xprt->sock;
+	int status = -EIO;
+
+	if (xprt->shutdown || xprt->addr.sin_port == 0)
+		goto out;
+
+	/*
+	 * Start by resetting any existing state
+	 */
+	xprt_close(xprt);
+	sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport);
+	if (sock == NULL) {
+		/* couldn't create socket or bind to reserved port;
+		 * this is likely a permanent error, so cause an abort */
+		goto out;
+	}
+	xprt_bind_socket(xprt, sock);
+	xprt_sock_setbufsize(xprt);
+
+	status = 0;
+	if (!xprt->stream)
+		goto out;
+
+	/*
+	 * Tell the socket layer to start connecting...
+	 */
+	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
+			sizeof(xprt->addr), O_NONBLOCK);
+	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
+			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
+	if (status < 0) {
+		switch (status) {
+			case -EINPROGRESS:
+			case -EALREADY:
+				goto out_clear;
+		}
+	}
+out:
+	if (status < 0)
+		rpc_wake_up_status(&xprt->pending, status);
+	else
+		rpc_wake_up(&xprt->pending);
+out_clear:
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_CONNECTING, &xprt->sockstate);
+	smp_mb__after_clear_bit();
+}
+
+/*
+ * Attempt to connect a TCP socket.
+ *
+ */
+void xprt_connect(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+
+	dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
+			xprt, (xprt_connected(xprt) ? "is" : "is not"));
+
+	if (xprt->shutdown) {
+		task->tk_status = -EIO;
+		return;
+	}
+	if (!xprt->addr.sin_port) {
+		task->tk_status = -EIO;
+		return;
+	}
+	if (!xprt_lock_write(xprt, task))
+		return;
+	if (xprt_connected(xprt))
+		goto out_write;
+
+	if (task->tk_rqstp)
+		task->tk_rqstp->rq_bytes_sent = 0;
+
+	task->tk_timeout = RPC_CONNECT_TIMEOUT;
+	rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+	if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) {
+		/* Note: if we are here due to a dropped connection
+		 * 	 we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ
+		 * 	 seconds
+		 */
+		if (xprt->sock != NULL)
+			schedule_delayed_work(&xprt->sock_connect,
+					RPC_REESTABLISH_TIMEOUT);
+		else
+			schedule_work(&xprt->sock_connect);
+	}
+	return;
+ out_write:
+	xprt_release_write(xprt, task);
+}
+
+/*
+ * We arrive here when awoken from waiting on connection establishment.
+ */
+static void
+xprt_connect_status(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+
+	if (task->tk_status >= 0) {
+		dprintk("RPC: %4d xprt_connect_status: connection established\n",
+				task->tk_pid);
+		return;
+	}
+
+	/* if soft mounted, just cause this RPC to fail */
+	if (RPC_IS_SOFT(task))
+		task->tk_status = -EIO;
+
+	switch (task->tk_status) {
+	case -ECONNREFUSED:
+	case -ECONNRESET:
+	case -ENOTCONN:
+		return;
+	case -ETIMEDOUT:
+		dprintk("RPC: %4d xprt_connect_status: timed out\n",
+				task->tk_pid);
+		break;
+	default:
+		printk(KERN_ERR "RPC: error %d connecting to server %s\n",
+				-task->tk_status, task->tk_client->cl_server);
+	}
+	xprt_release_write(xprt, task);
+}
+
+/*
+ * Look up the RPC request corresponding to a reply, and then lock it.
+ */
+static inline struct rpc_rqst *
+xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
+{
+	struct list_head *pos;
+	struct rpc_rqst	*req = NULL;
+
+	list_for_each(pos, &xprt->recv) {
+		struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
+		if (entry->rq_xid == xid) {
+			req = entry;
+			break;
+		}
+	}
+	return req;
+}
+
+/*
+ * Complete reply received.
+ * The TCP code relies on us to remove the request from xprt->pending.
+ */
+static void
+xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
+{
+	struct rpc_task	*task = req->rq_task;
+	struct rpc_clnt *clnt = task->tk_client;
+
+	/* Adjust congestion window */
+	if (!xprt->nocong) {
+		unsigned timer = task->tk_msg.rpc_proc->p_timer;
+		xprt_adjust_cwnd(xprt, copied);
+		__xprt_put_cong(xprt, req);
+		if (timer) {
+			if (req->rq_ntrans == 1)
+				rpc_update_rtt(clnt->cl_rtt, timer,
+						(long)jiffies - req->rq_xtime);
+			rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1);
+		}
+	}
+
+#ifdef RPC_PROFILE
+	/* Profile only reads for now */
+	if (copied > 1024) {
+		static unsigned long	nextstat;
+		static unsigned long	pkt_rtt, pkt_len, pkt_cnt;
+
+		pkt_cnt++;
+		pkt_len += req->rq_slen + copied;
+		pkt_rtt += jiffies - req->rq_xtime;
+		if (time_before(nextstat, jiffies)) {
+			printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
+			printk("RPC: %ld %ld %ld %ld stat\n",
+					jiffies, pkt_cnt, pkt_len, pkt_rtt);
+			pkt_rtt = pkt_len = pkt_cnt = 0;
+			nextstat = jiffies + 5 * HZ;
+		}
+	}
+#endif
+
+	dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
+	list_del_init(&req->rq_list);
+	req->rq_received = req->rq_private_buf.len = copied;
+
+	/* ... and wake up the process. */
+	rpc_wake_up_task(task);
+	return;
+}
+
+static size_t
+skb_read_bits(skb_reader_t *desc, void *to, size_t len)
+{
+	if (len > desc->count)
+		len = desc->count;
+	if (skb_copy_bits(desc->skb, desc->offset, to, len))
+		return 0;
+	desc->count -= len;
+	desc->offset += len;
+	return len;
+}
+
+static size_t
+skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
+{
+	unsigned int csum2, pos;
+
+	if (len > desc->count)
+		len = desc->count;
+	pos = desc->offset;
+	csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
+	desc->csum = csum_block_add(desc->csum, csum2, pos);
+	desc->count -= len;
+	desc->offset += len;
+	return len;
+}
+
+/*
+ * We have set things up such that we perform the checksum of the UDP
+ * packet in parallel with the copies into the RPC client iovec.  -DaveM
+ */
+int
+csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
+{
+	skb_reader_t desc;
+
+	desc.skb = skb;
+	desc.offset = sizeof(struct udphdr);
+	desc.count = skb->len - desc.offset;
+
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+		goto no_checksum;
+
+	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
+	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits);
+	if (desc.offset != skb->len) {
+		unsigned int csum2;
+		csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
+		desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
+	}
+	if (desc.count)
+		return -1;
+	if ((unsigned short)csum_fold(desc.csum))
+		return -1;
+	return 0;
+no_checksum:
+	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits);
+	if (desc.count)
+		return -1;
+	return 0;
+}
+
+/*
+ * Input handler for RPC replies. Called from a bottom half and hence
+ * atomic.
+ */
+static void
+udp_data_ready(struct sock *sk, int len)
+{
+	struct rpc_task	*task;
+	struct rpc_xprt	*xprt;
+	struct rpc_rqst *rovr;
+	struct sk_buff	*skb;
+	int err, repsize, copied;
+	u32 _xid, *xp;
+
+	read_lock(&sk->sk_callback_lock);
+	dprintk("RPC:      udp_data_ready...\n");
+	if (!(xprt = xprt_from_sock(sk))) {
+		printk("RPC:      udp_data_ready request not found!\n");
+		goto out;
+	}
+
+	dprintk("RPC:      udp_data_ready client %p\n", xprt);
+
+	if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
+		goto out;
+
+	if (xprt->shutdown)
+		goto dropit;
+
+	repsize = skb->len - sizeof(struct udphdr);
+	if (repsize < 4) {
+		printk("RPC: impossible RPC reply size %d!\n", repsize);
+		goto dropit;
+	}
+
+	/* Copy the XID from the skb... */
+	xp = skb_header_pointer(skb, sizeof(struct udphdr),
+				sizeof(_xid), &_xid);
+	if (xp == NULL)
+		goto dropit;
+
+	/* Look up and lock the request corresponding to the given XID */
+	spin_lock(&xprt->sock_lock);
+	rovr = xprt_lookup_rqst(xprt, *xp);
+	if (!rovr)
+		goto out_unlock;
+	task = rovr->rq_task;
+
+	dprintk("RPC: %4d received reply\n", task->tk_pid);
+
+	if ((copied = rovr->rq_private_buf.buflen) > repsize)
+		copied = repsize;
+
+	/* Suck it into the iovec, verify checksum if not done by hw. */
+	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
+		goto out_unlock;
+
+	/* Something worked... */
+	dst_confirm(skb->dst);
+
+	xprt_complete_rqst(xprt, rovr, copied);
+
+ out_unlock:
+	spin_unlock(&xprt->sock_lock);
+ dropit:
+	skb_free_datagram(sk, skb);
+ out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Copy from an skb into memory and shrink the skb.
+ */
+static inline size_t
+tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
+{
+	if (len > desc->count)
+		len = desc->count;
+	if (skb_copy_bits(desc->skb, desc->offset, p, len))
+		return 0;
+	desc->offset += len;
+	desc->count -= len;
+	return len;
+}
+
+/*
+ * TCP read fragment marker
+ */
+static inline void
+tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
+{
+	size_t len, used;
+	char *p;
+
+	p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
+	len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
+	used = tcp_copy_data(desc, p, len);
+	xprt->tcp_offset += used;
+	if (used != len)
+		return;
+	xprt->tcp_reclen = ntohl(xprt->tcp_recm);
+	if (xprt->tcp_reclen & 0x80000000)
+		xprt->tcp_flags |= XPRT_LAST_FRAG;
+	else
+		xprt->tcp_flags &= ~XPRT_LAST_FRAG;
+	xprt->tcp_reclen &= 0x7fffffff;
+	xprt->tcp_flags &= ~XPRT_COPY_RECM;
+	xprt->tcp_offset = 0;
+	/* Sanity check of the record length */
+	if (xprt->tcp_reclen < 4) {
+		printk(KERN_ERR "RPC: Invalid TCP record fragment length\n");
+		xprt_disconnect(xprt);
+	}
+	dprintk("RPC:      reading TCP record fragment of length %d\n",
+			xprt->tcp_reclen);
+}
+
+static void
+tcp_check_recm(struct rpc_xprt *xprt)
+{
+	if (xprt->tcp_offset == xprt->tcp_reclen) {
+		xprt->tcp_flags |= XPRT_COPY_RECM;
+		xprt->tcp_offset = 0;
+		if (xprt->tcp_flags & XPRT_LAST_FRAG) {
+			xprt->tcp_flags &= ~XPRT_COPY_DATA;
+			xprt->tcp_flags |= XPRT_COPY_XID;
+			xprt->tcp_copied = 0;
+		}
+	}
+}
+
+/*
+ * TCP read xid
+ */
+static inline void
+tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
+{
+	size_t len, used;
+	char *p;
+
+	len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
+	dprintk("RPC:      reading XID (%Zu bytes)\n", len);
+	p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
+	used = tcp_copy_data(desc, p, len);
+	xprt->tcp_offset += used;
+	if (used != len)
+		return;
+	xprt->tcp_flags &= ~XPRT_COPY_XID;
+	xprt->tcp_flags |= XPRT_COPY_DATA;
+	xprt->tcp_copied = 4;
+	dprintk("RPC:      reading reply for XID %08x\n",
+						ntohl(xprt->tcp_xid));
+	tcp_check_recm(xprt);
+}
+
+/*
+ * TCP read and complete request
+ */
+static inline void
+tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
+{
+	struct rpc_rqst *req;
+	struct xdr_buf *rcvbuf;
+	size_t len;
+
+	/* Find and lock the request corresponding to this xid */
+	spin_lock(&xprt->sock_lock);
+	req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
+	if (!req) {
+		xprt->tcp_flags &= ~XPRT_COPY_DATA;
+		dprintk("RPC:      XID %08x request not found!\n",
+				ntohl(xprt->tcp_xid));
+		spin_unlock(&xprt->sock_lock);
+		return;
+	}
+
+	rcvbuf = &req->rq_private_buf;
+	len = desc->count;
+	if (len > xprt->tcp_reclen - xprt->tcp_offset) {
+		skb_reader_t my_desc;
+
+		len = xprt->tcp_reclen - xprt->tcp_offset;
+		memcpy(&my_desc, desc, sizeof(my_desc));
+		my_desc.count = len;
+		xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
+					  &my_desc, tcp_copy_data);
+		desc->count -= len;
+		desc->offset += len;
+	} else
+		xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
+					  desc, tcp_copy_data);
+	xprt->tcp_copied += len;
+	xprt->tcp_offset += len;
+
+	if (xprt->tcp_copied == req->rq_private_buf.buflen)
+		xprt->tcp_flags &= ~XPRT_COPY_DATA;
+	else if (xprt->tcp_offset == xprt->tcp_reclen) {
+		if (xprt->tcp_flags & XPRT_LAST_FRAG)
+			xprt->tcp_flags &= ~XPRT_COPY_DATA;
+	}
+
+	if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
+		dprintk("RPC: %4d received reply complete\n",
+				req->rq_task->tk_pid);
+		xprt_complete_rqst(xprt, req, xprt->tcp_copied);
+	}
+	spin_unlock(&xprt->sock_lock);
+	tcp_check_recm(xprt);
+}
+
+/*
+ * TCP discard extra bytes from a short read
+ */
+static inline void
+tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
+{
+	size_t len;
+
+	len = xprt->tcp_reclen - xprt->tcp_offset;
+	if (len > desc->count)
+		len = desc->count;
+	desc->count -= len;
+	desc->offset += len;
+	xprt->tcp_offset += len;
+	tcp_check_recm(xprt);
+}
+
+/*
+ * TCP record receive routine
+ * We first have to grab the record marker, then the XID, then the data.
+ */
+static int
+tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+		unsigned int offset, size_t len)
+{
+	struct rpc_xprt *xprt = rd_desc->arg.data;
+	skb_reader_t desc = {
+		.skb	= skb,
+		.offset	= offset,
+		.count	= len,
+		.csum	= 0
+       	};
+
+	dprintk("RPC:      tcp_data_recv\n");
+	do {
+		/* Read in a new fragment marker if necessary */
+		/* Can we ever really expect to get completely empty fragments? */
+		if (xprt->tcp_flags & XPRT_COPY_RECM) {
+			tcp_read_fraghdr(xprt, &desc);
+			continue;
+		}
+		/* Read in the xid if necessary */
+		if (xprt->tcp_flags & XPRT_COPY_XID) {
+			tcp_read_xid(xprt, &desc);
+			continue;
+		}
+		/* Read in the request data */
+		if (xprt->tcp_flags & XPRT_COPY_DATA) {
+			tcp_read_request(xprt, &desc);
+			continue;
+		}
+		/* Skip over any trailing bytes on short reads */
+		tcp_read_discard(xprt, &desc);
+	} while (desc.count);
+	dprintk("RPC:      tcp_data_recv done\n");
+	return len - desc.count;
+}
+
+static void tcp_data_ready(struct sock *sk, int bytes)
+{
+	struct rpc_xprt *xprt;
+	read_descriptor_t rd_desc;
+
+	read_lock(&sk->sk_callback_lock);
+	dprintk("RPC:      tcp_data_ready...\n");
+	if (!(xprt = xprt_from_sock(sk))) {
+		printk("RPC:      tcp_data_ready socket info not found!\n");
+		goto out;
+	}
+	if (xprt->shutdown)
+		goto out;
+
+	/* We use rd_desc to pass struct xprt to tcp_data_recv */
+	rd_desc.arg.data = xprt;
+	rd_desc.count = 65536;
+	tcp_read_sock(sk, &rd_desc, tcp_data_recv);
+out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void
+tcp_state_change(struct sock *sk)
+{
+	struct rpc_xprt	*xprt;
+
+	read_lock(&sk->sk_callback_lock);
+	if (!(xprt = xprt_from_sock(sk)))
+		goto out;
+	dprintk("RPC:      tcp_state_change client %p...\n", xprt);
+	dprintk("RPC:      state %x conn %d dead %d zapped %d\n",
+				sk->sk_state, xprt_connected(xprt),
+				sock_flag(sk, SOCK_DEAD),
+				sock_flag(sk, SOCK_ZAPPED));
+
+	switch (sk->sk_state) {
+	case TCP_ESTABLISHED:
+		spin_lock_bh(&xprt->sock_lock);
+		if (!xprt_test_and_set_connected(xprt)) {
+			/* Reset TCP record info */
+			xprt->tcp_offset = 0;
+			xprt->tcp_reclen = 0;
+			xprt->tcp_copied = 0;
+			xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
+			rpc_wake_up(&xprt->pending);
+		}
+		spin_unlock_bh(&xprt->sock_lock);
+		break;
+	case TCP_SYN_SENT:
+	case TCP_SYN_RECV:
+		break;
+	default:
+		if (xprt_test_and_clear_connected(xprt))
+			rpc_wake_up_status(&xprt->pending, -ENOTCONN);
+		break;
+	}
+ out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * Called when more output buffer space is available for this socket.
+ * We try not to wake our writers until they can make "significant"
+ * progress, otherwise we'll waste resources thrashing sock_sendmsg
+ * with a bunch of small requests.
+ */
+static void
+xprt_write_space(struct sock *sk)
+{
+	struct rpc_xprt	*xprt;
+	struct socket	*sock;
+
+	read_lock(&sk->sk_callback_lock);
+	if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket))
+		goto out;
+	if (xprt->shutdown)
+		goto out;
+
+	/* Wait until we have enough socket memory */
+	if (xprt->stream) {
+		/* from net/core/stream.c:sk_stream_write_space */
+		if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))
+			goto out;
+	} else {
+		/* from net/core/sock.c:sock_def_write_space */
+		if (!sock_writeable(sk))
+			goto out;
+	}
+
+	if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
+		goto out;
+
+	spin_lock_bh(&xprt->sock_lock);
+	if (xprt->snd_task)
+		rpc_wake_up_task(xprt->snd_task);
+	spin_unlock_bh(&xprt->sock_lock);
+out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * RPC receive timeout handler.
+ */
+static void
+xprt_timer(struct rpc_task *task)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	spin_lock(&xprt->sock_lock);
+	if (req->rq_received)
+		goto out;
+
+	xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
+	__xprt_put_cong(xprt, req);
+
+	dprintk("RPC: %4d xprt_timer (%s request)\n",
+		task->tk_pid, req ? "pending" : "backlogged");
+
+	task->tk_status  = -ETIMEDOUT;
+out:
+	task->tk_timeout = 0;
+	rpc_wake_up_task(task);
+	spin_unlock(&xprt->sock_lock);
+}
+
+/*
+ * Place the actual RPC call.
+ * We have to copy the iovec because sendmsg fiddles with its contents.
+ */
+int
+xprt_prepare_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt	*xprt = req->rq_xprt;
+	int err = 0;
+
+	dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid);
+
+	if (xprt->shutdown)
+		return -EIO;
+
+	spin_lock_bh(&xprt->sock_lock);
+	if (req->rq_received && !req->rq_bytes_sent) {
+		err = req->rq_received;
+		goto out_unlock;
+	}
+	if (!__xprt_lock_write(xprt, task)) {
+		err = -EAGAIN;
+		goto out_unlock;
+	}
+
+	if (!xprt_connected(xprt)) {
+		err = -ENOTCONN;
+		goto out_unlock;
+	}
+out_unlock:
+	spin_unlock_bh(&xprt->sock_lock);
+	return err;
+}
+
+void
+xprt_transmit(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt	*xprt = req->rq_xprt;
+	int status, retry = 0;
+
+
+	dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
+
+	/* set up everything as needed. */
+	/* Write the record marker */
+	if (xprt->stream) {
+		u32	*marker = req->rq_svec[0].iov_base;
+
+		*marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
+	}
+
+	smp_rmb();
+	if (!req->rq_received) {
+		if (list_empty(&req->rq_list)) {
+			spin_lock_bh(&xprt->sock_lock);
+			/* Update the softirq receive buffer */
+			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+					sizeof(req->rq_private_buf));
+			/* Add request to the receive list */
+			list_add_tail(&req->rq_list, &xprt->recv);
+			spin_unlock_bh(&xprt->sock_lock);
+			xprt_reset_majortimeo(req);
+		}
+	} else if (!req->rq_bytes_sent)
+		return;
+
+	/* Continue transmitting the packet/record. We must be careful
+	 * to cope with writespace callbacks arriving _after_ we have
+	 * called xprt_sendmsg().
+	 */
+	while (1) {
+		req->rq_xtime = jiffies;
+		status = xprt_sendmsg(xprt, req);
+
+		if (status < 0)
+			break;
+
+		if (xprt->stream) {
+			req->rq_bytes_sent += status;
+
+			/* If we've sent the entire packet, immediately
+			 * reset the count of bytes sent. */
+			if (req->rq_bytes_sent >= req->rq_slen) {
+				req->rq_bytes_sent = 0;
+				goto out_receive;
+			}
+		} else {
+			if (status >= req->rq_slen)
+				goto out_receive;
+			status = -EAGAIN;
+			break;
+		}
+
+		dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
+				task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+				req->rq_slen);
+
+		status = -EAGAIN;
+		if (retry++ > 50)
+			break;
+	}
+
+	/* Note: at this point, task->tk_sleeping has not yet been set,
+	 *	 hence there is no danger of the waking up task being put on
+	 *	 schedq, and being picked up by a parallel run of rpciod().
+	 */
+	task->tk_status = status;
+
+	switch (status) {
+	case -EAGAIN:
+		if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
+			/* Protect against races with xprt_write_space */
+			spin_lock_bh(&xprt->sock_lock);
+			/* Don't race with disconnect */
+			if (!xprt_connected(xprt))
+				task->tk_status = -ENOTCONN;
+			else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
+				task->tk_timeout = req->rq_timeout;
+				rpc_sleep_on(&xprt->pending, task, NULL, NULL);
+			}
+			spin_unlock_bh(&xprt->sock_lock);
+			return;
+		}
+		/* Keep holding the socket if it is blocked */
+		rpc_delay(task, HZ>>4);
+		return;
+	case -ECONNREFUSED:
+		task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
+		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+	case -ENOTCONN:
+		return;
+	default:
+		if (xprt->stream)
+			xprt_disconnect(xprt);
+	}
+	xprt_release_write(xprt, task);
+	return;
+ out_receive:
+	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
+	/* Set the task's receive timeout value */
+	spin_lock_bh(&xprt->sock_lock);
+	if (!xprt->nocong) {
+		int timer = task->tk_msg.rpc_proc->p_timer;
+		task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer);
+		task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries;
+		if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0)
+			task->tk_timeout = xprt->timeout.to_maxval;
+	} else
+		task->tk_timeout = req->rq_timeout;
+	/* Don't race with disconnect */
+	if (!xprt_connected(xprt))
+		task->tk_status = -ENOTCONN;
+	else if (!req->rq_received)
+		rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+	__xprt_release_write(xprt, task);
+	spin_unlock_bh(&xprt->sock_lock);
+}
+
+/*
+ * Reserve an RPC call slot.
+ */
+static inline void
+do_xprt_reserve(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+
+	task->tk_status = 0;
+	if (task->tk_rqstp)
+		return;
+	if (!list_empty(&xprt->free)) {
+		struct rpc_rqst	*req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
+		list_del_init(&req->rq_list);
+		task->tk_rqstp = req;
+		xprt_request_init(task, xprt);
+		return;
+	}
+	dprintk("RPC:      waiting for request slot\n");
+	task->tk_status = -EAGAIN;
+	task->tk_timeout = 0;
+	rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
+}
+
+void
+xprt_reserve(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+
+	task->tk_status = -EIO;
+	if (!xprt->shutdown) {
+		spin_lock(&xprt->xprt_lock);
+		do_xprt_reserve(task);
+		spin_unlock(&xprt->xprt_lock);
+		if (task->tk_rqstp)
+			del_timer_sync(&xprt->timer);
+	}
+}
+
+/*
+ * Allocate a 'unique' XID
+ */
+static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+	return xprt->xid++;
+}
+
+static inline void xprt_init_xid(struct rpc_xprt *xprt)
+{
+	get_random_bytes(&xprt->xid, sizeof(xprt->xid));
+}
+
+/*
+ * Initialize RPC request
+ */
+static void
+xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+
+	req->rq_timeout = xprt->timeout.to_initval;
+	req->rq_task	= task;
+	req->rq_xprt    = xprt;
+	req->rq_xid     = xprt_alloc_xid(xprt);
+	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
+			req, ntohl(req->rq_xid));
+}
+
+/*
+ * Release an RPC call slot
+ */
+void
+xprt_release(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+	struct rpc_rqst	*req;
+
+	if (!(req = task->tk_rqstp))
+		return;
+	spin_lock_bh(&xprt->sock_lock);
+	__xprt_release_write(xprt, task);
+	__xprt_put_cong(xprt, req);
+	if (!list_empty(&req->rq_list))
+		list_del(&req->rq_list);
+	xprt->last_used = jiffies;
+	if (list_empty(&xprt->recv) && !xprt->shutdown)
+		mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT);
+	spin_unlock_bh(&xprt->sock_lock);
+	task->tk_rqstp = NULL;
+	memset(req, 0, sizeof(*req));	/* mark unused */
+
+	dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
+
+	spin_lock(&xprt->xprt_lock);
+	list_add(&req->rq_list, &xprt->free);
+	xprt_clear_backlog(xprt);
+	spin_unlock(&xprt->xprt_lock);
+}
+
+/*
+ * Set default timeout parameters
+ */
+static void
+xprt_default_timeout(struct rpc_timeout *to, int proto)
+{
+	if (proto == IPPROTO_UDP)
+		xprt_set_timeout(to, 5,  5 * HZ);
+	else
+		xprt_set_timeout(to, 5, 60 * HZ);
+}
+
+/*
+ * Set constant timeout
+ */
+void
+xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
+{
+	to->to_initval   = 
+	to->to_increment = incr;
+	to->to_maxval    = incr * retr;
+	to->to_retries   = retr;
+	to->to_exponential = 0;
+}
+
+unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+
+/*
+ * Initialize an RPC client
+ */
+static struct rpc_xprt *
+xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
+{
+	struct rpc_xprt	*xprt;
+	unsigned int entries;
+	size_t slot_table_size;
+	struct rpc_rqst	*req;
+
+	dprintk("RPC:      setting up %s transport...\n",
+				proto == IPPROTO_UDP? "UDP" : "TCP");
+
+	entries = (proto == IPPROTO_TCP)?
+		xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries;
+
+	if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
+		return ERR_PTR(-ENOMEM);
+	memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
+	xprt->max_reqs = entries;
+	slot_table_size = entries * sizeof(xprt->slot[0]);
+	xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
+	if (xprt->slot == NULL) {
+		kfree(xprt);
+		return ERR_PTR(-ENOMEM);
+	}
+	memset(xprt->slot, 0, slot_table_size);
+
+	xprt->addr = *ap;
+	xprt->prot = proto;
+	xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
+	if (xprt->stream) {
+		xprt->cwnd = RPC_MAXCWND(xprt);
+		xprt->nocong = 1;
+		xprt->max_payload = (1U << 31) - 1;
+	} else {
+		xprt->cwnd = RPC_INITCWND;
+		xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
+	}
+	spin_lock_init(&xprt->sock_lock);
+	spin_lock_init(&xprt->xprt_lock);
+	init_waitqueue_head(&xprt->cong_wait);
+
+	INIT_LIST_HEAD(&xprt->free);
+	INIT_LIST_HEAD(&xprt->recv);
+	INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt);
+	INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt);
+	init_timer(&xprt->timer);
+	xprt->timer.function = xprt_init_autodisconnect;
+	xprt->timer.data = (unsigned long) xprt;
+	xprt->last_used = jiffies;
+	xprt->port = XPRT_MAX_RESVPORT;
+
+	/* Set timeout parameters */
+	if (to) {
+		xprt->timeout = *to;
+	} else
+		xprt_default_timeout(&xprt->timeout, xprt->prot);
+
+	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
+	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
+	rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+	/* initialize free list */
+	for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--)
+		list_add(&req->rq_list, &xprt->free);
+
+	xprt_init_xid(xprt);
+
+	/* Check whether we want to use a reserved port */
+	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
+
+	dprintk("RPC:      created transport %p with %u slots\n", xprt,
+			xprt->max_reqs);
+	
+	return xprt;
+}
+
+/*
+ * Bind to a reserved port
+ */
+static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
+{
+	struct sockaddr_in myaddr = {
+		.sin_family = AF_INET,
+	};
+	int		err, port;
+
+	/* Were we already bound to a given port? Try to reuse it */
+	port = xprt->port;
+	do {
+		myaddr.sin_port = htons(port);
+		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+						sizeof(myaddr));
+		if (err == 0) {
+			xprt->port = port;
+			return 0;
+		}
+		if (--port == 0)
+			port = XPRT_MAX_RESVPORT;
+	} while (err == -EADDRINUSE && port != xprt->port);
+
+	printk("RPC: Can't bind to reserved port (%d).\n", -err);
+	return err;
+}
+
+static void
+xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
+{
+	struct sock	*sk = sock->sk;
+
+	if (xprt->inet)
+		return;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	sk->sk_user_data = xprt;
+	xprt->old_data_ready = sk->sk_data_ready;
+	xprt->old_state_change = sk->sk_state_change;
+	xprt->old_write_space = sk->sk_write_space;
+	if (xprt->prot == IPPROTO_UDP) {
+		sk->sk_data_ready = udp_data_ready;
+		sk->sk_no_check = UDP_CSUM_NORCV;
+		xprt_set_connected(xprt);
+	} else {
+		tcp_sk(sk)->nonagle = 1;	/* disable Nagle's algorithm */
+		sk->sk_data_ready = tcp_data_ready;
+		sk->sk_state_change = tcp_state_change;
+		xprt_clear_connected(xprt);
+	}
+	sk->sk_write_space = xprt_write_space;
+
+	/* Reset to new socket */
+	xprt->sock = sock;
+	xprt->inet = sk;
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	return;
+}
+
+/*
+ * Set socket buffer length
+ */
+void
+xprt_sock_setbufsize(struct rpc_xprt *xprt)
+{
+	struct sock *sk = xprt->inet;
+
+	if (xprt->stream)
+		return;
+	if (xprt->rcvsize) {
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+		sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs *  2;
+	}
+	if (xprt->sndsize) {
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+		sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
+		sk->sk_write_space(sk);
+	}
+}
+
+/*
+ * Datastream sockets are created here, but xprt_connect will create
+ * and connect stream sockets.
+ */
+static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport)
+{
+	struct socket	*sock;
+	int		type, err;
+
+	dprintk("RPC:      xprt_create_socket(%s %d)\n",
+			   (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
+
+	type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+
+	if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) {
+		printk("RPC: can't create socket (%d).\n", -err);
+		return NULL;
+	}
+
+	/* If the caller has the capability, bind to a reserved port */
+	if (resvport && xprt_bindresvport(xprt, sock) < 0) {
+		printk("RPC: can't bind to reserved port.\n");
+		goto failed;
+	}
+
+	return sock;
+
+failed:
+	sock_release(sock);
+	return NULL;
+}
+
+/*
+ * Create an RPC client transport given the protocol and peer address.
+ */
+struct rpc_xprt *
+xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
+{
+	struct rpc_xprt	*xprt;
+
+	xprt = xprt_setup(proto, sap, to);
+	if (IS_ERR(xprt))
+		dprintk("RPC:      xprt_create_proto failed\n");
+	else
+		dprintk("RPC:      xprt_create_proto created xprt %p\n", xprt);
+	return xprt;
+}
+
+/*
+ * Prepare for transport shutdown.
+ */
+static void
+xprt_shutdown(struct rpc_xprt *xprt)
+{
+	xprt->shutdown = 1;
+	rpc_wake_up(&xprt->sending);
+	rpc_wake_up(&xprt->resend);
+	rpc_wake_up(&xprt->pending);
+	rpc_wake_up(&xprt->backlog);
+	wake_up(&xprt->cong_wait);
+	del_timer_sync(&xprt->timer);
+}
+
+/*
+ * Clear the xprt backlog queue
+ */
+static int
+xprt_clear_backlog(struct rpc_xprt *xprt) {
+	rpc_wake_up_next(&xprt->backlog);
+	wake_up(&xprt->cong_wait);
+	return 1;
+}
+
+/*
+ * Destroy an RPC transport, killing off all requests.
+ */
+int
+xprt_destroy(struct rpc_xprt *xprt)
+{
+	dprintk("RPC:      destroying transport %p\n", xprt);
+	xprt_shutdown(xprt);
+	xprt_disconnect(xprt);
+	xprt_close(xprt);
+	kfree(xprt->slot);
+	kfree(xprt);
+
+	return 0;
+}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
new file mode 100644
index 00000000000..3f6e31069c5
--- /dev/null
+++ b/net/sysctl_net.c
@@ -0,0 +1,65 @@
+/* -*- linux-c -*-
+ * sysctl_net.c: sysctl interface to net subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net directories for each protocol family. [MS]
+ *
+ * $Log: sysctl_net.c,v $
+ * Revision 1.2  1996/05/08  20:24:40  shaver
+ * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and
+ * NET_IPV4_IP_FORWARD.
+ *
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+
+#ifdef CONFIG_INET
+extern struct ctl_table ipv4_table[];
+#endif
+
+extern struct ctl_table core_table[];
+
+#ifdef CONFIG_NET
+extern struct ctl_table ether_table[];
+#endif
+
+#ifdef CONFIG_TR
+extern struct ctl_table tr_table[];
+#endif
+
+struct ctl_table net_table[] = {
+	{
+		.ctl_name	= NET_CORE,
+		.procname	= "core",
+		.mode		= 0555,
+		.child		= core_table,
+	},
+#ifdef CONFIG_NET
+	{
+		.ctl_name	= NET_ETHER,
+		.procname	= "ethernet",
+		.mode		= 0555,
+		.child		= ether_table,
+	},
+#endif
+#ifdef CONFIG_INET
+	{
+		.ctl_name	= NET_IPV4,
+		.procname	= "ipv4",
+		.mode		= 0555,
+		.child		= ipv4_table
+	},
+#endif
+#ifdef CONFIG_TR
+	{
+		.ctl_name	= NET_TR,
+		.procname	= "token-ring",
+		.mode		= 0555,
+		.child		= tr_table,
+	},
+#endif
+	{ 0 },
+};
diff --git a/net/unix/Makefile b/net/unix/Makefile
new file mode 100644
index 00000000000..b852a2bde9a
--- /dev/null
+++ b/net/unix/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux unix domain socket layer.
+#
+
+obj-$(CONFIG_UNIX)	+= unix.o
+
+unix-y			:= af_unix.o garbage.o
+unix-$(CONFIG_SYSCTL)	+= sysctl_net_unix.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
new file mode 100644
index 00000000000..acc73fe6869
--- /dev/null
+++ b/net/unix/af_unix.c
@@ -0,0 +1,2098 @@
+/*
+ * NET4:	Implementation of BSD Unix domain sockets.
+ *
+ * Authors:	Alan Cox, <alan.cox@linux.org>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
+ *
+ * Fixes:
+ *		Linus Torvalds	:	Assorted bug cures.
+ *		Niibe Yutaka	:	async I/O support.
+ *		Carsten Paeth	:	PF_UNIX check, address fixes.
+ *		Alan Cox	:	Limit size of allocated blocks.
+ *		Alan Cox	:	Fixed the stupid socketpair bug.
+ *		Alan Cox	:	BSD compatibility fine tuning.
+ *		Alan Cox	:	Fixed a bug in connect when interrupted.
+ *		Alan Cox	:	Sorted out a proper draft version of
+ *					file descriptor passing hacked up from
+ *					Mike Shaver's work.
+ *		Marty Leisner	:	Fixes to fd passing
+ *		Nick Nevin	:	recvmsg bugfix.
+ *		Alan Cox	:	Started proper garbage collector
+ *		Heiko EiBfeldt	:	Missing verify_area check
+ *		Alan Cox	:	Started POSIXisms
+ *		Andreas Schwab	:	Replace inode by dentry for proper
+ *					reference counting
+ *		Kirk Petersen	:	Made this a module
+ *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
+ *					Lots of bug fixes.
+ *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
+ *					by above two patches.
+ *	     Andrea Arcangeli	:	If possible we block in connect(2)
+ *					if the max backlog of the listen socket
+ *					is been reached. This won't break
+ *					old apps and it will avoid huge amount
+ *					of socks hashed (this for unix_gc()
+ *					performances reasons).
+ *					Security fix that limits the max
+ *					number of socks to 2*max_files and
+ *					the number of skb queueable in the
+ *					dgram receiver.
+ *		Artur Skawina   :	Hash function optimizations
+ *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
+ *	      Malcolm Beattie   :	Set peercred for socketpair
+ *	     Michal Ostrowski   :       Module initialization cleanup.
+ *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
+ *	     				the core infrastructure is doing that
+ *	     				for all net proto families now (2.5.69+)
+ *
+ *
+ * Known differences from reference BSD that was tested:
+ *
+ *	[TO FIX]
+ *	ECONNREFUSED is not returned from one end of a connected() socket to the
+ *		other the moment one end closes.
+ *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
+ *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
+ *	[NOT TO FIX]
+ *	accept() returns a path name even if the connecting socket has closed
+ *		in the meantime (BSD loses the path and gives up).
+ *	accept() returns 0 length path for an unbound connector. BSD returns 16
+ *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
+ *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
+ *	BSD af_unix apparently has connect forgetting to block properly.
+ *		(need to check this with the POSIX spec in detail)
+ *
+ * Differences from 2.0.0-11-... (ANK)
+ *	Bug fixes and improvements.
+ *		- client shutdown killed server socket.
+ *		- removed all useless cli/sti pairs.
+ *
+ *	Semantic changes/extensions.
+ *		- generic control message passing.
+ *		- SCM_CREDENTIALS control message.
+ *		- "Abstract" (not FS based) socket bindings.
+ *		  Abstract names are sequences of bytes (not zero terminated)
+ *		  started by 0, so that this name space does not intersect
+ *		  with BSD names.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <linux/tcp.h>
+#include <net/af_unix.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/scm.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/smp_lock.h>
+#include <linux/rtnetlink.h>
+#include <linux/mount.h>
+#include <net/checksum.h>
+#include <linux/security.h>
+
+int sysctl_unix_max_dgram_qlen = 10;
+
+struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+DEFINE_RWLOCK(unix_table_lock);
+static atomic_t unix_nr_socks = ATOMIC_INIT(0);
+
+#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
+
+#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
+
+/*
+ *  SMP locking strategy:
+ *    hash table is protected with rwlock unix_table_lock
+ *    each socket state is protected by separate rwlock.
+ */
+
+static inline unsigned unix_hash_fold(unsigned hash)
+{
+	hash ^= hash>>16;
+	hash ^= hash>>8;
+	return hash&(UNIX_HASH_SIZE-1);
+}
+
+#define unix_peer(sk) (unix_sk(sk)->peer)
+
+static inline int unix_our_peer(struct sock *sk, struct sock *osk)
+{
+	return unix_peer(osk) == sk;
+}
+
+static inline int unix_may_send(struct sock *sk, struct sock *osk)
+{
+	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
+}
+
+static struct sock *unix_peer_get(struct sock *s)
+{
+	struct sock *peer;
+
+	unix_state_rlock(s);
+	peer = unix_peer(s);
+	if (peer)
+		sock_hold(peer);
+	unix_state_runlock(s);
+	return peer;
+}
+
+static inline void unix_release_addr(struct unix_address *addr)
+{
+	if (atomic_dec_and_test(&addr->refcnt))
+		kfree(addr);
+}
+
+/*
+ *	Check unix socket name:
+ *		- should be not zero length.
+ *	        - if started by not zero, should be NULL terminated (FS object)
+ *		- if started by zero, it is abstract name.
+ */
+ 
+static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
+{
+	if (len <= sizeof(short) || len > sizeof(*sunaddr))
+		return -EINVAL;
+	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
+		return -EINVAL;
+	if (sunaddr->sun_path[0]) {
+		/*
+		 * This may look like an off by one error but it is a bit more
+		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
+		 * sun_path[108] doesnt as such exist.  However in kernel space
+		 * we are guaranteed that it is a valid memory location in our
+		 * kernel address buffer.
+		 */
+		((char *)sunaddr)[len]=0;
+		len = strlen(sunaddr->sun_path)+1+sizeof(short);
+		return len;
+	}
+
+	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
+	return len;
+}
+
+static void __unix_remove_socket(struct sock *sk)
+{
+	sk_del_node_init(sk);
+}
+
+static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
+{
+	BUG_TRAP(sk_unhashed(sk));
+	sk_add_node(sk, list);
+}
+
+static inline void unix_remove_socket(struct sock *sk)
+{
+	write_lock(&unix_table_lock);
+	__unix_remove_socket(sk);
+	write_unlock(&unix_table_lock);
+}
+
+static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
+{
+	write_lock(&unix_table_lock);
+	__unix_insert_socket(list, sk);
+	write_unlock(&unix_table_lock);
+}
+
+static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
+					      int len, int type, unsigned hash)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+		struct unix_sock *u = unix_sk(s);
+
+		if (u->addr->len == len &&
+		    !memcmp(u->addr->name, sunname, len))
+			goto found;
+	}
+	s = NULL;
+found:
+	return s;
+}
+
+static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
+						   int len, int type,
+						   unsigned hash)
+{
+	struct sock *s;
+
+	read_lock(&unix_table_lock);
+	s = __unix_find_socket_byname(sunname, len, type, hash);
+	if (s)
+		sock_hold(s);
+	read_unlock(&unix_table_lock);
+	return s;
+}
+
+static struct sock *unix_find_socket_byinode(struct inode *i)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	read_lock(&unix_table_lock);
+	sk_for_each(s, node,
+		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
+		struct dentry *dentry = unix_sk(s)->dentry;
+
+		if(dentry && dentry->d_inode == i)
+		{
+			sock_hold(s);
+			goto found;
+		}
+	}
+	s = NULL;
+found:
+	read_unlock(&unix_table_lock);
+	return s;
+}
+
+static inline int unix_writable(struct sock *sk)
+{
+	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+}
+
+static void unix_write_space(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (unix_writable(sk)) {
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+		sk_wake_async(sk, 2, POLL_OUT);
+	}
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/* When dgram socket disconnects (or changes its peer), we clear its receive
+ * queue of packets arrived from previous peer. First, it allows to do
+ * flow control based only on wmem_alloc; second, sk connected to peer
+ * may receive messages only from that peer. */
+static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
+{
+	if (skb_queue_len(&sk->sk_receive_queue)) {
+		skb_queue_purge(&sk->sk_receive_queue);
+		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
+
+		/* If one link of bidirectional dgram pipe is disconnected,
+		 * we signal error. Messages are lost. Do not make this,
+		 * when peer was not connected to us.
+		 */
+		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
+			other->sk_err = ECONNRESET;
+			other->sk_error_report(other);
+		}
+	}
+}
+
+static void unix_sock_destructor(struct sock *sk)
+{
+	struct unix_sock *u = unix_sk(sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(sk_unhashed(sk));
+	BUG_TRAP(!sk->sk_socket);
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Attempt to release alive unix socket: %p\n", sk);
+		return;
+	}
+
+	if (u->addr)
+		unix_release_addr(u->addr);
+
+	atomic_dec(&unix_nr_socks);
+#ifdef UNIX_REFCNT_DEBUG
+	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
+#endif
+}
+
+static int unix_release_sock (struct sock *sk, int embrion)
+{
+	struct unix_sock *u = unix_sk(sk);
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	struct sock *skpair;
+	struct sk_buff *skb;
+	int state;
+
+	unix_remove_socket(sk);
+
+	/* Clear state */
+	unix_state_wlock(sk);
+	sock_orphan(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+	dentry	     = u->dentry;
+	u->dentry    = NULL;
+	mnt	     = u->mnt;
+	u->mnt	     = NULL;
+	state = sk->sk_state;
+	sk->sk_state = TCP_CLOSE;
+	unix_state_wunlock(sk);
+
+	wake_up_interruptible_all(&u->peer_wait);
+
+	skpair=unix_peer(sk);
+
+	if (skpair!=NULL) {
+		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+			unix_state_wlock(skpair);
+			/* No more writes */
+			skpair->sk_shutdown = SHUTDOWN_MASK;
+			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+				skpair->sk_err = ECONNRESET;
+			unix_state_wunlock(skpair);
+			skpair->sk_state_change(skpair);
+			read_lock(&skpair->sk_callback_lock);
+			sk_wake_async(skpair,1,POLL_HUP);
+			read_unlock(&skpair->sk_callback_lock);
+		}
+		sock_put(skpair); /* It may now die */
+		unix_peer(sk) = NULL;
+	}
+
+	/* Try to flush out this socket. Throw out buffers at least */
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		if (state==TCP_LISTEN)
+			unix_release_sock(skb->sk, 1);
+		/* passed fds are erased in the kfree_skb hook	      */
+		kfree_skb(skb);
+	}
+
+	if (dentry) {
+		dput(dentry);
+		mntput(mnt);
+	}
+
+	sock_put(sk);
+
+	/* ---- Socket is dead now and most probably destroyed ---- */
+
+	/*
+	 * Fixme: BSD difference: In BSD all sockets connected to use get
+	 *	  ECONNRESET and we die on the spot. In Linux we behave
+	 *	  like files and pipes do and wait for the last
+	 *	  dereference.
+	 *
+	 * Can't we simply set sock->err?
+	 *
+	 *	  What the above comment does talk about? --ANK(980817)
+	 */
+
+	if (atomic_read(&unix_tot_inflight))
+		unix_gc();		/* Garbage collect fds */	
+
+	return 0;
+}
+
+static int unix_listen(struct socket *sock, int backlog)
+{
+	int err;
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+
+	err = -EOPNOTSUPP;
+	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
+		goto out;			/* Only stream/seqpacket sockets accept */
+	err = -EINVAL;
+	if (!u->addr)
+		goto out;			/* No listens on an unbound socket */
+	unix_state_wlock(sk);
+	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
+		goto out_unlock;
+	if (backlog > sk->sk_max_ack_backlog)
+		wake_up_interruptible_all(&u->peer_wait);
+	sk->sk_max_ack_backlog	= backlog;
+	sk->sk_state		= TCP_LISTEN;
+	/* set credentials so connect can copy them */
+	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.uid	= current->euid;
+	sk->sk_peercred.gid	= current->egid;
+	err = 0;
+
+out_unlock:
+	unix_state_wunlock(sk);
+out:
+	return err;
+}
+
+static int unix_release(struct socket *);
+static int unix_bind(struct socket *, struct sockaddr *, int);
+static int unix_stream_connect(struct socket *, struct sockaddr *,
+			       int addr_len, int flags);
+static int unix_socketpair(struct socket *, struct socket *);
+static int unix_accept(struct socket *, struct socket *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
+static int unix_ioctl(struct socket *, unsigned int, unsigned long);
+static int unix_shutdown(struct socket *, int);
+static int unix_stream_sendmsg(struct kiocb *, struct socket *,
+			       struct msghdr *, size_t);
+static int unix_stream_recvmsg(struct kiocb *, struct socket *,
+			       struct msghdr *, size_t, int);
+static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
+			      struct msghdr *, size_t);
+static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
+			      struct msghdr *, size_t, int);
+static int unix_dgram_connect(struct socket *, struct sockaddr *,
+			      int, int);
+static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
+				  struct msghdr *, size_t);
+
+static struct proto_ops unix_stream_ops = {
+	.family =	PF_UNIX,
+	.owner =	THIS_MODULE,
+	.release =	unix_release,
+	.bind =		unix_bind,
+	.connect =	unix_stream_connect,
+	.socketpair =	unix_socketpair,
+	.accept =	unix_accept,
+	.getname =	unix_getname,
+	.poll =		unix_poll,
+	.ioctl =	unix_ioctl,
+	.listen =	unix_listen,
+	.shutdown =	unix_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	unix_stream_sendmsg,
+	.recvmsg =	unix_stream_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct proto_ops unix_dgram_ops = {
+	.family =	PF_UNIX,
+	.owner =	THIS_MODULE,
+	.release =	unix_release,
+	.bind =		unix_bind,
+	.connect =	unix_dgram_connect,
+	.socketpair =	unix_socketpair,
+	.accept =	sock_no_accept,
+	.getname =	unix_getname,
+	.poll =		datagram_poll,
+	.ioctl =	unix_ioctl,
+	.listen =	sock_no_listen,
+	.shutdown =	unix_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	unix_dgram_sendmsg,
+	.recvmsg =	unix_dgram_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct proto_ops unix_seqpacket_ops = {
+	.family =	PF_UNIX,
+	.owner =	THIS_MODULE,
+	.release =	unix_release,
+	.bind =		unix_bind,
+	.connect =	unix_stream_connect,
+	.socketpair =	unix_socketpair,
+	.accept =	unix_accept,
+	.getname =	unix_getname,
+	.poll =		datagram_poll,
+	.ioctl =	unix_ioctl,
+	.listen =	unix_listen,
+	.shutdown =	unix_shutdown,
+	.setsockopt =	sock_no_setsockopt,
+	.getsockopt =	sock_no_getsockopt,
+	.sendmsg =	unix_seqpacket_sendmsg,
+	.recvmsg =	unix_dgram_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+static struct proto unix_proto = {
+	.name	  = "UNIX",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct unix_sock),
+};
+
+static struct sock * unix_create1(struct socket *sock)
+{
+	struct sock *sk = NULL;
+	struct unix_sock *u;
+
+	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
+		goto out;
+
+	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
+	if (!sk)
+		goto out;
+
+	atomic_inc(&unix_nr_socks);
+
+	sock_init_data(sock,sk);
+
+	sk->sk_write_space	= unix_write_space;
+	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
+	sk->sk_destruct		= unix_sock_destructor;
+	u	  = unix_sk(sk);
+	u->dentry = NULL;
+	u->mnt	  = NULL;
+	rwlock_init(&u->lock);
+	atomic_set(&u->inflight, sock ? 0 : -1);
+	init_MUTEX(&u->readsem); /* single task reading lock */
+	init_waitqueue_head(&u->peer_wait);
+	unix_insert_socket(unix_sockets_unbound, sk);
+out:
+	return sk;
+}
+
+static int unix_create(struct socket *sock, int protocol)
+{
+	if (protocol && protocol != PF_UNIX)
+		return -EPROTONOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+
+	switch (sock->type) {
+	case SOCK_STREAM:
+		sock->ops = &unix_stream_ops;
+		break;
+		/*
+		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
+		 *	nothing uses it.
+		 */
+	case SOCK_RAW:
+		sock->type=SOCK_DGRAM;
+	case SOCK_DGRAM:
+		sock->ops = &unix_dgram_ops;
+		break;
+	case SOCK_SEQPACKET:
+		sock->ops = &unix_seqpacket_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	return unix_create1(sock) ? 0 : -ENOMEM;
+}
+
+static int unix_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	sock->sk = NULL;
+
+	return unix_release_sock (sk, 0);
+}
+
+static int unix_autobind(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+	static u32 ordernum = 1;
+	struct unix_address * addr;
+	int err;
+
+	down(&u->readsem);
+
+	err = 0;
+	if (u->addr)
+		goto out;
+
+	err = -ENOMEM;
+	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
+	if (!addr)
+		goto out;
+
+	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
+	addr->name->sun_family = AF_UNIX;
+	atomic_set(&addr->refcnt, 1);
+
+retry:
+	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
+	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
+
+	write_lock(&unix_table_lock);
+	ordernum = (ordernum+1)&0xFFFFF;
+
+	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
+				      addr->hash)) {
+		write_unlock(&unix_table_lock);
+		/* Sanity yield. It is unusual case, but yet... */
+		if (!(ordernum&0xFF))
+			yield();
+		goto retry;
+	}
+	addr->hash ^= sk->sk_type;
+
+	__unix_remove_socket(sk);
+	u->addr = addr;
+	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
+	write_unlock(&unix_table_lock);
+	err = 0;
+
+out:	up(&u->readsem);
+	return err;
+}
+
+static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
+				    int type, unsigned hash, int *error)
+{
+	struct sock *u;
+	struct nameidata nd;
+	int err = 0;
+	
+	if (sunname->sun_path[0]) {
+		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+		if (err)
+			goto fail;
+		err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
+		if (err)
+			goto put_fail;
+
+		err = -ECONNREFUSED;
+		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
+			goto put_fail;
+		u=unix_find_socket_byinode(nd.dentry->d_inode);
+		if (!u)
+			goto put_fail;
+
+		if (u->sk_type == type)
+			touch_atime(nd.mnt, nd.dentry);
+
+		path_release(&nd);
+
+		err=-EPROTOTYPE;
+		if (u->sk_type != type) {
+			sock_put(u);
+			goto fail;
+		}
+	} else {
+		err = -ECONNREFUSED;
+		u=unix_find_socket_byname(sunname, len, type, hash);
+		if (u) {
+			struct dentry *dentry;
+			dentry = unix_sk(u)->dentry;
+			if (dentry)
+				touch_atime(unix_sk(u)->mnt, dentry);
+		} else
+			goto fail;
+	}
+	return u;
+
+put_fail:
+	path_release(&nd);
+fail:
+	*error=err;
+	return NULL;
+}
+
+
+static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	struct dentry * dentry = NULL;
+	struct nameidata nd;
+	int err;
+	unsigned hash;
+	struct unix_address *addr;
+	struct hlist_head *list;
+
+	err = -EINVAL;
+	if (sunaddr->sun_family != AF_UNIX)
+		goto out;
+
+	if (addr_len==sizeof(short)) {
+		err = unix_autobind(sock);
+		goto out;
+	}
+
+	err = unix_mkname(sunaddr, addr_len, &hash);
+	if (err < 0)
+		goto out;
+	addr_len = err;
+
+	down(&u->readsem);
+
+	err = -EINVAL;
+	if (u->addr)
+		goto out_up;
+
+	err = -ENOMEM;
+	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+	if (!addr)
+		goto out_up;
+
+	memcpy(addr->name, sunaddr, addr_len);
+	addr->len = addr_len;
+	addr->hash = hash ^ sk->sk_type;
+	atomic_set(&addr->refcnt, 1);
+
+	if (sunaddr->sun_path[0]) {
+		unsigned int mode;
+		err = 0;
+		/*
+		 * Get the parent directory, calculate the hash for last
+		 * component.
+		 */
+		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
+		if (err)
+			goto out_mknod_parent;
+		/*
+		 * Yucky last component or no last component at all?
+		 * (foo/., foo/.., /////)
+		 */
+		err = -EEXIST;
+		if (nd.last_type != LAST_NORM)
+			goto out_mknod;
+		/*
+		 * Lock the directory.
+		 */
+		down(&nd.dentry->d_inode->i_sem);
+		/*
+		 * Do the final lookup.
+		 */
+		dentry = lookup_hash(&nd.last, nd.dentry);
+		err = PTR_ERR(dentry);
+		if (IS_ERR(dentry))
+			goto out_mknod_unlock;
+		err = -ENOENT;
+		/*
+		 * Special case - lookup gave negative, but... we had foo/bar/
+		 * From the vfs_mknod() POV we just have a negative dentry -
+		 * all is fine. Let's be bastards - you had / on the end, you've
+		 * been asking for (non-existent) directory. -ENOENT for you.
+		 */
+		if (nd.last.name[nd.last.len] && !dentry->d_inode)
+			goto out_mknod_dput;
+		/*
+		 * All right, let's create it.
+		 */
+		mode = S_IFSOCK |
+		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
+		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
+		if (err)
+			goto out_mknod_dput;
+		up(&nd.dentry->d_inode->i_sem);
+		dput(nd.dentry);
+		nd.dentry = dentry;
+
+		addr->hash = UNIX_HASH_SIZE;
+	}
+
+	write_lock(&unix_table_lock);
+
+	if (!sunaddr->sun_path[0]) {
+		err = -EADDRINUSE;
+		if (__unix_find_socket_byname(sunaddr, addr_len,
+					      sk->sk_type, hash)) {
+			unix_release_addr(addr);
+			goto out_unlock;
+		}
+
+		list = &unix_socket_table[addr->hash];
+	} else {
+		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
+		u->dentry = nd.dentry;
+		u->mnt    = nd.mnt;
+	}
+
+	err = 0;
+	__unix_remove_socket(sk);
+	u->addr = addr;
+	__unix_insert_socket(list, sk);
+
+out_unlock:
+	write_unlock(&unix_table_lock);
+out_up:
+	up(&u->readsem);
+out:
+	return err;
+
+out_mknod_dput:
+	dput(dentry);
+out_mknod_unlock:
+	up(&nd.dentry->d_inode->i_sem);
+out_mknod:
+	path_release(&nd);
+out_mknod_parent:
+	if (err==-EEXIST)
+		err=-EADDRINUSE;
+	unix_release_addr(addr);
+	goto out_up;
+}
+
+static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
+			      int alen, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
+	struct sock *other;
+	unsigned hash;
+	int err;
+
+	if (addr->sa_family != AF_UNSPEC) {
+		err = unix_mkname(sunaddr, alen, &hash);
+		if (err < 0)
+			goto out;
+		alen = err;
+
+		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
+		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
+			goto out;
+
+		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
+		if (!other)
+			goto out;
+
+		unix_state_wlock(sk);
+
+		err = -EPERM;
+		if (!unix_may_send(sk, other))
+			goto out_unlock;
+
+		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
+		if (err)
+			goto out_unlock;
+
+	} else {
+		/*
+		 *	1003.1g breaking connected state with AF_UNSPEC
+		 */
+		other = NULL;
+		unix_state_wlock(sk);
+	}
+
+	/*
+	 * If it was connected, reconnect.
+	 */
+	if (unix_peer(sk)) {
+		struct sock *old_peer = unix_peer(sk);
+		unix_peer(sk)=other;
+		unix_state_wunlock(sk);
+
+		if (other != old_peer)
+			unix_dgram_disconnected(sk, old_peer);
+		sock_put(old_peer);
+	} else {
+		unix_peer(sk)=other;
+		unix_state_wunlock(sk);
+	}
+ 	return 0;
+
+out_unlock:
+	unix_state_wunlock(sk);
+	sock_put(other);
+out:
+	return err;
+}
+
+static long unix_wait_for_peer(struct sock *other, long timeo)
+{
+	struct unix_sock *u = unix_sk(other);
+	int sched;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
+
+	sched = !sock_flag(other, SOCK_DEAD) &&
+		!(other->sk_shutdown & RCV_SHUTDOWN) &&
+		(skb_queue_len(&other->sk_receive_queue) >
+		 other->sk_max_ack_backlog);
+
+	unix_state_runlock(other);
+
+	if (sched)
+		timeo = schedule_timeout(timeo);
+
+	finish_wait(&u->peer_wait, &wait);
+	return timeo;
+}
+
+static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+			       int addr_len, int flags)
+{
+	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
+	struct sock *newsk = NULL;
+	struct sock *other = NULL;
+	struct sk_buff *skb = NULL;
+	unsigned hash;
+	int st;
+	int err;
+	long timeo;
+
+	err = unix_mkname(sunaddr, addr_len, &hash);
+	if (err < 0)
+		goto out;
+	addr_len = err;
+
+	if (test_bit(SOCK_PASSCRED, &sock->flags)
+		&& !u->addr && (err = unix_autobind(sock)) != 0)
+		goto out;
+
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	/* First of all allocate resources.
+	   If we will make it after state is locked,
+	   we will have to recheck all again in any case.
+	 */
+
+	err = -ENOMEM;
+
+	/* create new sock for complete connection */
+	newsk = unix_create1(NULL);
+	if (newsk == NULL)
+		goto out;
+
+	/* Allocate skb for sending to listening sock */
+	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
+	if (skb == NULL)
+		goto out;
+
+restart:
+	/*  Find listening sock. */
+	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
+	if (!other)
+		goto out;
+
+	/* Latch state of peer */
+	unix_state_rlock(other);
+
+	/* Apparently VFS overslept socket death. Retry. */
+	if (sock_flag(other, SOCK_DEAD)) {
+		unix_state_runlock(other);
+		sock_put(other);
+		goto restart;
+	}
+
+	err = -ECONNREFUSED;
+	if (other->sk_state != TCP_LISTEN)
+		goto out_unlock;
+
+	if (skb_queue_len(&other->sk_receive_queue) >
+	    other->sk_max_ack_backlog) {
+		err = -EAGAIN;
+		if (!timeo)
+			goto out_unlock;
+
+		timeo = unix_wait_for_peer(other, timeo);
+
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			goto out;
+		sock_put(other);
+		goto restart;
+        }
+
+	/* Latch our state.
+
+	   It is tricky place. We need to grab write lock and cannot
+	   drop lock on peer. It is dangerous because deadlock is
+	   possible. Connect to self case and simultaneous
+	   attempt to connect are eliminated by checking socket
+	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
+	   check this before attempt to grab lock.
+
+	   Well, and we have to recheck the state after socket locked.
+	 */
+	st = sk->sk_state;
+
+	switch (st) {
+	case TCP_CLOSE:
+		/* This is ok... continue with connect */
+		break;
+	case TCP_ESTABLISHED:
+		/* Socket is already connected */
+		err = -EISCONN;
+		goto out_unlock;
+	default:
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	unix_state_wlock(sk);
+
+	if (sk->sk_state != st) {
+		unix_state_wunlock(sk);
+		unix_state_runlock(other);
+		sock_put(other);
+		goto restart;
+	}
+
+	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
+	if (err) {
+		unix_state_wunlock(sk);
+		goto out_unlock;
+	}
+
+	/* The way is open! Fastly set all the necessary fields... */
+
+	sock_hold(sk);
+	unix_peer(newsk)	= sk;
+	newsk->sk_state		= TCP_ESTABLISHED;
+	newsk->sk_type		= sk->sk_type;
+	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.uid	= current->euid;
+	newsk->sk_peercred.gid	= current->egid;
+	newu = unix_sk(newsk);
+	newsk->sk_sleep		= &newu->peer_wait;
+	otheru = unix_sk(other);
+
+	/* copy address information from listening to new sock*/
+	if (otheru->addr) {
+		atomic_inc(&otheru->addr->refcnt);
+		newu->addr = otheru->addr;
+	}
+	if (otheru->dentry) {
+		newu->dentry	= dget(otheru->dentry);
+		newu->mnt	= mntget(otheru->mnt);
+	}
+
+	/* Set credentials */
+	sk->sk_peercred = other->sk_peercred;
+
+	sock_hold(newsk);
+	unix_peer(sk)	= newsk;
+	sock->state	= SS_CONNECTED;
+	sk->sk_state	= TCP_ESTABLISHED;
+
+	unix_state_wunlock(sk);
+
+	/* take ten and and send info to listening sock */
+	spin_lock(&other->sk_receive_queue.lock);
+	__skb_queue_tail(&other->sk_receive_queue, skb);
+	/* Undo artificially decreased inflight after embrion
+	 * is installed to listening socket. */
+	atomic_inc(&newu->inflight);
+	spin_unlock(&other->sk_receive_queue.lock);
+	unix_state_runlock(other);
+	other->sk_data_ready(other, 0);
+	sock_put(other);
+	return 0;
+
+out_unlock:
+	if (other)
+		unix_state_runlock(other);
+
+out:
+	if (skb)
+		kfree_skb(skb);
+	if (newsk)
+		unix_release_sock(newsk, 0);
+	if (other)
+		sock_put(other);
+	return err;
+}
+
+static int unix_socketpair(struct socket *socka, struct socket *sockb)
+{
+	struct sock *ska=socka->sk, *skb = sockb->sk;
+
+	/* Join our sockets back to back */
+	sock_hold(ska);
+	sock_hold(skb);
+	unix_peer(ska)=skb;
+	unix_peer(skb)=ska;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
+	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+
+	if (ska->sk_type != SOCK_DGRAM) {
+		ska->sk_state = TCP_ESTABLISHED;
+		skb->sk_state = TCP_ESTABLISHED;
+		socka->state  = SS_CONNECTED;
+		sockb->state  = SS_CONNECTED;
+	}
+	return 0;
+}
+
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sock *tsk;
+	struct sk_buff *skb;
+	int err;
+
+	err = -EOPNOTSUPP;
+	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
+		goto out;
+
+	err = -EINVAL;
+	if (sk->sk_state != TCP_LISTEN)
+		goto out;
+
+	/* If socket state is TCP_LISTEN it cannot change (for now...),
+	 * so that no locks are necessary.
+	 */
+
+	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
+	if (!skb) {
+		/* This means receive shutdown. */
+		if (err == 0)
+			err = -EINVAL;
+		goto out;
+	}
+
+	tsk = skb->sk;
+	skb_free_datagram(sk, skb);
+	wake_up_interruptible(&unix_sk(sk)->peer_wait);
+
+	/* attach accepted sock to socket */
+	unix_state_wlock(tsk);
+	newsock->state = SS_CONNECTED;
+	sock_graft(tsk, newsock);
+	unix_state_wunlock(tsk);
+	return 0;
+
+out:
+	return err;
+}
+
+
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct unix_sock *u;
+	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+	int err = 0;
+
+	if (peer) {
+		sk = unix_peer_get(sk);
+
+		err = -ENOTCONN;
+		if (!sk)
+			goto out;
+		err = 0;
+	} else {
+		sock_hold(sk);
+	}
+
+	u = unix_sk(sk);
+	unix_state_rlock(sk);
+	if (!u->addr) {
+		sunaddr->sun_family = AF_UNIX;
+		sunaddr->sun_path[0] = 0;
+		*uaddr_len = sizeof(short);
+	} else {
+		struct unix_address *addr = u->addr;
+
+		*uaddr_len = addr->len;
+		memcpy(sunaddr, addr->name, *uaddr_len);
+	}
+	unix_state_runlock(sk);
+	sock_put(sk);
+out:
+	return err;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+
+	scm->fp = UNIXCB(skb).fp;
+	skb->destructor = sock_wfree;
+	UNIXCB(skb).fp = NULL;
+
+	for (i=scm->fp->count-1; i>=0; i--)
+		unix_notinflight(scm->fp->fp[i]);
+}
+
+static void unix_destruct_fds(struct sk_buff *skb)
+{
+	struct scm_cookie scm;
+	memset(&scm, 0, sizeof(scm));
+	unix_detach_fds(&scm, skb);
+
+	/* Alas, it calls VFS */
+	/* So fscking what? fput() had been SMP-safe since the last Summer */
+	scm_destroy(&scm);
+	sock_wfree(skb);
+}
+
+static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+	for (i=scm->fp->count-1; i>=0; i--)
+		unix_inflight(scm->fp->fp[i]);
+	UNIXCB(skb).fp = scm->fp;
+	skb->destructor = unix_destruct_fds;
+	scm->fp = NULL;
+}
+
+/*
+ *	Send AF_UNIX data.
+ */
+
+static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			      struct msghdr *msg, size_t len)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+	struct sockaddr_un *sunaddr=msg->msg_name;
+	struct sock *other = NULL;
+	int namelen = 0; /* fake GCC */
+	int err;
+	unsigned hash;
+	struct sk_buff *skb;
+	long timeo;
+	struct scm_cookie tmp_scm;
+
+	if (NULL == siocb->scm)
+		siocb->scm = &tmp_scm;
+	err = scm_send(sock, msg, siocb->scm);
+	if (err < 0)
+		return err;
+
+	err = -EOPNOTSUPP;
+	if (msg->msg_flags&MSG_OOB)
+		goto out;
+
+	if (msg->msg_namelen) {
+		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
+		if (err < 0)
+			goto out;
+		namelen = err;
+	} else {
+		sunaddr = NULL;
+		err = -ENOTCONN;
+		other = unix_peer_get(sk);
+		if (!other)
+			goto out;
+	}
+
+	if (test_bit(SOCK_PASSCRED, &sock->flags)
+		&& !u->addr && (err = unix_autobind(sock)) != 0)
+		goto out;
+
+	err = -EMSGSIZE;
+	if (len > sk->sk_sndbuf - 32)
+		goto out;
+
+	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
+	if (skb==NULL)
+		goto out;
+
+	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+	if (siocb->scm->fp)
+		unix_attach_fds(siocb->scm, skb);
+
+	skb->h.raw = skb->data;
+	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+	if (err)
+		goto out_free;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+restart:
+	if (!other) {
+		err = -ECONNRESET;
+		if (sunaddr == NULL)
+			goto out_free;
+
+		other = unix_find_other(sunaddr, namelen, sk->sk_type,
+					hash, &err);
+		if (other==NULL)
+			goto out_free;
+	}
+
+	unix_state_rlock(other);
+	err = -EPERM;
+	if (!unix_may_send(sk, other))
+		goto out_unlock;
+
+	if (sock_flag(other, SOCK_DEAD)) {
+		/*
+		 *	Check with 1003.1g - what should
+		 *	datagram error
+		 */
+		unix_state_runlock(other);
+		sock_put(other);
+
+		err = 0;
+		unix_state_wlock(sk);
+		if (unix_peer(sk) == other) {
+			unix_peer(sk)=NULL;
+			unix_state_wunlock(sk);
+
+			unix_dgram_disconnected(sk, other);
+			sock_put(other);
+			err = -ECONNREFUSED;
+		} else {
+			unix_state_wunlock(sk);
+		}
+
+		other = NULL;
+		if (err)
+			goto out_free;
+		goto restart;
+	}
+
+	err = -EPIPE;
+	if (other->sk_shutdown & RCV_SHUTDOWN)
+		goto out_unlock;
+
+	if (sk->sk_type != SOCK_SEQPACKET) {
+		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
+		if (err)
+			goto out_unlock;
+	}
+
+	if (unix_peer(other) != sk &&
+	    (skb_queue_len(&other->sk_receive_queue) >
+	     other->sk_max_ack_backlog)) {
+		if (!timeo) {
+			err = -EAGAIN;
+			goto out_unlock;
+		}
+
+		timeo = unix_wait_for_peer(other, timeo);
+
+		err = sock_intr_errno(timeo);
+		if (signal_pending(current))
+			goto out_free;
+
+		goto restart;
+	}
+
+	skb_queue_tail(&other->sk_receive_queue, skb);
+	unix_state_runlock(other);
+	other->sk_data_ready(other, len);
+	sock_put(other);
+	scm_destroy(siocb->scm);
+	return len;
+
+out_unlock:
+	unix_state_runlock(other);
+out_free:
+	kfree_skb(skb);
+out:
+	if (other)
+		sock_put(other);
+	scm_destroy(siocb->scm);
+	return err;
+}
+
+		
+static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+	struct sock *sk = sock->sk;
+	struct sock *other = NULL;
+	struct sockaddr_un *sunaddr=msg->msg_name;
+	int err,size;
+	struct sk_buff *skb;
+	int sent=0;
+	struct scm_cookie tmp_scm;
+
+	if (NULL == siocb->scm)
+		siocb->scm = &tmp_scm;
+	err = scm_send(sock, msg, siocb->scm);
+	if (err < 0)
+		return err;
+
+	err = -EOPNOTSUPP;
+	if (msg->msg_flags&MSG_OOB)
+		goto out_err;
+
+	if (msg->msg_namelen) {
+		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
+		goto out_err;
+	} else {
+		sunaddr = NULL;
+		err = -ENOTCONN;
+		other = unix_peer_get(sk);
+		if (!other)
+			goto out_err;
+	}
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN)
+		goto pipe_err;
+
+	while(sent < len)
+	{
+		/*
+		 *	Optimisation for the fact that under 0.01% of X messages typically
+		 *	need breaking up.
+		 */
+
+		size=len-sent;
+
+		/* Keep two messages in the pipe so it schedules better */
+		if (size > sk->sk_sndbuf / 2 - 64)
+			size = sk->sk_sndbuf / 2 - 64;
+
+		if (size > SKB_MAX_ALLOC)
+			size = SKB_MAX_ALLOC;
+			
+		/*
+		 *	Grab a buffer
+		 */
+		 
+		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+
+		if (skb==NULL)
+			goto out_err;
+
+		/*
+		 *	If you pass two values to the sock_alloc_send_skb
+		 *	it tries to grab the large buffer with GFP_NOFS
+		 *	(which can fail easily), and if it fails grab the
+		 *	fallback size buffer which is under a page and will
+		 *	succeed. [Alan]
+		 */
+		size = min_t(int, size, skb_tailroom(skb));
+
+		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+		if (siocb->scm->fp)
+			unix_attach_fds(siocb->scm, skb);
+
+		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
+			kfree_skb(skb);
+			goto out_err;
+		}
+
+		unix_state_rlock(other);
+
+		if (sock_flag(other, SOCK_DEAD) ||
+		    (other->sk_shutdown & RCV_SHUTDOWN))
+			goto pipe_err_free;
+
+		skb_queue_tail(&other->sk_receive_queue, skb);
+		unix_state_runlock(other);
+		other->sk_data_ready(other, size);
+		sent+=size;
+	}
+	sock_put(other);
+
+	scm_destroy(siocb->scm);
+	siocb->scm = NULL;
+
+	return sent;
+
+pipe_err_free:
+	unix_state_runlock(other);
+	kfree_skb(skb);
+pipe_err:
+	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
+		send_sig(SIGPIPE,current,0);
+	err = -EPIPE;
+out_err:
+        if (other)
+		sock_put(other);
+	scm_destroy(siocb->scm);
+	siocb->scm = NULL;
+	return sent ? : err;
+}
+
+static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				  struct msghdr *msg, size_t len)
+{
+	int err;
+	struct sock *sk = sock->sk;
+	
+	err = sock_error(sk);
+	if (err)
+		return err;
+
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	if (msg->msg_namelen)
+		msg->msg_namelen = 0;
+
+	return unix_dgram_sendmsg(kiocb, sock, msg, len);
+}
+                                                                                            
+static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
+{
+	struct unix_sock *u = unix_sk(sk);
+
+	msg->msg_namelen = 0;
+	if (u->addr) {
+		msg->msg_namelen = u->addr->len;
+		memcpy(msg->msg_name, u->addr->name, u->addr->len);
+	}
+}
+
+static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t size,
+			      int flags)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
+	struct scm_cookie tmp_scm;
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+	int noblock = flags & MSG_DONTWAIT;
+	struct sk_buff *skb;
+	int err;
+
+	err = -EOPNOTSUPP;
+	if (flags&MSG_OOB)
+		goto out;
+
+	msg->msg_namelen = 0;
+
+	down(&u->readsem);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out_unlock;
+
+	wake_up_interruptible(&u->peer_wait);
+
+	if (msg->msg_name)
+		unix_copy_addr(msg, skb->sk);
+
+	if (size > skb->len)
+		size = skb->len;
+	else if (size < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
+	if (err)
+		goto out_free;
+
+	if (!siocb->scm) {
+		siocb->scm = &tmp_scm;
+		memset(&tmp_scm, 0, sizeof(tmp_scm));
+	}
+	siocb->scm->creds = *UNIXCREDS(skb);
+
+	if (!(flags & MSG_PEEK))
+	{
+		if (UNIXCB(skb).fp)
+			unix_detach_fds(siocb->scm, skb);
+	}
+	else 
+	{
+		/* It is questionable: on PEEK we could:
+		   - do not return fds - good, but too simple 8)
+		   - return fds, and do not return them on read (old strategy,
+		     apparently wrong)
+		   - clone fds (I chose it for now, it is the most universal
+		     solution)
+		
+	           POSIX 1003.1g does not actually define this clearly
+	           at all. POSIX 1003.1g doesn't define a lot of things
+	           clearly however!		     
+		   
+		*/
+		if (UNIXCB(skb).fp)
+			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+	}
+	err = size;
+
+	scm_recv(sock, msg, siocb->scm, flags);
+
+out_free:
+	skb_free_datagram(sk,skb);
+out_unlock:
+	up(&u->readsem);
+out:
+	return err;
+}
+
+/*
+ *	Sleep until data has arrive. But check for races..
+ */
+ 
+static long unix_stream_data_wait(struct sock * sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+
+	unix_state_rlock(sk);
+
+	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		if (skb_queue_len(&sk->sk_receive_queue) ||
+		    sk->sk_err ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+		    signal_pending(current) ||
+		    !timeo)
+			break;
+
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		unix_state_runlock(sk);
+		timeo = schedule_timeout(timeo);
+		unix_state_rlock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	}
+
+	finish_wait(sk->sk_sleep, &wait);
+	unix_state_runlock(sk);
+	return timeo;
+}
+
+
+
+static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+			       struct msghdr *msg, size_t size,
+			       int flags)
+{
+	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
+	struct scm_cookie tmp_scm;
+	struct sock *sk = sock->sk;
+	struct unix_sock *u = unix_sk(sk);
+	struct sockaddr_un *sunaddr=msg->msg_name;
+	int copied = 0;
+	int check_creds = 0;
+	int target;
+	int err = 0;
+	long timeo;
+
+	err = -EINVAL;
+	if (sk->sk_state != TCP_ESTABLISHED)
+		goto out;
+
+	err = -EOPNOTSUPP;
+	if (flags&MSG_OOB)
+		goto out;
+
+	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
+
+	msg->msg_namelen = 0;
+
+	/* Lock the socket to prevent queue disordering
+	 * while sleeps in memcpy_tomsg
+	 */
+
+	if (!siocb->scm) {
+		siocb->scm = &tmp_scm;
+		memset(&tmp_scm, 0, sizeof(tmp_scm));
+	}
+
+	down(&u->readsem);
+
+	do
+	{
+		int chunk;
+		struct sk_buff *skb;
+
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb==NULL)
+		{
+			if (copied >= target)
+				break;
+
+			/*
+			 *	POSIX 1003.1g mandates this order.
+			 */
+			 
+			if ((err = sock_error(sk)) != 0)
+				break;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+			err = -EAGAIN;
+			if (!timeo)
+				break;
+			up(&u->readsem);
+
+			timeo = unix_stream_data_wait(sk, timeo);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			down(&u->readsem);
+			continue;
+		}
+
+		if (check_creds) {
+			/* Never glue messages from different writers */
+			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+		} else {
+			/* Copy credentials */
+			siocb->scm->creds = *UNIXCREDS(skb);
+			check_creds = 1;
+		}
+
+		/* Copy address just once */
+		if (sunaddr)
+		{
+			unix_copy_addr(msg, skb->sk);
+			sunaddr = NULL;
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (copied == 0)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
+
+		/* Mark read part of skb as used */
+		if (!(flags & MSG_PEEK))
+		{
+			skb_pull(skb, chunk);
+
+			if (UNIXCB(skb).fp)
+				unix_detach_fds(siocb->scm, skb);
+
+			/* put the skb back if we didn't use it up.. */
+			if (skb->len)
+			{
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+
+			kfree_skb(skb);
+
+			if (siocb->scm->fp)
+				break;
+		}
+		else
+		{
+			/* It is questionable, see note in unix_dgram_recvmsg.
+			 */
+			if (UNIXCB(skb).fp)
+				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+
+	up(&u->readsem);
+	scm_recv(sock, msg, siocb->scm, flags);
+out:
+	return copied ? : err;
+}
+
+static int unix_shutdown(struct socket *sock, int mode)
+{
+	struct sock *sk = sock->sk;
+	struct sock *other;
+
+	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
+
+	if (mode) {
+		unix_state_wlock(sk);
+		sk->sk_shutdown |= mode;
+		other=unix_peer(sk);
+		if (other)
+			sock_hold(other);
+		unix_state_wunlock(sk);
+		sk->sk_state_change(sk);
+
+		if (other &&
+			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+
+			int peer_mode = 0;
+
+			if (mode&RCV_SHUTDOWN)
+				peer_mode |= SEND_SHUTDOWN;
+			if (mode&SEND_SHUTDOWN)
+				peer_mode |= RCV_SHUTDOWN;
+			unix_state_wlock(other);
+			other->sk_shutdown |= peer_mode;
+			unix_state_wunlock(other);
+			other->sk_state_change(other);
+			read_lock(&other->sk_callback_lock);
+			if (peer_mode == SHUTDOWN_MASK)
+				sk_wake_async(other,1,POLL_HUP);
+			else if (peer_mode & RCV_SHUTDOWN)
+				sk_wake_async(other,1,POLL_IN);
+			read_unlock(&other->sk_callback_lock);
+		}
+		if (other)
+			sock_put(other);
+	}
+	return 0;
+}
+
+static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	long amount=0;
+	int err;
+
+	switch(cmd)
+	{
+		case SIOCOUTQ:
+			amount = atomic_read(&sk->sk_wmem_alloc);
+			err = put_user(amount, (int __user *)arg);
+			break;
+		case SIOCINQ:
+		{
+			struct sk_buff *skb;
+
+			if (sk->sk_state == TCP_LISTEN) {
+				err = -EINVAL;
+				break;
+			}
+
+			spin_lock(&sk->sk_receive_queue.lock);
+			if (sk->sk_type == SOCK_STREAM ||
+			    sk->sk_type == SOCK_SEQPACKET) {
+				skb_queue_walk(&sk->sk_receive_queue, skb)
+					amount += skb->len;
+			} else {
+				skb = skb_peek(&sk->sk_receive_queue);
+				if (skb)
+					amount=skb->len;
+			}
+			spin_unlock(&sk->sk_receive_queue.lock);
+			err = put_user(amount, (int __user *)arg);
+			break;
+		}
+
+		default:
+			err = dev_ioctl(cmd, (void __user *)arg);
+			break;
+	}
+	return err;
+}
+
+static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err)
+		mask |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
+		mask |= POLLHUP;
+
+	/*
+	 * we set writable also when the other side has shut down the
+	 * connection. This prevents stuck sockets.
+	 */
+	if (unix_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+
+
+#ifdef CONFIG_PROC_FS
+static struct sock *unix_seq_idx(int *iter, loff_t pos)
+{
+	loff_t off = 0;
+	struct sock *s;
+
+	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
+		if (off == pos) 
+			return s;
+		++off;
+	}
+	return NULL;
+}
+
+
+static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock(&unix_table_lock);
+	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
+}
+
+static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+
+	if (v == (void *)1) 
+		return first_unix_socket(seq->private);
+	return next_unix_socket(seq->private, v);
+}
+
+static void unix_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&unix_table_lock);
+}
+
+static int unix_seq_show(struct seq_file *seq, void *v)
+{
+	
+	if (v == (void *)1)
+		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
+			 "Inode Path\n");
+	else {
+		struct sock *s = v;
+		struct unix_sock *u = unix_sk(s);
+		unix_state_rlock(s);
+
+		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
+			s,
+			atomic_read(&s->sk_refcnt),
+			0,
+			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+			s->sk_type,
+			s->sk_socket ?
+			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+			sock_i_ino(s));
+
+		if (u->addr) {
+			int i, len;
+			seq_putc(seq, ' ');
+
+			i = 0;
+			len = u->addr->len - sizeof(short);
+			if (!UNIX_ABSTRACT(s))
+				len--;
+			else {
+				seq_putc(seq, '@');
+				i++;
+			}
+			for ( ; i < len; i++)
+				seq_putc(seq, u->addr->name->sun_path[i]);
+		}
+		unix_state_runlock(s);
+		seq_putc(seq, '\n');
+	}
+
+	return 0;
+}
+
+static struct seq_operations unix_seq_ops = {
+	.start  = unix_seq_start,
+	.next   = unix_seq_next,
+	.stop   = unix_seq_stop,
+	.show   = unix_seq_show,
+};
+
+
+static int unix_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	int *iter = kmalloc(sizeof(int), GFP_KERNEL);
+
+	if (!iter)
+		goto out;
+
+	rc = seq_open(file, &unix_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = iter;
+	*iter = 0;
+out:
+	return rc;
+out_kfree:
+	kfree(iter);
+	goto out;
+}
+
+static struct file_operations unix_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= unix_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+#endif
+
+static struct net_proto_family unix_family_ops = {
+	.family = PF_UNIX,
+	.create = unix_create,
+	.owner	= THIS_MODULE,
+};
+
+#ifdef CONFIG_SYSCTL
+extern void unix_sysctl_register(void);
+extern void unix_sysctl_unregister(void);
+#else
+static inline void unix_sysctl_register(void) {}
+static inline void unix_sysctl_unregister(void) {}
+#endif
+
+static int __init af_unix_init(void)
+{
+	int rc = -1;
+	struct sk_buff *dummy_skb;
+
+	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
+		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
+		goto out;
+	}
+
+	rc = proto_register(&unix_proto, 1);
+        if (rc != 0) {
+                printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
+		       __FUNCTION__);
+		goto out;
+	}
+
+	sock_register(&unix_family_ops);
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("unix", 0, &unix_seq_fops);
+#endif
+	unix_sysctl_register();
+out:
+	return rc;
+}
+
+static void __exit af_unix_exit(void)
+{
+	sock_unregister(PF_UNIX);
+	unix_sysctl_unregister();
+	proc_net_remove("unix");
+	proto_unregister(&unix_proto);
+}
+
+module_init(af_unix_init);
+module_exit(af_unix_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_UNIX);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
new file mode 100644
index 00000000000..4bd95c8f593
--- /dev/null
+++ b/net/unix/garbage.c
@@ -0,0 +1,312 @@
+/*
+ * NET3:	Garbage Collector For AF_UNIX sockets
+ *
+ * Garbage Collector:
+ *	Copyright (C) Barak A. Pearlmutter.
+ *	Released under the GPL version 2 or later.
+ *
+ * Chopped about by Alan Cox 22/3/96 to make it fit the AF_UNIX socket problem.
+ * If it doesn't work blame me, it worked when Barak sent it.
+ *
+ * Assumptions:
+ *
+ *  - object w/ a bit
+ *  - free list
+ *
+ * Current optimizations:
+ *
+ *  - explicit stack instead of recursion
+ *  - tail recurse on first born instead of immediate push/pop
+ *  - we gather the stuff that should not be killed into tree
+ *    and stack is just a path from root to the current pointer.
+ *
+ *  Future optimizations:
+ *
+ *  - don't just push entire root set; process in place
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *  Fixes:
+ *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
+ *					Cope with changing max_files.
+ *	Al Viro		11 Oct 1998
+ *		Graph may have cycles. That is, we can send the descriptor
+ *		of foo to bar and vice versa. Current code chokes on that.
+ *		Fix: move SCM_RIGHTS ones into the separate list and then
+ *		skb_free() them all instead of doing explicit fput's.
+ *		Another problem: since fput() may block somebody may
+ *		create a new unix_socket when we are in the middle of sweep
+ *		phase. Fix: revert the logic wrt MARKED. Mark everything
+ *		upon the beginning and unmark non-junk ones.
+ *
+ *		[12 Oct 1998] AAARGH! New code purges all SCM_RIGHTS
+ *		sent to connect()'ed but still not accept()'ed sockets.
+ *		Fixed. Old code had slightly different problem here:
+ *		extra fput() in situation when we passed the descriptor via
+ *		such socket and closed it (descriptor). That would happen on
+ *		each unix_gc() until the accept(). Since the struct file in
+ *		question would go to the free list and might be reused...
+ *		That might be the reason of random oopses on filp_close()
+ *		in unrelated processes.
+ *
+ *	AV		28 Feb 1999
+ *		Kill the explicit allocation of stack. Now we keep the tree
+ *		with root in dummy + pointer (gc_current) to one of the nodes.
+ *		Stack is represented as path from gc_current to dummy. Unmark
+ *		now means "add to tree". Push == "make it a son of gc_current".
+ *		Pop == "move gc_current to parent". We keep only pointers to
+ *		parents (->gc_tree).
+ *	AV		1 Mar 1999
+ *		Damn. Added missing check for ->dead in listen queues scanning.
+ *
+ */
+ 
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/un.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/tcp.h>
+
+#include <net/sock.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+
+/* Internal data structures and random procedures: */
+
+#define GC_HEAD		((struct sock *)(-1))
+#define GC_ORPHAN	((struct sock *)(-3))
+
+static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */
+
+atomic_t unix_tot_inflight = ATOMIC_INIT(0);
+
+
+static struct sock *unix_get_socket(struct file *filp)
+{
+	struct sock *u_sock = NULL;
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	/*
+	 *	Socket ?
+	 */
+	if (S_ISSOCK(inode->i_mode)) {
+		struct socket * sock = SOCKET_I(inode);
+		struct sock * s = sock->sk;
+
+		/*
+		 *	PF_UNIX ?
+		 */
+		if (s && sock->ops && sock->ops->family == PF_UNIX)
+			u_sock = s;
+	}
+	return u_sock;
+}
+
+/*
+ *	Keep the number of times in flight count for the file
+ *	descriptor if it is for an AF_UNIX socket.
+ */
+ 
+void unix_inflight(struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+	if(s) {
+		atomic_inc(&unix_sk(s)->inflight);
+		atomic_inc(&unix_tot_inflight);
+	}
+}
+
+void unix_notinflight(struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+	if(s) {
+		atomic_dec(&unix_sk(s)->inflight);
+		atomic_dec(&unix_tot_inflight);
+	}
+}
+
+
+/*
+ *	Garbage Collector Support Functions
+ */
+
+static inline struct sock *pop_stack(void)
+{
+	struct sock *p = gc_current;
+	gc_current = unix_sk(p)->gc_tree;
+	return p;
+}
+
+static inline int empty_stack(void)
+{
+	return gc_current == GC_HEAD;
+}
+
+static void maybe_unmark_and_push(struct sock *x)
+{
+	struct unix_sock *u = unix_sk(x);
+
+	if (u->gc_tree != GC_ORPHAN)
+		return;
+	sock_hold(x);
+	u->gc_tree = gc_current;
+	gc_current = x;
+}
+
+
+/* The external entry point: unix_gc() */
+
+void unix_gc(void)
+{
+	static DECLARE_MUTEX(unix_gc_sem);
+	int i;
+	struct sock *s;
+	struct sk_buff_head hitlist;
+	struct sk_buff *skb;
+
+	/*
+	 *	Avoid a recursive GC.
+	 */
+
+	if (down_trylock(&unix_gc_sem))
+		return;
+
+	read_lock(&unix_table_lock);
+
+	forall_unix_sockets(i, s)
+	{
+		unix_sk(s)->gc_tree = GC_ORPHAN;
+	}
+	/*
+	 *	Everything is now marked 
+	 */
+
+	/* Invariant to be maintained:
+		- everything unmarked is either:
+		-- (a) on the stack, or
+		-- (b) has all of its children unmarked
+		- everything on the stack is always unmarked
+		- nothing is ever pushed onto the stack twice, because:
+		-- nothing previously unmarked is ever pushed on the stack
+	 */
+
+	/*
+	 *	Push root set
+	 */
+
+	forall_unix_sockets(i, s)
+	{
+		int open_count = 0;
+
+		/*
+		 *	If all instances of the descriptor are not
+		 *	in flight we are in use.
+		 *
+		 *	Special case: when socket s is embrion, it may be
+		 *	hashed but still not in queue of listening socket.
+		 *	In this case (see unix_create1()) we set artificial
+		 *	negative inflight counter to close race window.
+		 *	It is trick of course and dirty one.
+		 */
+		if (s->sk_socket && s->sk_socket->file)
+			open_count = file_count(s->sk_socket->file);
+		if (open_count > atomic_read(&unix_sk(s)->inflight))
+			maybe_unmark_and_push(s);
+	}
+
+	/*
+	 *	Mark phase 
+	 */
+
+	while (!empty_stack())
+	{
+		struct sock *x = pop_stack();
+		struct sock *sk;
+
+		spin_lock(&x->sk_receive_queue.lock);
+		skb = skb_peek(&x->sk_receive_queue);
+		
+		/*
+		 *	Loop through all but first born 
+		 */
+		
+		while (skb && skb != (struct sk_buff *)&x->sk_receive_queue) {
+			/*
+			 *	Do we have file descriptors ?
+			 */
+			if(UNIXCB(skb).fp)
+			{
+				/*
+				 *	Process the descriptors of this socket
+				 */
+				int nfd=UNIXCB(skb).fp->count;
+				struct file **fp = UNIXCB(skb).fp->fp;
+				while(nfd--)
+				{
+					/*
+					 *	Get the socket the fd matches if
+					 *	it indeed does so
+					 */
+					if((sk=unix_get_socket(*fp++))!=NULL)
+					{
+						maybe_unmark_and_push(sk);
+					}
+				}
+			}
+			/* We have to scan not-yet-accepted ones too */
+			if (x->sk_state == TCP_LISTEN)
+				maybe_unmark_and_push(skb->sk);
+			skb=skb->next;
+		}
+		spin_unlock(&x->sk_receive_queue.lock);
+		sock_put(x);
+	}
+
+	skb_queue_head_init(&hitlist);
+
+	forall_unix_sockets(i, s)
+	{
+		struct unix_sock *u = unix_sk(s);
+
+		if (u->gc_tree == GC_ORPHAN) {
+			struct sk_buff *nextsk;
+
+			spin_lock(&s->sk_receive_queue.lock);
+			skb = skb_peek(&s->sk_receive_queue);
+			while (skb &&
+			       skb != (struct sk_buff *)&s->sk_receive_queue) {
+				nextsk=skb->next;
+				/*
+				 *	Do we have file descriptors ?
+				 */
+				if(UNIXCB(skb).fp)
+				{
+					__skb_unlink(skb, skb->list);
+					__skb_queue_tail(&hitlist,skb);
+				}
+				skb=nextsk;
+			}
+			spin_unlock(&s->sk_receive_queue.lock);
+		}
+		u->gc_tree = GC_ORPHAN;
+	}
+	read_unlock(&unix_table_lock);
+
+	/*
+	 *	Here we are. Hitlist is filled. Die.
+	 */
+
+	__skb_queue_purge(&hitlist);
+	up(&unix_gc_sem);
+}
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
new file mode 100644
index 00000000000..c974dac4580
--- /dev/null
+++ b/net/unix/sysctl_net_unix.c
@@ -0,0 +1,60 @@
+/*
+ * NET4:	Sysctl interface to net af_unix subsystem.
+ *
+ * Authors:	Mike Shaver.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+
+extern int sysctl_unix_max_dgram_qlen;
+
+static ctl_table unix_table[] = {
+	{
+		.ctl_name	= NET_UNIX_MAX_DGRAM_QLEN,
+		.procname	= "max_dgram_qlen",
+		.data		= &sysctl_unix_max_dgram_qlen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table unix_net_table[] = {
+	{
+		.ctl_name	= NET_UNIX,
+		.procname	= "unix",
+		.mode		= 0555,
+		.child		= unix_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table unix_root_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= unix_net_table
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table_header * unix_sysctl_header;
+
+void unix_sysctl_register(void)
+{
+	unix_sysctl_header = register_sysctl_table(unix_root_table, 0);
+}
+
+void unix_sysctl_unregister(void)
+{
+	unregister_sysctl_table(unix_sysctl_header);
+}
+
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
new file mode 100644
index 00000000000..9f188ab3dcd
--- /dev/null
+++ b/net/wanrouter/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux WAN router layer.
+#
+
+obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
+
+wanrouter-objs :=  wanproc.o wanmain.o
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
new file mode 100644
index 00000000000..d93b19faaab
--- /dev/null
+++ b/net/wanrouter/af_wanpipe.c
@@ -0,0 +1,2611 @@
+/*****************************************************************************
+* af_wanpipe.c	WANPIPE(tm) Secure Socket Layer.
+*
+* Author:	Nenad Corbic	<ncorbic@sangoma.com>
+*
+* Copyright:	(c) 2000 Sangoma Technologies Inc.
+*
+*		This program is free software; you can redistribute it and/or
+*		modify it under the terms of the GNU General Public License
+*		as published by the Free Software Foundation; either version
+*		2 of the License, or (at your option) any later version.
+* ============================================================================
+* Due Credit:
+*               Wanpipe socket layer is based on Packet and 
+*               the X25 socket layers. The above sockets were 
+*               used for the specific use of Sangoma Technoloiges 
+*               API programs. 
+*               Packet socket Authors: Ross Biro, Fred N. van Kempen and 
+*                                      Alan Cox.
+*               X25 socket Author: Jonathan Naylor.
+* ============================================================================
+* Mar 15, 2002  Arnaldo C. Melo  o Use wp_sk()->num, as it isnt anymore in sock
+* Apr 25, 2000  Nenad Corbic     o Added the ability to send zero length packets.
+* Mar 13, 2000  Nenad Corbic	 o Added a tx buffer check via ioctl call.
+* Mar 06, 2000  Nenad Corbic     o Fixed the corrupt sock lcn problem.
+*                                  Server and client applicaton can run
+*                                  simultaneously without conflicts.
+* Feb 29, 2000  Nenad Corbic     o Added support for PVC protocols, such as
+*                                  CHDLC, Frame Relay and HDLC API.
+* Jan 17, 2000 	Nenad Corbic	 o Initial version, based on AF_PACKET socket.
+*			           X25API support only. 
+*
+******************************************************************************/
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/wireless.h>
+#include <linux/kmod.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/wanpipe.h>
+#include <linux/if_wanpipe.h>
+#include <linux/pkt_sched.h>
+#include <linux/tcp.h>
+#include <linux/if_wanpipe_common.h>
+#include <linux/sdla_x25.h>
+
+#ifdef CONFIG_INET
+#include <net/inet_common.h>
+#endif
+
+#define SLOW_BACKOFF 0.1*HZ
+#define FAST_BACKOFF 0.01*HZ
+
+//#define PRINT_DEBUG
+#ifdef PRINT_DEBUG
+	#define DBG_PRINTK(format, a...) printk(format, ## a)
+#else
+	#define DBG_PRINTK(format, a...)
+#endif      
+
+
+/* SECURE SOCKET IMPLEMENTATION 
+ * 
+ *   TRANSMIT:
+ *
+ *      When the user sends a packet via send() system call
+ *      the wanpipe_sendmsg() function is executed.  
+ *      
+ *      Each packet is enqueud into sk->sk_write_queue transmit
+ *      queue. When the packet is enqueued, a delayed transmit
+ *      timer is triggerd which acts as a Bottom Half hander. 
+ *
+ *      wanpipe_delay_transmit() function (BH), dequeues packets
+ *      from the sk->sk_write_queue transmit queue and sends it 
+ *      to the deriver via dev->hard_start_xmit(skb, dev) function.  
+ *      Note, this function is actual a function pointer of if_send()
+ *      routine in the wanpipe driver.
+ *
+ *      X25API GUARANTEED DELIVERY:
+ *
+ *         In order to provide 100% guaranteed packet delivery, 
+ *         an atomic 'packet_sent' counter is implemented.  Counter 
+ *         is incremented for each packet enqueued 
+ *         into sk->sk_write_queue.  Counter is decremented each
+ *         time wanpipe_delayed_transmit() function successfuly 
+ *         passes the packet to the driver. Before each send(), a poll
+ *         routine checks the sock resources The maximum value of
+ *         packet sent counter is 1, thus if one packet is queued, the
+ *         application will block until that packet is passed to the
+ *         driver.
+ *
+ *   RECEIVE:
+ *
+ *      Wanpipe device drivers call the socket bottom half
+ *      function, wanpipe_rcv() to queue the incoming packets
+ *      into an AF_WANPIPE socket queue.  Based on wanpipe_rcv()
+ *      return code, the driver knows whether the packet was
+ *      successfully queued.  If the socket queue is full, 
+ *      protocol flow control is used by the driver, if any, 
+ *      to slow down the traffic until the sock queue is free.
+ *
+ *      Every time a packet arrives into a socket queue the 
+ *      socket wakes up processes which are waiting to receive
+ *      data.
+ *
+ *      If the socket queue is full, the driver sets a block
+ *      bit which signals the socket to kick the wanpipe driver
+ *      bottom half hander when the socket queue is partialy
+ *      empty. wanpipe_recvmsg() function performs this action.
+ * 
+ *      In case of x25api, packets will never be dropped, since
+ *      flow control is available. 
+ *      
+ *      In case of streaming protocols like CHDLC, packets will 
+ *      be dropped but the statistics will be generated. 
+ */
+
+
+/* The code below is used to test memory leaks. It prints out
+ * a message every time kmalloc and kfree system calls get executed.
+ * If the calls match there is no leak :)
+ */
+
+/***********FOR DEBUGGING PURPOSES*********************************************
+#define KMEM_SAFETYZONE 8
+
+static void * dbg_kmalloc(unsigned int size, int prio, int line) {
+	void * v = kmalloc(size,prio);
+	printk(KERN_INFO "line %d  kmalloc(%d,%d) = %p\n",line,size,prio,v);
+	return v;
+}
+static void dbg_kfree(void * v, int line) {
+	printk(KERN_INFO "line %d  kfree(%p)\n",line,v);
+	kfree(v);
+}
+
+#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__)
+#define kfree(x) dbg_kfree(x,__LINE__)
+******************************************************************************/
+
+
+/* List of all wanpipe sockets. */
+HLIST_HEAD(wanpipe_sklist);
+static DEFINE_RWLOCK(wanpipe_sklist_lock);
+
+atomic_t wanpipe_socks_nr;
+static unsigned long wanpipe_tx_critical;
+
+#if 0
+/* Private wanpipe socket structures. */
+struct wanpipe_opt
+{
+	void   *mbox;		/* Mail box  */
+	void   *card; 		/* Card bouded to */
+	struct net_device *dev;	/* Bounded device */
+	unsigned short lcn;	/* Binded LCN */
+	unsigned char  svc;	/* 0=pvc, 1=svc */
+	unsigned char  timer;   /* flag for delayed transmit*/	
+	struct timer_list tx_timer;
+	unsigned poll_cnt;
+	unsigned char force;	/* Used to force sock release */
+	atomic_t packet_sent;   
+};
+#endif
+
+static int sk_count;
+extern struct proto_ops wanpipe_ops;
+static unsigned long find_free_critical;
+
+static void wanpipe_unlink_driver(struct sock *sk);
+static void wanpipe_link_driver(struct net_device *dev, struct sock *sk);
+static void wanpipe_wakeup_driver(struct sock *sk);
+static int execute_command(struct sock *, unsigned char, unsigned int);
+static int check_dev(struct net_device *dev, sdla_t *card);
+struct net_device *wanpipe_find_free_dev(sdla_t *card);
+static void wanpipe_unlink_card (struct sock *);
+static int wanpipe_link_card (struct sock *);
+static struct sock *wanpipe_make_new(struct sock *);
+static struct sock *wanpipe_alloc_socket(void);
+static inline int get_atomic_device(struct net_device *dev);
+static int wanpipe_exec_cmd(struct sock *, int, unsigned int);
+static int get_ioctl_cmd (struct sock *, void *);
+static int set_ioctl_cmd (struct sock *, void *);
+static void release_device(struct net_device *dev);
+static void wanpipe_kill_sock_timer (unsigned long data);
+static void wanpipe_kill_sock_irq (struct sock *);
+static void wanpipe_kill_sock_accept (struct sock *);
+static int wanpipe_do_bind(struct sock *sk, struct net_device *dev,
+			   int protocol);
+struct sock * get_newsk_from_skb (struct sk_buff *);
+static int wanpipe_debug (struct sock *, void *);
+static void wanpipe_delayed_transmit (unsigned long data);
+static void release_driver(struct sock *);
+static void start_cleanup_timer (struct sock *);
+static void check_write_queue(struct sock *);
+static int check_driver_busy (struct sock *);
+
+/*============================================================
+ * wanpipe_rcv
+ *
+ *	Wanpipe socket bottom half handler.  This function
+ *      is called by the WANPIPE device drivers to queue a
+ *      incoming packet into the socket receive queue. 
+ *      Once the packet is queued, all processes waiting to 
+ *      read are woken up.
+ *
+ *      During socket bind, this function is bounded into
+ *      WANPIPE driver private.
+ *===========================================================*/
+
+static int wanpipe_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct sock *sk)
+{
+	struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)skb->cb;
+	wanpipe_common_t *chan = dev->priv;
+	/*
+	 *	When we registered the protocol we saved the socket in the data
+	 *	field for just this event.
+	 */
+
+	skb->dev = dev;
+
+	sll->sll_family = AF_WANPIPE;
+	sll->sll_hatype = dev->type;
+	sll->sll_protocol = skb->protocol;
+	sll->sll_pkttype = skb->pkt_type;
+	sll->sll_ifindex = dev->ifindex;
+	sll->sll_halen = 0;
+
+	if (dev->hard_header_parse)
+		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+
+	/* 
+	 * WAN_PACKET_DATA : Data which should be passed up the receive queue.
+         * WAN_PACKET_ASYC : Asynchronous data like place call, which should
+         *                   be passed up the listening sock.
+         * WAN_PACKET_ERR  : Asynchronous data like clear call or restart 
+         *                   which should go into an error queue.
+         */
+	switch (skb->pkt_type){
+
+		case WAN_PACKET_DATA:
+			if (sock_queue_rcv_skb(sk,skb)<0){
+				return -ENOMEM;
+			}
+			break;
+		case WAN_PACKET_CMD:
+			sk->sk_state = chan->state;
+			/* Bug fix: update Mar6. 
+                         * Do not set the sock lcn number here, since
+         		 * cmd is not guaranteed to be executed on the
+                         * board, thus Lcn could be wrong */
+			sk->sk_data_ready(sk, skb->len);
+			kfree_skb(skb);
+			break;
+		case WAN_PACKET_ERR:
+			sk->sk_state = chan->state;
+			if (sock_queue_err_skb(sk,skb)<0){
+				return -ENOMEM;
+			}
+			break;
+		default:
+			printk(KERN_INFO "wansock: BH Illegal Packet Type Dropping\n");
+			kfree_skb(skb); 
+			break;
+	}
+
+//??????????????????????
+//	if (sk->sk_state == WANSOCK_DISCONNECTED){
+//		if (sk->sk_zapped) {
+//			//printk(KERN_INFO "wansock: Disconnected, killing early\n");
+//			wanpipe_unlink_driver(sk);
+//			sk->sk_bound_dev_if = 0;
+//		}
+//	}
+
+	return 0;
+}
+
+/*============================================================
+ * wanpipe_listen_rcv
+ *
+ *	Wanpipe LISTEN socket bottom half handler.  This function
+ *      is called by the WANPIPE device drivers to queue an
+ *      incoming call into the socket listening queue. 
+ *      Once the packet is queued, the waiting accept() process 
+ *      is woken up.
+ *
+ *      During socket bind, this function is bounded into
+ *      WANPIPE driver private. 
+ * 
+ *      IMPORTANT NOTE:
+ *          The accept call() is waiting for an skb packet
+ *          which contains a pointer to a device structure.
+ *
+ *          When we do a bind to a device structre, we 
+ *          bind a newly created socket into "chan->sk".  Thus, 
+ *          when accept receives the skb packet, it will know 
+ *          from which dev it came form, and in turn it will know
+ *          the address of the new sock.
+ *
+ *  	NOTE: This function gets called from driver ISR.
+ *===========================================================*/
+
+static int wanpipe_listen_rcv (struct sk_buff *skb,  struct sock *sk)
+{
+	wanpipe_opt *wp = wp_sk(sk), *newwp;
+	struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)skb->cb;
+	struct sock *newsk;
+	struct net_device *dev; 
+	sdla_t *card;
+	mbox_cmd_t *mbox_ptr;
+	wanpipe_common_t *chan;
+
+	/* Find a free device, if none found, all svc's are busy 
+         */
+
+	card = (sdla_t*)wp->card;
+	if (!card){
+		printk(KERN_INFO "wansock: LISTEN ERROR, No Card\n");
+		return -ENODEV;
+	}
+	
+	dev = wanpipe_find_free_dev(card);
+	if (!dev){
+		printk(KERN_INFO "wansock: LISTEN ERROR, No Free Device\n");
+		return -ENODEV;
+	}
+
+	chan=dev->priv;	
+	chan->state = WANSOCK_CONNECTING;
+
+	/* Allocate a new sock, which accept will bind
+         * and pass up to the user 
+	 */
+	if ((newsk = wanpipe_make_new(sk)) == NULL){
+		release_device(dev);
+		return -ENOMEM;
+	}
+
+
+	/* Initialize the new sock structure 
+	 */
+	newsk->sk_bound_dev_if = dev->ifindex;
+	newwp = wp_sk(newsk);
+	newwp->card = wp->card;
+
+	/* Insert the sock into the main wanpipe
+         * sock list.
+         */
+	atomic_inc(&wanpipe_socks_nr);
+
+	/* Allocate and fill in the new Mail Box. Then
+         * bind the mail box to the sock. It will be 
+         * used by the ioctl call to read call information
+         * and to execute commands. 
+         */	
+	if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) {
+		wanpipe_kill_sock_irq (newsk);
+		release_device(dev);		
+		return -ENOMEM;
+	}
+	memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
+	memcpy(mbox_ptr,skb->data,skb->len);
+
+	/* Register the lcn on which incoming call came
+         * from. Thus, if we have to clear it, we know
+         * which lcn to clear
+	 */ 
+
+	newwp->lcn = mbox_ptr->cmd.lcn;
+	newwp->mbox = (void *)mbox_ptr;
+
+	DBG_PRINTK(KERN_INFO "NEWSOCK : Device %s, bind to lcn %i\n",
+			dev->name,mbox_ptr->cmd.lcn);
+
+	chan->lcn = mbox_ptr->cmd.lcn;
+	card->u.x.svc_to_dev_map[(chan->lcn%MAX_X25_LCN)] = dev;
+
+	sock_reset_flag(newsk, SOCK_ZAPPED);
+	newwp->num = htons(X25_PROT);
+
+	if (wanpipe_do_bind(newsk, dev, newwp->num)) {
+		wanpipe_kill_sock_irq (newsk);
+		release_device(dev);
+		return -EINVAL;
+	}
+	newsk->sk_state = WANSOCK_CONNECTING;
+
+
+	/* Fill in the standard sock address info */
+
+	sll->sll_family = AF_WANPIPE;
+	sll->sll_hatype = dev->type;
+	sll->sll_protocol = skb->protocol;
+	sll->sll_pkttype = skb->pkt_type;
+	sll->sll_ifindex = dev->ifindex;
+	sll->sll_halen = 0;
+
+	skb->dev = dev;
+	sk->sk_ack_backlog++;
+
+	/* We must do this manually, since the sock_queue_rcv_skb()
+	 * function sets the skb->dev to NULL.  However, we use
+	 * the dev field in the accept function.*/ 
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 
+	    (unsigned)sk->sk_rcvbuf) {
+
+         	wanpipe_unlink_driver(newsk);
+		wanpipe_kill_sock_irq (newsk);
+		--sk->sk_ack_backlog;
+		return -ENOMEM;
+	}	
+
+	skb_set_owner_r(skb, sk);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+	
+	return 0;
+}
+
+
+
+/*============================================================
+ * wanpipe_make_new
+ *
+ *	Create a new sock, and allocate a wanpipe private
+ *      structure to it. Also, copy the important data
+ *      from the original sock to the new sock.
+ *
+ *      This function is used by wanpipe_listen_rcv() listen
+ *      bottom half handler.  A copy of the listening sock
+ *      is created using this function.
+ *
+ *===========================================================*/
+
+static struct sock *wanpipe_make_new(struct sock *osk)
+{
+	struct sock *sk;
+
+	if (osk->sk_type != SOCK_RAW)
+		return NULL;
+
+	if ((sk = wanpipe_alloc_socket()) == NULL)
+		return NULL;
+
+	sk->sk_type	= osk->sk_type;
+	sk->sk_socket	= osk->sk_socket;
+	sk->sk_priority	= osk->sk_priority;
+	sk->sk_protocol	= osk->sk_protocol;
+	wp_sk(sk)->num	= wp_sk(osk)->num;
+	sk->sk_rcvbuf	= osk->sk_rcvbuf;
+	sk->sk_sndbuf	= osk->sk_sndbuf;
+	sk->sk_state	= WANSOCK_CONNECTING;
+	sk->sk_sleep	= osk->sk_sleep;
+
+	if (sock_flag(osk, SOCK_DBG))
+		sock_set_flag(sk, SOCK_DBG);
+
+	return sk;
+}
+
+/* 
+ * FIXME: wanpipe_opt has to include a sock in its definition and stop using
+ * sk_protinfo, but this code is not even compilable now, so lets leave it for
+ * later.
+ */
+static struct proto wanpipe_proto = {
+	.name	  = "WANPIPE",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct sock),
+};
+
+/*============================================================
+ * wanpipe_make_new
+ *
+ *	Allocate memory for the a new sock, and sock
+ *      private data.  
+ *	
+ *	Increment the module use count.
+ *       	
+ *      This function is used by wanpipe_create() and 
+ *      wanpipe_make_new() functions. 
+ *
+ *===========================================================*/
+
+static struct sock *wanpipe_alloc_socket(void)
+{
+	struct sock *sk;
+	struct wanpipe_opt *wan_opt;
+
+	if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL)
+		return NULL;
+
+	if ((wan_opt = kmalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) {
+		sk_free(sk);
+		return NULL;
+	}
+	memset(wan_opt, 0x00, sizeof(struct wanpipe_opt));
+
+	wp_sk(sk) = wan_opt;
+
+	/* Use timer to send data to the driver. This will act
+         * as a BH handler for sendmsg functions */
+	init_timer(&wan_opt->tx_timer);
+	wan_opt->tx_timer.data	   = (unsigned long)sk;
+	wan_opt->tx_timer.function = wanpipe_delayed_transmit;
+
+	sock_init_data(NULL, sk);
+	return sk;
+}
+
+
+/*============================================================
+ * wanpipe_sendmsg
+ *
+ *	This function implements a sendto() system call,
+ *      for AF_WANPIPE socket family. 
+ *      During socket bind() sk->sk_bound_dev_if is initialized
+ *      to a correct network device. This number is used
+ *      to find a network device to which the packet should
+ *      be passed to.
+ *
+ *      Each packet is queued into sk->sk_write_queue and 
+ *      delayed transmit bottom half handler is marked for 
+ *      execution.
+ *
+ *      A socket must be in WANSOCK_CONNECTED state before
+ *      a packet is queued into sk->sk_write_queue.
+ *===========================================================*/
+
+static int wanpipe_sendmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *msg, int len)
+{
+	wanpipe_opt *wp;
+	struct sock *sk = sock->sk;
+	struct wan_sockaddr_ll *saddr=(struct wan_sockaddr_ll *)msg->msg_name;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	unsigned short proto;
+	unsigned char *addr;
+	int ifindex, err, reserve = 0;
+
+	
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		return -ENETDOWN;
+
+	if (sk->sk_state != WANSOCK_CONNECTED)
+		return -ENOTCONN;	
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) 
+		return(-EINVAL);
+
+	/* it was <=, now one can send
+         * zero length packets */
+	if (len < sizeof(x25api_hdr_t))
+		return -EINVAL;
+
+	wp = wp_sk(sk);
+
+	if (saddr == NULL) {
+		ifindex	= sk->sk_bound_dev_if;
+		proto	= wp->num;
+		addr	= NULL;
+
+	}else{
+		if (msg->msg_namelen < sizeof(struct wan_sockaddr_ll)){ 
+			return -EINVAL;
+		}
+
+		ifindex = sk->sk_bound_dev_if;
+		proto	= saddr->sll_protocol;
+		addr	= saddr->sll_addr;
+	}
+
+	dev = dev_get_by_index(ifindex);
+	if (dev == NULL){
+		printk(KERN_INFO "wansock: Send failed, dev index: %i\n",ifindex);
+		return -ENXIO;
+	}
+	dev_put(dev);
+	
+	if (sock->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
+
+	if (len > dev->mtu+reserve){
+  		return -EMSGSIZE;
+	}
+
+	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
+				msg->msg_flags & MSG_DONTWAIT, &err);
+
+	if (skb==NULL){
+		goto out_unlock;
+	}
+		
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb->nh.raw = skb->data;
+
+	/* Returns -EFAULT on error */
+	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+	if (err){
+		goto out_free;
+	}
+
+	if (dev->hard_header) {
+		int res;
+		err = -EINVAL;
+		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
+		if (res<0){
+			goto out_free;
+		}
+	}
+
+	skb->protocol = proto;
+	skb->dev = dev;
+	skb->priority = sk->sk_priority;
+	skb->pkt_type = WAN_PACKET_DATA;
+
+	err = -ENETDOWN;
+	if (!(dev->flags & IFF_UP))
+		goto out_free;
+
+	if (atomic_read(&sk->sk_wmem_alloc) + skb->truesize >
+	    (unsigned int)sk->sk_sndbuf){
+		kfree_skb(skb);
+		return -ENOBUFS;
+	}
+
+	skb_queue_tail(&sk->sk_write_queue,skb);
+	atomic_inc(&wp->packet_sent);
+
+	if (!(test_and_set_bit(0, &wp->timer)))
+		mod_timer(&wp->tx_timer, jiffies + 1);
+	
+	return(len);
+
+out_free:
+	kfree_skb(skb);
+out_unlock:
+	return err;
+}
+
+/*============================================================
+ * wanpipe_delayed_tarnsmit
+ *
+ *	Transmit bottom half handler. It dequeues packets
+ *      from sk->sk_write_queue and passes them to the 
+ *      driver.  If the driver is busy, the packet is 
+ *      re-enqueued.  
+ *
+ *      Packet Sent counter is decremented on successful
+ *      transmission. 
+ *===========================================================*/
+
+
+static void wanpipe_delayed_transmit (unsigned long data)
+{
+	struct sock *sk=(struct sock *)data;
+	struct sk_buff *skb;
+	wanpipe_opt *wp = wp_sk(sk);
+	struct net_device *dev = wp->dev;
+	sdla_t *card = (sdla_t*)wp->card;
+
+	if (!card || !dev){
+		clear_bit(0, &wp->timer);
+		DBG_PRINTK(KERN_INFO "wansock: Transmit delay, no dev or card\n");
+		return;
+	}
+	
+	if (sk->sk_state != WANSOCK_CONNECTED || !sock_flag(sk, SOCK_ZAPPED)) {
+		clear_bit(0, &wp->timer);
+		DBG_PRINTK(KERN_INFO "wansock: Tx Timer, State not CONNECTED\n");
+		return;
+	}
+	
+	/* If driver is executing command, we must offload
+         * the board by not sending data. Otherwise a 
+         * pending command will never get a free buffer
+         * to execute */ 	
+	if (atomic_read(&card->u.x.command_busy)){
+		wp->tx_timer.expires = jiffies + SLOW_BACKOFF;
+		add_timer(&wp->tx_timer);
+		DBG_PRINTK(KERN_INFO "wansock: Tx Timer, command bys BACKOFF\n");
+		return;
+	}
+
+	
+	if (test_and_set_bit(0,&wanpipe_tx_critical)){
+		printk(KERN_INFO "WanSock: Tx timer critical %s\n",dev->name);
+		wp->tx_timer.expires = jiffies + SLOW_BACKOFF;
+		add_timer(&wp->tx_timer);
+		return;
+	}	
+	
+	/* Check for a packet in the fifo and send */
+	if ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL){
+
+		if (dev->hard_start_xmit(skb, dev) != 0){			
+
+			/* Driver failed to transmit, re-enqueue
+                         * the packet and retry again later */
+			skb_queue_head(&sk->sk_write_queue,skb);
+			clear_bit(0,&wanpipe_tx_critical);
+			return;
+		}else{
+
+			/* Packet Sent successful. Check for more packets
+                         * if more packets, re-trigger the transmit routine 
+                         * other wise exit
+                         */
+			atomic_dec(&wp->packet_sent);
+
+			if (skb_peek(&sk->sk_write_queue) == NULL) {
+				/* If there is nothing to send, kick
+				 * the poll routine, which will trigger
+				 * the application to send more data */
+				sk->sk_data_ready(sk, 0);
+				clear_bit(0, &wp->timer);
+			}else{
+				/* Reschedule as fast as possible */
+				wp->tx_timer.expires = jiffies + 1;
+				add_timer(&wp->tx_timer);
+			}
+		}
+	}
+	clear_bit(0,&wanpipe_tx_critical);
+}
+
+/*============================================================
+ * execute_command 
+ *
+ *	Execute x25api commands.  The atomic variable
+ *      chan->command is used to indicate to the driver that
+ *      command is pending for execution.  The acutal command
+ *      structure is placed into a sock mbox structure 
+ *      (wp_sk(sk)->mbox).
+ *
+ *      The sock private structure, mbox is
+ *      used as shared memory between sock and the driver.
+ *      Driver uses the sock mbox to execute the command
+ *      and return the result.  
+ *
+ *      For all command except PLACE CALL, the function
+ *      waits for the result.  PLACE CALL can be ether
+ *      blocking or nonblocking. The user sets this option
+ *      via ioctl call.
+ *===========================================================*/
+
+
+static int execute_command(struct sock *sk,  unsigned char cmd, unsigned int flags)
+{
+	wanpipe_opt *wp = wp_sk(sk);
+	struct net_device *dev;
+	wanpipe_common_t *chan=NULL;
+	int err=0;
+	DECLARE_WAITQUEUE(wait, current);
+	
+	dev = dev_get_by_index(sk->sk_bound_dev_if);
+	if (dev == NULL){
+		printk(KERN_INFO "wansock: Exec failed no dev %i\n",
+			sk->sk_bound_dev_if);
+		return -ENODEV;
+	}
+	dev_put(dev);
+
+	if ((chan=dev->priv) == NULL){
+		printk(KERN_INFO "wansock: Exec cmd failed no priv area\n");
+		return -ENODEV;
+	}
+
+	if (atomic_read(&chan->command)){
+		printk(KERN_INFO "wansock: ERROR: Command already running %x, %s\n",
+			atomic_read(&chan->command),dev->name);
+		return -EINVAL;
+	}
+
+	if (!wp->mbox) {
+		printk(KERN_INFO "wansock: In execute without MBOX\n");
+		return -EINVAL;
+	}
+
+	((mbox_cmd_t*)wp->mbox)->cmd.command = cmd;	
+	((mbox_cmd_t*)wp->mbox)->cmd.lcn     = wp->lcn;
+	((mbox_cmd_t*)wp->mbox)->cmd.result  = 0x7F;
+
+
+	if (flags & O_NONBLOCK){
+		cmd |= 0x80;
+		atomic_set(&chan->command, cmd);
+	}else{
+		atomic_set(&chan->command, cmd);
+	}
+
+	add_wait_queue(sk->sk_sleep,&wait);
+	current->state = TASK_INTERRUPTIBLE;
+	for (;;){
+		if (((mbox_cmd_t*)wp->mbox)->cmd.result != 0x7F) {
+			err = 0;
+			break;
+		}
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(sk->sk_sleep,&wait);
+	
+	return err;
+}
+
+/*============================================================
+ * wanpipe_destroy_timer 
+ *
+ *	Used by wanpipe_release, to delay release of
+ *      the socket.
+ *===========================================================*/
+
+static void wanpipe_destroy_timer(unsigned long data)
+{
+	struct sock *sk=(struct sock *)data;
+	wanpipe_opt *wp = wp_sk(sk);
+
+	if ((!atomic_read(&sk->sk_wmem_alloc) &&
+	     !atomic_read(&sk->sk_rmem_alloc)) ||
+	    (++wp->force == 5)) {
+
+		if (atomic_read(&sk->sk_wmem_alloc) ||
+		    atomic_read(&sk->sk_rmem_alloc))
+			printk(KERN_INFO "wansock: Warning, Packet Discarded due to sock shutdown!\n");
+
+		kfree(wp);
+		wp_sk(sk) = NULL;
+		
+		if (atomic_read(&sk->sk_refcnt) != 1) {
+			atomic_set(&sk->sk_refcnt, 1);
+			DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :delay.\n",
+					atomic_read(&sk->sk_refcnt));
+		}
+		sock_put(sk);
+		atomic_dec(&wanpipe_socks_nr);
+		return;
+	}
+
+	sk->sk_timer.expires = jiffies + 5 * HZ;
+	add_timer(&sk->sk_timer);
+	printk(KERN_INFO "wansock: packet sk destroy delayed\n");
+}
+
+/*============================================================
+ * wanpipe_unlink_driver
+ *
+ * 	When the socket is released, this function is 
+ *      used to remove links that bind the sock and the
+ *      driver together.  
+ *===========================================================*/
+static void wanpipe_unlink_driver (struct sock *sk)
+{
+	struct net_device *dev;
+	wanpipe_common_t *chan=NULL;
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	sk->sk_state = WANSOCK_DISCONNECTED;
+	wp_sk(sk)->dev = NULL;
+
+	dev = dev_get_by_index(sk->sk_bound_dev_if);
+	if (!dev){
+		printk(KERN_INFO "wansock: No dev on release\n");
+		return;
+	}			
+	dev_put(dev);
+
+	if ((chan = dev->priv) == NULL){
+		printk(KERN_INFO "wansock: No Priv Area on release\n");
+		return;
+	}
+
+	set_bit(0,&chan->common_critical);
+	chan->sk=NULL;
+	chan->func=NULL;
+	chan->mbox=NULL;
+	chan->tx_timer=NULL;
+	clear_bit(0,&chan->common_critical);
+	release_device(dev);
+	
+	return;
+}
+
+/*============================================================
+ * wanpipe_link_driver
+ *
+ * 	Upon successful bind(), sock is linked to a driver
+ *      by binding in the wanpipe_rcv() bottom half handler
+ *      to the driver function pointer, as well as sock and
+ *      sock mailbox addresses.  This way driver can pass
+ *      data up the socket.
+ *===========================================================*/
+
+static void wanpipe_link_driver(struct net_device *dev, struct sock *sk)
+{
+	wanpipe_opt *wp = wp_sk(sk);
+	wanpipe_common_t *chan = dev->priv;
+	if (!chan)
+		return;
+	set_bit(0,&chan->common_critical);
+	chan->sk=sk;
+	chan->func=wanpipe_rcv;
+	chan->mbox = wp->mbox;
+	chan->tx_timer = &wp->tx_timer;
+	wp->dev = dev;
+	sock_set_flag(sk, SOCK_ZAPPED);
+	clear_bit(0,&chan->common_critical);
+}
+
+
+/*============================================================
+ * release_device
+ *
+ *   	During sock release, clear a critical bit, which 
+ *      marks the device a being taken.
+ *===========================================================*/
+
+
+static void release_device(struct net_device *dev)
+{
+	wanpipe_common_t *chan=dev->priv;
+	clear_bit(0,(void*)&chan->rw_bind);
+}
+
+/*============================================================
+ * wanpipe_release
+ *
+ *	Close a PACKET socket. This is fairly simple. We 
+ *      immediately go to 'closed' state and remove our 
+ *      protocol entry in the device list.
+ *===========================================================*/
+
+static int wanpipe_release(struct socket *sock)
+{
+	wanpipe_opt *wp;
+	struct sock *sk = sock->sk;
+	
+	if (!sk)
+		return 0;
+
+	wp = wp_sk(sk);
+	check_write_queue(sk);
+
+	/* Kill the tx timer, if we don't kill it now, the timer
+         * will run after we kill the sock.  Timer code will 
+         * try to access the sock which has been killed and cause
+         * kernel panic */
+
+	del_timer(&wp->tx_timer);
+
+	/*
+	 *	Unhook packet receive handler.
+	 */
+
+	if (wp->num == htons(X25_PROT) &&
+	    sk->sk_state != WANSOCK_DISCONNECTED && sock_flag(sk, SOCK_ZAPPED)) {
+		struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
+		wanpipe_common_t *chan;
+		if (dev){
+			chan=dev->priv;
+			atomic_set(&chan->disconnect,1);
+			DBG_PRINTK(KERN_INFO "wansock: Sending Clear Indication %i\n",
+					sk->sk_state);
+			dev_put(dev);
+		}	
+	}
+
+	set_bit(1,&wanpipe_tx_critical);
+	write_lock(&wanpipe_sklist_lock);
+	sk_del_node_init(sk);
+	write_unlock(&wanpipe_sklist_lock);
+	clear_bit(1,&wanpipe_tx_critical);
+
+
+	
+	release_driver(sk);
+
+	
+	/*
+	 *	Now the socket is dead. No more input will appear.
+	 */
+
+	sk->sk_state_change(sk);	/* It is useless. Just for sanity. */
+
+	sock->sk = NULL;
+	sk->sk_socket = NULL;
+	sock_set_flag(sk, SOCK_DEAD);
+
+	/* Purge queues */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&sk->sk_error_queue);
+
+	if (atomic_read(&sk->sk_rmem_alloc) ||
+	    atomic_read(&sk->sk_wmem_alloc)) {
+		del_timer(&sk->sk_timer);
+		printk(KERN_INFO "wansock: Killing in Timer R %i , W %i\n",
+			atomic_read(&sk->sk_rmem_alloc),
+			atomic_read(&sk->sk_wmem_alloc));
+		sk->sk_timer.data	= (unsigned long)sk;
+		sk->sk_timer.expires	= jiffies + HZ;
+		sk->sk_timer.function	= wanpipe_destroy_timer;
+		add_timer(&sk->sk_timer);
+		return 0;
+	}
+
+	kfree(wp);
+	wp_sk(sk) = NULL;
+
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i !:release.\n",
+					atomic_read(&sk->sk_refcnt));
+		atomic_set(&sk->sk_refcnt, 1);
+	}
+	sock_put(sk);
+	atomic_dec(&wanpipe_socks_nr);
+	return 0;
+}
+
+/*============================================================
+ * check_write_queue
+ *
+ *  	During sock shutdown, if the sock state is 
+ *      WANSOCK_CONNECTED and there is transmit data 
+ *      pending. Wait until data is released 
+ *      before proceeding.
+ *===========================================================*/
+
+static void check_write_queue(struct sock *sk)
+{
+
+	if (sk->sk_state != WANSOCK_CONNECTED)
+		return;
+
+	if (!atomic_read(&sk->sk_wmem_alloc))
+		return;
+
+	printk(KERN_INFO "wansock: MAJOR ERROR, Data lost on sock release !!!\n");
+
+}
+
+/*============================================================
+ * release_driver
+ *
+ *	This function is called during sock shutdown, to 
+ *      release any resources and links that bind the sock
+ *      to the driver.  It also changes the state of the
+ *      sock to WANSOCK_DISCONNECTED
+ *===========================================================*/
+
+static void release_driver(struct sock *sk)
+{
+	wanpipe_opt *wp;
+	struct sk_buff *skb=NULL;
+	struct sock *deadsk=NULL;
+
+	if (sk->sk_state == WANSOCK_LISTEN ||
+	    sk->sk_state == WANSOCK_BIND_LISTEN) {
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			if ((deadsk = get_newsk_from_skb(skb))){
+				DBG_PRINTK (KERN_INFO "wansock: RELEASE: FOUND DEAD SOCK\n");
+				sock_set_flag(deadsk, SOCK_DEAD);
+				start_cleanup_timer(deadsk);
+			}
+			kfree_skb(skb);
+		}
+		if (sock_flag(sk, SOCK_ZAPPED))
+			wanpipe_unlink_card(sk);
+	}else{
+		if (sock_flag(sk, SOCK_ZAPPED))
+			wanpipe_unlink_driver(sk);
+	}
+	sk->sk_state	    = WANSOCK_DISCONNECTED;
+	sk->sk_bound_dev_if = 0;
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	wp = wp_sk(sk);
+
+	if (wp && wp->mbox) {
+		kfree(wp->mbox);
+		wp->mbox = NULL;
+	}
+}
+
+/*============================================================
+ *  start_cleanup_timer
+ *
+ *  	If new incoming call's are pending but the socket
+ *      is being released, start the timer which will 
+ *      envoke the kill routines for pending socks.
+ *===========================================================*/
+
+
+static void start_cleanup_timer (struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+	sk->sk_timer.data	= (unsigned long)sk;
+	sk->sk_timer.expires	= jiffies + HZ;
+	sk->sk_timer.function	= wanpipe_kill_sock_timer;
+	add_timer(&sk->sk_timer);
+}
+
+
+/*============================================================
+ *  wanpipe_kill_sock
+ *
+ *	This is a function which performs actual killing
+ *      of the sock.  It releases socket resources,
+ *      and unlinks the sock from the driver. 
+ *===========================================================*/
+
+static void wanpipe_kill_sock_timer (unsigned long data)
+{
+
+	struct sock *sk = (struct sock *)data;
+	struct sock **skp;
+
+	if (!sk)
+		return;
+
+	/* This function can be called from interrupt. We must use
+	 * appropriate locks */
+	
+	if (test_bit(1,&wanpipe_tx_critical)){
+		sk->sk_timer.expires = jiffies + 10;
+		add_timer(&sk->sk_timer);
+		return;
+	}
+	
+	write_lock(&wanpipe_sklist_lock);
+	sk_del_node_init(sk);
+	write_unlock(&wanpipe_sklist_lock);
+
+
+	if (wp_sk(sk)->num == htons(X25_PROT) &&
+	    sk->sk_state != WANSOCK_DISCONNECTED) {
+		struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
+		wanpipe_common_t *chan;
+		if (dev){
+			chan=dev->priv;
+			atomic_set(&chan->disconnect,1);
+			dev_put(dev);
+		}	
+	}
+
+	release_driver(sk);
+
+	sk->sk_socket = NULL;
+
+	/* Purge queues */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&sk->sk_error_queue);
+	
+	if (atomic_read(&sk->sk_rmem_alloc) ||
+	    atomic_read(&sk->sk_wmem_alloc)) {
+		del_timer(&sk->sk_timer);
+		printk(KERN_INFO "wansock: Killing SOCK in Timer\n");
+		sk->sk_timer.data	= (unsigned long)sk;
+		sk->sk_timer.expires	= jiffies + HZ;
+		sk->sk_timer.function	= wanpipe_destroy_timer;
+		add_timer(&sk->sk_timer);
+		return;
+	}
+
+	if (wp_sk(sk)) {
+		kfree(wp_sk(sk));
+		wp_sk(sk) = NULL;
+	}
+
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		atomic_set(&sk->sk_refcnt, 1);
+		DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :timer.\n",
+					atomic_read(&sk->sk_refcnt));
+	}
+	sock_put(sk);
+	atomic_dec(&wanpipe_socks_nr);
+	return;
+}
+
+static void wanpipe_kill_sock_accept (struct sock *sk)
+{
+
+	struct sock **skp;
+
+	if (!sk)
+		return;
+
+	/* This function can be called from interrupt. We must use
+	 * appropriate locks */
+	
+	write_lock(&wanpipe_sklist_lock);
+	sk_del_node_init(sk);
+	write_unlock(&wanpipe_sklist_lock);
+
+	sk->sk_socket = NULL;
+
+
+	if (wp_sk(sk)) {
+		kfree(wp_sk(sk));
+		wp_sk(sk) = NULL;
+	}
+
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		atomic_set(&sk->sk_refcnt, 1);
+		DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i ! :timer.\n",
+					atomic_read(&sk->sk_refcnt));
+	}
+	sock_put(sk);
+	atomic_dec(&wanpipe_socks_nr);
+	return;
+}
+
+
+static void wanpipe_kill_sock_irq (struct sock *sk)
+{
+
+	if (!sk)
+		return;
+
+	sk->sk_socket = NULL;
+
+	if (wp_sk(sk)) {
+		kfree(wp_sk(sk));
+		wp_sk(sk) = NULL;
+	}
+
+	if (atomic_read(&sk->sk_refcnt) != 1) {
+		atomic_set(&sk->sk_refcnt, 1);
+		DBG_PRINTK(KERN_INFO "wansock: Error, wrong reference count: %i !:listen.\n",
+					atomic_read(&sk->sk_refcnt));
+	}
+	sock_put(sk);
+	atomic_dec(&wanpipe_socks_nr);
+}
+
+
+/*============================================================
+ *  wanpipe_do_bind
+ *
+ * 	Bottom half of the binding system call.
+ *      Once the wanpipe_bind() function checks  the
+ *      legality of the call, this function binds the
+ *      sock to the driver.
+ *===========================================================*/
+
+static int wanpipe_do_bind(struct sock *sk, struct net_device *dev,
+			   int protocol)
+{
+	wanpipe_opt *wp = wp_sk(sk);
+	wanpipe_common_t *chan=NULL;
+	int err=0;
+
+	if (sock_flag(sk, SOCK_ZAPPED)) {
+		err = -EALREADY;
+		goto bind_unlock_exit;
+	}
+
+	wp->num = protocol;
+
+	if (protocol == 0){
+		release_device(dev);
+		err = -EINVAL;
+		goto bind_unlock_exit;
+	}
+
+	if (dev) {
+		if (dev->flags&IFF_UP) {
+			chan=dev->priv;
+			sk->sk_state = chan->state;
+
+			if (wp->num == htons(X25_PROT) && 
+			    sk->sk_state != WANSOCK_DISCONNECTED && 
+			    sk->sk_state != WANSOCK_CONNECTING) {
+				DBG_PRINTK(KERN_INFO 
+					"wansock: Binding to Device not DISCONNECTED %i\n",
+						sk->sk_state);
+				release_device(dev);
+				err = -EAGAIN;
+				goto bind_unlock_exit;
+			}
+
+			wanpipe_link_driver(dev,sk);
+			sk->sk_bound_dev_if = dev->ifindex;
+
+			/* X25 Specific option */
+			if (wp->num == htons(X25_PROT))
+				wp_sk(sk)->svc = chan->svc;
+
+		} else {
+			sk->sk_err = ENETDOWN;
+			sk->sk_error_report(sk);
+			release_device(dev);
+			err = -EINVAL;
+		}
+	} else {
+		err = -ENODEV;
+	}
+bind_unlock_exit:
+	/* FIXME where is this lock */
+
+	return err;
+}
+
+/*============================================================
+ *  wanpipe_bind
+ *
+ *      BIND() System call, which is bound to the AF_WANPIPE
+ *      operations structure.  It checks for correct wanpipe
+ *      card name, and cross references interface names with
+ *      the card names.  Thus, interface name must belong to
+ *      the actual card.
+ *===========================================================*/
+
+
+static int wanpipe_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)uaddr;
+	struct sock *sk=sock->sk;
+	wanpipe_opt *wp = wp_sk(sk);
+	struct net_device *dev = NULL;
+	sdla_t *card=NULL;
+	char name[15];
+
+	/*
+	 *	Check legality
+	 */
+	 
+	if (addr_len < sizeof(struct wan_sockaddr_ll)){
+		printk(KERN_INFO "wansock: Address length error\n");
+		return -EINVAL;
+	}
+	if (sll->sll_family != AF_WANPIPE){
+		printk(KERN_INFO "wansock: Illegal family name specified.\n");
+		return -EINVAL;
+	}
+
+	card = wanpipe_find_card (sll->sll_card);
+	if (!card){
+		printk(KERN_INFO "wansock: Wanpipe card not found: %s\n",sll->sll_card);
+		return -ENODEV;
+	}else{
+		wp_sk(sk)->card = (void *)card;
+	}
+
+	if (!strcmp(sll->sll_device,"svc_listen")){
+
+		/* Bind a sock to a card structure for listening 
+		 */		
+		int err=0; 
+
+		/* This is x25 specific area if protocol doesn't
+                 * match, return error */
+		if (sll->sll_protocol != htons(X25_PROT))
+			return -EINVAL;
+
+		err= wanpipe_link_card (sk);
+		if (err < 0)
+			return err;
+
+		if (sll->sll_protocol)
+			wp->num = sll->sll_protocol;
+		sk->sk_state = WANSOCK_BIND_LISTEN;
+		return 0;
+
+	}else if (!strcmp(sll->sll_device,"svc_connect")){ 
+
+		/* This is x25 specific area if protocol doesn't
+                 * match, return error */
+		if (sll->sll_protocol != htons(X25_PROT))
+			return -EINVAL;
+
+		/* Find a free device 
+		 */
+		dev = wanpipe_find_free_dev(card);
+		if (dev == NULL){
+			DBG_PRINTK(KERN_INFO "wansock: No free network devices for card %s\n",
+				card->devname);
+			return -EINVAL;
+		}
+	}else{
+		/* Bind a socket to a interface name 
+                 * This is used by PVC mostly
+                 */
+		strlcpy(name,sll->sll_device,sizeof(name));
+		dev = dev_get_by_name(name);
+		if (dev == NULL){
+			printk(KERN_INFO "wansock: Failed to get Dev from name: %s,\n",
+					name);
+			return -ENODEV;
+		}
+
+		dev_put(dev);
+
+		if (check_dev(dev, card)){
+			printk(KERN_INFO "wansock: Device %s, doesn't belong to card %s\n",
+				dev->name, card->devname);
+			return -EINVAL;
+		}
+		if (get_atomic_device (dev))
+			return -EINVAL;
+	}
+
+	return wanpipe_do_bind(sk, dev, sll->sll_protocol ? : wp->num);
+}
+
+/*============================================================
+ * get_atomic_device
+ *	
+ *	Sets a bit atomically which indicates that 
+ *      the interface is taken. This avoids race conditions.
+ *===========================================================*/
+
+
+static inline int get_atomic_device(struct net_device *dev)
+{
+	wanpipe_common_t *chan = dev->priv;
+	if (!test_and_set_bit(0,(void *)&chan->rw_bind)){
+		return 0;
+	}
+	return 1;
+}
+
+/*============================================================
+ * check_dev
+ *	
+ *  	Check that device name belongs to a particular card.
+ *===========================================================*/
+
+static int check_dev(struct net_device *dev, sdla_t *card)
+{
+	struct net_device* tmp_dev;
+
+	for (tmp_dev = card->wandev.dev; tmp_dev;
+	     tmp_dev = *((struct net_device **)tmp_dev->priv)) {
+		if (tmp_dev->ifindex == dev->ifindex){ 
+			return 0;	
+		}
+	}
+	return 1;
+}
+
+/*============================================================
+ *  wanpipe_find_free_dev
+ *	
+ *	Find a free network interface. If found set atomic
+ *      bit indicating that the interface is taken.
+ *      X25API Specific.
+ *===========================================================*/
+
+struct net_device *wanpipe_find_free_dev(sdla_t *card)
+{
+	struct net_device* dev;
+	volatile wanpipe_common_t *chan;
+
+	if (test_and_set_bit(0,&find_free_critical)){
+		printk(KERN_INFO "CRITICAL in Find Free\n");
+	}	
+
+	for (dev = card->wandev.dev; dev;
+	     dev = *((struct net_device **)dev->priv)) {
+		chan = dev->priv;
+		if (!chan) 
+			continue;
+		if (chan->usedby == API && chan->svc){
+			if (!get_atomic_device (dev)){
+				if (chan->state != WANSOCK_DISCONNECTED){
+					release_device(dev);
+				}else{
+					clear_bit(0,&find_free_critical);
+					return dev;
+				}
+			}
+		}
+	}
+	clear_bit(0,&find_free_critical);
+	return NULL;
+}
+
+/*============================================================
+ *  wanpipe_create
+ *	
+ * 	SOCKET() System call.  It allocates a sock structure
+ *      and adds the socket to the wanpipe_sk_list. 
+ *      Crates AF_WANPIPE socket.
+ *===========================================================*/
+
+static int wanpipe_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	
+	//FIXME: This checks for root user, SECURITY ?
+	//if (!capable(CAP_NET_RAW))
+	//	return -EPERM;
+
+	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+
+	if ((sk = wanpipe_alloc_socket()) == NULL)
+		return -ENOBUFS;
+
+	sk->sk_reuse = 1;
+	sock->ops = &wanpipe_ops;
+	sock_init_data(sock,sk);
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	sk->sk_family	    = PF_WANPIPE;
+	wp_sk(sk)->num	    = protocol;
+	sk->sk_state	    = WANSOCK_DISCONNECTED;
+	sk->sk_ack_backlog  = 0;
+	sk->sk_bound_dev_if = 0;
+
+	atomic_inc(&wanpipe_socks_nr);
+	
+	/* We must disable interrupts because the ISR
+	 * can also change the list */
+	set_bit(1,&wanpipe_tx_critical);
+	write_lock(&wanpipe_sklist_lock);
+	sk_add_node(sk, &wanpipe_sklist);
+	write_unlock(&wanpipe_sklist_lock);
+	clear_bit(1,&wanpipe_tx_critical);
+
+	return(0);
+}
+
+
+/*============================================================
+ *  wanpipe_recvmsg
+ *	
+ *	Pull a packet from our receive queue and hand it 
+ *      to the user. If necessary we block.
+ *===========================================================*/
+
+static int wanpipe_recvmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *msg, int len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int copied, err=-ENOBUFS;
+
+
+	/*
+	 *	If the address length field is there to be filled in, we fill
+	 *	it in now.
+	 */
+
+	msg->msg_namelen = sizeof(struct wan_sockaddr_ll);
+
+	/*
+	 *	Call the generic datagram receiver. This handles all sorts
+	 *	of horrible races and re-entrancy so we can forget about it
+	 *	in the protocol layers.
+	 *
+	 *	Now it will return ENETDOWN, if device have just gone down,
+	 *	but then it will block.
+	 */
+
+	if (flags & MSG_OOB){	
+		skb = skb_dequeue(&sk->sk_error_queue);
+	}else{
+		skb=skb_recv_datagram(sk,flags,1,&err);
+	}
+	/*
+	 *	An error occurred so return it. Because skb_recv_datagram() 
+	 *	handles the blocking we don't see and worry about blocking
+	 *	retries.
+	 */
+
+	if(skb==NULL)
+		goto out;
+
+	/*
+	 *	You lose any data beyond the buffer you gave. If it worries a
+	 *	user program they can ask the device for its MTU anyway.
+	 */
+
+	copied = skb->len;
+	if (copied > len)
+	{
+		copied=len;
+		msg->msg_flags|=MSG_TRUNC;
+	}
+
+	wanpipe_wakeup_driver(sk);
+
+	/* We can't use skb_copy_datagram here */
+	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
+	if (err)
+		goto out_free;
+	
+	sock_recv_timestamp(msg, sk, skb);
+	
+	if (msg->msg_name)
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+
+	/*
+	 *	Free or return the buffer as appropriate. Again this
+	 *	hides all the races and re-entrancy issues from us.
+	 */
+	err = (flags&MSG_TRUNC) ? skb->len : copied;
+
+out_free:
+	skb_free_datagram(sk, skb);
+out:
+	return err;
+}
+
+
+/*============================================================
+ *  wanpipe_wakeup_driver
+ *	
+ * 	If socket receive buffer is full and driver cannot
+ *      pass data up the sock, it sets a packet_block flag.
+ *      This function check that flag and if sock receive 
+ *      queue has room it kicks the driver BH handler. 
+ *
+ * 	This way, driver doesn't have to poll the sock 
+ *      receive queue.
+ *===========================================================*/
+
+static void wanpipe_wakeup_driver(struct sock *sk)
+{
+	struct net_device *dev = NULL;
+	wanpipe_common_t *chan=NULL;
+
+	dev = dev_get_by_index(sk->sk_bound_dev_if);
+	if (!dev)
+		return;
+
+	dev_put(dev);
+
+	if ((chan = dev->priv) == NULL)
+		return;
+	
+	if (atomic_read(&chan->receive_block)){  
+		if (atomic_read(&sk->sk_rmem_alloc) <
+		    ((unsigned)sk->sk_rcvbuf * 0.9)) {
+			printk(KERN_INFO "wansock: Queuing task for wanpipe\n");
+			atomic_set(&chan->receive_block,0);
+			wanpipe_queue_tq(&chan->wanpipe_task);
+			wanpipe_mark_bh();
+		}
+	}	
+}	
+
+/*============================================================
+ *  wanpipe_getname
+ *	
+ * 	I don't know what to do with this yet. 
+ *      User can use this function to get sock address
+ *      information. Not very useful for Sangoma's purposes.
+ *===========================================================*/
+
+
+static int wanpipe_getname(struct socket *sock, struct sockaddr *uaddr,
+			  int *uaddr_len, int peer)
+{
+	struct net_device *dev;
+	struct sock *sk = sock->sk;
+	struct wan_sockaddr_ll *sll = (struct wan_sockaddr_ll*)uaddr;
+
+	sll->sll_family = AF_WANPIPE;
+	sll->sll_ifindex = sk->sk_bound_dev_if;
+	sll->sll_protocol = wp_sk(sk)->num;
+	dev = dev_get_by_index(sk->sk_bound_dev_if);
+	if (dev) {
+		sll->sll_hatype = dev->type;
+		sll->sll_halen = dev->addr_len;
+		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+	} else {
+		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
+		sll->sll_halen = 0;
+	}
+	*uaddr_len = sizeof(*sll);
+	
+	dev_put(dev);
+	
+	return 0;
+}
+
+/*============================================================
+ *  wanpipe_notifier
+ *	
+ *	If driver turns off network interface, this function
+ *      will be envoked. Currently I treate it as a 
+ *      call disconnect. More thought should go into this
+ *      function.
+ *
+ * FIXME: More thought should go into this function.
+ *
+ *===========================================================*/
+
+static int wanpipe_notifier(struct notifier_block *this, unsigned long msg, void *data)
+{
+	struct sock *sk;
+	hlist_node *node;
+	struct net_device *dev = (struct net_device *)data;
+
+	sk_for_each(sk, node, &wanpipe_sklist) {
+		struct wanpipe_opt *po = wp_sk(sk);
+
+		if (!po)
+			continue;
+		if (dev == NULL)
+			continue;
+		
+		switch (msg) {
+		case NETDEV_DOWN:
+		case NETDEV_UNREGISTER:
+			if (dev->ifindex == sk->sk_bound_dev_if) {
+				printk(KERN_INFO "wansock: Device down %s\n",dev->name);
+				if (sock_flag(sk, SOCK_ZAPPED)) {
+					wanpipe_unlink_driver(sk);
+					sk->sk_err = ENETDOWN;
+					sk->sk_error_report(sk);
+				}
+
+				if (msg == NETDEV_UNREGISTER) {
+					printk(KERN_INFO "wansock: Unregistering Device: %s\n",
+						 	  dev->name);
+					wanpipe_unlink_driver(sk);
+					sk->sk_bound_dev_if = 0;
+				}
+			}
+			break;
+		case NETDEV_UP:
+			if (dev->ifindex == sk->sk_bound_dev_if &&
+			    po->num && !sock_flag(sk, SOCK_ZAPPED)) {
+				printk(KERN_INFO "wansock: Registering Device: %s\n",
+						dev->name);
+				wanpipe_link_driver(dev,sk);
+			}
+			break;
+		}
+	}
+	return NOTIFY_DONE;
+}
+
+/*============================================================
+ *  wanpipe_ioctl
+ *	
+ * 	Execute a user commands, and set socket options.
+ *
+ * FIXME: More thought should go into this function.
+ *
+ *===========================================================*/
+
+static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	switch(cmd) 
+	{
+		case SIOCGSTAMP:
+			return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+		case SIOC_WANPIPE_CHECK_TX:
+
+			return atomic_read(&sk->sk_wmem_alloc);
+
+		case SIOC_WANPIPE_SOCK_STATE:
+
+			if (sk->sk_state == WANSOCK_CONNECTED)
+				return 0;
+			
+			return 1;
+
+
+		case SIOC_WANPIPE_GET_CALL_DATA:
+
+			return get_ioctl_cmd (sk,(void*)arg);
+
+		case SIOC_WANPIPE_SET_CALL_DATA:
+
+			return set_ioctl_cmd (sk,(void*)arg);
+
+		case SIOC_WANPIPE_ACCEPT_CALL:
+		case SIOC_WANPIPE_CLEAR_CALL:
+		case SIOC_WANPIPE_RESET_CALL:
+
+			if ((err=set_ioctl_cmd(sk,(void*)arg)) < 0)
+				return err;
+
+			err=wanpipe_exec_cmd(sk,cmd,0);
+			get_ioctl_cmd(sk,(void*)arg);
+			return err;
+
+		case SIOC_WANPIPE_DEBUG:
+
+			return wanpipe_debug(sk,(void*)arg);
+	
+		case SIOC_WANPIPE_SET_NONBLOCK:
+
+			if (sk->sk_state != WANSOCK_DISCONNECTED)
+				return -EINVAL;
+
+			sock->file->f_flags |= O_NONBLOCK;
+			return 0;
+	
+#ifdef CONFIG_INET
+		case SIOCADDRT:
+		case SIOCDELRT:
+		case SIOCDARP:
+		case SIOCGARP:
+		case SIOCSARP:
+		case SIOCDRARP:
+		case SIOCGRARP:
+		case SIOCSRARP:
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCGIFNETMASK:
+		case SIOCSIFNETMASK:
+		case SIOCGIFDSTADDR:
+		case SIOCSIFDSTADDR:
+		case SIOCSIFFLAGS:
+			return inet_dgram_ops.ioctl(sock, cmd, arg);
+#endif
+
+		default:
+			return dev_ioctl(cmd,(void __user *) arg);
+	}
+	/*NOTREACHED*/
+}
+
+/*============================================================
+ *  wanpipe_debug
+ *	
+ *	This function will pass up information about all
+ *      active sockets.
+ *
+ * FIXME: More thought should go into this function.
+ *
+ *===========================================================*/
+
+static int wanpipe_debug (struct sock *origsk, void *arg)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	struct net_device *dev = NULL;
+	wanpipe_common_t *chan=NULL;
+	int cnt=0, err=0;
+	wan_debug_t *dbg_data = (wan_debug_t *)arg;
+
+	sk_for_each(sk, node, &wanpipe_sklist) {
+		wanpipe_opt *wp = wp_sk(sk);
+
+		if (sk == origsk){
+			continue;
+		}
+
+		if ((err=put_user(1, &dbg_data->debug[cnt].free)))
+			return err;
+		if ((err = put_user(sk->sk_state,
+				    &dbg_data->debug[cnt].state_sk)))
+			return err;
+		if ((err = put_user(sk->sk_rcvbuf,
+				    &dbg_data->debug[cnt].rcvbuf)))
+			return err;
+		if ((err = put_user(atomic_read(&sk->sk_rmem_alloc),
+				    &dbg_data->debug[cnt].rmem)))
+			return err;
+		if ((err = put_user(atomic_read(&sk->sk_wmem_alloc),
+				    &dbg_data->debug[cnt].wmem)))
+			return err;
+		if ((err = put_user(sk->sk_sndbuf,
+				    &dbg_data->debug[cnt].sndbuf)))
+			return err;
+		if ((err=put_user(sk_count, &dbg_data->debug[cnt].sk_count)))
+			return err;
+		if ((err=put_user(wp->poll_cnt, &dbg_data->debug[cnt].poll_cnt)))
+			return err;
+		if ((err = put_user(sk->sk_bound_dev_if,
+				    &dbg_data->debug[cnt].bound)))
+			return err;
+
+		if (sk->sk_bound_dev_if) {
+			dev = dev_get_by_index(sk->sk_bound_dev_if);
+			if (!dev)	
+				continue;
+
+			chan=dev->priv;
+			dev_put(dev);
+	
+			if ((err=put_user(chan->state, &dbg_data->debug[cnt].d_state)))
+				return err;
+			if ((err=put_user(chan->svc, &dbg_data->debug[cnt].svc)))
+				return err;
+
+			if ((err=put_user(atomic_read(&chan->command), 
+						&dbg_data->debug[cnt].command)))
+				return err;
+
+
+			if (wp){
+				sdla_t *card = (sdla_t*)wp->card;			
+	
+				if (card){
+					if ((err=put_user(atomic_read(&card->u.x.command_busy), 
+								&dbg_data->debug[cnt].cmd_busy)))
+						return err;
+				}
+
+				if ((err=put_user(wp->lcn, 
+						  &dbg_data->debug[cnt].lcn)))
+					return err;
+				
+				if (wp->mbox) {
+					if ((err=put_user(1, &dbg_data->debug[cnt].mbox)))
+						return err;
+				}
+			}
+
+			if ((err=put_user(atomic_read(&chan->receive_block), 
+								&dbg_data->debug[cnt].rblock)))
+				return err;
+
+			if (copy_to_user(dbg_data->debug[cnt].name, dev->name, strlen(dev->name)))
+				return -EFAULT;
+		}
+	
+		if (++cnt == MAX_NUM_DEBUG)
+			break;
+	}
+	return 0;
+}
+
+/*============================================================
+ *  get_ioctl_cmd
+ *	
+ *	Pass up the contents of socket MBOX to the user.
+ *===========================================================*/
+
+static int get_ioctl_cmd (struct sock *sk, void *arg)
+{
+	x25api_t *usr_data = (x25api_t *)arg;
+	mbox_cmd_t *mbox_ptr;
+	int err;
+
+	if (usr_data == NULL)
+		return -EINVAL;
+
+	if (!wp_sk(sk)->mbox) {
+		return -EINVAL;
+	}
+
+	mbox_ptr = (mbox_cmd_t *)wp_sk(sk)->mbox;
+
+	if ((err=put_user(mbox_ptr->cmd.qdm, &usr_data->hdr.qdm)))
+		return err;
+	if ((err=put_user(mbox_ptr->cmd.cause, &usr_data->hdr.cause)))
+		return err;
+	if ((err=put_user(mbox_ptr->cmd.diagn, &usr_data->hdr.diagn)))
+		return err;
+	if ((err=put_user(mbox_ptr->cmd.length, &usr_data->hdr.length)))
+		return err;
+	if ((err=put_user(mbox_ptr->cmd.result, &usr_data->hdr.result)))
+		return err;
+	if ((err=put_user(mbox_ptr->cmd.lcn, &usr_data->hdr.lcn)))
+		return err;	
+
+	if (mbox_ptr->cmd.length > 0){
+		if (mbox_ptr->cmd.length > X25_MAX_DATA)
+			return -EINVAL;
+
+		if (copy_to_user(usr_data->data, mbox_ptr->data, mbox_ptr->cmd.length)){
+			printk(KERN_INFO "wansock: Copy failed !!!\n");
+			return -EFAULT;
+		}
+	}
+	return 0;
+} 
+
+/*============================================================
+ *  set_ioctl_cmd
+ *	
+ *	Before command can be execute, socket MBOX must
+ *      be created, and initialized with user data.	
+ *===========================================================*/
+
+static int set_ioctl_cmd (struct sock *sk, void *arg)
+{
+	x25api_t *usr_data = (x25api_t *)arg;
+	mbox_cmd_t *mbox_ptr;
+	int err;
+
+	if (!wp_sk(sk)->mbox) {
+		void *mbox_ptr;
+		struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
+		if (!dev)
+			return -ENODEV;
+
+		dev_put(dev);
+		
+		if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL)
+			return -ENOMEM;
+
+		memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
+		wp_sk(sk)->mbox = mbox_ptr;
+
+		wanpipe_link_driver(dev,sk);
+	}
+
+	mbox_ptr = (mbox_cmd_t*)wp_sk(sk)->mbox;
+	memset(mbox_ptr, 0, sizeof(mbox_cmd_t));
+
+	if (usr_data == NULL){
+		return 0;
+	}
+	if ((err=get_user(mbox_ptr->cmd.qdm, &usr_data->hdr.qdm)))
+		return err;
+	if ((err=get_user(mbox_ptr->cmd.cause, &usr_data->hdr.cause)))
+		return err;
+	if ((err=get_user(mbox_ptr->cmd.diagn, &usr_data->hdr.diagn)))
+		return err;
+	if ((err=get_user(mbox_ptr->cmd.length, &usr_data->hdr.length)))
+		return err;
+	if ((err=get_user(mbox_ptr->cmd.result, &usr_data->hdr.result)))
+		return err;
+
+	if (mbox_ptr->cmd.length > 0){
+		if (mbox_ptr->cmd.length > X25_MAX_DATA)
+			return -EINVAL;
+
+		if (copy_from_user(mbox_ptr->data, usr_data->data, mbox_ptr->cmd.length)){
+			printk(KERN_INFO "Copy failed\n");
+			return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+
+/*======================================================================
+ * wanpipe_poll
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Note: when you _don't_ use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ *=====================================================================*/
+
+unsigned int wanpipe_poll(struct file * file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+
+	++wp_sk(sk)->poll_cnt;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) {
+		mask |= POLLPRI;
+		return mask;
+	}
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue)) {
+		mask |= POLLIN | POLLRDNORM;
+	}
+
+	/* connection hasn't started yet */
+	if (sk->sk_state == WANSOCK_CONNECTING) {
+		return mask;
+	}
+
+	if (sk->sk_state == WANSOCK_DISCONNECTED) {
+		mask = POLLPRI;
+		return mask;
+	}
+
+	/* This check blocks the user process if there is   
+	 * a packet already queued in the socket write queue.
+         * This option is only for X25API protocol, for other
+         * protocol like chdlc enable streaming mode, 
+         * where multiple packets can be pending in the socket 
+         * transmit queue */
+
+	if (wp_sk(sk)->num == htons(X25_PROT)) {
+		if (atomic_read(&wp_sk(sk)->packet_sent))
+			return mask;
+	}
+
+	/* writable? */
+	if (sock_writeable(sk)){
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	}else{
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	}
+		
+	return mask;
+}
+
+/*======================================================================
+ * wanpipe_listen
+ *
+ *	X25API Specific function. Set a socket into LISTENING  MODE.
+ *=====================================================================*/
+
+
+static int wanpipe_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+
+ 	/* This is x25 specific area if protocol doesn't
+         * match, return error */
+	if (wp_sk(sk)->num != htons(X25_PROT))
+		return -EINVAL;
+
+	if (sk->sk_state == WANSOCK_BIND_LISTEN) {
+
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = WANSOCK_LISTEN;
+		return 0;
+	}else{
+		printk(KERN_INFO "wansock: Listening sock was not binded\n");
+	}
+
+	return -EINVAL;
+}
+
+/*======================================================================
+ * wanpipe_link_card
+ *
+ *	Connects the listening socket to the driver
+ *=====================================================================*/
+
+static int wanpipe_link_card (struct sock *sk)
+{
+	sdla_t *card = (sdla_t*)wp_sk(sk)->card;
+
+	if (!card)
+		return -ENOMEM;
+
+	if ((card->sk != NULL) || (card->func != NULL)){
+		printk(KERN_INFO "wansock: Listening queue is already established\n");
+		return -EINVAL;
+	}
+
+	card->sk=sk;
+	card->func=wanpipe_listen_rcv;
+	sock_set_flag(sk, SOCK_ZAPPED);
+ 
+	return 0;
+}
+
+/*======================================================================
+ * wanpipe_listen
+ *
+ *	X25API Specific function. Disconnect listening socket from
+ *      the driver.
+ *=====================================================================*/
+
+static void wanpipe_unlink_card (struct sock *sk)
+{
+	sdla_t *card = (sdla_t*)wp_sk(sk)->card; 
+
+	if (card){
+		card->sk=NULL;
+		card->func=NULL;
+	}
+}
+
+/*======================================================================
+ * wanpipe_exec_cmd
+ *
+ *	Ioctl function calls this function to execute user command.
+ *      Connect() sytem call also calls this function to execute
+ *      place call.  This function blocks until command is executed.
+ *=====================================================================*/
+
+static int wanpipe_exec_cmd(struct sock *sk, int cmd, unsigned int flags)
+{
+	int err = -EINVAL;
+	wanpipe_opt *wp = wp_sk(sk);
+	mbox_cmd_t *mbox_ptr = (mbox_cmd_t*)wp->mbox;
+
+	if (!mbox_ptr){
+		printk(KERN_INFO "NO MBOX PTR !!!!!\n");
+		return -EINVAL;
+	}
+	
+	/* This is x25 specific area if protocol doesn't
+         * match, return error */
+	if (wp->num != htons(X25_PROT))
+		return -EINVAL;
+
+
+	switch (cmd){
+
+		case SIOC_WANPIPE_ACCEPT_CALL:
+
+			if (sk->sk_state != WANSOCK_CONNECTING) {
+				err = -EHOSTDOWN;
+				break;
+			}
+			
+			err = execute_command(sk,X25_ACCEPT_CALL,0);
+			if (err < 0)
+				break;
+
+			/* Update. Mar6 2000. 
+                         * Do not set the sock lcn number here, since
+                         * it is done in wanpipe_listen_rcv(). 
+                         */ 
+		 	if (sk->sk_state == WANSOCK_CONNECTED) {
+				wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;	
+				DBG_PRINTK(KERN_INFO "\nwansock: Accept OK %i\n",
+					wp->lcn);
+				err = 0;
+
+			}else{
+				DBG_PRINTK (KERN_INFO "\nwansock: Accept Failed %i\n",
+					wp->lcn);
+				wp->lcn = 0;
+				err = -ECONNREFUSED;
+			}
+			break;
+
+		case SIOC_WANPIPE_CLEAR_CALL:
+
+			if (sk->sk_state == WANSOCK_DISCONNECTED) {
+				err = -EINVAL;
+				break;
+			}
+
+
+			/* Check if data buffers are pending for transmission,
+                         * if so, check whether user wants to wait until data
+                         * is transmitted, or clear a call and drop packets */
+                          
+			if (atomic_read(&sk->sk_wmem_alloc) ||
+			    check_driver_busy(sk)) {
+			  	mbox_cmd_t *mbox = wp->mbox;
+				if (mbox->cmd.qdm & 0x80){
+					mbox->cmd.result = 0x35;
+					err = -EAGAIN;	
+					break;
+				}
+			}
+
+			sk->sk_state = WANSOCK_DISCONNECTING;
+
+			err = execute_command(sk,X25_CLEAR_CALL,0);
+			if (err < 0)
+				break;
+
+			err = -ECONNREFUSED;
+			if (sk->sk_state == WANSOCK_DISCONNECTED) {
+				DBG_PRINTK(KERN_INFO "\nwansock: CLEAR OK %i\n",
+					   wp->lcn);
+				wp->lcn = 0;
+				err = 0;
+			}
+			break;
+
+		case SIOC_WANPIPE_RESET_CALL:
+
+			if (sk->sk_state != WANSOCK_CONNECTED) {
+				err = -EINVAL;
+				break;
+			}
+
+
+			/* Check if data buffers are pending for transmission,
+                         * if so, check whether user wants to wait until data
+                         * is transmitted, or reset a call and drop packets */
+                          
+			if (atomic_read(&sk->sk_wmem_alloc) ||
+			    check_driver_busy(sk)) {
+			  	mbox_cmd_t *mbox = wp->mbox;
+				if (mbox->cmd.qdm & 0x80){
+					mbox->cmd.result = 0x35;
+					err = -EAGAIN;	
+					break;
+				}
+			}
+
+
+			err = execute_command(sk, X25_RESET,0);
+			if (err < 0)
+				break;
+
+			err = mbox_ptr->cmd.result;
+			break;
+
+
+		case X25_PLACE_CALL:
+
+			err=execute_command(sk,X25_PLACE_CALL,flags);
+			if (err < 0)
+				break;
+
+			if (sk->sk_state == WANSOCK_CONNECTED) {
+
+				wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;	
+
+				DBG_PRINTK(KERN_INFO "\nwansock: PLACE CALL OK %i\n",
+					wp->lcn);
+				err = 0;
+
+			} else if (sk->sk_state == WANSOCK_CONNECTING &&
+				   (flags & O_NONBLOCK)) {
+				wp->lcn = ((mbox_cmd_t*)wp->mbox)->cmd.lcn;
+				DBG_PRINTK(KERN_INFO "\nwansock: Place Call OK: Waiting %i\n",
+					wp->lcn);
+
+				err = 0;
+
+			}else{
+				DBG_PRINTK(KERN_INFO "\nwansock: Place call Failed\n");
+				err = -ECONNREFUSED;
+			}
+
+			break;
+
+		default: 
+			return -EINVAL;
+	}
+
+	return err;
+}
+
+static int check_driver_busy (struct sock *sk)
+{
+	struct net_device *dev = dev_get_by_index(sk->sk_bound_dev_if);
+	wanpipe_common_t *chan;
+
+	if (!dev)
+		return 0;
+
+	dev_put(dev);
+
+	if ((chan=dev->priv) == NULL)
+		return 0;
+
+	return atomic_read(&chan->driver_busy);
+}
+
+
+/*======================================================================
+ * wanpipe_accept
+ *
+ *	ACCEPT() System call.	X25API Specific function. 
+ *	For each incoming call, create a new socket and 
+ *      return it to the user.	
+ *=====================================================================*/
+
+static int wanpipe_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk;
+	struct sock *newsk;
+	struct sk_buff *skb;
+	DECLARE_WAITQUEUE(wait, current);
+	int err=0;
+
+	if (newsock->sk != NULL){
+		wanpipe_kill_sock_accept(newsock->sk);	
+		newsock->sk=NULL;
+	}
+	
+	if ((sk = sock->sk) == NULL)
+		return -EINVAL;
+
+	if (sk->sk_type != SOCK_RAW)
+		return -EOPNOTSUPP;
+
+	if (sk->sk_state != WANSOCK_LISTEN)
+		return -EINVAL;
+
+	if (wp_sk(sk)->num != htons(X25_PROT))
+		return -EINVAL;
+
+	add_wait_queue(sk->sk_sleep,&wait);
+	current->state = TASK_INTERRUPTIBLE;
+	for (;;){
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb){
+			err=0;
+			break;
+		}
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	}
+	current->state = TASK_RUNNING;
+	remove_wait_queue(sk->sk_sleep,&wait);
+	
+	if (err != 0)
+		return err;
+	
+	newsk = get_newsk_from_skb(skb);
+	if (!newsk){
+		return -EINVAL;
+	}
+
+	set_bit(1,&wanpipe_tx_critical);
+	write_lock(&wanpipe_sklist_lock);
+	sk_add_node(newsk, &wanpipe_sklist);
+	write_unlock(&wanpipe_sklist_lock);
+	clear_bit(1,&wanpipe_tx_critical);
+
+	newsk->sk_socket = newsock;
+	newsk->sk_sleep = &newsock->wait;
+
+	/* Now attach up the new socket */
+	sk->sk_ack_backlog--;
+	newsock->sk = newsk;
+	
+	kfree_skb(skb);
+
+	DBG_PRINTK(KERN_INFO "\nwansock: ACCEPT Got LCN %i\n",
+		   wp_sk(newsk)->lcn);
+	return 0;
+}
+
+/*======================================================================
+ *  get_newsk_from_skb
+ *
+ *	Accept() uses this function to get the address of the new
+ *      socket structure.
+ *=====================================================================*/
+
+struct sock * get_newsk_from_skb (struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	wanpipe_common_t *chan;	
+
+	if (!dev){
+		return NULL;
+	}
+		
+	if ((chan = dev->priv) == NULL){
+		return NULL;
+	}
+		
+	if (!chan->sk){
+		return NULL;
+	}
+	return (struct sock *)chan->sk;
+}
+
+/*======================================================================
+ *  wanpipe_connect
+ *
+ *  	CONNECT() System Call. X25API specific function
+ * 	Check the state of the sock, and execute PLACE_CALL command.
+ *      Connect can ether block or return without waiting for connection, 
+ *      if specified by user.
+ *=====================================================================*/
+
+static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct wan_sockaddr_ll *addr = (struct wan_sockaddr_ll*)uaddr;
+	struct net_device *dev;
+	int err;
+
+	if (wp_sk(sk)->num != htons(X25_PROT))
+		return -EINVAL;
+
+	if (sk->sk_state == WANSOCK_CONNECTED)
+		return -EISCONN;	/* No reconnect on a seqpacket socket */
+
+	if (sk->sk_state != WAN_DISCONNECTED) {
+		printk(KERN_INFO "wansock: Trying to connect on channel NON DISCONNECT\n");
+		return -ECONNREFUSED;
+	}
+
+	sk->sk_state = WANSOCK_DISCONNECTED;	
+	sock->state  = SS_UNCONNECTED;
+
+	if (addr_len != sizeof(struct wan_sockaddr_ll))
+		return -EINVAL;
+
+	if (addr->sll_family != AF_WANPIPE)
+		return -EINVAL;
+
+	if ((dev = dev_get_by_index(sk->sk_bound_dev_if)) == NULL)
+		return -ENETUNREACH;
+
+	dev_put(dev);
+	
+	if (!sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
+		return -EINVAL;
+
+	sock->state   = SS_CONNECTING;
+	sk->sk_state  = WANSOCK_CONNECTING;
+
+	if (!wp_sk(sk)->mbox) {
+		if (wp_sk (sk)->svc)
+			return -EINVAL;
+		else {
+			int err;
+			if ((err=set_ioctl_cmd(sk,NULL)) < 0)
+				return err;
+		}
+	}
+
+	if ((err=wanpipe_exec_cmd(sk, X25_PLACE_CALL,flags)) != 0){
+		sock->state = SS_UNCONNECTED;
+		sk->sk_state = WANSOCK_CONNECTED;
+		return err;
+	}
+
+	if (sk->sk_state != WANSOCK_CONNECTED && (flags & O_NONBLOCK)) {
+		return 0;
+	}
+
+	if (sk->sk_state != WANSOCK_CONNECTED) {
+		sock->state = SS_UNCONNECTED;
+		return -ECONNREFUSED; 
+	}
+
+	sock->state = SS_CONNECTED;
+	return 0;
+}
+
+struct proto_ops wanpipe_ops = {
+	.family = 	PF_WANPIPE,
+	.owner =	THIS_MODULE,
+	.release = 	wanpipe_release,
+	.bind = 	wanpipe_bind,
+	.connect = 	wanpipe_connect,
+	.socketpair = 	sock_no_socketpair,
+	.accept = 	wanpipe_accept,
+	.getname = 	wanpipe_getname, 
+	.poll = 	wanpipe_poll,
+	.ioctl = 	wanpipe_ioctl,
+	.listen = 	wanpipe_listen, 
+	.shutdown = 	sock_no_shutdown,
+	.setsockopt = 	sock_no_setsockopt,
+	.getsockopt = 	sock_no_getsockopt,
+	.sendmsg = 	wanpipe_sendmsg,
+	.recvmsg = 	wanpipe_recvmsg
+};
+
+static struct net_proto_family wanpipe_family_ops = {
+	.family = PF_WANPIPE,
+	.create = wanpipe_create,
+	.owner	= THIS_MODULE,
+};
+
+struct notifier_block wanpipe_netdev_notifier = {
+	.notifier_call = wanpipe_notifier,
+};
+
+
+#ifdef MODULE
+void cleanup_module(void)
+{
+	printk(KERN_INFO "wansock: Cleaning up \n");
+	unregister_netdevice_notifier(&wanpipe_netdev_notifier);
+	sock_unregister(PF_WANPIPE);
+	proto_unregister(&wanpipe_proto);
+}
+
+int init_module(void)
+{
+	int rc;
+
+	printk(KERN_INFO "wansock: Registering Socket \n");
+
+	rc = proto_register(&wanpipe_proto, 0);
+	if (rc != 0)
+		goto out;
+
+	sock_register(&wanpipe_family_ops);
+	register_netdevice_notifier(&wanpipe_netdev_notifier);
+out:
+	return rc;
+}
+#endif
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_WANPIPE);
diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel
new file mode 100644
index 00000000000..c043eea7767
--- /dev/null
+++ b/net/wanrouter/patchlevel
@@ -0,0 +1 @@
+2.2.1
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
new file mode 100644
index 00000000000..956c17f6c54
--- /dev/null
+++ b/net/wanrouter/wanmain.c
@@ -0,0 +1,888 @@
+/*****************************************************************************
+* wanmain.c	WAN Multiprotocol Router Module. Main code.
+*
+*		This module is completely hardware-independent and provides
+*		the following common services for the WAN Link Drivers:
+*		 o WAN device managenment (registering, unregistering)
+*		 o Network interface management
+*		 o Physical connection management (dial-up, incoming calls)
+*		 o Logical connection management (switched virtual circuits)
+*		 o Protocol encapsulation/decapsulation
+*
+* Author:	Gideon Hack
+*
+* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
+*
+*		This program is free software; you can redistribute it and/or
+*		modify it under the terms of the GNU General Public License
+*		as published by the Free Software Foundation; either version
+*		2 of the License, or (at your option) any later version.
+* ============================================================================
+* Nov 24, 2000  Nenad Corbic	Updated for 2.4.X kernels
+* Nov 07, 2000  Nenad Corbic	Fixed the Mulit-Port PPP for kernels 2.2.16 and
+*  				greater.
+* Aug 2,  2000  Nenad Corbic	Block the Multi-Port PPP from running on
+*  			        kernels 2.2.16 or greater.  The SyncPPP
+*  			        has changed.
+* Jul 13, 2000  Nenad Corbic	Added SyncPPP support
+* 				Added extra debugging in device_setup().
+* Oct 01, 1999  Gideon Hack     Update for s514 PCI card
+* Dec 27, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
+* Jan 16, 1997	Gene Kozin	router_devlist made public
+* Jan 31, 1997  Alan Cox	Hacked it about a bit for 2.1
+* Jun 27, 1997  Alan Cox	realigned with vendor code
+* Oct 15, 1997  Farhan Thawar   changed wan_encapsulate to add a pad byte of 0
+* Apr 20, 1998	Alan Cox	Fixed 2.1 symbols
+* May 17, 1998  K. Baranowski	Fixed SNAP encapsulation in wan_encapsulate
+* Dec 15, 1998  Arnaldo Melo    support for firmwares of up to 128000 bytes
+*                               check wandev->setup return value
+* Dec 22, 1998  Arnaldo Melo    vmalloc/vfree used in device_setup to allocate
+*                               kernel memory and copy configuration data to
+*                               kernel space (for big firmwares)
+* Jun 02, 1999  Gideon Hack	Updates for Linux 2.0.X and 2.2.X kernels.
+*****************************************************************************/
+
+#include <linux/config.h>
+#include <linux/stddef.h>	/* offsetof(), etc. */
+#include <linux/errno.h>	/* return codes */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>	/* support for loadable modules */
+#include <linux/slab.h>	/* kmalloc(), kfree() */
+#include <linux/mm.h>		/* verify_area(), etc. */
+#include <linux/string.h>	/* inline mem*, str* functions */
+
+#include <asm/byteorder.h>	/* htons(), etc. */
+#include <linux/wanrouter.h>	/* WAN router API definitions */
+
+#include <linux/vmalloc.h>	/* vmalloc, vfree */
+#include <asm/uaccess.h>        /* copy_to/from_user */
+#include <linux/init.h>         /* __initfunc et al. */
+#include <net/syncppp.h>
+
+#define KMEM_SAFETYZONE 8
+
+/***********FOR DEBUGGING PURPOSES*********************************************
+static void * dbg_kmalloc(unsigned int size, int prio, int line) {
+	int i = 0;
+	void * v = kmalloc(size+sizeof(unsigned int)+2*KMEM_SAFETYZONE*8,prio);
+	char * c1 = v;
+	c1 += sizeof(unsigned int);
+	*((unsigned int *)v) = size;
+
+	for (i = 0; i < KMEM_SAFETYZONE; i++) {
+		c1[0] = 'D'; c1[1] = 'E'; c1[2] = 'A'; c1[3] = 'D';
+		c1[4] = 'B'; c1[5] = 'E'; c1[6] = 'E'; c1[7] = 'F';
+		c1 += 8;
+	}
+	c1 += size;
+	for (i = 0; i < KMEM_SAFETYZONE; i++) {
+		c1[0] = 'M'; c1[1] = 'U'; c1[2] = 'N'; c1[3] = 'G';
+		c1[4] = 'W'; c1[5] = 'A'; c1[6] = 'L'; c1[7] = 'L';
+		c1 += 8;
+	}
+	v = ((char *)v) + sizeof(unsigned int) + KMEM_SAFETYZONE*8;
+	printk(KERN_INFO "line %d  kmalloc(%d,%d) = %p\n",line,size,prio,v);
+	return v;
+}
+static void dbg_kfree(void * v, int line) {
+	unsigned int * sp = (unsigned int *)(((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8));
+	unsigned int size = *sp;
+	char * c1 = ((char *)v) - KMEM_SAFETYZONE*8;
+	int i = 0;
+	for (i = 0; i < KMEM_SAFETYZONE; i++) {
+		if (   c1[0] != 'D' || c1[1] != 'E' || c1[2] != 'A' || c1[3] != 'D'
+		    || c1[4] != 'B' || c1[5] != 'E' || c1[6] != 'E' || c1[7] != 'F') {
+			printk(KERN_INFO "kmalloced block at %p has been corrupted (underrun)!\n",v);
+			printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8,
+			                c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] );
+		}
+		c1 += 8;
+	}
+	c1 += size;
+	for (i = 0; i < KMEM_SAFETYZONE; i++) {
+		if (   c1[0] != 'M' || c1[1] != 'U' || c1[2] != 'N' || c1[3] != 'G'
+		    || c1[4] != 'W' || c1[5] != 'A' || c1[6] != 'L' || c1[7] != 'L'
+		   ) {
+			printk(KERN_INFO "kmalloced block at %p has been corrupted (overrun):\n",v);
+			printk(KERN_INFO " %4x: %2x %2x %2x %2x %2x %2x %2x %2x\n", i*8,
+			                c1[0],c1[1],c1[2],c1[3],c1[4],c1[5],c1[6],c1[7] );
+		}
+		c1 += 8;
+	}
+	printk(KERN_INFO "line %d  kfree(%p)\n",line,v);
+	v = ((char *)v) - (sizeof(unsigned int) + KMEM_SAFETYZONE*8);
+	kfree(v);
+}
+
+#define kmalloc(x,y) dbg_kmalloc(x,y,__LINE__)
+#define kfree(x) dbg_kfree(x,__LINE__)
+*****************************************************************************/
+
+/*
+ * 	Function Prototypes
+ */
+
+/*
+ *	WAN device IOCTL handlers
+ */
+
+static int wanrouter_device_setup(struct wan_device *wandev,
+				  wandev_conf_t __user *u_conf);
+static int wanrouter_device_stat(struct wan_device *wandev,
+				 wandev_stat_t __user *u_stat);
+static int wanrouter_device_shutdown(struct wan_device *wandev);
+static int wanrouter_device_new_if(struct wan_device *wandev,
+				   wanif_conf_t __user *u_conf);
+static int wanrouter_device_del_if(struct wan_device *wandev,
+				   char __user *u_name);
+
+/*
+ *	Miscellaneous
+ */
+
+static struct wan_device *wanrouter_find_device(char *name);
+static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
+void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
+void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags);
+
+
+
+/*
+ *	Global Data
+ */
+
+static char wanrouter_fullname[]  = "Sangoma WANPIPE Router";
+static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc.";
+static char wanrouter_modname[] = ROUTER_NAME; /* short module name */
+struct wan_device* wanrouter_router_devlist; /* list of registered devices */
+
+/*
+ *	Organize Unique Identifiers for encapsulation/decapsulation
+ */
+
+static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
+#if 0
+static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 };
+#endif
+
+static int __init wanrouter_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "%s v%u.%u %s\n",
+	       wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE,
+	       wanrouter_copyright);
+
+	err = wanrouter_proc_init();
+	if (err)
+		printk(KERN_INFO "%s: can't create entry in proc filesystem!\n",
+		       wanrouter_modname);
+
+	return err;
+}
+
+static void __exit wanrouter_cleanup (void)
+{
+	wanrouter_proc_cleanup();
+}
+
+/*
+ * This is just plain dumb.  We should move the bugger to drivers/net/wan,
+ * slap it first in directory and make it module_init().  The only reason
+ * for subsys_initcall() here is that net goes after drivers (why, BTW?)
+ */
+subsys_initcall(wanrouter_init);
+module_exit(wanrouter_cleanup);
+
+/*
+ * 	Kernel APIs
+ */
+
+/*
+ * 	Register WAN device.
+ * 	o verify device credentials
+ * 	o create an entry for the device in the /proc/net/router directory
+ * 	o initialize internally maintained fields of the wan_device structure
+ * 	o link device data space to a singly-linked list
+ * 	o if it's the first device, then start kernel 'thread'
+ * 	o increment module use count
+ *
+ * 	Return:
+ *	0	Ok
+ *	< 0	error.
+ *
+ * 	Context:	process
+ */
+
+
+int register_wan_device(struct wan_device *wandev)
+{
+	int err, namelen;
+
+	if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) ||
+	    (wandev->name == NULL))
+		return -EINVAL;
+
+	namelen = strlen(wandev->name);
+	if (!namelen || (namelen > WAN_DRVNAME_SZ))
+		return -EINVAL;
+
+	if (wanrouter_find_device(wandev->name))
+		return -EEXIST;
+
+#ifdef WANDEBUG
+	printk(KERN_INFO "%s: registering WAN device %s\n",
+	       wanrouter_modname, wandev->name);
+#endif
+
+	/*
+	 *	Register /proc directory entry
+	 */
+	err = wanrouter_proc_add(wandev);
+	if (err) {
+		printk(KERN_INFO
+			"%s: can't create /proc/net/router/%s entry!\n",
+			wanrouter_modname, wandev->name);
+		return err;
+	}
+
+	/*
+	 *	Initialize fields of the wan_device structure maintained by the
+	 *	router and update local data.
+	 */
+
+	wandev->ndev = 0;
+	wandev->dev  = NULL;
+	wandev->next = wanrouter_router_devlist;
+	wanrouter_router_devlist = wandev;
+	return 0;
+}
+
+/*
+ *	Unregister WAN device.
+ *	o shut down device
+ *	o unlink device data space from the linked list
+ *	o delete device entry in the /proc/net/router directory
+ *	o decrement module use count
+ *
+ *	Return:		0	Ok
+ *			<0	error.
+ *	Context:	process
+ */
+
+
+int unregister_wan_device(char *name)
+{
+	struct wan_device *wandev, *prev;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	for (wandev = wanrouter_router_devlist, prev = NULL;
+		wandev && strcmp(wandev->name, name);
+		prev = wandev, wandev = wandev->next)
+		;
+	if (wandev == NULL)
+		return -ENODEV;
+
+#ifdef WANDEBUG
+	printk(KERN_INFO "%s: unregistering WAN device %s\n",
+	       wanrouter_modname, name);
+#endif
+
+	if (wandev->state != WAN_UNCONFIGURED)
+		wanrouter_device_shutdown(wandev);
+
+	if (prev)
+		prev->next = wandev->next;
+	else
+		wanrouter_router_devlist = wandev->next;
+
+	wanrouter_proc_delete(wandev);
+	return 0;
+}
+
+/*
+ *	Encapsulate packet.
+ *
+ *	Return:	encapsulation header size
+ *		< 0	- unsupported Ethertype
+ *
+ *	Notes:
+ *	1. This function may be called on interrupt context.
+ */
+
+
+int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
+			  unsigned short type)
+{
+	int hdr_len = 0;
+
+	switch (type) {
+	case ETH_P_IP:		/* IP datagram encapsulation */
+		hdr_len += 1;
+		skb_push(skb, 1);
+		skb->data[0] = NLPID_IP;
+		break;
+
+	case ETH_P_IPX:		/* SNAP encapsulation */
+	case ETH_P_ARP:
+		hdr_len += 7;
+		skb_push(skb, 7);
+		skb->data[0] = 0;
+		skb->data[1] = NLPID_SNAP;
+		memcpy(&skb->data[2], wanrouter_oui_ether,
+		       sizeof(wanrouter_oui_ether));
+		*((unsigned short*)&skb->data[5]) = htons(type);
+		break;
+
+	default:		/* Unknown packet type */
+		printk(KERN_INFO
+			"%s: unsupported Ethertype 0x%04X on interface %s!\n",
+			wanrouter_modname, type, dev->name);
+		hdr_len = -EINVAL;
+	}
+	return hdr_len;
+}
+
+
+/*
+ *	Decapsulate packet.
+ *
+ *	Return:	Ethertype (in network order)
+ *			0	unknown encapsulation
+ *
+ *	Notes:
+ *	1. This function may be called on interrupt context.
+ */
+
+
+unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+	int cnt = skb->data[0] ? 0 : 1;	/* there may be a pad present */
+	unsigned short ethertype;
+
+	switch (skb->data[cnt]) {
+	case NLPID_IP:		/* IP datagramm */
+		ethertype = htons(ETH_P_IP);
+		cnt += 1;
+		break;
+
+        case NLPID_SNAP:	/* SNAP encapsulation */
+		if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether,
+			   sizeof(wanrouter_oui_ether))){
+          		printk(KERN_INFO
+				"%s: unsupported SNAP OUI %02X-%02X-%02X "
+				"on interface %s!\n", wanrouter_modname,
+				skb->data[cnt+1], skb->data[cnt+2],
+				skb->data[cnt+3], dev->name);
+			return 0;
+		}
+		ethertype = *((unsigned short*)&skb->data[cnt+4]);
+		cnt += 6;
+		break;
+
+	/* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */
+
+	default:
+		printk(KERN_INFO
+			"%s: unsupported NLPID 0x%02X on interface %s!\n",
+			wanrouter_modname, skb->data[cnt], dev->name);
+		return 0;
+	}
+	skb->protocol = ethertype;
+	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
+	skb_pull(skb, cnt);
+	skb->mac.raw  = skb->data;
+	return ethertype;
+}
+
+
+/*
+ *	WAN device IOCTL.
+ *	o find WAN device associated with this node
+ *	o execute requested action or pass command to the device driver
+ */
+
+int wanrouter_ioctl(struct inode *inode, struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct proc_dir_entry *dent;
+	struct wan_device *wandev;
+	void __user *data = (void __user *)arg;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if ((cmd >> 8) != ROUTER_IOCTL)
+		return -EINVAL;
+
+	dent = PDE(inode);
+	if ((dent == NULL) || (dent->data == NULL))
+		return -EINVAL;
+
+	wandev = dent->data;
+	if (wandev->magic != ROUTER_MAGIC)
+		return -EINVAL;
+
+	switch (cmd) {
+	case ROUTER_SETUP:
+		err = wanrouter_device_setup(wandev, data);
+		break;
+
+	case ROUTER_DOWN:
+		err = wanrouter_device_shutdown(wandev);
+		break;
+
+	case ROUTER_STAT:
+		err = wanrouter_device_stat(wandev, data);
+		break;
+
+	case ROUTER_IFNEW:
+		err = wanrouter_device_new_if(wandev, data);
+		break;
+
+	case ROUTER_IFDEL:
+		err = wanrouter_device_del_if(wandev, data);
+		break;
+
+	case ROUTER_IFSTAT:
+		break;
+
+	default:
+		if ((cmd >= ROUTER_USER) &&
+		    (cmd <= ROUTER_USER_MAX) &&
+		    wandev->ioctl)
+			err = wandev->ioctl(wandev, cmd, arg);
+		else err = -EINVAL;
+	}
+	return err;
+}
+
+/*
+ *	WAN Driver IOCTL Handlers
+ */
+
+/*
+ *	Setup WAN link device.
+ *	o verify user address space
+ *	o allocate kernel memory and copy configuration data to kernel space
+ *	o if configuration data includes extension, copy it to kernel space too
+ *	o call driver's setup() entry point
+ */
+
+static int wanrouter_device_setup(struct wan_device *wandev,
+				  wandev_conf_t __user *u_conf)
+{
+	void *data = NULL;
+	wandev_conf_t *conf;
+	int err = -EINVAL;
+
+	if (wandev->setup == NULL) {	/* Nothing to do ? */
+		printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n",
+				wandev->name);
+		return 0;
+	}
+
+	conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL);
+	if (conf == NULL){
+		printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n",
+				wandev->name);
+		return -ENOBUFS;
+	}
+
+	if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) {
+		printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n",
+				wandev->name);
+		kfree(conf);
+		return -EFAULT;
+	}
+
+	if (conf->magic != ROUTER_MAGIC) {
+		kfree(conf);
+		printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n",
+				wandev->name);
+	        return -EINVAL;
+	}
+
+	if (conf->data_size && conf->data) {
+		if (conf->data_size > 128000 || conf->data_size < 0) {
+			printk(KERN_INFO
+			    "%s: ERROR, Invalid firmware data size %i !\n",
+					wandev->name, conf->data_size);
+			kfree(conf);
+		        return -EINVAL;
+		}
+
+		data = vmalloc(conf->data_size);
+		if (!data) {
+			printk(KERN_INFO
+			 	"%s: ERROR, Faild allocate kernel memory !\n",
+				wandev->name);
+			kfree(conf);
+			return -ENOBUFS;
+		}
+		if (!copy_from_user(data, conf->data, conf->data_size)) {
+			conf->data = data;
+			err = wandev->setup(wandev, conf);
+		} else {
+			printk(KERN_INFO
+			     "%s: ERROR, Faild to copy from user data !\n",
+			       wandev->name);
+			err = -EFAULT;
+		}
+		vfree(data);
+	} else {
+		printk(KERN_INFO
+		    "%s: ERROR, No firmware found ! Firmware size = %i !\n",
+				wandev->name, conf->data_size);
+	}
+
+	kfree(conf);
+	return err;
+}
+
+/*
+ *	Shutdown WAN device.
+ *	o delete all not opened logical channels for this device
+ *	o call driver's shutdown() entry point
+ */
+
+static int wanrouter_device_shutdown(struct wan_device *wandev)
+{
+	struct net_device *dev;
+	int err=0;
+
+	if (wandev->state == WAN_UNCONFIGURED)
+		return 0;
+
+	printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name);
+
+	for (dev = wandev->dev; dev;) {
+		err = wanrouter_delete_interface(wandev, dev->name);
+		if (err)
+			return err;
+		/* The above function deallocates the current dev
+		 * structure. Therefore, we cannot use dev->priv
+		 * as the next element: wandev->dev points to the
+		 * next element */
+		dev = wandev->dev;
+	}
+
+	if (wandev->ndev)
+		return -EBUSY;	/* there are opened interfaces  */
+
+	if (wandev->shutdown)
+		err=wandev->shutdown(wandev);
+
+	return err;
+}
+
+/*
+ *	Get WAN device status & statistics.
+ */
+
+static int wanrouter_device_stat(struct wan_device *wandev,
+				 wandev_stat_t __user *u_stat)
+{
+	wandev_stat_t stat;
+
+	memset(&stat, 0, sizeof(stat));
+
+	/* Ask device driver to update device statistics */
+	if ((wandev->state != WAN_UNCONFIGURED) && wandev->update)
+		wandev->update(wandev);
+
+	/* Fill out structure */
+	stat.ndev  = wandev->ndev;
+	stat.state = wandev->state;
+
+	if (copy_to_user(u_stat, &stat, sizeof(stat)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ *	Create new WAN interface.
+ *	o verify user address space
+ *	o copy configuration data to kernel address space
+ *	o allocate network interface data space
+ *	o call driver's new_if() entry point
+ *	o make sure there is no interface name conflict
+ *	o register network interface
+ */
+
+static int wanrouter_device_new_if(struct wan_device *wandev,
+				   wanif_conf_t __user *u_conf)
+{
+	wanif_conf_t *cnf;
+	struct net_device *dev = NULL;
+#ifdef CONFIG_WANPIPE_MULTPPP
+	struct ppp_device *pppdev=NULL;
+#endif
+	int err;
+
+	if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL))
+		return -ENODEV;
+
+	cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL);
+	if (!cnf)
+		return -ENOBUFS;
+
+	err = -EFAULT;
+	if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t)))
+		goto out;
+
+	err = -EINVAL;
+	if (cnf->magic != ROUTER_MAGIC)
+		goto out;
+
+	if (cnf->config_id == WANCONFIG_MPPP) {
+#ifdef CONFIG_WANPIPE_MULTPPP
+		pppdev = kmalloc(sizeof(struct ppp_device), GFP_KERNEL);
+		err = -ENOBUFS;
+		if (pppdev == NULL)
+			goto out;
+		memset(pppdev, 0, sizeof(struct ppp_device));
+		pppdev->dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+		if (pppdev->dev == NULL) {
+			kfree(pppdev);
+			err = -ENOBUFS;
+			goto out;
+		}
+		memset(pppdev->dev, 0, sizeof(struct net_device));
+		err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf);
+		dev = pppdev->dev;
+#else
+		printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
+				wandev->name);
+		err = -EPROTONOSUPPORT;
+		goto out;
+#endif
+	} else {
+		dev = kmalloc(sizeof(struct net_device), GFP_KERNEL);
+		err = -ENOBUFS;
+		if (dev == NULL)
+			goto out;
+		memset(dev, 0, sizeof(struct net_device));
+		err = wandev->new_if(wandev, dev, cnf);
+	}
+
+	if (!err) {
+		/* Register network interface. This will invoke init()
+		 * function supplied by the driver.  If device registered
+		 * successfully, add it to the interface list.
+		 */
+
+		if (dev->name == NULL) {
+			err = -EINVAL;
+		} else {
+
+			#ifdef WANDEBUG
+			printk(KERN_INFO "%s: registering interface %s...\n",
+				wanrouter_modname, dev->name);
+			#endif
+
+			err = register_netdev(dev);
+			if (!err) {
+				struct net_device *slave = NULL;
+				unsigned long smp_flags=0;
+
+				lock_adapter_irq(&wandev->lock, &smp_flags);
+
+				if (wandev->dev == NULL) {
+					wandev->dev = dev;
+				} else {
+					for (slave=wandev->dev;
+					 *((struct net_device **)slave->priv);
+				 slave = *((struct net_device **)slave->priv));
+
+				     *((struct net_device **)slave->priv) = dev;
+				}
+				++wandev->ndev;
+
+				unlock_adapter_irq(&wandev->lock, &smp_flags);
+				err = 0;	/* done !!! */
+				goto out;
+			}
+		}
+		if (wandev->del_if)
+			wandev->del_if(wandev, dev);
+	}
+
+	/* This code has moved from del_if() function */
+	if (dev->priv) {
+		kfree(dev->priv);
+		dev->priv = NULL;
+	}
+
+#ifdef CONFIG_WANPIPE_MULTPPP
+	if (cnf->config_id == WANCONFIG_MPPP)
+		kfree(pppdev);
+	else
+		kfree(dev);
+#else
+	/* Sync PPP is disabled */
+	if (cnf->config_id != WANCONFIG_MPPP)
+		kfree(dev);
+#endif
+
+out:
+	kfree(cnf);
+	return err;
+}
+
+
+/*
+ *	Delete WAN logical channel.
+ *	 o verify user address space
+ *	 o copy configuration data to kernel address space
+ */
+
+static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name)
+{
+	char name[WAN_IFNAME_SZ + 1];
+        int err = 0;
+
+	if (wandev->state == WAN_UNCONFIGURED)
+		return -ENODEV;
+
+	memset(name, 0, sizeof(name));
+
+	if (copy_from_user(name, u_name, WAN_IFNAME_SZ))
+		return -EFAULT;
+
+	err = wanrouter_delete_interface(wandev, name);
+	if (err)
+		return err;
+
+	/* If last interface being deleted, shutdown card
+	 * This helps with administration at leaf nodes
+	 * (You can tell if the person at the other end of the phone
+	 * has an interface configured) and avoids DoS vulnerabilities
+	 * in binary driver files - this fixes a problem with the current
+	 * Sangoma driver going into strange states when all the network
+	 * interfaces are deleted and the link irrecoverably disconnected.
+	 */
+
+        if (!wandev->ndev && wandev->shutdown)
+                err = wandev->shutdown(wandev);
+
+	return err;
+}
+
+/*
+ *	Miscellaneous Functions
+ */
+
+/*
+ *	Find WAN device by name.
+ *	Return pointer to the WAN device data space or NULL if device not found.
+ */
+
+static struct wan_device *wanrouter_find_device(char *name)
+{
+	struct wan_device *wandev;
+
+	for (wandev = wanrouter_router_devlist;
+	     wandev && strcmp(wandev->name, name);
+		wandev = wandev->next);
+	return wandev;
+}
+
+/*
+ *	Delete WAN logical channel identified by its name.
+ *	o find logical channel by its name
+ *	o call driver's del_if() entry point
+ *	o unregister network interface
+ *	o unlink channel data space from linked list of channels
+ *	o release channel data space
+ *
+ *	Return:	0		success
+ *		-ENODEV		channel not found.
+ *		-EBUSY		interface is open
+ *
+ *	Note: If (force != 0), then device will be destroyed even if interface
+ *	associated with it is open. It's caller's responsibility to make
+ *	sure that opened interfaces are not removed!
+ */
+
+static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
+{
+	struct net_device *dev = NULL, *prev = NULL;
+	unsigned long smp_flags=0;
+
+	lock_adapter_irq(&wandev->lock, &smp_flags);
+	dev = wandev->dev;
+	prev = NULL;
+	while (dev && strcmp(name, dev->name)) {
+		struct net_device **slave = dev->priv;
+		prev = dev;
+		dev = *slave;
+	}
+	unlock_adapter_irq(&wandev->lock, &smp_flags);
+
+	if (dev == NULL)
+		return -ENODEV;	/* interface not found */
+
+	if (netif_running(dev))
+		return -EBUSY;	/* interface in use */
+
+	if (wandev->del_if)
+		wandev->del_if(wandev, dev);
+
+	lock_adapter_irq(&wandev->lock, &smp_flags);
+	if (prev) {
+		struct net_device **prev_slave = prev->priv;
+		struct net_device **slave = dev->priv;
+
+		*prev_slave = *slave;
+	} else {
+		struct net_device **slave = dev->priv;
+		wandev->dev = *slave;
+	}
+	--wandev->ndev;
+	unlock_adapter_irq(&wandev->lock, &smp_flags);
+
+	printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name);
+
+	/* Due to new interface linking method using dev->priv,
+	 * this code has moved from del_if() function.*/
+	if (dev->priv){
+		kfree(dev->priv);
+		dev->priv=NULL;
+	}
+
+	unregister_netdev(dev);
+
+	free_netdev(dev);
+
+	return 0;
+}
+
+void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
+{
+       	spin_lock_irqsave(lock, *smp_flags);
+}
+
+
+void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
+{
+	spin_unlock_irqrestore(lock, *smp_flags);
+}
+
+EXPORT_SYMBOL(register_wan_device);
+EXPORT_SYMBOL(unregister_wan_device);
+EXPORT_SYMBOL(wanrouter_encapsulate);
+EXPORT_SYMBOL(wanrouter_type_trans);
+EXPORT_SYMBOL(lock_adapter_irq);
+EXPORT_SYMBOL(unlock_adapter_irq);
+
+MODULE_LICENSE("GPL");
+
+/*
+ *	End
+ */
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
new file mode 100644
index 00000000000..c28ba5a4720
--- /dev/null
+++ b/net/wanrouter/wanproc.c
@@ -0,0 +1,381 @@
+/*****************************************************************************
+* wanproc.c	WAN Router Module. /proc filesystem interface.
+*
+*		This module is completely hardware-independent and provides
+*		access to the router using Linux /proc filesystem.
+*
+* Author: 	Gideon Hack
+*
+* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
+*
+*		This program is free software; you can redistribute it and/or
+*		modify it under the terms of the GNU General Public License
+*		as published by the Free Software Foundation; either version
+*		2 of the License, or (at your option) any later version.
+* ============================================================================
+* Jun 02, 1999  Gideon Hack	Updates for Linux 2.2.X kernels.
+* Jun 29, 1997	Alan Cox	Merged with 1.0.3 vendor code
+* Jan 29, 1997	Gene Kozin	v1.0.1. Implemented /proc read routines
+* Jan 30, 1997	Alan Cox	Hacked around for 2.1
+* Dec 13, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
+*****************************************************************************/
+
+#include <linux/config.h>
+#include <linux/init.h>		/* __initfunc et al. */
+#include <linux/stddef.h>	/* offsetof(), etc. */
+#include <linux/errno.h>	/* return codes */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/wanrouter.h>	/* WAN router API definitions */
+#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
+
+#include <asm/io.h>
+
+#define PROC_STATS_FORMAT "%30s: %12lu\n"
+
+/****** Defines and Macros **************************************************/
+
+#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\
+			      (prot == WANCONFIG_X25) ? " X25" : \
+			         (prot == WANCONFIG_PPP) ? " PPP" : \
+				    (prot == WANCONFIG_CHDLC) ? " CHDLC": \
+				       (prot == WANCONFIG_MPPP) ? " MPPP" : \
+				           " Unknown" )
+
+/****** Function Prototypes *************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+/* Miscellaneous */
+
+/*
+ *	Structures for interfacing with the /proc filesystem.
+ *	Router creates its own directory /proc/net/router with the folowing
+ *	entries:
+ *	config		device configuration
+ *	status		global device statistics
+ *	<device>	entry for each WAN device
+ */
+
+/*
+ *	Generic /proc/net/router/<file> file and inode operations
+ */
+
+/*
+ *	/proc/net/router
+ */
+
+static struct proc_dir_entry *proc_router;
+
+/* Strings */
+
+/*
+ *	Interface functions
+ */
+
+/****** Proc filesystem entry points ****************************************/
+
+/*
+ *	Iterator
+ */
+static void *r_start(struct seq_file *m, loff_t *pos)
+{
+	struct wan_device *wandev;
+	loff_t l = *pos;
+
+	lock_kernel();
+	if (!l--)
+		return SEQ_START_TOKEN;
+	for (wandev = wanrouter_router_devlist; l-- && wandev;
+	     wandev = wandev->next)
+		;
+	return wandev;
+}
+
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct wan_device *wandev = v;
+	(*pos)++;
+	return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next;
+}
+
+static void r_stop(struct seq_file *m, void *v)
+{
+	unlock_kernel();
+}
+
+static int config_show(struct seq_file *m, void *v)
+{
+	struct wan_device *p = v;
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "Device name    | port |IRQ|DMA|  mem.addr  |"
+			    "mem.size|option1|option2|option3|option4\n");
+		return 0;
+	}
+	if (!p->state)
+		return 0;
+	seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n",
+			p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize,
+			p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]);
+	return 0;
+}
+
+static int status_show(struct seq_file *m, void *v)
+{
+	struct wan_device *p = v;
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "Device name    |protocol|station|interface|"
+			    "clocking|baud rate| MTU |ndev|link state\n");
+		return 0;
+	}
+	if (!p->state)
+		return 0;
+	seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |",
+		p->name,
+		PROT_DECODE(p->config_id),
+		p->config_id == WANCONFIG_FR ?
+			(p->station ? "Node" : "CPE") :
+			(p->config_id == WANCONFIG_X25 ?
+			(p->station ? "DCE" : "DTE") :
+			("N/A")),
+		p->interface ? "V.35" : "RS-232",
+		p->clocking ? "internal" : "external",
+		p->bps,
+		p->mtu,
+		p->ndev);
+
+	switch (p->state) {
+	case WAN_UNCONFIGURED:
+		seq_printf(m, "%-12s\n", "unconfigured");
+		break;
+	case WAN_DISCONNECTED:
+		seq_printf(m, "%-12s\n", "disconnected");
+		break;
+	case WAN_CONNECTING:
+		seq_printf(m, "%-12s\n", "connecting");
+		break;
+	case WAN_CONNECTED:
+		seq_printf(m, "%-12s\n", "connected");
+		break;
+	default:
+		seq_printf(m, "%-12s\n", "invalid");
+		break;
+	}
+	return 0;
+}
+
+static struct seq_operations config_op = {
+	.start	= r_start,
+	.next	= r_next,
+	.stop	= r_stop,
+	.show	= config_show,
+};
+
+static struct seq_operations status_op = {
+	.start	= r_start,
+	.next	= r_next,
+	.stop	= r_stop,
+	.show	= status_show,
+};
+
+static int config_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &config_op);
+}
+
+static int status_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &status_op);
+}
+
+static struct file_operations config_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = config_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+static struct file_operations status_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = status_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+static int wandev_show(struct seq_file *m, void *v)
+{
+	struct wan_device *wandev = m->private;
+
+	if (wandev->magic != ROUTER_MAGIC)
+		return 0;
+
+	if (!wandev->state) {
+		seq_puts(m, "device is not configured!\n");
+		return 0;
+	}
+
+	/* Update device statistics */
+	if (wandev->update) {
+		int err = wandev->update(wandev);
+		if (err == -EAGAIN) {
+			seq_puts(m, "Device is busy!\n");
+			return 0;
+		}
+		if (err) {
+			seq_puts(m, "Device is not configured!\n");
+			return 0;
+		}
+	}
+
+	seq_printf(m, PROC_STATS_FORMAT,
+		"total packets received", wandev->stats.rx_packets);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"total packets transmitted", wandev->stats.tx_packets);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"total bytes received", wandev->stats.rx_bytes);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"total bytes transmitted", wandev->stats.tx_bytes);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"bad packets received", wandev->stats.rx_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"packet transmit problems", wandev->stats.tx_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"received frames dropped", wandev->stats.rx_dropped);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"transmit frames dropped", wandev->stats.tx_dropped);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"multicast packets received", wandev->stats.multicast);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"transmit collisions", wandev->stats.collisions);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"receive length errors", wandev->stats.rx_length_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"receiver overrun errors", wandev->stats.rx_over_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"CRC errors", wandev->stats.rx_crc_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"frame format errors (aborts)", wandev->stats.rx_frame_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"receiver fifo overrun", wandev->stats.rx_fifo_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"receiver missed packet", wandev->stats.rx_missed_errors);
+	seq_printf(m, PROC_STATS_FORMAT,
+		"aborted frames transmitted", wandev->stats.tx_aborted_errors);
+	return 0;
+}
+
+static int wandev_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wandev_show, PDE(inode)->data);
+}
+
+static struct file_operations wandev_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = wandev_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+	.ioctl	 = wanrouter_ioctl,
+};
+
+/*
+ *	Initialize router proc interface.
+ */
+
+int __init wanrouter_proc_init(void)
+{
+	struct proc_dir_entry *p;
+	proc_router = proc_mkdir(ROUTER_NAME, proc_net);
+	if (!proc_router)
+		goto fail;
+
+	p = create_proc_entry("config", S_IRUGO, proc_router);
+	if (!p)
+		goto fail_config;
+	p->proc_fops = &config_fops;
+	p = create_proc_entry("status", S_IRUGO, proc_router);
+	if (!p)
+		goto fail_stat;
+	p->proc_fops = &status_fops;
+	return 0;
+fail_stat:
+	remove_proc_entry("config", proc_router);
+fail_config:
+	remove_proc_entry(ROUTER_NAME, proc_net);
+fail:
+	return -ENOMEM;
+}
+
+/*
+ *	Clean up router proc interface.
+ */
+
+void wanrouter_proc_cleanup(void)
+{
+	remove_proc_entry("config", proc_router);
+	remove_proc_entry("status", proc_router);
+	remove_proc_entry(ROUTER_NAME, proc_net);
+}
+
+/*
+ *	Add directory entry for WAN device.
+ */
+
+int wanrouter_proc_add(struct wan_device* wandev)
+{
+	if (wandev->magic != ROUTER_MAGIC)
+		return -EINVAL;
+
+	wandev->dent = create_proc_entry(wandev->name, S_IRUGO, proc_router);
+	if (!wandev->dent)
+		return -ENOMEM;
+	wandev->dent->proc_fops	= &wandev_fops;
+	wandev->dent->data	= wandev;
+	return 0;
+}
+
+/*
+ *	Delete directory entry for WAN device.
+ */
+int wanrouter_proc_delete(struct wan_device* wandev)
+{
+	if (wandev->magic != ROUTER_MAGIC)
+		return -EINVAL;
+	remove_proc_entry(wandev->name, proc_router);
+	return 0;
+}
+
+#else
+
+/*
+ *	No /proc - output stubs
+ */
+
+int __init wanrouter_proc_init(void)
+{
+	return 0;
+}
+
+void wanrouter_proc_cleanup(void)
+{
+}
+
+int wanrouter_proc_add(struct wan_device *wandev)
+{
+	return 0;
+}
+
+int wanrouter_proc_delete(struct wan_device *wandev)
+{
+	return 0;
+}
+
+#endif
+
+/*
+ *	End
+ */
+
diff --git a/net/x25/Makefile b/net/x25/Makefile
new file mode 100644
index 00000000000..587a71aa411
--- /dev/null
+++ b/net/x25/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the Linux X.25 Packet layer.
+#
+
+obj-$(CONFIG_X25) += x25.o
+
+x25-y			:= af_x25.o x25_dev.o x25_facilities.o x25_in.o \
+			   x25_link.o x25_out.o x25_route.o x25_subr.o \
+			   x25_timer.o x25_proc.o
+x25-$(CONFIG_SYSCTL)	+= sysctl_net_x25.o
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
new file mode 100644
index 00000000000..2a24b243b84
--- /dev/null
+++ b/net/x25/af_x25.c
@@ -0,0 +1,1435 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	Started coding.
+ *	X.25 002	Jonathan Naylor	Centralised disconnect handling.
+ *					New timer architecture.
+ *	2000-03-11	Henner Eisen	MSG_EOR handling more POSIX compliant.
+ *	2000-03-22	Daniela Squassoni Allowed disabling/enabling of 
+ *					  facilities negotiation and increased 
+ *					  the throughput upper limit.
+ *	2000-08-27	Arnaldo C. Melo s/suser/capable/ + micro cleanups
+ *	2000-09-04	Henner Eisen	Set sock->state in x25_accept(). 
+ *					Fixed x25_output() related skb leakage.
+ *	2000-10-02	Henner Eisen	Made x25_kick() single threaded per socket.
+ *	2000-10-27	Henner Eisen    MSG_DONTWAIT for fragment allocation.
+ *	2000-11-14	Henner Eisen    Closing datalink from NETDEV_GOING_DOWN
+ *	2002-10-06	Arnaldo C. Melo Get rid of cli/sti, move proc stuff to
+ *					x25_proc.c, using seq_file
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <asm/uaccess.h>
+#include <linux/fcntl.h>
+#include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/x25.h>
+
+int sysctl_x25_restart_request_timeout = X25_DEFAULT_T20;
+int sysctl_x25_call_request_timeout    = X25_DEFAULT_T21;
+int sysctl_x25_reset_request_timeout   = X25_DEFAULT_T22;
+int sysctl_x25_clear_request_timeout   = X25_DEFAULT_T23;
+int sysctl_x25_ack_holdback_timeout    = X25_DEFAULT_T2;
+
+HLIST_HEAD(x25_list);
+DEFINE_RWLOCK(x25_list_lock);
+
+static struct proto_ops x25_proto_ops;
+
+static struct x25_address null_x25_address = {"               "};
+
+int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr,
+		  struct x25_address *calling_addr)
+{
+	int called_len, calling_len;
+	char *called, *calling;
+	int i;
+
+	called_len  = (*p >> 0) & 0x0F;
+	calling_len = (*p >> 4) & 0x0F;
+
+	called  = called_addr->x25_addr;
+	calling = calling_addr->x25_addr;
+	p++;
+
+	for (i = 0; i < (called_len + calling_len); i++) {
+		if (i < called_len) {
+			if (i % 2 != 0) {
+				*called++ = ((*p >> 0) & 0x0F) + '0';
+				p++;
+			} else {
+				*called++ = ((*p >> 4) & 0x0F) + '0';
+			}
+		} else {
+			if (i % 2 != 0) {
+				*calling++ = ((*p >> 0) & 0x0F) + '0';
+				p++;
+			} else {
+				*calling++ = ((*p >> 4) & 0x0F) + '0';
+			}
+		}
+	}
+
+	*called = *calling = '\0';
+
+	return 1 + (called_len + calling_len + 1) / 2;
+}
+
+int x25_addr_aton(unsigned char *p, struct x25_address *called_addr,
+		  struct x25_address *calling_addr)
+{
+	unsigned int called_len, calling_len;
+	char *called, *calling;
+	int i;
+
+	called  = called_addr->x25_addr;
+	calling = calling_addr->x25_addr;
+
+	called_len  = strlen(called);
+	calling_len = strlen(calling);
+
+	*p++ = (calling_len << 4) | (called_len << 0);
+
+	for (i = 0; i < (called_len + calling_len); i++) {
+		if (i < called_len) {
+			if (i % 2 != 0) {
+				*p |= (*called++ - '0') << 0;
+				p++;
+			} else {
+				*p = 0x00;
+				*p |= (*called++ - '0') << 4;
+			}
+		} else {
+			if (i % 2 != 0) {
+				*p |= (*calling++ - '0') << 0;
+				p++;
+			} else {
+				*p = 0x00;
+				*p |= (*calling++ - '0') << 4;
+			}
+		}
+	}
+
+	return 1 + (called_len + calling_len + 1) / 2;
+}
+
+/*
+ *	Socket removal during an interrupt is now safe.
+ */
+static void x25_remove_socket(struct sock *sk)
+{
+	write_lock_bh(&x25_list_lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&x25_list_lock);
+}
+
+/*
+ *	Kill all bound sockets on a dropped device.
+ */
+static void x25_kill_by_device(struct net_device *dev)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	write_lock_bh(&x25_list_lock);
+
+	sk_for_each(s, node, &x25_list)
+		if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev)
+			x25_disconnect(s, ENETUNREACH, 0, 0);
+
+	write_unlock_bh(&x25_list_lock);
+}
+
+/*
+ *	Handle device status changes.
+ */
+static int x25_device_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct x25_neigh *nb;
+
+	if (dev->type == ARPHRD_X25
+#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+	 || dev->type == ARPHRD_ETHER
+#endif
+	 ) {
+		switch (event) {
+			case NETDEV_UP:
+				x25_link_device_up(dev);
+				break;
+			case NETDEV_GOING_DOWN:
+				nb = x25_get_neigh(dev);
+				if (nb) {
+					x25_terminate_link(nb);
+					x25_neigh_put(nb);
+				}
+				break;
+			case NETDEV_DOWN:
+				x25_kill_by_device(dev);
+				x25_route_device_down(dev);
+				x25_link_device_down(dev);
+				break;
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ *	Add a socket to the bound sockets list.
+ */
+static void x25_insert_socket(struct sock *sk)
+{
+	write_lock_bh(&x25_list_lock);
+	sk_add_node(sk, &x25_list);
+	write_unlock_bh(&x25_list_lock);
+}
+
+/*
+ *	Find a socket that wants to accept the Call Request we just
+ *	received. Check the full list for an address/cud match.
+ *	If no cuds match return the next_best thing, an address match.
+ *	Note: if a listening socket has cud set it must only get calls
+ *	with matching cud.
+ */
+static struct sock *x25_find_listener(struct x25_address *addr, struct x25_calluserdata *calluserdata)
+{
+	struct sock *s;
+	struct sock *next_best;
+	struct hlist_node *node;
+
+	read_lock_bh(&x25_list_lock);
+	next_best = NULL;
+
+	sk_for_each(s, node, &x25_list)
+		if ((!strcmp(addr->x25_addr,
+			     x25_sk(s)->source_addr.x25_addr) ||
+		     !strcmp(addr->x25_addr,
+			     null_x25_address.x25_addr)) &&
+		     s->sk_state == TCP_LISTEN) {
+
+			/*
+			 * Found a listening socket, now check the incoming
+			 * call user data vs this sockets call user data
+			 */
+			if (x25_check_calluserdata(&x25_sk(s)->calluserdata, calluserdata)) {
+				sock_hold(s);
+				goto found;
+			}
+			if (x25_sk(s)->calluserdata.cudlength == 0) {
+				next_best = s;
+			}
+		}
+	if (next_best) {
+		s = next_best;
+		sock_hold(s);
+		goto found;
+	}
+	s = NULL;
+found:
+	read_unlock_bh(&x25_list_lock);
+	return s;
+}
+
+/*
+ *	Find a connected X.25 socket given my LCI and neighbour.
+ */
+static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &x25_list)
+		if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) {
+			sock_hold(s);
+			goto found;
+		}
+	s = NULL;
+found:
+	return s;
+}
+
+struct sock *x25_find_socket(unsigned int lci, struct x25_neigh *nb)
+{
+	struct sock *s;
+
+	read_lock_bh(&x25_list_lock);
+	s = __x25_find_socket(lci, nb);
+	read_unlock_bh(&x25_list_lock);
+	return s;
+}
+
+/*
+ *	Find a unique LCI for a given device.
+ */
+static unsigned int x25_new_lci(struct x25_neigh *nb)
+{
+	unsigned int lci = 1;
+	struct sock *sk;
+
+	read_lock_bh(&x25_list_lock);
+
+	while ((sk = __x25_find_socket(lci, nb)) != NULL) {
+		sock_put(sk);
+		if (++lci == 4096) {
+			lci = 0;
+			break;
+		}
+	}
+
+	read_unlock_bh(&x25_list_lock);
+	return lci;
+}
+
+/*
+ *	Deferred destroy.
+ */
+void x25_destroy_socket(struct sock *);
+
+/*
+ *	handler for deferred kills.
+ */
+static void x25_destroy_timer(unsigned long data)
+{
+	x25_destroy_socket((struct sock *)data);
+}
+
+/*
+ *	This is called from user mode and the timers. Thus it protects itself
+ *	against interrupt users but doesn't worry about being called during
+ *	work. Once it is removed from the queue no interrupt or bottom half
+ *	will touch it and we are (fairly 8-) ) safe.
+ *	Not static as it's used by the timer
+ */
+void x25_destroy_socket(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	sock_hold(sk);
+	lock_sock(sk);
+	x25_stop_heartbeat(sk);
+	x25_stop_timer(sk);
+
+	x25_remove_socket(sk);
+	x25_clear_queues(sk);		/* Flush the queues */
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		if (skb->sk != sk) {		/* A pending connection */
+			/*
+			 * Queue the unaccepted socket for death
+			 */
+			sock_set_flag(skb->sk, SOCK_DEAD);
+			x25_start_heartbeat(skb->sk);
+			x25_sk(skb->sk)->state = X25_STATE_0;
+		}
+
+		kfree_skb(skb);
+	}
+
+	if (atomic_read(&sk->sk_wmem_alloc) ||
+	    atomic_read(&sk->sk_rmem_alloc)) {
+		/* Defer: outstanding buffers */
+		sk->sk_timer.expires  = jiffies + 10 * HZ;
+		sk->sk_timer.function = x25_destroy_timer;
+		sk->sk_timer.data = (unsigned long)sk;
+		add_timer(&sk->sk_timer);
+	} else {
+		/* drop last reference so sock_put will free */
+		__sock_put(sk);
+	}
+
+	release_sock(sk);
+	sock_put(sk);
+}
+
+/*
+ *	Handling for system calls applied via the various interfaces to a
+ *	X.25 socket object.
+ */
+
+static int x25_setsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int optlen)
+{
+	int opt;
+	struct sock *sk = sock->sk;
+	int rc = -ENOPROTOOPT;
+
+	if (level != SOL_X25 || optname != X25_QBITINCL)
+		goto out;
+
+	rc = -EINVAL;
+	if (optlen < sizeof(int))
+		goto out;
+
+	rc = -EFAULT;
+	if (get_user(opt, (int __user *)optval))
+		goto out;
+
+	x25_sk(sk)->qbitincl = !!opt;
+	rc = 0;
+out:
+	return rc;
+}
+
+static int x25_getsockopt(struct socket *sock, int level, int optname,
+			  char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	int val, len, rc = -ENOPROTOOPT;
+	
+	if (level != SOL_X25 || optname != X25_QBITINCL)
+		goto out;
+
+	rc = -EFAULT;
+	if (get_user(len, optlen))
+		goto out;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	rc = -EINVAL;
+	if (len < 0)
+		goto out;
+		
+	rc = -EFAULT;
+	if (put_user(len, optlen))
+		goto out;
+
+	val = x25_sk(sk)->qbitincl;
+	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
+out:
+	return rc;
+}
+
+static int x25_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int rc = -EOPNOTSUPP;
+
+	if (sk->sk_state != TCP_LISTEN) {
+		memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state           = TCP_LISTEN;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+static struct proto x25_proto = {
+	.name	  = "X25",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct x25_sock),
+};
+
+static struct sock *x25_alloc_socket(void)
+{
+	struct x25_sock *x25;
+	struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1);
+
+	if (!sk)
+		goto out;
+
+	sock_init_data(NULL, sk);
+
+	x25 = x25_sk(sk);
+	skb_queue_head_init(&x25->ack_queue);
+	skb_queue_head_init(&x25->fragment_queue);
+	skb_queue_head_init(&x25->interrupt_in_queue);
+	skb_queue_head_init(&x25->interrupt_out_queue);
+out:
+	return sk;
+}
+
+void x25_init_timers(struct sock *sk);
+
+static int x25_create(struct socket *sock, int protocol)
+{
+	struct sock *sk;
+	struct x25_sock *x25;
+	int rc = -ESOCKTNOSUPPORT;
+
+	if (sock->type != SOCK_SEQPACKET || protocol)
+		goto out;
+
+	rc = -ENOMEM;
+	if ((sk = x25_alloc_socket()) == NULL)
+		goto out;
+
+	x25 = x25_sk(sk);
+
+	sock_init_data(sock, sk);
+
+	x25_init_timers(sk);
+
+	sock->ops    = &x25_proto_ops;
+	sk->sk_protocol = protocol;
+	sk->sk_backlog_rcv = x25_backlog_rcv;
+
+	x25->t21   = sysctl_x25_call_request_timeout;
+	x25->t22   = sysctl_x25_reset_request_timeout;
+	x25->t23   = sysctl_x25_clear_request_timeout;
+	x25->t2    = sysctl_x25_ack_holdback_timeout;
+	x25->state = X25_STATE_0;
+
+	x25->facilities.winsize_in  = X25_DEFAULT_WINDOW_SIZE;
+	x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE;
+	x25->facilities.pacsize_in  = X25_DEFAULT_PACKET_SIZE;
+	x25->facilities.pacsize_out = X25_DEFAULT_PACKET_SIZE;
+	x25->facilities.throughput  = X25_DEFAULT_THROUGHPUT;
+	x25->facilities.reverse     = X25_DEFAULT_REVERSE;
+	rc = 0;
+out:
+	return rc;
+}
+
+static struct sock *x25_make_new(struct sock *osk)
+{
+	struct sock *sk = NULL;
+	struct x25_sock *x25, *ox25;
+
+	if (osk->sk_type != SOCK_SEQPACKET)
+		goto out;
+
+	if ((sk = x25_alloc_socket()) == NULL)
+		goto out;
+
+	x25 = x25_sk(sk);
+
+	sk->sk_type        = osk->sk_type;
+	sk->sk_socket      = osk->sk_socket;
+	sk->sk_priority    = osk->sk_priority;
+	sk->sk_protocol    = osk->sk_protocol;
+	sk->sk_rcvbuf      = osk->sk_rcvbuf;
+	sk->sk_sndbuf      = osk->sk_sndbuf;
+	sk->sk_state       = TCP_ESTABLISHED;
+	sk->sk_sleep       = osk->sk_sleep;
+	sk->sk_backlog_rcv = osk->sk_backlog_rcv;
+
+	if (sock_flag(osk, SOCK_ZAPPED))
+		sock_set_flag(sk, SOCK_ZAPPED);
+	
+	if (sock_flag(osk, SOCK_DBG))
+		sock_set_flag(sk, SOCK_DBG);
+
+	ox25 = x25_sk(osk);
+	x25->t21        = ox25->t21;
+	x25->t22        = ox25->t22;
+	x25->t23        = ox25->t23;
+	x25->t2         = ox25->t2;
+	x25->facilities = ox25->facilities;
+	x25->qbitincl   = ox25->qbitincl;
+
+	x25_init_timers(sk);
+out:
+	return sk;
+}
+
+static int x25_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25;
+
+	if (!sk)
+		goto out;
+
+	x25 = x25_sk(sk);
+
+	switch (x25->state) {
+
+		case X25_STATE_0:
+		case X25_STATE_2:
+			x25_disconnect(sk, 0, 0, 0);
+			x25_destroy_socket(sk);
+			goto out;
+
+		case X25_STATE_1:
+		case X25_STATE_3:
+		case X25_STATE_4:
+			x25_clear_queues(sk);
+			x25_write_internal(sk, X25_CLEAR_REQUEST);
+			x25_start_t23timer(sk);
+			x25->state = X25_STATE_2;
+			sk->sk_state	= TCP_CLOSE;
+			sk->sk_shutdown	|= SEND_SHUTDOWN;
+			sk->sk_state_change(sk);
+			sock_set_flag(sk, SOCK_DEAD);
+			sock_set_flag(sk, SOCK_DESTROY);
+			break;
+	}
+
+	sock->sk	= NULL;	
+	sk->sk_socket	= NULL;	/* Not used, but we should do this */
+out:
+	return 0;
+}
+
+static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
+
+	if (!sock_flag(sk, SOCK_ZAPPED) ||
+	    addr_len != sizeof(struct sockaddr_x25) ||
+	    addr->sx25_family != AF_X25)
+		return -EINVAL;
+
+	x25_sk(sk)->source_addr = addr->sx25_addr;
+	x25_insert_socket(sk);
+	sock_reset_flag(sk, SOCK_ZAPPED);
+	SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
+
+	return 0;
+}
+
+static int x25_wait_for_connection_establishment(struct sock *sk)
+{
+	DECLARE_WAITQUEUE(wait, current);
+        int rc;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = sock_error(sk);
+		if (rc) {
+			sk->sk_socket->state = SS_UNCONNECTED;
+			break;
+		}
+		rc = 0;
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			release_sock(sk);
+			schedule();
+			lock_sock(sk);
+		} else
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+
+static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
+		       int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25 = x25_sk(sk);
+	struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
+	struct x25_route *rt;
+	int rc = 0;
+
+	lock_sock(sk);
+	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
+		sock->state = SS_CONNECTED;
+		goto out; /* Connect completed during a ERESTARTSYS event */
+	}
+
+	rc = -ECONNREFUSED;
+	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
+		sock->state = SS_UNCONNECTED;
+		goto out;
+	}
+
+	rc = -EISCONN;	/* No reconnect on a seqpacket socket */
+	if (sk->sk_state == TCP_ESTABLISHED)
+		goto out;
+
+	sk->sk_state   = TCP_CLOSE;	
+	sock->state = SS_UNCONNECTED;
+
+	rc = -EINVAL;
+	if (addr_len != sizeof(struct sockaddr_x25) ||
+	    addr->sx25_family != AF_X25)
+		goto out;
+
+	rc = -ENETUNREACH;
+	rt = x25_get_route(&addr->sx25_addr);
+	if (!rt)
+		goto out;
+
+	x25->neighbour = x25_get_neigh(rt->dev);
+	if (!x25->neighbour)
+		goto out_put_route;
+
+	x25_limit_facilities(&x25->facilities, x25->neighbour);
+
+	x25->lci = x25_new_lci(x25->neighbour);
+	if (!x25->lci)
+		goto out_put_neigh;
+
+	rc = -EINVAL;
+	if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
+		goto out_put_neigh;
+
+	if (!strcmp(x25->source_addr.x25_addr, null_x25_address.x25_addr))
+		memset(&x25->source_addr, '\0', X25_ADDR_LEN);
+
+	x25->dest_addr = addr->sx25_addr;
+
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state   = SS_CONNECTING;
+	sk->sk_state  = TCP_SYN_SENT;
+
+	x25->state = X25_STATE_1;
+
+	x25_write_internal(sk, X25_CALL_REQUEST);
+
+	x25_start_heartbeat(sk);
+	x25_start_t21timer(sk);
+
+	/* Now the loop */
+	rc = -EINPROGRESS;
+	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
+		goto out_put_neigh;
+
+	rc = x25_wait_for_connection_establishment(sk);
+	if (rc)
+		goto out_put_neigh;
+
+	sock->state = SS_CONNECTED;
+	rc = 0;
+out_put_neigh:
+	if (rc)
+		x25_neigh_put(x25->neighbour);
+out_put_route:
+	x25_route_put(rt);
+out:
+	release_sock(sk);
+	return rc;
+}
+
+static int x25_wait_for_data(struct sock *sk, int timeout)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int rc = 0;
+
+	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	for (;;) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+		rc = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		rc = -EAGAIN;
+		if (!timeout)
+			break;
+		rc = 0;
+		if (skb_queue_empty(&sk->sk_receive_queue)) {
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+		} else
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return rc;
+}
+	
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sock *newsk;
+	struct sk_buff *skb;
+	int rc = -EINVAL;
+
+	if (!sk || sk->sk_state != TCP_LISTEN)
+		goto out;
+
+	rc = -EOPNOTSUPP;
+	if (sk->sk_type != SOCK_SEQPACKET)
+		goto out;
+
+	lock_sock(sk);
+	rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
+	if (rc)
+		goto out2;
+	skb = skb_dequeue(&sk->sk_receive_queue);
+	rc = -EINVAL;
+	if (!skb->sk)
+		goto out2;
+	newsk		 = skb->sk;
+	newsk->sk_socket = newsock;
+	newsk->sk_sleep  = &newsock->wait;
+
+	/* Now attach up the new socket */
+	skb->sk = NULL;
+	kfree_skb(skb);
+	sk->sk_ack_backlog--;
+	newsock->sk    = newsk;
+	newsock->state = SS_CONNECTED;
+	rc = 0;
+out2:
+	release_sock(sk);
+out:
+	return rc;
+}
+
+static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
+		       int *uaddr_len, int peer)
+{
+	struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (peer) {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -ENOTCONN;
+		sx25->sx25_addr = x25->dest_addr;
+	} else
+		sx25->sx25_addr = x25->source_addr;
+
+	sx25->sx25_family = AF_X25;
+	*uaddr_len = sizeof(*sx25);
+
+	return 0;
+}
+ 
+int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
+			unsigned int lci)
+{
+	struct sock *sk;
+	struct sock *make;
+	struct x25_sock *makex25;
+	struct x25_address source_addr, dest_addr;
+	struct x25_facilities facilities;
+	struct x25_calluserdata calluserdata;
+	int len, rc;
+
+	/*
+	 *	Remove the LCI and frame type.
+	 */
+	skb_pull(skb, X25_STD_MIN_LEN);
+
+	/*
+	 *	Extract the X.25 addresses and convert them to ASCII strings,
+	 *	and remove them.
+	 */
+	skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr));
+
+	/*
+	 *	Get the length of the facilities, skip past them for the moment
+	 *	get the call user data because this is needed to determine
+	 *	the correct listener
+	 */
+	len = skb->data[0] + 1;
+	skb_pull(skb,len);
+
+	/*
+	 *	Incoming Call User Data.
+	 */
+	if (skb->len >= 0) {
+		memcpy(calluserdata.cuddata, skb->data, skb->len);
+		calluserdata.cudlength = skb->len;
+	}
+
+	skb_push(skb,len);
+
+	/*
+	 *	Find a listener for the particular address/cud pair.
+	 */
+	sk = x25_find_listener(&source_addr,&calluserdata);
+
+	/*
+	 *	We can't accept the Call Request.
+	 */
+	if (sk == NULL || sk_acceptq_is_full(sk))
+		goto out_clear_request;
+
+	/*
+	 *	Try to reach a compromise on the requested facilities.
+	 */
+	if ((len = x25_negotiate_facilities(skb, sk, &facilities)) == -1)
+		goto out_sock_put;
+
+	/*
+	 * current neighbour/link might impose additional limits
+	 * on certain facilties
+	 */
+
+	x25_limit_facilities(&facilities, nb);
+
+	/*
+	 *	Try to create a new socket.
+	 */
+	make = x25_make_new(sk);
+	if (!make)
+		goto out_sock_put;
+
+	/*
+	 *	Remove the facilities
+	 */
+	skb_pull(skb, len);
+
+	skb->sk     = make;
+	make->sk_state = TCP_ESTABLISHED;
+
+	makex25 = x25_sk(make);
+	makex25->lci           = lci;
+	makex25->dest_addr     = dest_addr;
+	makex25->source_addr   = source_addr;
+	makex25->neighbour     = nb;
+	makex25->facilities    = facilities;
+	makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask;
+	makex25->calluserdata  = calluserdata;
+
+	x25_write_internal(make, X25_CALL_ACCEPTED);
+
+	makex25->state = X25_STATE_3;
+
+	sk->sk_ack_backlog++;
+
+	x25_insert_socket(make);
+
+	skb_queue_head(&sk->sk_receive_queue, skb);
+
+	x25_start_heartbeat(make);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	rc = 1;
+	sock_put(sk);
+out:
+	return rc;
+out_sock_put:
+	sock_put(sk);
+out_clear_request:
+	rc = 0;
+	x25_transmit_clear_request(nb, lci, 0x01);
+	goto out;
+}
+
+static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25 = x25_sk(sk);
+	struct sockaddr_x25 *usx25 = (struct sockaddr_x25 *)msg->msg_name;
+	struct sockaddr_x25 sx25;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int noblock = msg->msg_flags & MSG_DONTWAIT;
+	size_t size;
+	int qbit = 0, rc = -EINVAL;
+
+	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT))
+		goto out;
+
+	/* we currently don't support segmented records at the user interface */
+	if (!(msg->msg_flags & (MSG_EOR|MSG_OOB)))
+		goto out;
+
+	rc = -EADDRNOTAVAIL;
+	if (sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+
+	rc = -EPIPE;
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		send_sig(SIGPIPE, current, 0);
+		goto out;
+	}
+
+	rc = -ENETUNREACH;
+	if (!x25->neighbour)
+		goto out;
+
+	if (usx25) {
+		rc = -EINVAL;
+		if (msg->msg_namelen < sizeof(sx25))
+			goto out;
+		memcpy(&sx25, usx25, sizeof(sx25));
+		rc = -EISCONN;
+		if (strcmp(x25->dest_addr.x25_addr, sx25.sx25_addr.x25_addr))
+			goto out;
+		rc = -EINVAL;
+		if (sx25.sx25_family != AF_X25)
+			goto out;
+	} else {
+		/*
+		 *	FIXME 1003.1g - if the socket is like this because
+		 *	it has become closed (not started closed) we ought
+		 *	to SIGPIPE, EPIPE;
+		 */
+		rc = -ENOTCONN;
+		if (sk->sk_state != TCP_ESTABLISHED)
+			goto out;
+
+		sx25.sx25_family = AF_X25;
+		sx25.sx25_addr   = x25->dest_addr;
+	}
+
+	SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n");
+
+	/* Build a packet */
+	SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n");
+
+	if ((msg->msg_flags & MSG_OOB) && len > 32)
+		len = 32;
+
+	size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN;
+
+	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+	if (!skb)
+		goto out;
+	X25_SKB_CB(skb)->flags = msg->msg_flags;
+
+	skb_reserve(skb, X25_MAX_L2_LEN + X25_EXT_MIN_LEN);
+
+	/*
+	 *	Put the data on the end
+	 */
+	SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
+
+	asmptr = skb->h.raw = skb_put(skb, len);
+
+	rc = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	if (rc)
+		goto out_kfree_skb;
+
+	/*
+	 *	If the Q BIT Include socket option is in force, the first
+	 *	byte of the user data is the logical value of the Q Bit.
+	 */
+	if (x25->qbitincl) {
+		qbit = skb->data[0];
+		skb_pull(skb, 1);
+	}
+
+	/*
+	 *	Push down the X.25 header
+	 */
+	SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n");
+
+	if (msg->msg_flags & MSG_OOB) {
+		if (x25->neighbour->extended) {
+			asmptr    = skb_push(skb, X25_STD_MIN_LEN);
+			*asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_EXTSEQ;
+			*asmptr++ = (x25->lci >> 0) & 0xFF;
+			*asmptr++ = X25_INTERRUPT;
+		} else {
+			asmptr    = skb_push(skb, X25_STD_MIN_LEN);
+			*asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_STDSEQ;
+			*asmptr++ = (x25->lci >> 0) & 0xFF;
+			*asmptr++ = X25_INTERRUPT;
+		}
+	} else {
+		if (x25->neighbour->extended) {
+			/* Build an Extended X.25 header */
+			asmptr    = skb_push(skb, X25_EXT_MIN_LEN);
+			*asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_EXTSEQ;
+			*asmptr++ = (x25->lci >> 0) & 0xFF;
+			*asmptr++ = X25_DATA;
+			*asmptr++ = X25_DATA;
+		} else {
+			/* Build an Standard X.25 header */
+			asmptr    = skb_push(skb, X25_STD_MIN_LEN);
+			*asmptr++ = ((x25->lci >> 8) & 0x0F) | X25_GFI_STDSEQ;
+			*asmptr++ = (x25->lci >> 0) & 0xFF;
+			*asmptr++ = X25_DATA;
+		}
+
+		if (qbit)
+			skb->data[0] |= X25_Q_BIT;
+	}
+
+	SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n");
+	SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n");
+
+	rc = -ENOTCONN;
+	if (sk->sk_state != TCP_ESTABLISHED)
+		goto out_kfree_skb;
+
+	if (msg->msg_flags & MSG_OOB)
+		skb_queue_tail(&x25->interrupt_out_queue, skb);
+	else {
+	        len = x25_output(sk, skb);
+		if (len < 0)
+			kfree_skb(skb);
+		else if (x25->qbitincl)
+			len++;
+	}
+
+	/*
+	 * lock_sock() is currently only used to serialize this x25_kick()
+	 * against input-driven x25_kick() calls. It currently only blocks
+	 * incoming packets for this socket and does not protect against
+	 * any other socket state changes and is not called from anywhere
+	 * else. As x25_kick() cannot block and as long as all socket
+	 * operations are BKL-wrapped, we don't need take to care about
+	 * purging the backlog queue in x25_release().
+	 *
+	 * Using lock_sock() to protect all socket operations entirely
+	 * (and making the whole x25 stack SMP aware) unfortunately would
+	 * require major changes to {send,recv}msg and skb allocation methods.
+	 * -> 2.5 ;)
+	 */
+	lock_sock(sk);
+	x25_kick(sk);
+	release_sock(sk);
+	rc = len;
+out:
+	return rc;
+out_kfree_skb:
+	kfree_skb(skb);
+	goto out;
+}
+
+
+static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size,
+		       int flags)
+{
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25 = x25_sk(sk);
+	struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name;
+	size_t copied;
+	int qbit;
+	struct sk_buff *skb;
+	unsigned char *asmptr;
+	int rc = -ENOTCONN;
+
+	/*
+	 * This works for seqpacket too. The receiver has ordered the queue for
+	 * us! We do one quick check first though
+	 */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		goto out;
+
+	if (flags & MSG_OOB) {
+		rc = -EINVAL;
+		if (sock_flag(sk, SOCK_URGINLINE) ||
+		    !skb_peek(&x25->interrupt_in_queue))
+			goto out;
+
+		skb = skb_dequeue(&x25->interrupt_in_queue);
+
+		skb_pull(skb, X25_STD_MIN_LEN);
+
+		/*
+		 *	No Q bit information on Interrupt data.
+		 */
+		if (x25->qbitincl) {
+			asmptr  = skb_push(skb, 1);
+			*asmptr = 0x00;
+		}
+
+		msg->msg_flags |= MSG_OOB;
+	} else {
+		/* Now we can treat all alike */
+		skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+					flags & MSG_DONTWAIT, &rc);
+		if (!skb)
+			goto out;
+
+		qbit = (skb->data[0] & X25_Q_BIT) == X25_Q_BIT;
+
+		skb_pull(skb, x25->neighbour->extended ?
+				X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
+
+		if (x25->qbitincl) {
+			asmptr  = skb_push(skb, 1);
+			*asmptr = qbit;
+		}
+	}
+
+	skb->h.raw = skb->data;
+
+	copied = skb->len;
+
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* Currently, each datagram always contains a complete record */ 
+	msg->msg_flags |= MSG_EOR;
+
+	rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (rc)
+		goto out_free_dgram;
+
+	if (sx25) {
+		sx25->sx25_family = AF_X25;
+		sx25->sx25_addr   = x25->dest_addr;
+	}
+
+	msg->msg_namelen = sizeof(struct sockaddr_x25);
+
+	lock_sock(sk);
+	x25_check_rbuf(sk);
+	release_sock(sk);
+	rc = copied;
+out_free_dgram:
+	skb_free_datagram(sk, skb);
+out:
+	return rc;
+}
+
+
+static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct x25_sock *x25 = x25_sk(sk);
+	void __user *argp = (void __user *)arg;
+	int rc;
+
+	switch (cmd) {
+		case TIOCOUTQ: {
+			int amount = sk->sk_sndbuf -
+				     atomic_read(&sk->sk_wmem_alloc);
+			if (amount < 0)
+				amount = 0;
+			rc = put_user(amount, (unsigned int __user *)argp);
+			break;
+		}
+
+		case TIOCINQ: {
+			struct sk_buff *skb;
+			int amount = 0;
+			/*
+			 * These two are safe on a single CPU system as
+			 * only user tasks fiddle here
+			 */
+			if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+				amount = skb->len;
+			rc = put_user(amount, (unsigned int __user *)argp);
+			break;
+		}
+
+		case SIOCGSTAMP:
+			rc = -EINVAL;
+			if (sk)
+				rc = sock_get_timestamp(sk, 
+						(struct timeval __user *)argp); 
+			break;
+		case SIOCGIFADDR:
+		case SIOCSIFADDR:
+		case SIOCGIFDSTADDR:
+		case SIOCSIFDSTADDR:
+		case SIOCGIFBRDADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCGIFNETMASK:
+		case SIOCSIFNETMASK:
+		case SIOCGIFMETRIC:
+		case SIOCSIFMETRIC:
+			rc = -EINVAL;
+			break;
+		case SIOCADDRT:
+		case SIOCDELRT:
+			rc = -EPERM;
+			if (!capable(CAP_NET_ADMIN))
+				break;
+			rc = x25_route_ioctl(cmd, argp);
+			break;
+		case SIOCX25GSUBSCRIP:
+			rc = x25_subscr_ioctl(cmd, argp);
+			break;
+		case SIOCX25SSUBSCRIP:
+			rc = -EPERM;
+			if (!capable(CAP_NET_ADMIN))
+				break;
+			rc = x25_subscr_ioctl(cmd, argp);
+			break;
+		case SIOCX25GFACILITIES: {
+			struct x25_facilities fac = x25->facilities;
+			rc = copy_to_user(argp, &fac,
+					  sizeof(fac)) ? -EFAULT : 0;
+			break;
+		}
+
+		case SIOCX25SFACILITIES: {
+			struct x25_facilities facilities;
+			rc = -EFAULT;
+			if (copy_from_user(&facilities, argp,
+					   sizeof(facilities)))
+				break;
+			rc = -EINVAL;
+			if (sk->sk_state != TCP_LISTEN &&
+			    sk->sk_state != TCP_CLOSE)
+				break;
+			if (facilities.pacsize_in < X25_PS16 ||
+			    facilities.pacsize_in > X25_PS4096)
+				break;
+			if (facilities.pacsize_out < X25_PS16 ||
+			    facilities.pacsize_out > X25_PS4096)
+				break;
+			if (facilities.winsize_in < 1 ||
+			    facilities.winsize_in > 127)
+				break;
+			if (facilities.throughput < 0x03 ||
+			    facilities.throughput > 0xDD)
+				break;
+			if (facilities.reverse && facilities.reverse != 1)
+				break;
+			x25->facilities = facilities;
+			rc = 0;
+			break;
+		}
+
+		case SIOCX25GCALLUSERDATA: {
+			struct x25_calluserdata cud = x25->calluserdata;
+			rc = copy_to_user(argp, &cud,
+					  sizeof(cud)) ? -EFAULT : 0;
+			break;
+		}
+
+		case SIOCX25SCALLUSERDATA: {
+			struct x25_calluserdata calluserdata;
+
+			rc = -EFAULT;
+			if (copy_from_user(&calluserdata, argp,
+					   sizeof(calluserdata)))
+				break;
+			rc = -EINVAL;
+			if (calluserdata.cudlength > X25_MAX_CUD_LEN)
+				break;
+			x25->calluserdata = calluserdata;
+			rc = 0;
+			break;
+		}
+
+		case SIOCX25GCAUSEDIAG: {
+			struct x25_causediag causediag;
+			causediag = x25->causediag;
+			rc = copy_to_user(argp, &causediag,
+					  sizeof(causediag)) ? -EFAULT : 0;
+			break;
+		}
+
+ 		default:
+			rc = dev_ioctl(cmd, argp);
+			break;
+	}
+
+	return rc;
+}
+
+static struct net_proto_family x25_family_ops = {
+	.family =	AF_X25,
+	.create =	x25_create,
+	.owner	=	THIS_MODULE,
+};
+
+static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
+	.family =	AF_X25,
+	.owner =	THIS_MODULE,
+	.release =	x25_release,
+	.bind =		x25_bind,
+	.connect =	x25_connect,
+	.socketpair =	sock_no_socketpair,
+	.accept =	x25_accept,
+	.getname =	x25_getname,
+	.poll =		datagram_poll,
+	.ioctl =	x25_ioctl,
+	.listen =	x25_listen,
+	.shutdown =	sock_no_shutdown,
+	.setsockopt =	x25_setsockopt,
+	.getsockopt =	x25_getsockopt,
+	.sendmsg =	x25_sendmsg,
+	.recvmsg =	x25_recvmsg,
+	.mmap =		sock_no_mmap,
+	.sendpage =	sock_no_sendpage,
+};
+
+#include <linux/smp_lock.h>
+SOCKOPS_WRAP(x25_proto, AF_X25);
+
+static struct packet_type x25_packet_type = {
+	.type =	__constant_htons(ETH_P_X25),
+	.func =	x25_lapb_receive_frame,
+};
+
+static struct notifier_block x25_dev_notifier = {
+	.notifier_call = x25_device_event,
+};
+
+void x25_kill_by_neigh(struct x25_neigh *nb)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	write_lock_bh(&x25_list_lock);
+
+	sk_for_each(s, node, &x25_list)
+		if (x25_sk(s)->neighbour == nb)
+			x25_disconnect(s, ENETUNREACH, 0, 0);
+
+	write_unlock_bh(&x25_list_lock);
+}
+
+static int __init x25_init(void)
+{
+	int rc = proto_register(&x25_proto, 0);
+
+	if (rc != 0)
+		goto out;
+
+	sock_register(&x25_family_ops);
+
+	dev_add_pack(&x25_packet_type);
+
+	register_netdevice_notifier(&x25_dev_notifier);
+
+	printk(KERN_INFO "X.25 for Linux. Version 0.2 for Linux 2.1.15\n");
+
+#ifdef CONFIG_SYSCTL
+	x25_register_sysctl();
+#endif
+	x25_proc_init();
+out:
+	return rc;
+}
+module_init(x25_init);
+
+static void __exit x25_exit(void)
+{
+	x25_proc_exit();
+	x25_link_free();
+	x25_route_free();
+
+#ifdef CONFIG_SYSCTL
+	x25_unregister_sysctl();
+#endif
+
+	unregister_netdevice_notifier(&x25_dev_notifier);
+
+	dev_remove_pack(&x25_packet_type);
+
+	sock_unregister(AF_X25);
+	proto_unregister(&x25_proto);
+}
+module_exit(x25_exit);
+
+MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>");
+MODULE_DESCRIPTION("The X.25 Packet Layer network layer protocol");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_X25);
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
new file mode 100644
index 00000000000..aabda59c824
--- /dev/null
+++ b/net/x25/sysctl_net_x25.c
@@ -0,0 +1,107 @@
+/* -*- linux-c -*-
+ * sysctl_net_x25.c: sysctl interface to net X.25 subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/x25 directory entry (empty =) ). [MS]
+ */
+
+#include <linux/sysctl.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <net/x25.h>
+
+static int min_timer[] = {   1 * HZ };
+static int max_timer[] = { 300 * HZ };
+
+static struct ctl_table_header *x25_table_header;
+
+static struct ctl_table x25_table[] = {
+        {
+		.ctl_name =	NET_X25_RESTART_REQUEST_TIMEOUT,
+		.procname =	"restart_request_timeout",
+		.data =		&sysctl_x25_restart_request_timeout,
+		.maxlen =	sizeof(int),
+		.mode =		0644,
+		.proc_handler =	&proc_dointvec_minmax,
+		.strategy =	&sysctl_intvec,
+		.extra1 =	&min_timer,
+		.extra2 =	&max_timer,
+	},
+        {
+		.ctl_name =	NET_X25_CALL_REQUEST_TIMEOUT,
+		.procname =	"call_request_timeout",
+		.data =		&sysctl_x25_call_request_timeout,
+		.maxlen =	sizeof(int),
+		.mode =		0644,
+		.proc_handler =	&proc_dointvec_minmax,
+		.strategy =	&sysctl_intvec,
+		.extra1 =	&min_timer,
+		.extra2 =	&max_timer,
+	},
+        {
+		.ctl_name =	NET_X25_RESET_REQUEST_TIMEOUT,
+		.procname =	"reset_request_timeout",
+		.data =		&sysctl_x25_reset_request_timeout,
+		.maxlen =	sizeof(int),
+		.mode =		0644,
+		.proc_handler =	&proc_dointvec_minmax,
+		.strategy =	&sysctl_intvec,
+		.extra1 =	&min_timer,
+		.extra2 =	&max_timer,
+	},
+        {
+		.ctl_name =	NET_X25_CLEAR_REQUEST_TIMEOUT,
+		.procname =	"clear_request_timeout",
+		.data =		&sysctl_x25_clear_request_timeout,
+		.maxlen =	sizeof(int),
+		.mode =		0644,
+		.proc_handler =	&proc_dointvec_minmax,
+		.strategy =	&sysctl_intvec,
+		.extra1 =	&min_timer,
+		.extra2 =	&max_timer,
+	},
+        {
+		.ctl_name =	NET_X25_ACK_HOLD_BACK_TIMEOUT,
+		.procname =	"acknowledgement_hold_back_timeout",
+		.data =		&sysctl_x25_ack_holdback_timeout,
+		.maxlen =	sizeof(int),
+		.mode =		0644,
+		.proc_handler =	&proc_dointvec_minmax,
+		.strategy =	&sysctl_intvec,
+		.extra1 =	&min_timer,
+		.extra2 =	&max_timer,
+	},
+	{ 0, },
+};
+
+static struct ctl_table x25_dir_table[] = {
+	{
+		.ctl_name =	NET_X25,
+		.procname =	"x25",
+		.mode =		0555,
+		.child =	x25_table,
+	},
+	{ 0, },
+};
+
+static struct ctl_table x25_root_table[] = {
+	{
+		.ctl_name =	CTL_NET,
+		.procname =	"net",
+		.mode =		0555,
+		.child =	x25_dir_table,
+	},
+	{ 0, },
+};
+
+void __init x25_register_sysctl(void)
+{
+	x25_table_header = register_sysctl_table(x25_root_table, 1);
+}
+
+void x25_unregister_sysctl(void)
+{
+	unregister_sysctl_table(x25_table_header);
+}
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
new file mode 100644
index 00000000000..36fc3bf6d88
--- /dev/null
+++ b/net/x25/x25_dev.c
@@ -0,0 +1,207 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine, randomly fail to work with new 
+ *	releases, misbehave and/or generally screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	Started coding.
+ *      2000-09-04	Henner Eisen	Prevent freeing a dangling skb.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/if_arp.h>
+#include <net/x25.h>
+
+static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
+{
+	struct sock *sk;
+	unsigned short frametype;
+	unsigned int lci;
+
+	frametype = skb->data[2];
+        lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
+
+	/*
+	 *	LCI of zero is always for us, and its always a link control
+	 *	frame.
+	 */
+	if (lci == 0) {
+		x25_link_control(skb, nb, frametype);
+		return 0;
+	}
+
+	/*
+	 *	Find an existing socket.
+	 */
+	if ((sk = x25_find_socket(lci, nb)) != NULL) {
+		int queued = 1;
+
+		skb->h.raw = skb->data;
+		bh_lock_sock(sk);
+		if (!sock_owned_by_user(sk)) {
+			queued = x25_process_rx_frame(sk, skb);
+		} else {
+			sk_add_backlog(sk, skb);
+		}
+		bh_unlock_sock(sk);
+		return queued;
+	}
+
+	/*
+	 *	Is is a Call Request ? if so process it.
+	 */
+	if (frametype == X25_CALL_REQUEST)
+		return x25_rx_call_request(skb, nb, lci);
+
+	/*
+	 *	Its not a Call Request, nor is it a control frame.
+	 *      Let caller throw it away.
+	 */
+/*
+	x25_transmit_clear_request(nb, lci, 0x0D);
+*/
+
+	if (frametype != X25_CLEAR_CONFIRMATION)
+		printk(KERN_DEBUG "x25_receive_data(): unknown frame type %2x\n",frametype);
+
+	return 0;
+}
+
+int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
+			   struct packet_type *ptype)
+{
+	struct sk_buff *nskb;
+	struct x25_neigh *nb;
+
+	nskb = skb_copy(skb, GFP_ATOMIC);
+	if (!nskb)
+		goto drop;
+	kfree_skb(skb);
+	skb = nskb;
+
+	/*
+	 * Packet received from unrecognised device, throw it away.
+	 */
+	nb = x25_get_neigh(dev);
+	if (!nb) {
+		printk(KERN_DEBUG "X.25: unknown neighbour - %s\n", dev->name);
+		goto drop;
+	}
+
+	switch (skb->data[0]) {
+		case 0x00:
+			skb_pull(skb, 1);
+			if (x25_receive_data(skb, nb)) {
+				x25_neigh_put(nb);
+				goto out;
+			}
+			break;
+		case 0x01:
+			x25_link_established(nb);
+			break;
+		case 0x02:
+			x25_link_terminated(nb);
+			break;
+	}
+	x25_neigh_put(nb);
+drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+void x25_establish_link(struct x25_neigh *nb)
+{
+	struct sk_buff *skb;
+	unsigned char *ptr;
+
+	switch (nb->dev->type) {
+		case ARPHRD_X25:
+			if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) {
+				printk(KERN_ERR "x25_dev: out of memory\n");
+				return;
+			}
+			ptr  = skb_put(skb, 1);
+			*ptr = 0x01;
+			break;
+
+#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+		case ARPHRD_ETHER:
+			return;
+#endif
+		default:
+			return;
+	}
+
+	skb->protocol = htons(ETH_P_X25);
+	skb->dev      = nb->dev;
+
+	dev_queue_xmit(skb);
+}
+
+void x25_terminate_link(struct x25_neigh *nb)
+{
+	struct sk_buff *skb;
+	unsigned char *ptr;
+
+#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+	if (nb->dev->type == ARPHRD_ETHER)
+		return;
+#endif
+	if (nb->dev->type != ARPHRD_X25)
+		return;
+
+	skb = alloc_skb(1, GFP_ATOMIC);
+	if (!skb) {
+		printk(KERN_ERR "x25_dev: out of memory\n");
+		return;
+	}
+
+	ptr  = skb_put(skb, 1);
+	*ptr = 0x02;
+
+	skb->protocol = htons(ETH_P_X25);
+	skb->dev      = nb->dev;
+	dev_queue_xmit(skb);
+}
+
+void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
+{
+	unsigned char *dptr;
+
+	skb->nh.raw = skb->data;
+
+	switch (nb->dev->type) {
+		case ARPHRD_X25:
+			dptr  = skb_push(skb, 1);
+			*dptr = 0x00;
+			break;
+
+#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+		case ARPHRD_ETHER:
+			kfree_skb(skb);
+			return;
+#endif
+		default:
+			kfree_skb(skb);
+			return;
+	}
+
+	skb->protocol = htons(ETH_P_X25);
+	skb->dev      = nb->dev;
+
+	dev_queue_xmit(skb);
+}
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
new file mode 100644
index 00000000000..a21bdb95f9a
--- /dev/null
+++ b/net/x25/x25_facilities.c
@@ -0,0 +1,231 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Split from x25_subr.c
+ *	mar/20/00	Daniela Squassoni Disabling/enabling of facilities 
+ *					  negotiation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/x25.h>
+
+/*
+ *	Parse a set of facilities into the facilities structure. Unrecognised
+ *	facilities are written to the debug log file.
+ */
+int x25_parse_facilities(struct sk_buff *skb,
+			 struct x25_facilities *facilities,
+			 unsigned long *vc_fac_mask)
+{
+	unsigned char *p = skb->data;
+	unsigned int len = *p++;
+
+	*vc_fac_mask = 0;
+
+	while (len > 0) {
+		switch (*p & X25_FAC_CLASS_MASK) {
+		case X25_FAC_CLASS_A:
+			switch (*p) {
+			case X25_FAC_REVERSE:
+				facilities->reverse = p[1] & 0x01;
+				*vc_fac_mask |= X25_MASK_REVERSE;
+				break;
+			case X25_FAC_THROUGHPUT:
+				facilities->throughput = p[1];
+				*vc_fac_mask |= X25_MASK_THROUGHPUT;
+				break;
+			default:
+				printk(KERN_DEBUG "X.25: unknown facility "
+				       "%02X, value %02X\n",
+				       p[0], p[1]);
+				break;
+			}
+			p   += 2;
+			len -= 2;
+			break;
+		case X25_FAC_CLASS_B:
+			switch (*p) {
+			case X25_FAC_PACKET_SIZE:
+				facilities->pacsize_in  = p[1];
+				facilities->pacsize_out = p[2];
+				*vc_fac_mask |= X25_MASK_PACKET_SIZE;
+				break;
+			case X25_FAC_WINDOW_SIZE:
+				facilities->winsize_in  = p[1];
+				facilities->winsize_out = p[2];
+				*vc_fac_mask |= X25_MASK_WINDOW_SIZE;
+				break;
+			default:
+				printk(KERN_DEBUG "X.25: unknown facility "
+				       "%02X, values %02X, %02X\n",
+				       p[0], p[1], p[2]);
+				break;
+			}
+			p   += 3;
+			len -= 3;
+			break;
+		case X25_FAC_CLASS_C:
+			printk(KERN_DEBUG "X.25: unknown facility %02X, "
+			       "values %02X, %02X, %02X\n",
+			       p[0], p[1], p[2], p[3]);
+			p   += 4;
+			len -= 4;
+			break;
+		case X25_FAC_CLASS_D:
+			printk(KERN_DEBUG "X.25: unknown facility %02X, "
+			       "length %d, values %02X, %02X, %02X, %02X\n",
+			       p[0], p[1], p[2], p[3], p[4], p[5]);
+			len -= p[1] + 2;
+			p   += p[1] + 2;
+			break;
+		}
+	}
+
+	return p - skb->data;
+}
+
+/*
+ *	Create a set of facilities.
+ */
+int x25_create_facilities(unsigned char *buffer,
+			  struct x25_facilities *facilities,
+			  unsigned long facil_mask)
+{
+	unsigned char *p = buffer + 1;
+	int len;
+
+	if (!facil_mask) {
+		/*
+		 * Length of the facilities field in call_req or
+		 * call_accept packets
+		 */
+		buffer[0] = 0;
+		len = 1; /* 1 byte for the length field */
+		return len;
+	}
+
+	if (facilities->reverse && (facil_mask & X25_MASK_REVERSE)) {
+		*p++ = X25_FAC_REVERSE;
+		*p++ = !!facilities->reverse;
+	}
+
+	if (facilities->throughput && (facil_mask & X25_MASK_THROUGHPUT)) {
+		*p++ = X25_FAC_THROUGHPUT;
+		*p++ = facilities->throughput;
+	}
+
+	if ((facilities->pacsize_in || facilities->pacsize_out) &&
+	    (facil_mask & X25_MASK_PACKET_SIZE)) {
+		*p++ = X25_FAC_PACKET_SIZE;
+		*p++ = facilities->pacsize_in ? : facilities->pacsize_out;
+		*p++ = facilities->pacsize_out ? : facilities->pacsize_in;
+	}
+
+	if ((facilities->winsize_in || facilities->winsize_out) &&
+	    (facil_mask & X25_MASK_WINDOW_SIZE)) {
+		*p++ = X25_FAC_WINDOW_SIZE;
+		*p++ = facilities->winsize_in ? : facilities->winsize_out;
+		*p++ = facilities->winsize_out ? : facilities->winsize_in;
+	}
+
+	len       = p - buffer;
+	buffer[0] = len - 1;
+
+	return len;
+}
+
+/*
+ *	Try to reach a compromise on a set of facilities.
+ *
+ *	The only real problem is with reverse charging.
+ */
+int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
+			     struct x25_facilities *new)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+	struct x25_facilities *ours = &x25->facilities;
+	struct x25_facilities theirs;
+	int len;
+
+	memset(&theirs, 0, sizeof(theirs));
+	memcpy(new, ours, sizeof(*new));
+
+	len = x25_parse_facilities(skb, &theirs, &x25->vc_facil_mask);
+
+	/*
+	 *	They want reverse charging, we won't accept it.
+	 */
+	if (theirs.reverse && ours->reverse) {
+		SOCK_DEBUG(sk, "X.25: rejecting reverse charging request");
+		return -1;
+	}
+
+	new->reverse = theirs.reverse;
+
+	if (theirs.throughput) {
+		if (theirs.throughput < ours->throughput) {
+			SOCK_DEBUG(sk, "X.25: throughput negotiated down");
+			new->throughput = theirs.throughput;
+		}
+	}
+
+	if (theirs.pacsize_in && theirs.pacsize_out) {
+		if (theirs.pacsize_in < ours->pacsize_in) {
+			SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down");
+			new->pacsize_in = theirs.pacsize_in;
+		}
+		if (theirs.pacsize_out < ours->pacsize_out) {
+			SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down");
+			new->pacsize_out = theirs.pacsize_out;
+		}
+	}
+
+	if (theirs.winsize_in && theirs.winsize_out) {
+		if (theirs.winsize_in < ours->winsize_in) {
+			SOCK_DEBUG(sk, "X.25: window size inwards negotiated down");
+			new->winsize_in = theirs.winsize_in;
+		}
+		if (theirs.winsize_out < ours->winsize_out) {
+			SOCK_DEBUG(sk, "X.25: window size outwards negotiated down");
+			new->winsize_out = theirs.winsize_out;
+		}
+	}
+
+	return len;
+}
+
+/*
+ *	Limit values of certain facilities according to the capability of the 
+ *      currently attached x25 link.
+ */
+void x25_limit_facilities(struct x25_facilities *facilities,
+			  struct x25_neigh *nb)
+{
+
+	if (!nb->extended) {
+		if (facilities->winsize_in  > 7) {
+			printk(KERN_DEBUG "X.25: incoming winsize limited to 7\n");
+			facilities->winsize_in = 7;
+		}
+		if (facilities->winsize_out > 7) {
+			facilities->winsize_out = 7;
+			printk( KERN_DEBUG "X.25: outgoing winsize limited to 7\n");
+		}
+	}
+}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
new file mode 100644
index 00000000000..b0197c70a9f
--- /dev/null
+++ b/net/x25/x25_in.c
@@ -0,0 +1,361 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	  Started coding.
+ *	X.25 002	Jonathan Naylor	  Centralised disconnection code.
+ *					  New timer architecture.
+ *	2000-03-20	Daniela Squassoni Disabling/enabling of facilities 
+ *					  negotiation.
+ *	2000-11-10	Henner Eisen	  Check and reset for out-of-sequence
+ *					  i-frames.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/x25.h>
+
+static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
+{
+	struct sk_buff *skbo, *skbn = skb;
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (more) {
+		x25->fraglen += skb->len;
+		skb_queue_tail(&x25->fragment_queue, skb);
+		skb_set_owner_r(skb, sk);
+		return 0;
+	}
+
+	if (!more && x25->fraglen > 0) {	/* End of fragment */
+		int len = x25->fraglen + skb->len;
+
+		if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){
+			kfree_skb(skb);
+			return 1;
+		}
+
+		skb_queue_tail(&x25->fragment_queue, skb);
+
+		skbn->h.raw = skbn->data;
+
+		skbo = skb_dequeue(&x25->fragment_queue);
+		memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+		kfree_skb(skbo);
+
+		while ((skbo =
+			skb_dequeue(&x25->fragment_queue)) != NULL) {
+			skb_pull(skbo, (x25->neighbour->extended) ?
+					X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
+			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			kfree_skb(skbo);
+		}
+
+		x25->fraglen = 0;		
+	}
+
+	skb_set_owner_r(skbn, sk);
+	skb_queue_tail(&sk->sk_receive_queue, skbn);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skbn->len);
+
+	return 0;
+}
+
+/*
+ * State machine for state 1, Awaiting Call Accepted State.
+ * The handling of the timer(s) is in file x25_timer.c.
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	struct x25_address source_addr, dest_addr;
+
+	switch (frametype) {
+		case X25_CALL_ACCEPTED: {
+			struct x25_sock *x25 = x25_sk(sk);
+
+			x25_stop_timer(sk);
+			x25->condition = 0x00;
+			x25->vs        = 0;
+			x25->va        = 0;
+			x25->vr        = 0;
+			x25->vl        = 0;
+			x25->state     = X25_STATE_3;
+			sk->sk_state   = TCP_ESTABLISHED;
+			/*
+			 *	Parse the data in the frame.
+			 */
+			skb_pull(skb, X25_STD_MIN_LEN);
+			skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr));
+			skb_pull(skb,
+				 x25_parse_facilities(skb, &x25->facilities,
+						      &x25->vc_facil_mask));
+			/*
+			 *	Copy any Call User Data.
+			 */
+			if (skb->len >= 0) {
+				memcpy(x25->calluserdata.cuddata, skb->data,
+				       skb->len);
+				x25->calluserdata.cudlength = skb->len;
+			}
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_state_change(sk);
+			break;
+		}
+		case X25_CLEAR_REQUEST:
+			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+			x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]);
+			break;
+
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 2, Awaiting Clear Confirmation State.
+ * The handling of the timer(s) is in file x25_timer.c
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	switch (frametype) {
+
+		case X25_CLEAR_REQUEST:
+			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
+			break;
+
+		case X25_CLEAR_CONFIRMATION:
+			x25_disconnect(sk, 0, 0, 0);
+			break;
+
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * State machine for state 3, Connected State.
+ * The handling of the timer(s) is in file x25_timer.c
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
+{
+	int queued = 0;
+	int modulus;
+	struct x25_sock *x25 = x25_sk(sk);
+	
+	modulus = (x25->neighbour->extended) ? X25_EMODULUS : X25_SMODULUS;
+
+	switch (frametype) {
+
+		case X25_RESET_REQUEST:
+			x25_write_internal(sk, X25_RESET_CONFIRMATION);
+			x25_stop_timer(sk);
+			x25->condition = 0x00;
+			x25->vs        = 0;
+			x25->vr        = 0;
+			x25->va        = 0;
+			x25->vl        = 0;
+			x25_requeue_frames(sk);
+			break;
+
+		case X25_CLEAR_REQUEST:
+			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
+			break;
+
+		case X25_RR:
+		case X25_RNR:
+			if (!x25_validate_nr(sk, nr)) {
+				x25_clear_queues(sk);
+				x25_write_internal(sk, X25_RESET_REQUEST);
+				x25_start_t22timer(sk);
+				x25->condition = 0x00;
+				x25->vs        = 0;
+				x25->vr        = 0;
+				x25->va        = 0;
+				x25->vl        = 0;
+				x25->state     = X25_STATE_4;
+			} else {
+				x25_frames_acked(sk, nr);
+				if (frametype == X25_RNR) {
+					x25->condition |= X25_COND_PEER_RX_BUSY;
+				} else {
+					x25->condition &= ~X25_COND_PEER_RX_BUSY;
+				}
+			}
+			break;
+
+		case X25_DATA:	/* XXX */
+			x25->condition &= ~X25_COND_PEER_RX_BUSY;
+			if ((ns != x25->vr) || !x25_validate_nr(sk, nr)) {
+				x25_clear_queues(sk);
+				x25_write_internal(sk, X25_RESET_REQUEST);
+				x25_start_t22timer(sk);
+				x25->condition = 0x00;
+				x25->vs        = 0;
+				x25->vr        = 0;
+				x25->va        = 0;
+				x25->vl        = 0;
+				x25->state     = X25_STATE_4;
+				break;
+			}
+			x25_frames_acked(sk, nr);
+			if (ns == x25->vr) {
+				if (x25_queue_rx_frame(sk, skb, m) == 0) {
+					x25->vr = (x25->vr + 1) % modulus;
+					queued = 1;
+				} else {
+					/* Should never happen */
+					x25_clear_queues(sk);
+					x25_write_internal(sk, X25_RESET_REQUEST);
+					x25_start_t22timer(sk);
+					x25->condition = 0x00;
+					x25->vs        = 0;
+					x25->vr        = 0;
+					x25->va        = 0;
+					x25->vl        = 0;
+					x25->state     = X25_STATE_4;
+					break;
+				}
+				if (atomic_read(&sk->sk_rmem_alloc) >
+				    (sk->sk_rcvbuf / 2))
+					x25->condition |= X25_COND_OWN_RX_BUSY;
+			}
+			/*
+			 *	If the window is full Ack it immediately, else
+			 *	start the holdback timer.
+			 */
+			if (((x25->vl + x25->facilities.winsize_in) % modulus) == x25->vr) {
+				x25->condition &= ~X25_COND_ACK_PENDING;
+				x25_stop_timer(sk);
+				x25_enquiry_response(sk);
+			} else {
+				x25->condition |= X25_COND_ACK_PENDING;
+				x25_start_t2timer(sk);
+			}
+			break;
+
+		case X25_INTERRUPT_CONFIRMATION:
+			x25->intflag = 0;
+			break;
+
+		case X25_INTERRUPT:
+			if (sock_flag(sk, SOCK_URGINLINE))
+				queued = !sock_queue_rcv_skb(sk, skb);
+			else {
+				skb_set_owner_r(skb, sk);
+				skb_queue_tail(&x25->interrupt_in_queue, skb);
+				queued = 1;
+			}
+			sk_send_sigurg(sk);
+			x25_write_internal(sk, X25_INTERRUPT_CONFIRMATION);
+			break;
+
+		default:
+			printk(KERN_WARNING "x25: unknown %02X in state 3\n", frametype);
+			break;
+	}
+
+	return queued;
+}
+
+/*
+ * State machine for state 4, Awaiting Reset Confirmation State.
+ * The handling of the timer(s) is in file x25_timer.c
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+	switch (frametype) {
+
+		case X25_RESET_REQUEST:
+			x25_write_internal(sk, X25_RESET_CONFIRMATION);
+		case X25_RESET_CONFIRMATION: {
+			struct x25_sock *x25 = x25_sk(sk);
+
+			x25_stop_timer(sk);
+			x25->condition = 0x00;
+			x25->va        = 0;
+			x25->vr        = 0;
+			x25->vs        = 0;
+			x25->vl        = 0;
+			x25->state     = X25_STATE_3;
+			x25_requeue_frames(sk);
+			break;
+		}
+		case X25_CLEAR_REQUEST:
+			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
+			break;
+
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+/* Higher level upcall for a LAPB frame */
+int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+	int queued = 0, frametype, ns, nr, q, d, m;
+
+	if (x25->state == X25_STATE_0)
+		return 0;
+
+	frametype = x25_decode(sk, skb, &ns, &nr, &q, &d, &m);
+
+	switch (x25->state) {
+		case X25_STATE_1:
+			queued = x25_state1_machine(sk, skb, frametype);
+			break;
+		case X25_STATE_2:
+			queued = x25_state2_machine(sk, skb, frametype);
+			break;
+		case X25_STATE_3:
+			queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m);
+			break;
+		case X25_STATE_4:
+			queued = x25_state4_machine(sk, skb, frametype);
+			break;
+	}
+
+	x25_kick(sk);
+
+	return queued;
+}
+
+int x25_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int queued = x25_process_rx_frame(sk, skb);
+
+	if (!queued)
+		kfree_skb(skb);
+
+	return 0;
+}
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
new file mode 100644
index 00000000000..0a760fe6684
--- /dev/null
+++ b/net/x25/x25_link.c
@@ -0,0 +1,401 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	  Started coding.
+ *	X.25 002	Jonathan Naylor	  New timer architecture.
+ *	mar/20/00	Daniela Squassoni Disabling/enabling of facilities 
+ *					  negotiation.
+ *	2000-09-04	Henner Eisen	  dev_hold() / dev_put() for x25_neigh.
+ */
+
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <net/x25.h>
+
+static struct list_head x25_neigh_list = LIST_HEAD_INIT(x25_neigh_list);
+static DEFINE_RWLOCK(x25_neigh_list_lock);
+
+static void x25_t20timer_expiry(unsigned long);
+
+static void x25_transmit_restart_confirmation(struct x25_neigh *nb);
+static void x25_transmit_restart_request(struct x25_neigh *nb);
+
+/*
+ *	Linux set/reset timer routines
+ */
+static inline void x25_start_t20timer(struct x25_neigh *nb)
+{
+	mod_timer(&nb->t20timer, jiffies + nb->t20);
+}
+
+static void x25_t20timer_expiry(unsigned long param)
+{
+	struct x25_neigh *nb = (struct x25_neigh *)param;
+
+	x25_transmit_restart_request(nb);
+
+	x25_start_t20timer(nb);
+}
+
+static inline void x25_stop_t20timer(struct x25_neigh *nb)
+{
+	del_timer(&nb->t20timer);
+}
+
+static inline int x25_t20timer_pending(struct x25_neigh *nb)
+{
+	return timer_pending(&nb->t20timer);
+}
+
+/*
+ *	This handles all restart and diagnostic frames.
+ */
+void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb,
+		      unsigned short frametype)
+{
+	struct sk_buff *skbn;
+	int confirm;
+
+	switch (frametype) {
+		case X25_RESTART_REQUEST:
+			confirm = !x25_t20timer_pending(nb);
+			x25_stop_t20timer(nb);
+			nb->state = X25_LINK_STATE_3;
+			if (confirm)
+				x25_transmit_restart_confirmation(nb);
+			break;
+
+		case X25_RESTART_CONFIRMATION:
+			x25_stop_t20timer(nb);
+			nb->state = X25_LINK_STATE_3;
+			break;
+
+		case X25_DIAGNOSTIC:
+			printk(KERN_WARNING "x25: diagnostic #%d - "
+			       "%02X %02X %02X\n",
+			       skb->data[3], skb->data[4],
+			       skb->data[5], skb->data[6]);
+			break;
+			
+		default:
+			printk(KERN_WARNING "x25: received unknown %02X "
+			       "with LCI 000\n", frametype);
+			break;
+	}
+
+	if (nb->state == X25_LINK_STATE_3)
+		while ((skbn = skb_dequeue(&nb->queue)) != NULL)
+			x25_send_frame(skbn, nb);
+}
+
+/*
+ *	This routine is called when a Restart Request is needed
+ */
+static void x25_transmit_restart_request(struct x25_neigh *nb)
+{
+	unsigned char *dptr;
+	int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN + 2;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	skb_reserve(skb, X25_MAX_L2_LEN);
+
+	dptr = skb_put(skb, X25_STD_MIN_LEN + 2);
+
+	*dptr++ = nb->extended ? X25_GFI_EXTSEQ : X25_GFI_STDSEQ;
+	*dptr++ = 0x00;
+	*dptr++ = X25_RESTART_REQUEST;
+	*dptr++ = 0x00;
+	*dptr++ = 0;
+
+	skb->sk = NULL;
+
+	x25_send_frame(skb, nb);
+}
+
+/*
+ * This routine is called when a Restart Confirmation is needed
+ */
+static void x25_transmit_restart_confirmation(struct x25_neigh *nb)
+{
+	unsigned char *dptr;
+	int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	skb_reserve(skb, X25_MAX_L2_LEN);
+
+	dptr = skb_put(skb, X25_STD_MIN_LEN);
+
+	*dptr++ = nb->extended ? X25_GFI_EXTSEQ : X25_GFI_STDSEQ;
+	*dptr++ = 0x00;
+	*dptr++ = X25_RESTART_CONFIRMATION;
+
+	skb->sk = NULL;
+
+	x25_send_frame(skb, nb);
+}
+
+/*
+ *	This routine is called when a Clear Request is needed outside of the context
+ *	of a connected socket.
+ */
+void x25_transmit_clear_request(struct x25_neigh *nb, unsigned int lci,
+				unsigned char cause)
+{
+	unsigned char *dptr;
+	int len = X25_MAX_L2_LEN + X25_STD_MIN_LEN + 2;
+	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (!skb)
+		return;
+
+	skb_reserve(skb, X25_MAX_L2_LEN);
+
+	dptr = skb_put(skb, X25_STD_MIN_LEN + 2);
+
+	*dptr++ = ((lci >> 8) & 0x0F) | (nb->extended ?
+					 X25_GFI_EXTSEQ :
+					 X25_GFI_STDSEQ);
+	*dptr++ = (lci >> 0) & 0xFF;
+	*dptr++ = X25_CLEAR_REQUEST;
+	*dptr++ = cause;
+	*dptr++ = 0x00;
+
+	skb->sk = NULL;
+
+	x25_send_frame(skb, nb);
+}
+
+void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *nb)
+{
+	switch (nb->state) {
+		case X25_LINK_STATE_0:
+			skb_queue_tail(&nb->queue, skb);
+			nb->state = X25_LINK_STATE_1;
+			x25_establish_link(nb);
+			break;
+		case X25_LINK_STATE_1:
+		case X25_LINK_STATE_2:
+			skb_queue_tail(&nb->queue, skb);
+			break;
+		case X25_LINK_STATE_3:
+			x25_send_frame(skb, nb);
+			break;
+	}
+}
+
+/*
+ *	Called when the link layer has become established.
+ */
+void x25_link_established(struct x25_neigh *nb)
+{
+	switch (nb->state) {
+		case X25_LINK_STATE_0:
+			nb->state = X25_LINK_STATE_2;
+			break;
+		case X25_LINK_STATE_1:
+			x25_transmit_restart_request(nb);
+			nb->state = X25_LINK_STATE_2;
+			x25_start_t20timer(nb);
+			break;
+	}
+}
+
+/*
+ *	Called when the link layer has terminated, or an establishment
+ *	request has failed.
+ */
+
+void x25_link_terminated(struct x25_neigh *nb)
+{
+	nb->state = X25_LINK_STATE_0;
+	/* Out of order: clear existing virtual calls (X.25 03/93 4.6.3) */
+	x25_kill_by_neigh(nb);
+}
+
+/*
+ *	Add a new device.
+ */
+void x25_link_device_up(struct net_device *dev)
+{
+	struct x25_neigh *nb = kmalloc(sizeof(*nb), GFP_ATOMIC);
+
+	if (!nb)
+		return;
+
+	skb_queue_head_init(&nb->queue);
+
+	init_timer(&nb->t20timer);
+	nb->t20timer.data     = (unsigned long)nb;
+	nb->t20timer.function = &x25_t20timer_expiry;
+
+	dev_hold(dev);
+	nb->dev      = dev;
+	nb->state    = X25_LINK_STATE_0;
+	nb->extended = 0;
+	/*
+	 * Enables negotiation
+	 */
+	nb->global_facil_mask = X25_MASK_REVERSE |
+				       X25_MASK_THROUGHPUT |
+				       X25_MASK_PACKET_SIZE |
+				       X25_MASK_WINDOW_SIZE;
+	nb->t20      = sysctl_x25_restart_request_timeout;
+	atomic_set(&nb->refcnt, 1);
+
+	write_lock_bh(&x25_neigh_list_lock);
+	list_add(&nb->node, &x25_neigh_list);
+	write_unlock_bh(&x25_neigh_list_lock);
+}
+
+/**
+ *	__x25_remove_neigh - remove neighbour from x25_neigh_list
+ *	@nb - neigh to remove
+ *
+ *	Remove neighbour from x25_neigh_list. If it was there.
+ *	Caller must hold x25_neigh_list_lock.
+ */
+static void __x25_remove_neigh(struct x25_neigh *nb)
+{
+	skb_queue_purge(&nb->queue);
+	x25_stop_t20timer(nb);
+
+	if (nb->node.next) {
+		list_del(&nb->node);
+		x25_neigh_put(nb);
+	}
+}
+
+/*
+ *	A device has been removed, remove its links.
+ */
+void x25_link_device_down(struct net_device *dev)
+{
+	struct x25_neigh *nb;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_neigh_list_lock);
+
+	list_for_each_safe(entry, tmp, &x25_neigh_list) {
+		nb = list_entry(entry, struct x25_neigh, node);
+
+		if (nb->dev == dev) {
+			__x25_remove_neigh(nb);
+			dev_put(dev);
+		}
+	}
+
+	write_unlock_bh(&x25_neigh_list_lock);
+}
+
+/*
+ *	Given a device, return the neighbour address.
+ */
+struct x25_neigh *x25_get_neigh(struct net_device *dev)
+{
+	struct x25_neigh *nb, *use = NULL;
+	struct list_head *entry;
+
+	read_lock_bh(&x25_neigh_list_lock);
+	list_for_each(entry, &x25_neigh_list) {
+		nb = list_entry(entry, struct x25_neigh, node);
+
+		if (nb->dev == dev) {
+			use = nb;
+			break;
+		}
+	}
+
+	if (use)
+		x25_neigh_hold(use);
+	read_unlock_bh(&x25_neigh_list_lock);
+	return use;
+}
+
+/*
+ *	Handle the ioctls that control the subscription functions.
+ */
+int x25_subscr_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct x25_subscrip_struct x25_subscr;
+	struct x25_neigh *nb;
+	struct net_device *dev;
+	int rc = -EINVAL;
+
+	if (cmd != SIOCX25GSUBSCRIP && cmd != SIOCX25SSUBSCRIP)
+		goto out;
+
+	rc = -EFAULT;
+	if (copy_from_user(&x25_subscr, arg, sizeof(x25_subscr)))
+		goto out;
+
+	rc = -EINVAL;
+	if ((dev = x25_dev_get(x25_subscr.device)) == NULL)
+		goto out;
+
+	if ((nb = x25_get_neigh(dev)) == NULL)
+		goto out_dev_put;
+
+	dev_put(dev);
+
+	if (cmd == SIOCX25GSUBSCRIP) {
+		x25_subscr.extended	     = nb->extended;
+		x25_subscr.global_facil_mask = nb->global_facil_mask;
+		rc = copy_to_user(arg, &x25_subscr,
+				  sizeof(x25_subscr)) ? -EFAULT : 0;
+	} else {
+		rc = -EINVAL;
+		if (!(x25_subscr.extended && x25_subscr.extended != 1)) {
+			rc = 0;
+			nb->extended	     = x25_subscr.extended;
+			nb->global_facil_mask = x25_subscr.global_facil_mask;
+		}
+	}
+	x25_neigh_put(nb);
+out:
+	return rc;
+out_dev_put:
+	dev_put(dev);
+	goto out;
+}
+
+
+/*
+ *	Release all memory associated with X.25 neighbour structures.
+ */
+void __exit x25_link_free(void)
+{
+	struct x25_neigh *nb;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_neigh_list_lock);
+
+	list_for_each_safe(entry, tmp, &x25_neigh_list) {
+		nb = list_entry(entry, struct x25_neigh, node);
+		__x25_remove_neigh(nb);
+	}
+	write_unlock_bh(&x25_neigh_list_lock);
+}
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
new file mode 100644
index 00000000000..a2e62cea819
--- /dev/null
+++ b/net/x25/x25_out.c
@@ -0,0 +1,226 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	Started coding.
+ *	X.25 002	Jonathan Naylor	New timer architecture.
+ *	2000-09-04	Henner Eisen	Prevented x25_output() skb leakage.
+ *	2000-10-27	Henner Eisen	MSG_DONTWAIT for fragment allocation.
+ *	2000-11-10	Henner Eisen	x25_send_iframe(): re-queued frames
+ *					needed cleaned seq-number fields.
+ */
+
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/x25.h>
+
+static int x25_pacsize_to_bytes(unsigned int pacsize)
+{
+	int bytes = 1;
+
+	if (!pacsize)
+		return 128;
+
+	while (pacsize-- > 0)
+		bytes *= 2;
+
+	return bytes;
+}
+
+/*
+ *	This is where all X.25 information frames pass.
+ *
+ *      Returns the amount of user data bytes sent on success
+ *      or a negative error code on failure.
+ */
+int x25_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *skbn;
+	unsigned char header[X25_EXT_MIN_LEN];
+	int err, frontlen, len;
+	int sent=0, noblock = X25_SKB_CB(skb)->flags & MSG_DONTWAIT;
+	struct x25_sock *x25 = x25_sk(sk);
+	int header_len = x25->neighbour->extended ? X25_EXT_MIN_LEN :
+						    X25_STD_MIN_LEN;
+	int max_len = x25_pacsize_to_bytes(x25->facilities.pacsize_out);
+
+	if (skb->len - header_len > max_len) {
+		/* Save a copy of the Header */
+		memcpy(header, skb->data, header_len);
+		skb_pull(skb, header_len);
+
+		frontlen = skb_headroom(skb);
+
+		while (skb->len > 0) {
+			if ((skbn = sock_alloc_send_skb(sk, frontlen + max_len,
+							noblock, &err)) == NULL){
+				if (err == -EWOULDBLOCK && noblock){
+					kfree_skb(skb);
+					return sent;
+				}
+				SOCK_DEBUG(sk, "x25_output: fragment alloc"
+					       " failed, err=%d, %d bytes "
+					       "sent\n", err, sent);
+				return err;
+			}
+				
+			skb_reserve(skbn, frontlen);
+
+			len = max_len > skb->len ? skb->len : max_len;
+
+			/* Copy the user data */
+			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_pull(skb, len);
+
+			/* Duplicate the Header */
+			skb_push(skbn, header_len);
+			memcpy(skbn->data, header, header_len);
+
+			if (skb->len > 0) {
+				if (x25->neighbour->extended)
+					skbn->data[3] |= X25_EXT_M_BIT;
+				else
+					skbn->data[2] |= X25_STD_M_BIT;
+			}
+
+			skb_queue_tail(&sk->sk_write_queue, skbn);
+			sent += len;
+		}
+		
+		kfree_skb(skb);
+	} else {
+		skb_queue_tail(&sk->sk_write_queue, skb);
+		sent = skb->len - header_len;
+	}
+	return sent;
+}
+
+/* 
+ *	This procedure is passed a buffer descriptor for an iframe. It builds
+ *	the rest of the control part of the frame and then writes it out.
+ */
+static void x25_send_iframe(struct sock *sk, struct sk_buff *skb)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (!skb)
+		return;
+
+	if (x25->neighbour->extended) {
+		skb->data[2]  = (x25->vs << 1) & 0xFE;
+		skb->data[3] &= X25_EXT_M_BIT;
+		skb->data[3] |= (x25->vr << 1) & 0xFE;
+	} else {
+		skb->data[2] &= X25_STD_M_BIT;
+		skb->data[2] |= (x25->vs << 1) & 0x0E;
+		skb->data[2] |= (x25->vr << 5) & 0xE0;
+	}
+
+	x25_transmit_link(skb, x25->neighbour);	
+}
+
+void x25_kick(struct sock *sk)
+{
+	struct sk_buff *skb, *skbn;
+	unsigned short start, end;
+	int modulus;
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (x25->state != X25_STATE_3)
+		return;
+
+	/*
+	 *	Transmit interrupt data.
+	 */
+	if (!x25->intflag && skb_peek(&x25->interrupt_out_queue) != NULL) {
+		x25->intflag = 1;
+		skb = skb_dequeue(&x25->interrupt_out_queue);
+		x25_transmit_link(skb, x25->neighbour);
+	}
+
+	if (x25->condition & X25_COND_PEER_RX_BUSY)
+		return;
+
+	if (!skb_peek(&sk->sk_write_queue))
+		return;
+
+	modulus = x25->neighbour->extended ? X25_EMODULUS : X25_SMODULUS;
+
+	start   = skb_peek(&x25->ack_queue) ? x25->vs : x25->va;
+	end     = (x25->va + x25->facilities.winsize_out) % modulus;
+
+	if (start == end)
+		return;
+
+	x25->vs = start;
+
+	/*
+	 * Transmit data until either we're out of data to send or
+	 * the window is full.
+	 */
+
+	skb = skb_dequeue(&sk->sk_write_queue);
+
+	do {
+		if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+			skb_queue_head(&sk->sk_write_queue, skb);
+			break;
+		}
+
+		skb_set_owner_w(skbn, sk);
+
+		/*
+		 * Transmit the frame copy.
+		 */
+		x25_send_iframe(sk, skbn);
+
+		x25->vs = (x25->vs + 1) % modulus;
+
+		/*
+		 * Requeue the original data frame.
+		 */
+		skb_queue_tail(&x25->ack_queue, skb);
+
+	} while (x25->vs != end &&
+		 (skb = skb_dequeue(&sk->sk_write_queue)) != NULL);
+
+	x25->vl         = x25->vr;
+	x25->condition &= ~X25_COND_ACK_PENDING;
+
+	x25_stop_timer(sk);
+}
+
+/*
+ * The following routines are taken from page 170 of the 7th ARRL Computer
+ * Networking Conference paper, as is the whole state machine.
+ */
+
+void x25_enquiry_response(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (x25->condition & X25_COND_OWN_RX_BUSY)
+		x25_write_internal(sk, X25_RNR);
+	else
+		x25_write_internal(sk, X25_RR);
+
+	x25->vl         = x25->vr;
+	x25->condition &= ~X25_COND_ACK_PENDING;
+
+	x25_stop_timer(sk);
+}
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
new file mode 100644
index 00000000000..dfb80116c59
--- /dev/null
+++ b/net/x25/x25_proc.c
@@ -0,0 +1,256 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.4 with seq_file support
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	2002/10/06	Arnaldo Carvalho de Melo  seq_file support
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/sock.h>
+#include <net/x25.h>
+
+#ifdef CONFIG_PROC_FS
+static __inline__ struct x25_route *x25_get_route_idx(loff_t pos)
+{
+	struct list_head *route_entry;
+	struct x25_route *rt = NULL;
+
+	list_for_each(route_entry, &x25_route_list) {
+		rt = list_entry(route_entry, struct x25_route, node);
+		if (!pos--)
+			goto found;
+	}
+	rt = NULL;
+found:
+	return rt;
+}
+
+static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&x25_route_list_lock);
+	return l ? x25_get_route_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *x25_seq_route_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct x25_route *rt;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		rt = NULL;
+		if (!list_empty(&x25_route_list))
+			rt = list_entry(x25_route_list.next,
+					struct x25_route, node);
+		goto out;
+	}
+	rt = v;
+	if (rt->node.next != &x25_route_list)
+		rt = list_entry(rt->node.next, struct x25_route, node);
+	else 
+		rt = NULL;
+out:
+	return rt;
+}
+
+static void x25_seq_route_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&x25_route_list_lock);
+}
+
+static int x25_seq_route_show(struct seq_file *seq, void *v)
+{
+	struct x25_route *rt;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Address          Digits  Device\n");
+		goto out;
+	}
+
+	rt = v;
+	seq_printf(seq, "%-15s  %-6d  %-5s\n",
+		   rt->address.x25_addr, rt->sigdigits,
+		   rt->dev ? rt->dev->name : "???");
+out:
+	return 0;
+} 
+
+static __inline__ struct sock *x25_get_socket_idx(loff_t pos)
+{
+	struct sock *s;
+	struct hlist_node *node;
+
+	sk_for_each(s, node, &x25_list)
+		if (!pos--)
+			goto found;
+	s = NULL;
+found:
+	return s;
+}
+
+static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t l = *pos;
+
+	read_lock_bh(&x25_list_lock);
+	return l ? x25_get_socket_idx(--l) : SEQ_START_TOKEN;
+}
+
+static void *x25_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock *s;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN) {
+		s = sk_head(&x25_list);
+		goto out;
+	}
+	s = sk_next(v);
+out:
+	return s;
+}
+
+static void x25_seq_socket_stop(struct seq_file *seq, void *v)
+{
+	read_unlock_bh(&x25_list_lock);
+}
+
+static int x25_seq_socket_show(struct seq_file *seq, void *v)
+{
+	struct sock *s;
+	struct x25_sock *x25;
+	struct net_device *dev;
+	const char *devname;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "dest_addr  src_addr   dev   lci st vs vr "
+				"va   t  t2 t21 t22 t23 Snd-Q Rcv-Q inode\n");
+		goto out;
+	}
+
+	s = v;
+	x25 = x25_sk(s);
+
+	if (!x25->neighbour || (dev = x25->neighbour->dev) == NULL)
+		devname = "???";
+	else
+		devname = x25->neighbour->dev->name;
+
+	seq_printf(seq, "%-10s %-10s %-5s %3.3X  %d  %d  %d  %d %3lu %3lu "
+			"%3lu %3lu %3lu %5d %5d %ld\n",
+		   !x25->dest_addr.x25_addr[0] ? "*" : x25->dest_addr.x25_addr,
+		   !x25->source_addr.x25_addr[0] ? "*" : x25->source_addr.x25_addr,
+		   devname, x25->lci & 0x0FFF, x25->state, x25->vs, x25->vr,
+		   x25->va, x25_display_timer(s) / HZ, x25->t2  / HZ,
+		   x25->t21 / HZ, x25->t22 / HZ, x25->t23 / HZ,
+		   atomic_read(&s->sk_wmem_alloc),
+		   atomic_read(&s->sk_rmem_alloc),
+		   s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L);
+out:
+	return 0;
+} 
+
+static struct seq_operations x25_seq_route_ops = {
+	.start  = x25_seq_route_start,
+	.next   = x25_seq_route_next,
+	.stop   = x25_seq_route_stop,
+	.show   = x25_seq_route_show,
+};
+
+static struct seq_operations x25_seq_socket_ops = {
+	.start  = x25_seq_socket_start,
+	.next   = x25_seq_socket_next,
+	.stop   = x25_seq_socket_stop,
+	.show   = x25_seq_socket_show,
+};
+
+static int x25_seq_socket_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &x25_seq_socket_ops);
+}
+
+static int x25_seq_route_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &x25_seq_route_ops);
+}
+
+static struct file_operations x25_seq_socket_fops = {
+	.owner		= THIS_MODULE,
+	.open		= x25_seq_socket_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct file_operations x25_seq_route_fops = {
+	.owner		= THIS_MODULE,
+	.open		= x25_seq_route_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static struct proc_dir_entry *x25_proc_dir;
+
+int __init x25_proc_init(void)
+{
+	struct proc_dir_entry *p;
+	int rc = -ENOMEM;
+
+	x25_proc_dir = proc_mkdir("x25", proc_net);
+	if (!x25_proc_dir)
+		goto out;
+
+	p = create_proc_entry("route", S_IRUGO, x25_proc_dir);
+	if (!p)
+		goto out_route;
+	p->proc_fops = &x25_seq_route_fops;
+
+	p = create_proc_entry("socket", S_IRUGO, x25_proc_dir);
+	if (!p)
+		goto out_socket;
+	p->proc_fops = &x25_seq_socket_fops;
+	rc = 0;
+out:
+	return rc;
+out_socket:
+	remove_proc_entry("route", x25_proc_dir);
+out_route:
+	remove_proc_entry("x25", proc_net);
+	goto out;
+}
+
+void __exit x25_proc_exit(void)
+{
+	remove_proc_entry("route", x25_proc_dir);
+	remove_proc_entry("socket", x25_proc_dir);
+	remove_proc_entry("x25", proc_net);
+}
+
+#else /* CONFIG_PROC_FS */
+
+int __init x25_proc_init(void)
+{
+	return 0;
+}
+
+void __exit x25_proc_exit(void)
+{
+}
+#endif /* CONFIG_PROC_FS */
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
new file mode 100644
index 00000000000..6c5d3751703
--- /dev/null
+++ b/net/x25/x25_route.c
@@ -0,0 +1,221 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	Started coding.
+ */
+
+#include <linux/config.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <net/x25.h>
+
+struct list_head x25_route_list = LIST_HEAD_INIT(x25_route_list);
+DEFINE_RWLOCK(x25_route_list_lock);
+
+/*
+ *	Add a new route.
+ */
+static int x25_add_route(struct x25_address *address, unsigned int sigdigits,
+			 struct net_device *dev)
+{
+	struct x25_route *rt;
+	struct list_head *entry;
+	int rc = -EINVAL;
+
+	write_lock_bh(&x25_route_list_lock);
+
+	list_for_each(entry, &x25_route_list) {
+		rt = list_entry(entry, struct x25_route, node);
+
+		if (!memcmp(&rt->address, address, sigdigits) &&
+		    rt->sigdigits == sigdigits)
+			goto out;
+	}
+
+	rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
+	rc = -ENOMEM;
+	if (!rt)
+		goto out;
+
+	strcpy(rt->address.x25_addr, "000000000000000");
+	memcpy(rt->address.x25_addr, address->x25_addr, sigdigits);
+
+	rt->sigdigits = sigdigits;
+	rt->dev       = dev;
+	atomic_set(&rt->refcnt, 1);
+
+	list_add(&rt->node, &x25_route_list);
+	rc = 0;
+out:
+	write_unlock_bh(&x25_route_list_lock);
+	return rc;
+}
+
+/**
+ * __x25_remove_route - remove route from x25_route_list
+ * @rt - route to remove
+ *
+ * Remove route from x25_route_list. If it was there.
+ * Caller must hold x25_route_list_lock.
+ */
+static void __x25_remove_route(struct x25_route *rt)
+{
+	if (rt->node.next) {
+		list_del(&rt->node);
+		x25_route_put(rt);
+	}
+}
+
+static int x25_del_route(struct x25_address *address, unsigned int sigdigits,
+			 struct net_device *dev)
+{
+	struct x25_route *rt;
+	struct list_head *entry;
+	int rc = -EINVAL;
+
+	write_lock_bh(&x25_route_list_lock);
+
+	list_for_each(entry, &x25_route_list) {
+		rt = list_entry(entry, struct x25_route, node);
+
+		if (!memcmp(&rt->address, address, sigdigits) &&
+		    rt->sigdigits == sigdigits && rt->dev == dev) {
+			__x25_remove_route(rt);
+			rc = 0;
+			break;
+		}
+	}
+
+	write_unlock_bh(&x25_route_list_lock);
+	return rc;
+}
+
+/*
+ *	A device has been removed, remove its routes.
+ */
+void x25_route_device_down(struct net_device *dev)
+{
+	struct x25_route *rt;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_route_list_lock);
+
+	list_for_each_safe(entry, tmp, &x25_route_list) {
+		rt = list_entry(entry, struct x25_route, node);
+
+		if (rt->dev == dev)
+			__x25_remove_route(rt);
+	}
+	write_unlock_bh(&x25_route_list_lock);
+}
+
+/*
+ *	Check that the device given is a valid X.25 interface that is "up".
+ */
+struct net_device *x25_dev_get(char *devname)
+{
+	struct net_device *dev = dev_get_by_name(devname);
+
+	if (dev &&
+	    (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25
+#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+					&& dev->type != ARPHRD_ETHER
+#endif
+					)))
+		dev_put(dev);
+
+	return dev;
+}
+
+/**
+ * 	x25_get_route -	Find a route given an X.25 address.
+ * 	@addr - address to find a route for
+ *
+ * 	Find a route given an X.25 address.
+ */
+struct x25_route *x25_get_route(struct x25_address *addr)
+{
+	struct x25_route *rt, *use = NULL;
+	struct list_head *entry;
+
+	read_lock_bh(&x25_route_list_lock);
+
+	list_for_each(entry, &x25_route_list) {
+		rt = list_entry(entry, struct x25_route, node);
+
+		if (!memcmp(&rt->address, addr, rt->sigdigits)) {
+			if (!use)
+				use = rt;
+			else if (rt->sigdigits > use->sigdigits)
+				use = rt;
+		}
+	}
+
+	if (use)
+		x25_route_hold(use);
+
+	read_unlock_bh(&x25_route_list_lock);
+	return use;
+}
+
+/*
+ *	Handle the ioctls that control the routing functions.
+ */
+int x25_route_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct x25_route_struct rt;
+	struct net_device *dev;
+	int rc = -EINVAL;
+
+	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
+		goto out;
+
+	rc = -EFAULT;
+	if (copy_from_user(&rt, arg, sizeof(rt)))
+		goto out;
+
+	rc = -EINVAL;
+	if (rt.sigdigits < 0 || rt.sigdigits > 15)
+		goto out;
+
+	dev = x25_dev_get(rt.device);
+	if (!dev)
+		goto out;
+
+	if (cmd == SIOCADDRT)
+		rc = x25_add_route(&rt.address, rt.sigdigits, dev);
+	else
+		rc = x25_del_route(&rt.address, rt.sigdigits, dev);
+	dev_put(dev);
+out:
+	return rc;
+}
+
+/*
+ *	Release all memory associated with X.25 routing structures.
+ */
+void __exit x25_route_free(void)
+{
+	struct x25_route *rt;
+	struct list_head *entry, *tmp;
+
+	write_lock_bh(&x25_route_list_lock);
+	list_for_each_safe(entry, tmp, &x25_route_list) {
+		rt = list_entry(entry, struct x25_route, node);
+		__x25_remove_route(rt);
+	}
+	write_unlock_bh(&x25_route_list_lock);
+}
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
new file mode 100644
index 00000000000..183fea3bba6
--- /dev/null
+++ b/net/x25/x25_subr.c
@@ -0,0 +1,374 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work.
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	  Started coding.
+ *	X.25 002	Jonathan Naylor	  Centralised disconnection processing.
+ *	mar/20/00	Daniela Squassoni Disabling/enabling of facilities
+ *					  negotiation.
+ *	jun/24/01	Arnaldo C. Melo	  use skb_queue_purge, cleanups
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/x25.h>
+
+/*
+ *	This routine purges all of the queues of frames.
+ */
+void x25_clear_queues(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	skb_queue_purge(&sk->sk_write_queue);
+	skb_queue_purge(&x25->ack_queue);
+	skb_queue_purge(&x25->interrupt_in_queue);
+	skb_queue_purge(&x25->interrupt_out_queue);
+	skb_queue_purge(&x25->fragment_queue);
+}
+
+
+/*
+ * This routine purges the input queue of those frames that have been
+ * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the
+ * SDL diagram.
+*/
+void x25_frames_acked(struct sock *sk, unsigned short nr)
+{
+	struct sk_buff *skb;
+	struct x25_sock *x25 = x25_sk(sk);
+	int modulus = x25->neighbour->extended ? X25_EMODULUS : X25_SMODULUS;
+
+	/*
+	 * Remove all the ack-ed frames from the ack queue.
+	 */
+	if (x25->va != nr)
+		while (skb_peek(&x25->ack_queue) && x25->va != nr) {
+			skb = skb_dequeue(&x25->ack_queue);
+			kfree_skb(skb);
+			x25->va = (x25->va + 1) % modulus;
+		}
+}
+
+void x25_requeue_frames(struct sock *sk)
+{
+	struct sk_buff *skb, *skb_prev = NULL;
+
+	/*
+	 * Requeue all the un-ack-ed frames on the output queue to be picked
+	 * up by x25_kick. This arrangement handles the possibility of an empty
+	 * output queue.
+	 */
+	while ((skb = skb_dequeue(&x25_sk(sk)->ack_queue)) != NULL) {
+		if (!skb_prev)
+			skb_queue_head(&sk->sk_write_queue, skb);
+		else
+			skb_append(skb_prev, skb);
+		skb_prev = skb;
+	}
+}
+
+/*
+ *	Validate that the value of nr is between va and vs. Return true or
+ *	false for testing.
+ */
+int x25_validate_nr(struct sock *sk, unsigned short nr)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+	unsigned short vc = x25->va;
+	int modulus = x25->neighbour->extended ? X25_EMODULUS : X25_SMODULUS;
+
+	while (vc != x25->vs) {
+		if (nr == vc)
+			return 1;
+		vc = (vc + 1) % modulus;
+	}
+
+	return nr == x25->vs ? 1 : 0;
+}
+
+/*
+ *  This routine is called when the packet layer internally generates a
+ *  control frame.
+ */
+void x25_write_internal(struct sock *sk, int frametype)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+	struct sk_buff *skb;
+	unsigned char  *dptr;
+	unsigned char  facilities[X25_MAX_FAC_LEN];
+	unsigned char  addresses[1 + X25_ADDR_LEN];
+	unsigned char  lci1, lci2;
+	/*
+	 *	Default safe frame size.
+	 */
+	int len = X25_MAX_L2_LEN + X25_EXT_MIN_LEN;
+
+	/*
+	 *	Adjust frame size.
+	 */
+	switch (frametype) {
+		case X25_CALL_REQUEST:
+			len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN +
+			       X25_MAX_CUD_LEN;
+			break;
+		case X25_CALL_ACCEPTED:
+			len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN;
+			break;
+		case X25_CLEAR_REQUEST:
+		case X25_RESET_REQUEST:
+			len += 2;
+			break;
+		case X25_RR:
+		case X25_RNR:
+		case X25_REJ:
+		case X25_CLEAR_CONFIRMATION:
+		case X25_INTERRUPT_CONFIRMATION:
+		case X25_RESET_CONFIRMATION:
+			break;
+		default:
+			printk(KERN_ERR "X.25: invalid frame type %02X\n",
+			       frametype);
+			return;
+	}
+
+	if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+		return;
+
+	/*
+	 *	Space for Ethernet and 802.2 LLC headers.
+	 */
+	skb_reserve(skb, X25_MAX_L2_LEN);
+
+	/*
+	 *	Make space for the GFI and LCI, and fill them in.
+	 */
+	dptr = skb_put(skb, 2);
+
+	lci1 = (x25->lci >> 8) & 0x0F;
+	lci2 = (x25->lci >> 0) & 0xFF;
+
+	if (x25->neighbour->extended) {
+		*dptr++ = lci1 | X25_GFI_EXTSEQ;
+		*dptr++ = lci2;
+	} else {
+		*dptr++ = lci1 | X25_GFI_STDSEQ;
+		*dptr++ = lci2;
+	}
+
+	/*
+	 *	Now fill in the frame type specific information.
+	 */
+	switch (frametype) {
+
+		case X25_CALL_REQUEST:
+			dptr    = skb_put(skb, 1);
+			*dptr++ = X25_CALL_REQUEST;
+			len     = x25_addr_aton(addresses, &x25->dest_addr,
+						&x25->source_addr);
+			dptr    = skb_put(skb, len);
+			memcpy(dptr, addresses, len);
+			len     = x25_create_facilities(facilities,
+							&x25->facilities,
+					     x25->neighbour->global_facil_mask);
+			dptr    = skb_put(skb, len);
+			memcpy(dptr, facilities, len);
+			dptr = skb_put(skb, x25->calluserdata.cudlength);
+			memcpy(dptr, x25->calluserdata.cuddata,
+			       x25->calluserdata.cudlength);
+			x25->calluserdata.cudlength = 0;
+			break;
+
+		case X25_CALL_ACCEPTED:
+			dptr    = skb_put(skb, 2);
+			*dptr++ = X25_CALL_ACCEPTED;
+			*dptr++ = 0x00;		/* Address lengths */
+			len     = x25_create_facilities(facilities,
+							&x25->facilities,
+							x25->vc_facil_mask);
+			dptr    = skb_put(skb, len);
+			memcpy(dptr, facilities, len);
+			dptr = skb_put(skb, x25->calluserdata.cudlength);
+			memcpy(dptr, x25->calluserdata.cuddata,
+			       x25->calluserdata.cudlength);
+			x25->calluserdata.cudlength = 0;
+			break;
+
+		case X25_CLEAR_REQUEST:
+		case X25_RESET_REQUEST:
+			dptr    = skb_put(skb, 3);
+			*dptr++ = frametype;
+			*dptr++ = 0x00;		/* XXX */
+			*dptr++ = 0x00;		/* XXX */
+			break;
+
+		case X25_RR:
+		case X25_RNR:
+		case X25_REJ:
+			if (x25->neighbour->extended) {
+				dptr     = skb_put(skb, 2);
+				*dptr++  = frametype;
+				*dptr++  = (x25->vr << 1) & 0xFE;
+			} else {
+				dptr     = skb_put(skb, 1);
+				*dptr    = frametype;
+				*dptr++ |= (x25->vr << 5) & 0xE0;
+			}
+			break;
+
+		case X25_CLEAR_CONFIRMATION:
+		case X25_INTERRUPT_CONFIRMATION:
+		case X25_RESET_CONFIRMATION:
+			dptr  = skb_put(skb, 1);
+			*dptr = frametype;
+			break;
+	}
+
+	x25_transmit_link(skb, x25->neighbour);
+}
+
+/*
+ *	Unpick the contents of the passed X.25 Packet Layer frame.
+ */
+int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
+	       int *d, int *m)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+	unsigned char *frame = skb->data;
+
+	*ns = *nr = *q = *d = *m = 0;
+
+	switch (frame[2]) {
+		case X25_CALL_REQUEST:
+		case X25_CALL_ACCEPTED:
+		case X25_CLEAR_REQUEST:
+		case X25_CLEAR_CONFIRMATION:
+		case X25_INTERRUPT:
+		case X25_INTERRUPT_CONFIRMATION:
+		case X25_RESET_REQUEST:
+		case X25_RESET_CONFIRMATION:
+		case X25_RESTART_REQUEST:
+		case X25_RESTART_CONFIRMATION:
+		case X25_REGISTRATION_REQUEST:
+		case X25_REGISTRATION_CONFIRMATION:
+		case X25_DIAGNOSTIC:
+			return frame[2];
+	}
+
+	if (x25->neighbour->extended) {
+		if (frame[2] == X25_RR  ||
+		    frame[2] == X25_RNR ||
+		    frame[2] == X25_REJ) {
+			*nr = (frame[3] >> 1) & 0x7F;
+			return frame[2];
+		}
+	} else {
+		if ((frame[2] & 0x1F) == X25_RR  ||
+		    (frame[2] & 0x1F) == X25_RNR ||
+		    (frame[2] & 0x1F) == X25_REJ) {
+			*nr = (frame[2] >> 5) & 0x07;
+			return frame[2] & 0x1F;
+		}
+	}
+
+	if (x25->neighbour->extended) {
+		if ((frame[2] & 0x01) == X25_DATA) {
+			*q  = (frame[0] & X25_Q_BIT) == X25_Q_BIT;
+			*d  = (frame[0] & X25_D_BIT) == X25_D_BIT;
+			*m  = (frame[3] & X25_EXT_M_BIT) == X25_EXT_M_BIT;
+			*nr = (frame[3] >> 1) & 0x7F;
+			*ns = (frame[2] >> 1) & 0x7F;
+			return X25_DATA;
+		}
+	} else {
+		if ((frame[2] & 0x01) == X25_DATA) {
+			*q  = (frame[0] & X25_Q_BIT) == X25_Q_BIT;
+			*d  = (frame[0] & X25_D_BIT) == X25_D_BIT;
+			*m  = (frame[2] & X25_STD_M_BIT) == X25_STD_M_BIT;
+			*nr = (frame[2] >> 5) & 0x07;
+			*ns = (frame[2] >> 1) & 0x07;
+			return X25_DATA;
+		}
+	}
+
+	printk(KERN_DEBUG "X.25: invalid PLP frame %02X %02X %02X\n",
+	       frame[0], frame[1], frame[2]);
+
+	return X25_ILLEGAL;
+}
+
+void x25_disconnect(struct sock *sk, int reason, unsigned char cause,
+		    unsigned char diagnostic)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	x25_clear_queues(sk);
+	x25_stop_timer(sk);
+
+	x25->lci   = 0;
+	x25->state = X25_STATE_0;
+
+	x25->causediag.cause      = cause;
+	x25->causediag.diagnostic = diagnostic;
+
+	sk->sk_state     = TCP_CLOSE;
+	sk->sk_err       = reason;
+	sk->sk_shutdown |= SEND_SHUTDOWN;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sock_set_flag(sk, SOCK_DEAD);
+	}
+}
+
+/*
+ * Clear an own-rx-busy condition and tell the peer about this, provided
+ * that there is a significant amount of free receive buffer space available.
+ */
+void x25_check_rbuf(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
+	    (x25->condition & X25_COND_OWN_RX_BUSY)) {
+		x25->condition &= ~X25_COND_OWN_RX_BUSY;
+		x25->condition &= ~X25_COND_ACK_PENDING;
+		x25->vl         = x25->vr;
+		x25_write_internal(sk, X25_RR);
+		x25_stop_timer(sk);
+	}
+}
+
+/*
+ * Compare 2 calluserdata structures, used to find correct listening sockets
+ * when call user data is used.
+ */
+int x25_check_calluserdata(struct x25_calluserdata *ours, struct x25_calluserdata *theirs)
+{
+	int i;
+	if (ours->cudlength != theirs->cudlength)
+		return 0;
+
+	for (i=0;i<ours->cudlength;i++) {
+		if (ours->cuddata[i] != theirs->cuddata[i]) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
new file mode 100644
index 00000000000..d6a21a3ad80
--- /dev/null
+++ b/net/x25/x25_timer.c
@@ -0,0 +1,176 @@
+/*
+ *	X.25 Packet Layer release 002
+ *
+ *	This is ALPHA test software. This code may break your machine,
+ *	randomly fail to work with new releases, misbehave and/or generally
+ *	screw up. It might even work. 
+ *
+ *	This code REQUIRES 2.1.15 or higher
+ *
+ *	This module:
+ *		This module is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	History
+ *	X.25 001	Jonathan Naylor	Started coding.
+ *	X.25 002	Jonathan Naylor	New timer architecture.
+ *					Centralised disconnection processing.
+ */
+
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/x25.h>
+
+static void x25_heartbeat_expiry(unsigned long);
+static void x25_timer_expiry(unsigned long);
+
+void x25_init_timers(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	init_timer(&x25->timer);
+	x25->timer.data     = (unsigned long)sk;
+	x25->timer.function = &x25_timer_expiry;
+
+	/* initialized by sock_init_data */
+	sk->sk_timer.data     = (unsigned long)sk;
+	sk->sk_timer.function = &x25_heartbeat_expiry;
+}
+
+void x25_start_heartbeat(struct sock *sk)
+{
+	mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
+}
+
+void x25_stop_heartbeat(struct sock *sk)
+{
+	del_timer(&sk->sk_timer);
+}
+
+void x25_start_t2timer(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	mod_timer(&x25->timer, jiffies + x25->t2);
+}
+
+void x25_start_t21timer(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	mod_timer(&x25->timer, jiffies + x25->t21);
+}
+
+void x25_start_t22timer(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	mod_timer(&x25->timer, jiffies + x25->t22);
+}
+
+void x25_start_t23timer(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	mod_timer(&x25->timer, jiffies + x25->t23);
+}
+
+void x25_stop_timer(struct sock *sk)
+{
+	del_timer(&x25_sk(sk)->timer);
+}
+
+unsigned long x25_display_timer(struct sock *sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	if (!timer_pending(&x25->timer))
+		return 0;
+
+	return x25->timer.expires - jiffies;
+}
+
+static void x25_heartbeat_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+
+        bh_lock_sock(sk);
+        if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */ 
+		goto restart_heartbeat;
+
+	switch (x25_sk(sk)->state) {
+
+		case X25_STATE_0:
+			/*
+			 * Magic here: If we listen() and a new link dies
+			 * before it is accepted() it isn't 'dead' so doesn't
+			 * get removed.
+			 */
+			if (sock_flag(sk, SOCK_DESTROY) ||
+			    (sk->sk_state == TCP_LISTEN &&
+			     sock_flag(sk, SOCK_DEAD))) {
+				x25_destroy_socket(sk);
+				goto unlock;
+			}
+			break;
+
+		case X25_STATE_3:
+			/*
+			 * Check for the state of the receive buffer.
+			 */
+			x25_check_rbuf(sk);
+			break;
+	}
+restart_heartbeat:
+	x25_start_heartbeat(sk);
+unlock:
+	bh_unlock_sock(sk);
+}
+
+/*
+ *	Timer has expired, it may have been T2, T21, T22, or T23. We can tell
+ *	by the state machine state.
+ */
+static inline void x25_do_timer_expiry(struct sock * sk)
+{
+	struct x25_sock *x25 = x25_sk(sk);
+
+	switch (x25->state) {
+
+		case X25_STATE_3:	/* T2 */
+			if (x25->condition & X25_COND_ACK_PENDING) {
+				x25->condition &= ~X25_COND_ACK_PENDING;
+				x25_enquiry_response(sk);
+			}
+			break;
+
+		case X25_STATE_1:	/* T21 */
+		case X25_STATE_4:	/* T22 */
+			x25_write_internal(sk, X25_CLEAR_REQUEST);
+			x25->state = X25_STATE_2;
+			x25_start_t23timer(sk);
+			break;
+
+		case X25_STATE_2:	/* T23 */
+			x25_disconnect(sk, ETIMEDOUT, 0, 0);
+			break;
+	}
+}
+
+static void x25_timer_expiry(unsigned long param)
+{
+	struct sock *sk = (struct sock *)param;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
+		if (x25_sk(sk)->state == X25_STATE_3)
+			x25_start_t2timer(sk);
+	} else
+		x25_do_timer_expiry(sk);
+	bh_unlock_sock(sk);
+}
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
new file mode 100644
index 00000000000..58ca6a972c4
--- /dev/null
+++ b/net/xfrm/Kconfig
@@ -0,0 +1,12 @@
+#
+# XFRM configuration
+#
+config XFRM_USER
+	tristate "IPsec user configuration interface"
+	depends on INET && XFRM
+	---help---
+	  Support for IPsec user configuration interface used
+	  by native Linux tools.
+
+	  If unsure, say Y.
+
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
new file mode 100644
index 00000000000..693aac1aa83
--- /dev/null
+++ b/net/xfrm/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the XFRM subsystem.
+#
+
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM_USER) += xfrm_user.o
+
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
new file mode 100644
index 00000000000..080aae243ce
--- /dev/null
+++ b/net/xfrm/xfrm_algo.c
@@ -0,0 +1,729 @@
+/* 
+ * xfrm algorithm interface
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pfkeyv2.h>
+#include <linux/crypto.h>
+#include <net/xfrm.h>
+#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
+#include <net/ah.h>
+#endif
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#include <net/esp.h>
+#endif
+#include <asm/scatterlist.h>
+
+/*
+ * Algorithms supported by IPsec.  These entries contain properties which
+ * are used in key negotiation and xfrm processing, and are used to verify
+ * that instantiated crypto transforms have correct parameters for IPsec
+ * purposes.
+ */
+static struct xfrm_algo_desc aalg_list[] = {
+{
+	.name = "digest_null",
+	
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 0,
+			.icv_fullbits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "md5",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id = SADB_AALG_MD5HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "sha1",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_AALG_SHA1HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+{
+	.name = "sha256",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 256,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 256,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+	.name = "ripemd160",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 160,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 160
+	}
+},
+};
+
+static struct xfrm_algo_desc ealg_list[] = {
+{
+	.name = "cipher_null",
+	
+	.uinfo = {
+		.encr = {
+			.blockbits = 8,
+			.defkeybits = 0,
+		}
+	},
+	
+	.desc = {
+		.sadb_alg_id =	SADB_EALG_NULL,
+		.sadb_alg_ivlen = 0,
+		.sadb_alg_minbits = 0,
+		.sadb_alg_maxbits = 0
+	}
+},
+{
+	.name = "des",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 64,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 64,
+		.sadb_alg_maxbits = 64
+	}
+},
+{
+	.name = "des3_ede",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 192,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_EALG_3DESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 192,
+		.sadb_alg_maxbits = 192
+	}
+},
+{
+	.name = "cast128",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_CASTCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 128
+	}
+},
+{
+	.name = "blowfish",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 64,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 40,
+		.sadb_alg_maxbits = 448
+	}
+},
+{
+	.name = "aes",
+
+	.uinfo = {
+		.encr = {
+			.blockbits = 128,
+			.defkeybits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_AESCBC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
+{
+        .name = "serpent",
+
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_SERPENTCBC,
+                .sadb_alg_ivlen = 8,
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256,
+        }
+},
+{
+        .name = "twofish",
+                 
+        .uinfo = {
+                .encr = {
+                        .blockbits = 128,
+                        .defkeybits = 128,
+                }
+        },
+
+        .desc = {
+                .sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
+                .sadb_alg_ivlen = 8,
+                .sadb_alg_minbits = 128,
+                .sadb_alg_maxbits = 256
+        }
+},
+};
+
+static struct xfrm_algo_desc calg_list[] = {
+{
+	.name = "deflate",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
+},
+{
+	.name = "lzs",
+	.uinfo = {
+		.comp = {
+			.threshold = 90,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
+},
+{
+	.name = "lzjh",
+	.uinfo = {
+		.comp = {
+			.threshold = 50,
+		}
+	},
+	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
+},
+};
+
+static inline int aalg_entries(void)
+{
+	return ARRAY_SIZE(aalg_list);
+}
+
+static inline int ealg_entries(void)
+{
+	return ARRAY_SIZE(ealg_list);
+}
+
+static inline int calg_entries(void)
+{
+	return ARRAY_SIZE(calg_list);
+}
+
+/* Todo: generic iterators */
+struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < aalg_entries(); i++) {
+		if (aalg_list[i].desc.sadb_alg_id == alg_id) {
+			if (aalg_list[i].available)
+				return &aalg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < ealg_entries(); i++) {
+		if (ealg_list[i].desc.sadb_alg_id == alg_id) {
+			if (ealg_list[i].available)
+				return &ealg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid);
+
+struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
+{
+	int i;
+
+	for (i = 0; i < calg_entries(); i++) {
+		if (calg_list[i].desc.sadb_alg_id == alg_id) {
+			if (calg_list[i].available)
+				return &calg_list[i];
+			else
+				break;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
+
+static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
+					      int entries, char *name,
+					      int probe)
+{
+	int i, status;
+
+	if (!name)
+		return NULL;
+
+	for (i = 0; i < entries; i++) {
+		if (strcmp(name, list[i].name))
+			continue;
+
+		if (list[i].available)
+			return &list[i];
+
+		if (!probe)
+			break;
+
+		status = crypto_alg_available(name, 0);
+		if (!status)
+			break;
+
+		list[i].available = status;
+		return &list[i];
+	}
+	return NULL;
+}
+
+struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(aalg_list, aalg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(ealg_list, ealg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
+
+struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
+{
+	return xfrm_get_byname(calg_list, calg_entries(), name, probe);
+}
+EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
+
+struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
+{
+	if (idx >= aalg_entries())
+		return NULL;
+
+	return &aalg_list[idx];
+}
+EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx);
+
+struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
+{
+	if (idx >= ealg_entries())
+		return NULL;
+
+	return &ealg_list[idx];
+}
+EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
+
+/*
+ * Probe for the availability of crypto algorithms, and set the available
+ * flag for any algorithms found on the system.  This is typically called by
+ * pfkey during userspace SA add, update or register.
+ */
+void xfrm_probe_algs(void)
+{
+#ifdef CONFIG_CRYPTO
+	int i, status;
+	
+	BUG_ON(in_softirq());
+
+	for (i = 0; i < aalg_entries(); i++) {
+		status = crypto_alg_available(aalg_list[i].name, 0);
+		if (aalg_list[i].available != status)
+			aalg_list[i].available = status;
+	}
+	
+	for (i = 0; i < ealg_entries(); i++) {
+		status = crypto_alg_available(ealg_list[i].name, 0);
+		if (ealg_list[i].available != status)
+			ealg_list[i].available = status;
+	}
+	
+	for (i = 0; i < calg_entries(); i++) {
+		status = crypto_alg_available(calg_list[i].name, 0);
+		if (calg_list[i].available != status)
+			calg_list[i].available = status;
+	}
+#endif
+}
+EXPORT_SYMBOL_GPL(xfrm_probe_algs);
+
+int xfrm_count_auth_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < aalg_entries(); i++)
+		if (aalg_list[i].available)
+			n++;
+	return n;
+}
+EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
+
+int xfrm_count_enc_supported(void)
+{
+	int i, n;
+
+	for (i = 0, n = 0; i < ealg_entries(); i++)
+		if (ealg_list[i].available)
+			n++;
+	return n;
+}
+EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
+
+/* Move to common area: it is shared with AH. */
+
+void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
+		  int offset, int len, icv_update_fn_t icv_update)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct scatterlist sg;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		
+		sg.page = virt_to_page(skb->data + offset);
+		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg.length = copy;
+		
+		icv_update(tfm, &sg, 1);
+		
+		if ((len -= copy) == 0)
+			return;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			
+			sg.page = frag->page;
+			sg.offset = frag->page_offset + offset-start;
+			sg.length = copy;
+			
+			icv_update(tfm, &sg, 1);
+
+			if (!(len -= copy))
+				return;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+				if ((len -= copy) == 0)
+					return;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+}
+EXPORT_SYMBOL_GPL(skb_icv_walk);
+
+#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+
+/* Looking generic it is not used in another places. */
+
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int elt = 0;
+
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return elt;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
+/* Check that skb data bits are writable. If they are not, copy data
+ * to newly created private area. If "tailbits" is given, make sure that
+ * tailbits bytes beyond current end of skb are writable.
+ *
+ * Returns amount of elements of scatterlist to load for subsequent
+ * transformations and pointer to writable trailer skb.
+ */
+
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most of packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* A little of trouble, not enough of space for trailer.
+		 * This should not happen, when stack is tuned to generate
+		 * good frames. OK, on miss we reallocate and reserve even more
+		 * space, 128 bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery. We are in troubles, going to mincer fragments... */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is partially pulled by someone,
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* Fuck, we are miserable poor guys... */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb, skb1->sk);
+
+			/* Looking around. Are we still alive?
+			 * OK, link new skb, drop old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+EXPORT_SYMBOL_GPL(skb_cow_data);
+
+void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+EXPORT_SYMBOL_GPL(pskb_put);
+#endif
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
new file mode 100644
index 00000000000..c58a6f05a0b
--- /dev/null
+++ b/net/xfrm/xfrm_input.c
@@ -0,0 +1,89 @@
+/*
+ * xfrm_input.c
+ *
+ * Changes:
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific portion
+ * 	
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+static kmem_cache_t *secpath_cachep;
+
+void __secpath_destroy(struct sec_path *sp)
+{
+	int i;
+	for (i = 0; i < sp->len; i++)
+		xfrm_state_put(sp->x[i].xvec);
+	kmem_cache_free(secpath_cachep, sp);
+}
+EXPORT_SYMBOL(__secpath_destroy);
+
+struct sec_path *secpath_dup(struct sec_path *src)
+{
+	struct sec_path *sp;
+
+	sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC);
+	if (!sp)
+		return NULL;
+
+	sp->len = 0;
+	if (src) {
+		int i;
+
+		memcpy(sp, src, sizeof(*sp));
+		for (i = 0; i < sp->len; i++)
+			xfrm_state_hold(sp->x[i].xvec);
+	}
+	atomic_set(&sp->refcnt, 1);
+	return sp;
+}
+EXPORT_SYMBOL(secpath_dup);
+
+/* Fetch spi and seq from ipsec header */
+
+int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
+{
+	int offset, offset_seq;
+
+	switch (nexthdr) {
+	case IPPROTO_AH:
+		offset = offsetof(struct ip_auth_hdr, spi);
+		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
+		break;
+	case IPPROTO_ESP:
+		offset = offsetof(struct ip_esp_hdr, spi);
+		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
+		break;
+	case IPPROTO_COMP:
+		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
+			return -EINVAL;
+		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));
+		*seq = 0;
+		return 0;
+	default:
+		return 1;
+	}
+
+	if (!pskb_may_pull(skb, 16))
+		return -EINVAL;
+
+	*spi = *(u32*)(skb->h.raw + offset);
+	*seq = *(u32*)(skb->h.raw + offset_seq);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_parse_spi);
+
+void __init xfrm_input_init(void)
+{
+	secpath_cachep = kmem_cache_create("secpath_cache",
+					   sizeof(struct sec_path),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!secpath_cachep)
+		panic("XFRM: failed to allocate secpath_cache\n");
+}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
new file mode 100644
index 00000000000..80828078733
--- /dev/null
+++ b/net/xfrm/xfrm_policy.c
@@ -0,0 +1,1367 @@
+/* 
+ * xfrm_policy.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	YOSHIFUJI Hideaki
+ * 		Split up af-specific portion
+ *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
+ * 	
+ */
+
+#include <asm/bug.h>
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+DECLARE_MUTEX(xfrm_cfg_sem);
+EXPORT_SYMBOL(xfrm_cfg_sem);
+
+static DEFINE_RWLOCK(xfrm_policy_lock);
+
+struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_list);
+
+static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
+static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+static kmem_cache_t *xfrm_dst_cache;
+
+static struct work_struct xfrm_policy_gc_work;
+static struct list_head xfrm_policy_gc_list =
+	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (likely(typemap->map[type->proto] == NULL))
+		typemap->map[type->proto] = type;
+	else
+		err = -EEXIST;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_register_type);
+
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct xfrm_type_map *typemap;
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+	typemap = afinfo->type_map;
+
+	write_lock(&typemap->lock);
+	if (unlikely(typemap->map[type->proto] != type))
+		err = -ENOENT;
+	else
+		typemap->map[type->proto] = NULL;
+	write_unlock(&typemap->lock);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type);
+
+struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	struct xfrm_type_map *typemap;
+	struct xfrm_type *type;
+	int modload_attempted = 0;
+
+retry:
+	afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return NULL;
+	typemap = afinfo->type_map;
+
+	read_lock(&typemap->lock);
+	type = typemap->map[proto];
+	if (unlikely(type && !try_module_get(type->owner)))
+		type = NULL;
+	read_unlock(&typemap->lock);
+	if (!type && !modload_attempted) {
+		xfrm_policy_put_afinfo(afinfo);
+		request_module("xfrm-type-%d-%d",
+			       (int) family, (int) proto);
+		modload_attempted = 1;
+		goto retry;
+	}
+
+	xfrm_policy_put_afinfo(afinfo);
+	return type;
+}
+EXPORT_SYMBOL(xfrm_get_type);
+
+int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
+		    unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err = 0;
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	if (likely(afinfo->dst_lookup != NULL))
+		err = afinfo->dst_lookup(dst, fl);
+	else
+		err = -EINVAL;
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_dst_lookup);
+
+void xfrm_put_type(struct xfrm_type *type)
+{
+	module_put(type->owner);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return MAX_SCHEDULE_TIMEOUT-1;
+	else
+	        return secs*HZ;
+}
+
+static void xfrm_policy_timer(unsigned long data)
+{
+	struct xfrm_policy *xp = (struct xfrm_policy*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+	int dir;
+
+	read_lock(&xp->lock);
+
+	if (xp->dead)
+		goto out;
+
+	dir = xp->index & 7;
+
+	if (xp->lft.hard_add_expires_seconds) {
+		long tmo = xp->lft.hard_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.hard_use_expires_seconds) {
+		long tmo = xp->lft.hard_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_add_expires_seconds) {
+		long tmo = xp->lft.soft_add_expires_seconds +
+			xp->curlft.add_time - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+	if (xp->lft.soft_use_expires_seconds) {
+		long tmo = xp->lft.soft_use_expires_seconds +
+			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
+		if (tmo <= 0) {
+			warn = 1;
+			tmo = XFRM_KM_TIMEOUT;
+		}
+		if (tmo < next)
+			next = tmo;
+	}
+
+	if (warn)
+		km_policy_expired(xp, dir, 0);
+	if (next != LONG_MAX &&
+	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
+		xfrm_pol_hold(xp);
+
+out:
+	read_unlock(&xp->lock);
+	xfrm_pol_put(xp);
+	return;
+
+expired:
+	read_unlock(&xp->lock);
+	km_policy_expired(xp, dir, 1);
+	xfrm_policy_delete(xp, dir);
+	xfrm_pol_put(xp);
+}
+
+
+/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+ */
+
+struct xfrm_policy *xfrm_policy_alloc(int gfp)
+{
+	struct xfrm_policy *policy;
+
+	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+
+	if (policy) {
+		memset(policy, 0, sizeof(struct xfrm_policy));
+		atomic_set(&policy->refcnt, 1);
+		rwlock_init(&policy->lock);
+		init_timer(&policy->timer);
+		policy->timer.data = (unsigned long)policy;
+		policy->timer.function = xfrm_policy_timer;
+	}
+	return policy;
+}
+EXPORT_SYMBOL(xfrm_policy_alloc);
+
+/* Destroy xfrm_policy: descendant resources must be released to this moment. */
+
+void __xfrm_policy_destroy(struct xfrm_policy *policy)
+{
+	if (!policy->dead)
+		BUG();
+
+	if (policy->bundles)
+		BUG();
+
+	if (del_timer(&policy->timer))
+		BUG();
+
+	kfree(policy);
+}
+EXPORT_SYMBOL(__xfrm_policy_destroy);
+
+static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+{
+	struct dst_entry *dst;
+
+	while ((dst = policy->bundles) != NULL) {
+		policy->bundles = dst->next;
+		dst_free(dst);
+	}
+
+	if (del_timer(&policy->timer))
+		atomic_dec(&policy->refcnt);
+
+	if (atomic_read(&policy->refcnt) > 1)
+		flow_cache_flush();
+
+	xfrm_pol_put(policy);
+}
+
+static void xfrm_policy_gc_task(void *data)
+{
+	struct xfrm_policy *policy;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+	spin_lock_bh(&xfrm_policy_gc_lock);
+	list_splice_init(&xfrm_policy_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_policy_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		policy = list_entry(entry, struct xfrm_policy, list);
+		xfrm_policy_gc_kill(policy);
+	}
+}
+
+/* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+ */
+
+static void xfrm_policy_kill(struct xfrm_policy *policy)
+{
+	int dead;
+
+	write_lock_bh(&policy->lock);
+	dead = policy->dead;
+	policy->dead = 1;
+	write_unlock_bh(&policy->lock);
+
+	if (unlikely(dead)) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock(&xfrm_policy_gc_lock);
+	list_add(&policy->list, &xfrm_policy_gc_list);
+	spin_unlock(&xfrm_policy_gc_lock);
+
+	schedule_work(&xfrm_policy_gc_work);
+}
+
+/* Generate new index... KAME seems to generate them ordered by cost
+ * of an absolute inpredictability of ordering of rules. This will not pass. */
+static u32 xfrm_gen_index(int dir)
+{
+	u32 idx;
+	struct xfrm_policy *p;
+	static u32 idx_generator;
+
+	for (;;) {
+		idx = (idx_generator | dir);
+		idx_generator += 8;
+		if (idx == 0)
+			idx = 8;
+		for (p = xfrm_policy_list[dir]; p; p = p->next) {
+			if (p->index == idx)
+				break;
+		}
+		if (!p)
+			return idx;
+	}
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *delpol = NULL;
+	struct xfrm_policy **newpos = NULL;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
+		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+			if (excl) {
+				write_unlock_bh(&xfrm_policy_lock);
+				return -EEXIST;
+			}
+			*p = pol->next;
+			delpol = pol;
+			if (policy->priority > pol->priority)
+				continue;
+		} else if (policy->priority >= pol->priority) {
+			p = &pol->next;
+			continue;
+		}
+		if (!newpos)
+			newpos = p;
+		if (delpol)
+			break;
+		p = &pol->next;
+	}
+	if (newpos)
+		p = newpos;
+	xfrm_pol_hold(policy);
+	policy->next = *p;
+	*p = policy;
+	atomic_inc(&flow_cache_genid);
+	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = 0;
+	if (!mod_timer(&policy->timer, jiffies + HZ))
+		xfrm_pol_hold(policy);
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (delpol) {
+		xfrm_policy_kill(delpol);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_policy_insert);
+
+struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
+				      int delete)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+			xfrm_pol_hold(pol);
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol && delete) {
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+	return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_bysel);
+
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+{
+	struct xfrm_policy *pol, **p;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+		if (pol->index == id) {
+			xfrm_pol_hold(pol);
+			if (delete)
+				*p = pol->next;
+			break;
+		}
+	}
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (pol && delete) {
+		atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+	return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_byid);
+
+void xfrm_policy_flush(void)
+{
+	struct xfrm_policy *xp;
+	int dir;
+
+	write_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+		while ((xp = xfrm_policy_list[dir]) != NULL) {
+			xfrm_policy_list[dir] = xp->next;
+			write_unlock_bh(&xfrm_policy_lock);
+
+			xfrm_policy_kill(xp);
+
+			write_lock_bh(&xfrm_policy_lock);
+		}
+	}
+	atomic_inc(&flow_cache_genid);
+	write_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_policy_flush);
+
+int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+		     void *data)
+{
+	struct xfrm_policy *xp;
+	int dir;
+	int count = 0;
+	int error = 0;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
+			count++;
+	}
+
+	if (count == 0) {
+		error = -ENOENT;
+		goto out;
+	}
+
+	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
+			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+			if (error)
+				goto out;
+		}
+	}
+
+out:
+	read_unlock_bh(&xfrm_policy_lock);
+	return error;
+}
+EXPORT_SYMBOL(xfrm_policy_walk);
+
+/* Find policy to apply to this flow. */
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+			       void **objp, atomic_t **obj_refp)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
+		struct xfrm_selector *sel = &pol->selector;
+		int match;
+
+		if (pol->family != family)
+			continue;
+
+		match = xfrm_selector_match(sel, fl, family);
+		if (match) {
+			xfrm_pol_hold(pol);
+			break;
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	if ((*objp = (void *) pol) != NULL)
+		*obj_refp = &pol->refcnt;
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+{
+	struct xfrm_policy *pol;
+
+	read_lock_bh(&xfrm_policy_lock);
+	if ((pol = sk->sk_policy[dir]) != NULL) {
+		int match = xfrm_selector_match(&pol->selector, fl,
+						sk->sk_family);
+		if (match)
+			xfrm_pol_hold(pol);
+		else
+			pol = NULL;
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+	return pol;
+}
+
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
+{
+	pol->next = xfrm_policy_list[dir];
+	xfrm_policy_list[dir] = pol;
+	xfrm_pol_hold(pol);
+}
+
+static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+						int dir)
+{
+	struct xfrm_policy **polp;
+
+	for (polp = &xfrm_policy_list[dir];
+	     *polp != NULL; polp = &(*polp)->next) {
+		if (*polp == pol) {
+			*polp = pol->next;
+			return pol;
+		}
+	}
+	return NULL;
+}
+
+void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+{
+	write_lock_bh(&xfrm_policy_lock);
+	pol = __xfrm_policy_unlink(pol, dir);
+	write_unlock_bh(&xfrm_policy_lock);
+	if (pol) {
+		if (dir < XFRM_POLICY_MAX)
+			atomic_inc(&flow_cache_genid);
+		xfrm_policy_kill(pol);
+	}
+}
+
+int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+{
+	struct xfrm_policy *old_pol;
+
+	write_lock_bh(&xfrm_policy_lock);
+	old_pol = sk->sk_policy[dir];
+	sk->sk_policy[dir] = pol;
+	if (pol) {
+		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+	}
+	if (old_pol)
+		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+	write_unlock_bh(&xfrm_policy_lock);
+
+	if (old_pol) {
+		xfrm_policy_kill(old_pol);
+	}
+	return 0;
+}
+
+static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+{
+	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+
+	if (newp) {
+		newp->selector = old->selector;
+		newp->lft = old->lft;
+		newp->curlft = old->curlft;
+		newp->action = old->action;
+		newp->flags = old->flags;
+		newp->xfrm_nr = old->xfrm_nr;
+		newp->index = old->index;
+		memcpy(newp->xfrm_vec, old->xfrm_vec,
+		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
+		write_lock_bh(&xfrm_policy_lock);
+		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+		write_unlock_bh(&xfrm_policy_lock);
+		xfrm_pol_put(newp);
+	}
+	return newp;
+}
+
+int __xfrm_sk_clone_policy(struct sock *sk)
+{
+	struct xfrm_policy *p0 = sk->sk_policy[0],
+			   *p1 = sk->sk_policy[1];
+
+	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
+	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
+		return -ENOMEM;
+	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Resolve list of templates for the flow, given policy. */
+
+static int
+xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	int nx;
+	int i, error;
+	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+
+	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
+		struct xfrm_state *x;
+		xfrm_address_t *remote = daddr;
+		xfrm_address_t *local  = saddr;
+		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
+
+		if (tmpl->mode) {
+			remote = &tmpl->id.daddr;
+			local = &tmpl->saddr;
+		}
+
+		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+
+		if (x && x->km.state == XFRM_STATE_VALID) {
+			xfrm[nx++] = x;
+			daddr = remote;
+			saddr = local;
+			continue;
+		}
+		if (x) {
+			error = (x->km.state == XFRM_STATE_ERROR ?
+				 -EINVAL : -EAGAIN);
+			xfrm_state_put(x);
+		}
+
+		if (!tmpl->optional)
+			goto fail;
+	}
+	return nx;
+
+fail:
+	for (nx--; nx>=0; nx--)
+		xfrm_state_put(xfrm[nx]);
+	return error;
+}
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+static struct dst_entry *
+xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
+{
+	struct dst_entry *x;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return ERR_PTR(-EINVAL);
+	x = afinfo->find_bundle(fl, policy);
+	xfrm_policy_put_afinfo(afinfo);
+	return x;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+		   struct flowi *fl, struct dst_entry **dst_p,
+		   unsigned short family)
+{
+	int err;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
+static inline int policy_to_flow_dir(int dir)
+{
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+	switch (dir) {
+	default:
+	case XFRM_POLICY_IN:
+		return FLOW_DIR_IN;
+	case XFRM_POLICY_OUT:
+		return FLOW_DIR_OUT;
+	case XFRM_POLICY_FWD:
+		return FLOW_DIR_FWD;
+	};
+}
+
+static int stale_bundle(struct dst_entry *dst);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+		struct sock *sk, int flags)
+{
+	struct xfrm_policy *policy;
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct dst_entry *dst, *dst_orig = *dst_p;
+	int nx = 0;
+	int err;
+	u32 genid;
+	u16 family = dst_orig->ops->family;
+restart:
+	genid = atomic_read(&flow_cache_genid);
+	policy = NULL;
+	if (sk && sk->sk_policy[1])
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+
+	if (!policy) {
+		/* To accelerate a bit...  */
+		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+			return 0;
+
+		policy = flow_cache_lookup(fl, family,
+					   policy_to_flow_dir(XFRM_POLICY_OUT),
+					   xfrm_policy_lookup);
+	}
+
+	if (!policy)
+		return 0;
+
+	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	switch (policy->action) {
+	case XFRM_POLICY_BLOCK:
+		/* Prohibit the flow */
+		xfrm_pol_put(policy);
+		return -EPERM;
+
+	case XFRM_POLICY_ALLOW:
+		if (policy->xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		/* Try to find matching bundle.
+		 *
+		 * LATER: help from flow cache. It is optional, this
+		 * is required only for output policy.
+		 */
+		dst = xfrm_find_bundle(fl, policy, family);
+		if (IS_ERR(dst)) {
+			xfrm_pol_put(policy);
+			return PTR_ERR(dst);
+		}
+
+		if (dst)
+			break;
+
+		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+		if (unlikely(nx<0)) {
+			err = nx;
+			if (err == -EAGAIN && flags) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				add_wait_queue(&km_waitq, &wait);
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule();
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&km_waitq, &wait);
+
+				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+				if (nx == -EAGAIN && signal_pending(current)) {
+					err = -ERESTART;
+					goto error;
+				}
+				if (nx == -EAGAIN ||
+				    genid != atomic_read(&flow_cache_genid)) {
+					xfrm_pol_put(policy);
+					goto restart;
+				}
+				err = nx;
+			}
+			if (err < 0)
+				goto error;
+		}
+		if (nx == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pol_put(policy);
+			return 0;
+		}
+
+		dst = dst_orig;
+		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
+
+		if (unlikely(err)) {
+			int i;
+			for (i=0; i<nx; i++)
+				xfrm_state_put(xfrm[i]);
+			goto error;
+		}
+
+		write_lock_bh(&policy->lock);
+		if (unlikely(policy->dead || stale_bundle(dst))) {
+			/* Wow! While we worked on resolving, this
+			 * policy has gone. Retry. It is not paranoia,
+			 * we just cannot enlist new bundle to dead object.
+			 * We can't enlist stable bundles either.
+			 */
+			write_unlock_bh(&policy->lock);
+
+			xfrm_pol_put(policy);
+			if (dst)
+				dst_free(dst);
+			goto restart;
+		}
+		dst->next = policy->bundles;
+		policy->bundles = dst;
+		dst_hold(dst);
+		write_unlock_bh(&policy->lock);
+	}
+	*dst_p = dst;
+	dst_release(dst_orig);
+	xfrm_pol_put(policy);
+	return 0;
+
+error:
+	dst_release(dst_orig);
+	xfrm_pol_put(policy);
+	*dst_p = NULL;
+	return err;
+}
+EXPORT_SYMBOL(xfrm_lookup);
+
+/* When skb is transformed back to its "native" form, we have to
+ * check policy restrictions. At the moment we make this in maximally
+ * stupid way. Shame on me. :-) Of course, connected sockets must
+ * have policy cached at them.
+ */
+
+static inline int
+xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
+	      unsigned short family)
+{
+	if (xfrm_state_kern(x))
+		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
+	return	x->id.proto == tmpl->id.proto &&
+		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
+		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
+		x->props.mode == tmpl->mode &&
+		(tmpl->aalgos & (1<<x->props.aalgo)) &&
+		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+}
+
+static inline int
+xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
+	       unsigned short family)
+{
+	int idx = start;
+
+	if (tmpl->optional) {
+		if (!tmpl->mode)
+			return start;
+	} else
+		start = -1;
+	for (; idx < sp->len; idx++) {
+		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
+			return ++idx;
+		if (sp->x[idx].xvec->props.mode)
+			break;
+	}
+	return start;
+}
+
+static int
+_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	afinfo->decode_session(skb, fl);
+	xfrm_policy_put_afinfo(afinfo);
+	return 0;
+}
+
+static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+{
+	for (; k < sp->len; k++) {
+		if (sp->x[k].xvec->props.mode)
+			return 1;
+	}
+
+	return 0;
+}
+
+int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
+			unsigned short family)
+{
+	struct xfrm_policy *pol;
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;
+
+	/* First, check used SA against their selectors. */
+	if (skb->sp) {
+		int i;
+
+		for (i=skb->sp->len-1; i>=0; i--) {
+		  struct sec_decap_state *xvec = &(skb->sp->x[i]);
+			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
+				return 0;
+
+			/* If there is a post_input processor, try running it */
+			if (xvec->xvec->type->post_input &&
+			    (xvec->xvec->type->post_input)(xvec->xvec,
+							   &(xvec->decap),
+							   skb) != 0)
+				return 0;
+		}
+	}
+
+	pol = NULL;
+	if (sk && sk->sk_policy[dir])
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+
+	if (!pol)
+		pol = flow_cache_lookup(&fl, family,
+					policy_to_flow_dir(dir),
+					xfrm_policy_lookup);
+
+	if (!pol)
+		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+
+	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	if (pol->action == XFRM_POLICY_ALLOW) {
+		struct sec_path *sp;
+		static struct sec_path dummy;
+		int i, k;
+
+		if ((sp = skb->sp) == NULL)
+			sp = &dummy;
+
+		/* For each tunnel xfrm, find the first matching tmpl.
+		 * For each tmpl before that, find corresponding xfrm.
+		 * Order is _important_. Later we will implement
+		 * some barriers, but at the moment barriers
+		 * are implied between each two transformations.
+		 */
+		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+			if (k < 0)
+				goto reject;
+		}
+
+		if (secpath_has_tunnel(sp, k))
+			goto reject;
+
+		xfrm_pol_put(pol);
+		return 1;
+	}
+
+reject:
+	xfrm_pol_put(pol);
+	return 0;
+}
+EXPORT_SYMBOL(__xfrm_policy_check);
+
+int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+	struct flowi fl;
+
+	if (_decode_session(skb, &fl, family) < 0)
+		return 0;
+
+	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
+}
+EXPORT_SYMBOL(__xfrm_route_forward);
+
+/* Optimize later using cookies and generation ids. */
+
+static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
+{
+	if (!stale_bundle(dst))
+		return dst;
+
+	return NULL;
+}
+
+static int stale_bundle(struct dst_entry *dst)
+{
+	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+}
+
+static void xfrm_dst_destroy(struct dst_entry *dst)
+{
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+	dst_release(xdst->route);
+
+	if (!dst->xfrm)
+		return;
+	xfrm_state_put(dst->xfrm);
+	dst->xfrm = NULL;
+}
+
+static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int unregister)
+{
+	if (!unregister)
+		return;
+
+	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+		dst->dev = &loopback_dev;
+		dev_hold(&loopback_dev);
+		dev_put(dev);
+	}
+}
+
+static void xfrm_link_failure(struct sk_buff *skb)
+{
+	/* Impossible. Such dst must be popped before reaches point of failure. */
+	return;
+}
+
+static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+{
+	if (dst) {
+		if (dst->obsolete) {
+			dst_release(dst);
+			dst = NULL;
+		}
+	}
+	return dst;
+}
+
+static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
+{
+	int i;
+	struct xfrm_policy *pol;
+	struct dst_entry *dst, **dstp, *gc_list = NULL;
+
+	read_lock_bh(&xfrm_policy_lock);
+	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+			write_lock(&pol->lock);
+			dstp = &pol->bundles;
+			while ((dst=*dstp) != NULL) {
+				if (func(dst)) {
+					*dstp = dst->next;
+					dst->next = gc_list;
+					gc_list = dst;
+				} else {
+					dstp = &dst->next;
+				}
+			}
+			write_unlock(&pol->lock);
+		}
+	}
+	read_unlock_bh(&xfrm_policy_lock);
+
+	while (gc_list) {
+		dst = gc_list;
+		gc_list = dst->next;
+		dst_free(dst);
+	}
+}
+
+static int unused_bundle(struct dst_entry *dst)
+{
+	return !atomic_read(&dst->__refcnt);
+}
+
+static void __xfrm_garbage_collect(void)
+{
+	xfrm_prune_bundles(unused_bundle);
+}
+
+int xfrm_flush_bundles(void)
+{
+	xfrm_prune_bundles(stale_bundle);
+	return 0;
+}
+
+void xfrm_init_pmtu(struct dst_entry *dst)
+{
+	do {
+		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+		u32 pmtu, route_mtu_cached;
+
+		pmtu = dst_mtu(dst->child);
+		xdst->child_mtu_cached = pmtu;
+
+		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
+
+		route_mtu_cached = dst_mtu(xdst->route);
+		xdst->route_mtu_cached = route_mtu_cached;
+
+		if (pmtu > route_mtu_cached)
+			pmtu = route_mtu_cached;
+
+		dst->metrics[RTAX_MTU-1] = pmtu;
+	} while ((dst = dst->next));
+}
+
+EXPORT_SYMBOL(xfrm_init_pmtu);
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+{
+	struct dst_entry *dst = &first->u.dst;
+	struct xfrm_dst *last;
+	u32 mtu;
+
+	if (!dst_check(dst->path, 0) ||
+	    (dst->dev && !netif_running(dst->dev)))
+		return 0;
+
+	last = NULL;
+
+	do {
+		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
+			return 0;
+		if (dst->xfrm->km.state != XFRM_STATE_VALID)
+			return 0;
+
+		mtu = dst_mtu(dst->child);
+		if (xdst->child_mtu_cached != mtu) {
+			last = xdst;
+			xdst->child_mtu_cached = mtu;
+		}
+
+		if (!dst_check(xdst->route, 0))
+			return 0;
+		mtu = dst_mtu(xdst->route);
+		if (xdst->route_mtu_cached != mtu) {
+			last = xdst;
+			xdst->route_mtu_cached = mtu;
+		}
+
+		dst = dst->child;
+	} while (dst->xfrm);
+
+	if (likely(!last))
+		return 1;
+
+	mtu = last->child_mtu_cached;
+	for (;;) {
+		dst = &last->u.dst;
+
+		mtu = xfrm_state_mtu(dst->xfrm, mtu);
+		if (mtu > last->route_mtu_cached)
+			mtu = last->route_mtu_cached;
+		dst->metrics[RTAX_MTU-1] = mtu;
+
+		if (last == first)
+			break;
+
+		last = last->u.next;
+		last->child_mtu_cached = mtu;
+	}
+
+	return 1;
+}
+
+EXPORT_SYMBOL(xfrm_bundle_ok);
+
+/* Well... that's _TASK_. We need to scan through transformation
+ * list and figure out what mss tcp should generate in order to
+ * final datagram fit to mtu. Mama mia... :-)
+ *
+ * Apparently, some easy way exists, but we used to choose the most
+ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
+ *
+ * Consider this function as something like dark humour. :-)
+ */
+static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+{
+	int res = mtu - dst->header_len;
+
+	for (;;) {
+		struct dst_entry *d = dst;
+		int m = res;
+
+		do {
+			struct xfrm_state *x = d->xfrm;
+			if (x) {
+				spin_lock_bh(&x->lock);
+				if (x->km.state == XFRM_STATE_VALID &&
+				    x->type && x->type->get_max_size)
+					m = x->type->get_max_size(d->xfrm, m);
+				else
+					m += x->props.header_len;
+				spin_unlock_bh(&x->lock);
+			}
+		} while ((d = d->child) != NULL);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);
+		if (res < 88)
+			return mtu;
+	}
+
+	return res + dst->header_len;
+}
+
+int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;
+	else {
+		struct dst_ops *dst_ops = afinfo->dst_ops;
+		if (likely(dst_ops->kmem_cachep == NULL))
+			dst_ops->kmem_cachep = xfrm_dst_cache;
+		if (likely(dst_ops->check == NULL))
+			dst_ops->check = xfrm_dst_check;
+		if (likely(dst_ops->destroy == NULL))
+			dst_ops->destroy = xfrm_dst_destroy;
+		if (likely(dst_ops->ifdown == NULL))
+			dst_ops->ifdown = xfrm_dst_ifdown;
+		if (likely(dst_ops->negative_advice == NULL))
+			dst_ops->negative_advice = xfrm_negative_advice;
+		if (likely(dst_ops->link_failure == NULL))
+			dst_ops->link_failure = xfrm_link_failure;
+		if (likely(dst_ops->get_mss == NULL))
+			dst_ops->get_mss = xfrm_get_mss;
+		if (likely(afinfo->garbage_collect == NULL))
+			afinfo->garbage_collect = __xfrm_garbage_collect;
+		xfrm_policy_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+
+int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_policy_afinfo_lock);
+	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
+		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;
+		else {
+			struct dst_ops *dst_ops = afinfo->dst_ops;
+			xfrm_policy_afinfo[afinfo->family] = NULL;
+			dst_ops->kmem_cachep = NULL;
+			dst_ops->check = NULL;
+			dst_ops->destroy = NULL;
+			dst_ops->ifdown = NULL;
+			dst_ops->negative_advice = NULL;
+			dst_ops->link_failure = NULL;
+			dst_ops->get_mss = NULL;
+			afinfo->garbage_collect = NULL;
+		}
+	}
+	write_unlock(&xfrm_policy_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+	struct xfrm_policy_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_policy_afinfo_lock);
+	afinfo = xfrm_policy_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);
+	read_unlock(&xfrm_policy_afinfo_lock);
+	return afinfo;
+}
+
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;
+	read_unlock(&afinfo->lock);
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	switch (event) {
+	case NETDEV_DOWN:
+		xfrm_flush_bundles();
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xfrm_dev_notifier = {
+	xfrm_dev_event,
+	NULL,
+	0
+};
+
+static void __init xfrm_policy_init(void)
+{
+	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
+					   sizeof(struct xfrm_dst),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!xfrm_dst_cache)
+		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+	register_netdevice_notifier(&xfrm_dev_notifier);
+}
+
+void __init xfrm_init(void)
+{
+	xfrm_state_init();
+	xfrm_policy_init();
+	xfrm_input_init();
+}
+
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
new file mode 100644
index 00000000000..1db59f11f37
--- /dev/null
+++ b/net/xfrm/xfrm_state.c
@@ -0,0 +1,1037 @@
+/*
+ * xfrm_state.c
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	YOSHIFUJI Hideaki @USAGI
+ * 		Split up af-specific functions
+ *	Derek Atkins <derek@ihtfp.com>
+ *		Add UDP Encapsulation
+ * 	
+ */
+
+#include <linux/workqueue.h>
+#include <net/xfrm.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+/* Each xfrm_state may be linked to two tables:
+
+   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
+   2. Hash table by daddr to find what SAs exist for given
+      destination/tunnel endpoint. (output)
+ */
+
+static DEFINE_SPINLOCK(xfrm_state_lock);
+
+/* Hash table to find appropriate SA towards given target (endpoint
+ * of tunnel or destination of transport mode) allowed by selector.
+ *
+ * Main use is finding SA after policy selected tunnel or transport mode.
+ * Also, it can be used by ah/esp icmp error handler to find offending SA.
+ */
+static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
+static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+
+DECLARE_WAIT_QUEUE_HEAD(km_waitq);
+EXPORT_SYMBOL(km_waitq);
+
+static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
+static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
+
+static struct work_struct xfrm_state_gc_work;
+static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static DEFINE_SPINLOCK(xfrm_state_gc_lock);
+
+static int xfrm_state_gc_flush_bundles;
+
+static void __xfrm_state_delete(struct xfrm_state *x);
+
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+
+static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+static void km_state_expired(struct xfrm_state *x, int hard);
+
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
+{
+	if (del_timer(&x->timer))
+		BUG();
+	if (x->aalg)
+		kfree(x->aalg);
+	if (x->ealg)
+		kfree(x->ealg);
+	if (x->calg)
+		kfree(x->calg);
+	if (x->encap)
+		kfree(x->encap);
+	if (x->type) {
+		x->type->destructor(x);
+		xfrm_put_type(x->type);
+	}
+	kfree(x);
+}
+
+static void xfrm_state_gc_task(void *data)
+{
+	struct xfrm_state *x;
+	struct list_head *entry, *tmp;
+	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+	if (xfrm_state_gc_flush_bundles) {
+		xfrm_state_gc_flush_bundles = 0;
+		xfrm_flush_bundles();
+	}
+
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+
+	list_for_each_safe(entry, tmp, &gc_list) {
+		x = list_entry(entry, struct xfrm_state, bydst);
+		xfrm_state_gc_destroy(x);
+	}
+	wake_up(&km_waitq);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+		return MAX_SCHEDULE_TIMEOUT-1;
+	else
+	        return secs*HZ;
+}
+
+static void xfrm_timer_handler(unsigned long data)
+{
+	struct xfrm_state *x = (struct xfrm_state*)data;
+	unsigned long now = (unsigned long)xtime.tv_sec;
+	long next = LONG_MAX;
+	int warn = 0;
+
+	spin_lock(&x->lock);
+	if (x->km.state == XFRM_STATE_DEAD)
+		goto out;
+	if (x->km.state == XFRM_STATE_EXPIRED)
+		goto expired;
+	if (x->lft.hard_add_expires_seconds) {
+		long tmo = x->lft.hard_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.hard_use_expires_seconds) {
+		long tmo = x->lft.hard_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			goto expired;
+		if (tmo < next)
+			next = tmo;
+	}
+	if (x->km.dying)
+		goto resched;
+	if (x->lft.soft_add_expires_seconds) {
+		long tmo = x->lft.soft_add_expires_seconds +
+			x->curlft.add_time - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+	if (x->lft.soft_use_expires_seconds) {
+		long tmo = x->lft.soft_use_expires_seconds +
+			(x->curlft.use_time ? : now) - now;
+		if (tmo <= 0)
+			warn = 1;
+		else if (tmo < next)
+			next = tmo;
+	}
+
+	if (warn)
+		km_state_expired(x, 0);
+resched:
+	if (next != LONG_MAX &&
+	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
+		xfrm_state_hold(x);
+	goto out;
+
+expired:
+	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
+		x->km.state = XFRM_STATE_EXPIRED;
+		wake_up(&km_waitq);
+		next = 2;
+		goto resched;
+	}
+	if (x->id.spi != 0)
+		km_state_expired(x, 1);
+	__xfrm_state_delete(x);
+
+out:
+	spin_unlock(&x->lock);
+	xfrm_state_put(x);
+}
+
+struct xfrm_state *xfrm_state_alloc(void)
+{
+	struct xfrm_state *x;
+
+	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
+
+	if (x) {
+		memset(x, 0, sizeof(struct xfrm_state));
+		atomic_set(&x->refcnt, 1);
+		atomic_set(&x->tunnel_users, 0);
+		INIT_LIST_HEAD(&x->bydst);
+		INIT_LIST_HEAD(&x->byspi);
+		init_timer(&x->timer);
+		x->timer.function = xfrm_timer_handler;
+		x->timer.data	  = (unsigned long)x;
+		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->lft.soft_byte_limit = XFRM_INF;
+		x->lft.soft_packet_limit = XFRM_INF;
+		x->lft.hard_byte_limit = XFRM_INF;
+		x->lft.hard_packet_limit = XFRM_INF;
+		spin_lock_init(&x->lock);
+	}
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_alloc);
+
+void __xfrm_state_destroy(struct xfrm_state *x)
+{
+	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
+
+	spin_lock_bh(&xfrm_state_gc_lock);
+	list_add(&x->bydst, &xfrm_state_gc_list);
+	spin_unlock_bh(&xfrm_state_gc_lock);
+	schedule_work(&xfrm_state_gc_work);
+}
+EXPORT_SYMBOL(__xfrm_state_destroy);
+
+static void __xfrm_state_delete(struct xfrm_state *x)
+{
+	if (x->km.state != XFRM_STATE_DEAD) {
+		x->km.state = XFRM_STATE_DEAD;
+		spin_lock(&xfrm_state_lock);
+		list_del(&x->bydst);
+		atomic_dec(&x->refcnt);
+		if (x->id.spi) {
+			list_del(&x->byspi);
+			atomic_dec(&x->refcnt);
+		}
+		spin_unlock(&xfrm_state_lock);
+		if (del_timer(&x->timer))
+			atomic_dec(&x->refcnt);
+
+		/* The number two in this test is the reference
+		 * mentioned in the comment below plus the reference
+		 * our caller holds.  A larger value means that
+		 * there are DSTs attached to this xfrm_state.
+		 */
+		if (atomic_read(&x->refcnt) > 2) {
+			xfrm_state_gc_flush_bundles = 1;
+			schedule_work(&xfrm_state_gc_work);
+		}
+
+		/* All xfrm_state objects are created by xfrm_state_alloc.
+		 * The xfrm_state_alloc call gives a reference, and that
+		 * is what we are dropping here.
+		 */
+		atomic_dec(&x->refcnt);
+	}
+}
+
+void xfrm_state_delete(struct xfrm_state *x)
+{
+	spin_lock_bh(&x->lock);
+	__xfrm_state_delete(x);
+	spin_unlock_bh(&x->lock);
+}
+EXPORT_SYMBOL(xfrm_state_delete);
+
+void xfrm_state_flush(u8 proto)
+{
+	int i;
+	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+restart:
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (!xfrm_state_kern(x) &&
+			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+				xfrm_state_hold(x);
+				spin_unlock_bh(&xfrm_state_lock);
+
+				xfrm_state_delete(x);
+				xfrm_state_put(x);
+
+				spin_lock_bh(&xfrm_state_lock);
+				goto restart;
+			}
+		}
+	}
+	spin_unlock_bh(&xfrm_state_lock);
+	wake_up(&km_waitq);
+}
+EXPORT_SYMBOL(xfrm_state_flush);
+
+static int
+xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
+		  struct xfrm_tmpl *tmpl,
+		  xfrm_address_t *daddr, xfrm_address_t *saddr,
+		  unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -1;
+	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
+	xfrm_state_put_afinfo(afinfo);
+	return 0;
+}
+
+struct xfrm_state *
+xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
+		struct flowi *fl, struct xfrm_tmpl *tmpl,
+		struct xfrm_policy *pol, int *err,
+		unsigned short family)
+{
+	unsigned h = xfrm_dst_hash(daddr, family);
+	struct xfrm_state *x, *x0;
+	int acquire_in_progress = 0;
+	int error = 0;
+	struct xfrm_state *best = NULL;
+	struct xfrm_state_afinfo *afinfo;
+	
+	afinfo = xfrm_state_get_afinfo(family);
+	if (afinfo == NULL) {
+		*err = -EAFNOSUPPORT;
+		return NULL;
+	}
+
+	spin_lock_bh(&xfrm_state_lock);
+	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+		if (x->props.family == family &&
+		    x->props.reqid == tmpl->reqid &&
+		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
+			/* Resolution logic:
+			   1. There is a valid state with matching selector.
+			      Done.
+			   2. Valid state with inappropriate selector. Skip.
+
+			   Entering area of "sysdeps".
+
+			   3. If state is not valid, selector is temporary,
+			      it selects only session which triggered
+			      previous resolution. Key manager will do
+			      something to install a state with proper
+			      selector.
+			 */
+			if (x->km.state == XFRM_STATE_VALID) {
+				if (!xfrm_selector_match(&x->sel, fl, family))
+					continue;
+				if (!best ||
+				    best->km.dying > x->km.dying ||
+				    (best->km.dying == x->km.dying &&
+				     best->curlft.add_time < x->curlft.add_time))
+					best = x;
+			} else if (x->km.state == XFRM_STATE_ACQ) {
+				acquire_in_progress = 1;
+			} else if (x->km.state == XFRM_STATE_ERROR ||
+				   x->km.state == XFRM_STATE_EXPIRED) {
+				if (xfrm_selector_match(&x->sel, fl, family))
+					error = -ESRCH;
+			}
+		}
+	}
+
+	x = best;
+	if (!x && !error && !acquire_in_progress) {
+		x0 = afinfo->state_lookup(&tmpl->id.daddr, tmpl->id.spi, tmpl->id.proto);
+		if (x0 != NULL) {
+			xfrm_state_put(x0);
+			error = -EEXIST;
+			goto out;
+		}
+		x = xfrm_state_alloc();
+		if (x == NULL) {
+			error = -ENOMEM;
+			goto out;
+		}
+		/* Initialize temporary selector matching only
+		 * to current session. */
+		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+
+		if (km_query(x, tmpl, pol) == 0) {
+			x->km.state = XFRM_STATE_ACQ;
+			list_add_tail(&x->bydst, xfrm_state_bydst+h);
+			xfrm_state_hold(x);
+			if (x->id.spi) {
+				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+				list_add(&x->byspi, xfrm_state_byspi+h);
+				xfrm_state_hold(x);
+			}
+			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+			xfrm_state_hold(x);
+			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+			add_timer(&x->timer);
+		} else {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			error = -ESRCH;
+		}
+	}
+out:
+	if (x)
+		xfrm_state_hold(x);
+	else
+		*err = acquire_in_progress ? -EAGAIN : error;
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+
+static void __xfrm_state_insert(struct xfrm_state *x)
+{
+	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+
+	list_add(&x->bydst, xfrm_state_bydst+h);
+	xfrm_state_hold(x);
+
+	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+
+	list_add(&x->byspi, xfrm_state_byspi+h);
+	xfrm_state_hold(x);
+
+	if (!mod_timer(&x->timer, jiffies + HZ))
+		xfrm_state_hold(x);
+
+	wake_up(&km_waitq);
+}
+
+void xfrm_state_insert(struct xfrm_state *x)
+{
+	spin_lock_bh(&xfrm_state_lock);
+	__xfrm_state_insert(x);
+	spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_state_insert);
+
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
+
+int xfrm_state_add(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int family;
+	int err;
+
+	family = x->props.family;
+	afinfo = xfrm_state_get_afinfo(family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	if (x1) {
+		xfrm_state_put(x1);
+		x1 = NULL;
+		err = -EEXIST;
+		goto out;
+	}
+
+	if (x->km.seq) {
+		x1 = __xfrm_find_acq_byseq(x->km.seq);
+		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
+			xfrm_state_put(x1);
+			x1 = NULL;
+		}
+	}
+
+	if (!x1)
+		x1 = afinfo->find_acq(
+			x->props.mode, x->props.reqid, x->id.proto,
+			&x->id.daddr, &x->props.saddr, 0);
+
+	__xfrm_state_insert(x);
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (x1) {
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_add);
+
+int xfrm_state_update(struct xfrm_state *x)
+{
+	struct xfrm_state_afinfo *afinfo;
+	struct xfrm_state *x1;
+	int err;
+
+	afinfo = xfrm_state_get_afinfo(x->props.family);
+	if (unlikely(afinfo == NULL))
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+
+	err = -ESRCH;
+	if (!x1)
+		goto out;
+
+	if (xfrm_state_kern(x1)) {
+		xfrm_state_put(x1);
+		err = -EEXIST;
+		goto out;
+	}
+
+	if (x1->km.state == XFRM_STATE_ACQ) {
+		__xfrm_state_insert(x);
+		x = NULL;
+	}
+	err = 0;
+
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+
+	if (err)
+		return err;
+
+	if (!x) {
+		xfrm_state_delete(x1);
+		xfrm_state_put(x1);
+		return 0;
+	}
+
+	err = -EINVAL;
+	spin_lock_bh(&x1->lock);
+	if (likely(x1->km.state == XFRM_STATE_VALID)) {
+		if (x->encap && x1->encap)
+			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
+		x1->km.dying = 0;
+
+		if (!mod_timer(&x1->timer, jiffies + HZ))
+			xfrm_state_hold(x1);
+		if (x1->curlft.use_time)
+			xfrm_state_check_expire(x1);
+
+		err = 0;
+	}
+	spin_unlock_bh(&x1->lock);
+
+	xfrm_state_put(x1);
+
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_update);
+
+int xfrm_state_check_expire(struct xfrm_state *x)
+{
+	if (!x->curlft.use_time)
+		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+	if (x->km.state != XFRM_STATE_VALID)
+		return -EINVAL;
+
+	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
+	    x->curlft.packets >= x->lft.hard_packet_limit) {
+		km_state_expired(x, 1);
+		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
+			xfrm_state_hold(x);
+		return -EINVAL;
+	}
+
+	if (!x->km.dying &&
+	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
+	     x->curlft.packets >= x->lft.soft_packet_limit))
+		km_state_expired(x, 0);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_state_check_expire);
+
+static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
+		- skb_headroom(skb);
+
+	if (nhead > 0)
+		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+
+	/* Check tail too... */
+	return 0;
+}
+
+int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = xfrm_state_check_expire(x);
+	if (err < 0)
+		goto err;
+	err = xfrm_state_check_space(x, skb);
+err:
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_check);
+
+struct xfrm_state *
+xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
+		  unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->state_lookup(daddr, spi, proto);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup);
+
+struct xfrm_state *
+xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
+	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
+	      int create, unsigned short family)
+{
+	struct xfrm_state *x;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return NULL;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq);
+
+/* Silly enough, but I'm lazy to build resolution list */
+
+static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
+{
+	int i;
+	struct xfrm_state *x;
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
+				xfrm_state_hold(x);
+				return x;
+			}
+		}
+	}
+	return NULL;
+}
+
+struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
+{
+	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = __xfrm_find_acq_byseq(seq);
+	spin_unlock_bh(&xfrm_state_lock);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq_byseq);
+
+u32 xfrm_get_acqseq(void)
+{
+	u32 res;
+	static u32 acqseq;
+	static DEFINE_SPINLOCK(acqseq_lock);
+
+	spin_lock_bh(&acqseq_lock);
+	res = (++acqseq ? : ++acqseq);
+	spin_unlock_bh(&acqseq_lock);
+	return res;
+}
+EXPORT_SYMBOL(xfrm_get_acqseq);
+
+void
+xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
+{
+	u32 h;
+	struct xfrm_state *x0;
+
+	if (x->id.spi)
+		return;
+
+	if (minspi == maxspi) {
+		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
+		if (x0) {
+			xfrm_state_put(x0);
+			return;
+		}
+		x->id.spi = minspi;
+	} else {
+		u32 spi = 0;
+		minspi = ntohl(minspi);
+		maxspi = ntohl(maxspi);
+		for (h=0; h<maxspi-minspi+1; h++) {
+			spi = minspi + net_random()%(maxspi-minspi+1);
+			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+			if (x0 == NULL) {
+				x->id.spi = htonl(spi);
+				break;
+			}
+			xfrm_state_put(x0);
+		}
+	}
+	if (x->id.spi) {
+		spin_lock_bh(&xfrm_state_lock);
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+		list_add(&x->byspi, xfrm_state_byspi+h);
+		xfrm_state_hold(x);
+		spin_unlock_bh(&xfrm_state_lock);
+		wake_up(&km_waitq);
+	}
+}
+EXPORT_SYMBOL(xfrm_alloc_spi);
+
+int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
+		    void *data)
+{
+	int i;
+	struct xfrm_state *x;
+	int count = 0;
+	int err = 0;
+
+	spin_lock_bh(&xfrm_state_lock);
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+				count++;
+		}
+	}
+	if (count == 0) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+				continue;
+			err = func(x, --count, data);
+			if (err)
+				goto out;
+		}
+	}
+out:
+	spin_unlock_bh(&xfrm_state_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_walk);
+
+int xfrm_replay_check(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+
+	seq = ntohl(seq);
+
+	if (unlikely(seq == 0))
+		return -EINVAL;
+
+	if (likely(seq > x->replay.seq))
+		return 0;
+
+	diff = x->replay.seq - seq;
+	if (diff >= x->props.replay_window) {
+		x->stats.replay_window++;
+		return -EINVAL;
+	}
+
+	if (x->replay.bitmap & (1U << diff)) {
+		x->stats.replay++;
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_replay_check);
+
+void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
+{
+	u32 diff;
+
+	seq = ntohl(seq);
+
+	if (seq > x->replay.seq) {
+		diff = seq - x->replay.seq;
+		if (diff < x->props.replay_window)
+			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
+		else
+			x->replay.bitmap = 1;
+		x->replay.seq = seq;
+	} else {
+		diff = x->replay.seq - seq;
+		x->replay.bitmap |= (1U << diff);
+	}
+}
+EXPORT_SYMBOL(xfrm_replay_advance);
+
+static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
+static DEFINE_RWLOCK(xfrm_km_lock);
+
+static void km_state_expired(struct xfrm_state *x, int hard)
+{
+	struct xfrm_mgr *km;
+
+	if (hard)
+		x->km.state = XFRM_STATE_EXPIRED;
+	else
+		x->km.dying = 1;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		km->notify(x, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
+{
+	int err = -EINVAL;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+
+int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
+{
+	int err = -EINVAL;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->new_mapping)
+			err = km->new_mapping(x, ipaddr, sport);
+		if (!err)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+EXPORT_SYMBOL(km_new_mapping);
+
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
+{
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list)
+		if (km->notify_policy)
+			km->notify_policy(pol, dir, hard);
+	read_unlock(&xfrm_km_lock);
+
+	if (hard)
+		wake_up(&km_waitq);
+}
+
+int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+{
+	int err;
+	u8 *data;
+	struct xfrm_mgr *km;
+	struct xfrm_policy *pol = NULL;
+
+	if (optlen <= 0 || optlen > PAGE_SIZE)
+		return -EMSGSIZE;
+
+	data = kmalloc(optlen, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	err = -EFAULT;
+	if (copy_from_user(data, optval, optlen))
+		goto out;
+
+	err = -EINVAL;
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		pol = km->compile_policy(sk->sk_family, optname, data,
+					 optlen, &err);
+		if (err >= 0)
+			break;
+	}
+	read_unlock(&xfrm_km_lock);
+
+	if (err >= 0) {
+		xfrm_sk_policy_insert(sk, err, pol);
+		xfrm_pol_put(pol);
+		err = 0;
+	}
+
+out:
+	kfree(data);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_user_policy);
+
+int xfrm_register_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_add_tail(&km->list, &xfrm_km_list);
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_register_km);
+
+int xfrm_unregister_km(struct xfrm_mgr *km)
+{
+	write_lock_bh(&xfrm_km_lock);
+	list_del(&km->list);
+	write_unlock_bh(&xfrm_km_lock);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_unregister_km);
+
+int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
+		err = -ENOBUFS;
+	else {
+		afinfo->state_bydst = xfrm_state_bydst;
+		afinfo->state_byspi = xfrm_state_byspi;
+		xfrm_state_afinfo[afinfo->family] = afinfo;
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_register_afinfo);
+
+int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	int err = 0;
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	if (unlikely(afinfo->family >= NPROTO))
+		return -EAFNOSUPPORT;
+	write_lock(&xfrm_state_afinfo_lock);
+	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
+		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+			err = -EINVAL;
+		else {
+			xfrm_state_afinfo[afinfo->family] = NULL;
+			afinfo->state_byspi = NULL;
+			afinfo->state_bydst = NULL;
+		}
+	}
+	write_unlock(&xfrm_state_afinfo_lock);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
+
+static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
+{
+	struct xfrm_state_afinfo *afinfo;
+	if (unlikely(family >= NPROTO))
+		return NULL;
+	read_lock(&xfrm_state_afinfo_lock);
+	afinfo = xfrm_state_afinfo[family];
+	if (likely(afinfo != NULL))
+		read_lock(&afinfo->lock);
+	read_unlock(&xfrm_state_afinfo_lock);
+	return afinfo;
+}
+
+static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+{
+	if (unlikely(afinfo == NULL))
+		return;
+	read_unlock(&afinfo->lock);
+}
+
+/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
+void xfrm_state_delete_tunnel(struct xfrm_state *x)
+{
+	if (x->tunnel) {
+		struct xfrm_state *t = x->tunnel;
+
+		if (atomic_read(&t->tunnel_users) == 2)
+			xfrm_state_delete(t);
+		atomic_dec(&t->tunnel_users);
+		xfrm_state_put(t);
+		x->tunnel = NULL;
+	}
+}
+EXPORT_SYMBOL(xfrm_state_delete_tunnel);
+
+int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+{
+	int res = mtu;
+
+	res -= x->props.header_len;
+
+	for (;;) {
+		int m = res;
+
+		if (m < 68)
+			return 68;
+
+		spin_lock_bh(&x->lock);
+		if (x->km.state == XFRM_STATE_VALID &&
+		    x->type && x->type->get_max_size)
+			m = x->type->get_max_size(x, m);
+		else
+			m += x->props.header_len;
+		spin_unlock_bh(&x->lock);
+
+		if (m <= mtu)
+			break;
+		res -= (m - mtu);
+	}
+
+	return res;
+}
+
+EXPORT_SYMBOL(xfrm_state_mtu);
+ 
+void __init xfrm_state_init(void)
+{
+	int i;
+
+	for (i=0; i<XFRM_DST_HSIZE; i++) {
+		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
+		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
+	}
+	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
+}
+
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
new file mode 100644
index 00000000000..63661b0fd73
--- /dev/null
+++ b/net/xfrm/xfrm_user.c
@@ -0,0 +1,1253 @@
+/* xfrm_user.c: User interface to configure xfrm engine.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ * Changes:
+ *	Mitsuru KANDA @USAGI
+ * 	Kazunori MIYAZAWA @USAGI
+ * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * 		IPv6 support
+ * 	
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <asm/uaccess.h>
+
+static struct sock *xfrm_nl;
+
+static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
+{
+	struct rtattr *rt = xfrma[type - 1];
+	struct xfrm_algo *algp;
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+		return -EINVAL;
+
+	algp = RTA_DATA(rt);
+	switch (type) {
+	case XFRMA_ALG_AUTH:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "digest_null") != 0)
+			return -EINVAL;
+		break;
+
+	case XFRMA_ALG_CRYPT:
+		if (!algp->alg_key_len &&
+		    strcmp(algp->alg_name, "cipher_null") != 0)
+			return -EINVAL;
+		break;
+
+	case XFRMA_ALG_COMP:
+		/* Zero length keys are legal.  */
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
+	return 0;
+}
+
+static int verify_encap_tmpl(struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_ENCAP - 1];
+	struct xfrm_encap_tmpl *encap;
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int verify_newsa_info(struct xfrm_usersa_info *p,
+			     struct rtattr **xfrma)
+{
+	int err;
+
+	err = -EINVAL;
+	switch (p->family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		err = -EAFNOSUPPORT;
+		goto out;
+#endif
+
+	default:
+		goto out;
+	};
+
+	err = -EINVAL;
+	switch (p->id.proto) {
+	case IPPROTO_AH:
+		if (!xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1]	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_ESP:
+		if ((!xfrma[XFRMA_ALG_AUTH-1] &&
+		     !xfrma[XFRMA_ALG_CRYPT-1])	||
+		    xfrma[XFRMA_ALG_COMP-1])
+			goto out;
+		break;
+
+	case IPPROTO_COMP:
+		if (!xfrma[XFRMA_ALG_COMP-1]	||
+		    xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1])
+			goto out;
+		break;
+
+	default:
+		goto out;
+	};
+
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT)))
+		goto out;
+	if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP)))
+		goto out;
+	if ((err = verify_encap_tmpl(xfrma)))
+		goto out;
+
+	err = -EINVAL;
+	switch (p->mode) {
+	case 0:
+	case 1:
+		break;
+
+	default:
+		goto out;
+	};
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
+			   struct xfrm_algo_desc *(*get_byname)(char *, int),
+			   struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	struct xfrm_algo *p, *ualg;
+	struct xfrm_algo_desc *algo;
+
+	if (!rta)
+		return 0;
+
+	ualg = RTA_DATA(rta);
+
+	algo = get_byname(ualg->alg_name, 1);
+	if (!algo)
+		return -ENOSYS;
+	*props = algo->desc.sadb_alg_id;
+
+	p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
+	*algpp = p;
+	return 0;
+}
+
+static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	struct xfrm_encap_tmpl *p, *uencap;
+
+	if (!rta)
+		return 0;
+
+	uencap = RTA_DATA(rta);
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, uencap, sizeof(*p));
+	*encapp = p;
+	return 0;
+}
+
+static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{
+	memcpy(&x->id, &p->id, sizeof(x->id));
+	memcpy(&x->sel, &p->sel, sizeof(x->sel));
+	memcpy(&x->lft, &p->lft, sizeof(x->lft));
+	x->props.mode = p->mode;
+	x->props.replay_window = p->replay_window;
+	x->props.reqid = p->reqid;
+	x->props.family = p->family;
+	x->props.saddr = p->saddr;
+	x->props.flags = p->flags;
+}
+
+static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
+					       struct rtattr **xfrma,
+					       int *errp)
+{
+	struct xfrm_state *x = xfrm_state_alloc();
+	int err = -ENOMEM;
+
+	if (!x)
+		goto error_no_put;
+
+	copy_from_user_state(x, p);
+
+	if ((err = attach_one_algo(&x->aalg, &x->props.aalgo,
+				   xfrm_aalg_get_byname,
+				   xfrma[XFRMA_ALG_AUTH-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
+				   xfrm_ealg_get_byname,
+				   xfrma[XFRMA_ALG_CRYPT-1])))
+		goto error;
+	if ((err = attach_one_algo(&x->calg, &x->props.calgo,
+				   xfrm_calg_get_byname,
+				   xfrma[XFRMA_ALG_COMP-1])))
+		goto error;
+	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
+		goto error;
+
+	err = -ENOENT;
+	x->type = xfrm_get_type(x->id.proto, x->props.family);
+	if (x->type == NULL)
+		goto error;
+
+	err = x->type->init_state(x, NULL);
+	if (err)
+		goto error;
+
+	x->curlft.add_time = (unsigned long) xtime.tv_sec;
+	x->km.state = XFRM_STATE_VALID;
+	x->km.seq = p->seq;
+
+	return x;
+
+error:
+	x->km.state = XFRM_STATE_DEAD;
+	xfrm_state_put(x);
+error_no_put:
+	*errp = err;
+	return NULL;
+}
+
+static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+	int err;
+
+	err = verify_newsa_info(p, (struct rtattr **) xfrma);
+	if (err)
+		return err;
+
+	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
+	if (!x)
+		return err;
+
+	if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
+		err = xfrm_state_add(x);
+	else
+		err = xfrm_state_update(x);
+
+	if (err < 0) {
+		x->km.state = XFRM_STATE_DEAD;
+		xfrm_state_put(x);
+	}
+
+	return err;
+}
+
+static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_state *x;
+	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
+
+	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	if (x == NULL)
+		return -ESRCH;
+
+	if (xfrm_state_kern(x)) {
+		xfrm_state_put(x);
+		return -EPERM;
+	}
+
+	xfrm_state_delete(x);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
+{
+	memcpy(&p->id, &x->id, sizeof(p->id));
+	memcpy(&p->sel, &x->sel, sizeof(p->sel));
+	memcpy(&p->lft, &x->lft, sizeof(p->lft));
+	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
+	memcpy(&p->stats, &x->stats, sizeof(p->stats));
+	p->saddr = x->props.saddr;
+	p->mode = x->props.mode;
+	p->replay_window = x->props.replay_window;
+	p->reqid = x->props.reqid;
+	p->family = x->props.family;
+	p->flags = x->props.flags;
+	p->seq = x->km.seq;
+}
+
+struct xfrm_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+	int start_idx;
+	int this_idx;
+};
+
+static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct xfrm_usersa_info *p;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWSA, sizeof(*p));
+	nlh->nlmsg_flags = sp->nlmsg_flags;
+
+	p = NLMSG_DATA(nlh);
+	copy_to_user_state(x, p);
+
+	if (x->aalg)
+		RTA_PUT(skb, XFRMA_ALG_AUTH,
+			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
+	if (x->ealg)
+		RTA_PUT(skb, XFRMA_ALG_CRYPT,
+			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
+	if (x->calg)
+		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
+
+	if (x->encap)
+		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+	info.this_idx = 0;
+	info.start_idx = cb->args[0];
+	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	cb->args[0] = info.this_idx;
+
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
+					  struct xfrm_state *x, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.nlmsg_flags = 0;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_state(x, 0, &info)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+
+static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
+	struct xfrm_state *x;
+	struct sk_buff *resp_skb;
+	int err;
+
+	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	err = -ESRCH;
+	if (x == NULL)
+		goto out_noput;
+
+	resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+	} else {
+		err = netlink_unicast(xfrm_nl, resp_skb,
+				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	}
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+
+static int verify_userspi_info(struct xfrm_userspi_info *p)
+{
+	switch (p->info.id.proto) {
+	case IPPROTO_AH:
+	case IPPROTO_ESP:
+		break;
+
+	case IPPROTO_COMP:
+		/* IPCOMP spi is 16-bits. */
+		if (p->max >= 0x10000)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	if (p->min > p->max)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_state *x;
+	struct xfrm_userspi_info *p;
+	struct sk_buff *resp_skb;
+	xfrm_address_t *daddr;
+	int family;
+	int err;
+
+	p = NLMSG_DATA(nlh);
+	err = verify_userspi_info(p);
+	if (err)
+		goto out_noput;
+
+	family = p->info.family;
+	daddr = &p->info.id.daddr;
+
+	x = NULL;
+	if (p->info.seq) {
+		x = xfrm_find_acq_byseq(p->info.seq);
+		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
+	if (!x)
+		x = xfrm_find_acq(p->info.mode, p->info.reqid,
+				  p->info.id.proto, daddr,
+				  &p->info.saddr, 1,
+				  family);
+	err = -ENOENT;
+	if (x == NULL)
+		goto out_noput;
+
+	resp_skb = ERR_PTR(-ENOENT);
+
+	spin_lock_bh(&x->lock);
+	if (x->km.state != XFRM_STATE_DEAD) {
+		xfrm_alloc_spi(x, htonl(p->min), htonl(p->max));
+		if (x->id.spi)
+			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
+	}
+	spin_unlock_bh(&x->lock);
+
+	if (IS_ERR(resp_skb)) {
+		err = PTR_ERR(resp_skb);
+		goto out;
+	}
+
+	err = netlink_unicast(xfrm_nl, resp_skb,
+			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+out:
+	xfrm_state_put(x);
+out_noput:
+	return err;
+}
+
+static int verify_policy_dir(__u8 dir)
+{
+	switch (dir) {
+	case XFRM_POLICY_IN:
+	case XFRM_POLICY_OUT:
+	case XFRM_POLICY_FWD:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	return 0;
+}
+
+static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
+{
+	switch (p->share) {
+	case XFRM_SHARE_ANY:
+	case XFRM_SHARE_SESSION:
+	case XFRM_SHARE_USER:
+	case XFRM_SHARE_UNIQUE:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->action) {
+	case XFRM_POLICY_ALLOW:
+	case XFRM_POLICY_BLOCK:
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	switch (p->sel.family) {
+	case AF_INET:
+		break;
+
+	case AF_INET6:
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		break;
+#else
+		return  -EAFNOSUPPORT;
+#endif
+
+	default:
+		return -EINVAL;
+	};
+
+	return verify_policy_dir(p->dir);
+}
+
+static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
+			   int nr)
+{
+	int i;
+
+	xp->xfrm_nr = nr;
+	for (i = 0; i < nr; i++, ut++) {
+		struct xfrm_tmpl *t = &xp->xfrm_vec[i];
+
+		memcpy(&t->id, &ut->id, sizeof(struct xfrm_id));
+		memcpy(&t->saddr, &ut->saddr,
+		       sizeof(xfrm_address_t));
+		t->reqid = ut->reqid;
+		t->mode = ut->mode;
+		t->share = ut->share;
+		t->optional = ut->optional;
+		t->aalgos = ut->aalgos;
+		t->ealgos = ut->ealgos;
+		t->calgos = ut->calgos;
+	}
+}
+
+static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
+	struct xfrm_user_tmpl *utmpl;
+	int nr;
+
+	if (!rt) {
+		pol->xfrm_nr = 0;
+	} else {
+		nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
+
+		if (nr > XFRM_MAX_DEPTH)
+			return -EINVAL;
+
+		copy_templates(pol, RTA_DATA(rt), nr);
+	}
+	return 0;
+}
+
+static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
+{
+	xp->priority = p->priority;
+	xp->index = p->index;
+	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
+	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
+	xp->action = p->action;
+	xp->flags = p->flags;
+	xp->family = p->sel.family;
+	/* XXX xp->share = p->share; */
+}
+
+static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
+{
+	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
+	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
+	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
+	p->priority = xp->priority;
+	p->index = xp->index;
+	p->sel.family = xp->family;
+	p->dir = dir;
+	p->action = xp->action;
+	p->flags = xp->flags;
+	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
+}
+
+static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
+{
+	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
+	int err;
+
+	if (!xp) {
+		*errp = -ENOMEM;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, p);
+	err = copy_from_user_tmpl(xp, xfrma);
+	if (err) {
+		*errp = err;
+		kfree(xp);
+		xp = NULL;
+	}
+
+	return xp;
+}
+
+static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
+	struct xfrm_policy *xp;
+	int err;
+	int excl;
+
+	err = verify_newpolicy_info(p);
+	if (err)
+		return err;
+
+	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
+	if (!xp)
+		return err;
+
+	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
+	err = xfrm_policy_insert(p->dir, xp, excl);
+	if (err) {
+		kfree(xp);
+		return err;
+	}
+
+	xfrm_pol_put(xp);
+
+	return 0;
+}
+
+static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
+	int i;
+
+	if (xp->xfrm_nr == 0)
+		return 0;
+
+	for (i = 0; i < xp->xfrm_nr; i++) {
+		struct xfrm_user_tmpl *up = &vec[i];
+		struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
+
+		memcpy(&up->id, &kp->id, sizeof(up->id));
+		up->family = xp->family;
+		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
+		up->reqid = kp->reqid;
+		up->mode = kp->mode;
+		up->share = kp->share;
+		up->optional = kp->optional;
+		up->aalgos = kp->aalgos;
+		up->ealgos = kp->ealgos;
+		up->calgos = kp->calgos;
+	}
+	RTA_PUT(skb, XFRMA_TMPL,
+		(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr),
+		vec);
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
+{
+	struct xfrm_dump_info *sp = ptr;
+	struct xfrm_userpolicy_info *p;
+	struct sk_buff *in_skb = sp->in_skb;
+	struct sk_buff *skb = sp->out_skb;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	if (sp->this_idx < sp->start_idx)
+		goto out;
+
+	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
+			sp->nlmsg_seq,
+			XFRM_MSG_NEWPOLICY, sizeof(*p));
+	p = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = sp->nlmsg_flags;
+
+	copy_to_user_policy(xp, p, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+out:
+	sp->this_idx++;
+	return 0;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct xfrm_dump_info info;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+	info.this_idx = 0;
+	info.start_idx = cb->args[0];
+	(void) xfrm_policy_walk(dump_one_policy, &info);
+	cb->args[0] = info.this_idx;
+
+	return skb->len;
+}
+
+static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
+					  struct xfrm_policy *xp,
+					  int dir, u32 seq)
+{
+	struct xfrm_dump_info info;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = seq;
+	info.nlmsg_flags = 0;
+	info.this_idx = info.start_idx = 0;
+
+	if (dump_one_policy(xp, dir, 0, &info) < 0) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	return skb;
+}
+
+static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_policy *xp;
+	struct xfrm_userpolicy_id *p;
+	int err;
+	int delete;
+
+	p = NLMSG_DATA(nlh);
+	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
+
+	err = verify_policy_dir(p->dir);
+	if (err)
+		return err;
+
+	if (p->index)
+		xp = xfrm_policy_byid(p->dir, p->index, delete);
+	else
+		xp = xfrm_policy_bysel(p->dir, &p->sel, delete);
+	if (xp == NULL)
+		return -ENOENT;
+
+	if (!delete) {
+		struct sk_buff *resp_skb;
+
+		resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq);
+		if (IS_ERR(resp_skb)) {
+			err = PTR_ERR(resp_skb);
+		} else {
+			err = netlink_unicast(xfrm_nl, resp_skb,
+					      NETLINK_CB(skb).pid,
+					      MSG_DONTWAIT);
+		}
+	}
+
+	xfrm_pol_put(xp);
+
+	return err;
+}
+
+static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
+
+	xfrm_state_flush(p->proto);
+	return 0;
+}
+
+static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+{
+	xfrm_policy_flush();
+	return 0;
+}
+
+static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* UPD SA */
+	NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */
+	NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)),	/* FLUSH SA */
+	NLMSG_LENGTH(0),				/* FLUSH POLICY */
+};
+
+static struct xfrm_link {
+	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
+	int (*dump)(struct sk_buff *, struct netlink_callback *);
+} xfrm_dispatch[] = {
+	{	.doit	=	xfrm_add_sa, 		},
+	{	.doit	=	xfrm_del_sa, 		},
+	{
+		.doit	=	xfrm_get_sa,
+		.dump	=	xfrm_dump_sa,
+	},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_get_policy 	},
+	{
+		.doit	=	xfrm_get_policy,
+		.dump	=	xfrm_dump_policy,
+	},
+	{	.doit	=	xfrm_alloc_userspi	},
+	{},
+	{},
+	{	.doit	=	xfrm_add_policy 	},
+	{	.doit	=	xfrm_add_sa, 		},
+	{},
+	{	.doit	=	xfrm_flush_sa		},
+	{	.doit	=	xfrm_flush_policy	},
+};
+
+static int xfrm_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtattr *xfrma[XFRMA_MAX];
+	struct xfrm_link *link;
+	int type, min_len;
+
+	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < XFRM_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > XFRM_MSG_MAX)
+		goto err_einval;
+
+	type -= XFRM_MSG_BASE;
+	link = &xfrm_dispatch[type];
+
+	/* All operations require privileges, even GET */
+	if (security_netlink_recv(skb)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
+		u32 rlen;
+
+		if (link->dump == NULL)
+			goto err_einval;
+
+		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
+						link->dump,
+						xfrm_done)) != 0) {
+			return -1;
+		}
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return -1;
+	}
+
+	memset(xfrma, 0, sizeof(xfrma));
+
+	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
+		goto err_einval;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned short flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > XFRMA_MAX)
+					goto err_einval;
+				xfrma[flavor - 1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		goto err_einval;
+	*errp = link->doit(skb, nlh, (void **) &xfrma);
+
+	return *errp;
+
+err_einval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+static int xfrm_user_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr *nlh;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) ||
+		    skb->len < nlh->nlmsg_len)
+			return 0;
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
+			if (err == 0)
+				return -1;
+			netlink_ack(skb, nlh, err);
+		} else if (nlh->nlmsg_flags & NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+
+	return 0;
+}
+
+static void xfrm_netlink_rcv(struct sock *sk, int len)
+{
+	do {
+		struct sk_buff *skb;
+
+		down(&xfrm_cfg_sem);
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+			if (xfrm_user_rcv_skb(skb)) {
+				if (skb->len)
+					skb_queue_head(&sk->sk_receive_queue,
+						       skb);
+				else
+					kfree_skb(skb);
+				break;
+			}
+			kfree_skb(skb);
+		}
+
+		up(&xfrm_cfg_sem);
+
+	} while (xfrm_nl && xfrm_nl->sk_receive_queue.qlen);
+}
+
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
+{
+	struct xfrm_user_expire *ue;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
+			sizeof(*ue));
+	ue = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_state(x, &ue->state);
+	ue->hard = (hard != 0) ? 1 : 0;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_expire(skb, x, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+
+static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
+			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
+			 int dir)
+{
+	struct xfrm_user_acquire *ua;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	__u32 seq = xfrm_get_acqseq();
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
+			sizeof(*ua));
+	ua = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	memcpy(&ua->id, &x->id, sizeof(ua->id));
+	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
+	memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
+	copy_to_user_policy(xp, &ua->policy, dir);
+	ua->aalgos = xt->aalgos;
+	ua->ealgos = xt->ealgos;
+	ua->calgos = xt->calgos;
+	ua->seq = x->km.seq = seq;
+
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
+			     struct xfrm_policy *xp, int dir)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_acquire(skb, x, xt, xp, dir) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+}
+
+/* User gives us xfrm_user_policy_info followed by an array of 0
+ * or more templates.
+ */
+static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+					       u8 *data, int len, int *dir)
+{
+	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
+	struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1);
+	struct xfrm_policy *xp;
+	int nr;
+
+	switch (family) {
+	case AF_INET:
+		if (opt != IP_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		if (opt != IPV6_XFRM_POLICY) {
+			*dir = -EOPNOTSUPP;
+			return NULL;
+		}
+		break;
+#endif
+	default:
+		*dir = -EINVAL;
+		return NULL;
+	}
+
+	*dir = -EINVAL;
+
+	if (len < sizeof(*p) ||
+	    verify_newpolicy_info(p))
+		return NULL;
+
+	nr = ((len - sizeof(*p)) / sizeof(*ut));
+	if (nr > XFRM_MAX_DEPTH)
+		return NULL;
+
+	xp = xfrm_policy_alloc(GFP_KERNEL);
+	if (xp == NULL) {
+		*dir = -ENOBUFS;
+		return NULL;
+	}
+
+	copy_from_user_policy(xp, p);
+	copy_templates(xp, ut, nr);
+
+	*dir = p->dir;
+
+	return xp;
+}
+
+static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
+			   int dir, int hard)
+{
+	struct xfrm_user_polexpire *upe;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
+	upe = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	copy_to_user_policy(xp, &upe->pol, dir);
+	if (copy_to_user_tmpl(xp, skb) < 0)
+		goto nlmsg_failure;
+	upe->hard = !!hard;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_polexpire(skb, xp, dir, hard) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
+
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+}
+
+static struct xfrm_mgr netlink_mgr = {
+	.id		= "netlink",
+	.notify		= xfrm_send_state_notify,
+	.acquire	= xfrm_send_acquire,
+	.compile_policy	= xfrm_compile_policy,
+	.notify_policy	= xfrm_send_policy_notify,
+};
+
+static int __init xfrm_user_init(void)
+{
+	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+
+	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+	if (xfrm_nl == NULL)
+		return -ENOMEM;
+
+	xfrm_register_km(&netlink_mgr);
+
+	return 0;
+}
+
+static void __exit xfrm_user_exit(void)
+{
+	xfrm_unregister_km(&netlink_mgr);
+	sock_release(xfrm_nl->sk_socket);
+}
+
+module_init(xfrm_user_init);
+module_exit(xfrm_user_exit);
+MODULE_LICENSE("GPL");
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /net
download	linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2 linux-3.10-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip